anki/qt/i18n/check-po-files.py
2020-01-03 07:59:00 +10:00

109 lines
2.5 KiB
Python

#!/usr/bin/env python3
#
# locate any translations that have invalid format strings
#
import os, re, sys
# directory whose files are scanned by the loop at the bottom of this script
po_dir = "translations/anki.pot"
# an entry line: an optional msgid/msgid_plural/msgstr label (group 1), an
# optional single-digit "[n]" index (group 2), a space, then the quoted
# text (group 3)
msg_re = re.compile(r"^(msgid|msgid_plural|msgstr|)(\[[\d]\])? \"(.*)\"$")
# a line consisting solely of a quoted string, i.e. the continuation of the
# previous msgid/msgstr; group 1 is the text between the quotes
cont_re = re.compile(r"^\"(.*)\"$")
# a percent-style placeholder: "%", an optional "(name)" part, any run of
# "#", "+", "-", digits or ".", and finally a single ASCII letter
pct_re = re.compile(r"%(?:\([^\)]+?\))?[#+\-\d.]*[a-zA-Z]")
# a "#: " comment line; group 1 is the remainder of the line
path_re = re.compile(r"#: (.+)$")
def check_reps(path, t1, strs):
    """Compare the placeholders of a source string with its translations.

    Args:
        path: Location text that is included in the report.
        t1: The source (msgid) text; None is treated as an empty string.
        strs: List of translated strings for that source text.

    Returns:
        None when every entry of strs is empty (or strs itself is empty),
        or when every entry passes reps_match() against the source text.
        Otherwise a report string made of path, the source text and the
        list of translations, each followed by a newline.
    """
    # nothing translated yet: there is nothing to validate
    if not any(strs):
        return None

    source = t1 or ""
    if all(reps_match(source, translated) for translated in strs):
        return None
    return "{}\n{}\n{}\n".format(path, source, strs)
def reps_match(t1, t2):
    """Report whether two strings use the same format placeholders.

    Args:
        t1: The source text.
        t2: The translated text.

    Returns:
        False when the number of "{" or of "}" characters differs between
        the two strings. Otherwise, if t1 still contains a "%" once every
        "%%" has been removed, True only when the set of pct_re matches is
        the same for both strings. In every other case True.
    """
    # brace-style placeholders: only the brace counts are compared
    if any(t1.count(brace) != t2.count(brace) for brace in "{}"):
        return False

    # drop escaped percent signs so they are not mistaken for placeholders
    source = t1.replace("%%", "")
    translated = t2.replace("%%", "")
    if "%" not in source:
        return True

    expected = set(pct_re.findall(source))
    found = set(pct_re.findall(translated))
    return expected == found
def fix_po(path):
    """Scan one PO file for translations whose placeholders look wrong.

    Despite its name this function never modifies the file: it collects
    the reports produced by check_reps() and prints them.

    Args:
        path: Path of the PO file to scan.

    Returns:
        The number of problem entries found. When that number is non-zero
        a report listing the entries is printed to stdout.

    Raises:
        AssertionError: If a quoted continuation line appears while no
            msgid/msgid_plural/msgstr entry is being read.
    """
    last_msgid = None
    last_path = None
    state = "outside"
    problems = []
    strs = []

    # NOTE(review): PO files are assumed to be UTF-8 encoded - confirm.
    with open(path, encoding="utf-8") as f:
        lines = f.readlines()
    # An entry is only checked once a non-entry line is reached, so add a
    # blank sentinel line; otherwise the last entry of a file that does not
    # end with a blank line would never be checked.
    lines.append("\n")

    for line in lines:
        # comment? remember it so the report can point at the source
        m = path_re.match(line)
        if m:
            last_path = m.group(1)
        # starting new id/str?
        m = msg_re.match(line)
        if m:
            label, text = m.group(1), m.group(3)
            if label == "msgid":
                last_msgid = text
                state = "id"
                strs = []
            elif label == "msgid_plural":
                # The plural source text is not compared, but its
                # continuation lines must not be appended to last_msgid.
                state = "id_plural"
            else:
                state = "str"
                strs.append(text)
            continue
        # continuing previous id/str?
        m = cont_re.match(line)
        if m:
            if state == "id":
                last_msgid += m.group(1)
            elif state == "str":
                strs[-1] += m.group(1)
            elif state != "id_plural":
                # raised explicitly so the check survives "python -O"
                raise AssertionError(
                    "unexpected continuation line in {}: {!r}".format(path, line)
                )
            continue
        # any other line ends the current entry, so validate it now
        state = "outside"
        if last_msgid:
            p = check_reps(last_path, last_msgid, strs)
            if p:
                problems.append(p)
        last_msgid = None

    if problems:
        print("{0}\nProblems in {1}:\n{0}\n{2}".format("*"*60, path, "\n".join(problems)))
    return len(problems)
# Check every entry of po_dir. os.listdir() returns names in arbitrary
# order, so sort them to keep the report order stable between runs.
problems = 0
for po in sorted(os.listdir(po_dir)):
    path = os.path.join(po_dir, po)
    problems += fix_po(path)

# exit with status 1 when at least one file contained a problem
if problems:
    sys.exit(1)