2 changed files with 235 additions and 0 deletions
@ -0,0 +1,234 @@
@@ -0,0 +1,234 @@
|
||||
#!/usr/bin/python -u |
||||
# |
||||
# Helper script to check syntax of translation files. |
||||
# |
||||
# (c)2016 struktur AG |
||||
# OrderedDict keeps the entries in file order; when the import is not
# available a plain dict is used instead, so entry order is not preserved.
try:
    from collections import OrderedDict
except ImportError:
    OrderedDict = dict
import glob
import os
import sys

# Absolute path of the parent of the directory containing this script.
# main() looks for "messages.pot" and "messages-*.po" in this directory.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
||||
|
||||
def parsepo(fn):
    """
    Parse a gettext po/pot file and collect its entries.

    fn is either a filename or an object providing readlines(). Lines read
    as bytes are decoded as UTF-8, falling back to latin-1.

    Return a tuple (errors, data, untranslated):

    - errors: number of syntax problems found; each one is reported on
      stderr (Windows line endings, leading/trailing whitespace, unexpected
      blank lines).
    - data: ordered mapping of msgid to a (msgstr, default) tuple, where
      default is the text of a preceding '#. Default: "..."' comment or None.
    - untranslated: list of msgids whose msgstr is missing or empty.

    The entry terminated by the first blank line is treated as the po header
    and is not stored. Errors while opening or reading the file are raised
    to the caller.
    """
    if hasattr(fn, 'readlines'):
        lines = fn.readlines()
    else:
        with open(fn, 'rb') as fp:
            lines = fp.readlines()

    data = OrderedDict()
    untranslated = []
    errors = 0

    msgid = None
    msgstr = None
    default = None
    first = True
    lastline = None

    for line_number, line in enumerate(lines, 1):
        if isinstance(line, bytes):
            try:
                line = line.decode('utf-8')
            except UnicodeError:
                # latin-1 maps every byte value, so this fallback cannot fail.
                line = line.decode('latin-1')

        if line[-2:] == '\r\n':
            errors += 1
            sys.stderr.write('ERROR: line %d has Windoze line endings\n' % (line_number))
            line = line[:-2]

        if line[-1:] == '\n':
            line = line[:-1]

        if line != line.strip():
            errors += 1
            sys.stderr.write('ERROR: line %d contains leading and/or trailing whitespaces\n' % (line_number))

        if line.startswith("msgid"):
            # strip the leading 'msgid "' and the closing quote
            msgid = line.strip()[7:-1]
            msgstr = None
        elif line.startswith("msgstr"):
            # strip the leading 'msgstr "' and the closing quote
            msgstr = line.strip()[8:-1]
        elif line.startswith('"') and msgid is not None:
            # Continuation line: it extends the msgstr once one has been
            # started, otherwise it still belongs to a multi-line msgid.
            chunk = line.strip()[1:-1]
            if msgstr is not None:
                msgstr += chunk
            else:
                msgid += chunk
        elif line.startswith("#. Default: "):
            # strip the '#. Default: "' prefix and the closing quote
            default = line.strip()[13:-1]
        elif not line:
            # A blank line terminates the current entry.
            if msgid is None and msgstr is None:
                # Without a pending entry a blank line is only accepted
                # directly after an obsolete ("#~") line. lastline is still
                # None if the blank line is the very first line of the file.
                if lastline is None or not lastline.startswith('#~'):
                    errors += 1
                    sys.stderr.write('ERROR: Got blank line in %d without msgstr or msgid.\n' % (line_number))
            elif first:
                # The first terminated entry is the header, do not store it.
                first = False
            else:
                if not msgstr:
                    untranslated.append(msgid)

                data[msgid] = (msgstr, default)
                msgid = msgstr = default = None

        # Other comment lines ("#...") need no handling of their own; like
        # every processed line they are only remembered as lastline.
        lastline = line

    if msgid is not None and msgstr is not None:
        # Store the last entry if the file does not end with a blank line.
        if not msgstr:
            untranslated.append(msgid)

        data[msgid] = (msgstr, default)

    return errors, data, untranslated
||||
|
||||
def check_translation(msgid, msgstr, default, value, language):
    """
    Compare a translated string with its source text and count problems.

    msgid is the message id (used in the reports), default the source text
    from the template; if default is empty, msgid is used as source text.
    value is the translation to check and language the language code of the
    translation file. msgstr is accepted for symmetry with the parsed
    template data but is not used by the checks.

    Every problem is reported on stderr. After a report the local copy of
    value is repaired so that the same problem does not trigger follow-up
    reports in later checks.

    Return the number of problems found.
    """
    if not default:
        default = msgid

    def report(problem):
        # Must be called before value is repaired so the message shows the
        # text that triggered the report.
        sys.stderr.write('ERROR: Translation for %r %s (%s)\n' % (msgid, problem, value))

    errors = 0
    if '  ' in value:
        errors += 1
        report('contains too many whitespaces')

    quotes = ('\\"', '"', "'")

    # A quote at the start is only allowed if the source text has one too.
    start_quote = bool(default) and default.startswith(quotes)
    if not start_quote:
        if value.startswith('\\"'):
            report('starts with an "')
            value = value[2:]
            errors += 1
        elif value.startswith('"'):
            report('starts with an "')
            value = value[1:]
            errors += 1

    # Same rule for a quote at the end.
    end_quote = bool(default) and default.endswith(quotes)
    if not end_quote:
        if value.endswith('\\"'):
            report('ends with an "')
            value = value[:-2]
            errors += 1
        elif value.endswith('"'):
            report('ends with an "')
            value = value[:-1]
            errors += 1

    # Leading whitespace must match the source text.
    leading_space = bool(default) and default.startswith(' ')
    if leading_space and not value.startswith(' '):
        report('does not start with a leading whitespace')
        value = ' ' + value
        errors += 1
    elif not leading_space and value.startswith(' '):
        report('starts with a leading whitespace')
        # Remove the unexpected whitespace; otherwise the punctuation check
        # below would report it a second time (e.g. for " .foo").
        value = value.lstrip(' ')
        errors += 1

    if not language.startswith(('zh', 'ko', 'ja')):
        # TODO(fancycode): Is it correct to skip for these languages?
        trailing_dot = bool(default) and default.endswith('.')
        if trailing_dot and not value.endswith('.'):
            report('does not end with a tailing dot')
            value += '.'
            errors += 1

    # NOTE(review): the punctuation checks below run for every language;
    # confirm that only the trailing dot check is meant to be skipped for
    # zh/ko/ja.
    # Invalid punctuation is reported at most once per translation.
    punct = False
    for ch in ('.', ',', ';', ':', '?', '!', ')', ']'):
        if ' ' + ch in value and not punct:
            # " .." is accepted so that an ellipsis may follow a space.
            if ch != '.' or ' ..' not in value:
                report('contains invalid punctuation')
                punct = True
                errors += 1

        # Repair: drop the space in front of the punctuation character.
        while ' ' + ch + ' ' in value:
            value = value.replace(' ' + ch + ' ', ch + ' ')
        if value.endswith(' ' + ch):
            value = value[:-1 - len(ch)] + ch

    for ch in ('(', '['):
        if ch + ' ' in value and not punct:
            report('contains invalid punctuation')
            punct = True
            errors += 1

        # Repair: drop the space after the opening bracket.
        while ' ' + ch + ' ' in value:
            value = value.replace(' ' + ch + ' ', ' ' + ch)

    return errors
||||
|
||||
def main():
    """
    Check translation files against the template and report problems.

    The template is read from "messages.pot" in ROOT. The files to check are
    taken from the command line; without arguments all "messages-*.po" files
    in ROOT are checked and per-file progress is printed to stdout.

    Problems are reported on stderr. Missing translations are only warnings
    and are not counted as errors.

    Return 1 if any error was found, otherwise 0.
    """
    # NOTE(review): syntax errors in the template are reported by parsepo
    # but are not added to the error count here - confirm this is intended.
    _, pot_data, _ = parsepo(os.path.join(ROOT, 'messages.pot'))

    errors = 0
    filenames = sys.argv[1:]
    show_filenames = not filenames
    if show_filenames:
        filenames = glob.glob(os.path.join(ROOT, 'messages-*.po'))

    for filename in filenames:
        # Strip 9 leading and 3 trailing characters from the file name,
        # i.e. the "messages-" prefix and the ".po" suffix.
        language = os.path.basename(filename)[9:-3]
        if show_filenames:
            sys.stdout.write('Checking %s (%s)\n' % (filename, language))

        try:
            parse_errors, data, _ = parsepo(filename)
        except Exception as e:
            # A file that cannot be read or parsed counts as a single error;
            # continue with the remaining files.
            sys.stderr.write('ERROR: Could not parse (%s)\n' % (e,))
            import traceback
            traceback.print_exc(file=sys.stderr)
            errors += 1
            continue

        errors += parse_errors

        file_errors = 0
        for msgid, (msgstr, default) in pot_data.items():
            translated = data.pop(msgid, None)
            if translated is None or not translated[0]:
                sys.stderr.write('WARNING: Missing translation for %r\n' % (msgid,))
                continue

            file_errors += check_translation(msgid, msgstr, default, translated[0], language) or 0

        if show_filenames:
            sys.stdout.write('Found %d errors in %s\n' % (file_errors, filename))

        # Blank line between the output of two files.
        sys.stdout.write('\n')
        errors += file_errors

    if errors:
        sys.stderr.write('Found %d total errors\n' % (errors))
        return 1

    return 0
||||
|
||||
# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
    import locale
    # Switch all locale categories to en_US.UTF-8 before running the checks.
    # NOTE(review): setlocale raises locale.Error if this locale is not
    # available on the system - confirm it is installed where this runs.
    locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
    # The return value of main() becomes the exit status of the process.
    sys.exit(main())
Loading…
Reference in new issue