Browse Source

Added script to check style of .po files.

Also changed Makefile so it runs during "make build-i18n".
pull/333/head
Joachim Bauch 9 years ago
parent
commit
410ab9dc10
  1. 1
      src/i18n/Makefile.am
  2. 234
      src/i18n/helpers/polint.py

1
src/i18n/Makefile.am

@ -26,6 +26,7 @@ build: LANGUAGES = $(shell $(FIND) ./ -iname '*.po' -printf '%f\n' | $(SED) 's/\ @@ -26,6 +26,7 @@ build: LANGUAGES = $(shell $(FIND) ./ -iname '*.po' -printf '%f\n' | $(SED) 's/\
build: update
@if [ "$(NODEJS_SUPPORT_PO2JSON)" = "no" ]; then echo "Node.js module 'po2json' not found, required when building i18n"; exit 1; fi
@if [ "$(PYTHON)" = "" ]; then echo "Command 'python' not found, required when building i18n"; exit 1; fi
$(PYTHON) "$(CURDIR)/helpers/polint.py"
for lang in $(LANGUAGES) ; do \
$(NODEJS) "$(CURDIR)/helpers/po2json" "$(CURDIR)/messages-$$lang.po" "$(ROOT)/static/translation/messages-$$lang.json" ; \
done

234
src/i18n/helpers/polint.py

@ -0,0 +1,234 @@ @@ -0,0 +1,234 @@
#!/usr/bin/python -u
#
# Helper script to check syntax of translation files.
#
# (c)2016 struktur AG
try:
from collections import OrderedDict
except ImportError:
OrderedDict = dict
import glob
import os
import sys
# Directory holding messages.pot and the messages-*.po files: the parent
# of the directory containing this script.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
def parsepo(fn):
    """
    Parse a gettext .po/.pot file and check its basic line style.

    ``fn`` is either a filename or an object providing ``readlines()``
    (yielding byte strings or already decoded text).

    Returns a tuple ``(errors, data, untranslated)``:

    - ``errors``: number of style/syntax problems found; each one is
      reported on stderr.
    - ``data``: mapping of msgid to a ``(msgstr, default)`` tuple, where
      ``default`` is the text of a preceding ``#. Default: "..."`` comment
      or None.
    - ``untranslated``: list of msgids with an empty msgstr.

    The first entry of the file is treated as the header and is skipped.
    Strings are returned as written in the file (escape sequences are not
    interpreted). Errors while opening or reading the file are propagated
    to the caller.

    NOTE(review): plural forms (msgid_plural / msgstr[n]) and msgctxt are
    not handled specially.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3

    if isinstance(fn, string_types):
        with open(fn, 'rb') as fp:
            lines = fp.readlines()
    else:
        lines = fn.readlines()

    def report(message):
        sys.stderr.write(message + '\n')

    data = OrderedDict()
    msgid = None
    msgstr = None
    default = None
    first = True
    lastline = None
    untranslated = []
    errors = 0
    for line_number, line in enumerate(lines, 1):
        if isinstance(line, bytes):
            try:
                line = line.decode('utf-8')
            except UnicodeError:
                # latin-1 maps every byte to a character, so this fallback
                # cannot fail.
                line = line.decode('latin-1')

        if line[-2:] == '\r\n':
            errors += 1
            report('ERROR: line %d has Windoze line endings' % (line_number))
            line = line[:-2]
        if line[-1:] == '\n':
            line = line[:-1]
        if line != line.strip():
            errors += 1
            report('ERROR: line %d contains leading and/or trailing whitespaces' % (line_number))

        if line.startswith("msgid"):
            # strip 'msgid "' and the closing quote
            msgid = line.strip()[7:-1]
            msgstr = None
        elif line.startswith("msgstr"):
            # strip 'msgstr "' and the closing quote
            msgstr = line.strip()[8:-1]
        elif line.startswith('"') and msgid is not None:
            # continuation line: belongs to msgstr once one has been seen,
            # otherwise to a multi-line msgid
            if msgstr is not None:
                msgstr += line.strip()[1:-1]
            else:
                msgid += line.strip()[1:-1]
        elif line.startswith("#. Default: "):
            # strip '#. Default: "' and the closing quote
            default = line.strip()[13:-1]
        elif line.startswith("#"):
            lastline = line
            continue
        elif not line:
            # blank line -> must be finished
            if msgid is None and msgstr is None:
                if lastline is not None and lastline.startswith('#~'):
                    # blank line after a block of obsolete ("#~") entries
                    lastline = line
                    continue
                errors += 1
                report('ERROR: Got blank line in %d without msgstr or msgid.' % (line_number))
                continue

            if first:
                # skip first occurrence as this is the header; reset the
                # state so the header can not leak into the result
                first = False
                msgid = msgstr = default = None
                lastline = line
                continue

            if not msgstr:
                untranslated.append(msgid)

            data[msgid] = (msgstr, default)
            msgid = msgstr = default = None

        lastline = line

    if msgid is not None and msgstr is not None:
        # add last entry if the file does not end with an empty line
        if not msgstr:
            untranslated.append(msgid)
        data[msgid] = (msgstr, default)

    return errors, data, untranslated
def check_translation(msgid, msgstr, default, value, language):
    """
    Check the style of a single translation and report problems on stderr.

    The translation is compared with a reference text, which is ``default``
    or, if that is empty, ``msgid``. The checks cover repeated spaces,
    quotes and leading whitespace that do not match the reference, a
    missing trailing dot and spaces around punctuation or brackets.

    msgid    -- message id; used in the error messages and as fallback
                reference text.
    msgstr   -- msgstr of the template entry; not used by the checks, kept
                for interface compatibility.
    default  -- default text of the message or None/empty.
    value    -- the translated text to check.
    language -- language code of the translation; the trailing dot check is
                skipped for codes starting with "zh", "ko" or "ja".

    Returns the number of errors found.
    """
    reference = default or msgid or ''
    errors = 0

    def report(template):
        # All messages take the msgid and the current state of the value.
        sys.stderr.write(template % (msgid, value) + '\n')

    # After each reported problem the local copy of the value is corrected
    # so the later checks work on a cleaned-up text.
    if '  ' in value:
        errors += 1
        report('ERROR: Translation for %r contains too many whitespaces (%s)')

    if not reference.startswith(('\\"', '"', "'")):
        if value.startswith('\\"'):
            report('ERROR: Translation for %r starts with an " (%s)')
            value = value[2:]
            errors += 1
        elif value.startswith('"'):
            report('ERROR: Translation for %r starts with an " (%s)')
            value = value[1:]
            errors += 1

    if not reference.endswith(('\\"', '"', "'")):
        if value.endswith('\\"'):
            report('ERROR: Translation for %r ends with an " (%s)')
            value = value[:-2]
            errors += 1
        elif value.endswith('"'):
            report('ERROR: Translation for %r ends with an " (%s)')
            value = value[:-1]
            errors += 1

    leading_space = reference.startswith(' ')
    if leading_space and not value.startswith(' '):
        report('ERROR: Translation for %r does not start with a leading whitespace (%s)')
        value = ' ' + value
        errors += 1
    elif not leading_space and value.startswith(' '):
        report('ERROR: Translation for %r starts with a leading whitespace (%s)')
        # remove the unexpected whitespace for the checks below
        value = value.lstrip(' ')
        errors += 1

    if not language.startswith(('zh', 'ko', 'ja')):
        # TODO(fancycode): Is it correct to skip for these languages?
        if reference.endswith('.') and not value.endswith('.'):
            report('ERROR: Translation for %r does not end with a tailing dot (%s)')
            value += '.'
            errors += 1

    # NOTE(review): the punctuation checks are applied to all languages;
    # confirm they were not meant to be limited like the trailing dot check.
    # Invalid punctuation is counted at most once per translation.
    punct = False
    for ch in ('.', ',', ';', ':', '?', '!', ')', ']'):
        if ' ' + ch in value and not punct:
            # a dot after a space is tolerated if the text contains " .."
            # (presumably an ellipsis)
            if ch != '.' or ' ..' not in value:
                report('ERROR: Translation for %r contains invalid punctuation (%s)')
                punct = True
                errors += 1
        while ' ' + ch + ' ' in value:
            value = value.replace(' ' + ch + ' ', ch + ' ')
        if value.endswith(' ' + ch):
            value = value[:-1 - len(ch)] + ch

    for ch in ('(', '['):
        if ch + ' ' in value and not punct:
            report('ERROR: Translation for %r contains invalid punctuation (%s)')
            punct = True
            errors += 1
        while ' ' + ch + ' ' in value:
            value = value.replace(' ' + ch + ' ', ' ' + ch)

    return errors
def main():
    """
    Check translation files against the template ``messages.pot`` in ROOT.

    The files to check are taken from the command line. Without arguments
    all ``messages-*.po`` files in ROOT are checked and progress messages
    are written to stdout. Problems are reported on stderr.

    Returns 1 if any error was found and 0 otherwise, suitable as process
    exit code. Missing translations are reported as warnings only.
    """
    def out(message=''):
        sys.stdout.write(message + '\n')

    def err(message):
        sys.stderr.write(message + '\n')

    # Style problems in the template itself are reported by parsepo but do
    # not count towards the result.
    _, pot_data, _ = parsepo(os.path.join(ROOT, 'messages.pot'))
    errors = 0
    filenames = sys.argv[1:]
    show_filenames = not filenames
    if show_filenames:
        # sorted to get a stable order of the output
        filenames = sorted(glob.glob(os.path.join(ROOT, 'messages-*.po')))

    for filename in filenames:
        # "messages-<language>.po" -> "<language>"
        language = os.path.basename(filename)[9:-3]
        if show_filenames:
            out('Checking %s (%s)' % (filename, language))

        try:
            parse_errors, data, _ = parsepo(filename)
        except Exception as e:
            # Keep going with the remaining files, but count the failure.
            err('ERROR: Could not parse (%s)' % (e,))
            import traceback
            traceback.print_exc(file=sys.stderr)
            errors += 1
            continue

        errors += parse_errors
        file_errors = 0
        for msgid, (msgstr, default) in pot_data.items():
            translation = data.pop(msgid, None)
            if translation is None or not translation[0]:
                err('WARNING: Missing translation for %r' % (msgid,))
                continue

            file_errors += check_translation(msgid, msgstr, default, translation[0], language) or 0

        if show_filenames:
            out('Found %d errors in %s' % (file_errors, filename))
            out()
        errors += file_errors

    if errors:
        err('Found %d total errors' % (errors))
        return 1

    return 0
if __name__ == '__main__':
    import locale
    try:
        locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
    except locale.Error:
        # The locale is not installed on every system (e.g. minimal build
        # environments); continue with the default locale instead of
        # aborting the build.
        sys.stderr.write('WARNING: Locale en_US.UTF-8 is not available, using default locale\n')
    sys.exit(main())
Loading…
Cancel
Save