# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Parser for `.properties` locale files.

A file is a sequence of `key = value` (or `key: value`) lines. Lines starting
with `#` and blank lines are ignored, a value ending with a backslash
continues on the next line, and keys such as `key[one]` declare plural forms.
"""

import re
import codecs


class MalformedLocaleFileError(Exception):
    """Raised when a locale file cannot be decoded or parsed."""
    pass


def parse_file(path):
    """Read the locale file at `path` and return its parsed pairs.

    See `parse` for the shape of the returned dictionary.
    Raises MalformedLocaleFileError if the file is not valid UTF-8 or is not
    a well-formed .properties file.
    """
    return parse(read_file(path), path)


def read_file(path):
    """Return the content of the UTF-8 file at `path` as a list of lines.

    Raises MalformedLocaleFileError if the file cannot be decoded as UTF-8.
    """
    try:
        # The context manager closes the handle even when decoding fails.
        with codecs.open(path, "r", "utf-8") as locale_file:
            return locale_file.readlines()
    except UnicodeDecodeError as e:
        raise MalformedLocaleFileError(
            'Following locale file is not a valid ' +
            'UTF-8 file: %s\n%s"' % (path, str(e)))


# A line whose first non-space character is `#`.
COMMENT = re.compile(r'\s*#')
# A line made only of whitespace.
EMPTY = re.compile(r'^\s+$')
# `key = value` or `key: value`; leading spaces of key and value are skipped.
KEYVALUE = re.compile(r"\s*([^=:]+)(=|:)\s*(.*)")


def parse(lines, path=None):
    """Parse an iterable of .properties lines into a dictionary.

    `lines` is any iterable of strings (one per line, trailing newline
    allowed). `path` is only used to build error messages.

    Returns a dict mapping each key to its value. Keys that have plural forms
    (`key[one]`, `key[other]`, ...) are merged by `normalize_plural` into a
    single entry whose value is a dict of forms.

    Raises MalformedLocaleFileError on a line that is not a key/value pair,
    on an empty key, on a multiline value cut short by the end of the input,
    or on a plural key without a matching `[other]` form.
    """
    # enumerate() keeps the reported line number in sync with the iterator,
    # including skipped comment/blank lines and consumed continuation lines.
    numbered = enumerate(lines, 1)
    pairs = {}
    for lineNo, line in numbered:
        if not line or COMMENT.match(line) or EMPTY.match(line):
            continue
        m = KEYVALUE.match(line)
        if not m:
            raise MalformedLocaleFileError(
                'Following locale file is not a valid .properties file: %s\n'
                'Line %d is incorrect:\n%s' % (path, lineNo, line))

        # All spaces are stripped. Spaces at the beginning are stripped
        # by the regular expression. We have to strip spaces at the end.
        key = m.group(1).rstrip()
        val = m.group(3).rstrip()
        # Round-trip through raw-unicode-escape to turn literal \uXXXX
        # sequences into the characters they denote.
        val = val.encode('raw-unicode-escape').decode('raw-unicode-escape')

        # `key` can be empty when key is only made of spaces
        if not key:
            raise MalformedLocaleFileError(
                'Following locale file is not a valid .properties file: %s\n'
                'Key is invalid on line %d is incorrect:\n%s' %
                (path, lineNo, line))

        # Multiline value: keep reading lines while they end with a backslash.
        # Every continuation line is stripped first so that the trailing
        # newline never hides the backslash.
        if val.endswith("\\"):
            val = val[:-1]
            try:
                lineNo, line = next(numbered)
                line = line.strip()
                while line.endswith("\\"):
                    val += line[:-1].lstrip()
                    lineNo, line = next(numbered)
                    line = line.strip()
                val += line
            except StopIteration:
                # `lineNo` and `line` still hold the last line actually read.
                raise MalformedLocaleFileError(
                    'Following locale file is not a valid .properties file: %s\n'
                    'Unexpected EOF in multiline sequence at line %d:\n%s' %
                    (path, lineNo, line))
        # Save this new pair
        pairs[key] = val

    normalize_plural(path, pairs)
    return pairs


# Plural forms in properties files are defined like this:
#   key = other form
#   key[one] = one form
#   key[...] = ...
# Parse them and merge each key into one object containing all forms:
#   key: {
#     other: "other form",
#     one: "one form",
#     ...: ...
#   }
PLURAL_FORM = re.compile(r'^(.*)\[(zero|one|two|few|many|other)\]$')


def normalize_plural(path, pairs):
    """Merge `key[form]` entries of `pairs` into one dict per key, in place.

    `path` is only used to build error messages. Nothing is returned; `pairs`
    itself is modified.

    Raises MalformedLocaleFileError when a plural key has neither a generic
    `key` entry nor a `key[other]` entry.
    """
    # Iterate over a snapshot because entries are added and removed below.
    for key in list(pairs.keys()):
        m = PLURAL_FORM.match(key)
        if not m:
            continue
        main_key = m.group(1)
        plural_form = m.group(2)
        # Allows not specifying a generic key (i.e a key without [form])
        if main_key not in pairs:
            pairs[main_key] = {}
            # Ensure that we always have the [other] form. This is checked
            # only the first time a key is seen: once merged, `key[other]`
            # is removed from `pairs`.
            if main_key + "[other]" not in pairs:
                raise MalformedLocaleFileError(
                    'Following locale file is not a valid .properties file: %s\n'
                    'This plural form doesn\'t have a matching `%s[other]` form:\n'
                    '%s\n'
                    'You have to defined following key:\n%s'
                    % (path, main_key, key, main_key))
        # convert generic form into an object if it is still a string
        if not isinstance(pairs[main_key], dict):
            pairs[main_key] = {"other": pairs[main_key]}
        # then, add this new plural form
        pairs[main_key][plural_form] = pairs[key]
        del pairs[key]