# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Parser for `.properties` locale files.

A file is read as UTF-8 and turned into a dict mapping each key to its
value. Keys carrying a plural suffix (`key[one]`, `key[other]`, ...) are
merged into a single entry whose value is a dict of plural forms.
"""

import re
import codecs


class MalformedLocaleFileError(Exception):
    """Raised when a locale file is not valid UTF-8 or cannot be parsed."""
    pass


def parse_file(path):
    """Read the locale file at `path` and return its parsed key/value dict.

    Raises MalformedLocaleFileError if the file is not valid UTF-8 or is
    not a well-formed .properties file.
    """
    return parse(read_file(path), path)


def read_file(path):
    """Return the lines of the UTF-8 encoded file at `path`.

    Line endings are kept, as `readlines()` returns them.
    Raises MalformedLocaleFileError if the content is not valid UTF-8.
    """
    try:
        # The context manager closes the file even when decoding fails.
        with codecs.open(path, "r", "utf-8") as locale_file:
            return locale_file.readlines()
    except UnicodeDecodeError as e:
        raise MalformedLocaleFileError(
            'Following locale file is not a valid ' +
            'UTF-8 file: %s\n%s"' % (path, str(e)))


# A line whose first non-blank character is `#`.
COMMENT = re.compile(r'\s*#')
# A line made only of whitespace.
EMPTY = re.compile(r'^\s+$')
# `key = value` or `key: value`; whitespace around the key and before the
# value is not captured at the start of either group.
KEYVALUE = re.compile(r"\s*([^=:]+)(=|:)\s*(.*)")


def parse(lines, path=None):
    """Parse an iterable of .properties lines into a dict.

    `lines` is any iterable of text lines (with or without their trailing
    newline). `path` is only used to build error messages.

    Comment lines and blank lines are skipped. A value ending with a
    backslash continues on the following line(s). Plural keys are merged
    by `normalize_plural` before the dict is returned.

    Raises MalformedLocaleFileError on a line that is not a key/value
    pair, on an empty key, on a continuation that runs into the end of
    the input, or on an invalid set of plural forms.
    """
    lines = iter(lines)
    # Number of the physical line currently held in `line`; every line
    # pulled from the iterator bumps it, including skipped and
    # continuation lines, so error messages point at the right place.
    line_no = 0
    pairs = dict()
    for line in lines:
        line_no += 1
        if not line or COMMENT.match(line) or EMPTY.match(line):
            continue
        m = KEYVALUE.match(line)
        if not m:
            raise MalformedLocaleFileError(
                'Following locale file is not a valid .properties file: %s\n'
                'Line %d is incorrect:\n%s' % (path, line_no, line))

        # Leading spaces are dropped by the regular expression; trailing
        # ones have to be stripped here.
        key = m.group(1).rstrip()
        val = m.group(3).rstrip()
        # Turn `\uXXXX` escape sequences into the characters they denote.
        # NOTE: this is applied to the first physical line of the value
        # only, not to continuation lines.
        val = val.encode('raw-unicode-escape').decode('raw-unicode-escape')

        # `key` can be empty when key is only made of spaces
        if not key:
            raise MalformedLocaleFileError(
                'Following locale file is not a valid .properties file: %s\n'
                'Key is invalid on line %d is incorrect:\n%s' %
                (path, line_no, line))

        # Multiline value: keep reading lines while they end with a
        # backslash. Each continuation line is stripped on both sides
        # (which also removes the newline) before the backslash test, so
        # values spanning any number of lines are joined correctly.
        if val.endswith("\\"):
            val = val[:-1]
            try:
                while True:
                    line = next(lines)
                    line_no += 1
                    chunk = line.strip()
                    if not chunk.endswith("\\"):
                        val += chunk
                        break
                    # Drop the backslash, keep any spaces in front of it.
                    val += chunk[:-1]
            except StopIteration:
                raise MalformedLocaleFileError(
                    'Following locale file is not a valid .properties file: %s\n'
                    'Unexpected EOF in multiline sequence at line %d:\n%s' %
                    (path, line_no, line))
        # Save this new pair
        pairs[key] = val

    normalize_plural(path, pairs)
    return pairs


# Plural forms in properties files are defined like this:
#   key = other form
#   key[one] = one form
#   key[...] = ...
# Parse them and merge each key into one object containing all forms:
#   key: {
#     other: "other form",
#     one: "one form",
#     ...: ...
#   }
PLURAL_FORM = re.compile(r'^(.*)\[(zero|one|two|few|many|other)\]$')


def normalize_plural(path, pairs):
    """Merge `key[form]` entries of `pairs` into `pairs[key]`, in place.

    After the call every plural key has been removed and `pairs[key]` is
    a dict mapping form names (`zero`, `one`, `two`, `few`, `many`,
    `other`) to their text. A plain `key` entry, when present, becomes
    the `other` form. `path` is only used to build error messages.

    Raises MalformedLocaleFileError when plural forms exist for a key
    that has neither a plain `key` entry nor a `key[other]` entry.
    """
    # Iterate over a snapshot: entries are added and deleted in the loop.
    for key in list(pairs.keys()):
        m = PLURAL_FORM.match(key)
        if not m:
            continue
        main_key = m.group(1)
        plural_form = m.group(2)
        if main_key not in pairs:
            # No generic key (i.e. a key without [form]): that is allowed
            # as long as the [other] form is given explicitly. No plural
            # entry of this key has been deleted yet at this point, so
            # looking `key[other]` up in `pairs` is reliable.
            if main_key + "[other]" not in pairs:
                raise MalformedLocaleFileError(
                    'Following locale file is not a valid .properties file: %s\n'
                    'This plural form doesn\'t have a matching `%s[other]` form:\n'
                    '%s\n'
                    'You have to defined following key:\n%s'
                    % (path, main_key, key, main_key))
            pairs[main_key] = {}
        elif not isinstance(pairs[main_key], dict):
            # The generic form is still a plain string: it is the `other`
            # form of the merged entry.
            pairs[main_key] = {"other": pairs[main_key]}
        # then, add this new plural form
        pairs[main_key][plural_form] = pairs[key]
        del pairs[key]