|
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
4 |
|
5 import re |
|
6 import codecs |
|
7 |
|
class MalformedLocaleFileError(Exception):
    """Raised when a locale file cannot be decoded or parsed."""
|
10 |
|
def parse_file(path):
    """Read the file at `path` and return the result of parsing its lines.

    The file is loaded with `read_file` and its lines are handed to `parse`,
    together with `path`, which `parse` uses in its error messages.
    """
    content_lines = read_file(path)
    return parse(content_lines, path)
|
13 |
|
def read_file(path):
    """Read a UTF-8 encoded file and return its content as a list of lines.

    Args:
        path: location of the file to read.

    Returns:
        The decoded lines of the file, as returned by `readlines()`
        (line endings are kept).

    Raises:
        MalformedLocaleFileError: if the file content is not valid UTF-8.
    """
    try:
        # The context manager closes the handle on success and on failure;
        # the previous code never closed it.
        with codecs.open(path, "r", "utf-8") as locale_file:
            return locale_file.readlines()
    except UnicodeDecodeError as e:
        raise MalformedLocaleFileError(
            'Following locale file is not a valid '
            'UTF-8 file: %s\n%s' % (path, str(e)))
|
21 |
|
# Matches a comment line: optional leading whitespace followed by `#`.
COMMENT = re.compile(r'\s*#')
# Matches a line made only of whitespace (one or more characters).
EMPTY = re.compile(r'^\s+$')
# Matches `key = value` or `key: value`. Group 1 is the key (it may still
# carry trailing whitespace), group 2 the separator and group 3 the value.
KEYVALUE = re.compile(r"\s*([^=:]+)(=|:)\s*(.*)")
|
25 |
|
def parse(lines, path=None):
    """Parse the lines of a .properties file into a dictionary.

    Comment lines (starting with `#`) and blank lines are skipped. Every
    other line must look like `key = value` or `key: value`. A value ending
    with a backslash continues on the following line(s). Plural forms
    (`key[one]`, ...) are merged by `normalize_plural` before returning.

    Args:
        lines: iterable of text lines (with or without line endings).
        path: name of the file being parsed; only used in error messages.

    Returns:
        A dict mapping each key to its value.

    Raises:
        MalformedLocaleFileError: on a line that is not a key/value pair,
            on an empty key, or when the input ends in the middle of a
            multiline value.
    """
    lines = iter(lines)
    # Number of the last line read from `lines`. It is advanced for every
    # line consumed (comments, blank lines and continuation lines included)
    # so that the numbers reported in error messages match the file.
    line_no = 0
    pairs = {}
    for line in lines:
        line_no += 1
        if COMMENT.match(line) or EMPTY.match(line) or not line:
            continue
        m = KEYVALUE.match(line)
        if not m:
            raise MalformedLocaleFileError(
                'Following locale file is not a valid .properties file: %s\n'
                'Line %d is incorrect:\n%s' % (path, line_no, line))

        # All spaces are stripped. Spaces at the beginning are stripped
        # by the regular expression. We have to strip spaces at the end.
        key = m.group(1).rstrip()
        val = m.group(3).rstrip()
        # Interpret `\uXXXX` escape sequences written in the value.
        # NOTE(review): only the first line of a multiline value goes
        # through this step; continuation lines are appended as-is.
        val = val.encode('raw-unicode-escape').decode('raw-unicode-escape')

        # `key` can be empty when key is only made of spaces
        if not key:
            raise MalformedLocaleFileError(
                'Following locale file is not a valid .properties file: %s\n'
                'Key is invalid on line %d is incorrect:\n%s' %
                (path, line_no, line))

        # Multiline value: keep reading lines while they end with a
        # backslash. Every continuation line is stripped on both sides
        # (which also removes the end-of-line character) before the
        # trailing backslash is looked for.
        if val.endswith("\\"):
            val = val[:-1]
            try:
                line = next(lines).strip()
                line_no += 1
                while line.endswith("\\"):
                    val += line[:-1]
                    line = next(lines).strip()
                    line_no += 1
                val += line
            except StopIteration:
                raise MalformedLocaleFileError(
                    'Following locale file is not a valid .properties file: %s\n'
                    'Unexpected EOF in multiline sequence at line %d:\n%s' %
                    (path, line_no, line))
        # Save this new pair
        pairs[key] = val

    normalize_plural(path, pairs)
    return pairs
|
76 |
|
# Plural forms in properties files are defined like this:
#   key = other form
#   key[one] = one form
#   key[...] = ...
# Parse them and merge each key into one object containing all forms:
#   key: {
#     other: "other form",
#     one: "one form",
#     ...: ...
#   }
PLURAL_FORM = re.compile(r'^(.*)\[(zero|one|two|few|many|other)\]$')


def normalize_plural(path, pairs):
    """Merge the `key[form]` entries of `pairs` into one dict per key.

    `pairs` is modified in place: every `key[form]` entry is removed and
    its value is stored under `pairs[key][form]`. A plain `key` entry, when
    present, becomes the `other` form.

    Args:
        path: name of the parsed file; only used in error messages.
        pairs: dict of parsed key/value pairs.

    Returns:
        None.

    Raises:
        MalformedLocaleFileError: if a key has plural forms but neither a
            generic `key` entry nor a `key[other]` entry.
    """
    # Iterate over a snapshot of the keys: entries are added to and removed
    # from `pairs` inside the loop.
    for key in list(pairs):
        m = PLURAL_FORM.match(key)
        if not m:
            continue
        main_key = m.group(1)
        plural_form = m.group(2)
        # Allows not specifying a generic key (i.e a key without [form])
        if main_key not in pairs:
            pairs[main_key] = {}
            # Ensure that we always have the [other] form
            if main_key + "[other]" not in pairs:
                raise MalformedLocaleFileError(
                    'Following locale file is not a valid .properties file: %s\n'
                    'This plural form doesn\'t have a matching `%s[other]` form:\n'
                    '%s\n'
                    'You have to define following key:\n%s'
                    % (path, main_key, key, main_key))
        # Convert the generic form into a dict if it is still a plain
        # string. Testing against `dict` works on Python 2 and 3 alike,
        # whereas the `unicode` type only exists on Python 2.
        if not isinstance(pairs[main_key], dict):
            pairs[main_key] = {"other": pairs[main_key]}
        # then, add this new plural form
        pairs[main_key][plural_form] = pairs[key]
        del pairs[key]