|
1 # This Source Code Form is subject to the terms of the Mozilla Public |
|
2 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4 |
|
5 # Program used to generate /packages/api-utils/lib/l10n/plural-rules.js |
|
6 # Fetch unicode.org data in order to build functions specific to each language |
|
7 # that will return for a given integer, its plural form name. |
|
8 # Plural form names are: zero, one, two, few, many, other. |
|
9 # |
|
10 # More information here: |
|
11 # http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html |
|
12 # http://cldr.unicode.org/index/cldr-spec/plural-rules |
|
13 |
|
14 # Usage: |
|
15 # $ python plural-rules-generator.py > ../packages/api-utils/lib/l10n/plural-rules.js |
|
16 |
|
17 import urllib2 |
|
18 import xml.dom.minidom |
|
19 import json |
|
20 import re |
|
21 |
|
# When True, every generated JS condition is preceded by a `//` comment
# holding the original unicode.org rule text it was derived from.
22 PRINT_CONDITIONS_IN_COMMENTS = False |
|
23 |
|
# URL of the plural rules XML document that is downloaded and parsed.
24 UNICODE_ORG_XML_URL = "http://unicode.org/repos/cldr/trunk/common/supplemental/plurals.xml" |
|
25 |
|
# Matches a single condition of the unicode.org pseudo language,
# e.g. "n mod 10 in 2..4" or "n is not 1".
# Groups: 1 = optional " mod N", 2 = operator ("is", "in", "within", "not in"),
#         3 = "not in" (nested inside group 2), 4 = optional " not",
#         5 = right-hand value (everything up to the next whitespace).
26 CONDITION_RE = r'n( mod \d+)? (is|in|within|(not in))( not)? ([^\s]+)' |
|
27 |
|
28 # For a given regexp.MatchObject `g` for `CONDITION_RE`, |
|
29 # returns the equivalent JS piece of code |
|
30 # i.e. maps pseudo conditional language from unicode.org XML to JS code |
|
def parseCondition(g):
    """Translate one unicode.org plural condition into a JavaScript expression.

    `g` is a match object for `CONDITION_RE`. The groups read here are:
      1 -- optional " mod N" applied to `n`
      2 -- operator: "is", "in", "within" or "not in"
      4 -- optional trailing " not" (as in "is not")
      5 -- right-hand side: one integer, or a comma separated list of
           integers and `a..b` ranges

    Returns the JS source as a string, built from `==` / `!=` comparisons and
    calls to the `isIn` / `isBetween` helpers defined in the generated file.
    Raises ValueError if a list/range member is not an integer.
    """
    lvalue = "n"
    if g.group(1):
        # group(1) looks like " mod 10"; int() tolerates the leading space.
        lvalue = "(n %% %d)" % int(g.group(1).replace("mod ", ""))

    operator = g.group(2)
    if g.group(4):
        operator += " not"

    rvalue = g.group(5)

    if operator == "is":
        return "%s == %s" % (lvalue, rvalue)
    if operator == "is not":
        return "%s != %s" % (lvalue, rvalue)

    # "in", "within" or one of their negated forms.
    # The regexp also accepts a trailing " not" after "in"/"within"; treat it
    # as a negation instead of silently dropping it.
    notPrefix = ""
    if operator.startswith("not ") or operator.endswith(" not"):
        notPrefix = "!"

    # `rvalue` is a comma separated list of either:
    #  - numbers: 42
    #  - ranges: 42..72
    sections = rvalue.split(',')

    if ".." not in rvalue:
        # No range, only a list of integers: a single `isIn` call is enough.
        #   n in 1,3,6,42
        return "%sisIn(%s, [%s])" % (notPrefix, lvalue, ", ".join(sections))

    #   n in 1..42
    #   n in 1..3,42
    subCondition = []
    integers = []
    for sub in sections:
        if ".." in sub:
            left, right = sub.split("..")
            subCondition.append(
                "isBetween(%s, %d, %d)" % (lvalue, int(left), int(right)))
        else:
            integers.append(int(sub))

    if len(integers) > 1:
        # `integers` holds ints, so they must be stringified before joining.
        subCondition.append("isIn(%s, [%s])" % (
            lvalue, ", ".join(str(value) for value in integers)))
    elif len(integers) == 1:
        subCondition.append("(%s == %s)" % (lvalue, integers[0]))

    return "%s(%s)" % (notPrefix, " || ".join(subCondition))
|
82 |
|
def computeRules():
    """Download the unicode.org plural rules and convert them to JS snippets.

    Returns a `(localesMapping, rules)` tuple:
      - `localesMapping` maps each locale name to the index of the
        `<pluralRules>` element that lists it;
      - `rules` maps that index to a string of JS `if (...) return "form";`
        statements, one pair of lines per `<pluralRule>` child.
    """
    # Fetch plural rules data directly from unicode.org website.
    # urllib2 responses are not context managers, so close explicitly.
    response = urllib2.urlopen(UNICODE_ORG_XML_URL)
    try:
        doc = xml.dom.minidom.parse(response)
    finally:
        response.close()

    # Read XML document and extract locale to rules mapping
    localesMapping = {}
    algorithms = {}
    for index, pluralRules in enumerate(doc.getElementsByTagName("pluralRules")):
        # `enumerate` yields each index once, so the entry never pre-exists.
        algorithms[index] = {}
        for locale in pluralRules.getAttribute("locales").split():
            localesMapping[locale] = index
        for rule in pluralRules.childNodes:
            # Skip text/comment nodes and any element that is not a rule.
            if rule.nodeType != rule.ELEMENT_NODE or rule.tagName != "pluralRule":
                continue
            pluralForm = rule.getAttribute("count")
            algorithm = rule.firstChild.nodeValue
            algorithms[index][pluralForm] = algorithm

    # Go through all rules and compute a Javascript code for each of them
    rules = {}
    for index, rule in algorithms.items():
        lines = []
        for pluralForm in rule:
            originalCondition = str(rule[pluralForm])

            # Convert pseudo language to JS code
            condition = rule[pluralForm].lower()
            condition = re.sub(CONDITION_RE, parseCondition, condition)
            # Word boundaries keep the connectives from being rewritten
            # inside any other token.
            condition = re.sub(r'\bor\b', "||", condition)
            condition = re.sub(r'\band\b', "&&", condition)

            # Prints original condition in unicode.org pseudo language
            if PRINT_CONDITIONS_IN_COMMENTS:
                lines.append('// %s' % originalCondition)

            lines.append('if (%s)' % condition)
            lines.append(' return "%s";' % pluralForm)

        rules[index] = "\n ".join(lines)
    return localesMapping, rules
|
127 |
|
128 |
|
# Script entry point: build the locale -> rule index mapping and the JS body
# of every rule. This call downloads the XML document.
129 localesMapping, rules = computeRules() |
|
130 |
|
# Wrap each rule body in a JS function keyed by its rule index. Every
# function ends with `return "other"` as the fallback when no condition hit.
131 rulesLines = [] |
|
132 for index in rules: |
|
133 lines = rules[index] |
|
134 rulesLines.append('"%d": function (n) {' % index) |
|
135 rulesLines.append(' %s' % lines) |
|
136 rulesLines.append(' return "other"') |
|
137 rulesLines.append('},') |
|
138 |
|
# Write the generated JS module to stdout. The three `%s` placeholders of the
# template receive, in order: the source URL, the JSON-encoded locale mapping
# and the joined rule functions.
139 print """/* This Source Code Form is subject to the terms of the Mozilla Public |
|
140 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
141 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
142 |
|
143 // This file is automatically generated with /python-lib/plural-rules-generator.py |
|
144 // Fetching data from: %s |
|
145 |
|
146 // Mapping of short locale name == to == > rule index in following list |
|
147 const LOCALES_TO_RULES = %s; |
|
148 |
|
149 // Utility functions for plural rules methods |
|
150 function isIn(n, list) list.indexOf(n) !== -1; |
|
151 function isBetween(n, start, end) start <= n && n <= end; |
|
152 |
|
153 // List of all plural rules methods, that maps an integer to the plural form name to use |
|
154 const RULES = { |
|
155 %s |
|
156 }; |
|
157 |
|
158 /** |
|
159 * Return a function that gives the plural form name for a given integer |
|
160 * for the specified `locale` |
|
161 * let fun = getRulesForLocale('en'); |
|
162 * fun(1) -> 'one' |
|
163 * fun(0) -> 'other' |
|
164 * fun(1000) -> 'other' |
|
165 */ |
|
166 exports.getRulesForLocale = function getRulesForLocale(locale) { |
|
167 let index = LOCALES_TO_RULES[locale]; |
|
168 if (!(index in RULES)) { |
|
169 console.warn('Plural form unknown for locale "' + locale + '"'); |
|
170 return function () { return "other"; }; |
|
171 } |
|
172 return RULES[index]; |
|
173 } |
|
174 """ % (UNICODE_ORG_XML_URL, |
|
175 json.dumps(localesMapping, sort_keys=True, indent=2), |
|
176 "\n ".join(rulesLines)) |