# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Program used to generate /packages/api-utils/lib/l10n/plural-rules.js
# Fetch unicode.org data in order to build functions specific to each language
# that will return for a given integer, its plural form name.
# Plural form names are: zero, one, two, few, many, other.
#
# More information here:
#   http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
#   http://cldr.unicode.org/index/cldr-spec/plural-rules

# Usage:
#   $ python plural-rules-generator.py > ../packages/api-utils/lib/l10n/plural-rules.js

import collections
import contextlib
import json
import re
import xml.dom.minidom

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

PRINT_CONDITIONS_IN_COMMENTS = False

UNICODE_ORG_XML_URL = "http://unicode.org/repos/cldr/trunk/common/supplemental/plurals.xml"

# Groups: 1 = optional " mod N", 2 = operator, 4 = optional trailing " not",
# 5 = right-hand value (a number, or a comma separated list of numbers/ranges).
CONDITION_RE = r'n( mod \d+)? (is|in|within|(not in))( not)? ([^\s]+)'

# Skeleton of the generated JS module. The three `%s` slots receive, in order:
# the data URL, the JSON locale -> rule index mapping, and the rule functions.
JS_TEMPLATE = """/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// This file is automatically generated with /python-lib/plural-rules-generator.py
// Fetching data from: %s

// Mapping of short locale name == to == > rule index in following list
const LOCALES_TO_RULES = %s;

// Utility functions for plural rules methods
function isIn(n, list) list.indexOf(n) !== -1;
function isBetween(n, start, end) start <= n && n <= end;

// List of all plural rules methods, that maps an integer to the plural form name to use
const RULES = {
  %s
};

/**
 * Return a function that gives the plural form name for a given integer
 * for the specified `locale`
 *   let fun = getRulesForLocale('en');
 *   fun(1) -> 'one'
 *   fun(0) -> 'other'
 *   fun(1000) -> 'other'
 */
exports.getRulesForLocale = function getRulesForLocale(locale) {
  let index = LOCALES_TO_RULES[locale];
  if (!(index in RULES)) {
    console.warn('Plural form unknown for locale "' + locale + '"');
    return function () { return "other"; };
  }
  return RULES[index];
}
"""


def parseCondition(g):
    """Translate one matched pseudo-language condition into a JS expression.

    `g` is a match object produced by `CONDITION_RE`; the function is meant
    to be used as the replacement callback of `re.sub`.

    Returns a string of JS code:
      - "is" / "is not"          -> `lvalue == rvalue` / `lvalue != rvalue`
      - list of numbers only     -> `isIn(lvalue, [..])`
      - list containing a range  -> `(isBetween(..) || ...)`
    Both list forms are prefixed with `!` when the operator is "not in".
    """
    lvalue = "n"
    if g.group(1):
        lvalue = "(n %% %d)" % int(g.group(1).replace("mod ", ""))

    operator = g.group(2)
    if g.group(4):
        operator += " not"

    rvalue = g.group(5)

    if operator == "is":
        return "%s == %s" % (lvalue, rvalue)
    if operator == "is not":
        return "%s != %s" % (lvalue, rvalue)

    # "in", "within" or "not in" case:
    notPrefix = "!" if operator == "not in" else ""

    # `rvalue` is a comma separated list of either:
    #  - numbers: 42
    #  - ranges: 42..72
    sections = rvalue.split(',')

    if ".." not in rvalue:
        # If we don't have range, but only a list of integer,
        # we can simplify the generated code by using `isIn`
        # n in 1,3,6,42
        return "%sisIn(%s, [%s])" % (notPrefix, lvalue, ", ".join(sections))

    # n in 1..42
    # n in 1..3,42
    subCondition = []
    integers = []
    for sub in sections:
        if ".." in sub:
            left, right = sub.split("..")
            subCondition.append("isBetween(%s, %d, %d)" % (
                lvalue,
                int(left),
                int(right)
            ))
        else:
            integers.append(int(sub))
    if len(integers) > 1:
        # `integers` holds ints; str.join only accepts strings.
        subCondition.append("isIn(%s, [%s])" % (
            lvalue,
            ", ".join(str(value) for value in integers)
        ))
    elif len(integers) == 1:
        subCondition.append("(%s == %s)" % (lvalue, integers[0]))
    return "%s(%s)" % (notPrefix, " || ".join(subCondition))


def conditionToJS(condition):
    """Convert a whole pseudo-language rule (conditions joined by and/or) to JS.

    The rule is lower-cased, every `CONDITION_RE` match is rewritten through
    `parseCondition`, then the `or` / `and` keywords become `||` / `&&`.
    """
    condition = condition.lower()
    condition = re.sub(CONDITION_RE, parseCondition, condition)
    # Word boundaries keep the keywords from being rewritten inside other tokens.
    condition = re.sub(r'\bor\b', "||", condition)
    condition = re.sub(r'\band\b', "&&", condition)
    return condition


def fetchPluralsDocument(url=UNICODE_ORG_XML_URL):
    """Download `url` and return it parsed as an `xml.dom.minidom` document."""
    # closing() releases the connection even if parsing raises.
    with contextlib.closing(urlopen(url)) as response:
        return xml.dom.minidom.parse(response)


def computeRules(doc=None):
    """Build the locale mapping and the JS body of every plural rule.

    `doc` is a parsed minidom document; when omitted, the document is fetched
    from `UNICODE_ORG_XML_URL`.

    Returns a `(localesMapping, rules)` tuple:
      - `localesMapping` maps each locale name to the position of its
        `<pluralRules>` element in the document;
      - `rules` maps that position to the JS `if (...) return "...";`
        statements generated from the element's `<pluralRule>` children.
    """
    if doc is None:
        doc = fetchPluralsDocument()

    # Read XML document and extract locale to rules mapping
    localesMapping = {}
    algorithms = {}
    for index, pluralRules in enumerate(doc.getElementsByTagName("pluralRules")):
        # OrderedDict keeps plural forms in document order so the generated
        # code is identical from one run to the next.
        forms = algorithms.setdefault(index, collections.OrderedDict())
        for locale in pluralRules.getAttribute("locales").split():
            localesMapping[locale] = index
        for rule in pluralRules.childNodes:
            if rule.nodeType != rule.ELEMENT_NODE or rule.tagName != "pluralRule":
                continue
            forms[rule.getAttribute("count")] = rule.firstChild.nodeValue

    # Go through all rules and compute a Javascript code for each of them
    rules = {}
    for index, forms in algorithms.items():
        lines = []
        for pluralForm, originalCondition in forms.items():
            # Prints original condition in unicode.org pseudo language
            if PRINT_CONDITIONS_IN_COMMENTS:
                lines.append('// %s' % originalCondition)

            lines.append('if (%s)' % conditionToJS(originalCondition))
            lines.append(' return "%s";' % pluralForm)

        rules[index] = "\n ".join(lines)
    return localesMapping, rules


def renderPluralRulesJS(localesMapping, rules):
    """Return the full text of the generated plural-rules.js module.

    `localesMapping` and `rules` are the two values returned by
    `computeRules`. Rule functions are emitted in ascending index order.
    """
    rulesLines = []
    for index in sorted(rules):
        rulesLines.append('"%d": function (n) {' % index)
        rulesLines.append(' %s' % rules[index])
        rulesLines.append(' return "other"')
        rulesLines.append('},')

    return JS_TEMPLATE % (UNICODE_ORG_XML_URL,
                          json.dumps(localesMapping, sort_keys=True, indent=2),
                          "\n ".join(rulesLines))


def main():
    """Fetch the plural rules and write the generated JS module to stdout."""
    localesMapping, rules = computeRules()
    print(renderPluralRulesJS(localesMapping, rules))


if __name__ == "__main__":
    main()