1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/addon-sdk/source/python-lib/plural-rules-generator.py Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,176 @@ 1.4 +# This Source Code Form is subject to the terms of the Mozilla Public 1.5 +# License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 1.7 + 1.8 +# Program used to generate /packages/api-utils/lib/l10n/plural-rules.js 1.9 +# Fetch unicode.org data in order to build functions specific to each language 1.10 +# that will return for a given integer, its plural form name. 1.11 +# Plural form names are: zero, one, two, few, many, other. 1.12 +# 1.13 +# More information here: 1.14 +# http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html 1.15 +# http://cldr.unicode.org/index/cldr-spec/plural-rules 1.16 + 1.17 +# Usage: 1.18 +# $ python plural-rules-generator.py > ../packages/api-utils/lib/l10n/plural-rules.js 1.19 + 1.20 +import urllib2 1.21 +import xml.dom.minidom 1.22 +import json 1.23 +import re 1.24 + 1.25 +PRINT_CONDITIONS_IN_COMMENTS = False 1.26 + 1.27 +UNICODE_ORG_XML_URL = "http://unicode.org/repos/cldr/trunk/common/supplemental/plurals.xml" 1.28 + 1.29 +CONDITION_RE = r'n( mod \d+)? (is|in|within|(not in))( not)? ([^\s]+)' 1.30 + 1.31 +# For a given regexp.MatchObject `g` for `CONDITION_RE`, 1.32 +# returns the equivalent JS piece of code 1.33 +# i.e. maps pseudo conditional language from unicode.org XML to JS code 1.34 +def parseCondition(g): 1.35 + lvalue = "n" 1.36 + if g.group(1): 1.37 + lvalue = "(n %% %d)" % int(g.group(1).replace("mod ", "")) 1.38 + 1.39 + operator = g.group(2) 1.40 + if g.group(4): 1.41 + operator += " not" 1.42 + 1.43 + rvalue = g.group(5) 1.44 + 1.45 + if operator == "is": 1.46 + return "%s == %s" % (lvalue, rvalue) 1.47 + if operator == "is not": 1.48 + return "%s != %s" % (lvalue, rvalue) 1.49 + 1.50 + # "in", "within" or "not in" case: 1.51 + notPrefix = "" 1.52 + if operator == "not in": 1.53 + notPrefix = "!" 1.54 + 1.55 + # `rvalue` is a comma seperated list of either: 1.56 + # - numbers: 42 1.57 + # - ranges: 42..72 1.58 + sections = rvalue.split(',') 1.59 + 1.60 + if ".." not in rvalue: 1.61 + # If we don't have range, but only a list of integer, 1.62 + # we can simplify the generated code by using `isIn` 1.63 + # n in 1,3,6,42 1.64 + return "%sisIn(%s, [%s])" % (notPrefix, lvalue, ", ".join(sections)) 1.65 + 1.66 + # n in 1..42 1.67 + # n in 1..3,42 1.68 + subCondition = [] 1.69 + integers = [] 1.70 + for sub in sections: 1.71 + if ".." in sub: 1.72 + left, right = sub.split("..") 1.73 + subCondition.append("isBetween(%s, %d, %d)" % ( 1.74 + lvalue, 1.75 + int(left), 1.76 + int(right) 1.77 + )) 1.78 + else: 1.79 + integers.append(int(sub)) 1.80 + if len(integers) > 1: 1.81 + subCondition.append("isIn(%s, [%s])" % (lvalue, ", ".join(integers))) 1.82 + elif len(integers) == 1: 1.83 + subCondition.append("(%s == %s)" % (lvalue, integers[0])) 1.84 + return "%s(%s)" % (notPrefix, " || ".join(subCondition)) 1.85 + 1.86 +def computeRules(): 1.87 + # Fetch plural rules data directly from unicode.org website: 1.88 + url = UNICODE_ORG_XML_URL 1.89 + f = urllib2.urlopen(url) 1.90 + doc = xml.dom.minidom.parse(f) 1.91 + 1.92 + # Read XML document and extract locale to rules mapping 1.93 + localesMapping = {} 1.94 + algorithms = {} 1.95 + for index,pluralRules in enumerate(doc.getElementsByTagName("pluralRules")): 1.96 + if not index in algorithms: 1.97 + algorithms[index] = {} 1.98 + for locale in pluralRules.getAttribute("locales").split(): 1.99 + localesMapping[locale] = index 1.100 + for rule in pluralRules.childNodes: 1.101 + if rule.nodeType != rule.ELEMENT_NODE or rule.tagName != "pluralRule": 1.102 + continue 1.103 + pluralForm = rule.getAttribute("count") 1.104 + algorithm = rule.firstChild.nodeValue 1.105 + algorithms[index][pluralForm] = algorithm 1.106 + 1.107 + # Go through all rules and compute a Javascript code for each of them 1.108 + rules = {} 1.109 + for index,rule in algorithms.iteritems(): 1.110 + lines = [] 1.111 + for pluralForm in rule: 1.112 + condition = rule[pluralForm] 1.113 + originalCondition = str(condition) 1.114 + 1.115 + # Convert pseudo language to JS code 1.116 + condition = rule[pluralForm].lower() 1.117 + condition = re.sub(CONDITION_RE, parseCondition, condition) 1.118 + condition = re.sub(r'or', "||", condition) 1.119 + condition = re.sub(r'and', "&&", condition) 1.120 + 1.121 + # Prints original condition in unicode.org pseudo language 1.122 + if PRINT_CONDITIONS_IN_COMMENTS: 1.123 + lines.append( '// %s' % originalCondition ) 1.124 + 1.125 + lines.append( 'if (%s)' % condition ) 1.126 + lines.append( ' return "%s";' % pluralForm ) 1.127 + 1.128 + rules[index] = "\n ".join(lines) 1.129 + return localesMapping, rules 1.130 + 1.131 + 1.132 +localesMapping, rules = computeRules() 1.133 + 1.134 +rulesLines = [] 1.135 +for index in rules: 1.136 + lines = rules[index] 1.137 + rulesLines.append('"%d": function (n) {' % index) 1.138 + rulesLines.append(' %s' % lines) 1.139 + rulesLines.append(' return "other"') 1.140 + rulesLines.append('},') 1.141 + 1.142 +print """/* This Source Code Form is subject to the terms of the Mozilla Public 1.143 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.144 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.145 + 1.146 +// This file is automatically generated with /python-lib/plural-rules-generator.py 1.147 +// Fetching data from: %s 1.148 + 1.149 +// Mapping of short locale name == to == > rule index in following list 1.150 +const LOCALES_TO_RULES = %s; 1.151 + 1.152 +// Utility functions for plural rules methods 1.153 +function isIn(n, list) list.indexOf(n) !== -1; 1.154 +function isBetween(n, start, end) start <= n && n <= end; 1.155 + 1.156 +// List of all plural rules methods, that maps an integer to the plural form name to use 1.157 +const RULES = { 1.158 + %s 1.159 +}; 1.160 + 1.161 +/** 1.162 + * Return a function that gives the plural form name for a given integer 1.163 + * for the specified `locale` 1.164 + * let fun = getRulesForLocale('en'); 1.165 + * fun(1) -> 'one' 1.166 + * fun(0) -> 'other' 1.167 + * fun(1000) -> 'other' 1.168 + */ 1.169 +exports.getRulesForLocale = function getRulesForLocale(locale) { 1.170 + let index = LOCALES_TO_RULES[locale]; 1.171 + if (!(index in RULES)) { 1.172 + console.warn('Plural form unknown for locale "' + locale + '"'); 1.173 + return function () { return "other"; }; 1.174 + } 1.175 + return RULES[index]; 1.176 +} 1.177 +""" % (UNICODE_ORG_XML_URL, 1.178 + json.dumps(localesMapping, sort_keys=True, indent=2), 1.179 + "\n ".join(rulesLines))