addon-sdk/source/python-lib/plural-rules-generator.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/addon-sdk/source/python-lib/plural-rules-generator.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,176 @@
     1.4 +# This Source Code Form is subject to the terms of the Mozilla Public
     1.5 +# License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.7 +
     1.8 +# Program used to generate /packages/api-utils/lib/l10n/plural-rules.js
     1.9 +# Fetches plural rules data from unicode.org and builds, for each language,
    1.10 +# a function that returns the plural form name of a given integer.
    1.11 +# Plural form names are: zero, one, two, few, many, other.
    1.12 +#
    1.13 +# More information here:
    1.14 +#   http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
    1.15 +#   http://cldr.unicode.org/index/cldr-spec/plural-rules
    1.16 +
    1.17 +# Usage:
    1.18 +# $ python plural-rules-generator.py > ../packages/api-utils/lib/l10n/plural-rules.js
    1.19 +
    1.20 +import urllib2
    1.21 +import xml.dom.minidom
    1.22 +import json
    1.23 +import re
    1.24 +
    1.25 +PRINT_CONDITIONS_IN_COMMENTS = False
    1.26 +
    1.27 +UNICODE_ORG_XML_URL = "http://unicode.org/repos/cldr/trunk/common/supplemental/plurals.xml"
    1.28 +
    1.29 +CONDITION_RE = r'n( mod \d+)? (is|in|within|(not in))( not)? ([^\s]+)'
    1.30 +
    1.31 +# For a given regexp.MatchObject `g` for `CONDITION_RE`, 
    1.32 +# returns the equivalent JS piece of code
    1.33 +# i.e. maps pseudo conditional language from unicode.org XML to JS code
    1.34 +def parseCondition(g):
    1.35 +    lvalue = "n"
    1.36 +    if g.group(1):
    1.37 +        lvalue = "(n %% %d)" % int(g.group(1).replace("mod ", ""))
    1.38 +
    1.39 +    operator = g.group(2)
    1.40 +    if g.group(4):
    1.41 +        operator += " not"
    1.42 +
    1.43 +    rvalue = g.group(5)
    1.44 +
    1.45 +    if operator == "is":
    1.46 +        return "%s == %s" % (lvalue, rvalue)
    1.47 +    if operator == "is not":
    1.48 +        return "%s != %s" % (lvalue, rvalue)
    1.49 +
    1.50 +    # "in", "within" or "not in" case:
    1.51 +    notPrefix = ""
    1.52 +    if operator == "not in":
    1.53 +        notPrefix = "!"
    1.54 +
    1.55 +    # `rvalue` is a comma seperated list of either:
    1.56 +    #  - numbers: 42
    1.57 +    #  - ranges: 42..72
    1.58 +    sections = rvalue.split(',')
    1.59 +
    1.60 +    if ".." not in rvalue:
    1.61 +        # If we don't have range, but only a list of integer,
    1.62 +        # we can simplify the generated code by using `isIn`
    1.63 +        # n in 1,3,6,42
    1.64 +        return "%sisIn(%s, [%s])" % (notPrefix, lvalue, ", ".join(sections))
    1.65 +
    1.66 +    # n in 1..42
    1.67 +    # n in 1..3,42
    1.68 +    subCondition = []
    1.69 +    integers = []
    1.70 +    for sub in sections:
    1.71 +        if ".." in sub:
    1.72 +            left, right = sub.split("..")
    1.73 +            subCondition.append("isBetween(%s, %d, %d)" % (
    1.74 +                                lvalue,
    1.75 +                                int(left),
    1.76 +                                int(right)
    1.77 +                               ))
    1.78 +        else:
    1.79 +            integers.append(int(sub))
    1.80 +    if len(integers) > 1:
    1.81 +      subCondition.append("isIn(%s, [%s])" % (lvalue, ", ".join(integers)))
    1.82 +    elif len(integers) == 1:
    1.83 +      subCondition.append("(%s == %s)" % (lvalue, integers[0]))
    1.84 +    return "%s(%s)" % (notPrefix, " || ".join(subCondition))
    1.85 +
    1.86 +def computeRules():
    1.87 +    # Fetch plural rules data directly from unicode.org website:
    1.88 +    url = UNICODE_ORG_XML_URL
    1.89 +    f = urllib2.urlopen(url)
    1.90 +    doc = xml.dom.minidom.parse(f)
    1.91 +
    1.92 +    # Read XML document and extract locale to rules mapping
    1.93 +    localesMapping = {}
    1.94 +    algorithms = {}
    1.95 +    for index,pluralRules in enumerate(doc.getElementsByTagName("pluralRules")):
    1.96 +        if not index in algorithms:
    1.97 +            algorithms[index] = {}
    1.98 +        for locale in pluralRules.getAttribute("locales").split():
    1.99 +            localesMapping[locale] = index
   1.100 +        for rule in pluralRules.childNodes:
   1.101 +            if rule.nodeType != rule.ELEMENT_NODE or rule.tagName != "pluralRule":
   1.102 +                continue
   1.103 +            pluralForm = rule.getAttribute("count")
   1.104 +            algorithm = rule.firstChild.nodeValue
   1.105 +            algorithms[index][pluralForm] = algorithm
   1.106 +
   1.107 +    # Go through all rules and compute a Javascript code for each of them
   1.108 +    rules = {}
   1.109 +    for index,rule in algorithms.iteritems():
   1.110 +        lines = []
   1.111 +        for pluralForm in rule:
   1.112 +            condition = rule[pluralForm]
   1.113 +            originalCondition = str(condition)
   1.114 +
   1.115 +            # Convert pseudo language to JS code
   1.116 +            condition = rule[pluralForm].lower()
   1.117 +            condition = re.sub(CONDITION_RE, parseCondition, condition)
   1.118 +            condition = re.sub(r'or', "||", condition)
   1.119 +            condition = re.sub(r'and', "&&", condition)
   1.120 +
   1.121 +            # Prints original condition in unicode.org pseudo language
   1.122 +            if PRINT_CONDITIONS_IN_COMMENTS:
   1.123 +                lines.append( '// %s' % originalCondition )
   1.124 +
   1.125 +            lines.append( 'if (%s)' % condition )
   1.126 +            lines.append( '  return "%s";' % pluralForm )
   1.127 +            
   1.128 +        rules[index] = "\n    ".join(lines)
   1.129 +    return localesMapping, rules
   1.130 +
   1.131 +
   1.132 +localesMapping, rules = computeRules()
   1.133 +
   1.134 +rulesLines = []
   1.135 +for index in rules:
   1.136 +    lines = rules[index]
   1.137 +    rulesLines.append('"%d": function (n) {' % index)
   1.138 +    rulesLines.append('  %s' % lines)
   1.139 +    rulesLines.append('  return "other"')
   1.140 +    rulesLines.append('},')
   1.141 +
   1.142 +print """/* This Source Code Form is subject to the terms of the Mozilla Public
   1.143 + * License, v. 2.0. If a copy of the MPL was not distributed with this
   1.144 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   1.145 +
   1.146 +// This file is automatically generated with /python-lib/plural-rules-generator.py
   1.147 +// Fetching data from: %s
   1.148 +
   1.149 +// Mapping of short locale name == to == > rule index in following list
   1.150 +const LOCALES_TO_RULES = %s;
   1.151 +
   1.152 +// Utility functions for plural rules methods
   1.153 +function isIn(n, list) list.indexOf(n) !== -1;
   1.154 +function isBetween(n, start, end) start <= n && n <= end;
   1.155 +
   1.156 +// List of all plural rules methods, that maps an integer to the plural form name to use
   1.157 +const RULES = {
   1.158 +  %s
   1.159 +};
   1.160 +
   1.161 +/**
   1.162 +  * Return a function that gives the plural form name for a given integer
   1.163 +  * for the specified `locale`
   1.164 +  *   let fun = getRulesForLocale('en');
   1.165 +  *   fun(1)    -> 'one'
   1.166 +  *   fun(0)    -> 'other'
   1.167 +  *   fun(1000) -> 'other'
   1.168 +  */
   1.169 +exports.getRulesForLocale = function getRulesForLocale(locale) {
   1.170 +  let index = LOCALES_TO_RULES[locale];
   1.171 +  if (!(index in RULES)) {
   1.172 +    console.warn('Plural form unknown for locale "' + locale + '"');
   1.173 +    return function () { return "other"; };
   1.174 +  }
   1.175 +  return RULES[index];
   1.176 +}
   1.177 +""" % (UNICODE_ORG_XML_URL,
   1.178 +        json.dumps(localesMapping, sort_keys=True, indent=2),
   1.179 +        "\n  ".join(rulesLines))

mercurial