addon-sdk/source/python-lib/plural-rules-generator.py

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701 and complies with the disk avoidance requirement documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 # This Source Code Form is subject to the terms of the Mozilla Public
     2 # License, v. 2.0. If a copy of the MPL was not distributed with this
     3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     5 # Program used to generate /packages/api-utils/lib/l10n/plural-rules.js
     6 # Fetch unicode.org data in order to build functions specific to each language
     7 # that will return for a given integer, its plural form name.
     8 # Plural form names are: zero, one, two, few, many, other.
     9 #
    10 # More information here:
    11 #   http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
    12 #   http://cldr.unicode.org/index/cldr-spec/plural-rules
    14 # Usage:
    15 # $ python plural-rules-generator.py > ../packages/api-utils/lib/l10n/plural-rules.js
    17 import urllib2
    18 import xml.dom.minidom
    19 import json
    20 import re
    22 PRINT_CONDITIONS_IN_COMMENTS = False
    24 UNICODE_ORG_XML_URL = "http://unicode.org/repos/cldr/trunk/common/supplemental/plurals.xml"
    26 CONDITION_RE = r'n( mod \d+)? (is|in|within|(not in))( not)? ([^\s]+)'
    28 # For a given regexp.MatchObject `g` for `CONDITION_RE`, 
    29 # returns the equivalent JS piece of code
    30 # i.e. maps pseudo conditional language from unicode.org XML to JS code
    31 def parseCondition(g):
    32     lvalue = "n"
    33     if g.group(1):
    34         lvalue = "(n %% %d)" % int(g.group(1).replace("mod ", ""))
    36     operator = g.group(2)
    37     if g.group(4):
    38         operator += " not"
    40     rvalue = g.group(5)
    42     if operator == "is":
    43         return "%s == %s" % (lvalue, rvalue)
    44     if operator == "is not":
    45         return "%s != %s" % (lvalue, rvalue)
    47     # "in", "within" or "not in" case:
    48     notPrefix = ""
    49     if operator == "not in":
    50         notPrefix = "!"
    52     # `rvalue` is a comma seperated list of either:
    53     #  - numbers: 42
    54     #  - ranges: 42..72
    55     sections = rvalue.split(',')
    57     if ".." not in rvalue:
    58         # If we don't have range, but only a list of integer,
    59         # we can simplify the generated code by using `isIn`
    60         # n in 1,3,6,42
    61         return "%sisIn(%s, [%s])" % (notPrefix, lvalue, ", ".join(sections))
    63     # n in 1..42
    64     # n in 1..3,42
    65     subCondition = []
    66     integers = []
    67     for sub in sections:
    68         if ".." in sub:
    69             left, right = sub.split("..")
    70             subCondition.append("isBetween(%s, %d, %d)" % (
    71                                 lvalue,
    72                                 int(left),
    73                                 int(right)
    74                                ))
    75         else:
    76             integers.append(int(sub))
    77     if len(integers) > 1:
    78       subCondition.append("isIn(%s, [%s])" % (lvalue, ", ".join(integers)))
    79     elif len(integers) == 1:
    80       subCondition.append("(%s == %s)" % (lvalue, integers[0]))
    81     return "%s(%s)" % (notPrefix, " || ".join(subCondition))
    83 def computeRules():
    84     # Fetch plural rules data directly from unicode.org website:
    85     url = UNICODE_ORG_XML_URL
    86     f = urllib2.urlopen(url)
    87     doc = xml.dom.minidom.parse(f)
    89     # Read XML document and extract locale to rules mapping
    90     localesMapping = {}
    91     algorithms = {}
    92     for index,pluralRules in enumerate(doc.getElementsByTagName("pluralRules")):
    93         if not index in algorithms:
    94             algorithms[index] = {}
    95         for locale in pluralRules.getAttribute("locales").split():
    96             localesMapping[locale] = index
    97         for rule in pluralRules.childNodes:
    98             if rule.nodeType != rule.ELEMENT_NODE or rule.tagName != "pluralRule":
    99                 continue
   100             pluralForm = rule.getAttribute("count")
   101             algorithm = rule.firstChild.nodeValue
   102             algorithms[index][pluralForm] = algorithm
   104     # Go through all rules and compute a Javascript code for each of them
   105     rules = {}
   106     for index,rule in algorithms.iteritems():
   107         lines = []
   108         for pluralForm in rule:
   109             condition = rule[pluralForm]
   110             originalCondition = str(condition)
   112             # Convert pseudo language to JS code
   113             condition = rule[pluralForm].lower()
   114             condition = re.sub(CONDITION_RE, parseCondition, condition)
   115             condition = re.sub(r'or', "||", condition)
   116             condition = re.sub(r'and', "&&", condition)
   118             # Prints original condition in unicode.org pseudo language
   119             if PRINT_CONDITIONS_IN_COMMENTS:
   120                 lines.append( '// %s' % originalCondition )
   122             lines.append( 'if (%s)' % condition )
   123             lines.append( '  return "%s";' % pluralForm )
   125         rules[index] = "\n    ".join(lines)
   126     return localesMapping, rules
   129 localesMapping, rules = computeRules()
   131 rulesLines = []
   132 for index in rules:
   133     lines = rules[index]
   134     rulesLines.append('"%d": function (n) {' % index)
   135     rulesLines.append('  %s' % lines)
   136     rulesLines.append('  return "other"')
   137     rulesLines.append('},')
   139 print """/* This Source Code Form is subject to the terms of the Mozilla Public
   140  * License, v. 2.0. If a copy of the MPL was not distributed with this
   141  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   143 // This file is automatically generated with /python-lib/plural-rules-generator.py
   144 // Fetching data from: %s
   146 // Mapping of short locale name == to == > rule index in following list
   147 const LOCALES_TO_RULES = %s;
   149 // Utility functions for plural rules methods
   150 function isIn(n, list) list.indexOf(n) !== -1;
   151 function isBetween(n, start, end) start <= n && n <= end;
   153 // List of all plural rules methods, that maps an integer to the plural form name to use
   154 const RULES = {
   155   %s
   156 };
   158 /**
   159   * Return a function that gives the plural form name for a given integer
   160   * for the specified `locale`
   161   *   let fun = getRulesForLocale('en');
   162   *   fun(1)    -> 'one'
   163   *   fun(0)    -> 'other'
   164   *   fun(1000) -> 'other'
   165   */
   166 exports.getRulesForLocale = function getRulesForLocale(locale) {
   167   let index = LOCALES_TO_RULES[locale];
   168   if (!(index in RULES)) {
   169     console.warn('Plural form unknown for locale "' + locale + '"');
   170     return function () { return "other"; };
   171   }
   172   return RULES[index];
   173 }
   174 """ % (UNICODE_ORG_XML_URL,
   175         json.dumps(localesMapping, sort_keys=True, indent=2),
   176         "\n  ".join(rulesLines))

mercurial