addon-sdk/source/python-lib/plural-rules-generator.py

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701 and complies with the disk avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 4
michael@0 5 # Program used to generate /packages/api-utils/lib/l10n/plural-rules.js
michael@0 6 # Fetches unicode.org data in order to build, for each language, a function
michael@0 7 # that returns the plural form name to use for a given integer.
michael@0 8 # Plural form names are: zero, one, two, few, many, other.
michael@0 9 #
michael@0 10 # More information here:
michael@0 11 # http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
michael@0 12 # http://cldr.unicode.org/index/cldr-spec/plural-rules
michael@0 13
michael@0 14 # Usage:
michael@0 15 # $ python plural-rules-generator.py > ../packages/api-utils/lib/l10n/plural-rules.js
michael@0 16
michael@0 17 import urllib2
michael@0 18 import xml.dom.minidom
michael@0 19 import json
michael@0 20 import re
michael@0 21
michael@0 22 PRINT_CONDITIONS_IN_COMMENTS = False
michael@0 23
michael@0 24 UNICODE_ORG_XML_URL = "http://unicode.org/repos/cldr/trunk/common/supplemental/plurals.xml"
michael@0 25
michael@0 26 CONDITION_RE = r'n( mod \d+)? (is|in|within|(not in))( not)? ([^\s]+)'
michael@0 27
michael@0 28 # For a given regexp.MatchObject `g` for `CONDITION_RE`,
michael@0 29 # returns the equivalent JS piece of code
michael@0 30 # i.e. maps pseudo conditional language from unicode.org XML to JS code
def parseCondition(g):
    """Translate one unicode.org plural condition into a JavaScript expression.

    `g` is a match object produced by `CONDITION_RE`. The groups used are:
      1: optional " mod N" applied to `n`
      2: operator: "is", "in", "within" or "not in"
      4: optional " not" following the operator (turns "is" into "is not")
      5: right-hand value: a single token made of integers and/or
         `a..b` ranges separated by commas

    Returns the JS source of the equivalent boolean expression as a string.
    The generated code calls the `isIn` and `isBetween` JS helpers.

    Raises ValueError if a range bound, or an integer listed next to a range,
    is not a valid integer literal.
    """
    lvalue = "n"
    if g.group(1):
        # group(1) looks like " mod 10"; int() tolerates the leading space
        lvalue = "(n %% %d)" % int(g.group(1).replace("mod ", ""))

    operator = g.group(2)
    if g.group(4):
        operator += " not"

    rvalue = g.group(5)

    if operator == "is":
        return "%s == %s" % (lvalue, rvalue)
    if operator == "is not":
        return "%s != %s" % (lvalue, rvalue)

    # "in", "within" or "not in" case:
    notPrefix = ""
    if operator == "not in":
        notPrefix = "!"

    # `rvalue` is a comma separated list of either:
    #  - numbers: 42
    #  - ranges: 42..72
    sections = rvalue.split(',')

    if ".." not in rvalue:
        # If we don't have range, but only a list of integer,
        # we can simplify the generated code by using `isIn`
        # n in 1,3,6,42
        return "%sisIn(%s, [%s])" % (notPrefix, lvalue, ", ".join(sections))

    # n in 1..42
    # n in 1..3,42
    subCondition = []
    integers = []
    for sub in sections:
        if ".." in sub:
            left, right = sub.split("..")
            subCondition.append("isBetween(%s, %d, %d)" % (
                lvalue,
                int(left),
                int(right)
            ))
        else:
            integers.append(int(sub))
    if len(integers) > 1:
        # `integers` holds ints, so they must be converted back to text
        # before being joined (str.join only accepts strings).
        subCondition.append("isIn(%s, [%s])" % (
            lvalue,
            ", ".join(str(value) for value in integers)
        ))
    elif len(integers) == 1:
        subCondition.append("(%s == %s)" % (lvalue, integers[0]))
    return "%s(%s)" % (notPrefix, " || ".join(subCondition))
michael@0 82
def computeRules():
    """Download the unicode.org plural rules and translate them to JavaScript.

    Fetches the XML document at `UNICODE_ORG_XML_URL`, walks every
    `pluralRules` element and converts the text of each `pluralRule` child
    into JS using `CONDITION_RE` and `parseCondition`.

    Returns a `(localesMapping, rules)` tuple where:
      - `localesMapping` maps a locale name to the index of its rule set
        (the position of its `pluralRules` element in the document);
      - `rules` maps a rule set index to one string of JS statements, made
        of `if (<condition>)` / `return "<plural form>";` line pairs.
    """
    # Fetch plural rules data directly from unicode.org website:
    url = UNICODE_ORG_XML_URL
    f = urllib2.urlopen(url)
    try:
        doc = xml.dom.minidom.parse(f)
    finally:
        # Always release the connection, even when the XML fails to parse
        f.close()

    # Read XML document and extract locale to rules mapping
    localesMapping = {}
    algorithms = {}
    for index, pluralRules in enumerate(doc.getElementsByTagName("pluralRules")):
        # enumerate() yields each index once, so a fresh dict is always needed
        algorithms[index] = {}
        for locale in pluralRules.getAttribute("locales").split():
            localesMapping[locale] = index
        for rule in pluralRules.childNodes:
            # Skip whitespace/text nodes and any unrelated elements
            if rule.nodeType != rule.ELEMENT_NODE or rule.tagName != "pluralRule":
                continue
            pluralForm = rule.getAttribute("count")
            algorithm = rule.firstChild.nodeValue
            algorithms[index][pluralForm] = algorithm

    # Go through all rules and compute a Javascript code for each of them
    rules = {}
    for index, rule in algorithms.items():
        lines = []
        for pluralForm in rule:
            # Keep the text exactly as parsed: coercing it with str() can
            # fail on non-ASCII characters under Python 2.
            originalCondition = rule[pluralForm]

            # Convert pseudo language to JS code
            condition = originalCondition.lower()
            condition = re.sub(CONDITION_RE, parseCondition, condition)
            # Only replace the standalone keywords, never a substring of
            # another token.
            condition = re.sub(r'\bor\b', "||", condition)
            condition = re.sub(r'\band\b', "&&", condition)

            # Prints original condition in unicode.org pseudo language
            if PRINT_CONDITIONS_IN_COMMENTS:
                lines.append('// %s' % originalCondition)

            lines.append('if (%s)' % condition)
            lines.append(' return "%s";' % pluralForm)

        rules[index] = "\n ".join(lines)
    return localesMapping, rules
michael@0 127
michael@0 128
localesMapping, rules = computeRules()

# Wrap the JS statements of every rule set into a `"<index>": function (n)`
# entry of the RULES object literal; "other" is the fallback plural form.
rulesLines = []
for index, lines in rules.items():
    rulesLines.extend([
        '"%d": function (n) {' % index,
        ' %s' % lines,
        ' return "other"',
        '},',
    ])

# Emit the generated JS module on stdout. The three placeholders receive the
# data source URL, the locale -> rule index mapping (as JSON) and the rule
# functions built above.
print("""/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// This file is automatically generated with /python-lib/plural-rules-generator.py
// Fetching data from: %s

// Mapping of short locale name == to == > rule index in following list
const LOCALES_TO_RULES = %s;

// Utility functions for plural rules methods
function isIn(n, list) list.indexOf(n) !== -1;
function isBetween(n, start, end) start <= n && n <= end;

// List of all plural rules methods, that maps an integer to the plural form name to use
const RULES = {
%s
};

/**
* Return a function that gives the plural form name for a given integer
* for the specified `locale`
* let fun = getRulesForLocale('en');
* fun(1) -> 'one'
* fun(0) -> 'other'
* fun(1000) -> 'other'
*/
exports.getRulesForLocale = function getRulesForLocale(locale) {
let index = LOCALES_TO_RULES[locale];
if (!(index in RULES)) {
console.warn('Plural form unknown for locale "' + locale + '"');
return function () { return "other"; };
}
return RULES[index];
}
""" % (UNICODE_ORG_XML_URL,
       json.dumps(localesMapping, sort_keys=True, indent=2),
       "\n ".join(rulesLines)))

mercurial