js/src/builtin/make_intl_data.py

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rwxr-xr-x

Conditionally enable double-key logic according to private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts. Some reservations
remain about how to convince FindCookie users to test the condition and
pass a nullptr when disabling double-key logic.

     1 #!/usr/bin/env python
     2 # -*- coding: utf-8 -*-
     3 #
     4 # This Source Code Form is subject to the terms of the Mozilla Public
     5 # License, v. 2.0. If a copy of the MPL was not distributed with this
     6 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     8 """ Usage: make_intl_data.py [language-subtag-registry.txt]
    10     This script extracts information about mappings between deprecated and
    11     current BCP 47 language tags from the IANA Language Subtag Registry and
    12     converts it to JavaScript object definitions in IntlData.js. The definitions
    13     are used in Intl.js.
    15     The IANA Language Subtag Registry is imported from
    16     http://www.iana.org/assignments/language-subtag-registry
    17     and uses the syntax specified in
    18     http://tools.ietf.org/html/rfc5646#section-3
    19 """
    21 def readRegistryRecord(registry):
    22     """ Yields the records of the IANA Language Subtag Registry as dictionaries. """
    23     record = {}
    24     for line in registry:
    25         line = line.strip()
    26         if line == "":
    27             continue
    28         if line == "%%":
    29             yield record
    30             record = {}
    31         else:
    32             if ":" in line:
    33                 key, value = line.split(":", 1)
    34                 key, value = key.strip(), value.strip()
    35                 record[key] = value
    36             else:
    37                 # continuation line
    38                 record[key] += " " + line
    39     if record:
    40         yield record
    41     return
    44 def readRegistry(registry):
    45     """ Reads IANA Language Subtag Registry and extracts information for Intl.js.
    47         Information extracted:
    48         - langTagMappings: mappings from complete language tags to preferred
    49           complete language tags
    50         - langSubtagMappings: mappings from subtags to preferred subtags
    51         - extlangMappings: mappings from extlang subtags to preferred subtags,
    52           with prefix to be removed
    53         Returns these three mappings as dictionaries, along with the registry's
    54         file date.
    56         We also check that mappings for language subtags don't affect extlang
    57         subtags and vice versa, so that CanonicalizeLanguageTag doesn't have
    58         to separate them for processing. Region codes are separated by case,
    59         and script codes by length, so they're unproblematic.
    60     """
    61     langTagMappings = {}
    62     langSubtagMappings = {}
    63     extlangMappings = {}
    64     languageSubtags = set()
    65     extlangSubtags = set()
    67     for record in readRegistryRecord(registry):
    68         if "File-Date" in record:
    69             fileDate = record["File-Date"]
    70             continue
    72         if record["Type"] == "grandfathered":
    73             # Grandfathered tags don't use standard syntax, so
    74             # CanonicalizeLanguageTag expects the mapping table to provide
    75             # the final form for all.
    76             # For langTagMappings, keys must be in lower case; values in
    77             # the case used in the registry.
    78             tag = record["Tag"]
    79             if "Preferred-Value" in record:
    80                 langTagMappings[tag.lower()] = record["Preferred-Value"]
    81             else:
    82                 langTagMappings[tag.lower()] = tag
    83         elif record["Type"] == "redundant":
    84             # For langTagMappings, keys must be in lower case; values in
    85             # the case used in the registry.
    86             if "Preferred-Value" in record:
    87                 langTagMappings[record["Tag"].lower()] = record["Preferred-Value"]
    88         elif record["Type"] in ("language", "script", "region", "variant"):
    89             # For langSubtagMappings, keys and values must be in the case used
    90             # in the registry.
    91             subtag = record["Subtag"]
    92             if record["Type"] == "language":
    93                 languageSubtags.add(subtag)
    94             if "Preferred-Value" in record:
    95                 if subtag == "heploc":
    96                     # The entry for heploc is unique in its complexity; handle
    97                     # it as special case below.
    98                     continue
    99                 if "Prefix" in record:
   100                     # This might indicate another heploc-like complex case.
   101                     raise Exception("Please evaluate: subtag mapping with prefix value.")
   102                 langSubtagMappings[subtag] = record["Preferred-Value"]
   103         elif record["Type"] == "extlang":
   104             # For extlangMappings, keys must be in the case used in the
   105             # registry; values are records with the preferred value and the
   106             # prefix to be removed.
   107             subtag = record["Subtag"]
   108             extlangSubtags.add(subtag)
   109             if "Preferred-Value" in record:
   110                 preferred = record["Preferred-Value"]
   111                 prefix = record["Prefix"]
   112                 extlangMappings[subtag] = {"preferred": preferred, "prefix": prefix}
   113         else:
   114             # No other types are allowed by
   115             # http://tools.ietf.org/html/rfc5646#section-3.1.3
   116             assert False, "Unrecognized Type: {0}".format(record["Type"])
   118     # Check that mappings for language subtags and extlang subtags don't affect
   119     # each other.
   120     for lang in languageSubtags:
   121         if lang in extlangMappings and extlangMappings[lang]["preferred"] != lang:
   122             raise Exception("Conflict: lang with extlang mapping: " + lang)
   123     for extlang in extlangSubtags:
   124         if extlang in langSubtagMappings:
   125             raise Exception("Conflict: extlang with lang mapping: " + extlang)
   127     # Special case for heploc.
   128     langTagMappings["ja-latn-hepburn-heploc"] = "ja-Latn-alalc97"
   130     return {"fileDate": fileDate,
   131             "langTagMappings": langTagMappings,
   132             "langSubtagMappings": langSubtagMappings,
   133             "extlangMappings": extlangMappings}
   136 def writeMappingsVar(intlData, dict, name, description, fileDate, url):
   137     """ Writes a variable definition with a mapping table to file intlData.
   139         Writes the contents of dictionary dict to file intlData with the given
   140         variable name and a comment with description, fileDate, and URL.
   141     """
   142     intlData.write("\n")
   143     intlData.write("// {0}.\n".format(description))
   144     intlData.write("// Derived from IANA Language Subtag Registry, file date {0}.\n".format(fileDate))
   145     intlData.write("// {0}\n".format(url))
   146     intlData.write("var {0} = {{\n".format(name))
   147     keys = sorted(dict)
   148     for key in keys:
   149         if isinstance(dict[key], basestring):
   150             value = '"{0}"'.format(dict[key])
   151         else:
   152             preferred = dict[key]["preferred"]
   153             prefix = dict[key]["prefix"]
   154             value = '{{preferred: "{0}", prefix: "{1}"}}'.format(preferred, prefix)
   155         intlData.write('    "{0}": {1},\n'.format(key, value))
   156     intlData.write("};\n")
   159 def writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings):
   160     """ Writes the language tag data to the Intl data file. """
   161     writeMappingsVar(intlData, langTagMappings, "langTagMappings",
   162                      "Mappings from complete tags to preferred values", fileDate, url)
   163     writeMappingsVar(intlData, langSubtagMappings, "langSubtagMappings",
   164                      "Mappings from non-extlang subtags to preferred values", fileDate, url)
   165     writeMappingsVar(intlData, extlangMappings, "extlangMappings",
   166                      "Mappings from extlang subtags to preferred values", fileDate, url)
   169 if __name__ == '__main__':
   170     import codecs
   171     import sys
   172     import urllib2
   174     url = "http://www.iana.org/assignments/language-subtag-registry"
   175     if len(sys.argv) > 1:
   176         print("Always make sure you have the newest language-subtag-registry.txt!")
   177         registry = codecs.open(sys.argv[1], "r", encoding="utf-8")
   178     else:
   179         print("Downloading IANA Language Subtag Registry...")
   180         reader = urllib2.urlopen(url)
   181         text = reader.read().decode("utf-8")
   182         reader.close()
   183         registry = codecs.open("language-subtag-registry.txt", "w+", encoding="utf-8")
   184         registry.write(text)
   185         registry.seek(0)
   187     print("Processing IANA Language Subtag Registry...")
   188     data = readRegistry(registry)
   189     fileDate = data["fileDate"]
   190     langTagMappings = data["langTagMappings"]
   191     langSubtagMappings = data["langSubtagMappings"]
   192     extlangMappings = data["extlangMappings"]
   193     registry.close()
   195     print("Writing Intl data...")
   196     intlData = codecs.open("IntlData.js", "w", encoding="utf-8")
   197     intlData.write("// Generated by make_intl_data.py. DO NOT EDIT.\n")
   198     writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings)
   199     intlData.close()

mercurial