Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double key logic according to:
private browsing mode or the privacy.thirdparty.isolate preference, and
implement it in GetCookieStringCommon and FindCookie, where it counts...
With some reservations about how to convince FindCookie users to test the
condition and pass a nullptr when disabling double key logic.
michael@0 | 1 | #!/usr/bin/env python |
michael@0 | 2 | # -*- coding: utf-8 -*- |
michael@0 | 3 | # |
michael@0 | 4 | # This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 5 | # License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 6 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
michael@0 | 7 | |
michael@0 | 8 | """ Usage: make_intl_data.py [language-subtag-registry.txt] |
michael@0 | 9 | |
michael@0 | 10 | This script extracts information about mappings between deprecated and |
michael@0 | 11 | current BCP 47 language tags from the IANA Language Subtag Registry and |
michael@0 | 12 | converts it to JavaScript object definitions in IntlData.js. The definitions |
michael@0 | 13 | are used in Intl.js. |
michael@0 | 14 | |
michael@0 | 15 | The IANA Language Subtag Registry is imported from |
michael@0 | 16 | http://www.iana.org/assignments/language-subtag-registry |
michael@0 | 17 | and uses the syntax specified in |
michael@0 | 18 | http://tools.ietf.org/html/rfc5646#section-3 |
michael@0 | 19 | """ |
michael@0 | 20 | |
def readRegistryRecord(registry):
    """ Yields the records of the IANA Language Subtag Registry as dictionaries.

    registry is an iterable of text lines. Records are separated by "%%"
    lines; every other non-blank line is either a "Name: body" field or a
    continuation of the preceding field. A line is a continuation when it
    starts with whitespace (the folding rule of RFC 5646, section 3.1.1) or
    when it contains no ":" at all.

    Each yielded dictionary maps field names to field bodies, with
    continuation lines appended to the body after a single space. If a
    field name occurs more than once in a record, the last occurrence wins.
    A dictionary is yielded for every "%%" separator, even if no field
    preceded it; a trailing record without separator is yielded only if it
    is non-empty.

    Raises ValueError if a continuation line appears before any field of
    the current record.
    """
    record = {}
    key = None
    for rawLine in registry:
        line = rawLine.strip()
        if line == "":
            continue
        if line == "%%":
            yield record
            record = {}
            # A continuation must never attach to a field of the previous
            # record.
            key = None
            continue

        # Check the unstripped line for leading whitespace, so that a folded
        # line containing ":" (a URL, for example) is not mistaken for a new
        # field.
        isContinuation = rawLine[:1].isspace() or ":" not in line
        if isContinuation:
            if key is None:
                raise ValueError("Continuation line without preceding field: " + line)
            record[key] += " " + line
        else:
            key, value = line.split(":", 1)
            key, value = key.strip(), value.strip()
            record[key] = value
    if record:
        yield record
michael@0 | 42 | |
michael@0 | 43 | |
def readRegistry(registry):
    """ Reads IANA Language Subtag Registry and extracts information for Intl.js.

    Information extracted:
    - langTagMappings: mappings from complete language tags to preferred
      complete language tags
    - langSubtagMappings: mappings from subtags to preferred subtags
    - extlangMappings: mappings from extlang subtags to preferred subtags,
      with prefix to be removed
    Returns a dictionary with the keys "fileDate", "langTagMappings",
    "langSubtagMappings", and "extlangMappings", holding the registry's
    file date and these three mappings.

    We also check that mappings for language subtags don't affect extlang
    subtags and vice versa, so that CanonicalizeLanguageTag doesn't have
    to separate them for processing. Region codes are separated by case,
    and script codes by length, so they're unproblematic.

    Raises Exception if a record has an unrecognized Type, if a subtag
    other than heploc has both a preferred value and a prefix, if language
    and extlang mappings conflict, or if the registry has no File-Date
    record. Raises KeyError if a record lacks a field required for its type.
    """
    langTagMappings = {}
    langSubtagMappings = {}
    extlangMappings = {}
    languageSubtags = set()
    extlangSubtags = set()
    fileDate = None

    for record in readRegistryRecord(registry):
        if "File-Date" in record:
            fileDate = record["File-Date"]
            continue

        recordType = record["Type"]
        if recordType == "grandfathered":
            # Grandfathered tags don't use standard syntax, so
            # CanonicalizeLanguageTag expects the mapping table to provide
            # the final form for all.
            # For langTagMappings, keys must be in lower case; values in
            # the case used in the registry.
            tag = record["Tag"]
            langTagMappings[tag.lower()] = record.get("Preferred-Value", tag)
        elif recordType == "redundant":
            # For langTagMappings, keys must be in lower case; values in
            # the case used in the registry.
            if "Preferred-Value" in record:
                langTagMappings[record["Tag"].lower()] = record["Preferred-Value"]
        elif recordType in ("language", "script", "region", "variant"):
            # For langSubtagMappings, keys and values must be in the case used
            # in the registry.
            subtag = record["Subtag"]
            if recordType == "language":
                languageSubtags.add(subtag)
            if "Preferred-Value" in record:
                if subtag == "heploc":
                    # The entry for heploc is unique in its complexity; handle
                    # it as special case below.
                    continue
                if "Prefix" in record:
                    # This might indicate another heploc-like complex case.
                    raise Exception("Please evaluate: subtag mapping with prefix value.")
                langSubtagMappings[subtag] = record["Preferred-Value"]
        elif recordType == "extlang":
            # For extlangMappings, keys must be in the case used in the
            # registry; values are records with the preferred value and the
            # prefix to be removed.
            subtag = record["Subtag"]
            extlangSubtags.add(subtag)
            if "Preferred-Value" in record:
                preferred = record["Preferred-Value"]
                prefix = record["Prefix"]
                extlangMappings[subtag] = {"preferred": preferred, "prefix": prefix}
        else:
            # No other types are allowed by
            # http://tools.ietf.org/html/rfc5646#section-3.1.3
            # Raise instead of assert, so that the check also runs under -O.
            raise Exception("Unrecognized Type: {0}".format(recordType))

    # Check that mappings for language subtags and extlang subtags don't affect
    # each other.
    for lang in languageSubtags:
        if lang in extlangMappings and extlangMappings[lang]["preferred"] != lang:
            raise Exception("Conflict: lang with extlang mapping: " + lang)
    for extlang in extlangSubtags:
        if extlang in langSubtagMappings:
            raise Exception("Conflict: extlang with lang mapping: " + extlang)

    # Special case for heploc.
    langTagMappings["ja-latn-hepburn-heploc"] = "ja-Latn-alalc97"

    # The generated file cites the file date, so fail clearly if it is missing.
    if fileDate is None:
        raise Exception("Registry contains no File-Date record.")

    return {"fileDate": fileDate,
            "langTagMappings": langTagMappings,
            "langSubtagMappings": langSubtagMappings,
            "extlangMappings": extlangMappings}
michael@0 | 134 | |
michael@0 | 135 | |
def writeMappingsVar(intlData, dict, name, description, fileDate, url):
    """ Writes a variable definition with a mapping table to file intlData.

    Writes the contents of dictionary dict to file intlData with the given
    variable name and a comment with description, fileDate, and URL.

    Entries are written sorted by key. A string value is written as a
    quoted string; any other value must provide "preferred" and "prefix"
    items and is written as an object literal with these two properties.
    Keys and values are written without any escaping.
    """
    # basestring exists only in Python 2; fall back to str elsewhere, so that
    # this function works under both major versions.
    try:
        stringTypes = basestring
    except NameError:
        stringTypes = str

    intlData.write("\n")
    intlData.write("// {0}.\n".format(description))
    intlData.write("// Derived from IANA Language Subtag Registry, file date {0}.\n".format(fileDate))
    intlData.write("// {0}\n".format(url))
    intlData.write("var {0} = {{\n".format(name))
    for key in sorted(dict):
        entry = dict[key]
        if isinstance(entry, stringTypes):
            value = '"{0}"'.format(entry)
        else:
            value = '{{preferred: "{0}", prefix: "{1}"}}'.format(entry["preferred"], entry["prefix"])
        # NOTE(review): the leading whitespace in this literal may have been
        # collapsed when this file was extracted - confirm the intended indent.
        intlData.write(' "{0}": {1},\n'.format(key, value))
    intlData.write("};\n")
michael@0 | 157 | |
michael@0 | 158 | |
def writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings):
    """ Writes the three language tag mapping tables to the Intl data file.

    The tables are written with writeMappingsVar, in the order
    langTagMappings, langSubtagMappings, extlangMappings; each one is
    annotated with fileDate and url.
    """
    tables = (
        (langTagMappings, "langTagMappings",
         "Mappings from complete tags to preferred values"),
        (langSubtagMappings, "langSubtagMappings",
         "Mappings from non-extlang subtags to preferred values"),
        (extlangMappings, "extlangMappings",
         "Mappings from extlang subtags to preferred values"),
    )
    for mappings, varName, description in tables:
        writeMappingsVar(intlData, mappings, varName, description, fileDate, url)
michael@0 | 167 | |
michael@0 | 168 | |
# Script entry point: reads the registry from the file named by the first
# command line argument, or downloads it and saves a local copy if no argument
# is given, then generates IntlData.js in the current directory.
if __name__ == '__main__':
    import codecs
    import contextlib
    import sys
    import urllib2

    url = "http://www.iana.org/assignments/language-subtag-registry"
    if len(sys.argv) > 1:
        print("Always make sure you have the newest language-subtag-registry.txt!")
        text = None
        registry = codecs.open(sys.argv[1], "r", encoding="utf-8")
    else:
        print("Downloading IANA Language Subtag Registry...")
        # closing() makes sure the connection is released even if reading or
        # decoding fails.
        with contextlib.closing(urllib2.urlopen(url)) as reader:
            text = reader.read().decode("utf-8")
        registry = codecs.open("language-subtag-registry.txt", "w+", encoding="utf-8")

    # The with statements close the files even if processing raises.
    with registry:
        if text is not None:
            # Keep a local copy of the download, then rewind to parse it.
            registry.write(text)
            registry.seek(0)

        print("Processing IANA Language Subtag Registry...")
        data = readRegistry(registry)
    fileDate = data["fileDate"]
    langTagMappings = data["langTagMappings"]
    langSubtagMappings = data["langSubtagMappings"]
    extlangMappings = data["extlangMappings"]

    print("Writing Intl data...")
    with codecs.open("IntlData.js", "w", encoding="utf-8") as intlData:
        intlData.write("// Generated by make_intl_data.py. DO NOT EDIT.\n")
        writeLanguageTagData(intlData, fileDate, url, langTagMappings, langSubtagMappings, extlangMappings)