dom/encoding/domainsfallbacks.properties

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 # This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 # License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
michael@0 4
michael@0 5 # This file contains educated guesses about which top-level domains are
michael@0 6 # likely to host legacy content that assumes a non-windows-1252 encoding.
michael@0 7 # Punycode TLDs are included on the theory that legacy content might appear
michael@0 8 # behind those relatively new TLDs if DNS just points to a legacy server.
michael@0 9 #
michael@0 10 # Domains for which a confident-enough educated guess is missing are
michael@0 11 # listed in nonparticipatingdomains.properties. Domains that are listed
michael@0 12 # neither there nor here get windows-1252 as the associated fallback.
michael@0 13 #
michael@0 14 # The list below includes Arabic-script TLDs not on IANA list but on the
michael@0 15 # ICANN list:
michael@0 16 # http://www.icann.org/en/resources/idn/fast-track/string-evaluation-completion
michael@0 17 # Otherwise, the list includes non-windows-1252-affiliated country TLDs from
michael@0 18 # https://data.iana.org/TLD/tlds-alpha-by-domain.txt
michael@0 19 #
michael@0 20 # The guesses are assigned as follows:
michael@0 21 # * If the country has a dominant country-affiliated language and that language
michael@0 22 # is part of the languages to fallbacks mapping, use the encoding for that
michael@0 23 # language from that mapping.
michael@0 24 # * Use windows-1256 for countries that have a dominant Arabic-script
michael@0 25 # language or all of whose languages are Arabic-script languages.
michael@0 26 # * Use windows-1251 likewise but for Cyrillic script.
michael@0 27
michael@0 28 ae=windows-1256
michael@0 29 xn--mgbaam7a8h=windows-1256
michael@0 30
michael@0 31 af=windows-1256
michael@0 32
michael@0 33 bg=windows-1251
michael@0 34
michael@0 35 bh=windows-1256
michael@0 36
michael@0 37 by=windows-1251
michael@0 38
michael@0 39 cn=gbk
michael@0 40 xn--fiqs8s=gbk
michael@0 41 # Assume that the Traditional Chinese TLD is meant to work if URL input happens to
michael@0 42 # be in the traditional mode. Expect content to be simplified anyway.
michael@0 43 xn--fiqz9s=gbk
michael@0 44
michael@0 45 cz=windows-1250
michael@0 46
michael@0 47 dz=windows-1256
michael@0 48 xn--lgbbat1ad8j=windows-1256
michael@0 49
michael@0 50 ee=windows-1257
michael@0 51
michael@0 52 eg=windows-1256
michael@0 53 xn--wgbh1c=windows-1256
michael@0 54
michael@0 55 gr=ISO-8859-7
michael@0 56
michael@0 57 hk=Big5-HKSCS
michael@0 58 xn--j6w193g=Big5-HKSCS
michael@0 59
michael@0 60 hr=windows-1250
michael@0 61
michael@0 62 hu=ISO-8859-2
michael@0 63
michael@0 64 iq=windows-1256
michael@0 65
michael@0 66 ir=windows-1256
michael@0 67 xn--mgba3a4f16a=windows-1256
michael@0 68
michael@0 69 jo=windows-1256
michael@0 70 xn--mgbayh7gpa=windows-1256
michael@0 71
michael@0 72 jp=Shift_JIS
michael@0 73
michael@0 74 kg=windows-1251
michael@0 75
michael@0 76 kp=EUC-KR
michael@0 77
michael@0 78 kr=EUC-KR
michael@0 79 xn--3e0b707e=EUC-KR
michael@0 80
michael@0 81 kw=windows-1256
michael@0 82
michael@0 83 kz=windows-1251
michael@0 84 xn--80ao21a=windows-1251
michael@0 85
michael@0 86 lb=windows-1256
michael@0 87
michael@0 88 lt=windows-1257
michael@0 89
michael@0 90 lv=windows-1257
michael@0 91
michael@0 92 ma=windows-1256
michael@0 93 xn--mgbc0a9azcg=windows-1256
michael@0 94
michael@0 95 mk=windows-1251
michael@0 96
michael@0 97 mn=windows-1251
michael@0 98 xn--l1acc=windows-1251
michael@0 99
michael@0 100 mo=Big5
michael@0 101
michael@0 102 # my
michael@0 103 xn--mgbx4cd0ab=windows-1256
michael@0 104
michael@0 105 om=windows-1256
michael@0 106 xn--mgb9awbf=windows-1256
michael@0 107
michael@0 108 #pk
michael@0 109 xn--mgbai9azgqp6j=windows-1256
michael@0 110
michael@0 111 pl=ISO-8859-2
michael@0 112
michael@0 113 ps=windows-1256
michael@0 114 xn--ygbi2ammx=windows-1256
michael@0 115
michael@0 116 qa=windows-1256
michael@0 117 xn--wgbl6a=windows-1256
michael@0 118
michael@0 119 rs=windows-1251
michael@0 120 xn--90a3ac=windows-1251
michael@0 121
michael@0 122 ru=windows-1251
michael@0 123 xn--p1ai=windows-1251
michael@0 124
michael@0 125 sa=windows-1256
michael@0 126 xn--mgberp4a5d4ar=windows-1256
michael@0 127
michael@0 128 sd=windows-1256
michael@0 129 xn--mgbpl2fh=windows-1256
michael@0 130
michael@0 131 sg=gbk
michael@0 132 xn--yfro4i67o=gbk
michael@0 133
michael@0 134 si=ISO-8859-2
michael@0 135
michael@0 136 sk=windows-1250
michael@0 137
michael@0 138 su=windows-1251
michael@0 139
michael@0 140 sy=windows-1256
michael@0 141 xn--mgbtf8fl=windows-1256
michael@0 142
michael@0 143 th=windows-874
michael@0 144 xn--o3cw4h=windows-874
michael@0 145
michael@0 146 tj=windows-1251
michael@0 147
michael@0 148 tn=windows-1256
michael@0 149 xn--pgbs0dh=windows-1256
michael@0 150
michael@0 151 tr=windows-1254
michael@0 152
michael@0 153 tw=Big5
michael@0 154 # Assume that the Simplified Chinese TLD is meant to work when URL input
michael@0 155 # happens in the simplified mode. Assume content is traditional anyway.
michael@0 156 xn--kprw13d=Big5
michael@0 157 xn--kpry57d=Big5
michael@0 158
michael@0 159 ua=windows-1251
michael@0 160 xn--j1amh=windows-1251
michael@0 161
michael@0 162 uz=windows-1251
michael@0 163
michael@0 164 vn=windows-1258
michael@0 165
michael@0 166 ye=windows-1256
michael@0 167 xn--mgb2ddes=windows-1256

mercurial