|
1 # This Source Code Form is subject to the terms of the Mozilla Public |
|
2 # License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4 |
|
5 # This file contains educated guesses about which top-level domains are |
|
6 # likely to host legacy content that assumes a non-windows-1252 encoding. |
|
7 # Punycode TLDs are included on the theory that legacy content might appear |
|
8 # behind those relatively new TLDs if DNS just points to a legacy server. |
|
9 # |
|
10 # Encodings for which a confident-enough educated guess is missing are |
|
11 # listed in nonparticipatingdomains.properties. Domains that are listed |
|
12 # neither there nor here get windows-1252 as the associated fallback. |
|
13 # |
|
14 # The list below includes Arabic-script TLDs not on the IANA list but on the |
|
15 # ICANN list: |
|
16 # http://www.icann.org/en/resources/idn/fast-track/string-evaluation-completion |
|
17 # Otherwise, the list includes non-windows-1252-affiliated country TLDs from |
|
18 # https://data.iana.org/TLD/tlds-alpha-by-domain.txt |
|
19 # |
|
20 # The guesses are assigned as follows: |
|
21 # * If the country has a dominant country-affiliated language and that language |
|
22 # is part of the languages to fallbacks mapping, use the encoding for that |
|
23 # language from that mapping. |
|
24 # * Use windows-1256 for countries that have a dominant Arabic-script |
|
25 # language or all of whose languages are Arabic-script languages. |
|
26 # * Use windows-1251 likewise but for Cyrillic script. |
|
27 |
|
28 ae=windows-1256 |
|
29 xn--mgbaam7a8h=windows-1256 |
|
30 |
|
31 af=windows-1256 |
|
32 |
|
33 bg=windows-1251 |
|
34 |
|
35 bh=windows-1256 |
|
36 |
|
37 by=windows-1251 |
|
38 |
|
39 cn=gbk |
|
40 xn--fiqs8s=gbk |
|
41 # Assume that the Traditional Chinese TLD is meant to work if URL input happens to |
|
42 # be in the traditional mode. Expect content to be simplified anyway. |
|
43 xn--fiqz9s=gbk |
|
44 |
|
45 cz=windows-1250 |
|
46 |
|
47 dz=windows-1256 |
|
48 xn--lgbbat1ad8j=windows-1256 |
|
49 |
|
50 ee=windows-1257 |
|
51 |
|
52 eg=windows-1256 |
|
53 xn--wgbh1c=windows-1256 |
|
54 |
|
55 gr=ISO-8859-7 |
|
56 |
|
57 hk=Big5-HKSCS |
|
58 xn--j6w193g=Big5-HKSCS |
|
59 |
|
60 hr=windows-1250 |
|
61 |
|
62 hu=ISO-8859-2 |
|
63 |
|
64 iq=windows-1256 |
|
65 |
|
66 ir=windows-1256 |
|
67 xn--mgba3a4f16a=windows-1256 |
|
68 |
|
69 jo=windows-1256 |
|
70 xn--mgbayh7gpa=windows-1256 |
|
71 |
|
72 jp=Shift_JIS |
|
73 |
|
74 kg=windows-1251 |
|
75 |
|
76 kp=EUC-KR |
|
77 |
|
78 kr=EUC-KR |
|
79 xn--3e0b707e=EUC-KR |
|
80 |
|
81 kw=windows-1256 |
|
82 |
|
83 kz=windows-1251 |
|
84 xn--80ao21a=windows-1251 |
|
85 |
|
86 lb=windows-1256 |
|
87 |
|
88 lt=windows-1257 |
|
89 |
|
90 lv=windows-1257 |
|
91 |
|
92 ma=windows-1256 |
|
93 xn--mgbc0a9azcg=windows-1256 |
|
94 |
|
95 mk=windows-1251 |
|
96 |
|
97 mn=windows-1251 |
|
98 xn--l1acc=windows-1251 |
|
99 |
|
100 mo=Big5 |
|
101 |
|
102 # my |
|
103 xn--mgbx4cd0ab=windows-1256 |
|
104 |
|
105 om=windows-1256 |
|
106 xn--mgb9awbf=windows-1256 |
|
107 |
|
108 # pk |
|
109 xn--mgbai9azgqp6j=windows-1256 |
|
110 |
|
111 pl=ISO-8859-2 |
|
112 |
|
113 ps=windows-1256 |
|
114 xn--ygbi2ammx=windows-1256 |
|
115 |
|
116 qa=windows-1256 |
|
117 xn--wgbl6a=windows-1256 |
|
118 |
|
119 rs=windows-1251 |
|
120 xn--90a3ac=windows-1251 |
|
121 |
|
122 ru=windows-1251 |
|
123 xn--p1ai=windows-1251 |
|
124 |
|
125 sa=windows-1256 |
|
126 xn--mgberp4a5d4ar=windows-1256 |
|
127 |
|
128 sd=windows-1256 |
|
129 xn--mgbpl2fh=windows-1256 |
|
130 |
|
131 sg=gbk |
|
132 xn--yfro4i67o=gbk |
|
133 |
|
134 si=ISO-8859-2 |
|
135 |
|
136 sk=windows-1250 |
|
137 |
|
138 su=windows-1251 |
|
139 |
|
140 sy=windows-1256 |
|
141 xn--mgbtf8fl=windows-1256 |
|
142 |
|
143 th=windows-874 |
|
144 xn--o3cw4h=windows-874 |
|
145 |
|
146 tj=windows-1251 |
|
147 |
|
148 tn=windows-1256 |
|
149 xn--pgbs0dh=windows-1256 |
|
150 |
|
151 tr=windows-1254 |
|
152 |
|
153 tw=Big5 |
|
154 # Assume that the Simplified Chinese TLD is meant to work when URL input |
|
155 # happens in the simplified mode. Assume content is traditional anyway. |
|
156 xn--kprw13d=Big5 |
|
157 xn--kpry57d=Big5 |
|
158 |
|
159 ua=windows-1251 |
|
160 xn--j1amh=windows-1251 |
|
161 |
|
162 uz=windows-1251 |
|
163 |
|
164 vn=windows-1258 |
|
165 |
|
166 ye=windows-1256 |
|
167 xn--mgb2ddes=windows-1256 |