michael@0: # This Source Code Form is subject to the terms of the Mozilla Public michael@0: # License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: # file, You can obtain one at http://mozilla.org/MPL/2.0/. michael@0: michael@0: # This file contains educated guesses about which top-level domains are michael@0: # likely to host legacy content that assumes a non-windows-1252 encoding. michael@0: # Punycode TLDs are included on the theory that legacy content might appear michael@0: # behind those relatively new TLDs if DNS just points to a legacy server. michael@0: # michael@0: # Domains for which a confident-enough educated guess is missing are michael@0: # listed in nonparticipatingdomains.properties. Domains that are listed michael@0: # neither there nor here get windows-1252 as the associated fallback. michael@0: # michael@0: # The list below includes Arabic-script TLDs not on the IANA list but on the michael@0: # ICANN list: michael@0: # http://www.icann.org/en/resources/idn/fast-track/string-evaluation-completion michael@0: # Otherwise, the list includes non-windows-1252-affiliated country TLDs from michael@0: # https://data.iana.org/TLD/tlds-alpha-by-domain.txt michael@0: # michael@0: # The guesses are assigned as follows: michael@0: # * If the country has a dominant country-affiliated language and that language michael@0: # is part of the languages to fallbacks mapping, use the encoding for that michael@0: # language from that mapping. michael@0: # * Use windows-1256 for countries that have a dominant Arabic-script michael@0: # language or all of whose languages are Arabic-script languages. michael@0: # * Use windows-1251 likewise but for Cyrillic script. 
michael@0: michael@0: ae=windows-1256 michael@0: xn--mgbaam7a8h=windows-1256 michael@0: michael@0: af=windows-1256 michael@0: michael@0: bg=windows-1251 michael@0: michael@0: bh=windows-1256 michael@0: michael@0: by=windows-1251 michael@0: michael@0: cn=gbk michael@0: xn--fiqs8s=gbk michael@0: # Assume that Traditional Chinese TLD is meant to work if URL input happens to michael@0: # be in the traditional mode. Expect content to be simplified anyway. michael@0: xn--fiqz9s=gbk michael@0: michael@0: cz=windows-1250 michael@0: michael@0: dz=windows-1256 michael@0: xn--lgbbat1ad8j=windows-1256 michael@0: michael@0: ee=windows-1257 michael@0: michael@0: eg=windows-1256 michael@0: xn--wgbh1c=windows-1256 michael@0: michael@0: gr=ISO-8859-7 michael@0: michael@0: hk=Big5-HKSCS michael@0: xn--j6w193g=Big5-HKSCS michael@0: michael@0: hr=windows-1250 michael@0: michael@0: hu=ISO-8859-2 michael@0: michael@0: iq=windows-1256 michael@0: michael@0: ir=windows-1256 michael@0: xn--mgba3a4f16a=windows-1256 michael@0: michael@0: jo=windows-1256 michael@0: xn--mgbayh7gpa=windows-1256 michael@0: michael@0: jp=Shift_JIS michael@0: michael@0: kg=windows-1251 michael@0: michael@0: kp=EUC-KR michael@0: michael@0: kr=EUC-KR michael@0: xn--3e0b707e=EUC-KR michael@0: michael@0: kw=windows-1256 michael@0: michael@0: kz=windows-1251 michael@0: xn--80ao21a=windows-1251 michael@0: michael@0: lb=windows-1256 michael@0: michael@0: lt=windows-1257 michael@0: michael@0: lv=windows-1257 michael@0: michael@0: ma=windows-1256 michael@0: xn--mgbc0a9azcg=windows-1256 michael@0: michael@0: mk=windows-1251 michael@0: michael@0: mn=windows-1251 michael@0: xn--l1acc=windows-1251 michael@0: michael@0: mo=Big5 michael@0: michael@0: # my michael@0: xn--mgbx4cd0ab=windows-1256 michael@0: michael@0: om=windows-1256 michael@0: xn--mgb9awbf=windows-1256 michael@0: michael@0: #pk michael@0: xn--mgbai9azgqp6j=windows-1256 michael@0: michael@0: pl=ISO-8859-2 michael@0: michael@0: ps=windows-1256 michael@0: 
xn--ygbi2ammx=windows-1256 michael@0: michael@0: qa=windows-1256 michael@0: xn--wgbl6a=windows-1256 michael@0: michael@0: rs=windows-1251 michael@0: xn--90a3ac=windows-1251 michael@0: michael@0: ru=windows-1251 michael@0: xn--p1ai=windows-1251 michael@0: michael@0: sa=windows-1256 michael@0: xn--mgberp4a5d4ar=windows-1256 michael@0: michael@0: sd=windows-1256 michael@0: xn--mgbpl2fh=windows-1256 michael@0: michael@0: sg=gbk michael@0: xn--yfro4i67o=gbk michael@0: michael@0: si=ISO-8859-2 michael@0: michael@0: sk=windows-1250 michael@0: michael@0: su=windows-1251 michael@0: michael@0: sy=windows-1256 michael@0: xn--mgbtf8fl=windows-1256 michael@0: michael@0: th=windows-874 michael@0: xn--o3cw4h=windows-874 michael@0: michael@0: tj=windows-1251 michael@0: michael@0: tn=windows-1256 michael@0: xn--pgbs0dh=windows-1256 michael@0: michael@0: tr=windows-1254 michael@0: michael@0: tw=Big5 michael@0: # Assume that the Simplified Chinese TLD is meant to work when URL input michael@0: # happens in the simplified mode. Assume content is traditional anyway. michael@0: xn--kprw13d=Big5 michael@0: xn--kpry57d=Big5 michael@0: michael@0: ua=windows-1251 michael@0: xn--j1amh=windows-1251 michael@0: michael@0: uz=windows-1251 michael@0: michael@0: vn=windows-1258 michael@0: michael@0: ye=windows-1256 michael@0: xn--mgb2ddes=windows-1256