browser/components/translation/cld2/public/encodings.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

michael@0 1 // Copyright 2013 Google Inc. All Rights Reserved.
michael@0 2 //
michael@0 3 // Licensed under the Apache License, Version 2.0 (the "License");
michael@0 4 // you may not use this file except in compliance with the License.
michael@0 5 // You may obtain a copy of the License at
michael@0 6 //
michael@0 7 // http://www.apache.org/licenses/LICENSE-2.0
michael@0 8 //
michael@0 9 // Unless required by applicable law or agreed to in writing, software
michael@0 10 // distributed under the License is distributed on an "AS IS" BASIS,
michael@0 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
michael@0 12 // See the License for the specific language governing permissions and
michael@0 13 // limitations under the License.
michael@0 14
michael@0 15 //
michael@0 16 // Author: dsites@google.com (Dick Sites)
michael@0 17 //
michael@0 18
michael@0 19 #ifndef I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__
michael@0 20 #define I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__
michael@0 21
michael@0 22 namespace CLD2 {
michael@0 23
michael@0 24 enum Encoding { // Encoding identifiers; values are explicit and contiguous from 0 to NUM_ENCODINGS - 1.
michael@0 25 ISO_8859_1 = 0, // ASCII (historical label; ASCII_7BIT below is the 7-bit subset)
michael@0 26 ISO_8859_2 = 1, // Latin2
michael@0 27 ISO_8859_3 = 2, //
michael@0 28 ISO_8859_4 = 3, // Latin4
michael@0 29 ISO_8859_5 = 4, // ISO-8859-5
michael@0 30 ISO_8859_6 = 5, // Arabic
michael@0 31 ISO_8859_7 = 6, // Greek
michael@0 32 ISO_8859_8 = 7, // Hebrew
michael@0 33 ISO_8859_9 = 8, //
michael@0 34 ISO_8859_10 = 9, //
michael@0 35 JAPANESE_EUC_JP = 10, // EUC_JP
michael@0 36 JAPANESE_SHIFT_JIS = 11, // SJS
michael@0 37 JAPANESE_JIS = 12, // JIS
michael@0 38 CHINESE_BIG5 = 13, // BIG5
michael@0 39 CHINESE_GB = 14, // GB
michael@0 40 CHINESE_EUC_CN = 15, // Misnamed. Should be EUC_TW. Was Basis Tech
michael@0 41 // CNS11643EUC, before that EUC-CN(!)
michael@0 42 KOREAN_EUC_KR = 16, // KSC
michael@0 43 UNICODE_UNUSED = 17, // Unicode
michael@0 44 CHINESE_EUC_DEC = 18, // Misnamed. Should be EUC_TW. Was
michael@0 45 // CNS11643EUC, before that EUC.
michael@0 46 CHINESE_CNS = 19, // Misnamed. Should be EUC_TW. Was
michael@0 47 // CNS11643EUC, before that CNS.
michael@0 48 CHINESE_BIG5_CP950 = 20, // BIG5_CP950
michael@0 49 JAPANESE_CP932 = 21, // CP932
michael@0 50 UTF8 = 22,
michael@0 51 UNKNOWN_ENCODING = 23,
michael@0 52 ASCII_7BIT = 24, // ISO_8859_1 with all characters <= 127.
michael@0 53 RUSSIAN_KOI8_R = 25, // KOI8R
michael@0 54 RUSSIAN_CP1251 = 26, // CP1251
michael@0 55
michael@0 56 //----------------------------------------------------------
michael@0 57 MSFT_CP1252 = 27, // CP1252 aka MSFT euro ascii
michael@0 58 RUSSIAN_KOI8_RU = 28, // CP21866 aka KOI8-U, used for Ukrainian.
michael@0 59 // Misnamed, this is _not_ KOI8-RU but KOI8-U.
michael@0 60 // KOI8-U is used much more often than KOI8-RU.
michael@0 61 MSFT_CP1250 = 29, // CP1250 aka MSFT eastern european
michael@0 62 ISO_8859_15 = 30, // aka ISO_8859_0 aka ISO_8859_1 euroized
michael@0 63 //----------------------------------------------------------
michael@0 64
michael@0 65 //----------------------------------------------------------
michael@0 66 MSFT_CP1254 = 31, // used for Turkish
michael@0 67 MSFT_CP1257 = 32, // used in Baltic countries
michael@0 68 //----------------------------------------------------------
michael@0 69
michael@0 70 //----------------------------------------------------------
michael@0 71 //----------------------------------------------------------
michael@0 72 ISO_8859_11 = 33, // aka TIS-620, used for Thai
michael@0 73 MSFT_CP874 = 34, // used for Thai
michael@0 74 MSFT_CP1256 = 35, // used for Arabic
michael@0 75
michael@0 76 //----------------------------------------------------------
michael@0 77 MSFT_CP1255 = 36, // Logical Hebrew Microsoft
michael@0 78 ISO_8859_8_I = 37, // ISO Hebrew Logical
michael@0 79 HEBREW_VISUAL = 38, // ISO Hebrew Visual
michael@0 80 //----------------------------------------------------------
michael@0 81
michael@0 82 //----------------------------------------------------------
michael@0 83 CZECH_CP852 = 39,
michael@0 84 CZECH_CSN_369103 = 40, // aka ISO_IR_139 aka KOI8_CS
michael@0 85 MSFT_CP1253 = 41, // used for Greek
michael@0 86 RUSSIAN_CP866 = 42,
michael@0 87 //----------------------------------------------------------
michael@0 88
michael@0 89 //----------------------------------------------------------
michael@0 90 // Handled by iconv in glibc
michael@0 91 ISO_8859_13 = 43,
michael@0 92 ISO_2022_KR = 44,
michael@0 93 GBK = 45,
michael@0 94 GB18030 = 46,
michael@0 95 BIG5_HKSCS = 47,
michael@0 96 ISO_2022_CN = 48,
michael@0 97
michael@0 98 //-----------------------------------------------------------
michael@0 99 // Following 4 encodings are deprecated (font encodings)
michael@0 100 TSCII = 49,
michael@0 101 TAMIL_MONO = 50,
michael@0 102 TAMIL_BI = 51,
michael@0 103 JAGRAN = 52,
michael@0 104
michael@0 105
michael@0 106 MACINTOSH_ROMAN = 53,
michael@0 107 UTF7 = 54,
michael@0 108
michael@0 109 //-----------------------------------------------------------
michael@0 110 // Following 2 encodings are deprecated (font encodings)
michael@0 111 BHASKAR = 55, // Indic encoding - Devanagari
michael@0 112 HTCHANAKYA = 56, // Indic encoding - Devanagari
michael@0 113
michael@0 114 //-----------------------------------------------------------
michael@0 115 UTF16BE = 57, // big-endian UTF-16
michael@0 116 UTF16LE = 58, // little-endian UTF-16
michael@0 117 UTF32BE = 59, // big-endian UTF-32
michael@0 118 UTF32LE = 60, // little-endian UTF-32
michael@0 119 //-----------------------------------------------------------
michael@0 120
michael@0 121 //-----------------------------------------------------------
michael@0 122 // An encoding that means "This is not text, but it may have some
michael@0 123 // simple ASCII text embedded". Intended input conversion
michael@0 124 // is to keep strings of >=4 seven-bit ASCII characters.
michael@0 125 BINARYENC = 61,
michael@0 126 //-----------------------------------------------------------
michael@0 127
michael@0 128 //-----------------------------------------------------------
michael@0 129 // Some Web pages allow a mixture of HZ-GB and GB-2312 by using
michael@0 130 // ~{ ... ~} for 2-byte pairs, and the browsers support this.
michael@0 131 HZ_GB_2312 = 62,
michael@0 132 //-----------------------------------------------------------
michael@0 133
michael@0 134 //-----------------------------------------------------------
michael@0 135 // Some external vendors make the common input error of
michael@0 136 // converting MSFT_CP1252 to UTF8 *twice*.
michael@0 137 UTF8UTF8 = 63,
michael@0 138 //-----------------------------------------------------------
michael@0 139
michael@0 140 //-----------------------------------------------------------
michael@0 141 // Following 6 encodings are deprecated (font encodings)
michael@0 142 TAM_ELANGO = 64, // Elango - Tamil
michael@0 143 TAM_LTTMBARANI = 65, // Barani - Tamil
michael@0 144 TAM_SHREE = 66, // Shree - Tamil
michael@0 145 TAM_TBOOMIS = 67, // TBoomis - Tamil
michael@0 146 TAM_TMNEWS = 68, // TMNews - Tamil
michael@0 147 TAM_WEBTAMIL = 69, // Webtamil - Tamil
michael@0 148 //-----------------------------------------------------------
michael@0 149
michael@0 150 //-----------------------------------------------------------
michael@0 151 // Shift_JIS variants used by Japanese cell phone carriers.
michael@0 152 KDDI_SHIFT_JIS = 70,
michael@0 153 DOCOMO_SHIFT_JIS = 71,
michael@0 154 SOFTBANK_SHIFT_JIS = 72,
michael@0 155 // ISO-2022-JP variants used by KDDI and SoftBank.
michael@0 156 KDDI_ISO_2022_JP = 73,
michael@0 157 SOFTBANK_ISO_2022_JP = 74,
michael@0 158 //-----------------------------------------------------------
michael@0 159
michael@0 160 NUM_ENCODINGS = 75, // Always keep this at the end. It is not a
michael@0 161 // valid Encoding value; it only indicates
michael@0 162 // the total number of Encodings.
michael@0 163 };
michael@0 164
michael@0 165 } // End namespace CLD2
michael@0 166
michael@0 167 #endif // I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__
michael@0 168
michael@0 169

mercurial