// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//
// Author: dsites@google.com (Dick Sites)
//

#ifndef I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__
#define I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__

namespace CLD2 {

// Identifiers for the character encodings named by CLD2.
//
// Every enumerator is given an explicit numeric value, running contiguously
// from 0 to NUM_ENCODINGS - 1. Several names are historical misnomers (see
// the per-entry notes); the names are kept as they are.
// NOTE(review): the explicit numbering suggests the values are relied on
// elsewhere (e.g. as table indices or stored data) -- confirm before
// renumbering or inserting entries anywhere but directly before
// NUM_ENCODINGS.
enum Encoding {
  ISO_8859_1           =  0,  // ASCII (legacy label; the charset is Latin1)
  ISO_8859_2           =  1,  // Latin2
  ISO_8859_3           =  2,  // Latin3
  ISO_8859_4           =  3,  // Latin4
  ISO_8859_5           =  4,  // ISO-8859-5
  ISO_8859_6           =  5,  // Arabic
  ISO_8859_7           =  6,  // Greek
  ISO_8859_8           =  7,  // Hebrew
  ISO_8859_9           =  8,  // Latin5
  ISO_8859_10          =  9,  // Latin6
  JAPANESE_EUC_JP      = 10,  // EUC_JP
  JAPANESE_SHIFT_JIS   = 11,  // SJS
  JAPANESE_JIS         = 12,  // JIS
  CHINESE_BIG5         = 13,  // BIG5
  CHINESE_GB           = 14,  // GB
  CHINESE_EUC_CN       = 15,  // Misnamed. Should be EUC_TW. Was Basis Tech
                              // CNS11643EUC, before that EUC-CN(!)
  KOREAN_EUC_KR        = 16,  // KSC
  UNICODE_UNUSED       = 17,  // Unicode
  CHINESE_EUC_DEC      = 18,  // Misnamed. Should be EUC_TW. Was
                              // CNS11643EUC, before that EUC.
  CHINESE_CNS          = 19,  // Misnamed. Should be EUC_TW. Was
                              // CNS11643EUC, before that CNS.
  CHINESE_BIG5_CP950   = 20,  // BIG5_CP950
  JAPANESE_CP932       = 21,  // CP932
  UTF8                 = 22,
  UNKNOWN_ENCODING     = 23,
  ASCII_7BIT           = 24,  // ISO_8859_1 with all characters <= 127.
  RUSSIAN_KOI8_R       = 25,  // KOI8R
  RUSSIAN_CP1251       = 26,  // CP1251

  //----------------------------------------------------------
  MSFT_CP1252          = 27,  // CP1252 aka MSFT euro ascii
  RUSSIAN_KOI8_RU      = 28,  // CP21866 aka KOI8-U, used for Ukrainian.
                              // Misnamed, this is _not_ KOI8-RU but KOI8-U.
                              // KOI8-U is used much more often than KOI8-RU.
  MSFT_CP1250          = 29,  // CP1250 aka MSFT eastern european
  ISO_8859_15          = 30,  // aka ISO_8859_0 aka ISO_8859_1 euroized
  //----------------------------------------------------------

  //----------------------------------------------------------
  MSFT_CP1254          = 31,  // used for Turkish
  MSFT_CP1257          = 32,  // used in Baltic countries
  //----------------------------------------------------------

  //----------------------------------------------------------
  ISO_8859_11          = 33,  // aka TIS-620, used for Thai
  MSFT_CP874           = 34,  // used for Thai
  MSFT_CP1256          = 35,  // used for Arabic

  //----------------------------------------------------------
  MSFT_CP1255          = 36,  // Logical Hebrew Microsoft
  ISO_8859_8_I         = 37,  // Iso Hebrew Logical
  HEBREW_VISUAL        = 38,  // Iso Hebrew Visual
  //----------------------------------------------------------

  //----------------------------------------------------------
  CZECH_CP852          = 39,
  CZECH_CSN_369103     = 40,  // aka ISO_IR_139 aka KOI8_CS
  MSFT_CP1253          = 41,  // used for Greek
  RUSSIAN_CP866        = 42,
  //----------------------------------------------------------

  //----------------------------------------------------------
  // Handled by iconv in glibc
  ISO_8859_13          = 43,
  ISO_2022_KR          = 44,
  GBK                  = 45,
  GB18030              = 46,
  BIG5_HKSCS           = 47,
  ISO_2022_CN          = 48,

  //-----------------------------------------------------------
  // Following 4 encodings are deprecated (font encodings)
  TSCII                = 49,
  TAMIL_MONO           = 50,
  TAMIL_BI             = 51,
  JAGRAN               = 52,


  MACINTOSH_ROMAN      = 53,
  UTF7                 = 54,

  //-----------------------------------------------------------
  // Following 2 encodings are deprecated (font encodings)
  BHASKAR              = 55,  // Indic encoding - Devanagari
  HTCHANAKYA           = 56,  // Indic encoding - Devanagari

  //-----------------------------------------------------------
  UTF16BE              = 57,  // big-endian UTF-16
  UTF16LE              = 58,  // little-endian UTF-16
  UTF32BE              = 59,  // big-endian UTF-32
  UTF32LE              = 60,  // little-endian UTF-32
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // An encoding that means "This is not text, but it may have some
  // simple ASCII text embedded". Intended input conversion
  // is to keep strings of >=4 seven-bit ASCII characters
  BINARYENC            = 61,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Some Web pages allow a mixture of HZ-GB and GB-2312 by using
  // ~{ ... ~} for 2-byte pairs, and the browsers support this.
  HZ_GB_2312           = 62,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Some external vendors make the common input error of
  // converting MSFT_CP1252 to UTF8 *twice*.
  UTF8UTF8             = 63,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Following 6 encodings are deprecated (font encodings)
  TAM_ELANGO           = 64,  // Elango - Tamil
  TAM_LTTMBARANI       = 65,  // Barani - Tamil
  TAM_SHREE            = 66,  // Shree - Tamil
  TAM_TBOOMIS          = 67,  // TBoomis - Tamil
  TAM_TMNEWS           = 68,  // TMNews - Tamil
  TAM_WEBTAMIL         = 69,  // Webtamil - Tamil
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Shift_JIS variants used by Japanese cell phone carriers.
  KDDI_SHIFT_JIS       = 70,
  DOCOMO_SHIFT_JIS     = 71,
  SOFTBANK_SHIFT_JIS   = 72,
  // ISO-2022-JP variants used by KDDI and SoftBank.
  KDDI_ISO_2022_JP     = 73,
  SOFTBANK_ISO_2022_JP = 74,
  //-----------------------------------------------------------

  // Always keep this at the end. It is not a valid Encoding enum, it is
  // only used to indicate the total number of Encodings. No trailing comma
  // after it: that is only accepted from C++11 onwards.
  NUM_ENCODINGS        = 75
};

}  // End namespace CLD2

#endif  // I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__