// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//
// Author: dsites@google.com (Dick Sites)
//

#ifndef I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__
#define I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__

namespace CLD2 {

// Identifiers for the text encodings known to CLD2.
//
// Every enumerator has an explicit value; the values run contiguously from
// 0 through NUM_ENCODINGS - 1. Several names are historical misnomers (see
// the per-enumerator notes); the names are kept as they are.
//
// NOTE(review): the explicit numbering suggests other code depends on these
// exact values (e.g. tables indexed by Encoding) -- confirm before
// renumbering or inserting entries anywhere but just before NUM_ENCODINGS.
enum Encoding {
  ISO_8859_1           = 0,   // ASCII
  ISO_8859_2           = 1,   // Latin2
  ISO_8859_3           = 2,   //
  ISO_8859_4           = 3,   // Latin4
  ISO_8859_5           = 4,   // ISO-8859-5
  ISO_8859_6           = 5,   // Arabic
  ISO_8859_7           = 6,   // Greek
  ISO_8859_8           = 7,   // Hebrew
  ISO_8859_9           = 8,   //
  ISO_8859_10          = 9,   //
  JAPANESE_EUC_JP      = 10,  // EUC_JP
  JAPANESE_SHIFT_JIS   = 11,  // SJS
  JAPANESE_JIS         = 12,  // JIS
  CHINESE_BIG5         = 13,  // BIG5
  CHINESE_GB           = 14,  // GB
  CHINESE_EUC_CN       = 15,  // Misnamed. Should be EUC_TW. Was Basis Tech
                              // CNS11643EUC, before that EUC-CN(!)
  KOREAN_EUC_KR        = 16,  // KSC
  UNICODE_UNUSED       = 17,  // Unicode
  CHINESE_EUC_DEC      = 18,  // Misnamed. Should be EUC_TW. Was
                              // CNS11643EUC, before that EUC.
  CHINESE_CNS          = 19,  // Misnamed. Should be EUC_TW. Was
                              // CNS11643EUC, before that CNS.
  CHINESE_BIG5_CP950   = 20,  // BIG5_CP950
  JAPANESE_CP932       = 21,  // CP932
  UTF8                 = 22,
  UNKNOWN_ENCODING     = 23,
  ASCII_7BIT           = 24,  // ISO_8859_1 with all characters <= 127.
  RUSSIAN_KOI8_R       = 25,  // KOI8R
  RUSSIAN_CP1251       = 26,  // CP1251

  //----------------------------------------------------------
  MSFT_CP1252          = 27,  // CP1252 aka MSFT euro ascii
  RUSSIAN_KOI8_RU      = 28,  // CP21866 aka KOI8-U, used for Ukrainian.
                              // Misnamed, this is _not_ KOI8-RU but KOI8-U.
                              // KOI8-U is used much more often than KOI8-RU.
  MSFT_CP1250          = 29,  // CP1250 aka MSFT eastern european
  ISO_8859_15          = 30,  // aka ISO_8859_0 aka ISO_8859_1 euroized
  //----------------------------------------------------------

  //----------------------------------------------------------
  MSFT_CP1254          = 31,  // used for Turkish
  MSFT_CP1257          = 32,  // used in Baltic countries
  //----------------------------------------------------------

  //----------------------------------------------------------
  ISO_8859_11          = 33,  // aka TIS-620, used for Thai
  MSFT_CP874           = 34,  // used for Thai
  MSFT_CP1256          = 35,  // used for Arabic

  //----------------------------------------------------------
  MSFT_CP1255          = 36,  // Logical Hebrew Microsoft
  ISO_8859_8_I         = 37,  // Iso Hebrew Logical
  HEBREW_VISUAL        = 38,  // Iso Hebrew Visual
  //----------------------------------------------------------

  //----------------------------------------------------------
  CZECH_CP852          = 39,
  CZECH_CSN_369103     = 40,  // aka ISO_IR_139 aka KOI8_CS
  MSFT_CP1253          = 41,  // used for Greek
  RUSSIAN_CP866        = 42,
  //----------------------------------------------------------

  //----------------------------------------------------------
  // Handled by iconv in glibc
  ISO_8859_13          = 43,
  ISO_2022_KR          = 44,
  GBK                  = 45,
  GB18030              = 46,
  BIG5_HKSCS           = 47,
  ISO_2022_CN          = 48,

  //-----------------------------------------------------------
  // Following 4 encodings are deprecated (font encodings)
  TSCII                = 49,
  TAMIL_MONO           = 50,
  TAMIL_BI             = 51,
  JAGRAN               = 52,


  MACINTOSH_ROMAN      = 53,
  UTF7                 = 54,

  //-----------------------------------------------------------
  // Following 2 encodings are deprecated (font encodings)
  BHASKAR              = 55,  // Indic encoding - Devanagari
  HTCHANAKYA           = 56,  // Indic encoding - Devanagari

  //-----------------------------------------------------------
  UTF16BE              = 57,  // big-endian UTF-16
  UTF16LE              = 58,  // little-endian UTF-16
  UTF32BE              = 59,  // big-endian UTF-32
  UTF32LE              = 60,  // little-endian UTF-32
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // An encoding that means "This is not text, but it may have some
  // simple ASCII text embedded". Intended input conversion
  // is to keep strings of >=4 seven-bit ASCII characters
  BINARYENC            = 61,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Some Web pages allow a mixture of HZ-GB and GB-2312 by using
  // ~{ ... ~} for 2-byte pairs, and the browsers support this.
  HZ_GB_2312           = 62,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Some external vendors make the common input error of
  // converting MSFT_CP1252 to UTF8 *twice*.
  UTF8UTF8             = 63,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Following 6 encodings are deprecated (font encodings)
  TAM_ELANGO           = 64,  // Elango - Tamil
  TAM_LTTMBARANI       = 65,  // Barani - Tamil
  TAM_SHREE            = 66,  // Shree - Tamil
  TAM_TBOOMIS          = 67,  // TBoomis - Tamil
  TAM_TMNEWS           = 68,  // TMNews - Tamil
  TAM_WEBTAMIL         = 69,  // Webtamil - Tamil
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Shift_JIS variants used by Japanese cell phone carriers.
  KDDI_SHIFT_JIS       = 70,
  DOCOMO_SHIFT_JIS     = 71,
  SOFTBANK_SHIFT_JIS   = 72,
  // ISO-2022-JP variants used by KDDI and SoftBank.
  KDDI_ISO_2022_JP     = 73,
  SOFTBANK_ISO_2022_JP = 74,
  //-----------------------------------------------------------

  NUM_ENCODINGS        = 75   // Always keep this at the end. It is not a
                              // valid Encoding enum, it is only used to
                              // indicate the total number of Encodings.
};

}  // End namespace CLD2

#endif  // I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__