browser/components/translation/cld2/public/encodings.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

     1 // Copyright 2013 Google Inc. All Rights Reserved.
     2 //
     3 // Licensed under the Apache License, Version 2.0 (the "License");
     4 // you may not use this file except in compliance with the License.
     5 // You may obtain a copy of the License at
     6 //
     7 //     http://www.apache.org/licenses/LICENSE-2.0
     8 //
     9 // Unless required by applicable law or agreed to in writing, software
    10 // distributed under the License is distributed on an "AS IS" BASIS,
    11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12 // See the License for the specific language governing permissions and
    13 // limitations under the License.
    15 //
    16 // Author: dsites@google.com (Dick Sites)
    17 //
    19 #ifndef I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__
    20 #define I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__
    22 namespace CLD2 {
    24 enum Encoding {
    25   ISO_8859_1           =  0,  //   ASCII
    26   ISO_8859_2           =  1,  //   Latin2
    27   ISO_8859_3           =  2,  //
    28   ISO_8859_4           =  3,  //   Latin4
    29   ISO_8859_5           =  4,  //   ISO-8859-5
    30   ISO_8859_6           =  5,  //   Arabic
    31   ISO_8859_7           =  6,  //   Greek
    32   ISO_8859_8           =  7,  //   Hebrew
    33   ISO_8859_9           =  8,  //
    34   ISO_8859_10          =  9,  //
    35   JAPANESE_EUC_JP      = 10,  //   EUC_JP
    36   JAPANESE_SHIFT_JIS   = 11,  //   SJS
    37   JAPANESE_JIS         = 12,  //   JIS
    38   CHINESE_BIG5         = 13,  //   BIG5
    39   CHINESE_GB           = 14,  //   GB
    40   CHINESE_EUC_CN       = 15,  // Misnamed. Should be EUC_TW. Was Basis Tech
    41                               // CNS11643EUC, before that   EUC-CN(!)
    42   KOREAN_EUC_KR        = 16,  //   KSC
    43   UNICODE_UNUSED       = 17,  //   Unicode
    44   CHINESE_EUC_DEC      = 18,  // Misnamed. Should be EUC_TW. Was
    45                               // CNS11643EUC, before that   EUC.
    46   CHINESE_CNS          = 19,  // Misnamed. Should be EUC_TW. Was
    47                               // CNS11643EUC, before that   CNS.
    48   CHINESE_BIG5_CP950   = 20,  //   BIG5_CP950
    49   JAPANESE_CP932       = 21,  //   CP932
    50   UTF8                 = 22,
    51   UNKNOWN_ENCODING     = 23,
    52   ASCII_7BIT           = 24,  // ISO_8859_1 with all characters <= 127.
    53   RUSSIAN_KOI8_R       = 25,  //   KOI8R
    54   RUSSIAN_CP1251       = 26,  //   CP1251
    56   //----------------------------------------------------------
    57   MSFT_CP1252          = 27,  // 27: CP1252 aka MSFT euro ascii
    58   RUSSIAN_KOI8_RU      = 28,  // CP21866 aka KOI8-U, used for Ukrainian.
    59                               // Misnamed, this is _not_ KOI8-RU but KOI8-U.
    60                               // KOI8-U is used much more often than KOI8-RU.
    61   MSFT_CP1250          = 29,  // CP1250 aka MSFT eastern european
    62   ISO_8859_15          = 30,  // aka ISO_8859_0 aka ISO_8859_1 euroized
    63   //----------------------------------------------------------
    65   //----------------------------------------------------------
    66   MSFT_CP1254          = 31,  // used for Turkish
    67   MSFT_CP1257          = 32,  // used in Baltic countries
    68   //----------------------------------------------------------
    70   //----------------------------------------------------------
    71   //----------------------------------------------------------
    72   ISO_8859_11          = 33,  // aka TIS-620, used for Thai
    73   MSFT_CP874           = 34,  // used for Thai
    74   MSFT_CP1256          = 35,  // used for Arabic
    76   //----------------------------------------------------------
    77   MSFT_CP1255          = 36,  // Logical Hebrew Microsoft
    78   ISO_8859_8_I         = 37,  // Iso Hebrew Logical
    79   HEBREW_VISUAL        = 38,  // Iso Hebrew Visual
    80   //----------------------------------------------------------
    82   //----------------------------------------------------------
    83   CZECH_CP852          = 39,
    84   CZECH_CSN_369103     = 40,  // aka ISO_IR_139 aka KOI8_CS
    85   MSFT_CP1253          = 41,  // used for Greek
    86   RUSSIAN_CP866        = 42,
    87   //----------------------------------------------------------
    89   //----------------------------------------------------------
    90   // Handled by iconv in glibc
    91   ISO_8859_13          = 43,
    92   ISO_2022_KR          = 44,
    93   GBK                  = 45,
    94   GB18030              = 46,
    95   BIG5_HKSCS           = 47,
    96   ISO_2022_CN          = 48,
    98   //-----------------------------------------------------------
    99   // Following 4 encodings are deprecated (font encodings)
   100   TSCII                = 49,
   101   TAMIL_MONO           = 50,
   102   TAMIL_BI             = 51,
   103   JAGRAN               = 52,
   106   MACINTOSH_ROMAN      = 53,
   107   UTF7                 = 54,
   109   //-----------------------------------------------------------
   110   // Following 2 encodings are deprecated (font encodings)
   111   BHASKAR              = 55,  // Indic encoding - Devanagari
   112   HTCHANAKYA           = 56,  // 56 Indic encoding - Devanagari
   114   //-----------------------------------------------------------
   115   UTF16BE              = 57,  // big-endian UTF-16
   116   UTF16LE              = 58,  // little-endian UTF-16
   117   UTF32BE              = 59,  // big-endian UTF-32
   118   UTF32LE              = 60,  // little-endian UTF-32
   119   //-----------------------------------------------------------
   121   //-----------------------------------------------------------
   122   // An encoding that means "This is not text, but it may have some
   123   // simple ASCII text embedded". Intended input conversion
   124   // is to keep strings of >=4 seven-bit ASCII characters
   125   BINARYENC            = 61,
   126   //-----------------------------------------------------------
   128   //-----------------------------------------------------------
   129   // Some Web pages allow a mixture of HZ-GB and GB-2312 by using
   130   // ~{ ... ~} for 2-byte pairs, and the browsers support this.
   131   HZ_GB_2312           = 62,
   132   //-----------------------------------------------------------
   134   //-----------------------------------------------------------
   135   // Some external vendors make the common input error of
   136   // converting MSFT_CP1252 to UTF8 *twice*.
   137   UTF8UTF8             = 63,
   138   //-----------------------------------------------------------
   140   //-----------------------------------------------------------
   141   // Following 6 encodings are deprecated (font encodings)
   142   TAM_ELANGO           = 64,  // Elango - Tamil
   143   TAM_LTTMBARANI       = 65,  // Barani - Tamil
   144   TAM_SHREE            = 66,  // Shree - Tamil
   145   TAM_TBOOMIS          = 67,  // TBoomis - Tamil
   146   TAM_TMNEWS           = 68,  // TMNews - Tamil
   147   TAM_WEBTAMIL         = 69,  // Webtamil - Tamil
   148   //-----------------------------------------------------------
   150   //-----------------------------------------------------------
   151   // Shift_JIS variants used by Japanese cell phone carriers.
   152   KDDI_SHIFT_JIS       = 70,
   153   DOCOMO_SHIFT_JIS     = 71,
   154   SOFTBANK_SHIFT_JIS   = 72,
   155   // ISO-2022-JP variants used by KDDI and SoftBank.
   156   KDDI_ISO_2022_JP     = 73,
   157   SOFTBANK_ISO_2022_JP = 74,
   158   //-----------------------------------------------------------
   160   NUM_ENCODINGS        = 75,  // Always keep this at the end. It is not a
   161                               // valid Encoding enum, it is only used to
   162                               // indicate the total number of Encodings.
   163 };
   165 }       // End namespace CLD2
   167 #endif  // I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__

mercurial