intl/icu/source/i18n/csrsbcs.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /*
     2  **********************************************************************
     3  *   Copyright (C) 2005-2013, International Business Machines
     4  *   Corporation and others.  All Rights Reserved.
     5  **********************************************************************
     6  */
     8 #ifndef __CSRSBCS_H
     9 #define __CSRSBCS_H
    11 #include "unicode/uobject.h"
    13 #if !UCONFIG_NO_CONVERSION
    15 #include "csrecog.h"
    17 U_NAMESPACE_BEGIN
    19 class NGramParser : public UMemory
    20 {
    21 private:
    22     int32_t ngram;
    23     const int32_t *ngramList;    
    25     int32_t ngramCount;
    26     int32_t hitCount;
    28 protected:
    29 	int32_t byteIndex;
    30     const uint8_t *charMap;
    32 	void addByte(int32_t b);
    34 public:
    35     NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
    37 private:
    38     /*
    39     * Binary search for value in table, which must have exactly 64 entries.
    40     */
    41     int32_t search(const int32_t *table, int32_t value);
    43     void lookup(int32_t thisNgram);
    45     virtual int32_t nextByte(InputText *det);
    46 	virtual void parseCharacters(InputText *det);
    48 public:
    49     int32_t parse(InputText *det);
    51 };
    53 class NGramParser_IBM420 : public NGramParser
    54 {
    55 private:
    56 	int32_t alef;
    57 	int32_t isLamAlef(int32_t b);
    58 	int32_t nextByte(InputText *det);
    59 	void parseCharacters(InputText *det);
    61 public:
    62     NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
    63 };
    66 class CharsetRecog_sbcs : public CharsetRecognizer
    67 {
    68 public:
    69     CharsetRecog_sbcs();
    70     virtual ~CharsetRecog_sbcs();
    71     virtual const char *getName() const = 0;
    72     virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
    73     virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
    74 };
    76 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
    77 {
    78 public:
    79     virtual ~CharsetRecog_8859_1();
    80     const char *getName() const;
    81     virtual UBool match(InputText *det, CharsetMatch *results) const;
    82 };
    84 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
    85 {
    86 public:
    87     virtual ~CharsetRecog_8859_2();
    88     const char *getName() const;
    89     virtual UBool match(InputText *det, CharsetMatch *results) const;
    90 };
    92 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
    93 {
    94 public:
    95     virtual ~CharsetRecog_8859_5();
    96     const char *getName() const;
    97 };
    99 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
   100 {
   101 public:
   102     virtual ~CharsetRecog_8859_6();
   104     const char *getName() const;
   105 };
   107 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
   108 {
   109 public:
   110     virtual ~CharsetRecog_8859_7();
   112     const char *getName() const;
   113 };
   115 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
   116 {
   117 public:
   118     virtual ~CharsetRecog_8859_8();
   120     virtual const char *getName() const;
   121 };
   123 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
   124 {
   125 public:
   126     virtual ~CharsetRecog_8859_9();
   128     const char *getName() const;
   129 };
   133 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
   134 {
   135 public:
   136     virtual ~CharsetRecog_8859_5_ru();
   138     const char *getLanguage() const;
   140     virtual UBool match(InputText *det, CharsetMatch *results) const;
   141 };
   143 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
   144 {
   145 public:
   146     virtual ~CharsetRecog_8859_6_ar();
   148     const char *getLanguage() const;
   150     virtual UBool match(InputText *det, CharsetMatch *results) const;
   151 };
   153 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
   154 {
   155 public:
   156     virtual ~CharsetRecog_8859_7_el();
   158     const char *getLanguage() const;
   160     virtual UBool match(InputText *det, CharsetMatch *results) const;
   161 };
   163 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
   164 {
   165 public:
   166     virtual ~CharsetRecog_8859_8_I_he();
   168     const char *getName() const;
   170     const char *getLanguage() const;
   172     virtual UBool match(InputText *det, CharsetMatch *results) const;
   173 };
   175 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
   176 {
   177 public:
   178     virtual ~CharsetRecog_8859_8_he ();
   180     const char *getLanguage() const;
   182     virtual UBool match(InputText *det, CharsetMatch *results) const;
   183 };
   185 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
   186 {
   187 public:
   188     virtual ~CharsetRecog_8859_9_tr ();
   190     const char *getLanguage() const;
   192     virtual UBool match(InputText *det, CharsetMatch *results) const;
   193 };
   195 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
   196 {
   197 public:
   198     virtual ~CharsetRecog_windows_1256();
   200     const char *getName() const;
   202     const char *getLanguage() const;
   204     virtual UBool match(InputText *det, CharsetMatch *results) const;
   205 };
   207 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
   208 {
   209 public:
   210     virtual ~CharsetRecog_windows_1251();
   212     const char *getName() const;
   214     const char *getLanguage() const;
   216     virtual UBool match(InputText *det, CharsetMatch *results) const;
   217 };
   220 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
   221 {
   222 public:
   223     virtual ~CharsetRecog_KOI8_R();
   225     const char *getName() const;
   227     const char *getLanguage() const;
   229     virtual UBool match(InputText *det, CharsetMatch *results) const;
   230 };
   232 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
   233 {
   234 public:
   235     virtual ~CharsetRecog_IBM424_he();
   237     const char *getLanguage() const;
   238 };
   240 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
   241 public:
   242     virtual ~CharsetRecog_IBM424_he_rtl();
   244     const char *getName() const;
   246     virtual UBool match(InputText *det, CharsetMatch *results) const;
   247 };
   249 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
   250     virtual ~CharsetRecog_IBM424_he_ltr();
   252     const char *getName() const;
   254     virtual UBool match(InputText *det, CharsetMatch *results) const;
   255 };
   257 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
   258 {
   259 public:
   260     virtual ~CharsetRecog_IBM420_ar();
   262     const char *getLanguage() const;
   263 	int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
   265 };
   267 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
   268 public:
   269     virtual ~CharsetRecog_IBM420_ar_rtl();
   271     const char *getName() const;
   273     virtual UBool match(InputText *det, CharsetMatch *results) const;
   274 };
   276 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
   277     virtual ~CharsetRecog_IBM420_ar_ltr();
   279     const char *getName() const;
   281     virtual UBool match(InputText *det, CharsetMatch *results) const;
   282 };
   284 U_NAMESPACE_END
   286 #endif /* !UCONFIG_NO_CONVERSION */
   287 #endif /* __CSRSBCS_H */

mercurial