intl/icu/source/i18n/csrsbcs.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 2005-2013, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 */
michael@0 7
michael@0 8 #ifndef __CSRSBCS_H
michael@0 9 #define __CSRSBCS_H
michael@0 10
michael@0 11 #include "unicode/uobject.h"
michael@0 12
michael@0 13 #if !UCONFIG_NO_CONVERSION
michael@0 14
michael@0 15 #include "csrecog.h"
michael@0 16
michael@0 17 U_NAMESPACE_BEGIN
michael@0 18
/*
 * NGramParser: UMemory-derived helper declared for the single-byte charset
 * recognizers in this header.
 *
 * - Private state: ngram, ngramList, ngramCount, hitCount.
 * - Protected state (visible to subclasses): byteIndex, charMap, addByte().
 * - The constructor takes the n-gram table and the character map pointers.
 * - nextByte() and parseCharacters() are private virtuals; the subclass
 *   NGramParser_IBM420 in this file redeclares both.
 * - parse() is the only public operation besides the constructor and
 *   returns an int32_t; its meaning is defined in the implementation file,
 *   which is not visible here.
 */
michael@0 19 class NGramParser : public UMemory
michael@0 20 {
michael@0 21 private:
michael@0 22 int32_t ngram;
michael@0 23 const int32_t *ngramList;
michael@0 24
michael@0 25 int32_t ngramCount;
michael@0 26 int32_t hitCount;
michael@0 27
michael@0 28 protected:
michael@0 29 int32_t byteIndex;
michael@0 30 const uint8_t *charMap;
michael@0 31
michael@0 32 void addByte(int32_t b);
michael@0 33
michael@0 34 public:
michael@0 35 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
michael@0 36
michael@0 37 private:
michael@0 38 /*
michael@0 39 * Binary search for value in table, which must have exactly 64 entries.
michael@0 40 */
michael@0 41 int32_t search(const int32_t *table, int32_t value);
michael@0 42
michael@0 43 void lookup(int32_t thisNgram);
michael@0 44
michael@0 45 virtual int32_t nextByte(InputText *det);
michael@0 46 virtual void parseCharacters(InputText *det);
michael@0 47
michael@0 48 public:
michael@0 49 int32_t parse(InputText *det);
michael@0 50
michael@0 51 };
michael@0 52
/*
 * NGramParser_IBM420: NGramParser subclass that redeclares the base class's
 * virtual nextByte() and parseCharacters(), and adds a private alef field
 * plus an isLamAlef() helper.
 * NOTE(review): the names suggest special handling of lam-alef bytes for
 * the IBM420 code page -- confirm against the implementation file.
 */
michael@0 53 class NGramParser_IBM420 : public NGramParser
michael@0 54 {
michael@0 55 private:
michael@0 56 int32_t alef;
michael@0 57 int32_t isLamAlef(int32_t b);
michael@0 58 int32_t nextByte(InputText *det);
michael@0 59 void parseCharacters(InputText *det);
michael@0 60
michael@0 61 public:
michael@0 62 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
michael@0 63 };
michael@0 64
michael@0 65
/*
 * CharsetRecog_sbcs: abstract base for the recognizers declared below.
 *
 * getName() and match() are pure virtual (= 0), so a derived class must
 * declare both before it can be instantiated. match_sbcs() is a non-pure
 * virtual that takes the input text, an n-gram table and a character map
 * and returns an int32_t; CharsetRecog_IBM420_ar in this file redeclares it.
 */
michael@0 66 class CharsetRecog_sbcs : public CharsetRecognizer
michael@0 67 {
michael@0 68 public:
michael@0 69 CharsetRecog_sbcs();
michael@0 70 virtual ~CharsetRecog_sbcs();
michael@0 71 virtual const char *getName() const = 0;
michael@0 72 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
michael@0 73 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
michael@0 74 };
michael@0 75
/*
 * CharsetRecog_8859_1: CharsetRecog_sbcs subclass that declares both of the
 * base's pure virtuals, getName() and match().
 */
michael@0 76 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
michael@0 77 {
michael@0 78 public:
michael@0 79 virtual ~CharsetRecog_8859_1();
michael@0 80 const char *getName() const;
michael@0 81 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 82 };
michael@0 83
/*
 * CharsetRecog_8859_2: CharsetRecog_sbcs subclass that declares both of the
 * base's pure virtuals, getName() and match().
 */
michael@0 84 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
michael@0 85 {
michael@0 86 public:
michael@0 87 virtual ~CharsetRecog_8859_2();
michael@0 88 const char *getName() const;
michael@0 89 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 90 };
michael@0 91
/*
 * CharsetRecog_8859_5: intermediate base that declares getName() only.
 * match() is still pure virtual from CharsetRecog_sbcs, so this class stays
 * abstract; CharsetRecog_8859_5_ru in this file declares match().
 */
michael@0 92 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
michael@0 93 {
michael@0 94 public:
michael@0 95 virtual ~CharsetRecog_8859_5();
michael@0 96 const char *getName() const;
michael@0 97 };
michael@0 98
/*
 * CharsetRecog_8859_6: intermediate base that declares getName() only.
 * match() is still pure virtual from CharsetRecog_sbcs, so this class stays
 * abstract; CharsetRecog_8859_6_ar in this file declares match().
 */
michael@0 99 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
michael@0 100 {
michael@0 101 public:
michael@0 102 virtual ~CharsetRecog_8859_6();
michael@0 103
michael@0 104 const char *getName() const;
michael@0 105 };
michael@0 106
/*
 * CharsetRecog_8859_7: intermediate base that declares getName() only.
 * match() is still pure virtual from CharsetRecog_sbcs, so this class stays
 * abstract; CharsetRecog_8859_7_el in this file declares match().
 */
michael@0 107 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
michael@0 108 {
michael@0 109 public:
michael@0 110 virtual ~CharsetRecog_8859_7();
michael@0 111
michael@0 112 const char *getName() const;
michael@0 113 };
michael@0 114
/*
 * CharsetRecog_8859_8: intermediate base that declares getName() only.
 * match() is still pure virtual from CharsetRecog_sbcs, so this class stays
 * abstract; CharsetRecog_8859_8_I_he and CharsetRecog_8859_8_he in this
 * file declare match().
 */
michael@0 115 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
michael@0 116 {
michael@0 117 public:
michael@0 118 virtual ~CharsetRecog_8859_8();
michael@0 119
michael@0 120 virtual const char *getName() const;
michael@0 121 };
michael@0 122
/*
 * CharsetRecog_8859_9: intermediate base that declares getName() only.
 * match() is still pure virtual from CharsetRecog_sbcs, so this class stays
 * abstract; CharsetRecog_8859_9_tr in this file declares match().
 */
michael@0 123 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
michael@0 124 {
michael@0 125 public:
michael@0 126 virtual ~CharsetRecog_8859_9();
michael@0 127
michael@0 128 const char *getName() const;
michael@0 129 };
michael@0 130
michael@0 131
michael@0 132
/*
 * CharsetRecog_8859_5_ru: CharsetRecog_8859_5 subclass that adds
 * getLanguage() and declares match(); getName() is inherited.
 * NOTE(review): getLanguage() is presumably an override of a virtual
 * declared on CharsetRecognizer (csrecog.h, not visible here) -- confirm.
 */
michael@0 133 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
michael@0 134 {
michael@0 135 public:
michael@0 136 virtual ~CharsetRecog_8859_5_ru();
michael@0 137
michael@0 138 const char *getLanguage() const;
michael@0 139
michael@0 140 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 141 };
michael@0 142
/*
 * CharsetRecog_8859_6_ar: CharsetRecog_8859_6 subclass that adds
 * getLanguage() and declares match(); getName() is inherited.
 * NOTE(review): getLanguage() is presumably an override of a virtual
 * declared on CharsetRecognizer (csrecog.h, not visible here) -- confirm.
 */
michael@0 143 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
michael@0 144 {
michael@0 145 public:
michael@0 146 virtual ~CharsetRecog_8859_6_ar();
michael@0 147
michael@0 148 const char *getLanguage() const;
michael@0 149
michael@0 150 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 151 };
michael@0 152
/*
 * CharsetRecog_8859_7_el: CharsetRecog_8859_7 subclass that adds
 * getLanguage() and declares match(); getName() is inherited.
 * NOTE(review): getLanguage() is presumably an override of a virtual
 * declared on CharsetRecognizer (csrecog.h, not visible here) -- confirm.
 */
michael@0 153 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
michael@0 154 {
michael@0 155 public:
michael@0 156 virtual ~CharsetRecog_8859_7_el();
michael@0 157
michael@0 158 const char *getLanguage() const;
michael@0 159
michael@0 160 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 161 };
michael@0 162
/*
 * CharsetRecog_8859_8_I_he: CharsetRecog_8859_8 subclass that redeclares
 * getName() (virtual in the parent, so this overrides it), adds
 * getLanguage() and declares match().
 * NOTE(review): getLanguage() is presumably an override of a virtual
 * declared on CharsetRecognizer (csrecog.h, not visible here) -- confirm.
 */
michael@0 163 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
michael@0 164 {
michael@0 165 public:
michael@0 166 virtual ~CharsetRecog_8859_8_I_he();
michael@0 167
michael@0 168 const char *getName() const;
michael@0 169
michael@0 170 const char *getLanguage() const;
michael@0 171
michael@0 172 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 173 };
michael@0 174
/*
 * CharsetRecog_8859_8_he: CharsetRecog_8859_8 subclass that adds
 * getLanguage() and declares match(); unlike CharsetRecog_8859_8_I_he it
 * does not redeclare getName(), so the parent's getName() is inherited.
 * NOTE(review): getLanguage() is presumably an override of a virtual
 * declared on CharsetRecognizer (csrecog.h, not visible here) -- confirm.
 */
michael@0 175 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
michael@0 176 {
michael@0 177 public:
michael@0 178 virtual ~CharsetRecog_8859_8_he ();
michael@0 179
michael@0 180 const char *getLanguage() const;
michael@0 181
michael@0 182 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 183 };
michael@0 184
/*
 * CharsetRecog_8859_9_tr: CharsetRecog_8859_9 subclass that adds
 * getLanguage() and declares match(); getName() is inherited.
 * NOTE(review): getLanguage() is presumably an override of a virtual
 * declared on CharsetRecognizer (csrecog.h, not visible here) -- confirm.
 */
michael@0 185 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
michael@0 186 {
michael@0 187 public:
michael@0 188 virtual ~CharsetRecog_8859_9_tr ();
michael@0 189
michael@0 190 const char *getLanguage() const;
michael@0 191
michael@0 192 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 193 };
michael@0 194
/*
 * CharsetRecog_windows_1256: direct CharsetRecog_sbcs subclass that
 * declares getName(), getLanguage() and match() itself.
 * NOTE(review): getLanguage() is presumably an override of a virtual
 * declared on CharsetRecognizer (csrecog.h, not visible here) -- confirm.
 */
michael@0 195 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
michael@0 196 {
michael@0 197 public:
michael@0 198 virtual ~CharsetRecog_windows_1256();
michael@0 199
michael@0 200 const char *getName() const;
michael@0 201
michael@0 202 const char *getLanguage() const;
michael@0 203
michael@0 204 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 205 };
michael@0 206
/*
 * CharsetRecog_windows_1251: direct CharsetRecog_sbcs subclass that
 * declares getName(), getLanguage() and match() itself.
 * NOTE(review): getLanguage() is presumably an override of a virtual
 * declared on CharsetRecognizer (csrecog.h, not visible here) -- confirm.
 */
michael@0 207 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
michael@0 208 {
michael@0 209 public:
michael@0 210 virtual ~CharsetRecog_windows_1251();
michael@0 211
michael@0 212 const char *getName() const;
michael@0 213
michael@0 214 const char *getLanguage() const;
michael@0 215
michael@0 216 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 217 };
michael@0 218
michael@0 219
/*
 * CharsetRecog_KOI8_R: direct CharsetRecog_sbcs subclass that declares
 * getName(), getLanguage() and match() itself.
 * NOTE(review): getLanguage() is presumably an override of a virtual
 * declared on CharsetRecognizer (csrecog.h, not visible here) -- confirm.
 */
michael@0 220 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
michael@0 221 {
michael@0 222 public:
michael@0 223 virtual ~CharsetRecog_KOI8_R();
michael@0 224
michael@0 225 const char *getName() const;
michael@0 226
michael@0 227 const char *getLanguage() const;
michael@0 228
michael@0 229 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 230 };
michael@0 231
/*
 * CharsetRecog_IBM424_he: intermediate base that declares getLanguage()
 * only. getName() and match() are still pure virtual from
 * CharsetRecog_sbcs, so this class stays abstract; the _rtl and _ltr
 * subclasses in this file declare both.
 */
michael@0 232 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
michael@0 233 {
michael@0 234 public:
michael@0 235 virtual ~CharsetRecog_IBM424_he();
michael@0 236
michael@0 237 const char *getLanguage() const;
michael@0 238 };
michael@0 239
/*
 * CharsetRecog_IBM424_he_rtl: CharsetRecog_IBM424_he subclass that declares
 * getName() and match(); getLanguage() is inherited from the parent.
 */
michael@0 240 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
michael@0 241 public:
michael@0 242 virtual ~CharsetRecog_IBM424_he_rtl();
michael@0 243
michael@0 244 const char *getName() const;
michael@0 245
michael@0 246 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 247 };
michael@0 248
/*
 * CharsetRecog_IBM424_he_ltr: CharsetRecog_IBM424_he subclass that declares
 * getName() and match(); getLanguage() is inherited from the parent.
 *
 * NOTE(review): there is no access specifier in this class, so the
 * destructor, getName() and match() all default to private. The sibling
 * CharsetRecog_IBM424_he_rtl declares the same members under `public:`.
 * Calls through a base-class pointer are unaffected, but direct use of
 * these members on this type (including destroying an object of this
 * static type) is rejected -- confirm whether the omission is intended.
 */
michael@0 249 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
michael@0 250 virtual ~CharsetRecog_IBM424_he_ltr();
michael@0 251
michael@0 252 const char *getName() const;
michael@0 253
michael@0 254 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 255 };
michael@0 256
/*
 * CharsetRecog_IBM420_ar: intermediate base that declares getLanguage() and
 * redeclares match_sbcs() with the same signature as the virtual in
 * CharsetRecog_sbcs, so it overrides it. getName() and match() are still
 * pure virtual, so this class stays abstract; the _rtl and _ltr subclasses
 * in this file declare both.
 * NOTE(review): the override presumably pairs with NGramParser_IBM420 --
 * confirm in the implementation file.
 */
michael@0 257 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
michael@0 258 {
michael@0 259 public:
michael@0 260 virtual ~CharsetRecog_IBM420_ar();
michael@0 261
michael@0 262 const char *getLanguage() const;
michael@0 263 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
michael@0 264
michael@0 265 };
michael@0 266
/*
 * CharsetRecog_IBM420_ar_rtl: CharsetRecog_IBM420_ar subclass that declares
 * getName() and match(); getLanguage() and match_sbcs() are inherited from
 * the parent.
 */
michael@0 267 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
michael@0 268 public:
michael@0 269 virtual ~CharsetRecog_IBM420_ar_rtl();
michael@0 270
michael@0 271 const char *getName() const;
michael@0 272
michael@0 273 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 274 };
michael@0 275
/*
 * CharsetRecog_IBM420_ar_ltr: CharsetRecog_IBM420_ar subclass that declares
 * getName() and match(); getLanguage() and match_sbcs() are inherited from
 * the parent.
 *
 * NOTE(review): there is no access specifier in this class, so the
 * destructor, getName() and match() all default to private. The sibling
 * CharsetRecog_IBM420_ar_rtl declares the same members under `public:`.
 * Calls through a base-class pointer are unaffected, but direct use of
 * these members on this type (including destroying an object of this
 * static type) is rejected -- confirm whether the omission is intended.
 */
michael@0 276 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
michael@0 277 virtual ~CharsetRecog_IBM420_ar_ltr();
michael@0 278
michael@0 279 const char *getName() const;
michael@0 280
michael@0 281 virtual UBool match(InputText *det, CharsetMatch *results) const;
michael@0 282 };
michael@0 283
michael@0 284 U_NAMESPACE_END
michael@0 285
michael@0 286 #endif /* !UCONFIG_NO_CONVERSION */
michael@0 287 #endif /* __CSRSBCS_H */

mercurial