Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /*
2 **********************************************************************
3 * Copyright (C) 2005-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
8 #ifndef __CSRSBCS_H
9 #define __CSRSBCS_H
11 #include "unicode/uobject.h"
13 #if !UCONFIG_NO_CONVERSION
15 #include "csrecog.h"
17 U_NAMESPACE_BEGIN
19 class NGramParser : public UMemory
20 {
21 private:
22 int32_t ngram;
23 const int32_t *ngramList;
25 int32_t ngramCount;
26 int32_t hitCount;
28 protected:
29 int32_t byteIndex;
30 const uint8_t *charMap;
32 void addByte(int32_t b);
34 public:
35 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
37 private:
38 /*
39 * Binary search for value in table, which must have exactly 64 entries.
40 */
41 int32_t search(const int32_t *table, int32_t value);
43 void lookup(int32_t thisNgram);
45 virtual int32_t nextByte(InputText *det);
46 virtual void parseCharacters(InputText *det);
48 public:
49 int32_t parse(InputText *det);
51 };
53 class NGramParser_IBM420 : public NGramParser
54 {
55 private:
56 int32_t alef;
57 int32_t isLamAlef(int32_t b);
58 int32_t nextByte(InputText *det);
59 void parseCharacters(InputText *det);
61 public:
62 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
63 };
66 class CharsetRecog_sbcs : public CharsetRecognizer
67 {
68 public:
69 CharsetRecog_sbcs();
70 virtual ~CharsetRecog_sbcs();
71 virtual const char *getName() const = 0;
72 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
73 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
74 };
76 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
77 {
78 public:
79 virtual ~CharsetRecog_8859_1();
80 const char *getName() const;
81 virtual UBool match(InputText *det, CharsetMatch *results) const;
82 };
84 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
85 {
86 public:
87 virtual ~CharsetRecog_8859_2();
88 const char *getName() const;
89 virtual UBool match(InputText *det, CharsetMatch *results) const;
90 };
92 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
93 {
94 public:
95 virtual ~CharsetRecog_8859_5();
96 const char *getName() const;
97 };
99 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
100 {
101 public:
102 virtual ~CharsetRecog_8859_6();
104 const char *getName() const;
105 };
107 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
108 {
109 public:
110 virtual ~CharsetRecog_8859_7();
112 const char *getName() const;
113 };
115 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
116 {
117 public:
118 virtual ~CharsetRecog_8859_8();
120 virtual const char *getName() const;
121 };
123 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
124 {
125 public:
126 virtual ~CharsetRecog_8859_9();
128 const char *getName() const;
129 };
133 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
134 {
135 public:
136 virtual ~CharsetRecog_8859_5_ru();
138 const char *getLanguage() const;
140 virtual UBool match(InputText *det, CharsetMatch *results) const;
141 };
143 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
144 {
145 public:
146 virtual ~CharsetRecog_8859_6_ar();
148 const char *getLanguage() const;
150 virtual UBool match(InputText *det, CharsetMatch *results) const;
151 };
153 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
154 {
155 public:
156 virtual ~CharsetRecog_8859_7_el();
158 const char *getLanguage() const;
160 virtual UBool match(InputText *det, CharsetMatch *results) const;
161 };
163 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
164 {
165 public:
166 virtual ~CharsetRecog_8859_8_I_he();
168 const char *getName() const;
170 const char *getLanguage() const;
172 virtual UBool match(InputText *det, CharsetMatch *results) const;
173 };
175 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
176 {
177 public:
178 virtual ~CharsetRecog_8859_8_he ();
180 const char *getLanguage() const;
182 virtual UBool match(InputText *det, CharsetMatch *results) const;
183 };
185 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
186 {
187 public:
188 virtual ~CharsetRecog_8859_9_tr ();
190 const char *getLanguage() const;
192 virtual UBool match(InputText *det, CharsetMatch *results) const;
193 };
195 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
196 {
197 public:
198 virtual ~CharsetRecog_windows_1256();
200 const char *getName() const;
202 const char *getLanguage() const;
204 virtual UBool match(InputText *det, CharsetMatch *results) const;
205 };
207 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
208 {
209 public:
210 virtual ~CharsetRecog_windows_1251();
212 const char *getName() const;
214 const char *getLanguage() const;
216 virtual UBool match(InputText *det, CharsetMatch *results) const;
217 };
220 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
221 {
222 public:
223 virtual ~CharsetRecog_KOI8_R();
225 const char *getName() const;
227 const char *getLanguage() const;
229 virtual UBool match(InputText *det, CharsetMatch *results) const;
230 };
232 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
233 {
234 public:
235 virtual ~CharsetRecog_IBM424_he();
237 const char *getLanguage() const;
238 };
240 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
241 public:
242 virtual ~CharsetRecog_IBM424_he_rtl();
244 const char *getName() const;
246 virtual UBool match(InputText *det, CharsetMatch *results) const;
247 };
249 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
250 virtual ~CharsetRecog_IBM424_he_ltr();
252 const char *getName() const;
254 virtual UBool match(InputText *det, CharsetMatch *results) const;
255 };
257 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
258 {
259 public:
260 virtual ~CharsetRecog_IBM420_ar();
262 const char *getLanguage() const;
263 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
265 };
267 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
268 public:
269 virtual ~CharsetRecog_IBM420_ar_rtl();
271 const char *getName() const;
273 virtual UBool match(InputText *det, CharsetMatch *results) const;
274 };
276 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
277 virtual ~CharsetRecog_IBM420_ar_ltr();
279 const char *getName() const;
281 virtual UBool match(InputText *det, CharsetMatch *results) const;
282 };
284 U_NAMESPACE_END
286 #endif /* !UCONFIG_NO_CONVERSION */
287 #endif /* __CSRSBCS_H */