intl/icu/source/i18n/csrsbcs.h

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:042d75a6ab32
1 /*
2 **********************************************************************
3 * Copyright (C) 2005-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #ifndef __CSRSBCS_H
9 #define __CSRSBCS_H
10
11 #include "unicode/uobject.h"
12
13 #if !UCONFIG_NO_CONVERSION
14
15 #include "csrecog.h"
16
17 U_NAMESPACE_BEGIN
18
19 class NGramParser : public UMemory
20 {
21 private:
22 int32_t ngram;
23 const int32_t *ngramList;
24
25 int32_t ngramCount;
26 int32_t hitCount;
27
28 protected:
29 int32_t byteIndex;
30 const uint8_t *charMap;
31
32 void addByte(int32_t b);
33
34 public:
35 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
36
37 private:
38 /*
39 * Binary search for value in table, which must have exactly 64 entries.
40 */
41 int32_t search(const int32_t *table, int32_t value);
42
43 void lookup(int32_t thisNgram);
44
45 virtual int32_t nextByte(InputText *det);
46 virtual void parseCharacters(InputText *det);
47
48 public:
49 int32_t parse(InputText *det);
50
51 };
52
53 class NGramParser_IBM420 : public NGramParser
54 {
55 private:
56 int32_t alef;
57 int32_t isLamAlef(int32_t b);
58 int32_t nextByte(InputText *det);
59 void parseCharacters(InputText *det);
60
61 public:
62 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
63 };
64
65
66 class CharsetRecog_sbcs : public CharsetRecognizer
67 {
68 public:
69 CharsetRecog_sbcs();
70 virtual ~CharsetRecog_sbcs();
71 virtual const char *getName() const = 0;
72 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
73 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
74 };
75
76 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
77 {
78 public:
79 virtual ~CharsetRecog_8859_1();
80 const char *getName() const;
81 virtual UBool match(InputText *det, CharsetMatch *results) const;
82 };
83
84 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
85 {
86 public:
87 virtual ~CharsetRecog_8859_2();
88 const char *getName() const;
89 virtual UBool match(InputText *det, CharsetMatch *results) const;
90 };
91
92 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
93 {
94 public:
95 virtual ~CharsetRecog_8859_5();
96 const char *getName() const;
97 };
98
99 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
100 {
101 public:
102 virtual ~CharsetRecog_8859_6();
103
104 const char *getName() const;
105 };
106
107 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
108 {
109 public:
110 virtual ~CharsetRecog_8859_7();
111
112 const char *getName() const;
113 };
114
115 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
116 {
117 public:
118 virtual ~CharsetRecog_8859_8();
119
120 virtual const char *getName() const;
121 };
122
123 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
124 {
125 public:
126 virtual ~CharsetRecog_8859_9();
127
128 const char *getName() const;
129 };
130
131
132
133 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
134 {
135 public:
136 virtual ~CharsetRecog_8859_5_ru();
137
138 const char *getLanguage() const;
139
140 virtual UBool match(InputText *det, CharsetMatch *results) const;
141 };
142
143 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
144 {
145 public:
146 virtual ~CharsetRecog_8859_6_ar();
147
148 const char *getLanguage() const;
149
150 virtual UBool match(InputText *det, CharsetMatch *results) const;
151 };
152
153 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
154 {
155 public:
156 virtual ~CharsetRecog_8859_7_el();
157
158 const char *getLanguage() const;
159
160 virtual UBool match(InputText *det, CharsetMatch *results) const;
161 };
162
163 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
164 {
165 public:
166 virtual ~CharsetRecog_8859_8_I_he();
167
168 const char *getName() const;
169
170 const char *getLanguage() const;
171
172 virtual UBool match(InputText *det, CharsetMatch *results) const;
173 };
174
175 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
176 {
177 public:
178 virtual ~CharsetRecog_8859_8_he ();
179
180 const char *getLanguage() const;
181
182 virtual UBool match(InputText *det, CharsetMatch *results) const;
183 };
184
185 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
186 {
187 public:
188 virtual ~CharsetRecog_8859_9_tr ();
189
190 const char *getLanguage() const;
191
192 virtual UBool match(InputText *det, CharsetMatch *results) const;
193 };
194
195 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
196 {
197 public:
198 virtual ~CharsetRecog_windows_1256();
199
200 const char *getName() const;
201
202 const char *getLanguage() const;
203
204 virtual UBool match(InputText *det, CharsetMatch *results) const;
205 };
206
207 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
208 {
209 public:
210 virtual ~CharsetRecog_windows_1251();
211
212 const char *getName() const;
213
214 const char *getLanguage() const;
215
216 virtual UBool match(InputText *det, CharsetMatch *results) const;
217 };
218
219
220 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
221 {
222 public:
223 virtual ~CharsetRecog_KOI8_R();
224
225 const char *getName() const;
226
227 const char *getLanguage() const;
228
229 virtual UBool match(InputText *det, CharsetMatch *results) const;
230 };
231
232 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
233 {
234 public:
235 virtual ~CharsetRecog_IBM424_he();
236
237 const char *getLanguage() const;
238 };
239
240 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
241 public:
242 virtual ~CharsetRecog_IBM424_he_rtl();
243
244 const char *getName() const;
245
246 virtual UBool match(InputText *det, CharsetMatch *results) const;
247 };
248
249 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
250 virtual ~CharsetRecog_IBM424_he_ltr();
251
252 const char *getName() const;
253
254 virtual UBool match(InputText *det, CharsetMatch *results) const;
255 };
256
257 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
258 {
259 public:
260 virtual ~CharsetRecog_IBM420_ar();
261
262 const char *getLanguage() const;
263 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
264
265 };
266
267 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
268 public:
269 virtual ~CharsetRecog_IBM420_ar_rtl();
270
271 const char *getName() const;
272
273 virtual UBool match(InputText *det, CharsetMatch *results) const;
274 };
275
276 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
277 virtual ~CharsetRecog_IBM420_ar_ltr();
278
279 const char *getName() const;
280
281 virtual UBool match(InputText *det, CharsetMatch *results) const;
282 };
283
284 U_NAMESPACE_END
285
286 #endif /* !UCONFIG_NO_CONVERSION */
287 #endif /* __CSRSBCS_H */

mercurial