|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (C) 2000-2012, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * file name: ucnvisci.c |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2001JUN26 |
|
12 * created by: Ram Viswanadha |
|
13 * |
|
14 * Date Name Description |
|
15 * 24/7/2001 Ram Added support for EXT character handling |
|
16 */ |
|
17 |
|
18 #include "unicode/utypes.h" |
|
19 |
|
20 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
|
21 |
|
22 #include "unicode/ucnv.h" |
|
23 #include "unicode/ucnv_cb.h" |
|
24 #include "unicode/utf16.h" |
|
25 #include "cmemory.h" |
|
26 #include "ucnv_bld.h" |
|
27 #include "ucnv_cnv.h" |
|
28 #include "cstring.h" |
|
29 #include "uassert.h" |
|
30 |
|
/* Mask applied to the converter options to extract the requested ISCII version. */
#define UCNV_OPTIONS_VERSION_MASK 0xf
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero Width Joiner */
#define INVALID_CHAR        0xffff
#define ATR                 0xEF /* Attribute code */
#define EXT                 0xF0 /* Extension code */
#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE
/*
 * Product of the UniLang enumerators DELTA and TELUGU.
 * The replacement list is parenthesized so that the macro always expands to a
 * single operand, whatever operator appears next to it at the point of use.
 */
#define TELUGU_DELTA        (DELTA * TELUGU)
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE

#define PNJ_DELTA           0x0100
#define PNJ_BINDI           0x0A02
#define PNJ_TIPPI           0x0A70
#define PNJ_SIGN_VIRAMA     0x0A4D
#define PNJ_ADHAK           0x0A71
#define PNJ_HA              0x0A39
#define PNJ_RRA             0x0A5C
|
66 |
|
/*
 * Script selectors. The values 0..8 are the positions of the scripts in
 * lookupInitialData; _ISCIIOpen multiplies the selected value by DELTA to
 * obtain the converter's delta within the Indic block.
 * DELTA itself is not a script, only that multiplier.
 */
typedef enum {
    DEVANAGARI = 0,
    BENGALI    = 1,
    GURMUKHI   = 2,
    GUJARATI   = 3,
    ORIYA      = 4,
    TAMIL      = 5,
    TELUGU     = 6,
    KANNADA    = 7,
    MALAYALAM  = 8,
    DELTA      = 0x80
} UniLang;
|
79 |
|
/**
 * Code page selectors: when <ATR> followed by one of the values below is
 * encountered, the converter switches code pages.
 *
 * The script named next to an entry is the one lookupInitialData pairs it
 * with; entries without a note are not paired with a script in that table.
 */
typedef enum {
    DEF = 0x40,
    RMN = 0x41,
    DEV = 0x42,     /* DEVANAGARI */
    BNG = 0x43,     /* BENGALI */
    TML = 0x44,     /* TAMIL */
    TLG = 0x45,     /* TELUGU */
    ASM = 0x46,
    ORI = 0x47,     /* ORIYA */
    KND = 0x48,     /* KANNADA */
    MLM = 0x49,     /* MALAYALAM */
    GJR = 0x4A,     /* GUJARATI */
    PNJ = 0x4B,     /* GURMUKHI */
    ARB = 0x71,
    PES = 0x72,
    URD = 0x73,
    SND = 0x74,
    KSM = 0x75,
    PST = 0x76
} ISCIILang;
|
104 |
|
/*
 * One bit per script (or pair of scripts sharing code points) as used in the
 * entries of validityTable. ZERO is the empty mask.
 */
typedef enum {
    ZERO     = 0x00,
    TML_MASK = 0x01,
    MLM_MASK = 0x02,
    KND_MASK = 0x04,    /* also used for Telugu, see lookupInitialData */
    BNG_MASK = 0x08,
    ORI_MASK = 0x10,
    GJR_MASK = 0x20,
    PNJ_MASK = 0x40,
    DEV_MASK = 0x80
} MaskEnum;
|
116 |
|
117 #define ISCII_CNV_PREFIX "ISCII,version=" |
|
118 |
|
119 typedef struct { |
|
120 UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */ |
|
121 UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */ |
|
122 uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */ |
|
123 uint16_t currentDeltaFromUnicode; /* current delta in Indic block */ |
|
124 uint16_t currentDeltaToUnicode; /* current delta in Indic block */ |
|
125 MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */ |
|
126 MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */ |
|
127 MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */ |
|
128 UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */ |
|
129 UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/ |
|
130 char name[sizeof(ISCII_CNV_PREFIX) + 1]; |
|
131 UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */ |
|
132 } UConverterDataISCII; |
|
133 |
|
134 typedef struct LookupDataStruct { |
|
135 UniLang uniLang; |
|
136 MaskEnum maskEnum; |
|
137 ISCIILang isciiLang; |
|
138 } LookupDataStruct; |
|
139 |
|
140 static const LookupDataStruct lookupInitialData[]={ |
|
141 { DEVANAGARI, DEV_MASK, DEV }, |
|
142 { BENGALI, BNG_MASK, BNG }, |
|
143 { GURMUKHI, PNJ_MASK, PNJ }, |
|
144 { GUJARATI, GJR_MASK, GJR }, |
|
145 { ORIYA, ORI_MASK, ORI }, |
|
146 { TAMIL, TML_MASK, TML }, |
|
147 { TELUGU, KND_MASK, TLG }, |
|
148 { KANNADA, KND_MASK, KND }, |
|
149 { MALAYALAM, MLM_MASK, MLM } |
|
150 }; |
|
151 |
|
/*
 * Flags for the special handling of certain Gurmukhi characters, indexed by
 * (code point - 0x0A00) for U+0A00..U+0A4F.
 *   Bit 0 (value 1): PNJ consonant        - tested by isPNJConsonant()
 *   Bit 1 (value 2): PNJ Bindi Tippi      - tested by isPNJBindiTippi()
 * An entry of 3 has both bits set.
 */
static const uint8_t pnjMap[80] = {
    /* 0A00..0A07 */ 0, 0, 0, 0, 0, 2, 0, 2,
    /* 0A08..0A0F */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0A10..0A17 */ 0, 0, 0, 0, 0, 3, 3, 3,
    /* 0A18..0A1F */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0A20..0A27 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0A28..0A2F */ 3, 0, 3, 3, 3, 3, 3, 3,
    /* 0A30..0A37 */ 3, 0, 0, 0, 0, 3, 3, 0,
    /* 0A38..0A3F */ 3, 3, 0, 0, 0, 0, 0, 2,
    /* 0A40..0A47 */ 0, 2, 2, 0, 0, 0, 0, 0,
    /* 0A48..0A4F */ 0, 0, 0, 0, 0, 0, 0, 0
};
|
169 |
|
170 static UBool |
|
171 isPNJConsonant(UChar32 c) { |
|
172 if (c < 0xa00 || 0xa50 <= c) { |
|
173 return FALSE; |
|
174 } else { |
|
175 return (UBool)(pnjMap[c - 0xa00] & 1); |
|
176 } |
|
177 } |
|
178 |
|
179 static UBool |
|
180 isPNJBindiTippi(UChar32 c) { |
|
181 if (c < 0xa00 || 0xa50 <= c) { |
|
182 return FALSE; |
|
183 } else { |
|
184 return (UBool)(pnjMap[c - 0xa00] >> 1); |
|
185 } |
|
186 } |
|
187 |
|
188 static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) { |
|
189 if(pArgs->onlyTestIsLoadable) { |
|
190 return; |
|
191 } |
|
192 |
|
193 cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII)); |
|
194 |
|
195 if (cnv->extraInfo != NULL) { |
|
196 int32_t len=0; |
|
197 UConverterDataISCII *converterData= |
|
198 (UConverterDataISCII *) cnv->extraInfo; |
|
199 converterData->contextCharToUnicode=NO_CHAR_MARKER; |
|
200 cnv->toUnicodeStatus = missingCharMarker; |
|
201 converterData->contextCharFromUnicode=0x0000; |
|
202 converterData->resetToDefaultToUnicode=FALSE; |
|
203 /* check if the version requested is supported */ |
|
204 if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) { |
|
205 /* initialize state variables */ |
|
206 converterData->currentDeltaFromUnicode |
|
207 = converterData->currentDeltaToUnicode |
|
208 = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA); |
|
209 |
|
210 converterData->currentMaskFromUnicode |
|
211 = converterData->currentMaskToUnicode |
|
212 = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum; |
|
213 |
|
214 converterData->isFirstBuffer=TRUE; |
|
215 (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX); |
|
216 len = (int32_t)uprv_strlen(converterData->name); |
|
217 converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0'); |
|
218 converterData->name[len+1]=0; |
|
219 |
|
220 converterData->prevToUnicodeStatus = 0x0000; |
|
221 } else { |
|
222 uprv_free(cnv->extraInfo); |
|
223 cnv->extraInfo = NULL; |
|
224 *errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
|
225 } |
|
226 |
|
227 } else { |
|
228 *errorCode =U_MEMORY_ALLOCATION_ERROR; |
|
229 } |
|
230 } |
|
231 |
|
232 static void _ISCIIClose(UConverter *cnv) { |
|
233 if (cnv->extraInfo!=NULL) { |
|
234 if (!cnv->isExtraLocal) { |
|
235 uprv_free(cnv->extraInfo); |
|
236 } |
|
237 cnv->extraInfo=NULL; |
|
238 } |
|
239 } |
|
240 |
|
241 static const char* _ISCIIgetName(const UConverter* cnv) { |
|
242 if (cnv->extraInfo) { |
|
243 UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo; |
|
244 return myData->name; |
|
245 } |
|
246 return NULL; |
|
247 } |
|
248 |
|
249 static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) { |
|
250 UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo); |
|
251 if (choice<=UCNV_RESET_TO_UNICODE) { |
|
252 cnv->toUnicodeStatus = missingCharMarker; |
|
253 cnv->mode=0; |
|
254 data->currentDeltaToUnicode=data->defDeltaToUnicode; |
|
255 data->currentMaskToUnicode = data->defMaskToUnicode; |
|
256 data->contextCharToUnicode=NO_CHAR_MARKER; |
|
257 data->prevToUnicodeStatus = 0x0000; |
|
258 } |
|
259 if (choice!=UCNV_RESET_TO_UNICODE) { |
|
260 cnv->fromUChar32=0x0000; |
|
261 data->contextCharFromUnicode=0x00; |
|
262 data->currentMaskFromUnicode=data->defMaskToUnicode; |
|
263 data->currentDeltaFromUnicode=data->defDeltaToUnicode; |
|
264 data->isFirstBuffer=TRUE; |
|
265 data->resetToDefaultToUnicode=FALSE; |
|
266 } |
|
267 } |
|
268 |
|
269 /** |
|
270 * The values in validity table are indexed by the lower bits of Unicode |
|
271 * range 0x0900 - 0x09ff. The values have a structure like: |
|
272 * --------------------------------------------------------------- |
|
273 * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | |
|
274 * | | | | | ASM | KND | | | |
|
275 * --------------------------------------------------------------- |
|
276 * If a code point is valid in a particular script |
|
277 * then that bit is turned on |
|
278 * |
|
279 * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for |
|
280 * to represent these languages |
|
281 * |
|
282 * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case |
|
283 * and combine and use 1 bit to represent these languages. |
|
284 * |
|
285 * TODO: It is probably easier to understand and maintain to change this |
|
286 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit. |
|
287 */ |
|
288 |
|
289 static const uint8_t validityTable[128] = { |
|
290 /* This state table is tool generated please do not edit unless you know exactly what you are doing */ |
|
291 /* Note: This table was edited to mirror the Windows XP implementation */ |
|
292 /*ISCII:Valid:Unicode */ |
|
293 /*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
294 /*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , |
|
295 /*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
296 /*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
297 /*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
298 /*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
299 /*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
300 /*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
301 /*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
302 /*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
303 /*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
304 /*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
305 /*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
306 /*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
307 /*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , |
|
308 /*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
309 /*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
310 /*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
311 /*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , |
|
312 /*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
313 /*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
314 /*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
315 /*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
316 /*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
317 /*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
318 /*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
319 /*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
320 /*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
321 /*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
322 /*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
323 /*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
324 /*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
325 /*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
326 /*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
327 /*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
328 /*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
329 /*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
330 /*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
331 /*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
332 /*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
333 /*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
334 /*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK , |
|
335 /*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
336 /*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
337 /*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
338 /*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
339 /*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
340 /*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
341 /*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
342 /*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , |
|
343 /*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
344 /*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , |
|
345 /*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , |
|
346 /*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , |
|
347 /*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
348 /*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
349 /*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
350 /*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
351 /*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
352 /*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
353 /*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , |
|
354 /*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
355 /*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
356 /*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
357 /*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
358 /*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
359 /*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
360 /*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
361 /*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO , |
|
362 /*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
363 /*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , |
|
364 /*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
365 /*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
366 /*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
367 /*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , |
|
368 /*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
369 /*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
370 /*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
371 /*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
372 /*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
373 /*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
374 /*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
375 /*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
376 /*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
377 /*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
378 /*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO , |
|
379 /*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO , |
|
380 /*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO , |
|
381 /*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
382 /*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
383 /*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
384 /*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
385 /*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , |
|
386 /*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , |
|
387 /*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
388 /*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , |
|
389 /*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
390 /*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , |
|
391 /*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , |
|
392 /*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , |
|
393 /*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
394 /*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
395 /*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
396 /*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
397 /*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
398 /*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
399 /*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
400 /*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
401 /*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
402 /*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
403 /*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
404 /*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , |
|
405 /*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , |
|
406 /* |
|
407 * The length of the array is 128 to provide values for 0x900..0x97f. |
|
408 * The last 15 entries for 0x971..0x97f of the validity table are all zero |
|
409 * because no Indic script uses such Unicode code points. |
|
410 */ |
|
411 /*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO |
|
412 }; |
|
413 |
|
/*
 * Lookup table indexed by the offset of a code point from 0x0900
 * (0x0900..0x097f), eight entries per row. Many values fit in the low byte
 * only; others carry a second byte in the high byte (for example 0xeaea for
 * 0x0965). 0xFFFF is used for entries without a value.
 */
static const uint16_t fromUnicodeTable[128]={
    /* 0x0900 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0xa4e0, 0x00a4, 0x00a5, 0x00a6,
    /* 0x0908 */ 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0xA6E9, 0x00ae, 0x00ab, 0x00ac,
    /* 0x0910 */ 0x00ad, 0x00b2, 0x00af, 0x00b0, 0x00b1, 0x00b3, 0x00b4, 0x00b5,
    /* 0x0918 */ 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd,
    /* 0x0920 */ 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5,
    /* 0x0928 */ 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd,
    /* 0x0930 */ 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6,
    /* 0x0938 */ 0x00d7, 0x00d8, 0xFFFF, 0xFFFF, 0x00e9, 0xEAE9, 0x00da, 0x00db,
    /* 0x0940 */ 0x00dc, 0x00dd, 0x00de, 0x00df, 0xDFE9, 0x00e3, 0x00e0, 0x00e1,
    /* 0x0948 */ 0x00e2, 0x00e7, 0x00e4, 0x00e5, 0x00e6, 0x00e8, 0x00ec, 0x00ed,
    /* 0x0950 (first entry: OM Symbol) */
                 0xA1E9, 0xFFFF, 0xF0B8, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0958 */ 0xb3e9, 0xb4e9, 0xb5e9, 0xbae9, 0xbfe9, 0xC0E9, 0xc9e9, 0x00ce,
    /* 0x0960 */ 0xAAe9, 0xA7E9, 0xDBE9, 0xDCE9, 0x00ea, 0xeaea, 0x00f1, 0x00f2,
    /* 0x0968 */ 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa,
    /* 0x0970 */ 0xF0BF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0978 */ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
|
/*
 * Lookup table indexed by a byte value 0x00..0xff, eight entries per row.
 * Indexes 0x00..0xa0 hold their own value; from 0xa1 on the entries are code
 * points in the 0x09xx range, 0x200D (index 0xd9) or 0xFFFF.
 */
static const uint16_t toUnicodeTable[256]={
    /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    /* 0x08 */ 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    /* 0x18 */ 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    /* 0x28 */ 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    /* 0x38 */ 0x0038, 0x0039, 0x003A, 0x003B, 0x003c, 0x003d, 0x003e, 0x003f,
    /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    /* 0x48 */ 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
    /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    /* 0x58 */ 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    /* 0x68 */ 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    /* 0x78 */ 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    /* 0xa0 */ 0x00A0, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908,
    /* 0xa8 */ 0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912,
    /* 0xb0 */ 0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919,
    /* 0xb8 */ 0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921,
    /* 0xc0 */ 0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929,
    /* 0xc8 */ 0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930,
    /* 0xd0 */ 0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938,
    /* 0xd8 */ 0x0939, 0x200D, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943,
    /* 0xe0 */ 0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949,
    /* 0xe8 */ 0x094d, 0x093c, 0x0964, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0xf0 */ 0xFFFF, 0x0966, 0x0967, 0x0968, 0x0969, 0x096a, 0x096b, 0x096c,
    /* 0xf8 */ 0x096d, 0x096e, 0x096f, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
|
802 |
|
/*
 * Contextual mappings for ISCII_VOWEL_SIGN_E (0xE0).
 *
 * Row 0 is a header: its first element is the total number of rows in the
 * table, header included. Every later row is
 *     { preceding ISCII byte, Unicode code point produced for the pair }.
 */
static const uint16_t vowelSignESpecialCases[][2] = {
    { 2,    0x0000 },   /* header: number of rows */
    { 0xA4, 0x0904 }    /* U+0904 DEVANAGARI LETTER SHORT A */
};
|
807 |
|
/*
 * Contextual mappings for ISCII_NUKTA (0xE9).
 *
 * Row 0 is a header: its first element is the total number of rows in the
 * table, header included. Every later row is
 *     { preceding ISCII byte, Unicode code point produced for the pair }.
 * The code points are given in the Devanagari block.
 */
static const uint16_t nuktaSpecialCases[][2] = {
    { 16,   0x0000 },   /* header: number of rows */
    { 0xA6, 0x090C },   /* U+090C LETTER VOCALIC L */
    { 0xEA, 0x093D },   /* U+093D SIGN AVAGRAHA */
    { 0xDF, 0x0944 },   /* U+0944 VOWEL SIGN VOCALIC RR */
    { 0xA1, 0x0950 },   /* U+0950 OM */
    { 0xB3, 0x0958 },   /* U+0958 LETTER QA */
    { 0xB4, 0x0959 },   /* U+0959 LETTER KHHA */
    { 0xB5, 0x095A },   /* U+095A LETTER GHHA */
    { 0xBA, 0x095B },   /* U+095B LETTER ZA */
    { 0xBF, 0x095C },   /* U+095C LETTER DDDHA */
    { 0xC0, 0x095D },   /* U+095D LETTER RHA */
    { 0xC9, 0x095E },   /* U+095E LETTER FA */
    { 0xAA, 0x0960 },   /* U+0960 LETTER VOCALIC RR */
    { 0xA7, 0x0961 },   /* U+0961 LETTER VOCALIC LL */
    { 0xDB, 0x0962 },   /* U+0962 VOWEL SIGN VOCALIC L */
    { 0xDC, 0x0963 }    /* U+0963 VOWEL SIGN VOCALIC LL */
};
|
826 |
|
827 |
|
/*
 * WRITE_TO_TARGET_FROM_U: emit targetByteUnit to the byte target, most
 * significant byte first.
 *
 *  - values <= 0xFF are written as one byte, values > 0xFFFF as three bytes,
 *    everything else as two bytes;
 *  - every byte that does not fit before targetLimit is appended to
 *    args->converter->charErrorBuffer and *err is set to
 *    U_BUFFER_OVERFLOW_ERROR;
 *  - when offsets is non-NULL, one entry is stored per byte written to target:
 *    (source - args->source - 1), decremented once more for three-byte values;
 *  - target (and offsets, if used) are advanced past what was written.
 *
 * Note: when target is already full on entry, the number of bytes pushed to
 * the error buffer is decided by the mask tests (& 0xFF0000, & 0xFF00), not by
 * the magnitude comparisons used on the other path.
 *
 * The macro expands to a brace block that declares a local named "offset" and
 * evaluates its arguments several times, so pass plain variables only.
 */
828 #define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){ \ |

829 int32_t offset = (int32_t)(source - args->source-1); \ |

830 /* write targetByteUnit to target, most significant byte first */ \ |

831 if(target < targetLimit){ \ |

832 if(targetByteUnit <= 0xFF){ \ |

833 *(target)++ = (uint8_t)(targetByteUnit); \ |

834 if(offsets){ \ |

835 *(offsets++) = offset; \ |

836 } \ |

837 }else{ \ |

838 if (targetByteUnit > 0xFFFF) { \ |

839 *(target)++ = (uint8_t)(targetByteUnit>>16); \ |

840 if (offsets) { \ |

841 --offset; \ |

842 *(offsets++) = offset; \ |

843 } \ |

844 } \ |

845 if (!(target < targetLimit)) { \ |

846 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ |

847 (uint8_t)(targetByteUnit >> 8); \ |

848 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ |

849 (uint8_t)targetByteUnit; \ |

850 *err = U_BUFFER_OVERFLOW_ERROR; \ |

851 } else { \ |

852 *(target)++ = (uint8_t)(targetByteUnit>>8); \ |

853 if(offsets){ \ |

854 *(offsets++) = offset; \ |

855 } \ |

856 if(target < targetLimit){ \ |

857 *(target)++ = (uint8_t) targetByteUnit; \ |

858 if(offsets){ \ |

859 *(offsets++) = offset ; \ |

860 } \ |

861 }else{ \ |

862 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\ |

863 (uint8_t) (targetByteUnit); \ |

864 *err = U_BUFFER_OVERFLOW_ERROR; \ |

865 } \ |

866 } \ |

867 } \ |

868 }else{ \ |

869 if (targetByteUnit & 0xFF0000) { \ |

870 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ |

871 (uint8_t) (targetByteUnit >>16); \ |

872 } \ |

873 if(targetByteUnit & 0xFF00){ \ |

874 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ |

875 (uint8_t) (targetByteUnit >>8); \ |

876 } \ |

877 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ |

878 (uint8_t) (targetByteUnit); \ |

879 *err = U_BUFFER_OVERFLOW_ERROR; \ |

880 } \ |

881 } |
|
882 |
|
883 /* Rules: |
|
884 * Explicit Halant : |
|
885 * <HALANT> + <ZWNJ> |
|
886 * Soft Halant : |
|
887 * <HALANT> + <ZWJ> |
|
888 */ |
|
889 |
|
/*
 * Converts UTF-16 code units in [args->source, args->sourceLimit) to ISCII
 * bytes written at args->target, optionally recording offsets in
 * args->offsets (see WRITE_TO_TARGET_FROM_U).
 *
 * Handling per code unit:
 *  - if the previously written unit was LF (fromUnicodeStatus == LF), an
 *    ATR + script-code pair is written before the next character;
 *  - code points <= ASCII_END are written unchanged as one byte;
 *  - ZWNJ with a pending context character writes ISCII_HALANT, otherwise it
 *    is consumed silently;
 *  - ZWJ with a pending context character writes ISCII_NUKTA, otherwise
 *    ISCII_INV;
 *  - code points inside INDIC_BLOCK_BEGIN..INDIC_BLOCK_END are shifted down by
 *    the delta of their script block and looked up in fromUnicodeTable; when
 *    the script block changes, ATR + script code is written first;
 *  - in the Gurmukhi block (PNJ_DELTA) Tippi is mapped like Bindi, and Adhak is
 *    kept as context so the following consonant is written as
 *    C + ISCII_HALANT + C;
 *  - anything unmappable stops the loop with *err set and the offending code
 *    point stored in args->converter->fromUChar32.
 *
 * args->source and args->target are updated before returning.
 * *err is U_ILLEGAL_ARGUMENT_ERROR when the converter is NULL or a limit
 * precedes its start pointer.
 */
890 static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( |

891 UConverterFromUnicodeArgs * args, UErrorCode * err) { |

892 const UChar *source = args->source; |

893 const UChar *sourceLimit = args->sourceLimit; |

894 unsigned char *target = (unsigned char *) args->target; |

895 unsigned char *targetLimit = (unsigned char *) args->targetLimit; |

896 int32_t* offsets = args->offsets; |

897 uint32_t targetByteUnit = 0x0000; |

898 UChar32 sourceChar = 0x0000; |

899 UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */ |

900 UConverterDataISCII *converterData; |

901 uint16_t newDelta=0; |

902 uint16_t range = 0; |

903 UBool deltaChanged = FALSE; |

904 |

905 if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) { |

906 *err = U_ILLEGAL_ARGUMENT_ERROR; |

907 return; |

908 } |

909 /* initialize data */ |

910 converterData=(UConverterDataISCII*)args->converter->extraInfo; |

911 newDelta=converterData->currentDeltaFromUnicode; |

912 range = (uint16_t)(newDelta/DELTA); |

913 |

/* a non-zero fromUChar32 is a code unit saved by an earlier call: resume at the trail-surrogate lookup */
914 if ((sourceChar = args->converter->fromUChar32)!=0) { |

915 goto getTrail; |

916 } |

917 |

918 /*writing the char to the output stream */ |

919 while (source < sourceLimit) { |

920 /* Write the language code following LF only if LF is not the last character. */ |

921 if (args->converter->fromUnicodeStatus == LF) { |

922 targetByteUnit = ATR<<8; |

923 targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang; |

924 args->converter->fromUnicodeStatus = 0x0000; |

925 /* now append ATR and language code */ |

926 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); |

927 if (U_FAILURE(*err)) { |

928 break; |

929 } |

930 } |

931 |

932 sourceChar = *source++; |

933 tempContextFromUnicode = converterData->contextCharFromUnicode; |

934 |

935 targetByteUnit = missingCharMarker; |

936 |

937 /* code points up to ASCII_END are written unchanged as a single byte */ |

938 if (sourceChar <= ASCII_END) { |

939 args->converter->fromUnicodeStatus = sourceChar; |

940 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err); |

941 if (U_FAILURE(*err)) { |

942 break; |

943 } |

944 continue; |

945 } |

946 switch (sourceChar) { |

947 case ZWNJ: |

948 /* a context character (normally HALANT) is pending: write a second halant */ |

949 if (converterData->contextCharFromUnicode) { |

950 converterData->contextCharFromUnicode = 0x00; |

951 targetByteUnit = ISCII_HALANT; |

952 } else { |

953 /* consume ZWNJ and continue */ |

954 converterData->contextCharFromUnicode = 0x00; |

955 continue; |

956 } |

957 break; |

958 case ZWJ: |

959 /* a context character (normally HALANT) is pending: write NUKTA after it */ |

960 if (converterData->contextCharFromUnicode) { |

961 targetByteUnit = ISCII_NUKTA; |

962 } else { |

963 targetByteUnit =ISCII_INV; |

964 } |

965 converterData->contextCharFromUnicode = 0x00; |

966 break; |

967 default: |

968 /* is the sourceChar in the INDIC_RANGE? */ |

969 if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) { |

970 /* Danda and Double Danda are valid in Northern scripts.. since Unicode |

971 * does not include these codepoints in all Northern scripts we need to |

972 * filter them out |

973 */ |

974 if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) { |

975 /* find out to which block the sourceChar belongs*/ |

976 range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA); |

977 newDelta =(uint16_t)(range*DELTA); |

978 |

979 /* Now are we in the same block as the previous? */ |

980 if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) { |

981 converterData->currentDeltaFromUnicode = newDelta; |

982 converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum; |

983 deltaChanged =TRUE; |

984 converterData->isFirstBuffer=FALSE; |

985 } |

986 |

987 if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { |

988 if (sourceChar == PNJ_TIPPI) { |

989 /* Make sure Tippi is converted to Bindi. */ |

990 sourceChar = PNJ_BINDI; |

991 } else if (sourceChar == PNJ_ADHAK) { |

992 /* This is for consonant cluster handling. */ |

993 converterData->contextCharFromUnicode = PNJ_ADHAK; |

994 } |

995 |

996 } |

997 /* Normalize all Indic codepoints to Devanagari and map them to ISCII */ |

998 /* now subtract the new delta from sourceChar*/ |

999 sourceChar -= converterData->currentDeltaFromUnicode; |

1000 } |

1001 |

1002 /* get the target byte unit */ |

1003 targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar]; |

1004 |

1005 /* is the code point valid in current script? */ |

1006 if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) { |

1007 /* Vocallic RR is assigned in ISCII Telugu and Unicode */ |

1008 if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) { |

1009 targetByteUnit=missingCharMarker; |

1010 } |

1011 } |

1012 |

1013 if (deltaChanged) { |

1014 /* we are in a script block which is different than |

1015 * previous sourceChar's script block write ATR and language codes |

1016 */ |

1017 uint32_t temp=0; |

1018 temp =(uint16_t)(ATR<<8); |

1019 temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang); |

1020 /* reset */ |

1021 deltaChanged=FALSE; |

1022 /* now append ATR and language code */ |

1023 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err); |

/* NOTE: this break leaves the enclosing switch, not the while loop; it skips the
 * context reset below and control continues after the switch with *err already set.
 */
1024 if (U_FAILURE(*err)) { |

1025 break; |

1026 } |

1027 } |

1028 |

/* Adhak itself produces no output here; it was stored as context above */
1029 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) { |

1030 continue; |

1031 } |

1032 } |

1033 /* reset context char */ |

1034 converterData->contextCharFromUnicode = 0x00; |

1035 break; |

1036 } |

1037 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) { |

1038 /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */ |

1039 /* reset context char */ |

1040 converterData->contextCharFromUnicode = 0x0000; |

1041 targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit; |

1042 /* write targetByteUnit to target */ |

1043 WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err); |

1044 if (U_FAILURE(*err)) { |

1045 break; |

1046 } |

1047 } else if (targetByteUnit != missingCharMarker) { |

/* remember a halant so that a following ZWNJ/ZWJ can be interpreted */
1048 if (targetByteUnit==ISCII_HALANT) { |

1049 converterData->contextCharFromUnicode = (UChar)targetByteUnit; |

1050 } |

1051 /* write targetByteUnit to target*/ |

1052 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); |

1053 if (U_FAILURE(*err)) { |

1054 break; |

1055 } |

1056 } else { |

1057 /* oops.. the code point is unassigned */ |

1058 /*check if the char is a First surrogate*/ |

1059 if (U16_IS_SURROGATE(sourceChar)) { |

1060 if (U16_IS_SURROGATE_LEAD(sourceChar)) { |

1061 getTrail: |

1062 /*look ahead to find the trail surrogate*/ |

1063 if (source < sourceLimit) { |

1064 /* test the following code unit */ |

1065 UChar trail= (*source); |

1066 if (U16_IS_TRAIL(trail)) { |

1067 source++; |

1068 sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); |

1069 *err =U_INVALID_CHAR_FOUND; |

1070 /* convert this surrogate code point */ |

1071 /* exit this condition tree */ |

1072 } else { |

1073 /* this is an unmatched lead code unit (1st surrogate) */ |

1074 /* callback(illegal) */ |

1075 *err=U_ILLEGAL_CHAR_FOUND; |

1076 } |

1077 } else { |

1078 /* no more input: keep the lead surrogate in fromUChar32 (set below) without reporting an error */ |

1079 *err = U_ZERO_ERROR; |

1080 } |

1081 } else { |

1082 /* this is an unmatched trail code unit (2nd surrogate) */ |

1083 /* callback(illegal) */ |

1084 *err=U_ILLEGAL_CHAR_FOUND; |

1085 } |

1086 } else { |

1087 /* callback(unassigned) for a BMP code point */ |

1088 *err = U_INVALID_CHAR_FOUND; |

1089 } |

1090 |

1091 args->converter->fromUChar32=sourceChar; |

1092 break; |

1093 } |

1094 }/* end while(source<sourceLimit) */ |

1095 |

1096 /*save the state and return */ |

1097 args->source = source; |

1098 args->target = (char*)target; |

1099 } |
|
1100 |
|
1101 static const uint16_t lookupTable[][2]={ |
|
1102 { ZERO, ZERO }, /*DEFALT*/ |
|
1103 { ZERO, ZERO }, /*ROMAN*/ |
|
1104 { DEVANAGARI, DEV_MASK }, |
|
1105 { BENGALI, BNG_MASK }, |
|
1106 { TAMIL, TML_MASK }, |
|
1107 { TELUGU, KND_MASK }, |
|
1108 { BENGALI, BNG_MASK }, |
|
1109 { ORIYA, ORI_MASK }, |
|
1110 { KANNADA, KND_MASK }, |
|
1111 { MALAYALAM, MLM_MASK }, |
|
1112 { GUJARATI, GJR_MASK }, |
|
1113 { GURMUKHI, PNJ_MASK } |
|
1114 }; |
|
1115 |
|
/*
 * WRITE_TO_TARGET_TO_U: write one UTF-16 code unit to the UChar target.
 *
 *  - delta is first added to targetUniChar unless the value is <= ASCII_END or
 *    is ZWJ, ZWNJ, DANDA or DOUBLE_DANDA. The addition is applied to the
 *    targetUniChar argument itself, so the caller's variable is modified;
 *  - if target is below args->targetLimit the unit is stored and, when offsets
 *    is non-NULL, offset is recorded; otherwise the unit is appended to
 *    args->converter->UCharErrorBuffer and *err becomes
 *    U_BUFFER_OVERFLOW_ERROR.
 *
 * The source parameter is not used by the expansion.
 */
1116 #define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\ |

1117 /* add offset to current Indic Block */ \ |

1118 if(targetUniChar>ASCII_END && \ |

1119 targetUniChar != ZWJ && \ |

1120 targetUniChar != ZWNJ && \ |

1121 targetUniChar != DANDA && \ |

1122 targetUniChar != DOUBLE_DANDA){ \ |

1123 \ |

1124 targetUniChar+=(uint16_t)(delta); \ |

1125 } \ |

1126 /* now write the targetUniChar */ \ |

1127 if(target<args->targetLimit){ \ |

1128 *(target)++ = (UChar)targetUniChar; \ |

1129 if(offsets){ \ |

1130 *(offsets)++ = (int32_t)(offset); \ |

1131 } \ |

1132 }else{ \ |

1133 args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \ |

1134 (UChar)targetUniChar; \ |

1135 *err = U_BUFFER_OVERFLOW_ERROR; \ |

1136 } \ |

1137 } |
|
1138 |
|
/*
 * GET_MAPPING: look up sourceChar in toUnicodeTable and store the result in
 * targetUniChar. For bytes above ASCII_END the result is replaced by
 * missingCharMarker when validityTable (indexed by the low 7 bits of the
 * mapped value) has no bit in common with data->currentMaskToUnicode, except
 * for VOCALLIC_RR while the current delta is TELUGU_DELTA.
 */
1139 #define GET_MAPPING(sourceChar,targetUniChar,data){ \ |

1140 targetUniChar = toUnicodeTable[(sourceChar)] ; \ |

1141 /* is the code point valid in current script? */ \ |

1142 if(sourceChar> ASCII_END && \ |

1143 (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \ |

1144 /* Vocallic RR is assigned in ISCII Telugu and Unicode */ \ |

1145 if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \ |

1146 targetUniChar!=VOCALLIC_RR){ \ |

1147 targetUniChar=missingCharMarker; \ |

1148 } \ |

1149 } \ |

1150 } |
|
1151 |
|
1152 /*********** |
|
1153 * Rules for ISCII to Unicode converter |
|
1154 * ISCII is stateful encoding. To convert ISCII bytes to Unicode, |
|
1155 * which has both precomposed and decomposed forms characters |
|
1156 * pre-context and post-context need to be considered. |
|
1157 * |
|
1158 * Post context |
|
1159 * i) ATR : Attribute code is used to declare the font and script switching. |
|
1160 * Currently we only switch scripts; font codes are consumed without generating an error |

1161 * ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure, |

1162 * obsolete characters |

1163 * Pre context |

1164 * i) Halant: if preceded by a halant then it is an explicit halant |

1165 * ii) Nukta : |

1166 * a) if preceded by a halant then it is a soft halant |

1167 * b) if preceded by specific consonants and the ligatures have pre-composed |

1168 * characters in Unicode then convert to pre-composed characters |

1169 * iii) Danda: If Danda is preceded by a Danda then convert to Double Danda |
|
1170 * |
|
1171 */ |
|
1172 |
|
1173 static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) { |
|
1174 const char *source = ( char *) args->source; |
|
1175 UChar *target = args->target; |
|
1176 const char *sourceLimit = args->sourceLimit; |
|
1177 const UChar* targetLimit = args->targetLimit; |
|
1178 uint32_t targetUniChar = 0x0000; |
|
1179 uint8_t sourceChar = 0x0000; |
|
1180 UConverterDataISCII* data; |
|
1181 UChar32* toUnicodeStatus=NULL; |
|
1182 UChar32 tempTargetUniChar = 0x0000; |
|
1183 UChar* contextCharToUnicode= NULL; |
|
1184 UBool found; |
|
1185 int i; |
|
1186 int offset = 0; |
|
1187 |
|
1188 if ((args->converter == NULL) || (target < args->target) || (source < args->source)) { |
|
1189 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
1190 return; |
|
1191 } |
|
1192 |
|
1193 data = (UConverterDataISCII*)(args->converter->extraInfo); |
|
1194 contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */ |
|
1195 toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/ |
|
1196 |
|
1197 while (U_SUCCESS(*err) && source<sourceLimit) { |
|
1198 |
|
1199 targetUniChar = missingCharMarker; |
|
1200 |
|
1201 if (target < targetLimit) { |
|
1202 sourceChar = (unsigned char)*(source)++; |
|
1203 |
|
1204 /* look at the post-context preform special processing */ |
|
1205 if (*contextCharToUnicode==ATR) { |
|
1206 |
|
1207 /* If we have ATR in *contextCharToUnicode then we need to change our |
|
1208 * state to the Indic Script specified by sourceChar |
|
1209 */ |
|
1210 |
|
1211 /* check if the sourceChar is supported script range*/ |
|
1212 if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) { |
|
1213 data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA); |
|
1214 data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1]; |
|
1215 } else if (sourceChar==DEF) { |
|
1216 /* switch back to default */ |
|
1217 data->currentDeltaToUnicode = data->defDeltaToUnicode; |
|
1218 data->currentMaskToUnicode = data->defMaskToUnicode; |
|
1219 } else { |
|
1220 if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { |
|
1221 /* these are display codes consume and continue */ |
|
1222 } else { |
|
1223 *err =U_ILLEGAL_CHAR_FOUND; |
|
1224 /* reset */ |
|
1225 *contextCharToUnicode=NO_CHAR_MARKER; |
|
1226 goto CALLBACK; |
|
1227 } |
|
1228 } |
|
1229 |
|
1230 /* reset */ |
|
1231 *contextCharToUnicode=NO_CHAR_MARKER; |
|
1232 |
|
1233 continue; |
|
1234 |
|
1235 } else if (*contextCharToUnicode==EXT) { |
|
1236 /* check if sourceChar is in 0xA1-0xEE range */ |
|
1237 if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) { |
|
1238 /* We currently support only Anudatta and Devanagari abbreviation sign */ |
|
1239 if (sourceChar==0xBF || sourceChar == 0xB8) { |
|
1240 targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA; |
|
1241 |
|
1242 /* find out if the mapping is valid in this state */ |
|
1243 if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { |
|
1244 *contextCharToUnicode= NO_CHAR_MARKER; |
|
1245 |
|
1246 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ |
|
1247 if (data->prevToUnicodeStatus) { |
|
1248 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); |
|
1249 data->prevToUnicodeStatus = 0x0000; |
|
1250 } |
|
1251 /* write to target */ |
|
1252 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); |
|
1253 |
|
1254 continue; |
|
1255 } |
|
1256 } |
|
1257 /* byte unit is unassigned */ |
|
1258 targetUniChar = missingCharMarker; |
|
1259 *err= U_INVALID_CHAR_FOUND; |
|
1260 } else { |
|
1261 /* only 0xA1 - 0xEE are legal after EXT char */ |
|
1262 *contextCharToUnicode= NO_CHAR_MARKER; |
|
1263 *err = U_ILLEGAL_CHAR_FOUND; |
|
1264 } |
|
1265 goto CALLBACK; |
|
1266 } else if (*contextCharToUnicode==ISCII_INV) { |
|
1267 if (sourceChar==ISCII_HALANT) { |
|
1268 targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */ |
|
1269 } else { |
|
1270 targetUniChar = ZWJ; |
|
1271 } |
|
1272 |
|
1273 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ |
|
1274 if (data->prevToUnicodeStatus) { |
|
1275 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); |
|
1276 data->prevToUnicodeStatus = 0x0000; |
|
1277 } |
|
1278 /* write to target */ |
|
1279 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); |
|
1280 /* reset */ |
|
1281 *contextCharToUnicode=NO_CHAR_MARKER; |
|
1282 } |
|
1283 |
|
1284 /* look at the pre-context and perform special processing */ |
|
1285 switch (sourceChar) { |
|
1286 case ISCII_INV: |
|
1287 case EXT: /*falls through*/ |
|
1288 case ATR: |
|
1289 *contextCharToUnicode = (UChar)sourceChar; |
|
1290 |
|
1291 if (*toUnicodeStatus != missingCharMarker) { |
|
1292 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ |
|
1293 if (data->prevToUnicodeStatus) { |
|
1294 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); |
|
1295 data->prevToUnicodeStatus = 0x0000; |
|
1296 } |
|
1297 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); |
|
1298 *toUnicodeStatus = missingCharMarker; |
|
1299 } |
|
1300 continue; |
|
1301 case ISCII_DANDA: |
|
1302 /* handle double danda*/ |
|
1303 if (*contextCharToUnicode== ISCII_DANDA) { |
|
1304 targetUniChar = DOUBLE_DANDA; |
|
1305 /* clear the context */ |
|
1306 *contextCharToUnicode = NO_CHAR_MARKER; |
|
1307 *toUnicodeStatus = missingCharMarker; |
|
1308 } else { |
|
1309 GET_MAPPING(sourceChar,targetUniChar,data); |
|
1310 *contextCharToUnicode = sourceChar; |
|
1311 } |
|
1312 break; |
|
1313 case ISCII_HALANT: |
|
1314 /* handle explicit halant */ |
|
1315 if (*contextCharToUnicode == ISCII_HALANT) { |
|
1316 targetUniChar = ZWNJ; |
|
1317 /* clear the context */ |
|
1318 *contextCharToUnicode = NO_CHAR_MARKER; |
|
1319 } else { |
|
1320 GET_MAPPING(sourceChar,targetUniChar,data); |
|
1321 *contextCharToUnicode = sourceChar; |
|
1322 } |
|
1323 break; |
|
1324 case 0x0A: |
|
1325 /* fall through */ |
|
1326 case 0x0D: |
|
1327 data->resetToDefaultToUnicode = TRUE; |
|
1328 GET_MAPPING(sourceChar,targetUniChar,data) |
|
1329 ; |
|
1330 *contextCharToUnicode = sourceChar; |
|
1331 break; |
|
1332 |
|
1333 case ISCII_VOWEL_SIGN_E: |
|
1334 i=1; |
|
1335 found=FALSE; |
|
1336 for (; i<vowelSignESpecialCases[0][0]; i++) { |
|
1337 U_ASSERT(i<sizeof(vowelSignESpecialCases)/sizeof(vowelSignESpecialCases[0])); |
|
1338 if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) { |
|
1339 targetUniChar=vowelSignESpecialCases[i][1]; |
|
1340 found=TRUE; |
|
1341 break; |
|
1342 } |
|
1343 } |
|
1344 if (found) { |
|
1345 /* find out if the mapping is valid in this state */ |
|
1346 if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { |
|
1347 /*targetUniChar += data->currentDeltaToUnicode ;*/ |
|
1348 *contextCharToUnicode= NO_CHAR_MARKER; |
|
1349 *toUnicodeStatus = missingCharMarker; |
|
1350 break; |
|
1351 } |
|
1352 } |
|
1353 GET_MAPPING(sourceChar,targetUniChar,data); |
|
1354 *contextCharToUnicode = sourceChar; |
|
1355 break; |
|
1356 |
|
1357 case ISCII_NUKTA: |
|
1358 /* handle soft halant */ |
|
1359 if (*contextCharToUnicode == ISCII_HALANT) { |
|
1360 targetUniChar = ZWJ; |
|
1361 /* clear the context */ |
|
1362 *contextCharToUnicode = NO_CHAR_MARKER; |
|
1363 break; |
|
1364 } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) { |
|
1365 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ |
|
1366 if (data->prevToUnicodeStatus) { |
|
1367 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); |
|
1368 data->prevToUnicodeStatus = 0x0000; |
|
1369 } |
|
1370 /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi. |
|
1371 * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39). |
|
1372 */ |
|
1373 targetUniChar = PNJ_RRA; |
|
1374 WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); |
|
1375 if (U_SUCCESS(*err)) { |
|
1376 targetUniChar = PNJ_SIGN_VIRAMA; |
|
1377 WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); |
|
1378 if (U_SUCCESS(*err)) { |
|
1379 targetUniChar = PNJ_HA; |
|
1380 WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); |
|
1381 } else { |
|
1382 args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; |
|
1383 } |
|
1384 } else { |
|
1385 args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA; |
|
1386 args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; |
|
1387 } |
|
1388 *toUnicodeStatus = missingCharMarker; |
|
1389 data->contextCharToUnicode = NO_CHAR_MARKER; |
|
1390 continue; |
|
1391 } else { |
|
1392 /* try to handle <CHAR> + ISCII_NUKTA special mappings */ |
|
1393 i=1; |
|
1394 found =FALSE; |
|
1395 for (; i<nuktaSpecialCases[0][0]; i++) { |
|
1396 if (nuktaSpecialCases[i][0]==(uint8_t) |
|
1397 *contextCharToUnicode) { |
|
1398 targetUniChar=nuktaSpecialCases[i][1]; |
|
1399 found =TRUE; |
|
1400 break; |
|
1401 } |
|
1402 } |
|
1403 if (found) { |
|
1404 /* find out if the mapping is valid in this state */ |
|
1405 if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { |
|
1406 /*targetUniChar += data->currentDeltaToUnicode ;*/ |
|
1407 *contextCharToUnicode= NO_CHAR_MARKER; |
|
1408 *toUnicodeStatus = missingCharMarker; |
|
1409 if (data->currentDeltaToUnicode == PNJ_DELTA) { |
|
1410 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ |
|
1411 if (data->prevToUnicodeStatus) { |
|
1412 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); |
|
1413 data->prevToUnicodeStatus = 0x0000; |
|
1414 } |
|
1415 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); |
|
1416 continue; |
|
1417 } |
|
1418 break; |
|
1419 } |
|
1420 /* else fall through to default */ |
|
1421 } |
|
1422 /* else fall through to default */ |
|
1423 } |
|
1424 default:GET_MAPPING(sourceChar,targetUniChar,data) |
|
1425 ; |
|
1426 *contextCharToUnicode = sourceChar; |
|
1427 break; |
|
1428 } |
|
1429 |
|
1430 if (*toUnicodeStatus != missingCharMarker) { |
|
1431 /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */ |
|
1432 if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) && |
|
1433 (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) { |
|
1434 /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */ |
|
1435 offset = (int)(source-args->source - 3); |
|
1436 tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */ |
|
1437 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err); |
|
1438 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err); |
|
1439 data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */ |
|
1440 *toUnicodeStatus = missingCharMarker; |
|
1441 continue; |
|
1442 } else { |
|
1443 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ |
|
1444 if (data->prevToUnicodeStatus) { |
|
1445 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); |
|
1446 data->prevToUnicodeStatus = 0x0000; |
|
1447 } |
|
1448 /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. |
|
1449 * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. |
|
1450 */ |
|
1451 if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) { |
|
1452 targetUniChar = PNJ_TIPPI - PNJ_DELTA; |
|
1453 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err); |
|
1454 } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) { |
|
1455 /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */ |
|
1456 data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA; |
|
1457 } else { |
|
1458 /* write the previously mapped codepoint */ |
|
1459 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); |
|
1460 } |
|
1461 } |
|
1462 *toUnicodeStatus = missingCharMarker; |
|
1463 } |
|
1464 |
|
1465 if (targetUniChar != missingCharMarker) { |
|
1466 /* now save the targetUniChar for delayed write */ |
|
1467 *toUnicodeStatus = (UChar) targetUniChar; |
|
1468 if (data->resetToDefaultToUnicode==TRUE) { |
|
1469 data->currentDeltaToUnicode = data->defDeltaToUnicode; |
|
1470 data->currentMaskToUnicode = data->defMaskToUnicode; |
|
1471 data->resetToDefaultToUnicode=FALSE; |
|
1472 } |
|
1473 } else { |
|
1474 |
|
1475 /* we reach here only if targetUniChar == missingCharMarker |
|
1476 * so assign codes to reason and err |
|
1477 */ |
|
1478 *err = U_INVALID_CHAR_FOUND; |
|
1479 CALLBACK: |
|
1480 args->converter->toUBytes[0] = (uint8_t) sourceChar; |
|
1481 args->converter->toULength = 1; |
|
1482 break; |
|
1483 } |
|
1484 |
|
1485 } else { |
|
1486 *err =U_BUFFER_OVERFLOW_ERROR; |
|
1487 break; |
|
1488 } |
|
1489 } |
|
1490 |
|
1491 if (U_SUCCESS(*err) && args->flush && source == sourceLimit) { |
|
1492 /* end of the input stream */ |
|
1493 UConverter *cnv = args->converter; |
|
1494 |
|
1495 if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) { |
|
1496 /* set toUBytes[] */ |
|
1497 cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode; |
|
1498 cnv->toULength = 1; |
|
1499 |
|
1500 /* avoid looping on truncated sequences */ |
|
1501 *contextCharToUnicode = NO_CHAR_MARKER; |
|
1502 } else { |
|
1503 cnv->toULength = 0; |
|
1504 } |
|
1505 |
|
1506 if (*toUnicodeStatus != missingCharMarker) { |
|
1507 /* output a remaining target character */ |
|
1508 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err); |
|
1509 *toUnicodeStatus = missingCharMarker; |
|
1510 } |
|
1511 } |
|
1512 |
|
1513 args->target = target; |
|
1514 args->source = source; |
|
1515 } |
|
1516 |
|
1517 /* structure for SafeClone calculations */ |
|
1518 struct cloneISCIIStruct { |
|
1519 UConverter cnv; |
|
1520 UConverterDataISCII mydata; |
|
1521 }; |
|
1522 |
|
1523 static UConverter * |
|
1524 _ISCII_SafeClone(const UConverter *cnv, |
|
1525 void *stackBuffer, |
|
1526 int32_t *pBufferSize, |
|
1527 UErrorCode *status) |
|
1528 { |
|
1529 struct cloneISCIIStruct * localClone; |
|
1530 int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); |
|
1531 |
|
1532 if (U_FAILURE(*status)) { |
|
1533 return 0; |
|
1534 } |
|
1535 |
|
1536 if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ |
|
1537 *pBufferSize = bufferSizeNeeded; |
|
1538 return 0; |
|
1539 } |
|
1540 |
|
1541 localClone = (struct cloneISCIIStruct *)stackBuffer; |
|
1542 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ |
|
1543 |
|
1544 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)); |
|
1545 localClone->cnv.extraInfo = &localClone->mydata; |
|
1546 localClone->cnv.isExtraLocal = TRUE; |
|
1547 |
|
1548 return &localClone->cnv; |
|
1549 } |
|
1550 |
|
1551 static void |
|
1552 _ISCIIGetUnicodeSet(const UConverter *cnv, |
|
1553 const USetAdder *sa, |
|
1554 UConverterUnicodeSet which, |
|
1555 UErrorCode *pErrorCode) |
|
1556 { |
|
1557 int32_t idx, script; |
|
1558 uint8_t mask; |
|
1559 |
|
1560 /* Since all ISCII versions allow switching to other ISCII |
|
1561 scripts, we add all roundtrippable characters to this set. */ |
|
1562 sa->addRange(sa->set, 0, ASCII_END); |
|
1563 for (script = DEVANAGARI; script <= MALAYALAM; script++) { |
|
1564 mask = (uint8_t)(lookupInitialData[script].maskEnum); |
|
1565 for (idx = 0; idx < DELTA; idx++) { |
|
1566 /* added check for TELUGU character */ |
|
1567 if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) { |
|
1568 sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); |
|
1569 } |
|
1570 } |
|
1571 } |
|
1572 sa->add(sa->set, DANDA); |
|
1573 sa->add(sa->set, DOUBLE_DANDA); |
|
1574 sa->add(sa->set, ZWNJ); |
|
1575 sa->add(sa->set, ZWJ); |
|
1576 } |
|
1577 |
|
1578 static const UConverterImpl _ISCIIImpl={ |
|
1579 |
|
1580 UCNV_ISCII, |
|
1581 |
|
1582 NULL, |
|
1583 NULL, |
|
1584 |
|
1585 _ISCIIOpen, |
|
1586 _ISCIIClose, |
|
1587 _ISCIIReset, |
|
1588 |
|
1589 UConverter_toUnicode_ISCII_OFFSETS_LOGIC, |
|
1590 UConverter_toUnicode_ISCII_OFFSETS_LOGIC, |
|
1591 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, |
|
1592 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, |
|
1593 NULL, |
|
1594 |
|
1595 NULL, |
|
1596 _ISCIIgetName, |
|
1597 NULL, |
|
1598 _ISCII_SafeClone, |
|
1599 _ISCIIGetUnicodeSet |
|
1600 }; |
|
1601 |
|
1602 static const UConverterStaticData _ISCIIStaticData={ |
|
1603 sizeof(UConverterStaticData), |
|
1604 "ISCII", |
|
1605 0, |
|
1606 UCNV_IBM, |
|
1607 UCNV_ISCII, |
|
1608 1, |
|
1609 4, |
|
1610 { 0x1a, 0, 0, 0 }, |
|
1611 0x1, |
|
1612 FALSE, |
|
1613 FALSE, |
|
1614 0x0, |
|
1615 0x0, |
|
1616 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ |
|
1617 |
|
1618 }; |
|
1619 |
|
1620 const UConverterSharedData _ISCIIData={ |
|
1621 sizeof(UConverterSharedData), |
|
1622 ~((uint32_t) 0), |
|
1623 NULL, |
|
1624 NULL, |
|
1625 &_ISCIIStaticData, |
|
1626 FALSE, |
|
1627 &_ISCIIImpl, |
|
1628 0 |
|
1629 }; |
|
1630 |
|
1631 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |