|
1 /* |
|
2 ****************************************************************************** |
|
3 * |
|
4 * Copyright (C) 2003-2013, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ****************************************************************************** |
|
8 * file name: ucnv_ext.cpp |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2003jun13 |
|
14 * created by: Markus W. Scherer |
|
15 * |
|
16 * Conversion extensions |
|
17 */ |
|
18 |
|
19 #include "unicode/utypes.h" |
|
20 |
|
21 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
|
22 |
|
23 #include "unicode/uset.h" |
|
24 #include "ucnv_bld.h" |
|
25 #include "ucnv_cnv.h" |
|
26 #include "ucnv_ext.h" |
|
27 #include "cmemory.h" |
|
28 #include "uassert.h" |
|
29 |
|
30 /* to Unicode --------------------------------------------------------------- */ |
|
31 |
|
32 /* |
|
33 * @return lookup value for the byte, if found; else 0 |
|
34 */ |
|
35 static inline uint32_t |
|
36 ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) { |
|
37 uint32_t word0, word; |
|
38 int32_t i, start, limit; |
|
39 |
|
40 /* check the input byte against the lowest and highest section bytes */ |
|
41 start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]); |
|
42 limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]); |
|
43 if(byte<start || limit<byte) { |
|
44 return 0; /* the byte is out of range */ |
|
45 } |
|
46 |
|
47 if(length==((limit-start)+1)) { |
|
48 /* direct access on a linear array */ |
|
49 return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */ |
|
50 } |
|
51 |
|
52 /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */ |
|
53 word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0); |
|
54 |
|
55 /* |
|
56 * Shift byte once instead of each section word and add 0xffffff. |
|
57 * We will compare the shifted/added byte (bbffffff) against |
|
58 * section words which have byte values in the same bit position. |
|
59 * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv |
|
60 * for all v=0..f |
|
61 * so we need not mask off the lower 24 bits of each section word. |
|
62 */ |
|
63 word=word0|UCNV_EXT_TO_U_VALUE_MASK; |
|
64 |
|
65 /* binary search */ |
|
66 start=0; |
|
67 limit=length; |
|
68 for(;;) { |
|
69 i=limit-start; |
|
70 if(i<=1) { |
|
71 break; /* done */ |
|
72 } |
|
73 /* start<limit-1 */ |
|
74 |
|
75 if(i<=4) { |
|
76 /* linear search for the last part */ |
|
77 if(word0<=toUSection[start]) { |
|
78 break; |
|
79 } |
|
80 if(++start<limit && word0<=toUSection[start]) { |
|
81 break; |
|
82 } |
|
83 if(++start<limit && word0<=toUSection[start]) { |
|
84 break; |
|
85 } |
|
86 /* always break at start==limit-1 */ |
|
87 ++start; |
|
88 break; |
|
89 } |
|
90 |
|
91 i=(start+limit)/2; |
|
92 if(word<toUSection[i]) { |
|
93 limit=i; |
|
94 } else { |
|
95 start=i; |
|
96 } |
|
97 } |
|
98 |
|
99 /* did we really find it? */ |
|
100 if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) { |
|
101 return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */ |
|
102 } else { |
|
103 return 0; /* not found */ |
|
104 } |
|
105 } |
|
106 |
|
/*
 * TRUE if not an SI/SO stateful converter (sisoState<0),
 * or if the match length fits with the current converter state:
 * state 0 requires a match length of exactly 1,
 * any other non-negative state requires a match length other than 1.
 *
 * Both arguments are fully parenthesized so that any expression can be
 * passed for them. sisoState is evaluated up to twice.
 */
#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \
    ((sisoState)<0 || ((sisoState)==0) == ((match)==1))
|
113 |
|
114 /* |
|
115 * this works like ucnv_extMatchFromU() except |
|
116 * - the first character is in pre |
|
117 * - no trie is used |
|
118 * - the returned matchLength is not offset by 2 |
|
119 */ |
|
120 static int32_t |
|
121 ucnv_extMatchToU(const int32_t *cx, int8_t sisoState, |
|
122 const char *pre, int32_t preLength, |
|
123 const char *src, int32_t srcLength, |
|
124 uint32_t *pMatchValue, |
|
125 UBool /*useFallback*/, UBool flush) { |
|
126 const uint32_t *toUTable, *toUSection; |
|
127 |
|
128 uint32_t value, matchValue; |
|
129 int32_t i, j, idx, length, matchLength; |
|
130 uint8_t b; |
|
131 |
|
132 if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) { |
|
133 return 0; /* no extension data, no match */ |
|
134 } |
|
135 |
|
136 /* initialize */ |
|
137 toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t); |
|
138 idx=0; |
|
139 |
|
140 matchValue=0; |
|
141 i=j=matchLength=0; |
|
142 |
|
143 if(sisoState==0) { |
|
144 /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */ |
|
145 if(preLength>1) { |
|
146 return 0; /* no match of a DBCS sequence in SBCS mode */ |
|
147 } else if(preLength==1) { |
|
148 srcLength=0; |
|
149 } else /* preLength==0 */ { |
|
150 if(srcLength>1) { |
|
151 srcLength=1; |
|
152 } |
|
153 } |
|
154 flush=TRUE; |
|
155 } |
|
156 |
|
157 /* we must not remember fallback matches when not using fallbacks */ |
|
158 |
|
159 /* match input units until there is a full match or the input is consumed */ |
|
160 for(;;) { |
|
161 /* go to the next section */ |
|
162 toUSection=toUTable+idx; |
|
163 |
|
164 /* read first pair of the section */ |
|
165 value=*toUSection++; |
|
166 length=UCNV_EXT_TO_U_GET_BYTE(value); |
|
167 value=UCNV_EXT_TO_U_GET_VALUE(value); |
|
168 if( value!=0 && |
|
169 (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || |
|
170 TO_U_USE_FALLBACK(useFallback)) && |
|
171 UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) |
|
172 ) { |
|
173 /* remember longest match so far */ |
|
174 matchValue=value; |
|
175 matchLength=i+j; |
|
176 } |
|
177 |
|
178 /* match pre[] then src[] */ |
|
179 if(i<preLength) { |
|
180 b=(uint8_t)pre[i++]; |
|
181 } else if(j<srcLength) { |
|
182 b=(uint8_t)src[j++]; |
|
183 } else { |
|
184 /* all input consumed, partial match */ |
|
185 if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) { |
|
186 /* |
|
187 * end of the entire input stream, stop with the longest match so far |
|
188 * or: partial match must not be longer than UCNV_EXT_MAX_BYTES |
|
189 * because it must fit into state buffers |
|
190 */ |
|
191 break; |
|
192 } else { |
|
193 /* continue with more input next time */ |
|
194 return -length; |
|
195 } |
|
196 } |
|
197 |
|
198 /* search for the current UChar */ |
|
199 value=ucnv_extFindToU(toUSection, length, b); |
|
200 if(value==0) { |
|
201 /* no match here, stop with the longest match so far */ |
|
202 break; |
|
203 } else { |
|
204 if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { |
|
205 /* partial match, continue */ |
|
206 idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value); |
|
207 } else { |
|
208 if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || |
|
209 TO_U_USE_FALLBACK(useFallback)) && |
|
210 UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) |
|
211 ) { |
|
212 /* full match, stop with result */ |
|
213 matchValue=value; |
|
214 matchLength=i+j; |
|
215 } else { |
|
216 /* full match on fallback not taken, stop with the longest match so far */ |
|
217 } |
|
218 break; |
|
219 } |
|
220 } |
|
221 } |
|
222 |
|
223 if(matchLength==0) { |
|
224 /* no match at all */ |
|
225 return 0; |
|
226 } |
|
227 |
|
228 /* return result */ |
|
229 *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue); |
|
230 return matchLength; |
|
231 } |
|
232 |
|
233 static inline void |
|
234 ucnv_extWriteToU(UConverter *cnv, const int32_t *cx, |
|
235 uint32_t value, |
|
236 UChar **target, const UChar *targetLimit, |
|
237 int32_t **offsets, int32_t srcIndex, |
|
238 UErrorCode *pErrorCode) { |
|
239 /* output the result */ |
|
240 if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { |
|
241 /* output a single code point */ |
|
242 ucnv_toUWriteCodePoint( |
|
243 cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value), |
|
244 target, targetLimit, |
|
245 offsets, srcIndex, |
|
246 pErrorCode); |
|
247 } else { |
|
248 /* output a string - with correct data we have resultLength>0 */ |
|
249 ucnv_toUWriteUChars( |
|
250 cnv, |
|
251 UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+ |
|
252 UCNV_EXT_TO_U_GET_INDEX(value), |
|
253 UCNV_EXT_TO_U_GET_LENGTH(value), |
|
254 target, targetLimit, |
|
255 offsets, srcIndex, |
|
256 pErrorCode); |
|
257 } |
|
258 } |
|
259 |
|
/*
 * Get the SI/SO toUnicode state of a converter:
 * - MBCS_OUTPUT_2_SISO:      cnv->mode cast to int8_t
 *                            (state 0 is for SBCS, 1 for DBCS)
 * - MBCS_OUTPUT_DBCS_ONLY:   always 1
 * - any other output type:   -1, the converter is not SI/SO stateful
 *
 * Note: For SI/SO stateful converters getting here,
 * cnv->mode==0 is equivalent to firstLength==1.
 *
 * The cnv argument is evaluated more than once.
 */
#define UCNV_SISO_STATE(cnv) \
    ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \
     (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1)
|
271 |
|
272 /* |
|
273 * target<targetLimit; set error code for overflow |
|
274 */ |
|
275 U_CFUNC UBool |
|
276 ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, |
|
277 int32_t firstLength, |
|
278 const char **src, const char *srcLimit, |
|
279 UChar **target, const UChar *targetLimit, |
|
280 int32_t **offsets, int32_t srcIndex, |
|
281 UBool flush, |
|
282 UErrorCode *pErrorCode) { |
|
283 uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
|
284 int32_t match; |
|
285 |
|
286 /* try to match */ |
|
287 match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv), |
|
288 (const char *)cnv->toUBytes, firstLength, |
|
289 *src, (int32_t)(srcLimit-*src), |
|
290 &value, |
|
291 cnv->useFallback, flush); |
|
292 if(match>0) { |
|
293 /* advance src pointer for the consumed input */ |
|
294 *src+=match-firstLength; |
|
295 |
|
296 /* write result to target */ |
|
297 ucnv_extWriteToU(cnv, cx, |
|
298 value, |
|
299 target, targetLimit, |
|
300 offsets, srcIndex, |
|
301 pErrorCode); |
|
302 return TRUE; |
|
303 } else if(match<0) { |
|
304 /* save state for partial match */ |
|
305 const char *s; |
|
306 int32_t j; |
|
307 |
|
308 /* copy the first code point */ |
|
309 s=(const char *)cnv->toUBytes; |
|
310 cnv->preToUFirstLength=(int8_t)firstLength; |
|
311 for(j=0; j<firstLength; ++j) { |
|
312 cnv->preToU[j]=*s++; |
|
313 } |
|
314 |
|
315 /* now copy the newly consumed input */ |
|
316 s=*src; |
|
317 match=-match; |
|
318 for(; j<match; ++j) { |
|
319 cnv->preToU[j]=*s++; |
|
320 } |
|
321 *src=s; /* same as *src=srcLimit; because we reached the end of input */ |
|
322 cnv->preToULength=(int8_t)match; |
|
323 return TRUE; |
|
324 } else /* match==0 no match */ { |
|
325 return FALSE; |
|
326 } |
|
327 } |
|
328 |
|
329 U_CFUNC UChar32 |
|
330 ucnv_extSimpleMatchToU(const int32_t *cx, |
|
331 const char *source, int32_t length, |
|
332 UBool useFallback) { |
|
333 uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
|
334 int32_t match; |
|
335 |
|
336 if(length<=0) { |
|
337 return 0xffff; |
|
338 } |
|
339 |
|
340 /* try to match */ |
|
341 match=ucnv_extMatchToU(cx, -1, |
|
342 source, length, |
|
343 NULL, 0, |
|
344 &value, |
|
345 useFallback, TRUE); |
|
346 if(match==length) { |
|
347 /* write result for simple, single-character conversion */ |
|
348 if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { |
|
349 return UCNV_EXT_TO_U_GET_CODE_POINT(value); |
|
350 } |
|
351 } |
|
352 |
|
353 /* |
|
354 * return no match because |
|
355 * - match>0 && value points to string: simple conversion cannot handle multiple code points |
|
356 * - match>0 && match!=length: not all input consumed, forbidden for this function |
|
357 * - match==0: no match found in the first place |
|
358 * - match<0: partial match, not supported for simple conversion (and flush==TRUE) |
|
359 */ |
|
360 return 0xfffe; |
|
361 } |
|
362 |
|
363 /* |
|
364 * continue partial match with new input |
|
365 * never called for simple, single-character conversion |
|
366 */ |
|
367 U_CFUNC void |
|
368 ucnv_extContinueMatchToU(UConverter *cnv, |
|
369 UConverterToUnicodeArgs *pArgs, int32_t srcIndex, |
|
370 UErrorCode *pErrorCode) { |
|
371 uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
|
372 int32_t match, length; |
|
373 |
|
374 match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv), |
|
375 cnv->preToU, cnv->preToULength, |
|
376 pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), |
|
377 &value, |
|
378 cnv->useFallback, pArgs->flush); |
|
379 if(match>0) { |
|
380 if(match>=cnv->preToULength) { |
|
381 /* advance src pointer for the consumed input */ |
|
382 pArgs->source+=match-cnv->preToULength; |
|
383 cnv->preToULength=0; |
|
384 } else { |
|
385 /* the match did not use all of preToU[] - keep the rest for replay */ |
|
386 length=cnv->preToULength-match; |
|
387 uprv_memmove(cnv->preToU, cnv->preToU+match, length); |
|
388 cnv->preToULength=(int8_t)-length; |
|
389 } |
|
390 |
|
391 /* write result */ |
|
392 ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes, |
|
393 value, |
|
394 &pArgs->target, pArgs->targetLimit, |
|
395 &pArgs->offsets, srcIndex, |
|
396 pErrorCode); |
|
397 } else if(match<0) { |
|
398 /* save state for partial match */ |
|
399 const char *s; |
|
400 int32_t j; |
|
401 |
|
402 /* just _append_ the newly consumed input to preToU[] */ |
|
403 s=pArgs->source; |
|
404 match=-match; |
|
405 for(j=cnv->preToULength; j<match; ++j) { |
|
406 cnv->preToU[j]=*s++; |
|
407 } |
|
408 pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ |
|
409 cnv->preToULength=(int8_t)match; |
|
410 } else /* match==0 */ { |
|
411 /* |
|
412 * no match |
|
413 * |
|
414 * We need to split the previous input into two parts: |
|
415 * |
|
416 * 1. The first codepage character is unmappable - that's how we got into |
|
417 * trying the extension data in the first place. |
|
418 * We need to move it from the preToU buffer |
|
419 * to the error buffer, set an error code, |
|
420 * and prepare the rest of the previous input for 2. |
|
421 * |
|
422 * 2. The rest of the previous input must be converted once we |
|
423 * come back from the callback for the first character. |
|
424 * At that time, we have to try again from scratch to convert |
|
425 * these input characters. |
|
426 * The replay will be handled by the ucnv.c conversion code. |
|
427 */ |
|
428 |
|
429 /* move the first codepage character to the error field */ |
|
430 uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength); |
|
431 cnv->toULength=cnv->preToUFirstLength; |
|
432 |
|
433 /* move the rest up inside the buffer */ |
|
434 length=cnv->preToULength-cnv->preToUFirstLength; |
|
435 if(length>0) { |
|
436 uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length); |
|
437 } |
|
438 |
|
439 /* mark preToU for replay */ |
|
440 cnv->preToULength=(int8_t)-length; |
|
441 |
|
442 /* set the error code for unassigned */ |
|
443 *pErrorCode=U_INVALID_CHAR_FOUND; |
|
444 } |
|
445 } |
|
446 |
|
447 /* from Unicode ------------------------------------------------------------- */ |
|
448 |
|
449 // Use roundtrips, "good one-way" mappings, and some normal fallbacks. |
|
450 static inline UBool |
|
451 extFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) { |
|
452 return |
|
453 ((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 || |
|
454 FROM_U_USE_FALLBACK(useFallback, firstCP)) && |
|
455 (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0; |
|
456 } |
|
457 |
|
458 /* |
|
459 * @return index of the UChar, if found; else <0 |
|
460 */ |
|
461 static inline int32_t |
|
462 ucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) { |
|
463 int32_t i, start, limit; |
|
464 |
|
465 /* binary search */ |
|
466 start=0; |
|
467 limit=length; |
|
468 for(;;) { |
|
469 i=limit-start; |
|
470 if(i<=1) { |
|
471 break; /* done */ |
|
472 } |
|
473 /* start<limit-1 */ |
|
474 |
|
475 if(i<=4) { |
|
476 /* linear search for the last part */ |
|
477 if(u<=fromUSection[start]) { |
|
478 break; |
|
479 } |
|
480 if(++start<limit && u<=fromUSection[start]) { |
|
481 break; |
|
482 } |
|
483 if(++start<limit && u<=fromUSection[start]) { |
|
484 break; |
|
485 } |
|
486 /* always break at start==limit-1 */ |
|
487 ++start; |
|
488 break; |
|
489 } |
|
490 |
|
491 i=(start+limit)/2; |
|
492 if(u<fromUSection[i]) { |
|
493 limit=i; |
|
494 } else { |
|
495 start=i; |
|
496 } |
|
497 } |
|
498 |
|
499 /* did we really find it? */ |
|
500 if(start<limit && u==fromUSection[start]) { |
|
501 return start; |
|
502 } else { |
|
503 return -1; /* not found */ |
|
504 } |
|
505 } |
|
506 |
|
507 /* |
|
508 * @param cx pointer to extension data; if NULL, returns 0 |
|
509 * @param firstCP the first code point before all the other UChars |
|
510 * @param pre UChars that must match; !initialMatch: partial match with them |
|
511 * @param preLength length of pre, >=0 |
|
512 * @param src UChars that can be used to complete a match |
|
513 * @param srcLength length of src, >=0 |
|
514 * @param pMatchValue [out] output result value for the match from the data structure |
|
515 * @param useFallback "use fallback" flag, usually from cnv->useFallback |
|
516 * @param flush TRUE if the end of the input stream is reached |
|
517 * @return >1: matched, return value=total match length (number of input units matched) |
|
518 * 1: matched, no mapping but request for <subchar1> |
|
519 * (only for the first code point) |
|
520 * 0: no match |
|
521 * <0: partial match, return value=negative total match length |
|
522 * (partial matches are never returned for flush==TRUE) |
|
523 * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) |
|
524 * the matchLength is 2 if only firstCP matched, and >2 if firstCP and |
|
525 * further code units matched |
|
526 */ |
|
527 static int32_t |
|
528 ucnv_extMatchFromU(const int32_t *cx, |
|
529 UChar32 firstCP, |
|
530 const UChar *pre, int32_t preLength, |
|
531 const UChar *src, int32_t srcLength, |
|
532 uint32_t *pMatchValue, |
|
533 UBool useFallback, UBool flush) { |
|
534 const uint16_t *stage12, *stage3; |
|
535 const uint32_t *stage3b; |
|
536 |
|
537 const UChar *fromUTableUChars, *fromUSectionUChars; |
|
538 const uint32_t *fromUTableValues, *fromUSectionValues; |
|
539 |
|
540 uint32_t value, matchValue; |
|
541 int32_t i, j, idx, length, matchLength; |
|
542 UChar c; |
|
543 |
|
544 if(cx==NULL) { |
|
545 return 0; /* no extension data, no match */ |
|
546 } |
|
547 |
|
548 /* trie lookup of firstCP */ |
|
549 idx=firstCP>>10; /* stage 1 index */ |
|
550 if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) { |
|
551 return 0; /* the first code point is outside the trie */ |
|
552 } |
|
553 |
|
554 stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); |
|
555 stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); |
|
556 idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP); |
|
557 |
|
558 stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); |
|
559 value=stage3b[idx]; |
|
560 if(value==0) { |
|
561 return 0; |
|
562 } |
|
563 |
|
564 /* |
|
565 * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0: |
|
566 * Do not interpret values with reserved bits used, for forward compatibility, |
|
567 * and do not even remember intermediate results with reserved bits used. |
|
568 */ |
|
569 |
|
570 if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { |
|
571 /* partial match, enter the loop below */ |
|
572 idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); |
|
573 |
|
574 /* initialize */ |
|
575 fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar); |
|
576 fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t); |
|
577 |
|
578 matchValue=0; |
|
579 i=j=matchLength=0; |
|
580 |
|
581 /* we must not remember fallback matches when not using fallbacks */ |
|
582 |
|
583 /* match input units until there is a full match or the input is consumed */ |
|
584 for(;;) { |
|
585 /* go to the next section */ |
|
586 fromUSectionUChars=fromUTableUChars+idx; |
|
587 fromUSectionValues=fromUTableValues+idx; |
|
588 |
|
589 /* read first pair of the section */ |
|
590 length=*fromUSectionUChars++; |
|
591 value=*fromUSectionValues++; |
|
592 if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) { |
|
593 /* remember longest match so far */ |
|
594 matchValue=value; |
|
595 matchLength=2+i+j; |
|
596 } |
|
597 |
|
598 /* match pre[] then src[] */ |
|
599 if(i<preLength) { |
|
600 c=pre[i++]; |
|
601 } else if(j<srcLength) { |
|
602 c=src[j++]; |
|
603 } else { |
|
604 /* all input consumed, partial match */ |
|
605 if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) { |
|
606 /* |
|
607 * end of the entire input stream, stop with the longest match so far |
|
608 * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS |
|
609 * because it must fit into state buffers |
|
610 */ |
|
611 break; |
|
612 } else { |
|
613 /* continue with more input next time */ |
|
614 return -(2+length); |
|
615 } |
|
616 } |
|
617 |
|
618 /* search for the current UChar */ |
|
619 idx=ucnv_extFindFromU(fromUSectionUChars, length, c); |
|
620 if(idx<0) { |
|
621 /* no match here, stop with the longest match so far */ |
|
622 break; |
|
623 } else { |
|
624 value=fromUSectionValues[idx]; |
|
625 if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { |
|
626 /* partial match, continue */ |
|
627 idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); |
|
628 } else { |
|
629 if(extFromUUseMapping(useFallback, value, firstCP)) { |
|
630 /* full match, stop with result */ |
|
631 matchValue=value; |
|
632 matchLength=2+i+j; |
|
633 } else { |
|
634 /* full match on fallback not taken, stop with the longest match so far */ |
|
635 } |
|
636 break; |
|
637 } |
|
638 } |
|
639 } |
|
640 |
|
641 if(matchLength==0) { |
|
642 /* no match at all */ |
|
643 return 0; |
|
644 } |
|
645 } else /* result from firstCP trie lookup */ { |
|
646 if(extFromUUseMapping(useFallback, value, firstCP)) { |
|
647 /* full match, stop with result */ |
|
648 matchValue=value; |
|
649 matchLength=2; |
|
650 } else { |
|
651 /* fallback not taken */ |
|
652 return 0; |
|
653 } |
|
654 } |
|
655 |
|
656 /* return result */ |
|
657 if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) { |
|
658 return 1; /* assert matchLength==2 */ |
|
659 } |
|
660 |
|
661 *pMatchValue=matchValue; |
|
662 return matchLength; |
|
663 } |
|
664 |
|
665 /* |
|
666 * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits |
|
667 */ |
|
668 static inline void |
|
669 ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, |
|
670 uint32_t value, |
|
671 char **target, const char *targetLimit, |
|
672 int32_t **offsets, int32_t srcIndex, |
|
673 UErrorCode *pErrorCode) { |
|
674 uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; |
|
675 const uint8_t *result; |
|
676 int32_t length, prevLength; |
|
677 |
|
678 length=UCNV_EXT_FROM_U_GET_LENGTH(value); |
|
679 value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); |
|
680 |
|
681 /* output the result */ |
|
682 if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { |
|
683 /* |
|
684 * Generate a byte array and then write it below. |
|
685 * This is not the fastest possible way, but it should be ok for |
|
686 * extension mappings, and it is much simpler. |
|
687 * Offset and overflow handling are only done once this way. |
|
688 */ |
|
689 uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */ |
|
690 switch(length) { |
|
691 case 3: |
|
692 *p++=(uint8_t)(value>>16); |
|
693 case 2: /*fall through*/ |
|
694 *p++=(uint8_t)(value>>8); |
|
695 case 1: /*fall through*/ |
|
696 *p++=(uint8_t)value; |
|
697 default: |
|
698 break; /* will never occur */ |
|
699 } |
|
700 result=buffer+1; |
|
701 } else { |
|
702 result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; |
|
703 } |
|
704 |
|
705 /* with correct data we have length>0 */ |
|
706 |
|
707 if((prevLength=cnv->fromUnicodeStatus)!=0) { |
|
708 /* handle SI/SO stateful output */ |
|
709 uint8_t shiftByte; |
|
710 |
|
711 if(prevLength>1 && length==1) { |
|
712 /* change from double-byte mode to single-byte */ |
|
713 shiftByte=(uint8_t)UCNV_SI; |
|
714 cnv->fromUnicodeStatus=1; |
|
715 } else if(prevLength==1 && length>1) { |
|
716 /* change from single-byte mode to double-byte */ |
|
717 shiftByte=(uint8_t)UCNV_SO; |
|
718 cnv->fromUnicodeStatus=2; |
|
719 } else { |
|
720 shiftByte=0; |
|
721 } |
|
722 |
|
723 if(shiftByte!=0) { |
|
724 /* prepend the shift byte to the result bytes */ |
|
725 buffer[0]=shiftByte; |
|
726 if(result!=buffer+1) { |
|
727 uprv_memcpy(buffer+1, result, length); |
|
728 } |
|
729 result=buffer; |
|
730 ++length; |
|
731 } |
|
732 } |
|
733 |
|
734 ucnv_fromUWriteBytes(cnv, (const char *)result, length, |
|
735 target, targetLimit, |
|
736 offsets, srcIndex, |
|
737 pErrorCode); |
|
738 } |
|
739 |
|
740 /* |
|
741 * target<targetLimit; set error code for overflow |
|
742 */ |
|
743 U_CFUNC UBool |
|
744 ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, |
|
745 UChar32 cp, |
|
746 const UChar **src, const UChar *srcLimit, |
|
747 char **target, const char *targetLimit, |
|
748 int32_t **offsets, int32_t srcIndex, |
|
749 UBool flush, |
|
750 UErrorCode *pErrorCode) { |
|
751 uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
|
752 int32_t match; |
|
753 |
|
754 /* try to match */ |
|
755 match=ucnv_extMatchFromU(cx, cp, |
|
756 NULL, 0, |
|
757 *src, (int32_t)(srcLimit-*src), |
|
758 &value, |
|
759 cnv->useFallback, flush); |
|
760 |
|
761 /* reject a match if the result is a single byte for DBCS-only */ |
|
762 if( match>=2 && |
|
763 !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 && |
|
764 cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) |
|
765 ) { |
|
766 /* advance src pointer for the consumed input */ |
|
767 *src+=match-2; /* remove 2 for the initial code point */ |
|
768 |
|
769 /* write result to target */ |
|
770 ucnv_extWriteFromU(cnv, cx, |
|
771 value, |
|
772 target, targetLimit, |
|
773 offsets, srcIndex, |
|
774 pErrorCode); |
|
775 return TRUE; |
|
776 } else if(match<0) { |
|
777 /* save state for partial match */ |
|
778 const UChar *s; |
|
779 int32_t j; |
|
780 |
|
781 /* copy the first code point */ |
|
782 cnv->preFromUFirstCP=cp; |
|
783 |
|
784 /* now copy the newly consumed input */ |
|
785 s=*src; |
|
786 match=-match-2; /* remove 2 for the initial code point */ |
|
787 for(j=0; j<match; ++j) { |
|
788 cnv->preFromU[j]=*s++; |
|
789 } |
|
790 *src=s; /* same as *src=srcLimit; because we reached the end of input */ |
|
791 cnv->preFromULength=(int8_t)match; |
|
792 return TRUE; |
|
793 } else if(match==1) { |
|
794 /* matched, no mapping but request for <subchar1> */ |
|
795 cnv->useSubChar1=TRUE; |
|
796 return FALSE; |
|
797 } else /* match==0 no match */ { |
|
798 return FALSE; |
|
799 } |
|
800 } |
|
801 |
|
802 /* |
|
803 * Used by ISO 2022 implementation. |
|
804 * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping |
|
805 */ |
|
806 U_CFUNC int32_t |
|
807 ucnv_extSimpleMatchFromU(const int32_t *cx, |
|
808 UChar32 cp, uint32_t *pValue, |
|
809 UBool useFallback) { |
|
810 uint32_t value; |
|
811 int32_t match; |
|
812 |
|
813 /* try to match */ |
|
814 match=ucnv_extMatchFromU(cx, |
|
815 cp, |
|
816 NULL, 0, |
|
817 NULL, 0, |
|
818 &value, |
|
819 useFallback, TRUE); |
|
820 if(match>=2) { |
|
821 /* write result for simple, single-character conversion */ |
|
822 int32_t length; |
|
823 int isRoundtrip; |
|
824 |
|
825 isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value); |
|
826 length=UCNV_EXT_FROM_U_GET_LENGTH(value); |
|
827 value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); |
|
828 |
|
829 if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { |
|
830 *pValue=value; |
|
831 return isRoundtrip ? length : -length; |
|
832 #if 0 /* not currently used */ |
|
833 } else if(length==4) { |
|
834 /* de-serialize a 4-byte result */ |
|
835 const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; |
|
836 *pValue= |
|
837 ((uint32_t)result[0]<<24)| |
|
838 ((uint32_t)result[1]<<16)| |
|
839 ((uint32_t)result[2]<<8)| |
|
840 result[3]; |
|
841 return isRoundtrip ? 4 : -4; |
|
842 #endif |
|
843 } |
|
844 } |
|
845 |
|
846 /* |
|
847 * return no match because |
|
848 * - match>1 && resultLength>4: result too long for simple conversion |
|
849 * - match==1: no match found, <subchar1> preferred |
|
850 * - match==0: no match found in the first place |
|
851 * - match<0: partial match, not supported for simple conversion (and flush==TRUE) |
|
852 */ |
|
853 return 0; |
|
854 } |
|
855 |
|
856 /* |
|
857 * continue partial match with new input, requires cnv->preFromUFirstCP>=0 |
|
858 * never called for simple, single-character conversion |
|
859 */ |
|
860 U_CFUNC void |
|
861 ucnv_extContinueMatchFromU(UConverter *cnv, |
|
862 UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, |
|
863 UErrorCode *pErrorCode) { |
|
864 uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ |
|
865 int32_t match; |
|
866 |
|
867 match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes, |
|
868 cnv->preFromUFirstCP, |
|
869 cnv->preFromU, cnv->preFromULength, |
|
870 pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), |
|
871 &value, |
|
872 cnv->useFallback, pArgs->flush); |
|
873 if(match>=2) { |
|
874 match-=2; /* remove 2 for the initial code point */ |
|
875 |
|
876 if(match>=cnv->preFromULength) { |
|
877 /* advance src pointer for the consumed input */ |
|
878 pArgs->source+=match-cnv->preFromULength; |
|
879 cnv->preFromULength=0; |
|
880 } else { |
|
881 /* the match did not use all of preFromU[] - keep the rest for replay */ |
|
882 int32_t length=cnv->preFromULength-match; |
|
883 uprv_memmove(cnv->preFromU, cnv->preFromU+match, length*U_SIZEOF_UCHAR); |
|
884 cnv->preFromULength=(int8_t)-length; |
|
885 } |
|
886 |
|
887 /* finish the partial match */ |
|
888 cnv->preFromUFirstCP=U_SENTINEL; |
|
889 |
|
890 /* write result */ |
|
891 ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes, |
|
892 value, |
|
893 &pArgs->target, pArgs->targetLimit, |
|
894 &pArgs->offsets, srcIndex, |
|
895 pErrorCode); |
|
896 } else if(match<0) { |
|
897 /* save state for partial match */ |
|
898 const UChar *s; |
|
899 int32_t j; |
|
900 |
|
901 /* just _append_ the newly consumed input to preFromU[] */ |
|
902 s=pArgs->source; |
|
903 match=-match-2; /* remove 2 for the initial code point */ |
|
904 for(j=cnv->preFromULength; j<match; ++j) { |
|
905 U_ASSERT(j>=0); |
|
906 cnv->preFromU[j]=*s++; |
|
907 } |
|
908 pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ |
|
909 cnv->preFromULength=(int8_t)match; |
|
910 } else /* match==0 or 1 */ { |
|
911 /* |
|
912 * no match |
|
913 * |
|
914 * We need to split the previous input into two parts: |
|
915 * |
|
916 * 1. The first code point is unmappable - that's how we got into |
|
917 * trying the extension data in the first place. |
|
918 * We need to move it from the preFromU buffer |
|
919 * to the error buffer, set an error code, |
|
920 * and prepare the rest of the previous input for 2. |
|
921 * |
|
922 * 2. The rest of the previous input must be converted once we |
|
923 * come back from the callback for the first code point. |
|
924 * At that time, we have to try again from scratch to convert |
|
925 * these input characters. |
|
926 * The replay will be handled by the ucnv.c conversion code. |
|
927 */ |
|
928 |
|
929 if(match==1) { |
|
930 /* matched, no mapping but request for <subchar1> */ |
|
931 cnv->useSubChar1=TRUE; |
|
932 } |
|
933 |
|
934 /* move the first code point to the error field */ |
|
935 cnv->fromUChar32=cnv->preFromUFirstCP; |
|
936 cnv->preFromUFirstCP=U_SENTINEL; |
|
937 |
|
938 /* mark preFromU for replay */ |
|
939 cnv->preFromULength=-cnv->preFromULength; |
|
940 |
|
941 /* set the error code for unassigned */ |
|
942 *pErrorCode=U_INVALID_CHAR_FOUND; |
|
943 } |
|
944 } |
|
945 |
|
946 static UBool |
|
947 extSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) { |
|
948 if(which==UCNV_ROUNDTRIP_SET) { |
|
949 // Add only code points for which the roundtrip flag is set. |
|
950 // Do not add any fallbacks, even if ucnv_fromUnicode() would use them |
|
951 // (fallbacks from PUA). See the API docs for ucnv_getUnicodeSet(). |
|
952 // |
|
953 // By analogy, also do not add "good one-way" mappings. |
|
954 // |
|
955 // Do not add entries with reserved bits set. |
|
956 if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))!= |
|
957 UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) { |
|
958 return FALSE; |
|
959 } |
|
960 } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { |
|
961 // Do not add entries with reserved bits set. |
|
962 if((value&UCNV_EXT_FROM_U_RESERVED_MASK)!=0) { |
|
963 return FALSE; |
|
964 } |
|
965 } |
|
966 // Do not add <subchar1> entries or other (future?) pseudo-entries |
|
967 // with an output length of 0. |
|
968 return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength; |
|
969 } |
|
970 |
|
971 static void |
|
972 ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, |
|
973 const int32_t *cx, |
|
974 const USetAdder *sa, |
|
975 UConverterUnicodeSet which, |
|
976 int32_t minLength, |
|
977 UChar32 firstCP, |
|
978 UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, |
|
979 int32_t sectionIndex, |
|
980 UErrorCode *pErrorCode) { |
|
981 const UChar *fromUSectionUChars; |
|
982 const uint32_t *fromUSectionValues; |
|
983 |
|
984 uint32_t value; |
|
985 int32_t i, count; |
|
986 |
|
987 fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex; |
|
988 fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex; |
|
989 |
|
990 /* read first pair of the section */ |
|
991 count=*fromUSectionUChars++; |
|
992 value=*fromUSectionValues++; |
|
993 |
|
994 if(extSetUseMapping(which, minLength, value)) { |
|
995 if(length==U16_LENGTH(firstCP)) { |
|
996 /* add the initial code point */ |
|
997 sa->add(sa->set, firstCP); |
|
998 } else { |
|
999 /* add the string so far */ |
|
1000 sa->addString(sa->set, s, length); |
|
1001 } |
|
1002 } |
|
1003 |
|
1004 for(i=0; i<count; ++i) { |
|
1005 /* append this code unit and recurse or add the string */ |
|
1006 s[length]=fromUSectionUChars[i]; |
|
1007 value=fromUSectionValues[i]; |
|
1008 |
|
1009 if(value==0) { |
|
1010 /* no mapping, do nothing */ |
|
1011 } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { |
|
1012 ucnv_extGetUnicodeSetString( |
|
1013 sharedData, cx, sa, which, minLength, |
|
1014 firstCP, s, length+1, |
|
1015 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), |
|
1016 pErrorCode); |
|
1017 } else if(extSetUseMapping(which, minLength, value)) { |
|
1018 sa->addString(sa->set, s, length+1); |
|
1019 } |
|
1020 } |
|
1021 } |
|
1022 |
|
1023 U_CFUNC void |
|
1024 ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, |
|
1025 const USetAdder *sa, |
|
1026 UConverterUnicodeSet which, |
|
1027 UConverterSetFilter filter, |
|
1028 UErrorCode *pErrorCode) { |
|
1029 const int32_t *cx; |
|
1030 const uint16_t *stage12, *stage3, *ps2, *ps3; |
|
1031 const uint32_t *stage3b; |
|
1032 |
|
1033 uint32_t value; |
|
1034 int32_t st1, stage1Length, st2, st3, minLength; |
|
1035 |
|
1036 UChar s[UCNV_EXT_MAX_UCHARS]; |
|
1037 UChar32 c; |
|
1038 int32_t length; |
|
1039 |
|
1040 cx=sharedData->mbcs.extIndexes; |
|
1041 if(cx==NULL) { |
|
1042 return; |
|
1043 } |
|
1044 |
|
1045 stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); |
|
1046 stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); |
|
1047 stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); |
|
1048 |
|
1049 stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; |
|
1050 |
|
1051 /* enumerate the from-Unicode trie table */ |
|
1052 c=0; /* keep track of the current code point while enumerating */ |
|
1053 |
|
1054 if(filter==UCNV_SET_FILTER_2022_CN) { |
|
1055 minLength=3; |
|
1056 } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || |
|
1057 filter!=UCNV_SET_FILTER_NONE |
|
1058 ) { |
|
1059 /* DBCS-only, ignore single-byte results */ |
|
1060 minLength=2; |
|
1061 } else { |
|
1062 minLength=1; |
|
1063 } |
|
1064 |
|
1065 /* |
|
1066 * the trie enumeration is almost the same as |
|
1067 * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 |
|
1068 */ |
|
1069 for(st1=0; st1<stage1Length; ++st1) { |
|
1070 st2=stage12[st1]; |
|
1071 if(st2>stage1Length) { |
|
1072 ps2=stage12+st2; |
|
1073 for(st2=0; st2<64; ++st2) { |
|
1074 if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) { |
|
1075 /* read the stage 3 block */ |
|
1076 ps3=stage3+st3; |
|
1077 |
|
1078 do { |
|
1079 value=stage3b[*ps3++]; |
|
1080 if(value==0) { |
|
1081 /* no mapping, do nothing */ |
|
1082 } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { |
|
1083 // Recurse for partial results. |
|
1084 length=0; |
|
1085 U16_APPEND_UNSAFE(s, length, c); |
|
1086 ucnv_extGetUnicodeSetString( |
|
1087 sharedData, cx, sa, which, minLength, |
|
1088 c, s, length, |
|
1089 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), |
|
1090 pErrorCode); |
|
1091 } else if(extSetUseMapping(which, minLength, value)) { |
|
1092 switch(filter) { |
|
1093 case UCNV_SET_FILTER_2022_CN: |
|
1094 if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { |
|
1095 continue; |
|
1096 } |
|
1097 break; |
|
1098 case UCNV_SET_FILTER_SJIS: |
|
1099 if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { |
|
1100 continue; |
|
1101 } |
|
1102 break; |
|
1103 case UCNV_SET_FILTER_GR94DBCS: |
|
1104 if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && |
|
1105 (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && |
|
1106 (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { |
|
1107 continue; |
|
1108 } |
|
1109 break; |
|
1110 case UCNV_SET_FILTER_HZ: |
|
1111 if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && |
|
1112 (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && |
|
1113 (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { |
|
1114 continue; |
|
1115 } |
|
1116 break; |
|
1117 default: |
|
1118 /* |
|
1119 * UCNV_SET_FILTER_NONE, |
|
1120 * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength |
|
1121 */ |
|
1122 break; |
|
1123 } |
|
1124 sa->add(sa->set, c); |
|
1125 } |
|
1126 } while((++c&0xf)!=0); |
|
1127 } else { |
|
1128 c+=16; /* empty stage 3 block */ |
|
1129 } |
|
1130 } |
|
1131 } else { |
|
1132 c+=1024; /* empty stage 2 block */ |
|
1133 } |
|
1134 } |
|
1135 } |
|
1136 |
|
1137 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |