1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnv_ext.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1137 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* 1.7 +* Copyright (C) 2003-2013, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +****************************************************************************** 1.11 +* file name: ucnv_ext.cpp 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2003jun13 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* Conversion extensions 1.20 +*/ 1.21 + 1.22 +#include "unicode/utypes.h" 1.23 + 1.24 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 1.25 + 1.26 +#include "unicode/uset.h" 1.27 +#include "ucnv_bld.h" 1.28 +#include "ucnv_cnv.h" 1.29 +#include "ucnv_ext.h" 1.30 +#include "cmemory.h" 1.31 +#include "uassert.h" 1.32 + 1.33 +/* to Unicode --------------------------------------------------------------- */ 1.34 + 1.35 +/* 1.36 + * @return lookup value for the byte, if found; else 0 1.37 + */ 1.38 +static inline uint32_t 1.39 +ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) { 1.40 + uint32_t word0, word; 1.41 + int32_t i, start, limit; 1.42 + 1.43 + /* check the input byte against the lowest and highest section bytes */ 1.44 + start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]); 1.45 + limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]); 1.46 + if(byte<start || limit<byte) { 1.47 + return 0; /* the byte is out of range */ 1.48 + } 1.49 + 1.50 + if(length==((limit-start)+1)) { 1.51 + /* direct access on a linear array */ 1.52 + return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */ 1.53 + } 1.54 + 1.55 + /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */ 1.56 + word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0); 1.57 + 1.58 + /* 1.59 + * Shift byte once instead of each section word and add 0xffffff. 1.60 + * We will compare the shifted/added byte (bbffffff) against 1.61 + * section words which have byte values in the same bit position. 1.62 + * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv 1.63 + * for all v=0..f 1.64 + * so we need not mask off the lower 24 bits of each section word. 1.65 + */ 1.66 + word=word0|UCNV_EXT_TO_U_VALUE_MASK; 1.67 + 1.68 + /* binary search */ 1.69 + start=0; 1.70 + limit=length; 1.71 + for(;;) { 1.72 + i=limit-start; 1.73 + if(i<=1) { 1.74 + break; /* done */ 1.75 + } 1.76 + /* start<limit-1 */ 1.77 + 1.78 + if(i<=4) { 1.79 + /* linear search for the last part */ 1.80 + if(word0<=toUSection[start]) { 1.81 + break; 1.82 + } 1.83 + if(++start<limit && word0<=toUSection[start]) { 1.84 + break; 1.85 + } 1.86 + if(++start<limit && word0<=toUSection[start]) { 1.87 + break; 1.88 + } 1.89 + /* always break at start==limit-1 */ 1.90 + ++start; 1.91 + break; 1.92 + } 1.93 + 1.94 + i=(start+limit)/2; 1.95 + if(word<toUSection[i]) { 1.96 + limit=i; 1.97 + } else { 1.98 + start=i; 1.99 + } 1.100 + } 1.101 + 1.102 + /* did we really find it? */ 1.103 + if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) { 1.104 + return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */ 1.105 + } else { 1.106 + return 0; /* not found */ 1.107 + } 1.108 +} 1.109 + 1.110 +/* 1.111 + * TRUE if not an SI/SO stateful converter, 1.112 + * or if the match length fits with the current converter state 1.113 + */ 1.114 +#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \ 1.115 + ((sisoState)<0 || ((sisoState)==0) == (match==1)) 1.116 + 1.117 +/* 1.118 + * this works like ucnv_extMatchFromU() except 1.119 + * - the first character is in pre 1.120 + * - no trie is used 1.121 + * - the returned matchLength is not offset by 2 1.122 + */ 1.123 +static int32_t 1.124 +ucnv_extMatchToU(const int32_t *cx, int8_t sisoState, 1.125 + const char *pre, int32_t preLength, 1.126 + const char *src, int32_t srcLength, 1.127 + uint32_t *pMatchValue, 1.128 + UBool /*useFallback*/, UBool flush) { 1.129 + const uint32_t *toUTable, *toUSection; 1.130 + 1.131 + uint32_t value, matchValue; 1.132 + int32_t i, j, idx, length, matchLength; 1.133 + uint8_t b; 1.134 + 1.135 + if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) { 1.136 + return 0; /* no extension data, no match */ 1.137 + } 1.138 + 1.139 + /* initialize */ 1.140 + toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t); 1.141 + idx=0; 1.142 + 1.143 + matchValue=0; 1.144 + i=j=matchLength=0; 1.145 + 1.146 + if(sisoState==0) { 1.147 + /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */ 1.148 + if(preLength>1) { 1.149 + return 0; /* no match of a DBCS sequence in SBCS mode */ 1.150 + } else if(preLength==1) { 1.151 + srcLength=0; 1.152 + } else /* preLength==0 */ { 1.153 + if(srcLength>1) { 1.154 + srcLength=1; 1.155 + } 1.156 + } 1.157 + flush=TRUE; 1.158 + } 1.159 + 1.160 + /* we must not remember fallback matches when not using fallbacks */ 1.161 + 1.162 + /* match input units until there is a full match or the input is consumed */ 1.163 + for(;;) { 1.164 + /* go to the next section */ 1.165 + toUSection=toUTable+idx; 1.166 + 1.167 + /* read first pair of the section */ 1.168 + value=*toUSection++; 1.169 + length=UCNV_EXT_TO_U_GET_BYTE(value); 1.170 + value=UCNV_EXT_TO_U_GET_VALUE(value); 1.171 + if( value!=0 && 1.172 + (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || 1.173 + TO_U_USE_FALLBACK(useFallback)) && 1.174 + UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) 1.175 + ) { 1.176 + /* remember longest match so far */ 1.177 + matchValue=value; 1.178 + matchLength=i+j; 1.179 + } 1.180 + 1.181 + /* match pre[] then src[] */ 1.182 + if(i<preLength) { 1.183 + b=(uint8_t)pre[i++]; 1.184 + } else if(j<srcLength) { 1.185 + b=(uint8_t)src[j++]; 1.186 + } else { 1.187 + /* all input consumed, partial match */ 1.188 + if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) { 1.189 + /* 1.190 + * end of the entire input stream, stop with the longest match so far 1.191 + * or: partial match must not be longer than UCNV_EXT_MAX_BYTES 1.192 + * because it must fit into state buffers 1.193 + */ 1.194 + break; 1.195 + } else { 1.196 + /* continue with more input next time */ 1.197 + return -length; 1.198 + } 1.199 + } 1.200 + 1.201 + /* search for the current UChar */ 1.202 + value=ucnv_extFindToU(toUSection, length, b); 1.203 + if(value==0) { 1.204 + /* no match here, stop with the longest match so far */ 1.205 + break; 1.206 + } else { 1.207 + if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { 1.208 + /* partial match, continue */ 1.209 + idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value); 1.210 + } else { 1.211 + if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || 1.212 + TO_U_USE_FALLBACK(useFallback)) && 1.213 + UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) 1.214 + ) { 1.215 + /* full match, stop with result */ 1.216 + matchValue=value; 1.217 + matchLength=i+j; 1.218 + } else { 1.219 + /* full match on fallback not taken, stop with the longest match so far */ 1.220 + } 1.221 + break; 1.222 + } 1.223 + } 1.224 + } 1.225 + 1.226 + if(matchLength==0) { 1.227 + /* no match at all */ 1.228 + return 0; 1.229 + } 1.230 + 1.231 + /* return result */ 1.232 + *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue); 1.233 + return matchLength; 1.234 +} 1.235 + 1.236 +static inline void 1.237 +ucnv_extWriteToU(UConverter *cnv, const int32_t *cx, 1.238 + uint32_t value, 1.239 + UChar **target, const UChar *targetLimit, 1.240 + int32_t **offsets, int32_t srcIndex, 1.241 + UErrorCode *pErrorCode) { 1.242 + /* output the result */ 1.243 + if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { 1.244 + /* output a single code point */ 1.245 + ucnv_toUWriteCodePoint( 1.246 + cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value), 1.247 + target, targetLimit, 1.248 + offsets, srcIndex, 1.249 + pErrorCode); 1.250 + } else { 1.251 + /* output a string - with correct data we have resultLength>0 */ 1.252 + ucnv_toUWriteUChars( 1.253 + cnv, 1.254 + UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+ 1.255 + UCNV_EXT_TO_U_GET_INDEX(value), 1.256 + UCNV_EXT_TO_U_GET_LENGTH(value), 1.257 + target, targetLimit, 1.258 + offsets, srcIndex, 1.259 + pErrorCode); 1.260 + } 1.261 +} 1.262 + 1.263 +/* 1.264 + * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS), 1.265 + * or 1 for DBCS-only, 1.266 + * or -1 if the converter is not SI/SO stateful 1.267 + * 1.268 + * Note: For SI/SO stateful converters getting here, 1.269 + * cnv->mode==0 is equivalent to firstLength==1. 1.270 + */ 1.271 +#define UCNV_SISO_STATE(cnv) \ 1.272 + ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \ 1.273 + (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1) 1.274 + 1.275 +/* 1.276 + * target<targetLimit; set error code for overflow 1.277 + */ 1.278 +U_CFUNC UBool 1.279 +ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, 1.280 + int32_t firstLength, 1.281 + const char **src, const char *srcLimit, 1.282 + UChar **target, const UChar *targetLimit, 1.283 + int32_t **offsets, int32_t srcIndex, 1.284 + UBool flush, 1.285 + UErrorCode *pErrorCode) { 1.286 + uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 1.287 + int32_t match; 1.288 + 1.289 + /* try to match */ 1.290 + match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv), 1.291 + (const char *)cnv->toUBytes, firstLength, 1.292 + *src, (int32_t)(srcLimit-*src), 1.293 + &value, 1.294 + cnv->useFallback, flush); 1.295 + if(match>0) { 1.296 + /* advance src pointer for the consumed input */ 1.297 + *src+=match-firstLength; 1.298 + 1.299 + /* write result to target */ 1.300 + ucnv_extWriteToU(cnv, cx, 1.301 + value, 1.302 + target, targetLimit, 1.303 + offsets, srcIndex, 1.304 + pErrorCode); 1.305 + return TRUE; 1.306 + } else if(match<0) { 1.307 + /* save state for partial match */ 1.308 + const char *s; 1.309 + int32_t j; 1.310 + 1.311 + /* copy the first code point */ 1.312 + s=(const char *)cnv->toUBytes; 1.313 + cnv->preToUFirstLength=(int8_t)firstLength; 1.314 + for(j=0; j<firstLength; ++j) { 1.315 + cnv->preToU[j]=*s++; 1.316 + } 1.317 + 1.318 + /* now copy the newly consumed input */ 1.319 + s=*src; 1.320 + match=-match; 1.321 + for(; j<match; ++j) { 1.322 + cnv->preToU[j]=*s++; 1.323 + } 1.324 + *src=s; /* same as *src=srcLimit; because we reached the end of input */ 1.325 + cnv->preToULength=(int8_t)match; 1.326 + return TRUE; 1.327 + } else /* match==0 no match */ { 1.328 + return FALSE; 1.329 + } 1.330 +} 1.331 + 1.332 +U_CFUNC UChar32 1.333 +ucnv_extSimpleMatchToU(const int32_t *cx, 1.334 + const char *source, int32_t length, 1.335 + UBool useFallback) { 1.336 + uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 1.337 + int32_t match; 1.338 + 1.339 + if(length<=0) { 1.340 + return 0xffff; 1.341 + } 1.342 + 1.343 + /* try to match */ 1.344 + match=ucnv_extMatchToU(cx, -1, 1.345 + source, length, 1.346 + NULL, 0, 1.347 + &value, 1.348 + useFallback, TRUE); 1.349 + if(match==length) { 1.350 + /* write result for simple, single-character conversion */ 1.351 + if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { 1.352 + return UCNV_EXT_TO_U_GET_CODE_POINT(value); 1.353 + } 1.354 + } 1.355 + 1.356 + /* 1.357 + * return no match because 1.358 + * - match>0 && value points to string: simple conversion cannot handle multiple code points 1.359 + * - match>0 && match!=length: not all input consumed, forbidden for this function 1.360 + * - match==0: no match found in the first place 1.361 + * - match<0: partial match, not supported for simple conversion (and flush==TRUE) 1.362 + */ 1.363 + return 0xfffe; 1.364 +} 1.365 + 1.366 +/* 1.367 + * continue partial match with new input 1.368 + * never called for simple, single-character conversion 1.369 + */ 1.370 +U_CFUNC void 1.371 +ucnv_extContinueMatchToU(UConverter *cnv, 1.372 + UConverterToUnicodeArgs *pArgs, int32_t srcIndex, 1.373 + UErrorCode *pErrorCode) { 1.374 + uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 1.375 + int32_t match, length; 1.376 + 1.377 + match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv), 1.378 + cnv->preToU, cnv->preToULength, 1.379 + pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), 1.380 + &value, 1.381 + cnv->useFallback, pArgs->flush); 1.382 + if(match>0) { 1.383 + if(match>=cnv->preToULength) { 1.384 + /* advance src pointer for the consumed input */ 1.385 + pArgs->source+=match-cnv->preToULength; 1.386 + cnv->preToULength=0; 1.387 + } else { 1.388 + /* the match did not use all of preToU[] - keep the rest for replay */ 1.389 + length=cnv->preToULength-match; 1.390 + uprv_memmove(cnv->preToU, cnv->preToU+match, length); 1.391 + cnv->preToULength=(int8_t)-length; 1.392 + } 1.393 + 1.394 + /* write result */ 1.395 + ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes, 1.396 + value, 1.397 + &pArgs->target, pArgs->targetLimit, 1.398 + &pArgs->offsets, srcIndex, 1.399 + pErrorCode); 1.400 + } else if(match<0) { 1.401 + /* save state for partial match */ 1.402 + const char *s; 1.403 + int32_t j; 1.404 + 1.405 + /* just _append_ the newly consumed input to preToU[] */ 1.406 + s=pArgs->source; 1.407 + match=-match; 1.408 + for(j=cnv->preToULength; j<match; ++j) { 1.409 + cnv->preToU[j]=*s++; 1.410 + } 1.411 + pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ 1.412 + cnv->preToULength=(int8_t)match; 1.413 + } else /* match==0 */ { 1.414 + /* 1.415 + * no match 1.416 + * 1.417 + * We need to split the previous input into two parts: 1.418 + * 1.419 + * 1. The first codepage character is unmappable - that's how we got into 1.420 + * trying the extension data in the first place. 1.421 + * We need to move it from the preToU buffer 1.422 + * to the error buffer, set an error code, 1.423 + * and prepare the rest of the previous input for 2. 1.424 + * 1.425 + * 2. The rest of the previous input must be converted once we 1.426 + * come back from the callback for the first character. 1.427 + * At that time, we have to try again from scratch to convert 1.428 + * these input characters. 1.429 + * The replay will be handled by the ucnv.c conversion code. 1.430 + */ 1.431 + 1.432 + /* move the first codepage character to the error field */ 1.433 + uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength); 1.434 + cnv->toULength=cnv->preToUFirstLength; 1.435 + 1.436 + /* move the rest up inside the buffer */ 1.437 + length=cnv->preToULength-cnv->preToUFirstLength; 1.438 + if(length>0) { 1.439 + uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length); 1.440 + } 1.441 + 1.442 + /* mark preToU for replay */ 1.443 + cnv->preToULength=(int8_t)-length; 1.444 + 1.445 + /* set the error code for unassigned */ 1.446 + *pErrorCode=U_INVALID_CHAR_FOUND; 1.447 + } 1.448 +} 1.449 + 1.450 +/* from Unicode ------------------------------------------------------------- */ 1.451 + 1.452 +// Use roundtrips, "good one-way" mappings, and some normal fallbacks. 1.453 +static inline UBool 1.454 +extFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) { 1.455 + return 1.456 + ((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 || 1.457 + FROM_U_USE_FALLBACK(useFallback, firstCP)) && 1.458 + (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0; 1.459 +} 1.460 + 1.461 +/* 1.462 + * @return index of the UChar, if found; else <0 1.463 + */ 1.464 +static inline int32_t 1.465 +ucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) { 1.466 + int32_t i, start, limit; 1.467 + 1.468 + /* binary search */ 1.469 + start=0; 1.470 + limit=length; 1.471 + for(;;) { 1.472 + i=limit-start; 1.473 + if(i<=1) { 1.474 + break; /* done */ 1.475 + } 1.476 + /* start<limit-1 */ 1.477 + 1.478 + if(i<=4) { 1.479 + /* linear search for the last part */ 1.480 + if(u<=fromUSection[start]) { 1.481 + break; 1.482 + } 1.483 + if(++start<limit && u<=fromUSection[start]) { 1.484 + break; 1.485 + } 1.486 + if(++start<limit && u<=fromUSection[start]) { 1.487 + break; 1.488 + } 1.489 + /* always break at start==limit-1 */ 1.490 + ++start; 1.491 + break; 1.492 + } 1.493 + 1.494 + i=(start+limit)/2; 1.495 + if(u<fromUSection[i]) { 1.496 + limit=i; 1.497 + } else { 1.498 + start=i; 1.499 + } 1.500 + } 1.501 + 1.502 + /* did we really find it? */ 1.503 + if(start<limit && u==fromUSection[start]) { 1.504 + return start; 1.505 + } else { 1.506 + return -1; /* not found */ 1.507 + } 1.508 +} 1.509 + 1.510 +/* 1.511 + * @param cx pointer to extension data; if NULL, returns 0 1.512 + * @param firstCP the first code point before all the other UChars 1.513 + * @param pre UChars that must match; !initialMatch: partial match with them 1.514 + * @param preLength length of pre, >=0 1.515 + * @param src UChars that can be used to complete a match 1.516 + * @param srcLength length of src, >=0 1.517 + * @param pMatchValue [out] output result value for the match from the data structure 1.518 + * @param useFallback "use fallback" flag, usually from cnv->useFallback 1.519 + * @param flush TRUE if the end of the input stream is reached 1.520 + * @return >1: matched, return value=total match length (number of input units matched) 1.521 + * 1: matched, no mapping but request for <subchar1> 1.522 + * (only for the first code point) 1.523 + * 0: no match 1.524 + * <0: partial match, return value=negative total match length 1.525 + * (partial matches are never returned for flush==TRUE) 1.526 + * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) 1.527 + * the matchLength is 2 if only firstCP matched, and >2 if firstCP and 1.528 + * further code units matched 1.529 + */ 1.530 +static int32_t 1.531 +ucnv_extMatchFromU(const int32_t *cx, 1.532 + UChar32 firstCP, 1.533 + const UChar *pre, int32_t preLength, 1.534 + const UChar *src, int32_t srcLength, 1.535 + uint32_t *pMatchValue, 1.536 + UBool useFallback, UBool flush) { 1.537 + const uint16_t *stage12, *stage3; 1.538 + const uint32_t *stage3b; 1.539 + 1.540 + const UChar *fromUTableUChars, *fromUSectionUChars; 1.541 + const uint32_t *fromUTableValues, *fromUSectionValues; 1.542 + 1.543 + uint32_t value, matchValue; 1.544 + int32_t i, j, idx, length, matchLength; 1.545 + UChar c; 1.546 + 1.547 + if(cx==NULL) { 1.548 + return 0; /* no extension data, no match */ 1.549 + } 1.550 + 1.551 + /* trie lookup of firstCP */ 1.552 + idx=firstCP>>10; /* stage 1 index */ 1.553 + if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) { 1.554 + return 0; /* the first code point is outside the trie */ 1.555 + } 1.556 + 1.557 + stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); 1.558 + stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); 1.559 + idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP); 1.560 + 1.561 + stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); 1.562 + value=stage3b[idx]; 1.563 + if(value==0) { 1.564 + return 0; 1.565 + } 1.566 + 1.567 + /* 1.568 + * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0: 1.569 + * Do not interpret values with reserved bits used, for forward compatibility, 1.570 + * and do not even remember intermediate results with reserved bits used. 1.571 + */ 1.572 + 1.573 + if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { 1.574 + /* partial match, enter the loop below */ 1.575 + idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); 1.576 + 1.577 + /* initialize */ 1.578 + fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar); 1.579 + fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t); 1.580 + 1.581 + matchValue=0; 1.582 + i=j=matchLength=0; 1.583 + 1.584 + /* we must not remember fallback matches when not using fallbacks */ 1.585 + 1.586 + /* match input units until there is a full match or the input is consumed */ 1.587 + for(;;) { 1.588 + /* go to the next section */ 1.589 + fromUSectionUChars=fromUTableUChars+idx; 1.590 + fromUSectionValues=fromUTableValues+idx; 1.591 + 1.592 + /* read first pair of the section */ 1.593 + length=*fromUSectionUChars++; 1.594 + value=*fromUSectionValues++; 1.595 + if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) { 1.596 + /* remember longest match so far */ 1.597 + matchValue=value; 1.598 + matchLength=2+i+j; 1.599 + } 1.600 + 1.601 + /* match pre[] then src[] */ 1.602 + if(i<preLength) { 1.603 + c=pre[i++]; 1.604 + } else if(j<srcLength) { 1.605 + c=src[j++]; 1.606 + } else { 1.607 + /* all input consumed, partial match */ 1.608 + if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) { 1.609 + /* 1.610 + * end of the entire input stream, stop with the longest match so far 1.611 + * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS 1.612 + * because it must fit into state buffers 1.613 + */ 1.614 + break; 1.615 + } else { 1.616 + /* continue with more input next time */ 1.617 + return -(2+length); 1.618 + } 1.619 + } 1.620 + 1.621 + /* search for the current UChar */ 1.622 + idx=ucnv_extFindFromU(fromUSectionUChars, length, c); 1.623 + if(idx<0) { 1.624 + /* no match here, stop with the longest match so far */ 1.625 + break; 1.626 + } else { 1.627 + value=fromUSectionValues[idx]; 1.628 + if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 1.629 + /* partial match, continue */ 1.630 + idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); 1.631 + } else { 1.632 + if(extFromUUseMapping(useFallback, value, firstCP)) { 1.633 + /* full match, stop with result */ 1.634 + matchValue=value; 1.635 + matchLength=2+i+j; 1.636 + } else { 1.637 + /* full match on fallback not taken, stop with the longest match so far */ 1.638 + } 1.639 + break; 1.640 + } 1.641 + } 1.642 + } 1.643 + 1.644 + if(matchLength==0) { 1.645 + /* no match at all */ 1.646 + return 0; 1.647 + } 1.648 + } else /* result from firstCP trie lookup */ { 1.649 + if(extFromUUseMapping(useFallback, value, firstCP)) { 1.650 + /* full match, stop with result */ 1.651 + matchValue=value; 1.652 + matchLength=2; 1.653 + } else { 1.654 + /* fallback not taken */ 1.655 + return 0; 1.656 + } 1.657 + } 1.658 + 1.659 + /* return result */ 1.660 + if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) { 1.661 + return 1; /* assert matchLength==2 */ 1.662 + } 1.663 + 1.664 + *pMatchValue=matchValue; 1.665 + return matchLength; 1.666 +} 1.667 + 1.668 +/* 1.669 + * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits 1.670 + */ 1.671 +static inline void 1.672 +ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, 1.673 + uint32_t value, 1.674 + char **target, const char *targetLimit, 1.675 + int32_t **offsets, int32_t srcIndex, 1.676 + UErrorCode *pErrorCode) { 1.677 + uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; 1.678 + const uint8_t *result; 1.679 + int32_t length, prevLength; 1.680 + 1.681 + length=UCNV_EXT_FROM_U_GET_LENGTH(value); 1.682 + value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); 1.683 + 1.684 + /* output the result */ 1.685 + if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { 1.686 + /* 1.687 + * Generate a byte array and then write it below. 1.688 + * This is not the fastest possible way, but it should be ok for 1.689 + * extension mappings, and it is much simpler. 1.690 + * Offset and overflow handling are only done once this way. 1.691 + */ 1.692 + uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */ 1.693 + switch(length) { 1.694 + case 3: 1.695 + *p++=(uint8_t)(value>>16); 1.696 + case 2: /*fall through*/ 1.697 + *p++=(uint8_t)(value>>8); 1.698 + case 1: /*fall through*/ 1.699 + *p++=(uint8_t)value; 1.700 + default: 1.701 + break; /* will never occur */ 1.702 + } 1.703 + result=buffer+1; 1.704 + } else { 1.705 + result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; 1.706 + } 1.707 + 1.708 + /* with correct data we have length>0 */ 1.709 + 1.710 + if((prevLength=cnv->fromUnicodeStatus)!=0) { 1.711 + /* handle SI/SO stateful output */ 1.712 + uint8_t shiftByte; 1.713 + 1.714 + if(prevLength>1 && length==1) { 1.715 + /* change from double-byte mode to single-byte */ 1.716 + shiftByte=(uint8_t)UCNV_SI; 1.717 + cnv->fromUnicodeStatus=1; 1.718 + } else if(prevLength==1 && length>1) { 1.719 + /* change from single-byte mode to double-byte */ 1.720 + shiftByte=(uint8_t)UCNV_SO; 1.721 + cnv->fromUnicodeStatus=2; 1.722 + } else { 1.723 + shiftByte=0; 1.724 + } 1.725 + 1.726 + if(shiftByte!=0) { 1.727 + /* prepend the shift byte to the result bytes */ 1.728 + buffer[0]=shiftByte; 1.729 + if(result!=buffer+1) { 1.730 + uprv_memcpy(buffer+1, result, length); 1.731 + } 1.732 + result=buffer; 1.733 + ++length; 1.734 + } 1.735 + } 1.736 + 1.737 + ucnv_fromUWriteBytes(cnv, (const char *)result, length, 1.738 + target, targetLimit, 1.739 + offsets, srcIndex, 1.740 + pErrorCode); 1.741 +} 1.742 + 1.743 +/* 1.744 + * target<targetLimit; set error code for overflow 1.745 + */ 1.746 +U_CFUNC UBool 1.747 +ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, 1.748 + UChar32 cp, 1.749 + const UChar **src, const UChar *srcLimit, 1.750 + char **target, const char *targetLimit, 1.751 + int32_t **offsets, int32_t srcIndex, 1.752 + UBool flush, 1.753 + UErrorCode *pErrorCode) { 1.754 + uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 1.755 + int32_t match; 1.756 + 1.757 + /* try to match */ 1.758 + match=ucnv_extMatchFromU(cx, cp, 1.759 + NULL, 0, 1.760 + *src, (int32_t)(srcLimit-*src), 1.761 + &value, 1.762 + cnv->useFallback, flush); 1.763 + 1.764 + /* reject a match if the result is a single byte for DBCS-only */ 1.765 + if( match>=2 && 1.766 + !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 && 1.767 + cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) 1.768 + ) { 1.769 + /* advance src pointer for the consumed input */ 1.770 + *src+=match-2; /* remove 2 for the initial code point */ 1.771 + 1.772 + /* write result to target */ 1.773 + ucnv_extWriteFromU(cnv, cx, 1.774 + value, 1.775 + target, targetLimit, 1.776 + offsets, srcIndex, 1.777 + pErrorCode); 1.778 + return TRUE; 1.779 + } else if(match<0) { 1.780 + /* save state for partial match */ 1.781 + const UChar *s; 1.782 + int32_t j; 1.783 + 1.784 + /* copy the first code point */ 1.785 + cnv->preFromUFirstCP=cp; 1.786 + 1.787 + /* now copy the newly consumed input */ 1.788 + s=*src; 1.789 + match=-match-2; /* remove 2 for the initial code point */ 1.790 + for(j=0; j<match; ++j) { 1.791 + cnv->preFromU[j]=*s++; 1.792 + } 1.793 + *src=s; /* same as *src=srcLimit; because we reached the end of input */ 1.794 + cnv->preFromULength=(int8_t)match; 1.795 + return TRUE; 1.796 + } else if(match==1) { 1.797 + /* matched, no mapping but request for <subchar1> */ 1.798 + cnv->useSubChar1=TRUE; 1.799 + return FALSE; 1.800 + } else /* match==0 no match */ { 1.801 + return FALSE; 1.802 + } 1.803 +} 1.804 + 1.805 +/* 1.806 + * Used by ISO 2022 implementation. 1.807 + * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping 1.808 + */ 1.809 +U_CFUNC int32_t 1.810 +ucnv_extSimpleMatchFromU(const int32_t *cx, 1.811 + UChar32 cp, uint32_t *pValue, 1.812 + UBool useFallback) { 1.813 + uint32_t value; 1.814 + int32_t match; 1.815 + 1.816 + /* try to match */ 1.817 + match=ucnv_extMatchFromU(cx, 1.818 + cp, 1.819 + NULL, 0, 1.820 + NULL, 0, 1.821 + &value, 1.822 + useFallback, TRUE); 1.823 + if(match>=2) { 1.824 + /* write result for simple, single-character conversion */ 1.825 + int32_t length; 1.826 + int isRoundtrip; 1.827 + 1.828 + isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value); 1.829 + length=UCNV_EXT_FROM_U_GET_LENGTH(value); 1.830 + value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); 1.831 + 1.832 + if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { 1.833 + *pValue=value; 1.834 + return isRoundtrip ? length : -length; 1.835 +#if 0 /* not currently used */ 1.836 + } else if(length==4) { 1.837 + /* de-serialize a 4-byte result */ 1.838 + const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; 1.839 + *pValue= 1.840 + ((uint32_t)result[0]<<24)| 1.841 + ((uint32_t)result[1]<<16)| 1.842 + ((uint32_t)result[2]<<8)| 1.843 + result[3]; 1.844 + return isRoundtrip ? 4 : -4; 1.845 +#endif 1.846 + } 1.847 + } 1.848 + 1.849 + /* 1.850 + * return no match because 1.851 + * - match>1 && resultLength>4: result too long for simple conversion 1.852 + * - match==1: no match found, <subchar1> preferred 1.853 + * - match==0: no match found in the first place 1.854 + * - match<0: partial match, not supported for simple conversion (and flush==TRUE) 1.855 + */ 1.856 + return 0; 1.857 +} 1.858 + 1.859 +/* 1.860 + * continue partial match with new input, requires cnv->preFromUFirstCP>=0 1.861 + * never called for simple, single-character conversion 1.862 + */ 1.863 +U_CFUNC void 1.864 +ucnv_extContinueMatchFromU(UConverter *cnv, 1.865 + UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, 1.866 + UErrorCode *pErrorCode) { 1.867 + uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 1.868 + int32_t match; 1.869 + 1.870 + match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes, 1.871 + cnv->preFromUFirstCP, 1.872 + cnv->preFromU, cnv->preFromULength, 1.873 + pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), 1.874 + &value, 1.875 + cnv->useFallback, pArgs->flush); 1.876 + if(match>=2) { 1.877 + match-=2; /* remove 2 for the initial code point */ 1.878 + 1.879 + if(match>=cnv->preFromULength) { 1.880 + /* advance src pointer for the consumed input */ 1.881 + pArgs->source+=match-cnv->preFromULength; 1.882 + cnv->preFromULength=0; 1.883 + } else { 1.884 + /* the match did not use all of preFromU[] - keep the rest for replay */ 1.885 + int32_t length=cnv->preFromULength-match; 1.886 + uprv_memmove(cnv->preFromU, cnv->preFromU+match, length*U_SIZEOF_UCHAR); 1.887 + cnv->preFromULength=(int8_t)-length; 1.888 + } 1.889 + 1.890 + /* finish the partial match */ 1.891 + cnv->preFromUFirstCP=U_SENTINEL; 1.892 + 1.893 + /* write result */ 1.894 + ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes, 1.895 + value, 1.896 + &pArgs->target, pArgs->targetLimit, 1.897 + &pArgs->offsets, srcIndex, 1.898 + pErrorCode); 1.899 + } else if(match<0) { 1.900 + /* save state for partial match */ 1.901 + const UChar *s; 1.902 + int32_t j; 1.903 + 1.904 + /* just _append_ the newly consumed input to preFromU[] */ 1.905 + s=pArgs->source; 1.906 + match=-match-2; /* remove 2 for the initial code point */ 1.907 + for(j=cnv->preFromULength; j<match; ++j) { 1.908 + U_ASSERT(j>=0); 1.909 + cnv->preFromU[j]=*s++; 1.910 + } 1.911 + pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ 1.912 + cnv->preFromULength=(int8_t)match; 1.913 + } else /* match==0 or 1 */ { 1.914 + /* 1.915 + * no match 1.916 + * 1.917 + * We need to split the previous input into two parts: 1.918 + * 1.919 + * 1. The first code point is unmappable - that's how we got into 1.920 + * trying the extension data in the first place. 1.921 + * We need to move it from the preFromU buffer 1.922 + * to the error buffer, set an error code, 1.923 + * and prepare the rest of the previous input for 2. 1.924 + * 1.925 + * 2. The rest of the previous input must be converted once we 1.926 + * come back from the callback for the first code point. 1.927 + * At that time, we have to try again from scratch to convert 1.928 + * these input characters. 1.929 + * The replay will be handled by the ucnv.c conversion code. 1.930 + */ 1.931 + 1.932 + if(match==1) { 1.933 + /* matched, no mapping but request for <subchar1> */ 1.934 + cnv->useSubChar1=TRUE; 1.935 + } 1.936 + 1.937 + /* move the first code point to the error field */ 1.938 + cnv->fromUChar32=cnv->preFromUFirstCP; 1.939 + cnv->preFromUFirstCP=U_SENTINEL; 1.940 + 1.941 + /* mark preFromU for replay */ 1.942 + cnv->preFromULength=-cnv->preFromULength; 1.943 + 1.944 + /* set the error code for unassigned */ 1.945 + *pErrorCode=U_INVALID_CHAR_FOUND; 1.946 + } 1.947 +} 1.948 + 1.949 +static UBool 1.950 +extSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) { 1.951 + if(which==UCNV_ROUNDTRIP_SET) { 1.952 + // Add only code points for which the roundtrip flag is set. 1.953 + // Do not add any fallbacks, even if ucnv_fromUnicode() would use them 1.954 + // (fallbacks from PUA). See the API docs for ucnv_getUnicodeSet(). 1.955 + // 1.956 + // By analogy, also do not add "good one-way" mappings. 1.957 + // 1.958 + // Do not add entries with reserved bits set. 1.959 + if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))!= 1.960 + UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) { 1.961 + return FALSE; 1.962 + } 1.963 + } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { 1.964 + // Do not add entries with reserved bits set. 1.965 + if((value&UCNV_EXT_FROM_U_RESERVED_MASK)!=0) { 1.966 + return FALSE; 1.967 + } 1.968 + } 1.969 + // Do not add <subchar1> entries or other (future?) pseudo-entries 1.970 + // with an output length of 0. 1.971 + return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength; 1.972 +} 1.973 + 1.974 +static void 1.975 +ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, 1.976 + const int32_t *cx, 1.977 + const USetAdder *sa, 1.978 + UConverterUnicodeSet which, 1.979 + int32_t minLength, 1.980 + UChar32 firstCP, 1.981 + UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, 1.982 + int32_t sectionIndex, 1.983 + UErrorCode *pErrorCode) { 1.984 + const UChar *fromUSectionUChars; 1.985 + const uint32_t *fromUSectionValues; 1.986 + 1.987 + uint32_t value; 1.988 + int32_t i, count; 1.989 + 1.990 + fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex; 1.991 + fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex; 1.992 + 1.993 + /* read first pair of the section */ 1.994 + count=*fromUSectionUChars++; 1.995 + value=*fromUSectionValues++; 1.996 + 1.997 + if(extSetUseMapping(which, minLength, value)) { 1.998 + if(length==U16_LENGTH(firstCP)) { 1.999 + /* add the initial code point */ 1.1000 + sa->add(sa->set, firstCP); 1.1001 + } else { 1.1002 + /* add the string so far */ 1.1003 + sa->addString(sa->set, s, length); 1.1004 + } 1.1005 + } 1.1006 + 1.1007 + for(i=0; i<count; ++i) { 1.1008 + /* append this code unit and recurse or add the string */ 1.1009 + s[length]=fromUSectionUChars[i]; 1.1010 + value=fromUSectionValues[i]; 1.1011 + 1.1012 + if(value==0) { 1.1013 + /* no mapping, do nothing */ 1.1014 + } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 1.1015 + ucnv_extGetUnicodeSetString( 1.1016 + sharedData, cx, sa, which, minLength, 1.1017 + firstCP, s, length+1, 1.1018 + (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), 1.1019 + pErrorCode); 1.1020 + } else if(extSetUseMapping(which, minLength, value)) { 1.1021 + sa->addString(sa->set, s, length+1); 1.1022 + } 1.1023 + } 1.1024 +} 1.1025 + 1.1026 +U_CFUNC void 1.1027 +ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, 1.1028 + const USetAdder *sa, 1.1029 + UConverterUnicodeSet which, 1.1030 + UConverterSetFilter filter, 1.1031 + UErrorCode *pErrorCode) { 1.1032 + const int32_t *cx; 1.1033 + const uint16_t *stage12, *stage3, *ps2, *ps3; 1.1034 + const uint32_t *stage3b; 1.1035 + 1.1036 + uint32_t value; 1.1037 + int32_t st1, stage1Length, st2, st3, minLength; 1.1038 + 1.1039 + UChar s[UCNV_EXT_MAX_UCHARS]; 1.1040 + UChar32 c; 1.1041 + int32_t length; 1.1042 + 1.1043 + cx=sharedData->mbcs.extIndexes; 1.1044 + if(cx==NULL) { 1.1045 + return; 1.1046 + } 1.1047 + 1.1048 + stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); 1.1049 + stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); 1.1050 + stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); 1.1051 + 1.1052 + stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; 1.1053 + 1.1054 + /* enumerate the from-Unicode trie table */ 1.1055 + c=0; /* keep track of the current code point while enumerating */ 1.1056 + 1.1057 + if(filter==UCNV_SET_FILTER_2022_CN) { 1.1058 + minLength=3; 1.1059 + } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || 1.1060 + filter!=UCNV_SET_FILTER_NONE 1.1061 + ) { 1.1062 + /* DBCS-only, ignore single-byte results */ 1.1063 + minLength=2; 1.1064 + } else { 1.1065 + minLength=1; 1.1066 + } 1.1067 + 1.1068 + /* 1.1069 + * the trie enumeration is almost the same as 1.1070 + * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 1.1071 + */ 1.1072 + for(st1=0; st1<stage1Length; ++st1) { 1.1073 + st2=stage12[st1]; 1.1074 + if(st2>stage1Length) { 1.1075 + ps2=stage12+st2; 1.1076 + for(st2=0; st2<64; ++st2) { 1.1077 + if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) { 1.1078 + /* read the stage 3 block */ 1.1079 + ps3=stage3+st3; 1.1080 + 1.1081 + do { 1.1082 + value=stage3b[*ps3++]; 1.1083 + if(value==0) { 1.1084 + /* no mapping, do nothing */ 1.1085 + } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 1.1086 + // Recurse for partial results. 1.1087 + length=0; 1.1088 + U16_APPEND_UNSAFE(s, length, c); 1.1089 + ucnv_extGetUnicodeSetString( 1.1090 + sharedData, cx, sa, which, minLength, 1.1091 + c, s, length, 1.1092 + (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), 1.1093 + pErrorCode); 1.1094 + } else if(extSetUseMapping(which, minLength, value)) { 1.1095 + switch(filter) { 1.1096 + case UCNV_SET_FILTER_2022_CN: 1.1097 + if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { 1.1098 + continue; 1.1099 + } 1.1100 + break; 1.1101 + case UCNV_SET_FILTER_SJIS: 1.1102 + if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { 1.1103 + continue; 1.1104 + } 1.1105 + break; 1.1106 + case UCNV_SET_FILTER_GR94DBCS: 1.1107 + if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && 1.1108 + (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && 1.1109 + (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { 1.1110 + continue; 1.1111 + } 1.1112 + break; 1.1113 + case UCNV_SET_FILTER_HZ: 1.1114 + if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && 1.1115 + (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && 1.1116 + (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { 1.1117 + continue; 1.1118 + } 1.1119 + break; 1.1120 + default: 1.1121 + /* 1.1122 + * UCNV_SET_FILTER_NONE, 1.1123 + * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength 1.1124 + */ 1.1125 + break; 1.1126 + } 1.1127 + sa->add(sa->set, c); 1.1128 + } 1.1129 + } while((++c&0xf)!=0); 1.1130 + } else { 1.1131 + c+=16; /* empty stage 3 block */ 1.1132 + } 1.1133 + } 1.1134 + } else { 1.1135 + c+=1024; /* empty stage 2 block */ 1.1136 + } 1.1137 + } 1.1138 +} 1.1139 + 1.1140 +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */