1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/utrie2.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,736 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* 1.7 +* Copyright (C) 2001-2013, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +****************************************************************************** 1.11 +* file name: utrie2.cpp 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2008aug16 (starting from a copy of utrie.c) 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* This is a common implementation of a Unicode trie. 1.20 +* It is a kind of compressed, serializable table of 16- or 32-bit values associated with 1.21 +* Unicode code points (0..0x10ffff). 1.22 +* This is the second common version of a Unicode trie (hence the name UTrie2). 1.23 +* See utrie2.h for a comparison. 1.24 +* 1.25 +* This file contains only the runtime and enumeration code, for read-only access. 1.26 +* See utrie2_builder.c for the builder code. 1.27 +*/ 1.28 +#ifdef UTRIE2_DEBUG 1.29 +# include <stdio.h> 1.30 +#endif 1.31 + 1.32 +#include "unicode/utypes.h" 1.33 +#include "unicode/utf.h" 1.34 +#include "unicode/utf8.h" 1.35 +#include "unicode/utf16.h" 1.36 +#include "cmemory.h" 1.37 +#include "utrie2.h" 1.38 +#include "utrie2_impl.h" 1.39 +#include "uassert.h" 1.40 + 1.41 +/* Public UTrie2 API implementation ----------------------------------------- */ 1.42 + 1.43 +static uint32_t 1.44 +get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) { 1.45 + int32_t i2, block; 1.46 + 1.47 + if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) { 1.48 + return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY]; 1.49 + } 1.50 + 1.51 + if(U_IS_LEAD(c) && fromLSCP) { 1.52 + i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ 1.53 + (c>>UTRIE2_SHIFT_2); 1.54 + } else { 1.55 + i2=trie->index1[c>>UTRIE2_SHIFT_1]+ 1.56 + ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); 1.57 + } 1.58 + block=trie->index2[i2]; 1.59 + return trie->data[block+(c&UTRIE2_DATA_MASK)]; 1.60 +} 1.61 + 1.62 +U_CAPI uint32_t U_EXPORT2 1.63 +utrie2_get32(const UTrie2 *trie, UChar32 c) { 1.64 + if(trie->data16!=NULL) { 1.65 + return UTRIE2_GET16(trie, c); 1.66 + } else if(trie->data32!=NULL) { 1.67 + return UTRIE2_GET32(trie, c); 1.68 + } else if((uint32_t)c>0x10ffff) { 1.69 + return trie->errorValue; 1.70 + } else { 1.71 + return get32(trie->newTrie, c, TRUE); 1.72 + } 1.73 +} 1.74 + 1.75 +U_CAPI uint32_t U_EXPORT2 1.76 +utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) { 1.77 + if(!U_IS_LEAD(c)) { 1.78 + return trie->errorValue; 1.79 + } 1.80 + if(trie->data16!=NULL) { 1.81 + return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c); 1.82 + } else if(trie->data32!=NULL) { 1.83 + return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); 1.84 + } else { 1.85 + return get32(trie->newTrie, c, FALSE); 1.86 + } 1.87 +} 1.88 + 1.89 +static inline int32_t 1.90 +u8Index(const UTrie2 *trie, UChar32 c, int32_t i) { 1.91 + int32_t idx= 1.92 + _UTRIE2_INDEX_FROM_CP( 1.93 + trie, 1.94 + trie->data32==NULL ? trie->indexLength : 0, 1.95 + c); 1.96 + return (idx<<3)|i; 1.97 +} 1.98 + 1.99 +U_CAPI int32_t U_EXPORT2 1.100 +utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, 1.101 + const uint8_t *src, const uint8_t *limit) { 1.102 + int32_t i, length; 1.103 + i=0; 1.104 + /* support 64-bit pointers by avoiding cast of arbitrary difference */ 1.105 + if((limit-src)<=7) { 1.106 + length=(int32_t)(limit-src); 1.107 + } else { 1.108 + length=7; 1.109 + } 1.110 + c=utf8_nextCharSafeBody(src, &i, length, c, -1); 1.111 + return u8Index(trie, c, i); 1.112 +} 1.113 + 1.114 +U_CAPI int32_t U_EXPORT2 1.115 +utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, 1.116 + const uint8_t *start, const uint8_t *src) { 1.117 + int32_t i, length; 1.118 + /* support 64-bit pointers by avoiding cast of arbitrary difference */ 1.119 + if((src-start)<=7) { 1.120 + i=length=(int32_t)(src-start); 1.121 + } else { 1.122 + i=length=7; 1.123 + start=src-7; 1.124 + } 1.125 + c=utf8_prevCharSafeBody(start, 0, &i, c, -1); 1.126 + i=length-i; /* number of bytes read backward from src */ 1.127 + return u8Index(trie, c, i); 1.128 +} 1.129 + 1.130 +U_CAPI UTrie2 * U_EXPORT2 1.131 +utrie2_openFromSerialized(UTrie2ValueBits valueBits, 1.132 + const void *data, int32_t length, int32_t *pActualLength, 1.133 + UErrorCode *pErrorCode) { 1.134 + const UTrie2Header *header; 1.135 + const uint16_t *p16; 1.136 + int32_t actualLength; 1.137 + 1.138 + UTrie2 tempTrie; 1.139 + UTrie2 *trie; 1.140 + 1.141 + if(U_FAILURE(*pErrorCode)) { 1.142 + return 0; 1.143 + } 1.144 + 1.145 + if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) || 1.146 + valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits 1.147 + ) { 1.148 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.149 + return 0; 1.150 + } 1.151 + 1.152 + /* enough data for a trie header? */ 1.153 + if(length<(int32_t)sizeof(UTrie2Header)) { 1.154 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.155 + return 0; 1.156 + } 1.157 + 1.158 + /* check the signature */ 1.159 + header=(const UTrie2Header *)data; 1.160 + if(header->signature!=UTRIE2_SIG) { 1.161 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.162 + return 0; 1.163 + } 1.164 + 1.165 + /* get the options */ 1.166 + if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) { 1.167 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.168 + return 0; 1.169 + } 1.170 + 1.171 + /* get the length values and offsets */ 1.172 + uprv_memset(&tempTrie, 0, sizeof(tempTrie)); 1.173 + tempTrie.indexLength=header->indexLength; 1.174 + tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT; 1.175 + tempTrie.index2NullOffset=header->index2NullOffset; 1.176 + tempTrie.dataNullOffset=header->dataNullOffset; 1.177 + 1.178 + tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1; 1.179 + tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY; 1.180 + if(valueBits==UTRIE2_16_VALUE_BITS) { 1.181 + tempTrie.highValueIndex+=tempTrie.indexLength; 1.182 + } 1.183 + 1.184 + /* calculate the actual length */ 1.185 + actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2; 1.186 + if(valueBits==UTRIE2_16_VALUE_BITS) { 1.187 + actualLength+=tempTrie.dataLength*2; 1.188 + } else { 1.189 + actualLength+=tempTrie.dataLength*4; 1.190 + } 1.191 + if(length<actualLength) { 1.192 + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */ 1.193 + return 0; 1.194 + } 1.195 + 1.196 + /* allocate the trie */ 1.197 + trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 1.198 + if(trie==NULL) { 1.199 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.200 + return 0; 1.201 + } 1.202 + uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); 1.203 + trie->memory=(uint32_t *)data; 1.204 + trie->length=actualLength; 1.205 + trie->isMemoryOwned=FALSE; 1.206 + 1.207 + /* set the pointers to its index and data arrays */ 1.208 + p16=(const uint16_t *)(header+1); 1.209 + trie->index=p16; 1.210 + p16+=trie->indexLength; 1.211 + 1.212 + /* get the data */ 1.213 + switch(valueBits) { 1.214 + case UTRIE2_16_VALUE_BITS: 1.215 + trie->data16=p16; 1.216 + trie->data32=NULL; 1.217 + trie->initialValue=trie->index[trie->dataNullOffset]; 1.218 + trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET]; 1.219 + break; 1.220 + case UTRIE2_32_VALUE_BITS: 1.221 + trie->data16=NULL; 1.222 + trie->data32=(const uint32_t *)p16; 1.223 + trie->initialValue=trie->data32[trie->dataNullOffset]; 1.224 + trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET]; 1.225 + break; 1.226 + default: 1.227 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.228 + return 0; 1.229 + } 1.230 + 1.231 + if(pActualLength!=NULL) { 1.232 + *pActualLength=actualLength; 1.233 + } 1.234 + return trie; 1.235 +} 1.236 + 1.237 +U_CAPI UTrie2 * U_EXPORT2 1.238 +utrie2_openDummy(UTrie2ValueBits valueBits, 1.239 + uint32_t initialValue, uint32_t errorValue, 1.240 + UErrorCode *pErrorCode) { 1.241 + UTrie2 *trie; 1.242 + UTrie2Header *header; 1.243 + uint32_t *p; 1.244 + uint16_t *dest16; 1.245 + int32_t indexLength, dataLength, length, i; 1.246 + int32_t dataMove; /* >0 if the data is moved to the end of the index array */ 1.247 + 1.248 + if(U_FAILURE(*pErrorCode)) { 1.249 + return 0; 1.250 + } 1.251 + 1.252 + if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) { 1.253 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.254 + return 0; 1.255 + } 1.256 + 1.257 + /* calculate the total length of the dummy trie data */ 1.258 + indexLength=UTRIE2_INDEX_1_OFFSET; 1.259 + dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY; 1.260 + length=(int32_t)sizeof(UTrie2Header)+indexLength*2; 1.261 + if(valueBits==UTRIE2_16_VALUE_BITS) { 1.262 + length+=dataLength*2; 1.263 + } else { 1.264 + length+=dataLength*4; 1.265 + } 1.266 + 1.267 + /* allocate the trie */ 1.268 + trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 1.269 + if(trie==NULL) { 1.270 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.271 + return 0; 1.272 + } 1.273 + uprv_memset(trie, 0, sizeof(UTrie2)); 1.274 + trie->memory=uprv_malloc(length); 1.275 + if(trie->memory==NULL) { 1.276 + uprv_free(trie); 1.277 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.278 + return 0; 1.279 + } 1.280 + trie->length=length; 1.281 + trie->isMemoryOwned=TRUE; 1.282 + 1.283 + /* set the UTrie2 fields */ 1.284 + if(valueBits==UTRIE2_16_VALUE_BITS) { 1.285 + dataMove=indexLength; 1.286 + } else { 1.287 + dataMove=0; 1.288 + } 1.289 + 1.290 + trie->indexLength=indexLength; 1.291 + trie->dataLength=dataLength; 1.292 + trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET; 1.293 + trie->dataNullOffset=(uint16_t)dataMove; 1.294 + trie->initialValue=initialValue; 1.295 + trie->errorValue=errorValue; 1.296 + trie->highStart=0; 1.297 + trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; 1.298 + 1.299 + /* set the header fields */ 1.300 + header=(UTrie2Header *)trie->memory; 1.301 + 1.302 + header->signature=UTRIE2_SIG; /* "Tri2" */ 1.303 + header->options=(uint16_t)valueBits; 1.304 + 1.305 + header->indexLength=(uint16_t)indexLength; 1.306 + header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT); 1.307 + header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET; 1.308 + header->dataNullOffset=(uint16_t)dataMove; 1.309 + header->shiftedHighStart=0; 1.310 + 1.311 + /* fill the index and data arrays */ 1.312 + dest16=(uint16_t *)(header+1); 1.313 + trie->index=dest16; 1.314 + 1.315 + /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */ 1.316 + for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { 1.317 + *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */ 1.318 + } 1.319 + 1.320 + /* write UTF-8 2-byte index-2 values, not right-shifted */ 1.321 + for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ 1.322 + *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); 1.323 + } 1.324 + for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ 1.325 + *dest16++=(uint16_t)dataMove; 1.326 + } 1.327 + 1.328 + /* write the 16/32-bit data array */ 1.329 + switch(valueBits) { 1.330 + case UTRIE2_16_VALUE_BITS: 1.331 + /* write 16-bit data values */ 1.332 + trie->data16=dest16; 1.333 + trie->data32=NULL; 1.334 + for(i=0; i<0x80; ++i) { 1.335 + *dest16++=(uint16_t)initialValue; 1.336 + } 1.337 + for(; i<0xc0; ++i) { 1.338 + *dest16++=(uint16_t)errorValue; 1.339 + } 1.340 + /* highValue and reserved values */ 1.341 + for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { 1.342 + *dest16++=(uint16_t)initialValue; 1.343 + } 1.344 + break; 1.345 + case UTRIE2_32_VALUE_BITS: 1.346 + /* write 32-bit data values */ 1.347 + p=(uint32_t *)dest16; 1.348 + trie->data16=NULL; 1.349 + trie->data32=p; 1.350 + for(i=0; i<0x80; ++i) { 1.351 + *p++=initialValue; 1.352 + } 1.353 + for(; i<0xc0; ++i) { 1.354 + *p++=errorValue; 1.355 + } 1.356 + /* highValue and reserved values */ 1.357 + for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { 1.358 + *p++=initialValue; 1.359 + } 1.360 + break; 1.361 + default: 1.362 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.363 + return 0; 1.364 + } 1.365 + 1.366 + return trie; 1.367 +} 1.368 + 1.369 +U_CAPI void U_EXPORT2 1.370 +utrie2_close(UTrie2 *trie) { 1.371 + if(trie!=NULL) { 1.372 + if(trie->isMemoryOwned) { 1.373 + uprv_free(trie->memory); 1.374 + } 1.375 + if(trie->newTrie!=NULL) { 1.376 + uprv_free(trie->newTrie->data); 1.377 + uprv_free(trie->newTrie); 1.378 + } 1.379 + uprv_free(trie); 1.380 + } 1.381 +} 1.382 + 1.383 +U_CAPI int32_t U_EXPORT2 1.384 +utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) { 1.385 + uint32_t signature; 1.386 + if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) { 1.387 + return 0; 1.388 + } 1.389 + signature=*(const uint32_t *)data; 1.390 + if(signature==UTRIE2_SIG) { 1.391 + return 2; 1.392 + } 1.393 + if(anyEndianOk && signature==UTRIE2_OE_SIG) { 1.394 + return 2; 1.395 + } 1.396 + if(signature==UTRIE_SIG) { 1.397 + return 1; 1.398 + } 1.399 + if(anyEndianOk && signature==UTRIE_OE_SIG) { 1.400 + return 1; 1.401 + } 1.402 + return 0; 1.403 +} 1.404 + 1.405 +U_CAPI int32_t U_EXPORT2 1.406 +utrie2_swap(const UDataSwapper *ds, 1.407 + const void *inData, int32_t length, void *outData, 1.408 + UErrorCode *pErrorCode) { 1.409 + const UTrie2Header *inTrie; 1.410 + UTrie2Header trie; 1.411 + int32_t dataLength, size; 1.412 + UTrie2ValueBits valueBits; 1.413 + 1.414 + if(U_FAILURE(*pErrorCode)) { 1.415 + return 0; 1.416 + } 1.417 + if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { 1.418 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.419 + return 0; 1.420 + } 1.421 + 1.422 + /* setup and swapping */ 1.423 + if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { 1.424 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.425 + return 0; 1.426 + } 1.427 + 1.428 + inTrie=(const UTrie2Header *)inData; 1.429 + trie.signature=ds->readUInt32(inTrie->signature); 1.430 + trie.options=ds->readUInt16(inTrie->options); 1.431 + trie.indexLength=ds->readUInt16(inTrie->indexLength); 1.432 + trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); 1.433 + 1.434 + valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); 1.435 + dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; 1.436 + 1.437 + if( trie.signature!=UTRIE2_SIG || 1.438 + valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || 1.439 + trie.indexLength<UTRIE2_INDEX_1_OFFSET || 1.440 + dataLength<UTRIE2_DATA_START_OFFSET 1.441 + ) { 1.442 + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ 1.443 + return 0; 1.444 + } 1.445 + 1.446 + size=sizeof(UTrie2Header)+trie.indexLength*2; 1.447 + switch(valueBits) { 1.448 + case UTRIE2_16_VALUE_BITS: 1.449 + size+=dataLength*2; 1.450 + break; 1.451 + case UTRIE2_32_VALUE_BITS: 1.452 + size+=dataLength*4; 1.453 + break; 1.454 + default: 1.455 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.456 + return 0; 1.457 + } 1.458 + 1.459 + if(length>=0) { 1.460 + UTrie2Header *outTrie; 1.461 + 1.462 + if(length<size) { 1.463 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.464 + return 0; 1.465 + } 1.466 + 1.467 + outTrie=(UTrie2Header *)outData; 1.468 + 1.469 + /* swap the header */ 1.470 + ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); 1.471 + ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); 1.472 + 1.473 + /* swap the index and the data */ 1.474 + switch(valueBits) { 1.475 + case UTRIE2_16_VALUE_BITS: 1.476 + ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); 1.477 + break; 1.478 + case UTRIE2_32_VALUE_BITS: 1.479 + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); 1.480 + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, 1.481 + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); 1.482 + break; 1.483 + default: 1.484 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.485 + return 0; 1.486 + } 1.487 + } 1.488 + 1.489 + return size; 1.490 +} 1.491 + 1.492 +// utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c 1.493 +// to avoid a dependency from utrie2.cpp on utrie.c. 1.494 + 1.495 +/* enumeration -------------------------------------------------------------- */ 1.496 + 1.497 +#define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b)) 1.498 + 1.499 +/* default UTrie2EnumValue() returns the input value itself */ 1.500 +static uint32_t U_CALLCONV 1.501 +enumSameValue(const void * /*context*/, uint32_t value) { 1.502 + return value; 1.503 +} 1.504 + 1.505 +/** 1.506 + * Enumerate all ranges of code points with the same relevant values. 1.507 + * The values are transformed from the raw trie entries by the enumValue function. 1.508 + * 1.509 + * Currently requires start<limit and both start and limit must be multiples 1.510 + * of UTRIE2_DATA_BLOCK_LENGTH. 1.511 + * 1.512 + * Optimizations: 1.513 + * - Skip a whole block if we know that it is filled with a single value, 1.514 + * and it is the same as we visited just before. 1.515 + * - Handle the null block specially because we know a priori that it is filled 1.516 + * with a single value. 1.517 + */ 1.518 +static void 1.519 +enumEitherTrie(const UTrie2 *trie, 1.520 + UChar32 start, UChar32 limit, 1.521 + UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { 1.522 + const uint32_t *data32; 1.523 + const uint16_t *idx; 1.524 + 1.525 + uint32_t value, prevValue, initialValue; 1.526 + UChar32 c, prev, highStart; 1.527 + int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; 1.528 + 1.529 + if(enumRange==NULL) { 1.530 + return; 1.531 + } 1.532 + if(enumValue==NULL) { 1.533 + enumValue=enumSameValue; 1.534 + } 1.535 + 1.536 + if(trie->newTrie==NULL) { 1.537 + /* frozen trie */ 1.538 + idx=trie->index; 1.539 + U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */ 1.540 + data32=trie->data32; 1.541 + 1.542 + index2NullOffset=trie->index2NullOffset; 1.543 + nullBlock=trie->dataNullOffset; 1.544 + } else { 1.545 + /* unfrozen, mutable trie */ 1.546 + idx=NULL; 1.547 + data32=trie->newTrie->data; 1.548 + U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */ 1.549 + 1.550 + index2NullOffset=trie->newTrie->index2NullOffset; 1.551 + nullBlock=trie->newTrie->dataNullOffset; 1.552 + } 1.553 + 1.554 + highStart=trie->highStart; 1.555 + 1.556 + /* get the enumeration value that corresponds to an initial-value trie data entry */ 1.557 + initialValue=enumValue(context, trie->initialValue); 1.558 + 1.559 + /* set variables for previous range */ 1.560 + prevI2Block=-1; 1.561 + prevBlock=-1; 1.562 + prev=start; 1.563 + prevValue=0; 1.564 + 1.565 + /* enumerate index-2 blocks */ 1.566 + for(c=start; c<limit && c<highStart;) { 1.567 + /* Code point limit for iterating inside this i2Block. */ 1.568 + UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY; 1.569 + if(limit<tempLimit) { 1.570 + tempLimit=limit; 1.571 + } 1.572 + if(c<=0xffff) { 1.573 + if(!U_IS_SURROGATE(c)) { 1.574 + i2Block=c>>UTRIE2_SHIFT_2; 1.575 + } else if(U_IS_SURROGATE_LEAD(c)) { 1.576 + /* 1.577 + * Enumerate values for lead surrogate code points, not code units: 1.578 + * This special block has half the normal length. 1.579 + */ 1.580 + i2Block=UTRIE2_LSCP_INDEX_2_OFFSET; 1.581 + tempLimit=MIN_VALUE(0xdc00, limit); 1.582 + } else { 1.583 + /* 1.584 + * Switch back to the normal part of the index-2 table. 1.585 + * Enumerate the second half of the surrogates block. 1.586 + */ 1.587 + i2Block=0xd800>>UTRIE2_SHIFT_2; 1.588 + tempLimit=MIN_VALUE(0xe000, limit); 1.589 + } 1.590 + } else { 1.591 + /* supplementary code points */ 1.592 + if(idx!=NULL) { 1.593 + i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ 1.594 + (c>>UTRIE2_SHIFT_1)]; 1.595 + } else { 1.596 + i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1]; 1.597 + } 1.598 + if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) { 1.599 + /* 1.600 + * The index-2 block is the same as the previous one, and filled with prevValue. 1.601 + * Only possible for supplementary code points because the linear-BMP index-2 1.602 + * table creates unique i2Block values. 1.603 + */ 1.604 + c+=UTRIE2_CP_PER_INDEX_1_ENTRY; 1.605 + continue; 1.606 + } 1.607 + } 1.608 + prevI2Block=i2Block; 1.609 + if(i2Block==index2NullOffset) { 1.610 + /* this is the null index-2 block */ 1.611 + if(prevValue!=initialValue) { 1.612 + if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 1.613 + return; 1.614 + } 1.615 + prevBlock=nullBlock; 1.616 + prev=c; 1.617 + prevValue=initialValue; 1.618 + } 1.619 + c+=UTRIE2_CP_PER_INDEX_1_ENTRY; 1.620 + } else { 1.621 + /* enumerate data blocks for one index-2 block */ 1.622 + int32_t i2, i2Limit; 1.623 + i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 1.624 + if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) { 1.625 + i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 1.626 + } else { 1.627 + i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH; 1.628 + } 1.629 + for(; i2<i2Limit; ++i2) { 1.630 + if(idx!=NULL) { 1.631 + block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT; 1.632 + } else { 1.633 + block=trie->newTrie->index2[i2Block+i2]; 1.634 + } 1.635 + if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) { 1.636 + /* the block is the same as the previous one, and filled with prevValue */ 1.637 + c+=UTRIE2_DATA_BLOCK_LENGTH; 1.638 + continue; 1.639 + } 1.640 + prevBlock=block; 1.641 + if(block==nullBlock) { 1.642 + /* this is the null data block */ 1.643 + if(prevValue!=initialValue) { 1.644 + if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 1.645 + return; 1.646 + } 1.647 + prev=c; 1.648 + prevValue=initialValue; 1.649 + } 1.650 + c+=UTRIE2_DATA_BLOCK_LENGTH; 1.651 + } else { 1.652 + for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) { 1.653 + value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); 1.654 + if(value!=prevValue) { 1.655 + if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 1.656 + return; 1.657 + } 1.658 + prev=c; 1.659 + prevValue=value; 1.660 + } 1.661 + ++c; 1.662 + } 1.663 + } 1.664 + } 1.665 + } 1.666 + } 1.667 + 1.668 + if(c>limit) { 1.669 + c=limit; /* could be higher if in the index2NullOffset */ 1.670 + } else if(c<limit) { 1.671 + /* c==highStart<limit */ 1.672 + uint32_t highValue; 1.673 + if(idx!=NULL) { 1.674 + highValue= 1.675 + data32!=NULL ? 1.676 + data32[trie->highValueIndex] : 1.677 + idx[trie->highValueIndex]; 1.678 + } else { 1.679 + highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY]; 1.680 + } 1.681 + value=enumValue(context, highValue); 1.682 + if(value!=prevValue) { 1.683 + if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 1.684 + return; 1.685 + } 1.686 + prev=c; 1.687 + prevValue=value; 1.688 + } 1.689 + c=limit; 1.690 + } 1.691 + 1.692 + /* deliver last range */ 1.693 + enumRange(context, prev, c-1, prevValue); 1.694 +} 1.695 + 1.696 +U_CAPI void U_EXPORT2 1.697 +utrie2_enum(const UTrie2 *trie, 1.698 + UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { 1.699 + enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context); 1.700 +} 1.701 + 1.702 +U_CAPI void U_EXPORT2 1.703 +utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, 1.704 + UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, 1.705 + const void *context) { 1.706 + if(!U16_IS_LEAD(lead)) { 1.707 + return; 1.708 + } 1.709 + lead=(lead-0xd7c0)<<10; /* start code point */ 1.710 + enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context); 1.711 +} 1.712 + 1.713 +/* C++ convenience wrappers ------------------------------------------------- */ 1.714 + 1.715 +U_NAMESPACE_BEGIN 1.716 + 1.717 +uint16_t BackwardUTrie2StringIterator::previous16() { 1.718 + codePointLimit=codePointStart; 1.719 + if(start>=codePointStart) { 1.720 + codePoint=U_SENTINEL; 1.721 + return 0; 1.722 + } 1.723 + uint16_t result; 1.724 + UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); 1.725 + return result; 1.726 +} 1.727 + 1.728 +uint16_t ForwardUTrie2StringIterator::next16() { 1.729 + codePointStart=codePointLimit; 1.730 + if(codePointLimit==limit) { 1.731 + codePoint=U_SENTINEL; 1.732 + return 0; 1.733 + } 1.734 + uint16_t result; 1.735 + UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); 1.736 + return result; 1.737 +} 1.738 + 1.739 +U_NAMESPACE_END