1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/utrie.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1232 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* 1.7 +* Copyright (C) 2001-2012, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +****************************************************************************** 1.11 +* file name: utrie.cpp 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2001oct20 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* This is a common implementation of a "folded" trie. 1.20 +* It is a kind of compressed, serializable table of 16- or 32-bit values associated with 1.21 +* Unicode code points (0..0x10ffff). 1.22 +*/ 1.23 + 1.24 +#ifdef UTRIE_DEBUG 1.25 +# include <stdio.h> 1.26 +#endif 1.27 + 1.28 +#include "unicode/utypes.h" 1.29 +#include "cmemory.h" 1.30 +#include "utrie.h" 1.31 + 1.32 +/* miscellaneous ------------------------------------------------------------ */ 1.33 + 1.34 +#undef ABS 1.35 +#define ABS(x) ((x)>=0 ? (x) : -(x)) 1.36 + 1.37 +static inline UBool 1.38 +equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { 1.39 + while(length>0 && *s==*t) { 1.40 + ++s; 1.41 + ++t; 1.42 + --length; 1.43 + } 1.44 + return (UBool)(length==0); 1.45 +} 1.46 + 1.47 +/* Building a trie ----------------------------------------------------------*/ 1.48 + 1.49 +U_CAPI UNewTrie * U_EXPORT2 1.50 +utrie_open(UNewTrie *fillIn, 1.51 + uint32_t *aliasData, int32_t maxDataLength, 1.52 + uint32_t initialValue, uint32_t leadUnitValue, 1.53 + UBool latin1Linear) { 1.54 + UNewTrie *trie; 1.55 + int32_t i, j; 1.56 + 1.57 + if( maxDataLength<UTRIE_DATA_BLOCK_LENGTH || 1.58 + (latin1Linear && maxDataLength<1024) 1.59 + ) { 1.60 + return NULL; 1.61 + } 1.62 + 1.63 + if(fillIn!=NULL) { 1.64 + trie=fillIn; 1.65 + } else { 1.66 + trie=(UNewTrie *)uprv_malloc(sizeof(UNewTrie)); 1.67 + if(trie==NULL) { 1.68 + return NULL; 1.69 + } 1.70 + } 1.71 + uprv_memset(trie, 0, sizeof(UNewTrie)); 1.72 + trie->isAllocated= (UBool)(fillIn==NULL); 1.73 + 1.74 + if(aliasData!=NULL) { 1.75 + trie->data=aliasData; 1.76 + trie->isDataAllocated=FALSE; 1.77 + } else { 1.78 + trie->data=(uint32_t *)uprv_malloc(maxDataLength*4); 1.79 + if(trie->data==NULL) { 1.80 + uprv_free(trie); 1.81 + return NULL; 1.82 + } 1.83 + trie->isDataAllocated=TRUE; 1.84 + } 1.85 + 1.86 + /* preallocate and reset the first data block (block index 0) */ 1.87 + j=UTRIE_DATA_BLOCK_LENGTH; 1.88 + 1.89 + if(latin1Linear) { 1.90 + /* preallocate and reset the first block (number 0) and Latin-1 (U+0000..U+00ff) after that */ 1.91 + /* made sure above that maxDataLength>=1024 */ 1.92 + 1.93 + /* set indexes to point to consecutive data blocks */ 1.94 + i=0; 1.95 + do { 1.96 + /* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */ 1.97 + trie->index[i++]=j; 1.98 + j+=UTRIE_DATA_BLOCK_LENGTH; 1.99 + } while(i<(256>>UTRIE_SHIFT)); 1.100 + } 1.101 + 1.102 + /* reset the initially allocated blocks to the initial value */ 1.103 + trie->dataLength=j; 1.104 + while(j>0) { 1.105 + trie->data[--j]=initialValue; 1.106 + } 1.107 + 1.108 + trie->leadUnitValue=leadUnitValue; 1.109 + trie->indexLength=UTRIE_MAX_INDEX_LENGTH; 1.110 + trie->dataCapacity=maxDataLength; 1.111 + trie->isLatin1Linear=latin1Linear; 1.112 + trie->isCompacted=FALSE; 1.113 + return trie; 1.114 +} 1.115 + 1.116 +U_CAPI UNewTrie * U_EXPORT2 1.117 +utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataCapacity) { 1.118 + UNewTrie *trie; 1.119 + UBool isDataAllocated; 1.120 + 1.121 + /* do not clone if other is not valid or already compacted */ 1.122 + if(other==NULL || other->data==NULL || other->isCompacted) { 1.123 + return NULL; 1.124 + } 1.125 + 1.126 + /* clone data */ 1.127 + if(aliasData!=NULL && aliasDataCapacity>=other->dataCapacity) { 1.128 + isDataAllocated=FALSE; 1.129 + } else { 1.130 + aliasDataCapacity=other->dataCapacity; 1.131 + aliasData=(uint32_t *)uprv_malloc(other->dataCapacity*4); 1.132 + if(aliasData==NULL) { 1.133 + return NULL; 1.134 + } 1.135 + isDataAllocated=TRUE; 1.136 + } 1.137 + 1.138 + trie=utrie_open(fillIn, aliasData, aliasDataCapacity, 1.139 + other->data[0], other->leadUnitValue, 1.140 + other->isLatin1Linear); 1.141 + if(trie==NULL) { 1.142 + uprv_free(aliasData); 1.143 + } else { 1.144 + uprv_memcpy(trie->index, other->index, sizeof(trie->index)); 1.145 + uprv_memcpy(trie->data, other->data, other->dataLength*4); 1.146 + trie->dataLength=other->dataLength; 1.147 + trie->isDataAllocated=isDataAllocated; 1.148 + } 1.149 + 1.150 + return trie; 1.151 +} 1.152 + 1.153 +U_CAPI void U_EXPORT2 1.154 +utrie_close(UNewTrie *trie) { 1.155 + if(trie!=NULL) { 1.156 + if(trie->isDataAllocated) { 1.157 + uprv_free(trie->data); 1.158 + trie->data=NULL; 1.159 + } 1.160 + if(trie->isAllocated) { 1.161 + uprv_free(trie); 1.162 + } 1.163 + } 1.164 +} 1.165 + 1.166 +U_CAPI uint32_t * U_EXPORT2 1.167 +utrie_getData(UNewTrie *trie, int32_t *pLength) { 1.168 + if(trie==NULL || pLength==NULL) { 1.169 + return NULL; 1.170 + } 1.171 + 1.172 + *pLength=trie->dataLength; 1.173 + return trie->data; 1.174 +} 1.175 + 1.176 +static int32_t 1.177 +utrie_allocDataBlock(UNewTrie *trie) { 1.178 + int32_t newBlock, newTop; 1.179 + 1.180 + newBlock=trie->dataLength; 1.181 + newTop=newBlock+UTRIE_DATA_BLOCK_LENGTH; 1.182 + if(newTop>trie->dataCapacity) { 1.183 + /* out of memory in the data array */ 1.184 + return -1; 1.185 + } 1.186 + trie->dataLength=newTop; 1.187 + return newBlock; 1.188 +} 1.189 + 1.190 +/** 1.191 + * No error checking for illegal arguments. 1.192 + * 1.193 + * @return -1 if no new data block available (out of memory in data array) 1.194 + * @internal 1.195 + */ 1.196 +static int32_t 1.197 +utrie_getDataBlock(UNewTrie *trie, UChar32 c) { 1.198 + int32_t indexValue, newBlock; 1.199 + 1.200 + c>>=UTRIE_SHIFT; 1.201 + indexValue=trie->index[c]; 1.202 + if(indexValue>0) { 1.203 + return indexValue; 1.204 + } 1.205 + 1.206 + /* allocate a new data block */ 1.207 + newBlock=utrie_allocDataBlock(trie); 1.208 + if(newBlock<0) { 1.209 + /* out of memory in the data array */ 1.210 + return -1; 1.211 + } 1.212 + trie->index[c]=newBlock; 1.213 + 1.214 + /* copy-on-write for a block from a setRange() */ 1.215 + uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH); 1.216 + return newBlock; 1.217 +} 1.218 + 1.219 +/** 1.220 + * @return TRUE if the value was successfully set 1.221 + */ 1.222 +U_CAPI UBool U_EXPORT2 1.223 +utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value) { 1.224 + int32_t block; 1.225 + 1.226 + /* valid, uncompacted trie and valid c? */ 1.227 + if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) { 1.228 + return FALSE; 1.229 + } 1.230 + 1.231 + block=utrie_getDataBlock(trie, c); 1.232 + if(block<0) { 1.233 + return FALSE; 1.234 + } 1.235 + 1.236 + trie->data[block+(c&UTRIE_MASK)]=value; 1.237 + return TRUE; 1.238 +} 1.239 + 1.240 +U_CAPI uint32_t U_EXPORT2 1.241 +utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) { 1.242 + int32_t block; 1.243 + 1.244 + /* valid, uncompacted trie and valid c? */ 1.245 + if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) { 1.246 + if(pInBlockZero!=NULL) { 1.247 + *pInBlockZero=TRUE; 1.248 + } 1.249 + return 0; 1.250 + } 1.251 + 1.252 + block=trie->index[c>>UTRIE_SHIFT]; 1.253 + if(pInBlockZero!=NULL) { 1.254 + *pInBlockZero= (UBool)(block==0); 1.255 + } 1.256 + 1.257 + return trie->data[ABS(block)+(c&UTRIE_MASK)]; 1.258 +} 1.259 + 1.260 +/** 1.261 + * @internal 1.262 + */ 1.263 +static void 1.264 +utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit, 1.265 + uint32_t value, uint32_t initialValue, UBool overwrite) { 1.266 + uint32_t *pLimit; 1.267 + 1.268 + pLimit=block+limit; 1.269 + block+=start; 1.270 + if(overwrite) { 1.271 + while(block<pLimit) { 1.272 + *block++=value; 1.273 + } 1.274 + } else { 1.275 + while(block<pLimit) { 1.276 + if(*block==initialValue) { 1.277 + *block=value; 1.278 + } 1.279 + ++block; 1.280 + } 1.281 + } 1.282 +} 1.283 + 1.284 +U_CAPI UBool U_EXPORT2 1.285 +utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) { 1.286 + /* 1.287 + * repeat value in [start..limit[ 1.288 + * mark index values for repeat-data blocks by setting bit 31 of the index values 1.289 + * fill around existing values if any, if(overwrite) 1.290 + */ 1.291 + uint32_t initialValue; 1.292 + int32_t block, rest, repeatBlock; 1.293 + 1.294 + /* valid, uncompacted trie and valid indexes? */ 1.295 + if( trie==NULL || trie->isCompacted || 1.296 + (uint32_t)start>0x10ffff || (uint32_t)limit>0x110000 || start>limit 1.297 + ) { 1.298 + return FALSE; 1.299 + } 1.300 + if(start==limit) { 1.301 + return TRUE; /* nothing to do */ 1.302 + } 1.303 + 1.304 + initialValue=trie->data[0]; 1.305 + if(start&UTRIE_MASK) { 1.306 + UChar32 nextStart; 1.307 + 1.308 + /* set partial block at [start..following block boundary[ */ 1.309 + block=utrie_getDataBlock(trie, start); 1.310 + if(block<0) { 1.311 + return FALSE; 1.312 + } 1.313 + 1.314 + nextStart=(start+UTRIE_DATA_BLOCK_LENGTH)&~UTRIE_MASK; 1.315 + if(nextStart<=limit) { 1.316 + utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH, 1.317 + value, initialValue, overwrite); 1.318 + start=nextStart; 1.319 + } else { 1.320 + utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK, 1.321 + value, initialValue, overwrite); 1.322 + return TRUE; 1.323 + } 1.324 + } 1.325 + 1.326 + /* number of positions in the last, partial block */ 1.327 + rest=limit&UTRIE_MASK; 1.328 + 1.329 + /* round down limit to a block boundary */ 1.330 + limit&=~UTRIE_MASK; 1.331 + 1.332 + /* iterate over all-value blocks */ 1.333 + if(value==initialValue) { 1.334 + repeatBlock=0; 1.335 + } else { 1.336 + repeatBlock=-1; 1.337 + } 1.338 + while(start<limit) { 1.339 + /* get index value */ 1.340 + block=trie->index[start>>UTRIE_SHIFT]; 1.341 + if(block>0) { 1.342 + /* already allocated, fill in value */ 1.343 + utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite); 1.344 + } else if(trie->data[-block]!=value && (block==0 || overwrite)) { 1.345 + /* set the repeatBlock instead of the current block 0 or range block */ 1.346 + if(repeatBlock>=0) { 1.347 + trie->index[start>>UTRIE_SHIFT]=-repeatBlock; 1.348 + } else { 1.349 + /* create and set and fill the repeatBlock */ 1.350 + repeatBlock=utrie_getDataBlock(trie, start); 1.351 + if(repeatBlock<0) { 1.352 + return FALSE; 1.353 + } 1.354 + 1.355 + /* set the negative block number to indicate that it is a repeat block */ 1.356 + trie->index[start>>UTRIE_SHIFT]=-repeatBlock; 1.357 + utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE); 1.358 + } 1.359 + } 1.360 + 1.361 + start+=UTRIE_DATA_BLOCK_LENGTH; 1.362 + } 1.363 + 1.364 + if(rest>0) { 1.365 + /* set partial block at [last block boundary..limit[ */ 1.366 + block=utrie_getDataBlock(trie, start); 1.367 + if(block<0) { 1.368 + return FALSE; 1.369 + } 1.370 + 1.371 + utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite); 1.372 + } 1.373 + 1.374 + return TRUE; 1.375 +} 1.376 + 1.377 +static int32_t 1.378 +_findSameIndexBlock(const int32_t *idx, int32_t indexLength, 1.379 + int32_t otherBlock) { 1.380 + int32_t block, i; 1.381 + 1.382 + for(block=UTRIE_BMP_INDEX_LENGTH; block<indexLength; block+=UTRIE_SURROGATE_BLOCK_COUNT) { 1.383 + for(i=0; i<UTRIE_SURROGATE_BLOCK_COUNT; ++i) { 1.384 + if(idx[block+i]!=idx[otherBlock+i]) { 1.385 + break; 1.386 + } 1.387 + } 1.388 + if(i==UTRIE_SURROGATE_BLOCK_COUNT) { 1.389 + return block; 1.390 + } 1.391 + } 1.392 + return indexLength; 1.393 +} 1.394 + 1.395 +/* 1.396 + * Fold the normalization data for supplementary code points into 1.397 + * a compact area on top of the BMP-part of the trie index, 1.398 + * with the lead surrogates indexing this compact area. 1.399 + * 1.400 + * Duplicate the index values for lead surrogates: 1.401 + * From inside the BMP area, where some may be overridden with folded values, 1.402 + * to just after the BMP area, where they can be retrieved for 1.403 + * code point lookups. 1.404 + */ 1.405 +static void 1.406 +utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *pErrorCode) { 1.407 + int32_t leadIndexes[UTRIE_SURROGATE_BLOCK_COUNT]; 1.408 + int32_t *idx; 1.409 + uint32_t value; 1.410 + UChar32 c; 1.411 + int32_t indexLength, block; 1.412 +#ifdef UTRIE_DEBUG 1.413 + int countLeadCUWithData=0; 1.414 +#endif 1.415 + 1.416 + idx=trie->index; 1.417 + 1.418 + /* copy the lead surrogate indexes into a temporary array */ 1.419 + uprv_memcpy(leadIndexes, idx+(0xd800>>UTRIE_SHIFT), 4*UTRIE_SURROGATE_BLOCK_COUNT); 1.420 + 1.421 + /* 1.422 + * set all values for lead surrogate code *units* to leadUnitValue 1.423 + * so that, by default, runtime lookups will find no data for associated 1.424 + * supplementary code points, unless there is data for such code points 1.425 + * which will result in a non-zero folding value below that is set for 1.426 + * the respective lead units 1.427 + * 1.428 + * the above saved the indexes for surrogate code *points* 1.429 + * fill the indexes with simplified code from utrie_setRange32() 1.430 + */ 1.431 + if(trie->leadUnitValue==trie->data[0]) { 1.432 + block=0; /* leadUnitValue==initialValue, use all-initial-value block */ 1.433 + } else { 1.434 + /* create and fill the repeatBlock */ 1.435 + block=utrie_allocDataBlock(trie); 1.436 + if(block<0) { 1.437 + /* data table overflow */ 1.438 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.439 + return; 1.440 + } 1.441 + utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, trie->leadUnitValue, trie->data[0], TRUE); 1.442 + block=-block; /* negative block number to indicate that it is a repeat block */ 1.443 + } 1.444 + for(c=(0xd800>>UTRIE_SHIFT); c<(0xdc00>>UTRIE_SHIFT); ++c) { 1.445 + trie->index[c]=block; 1.446 + } 1.447 + 1.448 + /* 1.449 + * Fold significant index values into the area just after the BMP indexes. 1.450 + * In case the first lead surrogate has significant data, 1.451 + * its index block must be used first (in which case the folding is a no-op). 1.452 + * Later all folded index blocks are moved up one to insert the copied 1.453 + * lead surrogate indexes. 1.454 + */ 1.455 + indexLength=UTRIE_BMP_INDEX_LENGTH; 1.456 + 1.457 + /* search for any index (stage 1) entries for supplementary code points */ 1.458 + for(c=0x10000; c<0x110000;) { 1.459 + if(idx[c>>UTRIE_SHIFT]!=0) { 1.460 + /* there is data, treat the full block for a lead surrogate */ 1.461 + c&=~0x3ff; 1.462 + 1.463 +#ifdef UTRIE_DEBUG 1.464 + ++countLeadCUWithData; 1.465 + /* printf("supplementary data for lead surrogate U+%04lx\n", (long)(0xd7c0+(c>>10))); */ 1.466 +#endif 1.467 + 1.468 + /* is there an identical index block? */ 1.469 + block=_findSameIndexBlock(idx, indexLength, c>>UTRIE_SHIFT); 1.470 + 1.471 + /* 1.472 + * get a folded value for [c..c+0x400[ and, 1.473 + * if different from the value for the lead surrogate code point, 1.474 + * set it for the lead surrogate code unit 1.475 + */ 1.476 + value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT); 1.477 + if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) { 1.478 + if(!utrie_set32(trie, U16_LEAD(c), value)) { 1.479 + /* data table overflow */ 1.480 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.481 + return; 1.482 + } 1.483 + 1.484 + /* if we did not find an identical index block... */ 1.485 + if(block==indexLength) { 1.486 + /* move the actual index (stage 1) entries from the supplementary position to the new one */ 1.487 + uprv_memmove(idx+indexLength, 1.488 + idx+(c>>UTRIE_SHIFT), 1.489 + 4*UTRIE_SURROGATE_BLOCK_COUNT); 1.490 + indexLength+=UTRIE_SURROGATE_BLOCK_COUNT; 1.491 + } 1.492 + } 1.493 + c+=0x400; 1.494 + } else { 1.495 + c+=UTRIE_DATA_BLOCK_LENGTH; 1.496 + } 1.497 + } 1.498 +#ifdef UTRIE_DEBUG 1.499 + if(countLeadCUWithData>0) { 1.500 + printf("supplementary data for %d lead surrogates\n", countLeadCUWithData); 1.501 + } 1.502 +#endif 1.503 + 1.504 + /* 1.505 + * index array overflow? 1.506 + * This is to guarantee that a folding offset is of the form 1.507 + * UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023. 1.508 + * If the index is too large, then n>=1024 and more than 10 bits are necessary. 1.509 + * 1.510 + * In fact, it can only ever become n==1024 with completely unfoldable data and 1.511 + * the additional block of duplicated values for lead surrogates. 1.512 + */ 1.513 + if(indexLength>=UTRIE_MAX_INDEX_LENGTH) { 1.514 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.515 + return; 1.516 + } 1.517 + 1.518 + /* 1.519 + * make space for the lead surrogate index block and 1.520 + * insert it between the BMP indexes and the folded ones 1.521 + */ 1.522 + uprv_memmove(idx+UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT, 1.523 + idx+UTRIE_BMP_INDEX_LENGTH, 1.524 + 4*(indexLength-UTRIE_BMP_INDEX_LENGTH)); 1.525 + uprv_memcpy(idx+UTRIE_BMP_INDEX_LENGTH, 1.526 + leadIndexes, 1.527 + 4*UTRIE_SURROGATE_BLOCK_COUNT); 1.528 + indexLength+=UTRIE_SURROGATE_BLOCK_COUNT; 1.529 + 1.530 +#ifdef UTRIE_DEBUG 1.531 + printf("trie index count: BMP %ld all Unicode %ld folded %ld\n", 1.532 + UTRIE_BMP_INDEX_LENGTH, (long)UTRIE_MAX_INDEX_LENGTH, indexLength); 1.533 +#endif 1.534 + 1.535 + trie->indexLength=indexLength; 1.536 +} 1.537 + 1.538 +/* 1.539 + * Set a value in the trie index map to indicate which data block 1.540 + * is referenced and which one is not. 1.541 + * utrie_compact() will remove data blocks that are not used at all. 1.542 + * Set 1.543 + * - 0 if it is used 1.544 + * - -1 if it is not used 1.545 + */ 1.546 +static void 1.547 +_findUnusedBlocks(UNewTrie *trie) { 1.548 + int32_t i; 1.549 + 1.550 + /* fill the entire map with "not used" */ 1.551 + uprv_memset(trie->map, 0xff, (UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT)*4); 1.552 + 1.553 + /* mark each block that _is_ used with 0 */ 1.554 + for(i=0; i<trie->indexLength; ++i) { 1.555 + trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]=0; 1.556 + } 1.557 + 1.558 + /* never move the all-initial-value block 0 */ 1.559 + trie->map[0]=0; 1.560 +} 1.561 + 1.562 +static int32_t 1.563 +_findSameDataBlock(const uint32_t *data, int32_t dataLength, 1.564 + int32_t otherBlock, int32_t step) { 1.565 + int32_t block; 1.566 + 1.567 + /* ensure that we do not even partially get past dataLength */ 1.568 + dataLength-=UTRIE_DATA_BLOCK_LENGTH; 1.569 + 1.570 + for(block=0; block<=dataLength; block+=step) { 1.571 + if(equal_uint32(data+block, data+otherBlock, UTRIE_DATA_BLOCK_LENGTH)) { 1.572 + return block; 1.573 + } 1.574 + } 1.575 + return -1; 1.576 +} 1.577 + 1.578 +/* 1.579 + * Compact a folded build-time trie. 1.580 + * 1.581 + * The compaction 1.582 + * - removes blocks that are identical with earlier ones 1.583 + * - overlaps adjacent blocks as much as possible (if overlap==TRUE) 1.584 + * - moves blocks in steps of the data granularity 1.585 + * - moves and overlaps blocks that overlap with multiple values in the overlap region 1.586 + * 1.587 + * It does not 1.588 + * - try to move and overlap blocks that are not already adjacent 1.589 + */ 1.590 +static void 1.591 +utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) { 1.592 + int32_t i, start, newStart, overlapStart; 1.593 + 1.594 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.595 + return; 1.596 + } 1.597 + 1.598 + /* valid, uncompacted trie? */ 1.599 + if(trie==NULL) { 1.600 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.601 + return; 1.602 + } 1.603 + if(trie->isCompacted) { 1.604 + return; /* nothing left to do */ 1.605 + } 1.606 + 1.607 + /* compaction */ 1.608 + 1.609 + /* initialize the index map with "block is used/unused" flags */ 1.610 + _findUnusedBlocks(trie); 1.611 + 1.612 + /* if Latin-1 is preallocated and linear, then do not compact Latin-1 data */ 1.613 + if(trie->isLatin1Linear && UTRIE_SHIFT<=8) { 1.614 + overlapStart=UTRIE_DATA_BLOCK_LENGTH+256; 1.615 + } else { 1.616 + overlapStart=UTRIE_DATA_BLOCK_LENGTH; 1.617 + } 1.618 + 1.619 + newStart=UTRIE_DATA_BLOCK_LENGTH; 1.620 + for(start=newStart; start<trie->dataLength;) { 1.621 + /* 1.622 + * start: index of first entry of current block 1.623 + * newStart: index where the current block is to be moved 1.624 + * (right after current end of already-compacted data) 1.625 + */ 1.626 + 1.627 + /* skip blocks that are not used */ 1.628 + if(trie->map[start>>UTRIE_SHIFT]<0) { 1.629 + /* advance start to the next block */ 1.630 + start+=UTRIE_DATA_BLOCK_LENGTH; 1.631 + 1.632 + /* leave newStart with the previous block! */ 1.633 + continue; 1.634 + } 1.635 + 1.636 + /* search for an identical block */ 1.637 + if( start>=overlapStart && 1.638 + (i=_findSameDataBlock(trie->data, newStart, start, 1.639 + overlap ? UTRIE_DATA_GRANULARITY : UTRIE_DATA_BLOCK_LENGTH)) 1.640 + >=0 1.641 + ) { 1.642 + /* found an identical block, set the other block's index value for the current block */ 1.643 + trie->map[start>>UTRIE_SHIFT]=i; 1.644 + 1.645 + /* advance start to the next block */ 1.646 + start+=UTRIE_DATA_BLOCK_LENGTH; 1.647 + 1.648 + /* leave newStart with the previous block! */ 1.649 + continue; 1.650 + } 1.651 + 1.652 + /* see if the beginning of this block can be overlapped with the end of the previous block */ 1.653 + if(overlap && start>=overlapStart) { 1.654 + /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ 1.655 + for(i=UTRIE_DATA_BLOCK_LENGTH-UTRIE_DATA_GRANULARITY; 1.656 + i>0 && !equal_uint32(trie->data+(newStart-i), trie->data+start, i); 1.657 + i-=UTRIE_DATA_GRANULARITY) {} 1.658 + } else { 1.659 + i=0; 1.660 + } 1.661 + 1.662 + if(i>0) { 1.663 + /* some overlap */ 1.664 + trie->map[start>>UTRIE_SHIFT]=newStart-i; 1.665 + 1.666 + /* move the non-overlapping indexes to their new positions */ 1.667 + start+=i; 1.668 + for(i=UTRIE_DATA_BLOCK_LENGTH-i; i>0; --i) { 1.669 + trie->data[newStart++]=trie->data[start++]; 1.670 + } 1.671 + } else if(newStart<start) { 1.672 + /* no overlap, just move the indexes to their new positions */ 1.673 + trie->map[start>>UTRIE_SHIFT]=newStart; 1.674 + for(i=UTRIE_DATA_BLOCK_LENGTH; i>0; --i) { 1.675 + trie->data[newStart++]=trie->data[start++]; 1.676 + } 1.677 + } else /* no overlap && newStart==start */ { 1.678 + trie->map[start>>UTRIE_SHIFT]=start; 1.679 + newStart+=UTRIE_DATA_BLOCK_LENGTH; 1.680 + start=newStart; 1.681 + } 1.682 + } 1.683 + 1.684 + /* now adjust the index (stage 1) table */ 1.685 + for(i=0; i<trie->indexLength; ++i) { 1.686 + trie->index[i]=trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]; 1.687 + } 1.688 + 1.689 +#ifdef UTRIE_DEBUG 1.690 + /* we saved some space */ 1.691 + printf("compacting trie: count of 32-bit words %lu->%lu\n", 1.692 + (long)trie->dataLength, (long)newStart); 1.693 +#endif 1.694 + 1.695 + trie->dataLength=newStart; 1.696 +} 1.697 + 1.698 +/* serialization ------------------------------------------------------------ */ 1.699 + 1.700 +/* 1.701 + * Default function for the folding value: 1.702 + * Just store the offset (16 bits) if there is any non-initial-value entry. 1.703 + * 1.704 + * The offset parameter is never 0. 1.705 + * Returning the offset itself is safe for UTRIE_SHIFT>=5 because 1.706 + * for UTRIE_SHIFT==5 the maximum index length is UTRIE_MAX_INDEX_LENGTH==0x8800 1.707 + * which fits into 16-bit trie values; 1.708 + * for higher UTRIE_SHIFT, UTRIE_MAX_INDEX_LENGTH decreases. 1.709 + * 1.710 + * Theoretically, it would be safer for all possible UTRIE_SHIFT including 1.711 + * those of 4 and lower to return offset>>UTRIE_SURROGATE_BLOCK_BITS 1.712 + * which would always result in a value of 0x40..0x43f 1.713 + * (start/end 1k blocks of supplementary Unicode code points). 1.714 + * However, this would be uglier, and would not work for some existing 1.715 + * binary data file formats. 1.716 + * 1.717 + * Also, we do not plan to change UTRIE_SHIFT because it would change binary 1.718 + * data file formats, and we would probably not make it smaller because of 1.719 + * the then even larger BMP index length even for empty tries. 1.720 + */ 1.721 +static uint32_t U_CALLCONV 1.722 +defaultGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) { 1.723 + uint32_t value, initialValue; 1.724 + UChar32 limit; 1.725 + UBool inBlockZero; 1.726 + 1.727 + initialValue=trie->data[0]; 1.728 + limit=start+0x400; 1.729 + while(start<limit) { 1.730 + value=utrie_get32(trie, start, &inBlockZero); 1.731 + if(inBlockZero) { 1.732 + start+=UTRIE_DATA_BLOCK_LENGTH; 1.733 + } else if(value!=initialValue) { 1.734 + return (uint32_t)offset; 1.735 + } else { 1.736 + ++start; 1.737 + } 1.738 + } 1.739 + return 0; 1.740 +} 1.741 + 1.742 +U_CAPI int32_t U_EXPORT2 1.743 +utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity, 1.744 + UNewTrieGetFoldedValue *getFoldedValue, 1.745 + UBool reduceTo16Bits, 1.746 + UErrorCode *pErrorCode) { 1.747 + UTrieHeader *header; 1.748 + uint32_t *p; 1.749 + uint16_t *dest16; 1.750 + int32_t i, length; 1.751 + uint8_t* data = NULL; 1.752 + 1.753 + /* argument check */ 1.754 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.755 + return 0; 1.756 + } 1.757 + 1.758 + if(trie==NULL || capacity<0 || (capacity>0 && dt==NULL)) { 1.759 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.760 + return 0; 1.761 + } 1.762 + if(getFoldedValue==NULL) { 1.763 + getFoldedValue=defaultGetFoldedValue; 1.764 + } 1.765 + 1.766 + data = (uint8_t*)dt; 1.767 + /* fold and compact if necessary, also checks that indexLength is within limits */ 1.768 + if(!trie->isCompacted) { 1.769 + /* compact once without overlap to improve folding */ 1.770 + utrie_compact(trie, FALSE, pErrorCode); 1.771 + 1.772 + /* fold the supplementary part of the index array */ 1.773 + utrie_fold(trie, getFoldedValue, pErrorCode); 1.774 + 1.775 + /* compact again with overlap for minimum data array length */ 1.776 + utrie_compact(trie, TRUE, pErrorCode); 1.777 + 1.778 + trie->isCompacted=TRUE; 1.779 + if(U_FAILURE(*pErrorCode)) { 1.780 + return 0; 1.781 + } 1.782 + } 1.783 + 1.784 + /* is dataLength within limits? */ 1.785 + if( (reduceTo16Bits ? (trie->dataLength+trie->indexLength) : trie->dataLength) >= UTRIE_MAX_DATA_LENGTH) { 1.786 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.787 + } 1.788 + 1.789 + length=sizeof(UTrieHeader)+2*trie->indexLength; 1.790 + if(reduceTo16Bits) { 1.791 + length+=2*trie->dataLength; 1.792 + } else { 1.793 + length+=4*trie->dataLength; 1.794 + } 1.795 + 1.796 + if(length>capacity) { 1.797 + return length; /* preflighting */ 1.798 + } 1.799 + 1.800 +#ifdef UTRIE_DEBUG 1.801 + printf("**UTrieLengths(serialize)** index:%6ld data:%6ld serialized:%6ld\n", 1.802 + (long)trie->indexLength, (long)trie->dataLength, (long)length); 1.803 +#endif 1.804 + 1.805 + /* set the header fields */ 1.806 + header=(UTrieHeader *)data; 1.807 + data+=sizeof(UTrieHeader); 1.808 + 1.809 + header->signature=0x54726965; /* "Trie" */ 1.810 + header->options=UTRIE_SHIFT | (UTRIE_INDEX_SHIFT<<UTRIE_OPTIONS_INDEX_SHIFT); 1.811 + 1.812 + if(!reduceTo16Bits) { 1.813 + header->options|=UTRIE_OPTIONS_DATA_IS_32_BIT; 1.814 + } 1.815 + if(trie->isLatin1Linear) { 1.816 + header->options|=UTRIE_OPTIONS_LATIN1_IS_LINEAR; 1.817 + } 1.818 + 1.819 + header->indexLength=trie->indexLength; 1.820 + header->dataLength=trie->dataLength; 1.821 + 1.822 + /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */ 1.823 + if(reduceTo16Bits) { 1.824 + /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */ 1.825 + p=(uint32_t *)trie->index; 1.826 + dest16=(uint16_t *)data; 1.827 + for(i=trie->indexLength; i>0; --i) { 1.828 + *dest16++=(uint16_t)((*p++ + trie->indexLength)>>UTRIE_INDEX_SHIFT); 1.829 + } 1.830 + 1.831 + /* write 16-bit data values */ 1.832 + p=trie->data; 1.833 + for(i=trie->dataLength; i>0; --i) { 1.834 + *dest16++=(uint16_t)*p++; 1.835 + } 1.836 + } else { 1.837 + /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */ 1.838 + p=(uint32_t *)trie->index; 1.839 + dest16=(uint16_t *)data; 1.840 + for(i=trie->indexLength; i>0; --i) { 1.841 + *dest16++=(uint16_t)(*p++ >> UTRIE_INDEX_SHIFT); 1.842 + } 1.843 + 1.844 + /* write 32-bit data values */ 1.845 + uprv_memcpy(dest16, trie->data, 4*trie->dataLength); 1.846 + } 1.847 + 1.848 + return length; 1.849 +} 1.850 + 1.851 +/* inverse to defaultGetFoldedValue() */ 1.852 +U_CAPI int32_t U_EXPORT2 1.853 +utrie_defaultGetFoldingOffset(uint32_t data) { 1.854 + return (int32_t)data; 1.855 +} 1.856 + 1.857 +U_CAPI int32_t U_EXPORT2 1.858 +utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode) { 1.859 + const UTrieHeader *header; 1.860 + const uint16_t *p16; 1.861 + uint32_t options; 1.862 + 1.863 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.864 + return -1; 1.865 + } 1.866 + 1.867 + /* enough data for a trie header? */ 1.868 + if(length<(int32_t)sizeof(UTrieHeader)) { 1.869 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.870 + return -1; 1.871 + } 1.872 + 1.873 + /* check the signature */ 1.874 + header=(const UTrieHeader *)data; 1.875 + if(header->signature!=0x54726965) { 1.876 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.877 + return -1; 1.878 + } 1.879 + 1.880 + /* get the options and check the shift values */ 1.881 + options=header->options; 1.882 + if( (options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || 1.883 + ((options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT 1.884 + ) { 1.885 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.886 + return -1; 1.887 + } 1.888 + trie->isLatin1Linear= (UBool)((options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0); 1.889 + 1.890 + /* get the length values */ 1.891 + trie->indexLength=header->indexLength; 1.892 + trie->dataLength=header->dataLength; 1.893 + 1.894 + length-=(int32_t)sizeof(UTrieHeader); 1.895 + 1.896 + /* enough data for the index? */ 1.897 + if(length<2*trie->indexLength) { 1.898 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.899 + return -1; 1.900 + } 1.901 + p16=(const uint16_t *)(header+1); 1.902 + trie->index=p16; 1.903 + p16+=trie->indexLength; 1.904 + length-=2*trie->indexLength; 1.905 + 1.906 + /* get the data */ 1.907 + if(options&UTRIE_OPTIONS_DATA_IS_32_BIT) { 1.908 + if(length<4*trie->dataLength) { 1.909 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.910 + return -1; 1.911 + } 1.912 + trie->data32=(const uint32_t *)p16; 1.913 + trie->initialValue=trie->data32[0]; 1.914 + length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength; 1.915 + } else { 1.916 + if(length<2*trie->dataLength) { 1.917 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.918 + return -1; 1.919 + } 1.920 + 1.921 + /* the "data16" data is used via the index pointer */ 1.922 + trie->data32=NULL; 1.923 + trie->initialValue=trie->index[trie->indexLength]; 1.924 + length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength; 1.925 + } 1.926 + 1.927 + trie->getFoldingOffset=utrie_defaultGetFoldingOffset; 1.928 + 1.929 + return length; 1.930 +} 1.931 + 1.932 +U_CAPI int32_t U_EXPORT2 1.933 +utrie_unserializeDummy(UTrie *trie, 1.934 + void *data, int32_t length, 1.935 + uint32_t initialValue, uint32_t leadUnitValue, 1.936 + UBool make16BitTrie, 1.937 + UErrorCode *pErrorCode) { 1.938 + uint16_t *p16; 1.939 + int32_t actualLength, latin1Length, i, limit; 1.940 + uint16_t block; 1.941 + 1.942 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.943 + return -1; 1.944 + } 1.945 + 1.946 + /* calculate the actual size of the dummy trie data */ 1.947 + 1.948 + /* max(Latin-1, block 0) */ 1.949 + latin1Length= 256; /*UTRIE_SHIFT<=8 ? 256 : UTRIE_DATA_BLOCK_LENGTH;*/ 1.950 + 1.951 + trie->indexLength=UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT; 1.952 + trie->dataLength=latin1Length; 1.953 + if(leadUnitValue!=initialValue) { 1.954 + trie->dataLength+=UTRIE_DATA_BLOCK_LENGTH; 1.955 + } 1.956 + 1.957 + actualLength=trie->indexLength*2; 1.958 + if(make16BitTrie) { 1.959 + actualLength+=trie->dataLength*2; 1.960 + } else { 1.961 + actualLength+=trie->dataLength*4; 1.962 + } 1.963 + 1.964 + /* enough space for the dummy trie? */ 1.965 + if(length<actualLength) { 1.966 + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1.967 + return actualLength; 1.968 + } 1.969 + 1.970 + trie->isLatin1Linear=TRUE; 1.971 + trie->initialValue=initialValue; 1.972 + 1.973 + /* fill the index and data arrays */ 1.974 + p16=(uint16_t *)data; 1.975 + trie->index=p16; 1.976 + 1.977 + if(make16BitTrie) { 1.978 + /* indexes to block 0 */ 1.979 + block=(uint16_t)(trie->indexLength>>UTRIE_INDEX_SHIFT); 1.980 + limit=trie->indexLength; 1.981 + for(i=0; i<limit; ++i) { 1.982 + p16[i]=block; 1.983 + } 1.984 + 1.985 + if(leadUnitValue!=initialValue) { 1.986 + /* indexes for lead surrogate code units to the block after Latin-1 */ 1.987 + block+=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT); 1.988 + i=0xd800>>UTRIE_SHIFT; 1.989 + limit=0xdc00>>UTRIE_SHIFT; 1.990 + for(; i<limit; ++i) { 1.991 + p16[i]=block; 1.992 + } 1.993 + } 1.994 + 1.995 + trie->data32=NULL; 1.996 + 1.997 + /* Latin-1 data */ 1.998 + p16+=trie->indexLength; 1.999 + for(i=0; i<latin1Length; ++i) { 1.1000 + p16[i]=(uint16_t)initialValue; 1.1001 + } 1.1002 + 1.1003 + /* data for lead surrogate code units */ 1.1004 + if(leadUnitValue!=initialValue) { 1.1005 + limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH; 1.1006 + for(/* i=latin1Length */; i<limit; ++i) { 1.1007 + p16[i]=(uint16_t)leadUnitValue; 1.1008 + } 1.1009 + } 1.1010 + } else { 1.1011 + uint32_t *p32; 1.1012 + 1.1013 + /* indexes to block 0 */ 1.1014 + uprv_memset(p16, 0, trie->indexLength*2); 1.1015 + 1.1016 + if(leadUnitValue!=initialValue) { 1.1017 + /* indexes for lead surrogate code units to the block after Latin-1 */ 1.1018 + block=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT); 1.1019 + i=0xd800>>UTRIE_SHIFT; 1.1020 + limit=0xdc00>>UTRIE_SHIFT; 1.1021 + for(; i<limit; ++i) { 1.1022 + p16[i]=block; 1.1023 + } 1.1024 + } 1.1025 + 1.1026 + trie->data32=p32=(uint32_t *)(p16+trie->indexLength); 1.1027 + 1.1028 + /* Latin-1 data */ 1.1029 + for(i=0; i<latin1Length; ++i) { 1.1030 + p32[i]=initialValue; 1.1031 + } 1.1032 + 1.1033 + /* data for lead surrogate code units */ 1.1034 + if(leadUnitValue!=initialValue) { 1.1035 + limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH; 1.1036 + for(/* i=latin1Length */; i<limit; ++i) { 1.1037 + p32[i]=leadUnitValue; 1.1038 + } 1.1039 + } 1.1040 + } 1.1041 + 1.1042 + trie->getFoldingOffset=utrie_defaultGetFoldingOffset; 1.1043 + 1.1044 + return actualLength; 1.1045 +} 1.1046 + 1.1047 +/* enumeration -------------------------------------------------------------- */ 1.1048 + 1.1049 +/* default UTrieEnumValue() returns the input value itself */ 1.1050 +static uint32_t U_CALLCONV 1.1051 +enumSameValue(const void * /*context*/, uint32_t value) { 1.1052 + return value; 1.1053 +} 1.1054 + 1.1055 +/** 1.1056 + * Enumerate all ranges of code points with the same relevant values. 1.1057 + * The values are transformed from the raw trie entries by the enumValue function. 1.1058 + */ 1.1059 +U_CAPI void U_EXPORT2 1.1060 +utrie_enum(const UTrie *trie, 1.1061 + UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context) { 1.1062 + const uint32_t *data32; 1.1063 + const uint16_t *idx; 1.1064 + 1.1065 + uint32_t value, prevValue, initialValue; 1.1066 + UChar32 c, prev; 1.1067 + int32_t l, i, j, block, prevBlock, nullBlock, offset; 1.1068 + 1.1069 + /* check arguments */ 1.1070 + if(trie==NULL || trie->index==NULL || enumRange==NULL) { 1.1071 + return; 1.1072 + } 1.1073 + if(enumValue==NULL) { 1.1074 + enumValue=enumSameValue; 1.1075 + } 1.1076 + 1.1077 + idx=trie->index; 1.1078 + data32=trie->data32; 1.1079 + 1.1080 + /* get the enumeration value that corresponds to an initial-value trie data entry */ 1.1081 + initialValue=enumValue(context, trie->initialValue); 1.1082 + 1.1083 + if(data32==NULL) { 1.1084 + nullBlock=trie->indexLength; 1.1085 + } else { 1.1086 + nullBlock=0; 1.1087 + } 1.1088 + 1.1089 + /* set variables for previous range */ 1.1090 + prevBlock=nullBlock; 1.1091 + prev=0; 1.1092 + prevValue=initialValue; 1.1093 + 1.1094 + /* enumerate BMP - the main loop enumerates data blocks */ 1.1095 + for(i=0, c=0; c<=0xffff; ++i) { 1.1096 + if(c==0xd800) { 1.1097 + /* skip lead surrogate code _units_, go to lead surr. code _points_ */ 1.1098 + i=UTRIE_BMP_INDEX_LENGTH; 1.1099 + } else if(c==0xdc00) { 1.1100 + /* go back to regular BMP code points */ 1.1101 + i=c>>UTRIE_SHIFT; 1.1102 + } 1.1103 + 1.1104 + block=idx[i]<<UTRIE_INDEX_SHIFT; 1.1105 + if(block==prevBlock) { 1.1106 + /* the block is the same as the previous one, and filled with value */ 1.1107 + c+=UTRIE_DATA_BLOCK_LENGTH; 1.1108 + } else if(block==nullBlock) { 1.1109 + /* this is the all-initial-value block */ 1.1110 + if(prevValue!=initialValue) { 1.1111 + if(prev<c) { 1.1112 + if(!enumRange(context, prev, c, prevValue)) { 1.1113 + return; 1.1114 + } 1.1115 + } 1.1116 + prevBlock=nullBlock; 1.1117 + prev=c; 1.1118 + prevValue=initialValue; 1.1119 + } 1.1120 + c+=UTRIE_DATA_BLOCK_LENGTH; 1.1121 + } else { 1.1122 + prevBlock=block; 1.1123 + for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) { 1.1124 + value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); 1.1125 + if(value!=prevValue) { 1.1126 + if(prev<c) { 1.1127 + if(!enumRange(context, prev, c, prevValue)) { 1.1128 + return; 1.1129 + } 1.1130 + } 1.1131 + if(j>0) { 1.1132 + /* the block is not filled with all the same value */ 1.1133 + prevBlock=-1; 1.1134 + } 1.1135 + prev=c; 1.1136 + prevValue=value; 1.1137 + } 1.1138 + ++c; 1.1139 + } 1.1140 + } 1.1141 + } 1.1142 + 1.1143 + /* enumerate supplementary code points */ 1.1144 + for(l=0xd800; l<0xdc00;) { 1.1145 + /* lead surrogate access */ 1.1146 + offset=idx[l>>UTRIE_SHIFT]<<UTRIE_INDEX_SHIFT; 1.1147 + if(offset==nullBlock) { 1.1148 + /* no entries for a whole block of lead surrogates */ 1.1149 + if(prevValue!=initialValue) { 1.1150 + if(prev<c) { 1.1151 + if(!enumRange(context, prev, c, prevValue)) { 1.1152 + return; 1.1153 + } 1.1154 + } 1.1155 + prevBlock=nullBlock; 1.1156 + prev=c; 1.1157 + prevValue=initialValue; 1.1158 + } 1.1159 + 1.1160 + l+=UTRIE_DATA_BLOCK_LENGTH; 1.1161 + c+=UTRIE_DATA_BLOCK_LENGTH<<10; 1.1162 + continue; 1.1163 + } 1.1164 + 1.1165 + value= data32!=NULL ? data32[offset+(l&UTRIE_MASK)] : idx[offset+(l&UTRIE_MASK)]; 1.1166 + 1.1167 + /* enumerate trail surrogates for this lead surrogate */ 1.1168 + offset=trie->getFoldingOffset(value); 1.1169 + if(offset<=0) { 1.1170 + /* no data for this lead surrogate */ 1.1171 + if(prevValue!=initialValue) { 1.1172 + if(prev<c) { 1.1173 + if(!enumRange(context, prev, c, prevValue)) { 1.1174 + return; 1.1175 + } 1.1176 + } 1.1177 + prevBlock=nullBlock; 1.1178 + prev=c; 1.1179 + prevValue=initialValue; 1.1180 + } 1.1181 + 1.1182 + /* nothing else to do for the supplementary code points for this lead surrogate */ 1.1183 + c+=0x400; 1.1184 + } else { 1.1185 + /* enumerate code points for this lead surrogate */ 1.1186 + i=offset; 1.1187 + offset+=UTRIE_SURROGATE_BLOCK_COUNT; 1.1188 + do { 1.1189 + /* copy of most of the body of the BMP loop */ 1.1190 + block=idx[i]<<UTRIE_INDEX_SHIFT; 1.1191 + if(block==prevBlock) { 1.1192 + /* the block is the same as the previous one, and filled with value */ 1.1193 + c+=UTRIE_DATA_BLOCK_LENGTH; 1.1194 + } else if(block==nullBlock) { 1.1195 + /* this is the all-initial-value block */ 1.1196 + if(prevValue!=initialValue) { 1.1197 + if(prev<c) { 1.1198 + if(!enumRange(context, prev, c, prevValue)) { 1.1199 + return; 1.1200 + } 1.1201 + } 1.1202 + prevBlock=nullBlock; 1.1203 + prev=c; 1.1204 + prevValue=initialValue; 1.1205 + } 1.1206 + c+=UTRIE_DATA_BLOCK_LENGTH; 1.1207 + } else { 1.1208 + prevBlock=block; 1.1209 + for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) { 1.1210 + value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); 1.1211 + if(value!=prevValue) { 1.1212 + if(prev<c) { 1.1213 + if(!enumRange(context, prev, c, prevValue)) { 1.1214 + return; 1.1215 + } 1.1216 + } 1.1217 + if(j>0) { 1.1218 + /* the block is not filled with all the same value */ 1.1219 + prevBlock=-1; 1.1220 + } 1.1221 + prev=c; 1.1222 + prevValue=value; 1.1223 + } 1.1224 + ++c; 1.1225 + } 1.1226 + } 1.1227 + } while(++i<offset); 1.1228 + } 1.1229 + 1.1230 + ++l; 1.1231 + } 1.1232 + 1.1233 + /* deliver last range */ 1.1234 + enumRange(context, prev, c, prevValue); 1.1235 +}