intl/icu/source/common/utrie2.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2001-2013, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ******************************************************************************
michael@0 8 * file name: utrie2.cpp
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2008aug16 (starting from a copy of utrie.c)
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * This is a common implementation of a Unicode trie.
michael@0 17 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
michael@0 18 * Unicode code points (0..0x10ffff).
michael@0 19 * This is the second common version of a Unicode trie (hence the name UTrie2).
michael@0 20 * See utrie2.h for a comparison.
michael@0 21 *
michael@0 22 * This file contains only the runtime and enumeration code, for read-only access.
michael@0 23 * See utrie2_builder.c for the builder code.
michael@0 24 */
michael@0 25 #ifdef UTRIE2_DEBUG
michael@0 26 # include <stdio.h>
michael@0 27 #endif
michael@0 28
michael@0 29 #include "unicode/utypes.h"
michael@0 30 #include "unicode/utf.h"
michael@0 31 #include "unicode/utf8.h"
michael@0 32 #include "unicode/utf16.h"
michael@0 33 #include "cmemory.h"
michael@0 34 #include "utrie2.h"
michael@0 35 #include "utrie2_impl.h"
michael@0 36 #include "uassert.h"
michael@0 37
michael@0 38 /* Public UTrie2 API implementation ----------------------------------------- */
michael@0 39
michael@0 40 static uint32_t
michael@0 41 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
michael@0 42 int32_t i2, block;
michael@0 43
michael@0 44 if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) {
michael@0 45 return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
michael@0 46 }
michael@0 47
michael@0 48 if(U_IS_LEAD(c) && fromLSCP) {
michael@0 49 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
michael@0 50 (c>>UTRIE2_SHIFT_2);
michael@0 51 } else {
michael@0 52 i2=trie->index1[c>>UTRIE2_SHIFT_1]+
michael@0 53 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
michael@0 54 }
michael@0 55 block=trie->index2[i2];
michael@0 56 return trie->data[block+(c&UTRIE2_DATA_MASK)];
michael@0 57 }
michael@0 58
michael@0 59 U_CAPI uint32_t U_EXPORT2
michael@0 60 utrie2_get32(const UTrie2 *trie, UChar32 c) {
michael@0 61 if(trie->data16!=NULL) {
michael@0 62 return UTRIE2_GET16(trie, c);
michael@0 63 } else if(trie->data32!=NULL) {
michael@0 64 return UTRIE2_GET32(trie, c);
michael@0 65 } else if((uint32_t)c>0x10ffff) {
michael@0 66 return trie->errorValue;
michael@0 67 } else {
michael@0 68 return get32(trie->newTrie, c, TRUE);
michael@0 69 }
michael@0 70 }
michael@0 71
michael@0 72 U_CAPI uint32_t U_EXPORT2
michael@0 73 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
michael@0 74 if(!U_IS_LEAD(c)) {
michael@0 75 return trie->errorValue;
michael@0 76 }
michael@0 77 if(trie->data16!=NULL) {
michael@0 78 return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
michael@0 79 } else if(trie->data32!=NULL) {
michael@0 80 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
michael@0 81 } else {
michael@0 82 return get32(trie->newTrie, c, FALSE);
michael@0 83 }
michael@0 84 }
michael@0 85
michael@0 86 static inline int32_t
michael@0 87 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
michael@0 88 int32_t idx=
michael@0 89 _UTRIE2_INDEX_FROM_CP(
michael@0 90 trie,
michael@0 91 trie->data32==NULL ? trie->indexLength : 0,
michael@0 92 c);
michael@0 93 return (idx<<3)|i;
michael@0 94 }
michael@0 95
michael@0 96 U_CAPI int32_t U_EXPORT2
michael@0 97 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
michael@0 98 const uint8_t *src, const uint8_t *limit) {
michael@0 99 int32_t i, length;
michael@0 100 i=0;
michael@0 101 /* support 64-bit pointers by avoiding cast of arbitrary difference */
michael@0 102 if((limit-src)<=7) {
michael@0 103 length=(int32_t)(limit-src);
michael@0 104 } else {
michael@0 105 length=7;
michael@0 106 }
michael@0 107 c=utf8_nextCharSafeBody(src, &i, length, c, -1);
michael@0 108 return u8Index(trie, c, i);
michael@0 109 }
michael@0 110
michael@0 111 U_CAPI int32_t U_EXPORT2
michael@0 112 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
michael@0 113 const uint8_t *start, const uint8_t *src) {
michael@0 114 int32_t i, length;
michael@0 115 /* support 64-bit pointers by avoiding cast of arbitrary difference */
michael@0 116 if((src-start)<=7) {
michael@0 117 i=length=(int32_t)(src-start);
michael@0 118 } else {
michael@0 119 i=length=7;
michael@0 120 start=src-7;
michael@0 121 }
michael@0 122 c=utf8_prevCharSafeBody(start, 0, &i, c, -1);
michael@0 123 i=length-i; /* number of bytes read backward from src */
michael@0 124 return u8Index(trie, c, i);
michael@0 125 }
michael@0 126
michael@0 127 U_CAPI UTrie2 * U_EXPORT2
michael@0 128 utrie2_openFromSerialized(UTrie2ValueBits valueBits,
michael@0 129 const void *data, int32_t length, int32_t *pActualLength,
michael@0 130 UErrorCode *pErrorCode) {
michael@0 131 const UTrie2Header *header;
michael@0 132 const uint16_t *p16;
michael@0 133 int32_t actualLength;
michael@0 134
michael@0 135 UTrie2 tempTrie;
michael@0 136 UTrie2 *trie;
michael@0 137
michael@0 138 if(U_FAILURE(*pErrorCode)) {
michael@0 139 return 0;
michael@0 140 }
michael@0 141
michael@0 142 if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
michael@0 143 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
michael@0 144 ) {
michael@0 145 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 146 return 0;
michael@0 147 }
michael@0 148
michael@0 149 /* enough data for a trie header? */
michael@0 150 if(length<(int32_t)sizeof(UTrie2Header)) {
michael@0 151 *pErrorCode=U_INVALID_FORMAT_ERROR;
michael@0 152 return 0;
michael@0 153 }
michael@0 154
michael@0 155 /* check the signature */
michael@0 156 header=(const UTrie2Header *)data;
michael@0 157 if(header->signature!=UTRIE2_SIG) {
michael@0 158 *pErrorCode=U_INVALID_FORMAT_ERROR;
michael@0 159 return 0;
michael@0 160 }
michael@0 161
michael@0 162 /* get the options */
michael@0 163 if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
michael@0 164 *pErrorCode=U_INVALID_FORMAT_ERROR;
michael@0 165 return 0;
michael@0 166 }
michael@0 167
michael@0 168 /* get the length values and offsets */
michael@0 169 uprv_memset(&tempTrie, 0, sizeof(tempTrie));
michael@0 170 tempTrie.indexLength=header->indexLength;
michael@0 171 tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
michael@0 172 tempTrie.index2NullOffset=header->index2NullOffset;
michael@0 173 tempTrie.dataNullOffset=header->dataNullOffset;
michael@0 174
michael@0 175 tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
michael@0 176 tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
michael@0 177 if(valueBits==UTRIE2_16_VALUE_BITS) {
michael@0 178 tempTrie.highValueIndex+=tempTrie.indexLength;
michael@0 179 }
michael@0 180
michael@0 181 /* calculate the actual length */
michael@0 182 actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
michael@0 183 if(valueBits==UTRIE2_16_VALUE_BITS) {
michael@0 184 actualLength+=tempTrie.dataLength*2;
michael@0 185 } else {
michael@0 186 actualLength+=tempTrie.dataLength*4;
michael@0 187 }
michael@0 188 if(length<actualLength) {
michael@0 189 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */
michael@0 190 return 0;
michael@0 191 }
michael@0 192
michael@0 193 /* allocate the trie */
michael@0 194 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
michael@0 195 if(trie==NULL) {
michael@0 196 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 197 return 0;
michael@0 198 }
michael@0 199 uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
michael@0 200 trie->memory=(uint32_t *)data;
michael@0 201 trie->length=actualLength;
michael@0 202 trie->isMemoryOwned=FALSE;
michael@0 203
michael@0 204 /* set the pointers to its index and data arrays */
michael@0 205 p16=(const uint16_t *)(header+1);
michael@0 206 trie->index=p16;
michael@0 207 p16+=trie->indexLength;
michael@0 208
michael@0 209 /* get the data */
michael@0 210 switch(valueBits) {
michael@0 211 case UTRIE2_16_VALUE_BITS:
michael@0 212 trie->data16=p16;
michael@0 213 trie->data32=NULL;
michael@0 214 trie->initialValue=trie->index[trie->dataNullOffset];
michael@0 215 trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
michael@0 216 break;
michael@0 217 case UTRIE2_32_VALUE_BITS:
michael@0 218 trie->data16=NULL;
michael@0 219 trie->data32=(const uint32_t *)p16;
michael@0 220 trie->initialValue=trie->data32[trie->dataNullOffset];
michael@0 221 trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
michael@0 222 break;
michael@0 223 default:
michael@0 224 *pErrorCode=U_INVALID_FORMAT_ERROR;
michael@0 225 return 0;
michael@0 226 }
michael@0 227
michael@0 228 if(pActualLength!=NULL) {
michael@0 229 *pActualLength=actualLength;
michael@0 230 }
michael@0 231 return trie;
michael@0 232 }
michael@0 233
michael@0 234 U_CAPI UTrie2 * U_EXPORT2
michael@0 235 utrie2_openDummy(UTrie2ValueBits valueBits,
michael@0 236 uint32_t initialValue, uint32_t errorValue,
michael@0 237 UErrorCode *pErrorCode) {
michael@0 238 UTrie2 *trie;
michael@0 239 UTrie2Header *header;
michael@0 240 uint32_t *p;
michael@0 241 uint16_t *dest16;
michael@0 242 int32_t indexLength, dataLength, length, i;
michael@0 243 int32_t dataMove; /* >0 if the data is moved to the end of the index array */
michael@0 244
michael@0 245 if(U_FAILURE(*pErrorCode)) {
michael@0 246 return 0;
michael@0 247 }
michael@0 248
michael@0 249 if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
michael@0 250 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 251 return 0;
michael@0 252 }
michael@0 253
michael@0 254 /* calculate the total length of the dummy trie data */
michael@0 255 indexLength=UTRIE2_INDEX_1_OFFSET;
michael@0 256 dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
michael@0 257 length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
michael@0 258 if(valueBits==UTRIE2_16_VALUE_BITS) {
michael@0 259 length+=dataLength*2;
michael@0 260 } else {
michael@0 261 length+=dataLength*4;
michael@0 262 }
michael@0 263
michael@0 264 /* allocate the trie */
michael@0 265 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
michael@0 266 if(trie==NULL) {
michael@0 267 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 268 return 0;
michael@0 269 }
michael@0 270 uprv_memset(trie, 0, sizeof(UTrie2));
michael@0 271 trie->memory=uprv_malloc(length);
michael@0 272 if(trie->memory==NULL) {
michael@0 273 uprv_free(trie);
michael@0 274 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 275 return 0;
michael@0 276 }
michael@0 277 trie->length=length;
michael@0 278 trie->isMemoryOwned=TRUE;
michael@0 279
michael@0 280 /* set the UTrie2 fields */
michael@0 281 if(valueBits==UTRIE2_16_VALUE_BITS) {
michael@0 282 dataMove=indexLength;
michael@0 283 } else {
michael@0 284 dataMove=0;
michael@0 285 }
michael@0 286
michael@0 287 trie->indexLength=indexLength;
michael@0 288 trie->dataLength=dataLength;
michael@0 289 trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
michael@0 290 trie->dataNullOffset=(uint16_t)dataMove;
michael@0 291 trie->initialValue=initialValue;
michael@0 292 trie->errorValue=errorValue;
michael@0 293 trie->highStart=0;
michael@0 294 trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;
michael@0 295
michael@0 296 /* set the header fields */
michael@0 297 header=(UTrie2Header *)trie->memory;
michael@0 298
michael@0 299 header->signature=UTRIE2_SIG; /* "Tri2" */
michael@0 300 header->options=(uint16_t)valueBits;
michael@0 301
michael@0 302 header->indexLength=(uint16_t)indexLength;
michael@0 303 header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
michael@0 304 header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
michael@0 305 header->dataNullOffset=(uint16_t)dataMove;
michael@0 306 header->shiftedHighStart=0;
michael@0 307
michael@0 308 /* fill the index and data arrays */
michael@0 309 dest16=(uint16_t *)(header+1);
michael@0 310 trie->index=dest16;
michael@0 311
michael@0 312 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
michael@0 313 for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
michael@0 314 *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */
michael@0 315 }
michael@0 316
michael@0 317 /* write UTF-8 2-byte index-2 values, not right-shifted */
michael@0 318 for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */
michael@0 319 *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
michael@0 320 }
michael@0 321 for(; i<(0xe0-0xc0); ++i) { /* C2..DF */
michael@0 322 *dest16++=(uint16_t)dataMove;
michael@0 323 }
michael@0 324
michael@0 325 /* write the 16/32-bit data array */
michael@0 326 switch(valueBits) {
michael@0 327 case UTRIE2_16_VALUE_BITS:
michael@0 328 /* write 16-bit data values */
michael@0 329 trie->data16=dest16;
michael@0 330 trie->data32=NULL;
michael@0 331 for(i=0; i<0x80; ++i) {
michael@0 332 *dest16++=(uint16_t)initialValue;
michael@0 333 }
michael@0 334 for(; i<0xc0; ++i) {
michael@0 335 *dest16++=(uint16_t)errorValue;
michael@0 336 }
michael@0 337 /* highValue and reserved values */
michael@0 338 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
michael@0 339 *dest16++=(uint16_t)initialValue;
michael@0 340 }
michael@0 341 break;
michael@0 342 case UTRIE2_32_VALUE_BITS:
michael@0 343 /* write 32-bit data values */
michael@0 344 p=(uint32_t *)dest16;
michael@0 345 trie->data16=NULL;
michael@0 346 trie->data32=p;
michael@0 347 for(i=0; i<0x80; ++i) {
michael@0 348 *p++=initialValue;
michael@0 349 }
michael@0 350 for(; i<0xc0; ++i) {
michael@0 351 *p++=errorValue;
michael@0 352 }
michael@0 353 /* highValue and reserved values */
michael@0 354 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
michael@0 355 *p++=initialValue;
michael@0 356 }
michael@0 357 break;
michael@0 358 default:
michael@0 359 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 360 return 0;
michael@0 361 }
michael@0 362
michael@0 363 return trie;
michael@0 364 }
michael@0 365
michael@0 366 U_CAPI void U_EXPORT2
michael@0 367 utrie2_close(UTrie2 *trie) {
michael@0 368 if(trie!=NULL) {
michael@0 369 if(trie->isMemoryOwned) {
michael@0 370 uprv_free(trie->memory);
michael@0 371 }
michael@0 372 if(trie->newTrie!=NULL) {
michael@0 373 uprv_free(trie->newTrie->data);
michael@0 374 uprv_free(trie->newTrie);
michael@0 375 }
michael@0 376 uprv_free(trie);
michael@0 377 }
michael@0 378 }
michael@0 379
michael@0 380 U_CAPI int32_t U_EXPORT2
michael@0 381 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) {
michael@0 382 uint32_t signature;
michael@0 383 if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) {
michael@0 384 return 0;
michael@0 385 }
michael@0 386 signature=*(const uint32_t *)data;
michael@0 387 if(signature==UTRIE2_SIG) {
michael@0 388 return 2;
michael@0 389 }
michael@0 390 if(anyEndianOk && signature==UTRIE2_OE_SIG) {
michael@0 391 return 2;
michael@0 392 }
michael@0 393 if(signature==UTRIE_SIG) {
michael@0 394 return 1;
michael@0 395 }
michael@0 396 if(anyEndianOk && signature==UTRIE_OE_SIG) {
michael@0 397 return 1;
michael@0 398 }
michael@0 399 return 0;
michael@0 400 }
michael@0 401
michael@0 402 U_CAPI int32_t U_EXPORT2
michael@0 403 utrie2_swap(const UDataSwapper *ds,
michael@0 404 const void *inData, int32_t length, void *outData,
michael@0 405 UErrorCode *pErrorCode) {
michael@0 406 const UTrie2Header *inTrie;
michael@0 407 UTrie2Header trie;
michael@0 408 int32_t dataLength, size;
michael@0 409 UTrie2ValueBits valueBits;
michael@0 410
michael@0 411 if(U_FAILURE(*pErrorCode)) {
michael@0 412 return 0;
michael@0 413 }
michael@0 414 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
michael@0 415 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 416 return 0;
michael@0 417 }
michael@0 418
michael@0 419 /* setup and swapping */
michael@0 420 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
michael@0 421 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 422 return 0;
michael@0 423 }
michael@0 424
michael@0 425 inTrie=(const UTrie2Header *)inData;
michael@0 426 trie.signature=ds->readUInt32(inTrie->signature);
michael@0 427 trie.options=ds->readUInt16(inTrie->options);
michael@0 428 trie.indexLength=ds->readUInt16(inTrie->indexLength);
michael@0 429 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
michael@0 430
michael@0 431 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
michael@0 432 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
michael@0 433
michael@0 434 if( trie.signature!=UTRIE2_SIG ||
michael@0 435 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
michael@0 436 trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
michael@0 437 dataLength<UTRIE2_DATA_START_OFFSET
michael@0 438 ) {
michael@0 439 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
michael@0 440 return 0;
michael@0 441 }
michael@0 442
michael@0 443 size=sizeof(UTrie2Header)+trie.indexLength*2;
michael@0 444 switch(valueBits) {
michael@0 445 case UTRIE2_16_VALUE_BITS:
michael@0 446 size+=dataLength*2;
michael@0 447 break;
michael@0 448 case UTRIE2_32_VALUE_BITS:
michael@0 449 size+=dataLength*4;
michael@0 450 break;
michael@0 451 default:
michael@0 452 *pErrorCode=U_INVALID_FORMAT_ERROR;
michael@0 453 return 0;
michael@0 454 }
michael@0 455
michael@0 456 if(length>=0) {
michael@0 457 UTrie2Header *outTrie;
michael@0 458
michael@0 459 if(length<size) {
michael@0 460 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 461 return 0;
michael@0 462 }
michael@0 463
michael@0 464 outTrie=(UTrie2Header *)outData;
michael@0 465
michael@0 466 /* swap the header */
michael@0 467 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
michael@0 468 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
michael@0 469
michael@0 470 /* swap the index and the data */
michael@0 471 switch(valueBits) {
michael@0 472 case UTRIE2_16_VALUE_BITS:
michael@0 473 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
michael@0 474 break;
michael@0 475 case UTRIE2_32_VALUE_BITS:
michael@0 476 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
michael@0 477 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
michael@0 478 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
michael@0 479 break;
michael@0 480 default:
michael@0 481 *pErrorCode=U_INVALID_FORMAT_ERROR;
michael@0 482 return 0;
michael@0 483 }
michael@0 484 }
michael@0 485
michael@0 486 return size;
michael@0 487 }
michael@0 488
michael@0 489 // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c
michael@0 490 // to avoid a dependency from utrie2.cpp on utrie.c.
michael@0 491
michael@0 492 /* enumeration -------------------------------------------------------------- */
michael@0 493
michael@0 494 #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b))
michael@0 495
michael@0 496 /* default UTrie2EnumValue() returns the input value itself */
michael@0 497 static uint32_t U_CALLCONV
michael@0 498 enumSameValue(const void * /*context*/, uint32_t value) {
michael@0 499 return value;
michael@0 500 }
michael@0 501
michael@0 502 /**
michael@0 503 * Enumerate all ranges of code points with the same relevant values.
michael@0 504 * The values are transformed from the raw trie entries by the enumValue function.
michael@0 505 *
michael@0 506 * Currently requires start<limit and both start and limit must be multiples
michael@0 507 * of UTRIE2_DATA_BLOCK_LENGTH.
michael@0 508 *
michael@0 509 * Optimizations:
michael@0 510 * - Skip a whole block if we know that it is filled with a single value,
michael@0 511 * and it is the same as we visited just before.
michael@0 512 * - Handle the null block specially because we know a priori that it is filled
michael@0 513 * with a single value.
michael@0 514 */
michael@0 515 static void
michael@0 516 enumEitherTrie(const UTrie2 *trie,
michael@0 517 UChar32 start, UChar32 limit,
michael@0 518 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
michael@0 519 const uint32_t *data32;
michael@0 520 const uint16_t *idx;
michael@0 521
michael@0 522 uint32_t value, prevValue, initialValue;
michael@0 523 UChar32 c, prev, highStart;
michael@0 524 int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
michael@0 525
michael@0 526 if(enumRange==NULL) {
michael@0 527 return;
michael@0 528 }
michael@0 529 if(enumValue==NULL) {
michael@0 530 enumValue=enumSameValue;
michael@0 531 }
michael@0 532
michael@0 533 if(trie->newTrie==NULL) {
michael@0 534 /* frozen trie */
michael@0 535 idx=trie->index;
michael@0 536 U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */
michael@0 537 data32=trie->data32;
michael@0 538
michael@0 539 index2NullOffset=trie->index2NullOffset;
michael@0 540 nullBlock=trie->dataNullOffset;
michael@0 541 } else {
michael@0 542 /* unfrozen, mutable trie */
michael@0 543 idx=NULL;
michael@0 544 data32=trie->newTrie->data;
michael@0 545 U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */
michael@0 546
michael@0 547 index2NullOffset=trie->newTrie->index2NullOffset;
michael@0 548 nullBlock=trie->newTrie->dataNullOffset;
michael@0 549 }
michael@0 550
michael@0 551 highStart=trie->highStart;
michael@0 552
michael@0 553 /* get the enumeration value that corresponds to an initial-value trie data entry */
michael@0 554 initialValue=enumValue(context, trie->initialValue);
michael@0 555
michael@0 556 /* set variables for previous range */
michael@0 557 prevI2Block=-1;
michael@0 558 prevBlock=-1;
michael@0 559 prev=start;
michael@0 560 prevValue=0;
michael@0 561
michael@0 562 /* enumerate index-2 blocks */
michael@0 563 for(c=start; c<limit && c<highStart;) {
michael@0 564 /* Code point limit for iterating inside this i2Block. */
michael@0 565 UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
michael@0 566 if(limit<tempLimit) {
michael@0 567 tempLimit=limit;
michael@0 568 }
michael@0 569 if(c<=0xffff) {
michael@0 570 if(!U_IS_SURROGATE(c)) {
michael@0 571 i2Block=c>>UTRIE2_SHIFT_2;
michael@0 572 } else if(U_IS_SURROGATE_LEAD(c)) {
michael@0 573 /*
michael@0 574 * Enumerate values for lead surrogate code points, not code units:
michael@0 575 * This special block has half the normal length.
michael@0 576 */
michael@0 577 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
michael@0 578 tempLimit=MIN_VALUE(0xdc00, limit);
michael@0 579 } else {
michael@0 580 /*
michael@0 581 * Switch back to the normal part of the index-2 table.
michael@0 582 * Enumerate the second half of the surrogates block.
michael@0 583 */
michael@0 584 i2Block=0xd800>>UTRIE2_SHIFT_2;
michael@0 585 tempLimit=MIN_VALUE(0xe000, limit);
michael@0 586 }
michael@0 587 } else {
michael@0 588 /* supplementary code points */
michael@0 589 if(idx!=NULL) {
michael@0 590 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
michael@0 591 (c>>UTRIE2_SHIFT_1)];
michael@0 592 } else {
michael@0 593 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
michael@0 594 }
michael@0 595 if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
michael@0 596 /*
michael@0 597 * The index-2 block is the same as the previous one, and filled with prevValue.
michael@0 598 * Only possible for supplementary code points because the linear-BMP index-2
michael@0 599 * table creates unique i2Block values.
michael@0 600 */
michael@0 601 c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
michael@0 602 continue;
michael@0 603 }
michael@0 604 }
michael@0 605 prevI2Block=i2Block;
michael@0 606 if(i2Block==index2NullOffset) {
michael@0 607 /* this is the null index-2 block */
michael@0 608 if(prevValue!=initialValue) {
michael@0 609 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
michael@0 610 return;
michael@0 611 }
michael@0 612 prevBlock=nullBlock;
michael@0 613 prev=c;
michael@0 614 prevValue=initialValue;
michael@0 615 }
michael@0 616 c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
michael@0 617 } else {
michael@0 618 /* enumerate data blocks for one index-2 block */
michael@0 619 int32_t i2, i2Limit;
michael@0 620 i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
michael@0 621 if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
michael@0 622 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
michael@0 623 } else {
michael@0 624 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
michael@0 625 }
michael@0 626 for(; i2<i2Limit; ++i2) {
michael@0 627 if(idx!=NULL) {
michael@0 628 block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT;
michael@0 629 } else {
michael@0 630 block=trie->newTrie->index2[i2Block+i2];
michael@0 631 }
michael@0 632 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
michael@0 633 /* the block is the same as the previous one, and filled with prevValue */
michael@0 634 c+=UTRIE2_DATA_BLOCK_LENGTH;
michael@0 635 continue;
michael@0 636 }
michael@0 637 prevBlock=block;
michael@0 638 if(block==nullBlock) {
michael@0 639 /* this is the null data block */
michael@0 640 if(prevValue!=initialValue) {
michael@0 641 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
michael@0 642 return;
michael@0 643 }
michael@0 644 prev=c;
michael@0 645 prevValue=initialValue;
michael@0 646 }
michael@0 647 c+=UTRIE2_DATA_BLOCK_LENGTH;
michael@0 648 } else {
michael@0 649 for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
michael@0 650 value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
michael@0 651 if(value!=prevValue) {
michael@0 652 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
michael@0 653 return;
michael@0 654 }
michael@0 655 prev=c;
michael@0 656 prevValue=value;
michael@0 657 }
michael@0 658 ++c;
michael@0 659 }
michael@0 660 }
michael@0 661 }
michael@0 662 }
michael@0 663 }
michael@0 664
michael@0 665 if(c>limit) {
michael@0 666 c=limit; /* could be higher if in the index2NullOffset */
michael@0 667 } else if(c<limit) {
michael@0 668 /* c==highStart<limit */
michael@0 669 uint32_t highValue;
michael@0 670 if(idx!=NULL) {
michael@0 671 highValue=
michael@0 672 data32!=NULL ?
michael@0 673 data32[trie->highValueIndex] :
michael@0 674 idx[trie->highValueIndex];
michael@0 675 } else {
michael@0 676 highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
michael@0 677 }
michael@0 678 value=enumValue(context, highValue);
michael@0 679 if(value!=prevValue) {
michael@0 680 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
michael@0 681 return;
michael@0 682 }
michael@0 683 prev=c;
michael@0 684 prevValue=value;
michael@0 685 }
michael@0 686 c=limit;
michael@0 687 }
michael@0 688
michael@0 689 /* deliver last range */
michael@0 690 enumRange(context, prev, c-1, prevValue);
michael@0 691 }
michael@0 692
michael@0 693 U_CAPI void U_EXPORT2
michael@0 694 utrie2_enum(const UTrie2 *trie,
michael@0 695 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
michael@0 696 enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context);
michael@0 697 }
michael@0 698
michael@0 699 U_CAPI void U_EXPORT2
michael@0 700 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
michael@0 701 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
michael@0 702 const void *context) {
michael@0 703 if(!U16_IS_LEAD(lead)) {
michael@0 704 return;
michael@0 705 }
michael@0 706 lead=(lead-0xd7c0)<<10; /* start code point */
michael@0 707 enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context);
michael@0 708 }
michael@0 709
michael@0 710 /* C++ convenience wrappers ------------------------------------------------- */
michael@0 711
michael@0 712 U_NAMESPACE_BEGIN
michael@0 713
michael@0 714 uint16_t BackwardUTrie2StringIterator::previous16() {
michael@0 715 codePointLimit=codePointStart;
michael@0 716 if(start>=codePointStart) {
michael@0 717 codePoint=U_SENTINEL;
michael@0 718 return 0;
michael@0 719 }
michael@0 720 uint16_t result;
michael@0 721 UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result);
michael@0 722 return result;
michael@0 723 }
michael@0 724
michael@0 725 uint16_t ForwardUTrie2StringIterator::next16() {
michael@0 726 codePointStart=codePointLimit;
michael@0 727 if(codePointLimit==limit) {
michael@0 728 codePoint=U_SENTINEL;
michael@0 729 return 0;
michael@0 730 }
michael@0 731 uint16_t result;
michael@0 732 UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result);
michael@0 733 return result;
michael@0 734 }
michael@0 735
michael@0 736 U_NAMESPACE_END

mercurial