intl/icu/source/common/utrie2_builder.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2001-2011, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ******************************************************************************
michael@0 8 * file name: utrie2_builder.cpp
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2008sep26 (split off from utrie2.c)
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * This is a common implementation of a Unicode trie.
michael@0 17 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
michael@0 18 * Unicode code points (0..0x10ffff).
michael@0 19 * This is the second common version of a Unicode trie (hence the name UTrie2).
michael@0 20 * See utrie2.h for a comparison.
michael@0 21 *
michael@0 22 * This file contains only the builder code.
michael@0 23 * See utrie2.c for the runtime and enumeration code.
michael@0 24 */
michael@0 25 #ifdef UTRIE2_DEBUG
michael@0 26 # include <stdio.h>
michael@0 27 #endif
michael@0 28
michael@0 29 #include "unicode/utypes.h"
michael@0 30 #include "cmemory.h"
michael@0 31 #include "utrie2.h"
michael@0 32 #include "utrie2_impl.h"
michael@0 33
michael@0 34 #include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */
michael@0 35
michael@0 36 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
michael@0 37
michael@0 38 /* Implementation notes ----------------------------------------------------- */
michael@0 39
michael@0 40 /*
michael@0 41 * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
michael@0 42 * have been chosen to minimize trie sizes overall.
michael@0 43 * Most of the code is flexible enough to work with a range of values,
michael@0 44 * within certain limits.
michael@0 45 *
michael@0 46 * Exception: Support for separate values for lead surrogate code _units_
michael@0 47 * vs. code _points_ was added after the constants were fixed,
michael@0 48 * and has not been tested nor particularly designed for different constant values.
michael@0 49 * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
michael@0 50 * part and back.)
michael@0 51 *
michael@0 52 * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
michael@0 53 * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
michael@0 54 * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
michael@0 55 * remapping) stops working.
michael@0 56 *
michael@0 57 * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
michael@0 58 * assumes that a single index-2 block is used for 0x400 code points
michael@0 59 * corresponding to one lead surrogate.
michael@0 60 *
michael@0 61 * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
michael@0 62 * more than one Unicode plane, and the split of the index-2 table into a BMP
michael@0 63 * part and a supplementary part, with a gap in between, would not work.
michael@0 64 *
michael@0 65 * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
michael@0 66 * there is data with more than 64k distinct values,
michael@0 67 * for example for Unihan collation with a separate collation weight per
michael@0 68 * Han character.
michael@0 69 */
michael@0 70
michael@0 71 /* Building a trie ----------------------------------------------------------*/
michael@0 72
michael@0 73 enum {
michael@0 74 /** The null index-2 block, following the gap in the index-2 table. */
michael@0 75 UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
michael@0 76
michael@0 77 /** The start of allocated index-2 blocks. */
michael@0 78 UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
michael@0 79
michael@0 80 /**
michael@0 81 * The null data block.
michael@0 82 * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
michael@0 83 * to work with 6-bit trail bytes from 2-byte UTF-8.
michael@0 84 */
michael@0 85 UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
michael@0 86
michael@0 87 /** The start of allocated data blocks. */
michael@0 88 UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
michael@0 89
michael@0 90 /**
michael@0 91 * The start of data blocks for U+0800 and above.
michael@0 92 * Below, compaction uses a block length of 64 for 2-byte UTF-8.
michael@0 93 * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
michael@0 94 * Data values for 0x780 code points beyond ASCII.
michael@0 95 */
michael@0 96 UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
michael@0 97 };
michael@0 98
michael@0 99 /* Start with allocation of 16k data entries. */
michael@0 100 #define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
michael@0 101
michael@0 102 /* Grow about 8x each time. */
michael@0 103 #define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
michael@0 104
michael@0 105 static int32_t
michael@0 106 allocIndex2Block(UNewTrie2 *trie);
michael@0 107
michael@0 108 U_CAPI UTrie2 * U_EXPORT2
michael@0 109 utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
michael@0 110 UTrie2 *trie;
michael@0 111 UNewTrie2 *newTrie;
michael@0 112 uint32_t *data;
michael@0 113 int32_t i, j;
michael@0 114
michael@0 115 if(U_FAILURE(*pErrorCode)) {
michael@0 116 return NULL;
michael@0 117 }
michael@0 118
michael@0 119 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
michael@0 120 newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
michael@0 121 data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
michael@0 122 if(trie==NULL || newTrie==NULL || data==NULL) {
michael@0 123 uprv_free(trie);
michael@0 124 uprv_free(newTrie);
michael@0 125 uprv_free(data);
michael@0 126 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 127 return 0;
michael@0 128 }
michael@0 129
michael@0 130 uprv_memset(trie, 0, sizeof(UTrie2));
michael@0 131 trie->initialValue=initialValue;
michael@0 132 trie->errorValue=errorValue;
michael@0 133 trie->highStart=0x110000;
michael@0 134 trie->newTrie=newTrie;
michael@0 135
michael@0 136 newTrie->data=data;
michael@0 137 newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
michael@0 138 newTrie->initialValue=initialValue;
michael@0 139 newTrie->errorValue=errorValue;
michael@0 140 newTrie->highStart=0x110000;
michael@0 141 newTrie->firstFreeBlock=0; /* no free block in the list */
michael@0 142 newTrie->isCompacted=FALSE;
michael@0 143
michael@0 144 /*
michael@0 145 * preallocate and reset
michael@0 146 * - ASCII
michael@0 147 * - the bad-UTF-8-data block
michael@0 148 * - the null data block
michael@0 149 */
michael@0 150 for(i=0; i<0x80; ++i) {
michael@0 151 newTrie->data[i]=initialValue;
michael@0 152 }
michael@0 153 for(; i<0xc0; ++i) {
michael@0 154 newTrie->data[i]=errorValue;
michael@0 155 }
michael@0 156 for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
michael@0 157 newTrie->data[i]=initialValue;
michael@0 158 }
michael@0 159 newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
michael@0 160 newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
michael@0 161
michael@0 162 /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
michael@0 163 for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
michael@0 164 newTrie->index2[i]=j;
michael@0 165 newTrie->map[i]=1;
michael@0 166 }
michael@0 167 /* reference counts for the bad-UTF-8-data block */
michael@0 168 for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
michael@0 169 newTrie->map[i]=0;
michael@0 170 }
michael@0 171 /*
michael@0 172 * Reference counts for the null data block: all blocks except for the ASCII blocks.
michael@0 173 * Plus 1 so that we don't drop this block during compaction.
michael@0 174 * Plus as many as needed for lead surrogate code points.
michael@0 175 */
michael@0 176 /* i==newTrie->dataNullOffset */
michael@0 177 newTrie->map[i++]=
michael@0 178 (0x110000>>UTRIE2_SHIFT_2)-
michael@0 179 (0x80>>UTRIE2_SHIFT_2)+
michael@0 180 1+
michael@0 181 UTRIE2_LSCP_INDEX_2_LENGTH;
michael@0 182 j+=UTRIE2_DATA_BLOCK_LENGTH;
michael@0 183 for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
michael@0 184 newTrie->map[i]=0;
michael@0 185 }
michael@0 186
michael@0 187 /*
michael@0 188 * set the remaining indexes in the BMP index-2 block
michael@0 189 * to the null data block
michael@0 190 */
michael@0 191 for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
michael@0 192 newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
michael@0 193 }
michael@0 194
michael@0 195 /*
michael@0 196 * Fill the index gap with impossible values so that compaction
michael@0 197 * does not overlap other index-2 blocks with the gap.
michael@0 198 */
michael@0 199 for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
michael@0 200 newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
michael@0 201 }
michael@0 202
michael@0 203 /* set the indexes in the null index-2 block */
michael@0 204 for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
michael@0 205 newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
michael@0 206 }
michael@0 207 newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
michael@0 208 newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
michael@0 209
michael@0 210 /* set the index-1 indexes for the linear index-2 block */
michael@0 211 for(i=0, j=0;
michael@0 212 i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
michael@0 213 ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
michael@0 214 ) {
michael@0 215 newTrie->index1[i]=j;
michael@0 216 }
michael@0 217
michael@0 218 /* set the remaining index-1 indexes to the null index-2 block */
michael@0 219 for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
michael@0 220 newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
michael@0 221 }
michael@0 222
michael@0 223 /*
michael@0 224 * Preallocate and reset data for U+0080..U+07ff,
michael@0 225 * for 2-byte UTF-8 which will be compacted in 64-blocks
michael@0 226 * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
michael@0 227 */
michael@0 228 for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
michael@0 229 utrie2_set32(trie, i, initialValue, pErrorCode);
michael@0 230 }
michael@0 231
michael@0 232 return trie;
michael@0 233 }
michael@0 234
michael@0 235 static UNewTrie2 *
michael@0 236 cloneBuilder(const UNewTrie2 *other) {
michael@0 237 UNewTrie2 *trie;
michael@0 238
michael@0 239 trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
michael@0 240 if(trie==NULL) {
michael@0 241 return NULL;
michael@0 242 }
michael@0 243
michael@0 244 trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);
michael@0 245 if(trie->data==NULL) {
michael@0 246 uprv_free(trie);
michael@0 247 return NULL;
michael@0 248 }
michael@0 249 trie->dataCapacity=other->dataCapacity;
michael@0 250
michael@0 251 /* clone data */
michael@0 252 uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
michael@0 253 uprv_memcpy(trie->index2, other->index2, other->index2Length*4);
michael@0 254 trie->index2NullOffset=other->index2NullOffset;
michael@0 255 trie->index2Length=other->index2Length;
michael@0 256
michael@0 257 uprv_memcpy(trie->data, other->data, other->dataLength*4);
michael@0 258 trie->dataNullOffset=other->dataNullOffset;
michael@0 259 trie->dataLength=other->dataLength;
michael@0 260
michael@0 261 /* reference counters */
michael@0 262 if(other->isCompacted) {
michael@0 263 trie->firstFreeBlock=0;
michael@0 264 } else {
michael@0 265 uprv_memcpy(trie->map, other->map, (other->dataLength>>UTRIE2_SHIFT_2)*4);
michael@0 266 trie->firstFreeBlock=other->firstFreeBlock;
michael@0 267 }
michael@0 268
michael@0 269 trie->initialValue=other->initialValue;
michael@0 270 trie->errorValue=other->errorValue;
michael@0 271 trie->highStart=other->highStart;
michael@0 272 trie->isCompacted=other->isCompacted;
michael@0 273
michael@0 274 return trie;
michael@0 275 }
michael@0 276
michael@0 277 U_CAPI UTrie2 * U_EXPORT2
michael@0 278 utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
michael@0 279 UTrie2 *trie;
michael@0 280
michael@0 281 if(U_FAILURE(*pErrorCode)) {
michael@0 282 return NULL;
michael@0 283 }
michael@0 284 if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
michael@0 285 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 286 return NULL;
michael@0 287 }
michael@0 288
michael@0 289 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
michael@0 290 if(trie==NULL) {
michael@0 291 return NULL;
michael@0 292 }
michael@0 293 uprv_memcpy(trie, other, sizeof(UTrie2));
michael@0 294
michael@0 295 if(other->memory!=NULL) {
michael@0 296 trie->memory=uprv_malloc(other->length);
michael@0 297 if(trie->memory!=NULL) {
michael@0 298 trie->isMemoryOwned=TRUE;
michael@0 299 uprv_memcpy(trie->memory, other->memory, other->length);
michael@0 300
michael@0 301 /* make the clone's pointers point to its own memory */
michael@0 302 trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
michael@0 303 if(other->data16!=NULL) {
michael@0 304 trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
michael@0 305 }
michael@0 306 if(other->data32!=NULL) {
michael@0 307 trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
michael@0 308 }
michael@0 309 }
michael@0 310 } else /* other->newTrie!=NULL */ {
michael@0 311 trie->newTrie=cloneBuilder(other->newTrie);
michael@0 312 }
michael@0 313
michael@0 314 if(trie->memory==NULL && trie->newTrie==NULL) {
michael@0 315 uprv_free(trie);
michael@0 316 trie=NULL;
michael@0 317 }
michael@0 318 return trie;
michael@0 319 }
michael@0 320
michael@0 321 typedef struct NewTrieAndStatus {
michael@0 322 UTrie2 *trie;
michael@0 323 UErrorCode errorCode;
michael@0 324 UBool exclusiveLimit; /* rather than inclusive range end */
michael@0 325 } NewTrieAndStatus;
michael@0 326
michael@0 327 static UBool U_CALLCONV
michael@0 328 copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
michael@0 329 NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
michael@0 330 if(value!=nt->trie->initialValue) {
michael@0 331 if(nt->exclusiveLimit) {
michael@0 332 --end;
michael@0 333 }
michael@0 334 if(start==end) {
michael@0 335 utrie2_set32(nt->trie, start, value, &nt->errorCode);
michael@0 336 } else {
michael@0 337 utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode);
michael@0 338 }
michael@0 339 return U_SUCCESS(nt->errorCode);
michael@0 340 } else {
michael@0 341 return TRUE;
michael@0 342 }
michael@0 343 }
michael@0 344
michael@0 345 #ifdef UTRIE2_DEBUG
michael@0 346 static void
michael@0 347 utrie_printLengths(const UTrie *trie) {
michael@0 348 long indexLength=trie->indexLength;
michael@0 349 long dataLength=(long)trie->dataLength;
michael@0 350 long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
michael@0 351 printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n",
michael@0 352 indexLength, dataLength, totalLength);
michael@0 353 }
michael@0 354
michael@0 355 static void
michael@0 356 utrie2_printLengths(const UTrie2 *trie, const char *which) {
michael@0 357 long indexLength=trie->indexLength;
michael@0 358 long dataLength=(long)trie->dataLength;
michael@0 359 long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
michael@0 360 printf("**UTrie2Lengths(%s)** index:%6ld data:%6ld serialized:%6ld\n",
michael@0 361 which, indexLength, dataLength, totalLength);
michael@0 362 }
michael@0 363 #endif
michael@0 364
michael@0 365 U_CAPI UTrie2 * U_EXPORT2
michael@0 366 utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
michael@0 367 NewTrieAndStatus context;
michael@0 368 UChar lead;
michael@0 369
michael@0 370 if(U_FAILURE(*pErrorCode)) {
michael@0 371 return NULL;
michael@0 372 }
michael@0 373 if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
michael@0 374 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 375 return NULL;
michael@0 376 }
michael@0 377 if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
michael@0 378 return utrie2_clone(other, pErrorCode); /* clone an unfrozen trie */
michael@0 379 }
michael@0 380
michael@0 381 /* Clone the frozen trie by enumerating it and building a new one. */
michael@0 382 context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
michael@0 383 if(U_FAILURE(*pErrorCode)) {
michael@0 384 return NULL;
michael@0 385 }
michael@0 386 context.exclusiveLimit=FALSE;
michael@0 387 context.errorCode=*pErrorCode;
michael@0 388 utrie2_enum(other, NULL, copyEnumRange, &context);
michael@0 389 *pErrorCode=context.errorCode;
michael@0 390 for(lead=0xd800; lead<0xdc00; ++lead) {
michael@0 391 uint32_t value;
michael@0 392 if(other->data32==NULL) {
michael@0 393 value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
michael@0 394 } else {
michael@0 395 value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
michael@0 396 }
michael@0 397 if(value!=other->initialValue) {
michael@0 398 utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
michael@0 399 }
michael@0 400 }
michael@0 401 if(U_FAILURE(*pErrorCode)) {
michael@0 402 utrie2_close(context.trie);
michael@0 403 context.trie=NULL;
michael@0 404 }
michael@0 405 return context.trie;
michael@0 406 }
michael@0 407
michael@0 408 /* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
michael@0 409 U_CAPI UTrie2 * U_EXPORT2
michael@0 410 utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
michael@0 411 NewTrieAndStatus context;
michael@0 412 UChar lead;
michael@0 413
michael@0 414 if(U_FAILURE(*pErrorCode)) {
michael@0 415 return NULL;
michael@0 416 }
michael@0 417 if(trie1==NULL) {
michael@0 418 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 419 return NULL;
michael@0 420 }
michael@0 421 context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
michael@0 422 if(U_FAILURE(*pErrorCode)) {
michael@0 423 return NULL;
michael@0 424 }
michael@0 425 context.exclusiveLimit=TRUE;
michael@0 426 context.errorCode=*pErrorCode;
michael@0 427 utrie_enum(trie1, NULL, copyEnumRange, &context);
michael@0 428 *pErrorCode=context.errorCode;
michael@0 429 for(lead=0xd800; lead<0xdc00; ++lead) {
michael@0 430 uint32_t value;
michael@0 431 if(trie1->data32==NULL) {
michael@0 432 value=UTRIE_GET16_FROM_LEAD(trie1, lead);
michael@0 433 } else {
michael@0 434 value=UTRIE_GET32_FROM_LEAD(trie1, lead);
michael@0 435 }
michael@0 436 if(value!=trie1->initialValue) {
michael@0 437 utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
michael@0 438 }
michael@0 439 }
michael@0 440 if(U_SUCCESS(*pErrorCode)) {
michael@0 441 utrie2_freeze(context.trie,
michael@0 442 trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
michael@0 443 pErrorCode);
michael@0 444 }
michael@0 445 #ifdef UTRIE2_DEBUG
michael@0 446 if(U_SUCCESS(*pErrorCode)) {
michael@0 447 utrie_printLengths(trie1);
michael@0 448 utrie2_printLengths(context.trie, "fromUTrie");
michael@0 449 }
michael@0 450 #endif
michael@0 451 if(U_FAILURE(*pErrorCode)) {
michael@0 452 utrie2_close(context.trie);
michael@0 453 context.trie=NULL;
michael@0 454 }
michael@0 455 return context.trie;
michael@0 456 }
michael@0 457
michael@0 458 static inline UBool
michael@0 459 isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
michael@0 460 int32_t i2, block;
michael@0 461
michael@0 462 if(U_IS_LEAD(c) && forLSCP) {
michael@0 463 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
michael@0 464 (c>>UTRIE2_SHIFT_2);
michael@0 465 } else {
michael@0 466 i2=trie->index1[c>>UTRIE2_SHIFT_1]+
michael@0 467 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
michael@0 468 }
michael@0 469 block=trie->index2[i2];
michael@0 470 return (UBool)(block==trie->dataNullOffset);
michael@0 471 }
michael@0 472
michael@0 473 static int32_t
michael@0 474 allocIndex2Block(UNewTrie2 *trie) {
michael@0 475 int32_t newBlock, newTop;
michael@0 476
michael@0 477 newBlock=trie->index2Length;
michael@0 478 newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
michael@0 479 if(newTop>LENGTHOF(trie->index2)) {
michael@0 480 /*
michael@0 481 * Should never occur.
michael@0 482 * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
michael@0 483 * or the code writes more values than should be possible.
michael@0 484 */
michael@0 485 return -1;
michael@0 486 }
michael@0 487 trie->index2Length=newTop;
michael@0 488 uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
michael@0 489 return newBlock;
michael@0 490 }
michael@0 491
michael@0 492 static int32_t
michael@0 493 getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
michael@0 494 int32_t i1, i2;
michael@0 495
michael@0 496 if(U_IS_LEAD(c) && forLSCP) {
michael@0 497 return UTRIE2_LSCP_INDEX_2_OFFSET;
michael@0 498 }
michael@0 499
michael@0 500 i1=c>>UTRIE2_SHIFT_1;
michael@0 501 i2=trie->index1[i1];
michael@0 502 if(i2==trie->index2NullOffset) {
michael@0 503 i2=allocIndex2Block(trie);
michael@0 504 if(i2<0) {
michael@0 505 return -1; /* program error */
michael@0 506 }
michael@0 507 trie->index1[i1]=i2;
michael@0 508 }
michael@0 509 return i2;
michael@0 510 }
michael@0 511
michael@0 512 static int32_t
michael@0 513 allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
michael@0 514 int32_t newBlock, newTop;
michael@0 515
michael@0 516 if(trie->firstFreeBlock!=0) {
michael@0 517 /* get the first free block */
michael@0 518 newBlock=trie->firstFreeBlock;
michael@0 519 trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
michael@0 520 } else {
michael@0 521 /* get a new block from the high end */
michael@0 522 newBlock=trie->dataLength;
michael@0 523 newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
michael@0 524 if(newTop>trie->dataCapacity) {
michael@0 525 /* out of memory in the data array */
michael@0 526 int32_t capacity;
michael@0 527 uint32_t *data;
michael@0 528
michael@0 529 if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
michael@0 530 capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
michael@0 531 } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
michael@0 532 capacity=UNEWTRIE2_MAX_DATA_LENGTH;
michael@0 533 } else {
michael@0 534 /*
michael@0 535 * Should never occur.
michael@0 536 * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
michael@0 537 * or the code writes more values than should be possible.
michael@0 538 */
michael@0 539 return -1;
michael@0 540 }
michael@0 541 data=(uint32_t *)uprv_malloc(capacity*4);
michael@0 542 if(data==NULL) {
michael@0 543 return -1;
michael@0 544 }
michael@0 545 uprv_memcpy(data, trie->data, trie->dataLength*4);
michael@0 546 uprv_free(trie->data);
michael@0 547 trie->data=data;
michael@0 548 trie->dataCapacity=capacity;
michael@0 549 }
michael@0 550 trie->dataLength=newTop;
michael@0 551 }
michael@0 552 uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
michael@0 553 trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
michael@0 554 return newBlock;
michael@0 555 }
michael@0 556
michael@0 557 /* call when the block's reference counter reaches 0 */
michael@0 558 static void
michael@0 559 releaseDataBlock(UNewTrie2 *trie, int32_t block) {
michael@0 560 /* put this block at the front of the free-block chain */
michael@0 561 trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
michael@0 562 trie->firstFreeBlock=block;
michael@0 563 }
michael@0 564
michael@0 565 static inline UBool
michael@0 566 isWritableBlock(UNewTrie2 *trie, int32_t block) {
michael@0 567 return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]);
michael@0 568 }
michael@0 569
michael@0 570 static inline void
michael@0 571 setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
michael@0 572 int32_t oldBlock;
michael@0 573 ++trie->map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */
michael@0 574 oldBlock=trie->index2[i2];
michael@0 575 if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
michael@0 576 releaseDataBlock(trie, oldBlock);
michael@0 577 }
michael@0 578 trie->index2[i2]=block;
michael@0 579 }
michael@0 580
michael@0 581 /**
michael@0 582 * No error checking for illegal arguments.
michael@0 583 *
michael@0 584 * @return -1 if no new data block available (out of memory in data array)
michael@0 585 * @internal
michael@0 586 */
michael@0 587 static int32_t
michael@0 588 getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
michael@0 589 int32_t i2, oldBlock, newBlock;
michael@0 590
michael@0 591 i2=getIndex2Block(trie, c, forLSCP);
michael@0 592 if(i2<0) {
michael@0 593 return -1; /* program error */
michael@0 594 }
michael@0 595
michael@0 596 i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
michael@0 597 oldBlock=trie->index2[i2];
michael@0 598 if(isWritableBlock(trie, oldBlock)) {
michael@0 599 return oldBlock;
michael@0 600 }
michael@0 601
michael@0 602 /* allocate a new data block */
michael@0 603 newBlock=allocDataBlock(trie, oldBlock);
michael@0 604 if(newBlock<0) {
michael@0 605 /* out of memory in the data array */
michael@0 606 return -1;
michael@0 607 }
michael@0 608 setIndex2Entry(trie, i2, newBlock);
michael@0 609 return newBlock;
michael@0 610 }
michael@0 611
michael@0 612 /**
michael@0 613 * @return TRUE if the value was successfully set
michael@0 614 */
michael@0 615 static void
michael@0 616 set32(UNewTrie2 *trie,
michael@0 617 UChar32 c, UBool forLSCP, uint32_t value,
michael@0 618 UErrorCode *pErrorCode) {
michael@0 619 int32_t block;
michael@0 620
michael@0 621 if(trie==NULL || trie->isCompacted) {
michael@0 622 *pErrorCode=U_NO_WRITE_PERMISSION;
michael@0 623 return;
michael@0 624 }
michael@0 625
michael@0 626 block=getDataBlock(trie, c, forLSCP);
michael@0 627 if(block<0) {
michael@0 628 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 629 return;
michael@0 630 }
michael@0 631
michael@0 632 trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
michael@0 633 }
michael@0 634
michael@0 635 U_CAPI void U_EXPORT2
michael@0 636 utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
michael@0 637 if(U_FAILURE(*pErrorCode)) {
michael@0 638 return;
michael@0 639 }
michael@0 640 if((uint32_t)c>0x10ffff) {
michael@0 641 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 642 return;
michael@0 643 }
michael@0 644 set32(trie->newTrie, c, TRUE, value, pErrorCode);
michael@0 645 }
michael@0 646
michael@0 647 U_CAPI void U_EXPORT2
michael@0 648 utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
michael@0 649 UChar32 c, uint32_t value,
michael@0 650 UErrorCode *pErrorCode) {
michael@0 651 if(U_FAILURE(*pErrorCode)) {
michael@0 652 return;
michael@0 653 }
michael@0 654 if(!U_IS_LEAD(c)) {
michael@0 655 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 656 return;
michael@0 657 }
michael@0 658 set32(trie->newTrie, c, FALSE, value, pErrorCode);
michael@0 659 }
michael@0 660
michael@0 661 static void
michael@0 662 writeBlock(uint32_t *block, uint32_t value) {
michael@0 663 uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
michael@0 664 while(block<limit) {
michael@0 665 *block++=value;
michael@0 666 }
michael@0 667 }
michael@0 668
michael@0 669 /**
michael@0 670 * initialValue is ignored if overwrite=TRUE
michael@0 671 * @internal
michael@0 672 */
michael@0 673 static void
michael@0 674 fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
michael@0 675 uint32_t value, uint32_t initialValue, UBool overwrite) {
michael@0 676 uint32_t *pLimit;
michael@0 677
michael@0 678 pLimit=block+limit;
michael@0 679 block+=start;
michael@0 680 if(overwrite) {
michael@0 681 while(block<pLimit) {
michael@0 682 *block++=value;
michael@0 683 }
michael@0 684 } else {
michael@0 685 while(block<pLimit) {
michael@0 686 if(*block==initialValue) {
michael@0 687 *block=value;
michael@0 688 }
michael@0 689 ++block;
michael@0 690 }
michael@0 691 }
michael@0 692 }
michael@0 693
michael@0 694 U_CAPI void U_EXPORT2
michael@0 695 utrie2_setRange32(UTrie2 *trie,
michael@0 696 UChar32 start, UChar32 end,
michael@0 697 uint32_t value, UBool overwrite,
michael@0 698 UErrorCode *pErrorCode) {
michael@0 699 /*
michael@0 700 * repeat value in [start..end]
michael@0 701 * mark index values for repeat-data blocks by setting bit 31 of the index values
michael@0 702 * fill around existing values if any, if(overwrite)
michael@0 703 */
michael@0 704 UNewTrie2 *newTrie;
michael@0 705 int32_t block, rest, repeatBlock;
michael@0 706 UChar32 limit;
michael@0 707
michael@0 708 if(U_FAILURE(*pErrorCode)) {
michael@0 709 return;
michael@0 710 }
michael@0 711 if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
michael@0 712 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 713 return;
michael@0 714 }
michael@0 715 newTrie=trie->newTrie;
michael@0 716 if(newTrie==NULL || newTrie->isCompacted) {
michael@0 717 *pErrorCode=U_NO_WRITE_PERMISSION;
michael@0 718 return;
michael@0 719 }
michael@0 720 if(!overwrite && value==newTrie->initialValue) {
michael@0 721 return; /* nothing to do */
michael@0 722 }
michael@0 723
michael@0 724 limit=end+1;
michael@0 725 if(start&UTRIE2_DATA_MASK) {
michael@0 726 UChar32 nextStart;
michael@0 727
michael@0 728 /* set partial block at [start..following block boundary[ */
michael@0 729 block=getDataBlock(newTrie, start, TRUE);
michael@0 730 if(block<0) {
michael@0 731 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 732 return;
michael@0 733 }
michael@0 734
michael@0 735 nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK;
michael@0 736 if(nextStart<=limit) {
michael@0 737 fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
michael@0 738 value, newTrie->initialValue, overwrite);
michael@0 739 start=nextStart;
michael@0 740 } else {
michael@0 741 fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
michael@0 742 value, newTrie->initialValue, overwrite);
michael@0 743 return;
michael@0 744 }
michael@0 745 }
michael@0 746
michael@0 747 /* number of positions in the last, partial block */
michael@0 748 rest=limit&UTRIE2_DATA_MASK;
michael@0 749
michael@0 750 /* round down limit to a block boundary */
michael@0 751 limit&=~UTRIE2_DATA_MASK;
michael@0 752
michael@0 753 /* iterate over all-value blocks */
michael@0 754 if(value==newTrie->initialValue) {
michael@0 755 repeatBlock=newTrie->dataNullOffset;
michael@0 756 } else {
michael@0 757 repeatBlock=-1;
michael@0 758 }
michael@0 759
michael@0 760 while(start<limit) {
michael@0 761 int32_t i2;
michael@0 762 UBool setRepeatBlock=FALSE;
michael@0 763
michael@0 764 if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) {
michael@0 765 start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
michael@0 766 continue;
michael@0 767 }
michael@0 768
michael@0 769 /* get index value */
michael@0 770 i2=getIndex2Block(newTrie, start, TRUE);
michael@0 771 if(i2<0) {
michael@0 772 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
michael@0 773 return;
michael@0 774 }
michael@0 775 i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
michael@0 776 block=newTrie->index2[i2];
michael@0 777 if(isWritableBlock(newTrie, block)) {
michael@0 778 /* already allocated */
michael@0 779 if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
michael@0 780 /*
michael@0 781 * We overwrite all values, and it's not a
michael@0 782 * protected (ASCII-linear or 2-byte UTF-8) block:
michael@0 783 * replace with the repeatBlock.
michael@0 784 */
michael@0 785 setRepeatBlock=TRUE;
michael@0 786 } else {
michael@0 787 /* !overwrite, or protected block: just write the values into this block */
michael@0 788 fillBlock(newTrie->data+block,
michael@0 789 0, UTRIE2_DATA_BLOCK_LENGTH,
michael@0 790 value, newTrie->initialValue, overwrite);
michael@0 791 }
michael@0 792 } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
michael@0 793 /*
michael@0 794 * Set the repeatBlock instead of the null block or previous repeat block:
michael@0 795 *
michael@0 796 * If !isWritableBlock() then all entries in the block have the same value
michael@0 797 * because it's the null block or a range block (the repeatBlock from a previous
michael@0 798 * call to utrie2_setRange32()).
michael@0 799 * No other blocks are used multiple times before compacting.
michael@0 800 *
michael@0 801 * The null block is the only non-writable block with the initialValue because
michael@0 802 * of the repeatBlock initialization above. (If value==initialValue, then
michael@0 803 * the repeatBlock will be the null data block.)
michael@0 804 *
michael@0 805 * We set our repeatBlock if the desired value differs from the block's value,
michael@0 806 * and if we overwrite any data or if the data is all initial values
michael@0 807 * (which is the same as the block being the null block, see above).
michael@0 808 */
michael@0 809 setRepeatBlock=TRUE;
michael@0 810 }
michael@0 811 if(setRepeatBlock) {
michael@0 812 if(repeatBlock>=0) {
michael@0 813 setIndex2Entry(newTrie, i2, repeatBlock);
michael@0 814 } else {
michael@0 815 /* create and set and fill the repeatBlock */
michael@0 816 repeatBlock=getDataBlock(newTrie, start, TRUE);
michael@0 817 if(repeatBlock<0) {
michael@0 818 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 819 return;
michael@0 820 }
michael@0 821 writeBlock(newTrie->data+repeatBlock, value);
michael@0 822 }
michael@0 823 }
michael@0 824
michael@0 825 start+=UTRIE2_DATA_BLOCK_LENGTH;
michael@0 826 }
michael@0 827
michael@0 828 if(rest>0) {
michael@0 829 /* set partial block at [last block boundary..limit[ */
michael@0 830 block=getDataBlock(newTrie, start, TRUE);
michael@0 831 if(block<0) {
michael@0 832 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 833 return;
michael@0 834 }
michael@0 835
michael@0 836 fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
michael@0 837 }
michael@0 838
michael@0 839 return;
michael@0 840 }
michael@0 841
michael@0 842 /* compaction --------------------------------------------------------------- */
michael@0 843
michael@0 844 static inline UBool
michael@0 845 equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
michael@0 846 while(length>0 && *s==*t) {
michael@0 847 ++s;
michael@0 848 ++t;
michael@0 849 --length;
michael@0 850 }
michael@0 851 return (UBool)(length==0);
michael@0 852 }
michael@0 853
michael@0 854 static inline UBool
michael@0 855 equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
michael@0 856 while(length>0 && *s==*t) {
michael@0 857 ++s;
michael@0 858 ++t;
michael@0 859 --length;
michael@0 860 }
michael@0 861 return (UBool)(length==0);
michael@0 862 }
michael@0 863
michael@0 864 static int32_t
michael@0 865 findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
michael@0 866 int32_t block;
michael@0 867
michael@0 868 /* ensure that we do not even partially get past index2Length */
michael@0 869 index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
michael@0 870
michael@0 871 for(block=0; block<=index2Length; ++block) {
michael@0 872 if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
michael@0 873 return block;
michael@0 874 }
michael@0 875 }
michael@0 876 return -1;
michael@0 877 }
michael@0 878
michael@0 879 static int32_t
michael@0 880 findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
michael@0 881 int32_t block;
michael@0 882
michael@0 883 /* ensure that we do not even partially get past dataLength */
michael@0 884 dataLength-=blockLength;
michael@0 885
michael@0 886 for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
michael@0 887 if(equal_uint32(data+block, data+otherBlock, blockLength)) {
michael@0 888 return block;
michael@0 889 }
michael@0 890 }
michael@0 891 return -1;
michael@0 892 }
michael@0 893
michael@0 894 /*
michael@0 895 * Find the start of the last range in the trie by enumerating backward.
michael@0 896 * Indexes for supplementary code points higher than this will be omitted.
michael@0 897 */
michael@0 898 static UChar32
michael@0 899 findHighStart(UNewTrie2 *trie, uint32_t highValue) {
michael@0 900 const uint32_t *data32;
michael@0 901
michael@0 902 uint32_t value, initialValue;
michael@0 903 UChar32 c, prev;
michael@0 904 int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
michael@0 905
michael@0 906 data32=trie->data;
michael@0 907 initialValue=trie->initialValue;
michael@0 908
michael@0 909 index2NullOffset=trie->index2NullOffset;
michael@0 910 nullBlock=trie->dataNullOffset;
michael@0 911
michael@0 912 /* set variables for previous range */
michael@0 913 if(highValue==initialValue) {
michael@0 914 prevI2Block=index2NullOffset;
michael@0 915 prevBlock=nullBlock;
michael@0 916 } else {
michael@0 917 prevI2Block=-1;
michael@0 918 prevBlock=-1;
michael@0 919 }
michael@0 920 prev=0x110000;
michael@0 921
michael@0 922 /* enumerate index-2 blocks */
michael@0 923 i1=UNEWTRIE2_INDEX_1_LENGTH;
michael@0 924 c=prev;
michael@0 925 while(c>0) {
michael@0 926 i2Block=trie->index1[--i1];
michael@0 927 if(i2Block==prevI2Block) {
michael@0 928 /* the index-2 block is the same as the previous one, and filled with highValue */
michael@0 929 c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
michael@0 930 continue;
michael@0 931 }
michael@0 932 prevI2Block=i2Block;
michael@0 933 if(i2Block==index2NullOffset) {
michael@0 934 /* this is the null index-2 block */
michael@0 935 if(highValue!=initialValue) {
michael@0 936 return c;
michael@0 937 }
michael@0 938 c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
michael@0 939 } else {
michael@0 940 /* enumerate data blocks for one index-2 block */
michael@0 941 for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
michael@0 942 block=trie->index2[i2Block+ --i2];
michael@0 943 if(block==prevBlock) {
michael@0 944 /* the block is the same as the previous one, and filled with highValue */
michael@0 945 c-=UTRIE2_DATA_BLOCK_LENGTH;
michael@0 946 continue;
michael@0 947 }
michael@0 948 prevBlock=block;
michael@0 949 if(block==nullBlock) {
michael@0 950 /* this is the null data block */
michael@0 951 if(highValue!=initialValue) {
michael@0 952 return c;
michael@0 953 }
michael@0 954 c-=UTRIE2_DATA_BLOCK_LENGTH;
michael@0 955 } else {
michael@0 956 for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
michael@0 957 value=data32[block+ --j];
michael@0 958 if(value!=highValue) {
michael@0 959 return c;
michael@0 960 }
michael@0 961 --c;
michael@0 962 }
michael@0 963 }
michael@0 964 }
michael@0 965 }
michael@0 966 }
michael@0 967
michael@0 968 /* deliver last range */
michael@0 969 return 0;
michael@0 970 }
michael@0 971
michael@0 972 /*
michael@0 973 * Compact a build-time trie.
michael@0 974 *
michael@0 975 * The compaction
michael@0 976 * - removes blocks that are identical with earlier ones
michael@0 977 * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
michael@0 978 * - moves blocks in steps of the data granularity
michael@0 979 * - moves and overlaps blocks that overlap with multiple values in the overlap region
michael@0 980 *
michael@0 981 * It does not
michael@0 982 * - try to move and overlap blocks that are not already adjacent
michael@0 983 */
michael@0 984 static void
michael@0 985 compactData(UNewTrie2 *trie) {
michael@0 986 int32_t start, newStart, movedStart;
michael@0 987 int32_t blockLength, overlap;
michael@0 988 int32_t i, mapIndex, blockCount;
michael@0 989
michael@0 990 /* do not compact linear-ASCII data */
michael@0 991 newStart=UTRIE2_DATA_START_OFFSET;
michael@0 992 for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
michael@0 993 trie->map[i]=start;
michael@0 994 }
michael@0 995
michael@0 996 /*
michael@0 997 * Start with a block length of 64 for 2-byte UTF-8,
michael@0 998 * then switch to UTRIE2_DATA_BLOCK_LENGTH.
michael@0 999 */
michael@0 1000 blockLength=64;
michael@0 1001 blockCount=blockLength>>UTRIE2_SHIFT_2;
michael@0 1002 for(start=newStart; start<trie->dataLength;) {
michael@0 1003 /*
michael@0 1004 * start: index of first entry of current block
michael@0 1005 * newStart: index where the current block is to be moved
michael@0 1006 * (right after current end of already-compacted data)
michael@0 1007 */
michael@0 1008 if(start==UNEWTRIE2_DATA_0800_OFFSET) {
michael@0 1009 blockLength=UTRIE2_DATA_BLOCK_LENGTH;
michael@0 1010 blockCount=1;
michael@0 1011 }
michael@0 1012
michael@0 1013 /* skip blocks that are not used */
michael@0 1014 if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
michael@0 1015 /* advance start to the next block */
michael@0 1016 start+=blockLength;
michael@0 1017
michael@0 1018 /* leave newStart with the previous block! */
michael@0 1019 continue;
michael@0 1020 }
michael@0 1021
michael@0 1022 /* search for an identical block */
michael@0 1023 if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
michael@0 1024 >=0
michael@0 1025 ) {
michael@0 1026 /* found an identical block, set the other block's index value for the current block */
michael@0 1027 for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
michael@0 1028 trie->map[mapIndex++]=movedStart;
michael@0 1029 movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
michael@0 1030 }
michael@0 1031
michael@0 1032 /* advance start to the next block */
michael@0 1033 start+=blockLength;
michael@0 1034
michael@0 1035 /* leave newStart with the previous block! */
michael@0 1036 continue;
michael@0 1037 }
michael@0 1038
michael@0 1039 /* see if the beginning of this block can be overlapped with the end of the previous block */
michael@0 1040 /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
michael@0 1041 for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
michael@0 1042 overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
michael@0 1043 overlap-=UTRIE2_DATA_GRANULARITY) {}
michael@0 1044
michael@0 1045 if(overlap>0 || newStart<start) {
michael@0 1046 /* some overlap, or just move the whole block */
michael@0 1047 movedStart=newStart-overlap;
michael@0 1048 for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
michael@0 1049 trie->map[mapIndex++]=movedStart;
michael@0 1050 movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
michael@0 1051 }
michael@0 1052
michael@0 1053 /* move the non-overlapping indexes to their new positions */
michael@0 1054 start+=overlap;
michael@0 1055 for(i=blockLength-overlap; i>0; --i) {
michael@0 1056 trie->data[newStart++]=trie->data[start++];
michael@0 1057 }
michael@0 1058 } else /* no overlap && newStart==start */ {
michael@0 1059 for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
michael@0 1060 trie->map[mapIndex++]=start;
michael@0 1061 start+=UTRIE2_DATA_BLOCK_LENGTH;
michael@0 1062 }
michael@0 1063 newStart=start;
michael@0 1064 }
michael@0 1065 }
michael@0 1066
michael@0 1067 /* now adjust the index-2 table */
michael@0 1068 for(i=0; i<trie->index2Length; ++i) {
michael@0 1069 if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
michael@0 1070 /* Gap indexes are invalid (-1). Skip over the gap. */
michael@0 1071 i+=UNEWTRIE2_INDEX_GAP_LENGTH;
michael@0 1072 }
michael@0 1073 trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
michael@0 1074 }
michael@0 1075 trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
michael@0 1076
michael@0 1077 /* ensure dataLength alignment */
michael@0 1078 while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
michael@0 1079 trie->data[newStart++]=trie->initialValue;
michael@0 1080 }
michael@0 1081
michael@0 1082 #ifdef UTRIE2_DEBUG
michael@0 1083 /* we saved some space */
michael@0 1084 printf("compacting UTrie2: count of 32-bit data words %lu->%lu\n",
michael@0 1085 (long)trie->dataLength, (long)newStart);
michael@0 1086 #endif
michael@0 1087
michael@0 1088 trie->dataLength=newStart;
michael@0 1089 }
michael@0 1090
michael@0 1091 static void
michael@0 1092 compactIndex2(UNewTrie2 *trie) {
michael@0 1093 int32_t i, start, newStart, movedStart, overlap;
michael@0 1094
michael@0 1095 /* do not compact linear-BMP index-2 blocks */
michael@0 1096 newStart=UTRIE2_INDEX_2_BMP_LENGTH;
michael@0 1097 for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
michael@0 1098 trie->map[i]=start;
michael@0 1099 }
michael@0 1100
michael@0 1101 /* Reduce the index table gap to what will be needed at runtime. */
michael@0 1102 newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
michael@0 1103
michael@0 1104 for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
michael@0 1105 /*
michael@0 1106 * start: index of first entry of current block
michael@0 1107 * newStart: index where the current block is to be moved
michael@0 1108 * (right after current end of already-compacted data)
michael@0 1109 */
michael@0 1110
michael@0 1111 /* search for an identical block */
michael@0 1112 if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
michael@0 1113 >=0
michael@0 1114 ) {
michael@0 1115 /* found an identical block, set the other block's index value for the current block */
michael@0 1116 trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
michael@0 1117
michael@0 1118 /* advance start to the next block */
michael@0 1119 start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
michael@0 1120
michael@0 1121 /* leave newStart with the previous block! */
michael@0 1122 continue;
michael@0 1123 }
michael@0 1124
michael@0 1125 /* see if the beginning of this block can be overlapped with the end of the previous block */
michael@0 1126 /* look for maximum overlap with the previous, adjacent block */
michael@0 1127 for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
michael@0 1128 overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
michael@0 1129 --overlap) {}
michael@0 1130
michael@0 1131 if(overlap>0 || newStart<start) {
michael@0 1132 /* some overlap, or just move the whole block */
michael@0 1133 trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
michael@0 1134
michael@0 1135 /* move the non-overlapping indexes to their new positions */
michael@0 1136 start+=overlap;
michael@0 1137 for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
michael@0 1138 trie->index2[newStart++]=trie->index2[start++];
michael@0 1139 }
michael@0 1140 } else /* no overlap && newStart==start */ {
michael@0 1141 trie->map[start>>UTRIE2_SHIFT_1_2]=start;
michael@0 1142 start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
michael@0 1143 newStart=start;
michael@0 1144 }
michael@0 1145 }
michael@0 1146
michael@0 1147 /* now adjust the index-1 table */
michael@0 1148 for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
michael@0 1149 trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
michael@0 1150 }
michael@0 1151 trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
michael@0 1152
michael@0 1153 /*
michael@0 1154 * Ensure data table alignment:
michael@0 1155 * Needs to be granularity-aligned for 16-bit trie
michael@0 1156 * (so that dataMove will be down-shiftable),
michael@0 1157 * and 2-aligned for uint32_t data.
michael@0 1158 */
michael@0 1159 while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
michael@0 1160 /* Arbitrary value: 0x3fffc not possible for real data. */
michael@0 1161 trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;
michael@0 1162 }
michael@0 1163
michael@0 1164 #ifdef UTRIE2_DEBUG
michael@0 1165 /* we saved some space */
michael@0 1166 printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n",
michael@0 1167 (long)trie->index2Length, (long)newStart);
michael@0 1168 #endif
michael@0 1169
michael@0 1170 trie->index2Length=newStart;
michael@0 1171 }
michael@0 1172
michael@0 1173 static void
michael@0 1174 compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
michael@0 1175 UNewTrie2 *newTrie;
michael@0 1176 UChar32 highStart, suppHighStart;
michael@0 1177 uint32_t highValue;
michael@0 1178
michael@0 1179 newTrie=trie->newTrie;
michael@0 1180
michael@0 1181 /* find highStart and round it up */
michael@0 1182 highValue=utrie2_get32(trie, 0x10ffff);
michael@0 1183 highStart=findHighStart(newTrie, highValue);
michael@0 1184 highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
michael@0 1185 if(highStart==0x110000) {
michael@0 1186 highValue=trie->errorValue;
michael@0 1187 }
michael@0 1188
michael@0 1189 /*
michael@0 1190 * Set trie->highStart only after utrie2_get32(trie, highStart).
michael@0 1191 * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
michael@0 1192 */
michael@0 1193 trie->highStart=newTrie->highStart=highStart;
michael@0 1194
michael@0 1195 #ifdef UTRIE2_DEBUG
michael@0 1196 printf("UTrie2: highStart U+%04lx highValue 0x%lx initialValue 0x%lx\n",
michael@0 1197 (long)highStart, (long)highValue, (long)trie->initialValue);
michael@0 1198 #endif
michael@0 1199
michael@0 1200 if(highStart<0x110000) {
michael@0 1201 /* Blank out [highStart..10ffff] to release associated data blocks. */
michael@0 1202 suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
michael@0 1203 utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);
michael@0 1204 if(U_FAILURE(*pErrorCode)) {
michael@0 1205 return;
michael@0 1206 }
michael@0 1207 }
michael@0 1208
michael@0 1209 compactData(newTrie);
michael@0 1210 if(highStart>0x10000) {
michael@0 1211 compactIndex2(newTrie);
michael@0 1212 #ifdef UTRIE2_DEBUG
michael@0 1213 } else {
michael@0 1214 printf("UTrie2: highStart U+%04lx count of 16-bit index-2 words %lu->%lu\n",
michael@0 1215 (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
michael@0 1216 #endif
michael@0 1217 }
michael@0 1218
michael@0 1219 /*
michael@0 1220 * Store the highValue in the data array and round up the dataLength.
michael@0 1221 * Must be done after compactData() because that assumes that dataLength
michael@0 1222 * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
michael@0 1223 */
michael@0 1224 newTrie->data[newTrie->dataLength++]=highValue;
michael@0 1225 while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
michael@0 1226 newTrie->data[newTrie->dataLength++]=trie->initialValue;
michael@0 1227 }
michael@0 1228
michael@0 1229 newTrie->isCompacted=TRUE;
michael@0 1230 }
michael@0 1231
michael@0 1232 /* serialization ------------------------------------------------------------ */
michael@0 1233
michael@0 1234 /**
michael@0 1235 * Maximum length of the runtime index array.
michael@0 1236 * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
michael@0 1237 * (The actual maximum length is lower,
michael@0 1238 * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
michael@0 1239 */
michael@0 1240 #define UTRIE2_MAX_INDEX_LENGTH 0xffff
michael@0 1241
michael@0 1242 /**
michael@0 1243 * Maximum length of the runtime data array.
michael@0 1244 * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
michael@0 1245 * and by uint16_t UTrie2Header.shiftedDataLength.
michael@0 1246 */
michael@0 1247 #define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
michael@0 1248
michael@0 1249 /* Compact and internally serialize the trie. */
michael@0 1250 U_CAPI void U_EXPORT2
michael@0 1251 utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
michael@0 1252 UNewTrie2 *newTrie;
michael@0 1253 UTrie2Header *header;
michael@0 1254 uint32_t *p;
michael@0 1255 uint16_t *dest16;
michael@0 1256 int32_t i, length;
michael@0 1257 int32_t allIndexesLength;
michael@0 1258 int32_t dataMove; /* >0 if the data is moved to the end of the index array */
michael@0 1259 UChar32 highStart;
michael@0 1260
michael@0 1261 /* argument check */
michael@0 1262 if(U_FAILURE(*pErrorCode)) {
michael@0 1263 return;
michael@0 1264 }
michael@0 1265 if( trie==NULL ||
michael@0 1266 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
michael@0 1267 ) {
michael@0 1268 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1269 return;
michael@0 1270 }
michael@0 1271 newTrie=trie->newTrie;
michael@0 1272 if(newTrie==NULL) {
michael@0 1273 /* already frozen */
michael@0 1274 UTrie2ValueBits frozenValueBits=
michael@0 1275 trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
michael@0 1276 if(valueBits!=frozenValueBits) {
michael@0 1277 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1278 }
michael@0 1279 return;
michael@0 1280 }
michael@0 1281
michael@0 1282 /* compact if necessary */
michael@0 1283 if(!newTrie->isCompacted) {
michael@0 1284 compactTrie(trie, pErrorCode);
michael@0 1285 if(U_FAILURE(*pErrorCode)) {
michael@0 1286 return;
michael@0 1287 }
michael@0 1288 }
michael@0 1289 highStart=trie->highStart;
michael@0 1290
michael@0 1291 if(highStart<=0x10000) {
michael@0 1292 allIndexesLength=UTRIE2_INDEX_1_OFFSET;
michael@0 1293 } else {
michael@0 1294 allIndexesLength=newTrie->index2Length;
michael@0 1295 }
michael@0 1296 if(valueBits==UTRIE2_16_VALUE_BITS) {
michael@0 1297 dataMove=allIndexesLength;
michael@0 1298 } else {
michael@0 1299 dataMove=0;
michael@0 1300 }
michael@0 1301
michael@0 1302 /* are indexLength and dataLength within limits? */
michael@0 1303 if( /* for unshifted indexLength */
michael@0 1304 allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
michael@0 1305 /* for unshifted dataNullOffset */
michael@0 1306 (dataMove+newTrie->dataNullOffset)>0xffff ||
michael@0 1307 /* for unshifted 2-byte UTF-8 index-2 values */
michael@0 1308 (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
michael@0 1309 /* for shiftedDataLength */
michael@0 1310 (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
michael@0 1311 ) {
michael@0 1312 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 1313 return;
michael@0 1314 }
michael@0 1315
michael@0 1316 /* calculate the total serialized length */
michael@0 1317 length=sizeof(UTrie2Header)+allIndexesLength*2;
michael@0 1318 if(valueBits==UTRIE2_16_VALUE_BITS) {
michael@0 1319 length+=newTrie->dataLength*2;
michael@0 1320 } else {
michael@0 1321 length+=newTrie->dataLength*4;
michael@0 1322 }
michael@0 1323
michael@0 1324 trie->memory=uprv_malloc(length);
michael@0 1325 if(trie->memory==NULL) {
michael@0 1326 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 1327 return;
michael@0 1328 }
michael@0 1329 trie->length=length;
michael@0 1330 trie->isMemoryOwned=TRUE;
michael@0 1331
michael@0 1332 trie->indexLength=allIndexesLength;
michael@0 1333 trie->dataLength=newTrie->dataLength;
michael@0 1334 if(highStart<=0x10000) {
michael@0 1335 trie->index2NullOffset=0xffff;
michael@0 1336 } else {
michael@0 1337 trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset;
michael@0 1338 }
michael@0 1339 trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
michael@0 1340 trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
michael@0 1341
michael@0 1342 /* set the header fields */
michael@0 1343 header=(UTrie2Header *)trie->memory;
michael@0 1344
michael@0 1345 header->signature=UTRIE2_SIG; /* "Tri2" */
michael@0 1346 header->options=(uint16_t)valueBits;
michael@0 1347
michael@0 1348 header->indexLength=(uint16_t)trie->indexLength;
michael@0 1349 header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
michael@0 1350 header->index2NullOffset=trie->index2NullOffset;
michael@0 1351 header->dataNullOffset=trie->dataNullOffset;
michael@0 1352 header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
michael@0 1353
michael@0 1354 /* fill the index and data arrays */
michael@0 1355 dest16=(uint16_t *)(header+1);
michael@0 1356 trie->index=dest16;
michael@0 1357
michael@0 1358 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
michael@0 1359 p=(uint32_t *)newTrie->index2;
michael@0 1360 for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
michael@0 1361 *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
michael@0 1362 }
michael@0 1363
michael@0 1364 /* write UTF-8 2-byte index-2 values, not right-shifted */
michael@0 1365 for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */
michael@0 1366 *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
michael@0 1367 }
michael@0 1368 for(; i<(0xe0-0xc0); ++i) { /* C2..DF */
michael@0 1369 *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
michael@0 1370 }
michael@0 1371
michael@0 1372 if(highStart>0x10000) {
michael@0 1373 int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
michael@0 1374 int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
michael@0 1375
michael@0 1376 /* write 16-bit index-1 values for supplementary code points */
michael@0 1377 p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
michael@0 1378 for(i=index1Length; i>0; --i) {
michael@0 1379 *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
michael@0 1380 }
michael@0 1381
michael@0 1382 /*
michael@0 1383 * write the index-2 array values for supplementary code points,
michael@0 1384 * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
michael@0 1385 */
michael@0 1386 p=(uint32_t *)newTrie->index2+index2Offset;
michael@0 1387 for(i=newTrie->index2Length-index2Offset; i>0; --i) {
michael@0 1388 *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
michael@0 1389 }
michael@0 1390 }
michael@0 1391
michael@0 1392 /* write the 16/32-bit data array */
michael@0 1393 switch(valueBits) {
michael@0 1394 case UTRIE2_16_VALUE_BITS:
michael@0 1395 /* write 16-bit data values */
michael@0 1396 trie->data16=dest16;
michael@0 1397 trie->data32=NULL;
michael@0 1398 p=newTrie->data;
michael@0 1399 for(i=newTrie->dataLength; i>0; --i) {
michael@0 1400 *dest16++=(uint16_t)*p++;
michael@0 1401 }
michael@0 1402 break;
michael@0 1403 case UTRIE2_32_VALUE_BITS:
michael@0 1404 /* write 32-bit data values */
michael@0 1405 trie->data16=NULL;
michael@0 1406 trie->data32=(uint32_t *)dest16;
michael@0 1407 uprv_memcpy(dest16, newTrie->data, newTrie->dataLength*4);
michael@0 1408 break;
michael@0 1409 default:
michael@0 1410 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1411 return;
michael@0 1412 }
michael@0 1413
michael@0 1414 /* Delete the UNewTrie2. */
michael@0 1415 uprv_free(newTrie->data);
michael@0 1416 uprv_free(newTrie);
michael@0 1417 trie->newTrie=NULL;
michael@0 1418 }
michael@0 1419
michael@0 1420 U_CAPI UBool U_EXPORT2
michael@0 1421 utrie2_isFrozen(const UTrie2 *trie) {
michael@0 1422 return (UBool)(trie->newTrie==NULL);
michael@0 1423 }
michael@0 1424
michael@0 1425 U_CAPI int32_t U_EXPORT2
michael@0 1426 utrie2_serialize(UTrie2 *trie,
michael@0 1427 void *data, int32_t capacity,
michael@0 1428 UErrorCode *pErrorCode) {
michael@0 1429 /* argument check */
michael@0 1430 if(U_FAILURE(*pErrorCode)) {
michael@0 1431 return 0;
michael@0 1432 }
michael@0 1433
michael@0 1434 if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL ||
michael@0 1435 capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))
michael@0 1436 ) {
michael@0 1437 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1438 return 0;
michael@0 1439 }
michael@0 1440
michael@0 1441 if(capacity>=trie->length) {
michael@0 1442 uprv_memcpy(data, trie->memory, trie->length);
michael@0 1443 } else {
michael@0 1444 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1445 }
michael@0 1446 return trie->length;
michael@0 1447 }
michael@0 1448
michael@0 1449 /*
michael@0 1450 * This is here to avoid a dependency from utrie2.cpp on utrie.c.
michael@0 1451 * This file already depends on utrie.c.
michael@0 1452 * Otherwise, this should be in utrie2.cpp right after utrie2_swap().
michael@0 1453 */
michael@0 1454 U_CAPI int32_t U_EXPORT2
michael@0 1455 utrie2_swapAnyVersion(const UDataSwapper *ds,
michael@0 1456 const void *inData, int32_t length, void *outData,
michael@0 1457 UErrorCode *pErrorCode) {
michael@0 1458 if(U_SUCCESS(*pErrorCode)) {
michael@0 1459 switch(utrie2_getVersion(inData, length, TRUE)) {
michael@0 1460 case 1:
michael@0 1461 return utrie_swap(ds, inData, length, outData, pErrorCode);
michael@0 1462 case 2:
michael@0 1463 return utrie2_swap(ds, inData, length, outData, pErrorCode);
michael@0 1464 default:
michael@0 1465 *pErrorCode=U_INVALID_FORMAT_ERROR;
michael@0 1466 return 0;
michael@0 1467 }
michael@0 1468 }
michael@0 1469 return 0;
michael@0 1470 }

mercurial