intl/icu/source/common/utrie2_builder.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/utrie2_builder.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1470 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2001-2011, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +******************************************************************************
    1.11 +*   file name:  utrie2_builder.cpp
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2008sep26 (split off from utrie2.c)
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   This is a common implementation of a Unicode trie.
    1.20 +*   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
    1.21 +*   Unicode code points (0..0x10ffff).
    1.22 +*   This is the second common version of a Unicode trie (hence the name UTrie2).
    1.23 +*   See utrie2.h for a comparison.
    1.24 +*
    1.25 +*   This file contains only the builder code.
    1.26 +*   See utrie2.c for the runtime and enumeration code.
    1.27 +*/
    1.28 +#ifdef UTRIE2_DEBUG
    1.29 +#   include <stdio.h>
    1.30 +#endif
    1.31 +
    1.32 +#include "unicode/utypes.h"
    1.33 +#include "cmemory.h"
    1.34 +#include "utrie2.h"
    1.35 +#include "utrie2_impl.h"
    1.36 +
    1.37 +#include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */
    1.38 +
    1.39 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    1.40 +
    1.41 +/* Implementation notes ----------------------------------------------------- */
    1.42 +
    1.43 +/*
    1.44 + * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
    1.45 + * have been chosen to minimize trie sizes overall.
    1.46 + * Most of the code is flexible enough to work with a range of values,
    1.47 + * within certain limits.
    1.48 + *
    1.49 + * Exception: Support for separate values for lead surrogate code _units_
    1.50 + * vs. code _points_ was added after the constants were fixed,
    1.51 + * and has not been tested nor particularly designed for different constant values.
    1.52 + * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
    1.53 + * part and back.)
    1.54 + *
    1.55 + * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
    1.56 + * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
    1.57 + * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
    1.58 + * remapping) stops working.
    1.59 + *
    1.60 + * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
    1.61 + * assumes that a single index-2 block is used for 0x400 code points
    1.62 + * corresponding to one lead surrogate.
    1.63 + *
    1.64 + * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
    1.65 + * more than one Unicode plane, and the split of the index-2 table into a BMP
    1.66 + * part and a supplementary part, with a gap in between, would not work.
    1.67 + *
    1.68 + * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
    1.69 + * there is data with more than 64k distinct values,
    1.70 + * for example for Unihan collation with a separate collation weight per
    1.71 + * Han character.
    1.72 + */
    1.73 +
    1.74 +/* Building a trie ----------------------------------------------------------*/
    1.75 +
    1.76 +enum {
    1.77 +    /** The null index-2 block, following the gap in the index-2 table. */
    1.78 +    UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
    1.79 +
    1.80 +    /** The start of allocated index-2 blocks. */
    1.81 +    UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
    1.82 +
    1.83 +    /**
    1.84 +     * The null data block.
    1.85 +     * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
    1.86 +     * to work with 6-bit trail bytes from 2-byte UTF-8.
    1.87 +     */
    1.88 +    UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
    1.89 +
    1.90 +    /** The start of allocated data blocks. */
    1.91 +    UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
    1.92 +
    1.93 +    /**
    1.94 +     * The start of data blocks for U+0800 and above.
    1.95 +     * Below, compaction uses a block length of 64 for 2-byte UTF-8.
    1.96 +     * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
    1.97 +     * Data values for 0x780 code points beyond ASCII.
    1.98 +     */
    1.99 +    UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
   1.100 +};
   1.101 +
   1.102 +/* Start with allocation of 16k data entries. */
   1.103 +#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
   1.104 +
   1.105 +/* Grow about 8x each time. */
   1.106 +#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
   1.107 +
   1.108 +static int32_t
   1.109 +allocIndex2Block(UNewTrie2 *trie);
   1.110 +
   1.111 +U_CAPI UTrie2 * U_EXPORT2
   1.112 +utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
   1.113 +    UTrie2 *trie;
   1.114 +    UNewTrie2 *newTrie;
   1.115 +    uint32_t *data;
   1.116 +    int32_t i, j;
   1.117 +
   1.118 +    if(U_FAILURE(*pErrorCode)) {
   1.119 +        return NULL;
   1.120 +    }
   1.121 +
   1.122 +    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
   1.123 +    newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
   1.124 +    data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
   1.125 +    if(trie==NULL || newTrie==NULL || data==NULL) {
   1.126 +        uprv_free(trie);
   1.127 +        uprv_free(newTrie);
   1.128 +        uprv_free(data);
   1.129 +        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1.130 +        return 0;
   1.131 +    }
   1.132 +
   1.133 +    uprv_memset(trie, 0, sizeof(UTrie2));
   1.134 +    trie->initialValue=initialValue;
   1.135 +    trie->errorValue=errorValue;
   1.136 +    trie->highStart=0x110000;
   1.137 +    trie->newTrie=newTrie;
   1.138 +
   1.139 +    newTrie->data=data;
   1.140 +    newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
   1.141 +    newTrie->initialValue=initialValue;
   1.142 +    newTrie->errorValue=errorValue;
   1.143 +    newTrie->highStart=0x110000;
   1.144 +    newTrie->firstFreeBlock=0;  /* no free block in the list */
   1.145 +    newTrie->isCompacted=FALSE;
   1.146 +
   1.147 +    /*
   1.148 +     * preallocate and reset
   1.149 +     * - ASCII
   1.150 +     * - the bad-UTF-8-data block
   1.151 +     * - the null data block
   1.152 +     */
   1.153 +    for(i=0; i<0x80; ++i) {
   1.154 +        newTrie->data[i]=initialValue;
   1.155 +    }
   1.156 +    for(; i<0xc0; ++i) {
   1.157 +        newTrie->data[i]=errorValue;
   1.158 +    }
   1.159 +    for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
   1.160 +        newTrie->data[i]=initialValue;
   1.161 +    }
   1.162 +    newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
   1.163 +    newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
   1.164 +
   1.165 +    /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
   1.166 +    for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
   1.167 +        newTrie->index2[i]=j;
   1.168 +        newTrie->map[i]=1;
   1.169 +    }
   1.170 +    /* reference counts for the bad-UTF-8-data block */
   1.171 +    for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
   1.172 +        newTrie->map[i]=0;
   1.173 +    }
   1.174 +    /*
   1.175 +     * Reference counts for the null data block: all blocks except for the ASCII blocks.
   1.176 +     * Plus 1 so that we don't drop this block during compaction.
   1.177 +     * Plus as many as needed for lead surrogate code points.
   1.178 +     */
   1.179 +    /* i==newTrie->dataNullOffset */
   1.180 +    newTrie->map[i++]=
   1.181 +        (0x110000>>UTRIE2_SHIFT_2)-
   1.182 +        (0x80>>UTRIE2_SHIFT_2)+
   1.183 +        1+
   1.184 +        UTRIE2_LSCP_INDEX_2_LENGTH;
   1.185 +    j+=UTRIE2_DATA_BLOCK_LENGTH;
   1.186 +    for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
   1.187 +        newTrie->map[i]=0;
   1.188 +    }
   1.189 +
   1.190 +    /*
   1.191 +     * set the remaining indexes in the BMP index-2 block
   1.192 +     * to the null data block
   1.193 +     */
   1.194 +    for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
   1.195 +        newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
   1.196 +    }
   1.197 +
   1.198 +    /*
   1.199 +     * Fill the index gap with impossible values so that compaction
   1.200 +     * does not overlap other index-2 blocks with the gap.
   1.201 +     */
   1.202 +    for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
   1.203 +        newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
   1.204 +    }
   1.205 +
   1.206 +    /* set the indexes in the null index-2 block */
   1.207 +    for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
   1.208 +        newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
   1.209 +    }
   1.210 +    newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
   1.211 +    newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
   1.212 +
   1.213 +    /* set the index-1 indexes for the linear index-2 block */
   1.214 +    for(i=0, j=0;
   1.215 +        i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
   1.216 +        ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
   1.217 +    ) {
   1.218 +        newTrie->index1[i]=j;
   1.219 +    }
   1.220 +
   1.221 +    /* set the remaining index-1 indexes to the null index-2 block */
   1.222 +    for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
   1.223 +        newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
   1.224 +    }
   1.225 +
   1.226 +    /*
   1.227 +     * Preallocate and reset data for U+0080..U+07ff,
   1.228 +     * for 2-byte UTF-8 which will be compacted in 64-blocks
   1.229 +     * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
   1.230 +     */
   1.231 +    for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
   1.232 +        utrie2_set32(trie, i, initialValue, pErrorCode);
   1.233 +    }
   1.234 +
   1.235 +    return trie;
   1.236 +}
   1.237 +
   1.238 +static UNewTrie2 *
   1.239 +cloneBuilder(const UNewTrie2 *other) {
   1.240 +    UNewTrie2 *trie;
   1.241 +
   1.242 +    trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
   1.243 +    if(trie==NULL) {
   1.244 +        return NULL;
   1.245 +    }
   1.246 +
   1.247 +    trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);
   1.248 +    if(trie->data==NULL) {
   1.249 +        uprv_free(trie);
   1.250 +        return NULL;
   1.251 +    }
   1.252 +    trie->dataCapacity=other->dataCapacity;
   1.253 +
   1.254 +    /* clone data */
   1.255 +    uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
   1.256 +    uprv_memcpy(trie->index2, other->index2, other->index2Length*4);
   1.257 +    trie->index2NullOffset=other->index2NullOffset;
   1.258 +    trie->index2Length=other->index2Length;
   1.259 +
   1.260 +    uprv_memcpy(trie->data, other->data, other->dataLength*4);
   1.261 +    trie->dataNullOffset=other->dataNullOffset;
   1.262 +    trie->dataLength=other->dataLength;
   1.263 +
   1.264 +    /* reference counters */
   1.265 +    if(other->isCompacted) {
   1.266 +        trie->firstFreeBlock=0;
   1.267 +    } else {
   1.268 +        uprv_memcpy(trie->map, other->map, (other->dataLength>>UTRIE2_SHIFT_2)*4);
   1.269 +        trie->firstFreeBlock=other->firstFreeBlock;
   1.270 +    }
   1.271 +
   1.272 +    trie->initialValue=other->initialValue;
   1.273 +    trie->errorValue=other->errorValue;
   1.274 +    trie->highStart=other->highStart;
   1.275 +    trie->isCompacted=other->isCompacted;
   1.276 +
   1.277 +    return trie;
   1.278 +}
   1.279 +
   1.280 +U_CAPI UTrie2 * U_EXPORT2
   1.281 +utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
   1.282 +    UTrie2 *trie;
   1.283 +
   1.284 +    if(U_FAILURE(*pErrorCode)) {
   1.285 +        return NULL;
   1.286 +    }
   1.287 +    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
   1.288 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.289 +        return NULL;
   1.290 +    }
   1.291 +
   1.292 +    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
   1.293 +    if(trie==NULL) {
   1.294 +        return NULL;
   1.295 +    }
   1.296 +    uprv_memcpy(trie, other, sizeof(UTrie2));
   1.297 +
   1.298 +    if(other->memory!=NULL) {
   1.299 +        trie->memory=uprv_malloc(other->length);
   1.300 +        if(trie->memory!=NULL) {
   1.301 +            trie->isMemoryOwned=TRUE;
   1.302 +            uprv_memcpy(trie->memory, other->memory, other->length);
   1.303 +
   1.304 +            /* make the clone's pointers point to its own memory */
   1.305 +            trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
   1.306 +            if(other->data16!=NULL) {
   1.307 +                trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
   1.308 +            }
   1.309 +            if(other->data32!=NULL) {
   1.310 +                trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
   1.311 +            }
   1.312 +        }
   1.313 +    } else /* other->newTrie!=NULL */ {
   1.314 +        trie->newTrie=cloneBuilder(other->newTrie);
   1.315 +    }
   1.316 +
   1.317 +    if(trie->memory==NULL && trie->newTrie==NULL) {
   1.318 +        uprv_free(trie);
   1.319 +        trie=NULL;
   1.320 +    }
   1.321 +    return trie;
   1.322 +}
   1.323 +
   1.324 +typedef struct NewTrieAndStatus {
   1.325 +    UTrie2 *trie;
   1.326 +    UErrorCode errorCode;
   1.327 +    UBool exclusiveLimit;  /* rather than inclusive range end */
   1.328 +} NewTrieAndStatus;
   1.329 +
   1.330 +static UBool U_CALLCONV
   1.331 +copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
   1.332 +    NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
   1.333 +    if(value!=nt->trie->initialValue) {
   1.334 +        if(nt->exclusiveLimit) {
   1.335 +            --end;
   1.336 +        }
   1.337 +        if(start==end) {
   1.338 +            utrie2_set32(nt->trie, start, value, &nt->errorCode);
   1.339 +        } else {
   1.340 +            utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode);
   1.341 +        }
   1.342 +        return U_SUCCESS(nt->errorCode);
   1.343 +    } else {
   1.344 +        return TRUE;
   1.345 +    }
   1.346 +}
   1.347 +
   1.348 +#ifdef UTRIE2_DEBUG
   1.349 +static void
   1.350 +utrie_printLengths(const UTrie *trie) {
   1.351 +    long indexLength=trie->indexLength;
   1.352 +    long dataLength=(long)trie->dataLength;
   1.353 +    long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
   1.354 +    printf("**UTrieLengths** index:%6ld  data:%6ld  serialized:%6ld\n",
   1.355 +           indexLength, dataLength, totalLength);
   1.356 +}
   1.357 +
   1.358 +static void
   1.359 +utrie2_printLengths(const UTrie2 *trie, const char *which) {
   1.360 +    long indexLength=trie->indexLength;
   1.361 +    long dataLength=(long)trie->dataLength;
   1.362 +    long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
   1.363 +    printf("**UTrie2Lengths(%s)** index:%6ld  data:%6ld  serialized:%6ld\n",
   1.364 +           which, indexLength, dataLength, totalLength);
   1.365 +}
   1.366 +#endif
   1.367 +
   1.368 +U_CAPI UTrie2 * U_EXPORT2
   1.369 +utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
   1.370 +    NewTrieAndStatus context;
   1.371 +    UChar lead;
   1.372 +
   1.373 +    if(U_FAILURE(*pErrorCode)) {
   1.374 +        return NULL;
   1.375 +    }
   1.376 +    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
   1.377 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.378 +        return NULL;
   1.379 +    }
   1.380 +    if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
   1.381 +        return utrie2_clone(other, pErrorCode);  /* clone an unfrozen trie */
   1.382 +    }
   1.383 +
   1.384 +    /* Clone the frozen trie by enumerating it and building a new one. */
   1.385 +    context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
   1.386 +    if(U_FAILURE(*pErrorCode)) {
   1.387 +        return NULL;
   1.388 +    }
   1.389 +    context.exclusiveLimit=FALSE;
   1.390 +    context.errorCode=*pErrorCode;
   1.391 +    utrie2_enum(other, NULL, copyEnumRange, &context);
   1.392 +    *pErrorCode=context.errorCode;
   1.393 +    for(lead=0xd800; lead<0xdc00; ++lead) {
   1.394 +        uint32_t value;
   1.395 +        if(other->data32==NULL) {
   1.396 +            value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
   1.397 +        } else {
   1.398 +            value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
   1.399 +        }
   1.400 +        if(value!=other->initialValue) {
   1.401 +            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
   1.402 +        }
   1.403 +    }
   1.404 +    if(U_FAILURE(*pErrorCode)) {
   1.405 +        utrie2_close(context.trie);
   1.406 +        context.trie=NULL;
   1.407 +    }
   1.408 +    return context.trie;
   1.409 +}
   1.410 +
   1.411 +/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
   1.412 +U_CAPI UTrie2 * U_EXPORT2
   1.413 +utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
   1.414 +    NewTrieAndStatus context;
   1.415 +    UChar lead;
   1.416 +
   1.417 +    if(U_FAILURE(*pErrorCode)) {
   1.418 +        return NULL;
   1.419 +    }
   1.420 +    if(trie1==NULL) {
   1.421 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.422 +        return NULL;
   1.423 +    }
   1.424 +    context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
   1.425 +    if(U_FAILURE(*pErrorCode)) {
   1.426 +        return NULL;
   1.427 +    }
   1.428 +    context.exclusiveLimit=TRUE;
   1.429 +    context.errorCode=*pErrorCode;
   1.430 +    utrie_enum(trie1, NULL, copyEnumRange, &context);
   1.431 +    *pErrorCode=context.errorCode;
   1.432 +    for(lead=0xd800; lead<0xdc00; ++lead) {
   1.433 +        uint32_t value;
   1.434 +        if(trie1->data32==NULL) {
   1.435 +            value=UTRIE_GET16_FROM_LEAD(trie1, lead);
   1.436 +        } else {
   1.437 +            value=UTRIE_GET32_FROM_LEAD(trie1, lead);
   1.438 +        }
   1.439 +        if(value!=trie1->initialValue) {
   1.440 +            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
   1.441 +        }
   1.442 +    }
   1.443 +    if(U_SUCCESS(*pErrorCode)) {
   1.444 +        utrie2_freeze(context.trie,
   1.445 +                      trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
   1.446 +                      pErrorCode);
   1.447 +    }
   1.448 +#ifdef UTRIE2_DEBUG
   1.449 +    if(U_SUCCESS(*pErrorCode)) {
   1.450 +        utrie_printLengths(trie1);
   1.451 +        utrie2_printLengths(context.trie, "fromUTrie");
   1.452 +    }
   1.453 +#endif
   1.454 +    if(U_FAILURE(*pErrorCode)) {
   1.455 +        utrie2_close(context.trie);
   1.456 +        context.trie=NULL;
   1.457 +    }
   1.458 +    return context.trie;
   1.459 +}
   1.460 +
   1.461 +static inline UBool
   1.462 +isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
   1.463 +    int32_t i2, block;
   1.464 +
   1.465 +    if(U_IS_LEAD(c) && forLSCP) {
   1.466 +        i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
   1.467 +            (c>>UTRIE2_SHIFT_2);
   1.468 +    } else {
   1.469 +        i2=trie->index1[c>>UTRIE2_SHIFT_1]+
   1.470 +            ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
   1.471 +    }
   1.472 +    block=trie->index2[i2];
   1.473 +    return (UBool)(block==trie->dataNullOffset);
   1.474 +}
   1.475 +
   1.476 +static int32_t
   1.477 +allocIndex2Block(UNewTrie2 *trie) {
   1.478 +    int32_t newBlock, newTop;
   1.479 +
   1.480 +    newBlock=trie->index2Length;
   1.481 +    newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
   1.482 +    if(newTop>LENGTHOF(trie->index2)) {
   1.483 +        /*
   1.484 +         * Should never occur.
   1.485 +         * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
   1.486 +         * or the code writes more values than should be possible.
   1.487 +         */
   1.488 +        return -1;
   1.489 +    }
   1.490 +    trie->index2Length=newTop;
   1.491 +    uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
   1.492 +    return newBlock;
   1.493 +}
   1.494 +
   1.495 +static int32_t
   1.496 +getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
   1.497 +    int32_t i1, i2;
   1.498 +
   1.499 +    if(U_IS_LEAD(c) && forLSCP) {
   1.500 +        return UTRIE2_LSCP_INDEX_2_OFFSET;
   1.501 +    }
   1.502 +
   1.503 +    i1=c>>UTRIE2_SHIFT_1;
   1.504 +    i2=trie->index1[i1];
   1.505 +    if(i2==trie->index2NullOffset) {
   1.506 +        i2=allocIndex2Block(trie);
   1.507 +        if(i2<0) {
   1.508 +            return -1;  /* program error */
   1.509 +        }
   1.510 +        trie->index1[i1]=i2;
   1.511 +    }
   1.512 +    return i2;
   1.513 +}
   1.514 +
   1.515 +static int32_t
   1.516 +allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
   1.517 +    int32_t newBlock, newTop;
   1.518 +
   1.519 +    if(trie->firstFreeBlock!=0) {
   1.520 +        /* get the first free block */
   1.521 +        newBlock=trie->firstFreeBlock;
   1.522 +        trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
   1.523 +    } else {
   1.524 +        /* get a new block from the high end */
   1.525 +        newBlock=trie->dataLength;
   1.526 +        newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
   1.527 +        if(newTop>trie->dataCapacity) {
   1.528 +            /* out of memory in the data array */
   1.529 +            int32_t capacity;
   1.530 +            uint32_t *data;
   1.531 +
   1.532 +            if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
   1.533 +                capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
   1.534 +            } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
   1.535 +                capacity=UNEWTRIE2_MAX_DATA_LENGTH;
   1.536 +            } else {
   1.537 +                /*
   1.538 +                 * Should never occur.
   1.539 +                 * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
   1.540 +                 * or the code writes more values than should be possible.
   1.541 +                 */
   1.542 +                return -1;
   1.543 +            }
   1.544 +            data=(uint32_t *)uprv_malloc(capacity*4);
   1.545 +            if(data==NULL) {
   1.546 +                return -1;
   1.547 +            }
   1.548 +            uprv_memcpy(data, trie->data, trie->dataLength*4);
   1.549 +            uprv_free(trie->data);
   1.550 +            trie->data=data;
   1.551 +            trie->dataCapacity=capacity;
   1.552 +        }
   1.553 +        trie->dataLength=newTop;
   1.554 +    }
   1.555 +    uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
   1.556 +    trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
   1.557 +    return newBlock;
   1.558 +}
   1.559 +
   1.560 +/* call when the block's reference counter reaches 0 */
   1.561 +static void
   1.562 +releaseDataBlock(UNewTrie2 *trie, int32_t block) {
   1.563 +    /* put this block at the front of the free-block chain */
   1.564 +    trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
   1.565 +    trie->firstFreeBlock=block;
   1.566 +}
   1.567 +
   1.568 +static inline UBool
   1.569 +isWritableBlock(UNewTrie2 *trie, int32_t block) {
   1.570 +    return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]);
   1.571 +}
   1.572 +
   1.573 +static inline void
   1.574 +setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
   1.575 +    int32_t oldBlock;
   1.576 +    ++trie->map[block>>UTRIE2_SHIFT_2];  /* increment first, in case block==oldBlock! */
   1.577 +    oldBlock=trie->index2[i2];
   1.578 +    if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
   1.579 +        releaseDataBlock(trie, oldBlock);
   1.580 +    }
   1.581 +    trie->index2[i2]=block;
   1.582 +}
   1.583 +
   1.584 +/**
   1.585 + * No error checking for illegal arguments.
   1.586 + *
   1.587 + * @return -1 if no new data block available (out of memory in data array)
   1.588 + * @internal
   1.589 + */
   1.590 +static int32_t
   1.591 +getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
   1.592 +    int32_t i2, oldBlock, newBlock;
   1.593 +
   1.594 +    i2=getIndex2Block(trie, c, forLSCP);
   1.595 +    if(i2<0) {
   1.596 +        return -1;  /* program error */
   1.597 +    }
   1.598 +
   1.599 +    i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
   1.600 +    oldBlock=trie->index2[i2];
   1.601 +    if(isWritableBlock(trie, oldBlock)) {
   1.602 +        return oldBlock;
   1.603 +    }
   1.604 +
   1.605 +    /* allocate a new data block */
   1.606 +    newBlock=allocDataBlock(trie, oldBlock);
   1.607 +    if(newBlock<0) {
   1.608 +        /* out of memory in the data array */
   1.609 +        return -1;
   1.610 +    }
   1.611 +    setIndex2Entry(trie, i2, newBlock);
   1.612 +    return newBlock;
   1.613 +}
   1.614 +
   1.615 +/**
   1.616 + * @return TRUE if the value was successfully set
   1.617 + */
   1.618 +static void
   1.619 +set32(UNewTrie2 *trie,
   1.620 +      UChar32 c, UBool forLSCP, uint32_t value,
   1.621 +      UErrorCode *pErrorCode) {
   1.622 +    int32_t block;
   1.623 +
   1.624 +    if(trie==NULL || trie->isCompacted) {
   1.625 +        *pErrorCode=U_NO_WRITE_PERMISSION;
   1.626 +        return;
   1.627 +    }
   1.628 +
   1.629 +    block=getDataBlock(trie, c, forLSCP);
   1.630 +    if(block<0) {
   1.631 +        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1.632 +        return;
   1.633 +    }
   1.634 +
   1.635 +    trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
   1.636 +}
   1.637 +
   1.638 +U_CAPI void U_EXPORT2
   1.639 +utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
   1.640 +    if(U_FAILURE(*pErrorCode)) {
   1.641 +        return;
   1.642 +    }
   1.643 +    if((uint32_t)c>0x10ffff) {
   1.644 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.645 +        return;
   1.646 +    }
   1.647 +    set32(trie->newTrie, c, TRUE, value, pErrorCode);
   1.648 +}
   1.649 +
   1.650 +U_CAPI void U_EXPORT2
   1.651 +utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
   1.652 +                                     UChar32 c, uint32_t value,
   1.653 +                                     UErrorCode *pErrorCode) {
   1.654 +    if(U_FAILURE(*pErrorCode)) {
   1.655 +        return;
   1.656 +    }
   1.657 +    if(!U_IS_LEAD(c)) {
   1.658 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.659 +        return;
   1.660 +    }
   1.661 +    set32(trie->newTrie, c, FALSE, value, pErrorCode);
   1.662 +}
   1.663 +
   1.664 +static void
   1.665 +writeBlock(uint32_t *block, uint32_t value) {
   1.666 +    uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
   1.667 +    while(block<limit) {
   1.668 +        *block++=value;
   1.669 +    }
   1.670 +}
   1.671 +
   1.672 +/**
   1.673 + * initialValue is ignored if overwrite=TRUE
   1.674 + * @internal
   1.675 + */
   1.676 +static void
   1.677 +fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
   1.678 +          uint32_t value, uint32_t initialValue, UBool overwrite) {
   1.679 +    uint32_t *pLimit;
   1.680 +
   1.681 +    pLimit=block+limit;
   1.682 +    block+=start;
   1.683 +    if(overwrite) {
   1.684 +        while(block<pLimit) {
   1.685 +            *block++=value;
   1.686 +        }
   1.687 +    } else {
   1.688 +        while(block<pLimit) {
   1.689 +            if(*block==initialValue) {
   1.690 +                *block=value;
   1.691 +            }
   1.692 +            ++block;
   1.693 +        }
   1.694 +    }
   1.695 +}
   1.696 +
   1.697 +U_CAPI void U_EXPORT2
   1.698 +utrie2_setRange32(UTrie2 *trie,
   1.699 +                  UChar32 start, UChar32 end,
   1.700 +                  uint32_t value, UBool overwrite,
   1.701 +                  UErrorCode *pErrorCode) {
   1.702 +    /*
   1.703 +     * repeat value in [start..end]
   1.704 +     * mark index values for repeat-data blocks by setting bit 31 of the index values
   1.705 +     * fill around existing values if any, if(overwrite)
   1.706 +     */
   1.707 +    UNewTrie2 *newTrie;
   1.708 +    int32_t block, rest, repeatBlock;
   1.709 +    UChar32 limit;
   1.710 +
   1.711 +    if(U_FAILURE(*pErrorCode)) {
   1.712 +        return;
   1.713 +    }
   1.714 +    if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
   1.715 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.716 +        return;
   1.717 +    }
   1.718 +    newTrie=trie->newTrie;
   1.719 +    if(newTrie==NULL || newTrie->isCompacted) {
   1.720 +        *pErrorCode=U_NO_WRITE_PERMISSION;
   1.721 +        return;
   1.722 +    }
   1.723 +    if(!overwrite && value==newTrie->initialValue) {
   1.724 +        return; /* nothing to do */
   1.725 +    }
   1.726 +
   1.727 +    limit=end+1;
   1.728 +    if(start&UTRIE2_DATA_MASK) {
   1.729 +        UChar32 nextStart;
   1.730 +
   1.731 +        /* set partial block at [start..following block boundary[ */
   1.732 +        block=getDataBlock(newTrie, start, TRUE);
   1.733 +        if(block<0) {
   1.734 +            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1.735 +            return;
   1.736 +        }
   1.737 +
   1.738 +        nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK;
   1.739 +        if(nextStart<=limit) {
   1.740 +            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
   1.741 +                      value, newTrie->initialValue, overwrite);
   1.742 +            start=nextStart;
   1.743 +        } else {
   1.744 +            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
   1.745 +                      value, newTrie->initialValue, overwrite);
   1.746 +            return;
   1.747 +        }
   1.748 +    }
   1.749 +
   1.750 +    /* number of positions in the last, partial block */
   1.751 +    rest=limit&UTRIE2_DATA_MASK;
   1.752 +
   1.753 +    /* round down limit to a block boundary */
   1.754 +    limit&=~UTRIE2_DATA_MASK;
   1.755 +
   1.756 +    /* iterate over all-value blocks */
   1.757 +    if(value==newTrie->initialValue) {
   1.758 +        repeatBlock=newTrie->dataNullOffset;
   1.759 +    } else {
   1.760 +        repeatBlock=-1;
   1.761 +    }
   1.762 +
   1.763 +    while(start<limit) {
   1.764 +        int32_t i2;
   1.765 +        UBool setRepeatBlock=FALSE;
   1.766 +
   1.767 +        if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) {
   1.768 +            start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
   1.769 +            continue;
   1.770 +        }
   1.771 +
   1.772 +        /* get index value */
   1.773 +        i2=getIndex2Block(newTrie, start, TRUE);
   1.774 +        if(i2<0) {
   1.775 +            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1.776 +            return;
   1.777 +        }
   1.778 +        i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
   1.779 +        block=newTrie->index2[i2];
   1.780 +        if(isWritableBlock(newTrie, block)) {
   1.781 +            /* already allocated */
   1.782 +            if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
   1.783 +                /*
   1.784 +                 * We overwrite all values, and it's not a
   1.785 +                 * protected (ASCII-linear or 2-byte UTF-8) block:
   1.786 +                 * replace with the repeatBlock.
   1.787 +                 */
   1.788 +                setRepeatBlock=TRUE;
   1.789 +            } else {
   1.790 +                /* !overwrite, or protected block: just write the values into this block */
   1.791 +                fillBlock(newTrie->data+block,
   1.792 +                          0, UTRIE2_DATA_BLOCK_LENGTH,
   1.793 +                          value, newTrie->initialValue, overwrite);
   1.794 +            }
   1.795 +        } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
   1.796 +            /*
   1.797 +             * Set the repeatBlock instead of the null block or previous repeat block:
   1.798 +             *
   1.799 +             * If !isWritableBlock() then all entries in the block have the same value
   1.800 +             * because it's the null block or a range block (the repeatBlock from a previous
   1.801 +             * call to utrie2_setRange32()).
   1.802 +             * No other blocks are used multiple times before compacting.
   1.803 +             *
   1.804 +             * The null block is the only non-writable block with the initialValue because
   1.805 +             * of the repeatBlock initialization above. (If value==initialValue, then
   1.806 +             * the repeatBlock will be the null data block.)
   1.807 +             *
   1.808 +             * We set our repeatBlock if the desired value differs from the block's value,
   1.809 +             * and if we overwrite any data or if the data is all initial values
   1.810 +             * (which is the same as the block being the null block, see above).
   1.811 +             */
   1.812 +            setRepeatBlock=TRUE;
   1.813 +        }
   1.814 +        if(setRepeatBlock) {
   1.815 +            if(repeatBlock>=0) {
   1.816 +                setIndex2Entry(newTrie, i2, repeatBlock);
   1.817 +            } else {
   1.818 +                /* create and set and fill the repeatBlock */
   1.819 +                repeatBlock=getDataBlock(newTrie, start, TRUE);
   1.820 +                if(repeatBlock<0) {
   1.821 +                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1.822 +                    return;
   1.823 +                }
   1.824 +                writeBlock(newTrie->data+repeatBlock, value);
   1.825 +            }
   1.826 +        }
   1.827 +
   1.828 +        start+=UTRIE2_DATA_BLOCK_LENGTH;
   1.829 +    }
   1.830 +
   1.831 +    if(rest>0) {
   1.832 +        /* set partial block at [last block boundary..limit[ */
   1.833 +        block=getDataBlock(newTrie, start, TRUE);
   1.834 +        if(block<0) {
   1.835 +            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1.836 +            return;
   1.837 +        }
   1.838 +
   1.839 +        fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
   1.840 +    }
   1.841 +
   1.842 +    return;
   1.843 +}
   1.844 +
   1.845 +/* compaction --------------------------------------------------------------- */
   1.846 +
   1.847 +static inline UBool
   1.848 +equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
   1.849 +    while(length>0 && *s==*t) {
   1.850 +        ++s;
   1.851 +        ++t;
   1.852 +        --length;
   1.853 +    }
   1.854 +    return (UBool)(length==0);
   1.855 +}
   1.856 +
   1.857 +static inline UBool
   1.858 +equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
   1.859 +    while(length>0 && *s==*t) {
   1.860 +        ++s;
   1.861 +        ++t;
   1.862 +        --length;
   1.863 +    }
   1.864 +    return (UBool)(length==0);
   1.865 +}
   1.866 +
   1.867 +static int32_t
   1.868 +findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
   1.869 +    int32_t block;
   1.870 +
   1.871 +    /* ensure that we do not even partially get past index2Length */
   1.872 +    index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
   1.873 +
   1.874 +    for(block=0; block<=index2Length; ++block) {
   1.875 +        if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
   1.876 +            return block;
   1.877 +        }
   1.878 +    }
   1.879 +    return -1;
   1.880 +}
   1.881 +
   1.882 +static int32_t
   1.883 +findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
   1.884 +    int32_t block;
   1.885 +
   1.886 +    /* ensure that we do not even partially get past dataLength */
   1.887 +    dataLength-=blockLength;
   1.888 +
   1.889 +    for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
   1.890 +        if(equal_uint32(data+block, data+otherBlock, blockLength)) {
   1.891 +            return block;
   1.892 +        }
   1.893 +    }
   1.894 +    return -1;
   1.895 +}
   1.896 +
   1.897 +/*
   1.898 + * Find the start of the last range in the trie by enumerating backward.
   1.899 + * Indexes for supplementary code points higher than this will be omitted.
   1.900 + */
   1.901 +static UChar32
   1.902 +findHighStart(UNewTrie2 *trie, uint32_t highValue) {
   1.903 +    const uint32_t *data32;
   1.904 +
   1.905 +    uint32_t value, initialValue;
   1.906 +    UChar32 c, prev;
   1.907 +    int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
   1.908 +
   1.909 +    data32=trie->data;
   1.910 +    initialValue=trie->initialValue;
   1.911 +
   1.912 +    index2NullOffset=trie->index2NullOffset;
   1.913 +    nullBlock=trie->dataNullOffset;
   1.914 +
   1.915 +    /* set variables for previous range */
   1.916 +    if(highValue==initialValue) {
   1.917 +        prevI2Block=index2NullOffset;
   1.918 +        prevBlock=nullBlock;
   1.919 +    } else {
   1.920 +        prevI2Block=-1;
   1.921 +        prevBlock=-1;
   1.922 +    }
   1.923 +    prev=0x110000;
   1.924 +
   1.925 +    /* enumerate index-2 blocks */
   1.926 +    i1=UNEWTRIE2_INDEX_1_LENGTH;
   1.927 +    c=prev;
   1.928 +    while(c>0) {
   1.929 +        i2Block=trie->index1[--i1];
   1.930 +        if(i2Block==prevI2Block) {
   1.931 +            /* the index-2 block is the same as the previous one, and filled with highValue */
   1.932 +            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
   1.933 +            continue;
   1.934 +        }
   1.935 +        prevI2Block=i2Block;
   1.936 +        if(i2Block==index2NullOffset) {
   1.937 +            /* this is the null index-2 block */
   1.938 +            if(highValue!=initialValue) {
   1.939 +                return c;
   1.940 +            }
   1.941 +            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
   1.942 +        } else {
   1.943 +            /* enumerate data blocks for one index-2 block */
   1.944 +            for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
   1.945 +                block=trie->index2[i2Block+ --i2];
   1.946 +                if(block==prevBlock) {
   1.947 +                    /* the block is the same as the previous one, and filled with highValue */
   1.948 +                    c-=UTRIE2_DATA_BLOCK_LENGTH;
   1.949 +                    continue;
   1.950 +                }
   1.951 +                prevBlock=block;
   1.952 +                if(block==nullBlock) {
   1.953 +                    /* this is the null data block */
   1.954 +                    if(highValue!=initialValue) {
   1.955 +                        return c;
   1.956 +                    }
   1.957 +                    c-=UTRIE2_DATA_BLOCK_LENGTH;
   1.958 +                } else {
   1.959 +                    for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
   1.960 +                        value=data32[block+ --j];
   1.961 +                        if(value!=highValue) {
   1.962 +                            return c;
   1.963 +                        }
   1.964 +                        --c;
   1.965 +                    }
   1.966 +                }
   1.967 +            }
   1.968 +        }
   1.969 +    }
   1.970 +
   1.971 +    /* deliver last range */
   1.972 +    return 0;
   1.973 +}
   1.974 +
   1.975 +/*
   1.976 + * Compact a build-time trie.
   1.977 + *
   1.978 + * The compaction
   1.979 + * - removes blocks that are identical with earlier ones
   1.980 + * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
   1.981 + * - moves blocks in steps of the data granularity
   1.982 + * - moves and overlaps blocks that overlap with multiple values in the overlap region
   1.983 + *
   1.984 + * It does not
   1.985 + * - try to move and overlap blocks that are not already adjacent
   1.986 + */
   1.987 +static void
   1.988 +compactData(UNewTrie2 *trie) {
   1.989 +    int32_t start, newStart, movedStart;
   1.990 +    int32_t blockLength, overlap;
   1.991 +    int32_t i, mapIndex, blockCount;
   1.992 +
   1.993 +    /* do not compact linear-ASCII data */
   1.994 +    newStart=UTRIE2_DATA_START_OFFSET;
   1.995 +    for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
   1.996 +        trie->map[i]=start;
   1.997 +    }
   1.998 +
   1.999 +    /*
  1.1000 +     * Start with a block length of 64 for 2-byte UTF-8,
  1.1001 +     * then switch to UTRIE2_DATA_BLOCK_LENGTH.
  1.1002 +     */
  1.1003 +    blockLength=64;
  1.1004 +    blockCount=blockLength>>UTRIE2_SHIFT_2;
  1.1005 +    for(start=newStart; start<trie->dataLength;) {
  1.1006 +        /*
  1.1007 +         * start: index of first entry of current block
  1.1008 +         * newStart: index where the current block is to be moved
  1.1009 +         *           (right after current end of already-compacted data)
  1.1010 +         */
  1.1011 +        if(start==UNEWTRIE2_DATA_0800_OFFSET) {
  1.1012 +            blockLength=UTRIE2_DATA_BLOCK_LENGTH;
  1.1013 +            blockCount=1;
  1.1014 +        }
  1.1015 +
  1.1016 +        /* skip blocks that are not used */
  1.1017 +        if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
  1.1018 +            /* advance start to the next block */
  1.1019 +            start+=blockLength;
  1.1020 +
  1.1021 +            /* leave newStart with the previous block! */
  1.1022 +            continue;
  1.1023 +        }
  1.1024 +
  1.1025 +        /* search for an identical block */
  1.1026 +        if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
  1.1027 +             >=0
  1.1028 +        ) {
  1.1029 +            /* found an identical block, set the other block's index value for the current block */
  1.1030 +            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
  1.1031 +                trie->map[mapIndex++]=movedStart;
  1.1032 +                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
  1.1033 +            }
  1.1034 +
  1.1035 +            /* advance start to the next block */
  1.1036 +            start+=blockLength;
  1.1037 +
  1.1038 +            /* leave newStart with the previous block! */
  1.1039 +            continue;
  1.1040 +        }
  1.1041 +
  1.1042 +        /* see if the beginning of this block can be overlapped with the end of the previous block */
  1.1043 +        /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
  1.1044 +        for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
  1.1045 +            overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
  1.1046 +            overlap-=UTRIE2_DATA_GRANULARITY) {}
  1.1047 +
  1.1048 +        if(overlap>0 || newStart<start) {
  1.1049 +            /* some overlap, or just move the whole block */
  1.1050 +            movedStart=newStart-overlap;
  1.1051 +            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
  1.1052 +                trie->map[mapIndex++]=movedStart;
  1.1053 +                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
  1.1054 +            }
  1.1055 +
  1.1056 +            /* move the non-overlapping indexes to their new positions */
  1.1057 +            start+=overlap;
  1.1058 +            for(i=blockLength-overlap; i>0; --i) {
  1.1059 +                trie->data[newStart++]=trie->data[start++];
  1.1060 +            }
  1.1061 +        } else /* no overlap && newStart==start */ {
  1.1062 +            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
  1.1063 +                trie->map[mapIndex++]=start;
  1.1064 +                start+=UTRIE2_DATA_BLOCK_LENGTH;
  1.1065 +            }
  1.1066 +            newStart=start;
  1.1067 +        }
  1.1068 +    }
  1.1069 +
  1.1070 +    /* now adjust the index-2 table */
  1.1071 +    for(i=0; i<trie->index2Length; ++i) {
  1.1072 +        if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
  1.1073 +            /* Gap indexes are invalid (-1). Skip over the gap. */
  1.1074 +            i+=UNEWTRIE2_INDEX_GAP_LENGTH;
  1.1075 +        }
  1.1076 +        trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
  1.1077 +    }
  1.1078 +    trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
  1.1079 +
  1.1080 +    /* ensure dataLength alignment */
  1.1081 +    while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
  1.1082 +        trie->data[newStart++]=trie->initialValue;
  1.1083 +    }
  1.1084 +
  1.1085 +#ifdef UTRIE2_DEBUG
  1.1086 +    /* we saved some space */
  1.1087 +    printf("compacting UTrie2: count of 32-bit data words %lu->%lu\n",
  1.1088 +            (long)trie->dataLength, (long)newStart);
  1.1089 +#endif
  1.1090 +
  1.1091 +    trie->dataLength=newStart;
  1.1092 +}
  1.1093 +
  1.1094 +static void
  1.1095 +compactIndex2(UNewTrie2 *trie) {
  1.1096 +    int32_t i, start, newStart, movedStart, overlap;
  1.1097 +
  1.1098 +    /* do not compact linear-BMP index-2 blocks */
  1.1099 +    newStart=UTRIE2_INDEX_2_BMP_LENGTH;
  1.1100 +    for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
  1.1101 +        trie->map[i]=start;
  1.1102 +    }
  1.1103 +
  1.1104 +    /* Reduce the index table gap to what will be needed at runtime. */
  1.1105 +    newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
  1.1106 +
  1.1107 +    for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
  1.1108 +        /*
  1.1109 +         * start: index of first entry of current block
  1.1110 +         * newStart: index where the current block is to be moved
  1.1111 +         *           (right after current end of already-compacted data)
  1.1112 +         */
  1.1113 +
  1.1114 +        /* search for an identical block */
  1.1115 +        if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
  1.1116 +             >=0
  1.1117 +        ) {
  1.1118 +            /* found an identical block, set the other block's index value for the current block */
  1.1119 +            trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
  1.1120 +
  1.1121 +            /* advance start to the next block */
  1.1122 +            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
  1.1123 +
  1.1124 +            /* leave newStart with the previous block! */
  1.1125 +            continue;
  1.1126 +        }
  1.1127 +
  1.1128 +        /* see if the beginning of this block can be overlapped with the end of the previous block */
  1.1129 +        /* look for maximum overlap with the previous, adjacent block */
  1.1130 +        for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
  1.1131 +            overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
  1.1132 +            --overlap) {}
  1.1133 +
  1.1134 +        if(overlap>0 || newStart<start) {
  1.1135 +            /* some overlap, or just move the whole block */
  1.1136 +            trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
  1.1137 +
  1.1138 +            /* move the non-overlapping indexes to their new positions */
  1.1139 +            start+=overlap;
  1.1140 +            for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
  1.1141 +                trie->index2[newStart++]=trie->index2[start++];
  1.1142 +            }
  1.1143 +        } else /* no overlap && newStart==start */ {
  1.1144 +            trie->map[start>>UTRIE2_SHIFT_1_2]=start;
  1.1145 +            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
  1.1146 +            newStart=start;
  1.1147 +        }
  1.1148 +    }
  1.1149 +
  1.1150 +    /* now adjust the index-1 table */
  1.1151 +    for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
  1.1152 +        trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
  1.1153 +    }
  1.1154 +    trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
  1.1155 +
  1.1156 +    /*
  1.1157 +     * Ensure data table alignment:
  1.1158 +     * Needs to be granularity-aligned for 16-bit trie
  1.1159 +     * (so that dataMove will be down-shiftable),
  1.1160 +     * and 2-aligned for uint32_t data.
  1.1161 +     */
  1.1162 +    while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
  1.1163 +        /* Arbitrary value: 0x3fffc not possible for real data. */
  1.1164 +        trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;
  1.1165 +    }
  1.1166 +
  1.1167 +#ifdef UTRIE2_DEBUG
  1.1168 +    /* we saved some space */
  1.1169 +    printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n",
  1.1170 +            (long)trie->index2Length, (long)newStart);
  1.1171 +#endif
  1.1172 +
  1.1173 +    trie->index2Length=newStart;
  1.1174 +}
  1.1175 +
  1.1176 +static void
  1.1177 +compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
  1.1178 +    UNewTrie2 *newTrie;
  1.1179 +    UChar32 highStart, suppHighStart;
  1.1180 +    uint32_t highValue;
  1.1181 +
  1.1182 +    newTrie=trie->newTrie;
  1.1183 +
  1.1184 +    /* find highStart and round it up */
  1.1185 +    highValue=utrie2_get32(trie, 0x10ffff);
  1.1186 +    highStart=findHighStart(newTrie, highValue);
  1.1187 +    highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
  1.1188 +    if(highStart==0x110000) {
  1.1189 +        highValue=trie->errorValue;
  1.1190 +    }
  1.1191 +
  1.1192 +    /*
  1.1193 +     * Set trie->highStart only after utrie2_get32(trie, highStart).
  1.1194 +     * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
  1.1195 +     */
  1.1196 +    trie->highStart=newTrie->highStart=highStart;
  1.1197 +
  1.1198 +#ifdef UTRIE2_DEBUG
  1.1199 +    printf("UTrie2: highStart U+%04lx  highValue 0x%lx  initialValue 0x%lx\n",
  1.1200 +            (long)highStart, (long)highValue, (long)trie->initialValue);
  1.1201 +#endif
  1.1202 +
  1.1203 +    if(highStart<0x110000) {
  1.1204 +        /* Blank out [highStart..10ffff] to release associated data blocks. */
  1.1205 +        suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
  1.1206 +        utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);
  1.1207 +        if(U_FAILURE(*pErrorCode)) {
  1.1208 +            return;
  1.1209 +        }
  1.1210 +    }
  1.1211 +
  1.1212 +    compactData(newTrie);
  1.1213 +    if(highStart>0x10000) {
  1.1214 +        compactIndex2(newTrie);
  1.1215 +#ifdef UTRIE2_DEBUG
  1.1216 +    } else {
  1.1217 +        printf("UTrie2: highStart U+%04lx  count of 16-bit index-2 words %lu->%lu\n",
  1.1218 +                (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
  1.1219 +#endif
  1.1220 +    }
  1.1221 +
  1.1222 +    /*
  1.1223 +     * Store the highValue in the data array and round up the dataLength.
  1.1224 +     * Must be done after compactData() because that assumes that dataLength
  1.1225 +     * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
  1.1226 +     */
  1.1227 +    newTrie->data[newTrie->dataLength++]=highValue;
  1.1228 +    while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
  1.1229 +        newTrie->data[newTrie->dataLength++]=trie->initialValue;
  1.1230 +    }
  1.1231 +
  1.1232 +    newTrie->isCompacted=TRUE;
  1.1233 +}
  1.1234 +
  1.1235 +/* serialization ------------------------------------------------------------ */
  1.1236 +
  1.1237 +/**
  1.1238 + * Maximum length of the runtime index array.
  1.1239 + * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
  1.1240 + * (The actual maximum length is lower,
  1.1241 + * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
  1.1242 + */
  1.1243 +#define UTRIE2_MAX_INDEX_LENGTH 0xffff
  1.1244 +
  1.1245 +/**
  1.1246 + * Maximum length of the runtime data array.
  1.1247 + * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
  1.1248 + * and by uint16_t UTrie2Header.shiftedDataLength.
  1.1249 + */
  1.1250 +#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
  1.1251 +
  1.1252 +/* Compact and internally serialize the trie. */
  1.1253 +U_CAPI void U_EXPORT2
  1.1254 +utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
  1.1255 +    UNewTrie2 *newTrie;
  1.1256 +    UTrie2Header *header;
  1.1257 +    uint32_t *p;
  1.1258 +    uint16_t *dest16;
  1.1259 +    int32_t i, length;
  1.1260 +    int32_t allIndexesLength;
  1.1261 +    int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
  1.1262 +    UChar32 highStart;
  1.1263 +
  1.1264 +    /* argument check */
  1.1265 +    if(U_FAILURE(*pErrorCode)) {
  1.1266 +        return;
  1.1267 +    }
  1.1268 +    if( trie==NULL ||
  1.1269 +        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
  1.1270 +    ) {
  1.1271 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.1272 +        return;
  1.1273 +    }
  1.1274 +    newTrie=trie->newTrie;
  1.1275 +    if(newTrie==NULL) {
  1.1276 +        /* already frozen */
  1.1277 +        UTrie2ValueBits frozenValueBits=
  1.1278 +            trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
  1.1279 +        if(valueBits!=frozenValueBits) {
  1.1280 +            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.1281 +        }
  1.1282 +        return;
  1.1283 +    }
  1.1284 +
  1.1285 +    /* compact if necessary */
  1.1286 +    if(!newTrie->isCompacted) {
  1.1287 +        compactTrie(trie, pErrorCode);
  1.1288 +        if(U_FAILURE(*pErrorCode)) {
  1.1289 +            return;
  1.1290 +        }
  1.1291 +    }
  1.1292 +    highStart=trie->highStart;
  1.1293 +
  1.1294 +    if(highStart<=0x10000) {
  1.1295 +        allIndexesLength=UTRIE2_INDEX_1_OFFSET;
  1.1296 +    } else {
  1.1297 +        allIndexesLength=newTrie->index2Length;
  1.1298 +    }
  1.1299 +    if(valueBits==UTRIE2_16_VALUE_BITS) {
  1.1300 +        dataMove=allIndexesLength;
  1.1301 +    } else {
  1.1302 +        dataMove=0;
  1.1303 +    }
  1.1304 +
  1.1305 +    /* are indexLength and dataLength within limits? */
  1.1306 +    if( /* for unshifted indexLength */
  1.1307 +        allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
  1.1308 +        /* for unshifted dataNullOffset */
  1.1309 +        (dataMove+newTrie->dataNullOffset)>0xffff ||
  1.1310 +        /* for unshifted 2-byte UTF-8 index-2 values */
  1.1311 +        (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
  1.1312 +        /* for shiftedDataLength */
  1.1313 +        (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
  1.1314 +    ) {
  1.1315 +        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  1.1316 +        return;
  1.1317 +    }
  1.1318 +
  1.1319 +    /* calculate the total serialized length */
  1.1320 +    length=sizeof(UTrie2Header)+allIndexesLength*2;
  1.1321 +    if(valueBits==UTRIE2_16_VALUE_BITS) {
  1.1322 +        length+=newTrie->dataLength*2;
  1.1323 +    } else {
  1.1324 +        length+=newTrie->dataLength*4;
  1.1325 +    }
  1.1326 +
  1.1327 +    trie->memory=uprv_malloc(length);
  1.1328 +    if(trie->memory==NULL) {
  1.1329 +        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  1.1330 +        return;
  1.1331 +    }
  1.1332 +    trie->length=length;
  1.1333 +    trie->isMemoryOwned=TRUE;
  1.1334 +
  1.1335 +    trie->indexLength=allIndexesLength;
  1.1336 +    trie->dataLength=newTrie->dataLength;
  1.1337 +    if(highStart<=0x10000) {
  1.1338 +        trie->index2NullOffset=0xffff;
  1.1339 +    } else {
  1.1340 +        trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset;
  1.1341 +    }
  1.1342 +    trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
  1.1343 +    trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
  1.1344 +
  1.1345 +    /* set the header fields */
  1.1346 +    header=(UTrie2Header *)trie->memory;
  1.1347 +
  1.1348 +    header->signature=UTRIE2_SIG; /* "Tri2" */
  1.1349 +    header->options=(uint16_t)valueBits;
  1.1350 +
  1.1351 +    header->indexLength=(uint16_t)trie->indexLength;
  1.1352 +    header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
  1.1353 +    header->index2NullOffset=trie->index2NullOffset;
  1.1354 +    header->dataNullOffset=trie->dataNullOffset;
  1.1355 +    header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
  1.1356 +
  1.1357 +    /* fill the index and data arrays */
  1.1358 +    dest16=(uint16_t *)(header+1);
  1.1359 +    trie->index=dest16;
  1.1360 +
  1.1361 +    /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
  1.1362 +    p=(uint32_t *)newTrie->index2;
  1.1363 +    for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
  1.1364 +        *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
  1.1365 +    }
  1.1366 +
  1.1367 +    /* write UTF-8 2-byte index-2 values, not right-shifted */
  1.1368 +    for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
  1.1369 +        *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
  1.1370 +    }
  1.1371 +    for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
  1.1372 +        *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
  1.1373 +    }
  1.1374 +
  1.1375 +    if(highStart>0x10000) {
  1.1376 +        int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
  1.1377 +        int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
  1.1378 +
  1.1379 +        /* write 16-bit index-1 values for supplementary code points */
  1.1380 +        p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
  1.1381 +        for(i=index1Length; i>0; --i) {
  1.1382 +            *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
  1.1383 +        }
  1.1384 +
  1.1385 +        /*
  1.1386 +         * write the index-2 array values for supplementary code points,
  1.1387 +         * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
  1.1388 +         */
  1.1389 +        p=(uint32_t *)newTrie->index2+index2Offset;
  1.1390 +        for(i=newTrie->index2Length-index2Offset; i>0; --i) {
  1.1391 +            *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
  1.1392 +        }
  1.1393 +    }
  1.1394 +
  1.1395 +    /* write the 16/32-bit data array */
  1.1396 +    switch(valueBits) {
  1.1397 +    case UTRIE2_16_VALUE_BITS:
  1.1398 +        /* write 16-bit data values */
  1.1399 +        trie->data16=dest16;
  1.1400 +        trie->data32=NULL;
  1.1401 +        p=newTrie->data;
  1.1402 +        for(i=newTrie->dataLength; i>0; --i) {
  1.1403 +            *dest16++=(uint16_t)*p++;
  1.1404 +        }
  1.1405 +        break;
  1.1406 +    case UTRIE2_32_VALUE_BITS:
  1.1407 +        /* write 32-bit data values */
  1.1408 +        trie->data16=NULL;
  1.1409 +        trie->data32=(uint32_t *)dest16;
  1.1410 +        uprv_memcpy(dest16, newTrie->data, newTrie->dataLength*4);
  1.1411 +        break;
  1.1412 +    default:
  1.1413 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.1414 +        return;
  1.1415 +    }
  1.1416 +
  1.1417 +    /* Delete the UNewTrie2. */
  1.1418 +    uprv_free(newTrie->data);
  1.1419 +    uprv_free(newTrie);
  1.1420 +    trie->newTrie=NULL;
  1.1421 +}
  1.1422 +
  1.1423 +U_CAPI UBool U_EXPORT2
  1.1424 +utrie2_isFrozen(const UTrie2 *trie) {
  1.1425 +    return (UBool)(trie->newTrie==NULL);
  1.1426 +}
  1.1427 +
  1.1428 +U_CAPI int32_t U_EXPORT2
  1.1429 +utrie2_serialize(UTrie2 *trie,
  1.1430 +                 void *data, int32_t capacity,
  1.1431 +                 UErrorCode *pErrorCode) {
  1.1432 +    /* argument check */
  1.1433 +    if(U_FAILURE(*pErrorCode)) {
  1.1434 +        return 0;
  1.1435 +    }
  1.1436 +
  1.1437 +    if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL ||
  1.1438 +        capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))
  1.1439 +    ) {
  1.1440 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.1441 +        return 0;
  1.1442 +    }
  1.1443 +
  1.1444 +    if(capacity>=trie->length) {
  1.1445 +        uprv_memcpy(data, trie->memory, trie->length);
  1.1446 +    } else {
  1.1447 +        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1448 +    }
  1.1449 +    return trie->length;
  1.1450 +}
  1.1451 +
  1.1452 +/*
  1.1453 + * This is here to avoid a dependency from utrie2.cpp on utrie.c.
  1.1454 + * This file already depends on utrie.c.
  1.1455 + * Otherwise, this should be in utrie2.cpp right after utrie2_swap().
  1.1456 + */
  1.1457 +U_CAPI int32_t U_EXPORT2
  1.1458 +utrie2_swapAnyVersion(const UDataSwapper *ds,
  1.1459 +                      const void *inData, int32_t length, void *outData,
  1.1460 +                      UErrorCode *pErrorCode) {
  1.1461 +    if(U_SUCCESS(*pErrorCode)) {
  1.1462 +        switch(utrie2_getVersion(inData, length, TRUE)) {
  1.1463 +        case 1:
  1.1464 +            return utrie_swap(ds, inData, length, outData, pErrorCode);
  1.1465 +        case 2:
  1.1466 +            return utrie2_swap(ds, inData, length, outData, pErrorCode);
  1.1467 +        default:
  1.1468 +            *pErrorCode=U_INVALID_FORMAT_ERROR;
  1.1469 +            return 0;
  1.1470 +        }
  1.1471 +    }
  1.1472 +    return 0;
  1.1473 +}

mercurial