intl/icu/source/tools/makeconv/gencnvex.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/makeconv/gencnvex.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1079 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2003-2013, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  gencnvex.c
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2003oct12
    1.17 +*   created by: Markus W. Scherer
    1.18 +*/
    1.19 +
    1.20 +#include <stdio.h>
    1.21 +#include "unicode/utypes.h"
    1.22 +#include "unicode/ustring.h"
    1.23 +#include "cstring.h"
    1.24 +#include "cmemory.h"
    1.25 +#include "ucnv_cnv.h"
    1.26 +#include "ucnvmbcs.h"
    1.27 +#include "toolutil.h"
    1.28 +#include "unewdata.h"
    1.29 +#include "ucm.h"
    1.30 +#include "makeconv.h"
    1.31 +#include "genmbcs.h"
    1.32 +
    1.33 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    1.34 +
    1.35 +
    1.36 +static void
    1.37 +CnvExtClose(NewConverter *cnvData);
    1.38 +
    1.39 +static UBool
    1.40 +CnvExtIsValid(NewConverter *cnvData,
    1.41 +              const uint8_t *bytes, int32_t length);
    1.42 +
    1.43 +static UBool
    1.44 +CnvExtAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *staticData);
    1.45 +
    1.46 +static uint32_t
    1.47 +CnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData,
    1.48 +            UNewDataMemory *pData, int32_t tableType);
    1.49 +
    1.50 +typedef struct CnvExtData {
    1.51 +    NewConverter newConverter;
    1.52 +
    1.53 +    UCMFile *ucm;
    1.54 +
    1.55 +    /* toUnicode (state table in ucm->states) */
    1.56 +    UToolMemory *toUTable, *toUUChars;
    1.57 +
    1.58 +    /* fromUnicode */
    1.59 +    UToolMemory *fromUTableUChars, *fromUTableValues, *fromUBytes;
    1.60 +
    1.61 +    uint16_t stage1[MBCS_STAGE_1_SIZE];
    1.62 +    uint16_t stage2[MBCS_STAGE_2_SIZE];
    1.63 +    uint16_t stage3[0x10000<<UCNV_EXT_STAGE_2_LEFT_SHIFT]; /* 0x10000 because of 16-bit stage 2/3 indexes */
    1.64 +    uint32_t stage3b[0x10000];
    1.65 +
    1.66 +    int32_t stage1Top, stage2Top, stage3Top, stage3bTop;
    1.67 +
    1.68 +    /* for stage3 compaction of <subchar1> |2 mappings */
    1.69 +    uint16_t stage3Sub1Block;
    1.70 +
    1.71 +    /* statistics */
    1.72 +    int32_t
    1.73 +        maxInBytes, maxOutBytes, maxBytesPerUChar,
    1.74 +        maxInUChars, maxOutUChars, maxUCharsPerByte;
    1.75 +} CnvExtData;
    1.76 +
    1.77 +NewConverter *
    1.78 +CnvExtOpen(UCMFile *ucm) {
    1.79 +    CnvExtData *extData;
    1.80 +    
    1.81 +    extData=(CnvExtData *)uprv_malloc(sizeof(CnvExtData));
    1.82 +    if(extData==NULL) {
    1.83 +        printf("out of memory\n");
    1.84 +        exit(U_MEMORY_ALLOCATION_ERROR);
    1.85 +    }
    1.86 +    uprv_memset(extData, 0, sizeof(CnvExtData));
    1.87 +
    1.88 +    extData->ucm=ucm; /* aliased, not owned */
    1.89 +
    1.90 +    extData->newConverter.close=CnvExtClose;
    1.91 +    extData->newConverter.isValid=CnvExtIsValid;
    1.92 +    extData->newConverter.addTable=CnvExtAddTable;
    1.93 +    extData->newConverter.write=CnvExtWrite;
    1.94 +    return &extData->newConverter;
    1.95 +}
    1.96 +
    1.97 +static void
    1.98 +CnvExtClose(NewConverter *cnvData) {
    1.99 +    CnvExtData *extData=(CnvExtData *)cnvData;
   1.100 +    if(extData!=NULL) {
   1.101 +        utm_close(extData->toUTable);
   1.102 +        utm_close(extData->toUUChars);
   1.103 +        utm_close(extData->fromUTableUChars);
   1.104 +        utm_close(extData->fromUTableValues);
   1.105 +        utm_close(extData->fromUBytes);
   1.106 +        uprv_free(extData);
   1.107 +    }
   1.108 +}
   1.109 +
   1.110 +/* we do not expect this to be called */
   1.111 +static UBool
   1.112 +CnvExtIsValid(NewConverter *cnvData,
   1.113 +        const uint8_t *bytes, int32_t length) {
   1.114 +    return FALSE;
   1.115 +}
   1.116 +
   1.117 +static uint32_t
   1.118 +CnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData,
   1.119 +            UNewDataMemory *pData, int32_t tableType) {
   1.120 +    CnvExtData *extData=(CnvExtData *)cnvData;
   1.121 +    int32_t length, top, headerSize;
   1.122 +
   1.123 +    int32_t indexes[UCNV_EXT_INDEXES_MIN_LENGTH]={ 0 };
   1.124 +
   1.125 +    if(tableType&TABLE_BASE) {
   1.126 +        headerSize=0;
   1.127 +    } else {
   1.128 +        _MBCSHeader header={ { 0, 0, 0, 0 }, 0, 0, 0, 0, 0, 0, 0 };
   1.129 +
   1.130 +        /* write the header and base table name for an extension-only table */
   1.131 +        length=(int32_t)uprv_strlen(extData->ucm->baseName)+1;
   1.132 +        while(length&3) {
   1.133 +            /* add padding */
   1.134 +            extData->ucm->baseName[length++]=0;
   1.135 +        }
   1.136 +
   1.137 +        headerSize=MBCS_HEADER_V4_LENGTH*4+length;
   1.138 +
   1.139 +        /* fill the header */
   1.140 +        header.version[0]=4;
   1.141 +        header.version[1]=2;
   1.142 +        header.flags=(uint32_t)((headerSize<<8)|MBCS_OUTPUT_EXT_ONLY);
   1.143 +
   1.144 +        /* write the header and the base table name */
   1.145 +        udata_writeBlock(pData, &header, MBCS_HEADER_V4_LENGTH*4);
   1.146 +        udata_writeBlock(pData, extData->ucm->baseName, length);
   1.147 +    }
   1.148 +
   1.149 +    /* fill indexes[] - offsets/indexes are in units of the target array */
   1.150 +    top=0;
   1.151 +
   1.152 +    indexes[UCNV_EXT_INDEXES_LENGTH]=length=UCNV_EXT_INDEXES_MIN_LENGTH;
   1.153 +    top+=length*4;
   1.154 +
   1.155 +    indexes[UCNV_EXT_TO_U_INDEX]=top;
   1.156 +    indexes[UCNV_EXT_TO_U_LENGTH]=length=utm_countItems(extData->toUTable);
   1.157 +    top+=length*4;
   1.158 +
   1.159 +    indexes[UCNV_EXT_TO_U_UCHARS_INDEX]=top;
   1.160 +    indexes[UCNV_EXT_TO_U_UCHARS_LENGTH]=length=utm_countItems(extData->toUUChars);
   1.161 +    top+=length*2;
   1.162 +
   1.163 +    indexes[UCNV_EXT_FROM_U_UCHARS_INDEX]=top;
   1.164 +    length=utm_countItems(extData->fromUTableUChars);
   1.165 +    top+=length*2;
   1.166 +
   1.167 +    if(top&3) {
   1.168 +        /* add padding */
   1.169 +        *((UChar *)utm_alloc(extData->fromUTableUChars))=0;
   1.170 +        *((uint32_t *)utm_alloc(extData->fromUTableValues))=0;
   1.171 +        ++length;
   1.172 +        top+=2;
   1.173 +    }
   1.174 +    indexes[UCNV_EXT_FROM_U_LENGTH]=length;
   1.175 +
   1.176 +    indexes[UCNV_EXT_FROM_U_VALUES_INDEX]=top;
   1.177 +    top+=length*4;
   1.178 +
   1.179 +    indexes[UCNV_EXT_FROM_U_BYTES_INDEX]=top;
   1.180 +    length=utm_countItems(extData->fromUBytes);
   1.181 +    top+=length;
   1.182 +
   1.183 +    if(top&1) {
   1.184 +        /* add padding */
   1.185 +        *((uint8_t *)utm_alloc(extData->fromUBytes))=0;
   1.186 +        ++length;
   1.187 +        ++top;
   1.188 +    }
   1.189 +    indexes[UCNV_EXT_FROM_U_BYTES_LENGTH]=length;
   1.190 +
   1.191 +    indexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]=top;
   1.192 +    indexes[UCNV_EXT_FROM_U_STAGE_1_LENGTH]=length=extData->stage1Top;
   1.193 +    indexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]=length+=extData->stage2Top;
   1.194 +    top+=length*2;
   1.195 +
   1.196 +    indexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]=top;
   1.197 +    length=extData->stage3Top;
   1.198 +    top+=length*2;
   1.199 +
   1.200 +    if(top&3) {
   1.201 +        /* add padding */
   1.202 +        extData->stage3[extData->stage3Top++]=0;
   1.203 +        ++length;
   1.204 +        top+=2;
   1.205 +    }
   1.206 +    indexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]=length;
   1.207 +
   1.208 +    indexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]=top;
   1.209 +    indexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]=length=extData->stage3bTop;
   1.210 +    top+=length*4;
   1.211 +
   1.212 +    indexes[UCNV_EXT_SIZE]=top;
   1.213 +
   1.214 +    /* statistics */
   1.215 +    indexes[UCNV_EXT_COUNT_BYTES]=
   1.216 +        (extData->maxInBytes<<16)|
   1.217 +        (extData->maxOutBytes<<8)|
   1.218 +        extData->maxBytesPerUChar;
   1.219 +    indexes[UCNV_EXT_COUNT_UCHARS]=
   1.220 +        (extData->maxInUChars<<16)|
   1.221 +        (extData->maxOutUChars<<8)|
   1.222 +        extData->maxUCharsPerByte;
   1.223 +
   1.224 +    indexes[UCNV_EXT_FLAGS]=extData->ucm->ext->unicodeMask;
   1.225 +
   1.226 +    /* write the extension data */
   1.227 +    udata_writeBlock(pData, indexes, sizeof(indexes));
   1.228 +    udata_writeBlock(pData, utm_getStart(extData->toUTable), indexes[UCNV_EXT_TO_U_LENGTH]*4);
   1.229 +    udata_writeBlock(pData, utm_getStart(extData->toUUChars), indexes[UCNV_EXT_TO_U_UCHARS_LENGTH]*2);
   1.230 +
   1.231 +    udata_writeBlock(pData, utm_getStart(extData->fromUTableUChars), indexes[UCNV_EXT_FROM_U_LENGTH]*2);
   1.232 +    udata_writeBlock(pData, utm_getStart(extData->fromUTableValues), indexes[UCNV_EXT_FROM_U_LENGTH]*4);
   1.233 +    udata_writeBlock(pData, utm_getStart(extData->fromUBytes), indexes[UCNV_EXT_FROM_U_BYTES_LENGTH]);
   1.234 +
   1.235 +    udata_writeBlock(pData, extData->stage1, extData->stage1Top*2);
   1.236 +    udata_writeBlock(pData, extData->stage2, extData->stage2Top*2);
   1.237 +    udata_writeBlock(pData, extData->stage3, extData->stage3Top*2);
   1.238 +    udata_writeBlock(pData, extData->stage3b, extData->stage3bTop*4);
   1.239 +
   1.240 +#if 0
   1.241 +    {
   1.242 +        int32_t i, j;
   1.243 +
   1.244 +        length=extData->stage1Top;
   1.245 +        printf("\nstage1[%x]:\n", length);
   1.246 +
   1.247 +        for(i=0; i<length; ++i) {
   1.248 +            if(extData->stage1[i]!=length) {
   1.249 +                printf("stage1[%04x]=%04x\n", i, extData->stage1[i]);
   1.250 +            }
   1.251 +        }
   1.252 +
   1.253 +        j=length;
   1.254 +        length=extData->stage2Top;
   1.255 +        printf("\nstage2[%x]:\n", length);
   1.256 +
   1.257 +        for(i=0; i<length; ++j, ++i) {
   1.258 +            if(extData->stage2[i]!=0) {
   1.259 +                printf("stage12[%04x]=%04x\n", j, extData->stage2[i]);
   1.260 +            }
   1.261 +        }
   1.262 +
   1.263 +        length=extData->stage3Top;
   1.264 +        printf("\nstage3[%x]:\n", length);
   1.265 +
   1.266 +        for(i=0; i<length; ++i) {
   1.267 +            if(extData->stage3[i]!=0) {
   1.268 +                printf("stage3[%04x]=%04x\n", i, extData->stage3[i]);
   1.269 +            }
   1.270 +        }
   1.271 +
   1.272 +        length=extData->stage3bTop;
   1.273 +        printf("\nstage3b[%x]:\n", length);
   1.274 +
   1.275 +        for(i=0; i<length; ++i) {
   1.276 +            if(extData->stage3b[i]!=0) {
   1.277 +                printf("stage3b[%04x]=%08x\n", i, extData->stage3b[i]);
   1.278 +            }
   1.279 +        }
   1.280 +    }
   1.281 +#endif
   1.282 +
   1.283 +    if(VERBOSE) {
   1.284 +        printf("size of extension data: %ld\n", (long)top);
   1.285 +    }
   1.286 +
   1.287 +    /* return the number of bytes that should have been written */
   1.288 +    return (uint32_t)(headerSize+top);
   1.289 +}
   1.290 +
   1.291 +/* to Unicode --------------------------------------------------------------- */
   1.292 +
   1.293 +/*
   1.294 + * Remove fromUnicode fallbacks and SUB mappings which are irrelevant for
   1.295 + * the toUnicode table.
   1.296 + * This includes mappings with MBCS_FROM_U_EXT_FLAG which were suitable
   1.297 + * for the base toUnicode table but not for the base fromUnicode table.
   1.298 + * The table must be sorted.
   1.299 + * Modifies previous data in the reverseMap.
   1.300 + */
   1.301 +static int32_t
   1.302 +reduceToUMappings(UCMTable *table) {
   1.303 +    UCMapping *mappings;
   1.304 +    int32_t *map;
   1.305 +    int32_t i, j, count;
   1.306 +    int8_t flag;
   1.307 +
   1.308 +    mappings=table->mappings;
   1.309 +    map=table->reverseMap;
   1.310 +    count=table->mappingsLength;
   1.311 +
   1.312 +    /* leave the map alone for the initial mappings with desired flags */
   1.313 +    for(i=j=0; i<count; ++i) {
   1.314 +        flag=mappings[map[i]].f;
   1.315 +        if(flag!=0 && flag!=3) {
   1.316 +            break;
   1.317 +        }
   1.318 +    }
   1.319 +
   1.320 +    /* reduce from here to the rest */
   1.321 +    for(j=i; i<count; ++i) {
   1.322 +        flag=mappings[map[i]].f;
   1.323 +        if(flag==0 || flag==3) {
   1.324 +            map[j++]=map[i];
   1.325 +        }
   1.326 +    }
   1.327 +
   1.328 +    return j;
   1.329 +}
   1.330 +
   1.331 +static uint32_t
   1.332 +getToUnicodeValue(CnvExtData *extData, UCMTable *table, UCMapping *m) {
   1.333 +    UChar32 *u32;
   1.334 +    UChar *u;
   1.335 +    uint32_t value;
   1.336 +    int32_t u16Length, ratio;
   1.337 +    UErrorCode errorCode;
   1.338 +
   1.339 +    /* write the Unicode result code point or string index */
   1.340 +    if(m->uLen==1) {
   1.341 +        u16Length=U16_LENGTH(m->u);
   1.342 +        value=(uint32_t)(UCNV_EXT_TO_U_MIN_CODE_POINT+m->u);
   1.343 +    } else {
   1.344 +        /* the parser enforces m->uLen<=UCNV_EXT_MAX_UCHARS */
   1.345 +
   1.346 +        /* get the result code point string and its 16-bit string length */
   1.347 +        u32=UCM_GET_CODE_POINTS(table, m);
   1.348 +        errorCode=U_ZERO_ERROR;
   1.349 +        u_strFromUTF32(NULL, 0, &u16Length, u32, m->uLen, &errorCode);
   1.350 +        if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) {
   1.351 +            exit(errorCode);
   1.352 +        }
   1.353 +
   1.354 +        /* allocate it and put its length and index into the value */
   1.355 +        value=
   1.356 +            (((uint32_t)u16Length+UCNV_EXT_TO_U_LENGTH_OFFSET)<<UCNV_EXT_TO_U_LENGTH_SHIFT)|
   1.357 +            ((uint32_t)utm_countItems(extData->toUUChars));
   1.358 +        u=utm_allocN(extData->toUUChars, u16Length);
   1.359 +
   1.360 +        /* write the result 16-bit string */
   1.361 +        errorCode=U_ZERO_ERROR;
   1.362 +        u_strFromUTF32(u, u16Length, NULL, u32, m->uLen, &errorCode);
   1.363 +        if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) {
   1.364 +            exit(errorCode);
   1.365 +        }
   1.366 +    }
   1.367 +    if(m->f==0) {
   1.368 +        value|=UCNV_EXT_TO_U_ROUNDTRIP_FLAG;
   1.369 +    }
   1.370 +
   1.371 +    /* update statistics */
   1.372 +    if(m->bLen>extData->maxInBytes) {
   1.373 +        extData->maxInBytes=m->bLen;
   1.374 +    }
   1.375 +    if(u16Length>extData->maxOutUChars) {
   1.376 +        extData->maxOutUChars=u16Length;
   1.377 +    }
   1.378 +
   1.379 +    ratio=(u16Length+(m->bLen-1))/m->bLen;
   1.380 +    if(ratio>extData->maxUCharsPerByte) {
   1.381 +        extData->maxUCharsPerByte=ratio;
   1.382 +    }
   1.383 +
   1.384 +    return value;
   1.385 +}
   1.386 +
   1.387 +/*
   1.388 + * Recursive toUTable generator core function.
   1.389 + * Preconditions:
   1.390 + * - start<limit (There is at least one mapping.)
   1.391 + * - The mappings are sorted lexically. (Access is through the reverseMap.)
   1.392 + * - All mappings between start and limit have input sequences that share
   1.393 + *   the same prefix of unitIndex length, and therefore all of these sequences
   1.394 + *   are at least unitIndex+1 long.
   1.395 + * - There are only relevant mappings available through the reverseMap,
   1.396 + *   see reduceToUMappings().
   1.397 + *
   1.398 + * One function invocation generates one section table.
   1.399 + *
   1.400 + * Steps:
   1.401 + * 1. Count the number of unique unit values and get the low/high unit values
   1.402 + *    that occur at unitIndex.
   1.403 + * 2. Allocate the section table with possible optimization for linear access.
   1.404 + * 3. Write temporary version of the section table with start indexes of
   1.405 + *    subsections, each corresponding to one unit value at unitIndex.
   1.406 + * 4. Iterate through the table once more, and depending on the subsection length:
   1.407 + *    0: write 0 as a result value (unused byte in linear-access section table)
   1.408 + *   >0: if there is one mapping with an input unit sequence of unitIndex+1
   1.409 + *       then defaultValue=compute the mapping result for this whole sequence
   1.410 + *       else defaultValue=0
   1.411 + *
   1.412 + *       recurse into the subsection
   1.413 + */
   1.414 +static UBool
   1.415 +generateToUTable(CnvExtData *extData, UCMTable *table,
   1.416 +                 int32_t start, int32_t limit, int32_t unitIndex,
   1.417 +                 uint32_t defaultValue) {
   1.418 +    UCMapping *mappings, *m;
   1.419 +    int32_t *map;
   1.420 +    int32_t i, j, uniqueCount, count, subStart, subLimit;
   1.421 +
   1.422 +    uint8_t *bytes;
   1.423 +    int32_t low, high, prev;
   1.424 +
   1.425 +    uint32_t *section;
   1.426 +
   1.427 +    mappings=table->mappings;
   1.428 +    map=table->reverseMap;
   1.429 +
   1.430 +    /* step 1: examine the input units; set low, high, uniqueCount */
   1.431 +    m=mappings+map[start];
   1.432 +    bytes=UCM_GET_BYTES(table, m);
   1.433 +    low=bytes[unitIndex];
   1.434 +    uniqueCount=1;
   1.435 +
   1.436 +    prev=high=low;
   1.437 +    for(i=start+1; i<limit; ++i) {
   1.438 +        m=mappings+map[i];
   1.439 +        bytes=UCM_GET_BYTES(table, m);
   1.440 +        high=bytes[unitIndex];
   1.441 +
   1.442 +        if(high!=prev) {
   1.443 +            prev=high;
   1.444 +            ++uniqueCount;
   1.445 +        }
   1.446 +    }
   1.447 +
   1.448 +    /* step 2: allocate the section; set count, section */
   1.449 +    count=(high-low)+1;
   1.450 +    if(count<0x100 && (unitIndex==0 || uniqueCount>=(3*count)/4)) {
   1.451 +        /*
   1.452 +         * for the root table and for fairly full tables:
   1.453 +         * allocate for direct, linear array access
   1.454 +         * by keeping count, to write an entry for each unit value
   1.455 +         * from low to high
   1.456 +         * exception: use a compact table if count==0x100 because
   1.457 +         * that cannot be encoded in the length byte
   1.458 +         */
   1.459 +    } else {
   1.460 +        count=uniqueCount;
   1.461 +    }
   1.462 +
   1.463 +    if(count>=0x100) {
   1.464 +        fprintf(stderr, "error: toUnicode extension table section overflow: %ld section entries\n", (long)count);
   1.465 +        return FALSE;
   1.466 +    }
   1.467 +
   1.468 +    /* allocate the section: 1 entry for the header + count for the items */
   1.469 +    section=(uint32_t *)utm_allocN(extData->toUTable, 1+count);
   1.470 +
   1.471 +    /* write the section header */
   1.472 +    *section++=((uint32_t)count<<UCNV_EXT_TO_U_BYTE_SHIFT)|defaultValue;
   1.473 +
   1.474 +    /* step 3: write temporary section table with subsection starts */
   1.475 +    prev=low-1; /* just before low to prevent empty subsections before low */
   1.476 +    j=0; /* section table index */
   1.477 +    for(i=start; i<limit; ++i) {
   1.478 +        m=mappings+map[i];
   1.479 +        bytes=UCM_GET_BYTES(table, m);
   1.480 +        high=bytes[unitIndex];
   1.481 +
   1.482 +        if(high!=prev) {
   1.483 +            /* start of a new subsection for unit high */
   1.484 +            if(count>uniqueCount) {
   1.485 +                /* write empty subsections for unused units in a linear table */
   1.486 +                while(++prev<high) {
   1.487 +                    section[j++]=((uint32_t)prev<<UCNV_EXT_TO_U_BYTE_SHIFT)|(uint32_t)i;
   1.488 +                }
   1.489 +            } else {
   1.490 +                prev=high;
   1.491 +            }
   1.492 +
   1.493 +            /* write the entry with the subsection start */
   1.494 +            section[j++]=((uint32_t)high<<UCNV_EXT_TO_U_BYTE_SHIFT)|(uint32_t)i;
   1.495 +        }
   1.496 +    }
   1.497 +    /* assert(j==count) */
   1.498 +
   1.499 +    /* step 4: recurse and write results */
   1.500 +    subLimit=UCNV_EXT_TO_U_GET_VALUE(section[0]);
   1.501 +    for(j=0; j<count; ++j) {
   1.502 +        subStart=subLimit;
   1.503 +        subLimit= (j+1)<count ? UCNV_EXT_TO_U_GET_VALUE(section[j+1]) : limit;
   1.504 +
   1.505 +        /* remove the subStart temporary value */
   1.506 +        section[j]&=~UCNV_EXT_TO_U_VALUE_MASK;
   1.507 +
   1.508 +        if(subStart==subLimit) {
   1.509 +            /* leave the value zero: empty subsection for unused unit in a linear table */
   1.510 +            continue;
   1.511 +        }
   1.512 +
   1.513 +        /* see if there is exactly one input unit sequence of length unitIndex+1 */
   1.514 +        defaultValue=0;
   1.515 +        m=mappings+map[subStart];
   1.516 +        if(m->bLen==unitIndex+1) {
   1.517 +            /* do not include this in generateToUTable() */
   1.518 +            ++subStart;
   1.519 +
   1.520 +            if(subStart<subLimit && mappings[map[subStart]].bLen==unitIndex+1) {
   1.521 +                /* print error for multiple same-input-sequence mappings */
   1.522 +                fprintf(stderr, "error: multiple mappings from same bytes\n");
   1.523 +                ucm_printMapping(table, m, stderr);
   1.524 +                ucm_printMapping(table, mappings+map[subStart], stderr);
   1.525 +                return FALSE;
   1.526 +            }
   1.527 +
   1.528 +            defaultValue=getToUnicodeValue(extData, table, m);
   1.529 +        }
   1.530 +
   1.531 +        if(subStart==subLimit) {
   1.532 +            /* write the result for the input sequence ending here */
   1.533 +            section[j]|=defaultValue;
   1.534 +        } else {
   1.535 +            /* write the index to the subsection table */
   1.536 +            section[j]|=(uint32_t)utm_countItems(extData->toUTable);
   1.537 +
   1.538 +            /* recurse */
   1.539 +            if(!generateToUTable(extData, table, subStart, subLimit, unitIndex+1, defaultValue)) {
   1.540 +                return FALSE;
   1.541 +            }
   1.542 +        }
   1.543 +    }
   1.544 +    return TRUE;
   1.545 +}
   1.546 +
   1.547 +/*
   1.548 + * Generate the toUTable and toUUChars from the input table.
   1.549 + * The input table must be sorted, and all precision flags must be 0..3.
   1.550 + * This function will modify the table's reverseMap.
   1.551 + */
   1.552 +static UBool
   1.553 +makeToUTable(CnvExtData *extData, UCMTable *table) {
   1.554 +    int32_t toUCount;
   1.555 +
   1.556 +    toUCount=reduceToUMappings(table);
   1.557 +
   1.558 +    extData->toUTable=utm_open("cnv extension toUTable", 0x10000, UCNV_EXT_TO_U_MIN_CODE_POINT, 4);
   1.559 +    extData->toUUChars=utm_open("cnv extension toUUChars", 0x10000, UCNV_EXT_TO_U_INDEX_MASK+1, 2);
   1.560 +
   1.561 +    return generateToUTable(extData, table, 0, toUCount, 0, 0);
   1.562 +}
   1.563 +
   1.564 +/* from Unicode ------------------------------------------------------------- */
   1.565 +
   1.566 +/*
   1.567 + * preprocessing:
   1.568 + * rebuild reverseMap with mapping indexes for mappings relevant for from Unicode
   1.569 + * change each Unicode string to encode all but the first code point in 16-bit form
   1.570 + *
   1.571 + * generation:
   1.572 + * for each unique code point
   1.573 + *   write an entry in the 3-stage trie
   1.574 + *   check that there is only one single-code point sequence
   1.575 + *   start recursion for following 16-bit input units
   1.576 + */
   1.577 +
   1.578 +/*
   1.579 + * Remove toUnicode fallbacks and non-<subchar1> SUB mappings
   1.580 + * which are irrelevant for the fromUnicode extension table.
   1.581 + * Remove MBCS_FROM_U_EXT_FLAG bits.
   1.582 + * Overwrite the reverseMap with an index array to the relevant mappings.
   1.583 + * Modify the code point sequences to a generator-friendly format where
   1.584 + * the first code points remains unchanged but the following are recoded
   1.585 + * into 16-bit Unicode string form.
   1.586 + * The table must be sorted.
   1.587 + * Destroys previous data in the reverseMap.
   1.588 + */
   1.589 +static int32_t
   1.590 +prepareFromUMappings(UCMTable *table) {
   1.591 +    UCMapping *mappings, *m;
   1.592 +    int32_t *map;
   1.593 +    int32_t i, j, count;
   1.594 +    int8_t flag;
   1.595 +
   1.596 +    mappings=table->mappings;
   1.597 +    map=table->reverseMap;
   1.598 +    count=table->mappingsLength;
   1.599 +
   1.600 +    /*
   1.601 +     * we do not go through the map on input because the mappings are
   1.602 +     * sorted lexically
   1.603 +     */
   1.604 +    m=mappings;
   1.605 +
   1.606 +    for(i=j=0; i<count; ++m, ++i) {
   1.607 +        flag=m->f;
   1.608 +        if(flag>=0) {
   1.609 +            flag&=MBCS_FROM_U_EXT_MASK;
   1.610 +            m->f=flag;
   1.611 +        }
   1.612 +        if(flag==0 || flag==1 || (flag==2 && m->bLen==1) || flag==4) {
   1.613 +            map[j++]=i;
   1.614 +
   1.615 +            if(m->uLen>1) {
   1.616 +                /* recode all but the first code point to 16-bit Unicode */
   1.617 +                UChar32 *u32;
   1.618 +                UChar *u;
   1.619 +                UChar32 c;
   1.620 +                int32_t q, r;
   1.621 +
   1.622 +                u32=UCM_GET_CODE_POINTS(table, m);
   1.623 +                u=(UChar *)u32; /* destructive in-place recoding */
   1.624 +                for(r=2, q=1; q<m->uLen; ++q) {
   1.625 +                    c=u32[q];
   1.626 +                    U16_APPEND_UNSAFE(u, r, c);
   1.627 +                }
   1.628 +
   1.629 +                /* counts the first code point always at 2 - the first 16-bit unit is at 16-bit index 2 */
   1.630 +                m->uLen=(int8_t)r;
   1.631 +            }
   1.632 +        }
   1.633 +    }
   1.634 +
   1.635 +    return j;
   1.636 +}
   1.637 +
   1.638 +static uint32_t
   1.639 +getFromUBytesValue(CnvExtData *extData, UCMTable *table, UCMapping *m) {
   1.640 +    uint8_t *bytes, *resultBytes;
   1.641 +    uint32_t value;
   1.642 +    int32_t u16Length, ratio;
   1.643 +
   1.644 +    if(m->f==2) {
   1.645 +        /*
   1.646 +         * no mapping, <subchar1> preferred
   1.647 +         *
   1.648 +         * no need to count in statistics because the subchars are already
   1.649 +         * counted for maxOutBytes and maxBytesPerUChar in UConverterStaticData,
   1.650 +         * and this non-mapping does not count for maxInUChars which are always
   1.651 +         * trivially at least two if counting unmappable supplementary code points
   1.652 +         */
   1.653 +        return UCNV_EXT_FROM_U_SUBCHAR1;
   1.654 +    }
   1.655 +
   1.656 +    bytes=UCM_GET_BYTES(table, m);
   1.657 +    value=0;
   1.658 +    switch(m->bLen) {
   1.659 +        /* 1..3: store the bytes in the value word */
   1.660 +    case 3:
   1.661 +        value=((uint32_t)*bytes++)<<16;
   1.662 +    case 2:
   1.663 +        value|=((uint32_t)*bytes++)<<8;
   1.664 +    case 1:
   1.665 +        value|=*bytes;
   1.666 +        break;
   1.667 +    default:
   1.668 +        /* the parser enforces m->bLen<=UCNV_EXT_MAX_BYTES */
   1.669 +        /* store the bytes in fromUBytes[] and the index in the value word */
   1.670 +        value=(uint32_t)utm_countItems(extData->fromUBytes);
   1.671 +        resultBytes=utm_allocN(extData->fromUBytes, m->bLen);
   1.672 +        uprv_memcpy(resultBytes, bytes, m->bLen);
   1.673 +        break;
   1.674 +    }
   1.675 +    value|=(uint32_t)m->bLen<<UCNV_EXT_FROM_U_LENGTH_SHIFT;
   1.676 +    if(m->f==0) {
   1.677 +        value|=UCNV_EXT_FROM_U_ROUNDTRIP_FLAG;
   1.678 +    } else if(m->f==4) {
   1.679 +        value|=UCNV_EXT_FROM_U_GOOD_ONE_WAY_FLAG;
   1.680 +    }
   1.681 +
   1.682 +    /* calculate the real UTF-16 length (see recoding in prepareFromUMappings()) */
   1.683 +    if(m->uLen==1) {
   1.684 +        u16Length=U16_LENGTH(m->u);
   1.685 +    } else {
   1.686 +        u16Length=U16_LENGTH(UCM_GET_CODE_POINTS(table, m)[0])+(m->uLen-2);
   1.687 +    }
   1.688 +
   1.689 +    /* update statistics */
   1.690 +    if(u16Length>extData->maxInUChars) {
   1.691 +        extData->maxInUChars=u16Length;
   1.692 +    }
   1.693 +    if(m->bLen>extData->maxOutBytes) {
   1.694 +        extData->maxOutBytes=m->bLen;
   1.695 +    }
   1.696 +
   1.697 +    ratio=(m->bLen+(u16Length-1))/u16Length;
   1.698 +    if(ratio>extData->maxBytesPerUChar) {
   1.699 +        extData->maxBytesPerUChar=ratio;
   1.700 +    }
   1.701 +
   1.702 +    return value;
   1.703 +}
   1.704 +
   1.705 +/*
   1.706 + * works like generateToUTable(), except that the
   1.707 + * output section consists of two arrays, one for input UChars and one
   1.708 + * for result values
   1.709 + *
   1.710 + * also, fromUTable sections are always stored in a compact form for
   1.711 + * access via binary search
   1.712 + */
   1.713 +static UBool
   1.714 +generateFromUTable(CnvExtData *extData, UCMTable *table,
   1.715 +                   int32_t start, int32_t limit, int32_t unitIndex,
   1.716 +                   uint32_t defaultValue) {
   1.717 +    UCMapping *mappings, *m;
   1.718 +    int32_t *map;
   1.719 +    int32_t i, j, uniqueCount, count, subStart, subLimit;
   1.720 +
   1.721 +    UChar *uchars;
   1.722 +    UChar32 low, high, prev;
   1.723 +
   1.724 +    UChar *sectionUChars;
   1.725 +    uint32_t *sectionValues;
   1.726 +
   1.727 +    mappings=table->mappings;
   1.728 +    map=table->reverseMap;
   1.729 +
   1.730 +    /* step 1: examine the input units; set low, high, uniqueCount */
   1.731 +    m=mappings+map[start];
   1.732 +    uchars=(UChar *)UCM_GET_CODE_POINTS(table, m);
   1.733 +    low=uchars[unitIndex];
   1.734 +    uniqueCount=1;
   1.735 +
   1.736 +    prev=high=low;
   1.737 +    for(i=start+1; i<limit; ++i) {
   1.738 +        m=mappings+map[i];
   1.739 +        uchars=(UChar *)UCM_GET_CODE_POINTS(table, m);
   1.740 +        high=uchars[unitIndex];
   1.741 +
   1.742 +        if(high!=prev) {
   1.743 +            prev=high;
   1.744 +            ++uniqueCount;
   1.745 +        }
   1.746 +    }
   1.747 +
   1.748 +    /* step 2: allocate the section; set count, section */
   1.749 +    /* the fromUTable always stores for access via binary search */
   1.750 +    count=uniqueCount;
   1.751 +
   1.752 +    /* allocate the section: 1 entry for the header + count for the items */
   1.753 +    sectionUChars=(UChar *)utm_allocN(extData->fromUTableUChars, 1+count);
   1.754 +    sectionValues=(uint32_t *)utm_allocN(extData->fromUTableValues, 1+count);
   1.755 +
   1.756 +    /* write the section header */
   1.757 +    *sectionUChars++=(UChar)count;
   1.758 +    *sectionValues++=defaultValue;
   1.759 +
   1.760 +    /* step 3: write temporary section table with subsection starts */
   1.761 +    prev=low-1; /* just before low to prevent empty subsections before low */
   1.762 +    j=0; /* section table index */
   1.763 +    for(i=start; i<limit; ++i) {
   1.764 +        m=mappings+map[i];
   1.765 +        uchars=(UChar *)UCM_GET_CODE_POINTS(table, m);
   1.766 +        high=uchars[unitIndex];
   1.767 +
   1.768 +        if(high!=prev) {
   1.769 +            /* start of a new subsection for unit high */
   1.770 +            prev=high;
   1.771 +
   1.772 +            /* write the entry with the subsection start */
   1.773 +            sectionUChars[j]=(UChar)high;
   1.774 +            sectionValues[j]=(uint32_t)i;
   1.775 +            ++j;
   1.776 +        }
   1.777 +    }
   1.778 +    /* assert(j==count) */
   1.779 +
   1.780 +    /* step 4: recurse and write results */
   1.781 +    subLimit=(int32_t)(sectionValues[0]);
   1.782 +    for(j=0; j<count; ++j) {
   1.783 +        subStart=subLimit;
   1.784 +        subLimit= (j+1)<count ? (int32_t)(sectionValues[j+1]) : limit;
   1.785 +
   1.786 +        /* see if there is exactly one input unit sequence of length unitIndex+1 */
   1.787 +        defaultValue=0;
   1.788 +        m=mappings+map[subStart];
   1.789 +        if(m->uLen==unitIndex+1) {
   1.790 +            /* do not include this in generateToUTable() */
   1.791 +            ++subStart;
   1.792 +
   1.793 +            if(subStart<subLimit && mappings[map[subStart]].uLen==unitIndex+1) {
   1.794 +                /* print error for multiple same-input-sequence mappings */
   1.795 +                fprintf(stderr, "error: multiple mappings from same Unicode code points\n");
   1.796 +                ucm_printMapping(table, m, stderr);
   1.797 +                ucm_printMapping(table, mappings+map[subStart], stderr);
   1.798 +                return FALSE;
   1.799 +            }
   1.800 +
   1.801 +            defaultValue=getFromUBytesValue(extData, table, m);
   1.802 +        }
   1.803 +
   1.804 +        if(subStart==subLimit) {
   1.805 +            /* write the result for the input sequence ending here */
   1.806 +            sectionValues[j]=defaultValue;
   1.807 +        } else {
   1.808 +            /* write the index to the subsection table */
   1.809 +            sectionValues[j]=(uint32_t)utm_countItems(extData->fromUTableValues);
   1.810 +
   1.811 +            /* recurse */
   1.812 +            if(!generateFromUTable(extData, table, subStart, subLimit, unitIndex+1, defaultValue)) {
   1.813 +                return FALSE;
   1.814 +            }
   1.815 +        }
   1.816 +    }
   1.817 +    return TRUE;
   1.818 +}
   1.819 +
   1.820 +/*
   1.821 + * add entries to the fromUnicode trie,
   1.822 + * assume to be called with code points in ascending order
   1.823 + * and use that to build the trie in precompacted form
   1.824 + */
   1.825 +static void
   1.826 +addFromUTrieEntry(CnvExtData *extData, UChar32 c, uint32_t value) {
   1.827 +    int32_t i1, i2, i3, i3b, nextOffset, min, newBlock;
   1.828 +
   1.829 +    if(value==0) {
   1.830 +        return;
   1.831 +    }
   1.832 +
   1.833 +    /*
   1.834 +     * compute the index for each stage,
   1.835 +     * allocate a stage block if necessary,
   1.836 +     * and write the stage value
   1.837 +     */
   1.838 +    i1=c>>10;
   1.839 +    if(i1>=extData->stage1Top) {
   1.840 +        extData->stage1Top=i1+1;
   1.841 +    }
   1.842 +
   1.843 +    nextOffset=(c>>4)&0x3f;
   1.844 +
   1.845 +    if(extData->stage1[i1]==0) {
   1.846 +        /* allocate another block in stage 2; overlap with the previous block */
   1.847 +        newBlock=extData->stage2Top;
   1.848 +        min=newBlock-nextOffset; /* minimum block start with overlap */
   1.849 +        while(min<newBlock && extData->stage2[newBlock-1]==0) {
   1.850 +            --newBlock;
   1.851 +        }
   1.852 +
   1.853 +        extData->stage1[i1]=(uint16_t)newBlock;
   1.854 +        extData->stage2Top=newBlock+MBCS_STAGE_2_BLOCK_SIZE;
   1.855 +        if(extData->stage2Top>LENGTHOF(extData->stage2)) {
   1.856 +            fprintf(stderr, "error: too many stage 2 entries at U+%04x\n", (int)c);
   1.857 +            exit(U_MEMORY_ALLOCATION_ERROR);
   1.858 +        }
   1.859 +    }
   1.860 +
   1.861 +    i2=extData->stage1[i1]+nextOffset;
   1.862 +    nextOffset=c&0xf;
   1.863 +
   1.864 +    if(extData->stage2[i2]==0) {
   1.865 +        /* allocate another block in stage 3; overlap with the previous block */
   1.866 +        newBlock=extData->stage3Top;
   1.867 +        min=newBlock-nextOffset; /* minimum block start with overlap */
   1.868 +        while(min<newBlock && extData->stage3[newBlock-1]==0) {
   1.869 +            --newBlock;
   1.870 +        }
   1.871 +
   1.872 +        /* round up to a multiple of stage 3 granularity >1 (similar to utrie.c) */
   1.873 +        newBlock=(newBlock+(UCNV_EXT_STAGE_3_GRANULARITY-1))&~(UCNV_EXT_STAGE_3_GRANULARITY-1);
   1.874 +        extData->stage2[i2]=(uint16_t)(newBlock>>UCNV_EXT_STAGE_2_LEFT_SHIFT);
   1.875 +
   1.876 +        extData->stage3Top=newBlock+MBCS_STAGE_3_BLOCK_SIZE;
   1.877 +        if(extData->stage3Top>LENGTHOF(extData->stage3)) {
   1.878 +            fprintf(stderr, "error: too many stage 3 entries at U+%04x\n", (int)c);
   1.879 +            exit(U_MEMORY_ALLOCATION_ERROR);
   1.880 +        }
   1.881 +    }
   1.882 +
   1.883 +    i3=((int32_t)extData->stage2[i2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)+nextOffset;
   1.884 +    /*
   1.885 +     * assume extData->stage3[i3]==0 because we get
   1.886 +     * code points in strictly ascending order
   1.887 +     */
   1.888 +
   1.889 +    if(value==UCNV_EXT_FROM_U_SUBCHAR1) {
   1.890 +        /* <subchar1> SUB mapping, see getFromUBytesValue() and prepareFromUMappings() */
   1.891 +        extData->stage3[i3]=1;
   1.892 +
   1.893 +        /*
   1.894 +         * precompaction is not optimal for <subchar1> |2 mappings because
   1.895 +         * stage3 values for them are all the same, unlike for other mappings
   1.896 +         * which all have unique values;
   1.897 +         * use a simple compaction of reusing a whole block filled with these
   1.898 +         * mappings
   1.899 +         */
   1.900 +
   1.901 +        /* is the entire block filled with <subchar1> |2 mappings? */
   1.902 +        if(nextOffset==MBCS_STAGE_3_BLOCK_SIZE-1) {
   1.903 +            for(min=i3-nextOffset;
   1.904 +                min<i3 && extData->stage3[min]==1;
   1.905 +                ++min) {}
   1.906 +
   1.907 +            if(min==i3) {
   1.908 +                /* the entire block is filled with these mappings */
   1.909 +                if(extData->stage3Sub1Block!=0) {
   1.910 +                    /* point to the previous such block and remove this block from stage3 */
   1.911 +                    extData->stage2[i2]=extData->stage3Sub1Block;
   1.912 +                    extData->stage3Top-=MBCS_STAGE_3_BLOCK_SIZE;
   1.913 +                    uprv_memset(extData->stage3+extData->stage3Top, 0, MBCS_STAGE_3_BLOCK_SIZE*2);
   1.914 +                } else {
   1.915 +                    /* remember this block's stage2 entry */
   1.916 +                    extData->stage3Sub1Block=extData->stage2[i2];
   1.917 +                }
   1.918 +            }
   1.919 +        }
   1.920 +    } else {
   1.921 +        if((i3b=extData->stage3bTop++)>=LENGTHOF(extData->stage3b)) {
   1.922 +            fprintf(stderr, "error: too many stage 3b entries at U+%04x\n", (int)c);
   1.923 +            exit(U_MEMORY_ALLOCATION_ERROR);
   1.924 +        }
   1.925 +
   1.926 +        /* roundtrip or fallback mapping */
   1.927 +        extData->stage3[i3]=(uint16_t)i3b;
   1.928 +        extData->stage3b[i3b]=value;
   1.929 +    }
   1.930 +}
   1.931 +
   1.932 +static UBool
   1.933 +generateFromUTrie(CnvExtData *extData, UCMTable *table, int32_t mapLength) {
   1.934 +    UCMapping *mappings, *m;
   1.935 +    int32_t *map;
   1.936 +    uint32_t value;
   1.937 +    int32_t subStart, subLimit;
   1.938 +
   1.939 +    UChar32 *codePoints;
   1.940 +    UChar32 c, next;
   1.941 +
   1.942 +    if(mapLength==0) {
   1.943 +        return TRUE;
   1.944 +    }
   1.945 +
   1.946 +    mappings=table->mappings;
   1.947 +    map=table->reverseMap;
   1.948 +
   1.949 +    /*
   1.950 +     * iterate over same-initial-code point mappings,
   1.951 +     * enter the initial code point into the trie,
   1.952 +     * and start a recursion on the corresponding mappings section
   1.953 +     * with generateFromUTable()
   1.954 +     */
   1.955 +    m=mappings+map[0];
   1.956 +    codePoints=UCM_GET_CODE_POINTS(table, m);
   1.957 +    next=codePoints[0];
   1.958 +    subLimit=0;
   1.959 +    while(subLimit<mapLength) {
   1.960 +        /* get a new subsection of mappings starting with the same code point */
   1.961 +        subStart=subLimit;
   1.962 +        c=next;
   1.963 +        while(next==c && ++subLimit<mapLength) {
   1.964 +            m=mappings+map[subLimit];
   1.965 +            codePoints=UCM_GET_CODE_POINTS(table, m);
   1.966 +            next=codePoints[0];
   1.967 +        }
   1.968 +
   1.969 +        /*
   1.970 +         * compute the value for this code point;
   1.971 +         * if there is a mapping for this code point alone, it is at subStart
   1.972 +         * because the table is sorted lexically
   1.973 +         */
   1.974 +        value=0;
   1.975 +        m=mappings+map[subStart];
   1.976 +        codePoints=UCM_GET_CODE_POINTS(table, m);
   1.977 +        if(m->uLen==1) {
   1.978 +            /* do not include this in generateFromUTable() */
   1.979 +            ++subStart;
   1.980 +
   1.981 +            if(subStart<subLimit && mappings[map[subStart]].uLen==1) {
   1.982 +                /* print error for multiple same-input-sequence mappings */
   1.983 +                fprintf(stderr, "error: multiple mappings from same Unicode code points\n");
   1.984 +                ucm_printMapping(table, m, stderr);
   1.985 +                ucm_printMapping(table, mappings+map[subStart], stderr);
   1.986 +                return FALSE;
   1.987 +            }
   1.988 +
   1.989 +            value=getFromUBytesValue(extData, table, m);
   1.990 +        }
   1.991 +
   1.992 +        if(subStart==subLimit) {
   1.993 +            /* write the result for this one code point */
   1.994 +            addFromUTrieEntry(extData, c, value);
   1.995 +        } else {
   1.996 +            /* write the index to the subsection table */
   1.997 +            addFromUTrieEntry(extData, c, (uint32_t)utm_countItems(extData->fromUTableValues));
   1.998 +
   1.999 +            /* recurse, starting from 16-bit-unit index 2, the first 16-bit unit after c */
  1.1000 +            if(!generateFromUTable(extData, table, subStart, subLimit, 2, value)) {
  1.1001 +                return FALSE;
  1.1002 +            }
  1.1003 +        }
  1.1004 +    }
  1.1005 +    return TRUE;
  1.1006 +}
  1.1007 +
  1.1008 +/*
  1.1009 + * Generate the fromU data structures from the input table.
  1.1010 + * The input table must be sorted, and all precision flags must be 0..3.
  1.1011 + * This function will modify the table's reverseMap.
  1.1012 + */
  1.1013 +static UBool
  1.1014 +makeFromUTable(CnvExtData *extData, UCMTable *table) {
  1.1015 +    uint16_t *stage1;
  1.1016 +    int32_t i, stage1Top, fromUCount;
  1.1017 +
  1.1018 +    fromUCount=prepareFromUMappings(table);
  1.1019 +
  1.1020 +    extData->fromUTableUChars=utm_open("cnv extension fromUTableUChars", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 2);
  1.1021 +    extData->fromUTableValues=utm_open("cnv extension fromUTableValues", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 4);
  1.1022 +    extData->fromUBytes=utm_open("cnv extension fromUBytes", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 1);
  1.1023 +
  1.1024 +    /* allocate all-unassigned stage blocks */
  1.1025 +    extData->stage2Top=MBCS_STAGE_2_FIRST_ASSIGNED;
  1.1026 +    extData->stage3Top=MBCS_STAGE_3_FIRST_ASSIGNED;
  1.1027 +
  1.1028 +    /*
  1.1029 +     * stage 3b stores only unique values, and in
  1.1030 +     * index 0: 0 for "no mapping"
  1.1031 +     * index 1: "no mapping" with preference for <subchar1> rather than <subchar>
  1.1032 +     */
  1.1033 +    extData->stage3b[1]=UCNV_EXT_FROM_U_SUBCHAR1;
  1.1034 +    extData->stage3bTop=2;
  1.1035 +
  1.1036 +    /* allocate the first entry in the fromUTable because index 0 means "no result" */
  1.1037 +    utm_alloc(extData->fromUTableUChars);
  1.1038 +    utm_alloc(extData->fromUTableValues);
  1.1039 +
  1.1040 +    if(!generateFromUTrie(extData, table, fromUCount)) {
  1.1041 +        return FALSE;
  1.1042 +    }
  1.1043 +
  1.1044 +    /*
  1.1045 +     * offset the stage 1 trie entries by stage1Top because they will
  1.1046 +     * be stored in a single array
  1.1047 +     */
  1.1048 +    stage1=extData->stage1;
  1.1049 +    stage1Top=extData->stage1Top;
  1.1050 +    for(i=0; i<stage1Top; ++i) {
  1.1051 +        stage1[i]=(uint16_t)(stage1[i]+stage1Top);
  1.1052 +    }
  1.1053 +
  1.1054 +    return TRUE;
  1.1055 +}
  1.1056 +
  1.1057 +/* -------------------------------------------------------------------------- */
  1.1058 +
  1.1059 +static UBool
  1.1060 +CnvExtAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *staticData) {
  1.1061 +    CnvExtData *extData;
  1.1062 +
  1.1063 +    if(table->unicodeMask&UCNV_HAS_SURROGATES) {
  1.1064 +        fprintf(stderr, "error: contains mappings for surrogate code points\n");
  1.1065 +        return FALSE;
  1.1066 +    }
  1.1067 +
  1.1068 +    staticData->conversionType=UCNV_MBCS;
  1.1069 +
  1.1070 +    extData=(CnvExtData *)cnvData;
  1.1071 +
  1.1072 +    /*
  1.1073 +     * assume that the table is sorted
  1.1074 +     *
  1.1075 +     * call the functions in this order because
  1.1076 +     * makeToUTable() modifies the original reverseMap,
  1.1077 +     * makeFromUTable() writes a whole new mapping into reverseMap
  1.1078 +     */
  1.1079 +    return
  1.1080 +        makeToUTable(extData, table) &&
  1.1081 +        makeFromUTable(extData, table);
  1.1082 +}

mercurial