intl/icu/source/tools/gencnval/gencnval.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/gencnval/gencnval.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1128 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 1999-2012, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  gencnval.c
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 1999nov05
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   This program reads convrtrs.txt and writes a memory-mappable
    1.20 +*   converter name alias table to cnvalias.dat .
    1.21 +*
    1.22 +*   This program currently writes version 2.1 of the data format. See
    1.23 +*   ucnv_io.c for more details on the format. Note that version 2.1
    1.24 +*   is written in such a way that a 2.0 reader will be able to use it,
    1.25 +*   and a 2.1 reader will be able to read 2.0.
    1.26 +*/
    1.27 +
    1.28 +#include "unicode/utypes.h"
    1.29 +#include "unicode/putil.h"
    1.30 +#include "unicode/ucnv.h" /* ucnv_compareNames() */
    1.31 +#include "ucnv_io.h"
    1.32 +#include "cmemory.h"
    1.33 +#include "cstring.h"
    1.34 +#include "uinvchar.h"
    1.35 +#include "filestrm.h"
    1.36 +#include "unicode/uclean.h"
    1.37 +#include "unewdata.h"
    1.38 +#include "uoptions.h"
    1.39 +
    1.40 +#include <stdio.h>
    1.41 +#include <stdlib.h>
    1.42 +#include <ctype.h>
    1.43 +
    1.44 +/* TODO: Need to check alias name length is less than UCNV_MAX_CONVERTER_NAME_LENGTH */
    1.45 +
    1.46 +/* STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2)
    1.47 + That is the maximum size for the string stores combined
    1.48 + because the strings are indexed at 16-bit boundaries by a
    1.49 + 16-bit index, and there is only one section for the 
    1.50 + strings.
    1.51 + */
    1.52 +#define STRING_STORE_SIZE 0x1FBFE   /* 130046 */
    1.53 +#define TAG_STORE_SIZE      0x400   /* 1024 */
    1.54 +
    1.55 +/* The combined tag and converter count can affect the number of lists
    1.56 + created.  The size of all lists must be less than (2^17 - 1)
    1.57 + because the lists are indexed as a 16-bit array with a 16-bit index.
    1.58 + */
    1.59 +#define MAX_TAG_COUNT 0x3F      /* 63 */
    1.60 +#define MAX_CONV_COUNT UCNV_CONVERTER_INDEX_MASK
    1.61 +#define MAX_ALIAS_COUNT 0xFFFF  /* 65535 */
    1.62 +
    1.63 +/* The maximum number of aliases that a standard tag/converter combination can have.
    1.64 + At this moment 6/18/2002, IANA has 12 names for ASCII. Don't go below 15 for
    1.65 + this value. I don't recommend more than 31 for this value.
    1.66 + */
    1.67 +#define MAX_TC_ALIAS_COUNT 0x1F    /* 31 */
    1.68 +
    1.69 +#define MAX_LINE_SIZE 0x7FFF    /* 32767 */
    1.70 +#define MAX_LIST_SIZE 0xFFFF    /* 65535 */
    1.71 +
    1.72 +#define DATA_NAME "cnvalias"
    1.73 +#define DATA_TYPE "icu" /* ICU alias table */
    1.74 +
    1.75 +#define ALL_TAG_STR "ALL"
    1.76 +#define ALL_TAG_NUM 1
    1.77 +#define EMPTY_TAG_NUM 0
    1.78 +
    1.79 +/* UDataInfo cf. udata.h */
    1.80 +static const UDataInfo dataInfo={
    1.81 +    sizeof(UDataInfo),
    1.82 +    0,
    1.83 +
    1.84 +    U_IS_BIG_ENDIAN,
    1.85 +    U_CHARSET_FAMILY,
    1.86 +    sizeof(UChar),
    1.87 +    0,
    1.88 +
    1.89 +    {0x43, 0x76, 0x41, 0x6c},     /* dataFormat="CvAl" */
    1.90 +    {3, 0, 1, 0},                 /* formatVersion */
    1.91 +    {1, 4, 2, 0}                  /* dataVersion */
    1.92 +};
    1.93 +
    1.94 +typedef struct {
    1.95 +    char *store;
    1.96 +    uint32_t top;
    1.97 +    uint32_t max;
    1.98 +} StringBlock;
    1.99 +
   1.100 +static char stringStore[STRING_STORE_SIZE];
   1.101 +static StringBlock stringBlock = { stringStore, 0, STRING_STORE_SIZE };
   1.102 +
   1.103 +typedef struct {
   1.104 +    uint16_t    aliasCount;
   1.105 +    uint16_t    *aliases;     /* Index into stringStore */
   1.106 +} AliasList;
   1.107 +
   1.108 +typedef struct {
   1.109 +    uint16_t converter;     /* Index into stringStore */
   1.110 +    uint16_t totalAliasCount;    /* Total aliases in this column */
   1.111 +} Converter;
   1.112 +
   1.113 +static Converter converters[MAX_CONV_COUNT];
   1.114 +static uint16_t converterCount=0;
   1.115 +
   1.116 +static char tagStore[TAG_STORE_SIZE];
   1.117 +static StringBlock tagBlock = { tagStore, 0, TAG_STORE_SIZE };
   1.118 +
   1.119 +typedef struct {
   1.120 +    uint16_t    tag;        /* Index into tagStore */
   1.121 +    uint16_t    totalAliasCount; /* Total aliases in this row */
   1.122 +    AliasList   aliasList[MAX_CONV_COUNT];
   1.123 +} Tag;
   1.124 +
   1.125 +/* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
   1.126 +static Tag tags[MAX_TAG_COUNT];
   1.127 +static uint16_t tagCount = 0;
   1.128 +
   1.129 +/* Used for storing all aliases  */
   1.130 +static uint16_t knownAliases[MAX_ALIAS_COUNT];
   1.131 +static uint16_t knownAliasesCount = 0;
   1.132 +/*static uint16_t duplicateKnownAliasesCount = 0;*/
   1.133 +
   1.134 +/* Used for storing the lists section that point to aliases */
   1.135 +static uint16_t aliasLists[MAX_LIST_SIZE];
   1.136 +static uint16_t aliasListsSize = 0;
   1.137 +
   1.138 +/* Were the standard tags declared before the aliases. */
   1.139 +static UBool standardTagsUsed = FALSE;
   1.140 +static UBool verbose = FALSE;
   1.141 +static int lineNum = 1;
   1.142 +
   1.143 +static UConverterAliasOptions tableOptions = {
   1.144 +    UCNV_IO_STD_NORMALIZED,
   1.145 +    1 /* containsCnvOptionInfo */
   1.146 +};
   1.147 +
   1.148 +
   1.149 +/**
   1.150 + * path to convrtrs.txt
   1.151 + */
   1.152 +const char *path;
   1.153 +
   1.154 +/* prototypes --------------------------------------------------------------- */
   1.155 +
   1.156 +static void
   1.157 +parseLine(const char *line);
   1.158 +
   1.159 +static void
   1.160 +parseFile(FileStream *in);
   1.161 +
   1.162 +static int32_t
   1.163 +chomp(char *line);
   1.164 +
   1.165 +static void
   1.166 +addOfficialTaggedStandards(char *line, int32_t lineLen);
   1.167 +
   1.168 +static uint16_t
   1.169 +addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName);
   1.170 +
   1.171 +static uint16_t
   1.172 +addConverter(const char *converter);
   1.173 +
   1.174 +static char *
   1.175 +allocString(StringBlock *block, const char *s, int32_t length);
   1.176 +
   1.177 +static uint16_t
   1.178 +addToKnownAliases(const char *alias);
   1.179 +
   1.180 +static int
   1.181 +compareAliases(const void *alias1, const void *alias2);
   1.182 +
   1.183 +static uint16_t
   1.184 +getTagNumber(const char *tag, uint16_t tagLen);
   1.185 +
   1.186 +/*static void
   1.187 +addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter);*/
   1.188 +
   1.189 +static void
   1.190 +writeAliasTable(UNewDataMemory *out);
   1.191 +
   1.192 +/* -------------------------------------------------------------------------- */
   1.193 +
   1.194 +/* Presumes that you used allocString() */
   1.195 +#define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
   1.196 +#define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))
   1.197 +
   1.198 +/* Presumes that you used allocString() */
   1.199 +#define GET_ALIAS_NUM(str) ((uint16_t)((str - stringStore) >> 1))
   1.200 +#define GET_TAG_NUM(str) ((uint16_t)((str - tagStore) >> 1))
   1.201 +
   1.202 +enum
   1.203 +{
   1.204 +    HELP1,
   1.205 +    HELP2,
   1.206 +    VERBOSE,
   1.207 +    COPYRIGHT,
   1.208 +    DESTDIR,
   1.209 +    SOURCEDIR
   1.210 +};
   1.211 +
   1.212 +static UOption options[]={
   1.213 +    UOPTION_HELP_H,
   1.214 +    UOPTION_HELP_QUESTION_MARK,
   1.215 +    UOPTION_VERBOSE,
   1.216 +    UOPTION_COPYRIGHT,
   1.217 +    UOPTION_DESTDIR,
   1.218 +    UOPTION_SOURCEDIR
   1.219 +};
   1.220 +
   1.221 +extern int
   1.222 +main(int argc, char* argv[]) {
   1.223 +    int i, n;
   1.224 +    char pathBuf[512];
   1.225 +    FileStream *in;
   1.226 +    UNewDataMemory *out;
   1.227 +    UErrorCode errorCode=U_ZERO_ERROR;
   1.228 +
   1.229 +    U_MAIN_INIT_ARGS(argc, argv);
   1.230 +
   1.231 +    /* preset then read command line options */
   1.232 +    options[DESTDIR].value=options[SOURCEDIR].value=u_getDataDirectory();
   1.233 +    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
   1.234 +
   1.235 +    /* error handling, printing usage message */
   1.236 +    if(argc<0) {
   1.237 +        fprintf(stderr,
   1.238 +            "error in command line argument \"%s\"\n",
   1.239 +            argv[-argc]);
   1.240 +    }
   1.241 +    if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
   1.242 +        fprintf(stderr,
   1.243 +            "usage: %s [-options] [convrtrs.txt]\n"
   1.244 +            "\tread convrtrs.txt and create " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE "\n"
   1.245 +            "options:\n"
   1.246 +            "\t-h or -? or --help  this usage text\n"
   1.247 +            "\t-v or --verbose     prints out extra information about the alias table\n"
   1.248 +            "\t-c or --copyright   include a copyright notice\n"
   1.249 +            "\t-d or --destdir     destination directory, followed by the path\n"
   1.250 +            "\t-s or --sourcedir   source directory, followed by the path\n",
   1.251 +            argv[0]);
   1.252 +        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
   1.253 +    }
   1.254 +
   1.255 +    if(options[VERBOSE].doesOccur) {
   1.256 +        verbose = TRUE;
   1.257 +    }
   1.258 +
   1.259 +    if(argc>=2) {
   1.260 +        path=argv[1];
   1.261 +    } else {
   1.262 +        path=options[SOURCEDIR].value;
   1.263 +        if(path!=NULL && *path!=0) {
   1.264 +            char *end;
   1.265 +
   1.266 +            uprv_strcpy(pathBuf, path);
   1.267 +            end = uprv_strchr(pathBuf, 0);
   1.268 +            if(*(end-1)!=U_FILE_SEP_CHAR) {
   1.269 +                *(end++)=U_FILE_SEP_CHAR;
   1.270 +            }
   1.271 +            uprv_strcpy(end, "convrtrs.txt");
   1.272 +            path=pathBuf;
   1.273 +        } else {
   1.274 +            path = "convrtrs.txt";
   1.275 +        }
   1.276 +    }
   1.277 +
   1.278 +    uprv_memset(stringStore, 0, sizeof(stringStore));
   1.279 +    uprv_memset(tagStore, 0, sizeof(tagStore));
   1.280 +    uprv_memset(converters, 0, sizeof(converters));
   1.281 +    uprv_memset(tags, 0, sizeof(tags));
   1.282 +    uprv_memset(aliasLists, 0, sizeof(aliasLists));
   1.283 +    uprv_memset(knownAliases, 0, sizeof(aliasLists));
   1.284 +
   1.285 +
   1.286 +    in=T_FileStream_open(path, "r");
   1.287 +    if(in==NULL) {
   1.288 +        fprintf(stderr, "gencnval: unable to open input file %s\n", path);
   1.289 +        exit(U_FILE_ACCESS_ERROR);
   1.290 +    }
   1.291 +    parseFile(in);
   1.292 +    T_FileStream_close(in);
   1.293 +
   1.294 +    /* create the output file */
   1.295 +    out=udata_create(options[DESTDIR].value, DATA_TYPE, DATA_NAME, &dataInfo,
   1.296 +                     options[COPYRIGHT].doesOccur ? U_COPYRIGHT_STRING : NULL, &errorCode);
   1.297 +    if(U_FAILURE(errorCode)) {
   1.298 +        fprintf(stderr, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode));
   1.299 +        exit(errorCode);
   1.300 +    }
   1.301 +
   1.302 +    /* write the table of aliases based on a tag/converter name combination */
   1.303 +    writeAliasTable(out);
   1.304 +
   1.305 +    /* finish */
   1.306 +    udata_finish(out, &errorCode);
   1.307 +    if(U_FAILURE(errorCode)) {
   1.308 +        fprintf(stderr, "gencnval: error finishing output file - %s\n", u_errorName(errorCode));
   1.309 +        exit(errorCode);
   1.310 +    }
   1.311 +
   1.312 +    /* clean up tags */
   1.313 +    for (i = 0; i < MAX_TAG_COUNT; i++) {
   1.314 +        for (n = 0; n < MAX_CONV_COUNT; n++) {
   1.315 +            if (tags[i].aliasList[n].aliases!=NULL) {
   1.316 +                uprv_free(tags[i].aliasList[n].aliases);
   1.317 +            }
   1.318 +        }
   1.319 +    }
   1.320 +
   1.321 +    return 0;
   1.322 +}
   1.323 +
   1.324 +static void
   1.325 +parseFile(FileStream *in) {
   1.326 +    char line[MAX_LINE_SIZE];
   1.327 +    char lastLine[MAX_LINE_SIZE];
   1.328 +    int32_t lineSize = 0;
   1.329 +    int32_t lastLineSize = 0;
   1.330 +    UBool validParse = TRUE;
   1.331 +
   1.332 +    lineNum = 0;
   1.333 +
   1.334 +    /* Add the empty tag, which is for untagged aliases */
   1.335 +    getTagNumber("", 0);
   1.336 +    getTagNumber(ALL_TAG_STR, 3);
   1.337 +    allocString(&stringBlock, "", 0);
   1.338 +
   1.339 +    /* read the list of aliases */
   1.340 +    while (validParse) {
   1.341 +        validParse = FALSE;
   1.342 +
   1.343 +        /* Read non-empty lines that don't start with a space character. */
   1.344 +        while (T_FileStream_readLine(in, lastLine, MAX_LINE_SIZE) != NULL) {
   1.345 +            lastLineSize = chomp(lastLine);
   1.346 +            if (lineSize == 0 || (lastLineSize > 0 && isspace((int)*lastLine))) {
   1.347 +                uprv_strcpy(line + lineSize, lastLine);
   1.348 +                lineSize += lastLineSize;
   1.349 +            } else if (lineSize > 0) {
   1.350 +                validParse = TRUE;
   1.351 +                break;
   1.352 +            }
   1.353 +            lineNum++;
   1.354 +        }
   1.355 +
   1.356 +        if (validParse || lineSize > 0) {
   1.357 +            if (isspace((int)*line)) {
   1.358 +                fprintf(stderr, "%s:%d: error: cannot start an alias with a space\n", path, lineNum-1);
   1.359 +                exit(U_PARSE_ERROR);
   1.360 +            } else if (line[0] == '{') {
   1.361 +                if (!standardTagsUsed && line[lineSize - 1] != '}') {
   1.362 +                    fprintf(stderr, "%s:%d: error: alias needs to start with a converter name\n", path, lineNum);
   1.363 +                    exit(U_PARSE_ERROR);
   1.364 +                }
   1.365 +                addOfficialTaggedStandards(line, lineSize);
   1.366 +                standardTagsUsed = TRUE;
   1.367 +            } else {
   1.368 +                if (standardTagsUsed) {
   1.369 +                    parseLine(line);
   1.370 +                }
   1.371 +                else {
   1.372 +                    fprintf(stderr, "%s:%d: error: alias table needs to start a list of standard tags\n", path, lineNum);
   1.373 +                    exit(U_PARSE_ERROR);
   1.374 +                }
   1.375 +            }
   1.376 +            /* Was the last line consumed */
   1.377 +            if (lastLineSize > 0) {
   1.378 +                uprv_strcpy(line, lastLine);
   1.379 +                lineSize = lastLineSize;
   1.380 +            }
   1.381 +            else {
   1.382 +                lineSize = 0;
   1.383 +            }
   1.384 +        }
   1.385 +        lineNum++;
   1.386 +    }
   1.387 +}
   1.388 +
   1.389 +/* This works almost like the Perl chomp.
   1.390 + It removes the newlines, comments and trailing whitespace (not preceding whitespace).
   1.391 +*/
   1.392 +static int32_t
   1.393 +chomp(char *line) {
   1.394 +    char *s = line;
   1.395 +    char *lastNonSpace = line;
   1.396 +    while(*s!=0) {
   1.397 +        /* truncate at a newline or a comment */
   1.398 +        if(*s == '\r' || *s == '\n' || *s == '#') {
   1.399 +            *s = 0;
   1.400 +            break;
   1.401 +        }
   1.402 +        if (!isspace((int)*s)) {
   1.403 +            lastNonSpace = s;
   1.404 +        }
   1.405 +        ++s;
   1.406 +    }
   1.407 +    if (lastNonSpace++ > line) {
   1.408 +        *lastNonSpace = 0;
   1.409 +        s = lastNonSpace;
   1.410 +    }
   1.411 +    return (int32_t)(s - line);
   1.412 +}
   1.413 +
   1.414 +static void
   1.415 +parseLine(const char *line) {
   1.416 +    uint16_t pos=0, start, limit, length, cnv;
   1.417 +    char *converter, *alias;
   1.418 +
   1.419 +    /* skip leading white space */
   1.420 +    /* There is no whitespace at the beginning anymore */
   1.421 +/*    while(line[pos]!=0 && isspace(line[pos])) {
   1.422 +        ++pos;
   1.423 +    }
   1.424 +*/
   1.425 +
   1.426 +    /* is there nothing on this line? */
   1.427 +    if(line[pos]==0) {
   1.428 +        return;
   1.429 +    }
   1.430 +
   1.431 +    /* get the converter name */
   1.432 +    start=pos;
   1.433 +    while(line[pos]!=0 && !isspace((int)line[pos])) {
   1.434 +        ++pos;
   1.435 +    }
   1.436 +    limit=pos;
   1.437 +
   1.438 +    /* store the converter name */
   1.439 +    length=(uint16_t)(limit-start);
   1.440 +    converter=allocString(&stringBlock, line+start, length);
   1.441 +
   1.442 +    /* add the converter to the converter table */
   1.443 +    cnv=addConverter(converter);
   1.444 +
   1.445 +    /* The name itself may be tagged, so let's added it to the aliases list properly */
   1.446 +    pos = start;
   1.447 +
   1.448 +    /* get all the real aliases */
   1.449 +    for(;;) {
   1.450 +
   1.451 +        /* skip white space */
   1.452 +        while(line[pos]!=0 && isspace((int)line[pos])) {
   1.453 +            ++pos;
   1.454 +        }
   1.455 +
   1.456 +        /* is there no more alias name on this line? */
   1.457 +        if(line[pos]==0) {
   1.458 +            break;
   1.459 +        }
   1.460 +
   1.461 +        /* get an alias name */
   1.462 +        start=pos;
   1.463 +        while(line[pos]!=0 && line[pos]!='{' && !isspace((int)line[pos])) {
   1.464 +            ++pos;
   1.465 +        }
   1.466 +        limit=pos;
   1.467 +
   1.468 +        /* store the alias name */
   1.469 +        length=(uint16_t)(limit-start);
   1.470 +        if (start == 0) {
   1.471 +            /* add the converter as its own alias to the alias table */
   1.472 +            alias = converter;
   1.473 +            addAlias(alias, ALL_TAG_NUM, cnv, TRUE);
   1.474 +        }
   1.475 +        else {
   1.476 +            alias=allocString(&stringBlock, line+start, length);
   1.477 +            addAlias(alias, ALL_TAG_NUM, cnv, FALSE);
   1.478 +        }
   1.479 +        addToKnownAliases(alias);
   1.480 +
   1.481 +        /* add the alias/converter pair to the alias table */
   1.482 +        /* addAlias(alias, 0, cnv, FALSE);*/
   1.483 +
   1.484 +        /* skip whitespace */
   1.485 +        while (line[pos] && isspace((int)line[pos])) {
   1.486 +            ++pos;
   1.487 +        }
   1.488 +
   1.489 +        /* handle tags if they are present */
   1.490 +        if (line[pos] == '{') {
   1.491 +            ++pos;
   1.492 +            do {
   1.493 +                start = pos;
   1.494 +                while (line[pos] && line[pos] != '}' && !isspace((int)line[pos])) {
   1.495 +                    ++pos;
   1.496 +                }
   1.497 +                limit = pos;
   1.498 +
   1.499 +                if (start != limit) {
   1.500 +                    /* add the tag to the tag table */
   1.501 +                    uint16_t tag = getTagNumber(line + start, (uint16_t)(limit - start));
   1.502 +                    addAlias(alias, tag, cnv, (UBool)(line[limit-1] == '*'));
   1.503 +                }
   1.504 +
   1.505 +                while (line[pos] && isspace((int)line[pos])) {
   1.506 +                    ++pos;
   1.507 +                }
   1.508 +            } while (line[pos] && line[pos] != '}');
   1.509 +
   1.510 +            if (line[pos] == '}') {
   1.511 +                ++pos;
   1.512 +            } else {
   1.513 +                fprintf(stderr, "%s:%d: Unterminated tag list\n", path, lineNum);
   1.514 +                exit(U_UNMATCHED_BRACES);
   1.515 +            }
   1.516 +        } else {
   1.517 +            addAlias(alias, EMPTY_TAG_NUM, cnv, (UBool)(tags[0].aliasList[cnv].aliasCount == 0));
   1.518 +        }
   1.519 +    }
   1.520 +}
   1.521 +
   1.522 +static uint16_t
   1.523 +getTagNumber(const char *tag, uint16_t tagLen) {
   1.524 +    char *atag;
   1.525 +    uint16_t t;
   1.526 +    UBool preferredName = ((tagLen > 0) ? (tag[tagLen - 1] == '*') : (FALSE));
   1.527 +
   1.528 +    if (tagCount >= MAX_TAG_COUNT) {
   1.529 +        fprintf(stderr, "%s:%d: too many tags\n", path, lineNum);
   1.530 +        exit(U_BUFFER_OVERFLOW_ERROR);
   1.531 +    }
   1.532 +
   1.533 +    if (preferredName) {
   1.534 +/*        puts(tag);*/
   1.535 +        tagLen--;
   1.536 +    }
   1.537 +
   1.538 +    for (t = 0; t < tagCount; ++t) {
   1.539 +        const char *currTag = GET_TAG_STR(tags[t].tag);
   1.540 +        if (uprv_strlen(currTag) == tagLen && !uprv_strnicmp(currTag, tag, tagLen)) {
   1.541 +            return t;
   1.542 +        }
   1.543 +    }
   1.544 +
   1.545 +    /* we need to add this tag */
   1.546 +    if (tagCount >= MAX_TAG_COUNT) {
   1.547 +        fprintf(stderr, "%s:%d: error: too many tags\n", path, lineNum);
   1.548 +        exit(U_BUFFER_OVERFLOW_ERROR);
   1.549 +    }
   1.550 +
   1.551 +    /* allocate a new entry in the tag table */
   1.552 +    atag = allocString(&tagBlock, tag, tagLen);
   1.553 +
   1.554 +    if (standardTagsUsed) {
   1.555 +        fprintf(stderr, "%s:%d: error: Tag \"%s\" is not declared at the beginning of the alias table.\n",
   1.556 +            path, lineNum, atag);
   1.557 +        exit(1);
   1.558 +    }
   1.559 +    else if (tagLen > 0 && strcmp(tag, ALL_TAG_STR) != 0) {
   1.560 +        fprintf(stderr, "%s:%d: warning: Tag \"%s\" was added to the list of standards because it was not declared at beginning of the alias table.\n",
   1.561 +            path, lineNum, atag);
   1.562 +    }
   1.563 +
   1.564 +    /* add the tag to the tag table */
   1.565 +    tags[tagCount].tag = GET_TAG_NUM(atag);
   1.566 +    /* The aliasList should be set to 0's already */
   1.567 +
   1.568 +    return tagCount++;
   1.569 +}
   1.570 +
   1.571 +/*static void
   1.572 +addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter) {
   1.573 +    tags[tag].aliases[converter] = alias;
   1.574 +}
   1.575 +*/
   1.576 +
   1.577 +static void
   1.578 +addOfficialTaggedStandards(char *line, int32_t lineLen) {
   1.579 +    char *atag;
   1.580 +    char *endTagExp;
   1.581 +    char *tag;
   1.582 +    static const char WHITESPACE[] = " \t";
   1.583 +
   1.584 +    if (tagCount > UCNV_NUM_RESERVED_TAGS) {
   1.585 +        fprintf(stderr, "%s:%d: error: official tags already added\n", path, lineNum);
   1.586 +        exit(U_BUFFER_OVERFLOW_ERROR);
   1.587 +    }
   1.588 +    tag = strchr(line, '{');
   1.589 +    if (tag == NULL) {
   1.590 +        /* Why were we called? */
   1.591 +        fprintf(stderr, "%s:%d: error: Missing start of tag group\n", path, lineNum);
   1.592 +        exit(U_PARSE_ERROR);
   1.593 +    }
   1.594 +    tag++;
   1.595 +    endTagExp = strchr(tag, '}');
   1.596 +    if (endTagExp == NULL) {
   1.597 +        fprintf(stderr, "%s:%d: error: Missing end of tag group\n", path, lineNum);
   1.598 +        exit(U_PARSE_ERROR);
   1.599 +    }
   1.600 +    endTagExp[0] = 0;
   1.601 +
   1.602 +    tag = strtok(tag, WHITESPACE);
   1.603 +    while (tag != NULL) {
   1.604 +/*        printf("Adding original tag \"%s\"\n", tag);*/
   1.605 +
   1.606 +        /* allocate a new entry in the tag table */
   1.607 +        atag = allocString(&tagBlock, tag, -1);
   1.608 +
   1.609 +        /* add the tag to the tag table */
   1.610 +        tags[tagCount++].tag = (uint16_t)((atag - tagStore) >> 1);
   1.611 +
   1.612 +        /* The aliasList should already be set to 0's */
   1.613 +
   1.614 +        /* Get next tag */
   1.615 +        tag = strtok(NULL, WHITESPACE);
   1.616 +    }
   1.617 +}
   1.618 +
   1.619 +static uint16_t
   1.620 +addToKnownAliases(const char *alias) {
   1.621 +/*    uint32_t idx; */
   1.622 +    /* strict matching */
   1.623 +/*    for (idx = 0; idx < knownAliasesCount; idx++) {
   1.624 +        uint16_t num = GET_ALIAS_NUM(alias);
   1.625 +        if (knownAliases[idx] != num
   1.626 +            && uprv_strcmp(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
   1.627 +        {
   1.628 +            fprintf(stderr, "%s:%d: warning: duplicate alias %s and %s found\n", path, 
   1.629 +                lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
   1.630 +            duplicateKnownAliasesCount++;
   1.631 +            break;
   1.632 +        }
   1.633 +        else if (knownAliases[idx] != num
   1.634 +            && ucnv_compareNames(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
   1.635 +        {
   1.636 +            if (verbose) {
   1.637 +                fprintf(stderr, "%s:%d: information: duplicate alias %s and %s found\n", path, 
   1.638 +                    lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
   1.639 +            }
   1.640 +            duplicateKnownAliasesCount++;
   1.641 +            break;
   1.642 +        }
   1.643 +    }
   1.644 +*/
   1.645 +    if (knownAliasesCount >= MAX_ALIAS_COUNT) {
   1.646 +        fprintf(stderr, "%s:%d: warning: Too many aliases defined for all converters\n",
   1.647 +            path, lineNum);
   1.648 +        exit(U_BUFFER_OVERFLOW_ERROR);
   1.649 +    }
   1.650 +    /* TODO: We could try to unlist exact duplicates. */
   1.651 +    return knownAliases[knownAliasesCount++] = GET_ALIAS_NUM(alias);
   1.652 +}
   1.653 +
   1.654 +/*
   1.655 +@param standard When standard is 0, then it's the "empty" tag.
   1.656 +*/
   1.657 +static uint16_t
   1.658 +addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName) {
   1.659 +    uint32_t idx, idx2;
   1.660 +    UBool startEmptyWithoutDefault = FALSE;
   1.661 +    AliasList *aliasList;
   1.662 +
   1.663 +    if(standard>=MAX_TAG_COUNT) {
   1.664 +        fprintf(stderr, "%s:%d: error: too many standard tags\n", path, lineNum);
   1.665 +        exit(U_BUFFER_OVERFLOW_ERROR);
   1.666 +    }
   1.667 +    if(converter>=MAX_CONV_COUNT) {
   1.668 +        fprintf(stderr, "%s:%d: error: too many converter names\n", path, lineNum);
   1.669 +        exit(U_BUFFER_OVERFLOW_ERROR);
   1.670 +    }
   1.671 +    aliasList = &tags[standard].aliasList[converter];
   1.672 +
   1.673 +    if (strchr(alias, '}')) {
   1.674 +        fprintf(stderr, "%s:%d: error: unmatched } found\n", path, 
   1.675 +            lineNum);
   1.676 +    }
   1.677 +
   1.678 +    if(aliasList->aliasCount + 1 >= MAX_TC_ALIAS_COUNT) {
   1.679 +        fprintf(stderr, "%s:%d: error: too many aliases for alias %s and converter %s\n", path, 
   1.680 +            lineNum, alias, GET_ALIAS_STR(converters[converter].converter));
   1.681 +        exit(U_BUFFER_OVERFLOW_ERROR);
   1.682 +    }
   1.683 +
   1.684 +    /* Show this warning only once. All aliases are added to the "ALL" tag. */
   1.685 +    if (standard == ALL_TAG_NUM && GET_ALIAS_STR(converters[converter].converter) != alias) {
   1.686 +        /* Normally these option values are parsed at runtime, and they can
   1.687 +           be discarded when the alias is a default converter. Options should
   1.688 +           only be on a converter and not an alias. */
   1.689 +        if (uprv_strchr(alias, UCNV_OPTION_SEP_CHAR) != 0)
   1.690 +        {
   1.691 +            fprintf(stderr, "warning(line %d): alias %s contains a \""UCNV_OPTION_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
   1.692 +                lineNum, alias);
   1.693 +        }
   1.694 +        if (uprv_strchr(alias, UCNV_VALUE_SEP_CHAR) != 0)
   1.695 +        {
   1.696 +            fprintf(stderr, "warning(line %d): alias %s contains an \""UCNV_VALUE_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
   1.697 +                lineNum, alias);
   1.698 +        }
   1.699 +    }
   1.700 +
   1.701 +    if (standard != ALL_TAG_NUM) {
   1.702 +        /* Check for duplicate aliases for this tag on all converters */
   1.703 +        for (idx = 0; idx < converterCount; idx++) {
   1.704 +            for (idx2 = 0; idx2 < tags[standard].aliasList[idx].aliasCount; idx2++) {
   1.705 +                uint16_t aliasNum = tags[standard].aliasList[idx].aliases[idx2];
   1.706 +                if (aliasNum
   1.707 +                    && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
   1.708 +                {
   1.709 +                    if (idx == converter) {
   1.710 +                        /*
   1.711 +                         * (alias, standard) duplicates are harmless if they map to the same converter.
   1.712 +                         * Only print a warning in verbose mode, or if the alias is a precise duplicate,
   1.713 +                         * not just a lenient-match duplicate.
   1.714 +                         */
   1.715 +                        if (verbose || 0 == uprv_strcmp(alias, GET_ALIAS_STR(aliasNum))) {
   1.716 +                            fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard %s and converter %s\n", path, 
   1.717 +                                lineNum, alias, GET_ALIAS_STR(aliasNum),
   1.718 +                                GET_TAG_STR(tags[standard].tag),
   1.719 +                                GET_ALIAS_STR(converters[converter].converter));
   1.720 +                        }
   1.721 +                    } else {
   1.722 +                        fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard tag %s between converter %s and converter %s\n", path, 
   1.723 +                            lineNum, alias, GET_ALIAS_STR(aliasNum),
   1.724 +                            GET_TAG_STR(tags[standard].tag),
   1.725 +                            GET_ALIAS_STR(converters[converter].converter),
   1.726 +                            GET_ALIAS_STR(converters[idx].converter));
   1.727 +                    }
   1.728 +                    break;
   1.729 +                }
   1.730 +            }
   1.731 +        }
   1.732 +
   1.733 +        /* Check for duplicate default aliases for this converter on all tags */
   1.734 +        /* It's okay to have multiple standards prefer the same name */
   1.735 +/*        if (verbose && !dupFound) {
   1.736 +            for (idx = 0; idx < tagCount; idx++) {
   1.737 +                if (tags[idx].aliasList[converter].aliases) {
   1.738 +                    uint16_t aliasNum = tags[idx].aliasList[converter].aliases[0];
   1.739 +                    if (aliasNum
   1.740 +                        && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
   1.741 +                    {
   1.742 +                        fprintf(stderr, "%s:%d: warning: duplicate alias %s found for converter %s and standard tag %s\n", path, 
   1.743 +                            lineNum, alias, GET_ALIAS_STR(converters[converter].converter), GET_TAG_STR(tags[standard].tag));
   1.744 +                        break;
   1.745 +                    }
   1.746 +                }
   1.747 +            }
   1.748 +        }*/
   1.749 +    }
   1.750 +
   1.751 +    if (aliasList->aliasCount <= 0) {
   1.752 +        aliasList->aliasCount++;
   1.753 +        startEmptyWithoutDefault = TRUE;
   1.754 +    }
   1.755 +    aliasList->aliases = (uint16_t *)uprv_realloc(aliasList->aliases, (aliasList->aliasCount + 1) * sizeof(aliasList->aliases[0]));
   1.756 +    if (startEmptyWithoutDefault) {
   1.757 +        aliasList->aliases[0] = 0;
   1.758 +    }
   1.759 +    if (defaultName) {
   1.760 +        if (aliasList->aliases[0] != 0) {
   1.761 +            fprintf(stderr, "%s:%d: error: Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n", path, 
   1.762 +                lineNum,
   1.763 +                alias,
   1.764 +                GET_ALIAS_STR(aliasList->aliases[0]),
   1.765 +                GET_TAG_STR(tags[standard].tag),
   1.766 +                GET_ALIAS_STR(converters[converter].converter));
   1.767 +            exit(U_PARSE_ERROR);
   1.768 +        }
   1.769 +        aliasList->aliases[0] = GET_ALIAS_NUM(alias);
   1.770 +    } else {
   1.771 +        aliasList->aliases[aliasList->aliasCount++] = GET_ALIAS_NUM(alias);
   1.772 +    }
   1.773 +/*    aliasList->converter = converter;*/
   1.774 +
   1.775 +    converters[converter].totalAliasCount++; /* One more to the column */
   1.776 +    tags[standard].totalAliasCount++; /* One more to the row */
   1.777 +
   1.778 +    return aliasList->aliasCount;
   1.779 +}
   1.780 +
   1.781 +static uint16_t
   1.782 +addConverter(const char *converter) {
   1.783 +    uint32_t idx;
   1.784 +    if(converterCount>=MAX_CONV_COUNT) {
   1.785 +        fprintf(stderr, "%s:%d: error: too many converters\n", path, lineNum);
   1.786 +        exit(U_BUFFER_OVERFLOW_ERROR);
   1.787 +    }
   1.788 +
   1.789 +    for (idx = 0; idx < converterCount; idx++) {
   1.790 +        if (ucnv_compareNames(converter, GET_ALIAS_STR(converters[idx].converter)) == 0) {
   1.791 +            fprintf(stderr, "%s:%d: error: duplicate converter %s found!\n", path, lineNum, converter);
   1.792 +            exit(U_PARSE_ERROR);
   1.793 +            break;
   1.794 +        }
   1.795 +    }
   1.796 +
   1.797 +    converters[converterCount].converter = GET_ALIAS_NUM(converter);
   1.798 +    converters[converterCount].totalAliasCount = 0;
   1.799 +
   1.800 +    return converterCount++;
   1.801 +}
   1.802 +
   1.803 +/* resolve this alias based on the prioritization of the standard tags. */
   1.804 +static void
   1.805 +resolveAliasToConverter(uint16_t alias, uint16_t *tagNum, uint16_t *converterNum) {
   1.806 +    uint16_t idx, idx2, idx3;
   1.807 +
   1.808 +    for (idx = UCNV_NUM_RESERVED_TAGS; idx < tagCount; idx++) {
   1.809 +        for (idx2 = 0; idx2 < converterCount; idx2++) {
   1.810 +            for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
   1.811 +                uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
   1.812 +                if (aliasNum == alias) {
   1.813 +                    *tagNum = idx;
   1.814 +                    *converterNum = idx2;
   1.815 +                    return;
   1.816 +                }
   1.817 +            }
   1.818 +        }
   1.819 +    }
   1.820 +    /* Do the leftovers last, just in case */
   1.821 +    /* There is no need to do the ALL tag */
   1.822 +    idx = 0;
   1.823 +    for (idx2 = 0; idx2 < converterCount; idx2++) {
   1.824 +        for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
   1.825 +            uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
   1.826 +            if (aliasNum == alias) {
   1.827 +                *tagNum = idx;
   1.828 +                *converterNum = idx2;
   1.829 +                return;
   1.830 +            }
   1.831 +        }
   1.832 +    }
   1.833 +    *tagNum = UINT16_MAX;
   1.834 +    *converterNum = UINT16_MAX;
   1.835 +    fprintf(stderr, "%s: warning: alias %s not found\n",
   1.836 +        path,
   1.837 +        GET_ALIAS_STR(alias));
   1.838 +    return;
   1.839 +}
   1.840 +
   1.841 +/* The knownAliases should be sorted before calling this function */
   1.842 +static uint32_t
   1.843 +resolveAliases(uint16_t *uniqueAliasArr, uint16_t *uniqueAliasToConverterArr, uint16_t aliasOffset) {
   1.844 +    uint32_t uniqueAliasIdx = 0;
   1.845 +    uint32_t idx;
   1.846 +    uint16_t currTagNum, oldTagNum;
   1.847 +    uint16_t currConvNum, oldConvNum;
   1.848 +    const char *lastName;
   1.849 +
   1.850 +    resolveAliasToConverter(knownAliases[0], &oldTagNum, &currConvNum);
   1.851 +    uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
   1.852 +    oldConvNum = currConvNum;
   1.853 +    uniqueAliasArr[uniqueAliasIdx] = knownAliases[0] + aliasOffset;
   1.854 +    uniqueAliasIdx++;
   1.855 +    lastName = GET_ALIAS_STR(knownAliases[0]);
   1.856 +
   1.857 +    for (idx = 1; idx < knownAliasesCount; idx++) {
   1.858 +        resolveAliasToConverter(knownAliases[idx], &currTagNum, &currConvNum);
   1.859 +        if (ucnv_compareNames(lastName, GET_ALIAS_STR(knownAliases[idx])) == 0) {
   1.860 +            /* duplicate found */
   1.861 +            if ((currTagNum < oldTagNum && currTagNum >= UCNV_NUM_RESERVED_TAGS)
   1.862 +                || oldTagNum == 0) {
   1.863 +                oldTagNum = currTagNum;
   1.864 +                uniqueAliasToConverterArr[uniqueAliasIdx - 1] = currConvNum;
   1.865 +                uniqueAliasArr[uniqueAliasIdx - 1] = knownAliases[idx] + aliasOffset;
   1.866 +                if (verbose) {
   1.867 +                    printf("using %s instead of %s -> %s", 
   1.868 +                        GET_ALIAS_STR(knownAliases[idx]),
   1.869 +                        lastName,
   1.870 +                        GET_ALIAS_STR(converters[currConvNum].converter));
   1.871 +                    if (oldConvNum != currConvNum) {
   1.872 +                        printf(" (alias conflict)");
   1.873 +                    }
   1.874 +                    puts("");
   1.875 +                }
   1.876 +            }
   1.877 +            else {
   1.878 +                /* else ignore it */
   1.879 +                if (verbose) {
   1.880 +                    printf("folding %s into %s -> %s",
   1.881 +                        GET_ALIAS_STR(knownAliases[idx]),
   1.882 +                        lastName,
   1.883 +                        GET_ALIAS_STR(converters[oldConvNum].converter));
   1.884 +                    if (oldConvNum != currConvNum) {
   1.885 +                        printf(" (alias conflict)");
   1.886 +                    }
   1.887 +                    puts("");
   1.888 +                }
   1.889 +            }
   1.890 +            if (oldConvNum != currConvNum) {
   1.891 +                uniqueAliasToConverterArr[uniqueAliasIdx - 1] |= UCNV_AMBIGUOUS_ALIAS_MAP_BIT;
   1.892 +            }
   1.893 +        }
   1.894 +        else {
   1.895 +            uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
   1.896 +            oldConvNum = currConvNum;
   1.897 +            uniqueAliasArr[uniqueAliasIdx] = knownAliases[idx] + aliasOffset;
   1.898 +            uniqueAliasIdx++;
   1.899 +            lastName = GET_ALIAS_STR(knownAliases[idx]);
   1.900 +            oldTagNum = currTagNum;
   1.901 +            /*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/
   1.902 +        }
   1.903 +        if (uprv_strchr(GET_ALIAS_STR(converters[currConvNum].converter), UCNV_OPTION_SEP_CHAR) != NULL) {
   1.904 +            uniqueAliasToConverterArr[uniqueAliasIdx-1] |= UCNV_CONTAINS_OPTION_BIT;
   1.905 +        }
   1.906 +    }
   1.907 +    return uniqueAliasIdx;
   1.908 +}
   1.909 +
   1.910 +static void
   1.911 +createOneAliasList(uint16_t *aliasArrLists, uint32_t tag, uint32_t converter, uint16_t offset) {
   1.912 +    uint32_t aliasNum;
   1.913 +    AliasList *aliasList = &tags[tag].aliasList[converter];
   1.914 +
   1.915 +    if (aliasList->aliasCount == 0) {
   1.916 +        aliasArrLists[tag*converterCount + converter] = 0;
   1.917 +    }
   1.918 +    else {
   1.919 +        aliasLists[aliasListsSize++] = aliasList->aliasCount;
   1.920 +
   1.921 +        /* write into the array area a 1's based index. */
   1.922 +        aliasArrLists[tag*converterCount + converter] = aliasListsSize;
   1.923 +
   1.924 +/*        printf("tag %s converter %s\n",
   1.925 +            GET_TAG_STR(tags[tag].tag),
   1.926 +            GET_ALIAS_STR(converters[converter].converter));*/
   1.927 +        for (aliasNum = 0; aliasNum < aliasList->aliasCount; aliasNum++) {
   1.928 +            uint16_t value;
   1.929 +/*            printf("   %s\n",
   1.930 +                GET_ALIAS_STR(aliasList->aliases[aliasNum]));*/
   1.931 +            if (aliasList->aliases[aliasNum]) {
   1.932 +                value = aliasList->aliases[aliasNum] + offset;
   1.933 +            } else {
   1.934 +                value = 0;
   1.935 +                if (tag != 0) { /* Only show the warning when it's not the leftover tag. */
   1.936 +                    fprintf(stderr, "%s: warning: tag %s does not have a default alias for %s\n",
   1.937 +                            path,
   1.938 +                            GET_TAG_STR(tags[tag].tag),
   1.939 +                            GET_ALIAS_STR(converters[converter].converter));
   1.940 +                }
   1.941 +            }
   1.942 +            aliasLists[aliasListsSize++] = value;
   1.943 +            if (aliasListsSize >= MAX_LIST_SIZE) {
   1.944 +                fprintf(stderr, "%s: error: Too many alias lists\n", path);
   1.945 +                exit(U_BUFFER_OVERFLOW_ERROR);
   1.946 +            }
   1.947 +
   1.948 +        }
   1.949 +    }
   1.950 +}
   1.951 +
   1.952 +static void
   1.953 +createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
   1.954 +    int32_t currStrLen;
   1.955 +    uprv_memcpy(normalizedStrings, origStringBlock, stringBlockLength);
   1.956 +    while ((currStrLen = (int32_t)uprv_strlen(origStringBlock)) < stringBlockLength) {
   1.957 +        int32_t currStrSize = currStrLen + 1;
   1.958 +        if (currStrLen > 0) {
   1.959 +            int32_t normStrLen;
   1.960 +            ucnv_io_stripForCompare(normalizedStrings, origStringBlock);
   1.961 +            normStrLen = uprv_strlen(normalizedStrings);
   1.962 +            if (normStrLen > 0) {
   1.963 +                uprv_memset(normalizedStrings + normStrLen, 0, currStrSize - normStrLen);
   1.964 +            }
   1.965 +        }
   1.966 +        stringBlockLength -= currStrSize;
   1.967 +        normalizedStrings += currStrSize;
   1.968 +        origStringBlock += currStrSize;
   1.969 +    }
   1.970 +}
   1.971 +
   1.972 +static void
   1.973 +writeAliasTable(UNewDataMemory *out) {
   1.974 +    uint32_t i, j;
   1.975 +    uint32_t uniqueAliasesSize;
   1.976 +    uint16_t aliasOffset = (uint16_t)(tagBlock.top/sizeof(uint16_t));
   1.977 +    uint16_t *aliasArrLists = (uint16_t *)uprv_malloc(tagCount * converterCount * sizeof(uint16_t));
   1.978 +    uint16_t *uniqueAliases = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
   1.979 +    uint16_t *uniqueAliasesToConverter = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
   1.980 +
   1.981 +    qsort(knownAliases, knownAliasesCount, sizeof(knownAliases[0]), compareAliases);
   1.982 +    uniqueAliasesSize = resolveAliases(uniqueAliases, uniqueAliasesToConverter, aliasOffset);
   1.983 +
   1.984 +    /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
   1.985 +    aliasListsSize = 0;
   1.986 +
   1.987 +    /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
   1.988 +    for (i = 0; i < tagCount; ++i) {
   1.989 +        for (j = 0; j < converterCount; ++j) {
   1.990 +            createOneAliasList(aliasArrLists, i, j, aliasOffset);
   1.991 +        }
   1.992 +    }
   1.993 +
   1.994 +    /* Write the size of the TOC */
   1.995 +    if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
   1.996 +        udata_write32(out, 8);
   1.997 +    }
   1.998 +    else {
   1.999 +        udata_write32(out, 9);
  1.1000 +    }
  1.1001 +
  1.1002 +    /* Write the sizes of each section */
  1.1003 +    /* All sizes are the number of uint16_t units, not bytes */
  1.1004 +    udata_write32(out, converterCount);
  1.1005 +    udata_write32(out, tagCount);
  1.1006 +    udata_write32(out, uniqueAliasesSize);  /* list of aliases */
  1.1007 +    udata_write32(out, uniqueAliasesSize);  /* The preresolved form of mapping an untagged the alias to a converter */
  1.1008 +    udata_write32(out, tagCount * converterCount);
  1.1009 +    udata_write32(out, aliasListsSize + 1);
  1.1010 +    udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t));
  1.1011 +    udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
  1.1012 +    if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
  1.1013 +        udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
  1.1014 +    }
  1.1015 +
  1.1016 +    /* write the table of converters */
  1.1017 +    /* Think of this as the column headers */
  1.1018 +    for(i=0; i<converterCount; ++i) {
  1.1019 +        udata_write16(out, (uint16_t)(converters[i].converter + aliasOffset));
  1.1020 +    }
  1.1021 +
  1.1022 +    /* write the table of tags */
  1.1023 +    /* Think of this as the row headers */
  1.1024 +    for(i=UCNV_NUM_RESERVED_TAGS; i<tagCount; ++i) {
  1.1025 +        udata_write16(out, tags[i].tag);
  1.1026 +    }
  1.1027 +    /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
  1.1028 +    udata_write16(out, tags[EMPTY_TAG_NUM].tag);
  1.1029 +    udata_write16(out, tags[ALL_TAG_NUM].tag);
  1.1030 +
  1.1031 +    /* Write the unique list of aliases */
  1.1032 +    udata_writeBlock(out, uniqueAliases, uniqueAliasesSize * sizeof(uint16_t));
  1.1033 +
  1.1034 +    /* Write the unique list of aliases */
  1.1035 +    udata_writeBlock(out, uniqueAliasesToConverter, uniqueAliasesSize * sizeof(uint16_t));
  1.1036 +
  1.1037 +    /* Write the array to the lists */
  1.1038 +    udata_writeBlock(out, (const void *)(aliasArrLists + (2*converterCount)), (((tagCount - 2) * converterCount) * sizeof(uint16_t)));
  1.1039 +    /* Now write the leftover part of the array for the EMPTY and ALL lists */
  1.1040 +    udata_writeBlock(out, (const void *)aliasArrLists, (2 * converterCount * sizeof(uint16_t)));
  1.1041 +
  1.1042 +    /* Offset the next array to make the index start at 1. */
  1.1043 +    udata_write16(out, 0xDEAD);
  1.1044 +
  1.1045 +    /* Write the lists */
  1.1046 +    udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t));
  1.1047 +
  1.1048 +    /* Write any options for the alias table. */
  1.1049 +    udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions));
  1.1050 +
  1.1051 +    /* write the tags strings */
  1.1052 +    udata_writeString(out, tagBlock.store, tagBlock.top);
  1.1053 +
  1.1054 +    /* write the aliases strings */
  1.1055 +    udata_writeString(out, stringBlock.store, stringBlock.top);
  1.1056 +
  1.1057 +    /* write the normalized aliases strings */
  1.1058 +    if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
  1.1059 +        char *normalizedStrings = (char *)uprv_malloc(tagBlock.top + stringBlock.top);
  1.1060 +        createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top);
  1.1061 +        createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top);
  1.1062 +
  1.1063 +        /* Write out the complete normalized array. */
  1.1064 +        udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top);
  1.1065 +        uprv_free(normalizedStrings);
  1.1066 +    }
  1.1067 +
  1.1068 +    uprv_free(uniqueAliasesToConverter);
  1.1069 +    uprv_free(uniqueAliases);
  1.1070 +    uprv_free(aliasArrLists);
  1.1071 +}
  1.1072 +
  1.1073 +static char *
  1.1074 +allocString(StringBlock *block, const char *s, int32_t length) {
  1.1075 +    uint32_t top;
  1.1076 +    char *p;
  1.1077 +
  1.1078 +    if(length<0) {
  1.1079 +        length=(int32_t)uprv_strlen(s);
  1.1080 +    }
  1.1081 +
  1.1082 +    /*
  1.1083 +     * add 1 for the terminating NUL
  1.1084 +     * and round up (+1 &~1)
  1.1085 +     * to keep the addresses on a 16-bit boundary
  1.1086 +     */
  1.1087 +    top=block->top + (uint32_t)((length + 1 + 1) & ~1);
  1.1088 +
  1.1089 +    if(top >= block->max) {
  1.1090 +        fprintf(stderr, "%s:%d: error: out of memory\n", path, lineNum);
  1.1091 +        exit(U_MEMORY_ALLOCATION_ERROR);
  1.1092 +    }
  1.1093 +
  1.1094 +    /* get the pointer and copy the string */
  1.1095 +    p = block->store + block->top;
  1.1096 +    uprv_memcpy(p, s, length);
  1.1097 +    p[length] = 0; /* NUL-terminate it */
  1.1098 +    if((length & 1) == 0) {
  1.1099 +        p[length + 1] = 0; /* set the padding byte */
  1.1100 +    }
  1.1101 +
  1.1102 +    /* check for invariant characters now that we have a NUL-terminated string for easy output */
  1.1103 +    if(!uprv_isInvariantString(p, length)) {
  1.1104 +        fprintf(stderr, "%s:%d: error: the name %s contains not just invariant characters\n", path, lineNum, p);
  1.1105 +        exit(U_INVALID_TABLE_FORMAT);
  1.1106 +    }
  1.1107 +
  1.1108 +    block->top = top;
  1.1109 +    return p;
  1.1110 +}
  1.1111 +
  1.1112 +static int
  1.1113 +compareAliases(const void *alias1, const void *alias2) {
  1.1114 +    /* Names like IBM850 and ibm-850 need to be sorted together */
  1.1115 +    int result = ucnv_compareNames(GET_ALIAS_STR(*(uint16_t*)alias1), GET_ALIAS_STR(*(uint16_t*)alias2));
  1.1116 +    if (!result) {
  1.1117 +        /* Sort the shortest first */
  1.1118 +        return (int)uprv_strlen(GET_ALIAS_STR(*(uint16_t*)alias1)) - (int)uprv_strlen(GET_ALIAS_STR(*(uint16_t*)alias2));
  1.1119 +    }
  1.1120 +    return result;
  1.1121 +}
  1.1122 +
  1.1123 +/*
  1.1124 + * Hey, Emacs, please set the following:
  1.1125 + *
  1.1126 + * Local Variables:
  1.1127 + * indent-tabs-mode: nil
  1.1128 + * End:
  1.1129 + *
  1.1130 + */
  1.1131 +

mercurial