intl/icu/source/tools/gencnval/gencnval.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1999-2012, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: gencnval.c
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 1999nov05
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * This program reads convrtrs.txt and writes a memory-mappable
michael@0 17 * converter name alias table to cnvalias.dat .
michael@0 18 *
michael@0 19 * This program currently writes version 2.1 of the data format. See
michael@0 20 * ucnv_io.c for more details on the format. Note that version 2.1
michael@0 21 * is written in such a way that a 2.0 reader will be able to use it,
michael@0 22 * and a 2.1 reader will be able to read 2.0.
michael@0 23 */
michael@0 24
michael@0 25 #include "unicode/utypes.h"
michael@0 26 #include "unicode/putil.h"
michael@0 27 #include "unicode/ucnv.h" /* ucnv_compareNames() */
michael@0 28 #include "ucnv_io.h"
michael@0 29 #include "cmemory.h"
michael@0 30 #include "cstring.h"
michael@0 31 #include "uinvchar.h"
michael@0 32 #include "filestrm.h"
michael@0 33 #include "unicode/uclean.h"
michael@0 34 #include "unewdata.h"
michael@0 35 #include "uoptions.h"
michael@0 36
michael@0 37 #include <stdio.h>
michael@0 38 #include <stdlib.h>
michael@0 39 #include <ctype.h>
michael@0 40
/* TODO: Need to check alias name length is less than UCNV_MAX_CONVERTER_NAME_LENGTH */

/*
 * STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2)
 * That is the maximum size for the string stores combined, because the
 * strings are indexed at 16-bit boundaries by a 16-bit index, and there
 * is only one section for the strings.
 */
#define STRING_STORE_SIZE   0x1FBFE     /* 130046 */
#define TAG_STORE_SIZE      0x400       /* 1024 */

/*
 * The combined tag and converter count can affect the number of lists
 * created. The size of all lists must be less than (2^17 - 1)
 * because the lists are indexed as a 16-bit array with a 16-bit index.
 */
#define MAX_TAG_COUNT       0x3F        /* 63 */
#define MAX_CONV_COUNT      UCNV_CONVERTER_INDEX_MASK
#define MAX_ALIAS_COUNT     0xFFFF      /* 65535 */

/*
 * The maximum number of aliases that a standard tag/converter combination
 * can have. As of 6/18/2002, IANA has 12 names for ASCII. Don't go below
 * 15 for this value. More than 31 is not recommended.
 */
#define MAX_TC_ALIAS_COUNT  0x1F        /* 31 */

/* Size of the line buffers used while reading, and of the alias lists array. */
#define MAX_LINE_SIZE       0x7FFF      /* 32767 */
#define MAX_LIST_SIZE       0xFFFF      /* 65535 */

/* Name and type passed to udata_create() for the output data file. */
#define DATA_NAME           "cnvalias"
#define DATA_TYPE           "icu"       /* ICU alias table */

/* The two reserved tags, registered in this order by parseFile(). */
#define ALL_TAG_STR         "ALL"
#define ALL_TAG_NUM         1
#define EMPTY_TAG_NUM       0
michael@0 75
michael@0 76 /* UDataInfo cf. udata.h */
michael@0 77 static const UDataInfo dataInfo={
michael@0 78 sizeof(UDataInfo),
michael@0 79 0,
michael@0 80
michael@0 81 U_IS_BIG_ENDIAN,
michael@0 82 U_CHARSET_FAMILY,
michael@0 83 sizeof(UChar),
michael@0 84 0,
michael@0 85
michael@0 86 {0x43, 0x76, 0x41, 0x6c}, /* dataFormat="CvAl" */
michael@0 87 {3, 0, 1, 0}, /* formatVersion */
michael@0 88 {1, 4, 2, 0} /* dataVersion */
michael@0 89 };
michael@0 90
michael@0 91 typedef struct {
michael@0 92 char *store;
michael@0 93 uint32_t top;
michael@0 94 uint32_t max;
michael@0 95 } StringBlock;
michael@0 96
michael@0 97 static char stringStore[STRING_STORE_SIZE];
michael@0 98 static StringBlock stringBlock = { stringStore, 0, STRING_STORE_SIZE };
michael@0 99
michael@0 100 typedef struct {
michael@0 101 uint16_t aliasCount;
michael@0 102 uint16_t *aliases; /* Index into stringStore */
michael@0 103 } AliasList;
michael@0 104
michael@0 105 typedef struct {
michael@0 106 uint16_t converter; /* Index into stringStore */
michael@0 107 uint16_t totalAliasCount; /* Total aliases in this column */
michael@0 108 } Converter;
michael@0 109
michael@0 110 static Converter converters[MAX_CONV_COUNT];
michael@0 111 static uint16_t converterCount=0;
michael@0 112
michael@0 113 static char tagStore[TAG_STORE_SIZE];
michael@0 114 static StringBlock tagBlock = { tagStore, 0, TAG_STORE_SIZE };
michael@0 115
michael@0 116 typedef struct {
michael@0 117 uint16_t tag; /* Index into tagStore */
michael@0 118 uint16_t totalAliasCount; /* Total aliases in this row */
michael@0 119 AliasList aliasList[MAX_CONV_COUNT];
michael@0 120 } Tag;
michael@0 121
michael@0 122 /* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
michael@0 123 static Tag tags[MAX_TAG_COUNT];
michael@0 124 static uint16_t tagCount = 0;
michael@0 125
michael@0 126 /* Used for storing all aliases */
michael@0 127 static uint16_t knownAliases[MAX_ALIAS_COUNT];
michael@0 128 static uint16_t knownAliasesCount = 0;
michael@0 129 /*static uint16_t duplicateKnownAliasesCount = 0;*/
michael@0 130
michael@0 131 /* Used for storing the lists section that point to aliases */
michael@0 132 static uint16_t aliasLists[MAX_LIST_SIZE];
michael@0 133 static uint16_t aliasListsSize = 0;
michael@0 134
michael@0 135 /* Were the standard tags declared before the aliases. */
michael@0 136 static UBool standardTagsUsed = FALSE;
michael@0 137 static UBool verbose = FALSE;
michael@0 138 static int lineNum = 1;
michael@0 139
michael@0 140 static UConverterAliasOptions tableOptions = {
michael@0 141 UCNV_IO_STD_NORMALIZED,
michael@0 142 1 /* containsCnvOptionInfo */
michael@0 143 };
michael@0 144
michael@0 145
michael@0 146 /**
michael@0 147 * path to convrtrs.txt
michael@0 148 */
michael@0 149 const char *path;
michael@0 150
michael@0 151 /* prototypes --------------------------------------------------------------- */
michael@0 152
michael@0 153 static void
michael@0 154 parseLine(const char *line);
michael@0 155
michael@0 156 static void
michael@0 157 parseFile(FileStream *in);
michael@0 158
michael@0 159 static int32_t
michael@0 160 chomp(char *line);
michael@0 161
michael@0 162 static void
michael@0 163 addOfficialTaggedStandards(char *line, int32_t lineLen);
michael@0 164
michael@0 165 static uint16_t
michael@0 166 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName);
michael@0 167
michael@0 168 static uint16_t
michael@0 169 addConverter(const char *converter);
michael@0 170
michael@0 171 static char *
michael@0 172 allocString(StringBlock *block, const char *s, int32_t length);
michael@0 173
michael@0 174 static uint16_t
michael@0 175 addToKnownAliases(const char *alias);
michael@0 176
michael@0 177 static int
michael@0 178 compareAliases(const void *alias1, const void *alias2);
michael@0 179
michael@0 180 static uint16_t
michael@0 181 getTagNumber(const char *tag, uint16_t tagLen);
michael@0 182
michael@0 183 /*static void
michael@0 184 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter);*/
michael@0 185
michael@0 186 static void
michael@0 187 writeAliasTable(UNewDataMemory *out);
michael@0 188
michael@0 189 /* -------------------------------------------------------------------------- */
michael@0 190
/*
 * Convert a 16-bit string number into a pointer into the corresponding store.
 * Numbers count 2-byte units, hence the shift.
 * Presumes that you used allocString()
 */
#define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
#define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))

/*
 * Convert a pointer into a store back into its 16-bit string number.
 * The argument is parenthesized so that any pointer expression can be passed.
 * Presumes that you used allocString()
 */
#define GET_ALIAS_NUM(str) ((uint16_t)(((str) - stringStore) >> 1))
#define GET_TAG_NUM(str) ((uint16_t)(((str) - tagStore) >> 1))
michael@0 198
/* Indexes into options[]; the order must match the initializer of options[]. */
enum
{
    HELP1 = 0,
    HELP2 = 1,
    VERBOSE = 2,
    COPYRIGHT = 3,
    DESTDIR = 4,
    SOURCEDIR = 5
};
michael@0 208
michael@0 209 static UOption options[]={
michael@0 210 UOPTION_HELP_H,
michael@0 211 UOPTION_HELP_QUESTION_MARK,
michael@0 212 UOPTION_VERBOSE,
michael@0 213 UOPTION_COPYRIGHT,
michael@0 214 UOPTION_DESTDIR,
michael@0 215 UOPTION_SOURCEDIR
michael@0 216 };
michael@0 217
michael@0 218 extern int
michael@0 219 main(int argc, char* argv[]) {
michael@0 220 int i, n;
michael@0 221 char pathBuf[512];
michael@0 222 FileStream *in;
michael@0 223 UNewDataMemory *out;
michael@0 224 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 225
michael@0 226 U_MAIN_INIT_ARGS(argc, argv);
michael@0 227
michael@0 228 /* preset then read command line options */
michael@0 229 options[DESTDIR].value=options[SOURCEDIR].value=u_getDataDirectory();
michael@0 230 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
michael@0 231
michael@0 232 /* error handling, printing usage message */
michael@0 233 if(argc<0) {
michael@0 234 fprintf(stderr,
michael@0 235 "error in command line argument \"%s\"\n",
michael@0 236 argv[-argc]);
michael@0 237 }
michael@0 238 if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
michael@0 239 fprintf(stderr,
michael@0 240 "usage: %s [-options] [convrtrs.txt]\n"
michael@0 241 "\tread convrtrs.txt and create " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE "\n"
michael@0 242 "options:\n"
michael@0 243 "\t-h or -? or --help this usage text\n"
michael@0 244 "\t-v or --verbose prints out extra information about the alias table\n"
michael@0 245 "\t-c or --copyright include a copyright notice\n"
michael@0 246 "\t-d or --destdir destination directory, followed by the path\n"
michael@0 247 "\t-s or --sourcedir source directory, followed by the path\n",
michael@0 248 argv[0]);
michael@0 249 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
michael@0 250 }
michael@0 251
michael@0 252 if(options[VERBOSE].doesOccur) {
michael@0 253 verbose = TRUE;
michael@0 254 }
michael@0 255
michael@0 256 if(argc>=2) {
michael@0 257 path=argv[1];
michael@0 258 } else {
michael@0 259 path=options[SOURCEDIR].value;
michael@0 260 if(path!=NULL && *path!=0) {
michael@0 261 char *end;
michael@0 262
michael@0 263 uprv_strcpy(pathBuf, path);
michael@0 264 end = uprv_strchr(pathBuf, 0);
michael@0 265 if(*(end-1)!=U_FILE_SEP_CHAR) {
michael@0 266 *(end++)=U_FILE_SEP_CHAR;
michael@0 267 }
michael@0 268 uprv_strcpy(end, "convrtrs.txt");
michael@0 269 path=pathBuf;
michael@0 270 } else {
michael@0 271 path = "convrtrs.txt";
michael@0 272 }
michael@0 273 }
michael@0 274
michael@0 275 uprv_memset(stringStore, 0, sizeof(stringStore));
michael@0 276 uprv_memset(tagStore, 0, sizeof(tagStore));
michael@0 277 uprv_memset(converters, 0, sizeof(converters));
michael@0 278 uprv_memset(tags, 0, sizeof(tags));
michael@0 279 uprv_memset(aliasLists, 0, sizeof(aliasLists));
michael@0 280 uprv_memset(knownAliases, 0, sizeof(aliasLists));
michael@0 281
michael@0 282
michael@0 283 in=T_FileStream_open(path, "r");
michael@0 284 if(in==NULL) {
michael@0 285 fprintf(stderr, "gencnval: unable to open input file %s\n", path);
michael@0 286 exit(U_FILE_ACCESS_ERROR);
michael@0 287 }
michael@0 288 parseFile(in);
michael@0 289 T_FileStream_close(in);
michael@0 290
michael@0 291 /* create the output file */
michael@0 292 out=udata_create(options[DESTDIR].value, DATA_TYPE, DATA_NAME, &dataInfo,
michael@0 293 options[COPYRIGHT].doesOccur ? U_COPYRIGHT_STRING : NULL, &errorCode);
michael@0 294 if(U_FAILURE(errorCode)) {
michael@0 295 fprintf(stderr, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode));
michael@0 296 exit(errorCode);
michael@0 297 }
michael@0 298
michael@0 299 /* write the table of aliases based on a tag/converter name combination */
michael@0 300 writeAliasTable(out);
michael@0 301
michael@0 302 /* finish */
michael@0 303 udata_finish(out, &errorCode);
michael@0 304 if(U_FAILURE(errorCode)) {
michael@0 305 fprintf(stderr, "gencnval: error finishing output file - %s\n", u_errorName(errorCode));
michael@0 306 exit(errorCode);
michael@0 307 }
michael@0 308
michael@0 309 /* clean up tags */
michael@0 310 for (i = 0; i < MAX_TAG_COUNT; i++) {
michael@0 311 for (n = 0; n < MAX_CONV_COUNT; n++) {
michael@0 312 if (tags[i].aliasList[n].aliases!=NULL) {
michael@0 313 uprv_free(tags[i].aliasList[n].aliases);
michael@0 314 }
michael@0 315 }
michael@0 316 }
michael@0 317
michael@0 318 return 0;
michael@0 319 }
michael@0 320
michael@0 321 static void
michael@0 322 parseFile(FileStream *in) {
michael@0 323 char line[MAX_LINE_SIZE];
michael@0 324 char lastLine[MAX_LINE_SIZE];
michael@0 325 int32_t lineSize = 0;
michael@0 326 int32_t lastLineSize = 0;
michael@0 327 UBool validParse = TRUE;
michael@0 328
michael@0 329 lineNum = 0;
michael@0 330
michael@0 331 /* Add the empty tag, which is for untagged aliases */
michael@0 332 getTagNumber("", 0);
michael@0 333 getTagNumber(ALL_TAG_STR, 3);
michael@0 334 allocString(&stringBlock, "", 0);
michael@0 335
michael@0 336 /* read the list of aliases */
michael@0 337 while (validParse) {
michael@0 338 validParse = FALSE;
michael@0 339
michael@0 340 /* Read non-empty lines that don't start with a space character. */
michael@0 341 while (T_FileStream_readLine(in, lastLine, MAX_LINE_SIZE) != NULL) {
michael@0 342 lastLineSize = chomp(lastLine);
michael@0 343 if (lineSize == 0 || (lastLineSize > 0 && isspace((int)*lastLine))) {
michael@0 344 uprv_strcpy(line + lineSize, lastLine);
michael@0 345 lineSize += lastLineSize;
michael@0 346 } else if (lineSize > 0) {
michael@0 347 validParse = TRUE;
michael@0 348 break;
michael@0 349 }
michael@0 350 lineNum++;
michael@0 351 }
michael@0 352
michael@0 353 if (validParse || lineSize > 0) {
michael@0 354 if (isspace((int)*line)) {
michael@0 355 fprintf(stderr, "%s:%d: error: cannot start an alias with a space\n", path, lineNum-1);
michael@0 356 exit(U_PARSE_ERROR);
michael@0 357 } else if (line[0] == '{') {
michael@0 358 if (!standardTagsUsed && line[lineSize - 1] != '}') {
michael@0 359 fprintf(stderr, "%s:%d: error: alias needs to start with a converter name\n", path, lineNum);
michael@0 360 exit(U_PARSE_ERROR);
michael@0 361 }
michael@0 362 addOfficialTaggedStandards(line, lineSize);
michael@0 363 standardTagsUsed = TRUE;
michael@0 364 } else {
michael@0 365 if (standardTagsUsed) {
michael@0 366 parseLine(line);
michael@0 367 }
michael@0 368 else {
michael@0 369 fprintf(stderr, "%s:%d: error: alias table needs to start a list of standard tags\n", path, lineNum);
michael@0 370 exit(U_PARSE_ERROR);
michael@0 371 }
michael@0 372 }
michael@0 373 /* Was the last line consumed */
michael@0 374 if (lastLineSize > 0) {
michael@0 375 uprv_strcpy(line, lastLine);
michael@0 376 lineSize = lastLineSize;
michael@0 377 }
michael@0 378 else {
michael@0 379 lineSize = 0;
michael@0 380 }
michael@0 381 }
michael@0 382 lineNum++;
michael@0 383 }
michael@0 384 }
michael@0 385
/*
 * This works almost like the Perl chomp.
 * It truncates the line in place at the first newline ('\r' or '\n') or
 * comment character ('#'), and then removes trailing white space.
 * Leading white space is kept.
 *
 * A line that holds nothing but white space is only truncated at the
 * newline/comment; its white space is left in place.
 *
 * @param line NUL-terminated, writable line buffer.
 * @return The length of the line after truncation.
 */
static int32_t
chomp(char *line) {
    char *s = line;
    char *lastNonSpace = NULL;  /* NULL until a non-space character is seen */

    while(*s!=0) {
        /* truncate at a newline or a comment */
        if(*s == '\r' || *s == '\n' || *s == '#') {
            *s = 0;
            break;
        }
        /* cast: passing a negative char value to isspace() is undefined */
        if (!isspace((unsigned char)*s)) {
            lastNonSpace = s;
        }
        ++s;
    }
    if (lastNonSpace != NULL) {
        /* cut right after the last non-space character, even if it is line[0] */
        s = lastNonSpace + 1;
        *s = 0;
    }
    return (int32_t)(s - line);
}
michael@0 410
michael@0 411 static void
michael@0 412 parseLine(const char *line) {
michael@0 413 uint16_t pos=0, start, limit, length, cnv;
michael@0 414 char *converter, *alias;
michael@0 415
michael@0 416 /* skip leading white space */
michael@0 417 /* There is no whitespace at the beginning anymore */
michael@0 418 /* while(line[pos]!=0 && isspace(line[pos])) {
michael@0 419 ++pos;
michael@0 420 }
michael@0 421 */
michael@0 422
michael@0 423 /* is there nothing on this line? */
michael@0 424 if(line[pos]==0) {
michael@0 425 return;
michael@0 426 }
michael@0 427
michael@0 428 /* get the converter name */
michael@0 429 start=pos;
michael@0 430 while(line[pos]!=0 && !isspace((int)line[pos])) {
michael@0 431 ++pos;
michael@0 432 }
michael@0 433 limit=pos;
michael@0 434
michael@0 435 /* store the converter name */
michael@0 436 length=(uint16_t)(limit-start);
michael@0 437 converter=allocString(&stringBlock, line+start, length);
michael@0 438
michael@0 439 /* add the converter to the converter table */
michael@0 440 cnv=addConverter(converter);
michael@0 441
michael@0 442 /* The name itself may be tagged, so let's added it to the aliases list properly */
michael@0 443 pos = start;
michael@0 444
michael@0 445 /* get all the real aliases */
michael@0 446 for(;;) {
michael@0 447
michael@0 448 /* skip white space */
michael@0 449 while(line[pos]!=0 && isspace((int)line[pos])) {
michael@0 450 ++pos;
michael@0 451 }
michael@0 452
michael@0 453 /* is there no more alias name on this line? */
michael@0 454 if(line[pos]==0) {
michael@0 455 break;
michael@0 456 }
michael@0 457
michael@0 458 /* get an alias name */
michael@0 459 start=pos;
michael@0 460 while(line[pos]!=0 && line[pos]!='{' && !isspace((int)line[pos])) {
michael@0 461 ++pos;
michael@0 462 }
michael@0 463 limit=pos;
michael@0 464
michael@0 465 /* store the alias name */
michael@0 466 length=(uint16_t)(limit-start);
michael@0 467 if (start == 0) {
michael@0 468 /* add the converter as its own alias to the alias table */
michael@0 469 alias = converter;
michael@0 470 addAlias(alias, ALL_TAG_NUM, cnv, TRUE);
michael@0 471 }
michael@0 472 else {
michael@0 473 alias=allocString(&stringBlock, line+start, length);
michael@0 474 addAlias(alias, ALL_TAG_NUM, cnv, FALSE);
michael@0 475 }
michael@0 476 addToKnownAliases(alias);
michael@0 477
michael@0 478 /* add the alias/converter pair to the alias table */
michael@0 479 /* addAlias(alias, 0, cnv, FALSE);*/
michael@0 480
michael@0 481 /* skip whitespace */
michael@0 482 while (line[pos] && isspace((int)line[pos])) {
michael@0 483 ++pos;
michael@0 484 }
michael@0 485
michael@0 486 /* handle tags if they are present */
michael@0 487 if (line[pos] == '{') {
michael@0 488 ++pos;
michael@0 489 do {
michael@0 490 start = pos;
michael@0 491 while (line[pos] && line[pos] != '}' && !isspace((int)line[pos])) {
michael@0 492 ++pos;
michael@0 493 }
michael@0 494 limit = pos;
michael@0 495
michael@0 496 if (start != limit) {
michael@0 497 /* add the tag to the tag table */
michael@0 498 uint16_t tag = getTagNumber(line + start, (uint16_t)(limit - start));
michael@0 499 addAlias(alias, tag, cnv, (UBool)(line[limit-1] == '*'));
michael@0 500 }
michael@0 501
michael@0 502 while (line[pos] && isspace((int)line[pos])) {
michael@0 503 ++pos;
michael@0 504 }
michael@0 505 } while (line[pos] && line[pos] != '}');
michael@0 506
michael@0 507 if (line[pos] == '}') {
michael@0 508 ++pos;
michael@0 509 } else {
michael@0 510 fprintf(stderr, "%s:%d: Unterminated tag list\n", path, lineNum);
michael@0 511 exit(U_UNMATCHED_BRACES);
michael@0 512 }
michael@0 513 } else {
michael@0 514 addAlias(alias, EMPTY_TAG_NUM, cnv, (UBool)(tags[0].aliasList[cnv].aliasCount == 0));
michael@0 515 }
michael@0 516 }
michael@0 517 }
michael@0 518
michael@0 519 static uint16_t
michael@0 520 getTagNumber(const char *tag, uint16_t tagLen) {
michael@0 521 char *atag;
michael@0 522 uint16_t t;
michael@0 523 UBool preferredName = ((tagLen > 0) ? (tag[tagLen - 1] == '*') : (FALSE));
michael@0 524
michael@0 525 if (tagCount >= MAX_TAG_COUNT) {
michael@0 526 fprintf(stderr, "%s:%d: too many tags\n", path, lineNum);
michael@0 527 exit(U_BUFFER_OVERFLOW_ERROR);
michael@0 528 }
michael@0 529
michael@0 530 if (preferredName) {
michael@0 531 /* puts(tag);*/
michael@0 532 tagLen--;
michael@0 533 }
michael@0 534
michael@0 535 for (t = 0; t < tagCount; ++t) {
michael@0 536 const char *currTag = GET_TAG_STR(tags[t].tag);
michael@0 537 if (uprv_strlen(currTag) == tagLen && !uprv_strnicmp(currTag, tag, tagLen)) {
michael@0 538 return t;
michael@0 539 }
michael@0 540 }
michael@0 541
michael@0 542 /* we need to add this tag */
michael@0 543 if (tagCount >= MAX_TAG_COUNT) {
michael@0 544 fprintf(stderr, "%s:%d: error: too many tags\n", path, lineNum);
michael@0 545 exit(U_BUFFER_OVERFLOW_ERROR);
michael@0 546 }
michael@0 547
michael@0 548 /* allocate a new entry in the tag table */
michael@0 549 atag = allocString(&tagBlock, tag, tagLen);
michael@0 550
michael@0 551 if (standardTagsUsed) {
michael@0 552 fprintf(stderr, "%s:%d: error: Tag \"%s\" is not declared at the beginning of the alias table.\n",
michael@0 553 path, lineNum, atag);
michael@0 554 exit(1);
michael@0 555 }
michael@0 556 else if (tagLen > 0 && strcmp(tag, ALL_TAG_STR) != 0) {
michael@0 557 fprintf(stderr, "%s:%d: warning: Tag \"%s\" was added to the list of standards because it was not declared at beginning of the alias table.\n",
michael@0 558 path, lineNum, atag);
michael@0 559 }
michael@0 560
michael@0 561 /* add the tag to the tag table */
michael@0 562 tags[tagCount].tag = GET_TAG_NUM(atag);
michael@0 563 /* The aliasList should be set to 0's already */
michael@0 564
michael@0 565 return tagCount++;
michael@0 566 }
michael@0 567
michael@0 568 /*static void
michael@0 569 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter) {
michael@0 570 tags[tag].aliases[converter] = alias;
michael@0 571 }
michael@0 572 */
michael@0 573
michael@0 574 static void
michael@0 575 addOfficialTaggedStandards(char *line, int32_t lineLen) {
michael@0 576 char *atag;
michael@0 577 char *endTagExp;
michael@0 578 char *tag;
michael@0 579 static const char WHITESPACE[] = " \t";
michael@0 580
michael@0 581 if (tagCount > UCNV_NUM_RESERVED_TAGS) {
michael@0 582 fprintf(stderr, "%s:%d: error: official tags already added\n", path, lineNum);
michael@0 583 exit(U_BUFFER_OVERFLOW_ERROR);
michael@0 584 }
michael@0 585 tag = strchr(line, '{');
michael@0 586 if (tag == NULL) {
michael@0 587 /* Why were we called? */
michael@0 588 fprintf(stderr, "%s:%d: error: Missing start of tag group\n", path, lineNum);
michael@0 589 exit(U_PARSE_ERROR);
michael@0 590 }
michael@0 591 tag++;
michael@0 592 endTagExp = strchr(tag, '}');
michael@0 593 if (endTagExp == NULL) {
michael@0 594 fprintf(stderr, "%s:%d: error: Missing end of tag group\n", path, lineNum);
michael@0 595 exit(U_PARSE_ERROR);
michael@0 596 }
michael@0 597 endTagExp[0] = 0;
michael@0 598
michael@0 599 tag = strtok(tag, WHITESPACE);
michael@0 600 while (tag != NULL) {
michael@0 601 /* printf("Adding original tag \"%s\"\n", tag);*/
michael@0 602
michael@0 603 /* allocate a new entry in the tag table */
michael@0 604 atag = allocString(&tagBlock, tag, -1);
michael@0 605
michael@0 606 /* add the tag to the tag table */
michael@0 607 tags[tagCount++].tag = (uint16_t)((atag - tagStore) >> 1);
michael@0 608
michael@0 609 /* The aliasList should already be set to 0's */
michael@0 610
michael@0 611 /* Get next tag */
michael@0 612 tag = strtok(NULL, WHITESPACE);
michael@0 613 }
michael@0 614 }
michael@0 615
michael@0 616 static uint16_t
michael@0 617 addToKnownAliases(const char *alias) {
michael@0 618 /* uint32_t idx; */
michael@0 619 /* strict matching */
michael@0 620 /* for (idx = 0; idx < knownAliasesCount; idx++) {
michael@0 621 uint16_t num = GET_ALIAS_NUM(alias);
michael@0 622 if (knownAliases[idx] != num
michael@0 623 && uprv_strcmp(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
michael@0 624 {
michael@0 625 fprintf(stderr, "%s:%d: warning: duplicate alias %s and %s found\n", path,
michael@0 626 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
michael@0 627 duplicateKnownAliasesCount++;
michael@0 628 break;
michael@0 629 }
michael@0 630 else if (knownAliases[idx] != num
michael@0 631 && ucnv_compareNames(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
michael@0 632 {
michael@0 633 if (verbose) {
michael@0 634 fprintf(stderr, "%s:%d: information: duplicate alias %s and %s found\n", path,
michael@0 635 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
michael@0 636 }
michael@0 637 duplicateKnownAliasesCount++;
michael@0 638 break;
michael@0 639 }
michael@0 640 }
michael@0 641 */
michael@0 642 if (knownAliasesCount >= MAX_ALIAS_COUNT) {
michael@0 643 fprintf(stderr, "%s:%d: warning: Too many aliases defined for all converters\n",
michael@0 644 path, lineNum);
michael@0 645 exit(U_BUFFER_OVERFLOW_ERROR);
michael@0 646 }
michael@0 647 /* TODO: We could try to unlist exact duplicates. */
michael@0 648 return knownAliases[knownAliasesCount++] = GET_ALIAS_NUM(alias);
michael@0 649 }
michael@0 650
michael@0 651 /*
michael@0 652 @param standard When standard is 0, then it's the "empty" tag.
michael@0 653 */
michael@0 654 static uint16_t
michael@0 655 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName) {
michael@0 656 uint32_t idx, idx2;
michael@0 657 UBool startEmptyWithoutDefault = FALSE;
michael@0 658 AliasList *aliasList;
michael@0 659
michael@0 660 if(standard>=MAX_TAG_COUNT) {
michael@0 661 fprintf(stderr, "%s:%d: error: too many standard tags\n", path, lineNum);
michael@0 662 exit(U_BUFFER_OVERFLOW_ERROR);
michael@0 663 }
michael@0 664 if(converter>=MAX_CONV_COUNT) {
michael@0 665 fprintf(stderr, "%s:%d: error: too many converter names\n", path, lineNum);
michael@0 666 exit(U_BUFFER_OVERFLOW_ERROR);
michael@0 667 }
michael@0 668 aliasList = &tags[standard].aliasList[converter];
michael@0 669
michael@0 670 if (strchr(alias, '}')) {
michael@0 671 fprintf(stderr, "%s:%d: error: unmatched } found\n", path,
michael@0 672 lineNum);
michael@0 673 }
michael@0 674
michael@0 675 if(aliasList->aliasCount + 1 >= MAX_TC_ALIAS_COUNT) {
michael@0 676 fprintf(stderr, "%s:%d: error: too many aliases for alias %s and converter %s\n", path,
michael@0 677 lineNum, alias, GET_ALIAS_STR(converters[converter].converter));
michael@0 678 exit(U_BUFFER_OVERFLOW_ERROR);
michael@0 679 }
michael@0 680
michael@0 681 /* Show this warning only once. All aliases are added to the "ALL" tag. */
michael@0 682 if (standard == ALL_TAG_NUM && GET_ALIAS_STR(converters[converter].converter) != alias) {
michael@0 683 /* Normally these option values are parsed at runtime, and they can
michael@0 684 be discarded when the alias is a default converter. Options should
michael@0 685 only be on a converter and not an alias. */
michael@0 686 if (uprv_strchr(alias, UCNV_OPTION_SEP_CHAR) != 0)
michael@0 687 {
michael@0 688 fprintf(stderr, "warning(line %d): alias %s contains a \""UCNV_OPTION_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
michael@0 689 lineNum, alias);
michael@0 690 }
michael@0 691 if (uprv_strchr(alias, UCNV_VALUE_SEP_CHAR) != 0)
michael@0 692 {
michael@0 693 fprintf(stderr, "warning(line %d): alias %s contains an \""UCNV_VALUE_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
michael@0 694 lineNum, alias);
michael@0 695 }
michael@0 696 }
michael@0 697
michael@0 698 if (standard != ALL_TAG_NUM) {
michael@0 699 /* Check for duplicate aliases for this tag on all converters */
michael@0 700 for (idx = 0; idx < converterCount; idx++) {
michael@0 701 for (idx2 = 0; idx2 < tags[standard].aliasList[idx].aliasCount; idx2++) {
michael@0 702 uint16_t aliasNum = tags[standard].aliasList[idx].aliases[idx2];
michael@0 703 if (aliasNum
michael@0 704 && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
michael@0 705 {
michael@0 706 if (idx == converter) {
michael@0 707 /*
michael@0 708 * (alias, standard) duplicates are harmless if they map to the same converter.
michael@0 709 * Only print a warning in verbose mode, or if the alias is a precise duplicate,
michael@0 710 * not just a lenient-match duplicate.
michael@0 711 */
michael@0 712 if (verbose || 0 == uprv_strcmp(alias, GET_ALIAS_STR(aliasNum))) {
michael@0 713 fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard %s and converter %s\n", path,
michael@0 714 lineNum, alias, GET_ALIAS_STR(aliasNum),
michael@0 715 GET_TAG_STR(tags[standard].tag),
michael@0 716 GET_ALIAS_STR(converters[converter].converter));
michael@0 717 }
michael@0 718 } else {
michael@0 719 fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard tag %s between converter %s and converter %s\n", path,
michael@0 720 lineNum, alias, GET_ALIAS_STR(aliasNum),
michael@0 721 GET_TAG_STR(tags[standard].tag),
michael@0 722 GET_ALIAS_STR(converters[converter].converter),
michael@0 723 GET_ALIAS_STR(converters[idx].converter));
michael@0 724 }
michael@0 725 break;
michael@0 726 }
michael@0 727 }
michael@0 728 }
michael@0 729
michael@0 730 /* Check for duplicate default aliases for this converter on all tags */
michael@0 731 /* It's okay to have multiple standards prefer the same name */
michael@0 732 /* if (verbose && !dupFound) {
michael@0 733 for (idx = 0; idx < tagCount; idx++) {
michael@0 734 if (tags[idx].aliasList[converter].aliases) {
michael@0 735 uint16_t aliasNum = tags[idx].aliasList[converter].aliases[0];
michael@0 736 if (aliasNum
michael@0 737 && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
michael@0 738 {
michael@0 739 fprintf(stderr, "%s:%d: warning: duplicate alias %s found for converter %s and standard tag %s\n", path,
michael@0 740 lineNum, alias, GET_ALIAS_STR(converters[converter].converter), GET_TAG_STR(tags[standard].tag));
michael@0 741 break;
michael@0 742 }
michael@0 743 }
michael@0 744 }
michael@0 745 }*/
michael@0 746 }
michael@0 747
michael@0 748 if (aliasList->aliasCount <= 0) {
michael@0 749 aliasList->aliasCount++;
michael@0 750 startEmptyWithoutDefault = TRUE;
michael@0 751 }
michael@0 752 aliasList->aliases = (uint16_t *)uprv_realloc(aliasList->aliases, (aliasList->aliasCount + 1) * sizeof(aliasList->aliases[0]));
michael@0 753 if (startEmptyWithoutDefault) {
michael@0 754 aliasList->aliases[0] = 0;
michael@0 755 }
michael@0 756 if (defaultName) {
michael@0 757 if (aliasList->aliases[0] != 0) {
michael@0 758 fprintf(stderr, "%s:%d: error: Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n", path,
michael@0 759 lineNum,
michael@0 760 alias,
michael@0 761 GET_ALIAS_STR(aliasList->aliases[0]),
michael@0 762 GET_TAG_STR(tags[standard].tag),
michael@0 763 GET_ALIAS_STR(converters[converter].converter));
michael@0 764 exit(U_PARSE_ERROR);
michael@0 765 }
michael@0 766 aliasList->aliases[0] = GET_ALIAS_NUM(alias);
michael@0 767 } else {
michael@0 768 aliasList->aliases[aliasList->aliasCount++] = GET_ALIAS_NUM(alias);
michael@0 769 }
michael@0 770 /* aliasList->converter = converter;*/
michael@0 771
michael@0 772 converters[converter].totalAliasCount++; /* One more to the column */
michael@0 773 tags[standard].totalAliasCount++; /* One more to the row */
michael@0 774
michael@0 775 return aliasList->aliasCount;
michael@0 776 }
michael@0 777
michael@0 778 static uint16_t
michael@0 779 addConverter(const char *converter) {
michael@0 780 uint32_t idx;
michael@0 781 if(converterCount>=MAX_CONV_COUNT) {
michael@0 782 fprintf(stderr, "%s:%d: error: too many converters\n", path, lineNum);
michael@0 783 exit(U_BUFFER_OVERFLOW_ERROR);
michael@0 784 }
michael@0 785
michael@0 786 for (idx = 0; idx < converterCount; idx++) {
michael@0 787 if (ucnv_compareNames(converter, GET_ALIAS_STR(converters[idx].converter)) == 0) {
michael@0 788 fprintf(stderr, "%s:%d: error: duplicate converter %s found!\n", path, lineNum, converter);
michael@0 789 exit(U_PARSE_ERROR);
michael@0 790 break;
michael@0 791 }
michael@0 792 }
michael@0 793
michael@0 794 converters[converterCount].converter = GET_ALIAS_NUM(converter);
michael@0 795 converters[converterCount].totalAliasCount = 0;
michael@0 796
michael@0 797 return converterCount++;
michael@0 798 }
michael@0 799
michael@0 800 /* resolve this alias based on the prioritization of the standard tags. */
michael@0 801 static void
michael@0 802 resolveAliasToConverter(uint16_t alias, uint16_t *tagNum, uint16_t *converterNum) {
michael@0 803 uint16_t idx, idx2, idx3;
michael@0 804
michael@0 805 for (idx = UCNV_NUM_RESERVED_TAGS; idx < tagCount; idx++) {
michael@0 806 for (idx2 = 0; idx2 < converterCount; idx2++) {
michael@0 807 for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
michael@0 808 uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
michael@0 809 if (aliasNum == alias) {
michael@0 810 *tagNum = idx;
michael@0 811 *converterNum = idx2;
michael@0 812 return;
michael@0 813 }
michael@0 814 }
michael@0 815 }
michael@0 816 }
michael@0 817 /* Do the leftovers last, just in case */
michael@0 818 /* There is no need to do the ALL tag */
michael@0 819 idx = 0;
michael@0 820 for (idx2 = 0; idx2 < converterCount; idx2++) {
michael@0 821 for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
michael@0 822 uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
michael@0 823 if (aliasNum == alias) {
michael@0 824 *tagNum = idx;
michael@0 825 *converterNum = idx2;
michael@0 826 return;
michael@0 827 }
michael@0 828 }
michael@0 829 }
michael@0 830 *tagNum = UINT16_MAX;
michael@0 831 *converterNum = UINT16_MAX;
michael@0 832 fprintf(stderr, "%s: warning: alias %s not found\n",
michael@0 833 path,
michael@0 834 GET_ALIAS_STR(alias));
michael@0 835 return;
michael@0 836 }
michael@0 837
michael@0 838 /* The knownAliases should be sorted before calling this function */
michael@0 839 static uint32_t
michael@0 840 resolveAliases(uint16_t *uniqueAliasArr, uint16_t *uniqueAliasToConverterArr, uint16_t aliasOffset) {
michael@0 841 uint32_t uniqueAliasIdx = 0;
michael@0 842 uint32_t idx;
michael@0 843 uint16_t currTagNum, oldTagNum;
michael@0 844 uint16_t currConvNum, oldConvNum;
michael@0 845 const char *lastName;
michael@0 846
michael@0 847 resolveAliasToConverter(knownAliases[0], &oldTagNum, &currConvNum);
michael@0 848 uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
michael@0 849 oldConvNum = currConvNum;
michael@0 850 uniqueAliasArr[uniqueAliasIdx] = knownAliases[0] + aliasOffset;
michael@0 851 uniqueAliasIdx++;
michael@0 852 lastName = GET_ALIAS_STR(knownAliases[0]);
michael@0 853
michael@0 854 for (idx = 1; idx < knownAliasesCount; idx++) {
michael@0 855 resolveAliasToConverter(knownAliases[idx], &currTagNum, &currConvNum);
michael@0 856 if (ucnv_compareNames(lastName, GET_ALIAS_STR(knownAliases[idx])) == 0) {
michael@0 857 /* duplicate found */
michael@0 858 if ((currTagNum < oldTagNum && currTagNum >= UCNV_NUM_RESERVED_TAGS)
michael@0 859 || oldTagNum == 0) {
michael@0 860 oldTagNum = currTagNum;
michael@0 861 uniqueAliasToConverterArr[uniqueAliasIdx - 1] = currConvNum;
michael@0 862 uniqueAliasArr[uniqueAliasIdx - 1] = knownAliases[idx] + aliasOffset;
michael@0 863 if (verbose) {
michael@0 864 printf("using %s instead of %s -> %s",
michael@0 865 GET_ALIAS_STR(knownAliases[idx]),
michael@0 866 lastName,
michael@0 867 GET_ALIAS_STR(converters[currConvNum].converter));
michael@0 868 if (oldConvNum != currConvNum) {
michael@0 869 printf(" (alias conflict)");
michael@0 870 }
michael@0 871 puts("");
michael@0 872 }
michael@0 873 }
michael@0 874 else {
michael@0 875 /* else ignore it */
michael@0 876 if (verbose) {
michael@0 877 printf("folding %s into %s -> %s",
michael@0 878 GET_ALIAS_STR(knownAliases[idx]),
michael@0 879 lastName,
michael@0 880 GET_ALIAS_STR(converters[oldConvNum].converter));
michael@0 881 if (oldConvNum != currConvNum) {
michael@0 882 printf(" (alias conflict)");
michael@0 883 }
michael@0 884 puts("");
michael@0 885 }
michael@0 886 }
michael@0 887 if (oldConvNum != currConvNum) {
michael@0 888 uniqueAliasToConverterArr[uniqueAliasIdx - 1] |= UCNV_AMBIGUOUS_ALIAS_MAP_BIT;
michael@0 889 }
michael@0 890 }
michael@0 891 else {
michael@0 892 uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
michael@0 893 oldConvNum = currConvNum;
michael@0 894 uniqueAliasArr[uniqueAliasIdx] = knownAliases[idx] + aliasOffset;
michael@0 895 uniqueAliasIdx++;
michael@0 896 lastName = GET_ALIAS_STR(knownAliases[idx]);
michael@0 897 oldTagNum = currTagNum;
michael@0 898 /*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/
michael@0 899 }
michael@0 900 if (uprv_strchr(GET_ALIAS_STR(converters[currConvNum].converter), UCNV_OPTION_SEP_CHAR) != NULL) {
michael@0 901 uniqueAliasToConverterArr[uniqueAliasIdx-1] |= UCNV_CONTAINS_OPTION_BIT;
michael@0 902 }
michael@0 903 }
michael@0 904 return uniqueAliasIdx;
michael@0 905 }
michael@0 906
michael@0 907 static void
michael@0 908 createOneAliasList(uint16_t *aliasArrLists, uint32_t tag, uint32_t converter, uint16_t offset) {
michael@0 909 uint32_t aliasNum;
michael@0 910 AliasList *aliasList = &tags[tag].aliasList[converter];
michael@0 911
michael@0 912 if (aliasList->aliasCount == 0) {
michael@0 913 aliasArrLists[tag*converterCount + converter] = 0;
michael@0 914 }
michael@0 915 else {
michael@0 916 aliasLists[aliasListsSize++] = aliasList->aliasCount;
michael@0 917
michael@0 918 /* write into the array area a 1's based index. */
michael@0 919 aliasArrLists[tag*converterCount + converter] = aliasListsSize;
michael@0 920
michael@0 921 /* printf("tag %s converter %s\n",
michael@0 922 GET_TAG_STR(tags[tag].tag),
michael@0 923 GET_ALIAS_STR(converters[converter].converter));*/
michael@0 924 for (aliasNum = 0; aliasNum < aliasList->aliasCount; aliasNum++) {
michael@0 925 uint16_t value;
michael@0 926 /* printf(" %s\n",
michael@0 927 GET_ALIAS_STR(aliasList->aliases[aliasNum]));*/
michael@0 928 if (aliasList->aliases[aliasNum]) {
michael@0 929 value = aliasList->aliases[aliasNum] + offset;
michael@0 930 } else {
michael@0 931 value = 0;
michael@0 932 if (tag != 0) { /* Only show the warning when it's not the leftover tag. */
michael@0 933 fprintf(stderr, "%s: warning: tag %s does not have a default alias for %s\n",
michael@0 934 path,
michael@0 935 GET_TAG_STR(tags[tag].tag),
michael@0 936 GET_ALIAS_STR(converters[converter].converter));
michael@0 937 }
michael@0 938 }
michael@0 939 aliasLists[aliasListsSize++] = value;
michael@0 940 if (aliasListsSize >= MAX_LIST_SIZE) {
michael@0 941 fprintf(stderr, "%s: error: Too many alias lists\n", path);
michael@0 942 exit(U_BUFFER_OVERFLOW_ERROR);
michael@0 943 }
michael@0 944
michael@0 945 }
michael@0 946 }
michael@0 947 }
michael@0 948
/*
 * Build the normalized counterpart of a block of NUL-terminated strings.
 *
 * The whole block is first copied verbatim so that both blocks share the
 * same layout. Each non-empty string is then replaced in place by the output
 * of ucnv_io_stripForCompare(); when that output is non-empty, the rest of
 * the string's slot (up to and including its original terminator) is
 * cleared to 0.
 *
 * normalizedStrings   destination, at least stringBlockLength bytes
 * origStringBlock     source block of consecutive NUL-terminated strings
 * stringBlockLength   number of bytes in the source block
 */
static void
createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
    int32_t offset = 0;

    uprv_memcpy(normalizedStrings, origStringBlock, stringBlockLength);

    for (;;) {
        const char *src = origStringBlock + offset;
        char *dst = normalizedStrings + offset;
        int32_t srcLen = (int32_t)uprv_strlen(src);
        int32_t slotSize;

        /* Stop once the current string no longer fits in what is left of the block. */
        if (srcLen >= stringBlockLength - offset) {
            break;
        }
        slotSize = srcLen + 1;

        if (srcLen > 0) {
            int32_t strippedLen;

            ucnv_io_stripForCompare(dst, src);
            strippedLen = (int32_t)uprv_strlen(dst);
            if (strippedLen > 0) {
                /* Clear the bytes left over after the (shorter or equal) stripped name. */
                uprv_memset(dst + strippedLen, 0, slotSize - strippedLen);
            }
        }

        /* Both blocks advance in lock step, one original slot at a time. */
        offset += slotSize;
    }
}
michael@0 968
michael@0 969 static void
michael@0 970 writeAliasTable(UNewDataMemory *out) {
michael@0 971 uint32_t i, j;
michael@0 972 uint32_t uniqueAliasesSize;
michael@0 973 uint16_t aliasOffset = (uint16_t)(tagBlock.top/sizeof(uint16_t));
michael@0 974 uint16_t *aliasArrLists = (uint16_t *)uprv_malloc(tagCount * converterCount * sizeof(uint16_t));
michael@0 975 uint16_t *uniqueAliases = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
michael@0 976 uint16_t *uniqueAliasesToConverter = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
michael@0 977
michael@0 978 qsort(knownAliases, knownAliasesCount, sizeof(knownAliases[0]), compareAliases);
michael@0 979 uniqueAliasesSize = resolveAliases(uniqueAliases, uniqueAliasesToConverter, aliasOffset);
michael@0 980
michael@0 981 /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
michael@0 982 aliasListsSize = 0;
michael@0 983
michael@0 984 /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
michael@0 985 for (i = 0; i < tagCount; ++i) {
michael@0 986 for (j = 0; j < converterCount; ++j) {
michael@0 987 createOneAliasList(aliasArrLists, i, j, aliasOffset);
michael@0 988 }
michael@0 989 }
michael@0 990
michael@0 991 /* Write the size of the TOC */
michael@0 992 if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
michael@0 993 udata_write32(out, 8);
michael@0 994 }
michael@0 995 else {
michael@0 996 udata_write32(out, 9);
michael@0 997 }
michael@0 998
michael@0 999 /* Write the sizes of each section */
michael@0 1000 /* All sizes are the number of uint16_t units, not bytes */
michael@0 1001 udata_write32(out, converterCount);
michael@0 1002 udata_write32(out, tagCount);
michael@0 1003 udata_write32(out, uniqueAliasesSize); /* list of aliases */
michael@0 1004 udata_write32(out, uniqueAliasesSize); /* The preresolved form of mapping an untagged the alias to a converter */
michael@0 1005 udata_write32(out, tagCount * converterCount);
michael@0 1006 udata_write32(out, aliasListsSize + 1);
michael@0 1007 udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t));
michael@0 1008 udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
michael@0 1009 if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
michael@0 1010 udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
michael@0 1011 }
michael@0 1012
michael@0 1013 /* write the table of converters */
michael@0 1014 /* Think of this as the column headers */
michael@0 1015 for(i=0; i<converterCount; ++i) {
michael@0 1016 udata_write16(out, (uint16_t)(converters[i].converter + aliasOffset));
michael@0 1017 }
michael@0 1018
michael@0 1019 /* write the table of tags */
michael@0 1020 /* Think of this as the row headers */
michael@0 1021 for(i=UCNV_NUM_RESERVED_TAGS; i<tagCount; ++i) {
michael@0 1022 udata_write16(out, tags[i].tag);
michael@0 1023 }
michael@0 1024 /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
michael@0 1025 udata_write16(out, tags[EMPTY_TAG_NUM].tag);
michael@0 1026 udata_write16(out, tags[ALL_TAG_NUM].tag);
michael@0 1027
michael@0 1028 /* Write the unique list of aliases */
michael@0 1029 udata_writeBlock(out, uniqueAliases, uniqueAliasesSize * sizeof(uint16_t));
michael@0 1030
michael@0 1031 /* Write the unique list of aliases */
michael@0 1032 udata_writeBlock(out, uniqueAliasesToConverter, uniqueAliasesSize * sizeof(uint16_t));
michael@0 1033
michael@0 1034 /* Write the array to the lists */
michael@0 1035 udata_writeBlock(out, (const void *)(aliasArrLists + (2*converterCount)), (((tagCount - 2) * converterCount) * sizeof(uint16_t)));
michael@0 1036 /* Now write the leftover part of the array for the EMPTY and ALL lists */
michael@0 1037 udata_writeBlock(out, (const void *)aliasArrLists, (2 * converterCount * sizeof(uint16_t)));
michael@0 1038
michael@0 1039 /* Offset the next array to make the index start at 1. */
michael@0 1040 udata_write16(out, 0xDEAD);
michael@0 1041
michael@0 1042 /* Write the lists */
michael@0 1043 udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t));
michael@0 1044
michael@0 1045 /* Write any options for the alias table. */
michael@0 1046 udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions));
michael@0 1047
michael@0 1048 /* write the tags strings */
michael@0 1049 udata_writeString(out, tagBlock.store, tagBlock.top);
michael@0 1050
michael@0 1051 /* write the aliases strings */
michael@0 1052 udata_writeString(out, stringBlock.store, stringBlock.top);
michael@0 1053
michael@0 1054 /* write the normalized aliases strings */
michael@0 1055 if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
michael@0 1056 char *normalizedStrings = (char *)uprv_malloc(tagBlock.top + stringBlock.top);
michael@0 1057 createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top);
michael@0 1058 createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top);
michael@0 1059
michael@0 1060 /* Write out the complete normalized array. */
michael@0 1061 udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top);
michael@0 1062 uprv_free(normalizedStrings);
michael@0 1063 }
michael@0 1064
michael@0 1065 uprv_free(uniqueAliasesToConverter);
michael@0 1066 uprv_free(uniqueAliases);
michael@0 1067 uprv_free(aliasArrLists);
michael@0 1068 }
michael@0 1069
michael@0 1070 static char *
michael@0 1071 allocString(StringBlock *block, const char *s, int32_t length) {
michael@0 1072 uint32_t top;
michael@0 1073 char *p;
michael@0 1074
michael@0 1075 if(length<0) {
michael@0 1076 length=(int32_t)uprv_strlen(s);
michael@0 1077 }
michael@0 1078
michael@0 1079 /*
michael@0 1080 * add 1 for the terminating NUL
michael@0 1081 * and round up (+1 &~1)
michael@0 1082 * to keep the addresses on a 16-bit boundary
michael@0 1083 */
michael@0 1084 top=block->top + (uint32_t)((length + 1 + 1) & ~1);
michael@0 1085
michael@0 1086 if(top >= block->max) {
michael@0 1087 fprintf(stderr, "%s:%d: error: out of memory\n", path, lineNum);
michael@0 1088 exit(U_MEMORY_ALLOCATION_ERROR);
michael@0 1089 }
michael@0 1090
michael@0 1091 /* get the pointer and copy the string */
michael@0 1092 p = block->store + block->top;
michael@0 1093 uprv_memcpy(p, s, length);
michael@0 1094 p[length] = 0; /* NUL-terminate it */
michael@0 1095 if((length & 1) == 0) {
michael@0 1096 p[length + 1] = 0; /* set the padding byte */
michael@0 1097 }
michael@0 1098
michael@0 1099 /* check for invariant characters now that we have a NUL-terminated string for easy output */
michael@0 1100 if(!uprv_isInvariantString(p, length)) {
michael@0 1101 fprintf(stderr, "%s:%d: error: the name %s contains not just invariant characters\n", path, lineNum, p);
michael@0 1102 exit(U_INVALID_TABLE_FORMAT);
michael@0 1103 }
michael@0 1104
michael@0 1105 block->top = top;
michael@0 1106 return p;
michael@0 1107 }
michael@0 1108
/*
 * qsort() comparator for arrays of alias string numbers (uint16_t).
 *
 * Names are ordered with ucnv_compareNames(), so that spellings such as
 * IBM850 and ibm-850 end up next to each other. Names that compare equal
 * are ordered by their raw string length, shortest first.
 */
static int
compareAliases(const void *alias1, const void *alias2) {
    const char *name1 = GET_ALIAS_STR(*(const uint16_t *)alias1);
    const char *name2 = GET_ALIAS_STR(*(const uint16_t *)alias2);
    int order = ucnv_compareNames(name1, name2);

    if (order != 0) {
        return order;
    }
    /* Sort the shortest first */
    return (int)uprv_strlen(name1) - (int)uprv_strlen(name2);
}
michael@0 1119
michael@0 1120 /*
michael@0 1121 * Hey, Emacs, please set the following:
michael@0 1122 *
michael@0 1123 * Local Variables:
michael@0 1124 * indent-tabs-mode: nil
michael@0 1125 * End:
michael@0 1126 *
michael@0 1127 */
michael@0 1128

mercurial