Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 1999-2012, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: gencnval.c |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 1999nov05 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | * |
michael@0 | 16 | * This program reads convrtrs.txt and writes a memory-mappable |
michael@0 | 17 | * converter name alias table to cnvalias.dat . |
michael@0 | 18 | * |
michael@0 | 19 | * This program currently writes version 2.1 of the data format. See |
michael@0 | 20 | * ucnv_io.c for more details on the format. Note that version 2.1 |
michael@0 | 21 | * is written in such a way that a 2.0 reader will be able to use it, |
michael@0 | 22 | * and a 2.1 reader will be able to read 2.0. |
michael@0 | 23 | */ |
michael@0 | 24 | |
michael@0 | 25 | #include "unicode/utypes.h" |
michael@0 | 26 | #include "unicode/putil.h" |
michael@0 | 27 | #include "unicode/ucnv.h" /* ucnv_compareNames() */ |
michael@0 | 28 | #include "ucnv_io.h" |
michael@0 | 29 | #include "cmemory.h" |
michael@0 | 30 | #include "cstring.h" |
michael@0 | 31 | #include "uinvchar.h" |
michael@0 | 32 | #include "filestrm.h" |
michael@0 | 33 | #include "unicode/uclean.h" |
michael@0 | 34 | #include "unewdata.h" |
michael@0 | 35 | #include "uoptions.h" |
michael@0 | 36 | |
michael@0 | 37 | #include <stdio.h> |
michael@0 | 38 | #include <stdlib.h> |
michael@0 | 39 | #include <ctype.h> |
michael@0 | 40 | |
michael@0 | 41 | /* TODO: Need to check alias name length is less than UCNV_MAX_CONVERTER_NAME_LENGTH */ |
michael@0 | 42 | |
michael@0 | 43 | /* STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2) |
michael@0 | 44 | That is the maximum size for the string stores combined |
michael@0 | 45 | because the strings are indexed at 16-bit boundaries by a |
michael@0 | 46 | 16-bit index, and there is only one section for the |
michael@0 | 47 | strings. |
michael@0 | 48 | */ |
michael@0 | 49 | #define STRING_STORE_SIZE 0x1FBFE /* 130046 */ |
michael@0 | 50 | #define TAG_STORE_SIZE 0x400 /* 1024 */ |
michael@0 | 51 | |
michael@0 | 52 | /* The combined tag and converter count can affect the number of lists |
michael@0 | 53 | created. The size of all lists must be less than (2^17 - 1) |
michael@0 | 54 | because the lists are indexed as a 16-bit array with a 16-bit index. |
michael@0 | 55 | */ |
michael@0 | 56 | #define MAX_TAG_COUNT 0x3F /* 63 */ |
michael@0 | 57 | #define MAX_CONV_COUNT UCNV_CONVERTER_INDEX_MASK |
michael@0 | 58 | #define MAX_ALIAS_COUNT 0xFFFF /* 65535 */ |
michael@0 | 59 | |
michael@0 | 60 | /* The maximum number of aliases that a standard tag/converter combination can have. |
michael@0 | 61 | At this moment 6/18/2002, IANA has 12 names for ASCII. Don't go below 15 for |
michael@0 | 62 | this value. I don't recommend more than 31 for this value. |
michael@0 | 63 | */ |
michael@0 | 64 | #define MAX_TC_ALIAS_COUNT 0x1F /* 31 */ |
michael@0 | 65 | |
michael@0 | 66 | #define MAX_LINE_SIZE 0x7FFF /* 32767 */ |
michael@0 | 67 | #define MAX_LIST_SIZE 0xFFFF /* 65535 */ |
michael@0 | 68 | |
michael@0 | 69 | #define DATA_NAME "cnvalias" |
michael@0 | 70 | #define DATA_TYPE "icu" /* ICU alias table */ |
michael@0 | 71 | |
michael@0 | 72 | #define ALL_TAG_STR "ALL" |
michael@0 | 73 | #define ALL_TAG_NUM 1 |
michael@0 | 74 | #define EMPTY_TAG_NUM 0 |
michael@0 | 75 | |
michael@0 | 76 | /* UDataInfo cf. udata.h */ |
/*
 * Header describing the generated data file; main() passes it to
 * udata_create(). See udata.h for the meaning of each UDataInfo member.
 *
 * NOTE(review): formatVersion below is 3.0.1, while the comment at the top
 * of this file still talks about data format 2.1 -- confirm which statement
 * is current and update the other.
 */
static const UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x43, 0x76, 0x41, 0x6c}, /* dataFormat="CvAl" (the four bytes are the ASCII codes of those letters) */
    {3, 0, 1, 0}, /* formatVersion */
    {1, 4, 2, 0} /* dataVersion */
};
michael@0 | 90 | |
/* A character buffer with a fill mark, handed to allocString(). */
typedef struct {
    char *store;    /* backing buffer */
    uint32_t top;   /* presumably the next free offset in store; maintained by allocString() -- TODO confirm */
    uint32_t max;   /* initialized with the size of the backing buffer */
} StringBlock;

/* Storage for converter and alias names. */
static char stringStore[STRING_STORE_SIZE];
static StringBlock stringBlock = { stringStore, 0, STRING_STORE_SIZE };

/* The aliases recorded for one (tag, converter) combination. */
typedef struct {
    uint16_t aliasCount;
    uint16_t *aliases; /* Index into stringStore (in 2-byte units, see GET_ALIAS_STR); released with uprv_free() in main() */
} AliasList;

typedef struct {
    uint16_t converter; /* Index into stringStore */
    uint16_t totalAliasCount; /* Total aliases in this column */
} Converter;

static Converter converters[MAX_CONV_COUNT];
static uint16_t converterCount=0;

/* Storage for standard tag names. */
static char tagStore[TAG_STORE_SIZE];
static StringBlock tagBlock = { tagStore, 0, TAG_STORE_SIZE };

typedef struct {
    uint16_t tag; /* Index into tagStore (in 2-byte units, see GET_TAG_STR) */
    uint16_t totalAliasCount; /* Total aliases in this row */
    AliasList aliasList[MAX_CONV_COUNT]; /* one list per converter, indexed by converter number */
} Tag;

/* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
static Tag tags[MAX_TAG_COUNT];
static uint16_t tagCount = 0;

/* Used for storing all aliases */
static uint16_t knownAliases[MAX_ALIAS_COUNT];
static uint16_t knownAliasesCount = 0;
/*static uint16_t duplicateKnownAliasesCount = 0;*/

/* Used for storing the lists section that point to aliases */
static uint16_t aliasLists[MAX_LIST_SIZE];
static uint16_t aliasListsSize = 0;
michael@0 | 134 | |
michael@0 | 135 | /* Were the standard tags declared before the aliases. */ |
michael@0 | 136 | static UBool standardTagsUsed = FALSE; |
michael@0 | 137 | static UBool verbose = FALSE; |
michael@0 | 138 | static int lineNum = 1; |
michael@0 | 139 | |
michael@0 | 140 | static UConverterAliasOptions tableOptions = { |
michael@0 | 141 | UCNV_IO_STD_NORMALIZED, |
michael@0 | 142 | 1 /* containsCnvOptionInfo */ |
michael@0 | 143 | }; |
michael@0 | 144 | |
michael@0 | 145 | |
michael@0 | 146 | /** |
michael@0 | 147 | * path to convrtrs.txt |
michael@0 | 148 | */ |
michael@0 | 149 | const char *path; |
michael@0 | 150 | |
michael@0 | 151 | /* prototypes --------------------------------------------------------------- */ |
michael@0 | 152 | |
michael@0 | 153 | static void |
michael@0 | 154 | parseLine(const char *line); |
michael@0 | 155 | |
michael@0 | 156 | static void |
michael@0 | 157 | parseFile(FileStream *in); |
michael@0 | 158 | |
michael@0 | 159 | static int32_t |
michael@0 | 160 | chomp(char *line); |
michael@0 | 161 | |
michael@0 | 162 | static void |
michael@0 | 163 | addOfficialTaggedStandards(char *line, int32_t lineLen); |
michael@0 | 164 | |
michael@0 | 165 | static uint16_t |
michael@0 | 166 | addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName); |
michael@0 | 167 | |
michael@0 | 168 | static uint16_t |
michael@0 | 169 | addConverter(const char *converter); |
michael@0 | 170 | |
michael@0 | 171 | static char * |
michael@0 | 172 | allocString(StringBlock *block, const char *s, int32_t length); |
michael@0 | 173 | |
michael@0 | 174 | static uint16_t |
michael@0 | 175 | addToKnownAliases(const char *alias); |
michael@0 | 176 | |
michael@0 | 177 | static int |
michael@0 | 178 | compareAliases(const void *alias1, const void *alias2); |
michael@0 | 179 | |
michael@0 | 180 | static uint16_t |
michael@0 | 181 | getTagNumber(const char *tag, uint16_t tagLen); |
michael@0 | 182 | |
michael@0 | 183 | /*static void |
michael@0 | 184 | addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter);*/ |
michael@0 | 185 | |
michael@0 | 186 | static void |
michael@0 | 187 | writeAliasTable(UNewDataMemory *out); |
michael@0 | 188 | |
michael@0 | 189 | /* -------------------------------------------------------------------------- */ |
michael@0 | 190 | |
michael@0 | 191 | /* Presumes that you used allocString() */ |
/*
 * Convert between a 16-bit string index and a pointer into the string/tag
 * store. Indexes count 2-byte units, hence the shift by one.
 * Presumes that you used allocString().
 */
#define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
#define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))

/*
 * Presumes that you used allocString().
 * The argument is parenthesized so that compound expressions
 * (e.g. a conditional) are subtracted as a whole.
 */
#define GET_ALIAS_NUM(str) ((uint16_t)(((str) - stringStore) >> 1))
#define GET_TAG_NUM(str) ((uint16_t)(((str) - tagStore) >> 1))
michael@0 | 198 | |
/*
 * Indexes into options[] below (main() reads options[HELP1], options[DESTDIR], ...).
 * The enumerators must stay in the same order as the initializers of options[].
 */
enum
{
    HELP1,
    HELP2,
    VERBOSE,
    COPYRIGHT,
    DESTDIR,
    SOURCEDIR
};

/* Command line options understood by this tool; filled in by u_parseArgs() in main(). */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_VERBOSE,
    UOPTION_COPYRIGHT,
    UOPTION_DESTDIR,
    UOPTION_SOURCEDIR
};
michael@0 | 217 | |
michael@0 | 218 | extern int |
michael@0 | 219 | main(int argc, char* argv[]) { |
michael@0 | 220 | int i, n; |
michael@0 | 221 | char pathBuf[512]; |
michael@0 | 222 | FileStream *in; |
michael@0 | 223 | UNewDataMemory *out; |
michael@0 | 224 | UErrorCode errorCode=U_ZERO_ERROR; |
michael@0 | 225 | |
michael@0 | 226 | U_MAIN_INIT_ARGS(argc, argv); |
michael@0 | 227 | |
michael@0 | 228 | /* preset then read command line options */ |
michael@0 | 229 | options[DESTDIR].value=options[SOURCEDIR].value=u_getDataDirectory(); |
michael@0 | 230 | argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); |
michael@0 | 231 | |
michael@0 | 232 | /* error handling, printing usage message */ |
michael@0 | 233 | if(argc<0) { |
michael@0 | 234 | fprintf(stderr, |
michael@0 | 235 | "error in command line argument \"%s\"\n", |
michael@0 | 236 | argv[-argc]); |
michael@0 | 237 | } |
michael@0 | 238 | if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) { |
michael@0 | 239 | fprintf(stderr, |
michael@0 | 240 | "usage: %s [-options] [convrtrs.txt]\n" |
michael@0 | 241 | "\tread convrtrs.txt and create " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE "\n" |
michael@0 | 242 | "options:\n" |
michael@0 | 243 | "\t-h or -? or --help this usage text\n" |
michael@0 | 244 | "\t-v or --verbose prints out extra information about the alias table\n" |
michael@0 | 245 | "\t-c or --copyright include a copyright notice\n" |
michael@0 | 246 | "\t-d or --destdir destination directory, followed by the path\n" |
michael@0 | 247 | "\t-s or --sourcedir source directory, followed by the path\n", |
michael@0 | 248 | argv[0]); |
michael@0 | 249 | return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
michael@0 | 250 | } |
michael@0 | 251 | |
michael@0 | 252 | if(options[VERBOSE].doesOccur) { |
michael@0 | 253 | verbose = TRUE; |
michael@0 | 254 | } |
michael@0 | 255 | |
michael@0 | 256 | if(argc>=2) { |
michael@0 | 257 | path=argv[1]; |
michael@0 | 258 | } else { |
michael@0 | 259 | path=options[SOURCEDIR].value; |
michael@0 | 260 | if(path!=NULL && *path!=0) { |
michael@0 | 261 | char *end; |
michael@0 | 262 | |
michael@0 | 263 | uprv_strcpy(pathBuf, path); |
michael@0 | 264 | end = uprv_strchr(pathBuf, 0); |
michael@0 | 265 | if(*(end-1)!=U_FILE_SEP_CHAR) { |
michael@0 | 266 | *(end++)=U_FILE_SEP_CHAR; |
michael@0 | 267 | } |
michael@0 | 268 | uprv_strcpy(end, "convrtrs.txt"); |
michael@0 | 269 | path=pathBuf; |
michael@0 | 270 | } else { |
michael@0 | 271 | path = "convrtrs.txt"; |
michael@0 | 272 | } |
michael@0 | 273 | } |
michael@0 | 274 | |
michael@0 | 275 | uprv_memset(stringStore, 0, sizeof(stringStore)); |
michael@0 | 276 | uprv_memset(tagStore, 0, sizeof(tagStore)); |
michael@0 | 277 | uprv_memset(converters, 0, sizeof(converters)); |
michael@0 | 278 | uprv_memset(tags, 0, sizeof(tags)); |
michael@0 | 279 | uprv_memset(aliasLists, 0, sizeof(aliasLists)); |
michael@0 | 280 | uprv_memset(knownAliases, 0, sizeof(aliasLists)); |
michael@0 | 281 | |
michael@0 | 282 | |
michael@0 | 283 | in=T_FileStream_open(path, "r"); |
michael@0 | 284 | if(in==NULL) { |
michael@0 | 285 | fprintf(stderr, "gencnval: unable to open input file %s\n", path); |
michael@0 | 286 | exit(U_FILE_ACCESS_ERROR); |
michael@0 | 287 | } |
michael@0 | 288 | parseFile(in); |
michael@0 | 289 | T_FileStream_close(in); |
michael@0 | 290 | |
michael@0 | 291 | /* create the output file */ |
michael@0 | 292 | out=udata_create(options[DESTDIR].value, DATA_TYPE, DATA_NAME, &dataInfo, |
michael@0 | 293 | options[COPYRIGHT].doesOccur ? U_COPYRIGHT_STRING : NULL, &errorCode); |
michael@0 | 294 | if(U_FAILURE(errorCode)) { |
michael@0 | 295 | fprintf(stderr, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode)); |
michael@0 | 296 | exit(errorCode); |
michael@0 | 297 | } |
michael@0 | 298 | |
michael@0 | 299 | /* write the table of aliases based on a tag/converter name combination */ |
michael@0 | 300 | writeAliasTable(out); |
michael@0 | 301 | |
michael@0 | 302 | /* finish */ |
michael@0 | 303 | udata_finish(out, &errorCode); |
michael@0 | 304 | if(U_FAILURE(errorCode)) { |
michael@0 | 305 | fprintf(stderr, "gencnval: error finishing output file - %s\n", u_errorName(errorCode)); |
michael@0 | 306 | exit(errorCode); |
michael@0 | 307 | } |
michael@0 | 308 | |
michael@0 | 309 | /* clean up tags */ |
michael@0 | 310 | for (i = 0; i < MAX_TAG_COUNT; i++) { |
michael@0 | 311 | for (n = 0; n < MAX_CONV_COUNT; n++) { |
michael@0 | 312 | if (tags[i].aliasList[n].aliases!=NULL) { |
michael@0 | 313 | uprv_free(tags[i].aliasList[n].aliases); |
michael@0 | 314 | } |
michael@0 | 315 | } |
michael@0 | 316 | } |
michael@0 | 317 | |
michael@0 | 318 | return 0; |
michael@0 | 319 | } |
michael@0 | 320 | |
michael@0 | 321 | static void |
michael@0 | 322 | parseFile(FileStream *in) { |
michael@0 | 323 | char line[MAX_LINE_SIZE]; |
michael@0 | 324 | char lastLine[MAX_LINE_SIZE]; |
michael@0 | 325 | int32_t lineSize = 0; |
michael@0 | 326 | int32_t lastLineSize = 0; |
michael@0 | 327 | UBool validParse = TRUE; |
michael@0 | 328 | |
michael@0 | 329 | lineNum = 0; |
michael@0 | 330 | |
michael@0 | 331 | /* Add the empty tag, which is for untagged aliases */ |
michael@0 | 332 | getTagNumber("", 0); |
michael@0 | 333 | getTagNumber(ALL_TAG_STR, 3); |
michael@0 | 334 | allocString(&stringBlock, "", 0); |
michael@0 | 335 | |
michael@0 | 336 | /* read the list of aliases */ |
michael@0 | 337 | while (validParse) { |
michael@0 | 338 | validParse = FALSE; |
michael@0 | 339 | |
michael@0 | 340 | /* Read non-empty lines that don't start with a space character. */ |
michael@0 | 341 | while (T_FileStream_readLine(in, lastLine, MAX_LINE_SIZE) != NULL) { |
michael@0 | 342 | lastLineSize = chomp(lastLine); |
michael@0 | 343 | if (lineSize == 0 || (lastLineSize > 0 && isspace((int)*lastLine))) { |
michael@0 | 344 | uprv_strcpy(line + lineSize, lastLine); |
michael@0 | 345 | lineSize += lastLineSize; |
michael@0 | 346 | } else if (lineSize > 0) { |
michael@0 | 347 | validParse = TRUE; |
michael@0 | 348 | break; |
michael@0 | 349 | } |
michael@0 | 350 | lineNum++; |
michael@0 | 351 | } |
michael@0 | 352 | |
michael@0 | 353 | if (validParse || lineSize > 0) { |
michael@0 | 354 | if (isspace((int)*line)) { |
michael@0 | 355 | fprintf(stderr, "%s:%d: error: cannot start an alias with a space\n", path, lineNum-1); |
michael@0 | 356 | exit(U_PARSE_ERROR); |
michael@0 | 357 | } else if (line[0] == '{') { |
michael@0 | 358 | if (!standardTagsUsed && line[lineSize - 1] != '}') { |
michael@0 | 359 | fprintf(stderr, "%s:%d: error: alias needs to start with a converter name\n", path, lineNum); |
michael@0 | 360 | exit(U_PARSE_ERROR); |
michael@0 | 361 | } |
michael@0 | 362 | addOfficialTaggedStandards(line, lineSize); |
michael@0 | 363 | standardTagsUsed = TRUE; |
michael@0 | 364 | } else { |
michael@0 | 365 | if (standardTagsUsed) { |
michael@0 | 366 | parseLine(line); |
michael@0 | 367 | } |
michael@0 | 368 | else { |
michael@0 | 369 | fprintf(stderr, "%s:%d: error: alias table needs to start a list of standard tags\n", path, lineNum); |
michael@0 | 370 | exit(U_PARSE_ERROR); |
michael@0 | 371 | } |
michael@0 | 372 | } |
michael@0 | 373 | /* Was the last line consumed */ |
michael@0 | 374 | if (lastLineSize > 0) { |
michael@0 | 375 | uprv_strcpy(line, lastLine); |
michael@0 | 376 | lineSize = lastLineSize; |
michael@0 | 377 | } |
michael@0 | 378 | else { |
michael@0 | 379 | lineSize = 0; |
michael@0 | 380 | } |
michael@0 | 381 | } |
michael@0 | 382 | lineNum++; |
michael@0 | 383 | } |
michael@0 | 384 | } |
michael@0 | 385 | |
michael@0 | 386 | /* This works almost like the Perl chomp. |
michael@0 | 387 | It removes the newlines, comments and trailing whitespace (not preceding whitespace). |
michael@0 | 388 | */ |
/**
 * Works almost like the Perl chomp: cuts the line at the first newline,
 * carriage return or '#' comment start, then removes trailing whitespace.
 * Leading whitespace is kept.
 *
 * Trailing whitespace is only removed when the last non-space character
 * lies beyond line[0]. A line that is all whitespace before the cut, or
 * whose only non-space character is line[0], therefore keeps its
 * whitespace. This is kept on purpose: parseFile() treats a non-empty line
 * that starts with whitespace as a continuation line, so e.g. an indented
 * comment line must not collapse to an empty string.
 *
 * @param line NUL-terminated buffer, modified in place
 * @return the length of the resulting string
 */
static int32_t
chomp(char *line) {
    char *s = line;
    char *lastNonSpace = line;
    while(*s!=0) {
        /* truncate at a newline or a comment */
        if(*s == '\r' || *s == '\n' || *s == '#') {
            *s = 0;
            break;
        }
        /* cast: <ctype.h> functions are undefined for negative char values */
        if (!isspace((unsigned char)*s)) {
            lastNonSpace = s;
        }
        ++s;
    }
    if (lastNonSpace > line) {
        /* terminate right after the last non-space character */
        s = lastNonSpace + 1;
        *s = 0;
    }
    return (int32_t)(s - line);
}
michael@0 | 410 | |
michael@0 | 411 | static void |
michael@0 | 412 | parseLine(const char *line) { |
michael@0 | 413 | uint16_t pos=0, start, limit, length, cnv; |
michael@0 | 414 | char *converter, *alias; |
michael@0 | 415 | |
michael@0 | 416 | /* skip leading white space */ |
michael@0 | 417 | /* There is no whitespace at the beginning anymore */ |
michael@0 | 418 | /* while(line[pos]!=0 && isspace(line[pos])) { |
michael@0 | 419 | ++pos; |
michael@0 | 420 | } |
michael@0 | 421 | */ |
michael@0 | 422 | |
michael@0 | 423 | /* is there nothing on this line? */ |
michael@0 | 424 | if(line[pos]==0) { |
michael@0 | 425 | return; |
michael@0 | 426 | } |
michael@0 | 427 | |
michael@0 | 428 | /* get the converter name */ |
michael@0 | 429 | start=pos; |
michael@0 | 430 | while(line[pos]!=0 && !isspace((int)line[pos])) { |
michael@0 | 431 | ++pos; |
michael@0 | 432 | } |
michael@0 | 433 | limit=pos; |
michael@0 | 434 | |
michael@0 | 435 | /* store the converter name */ |
michael@0 | 436 | length=(uint16_t)(limit-start); |
michael@0 | 437 | converter=allocString(&stringBlock, line+start, length); |
michael@0 | 438 | |
michael@0 | 439 | /* add the converter to the converter table */ |
michael@0 | 440 | cnv=addConverter(converter); |
michael@0 | 441 | |
michael@0 | 442 | /* The name itself may be tagged, so let's added it to the aliases list properly */ |
michael@0 | 443 | pos = start; |
michael@0 | 444 | |
michael@0 | 445 | /* get all the real aliases */ |
michael@0 | 446 | for(;;) { |
michael@0 | 447 | |
michael@0 | 448 | /* skip white space */ |
michael@0 | 449 | while(line[pos]!=0 && isspace((int)line[pos])) { |
michael@0 | 450 | ++pos; |
michael@0 | 451 | } |
michael@0 | 452 | |
michael@0 | 453 | /* is there no more alias name on this line? */ |
michael@0 | 454 | if(line[pos]==0) { |
michael@0 | 455 | break; |
michael@0 | 456 | } |
michael@0 | 457 | |
michael@0 | 458 | /* get an alias name */ |
michael@0 | 459 | start=pos; |
michael@0 | 460 | while(line[pos]!=0 && line[pos]!='{' && !isspace((int)line[pos])) { |
michael@0 | 461 | ++pos; |
michael@0 | 462 | } |
michael@0 | 463 | limit=pos; |
michael@0 | 464 | |
michael@0 | 465 | /* store the alias name */ |
michael@0 | 466 | length=(uint16_t)(limit-start); |
michael@0 | 467 | if (start == 0) { |
michael@0 | 468 | /* add the converter as its own alias to the alias table */ |
michael@0 | 469 | alias = converter; |
michael@0 | 470 | addAlias(alias, ALL_TAG_NUM, cnv, TRUE); |
michael@0 | 471 | } |
michael@0 | 472 | else { |
michael@0 | 473 | alias=allocString(&stringBlock, line+start, length); |
michael@0 | 474 | addAlias(alias, ALL_TAG_NUM, cnv, FALSE); |
michael@0 | 475 | } |
michael@0 | 476 | addToKnownAliases(alias); |
michael@0 | 477 | |
michael@0 | 478 | /* add the alias/converter pair to the alias table */ |
michael@0 | 479 | /* addAlias(alias, 0, cnv, FALSE);*/ |
michael@0 | 480 | |
michael@0 | 481 | /* skip whitespace */ |
michael@0 | 482 | while (line[pos] && isspace((int)line[pos])) { |
michael@0 | 483 | ++pos; |
michael@0 | 484 | } |
michael@0 | 485 | |
michael@0 | 486 | /* handle tags if they are present */ |
michael@0 | 487 | if (line[pos] == '{') { |
michael@0 | 488 | ++pos; |
michael@0 | 489 | do { |
michael@0 | 490 | start = pos; |
michael@0 | 491 | while (line[pos] && line[pos] != '}' && !isspace((int)line[pos])) { |
michael@0 | 492 | ++pos; |
michael@0 | 493 | } |
michael@0 | 494 | limit = pos; |
michael@0 | 495 | |
michael@0 | 496 | if (start != limit) { |
michael@0 | 497 | /* add the tag to the tag table */ |
michael@0 | 498 | uint16_t tag = getTagNumber(line + start, (uint16_t)(limit - start)); |
michael@0 | 499 | addAlias(alias, tag, cnv, (UBool)(line[limit-1] == '*')); |
michael@0 | 500 | } |
michael@0 | 501 | |
michael@0 | 502 | while (line[pos] && isspace((int)line[pos])) { |
michael@0 | 503 | ++pos; |
michael@0 | 504 | } |
michael@0 | 505 | } while (line[pos] && line[pos] != '}'); |
michael@0 | 506 | |
michael@0 | 507 | if (line[pos] == '}') { |
michael@0 | 508 | ++pos; |
michael@0 | 509 | } else { |
michael@0 | 510 | fprintf(stderr, "%s:%d: Unterminated tag list\n", path, lineNum); |
michael@0 | 511 | exit(U_UNMATCHED_BRACES); |
michael@0 | 512 | } |
michael@0 | 513 | } else { |
michael@0 | 514 | addAlias(alias, EMPTY_TAG_NUM, cnv, (UBool)(tags[0].aliasList[cnv].aliasCount == 0)); |
michael@0 | 515 | } |
michael@0 | 516 | } |
michael@0 | 517 | } |
michael@0 | 518 | |
michael@0 | 519 | static uint16_t |
michael@0 | 520 | getTagNumber(const char *tag, uint16_t tagLen) { |
michael@0 | 521 | char *atag; |
michael@0 | 522 | uint16_t t; |
michael@0 | 523 | UBool preferredName = ((tagLen > 0) ? (tag[tagLen - 1] == '*') : (FALSE)); |
michael@0 | 524 | |
michael@0 | 525 | if (tagCount >= MAX_TAG_COUNT) { |
michael@0 | 526 | fprintf(stderr, "%s:%d: too many tags\n", path, lineNum); |
michael@0 | 527 | exit(U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 528 | } |
michael@0 | 529 | |
michael@0 | 530 | if (preferredName) { |
michael@0 | 531 | /* puts(tag);*/ |
michael@0 | 532 | tagLen--; |
michael@0 | 533 | } |
michael@0 | 534 | |
michael@0 | 535 | for (t = 0; t < tagCount; ++t) { |
michael@0 | 536 | const char *currTag = GET_TAG_STR(tags[t].tag); |
michael@0 | 537 | if (uprv_strlen(currTag) == tagLen && !uprv_strnicmp(currTag, tag, tagLen)) { |
michael@0 | 538 | return t; |
michael@0 | 539 | } |
michael@0 | 540 | } |
michael@0 | 541 | |
michael@0 | 542 | /* we need to add this tag */ |
michael@0 | 543 | if (tagCount >= MAX_TAG_COUNT) { |
michael@0 | 544 | fprintf(stderr, "%s:%d: error: too many tags\n", path, lineNum); |
michael@0 | 545 | exit(U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 546 | } |
michael@0 | 547 | |
michael@0 | 548 | /* allocate a new entry in the tag table */ |
michael@0 | 549 | atag = allocString(&tagBlock, tag, tagLen); |
michael@0 | 550 | |
michael@0 | 551 | if (standardTagsUsed) { |
michael@0 | 552 | fprintf(stderr, "%s:%d: error: Tag \"%s\" is not declared at the beginning of the alias table.\n", |
michael@0 | 553 | path, lineNum, atag); |
michael@0 | 554 | exit(1); |
michael@0 | 555 | } |
michael@0 | 556 | else if (tagLen > 0 && strcmp(tag, ALL_TAG_STR) != 0) { |
michael@0 | 557 | fprintf(stderr, "%s:%d: warning: Tag \"%s\" was added to the list of standards because it was not declared at beginning of the alias table.\n", |
michael@0 | 558 | path, lineNum, atag); |
michael@0 | 559 | } |
michael@0 | 560 | |
michael@0 | 561 | /* add the tag to the tag table */ |
michael@0 | 562 | tags[tagCount].tag = GET_TAG_NUM(atag); |
michael@0 | 563 | /* The aliasList should be set to 0's already */ |
michael@0 | 564 | |
michael@0 | 565 | return tagCount++; |
michael@0 | 566 | } |
michael@0 | 567 | |
michael@0 | 568 | /*static void |
michael@0 | 569 | addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter) { |
michael@0 | 570 | tags[tag].aliases[converter] = alias; |
michael@0 | 571 | } |
michael@0 | 572 | */ |
michael@0 | 573 | |
michael@0 | 574 | static void |
michael@0 | 575 | addOfficialTaggedStandards(char *line, int32_t lineLen) { |
michael@0 | 576 | char *atag; |
michael@0 | 577 | char *endTagExp; |
michael@0 | 578 | char *tag; |
michael@0 | 579 | static const char WHITESPACE[] = " \t"; |
michael@0 | 580 | |
michael@0 | 581 | if (tagCount > UCNV_NUM_RESERVED_TAGS) { |
michael@0 | 582 | fprintf(stderr, "%s:%d: error: official tags already added\n", path, lineNum); |
michael@0 | 583 | exit(U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 584 | } |
michael@0 | 585 | tag = strchr(line, '{'); |
michael@0 | 586 | if (tag == NULL) { |
michael@0 | 587 | /* Why were we called? */ |
michael@0 | 588 | fprintf(stderr, "%s:%d: error: Missing start of tag group\n", path, lineNum); |
michael@0 | 589 | exit(U_PARSE_ERROR); |
michael@0 | 590 | } |
michael@0 | 591 | tag++; |
michael@0 | 592 | endTagExp = strchr(tag, '}'); |
michael@0 | 593 | if (endTagExp == NULL) { |
michael@0 | 594 | fprintf(stderr, "%s:%d: error: Missing end of tag group\n", path, lineNum); |
michael@0 | 595 | exit(U_PARSE_ERROR); |
michael@0 | 596 | } |
michael@0 | 597 | endTagExp[0] = 0; |
michael@0 | 598 | |
michael@0 | 599 | tag = strtok(tag, WHITESPACE); |
michael@0 | 600 | while (tag != NULL) { |
michael@0 | 601 | /* printf("Adding original tag \"%s\"\n", tag);*/ |
michael@0 | 602 | |
michael@0 | 603 | /* allocate a new entry in the tag table */ |
michael@0 | 604 | atag = allocString(&tagBlock, tag, -1); |
michael@0 | 605 | |
michael@0 | 606 | /* add the tag to the tag table */ |
michael@0 | 607 | tags[tagCount++].tag = (uint16_t)((atag - tagStore) >> 1); |
michael@0 | 608 | |
michael@0 | 609 | /* The aliasList should already be set to 0's */ |
michael@0 | 610 | |
michael@0 | 611 | /* Get next tag */ |
michael@0 | 612 | tag = strtok(NULL, WHITESPACE); |
michael@0 | 613 | } |
michael@0 | 614 | } |
michael@0 | 615 | |
michael@0 | 616 | static uint16_t |
michael@0 | 617 | addToKnownAliases(const char *alias) { |
michael@0 | 618 | /* uint32_t idx; */ |
michael@0 | 619 | /* strict matching */ |
michael@0 | 620 | /* for (idx = 0; idx < knownAliasesCount; idx++) { |
michael@0 | 621 | uint16_t num = GET_ALIAS_NUM(alias); |
michael@0 | 622 | if (knownAliases[idx] != num |
michael@0 | 623 | && uprv_strcmp(alias, GET_ALIAS_STR(knownAliases[idx])) == 0) |
michael@0 | 624 | { |
michael@0 | 625 | fprintf(stderr, "%s:%d: warning: duplicate alias %s and %s found\n", path, |
michael@0 | 626 | lineNum, alias, GET_ALIAS_STR(knownAliases[idx])); |
michael@0 | 627 | duplicateKnownAliasesCount++; |
michael@0 | 628 | break; |
michael@0 | 629 | } |
michael@0 | 630 | else if (knownAliases[idx] != num |
michael@0 | 631 | && ucnv_compareNames(alias, GET_ALIAS_STR(knownAliases[idx])) == 0) |
michael@0 | 632 | { |
michael@0 | 633 | if (verbose) { |
michael@0 | 634 | fprintf(stderr, "%s:%d: information: duplicate alias %s and %s found\n", path, |
michael@0 | 635 | lineNum, alias, GET_ALIAS_STR(knownAliases[idx])); |
michael@0 | 636 | } |
michael@0 | 637 | duplicateKnownAliasesCount++; |
michael@0 | 638 | break; |
michael@0 | 639 | } |
michael@0 | 640 | } |
michael@0 | 641 | */ |
michael@0 | 642 | if (knownAliasesCount >= MAX_ALIAS_COUNT) { |
michael@0 | 643 | fprintf(stderr, "%s:%d: warning: Too many aliases defined for all converters\n", |
michael@0 | 644 | path, lineNum); |
michael@0 | 645 | exit(U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 646 | } |
michael@0 | 647 | /* TODO: We could try to unlist exact duplicates. */ |
michael@0 | 648 | return knownAliases[knownAliasesCount++] = GET_ALIAS_NUM(alias); |
michael@0 | 649 | } |
michael@0 | 650 | |
michael@0 | 651 | /* |
michael@0 | 652 | @param standard When standard is 0, then it's the "empty" tag. |
michael@0 | 653 | */ |
michael@0 | 654 | static uint16_t |
michael@0 | 655 | addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName) { |
michael@0 | 656 | uint32_t idx, idx2; |
michael@0 | 657 | UBool startEmptyWithoutDefault = FALSE; |
michael@0 | 658 | AliasList *aliasList; |
michael@0 | 659 | |
michael@0 | 660 | if(standard>=MAX_TAG_COUNT) { |
michael@0 | 661 | fprintf(stderr, "%s:%d: error: too many standard tags\n", path, lineNum); |
michael@0 | 662 | exit(U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 663 | } |
michael@0 | 664 | if(converter>=MAX_CONV_COUNT) { |
michael@0 | 665 | fprintf(stderr, "%s:%d: error: too many converter names\n", path, lineNum); |
michael@0 | 666 | exit(U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 667 | } |
michael@0 | 668 | aliasList = &tags[standard].aliasList[converter]; |
michael@0 | 669 | |
michael@0 | 670 | if (strchr(alias, '}')) { |
michael@0 | 671 | fprintf(stderr, "%s:%d: error: unmatched } found\n", path, |
michael@0 | 672 | lineNum); |
michael@0 | 673 | } |
michael@0 | 674 | |
michael@0 | 675 | if(aliasList->aliasCount + 1 >= MAX_TC_ALIAS_COUNT) { |
michael@0 | 676 | fprintf(stderr, "%s:%d: error: too many aliases for alias %s and converter %s\n", path, |
michael@0 | 677 | lineNum, alias, GET_ALIAS_STR(converters[converter].converter)); |
michael@0 | 678 | exit(U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 679 | } |
michael@0 | 680 | |
michael@0 | 681 | /* Show this warning only once. All aliases are added to the "ALL" tag. */ |
michael@0 | 682 | if (standard == ALL_TAG_NUM && GET_ALIAS_STR(converters[converter].converter) != alias) { |
michael@0 | 683 | /* Normally these option values are parsed at runtime, and they can |
michael@0 | 684 | be discarded when the alias is a default converter. Options should |
michael@0 | 685 | only be on a converter and not an alias. */ |
michael@0 | 686 | if (uprv_strchr(alias, UCNV_OPTION_SEP_CHAR) != 0) |
michael@0 | 687 | { |
michael@0 | 688 | fprintf(stderr, "warning(line %d): alias %s contains a \""UCNV_OPTION_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n", |
michael@0 | 689 | lineNum, alias); |
michael@0 | 690 | } |
michael@0 | 691 | if (uprv_strchr(alias, UCNV_VALUE_SEP_CHAR) != 0) |
michael@0 | 692 | { |
michael@0 | 693 | fprintf(stderr, "warning(line %d): alias %s contains an \""UCNV_VALUE_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n", |
michael@0 | 694 | lineNum, alias); |
michael@0 | 695 | } |
michael@0 | 696 | } |
michael@0 | 697 | |
michael@0 | 698 | if (standard != ALL_TAG_NUM) { |
michael@0 | 699 | /* Check for duplicate aliases for this tag on all converters */ |
michael@0 | 700 | for (idx = 0; idx < converterCount; idx++) { |
michael@0 | 701 | for (idx2 = 0; idx2 < tags[standard].aliasList[idx].aliasCount; idx2++) { |
michael@0 | 702 | uint16_t aliasNum = tags[standard].aliasList[idx].aliases[idx2]; |
michael@0 | 703 | if (aliasNum |
michael@0 | 704 | && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0) |
michael@0 | 705 | { |
michael@0 | 706 | if (idx == converter) { |
michael@0 | 707 | /* |
michael@0 | 708 | * (alias, standard) duplicates are harmless if they map to the same converter. |
michael@0 | 709 | * Only print a warning in verbose mode, or if the alias is a precise duplicate, |
michael@0 | 710 | * not just a lenient-match duplicate. |
michael@0 | 711 | */ |
michael@0 | 712 | if (verbose || 0 == uprv_strcmp(alias, GET_ALIAS_STR(aliasNum))) { |
michael@0 | 713 | fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard %s and converter %s\n", path, |
michael@0 | 714 | lineNum, alias, GET_ALIAS_STR(aliasNum), |
michael@0 | 715 | GET_TAG_STR(tags[standard].tag), |
michael@0 | 716 | GET_ALIAS_STR(converters[converter].converter)); |
michael@0 | 717 | } |
michael@0 | 718 | } else { |
michael@0 | 719 | fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard tag %s between converter %s and converter %s\n", path, |
michael@0 | 720 | lineNum, alias, GET_ALIAS_STR(aliasNum), |
michael@0 | 721 | GET_TAG_STR(tags[standard].tag), |
michael@0 | 722 | GET_ALIAS_STR(converters[converter].converter), |
michael@0 | 723 | GET_ALIAS_STR(converters[idx].converter)); |
michael@0 | 724 | } |
michael@0 | 725 | break; |
michael@0 | 726 | } |
michael@0 | 727 | } |
michael@0 | 728 | } |
michael@0 | 729 | |
michael@0 | 730 | /* Check for duplicate default aliases for this converter on all tags */ |
michael@0 | 731 | /* It's okay to have multiple standards prefer the same name */ |
michael@0 | 732 | /* if (verbose && !dupFound) { |
michael@0 | 733 | for (idx = 0; idx < tagCount; idx++) { |
michael@0 | 734 | if (tags[idx].aliasList[converter].aliases) { |
michael@0 | 735 | uint16_t aliasNum = tags[idx].aliasList[converter].aliases[0]; |
michael@0 | 736 | if (aliasNum |
michael@0 | 737 | && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0) |
michael@0 | 738 | { |
michael@0 | 739 | fprintf(stderr, "%s:%d: warning: duplicate alias %s found for converter %s and standard tag %s\n", path, |
michael@0 | 740 | lineNum, alias, GET_ALIAS_STR(converters[converter].converter), GET_TAG_STR(tags[standard].tag)); |
michael@0 | 741 | break; |
michael@0 | 742 | } |
michael@0 | 743 | } |
michael@0 | 744 | } |
michael@0 | 745 | }*/ |
michael@0 | 746 | } |
michael@0 | 747 | |
michael@0 | 748 | if (aliasList->aliasCount <= 0) { |
michael@0 | 749 | aliasList->aliasCount++; |
michael@0 | 750 | startEmptyWithoutDefault = TRUE; |
michael@0 | 751 | } |
michael@0 | 752 | aliasList->aliases = (uint16_t *)uprv_realloc(aliasList->aliases, (aliasList->aliasCount + 1) * sizeof(aliasList->aliases[0])); |
michael@0 | 753 | if (startEmptyWithoutDefault) { |
michael@0 | 754 | aliasList->aliases[0] = 0; |
michael@0 | 755 | } |
michael@0 | 756 | if (defaultName) { |
michael@0 | 757 | if (aliasList->aliases[0] != 0) { |
michael@0 | 758 | fprintf(stderr, "%s:%d: error: Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n", path, |
michael@0 | 759 | lineNum, |
michael@0 | 760 | alias, |
michael@0 | 761 | GET_ALIAS_STR(aliasList->aliases[0]), |
michael@0 | 762 | GET_TAG_STR(tags[standard].tag), |
michael@0 | 763 | GET_ALIAS_STR(converters[converter].converter)); |
michael@0 | 764 | exit(U_PARSE_ERROR); |
michael@0 | 765 | } |
michael@0 | 766 | aliasList->aliases[0] = GET_ALIAS_NUM(alias); |
michael@0 | 767 | } else { |
michael@0 | 768 | aliasList->aliases[aliasList->aliasCount++] = GET_ALIAS_NUM(alias); |
michael@0 | 769 | } |
michael@0 | 770 | /* aliasList->converter = converter;*/ |
michael@0 | 771 | |
michael@0 | 772 | converters[converter].totalAliasCount++; /* One more to the column */ |
michael@0 | 773 | tags[standard].totalAliasCount++; /* One more to the row */ |
michael@0 | 774 | |
michael@0 | 775 | return aliasList->aliasCount; |
michael@0 | 776 | } |
michael@0 | 777 | |
michael@0 | 778 | static uint16_t |
michael@0 | 779 | addConverter(const char *converter) { |
michael@0 | 780 | uint32_t idx; |
michael@0 | 781 | if(converterCount>=MAX_CONV_COUNT) { |
michael@0 | 782 | fprintf(stderr, "%s:%d: error: too many converters\n", path, lineNum); |
michael@0 | 783 | exit(U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 784 | } |
michael@0 | 785 | |
michael@0 | 786 | for (idx = 0; idx < converterCount; idx++) { |
michael@0 | 787 | if (ucnv_compareNames(converter, GET_ALIAS_STR(converters[idx].converter)) == 0) { |
michael@0 | 788 | fprintf(stderr, "%s:%d: error: duplicate converter %s found!\n", path, lineNum, converter); |
michael@0 | 789 | exit(U_PARSE_ERROR); |
michael@0 | 790 | break; |
michael@0 | 791 | } |
michael@0 | 792 | } |
michael@0 | 793 | |
michael@0 | 794 | converters[converterCount].converter = GET_ALIAS_NUM(converter); |
michael@0 | 795 | converters[converterCount].totalAliasCount = 0; |
michael@0 | 796 | |
michael@0 | 797 | return converterCount++; |
michael@0 | 798 | } |
michael@0 | 799 | |
michael@0 | 800 | /* resolve this alias based on the prioritization of the standard tags. */ |
michael@0 | 801 | static void |
michael@0 | 802 | resolveAliasToConverter(uint16_t alias, uint16_t *tagNum, uint16_t *converterNum) { |
michael@0 | 803 | uint16_t idx, idx2, idx3; |
michael@0 | 804 | |
michael@0 | 805 | for (idx = UCNV_NUM_RESERVED_TAGS; idx < tagCount; idx++) { |
michael@0 | 806 | for (idx2 = 0; idx2 < converterCount; idx2++) { |
michael@0 | 807 | for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) { |
michael@0 | 808 | uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3]; |
michael@0 | 809 | if (aliasNum == alias) { |
michael@0 | 810 | *tagNum = idx; |
michael@0 | 811 | *converterNum = idx2; |
michael@0 | 812 | return; |
michael@0 | 813 | } |
michael@0 | 814 | } |
michael@0 | 815 | } |
michael@0 | 816 | } |
michael@0 | 817 | /* Do the leftovers last, just in case */ |
michael@0 | 818 | /* There is no need to do the ALL tag */ |
michael@0 | 819 | idx = 0; |
michael@0 | 820 | for (idx2 = 0; idx2 < converterCount; idx2++) { |
michael@0 | 821 | for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) { |
michael@0 | 822 | uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3]; |
michael@0 | 823 | if (aliasNum == alias) { |
michael@0 | 824 | *tagNum = idx; |
michael@0 | 825 | *converterNum = idx2; |
michael@0 | 826 | return; |
michael@0 | 827 | } |
michael@0 | 828 | } |
michael@0 | 829 | } |
michael@0 | 830 | *tagNum = UINT16_MAX; |
michael@0 | 831 | *converterNum = UINT16_MAX; |
michael@0 | 832 | fprintf(stderr, "%s: warning: alias %s not found\n", |
michael@0 | 833 | path, |
michael@0 | 834 | GET_ALIAS_STR(alias)); |
michael@0 | 835 | return; |
michael@0 | 836 | } |
michael@0 | 837 | |
michael@0 | 838 | /* The knownAliases should be sorted before calling this function */ |
michael@0 | 839 | static uint32_t |
michael@0 | 840 | resolveAliases(uint16_t *uniqueAliasArr, uint16_t *uniqueAliasToConverterArr, uint16_t aliasOffset) { |
michael@0 | 841 | uint32_t uniqueAliasIdx = 0; |
michael@0 | 842 | uint32_t idx; |
michael@0 | 843 | uint16_t currTagNum, oldTagNum; |
michael@0 | 844 | uint16_t currConvNum, oldConvNum; |
michael@0 | 845 | const char *lastName; |
michael@0 | 846 | |
michael@0 | 847 | resolveAliasToConverter(knownAliases[0], &oldTagNum, &currConvNum); |
michael@0 | 848 | uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum; |
michael@0 | 849 | oldConvNum = currConvNum; |
michael@0 | 850 | uniqueAliasArr[uniqueAliasIdx] = knownAliases[0] + aliasOffset; |
michael@0 | 851 | uniqueAliasIdx++; |
michael@0 | 852 | lastName = GET_ALIAS_STR(knownAliases[0]); |
michael@0 | 853 | |
michael@0 | 854 | for (idx = 1; idx < knownAliasesCount; idx++) { |
michael@0 | 855 | resolveAliasToConverter(knownAliases[idx], &currTagNum, &currConvNum); |
michael@0 | 856 | if (ucnv_compareNames(lastName, GET_ALIAS_STR(knownAliases[idx])) == 0) { |
michael@0 | 857 | /* duplicate found */ |
michael@0 | 858 | if ((currTagNum < oldTagNum && currTagNum >= UCNV_NUM_RESERVED_TAGS) |
michael@0 | 859 | || oldTagNum == 0) { |
michael@0 | 860 | oldTagNum = currTagNum; |
michael@0 | 861 | uniqueAliasToConverterArr[uniqueAliasIdx - 1] = currConvNum; |
michael@0 | 862 | uniqueAliasArr[uniqueAliasIdx - 1] = knownAliases[idx] + aliasOffset; |
michael@0 | 863 | if (verbose) { |
michael@0 | 864 | printf("using %s instead of %s -> %s", |
michael@0 | 865 | GET_ALIAS_STR(knownAliases[idx]), |
michael@0 | 866 | lastName, |
michael@0 | 867 | GET_ALIAS_STR(converters[currConvNum].converter)); |
michael@0 | 868 | if (oldConvNum != currConvNum) { |
michael@0 | 869 | printf(" (alias conflict)"); |
michael@0 | 870 | } |
michael@0 | 871 | puts(""); |
michael@0 | 872 | } |
michael@0 | 873 | } |
michael@0 | 874 | else { |
michael@0 | 875 | /* else ignore it */ |
michael@0 | 876 | if (verbose) { |
michael@0 | 877 | printf("folding %s into %s -> %s", |
michael@0 | 878 | GET_ALIAS_STR(knownAliases[idx]), |
michael@0 | 879 | lastName, |
michael@0 | 880 | GET_ALIAS_STR(converters[oldConvNum].converter)); |
michael@0 | 881 | if (oldConvNum != currConvNum) { |
michael@0 | 882 | printf(" (alias conflict)"); |
michael@0 | 883 | } |
michael@0 | 884 | puts(""); |
michael@0 | 885 | } |
michael@0 | 886 | } |
michael@0 | 887 | if (oldConvNum != currConvNum) { |
michael@0 | 888 | uniqueAliasToConverterArr[uniqueAliasIdx - 1] |= UCNV_AMBIGUOUS_ALIAS_MAP_BIT; |
michael@0 | 889 | } |
michael@0 | 890 | } |
michael@0 | 891 | else { |
michael@0 | 892 | uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum; |
michael@0 | 893 | oldConvNum = currConvNum; |
michael@0 | 894 | uniqueAliasArr[uniqueAliasIdx] = knownAliases[idx] + aliasOffset; |
michael@0 | 895 | uniqueAliasIdx++; |
michael@0 | 896 | lastName = GET_ALIAS_STR(knownAliases[idx]); |
michael@0 | 897 | oldTagNum = currTagNum; |
michael@0 | 898 | /*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/ |
michael@0 | 899 | } |
michael@0 | 900 | if (uprv_strchr(GET_ALIAS_STR(converters[currConvNum].converter), UCNV_OPTION_SEP_CHAR) != NULL) { |
michael@0 | 901 | uniqueAliasToConverterArr[uniqueAliasIdx-1] |= UCNV_CONTAINS_OPTION_BIT; |
michael@0 | 902 | } |
michael@0 | 903 | } |
michael@0 | 904 | return uniqueAliasIdx; |
michael@0 | 905 | } |
michael@0 | 906 | |
michael@0 | 907 | static void |
michael@0 | 908 | createOneAliasList(uint16_t *aliasArrLists, uint32_t tag, uint32_t converter, uint16_t offset) { |
michael@0 | 909 | uint32_t aliasNum; |
michael@0 | 910 | AliasList *aliasList = &tags[tag].aliasList[converter]; |
michael@0 | 911 | |
michael@0 | 912 | if (aliasList->aliasCount == 0) { |
michael@0 | 913 | aliasArrLists[tag*converterCount + converter] = 0; |
michael@0 | 914 | } |
michael@0 | 915 | else { |
michael@0 | 916 | aliasLists[aliasListsSize++] = aliasList->aliasCount; |
michael@0 | 917 | |
michael@0 | 918 | /* write into the array area a 1's based index. */ |
michael@0 | 919 | aliasArrLists[tag*converterCount + converter] = aliasListsSize; |
michael@0 | 920 | |
michael@0 | 921 | /* printf("tag %s converter %s\n", |
michael@0 | 922 | GET_TAG_STR(tags[tag].tag), |
michael@0 | 923 | GET_ALIAS_STR(converters[converter].converter));*/ |
michael@0 | 924 | for (aliasNum = 0; aliasNum < aliasList->aliasCount; aliasNum++) { |
michael@0 | 925 | uint16_t value; |
michael@0 | 926 | /* printf(" %s\n", |
michael@0 | 927 | GET_ALIAS_STR(aliasList->aliases[aliasNum]));*/ |
michael@0 | 928 | if (aliasList->aliases[aliasNum]) { |
michael@0 | 929 | value = aliasList->aliases[aliasNum] + offset; |
michael@0 | 930 | } else { |
michael@0 | 931 | value = 0; |
michael@0 | 932 | if (tag != 0) { /* Only show the warning when it's not the leftover tag. */ |
michael@0 | 933 | fprintf(stderr, "%s: warning: tag %s does not have a default alias for %s\n", |
michael@0 | 934 | path, |
michael@0 | 935 | GET_TAG_STR(tags[tag].tag), |
michael@0 | 936 | GET_ALIAS_STR(converters[converter].converter)); |
michael@0 | 937 | } |
michael@0 | 938 | } |
michael@0 | 939 | aliasLists[aliasListsSize++] = value; |
michael@0 | 940 | if (aliasListsSize >= MAX_LIST_SIZE) { |
michael@0 | 941 | fprintf(stderr, "%s: error: Too many alias lists\n", path); |
michael@0 | 942 | exit(U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 943 | } |
michael@0 | 944 | |
michael@0 | 945 | } |
michael@0 | 946 | } |
michael@0 | 947 | } |
michael@0 | 948 | |
/*
 * Build a normalized copy of a block of consecutive NUL-terminated strings.
 *
 * The whole block is first copied verbatim into normalizedStrings. Then
 * each non-empty string is rewritten in place by ucnv_io_stripForCompare(),
 * and when the stripped form is non-empty the rest of that string's slot is
 * zero-filled. Every string keeps its original offset within the block.
 *
 * normalizedStrings  destination buffer of at least stringBlockLength bytes.
 * origStringBlock    source block of NUL-terminated strings.
 * stringBlockLength  number of bytes in the source block.
 */
static void
createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
    char *dst = normalizedStrings;
    const char *src = origStringBlock;
    int32_t remaining = stringBlockLength;

    uprv_memcpy(dst, src, remaining);

    for (;;) {
        int32_t srcLen = (int32_t)uprv_strlen(src);
        int32_t slotSize;

        /* Stop once the current string would not fit in what is left of the block. */
        if (srcLen >= remaining) {
            break;
        }

        slotSize = srcLen + 1;      /* string plus its terminating NUL */
        if (srcLen > 0) {
            int32_t strippedLen;

            ucnv_io_stripForCompare(dst, src);
            strippedLen = uprv_strlen(dst);
            if (strippedLen > 0) {
                uprv_memset(dst + strippedLen, 0, slotSize - strippedLen);
            }
        }

        src += slotSize;
        dst += slotSize;
        remaining -= slotSize;
    }
}
michael@0 | 968 | |
michael@0 | 969 | static void |
michael@0 | 970 | writeAliasTable(UNewDataMemory *out) { |
michael@0 | 971 | uint32_t i, j; |
michael@0 | 972 | uint32_t uniqueAliasesSize; |
michael@0 | 973 | uint16_t aliasOffset = (uint16_t)(tagBlock.top/sizeof(uint16_t)); |
michael@0 | 974 | uint16_t *aliasArrLists = (uint16_t *)uprv_malloc(tagCount * converterCount * sizeof(uint16_t)); |
michael@0 | 975 | uint16_t *uniqueAliases = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t)); |
michael@0 | 976 | uint16_t *uniqueAliasesToConverter = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t)); |
michael@0 | 977 | |
michael@0 | 978 | qsort(knownAliases, knownAliasesCount, sizeof(knownAliases[0]), compareAliases); |
michael@0 | 979 | uniqueAliasesSize = resolveAliases(uniqueAliases, uniqueAliasesToConverter, aliasOffset); |
michael@0 | 980 | |
michael@0 | 981 | /* Array index starts at 1. aliasLists[0] is the size of the lists section. */ |
michael@0 | 982 | aliasListsSize = 0; |
michael@0 | 983 | |
michael@0 | 984 | /* write the offsets of all the aliases lists in a 2D array, and create the lists. */ |
michael@0 | 985 | for (i = 0; i < tagCount; ++i) { |
michael@0 | 986 | for (j = 0; j < converterCount; ++j) { |
michael@0 | 987 | createOneAliasList(aliasArrLists, i, j, aliasOffset); |
michael@0 | 988 | } |
michael@0 | 989 | } |
michael@0 | 990 | |
michael@0 | 991 | /* Write the size of the TOC */ |
michael@0 | 992 | if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) { |
michael@0 | 993 | udata_write32(out, 8); |
michael@0 | 994 | } |
michael@0 | 995 | else { |
michael@0 | 996 | udata_write32(out, 9); |
michael@0 | 997 | } |
michael@0 | 998 | |
michael@0 | 999 | /* Write the sizes of each section */ |
michael@0 | 1000 | /* All sizes are the number of uint16_t units, not bytes */ |
michael@0 | 1001 | udata_write32(out, converterCount); |
michael@0 | 1002 | udata_write32(out, tagCount); |
michael@0 | 1003 | udata_write32(out, uniqueAliasesSize); /* list of aliases */ |
michael@0 | 1004 | udata_write32(out, uniqueAliasesSize); /* The preresolved form of mapping an untagged the alias to a converter */ |
michael@0 | 1005 | udata_write32(out, tagCount * converterCount); |
michael@0 | 1006 | udata_write32(out, aliasListsSize + 1); |
michael@0 | 1007 | udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t)); |
michael@0 | 1008 | udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t)); |
michael@0 | 1009 | if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) { |
michael@0 | 1010 | udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t)); |
michael@0 | 1011 | } |
michael@0 | 1012 | |
michael@0 | 1013 | /* write the table of converters */ |
michael@0 | 1014 | /* Think of this as the column headers */ |
michael@0 | 1015 | for(i=0; i<converterCount; ++i) { |
michael@0 | 1016 | udata_write16(out, (uint16_t)(converters[i].converter + aliasOffset)); |
michael@0 | 1017 | } |
michael@0 | 1018 | |
michael@0 | 1019 | /* write the table of tags */ |
michael@0 | 1020 | /* Think of this as the row headers */ |
michael@0 | 1021 | for(i=UCNV_NUM_RESERVED_TAGS; i<tagCount; ++i) { |
michael@0 | 1022 | udata_write16(out, tags[i].tag); |
michael@0 | 1023 | } |
michael@0 | 1024 | /* The empty tag is considered the leftover list, and put that at the end of the priority list. */ |
michael@0 | 1025 | udata_write16(out, tags[EMPTY_TAG_NUM].tag); |
michael@0 | 1026 | udata_write16(out, tags[ALL_TAG_NUM].tag); |
michael@0 | 1027 | |
michael@0 | 1028 | /* Write the unique list of aliases */ |
michael@0 | 1029 | udata_writeBlock(out, uniqueAliases, uniqueAliasesSize * sizeof(uint16_t)); |
michael@0 | 1030 | |
michael@0 | 1031 | /* Write the unique list of aliases */ |
michael@0 | 1032 | udata_writeBlock(out, uniqueAliasesToConverter, uniqueAliasesSize * sizeof(uint16_t)); |
michael@0 | 1033 | |
michael@0 | 1034 | /* Write the array to the lists */ |
michael@0 | 1035 | udata_writeBlock(out, (const void *)(aliasArrLists + (2*converterCount)), (((tagCount - 2) * converterCount) * sizeof(uint16_t))); |
michael@0 | 1036 | /* Now write the leftover part of the array for the EMPTY and ALL lists */ |
michael@0 | 1037 | udata_writeBlock(out, (const void *)aliasArrLists, (2 * converterCount * sizeof(uint16_t))); |
michael@0 | 1038 | |
michael@0 | 1039 | /* Offset the next array to make the index start at 1. */ |
michael@0 | 1040 | udata_write16(out, 0xDEAD); |
michael@0 | 1041 | |
michael@0 | 1042 | /* Write the lists */ |
michael@0 | 1043 | udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t)); |
michael@0 | 1044 | |
michael@0 | 1045 | /* Write any options for the alias table. */ |
michael@0 | 1046 | udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions)); |
michael@0 | 1047 | |
michael@0 | 1048 | /* write the tags strings */ |
michael@0 | 1049 | udata_writeString(out, tagBlock.store, tagBlock.top); |
michael@0 | 1050 | |
michael@0 | 1051 | /* write the aliases strings */ |
michael@0 | 1052 | udata_writeString(out, stringBlock.store, stringBlock.top); |
michael@0 | 1053 | |
michael@0 | 1054 | /* write the normalized aliases strings */ |
michael@0 | 1055 | if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) { |
michael@0 | 1056 | char *normalizedStrings = (char *)uprv_malloc(tagBlock.top + stringBlock.top); |
michael@0 | 1057 | createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top); |
michael@0 | 1058 | createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top); |
michael@0 | 1059 | |
michael@0 | 1060 | /* Write out the complete normalized array. */ |
michael@0 | 1061 | udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top); |
michael@0 | 1062 | uprv_free(normalizedStrings); |
michael@0 | 1063 | } |
michael@0 | 1064 | |
michael@0 | 1065 | uprv_free(uniqueAliasesToConverter); |
michael@0 | 1066 | uprv_free(uniqueAliases); |
michael@0 | 1067 | uprv_free(aliasArrLists); |
michael@0 | 1068 | } |
michael@0 | 1069 | |
michael@0 | 1070 | static char * |
michael@0 | 1071 | allocString(StringBlock *block, const char *s, int32_t length) { |
michael@0 | 1072 | uint32_t top; |
michael@0 | 1073 | char *p; |
michael@0 | 1074 | |
michael@0 | 1075 | if(length<0) { |
michael@0 | 1076 | length=(int32_t)uprv_strlen(s); |
michael@0 | 1077 | } |
michael@0 | 1078 | |
michael@0 | 1079 | /* |
michael@0 | 1080 | * add 1 for the terminating NUL |
michael@0 | 1081 | * and round up (+1 &~1) |
michael@0 | 1082 | * to keep the addresses on a 16-bit boundary |
michael@0 | 1083 | */ |
michael@0 | 1084 | top=block->top + (uint32_t)((length + 1 + 1) & ~1); |
michael@0 | 1085 | |
michael@0 | 1086 | if(top >= block->max) { |
michael@0 | 1087 | fprintf(stderr, "%s:%d: error: out of memory\n", path, lineNum); |
michael@0 | 1088 | exit(U_MEMORY_ALLOCATION_ERROR); |
michael@0 | 1089 | } |
michael@0 | 1090 | |
michael@0 | 1091 | /* get the pointer and copy the string */ |
michael@0 | 1092 | p = block->store + block->top; |
michael@0 | 1093 | uprv_memcpy(p, s, length); |
michael@0 | 1094 | p[length] = 0; /* NUL-terminate it */ |
michael@0 | 1095 | if((length & 1) == 0) { |
michael@0 | 1096 | p[length + 1] = 0; /* set the padding byte */ |
michael@0 | 1097 | } |
michael@0 | 1098 | |
michael@0 | 1099 | /* check for invariant characters now that we have a NUL-terminated string for easy output */ |
michael@0 | 1100 | if(!uprv_isInvariantString(p, length)) { |
michael@0 | 1101 | fprintf(stderr, "%s:%d: error: the name %s contains not just invariant characters\n", path, lineNum, p); |
michael@0 | 1102 | exit(U_INVALID_TABLE_FORMAT); |
michael@0 | 1103 | } |
michael@0 | 1104 | |
michael@0 | 1105 | block->top = top; |
michael@0 | 1106 | return p; |
michael@0 | 1107 | } |
michael@0 | 1108 | |
/*
 * qsort() comparator for arrays of alias numbers.
 *
 * Names are ordered by ucnv_compareNames(), so names like IBM850 and
 * ibm-850 are sorted together. Names that compare equal are ordered by
 * string length, shortest first.
 */
static int
compareAliases(const void *alias1, const void *alias2) {
    const char *name1 = GET_ALIAS_STR(*(const uint16_t *)alias1);
    const char *name2 = GET_ALIAS_STR(*(const uint16_t *)alias2);
    int order = ucnv_compareNames(name1, name2);

    if (order != 0) {
        return order;
    }
    /* Sort the shortest first */
    return (int)uprv_strlen(name1) - (int)uprv_strlen(name2);
}
michael@0 | 1119 | |
michael@0 | 1120 | /* |
michael@0 | 1121 | * Hey, Emacs, please set the following: |
michael@0 | 1122 | * |
michael@0 | 1123 | * Local Variables: |
michael@0 | 1124 | * indent-tabs-mode: nil |
michael@0 | 1125 | * End: |
michael@0 | 1126 | * |
michael@0 | 1127 | */ |
michael@0 | 1128 |