intl/icu/source/tools/genrb/parse.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1998-2013, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 *
michael@0 9 * File parse.cpp
michael@0 10 *
michael@0 11 * Modification History:
michael@0 12 *
michael@0 13 * Date Name Description
michael@0 14 * 05/26/99 stephen Creation.
michael@0 15 * 02/25/00 weiv Overhaul to write udata
michael@0 16 * 5/10/01 Ram removed ustdio dependency
michael@0 17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
michael@0 18 *******************************************************************************
michael@0 19 */
michael@0 20
michael@0 21 #include "ucol_imp.h"
michael@0 22 #include "parse.h"
michael@0 23 #include "errmsg.h"
michael@0 24 #include "uhash.h"
michael@0 25 #include "cmemory.h"
michael@0 26 #include "cstring.h"
michael@0 27 #include "uinvchar.h"
michael@0 28 #include "read.h"
michael@0 29 #include "ustr.h"
michael@0 30 #include "reslist.h"
michael@0 31 #include "rbt_pars.h"
michael@0 32 #include "genrb.h"
michael@0 33 #include "unicode/ustring.h"
michael@0 34 #include "unicode/uscript.h"
michael@0 35 #include "unicode/putil.h"
michael@0 36 #include <stdio.h>
michael@0 37
michael@0 38 /* Number of tokens to read ahead of the current stream position */
michael@0 39 #define MAX_LOOKAHEAD 3
michael@0 40
michael@0 41 #define CR 0x000D
michael@0 42 #define LF 0x000A
michael@0 43 #define SPACE 0x0020
michael@0 44 #define TAB 0x0009
michael@0 45 #define ESCAPE 0x005C
michael@0 46 #define HASH 0x0023
michael@0 47 #define QUOTE 0x0027
michael@0 48 #define ZERO 0x0030
michael@0 49 #define STARTCOMMAND 0x005B
michael@0 50 #define ENDCOMMAND 0x005D
michael@0 51 #define OPENSQBRACKET 0x005B
michael@0 52 #define CLOSESQBRACKET 0x005D
michael@0 53
michael@0 54 struct Lookahead
michael@0 55 {
michael@0 56 enum ETokenType type;
michael@0 57 struct UString value;
michael@0 58 struct UString comment;
michael@0 59 uint32_t line;
michael@0 60 };
michael@0 61
michael@0 62 /* keep in sync with token defines in read.h */
michael@0 63 const char *tokenNames[TOK_TOKEN_COUNT] =
michael@0 64 {
michael@0 65 "string", /* A string token, such as "MonthNames" */
michael@0 66 "'{'", /* An opening brace character */
michael@0 67 "'}'", /* A closing brace character */
michael@0 68 "','", /* A comma */
michael@0 69 "':'", /* A colon */
michael@0 70
michael@0 71 "<end of file>", /* End of the file has been reached successfully */
michael@0 72 "<end of line>"
michael@0 73 };
michael@0 74
michael@0 75 /* Just to store "TRUE" */
michael@0 76 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
michael@0 77
michael@0 78 typedef struct {
michael@0 79 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
michael@0 80 uint32_t lookaheadPosition;
michael@0 81 UCHARBUF *buffer;
michael@0 82 struct SRBRoot *bundle;
michael@0 83 const char *inputdir;
michael@0 84 uint32_t inputdirLength;
michael@0 85 const char *outputdir;
michael@0 86 uint32_t outputdirLength;
michael@0 87 UBool makeBinaryCollation;
michael@0 88 UBool omitCollationRules;
michael@0 89 } ParseState;
michael@0 90
michael@0 91 typedef struct SResource *
michael@0 92 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
michael@0 93
michael@0 94 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
michael@0 95
michael@0 96 /* The nature of the lookahead buffer:
michael@0 97 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
michael@0 98 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
michael@0 99 When getToken is called, the current pointer is moved to the next slot and the
michael@0 100 old slot is filled with the next token from the reader by calling getNextToken.
michael@0 101 The token values are stored in the slot, which means that token values don't
michael@0 102 survive a call to getToken, ie.
michael@0 103
michael@0 104 UString *value;
michael@0 105
michael@0 106 getToken(&value, NULL, status);
michael@0 107 getToken(NULL, NULL, status); bad - value is now a different string
michael@0 108 */
michael@0 109 static void
michael@0 110 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
michael@0 111 {
michael@0 112 static uint32_t initTypeStrings = 0;
michael@0 113 uint32_t i;
michael@0 114
michael@0 115 if (!initTypeStrings)
michael@0 116 {
michael@0 117 initTypeStrings = 1;
michael@0 118 }
michael@0 119
michael@0 120 state->lookaheadPosition = 0;
michael@0 121 state->buffer = buf;
michael@0 122
michael@0 123 resetLineNumber();
michael@0 124
michael@0 125 for (i = 0; i < MAX_LOOKAHEAD; i++)
michael@0 126 {
michael@0 127 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
michael@0 128 if (U_FAILURE(*status))
michael@0 129 {
michael@0 130 return;
michael@0 131 }
michael@0 132 }
michael@0 133
michael@0 134 *status = U_ZERO_ERROR;
michael@0 135 }
michael@0 136
michael@0 137 static void
michael@0 138 cleanupLookahead(ParseState* state)
michael@0 139 {
michael@0 140 uint32_t i;
michael@0 141 for (i = 0; i <= MAX_LOOKAHEAD; i++)
michael@0 142 {
michael@0 143 ustr_deinit(&state->lookahead[i].value);
michael@0 144 ustr_deinit(&state->lookahead[i].comment);
michael@0 145 }
michael@0 146
michael@0 147 }
michael@0 148
michael@0 149 static enum ETokenType
michael@0 150 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
michael@0 151 {
michael@0 152 enum ETokenType result;
michael@0 153 uint32_t i;
michael@0 154
michael@0 155 result = state->lookahead[state->lookaheadPosition].type;
michael@0 156
michael@0 157 if (tokenValue != NULL)
michael@0 158 {
michael@0 159 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
michael@0 160 }
michael@0 161
michael@0 162 if (linenumber != NULL)
michael@0 163 {
michael@0 164 *linenumber = state->lookahead[state->lookaheadPosition].line;
michael@0 165 }
michael@0 166
michael@0 167 if (comment != NULL)
michael@0 168 {
michael@0 169 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
michael@0 170 }
michael@0 171
michael@0 172 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
michael@0 173 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
michael@0 174 ustr_setlen(&state->lookahead[i].comment, 0, status);
michael@0 175 ustr_setlen(&state->lookahead[i].value, 0, status);
michael@0 176 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
michael@0 177
michael@0 178 /* printf("getToken, returning %s\n", tokenNames[result]); */
michael@0 179
michael@0 180 return result;
michael@0 181 }
michael@0 182
michael@0 183 static enum ETokenType
michael@0 184 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
michael@0 185 {
michael@0 186 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
michael@0 187
michael@0 188 if (U_FAILURE(*status))
michael@0 189 {
michael@0 190 return TOK_ERROR;
michael@0 191 }
michael@0 192
michael@0 193 if (lookaheadCount >= MAX_LOOKAHEAD)
michael@0 194 {
michael@0 195 *status = U_INTERNAL_PROGRAM_ERROR;
michael@0 196 return TOK_ERROR;
michael@0 197 }
michael@0 198
michael@0 199 if (tokenValue != NULL)
michael@0 200 {
michael@0 201 *tokenValue = &state->lookahead[i].value;
michael@0 202 }
michael@0 203
michael@0 204 if (linenumber != NULL)
michael@0 205 {
michael@0 206 *linenumber = state->lookahead[i].line;
michael@0 207 }
michael@0 208
michael@0 209 if(comment != NULL){
michael@0 210 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
michael@0 211 }
michael@0 212
michael@0 213 return state->lookahead[i].type;
michael@0 214 }
michael@0 215
michael@0 216 static void
michael@0 217 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
michael@0 218 {
michael@0 219 uint32_t line;
michael@0 220
michael@0 221 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
michael@0 222
michael@0 223 if (linenumber != NULL)
michael@0 224 {
michael@0 225 *linenumber = line;
michael@0 226 }
michael@0 227
michael@0 228 if (U_FAILURE(*status))
michael@0 229 {
michael@0 230 return;
michael@0 231 }
michael@0 232
michael@0 233 if (token != expectedToken)
michael@0 234 {
michael@0 235 *status = U_INVALID_FORMAT_ERROR;
michael@0 236 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
michael@0 237 }
michael@0 238 else
michael@0 239 {
michael@0 240 *status = U_ZERO_ERROR;
michael@0 241 }
michael@0 242 }
michael@0 243
michael@0 244 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
michael@0 245 {
michael@0 246 struct UString *tokenValue;
michael@0 247 char *result;
michael@0 248 uint32_t count;
michael@0 249
michael@0 250 expect(state, TOK_STRING, &tokenValue, comment, line, status);
michael@0 251
michael@0 252 if (U_FAILURE(*status))
michael@0 253 {
michael@0 254 return NULL;
michael@0 255 }
michael@0 256
michael@0 257 count = u_strlen(tokenValue->fChars);
michael@0 258 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
michael@0 259 *status = U_INVALID_FORMAT_ERROR;
michael@0 260 error(*line, "invariant characters required for table keys, binary data, etc.");
michael@0 261 return NULL;
michael@0 262 }
michael@0 263
michael@0 264 result = static_cast<char *>(uprv_malloc(count+1));
michael@0 265
michael@0 266 if (result == NULL)
michael@0 267 {
michael@0 268 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 269 return NULL;
michael@0 270 }
michael@0 271
michael@0 272 u_UCharsToChars(tokenValue->fChars, result, count+1);
michael@0 273 return result;
michael@0 274 }
michael@0 275
michael@0 276 static struct SResource *
michael@0 277 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
michael@0 278 {
michael@0 279 struct SResource *result = NULL;
michael@0 280 struct UString *tokenValue;
michael@0 281 FileStream *file = NULL;
michael@0 282 char filename[256] = { '\0' };
michael@0 283 char cs[128] = { '\0' };
michael@0 284 uint32_t line;
michael@0 285 UBool quoted = FALSE;
michael@0 286 UCHARBUF *ucbuf=NULL;
michael@0 287 UChar32 c = 0;
michael@0 288 const char* cp = NULL;
michael@0 289 UChar *pTarget = NULL;
michael@0 290 UChar *target = NULL;
michael@0 291 UChar *targetLimit = NULL;
michael@0 292 int32_t size = 0;
michael@0 293
michael@0 294 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
michael@0 295
michael@0 296 if(isVerbose()){
michael@0 297 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 298 }
michael@0 299
michael@0 300 if (U_FAILURE(*status))
michael@0 301 {
michael@0 302 return NULL;
michael@0 303 }
michael@0 304 /* make the filename including the directory */
michael@0 305 if (state->inputdir != NULL)
michael@0 306 {
michael@0 307 uprv_strcat(filename, state->inputdir);
michael@0 308
michael@0 309 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
michael@0 310 {
michael@0 311 uprv_strcat(filename, U_FILE_SEP_STRING);
michael@0 312 }
michael@0 313 }
michael@0 314
michael@0 315 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
michael@0 316
michael@0 317 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
michael@0 318
michael@0 319 if (U_FAILURE(*status))
michael@0 320 {
michael@0 321 return NULL;
michael@0 322 }
michael@0 323 uprv_strcat(filename, cs);
michael@0 324
michael@0 325 if(state->omitCollationRules) {
michael@0 326 return res_none();
michael@0 327 }
michael@0 328
michael@0 329 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
michael@0 330
michael@0 331 if (U_FAILURE(*status)) {
michael@0 332 error(line, "An error occured while opening the input file %s\n", filename);
michael@0 333 return NULL;
michael@0 334 }
michael@0 335
michael@0 336 /* We allocate more space than actually required
michael@0 337 * since the actual size needed for storing UChars
michael@0 338 * is not known in UTF-8 byte stream
michael@0 339 */
michael@0 340 size = ucbuf_size(ucbuf) + 1;
michael@0 341 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
michael@0 342 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
michael@0 343 target = pTarget;
michael@0 344 targetLimit = pTarget+size;
michael@0 345
michael@0 346 /* read the rules into the buffer */
michael@0 347 while (target < targetLimit)
michael@0 348 {
michael@0 349 c = ucbuf_getc(ucbuf, status);
michael@0 350 if(c == QUOTE) {
michael@0 351 quoted = (UBool)!quoted;
michael@0 352 }
michael@0 353 /* weiv (06/26/2002): adding the following:
michael@0 354 * - preserving spaces in commands [...]
michael@0 355 * - # comments until the end of line
michael@0 356 */
michael@0 357 if (c == STARTCOMMAND && !quoted)
michael@0 358 {
michael@0 359 /* preserve commands
michael@0 360 * closing bracket will be handled by the
michael@0 361 * append at the end of the loop
michael@0 362 */
michael@0 363 while(c != ENDCOMMAND) {
michael@0 364 U_APPEND_CHAR32_ONLY(c, target);
michael@0 365 c = ucbuf_getc(ucbuf, status);
michael@0 366 }
michael@0 367 }
michael@0 368 else if (c == HASH && !quoted) {
michael@0 369 /* skip comments */
michael@0 370 while(c != CR && c != LF) {
michael@0 371 c = ucbuf_getc(ucbuf, status);
michael@0 372 }
michael@0 373 continue;
michael@0 374 }
michael@0 375 else if (c == ESCAPE)
michael@0 376 {
michael@0 377 c = unescape(ucbuf, status);
michael@0 378
michael@0 379 if (c == (UChar32)U_ERR)
michael@0 380 {
michael@0 381 uprv_free(pTarget);
michael@0 382 T_FileStream_close(file);
michael@0 383 return NULL;
michael@0 384 }
michael@0 385 }
michael@0 386 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
michael@0 387 {
michael@0 388 /* ignore spaces carriage returns
michael@0 389 * and line feed unless in the form \uXXXX
michael@0 390 */
michael@0 391 continue;
michael@0 392 }
michael@0 393
michael@0 394 /* Append UChar * after dissembling if c > 0xffff*/
michael@0 395 if (c != (UChar32)U_EOF)
michael@0 396 {
michael@0 397 U_APPEND_CHAR32_ONLY(c, target);
michael@0 398 }
michael@0 399 else
michael@0 400 {
michael@0 401 break;
michael@0 402 }
michael@0 403 }
michael@0 404
michael@0 405 /* terminate the string */
michael@0 406 if(target < targetLimit){
michael@0 407 *target = 0x0000;
michael@0 408 }
michael@0 409
michael@0 410 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
michael@0 411
michael@0 412
michael@0 413 ucbuf_close(ucbuf);
michael@0 414 uprv_free(pTarget);
michael@0 415 T_FileStream_close(file);
michael@0 416
michael@0 417 return result;
michael@0 418 }
michael@0 419
michael@0 420 static struct SResource *
michael@0 421 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
michael@0 422 {
michael@0 423 struct SResource *result = NULL;
michael@0 424 struct UString *tokenValue;
michael@0 425 FileStream *file = NULL;
michael@0 426 char filename[256] = { '\0' };
michael@0 427 char cs[128] = { '\0' };
michael@0 428 uint32_t line;
michael@0 429 UCHARBUF *ucbuf=NULL;
michael@0 430 const char* cp = NULL;
michael@0 431 UChar *pTarget = NULL;
michael@0 432 const UChar *pSource = NULL;
michael@0 433 int32_t size = 0;
michael@0 434
michael@0 435 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
michael@0 436
michael@0 437 if(isVerbose()){
michael@0 438 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 439 }
michael@0 440
michael@0 441 if (U_FAILURE(*status))
michael@0 442 {
michael@0 443 return NULL;
michael@0 444 }
michael@0 445 /* make the filename including the directory */
michael@0 446 if (state->inputdir != NULL)
michael@0 447 {
michael@0 448 uprv_strcat(filename, state->inputdir);
michael@0 449
michael@0 450 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
michael@0 451 {
michael@0 452 uprv_strcat(filename, U_FILE_SEP_STRING);
michael@0 453 }
michael@0 454 }
michael@0 455
michael@0 456 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
michael@0 457
michael@0 458 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
michael@0 459
michael@0 460 if (U_FAILURE(*status))
michael@0 461 {
michael@0 462 return NULL;
michael@0 463 }
michael@0 464 uprv_strcat(filename, cs);
michael@0 465
michael@0 466
michael@0 467 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
michael@0 468
michael@0 469 if (U_FAILURE(*status)) {
michael@0 470 error(line, "An error occured while opening the input file %s\n", filename);
michael@0 471 return NULL;
michael@0 472 }
michael@0 473
michael@0 474 /* We allocate more space than actually required
michael@0 475 * since the actual size needed for storing UChars
michael@0 476 * is not known in UTF-8 byte stream
michael@0 477 */
michael@0 478 pSource = ucbuf_getBuffer(ucbuf, &size, status);
michael@0 479 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
michael@0 480 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
michael@0 481
michael@0 482 #if !UCONFIG_NO_TRANSLITERATION
michael@0 483 size = utrans_stripRules(pSource, size, pTarget, status);
michael@0 484 #else
michael@0 485 size = 0;
michael@0 486 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
michael@0 487 #endif
michael@0 488 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
michael@0 489
michael@0 490 ucbuf_close(ucbuf);
michael@0 491 uprv_free(pTarget);
michael@0 492 T_FileStream_close(file);
michael@0 493
michael@0 494 return result;
michael@0 495 }
michael@0 496 static struct SResource* dependencyArray = NULL;
michael@0 497
michael@0 498 static struct SResource *
michael@0 499 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
michael@0 500 {
michael@0 501 struct SResource *result = NULL;
michael@0 502 struct SResource *elem = NULL;
michael@0 503 struct UString *tokenValue;
michael@0 504 uint32_t line;
michael@0 505 char filename[256] = { '\0' };
michael@0 506 char cs[128] = { '\0' };
michael@0 507
michael@0 508 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
michael@0 509
michael@0 510 if(isVerbose()){
michael@0 511 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 512 }
michael@0 513
michael@0 514 if (U_FAILURE(*status))
michael@0 515 {
michael@0 516 return NULL;
michael@0 517 }
michael@0 518 /* make the filename including the directory */
michael@0 519 if (state->outputdir != NULL)
michael@0 520 {
michael@0 521 uprv_strcat(filename, state->outputdir);
michael@0 522
michael@0 523 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
michael@0 524 {
michael@0 525 uprv_strcat(filename, U_FILE_SEP_STRING);
michael@0 526 }
michael@0 527 }
michael@0 528
michael@0 529 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
michael@0 530
michael@0 531 if (U_FAILURE(*status))
michael@0 532 {
michael@0 533 return NULL;
michael@0 534 }
michael@0 535 uprv_strcat(filename, cs);
michael@0 536 if(!T_FileStream_file_exists(filename)){
michael@0 537 if(isStrict()){
michael@0 538 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
michael@0 539 }else{
michael@0 540 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
michael@0 541 }
michael@0 542 }
michael@0 543 if(dependencyArray==NULL){
michael@0 544 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
michael@0 545 }
michael@0 546 if(tag!=NULL){
michael@0 547 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
michael@0 548 }
michael@0 549 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
michael@0 550
michael@0 551 array_add(dependencyArray, elem, status);
michael@0 552
michael@0 553 if (U_FAILURE(*status))
michael@0 554 {
michael@0 555 return NULL;
michael@0 556 }
michael@0 557 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
michael@0 558 return result;
michael@0 559 }
michael@0 560 static struct SResource *
michael@0 561 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
michael@0 562 {
michael@0 563 struct UString *tokenValue;
michael@0 564 struct SResource *result = NULL;
michael@0 565
michael@0 566 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
michael@0 567 {
michael@0 568 return parseUCARules(tag, startline, status);
michael@0 569 }*/
michael@0 570 if(isVerbose()){
michael@0 571 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 572 }
michael@0 573 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
michael@0 574
michael@0 575 if (U_SUCCESS(*status))
michael@0 576 {
michael@0 577 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
michael@0 578 doesn't survive expect either) */
michael@0 579
michael@0 580 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
michael@0 581 if(U_SUCCESS(*status) && result) {
michael@0 582 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
michael@0 583
michael@0 584 if (U_FAILURE(*status))
michael@0 585 {
michael@0 586 res_close(result);
michael@0 587 return NULL;
michael@0 588 }
michael@0 589 }
michael@0 590 }
michael@0 591
michael@0 592 return result;
michael@0 593 }
michael@0 594
michael@0 595 static struct SResource *
michael@0 596 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
michael@0 597 {
michael@0 598 struct UString *tokenValue;
michael@0 599 struct SResource *result = NULL;
michael@0 600
michael@0 601 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
michael@0 602
michael@0 603 if(isVerbose()){
michael@0 604 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 605 }
michael@0 606
michael@0 607 if (U_SUCCESS(*status))
michael@0 608 {
michael@0 609 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
michael@0 610 doesn't survive expect either) */
michael@0 611
michael@0 612 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
michael@0 613
michael@0 614 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
michael@0 615
michael@0 616 if (U_FAILURE(*status))
michael@0 617 {
michael@0 618 res_close(result);
michael@0 619 return NULL;
michael@0 620 }
michael@0 621 }
michael@0 622
michael@0 623 return result;
michael@0 624 }
michael@0 625
/* Context handed to importFromDataFile() through its void* parameter. */
typedef struct{
    const char* inputDir;   /* directory searched for the imported <locale>.txt (may be NULL) */
    const char* outputDir;  /* forwarded unchanged to parse() */
} GenrbData;
michael@0 630
michael@0 631 static struct SResource* resLookup(struct SResource* res, const char* key){
michael@0 632 struct SResource *current = NULL;
michael@0 633 struct SResTable *list;
michael@0 634 if (res == res_none()) {
michael@0 635 return NULL;
michael@0 636 }
michael@0 637
michael@0 638 list = &(res->u.fTable);
michael@0 639
michael@0 640 current = list->fFirst;
michael@0 641 while (current != NULL) {
michael@0 642 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
michael@0 643 return current;
michael@0 644 }
michael@0 645 current = current->fNext;
michael@0 646 }
michael@0 647 return NULL;
michael@0 648 }
michael@0 649
michael@0 650 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
michael@0 651 struct SRBRoot *data = NULL;
michael@0 652 UCHARBUF *ucbuf = NULL;
michael@0 653 GenrbData* genrbdata = (GenrbData*) context;
michael@0 654 int localeLength = strlen(locale);
michael@0 655 char* filename = (char*)uprv_malloc(localeLength+5);
michael@0 656 char *inputDirBuf = NULL;
michael@0 657 char *openFileName = NULL;
michael@0 658 const char* cp = "";
michael@0 659 UChar* urules = NULL;
michael@0 660 int32_t urulesLength = 0;
michael@0 661 int32_t i = 0;
michael@0 662 int32_t dirlen = 0;
michael@0 663 int32_t filelen = 0;
michael@0 664 struct SResource* root;
michael@0 665 struct SResource* collations;
michael@0 666 struct SResource* collation;
michael@0 667 struct SResource* sequence;
michael@0 668
michael@0 669 memcpy(filename, locale, localeLength);
michael@0 670 for(i = 0; i < localeLength; i++){
michael@0 671 if(filename[i] == '-'){
michael@0 672 filename[i] = '_';
michael@0 673 }
michael@0 674 }
michael@0 675 filename[localeLength] = '.';
michael@0 676 filename[localeLength+1] = 't';
michael@0 677 filename[localeLength+2] = 'x';
michael@0 678 filename[localeLength+3] = 't';
michael@0 679 filename[localeLength+4] = 0;
michael@0 680
michael@0 681
michael@0 682 if (status==NULL || U_FAILURE(*status)) {
michael@0 683 return NULL;
michael@0 684 }
michael@0 685 if(filename==NULL){
michael@0 686 *status=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 687 return NULL;
michael@0 688 }else{
michael@0 689 filelen = (int32_t)uprv_strlen(filename);
michael@0 690 }
michael@0 691 if(genrbdata->inputDir == NULL) {
michael@0 692 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
michael@0 693 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
michael@0 694 openFileName[0] = '\0';
michael@0 695 if (filenameBegin != NULL) {
michael@0 696 /*
michael@0 697 * When a filename ../../../data/root.txt is specified,
michael@0 698 * we presume that the input directory is ../../../data
michael@0 699 * This is very important when the resource file includes
michael@0 700 * another file, like UCARules.txt or thaidict.brk.
michael@0 701 */
michael@0 702 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
michael@0 703 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
michael@0 704
michael@0 705 /* test for NULL */
michael@0 706 if(inputDirBuf == NULL) {
michael@0 707 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 708 goto finish;
michael@0 709 }
michael@0 710
michael@0 711 inputDirBuf[filenameSize - 1] = 0;
michael@0 712 genrbdata->inputDir = inputDirBuf;
michael@0 713 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
michael@0 714 }
michael@0 715 }else{
michael@0 716 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
michael@0 717
michael@0 718 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
michael@0 719 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
michael@0 720
michael@0 721 /* test for NULL */
michael@0 722 if(openFileName == NULL) {
michael@0 723 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 724 goto finish;
michael@0 725 }
michael@0 726
michael@0 727 openFileName[0] = '\0';
michael@0 728 /*
michael@0 729 * append the input dir to openFileName if the first char in
michael@0 730 * filename is not file seperation char and the last char input directory is not '.'.
michael@0 731 * This is to support :
michael@0 732 * genrb -s. /home/icu/data
michael@0 733 * genrb -s. icu/data
michael@0 734 * The user cannot mix notations like
michael@0 735 * genrb -s. /icu/data --- the absolute path specified. -s redundant
michael@0 736 * user should use
michael@0 737 * genrb -s. icu/data --- start from CWD and look in icu/data dir
michael@0 738 */
michael@0 739 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
michael@0 740 uprv_strcpy(openFileName, genrbdata->inputDir);
michael@0 741 openFileName[dirlen] = U_FILE_SEP_CHAR;
michael@0 742 }
michael@0 743 openFileName[dirlen + 1] = '\0';
michael@0 744 } else {
michael@0 745 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
michael@0 746
michael@0 747 /* test for NULL */
michael@0 748 if(openFileName == NULL) {
michael@0 749 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 750 goto finish;
michael@0 751 }
michael@0 752
michael@0 753 uprv_strcpy(openFileName, genrbdata->inputDir);
michael@0 754
michael@0 755 }
michael@0 756 }
michael@0 757 uprv_strcat(openFileName, filename);
michael@0 758 /* printf("%s\n", openFileName); */
michael@0 759 *status = U_ZERO_ERROR;
michael@0 760 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
michael@0 761
michael@0 762 if(*status == U_FILE_ACCESS_ERROR) {
michael@0 763
michael@0 764 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
michael@0 765 goto finish;
michael@0 766 }
michael@0 767 if (ucbuf == NULL || U_FAILURE(*status)) {
michael@0 768 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
michael@0 769 goto finish;
michael@0 770 }
michael@0 771
michael@0 772 /* Parse the data into an SRBRoot */
michael@0 773 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, FALSE, status);
michael@0 774
michael@0 775 root = data->fRoot;
michael@0 776 collations = resLookup(root, "collations");
michael@0 777 if (collations != NULL) {
michael@0 778 collation = resLookup(collations, type);
michael@0 779 if (collation != NULL) {
michael@0 780 sequence = resLookup(collation, "Sequence");
michael@0 781 if (sequence != NULL) {
michael@0 782 urules = sequence->u.fString.fChars;
michael@0 783 urulesLength = sequence->u.fString.fLength;
michael@0 784 *pLength = urulesLength;
michael@0 785 }
michael@0 786 }
michael@0 787 }
michael@0 788
michael@0 789 finish:
michael@0 790 if (inputDirBuf != NULL) {
michael@0 791 uprv_free(inputDirBuf);
michael@0 792 }
michael@0 793
michael@0 794 if (openFileName != NULL) {
michael@0 795 uprv_free(openFileName);
michael@0 796 }
michael@0 797
michael@0 798 if(ucbuf) {
michael@0 799 ucbuf_close(ucbuf);
michael@0 800 }
michael@0 801
michael@0 802 return urules;
michael@0 803 }
michael@0 804
michael@0 805 // Quick-and-dirty escaping function.
michael@0 806 // Assumes that we are on an ASCII-based platform.
michael@0 807 static void
michael@0 808 escape(const UChar *s, char *buffer) {
michael@0 809 int32_t length = u_strlen(s);
michael@0 810 int32_t i = 0;
michael@0 811 for (;;) {
michael@0 812 UChar32 c;
michael@0 813 U16_NEXT(s, i, length, c);
michael@0 814 if (c == 0) {
michael@0 815 *buffer = 0;
michael@0 816 return;
michael@0 817 } else if (0x20 <= c && c <= 0x7e) {
michael@0 818 // printable ASCII
michael@0 819 *buffer++ = (char)c; // assumes ASCII-based platform
michael@0 820 } else {
michael@0 821 buffer += sprintf(buffer, "\\u%04X", (int)c);
michael@0 822 }
michael@0 823 }
michael@0 824 }
michael@0 825
michael@0 826 static struct SResource *
michael@0 827 addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
michael@0 828 {
michael@0 829 struct SResource *member = NULL;
michael@0 830 struct UString *tokenValue;
michael@0 831 struct UString comment;
michael@0 832 enum ETokenType token;
michael@0 833 char subtag[1024];
michael@0 834 UVersionInfo version;
michael@0 835 uint32_t line;
michael@0 836 GenrbData genrbdata;
michael@0 837 /* '{' . (name resource)* '}' */
michael@0 838 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
michael@0 839
michael@0 840 for (;;)
michael@0 841 {
michael@0 842 ustr_init(&comment);
michael@0 843 token = getToken(state, &tokenValue, &comment, &line, status);
michael@0 844
michael@0 845 if (token == TOK_CLOSE_BRACE)
michael@0 846 {
michael@0 847 return result;
michael@0 848 }
michael@0 849
michael@0 850 if (token != TOK_STRING)
michael@0 851 {
michael@0 852 res_close(result);
michael@0 853 *status = U_INVALID_FORMAT_ERROR;
michael@0 854
michael@0 855 if (token == TOK_EOF)
michael@0 856 {
michael@0 857 error(startline, "unterminated table");
michael@0 858 }
michael@0 859 else
michael@0 860 {
michael@0 861 error(line, "Unexpected token %s", tokenNames[token]);
michael@0 862 }
michael@0 863
michael@0 864 return NULL;
michael@0 865 }
michael@0 866
michael@0 867 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
michael@0 868
michael@0 869 if (U_FAILURE(*status))
michael@0 870 {
michael@0 871 res_close(result);
michael@0 872 return NULL;
michael@0 873 }
michael@0 874
michael@0 875 member = parseResource(state, subtag, NULL, status);
michael@0 876
michael@0 877 if (U_FAILURE(*status))
michael@0 878 {
michael@0 879 res_close(result);
michael@0 880 return NULL;
michael@0 881 }
michael@0 882
michael@0 883 if (uprv_strcmp(subtag, "Version") == 0)
michael@0 884 {
michael@0 885 char ver[40];
michael@0 886 int32_t length = member->u.fString.fLength;
michael@0 887
michael@0 888 if (length >= (int32_t) sizeof(ver))
michael@0 889 {
michael@0 890 length = (int32_t) sizeof(ver) - 1;
michael@0 891 }
michael@0 892
michael@0 893 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
michael@0 894 u_versionFromString(version, ver);
michael@0 895
michael@0 896 table_add(result, member, line, status);
michael@0 897
michael@0 898 }
michael@0 899 else if (uprv_strcmp(subtag, "Override") == 0)
michael@0 900 {
michael@0 901 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
michael@0 902 table_add(result, member, line, status);
michael@0 903
michael@0 904 }
michael@0 905 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
michael@0 906 {
michael@0 907 /* discard duplicate %%CollationBin if any*/
michael@0 908 }
michael@0 909 else if (uprv_strcmp(subtag, "Sequence") == 0)
michael@0 910 {
michael@0 911 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
michael@0 912 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
michael@0 913 #else
michael@0 914 if(state->makeBinaryCollation) {
michael@0 915
michael@0 916 /* do the collation elements */
michael@0 917 int32_t len = 0;
michael@0 918 uint8_t *data = NULL;
michael@0 919 UCollator *coll = NULL;
michael@0 920 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
michael@0 921 int32_t reorderCodeCount;
michael@0 922 int32_t reorderCodeIndex;
michael@0 923 UParseError parseError;
michael@0 924
michael@0 925 genrbdata.inputDir = state->inputdir;
michael@0 926 genrbdata.outputDir = state->outputdir;
michael@0 927
michael@0 928 UErrorCode intStatus = U_ZERO_ERROR;
michael@0 929 uprv_memset(&parseError, 0, sizeof(parseError));
michael@0 930 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
michael@0 931 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
michael@0 932
michael@0 933 if (U_SUCCESS(intStatus) && coll != NULL)
michael@0 934 {
michael@0 935 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
michael@0 936 data = (uint8_t *)uprv_malloc(len);
michael@0 937 intStatus = U_ZERO_ERROR;
michael@0 938 len = ucol_cloneBinary(coll, data, len, &intStatus);
michael@0 939
michael@0 940 /* tailoring rules version */
michael@0 941 /* This is wrong! */
michael@0 942 /*coll->dataInfo.dataVersion[1] = version[0];*/
michael@0 943 /* Copy tailoring version. Builder version already */
michael@0 944 /* set in ucol_openRules */
michael@0 945 ((UCATableHeader *)data)->version[1] = version[0];
michael@0 946 ((UCATableHeader *)data)->version[2] = version[1];
michael@0 947 ((UCATableHeader *)data)->version[3] = version[2];
michael@0 948
michael@0 949 if (U_SUCCESS(intStatus) && data != NULL)
michael@0 950 {
michael@0 951 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
michael@0 952 table_add(result, collationBin, line, status);
michael@0 953 uprv_free(data);
michael@0 954
michael@0 955 reorderCodeCount = ucol_getReorderCodes(
michael@0 956 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
michael@0 957 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
michael@0 958 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
michael@0 959 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
michael@0 960 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
michael@0 961 }
michael@0 962 table_add(result, reorderCodeRes, line, status);
michael@0 963 }
michael@0 964 }
michael@0 965 else
michael@0 966 {
michael@0 967 warning(line, "could not obtain rules from collator");
michael@0 968 if(isStrict()){
michael@0 969 *status = U_INVALID_FORMAT_ERROR;
michael@0 970 return NULL;
michael@0 971 }
michael@0 972 }
michael@0 973
michael@0 974 ucol_close(coll);
michael@0 975 }
michael@0 976 else
michael@0 977 {
michael@0 978 if(intStatus == U_FILE_ACCESS_ERROR) {
michael@0 979 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
michael@0 980 *status = intStatus;
michael@0 981 return NULL;
michael@0 982 }
michael@0 983 char preBuffer[100], postBuffer[100];
michael@0 984 escape(parseError.preContext, preBuffer);
michael@0 985 escape(parseError.postContext, postBuffer);
michael@0 986 warning(line,
michael@0 987 "%%%%CollationBin could not be constructed from CollationElements\n"
michael@0 988 " check context, check that the FractionalUCA.txt UCA version "
michael@0 989 "matches the current UCD version\n"
michael@0 990 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }",
michael@0 991 u_errorName(intStatus),
michael@0 992 parseError.line,
michael@0 993 parseError.offset,
michael@0 994 preBuffer,
michael@0 995 postBuffer);
michael@0 996 if(isStrict()){
michael@0 997 *status = intStatus;
michael@0 998 return NULL;
michael@0 999 }
michael@0 1000 }
michael@0 1001 } else {
michael@0 1002 if(isVerbose()) {
michael@0 1003 printf("Not building Collation binary\n");
michael@0 1004 }
michael@0 1005 }
michael@0 1006 #endif
michael@0 1007 /* in order to achieve smaller data files, we can direct genrb */
michael@0 1008 /* to omit collation rules */
michael@0 1009 if(state->omitCollationRules) {
michael@0 1010 bundle_closeString(state->bundle, member);
michael@0 1011 } else {
michael@0 1012 table_add(result, member, line, status);
michael@0 1013 }
michael@0 1014 }
michael@0 1015 if (U_FAILURE(*status))
michael@0 1016 {
michael@0 1017 res_close(result);
michael@0 1018 return NULL;
michael@0 1019 }
michael@0 1020 }
michael@0 1021
michael@0 1022 // Reached the end without a TOK_CLOSE_BRACE. Should be an error.
michael@0 1023 *status = U_INTERNAL_PROGRAM_ERROR;
michael@0 1024 return NULL;
michael@0 1025 }
michael@0 1026
michael@0 1027 static struct SResource *
michael@0 1028 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
michael@0 1029 {
michael@0 1030 struct SResource *result = NULL;
michael@0 1031 struct SResource *member = NULL;
michael@0 1032 struct SResource *collationRes = NULL;
michael@0 1033 struct UString *tokenValue;
michael@0 1034 struct UString comment;
michael@0 1035 enum ETokenType token;
michael@0 1036 char subtag[1024], typeKeyword[1024];
michael@0 1037 uint32_t line;
michael@0 1038
michael@0 1039 result = table_open(state->bundle, tag, NULL, status);
michael@0 1040
michael@0 1041 if (result == NULL || U_FAILURE(*status))
michael@0 1042 {
michael@0 1043 return NULL;
michael@0 1044 }
michael@0 1045 if(isVerbose()){
michael@0 1046 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 1047 }
michael@0 1048 if(!newCollation) {
michael@0 1049 return addCollation(state, result, startline, status);
michael@0 1050 }
michael@0 1051 else {
michael@0 1052 for(;;) {
michael@0 1053 ustr_init(&comment);
michael@0 1054 token = getToken(state, &tokenValue, &comment, &line, status);
michael@0 1055
michael@0 1056 if (token == TOK_CLOSE_BRACE)
michael@0 1057 {
michael@0 1058 return result;
michael@0 1059 }
michael@0 1060
michael@0 1061 if (token != TOK_STRING)
michael@0 1062 {
michael@0 1063 res_close(result);
michael@0 1064 *status = U_INVALID_FORMAT_ERROR;
michael@0 1065
michael@0 1066 if (token == TOK_EOF)
michael@0 1067 {
michael@0 1068 error(startline, "unterminated table");
michael@0 1069 }
michael@0 1070 else
michael@0 1071 {
michael@0 1072 error(line, "Unexpected token %s", tokenNames[token]);
michael@0 1073 }
michael@0 1074
michael@0 1075 return NULL;
michael@0 1076 }
michael@0 1077
michael@0 1078 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
michael@0 1079
michael@0 1080 if (U_FAILURE(*status))
michael@0 1081 {
michael@0 1082 res_close(result);
michael@0 1083 return NULL;
michael@0 1084 }
michael@0 1085
michael@0 1086 if (uprv_strcmp(subtag, "default") == 0)
michael@0 1087 {
michael@0 1088 member = parseResource(state, subtag, NULL, status);
michael@0 1089
michael@0 1090 if (U_FAILURE(*status))
michael@0 1091 {
michael@0 1092 res_close(result);
michael@0 1093 return NULL;
michael@0 1094 }
michael@0 1095
michael@0 1096 table_add(result, member, line, status);
michael@0 1097 }
michael@0 1098 else
michael@0 1099 {
michael@0 1100 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
michael@0 1101 /* this probably needs to be refactored or recursively use the parser */
michael@0 1102 /* first we assume that our collation table won't have the explicit type */
michael@0 1103 /* then, we cannot handle aliases */
michael@0 1104 if(token == TOK_OPEN_BRACE) {
michael@0 1105 token = getToken(state, &tokenValue, &comment, &line, status);
michael@0 1106 collationRes = table_open(state->bundle, subtag, NULL, status);
michael@0 1107 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
michael@0 1108 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
michael@0 1109 table_add(result, collationRes, startline, status);
michael@0 1110 }
michael@0 1111 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
michael@0 1112 /* we could have a table too */
michael@0 1113 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
michael@0 1114 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
michael@0 1115 if(uprv_strcmp(typeKeyword, "alias") == 0) {
michael@0 1116 member = parseResource(state, subtag, NULL, status);
michael@0 1117 if (U_FAILURE(*status))
michael@0 1118 {
michael@0 1119 res_close(result);
michael@0 1120 return NULL;
michael@0 1121 }
michael@0 1122
michael@0 1123 table_add(result, member, line, status);
michael@0 1124 } else {
michael@0 1125 res_close(result);
michael@0 1126 *status = U_INVALID_FORMAT_ERROR;
michael@0 1127 return NULL;
michael@0 1128 }
michael@0 1129 } else {
michael@0 1130 res_close(result);
michael@0 1131 *status = U_INVALID_FORMAT_ERROR;
michael@0 1132 return NULL;
michael@0 1133 }
michael@0 1134 }
michael@0 1135
michael@0 1136 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
michael@0 1137
michael@0 1138 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
michael@0 1139
michael@0 1140 if (U_FAILURE(*status))
michael@0 1141 {
michael@0 1142 res_close(result);
michael@0 1143 return NULL;
michael@0 1144 }
michael@0 1145 }
michael@0 1146 }
michael@0 1147 }
michael@0 1148
michael@0 1149 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
michael@0 1150 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
michael@0 1151 static struct SResource *
michael@0 1152 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
michael@0 1153 {
michael@0 1154 struct SResource *member = NULL;
michael@0 1155 struct UString *tokenValue=NULL;
michael@0 1156 struct UString comment;
michael@0 1157 enum ETokenType token;
michael@0 1158 char subtag[1024];
michael@0 1159 uint32_t line;
michael@0 1160 UBool readToken = FALSE;
michael@0 1161
michael@0 1162 /* '{' . (name resource)* '}' */
michael@0 1163
michael@0 1164 if(isVerbose()){
michael@0 1165 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 1166 }
michael@0 1167 for (;;)
michael@0 1168 {
michael@0 1169 ustr_init(&comment);
michael@0 1170 token = getToken(state, &tokenValue, &comment, &line, status);
michael@0 1171
michael@0 1172 if (token == TOK_CLOSE_BRACE)
michael@0 1173 {
michael@0 1174 if (!readToken) {
michael@0 1175 warning(startline, "Encountered empty table");
michael@0 1176 }
michael@0 1177 return table;
michael@0 1178 }
michael@0 1179
michael@0 1180 if (token != TOK_STRING)
michael@0 1181 {
michael@0 1182 *status = U_INVALID_FORMAT_ERROR;
michael@0 1183
michael@0 1184 if (token == TOK_EOF)
michael@0 1185 {
michael@0 1186 error(startline, "unterminated table");
michael@0 1187 }
michael@0 1188 else
michael@0 1189 {
michael@0 1190 error(line, "unexpected token %s", tokenNames[token]);
michael@0 1191 }
michael@0 1192
michael@0 1193 return NULL;
michael@0 1194 }
michael@0 1195
michael@0 1196 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
michael@0 1197 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
michael@0 1198 } else {
michael@0 1199 *status = U_INVALID_FORMAT_ERROR;
michael@0 1200 error(line, "invariant characters required for table keys");
michael@0 1201 return NULL;
michael@0 1202 }
michael@0 1203
michael@0 1204 if (U_FAILURE(*status))
michael@0 1205 {
michael@0 1206 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
michael@0 1207 return NULL;
michael@0 1208 }
michael@0 1209
michael@0 1210 member = parseResource(state, subtag, &comment, status);
michael@0 1211
michael@0 1212 if (member == NULL || U_FAILURE(*status))
michael@0 1213 {
michael@0 1214 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
michael@0 1215 return NULL;
michael@0 1216 }
michael@0 1217
michael@0 1218 table_add(table, member, line, status);
michael@0 1219
michael@0 1220 if (U_FAILURE(*status))
michael@0 1221 {
michael@0 1222 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
michael@0 1223 return NULL;
michael@0 1224 }
michael@0 1225 readToken = TRUE;
michael@0 1226 ustr_deinit(&comment);
michael@0 1227 }
michael@0 1228
michael@0 1229 /* not reached */
michael@0 1230 /* A compiler warning will appear if all paths don't contain a return statement. */
michael@0 1231 /* *status = U_INTERNAL_PROGRAM_ERROR;
michael@0 1232 return NULL;*/
michael@0 1233 }
michael@0 1234
michael@0 1235 static struct SResource *
michael@0 1236 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
michael@0 1237 {
michael@0 1238 struct SResource *result;
michael@0 1239
michael@0 1240 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
michael@0 1241 {
michael@0 1242 return parseCollationElements(state, tag, startline, FALSE, status);
michael@0 1243 }
michael@0 1244 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
michael@0 1245 {
michael@0 1246 return parseCollationElements(state, tag, startline, TRUE, status);
michael@0 1247 }
michael@0 1248 if(isVerbose()){
michael@0 1249 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 1250 }
michael@0 1251
michael@0 1252 result = table_open(state->bundle, tag, comment, status);
michael@0 1253
michael@0 1254 if (result == NULL || U_FAILURE(*status))
michael@0 1255 {
michael@0 1256 return NULL;
michael@0 1257 }
michael@0 1258 return realParseTable(state, result, tag, startline, status);
michael@0 1259 }
michael@0 1260
michael@0 1261 static struct SResource *
michael@0 1262 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
michael@0 1263 {
michael@0 1264 struct SResource *result = NULL;
michael@0 1265 struct SResource *member = NULL;
michael@0 1266 struct UString *tokenValue;
michael@0 1267 struct UString memberComments;
michael@0 1268 enum ETokenType token;
michael@0 1269 UBool readToken = FALSE;
michael@0 1270
michael@0 1271 result = array_open(state->bundle, tag, comment, status);
michael@0 1272
michael@0 1273 if (result == NULL || U_FAILURE(*status))
michael@0 1274 {
michael@0 1275 return NULL;
michael@0 1276 }
michael@0 1277 if(isVerbose()){
michael@0 1278 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 1279 }
michael@0 1280
michael@0 1281 ustr_init(&memberComments);
michael@0 1282
michael@0 1283 /* '{' . resource [','] '}' */
michael@0 1284 for (;;)
michael@0 1285 {
michael@0 1286 /* reset length */
michael@0 1287 ustr_setlen(&memberComments, 0, status);
michael@0 1288
michael@0 1289 /* check for end of array, but don't consume next token unless it really is the end */
michael@0 1290 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
michael@0 1291
michael@0 1292
michael@0 1293 if (token == TOK_CLOSE_BRACE)
michael@0 1294 {
michael@0 1295 getToken(state, NULL, NULL, NULL, status);
michael@0 1296 if (!readToken) {
michael@0 1297 warning(startline, "Encountered empty array");
michael@0 1298 }
michael@0 1299 break;
michael@0 1300 }
michael@0 1301
michael@0 1302 if (token == TOK_EOF)
michael@0 1303 {
michael@0 1304 res_close(result);
michael@0 1305 *status = U_INVALID_FORMAT_ERROR;
michael@0 1306 error(startline, "unterminated array");
michael@0 1307 return NULL;
michael@0 1308 }
michael@0 1309
michael@0 1310 /* string arrays are a special case */
michael@0 1311 if (token == TOK_STRING)
michael@0 1312 {
michael@0 1313 getToken(state, &tokenValue, &memberComments, NULL, status);
michael@0 1314 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
michael@0 1315 }
michael@0 1316 else
michael@0 1317 {
michael@0 1318 member = parseResource(state, NULL, &memberComments, status);
michael@0 1319 }
michael@0 1320
michael@0 1321 if (member == NULL || U_FAILURE(*status))
michael@0 1322 {
michael@0 1323 res_close(result);
michael@0 1324 return NULL;
michael@0 1325 }
michael@0 1326
michael@0 1327 array_add(result, member, status);
michael@0 1328
michael@0 1329 if (U_FAILURE(*status))
michael@0 1330 {
michael@0 1331 res_close(result);
michael@0 1332 return NULL;
michael@0 1333 }
michael@0 1334
michael@0 1335 /* eat optional comma if present */
michael@0 1336 token = peekToken(state, 0, NULL, NULL, NULL, status);
michael@0 1337
michael@0 1338 if (token == TOK_COMMA)
michael@0 1339 {
michael@0 1340 getToken(state, NULL, NULL, NULL, status);
michael@0 1341 }
michael@0 1342
michael@0 1343 if (U_FAILURE(*status))
michael@0 1344 {
michael@0 1345 res_close(result);
michael@0 1346 return NULL;
michael@0 1347 }
michael@0 1348 readToken = TRUE;
michael@0 1349 }
michael@0 1350
michael@0 1351 ustr_deinit(&memberComments);
michael@0 1352 return result;
michael@0 1353 }
michael@0 1354
michael@0 1355 static struct SResource *
michael@0 1356 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
michael@0 1357 {
michael@0 1358 struct SResource *result = NULL;
michael@0 1359 enum ETokenType token;
michael@0 1360 char *string;
michael@0 1361 int32_t value;
michael@0 1362 UBool readToken = FALSE;
michael@0 1363 char *stopstring;
michael@0 1364 uint32_t len;
michael@0 1365 struct UString memberComments;
michael@0 1366
michael@0 1367 result = intvector_open(state->bundle, tag, comment, status);
michael@0 1368
michael@0 1369 if (result == NULL || U_FAILURE(*status))
michael@0 1370 {
michael@0 1371 return NULL;
michael@0 1372 }
michael@0 1373
michael@0 1374 if(isVerbose()){
michael@0 1375 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 1376 }
michael@0 1377 ustr_init(&memberComments);
michael@0 1378 /* '{' . string [','] '}' */
michael@0 1379 for (;;)
michael@0 1380 {
michael@0 1381 ustr_setlen(&memberComments, 0, status);
michael@0 1382
michael@0 1383 /* check for end of array, but don't consume next token unless it really is the end */
michael@0 1384 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
michael@0 1385
michael@0 1386 if (token == TOK_CLOSE_BRACE)
michael@0 1387 {
michael@0 1388 /* it's the end, consume the close brace */
michael@0 1389 getToken(state, NULL, NULL, NULL, status);
michael@0 1390 if (!readToken) {
michael@0 1391 warning(startline, "Encountered empty int vector");
michael@0 1392 }
michael@0 1393 ustr_deinit(&memberComments);
michael@0 1394 return result;
michael@0 1395 }
michael@0 1396
michael@0 1397 string = getInvariantString(state, NULL, NULL, status);
michael@0 1398
michael@0 1399 if (U_FAILURE(*status))
michael@0 1400 {
michael@0 1401 res_close(result);
michael@0 1402 return NULL;
michael@0 1403 }
michael@0 1404
michael@0 1405 /* For handling illegal char in the Intvector */
michael@0 1406 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
michael@0 1407 len=(uint32_t)(stopstring-string);
michael@0 1408
michael@0 1409 if(len==uprv_strlen(string))
michael@0 1410 {
michael@0 1411 intvector_add(result, value, status);
michael@0 1412 uprv_free(string);
michael@0 1413 token = peekToken(state, 0, NULL, NULL, NULL, status);
michael@0 1414 }
michael@0 1415 else
michael@0 1416 {
michael@0 1417 uprv_free(string);
michael@0 1418 *status=U_INVALID_CHAR_FOUND;
michael@0 1419 }
michael@0 1420
michael@0 1421 if (U_FAILURE(*status))
michael@0 1422 {
michael@0 1423 res_close(result);
michael@0 1424 return NULL;
michael@0 1425 }
michael@0 1426
michael@0 1427 /* the comma is optional (even though it is required to prevent the reader from concatenating
michael@0 1428 consecutive entries) so that a missing comma on the last entry isn't an error */
michael@0 1429 if (token == TOK_COMMA)
michael@0 1430 {
michael@0 1431 getToken(state, NULL, NULL, NULL, status);
michael@0 1432 }
michael@0 1433 readToken = TRUE;
michael@0 1434 }
michael@0 1435
michael@0 1436 /* not reached */
michael@0 1437 /* A compiler warning will appear if all paths don't contain a return statement. */
michael@0 1438 /* intvector_close(result, status);
michael@0 1439 *status = U_INTERNAL_PROGRAM_ERROR;
michael@0 1440 return NULL;*/
michael@0 1441 }
michael@0 1442
michael@0 1443 static struct SResource *
michael@0 1444 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
michael@0 1445 {
michael@0 1446 struct SResource *result = NULL;
michael@0 1447 uint8_t *value;
michael@0 1448 char *string;
michael@0 1449 char toConv[3] = {'\0', '\0', '\0'};
michael@0 1450 uint32_t count;
michael@0 1451 uint32_t i;
michael@0 1452 uint32_t line;
michael@0 1453 char *stopstring;
michael@0 1454 uint32_t len;
michael@0 1455
michael@0 1456 string = getInvariantString(state, &line, NULL, status);
michael@0 1457
michael@0 1458 if (string == NULL || U_FAILURE(*status))
michael@0 1459 {
michael@0 1460 return NULL;
michael@0 1461 }
michael@0 1462
michael@0 1463 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
michael@0 1464
michael@0 1465 if (U_FAILURE(*status))
michael@0 1466 {
michael@0 1467 uprv_free(string);
michael@0 1468 return NULL;
michael@0 1469 }
michael@0 1470
michael@0 1471 if(isVerbose()){
michael@0 1472 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 1473 }
michael@0 1474
michael@0 1475 count = (uint32_t)uprv_strlen(string);
michael@0 1476 if (count > 0){
michael@0 1477 if((count % 2)==0){
michael@0 1478 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
michael@0 1479
michael@0 1480 if (value == NULL)
michael@0 1481 {
michael@0 1482 uprv_free(string);
michael@0 1483 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 1484 return NULL;
michael@0 1485 }
michael@0 1486
michael@0 1487 for (i = 0; i < count; i += 2)
michael@0 1488 {
michael@0 1489 toConv[0] = string[i];
michael@0 1490 toConv[1] = string[i + 1];
michael@0 1491
michael@0 1492 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
michael@0 1493 len=(uint32_t)(stopstring-toConv);
michael@0 1494
michael@0 1495 if(len!=uprv_strlen(toConv))
michael@0 1496 {
michael@0 1497 uprv_free(string);
michael@0 1498 *status=U_INVALID_CHAR_FOUND;
michael@0 1499 return NULL;
michael@0 1500 }
michael@0 1501 }
michael@0 1502
michael@0 1503 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
michael@0 1504
michael@0 1505 uprv_free(value);
michael@0 1506 }
michael@0 1507 else
michael@0 1508 {
michael@0 1509 *status = U_INVALID_CHAR_FOUND;
michael@0 1510 uprv_free(string);
michael@0 1511 error(line, "Encountered invalid binary string");
michael@0 1512 return NULL;
michael@0 1513 }
michael@0 1514 }
michael@0 1515 else
michael@0 1516 {
michael@0 1517 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
michael@0 1518 warning(startline, "Encountered empty binary tag");
michael@0 1519 }
michael@0 1520 uprv_free(string);
michael@0 1521
michael@0 1522 return result;
michael@0 1523 }
michael@0 1524
michael@0 1525 static struct SResource *
michael@0 1526 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
michael@0 1527 {
michael@0 1528 struct SResource *result = NULL;
michael@0 1529 int32_t value;
michael@0 1530 char *string;
michael@0 1531 char *stopstring;
michael@0 1532 uint32_t len;
michael@0 1533
michael@0 1534 string = getInvariantString(state, NULL, NULL, status);
michael@0 1535
michael@0 1536 if (string == NULL || U_FAILURE(*status))
michael@0 1537 {
michael@0 1538 return NULL;
michael@0 1539 }
michael@0 1540
michael@0 1541 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
michael@0 1542
michael@0 1543 if (U_FAILURE(*status))
michael@0 1544 {
michael@0 1545 uprv_free(string);
michael@0 1546 return NULL;
michael@0 1547 }
michael@0 1548
michael@0 1549 if(isVerbose()){
michael@0 1550 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 1551 }
michael@0 1552
michael@0 1553 if (uprv_strlen(string) <= 0)
michael@0 1554 {
michael@0 1555 warning(startline, "Encountered empty integer. Default value is 0.");
michael@0 1556 }
michael@0 1557
michael@0 1558 /* Allow integer support for hexdecimal, octal digit and decimal*/
michael@0 1559 /* and handle illegal char in the integer*/
michael@0 1560 value = uprv_strtoul(string, &stopstring, 0);
michael@0 1561 len=(uint32_t)(stopstring-string);
michael@0 1562 if(len==uprv_strlen(string))
michael@0 1563 {
michael@0 1564 result = int_open(state->bundle, tag, value, comment, status);
michael@0 1565 }
michael@0 1566 else
michael@0 1567 {
michael@0 1568 *status=U_INVALID_CHAR_FOUND;
michael@0 1569 }
michael@0 1570 uprv_free(string);
michael@0 1571
michael@0 1572 return result;
michael@0 1573 }
michael@0 1574
michael@0 1575 static struct SResource *
michael@0 1576 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
michael@0 1577 {
michael@0 1578 struct SResource *result;
michael@0 1579 FileStream *file;
michael@0 1580 int32_t len;
michael@0 1581 uint8_t *data;
michael@0 1582 char *filename;
michael@0 1583 uint32_t line;
michael@0 1584 char *fullname = NULL;
michael@0 1585 filename = getInvariantString(state, &line, NULL, status);
michael@0 1586
michael@0 1587 if (U_FAILURE(*status))
michael@0 1588 {
michael@0 1589 return NULL;
michael@0 1590 }
michael@0 1591
michael@0 1592 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
michael@0 1593
michael@0 1594 if (U_FAILURE(*status))
michael@0 1595 {
michael@0 1596 uprv_free(filename);
michael@0 1597 return NULL;
michael@0 1598 }
michael@0 1599
michael@0 1600 if(isVerbose()){
michael@0 1601 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 1602 }
michael@0 1603
michael@0 1604 /* Open the input file for reading */
michael@0 1605 if (state->inputdir == NULL)
michael@0 1606 {
michael@0 1607 #if 1
michael@0 1608 /*
michael@0 1609 * Always save file file name, even if there's
michael@0 1610 * no input directory specified. MIGHT BREAK SOMETHING
michael@0 1611 */
michael@0 1612 int32_t filenameLength = uprv_strlen(filename);
michael@0 1613
michael@0 1614 fullname = (char *) uprv_malloc(filenameLength + 1);
michael@0 1615 uprv_strcpy(fullname, filename);
michael@0 1616 #endif
michael@0 1617
michael@0 1618 file = T_FileStream_open(filename, "rb");
michael@0 1619 }
michael@0 1620 else
michael@0 1621 {
michael@0 1622
michael@0 1623 int32_t count = (int32_t)uprv_strlen(filename);
michael@0 1624
michael@0 1625 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
michael@0 1626 {
michael@0 1627 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
michael@0 1628
michael@0 1629 /* test for NULL */
michael@0 1630 if(fullname == NULL)
michael@0 1631 {
michael@0 1632 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 1633 return NULL;
michael@0 1634 }
michael@0 1635
michael@0 1636 uprv_strcpy(fullname, state->inputdir);
michael@0 1637
michael@0 1638 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
michael@0 1639 fullname[state->inputdirLength + 1] = '\0';
michael@0 1640
michael@0 1641 uprv_strcat(fullname, filename);
michael@0 1642 }
michael@0 1643 else
michael@0 1644 {
michael@0 1645 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
michael@0 1646
michael@0 1647 /* test for NULL */
michael@0 1648 if(fullname == NULL)
michael@0 1649 {
michael@0 1650 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 1651 return NULL;
michael@0 1652 }
michael@0 1653
michael@0 1654 uprv_strcpy(fullname, state->inputdir);
michael@0 1655 uprv_strcat(fullname, filename);
michael@0 1656 }
michael@0 1657
michael@0 1658 file = T_FileStream_open(fullname, "rb");
michael@0 1659
michael@0 1660 }
michael@0 1661
michael@0 1662 if (file == NULL)
michael@0 1663 {
michael@0 1664 error(line, "couldn't open input file %s", filename);
michael@0 1665 *status = U_FILE_ACCESS_ERROR;
michael@0 1666 return NULL;
michael@0 1667 }
michael@0 1668
michael@0 1669 len = T_FileStream_size(file);
michael@0 1670 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
michael@0 1671 /* test for NULL */
michael@0 1672 if(data == NULL)
michael@0 1673 {
michael@0 1674 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 1675 T_FileStream_close (file);
michael@0 1676 return NULL;
michael@0 1677 }
michael@0 1678
michael@0 1679 /* int32_t numRead = */ T_FileStream_read (file, data, len);
michael@0 1680 T_FileStream_close (file);
michael@0 1681
michael@0 1682 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
michael@0 1683
michael@0 1684 uprv_free(data);
michael@0 1685 uprv_free(filename);
michael@0 1686 uprv_free(fullname);
michael@0 1687
michael@0 1688 return result;
michael@0 1689 }
michael@0 1690
michael@0 1691 static struct SResource *
michael@0 1692 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
michael@0 1693 {
michael@0 1694 struct SResource *result;
michael@0 1695 int32_t len=0;
michael@0 1696 char *filename;
michael@0 1697 uint32_t line;
michael@0 1698 UChar *pTarget = NULL;
michael@0 1699
michael@0 1700 UCHARBUF *ucbuf;
michael@0 1701 char *fullname = NULL;
michael@0 1702 int32_t count = 0;
michael@0 1703 const char* cp = NULL;
michael@0 1704 const UChar* uBuffer = NULL;
michael@0 1705
michael@0 1706 filename = getInvariantString(state, &line, NULL, status);
michael@0 1707 count = (int32_t)uprv_strlen(filename);
michael@0 1708
michael@0 1709 if (U_FAILURE(*status))
michael@0 1710 {
michael@0 1711 return NULL;
michael@0 1712 }
michael@0 1713
michael@0 1714 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
michael@0 1715
michael@0 1716 if (U_FAILURE(*status))
michael@0 1717 {
michael@0 1718 uprv_free(filename);
michael@0 1719 return NULL;
michael@0 1720 }
michael@0 1721
michael@0 1722 if(isVerbose()){
michael@0 1723 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 1724 }
michael@0 1725
michael@0 1726 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
michael@0 1727 /* test for NULL */
michael@0 1728 if(fullname == NULL)
michael@0 1729 {
michael@0 1730 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 1731 uprv_free(filename);
michael@0 1732 return NULL;
michael@0 1733 }
michael@0 1734
michael@0 1735 if(state->inputdir!=NULL){
michael@0 1736 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
michael@0 1737 {
michael@0 1738
michael@0 1739 uprv_strcpy(fullname, state->inputdir);
michael@0 1740
michael@0 1741 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
michael@0 1742 fullname[state->inputdirLength + 1] = '\0';
michael@0 1743
michael@0 1744 uprv_strcat(fullname, filename);
michael@0 1745 }
michael@0 1746 else
michael@0 1747 {
michael@0 1748 uprv_strcpy(fullname, state->inputdir);
michael@0 1749 uprv_strcat(fullname, filename);
michael@0 1750 }
michael@0 1751 }else{
michael@0 1752 uprv_strcpy(fullname,filename);
michael@0 1753 }
michael@0 1754
michael@0 1755 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
michael@0 1756
michael@0 1757 if (U_FAILURE(*status)) {
michael@0 1758 error(line, "couldn't open input file %s\n", filename);
michael@0 1759 return NULL;
michael@0 1760 }
michael@0 1761
michael@0 1762 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
michael@0 1763 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
michael@0 1764
michael@0 1765 ucbuf_close(ucbuf);
michael@0 1766
michael@0 1767 uprv_free(pTarget);
michael@0 1768
michael@0 1769 uprv_free(filename);
michael@0 1770 uprv_free(fullname);
michael@0 1771
michael@0 1772 return result;
michael@0 1773 }
michael@0 1774
michael@0 1775
michael@0 1776
michael@0 1777
michael@0 1778
michael@0 1779 U_STRING_DECL(k_type_string, "string", 6);
michael@0 1780 U_STRING_DECL(k_type_binary, "binary", 6);
michael@0 1781 U_STRING_DECL(k_type_bin, "bin", 3);
michael@0 1782 U_STRING_DECL(k_type_table, "table", 5);
michael@0 1783 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
michael@0 1784 U_STRING_DECL(k_type_int, "int", 3);
michael@0 1785 U_STRING_DECL(k_type_integer, "integer", 7);
michael@0 1786 U_STRING_DECL(k_type_array, "array", 5);
michael@0 1787 U_STRING_DECL(k_type_alias, "alias", 5);
michael@0 1788 U_STRING_DECL(k_type_intvector, "intvector", 9);
michael@0 1789 U_STRING_DECL(k_type_import, "import", 6);
michael@0 1790 U_STRING_DECL(k_type_include, "include", 7);
michael@0 1791
michael@0 1792 /* Various non-standard processing plugins that create one or more special resources. */
michael@0 1793 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
michael@0 1794 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
michael@0 1795 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
michael@0 1796 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
michael@0 1797
/*
 * Resource types known to the parser.
 *
 * The numeric value of each enumerator is used as an index into
 * gResourceTypes[], so the order here must match the order of that table.
 */
typedef enum EResourceType
{
    RT_UNKNOWN,                 /* no explicit type seen; parseResource() infers one */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,       /* rejected by parseResource(); accepted by parse() */
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker: the name search in parseResourceType() stops here */
} EResourceType;
michael@0 1817
michael@0 1818 static struct {
michael@0 1819 const char *nameChars; /* only used for debugging */
michael@0 1820 const UChar *nameUChars;
michael@0 1821 ParseResourceFunction *parseFunction;
michael@0 1822 } gResourceTypes[] = {
michael@0 1823 {"Unknown", NULL, NULL},
michael@0 1824 {"string", k_type_string, parseString},
michael@0 1825 {"binary", k_type_binary, parseBinary},
michael@0 1826 {"table", k_type_table, parseTable},
michael@0 1827 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
michael@0 1828 {"integer", k_type_integer, parseInteger},
michael@0 1829 {"array", k_type_array, parseArray},
michael@0 1830 {"alias", k_type_alias, parseAlias},
michael@0 1831 {"intvector", k_type_intvector, parseIntVector},
michael@0 1832 {"import", k_type_import, parseImport},
michael@0 1833 {"include", k_type_include, parseInclude},
michael@0 1834 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
michael@0 1835 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
michael@0 1836 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
michael@0 1837 {"process(dependency)", k_type_plugin_dependency, parseDependency},
michael@0 1838 {"reserved", NULL, NULL}
michael@0 1839 };
michael@0 1840
michael@0 1841 void initParser()
michael@0 1842 {
michael@0 1843 U_STRING_INIT(k_type_string, "string", 6);
michael@0 1844 U_STRING_INIT(k_type_binary, "binary", 6);
michael@0 1845 U_STRING_INIT(k_type_bin, "bin", 3);
michael@0 1846 U_STRING_INIT(k_type_table, "table", 5);
michael@0 1847 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
michael@0 1848 U_STRING_INIT(k_type_int, "int", 3);
michael@0 1849 U_STRING_INIT(k_type_integer, "integer", 7);
michael@0 1850 U_STRING_INIT(k_type_array, "array", 5);
michael@0 1851 U_STRING_INIT(k_type_alias, "alias", 5);
michael@0 1852 U_STRING_INIT(k_type_intvector, "intvector", 9);
michael@0 1853 U_STRING_INIT(k_type_import, "import", 6);
michael@0 1854 U_STRING_INIT(k_type_include, "include", 7);
michael@0 1855
michael@0 1856 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
michael@0 1857 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
michael@0 1858 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
michael@0 1859 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
michael@0 1860 }
michael@0 1861
michael@0 1862 static inline UBool isTable(enum EResourceType type) {
michael@0 1863 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
michael@0 1864 }
michael@0 1865
michael@0 1866 static enum EResourceType
michael@0 1867 parseResourceType(ParseState* state, UErrorCode *status)
michael@0 1868 {
michael@0 1869 struct UString *tokenValue;
michael@0 1870 struct UString comment;
michael@0 1871 enum EResourceType result = RT_UNKNOWN;
michael@0 1872 uint32_t line=0;
michael@0 1873 ustr_init(&comment);
michael@0 1874 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
michael@0 1875
michael@0 1876 if (U_FAILURE(*status))
michael@0 1877 {
michael@0 1878 return RT_UNKNOWN;
michael@0 1879 }
michael@0 1880
michael@0 1881 *status = U_ZERO_ERROR;
michael@0 1882
michael@0 1883 /* Search for normal types */
michael@0 1884 result=RT_UNKNOWN;
michael@0 1885 while ((result=(EResourceType)(result+1)) < RT_RESERVED) {
michael@0 1886 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
michael@0 1887 break;
michael@0 1888 }
michael@0 1889 }
michael@0 1890 /* Now search for the aliases */
michael@0 1891 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
michael@0 1892 result = RT_INTEGER;
michael@0 1893 }
michael@0 1894 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
michael@0 1895 result = RT_BINARY;
michael@0 1896 }
michael@0 1897 else if (result == RT_RESERVED) {
michael@0 1898 char tokenBuffer[1024];
michael@0 1899 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
michael@0 1900 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
michael@0 1901 *status = U_INVALID_FORMAT_ERROR;
michael@0 1902 error(line, "unknown resource type '%s'", tokenBuffer);
michael@0 1903 }
michael@0 1904
michael@0 1905 return result;
michael@0 1906 }
michael@0 1907
michael@0 1908 /* parse a non-top-level resource */
michael@0 1909 static struct SResource *
michael@0 1910 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
michael@0 1911 {
michael@0 1912 enum ETokenType token;
michael@0 1913 enum EResourceType resType = RT_UNKNOWN;
michael@0 1914 ParseResourceFunction *parseFunction = NULL;
michael@0 1915 struct UString *tokenValue;
michael@0 1916 uint32_t startline;
michael@0 1917 uint32_t line;
michael@0 1918
michael@0 1919
michael@0 1920 token = getToken(state, &tokenValue, NULL, &startline, status);
michael@0 1921
michael@0 1922 if(isVerbose()){
michael@0 1923 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
michael@0 1924 }
michael@0 1925
michael@0 1926 /* name . [ ':' type ] '{' resource '}' */
michael@0 1927 /* This function parses from the colon onwards. If the colon is present, parse the
michael@0 1928 type then try to parse a resource of that type. If there is no explicit type,
michael@0 1929 work it out using the lookahead tokens. */
michael@0 1930 switch (token)
michael@0 1931 {
michael@0 1932 case TOK_EOF:
michael@0 1933 *status = U_INVALID_FORMAT_ERROR;
michael@0 1934 error(startline, "Unexpected EOF encountered");
michael@0 1935 return NULL;
michael@0 1936
michael@0 1937 case TOK_ERROR:
michael@0 1938 *status = U_INVALID_FORMAT_ERROR;
michael@0 1939 return NULL;
michael@0 1940
michael@0 1941 case TOK_COLON:
michael@0 1942 resType = parseResourceType(state, status);
michael@0 1943 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
michael@0 1944
michael@0 1945 if (U_FAILURE(*status))
michael@0 1946 {
michael@0 1947 return NULL;
michael@0 1948 }
michael@0 1949
michael@0 1950 break;
michael@0 1951
michael@0 1952 case TOK_OPEN_BRACE:
michael@0 1953 break;
michael@0 1954
michael@0 1955 default:
michael@0 1956 *status = U_INVALID_FORMAT_ERROR;
michael@0 1957 error(startline, "syntax error while reading a resource, expected '{' or ':'");
michael@0 1958 return NULL;
michael@0 1959 }
michael@0 1960
michael@0 1961
michael@0 1962 if (resType == RT_UNKNOWN)
michael@0 1963 {
michael@0 1964 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
michael@0 1965 We could have any of the following:
michael@0 1966 { { => array (nested)
michael@0 1967 { :/} => array
michael@0 1968 { string , => string array
michael@0 1969
michael@0 1970 { string { => table
michael@0 1971
michael@0 1972 { string :/{ => table
michael@0 1973 { string } => string
michael@0 1974 */
michael@0 1975
michael@0 1976 token = peekToken(state, 0, NULL, &line, NULL,status);
michael@0 1977
michael@0 1978 if (U_FAILURE(*status))
michael@0 1979 {
michael@0 1980 return NULL;
michael@0 1981 }
michael@0 1982
michael@0 1983 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
michael@0 1984 {
michael@0 1985 resType = RT_ARRAY;
michael@0 1986 }
michael@0 1987 else if (token == TOK_STRING)
michael@0 1988 {
michael@0 1989 token = peekToken(state, 1, NULL, &line, NULL, status);
michael@0 1990
michael@0 1991 if (U_FAILURE(*status))
michael@0 1992 {
michael@0 1993 return NULL;
michael@0 1994 }
michael@0 1995
michael@0 1996 switch (token)
michael@0 1997 {
michael@0 1998 case TOK_COMMA: resType = RT_ARRAY; break;
michael@0 1999 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
michael@0 2000 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
michael@0 2001 case TOK_COLON: resType = RT_TABLE; break;
michael@0 2002 default:
michael@0 2003 *status = U_INVALID_FORMAT_ERROR;
michael@0 2004 error(line, "Unexpected token after string, expected ',', '{' or '}'");
michael@0 2005 return NULL;
michael@0 2006 }
michael@0 2007 }
michael@0 2008 else
michael@0 2009 {
michael@0 2010 *status = U_INVALID_FORMAT_ERROR;
michael@0 2011 error(line, "Unexpected token after '{'");
michael@0 2012 return NULL;
michael@0 2013 }
michael@0 2014
michael@0 2015 /* printf("Type guessed as %s\n", resourceNames[resType]); */
michael@0 2016 } else if(resType == RT_TABLE_NO_FALLBACK) {
michael@0 2017 *status = U_INVALID_FORMAT_ERROR;
michael@0 2018 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
michael@0 2019 return NULL;
michael@0 2020 }
michael@0 2021
michael@0 2022
michael@0 2023 /* We should now know what we need to parse next, so call the appropriate parser
michael@0 2024 function and return. */
michael@0 2025 parseFunction = gResourceTypes[resType].parseFunction;
michael@0 2026 if (parseFunction != NULL) {
michael@0 2027 return parseFunction(state, tag, startline, comment, status);
michael@0 2028 }
michael@0 2029 else {
michael@0 2030 *status = U_INTERNAL_PROGRAM_ERROR;
michael@0 2031 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
michael@0 2032 }
michael@0 2033
michael@0 2034 return NULL;
michael@0 2035 }
michael@0 2036
michael@0 2037 /* parse the top-level resource */
michael@0 2038 struct SRBRoot *
michael@0 2039 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir,
michael@0 2040 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
michael@0 2041 {
michael@0 2042 struct UString *tokenValue;
michael@0 2043 struct UString comment;
michael@0 2044 uint32_t line;
michael@0 2045 enum EResourceType bundleType;
michael@0 2046 enum ETokenType token;
michael@0 2047 ParseState state;
michael@0 2048 uint32_t i;
michael@0 2049
michael@0 2050
michael@0 2051 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
michael@0 2052 {
michael@0 2053 ustr_init(&state.lookahead[i].value);
michael@0 2054 ustr_init(&state.lookahead[i].comment);
michael@0 2055 }
michael@0 2056
michael@0 2057 initLookahead(&state, buf, status);
michael@0 2058
michael@0 2059 state.inputdir = inputDir;
michael@0 2060 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
michael@0 2061 state.outputdir = outputDir;
michael@0 2062 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
michael@0 2063 state.makeBinaryCollation = makeBinaryCollation;
michael@0 2064 state.omitCollationRules = omitCollationRules;
michael@0 2065
michael@0 2066 ustr_init(&comment);
michael@0 2067 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
michael@0 2068
michael@0 2069 state.bundle = bundle_open(&comment, FALSE, status);
michael@0 2070
michael@0 2071 if (state.bundle == NULL || U_FAILURE(*status))
michael@0 2072 {
michael@0 2073 return NULL;
michael@0 2074 }
michael@0 2075
michael@0 2076
michael@0 2077 bundle_setlocale(state.bundle, tokenValue->fChars, status);
michael@0 2078
michael@0 2079 /* The following code is to make Empty bundle work no matter with :table specifer or not */
michael@0 2080 token = getToken(&state, NULL, NULL, &line, status);
michael@0 2081 if(token==TOK_COLON) {
michael@0 2082 *status=U_ZERO_ERROR;
michael@0 2083 bundleType=parseResourceType(&state, status);
michael@0 2084
michael@0 2085 if(isTable(bundleType))
michael@0 2086 {
michael@0 2087 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
michael@0 2088 }
michael@0 2089 else
michael@0 2090 {
michael@0 2091 *status=U_PARSE_ERROR;
michael@0 2092 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
michael@0 2093 }
michael@0 2094 }
michael@0 2095 else
michael@0 2096 {
michael@0 2097 /* not a colon */
michael@0 2098 if(token==TOK_OPEN_BRACE)
michael@0 2099 {
michael@0 2100 *status=U_ZERO_ERROR;
michael@0 2101 bundleType=RT_TABLE;
michael@0 2102 }
michael@0 2103 else
michael@0 2104 {
michael@0 2105 /* neither colon nor open brace */
michael@0 2106 *status=U_PARSE_ERROR;
michael@0 2107 bundleType=RT_UNKNOWN;
michael@0 2108 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
michael@0 2109 }
michael@0 2110 }
michael@0 2111
michael@0 2112 if (U_FAILURE(*status))
michael@0 2113 {
michael@0 2114 bundle_close(state.bundle, status);
michael@0 2115 return NULL;
michael@0 2116 }
michael@0 2117
michael@0 2118 if(bundleType==RT_TABLE_NO_FALLBACK) {
michael@0 2119 /*
michael@0 2120 * Parse a top-level table with the table(nofallback) declaration.
michael@0 2121 * This is the same as a regular table, but also sets the
michael@0 2122 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
michael@0 2123 */
michael@0 2124 state.bundle->noFallback=TRUE;
michael@0 2125 }
michael@0 2126 /* top-level tables need not handle special table names like "collations" */
michael@0 2127 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
michael@0 2128 if(dependencyArray!=NULL){
michael@0 2129 table_add(state.bundle->fRoot, dependencyArray, 0, status);
michael@0 2130 dependencyArray = NULL;
michael@0 2131 }
michael@0 2132 if (U_FAILURE(*status))
michael@0 2133 {
michael@0 2134 bundle_close(state.bundle, status);
michael@0 2135 res_close(dependencyArray);
michael@0 2136 return NULL;
michael@0 2137 }
michael@0 2138
michael@0 2139 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
michael@0 2140 {
michael@0 2141 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
michael@0 2142 if(isStrict()){
michael@0 2143 *status = U_INVALID_FORMAT_ERROR;
michael@0 2144 return NULL;
michael@0 2145 }
michael@0 2146 }
michael@0 2147
michael@0 2148 cleanupLookahead(&state);
michael@0 2149 ustr_deinit(&comment);
michael@0 2150 return state.bundle;
michael@0 2151 }

mercurial