Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2013, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
21 #include "ucol_imp.h"
22 #include "parse.h"
23 #include "errmsg.h"
24 #include "uhash.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "uinvchar.h"
28 #include "read.h"
29 #include "ustr.h"
30 #include "reslist.h"
31 #include "rbt_pars.h"
32 #include "genrb.h"
33 #include "unicode/ustring.h"
34 #include "unicode/uscript.h"
35 #include "unicode/putil.h"
36 #include <stdio.h>
38 /* Number of tokens to read ahead of the current stream position */
39 #define MAX_LOOKAHEAD 3
41 #define CR 0x000D
42 #define LF 0x000A
43 #define SPACE 0x0020
44 #define TAB 0x0009
45 #define ESCAPE 0x005C
46 #define HASH 0x0023
47 #define QUOTE 0x0027
48 #define ZERO 0x0030
49 #define STARTCOMMAND 0x005B
50 #define ENDCOMMAND 0x005D
51 #define OPENSQBRACKET 0x005B
52 #define CLOSESQBRACKET 0x005D
/* One slot of the parser's lookahead ring: a token together with the data
   that getNextToken() delivers alongside it. */
54 struct Lookahead
55 {
56 enum ETokenType type; /* token kind returned by getNextToken() */
57 struct UString value; /* token text filled in by getNextToken() */
58 struct UString comment; /* comment text filled in by getNextToken() */
59 uint32_t line; /* line number filled in by getNextToken() */
60 };
62 /* keep in sync with token defines in read.h */
/* Printable names for each ETokenType value, indexed by the token value;
   used when building "expecting X, got Y" diagnostics. */
63 const char *tokenNames[TOK_TOKEN_COUNT] =
64 {
65 "string", /* A string token, such as "MonthNames" */
66 "'{'", /* An opening brace character */
67 "'}'", /* A closing brace character */
68 "','", /* A comma */
69 "':'", /* A colon */
71 "<end of file>", /* End of the file has been reached successfully */
/* NOTE(review): presumably the entry for the error token - confirm against read.h */
72 "<end of line>"
73 };
75 /* Just to store "TRUE" */
76 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
/* All state shared by the parse functions for one input file. */
78 typedef struct {
/* Ring of MAX_LOOKAHEAD + 1 slots: the current token plus the tokens read ahead. */
79 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
/* Index of the slot holding the current (next to be returned) token. */
80 uint32_t lookaheadPosition;
/* Input buffer handed to getNextToken(). */
81 UCHARBUF *buffer;
/* Bundle that every *_open() call creates its resources in. */
82 struct SRBRoot *bundle;
/* Directory prefixed to included file names; may be NULL. */
83 const char *inputdir;
/* Used to index the last character of inputdir. */
84 uint32_t inputdirLength;
/* Directory prefixed to dependency file names; may be NULL. */
85 const char *outputdir;
/* Used to index the last character of outputdir. */
86 uint32_t outputdirLength;
/* When set, a %%CollationBin resource is built from "Sequence" rules. */
87 UBool makeBinaryCollation;
/* When set, collation rule strings are dropped instead of being stored. */
88 UBool omitCollationRules;
89 } ParseState;
/* Signature shared by the per-type parse functions (parseString, parseAlias, ...):
   they receive the resource tag, the line the resource started on and its comment. */
91 typedef struct SResource *
92 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
/* Forward declaration: the table and collation parsers call parseResource for each member. */
94 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
96 /* The nature of the lookahead buffer:
97 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
98 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
99 When getToken is called, the current pointer is moved to the next slot and the
100 old slot is filled with the next token from the reader by calling getNextToken.
101 The token values are stored in the slot, which means that token values don't
102 survive a call to getToken, ie.
104 UString *value;
106 getToken(&value, NULL, status);
107 getToken(NULL, NULL, status); bad - value is now a different string
108 */
109 static void
110 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
111 {
112 static uint32_t initTypeStrings = 0;
113 uint32_t i;
115 if (!initTypeStrings)
116 {
117 initTypeStrings = 1;
118 }
120 state->lookaheadPosition = 0;
121 state->buffer = buf;
123 resetLineNumber();
125 for (i = 0; i < MAX_LOOKAHEAD; i++)
126 {
127 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
128 if (U_FAILURE(*status))
129 {
130 return;
131 }
132 }
134 *status = U_ZERO_ERROR;
135 }
137 static void
138 cleanupLookahead(ParseState* state)
139 {
140 uint32_t i;
141 for (i = 0; i <= MAX_LOOKAHEAD; i++)
142 {
143 ustr_deinit(&state->lookahead[i].value);
144 ustr_deinit(&state->lookahead[i].comment);
145 }
147 }
149 static enum ETokenType
150 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
151 {
152 enum ETokenType result;
153 uint32_t i;
155 result = state->lookahead[state->lookaheadPosition].type;
157 if (tokenValue != NULL)
158 {
159 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
160 }
162 if (linenumber != NULL)
163 {
164 *linenumber = state->lookahead[state->lookaheadPosition].line;
165 }
167 if (comment != NULL)
168 {
169 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
170 }
172 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
173 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
174 ustr_setlen(&state->lookahead[i].comment, 0, status);
175 ustr_setlen(&state->lookahead[i].value, 0, status);
176 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
178 /* printf("getToken, returning %s\n", tokenNames[result]); */
180 return result;
181 }
183 static enum ETokenType
184 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
185 {
186 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
188 if (U_FAILURE(*status))
189 {
190 return TOK_ERROR;
191 }
193 if (lookaheadCount >= MAX_LOOKAHEAD)
194 {
195 *status = U_INTERNAL_PROGRAM_ERROR;
196 return TOK_ERROR;
197 }
199 if (tokenValue != NULL)
200 {
201 *tokenValue = &state->lookahead[i].value;
202 }
204 if (linenumber != NULL)
205 {
206 *linenumber = state->lookahead[i].line;
207 }
209 if(comment != NULL){
210 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
211 }
213 return state->lookahead[i].type;
214 }
216 static void
217 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
218 {
219 uint32_t line;
221 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
223 if (linenumber != NULL)
224 {
225 *linenumber = line;
226 }
228 if (U_FAILURE(*status))
229 {
230 return;
231 }
233 if (token != expectedToken)
234 {
235 *status = U_INVALID_FORMAT_ERROR;
236 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
237 }
238 else
239 {
240 *status = U_ZERO_ERROR;
241 }
242 }
244 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
245 {
246 struct UString *tokenValue;
247 char *result;
248 uint32_t count;
250 expect(state, TOK_STRING, &tokenValue, comment, line, status);
252 if (U_FAILURE(*status))
253 {
254 return NULL;
255 }
257 count = u_strlen(tokenValue->fChars);
258 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
259 *status = U_INVALID_FORMAT_ERROR;
260 error(*line, "invariant characters required for table keys, binary data, etc.");
261 return NULL;
262 }
264 result = static_cast<char *>(uprv_malloc(count+1));
266 if (result == NULL)
267 {
268 *status = U_MEMORY_ALLOCATION_ERROR;
269 return NULL;
270 }
272 u_UCharsToChars(tokenValue->fChars, result, count+1);
273 return result;
274 }
276 static struct SResource *
277 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
278 {
279 struct SResource *result = NULL;
280 struct UString *tokenValue;
281 FileStream *file = NULL;
282 char filename[256] = { '\0' };
283 char cs[128] = { '\0' };
284 uint32_t line;
285 UBool quoted = FALSE;
286 UCHARBUF *ucbuf=NULL;
287 UChar32 c = 0;
288 const char* cp = NULL;
289 UChar *pTarget = NULL;
290 UChar *target = NULL;
291 UChar *targetLimit = NULL;
292 int32_t size = 0;
294 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
296 if(isVerbose()){
297 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
298 }
300 if (U_FAILURE(*status))
301 {
302 return NULL;
303 }
304 /* make the filename including the directory */
305 if (state->inputdir != NULL)
306 {
307 uprv_strcat(filename, state->inputdir);
309 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
310 {
311 uprv_strcat(filename, U_FILE_SEP_STRING);
312 }
313 }
315 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
317 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
319 if (U_FAILURE(*status))
320 {
321 return NULL;
322 }
323 uprv_strcat(filename, cs);
325 if(state->omitCollationRules) {
326 return res_none();
327 }
329 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
331 if (U_FAILURE(*status)) {
332 error(line, "An error occured while opening the input file %s\n", filename);
333 return NULL;
334 }
336 /* We allocate more space than actually required
337 * since the actual size needed for storing UChars
338 * is not known in UTF-8 byte stream
339 */
340 size = ucbuf_size(ucbuf) + 1;
341 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
342 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
343 target = pTarget;
344 targetLimit = pTarget+size;
346 /* read the rules into the buffer */
347 while (target < targetLimit)
348 {
349 c = ucbuf_getc(ucbuf, status);
350 if(c == QUOTE) {
351 quoted = (UBool)!quoted;
352 }
353 /* weiv (06/26/2002): adding the following:
354 * - preserving spaces in commands [...]
355 * - # comments until the end of line
356 */
357 if (c == STARTCOMMAND && !quoted)
358 {
359 /* preserve commands
360 * closing bracket will be handled by the
361 * append at the end of the loop
362 */
363 while(c != ENDCOMMAND) {
364 U_APPEND_CHAR32_ONLY(c, target);
365 c = ucbuf_getc(ucbuf, status);
366 }
367 }
368 else if (c == HASH && !quoted) {
369 /* skip comments */
370 while(c != CR && c != LF) {
371 c = ucbuf_getc(ucbuf, status);
372 }
373 continue;
374 }
375 else if (c == ESCAPE)
376 {
377 c = unescape(ucbuf, status);
379 if (c == (UChar32)U_ERR)
380 {
381 uprv_free(pTarget);
382 T_FileStream_close(file);
383 return NULL;
384 }
385 }
386 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
387 {
388 /* ignore spaces carriage returns
389 * and line feed unless in the form \uXXXX
390 */
391 continue;
392 }
394 /* Append UChar * after dissembling if c > 0xffff*/
395 if (c != (UChar32)U_EOF)
396 {
397 U_APPEND_CHAR32_ONLY(c, target);
398 }
399 else
400 {
401 break;
402 }
403 }
405 /* terminate the string */
406 if(target < targetLimit){
407 *target = 0x0000;
408 }
410 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
413 ucbuf_close(ucbuf);
414 uprv_free(pTarget);
415 T_FileStream_close(file);
417 return result;
418 }
420 static struct SResource *
421 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
422 {
423 struct SResource *result = NULL;
424 struct UString *tokenValue;
425 FileStream *file = NULL;
426 char filename[256] = { '\0' };
427 char cs[128] = { '\0' };
428 uint32_t line;
429 UCHARBUF *ucbuf=NULL;
430 const char* cp = NULL;
431 UChar *pTarget = NULL;
432 const UChar *pSource = NULL;
433 int32_t size = 0;
435 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
437 if(isVerbose()){
438 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
439 }
441 if (U_FAILURE(*status))
442 {
443 return NULL;
444 }
445 /* make the filename including the directory */
446 if (state->inputdir != NULL)
447 {
448 uprv_strcat(filename, state->inputdir);
450 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
451 {
452 uprv_strcat(filename, U_FILE_SEP_STRING);
453 }
454 }
456 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
458 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
460 if (U_FAILURE(*status))
461 {
462 return NULL;
463 }
464 uprv_strcat(filename, cs);
467 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
469 if (U_FAILURE(*status)) {
470 error(line, "An error occured while opening the input file %s\n", filename);
471 return NULL;
472 }
474 /* We allocate more space than actually required
475 * since the actual size needed for storing UChars
476 * is not known in UTF-8 byte stream
477 */
478 pSource = ucbuf_getBuffer(ucbuf, &size, status);
479 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
480 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
482 #if !UCONFIG_NO_TRANSLITERATION
483 size = utrans_stripRules(pSource, size, pTarget, status);
484 #else
485 size = 0;
486 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
487 #endif
488 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
490 ucbuf_close(ucbuf);
491 uprv_free(pTarget);
492 T_FileStream_close(file);
494 return result;
495 }
496 static struct SResource* dependencyArray = NULL;
498 static struct SResource *
499 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
500 {
501 struct SResource *result = NULL;
502 struct SResource *elem = NULL;
503 struct UString *tokenValue;
504 uint32_t line;
505 char filename[256] = { '\0' };
506 char cs[128] = { '\0' };
508 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
510 if(isVerbose()){
511 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
512 }
514 if (U_FAILURE(*status))
515 {
516 return NULL;
517 }
518 /* make the filename including the directory */
519 if (state->outputdir != NULL)
520 {
521 uprv_strcat(filename, state->outputdir);
523 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
524 {
525 uprv_strcat(filename, U_FILE_SEP_STRING);
526 }
527 }
529 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
531 if (U_FAILURE(*status))
532 {
533 return NULL;
534 }
535 uprv_strcat(filename, cs);
536 if(!T_FileStream_file_exists(filename)){
537 if(isStrict()){
538 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
539 }else{
540 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
541 }
542 }
543 if(dependencyArray==NULL){
544 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
545 }
546 if(tag!=NULL){
547 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
548 }
549 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
551 array_add(dependencyArray, elem, status);
553 if (U_FAILURE(*status))
554 {
555 return NULL;
556 }
557 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
558 return result;
559 }
560 static struct SResource *
561 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
562 {
563 struct UString *tokenValue;
564 struct SResource *result = NULL;
566 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
567 {
568 return parseUCARules(tag, startline, status);
569 }*/
570 if(isVerbose()){
571 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
572 }
573 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
575 if (U_SUCCESS(*status))
576 {
577 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
578 doesn't survive expect either) */
580 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
581 if(U_SUCCESS(*status) && result) {
582 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
584 if (U_FAILURE(*status))
585 {
586 res_close(result);
587 return NULL;
588 }
589 }
590 }
592 return result;
593 }
595 static struct SResource *
596 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
597 {
598 struct UString *tokenValue;
599 struct SResource *result = NULL;
601 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
603 if(isVerbose()){
604 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
605 }
607 if (U_SUCCESS(*status))
608 {
609 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
610 doesn't survive expect either) */
612 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
614 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
616 if (U_FAILURE(*status))
617 {
618 res_close(result);
619 return NULL;
620 }
621 }
623 return result;
624 }
/* Context handed to importFromDataFile() through ucol_openRulesForImport():
   the directories to use when parsing an imported locale file. */
626 typedef struct{
627 const char* inputDir; /* copied from ParseState.inputdir; may be NULL */
628 const char* outputDir; /* copied from ParseState.outputdir */
629 } GenrbData;
631 static struct SResource* resLookup(struct SResource* res, const char* key){
632 struct SResource *current = NULL;
633 struct SResTable *list;
634 if (res == res_none()) {
635 return NULL;
636 }
638 list = &(res->u.fTable);
640 current = list->fFirst;
641 while (current != NULL) {
642 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
643 return current;
644 }
645 current = current->fNext;
646 }
647 return NULL;
648 }
650 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
651 struct SRBRoot *data = NULL;
652 UCHARBUF *ucbuf = NULL;
653 GenrbData* genrbdata = (GenrbData*) context;
654 int localeLength = strlen(locale);
655 char* filename = (char*)uprv_malloc(localeLength+5);
656 char *inputDirBuf = NULL;
657 char *openFileName = NULL;
658 const char* cp = "";
659 UChar* urules = NULL;
660 int32_t urulesLength = 0;
661 int32_t i = 0;
662 int32_t dirlen = 0;
663 int32_t filelen = 0;
664 struct SResource* root;
665 struct SResource* collations;
666 struct SResource* collation;
667 struct SResource* sequence;
669 memcpy(filename, locale, localeLength);
670 for(i = 0; i < localeLength; i++){
671 if(filename[i] == '-'){
672 filename[i] = '_';
673 }
674 }
675 filename[localeLength] = '.';
676 filename[localeLength+1] = 't';
677 filename[localeLength+2] = 'x';
678 filename[localeLength+3] = 't';
679 filename[localeLength+4] = 0;
682 if (status==NULL || U_FAILURE(*status)) {
683 return NULL;
684 }
685 if(filename==NULL){
686 *status=U_ILLEGAL_ARGUMENT_ERROR;
687 return NULL;
688 }else{
689 filelen = (int32_t)uprv_strlen(filename);
690 }
691 if(genrbdata->inputDir == NULL) {
692 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
693 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
694 openFileName[0] = '\0';
695 if (filenameBegin != NULL) {
696 /*
697 * When a filename ../../../data/root.txt is specified,
698 * we presume that the input directory is ../../../data
699 * This is very important when the resource file includes
700 * another file, like UCARules.txt or thaidict.brk.
701 */
702 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
703 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
705 /* test for NULL */
706 if(inputDirBuf == NULL) {
707 *status = U_MEMORY_ALLOCATION_ERROR;
708 goto finish;
709 }
711 inputDirBuf[filenameSize - 1] = 0;
712 genrbdata->inputDir = inputDirBuf;
713 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
714 }
715 }else{
716 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
718 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
719 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
721 /* test for NULL */
722 if(openFileName == NULL) {
723 *status = U_MEMORY_ALLOCATION_ERROR;
724 goto finish;
725 }
727 openFileName[0] = '\0';
728 /*
729 * append the input dir to openFileName if the first char in
730 * filename is not file seperation char and the last char input directory is not '.'.
731 * This is to support :
732 * genrb -s. /home/icu/data
733 * genrb -s. icu/data
734 * The user cannot mix notations like
735 * genrb -s. /icu/data --- the absolute path specified. -s redundant
736 * user should use
737 * genrb -s. icu/data --- start from CWD and look in icu/data dir
738 */
739 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
740 uprv_strcpy(openFileName, genrbdata->inputDir);
741 openFileName[dirlen] = U_FILE_SEP_CHAR;
742 }
743 openFileName[dirlen + 1] = '\0';
744 } else {
745 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
747 /* test for NULL */
748 if(openFileName == NULL) {
749 *status = U_MEMORY_ALLOCATION_ERROR;
750 goto finish;
751 }
753 uprv_strcpy(openFileName, genrbdata->inputDir);
755 }
756 }
757 uprv_strcat(openFileName, filename);
758 /* printf("%s\n", openFileName); */
759 *status = U_ZERO_ERROR;
760 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
762 if(*status == U_FILE_ACCESS_ERROR) {
764 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
765 goto finish;
766 }
767 if (ucbuf == NULL || U_FAILURE(*status)) {
768 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
769 goto finish;
770 }
772 /* Parse the data into an SRBRoot */
773 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, FALSE, status);
775 root = data->fRoot;
776 collations = resLookup(root, "collations");
777 if (collations != NULL) {
778 collation = resLookup(collations, type);
779 if (collation != NULL) {
780 sequence = resLookup(collation, "Sequence");
781 if (sequence != NULL) {
782 urules = sequence->u.fString.fChars;
783 urulesLength = sequence->u.fString.fLength;
784 *pLength = urulesLength;
785 }
786 }
787 }
789 finish:
790 if (inputDirBuf != NULL) {
791 uprv_free(inputDirBuf);
792 }
794 if (openFileName != NULL) {
795 uprv_free(openFileName);
796 }
798 if(ucbuf) {
799 ucbuf_close(ucbuf);
800 }
802 return urules;
803 }
805 // Quick-and-dirty escaping function.
806 // Assumes that we are on an ASCII-based platform.
807 static void
808 escape(const UChar *s, char *buffer) {
809 int32_t length = u_strlen(s);
810 int32_t i = 0;
811 for (;;) {
812 UChar32 c;
813 U16_NEXT(s, i, length, c);
814 if (c == 0) {
815 *buffer = 0;
816 return;
817 } else if (0x20 <= c && c <= 0x7e) {
818 // printable ASCII
819 *buffer++ = (char)c; // assumes ASCII-based platform
820 } else {
821 buffer += sprintf(buffer, "\\u%04X", (int)c);
822 }
823 }
824 }
826 static struct SResource *
827 addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
828 {
829 struct SResource *member = NULL;
830 struct UString *tokenValue;
831 struct UString comment;
832 enum ETokenType token;
833 char subtag[1024];
834 UVersionInfo version;
835 uint32_t line;
836 GenrbData genrbdata;
837 /* '{' . (name resource)* '}' */
838 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
840 for (;;)
841 {
842 ustr_init(&comment);
843 token = getToken(state, &tokenValue, &comment, &line, status);
845 if (token == TOK_CLOSE_BRACE)
846 {
847 return result;
848 }
850 if (token != TOK_STRING)
851 {
852 res_close(result);
853 *status = U_INVALID_FORMAT_ERROR;
855 if (token == TOK_EOF)
856 {
857 error(startline, "unterminated table");
858 }
859 else
860 {
861 error(line, "Unexpected token %s", tokenNames[token]);
862 }
864 return NULL;
865 }
867 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
869 if (U_FAILURE(*status))
870 {
871 res_close(result);
872 return NULL;
873 }
875 member = parseResource(state, subtag, NULL, status);
877 if (U_FAILURE(*status))
878 {
879 res_close(result);
880 return NULL;
881 }
883 if (uprv_strcmp(subtag, "Version") == 0)
884 {
885 char ver[40];
886 int32_t length = member->u.fString.fLength;
888 if (length >= (int32_t) sizeof(ver))
889 {
890 length = (int32_t) sizeof(ver) - 1;
891 }
893 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
894 u_versionFromString(version, ver);
896 table_add(result, member, line, status);
898 }
899 else if (uprv_strcmp(subtag, "Override") == 0)
900 {
901 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
902 table_add(result, member, line, status);
904 }
905 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
906 {
907 /* discard duplicate %%CollationBin if any*/
908 }
909 else if (uprv_strcmp(subtag, "Sequence") == 0)
910 {
911 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
912 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
913 #else
914 if(state->makeBinaryCollation) {
916 /* do the collation elements */
917 int32_t len = 0;
918 uint8_t *data = NULL;
919 UCollator *coll = NULL;
920 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
921 int32_t reorderCodeCount;
922 int32_t reorderCodeIndex;
923 UParseError parseError;
925 genrbdata.inputDir = state->inputdir;
926 genrbdata.outputDir = state->outputdir;
928 UErrorCode intStatus = U_ZERO_ERROR;
929 uprv_memset(&parseError, 0, sizeof(parseError));
930 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
931 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
933 if (U_SUCCESS(intStatus) && coll != NULL)
934 {
935 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
936 data = (uint8_t *)uprv_malloc(len);
937 intStatus = U_ZERO_ERROR;
938 len = ucol_cloneBinary(coll, data, len, &intStatus);
940 /* tailoring rules version */
941 /* This is wrong! */
942 /*coll->dataInfo.dataVersion[1] = version[0];*/
943 /* Copy tailoring version. Builder version already */
944 /* set in ucol_openRules */
945 ((UCATableHeader *)data)->version[1] = version[0];
946 ((UCATableHeader *)data)->version[2] = version[1];
947 ((UCATableHeader *)data)->version[3] = version[2];
949 if (U_SUCCESS(intStatus) && data != NULL)
950 {
951 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
952 table_add(result, collationBin, line, status);
953 uprv_free(data);
955 reorderCodeCount = ucol_getReorderCodes(
956 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
957 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
958 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
959 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
960 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
961 }
962 table_add(result, reorderCodeRes, line, status);
963 }
964 }
965 else
966 {
967 warning(line, "could not obtain rules from collator");
968 if(isStrict()){
969 *status = U_INVALID_FORMAT_ERROR;
970 return NULL;
971 }
972 }
974 ucol_close(coll);
975 }
976 else
977 {
978 if(intStatus == U_FILE_ACCESS_ERROR) {
979 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
980 *status = intStatus;
981 return NULL;
982 }
983 char preBuffer[100], postBuffer[100];
984 escape(parseError.preContext, preBuffer);
985 escape(parseError.postContext, postBuffer);
986 warning(line,
987 "%%%%CollationBin could not be constructed from CollationElements\n"
988 " check context, check that the FractionalUCA.txt UCA version "
989 "matches the current UCD version\n"
990 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }",
991 u_errorName(intStatus),
992 parseError.line,
993 parseError.offset,
994 preBuffer,
995 postBuffer);
996 if(isStrict()){
997 *status = intStatus;
998 return NULL;
999 }
1000 }
1001 } else {
1002 if(isVerbose()) {
1003 printf("Not building Collation binary\n");
1004 }
1005 }
1006 #endif
1007 /* in order to achieve smaller data files, we can direct genrb */
1008 /* to omit collation rules */
1009 if(state->omitCollationRules) {
1010 bundle_closeString(state->bundle, member);
1011 } else {
1012 table_add(result, member, line, status);
1013 }
1014 }
1015 if (U_FAILURE(*status))
1016 {
1017 res_close(result);
1018 return NULL;
1019 }
1020 }
1022 // Reached the end without a TOK_CLOSE_BRACE. Should be an error.
1023 *status = U_INTERNAL_PROGRAM_ERROR;
1024 return NULL;
1025 }
1027 static struct SResource *
1028 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1029 {
1030 struct SResource *result = NULL;
1031 struct SResource *member = NULL;
1032 struct SResource *collationRes = NULL;
1033 struct UString *tokenValue;
1034 struct UString comment;
1035 enum ETokenType token;
1036 char subtag[1024], typeKeyword[1024];
1037 uint32_t line;
1039 result = table_open(state->bundle, tag, NULL, status);
1041 if (result == NULL || U_FAILURE(*status))
1042 {
1043 return NULL;
1044 }
1045 if(isVerbose()){
1046 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1047 }
1048 if(!newCollation) {
1049 return addCollation(state, result, startline, status);
1050 }
1051 else {
1052 for(;;) {
1053 ustr_init(&comment);
1054 token = getToken(state, &tokenValue, &comment, &line, status);
1056 if (token == TOK_CLOSE_BRACE)
1057 {
1058 return result;
1059 }
1061 if (token != TOK_STRING)
1062 {
1063 res_close(result);
1064 *status = U_INVALID_FORMAT_ERROR;
1066 if (token == TOK_EOF)
1067 {
1068 error(startline, "unterminated table");
1069 }
1070 else
1071 {
1072 error(line, "Unexpected token %s", tokenNames[token]);
1073 }
1075 return NULL;
1076 }
1078 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1080 if (U_FAILURE(*status))
1081 {
1082 res_close(result);
1083 return NULL;
1084 }
1086 if (uprv_strcmp(subtag, "default") == 0)
1087 {
1088 member = parseResource(state, subtag, NULL, status);
1090 if (U_FAILURE(*status))
1091 {
1092 res_close(result);
1093 return NULL;
1094 }
1096 table_add(result, member, line, status);
1097 }
1098 else
1099 {
1100 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1101 /* this probably needs to be refactored or recursively use the parser */
1102 /* first we assume that our collation table won't have the explicit type */
1103 /* then, we cannot handle aliases */
1104 if(token == TOK_OPEN_BRACE) {
1105 token = getToken(state, &tokenValue, &comment, &line, status);
1106 collationRes = table_open(state->bundle, subtag, NULL, status);
1107 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
1108 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
1109 table_add(result, collationRes, startline, status);
1110 }
1111 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1112 /* we could have a table too */
1113 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1114 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1115 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1116 member = parseResource(state, subtag, NULL, status);
1117 if (U_FAILURE(*status))
1118 {
1119 res_close(result);
1120 return NULL;
1121 }
1123 table_add(result, member, line, status);
1124 } else {
1125 res_close(result);
1126 *status = U_INVALID_FORMAT_ERROR;
1127 return NULL;
1128 }
1129 } else {
1130 res_close(result);
1131 *status = U_INVALID_FORMAT_ERROR;
1132 return NULL;
1133 }
1134 }
1136 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1138 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1140 if (U_FAILURE(*status))
1141 {
1142 res_close(result);
1143 return NULL;
1144 }
1145 }
1146 }
1147 }
1149 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1150 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1151 static struct SResource *
1152 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1153 {
1154 struct SResource *member = NULL;
1155 struct UString *tokenValue=NULL;
1156 struct UString comment;
1157 enum ETokenType token;
1158 char subtag[1024];
1159 uint32_t line;
1160 UBool readToken = FALSE;
1162 /* '{' . (name resource)* '}' */
1164 if(isVerbose()){
1165 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1166 }
1167 for (;;)
1168 {
1169 ustr_init(&comment);
1170 token = getToken(state, &tokenValue, &comment, &line, status);
1172 if (token == TOK_CLOSE_BRACE)
1173 {
1174 if (!readToken) {
1175 warning(startline, "Encountered empty table");
1176 }
1177 return table;
1178 }
1180 if (token != TOK_STRING)
1181 {
1182 *status = U_INVALID_FORMAT_ERROR;
1184 if (token == TOK_EOF)
1185 {
1186 error(startline, "unterminated table");
1187 }
1188 else
1189 {
1190 error(line, "unexpected token %s", tokenNames[token]);
1191 }
1193 return NULL;
1194 }
1196 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1197 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1198 } else {
1199 *status = U_INVALID_FORMAT_ERROR;
1200 error(line, "invariant characters required for table keys");
1201 return NULL;
1202 }
1204 if (U_FAILURE(*status))
1205 {
1206 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1207 return NULL;
1208 }
1210 member = parseResource(state, subtag, &comment, status);
1212 if (member == NULL || U_FAILURE(*status))
1213 {
1214 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1215 return NULL;
1216 }
1218 table_add(table, member, line, status);
1220 if (U_FAILURE(*status))
1221 {
1222 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1223 return NULL;
1224 }
1225 readToken = TRUE;
1226 ustr_deinit(&comment);
1227 }
1229 /* not reached */
1230 /* A compiler warning will appear if all paths don't contain a return statement. */
1231 /* *status = U_INTERNAL_PROGRAM_ERROR;
1232 return NULL;*/
1233 }
1235 static struct SResource *
1236 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1237 {
1238 struct SResource *result;
1240 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1241 {
1242 return parseCollationElements(state, tag, startline, FALSE, status);
1243 }
1244 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1245 {
1246 return parseCollationElements(state, tag, startline, TRUE, status);
1247 }
1248 if(isVerbose()){
1249 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1250 }
1252 result = table_open(state->bundle, tag, comment, status);
1254 if (result == NULL || U_FAILURE(*status))
1255 {
1256 return NULL;
1257 }
1258 return realParseTable(state, result, tag, startline, status);
1259 }
1261 static struct SResource *
1262 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1263 {
1264 struct SResource *result = NULL;
1265 struct SResource *member = NULL;
1266 struct UString *tokenValue;
1267 struct UString memberComments;
1268 enum ETokenType token;
1269 UBool readToken = FALSE;
1271 result = array_open(state->bundle, tag, comment, status);
1273 if (result == NULL || U_FAILURE(*status))
1274 {
1275 return NULL;
1276 }
1277 if(isVerbose()){
1278 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1279 }
1281 ustr_init(&memberComments);
1283 /* '{' . resource [','] '}' */
1284 for (;;)
1285 {
1286 /* reset length */
1287 ustr_setlen(&memberComments, 0, status);
1289 /* check for end of array, but don't consume next token unless it really is the end */
1290 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1293 if (token == TOK_CLOSE_BRACE)
1294 {
1295 getToken(state, NULL, NULL, NULL, status);
1296 if (!readToken) {
1297 warning(startline, "Encountered empty array");
1298 }
1299 break;
1300 }
1302 if (token == TOK_EOF)
1303 {
1304 res_close(result);
1305 *status = U_INVALID_FORMAT_ERROR;
1306 error(startline, "unterminated array");
1307 return NULL;
1308 }
1310 /* string arrays are a special case */
1311 if (token == TOK_STRING)
1312 {
1313 getToken(state, &tokenValue, &memberComments, NULL, status);
1314 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1315 }
1316 else
1317 {
1318 member = parseResource(state, NULL, &memberComments, status);
1319 }
1321 if (member == NULL || U_FAILURE(*status))
1322 {
1323 res_close(result);
1324 return NULL;
1325 }
1327 array_add(result, member, status);
1329 if (U_FAILURE(*status))
1330 {
1331 res_close(result);
1332 return NULL;
1333 }
1335 /* eat optional comma if present */
1336 token = peekToken(state, 0, NULL, NULL, NULL, status);
1338 if (token == TOK_COMMA)
1339 {
1340 getToken(state, NULL, NULL, NULL, status);
1341 }
1343 if (U_FAILURE(*status))
1344 {
1345 res_close(result);
1346 return NULL;
1347 }
1348 readToken = TRUE;
1349 }
1351 ustr_deinit(&memberComments);
1352 return result;
1353 }
1355 static struct SResource *
1356 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1357 {
1358 struct SResource *result = NULL;
1359 enum ETokenType token;
1360 char *string;
1361 int32_t value;
1362 UBool readToken = FALSE;
1363 char *stopstring;
1364 uint32_t len;
1365 struct UString memberComments;
1367 result = intvector_open(state->bundle, tag, comment, status);
1369 if (result == NULL || U_FAILURE(*status))
1370 {
1371 return NULL;
1372 }
1374 if(isVerbose()){
1375 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1376 }
1377 ustr_init(&memberComments);
1378 /* '{' . string [','] '}' */
1379 for (;;)
1380 {
1381 ustr_setlen(&memberComments, 0, status);
1383 /* check for end of array, but don't consume next token unless it really is the end */
1384 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1386 if (token == TOK_CLOSE_BRACE)
1387 {
1388 /* it's the end, consume the close brace */
1389 getToken(state, NULL, NULL, NULL, status);
1390 if (!readToken) {
1391 warning(startline, "Encountered empty int vector");
1392 }
1393 ustr_deinit(&memberComments);
1394 return result;
1395 }
1397 string = getInvariantString(state, NULL, NULL, status);
1399 if (U_FAILURE(*status))
1400 {
1401 res_close(result);
1402 return NULL;
1403 }
1405 /* For handling illegal char in the Intvector */
1406 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1407 len=(uint32_t)(stopstring-string);
1409 if(len==uprv_strlen(string))
1410 {
1411 intvector_add(result, value, status);
1412 uprv_free(string);
1413 token = peekToken(state, 0, NULL, NULL, NULL, status);
1414 }
1415 else
1416 {
1417 uprv_free(string);
1418 *status=U_INVALID_CHAR_FOUND;
1419 }
1421 if (U_FAILURE(*status))
1422 {
1423 res_close(result);
1424 return NULL;
1425 }
1427 /* the comma is optional (even though it is required to prevent the reader from concatenating
1428 consecutive entries) so that a missing comma on the last entry isn't an error */
1429 if (token == TOK_COMMA)
1430 {
1431 getToken(state, NULL, NULL, NULL, status);
1432 }
1433 readToken = TRUE;
1434 }
1436 /* not reached */
1437 /* A compiler warning will appear if all paths don't contain a return statement. */
1438 /* intvector_close(result, status);
1439 *status = U_INTERNAL_PROGRAM_ERROR;
1440 return NULL;*/
1441 }
1443 static struct SResource *
1444 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1445 {
1446 struct SResource *result = NULL;
1447 uint8_t *value;
1448 char *string;
1449 char toConv[3] = {'\0', '\0', '\0'};
1450 uint32_t count;
1451 uint32_t i;
1452 uint32_t line;
1453 char *stopstring;
1454 uint32_t len;
1456 string = getInvariantString(state, &line, NULL, status);
1458 if (string == NULL || U_FAILURE(*status))
1459 {
1460 return NULL;
1461 }
1463 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1465 if (U_FAILURE(*status))
1466 {
1467 uprv_free(string);
1468 return NULL;
1469 }
1471 if(isVerbose()){
1472 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1473 }
1475 count = (uint32_t)uprv_strlen(string);
1476 if (count > 0){
1477 if((count % 2)==0){
1478 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1480 if (value == NULL)
1481 {
1482 uprv_free(string);
1483 *status = U_MEMORY_ALLOCATION_ERROR;
1484 return NULL;
1485 }
1487 for (i = 0; i < count; i += 2)
1488 {
1489 toConv[0] = string[i];
1490 toConv[1] = string[i + 1];
1492 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1493 len=(uint32_t)(stopstring-toConv);
1495 if(len!=uprv_strlen(toConv))
1496 {
1497 uprv_free(string);
1498 *status=U_INVALID_CHAR_FOUND;
1499 return NULL;
1500 }
1501 }
1503 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1505 uprv_free(value);
1506 }
1507 else
1508 {
1509 *status = U_INVALID_CHAR_FOUND;
1510 uprv_free(string);
1511 error(line, "Encountered invalid binary string");
1512 return NULL;
1513 }
1514 }
1515 else
1516 {
1517 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1518 warning(startline, "Encountered empty binary tag");
1519 }
1520 uprv_free(string);
1522 return result;
1523 }
1525 static struct SResource *
1526 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1527 {
1528 struct SResource *result = NULL;
1529 int32_t value;
1530 char *string;
1531 char *stopstring;
1532 uint32_t len;
1534 string = getInvariantString(state, NULL, NULL, status);
1536 if (string == NULL || U_FAILURE(*status))
1537 {
1538 return NULL;
1539 }
1541 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1543 if (U_FAILURE(*status))
1544 {
1545 uprv_free(string);
1546 return NULL;
1547 }
1549 if(isVerbose()){
1550 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1551 }
1553 if (uprv_strlen(string) <= 0)
1554 {
1555 warning(startline, "Encountered empty integer. Default value is 0.");
1556 }
1558 /* Allow integer support for hexdecimal, octal digit and decimal*/
1559 /* and handle illegal char in the integer*/
1560 value = uprv_strtoul(string, &stopstring, 0);
1561 len=(uint32_t)(stopstring-string);
1562 if(len==uprv_strlen(string))
1563 {
1564 result = int_open(state->bundle, tag, value, comment, status);
1565 }
1566 else
1567 {
1568 *status=U_INVALID_CHAR_FOUND;
1569 }
1570 uprv_free(string);
1572 return result;
1573 }
1575 static struct SResource *
1576 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1577 {
1578 struct SResource *result;
1579 FileStream *file;
1580 int32_t len;
1581 uint8_t *data;
1582 char *filename;
1583 uint32_t line;
1584 char *fullname = NULL;
1585 filename = getInvariantString(state, &line, NULL, status);
1587 if (U_FAILURE(*status))
1588 {
1589 return NULL;
1590 }
1592 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1594 if (U_FAILURE(*status))
1595 {
1596 uprv_free(filename);
1597 return NULL;
1598 }
1600 if(isVerbose()){
1601 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1602 }
1604 /* Open the input file for reading */
1605 if (state->inputdir == NULL)
1606 {
1607 #if 1
1608 /*
1609 * Always save file file name, even if there's
1610 * no input directory specified. MIGHT BREAK SOMETHING
1611 */
1612 int32_t filenameLength = uprv_strlen(filename);
1614 fullname = (char *) uprv_malloc(filenameLength + 1);
1615 uprv_strcpy(fullname, filename);
1616 #endif
1618 file = T_FileStream_open(filename, "rb");
1619 }
1620 else
1621 {
1623 int32_t count = (int32_t)uprv_strlen(filename);
1625 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1626 {
1627 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1629 /* test for NULL */
1630 if(fullname == NULL)
1631 {
1632 *status = U_MEMORY_ALLOCATION_ERROR;
1633 return NULL;
1634 }
1636 uprv_strcpy(fullname, state->inputdir);
1638 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1639 fullname[state->inputdirLength + 1] = '\0';
1641 uprv_strcat(fullname, filename);
1642 }
1643 else
1644 {
1645 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1647 /* test for NULL */
1648 if(fullname == NULL)
1649 {
1650 *status = U_MEMORY_ALLOCATION_ERROR;
1651 return NULL;
1652 }
1654 uprv_strcpy(fullname, state->inputdir);
1655 uprv_strcat(fullname, filename);
1656 }
1658 file = T_FileStream_open(fullname, "rb");
1660 }
1662 if (file == NULL)
1663 {
1664 error(line, "couldn't open input file %s", filename);
1665 *status = U_FILE_ACCESS_ERROR;
1666 return NULL;
1667 }
1669 len = T_FileStream_size(file);
1670 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1671 /* test for NULL */
1672 if(data == NULL)
1673 {
1674 *status = U_MEMORY_ALLOCATION_ERROR;
1675 T_FileStream_close (file);
1676 return NULL;
1677 }
1679 /* int32_t numRead = */ T_FileStream_read (file, data, len);
1680 T_FileStream_close (file);
1682 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1684 uprv_free(data);
1685 uprv_free(filename);
1686 uprv_free(fullname);
1688 return result;
1689 }
1691 static struct SResource *
1692 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1693 {
1694 struct SResource *result;
1695 int32_t len=0;
1696 char *filename;
1697 uint32_t line;
1698 UChar *pTarget = NULL;
1700 UCHARBUF *ucbuf;
1701 char *fullname = NULL;
1702 int32_t count = 0;
1703 const char* cp = NULL;
1704 const UChar* uBuffer = NULL;
1706 filename = getInvariantString(state, &line, NULL, status);
1707 count = (int32_t)uprv_strlen(filename);
1709 if (U_FAILURE(*status))
1710 {
1711 return NULL;
1712 }
1714 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1716 if (U_FAILURE(*status))
1717 {
1718 uprv_free(filename);
1719 return NULL;
1720 }
1722 if(isVerbose()){
1723 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1724 }
1726 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1727 /* test for NULL */
1728 if(fullname == NULL)
1729 {
1730 *status = U_MEMORY_ALLOCATION_ERROR;
1731 uprv_free(filename);
1732 return NULL;
1733 }
1735 if(state->inputdir!=NULL){
1736 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1737 {
1739 uprv_strcpy(fullname, state->inputdir);
1741 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1742 fullname[state->inputdirLength + 1] = '\0';
1744 uprv_strcat(fullname, filename);
1745 }
1746 else
1747 {
1748 uprv_strcpy(fullname, state->inputdir);
1749 uprv_strcat(fullname, filename);
1750 }
1751 }else{
1752 uprv_strcpy(fullname,filename);
1753 }
1755 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1757 if (U_FAILURE(*status)) {
1758 error(line, "couldn't open input file %s\n", filename);
1759 return NULL;
1760 }
1762 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1763 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1765 ucbuf_close(ucbuf);
1767 uprv_free(pTarget);
1769 uprv_free(filename);
1770 uprv_free(fullname);
1772 return result;
1773 }
/*
 * UChar forms of the resource type keywords that may follow ':' in a resource
 * definition. They are declared here and given their contents by
 * initParser(); parseResourceType() compares the type token against them.
 *
 * k_type_bin and k_type_int are accepted as aliases for the binary and
 * integer types respectively.
 */
U_STRING_DECL(k_type_string, "string", 6);
U_STRING_DECL(k_type_binary, "binary", 6);
U_STRING_DECL(k_type_bin, "bin", 3);
U_STRING_DECL(k_type_table, "table", 5);
U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
U_STRING_DECL(k_type_int, "int", 3);
U_STRING_DECL(k_type_integer, "integer", 7);
U_STRING_DECL(k_type_array, "array", 5);
U_STRING_DECL(k_type_alias, "alias", 5);
U_STRING_DECL(k_type_intvector, "intvector", 9);
U_STRING_DECL(k_type_import, "import", 6);
U_STRING_DECL(k_type_include, "include", 7);

/* Various non-standard processing plugins that create one or more special resources. */
U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
/*
 * Resource types known to the parser.
 *
 * The enumerators are used directly as indexes into gResourceTypes, so the
 * two must list the same entries in the same order.
 */
typedef enum EResourceType
{
    RT_UNKNOWN,                 /* no explicit type given; parseResource() works the type out from lookahead */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,       /* rejected by parseResource(); accepted by parse() for the top-level table */
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker: parseResourceType() stops its keyword search here */
} EResourceType;
/*
 * One entry per EResourceType value, in the same order as the enum (the table
 * is indexed with EResourceType values).
 *
 * parseResourceType() matches the type token against nameUChars;
 * parseResource() calls parseFunction for the resolved type and reports an
 * internal error when that pointer is NULL.
 */
static struct {
    const char *nameChars;   /* only used for debugging */
    const UChar *nameUChars; /* keyword as UChars; NULL for the two entries that have no keyword */
    ParseResourceFunction *parseFunction; /* parser for this type, or NULL */
} gResourceTypes[] = {
    {"Unknown", NULL, NULL},
    {"string", k_type_string, parseString},
    {"binary", k_type_binary, parseBinary},
    {"table", k_type_table, parseTable},
    {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
    {"integer", k_type_integer, parseInteger},
    {"array", k_type_array, parseArray},
    {"alias", k_type_alias, parseAlias},
    {"intvector", k_type_intvector, parseIntVector},
    {"import", k_type_import, parseImport},
    {"include", k_type_include, parseInclude},
    {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
    {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
    {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
    {"process(dependency)", k_type_plugin_dependency, parseDependency},
    {"reserved", NULL, NULL}
};
1841 void initParser()
1842 {
1843 U_STRING_INIT(k_type_string, "string", 6);
1844 U_STRING_INIT(k_type_binary, "binary", 6);
1845 U_STRING_INIT(k_type_bin, "bin", 3);
1846 U_STRING_INIT(k_type_table, "table", 5);
1847 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1848 U_STRING_INIT(k_type_int, "int", 3);
1849 U_STRING_INIT(k_type_integer, "integer", 7);
1850 U_STRING_INIT(k_type_array, "array", 5);
1851 U_STRING_INIT(k_type_alias, "alias", 5);
1852 U_STRING_INIT(k_type_intvector, "intvector", 9);
1853 U_STRING_INIT(k_type_import, "import", 6);
1854 U_STRING_INIT(k_type_include, "include", 7);
1856 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1857 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1858 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1859 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1860 }
1862 static inline UBool isTable(enum EResourceType type) {
1863 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1864 }
1866 static enum EResourceType
1867 parseResourceType(ParseState* state, UErrorCode *status)
1868 {
1869 struct UString *tokenValue;
1870 struct UString comment;
1871 enum EResourceType result = RT_UNKNOWN;
1872 uint32_t line=0;
1873 ustr_init(&comment);
1874 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1876 if (U_FAILURE(*status))
1877 {
1878 return RT_UNKNOWN;
1879 }
1881 *status = U_ZERO_ERROR;
1883 /* Search for normal types */
1884 result=RT_UNKNOWN;
1885 while ((result=(EResourceType)(result+1)) < RT_RESERVED) {
1886 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1887 break;
1888 }
1889 }
1890 /* Now search for the aliases */
1891 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1892 result = RT_INTEGER;
1893 }
1894 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1895 result = RT_BINARY;
1896 }
1897 else if (result == RT_RESERVED) {
1898 char tokenBuffer[1024];
1899 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1900 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1901 *status = U_INVALID_FORMAT_ERROR;
1902 error(line, "unknown resource type '%s'", tokenBuffer);
1903 }
1905 return result;
1906 }
1908 /* parse a non-top-level resource */
1909 static struct SResource *
1910 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1911 {
1912 enum ETokenType token;
1913 enum EResourceType resType = RT_UNKNOWN;
1914 ParseResourceFunction *parseFunction = NULL;
1915 struct UString *tokenValue;
1916 uint32_t startline;
1917 uint32_t line;
1920 token = getToken(state, &tokenValue, NULL, &startline, status);
1922 if(isVerbose()){
1923 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1924 }
1926 /* name . [ ':' type ] '{' resource '}' */
1927 /* This function parses from the colon onwards. If the colon is present, parse the
1928 type then try to parse a resource of that type. If there is no explicit type,
1929 work it out using the lookahead tokens. */
1930 switch (token)
1931 {
1932 case TOK_EOF:
1933 *status = U_INVALID_FORMAT_ERROR;
1934 error(startline, "Unexpected EOF encountered");
1935 return NULL;
1937 case TOK_ERROR:
1938 *status = U_INVALID_FORMAT_ERROR;
1939 return NULL;
1941 case TOK_COLON:
1942 resType = parseResourceType(state, status);
1943 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1945 if (U_FAILURE(*status))
1946 {
1947 return NULL;
1948 }
1950 break;
1952 case TOK_OPEN_BRACE:
1953 break;
1955 default:
1956 *status = U_INVALID_FORMAT_ERROR;
1957 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1958 return NULL;
1959 }
1962 if (resType == RT_UNKNOWN)
1963 {
1964 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1965 We could have any of the following:
1966 { { => array (nested)
1967 { :/} => array
1968 { string , => string array
1970 { string { => table
1972 { string :/{ => table
1973 { string } => string
1974 */
1976 token = peekToken(state, 0, NULL, &line, NULL,status);
1978 if (U_FAILURE(*status))
1979 {
1980 return NULL;
1981 }
1983 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1984 {
1985 resType = RT_ARRAY;
1986 }
1987 else if (token == TOK_STRING)
1988 {
1989 token = peekToken(state, 1, NULL, &line, NULL, status);
1991 if (U_FAILURE(*status))
1992 {
1993 return NULL;
1994 }
1996 switch (token)
1997 {
1998 case TOK_COMMA: resType = RT_ARRAY; break;
1999 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
2000 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
2001 case TOK_COLON: resType = RT_TABLE; break;
2002 default:
2003 *status = U_INVALID_FORMAT_ERROR;
2004 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2005 return NULL;
2006 }
2007 }
2008 else
2009 {
2010 *status = U_INVALID_FORMAT_ERROR;
2011 error(line, "Unexpected token after '{'");
2012 return NULL;
2013 }
2015 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2016 } else if(resType == RT_TABLE_NO_FALLBACK) {
2017 *status = U_INVALID_FORMAT_ERROR;
2018 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2019 return NULL;
2020 }
2023 /* We should now know what we need to parse next, so call the appropriate parser
2024 function and return. */
2025 parseFunction = gResourceTypes[resType].parseFunction;
2026 if (parseFunction != NULL) {
2027 return parseFunction(state, tag, startline, comment, status);
2028 }
2029 else {
2030 *status = U_INTERNAL_PROGRAM_ERROR;
2031 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2032 }
2034 return NULL;
2035 }
2037 /* parse the top-level resource */
2038 struct SRBRoot *
2039 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir,
2040 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
2041 {
2042 struct UString *tokenValue;
2043 struct UString comment;
2044 uint32_t line;
2045 enum EResourceType bundleType;
2046 enum ETokenType token;
2047 ParseState state;
2048 uint32_t i;
2051 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2052 {
2053 ustr_init(&state.lookahead[i].value);
2054 ustr_init(&state.lookahead[i].comment);
2055 }
2057 initLookahead(&state, buf, status);
2059 state.inputdir = inputDir;
2060 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2061 state.outputdir = outputDir;
2062 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2063 state.makeBinaryCollation = makeBinaryCollation;
2064 state.omitCollationRules = omitCollationRules;
2066 ustr_init(&comment);
2067 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2069 state.bundle = bundle_open(&comment, FALSE, status);
2071 if (state.bundle == NULL || U_FAILURE(*status))
2072 {
2073 return NULL;
2074 }
2077 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2079 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2080 token = getToken(&state, NULL, NULL, &line, status);
2081 if(token==TOK_COLON) {
2082 *status=U_ZERO_ERROR;
2083 bundleType=parseResourceType(&state, status);
2085 if(isTable(bundleType))
2086 {
2087 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2088 }
2089 else
2090 {
2091 *status=U_PARSE_ERROR;
2092 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2093 }
2094 }
2095 else
2096 {
2097 /* not a colon */
2098 if(token==TOK_OPEN_BRACE)
2099 {
2100 *status=U_ZERO_ERROR;
2101 bundleType=RT_TABLE;
2102 }
2103 else
2104 {
2105 /* neither colon nor open brace */
2106 *status=U_PARSE_ERROR;
2107 bundleType=RT_UNKNOWN;
2108 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2109 }
2110 }
2112 if (U_FAILURE(*status))
2113 {
2114 bundle_close(state.bundle, status);
2115 return NULL;
2116 }
2118 if(bundleType==RT_TABLE_NO_FALLBACK) {
2119 /*
2120 * Parse a top-level table with the table(nofallback) declaration.
2121 * This is the same as a regular table, but also sets the
2122 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2123 */
2124 state.bundle->noFallback=TRUE;
2125 }
2126 /* top-level tables need not handle special table names like "collations" */
2127 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2128 if(dependencyArray!=NULL){
2129 table_add(state.bundle->fRoot, dependencyArray, 0, status);
2130 dependencyArray = NULL;
2131 }
2132 if (U_FAILURE(*status))
2133 {
2134 bundle_close(state.bundle, status);
2135 res_close(dependencyArray);
2136 return NULL;
2137 }
2139 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2140 {
2141 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2142 if(isStrict()){
2143 *status = U_INVALID_FORMAT_ERROR;
2144 return NULL;
2145 }
2146 }
2148 cleanupLookahead(&state);
2149 ustr_deinit(&comment);
2150 return state.bundle;
2151 }