intl/icu/source/tools/genrb/parse.cpp

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:ac2c7f60fdc3
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2013, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20
21 #include "ucol_imp.h"
22 #include "parse.h"
23 #include "errmsg.h"
24 #include "uhash.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "uinvchar.h"
28 #include "read.h"
29 #include "ustr.h"
30 #include "reslist.h"
31 #include "rbt_pars.h"
32 #include "genrb.h"
33 #include "unicode/ustring.h"
34 #include "unicode/uscript.h"
35 #include "unicode/putil.h"
36 #include <stdio.h>
37
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD  3

/* Code points that the rule-file reader compares characters against */
#define CR             0x000D   /* carriage return */
#define LF             0x000A   /* line feed */
#define SPACE          0x0020   /* space */
#define TAB            0x0009   /* horizontal tab */
#define ESCAPE         0x005C   /* backslash */
#define HASH           0x0023   /* '#' */
#define QUOTE          0x0027   /* apostrophe */
#define ZERO           0x0030   /* digit zero */
#define STARTCOMMAND   0x005B   /* '[' */
#define ENDCOMMAND     0x005D   /* ']' */
#define OPENSQBRACKET  0x005B   /* '[' */
#define CLOSESQBRACKET 0x005D   /* ']' */
53
54 struct Lookahead
55 {
56 enum ETokenType type;
57 struct UString value;
58 struct UString comment;
59 uint32_t line;
60 };
61
62 /* keep in sync with token defines in read.h */
63 const char *tokenNames[TOK_TOKEN_COUNT] =
64 {
65 "string", /* A string token, such as "MonthNames" */
66 "'{'", /* An opening brace character */
67 "'}'", /* A closing brace character */
68 "','", /* A comma */
69 "':'", /* A colon */
70
71 "<end of file>", /* End of the file has been reached successfully */
72 "<end of line>"
73 };
74
75 /* Just to store "TRUE" */
76 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
77
78 typedef struct {
79 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
80 uint32_t lookaheadPosition;
81 UCHARBUF *buffer;
82 struct SRBRoot *bundle;
83 const char *inputdir;
84 uint32_t inputdirLength;
85 const char *outputdir;
86 uint32_t outputdirLength;
87 UBool makeBinaryCollation;
88 UBool omitCollationRules;
89 } ParseState;
90
91 typedef struct SResource *
92 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
93
94 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
95
96 /* The nature of the lookahead buffer:
97 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
98 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
99 When getToken is called, the current pointer is moved to the next slot and the
100 old slot is filled with the next token from the reader by calling getNextToken.
101 The token values are stored in the slot, which means that token values don't
102 survive a call to getToken, ie.
103
104 UString *value;
105
106 getToken(&value, NULL, status);
107 getToken(NULL, NULL, status); bad - value is now a different string
108 */
109 static void
110 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
111 {
112 static uint32_t initTypeStrings = 0;
113 uint32_t i;
114
115 if (!initTypeStrings)
116 {
117 initTypeStrings = 1;
118 }
119
120 state->lookaheadPosition = 0;
121 state->buffer = buf;
122
123 resetLineNumber();
124
125 for (i = 0; i < MAX_LOOKAHEAD; i++)
126 {
127 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
128 if (U_FAILURE(*status))
129 {
130 return;
131 }
132 }
133
134 *status = U_ZERO_ERROR;
135 }
136
137 static void
138 cleanupLookahead(ParseState* state)
139 {
140 uint32_t i;
141 for (i = 0; i <= MAX_LOOKAHEAD; i++)
142 {
143 ustr_deinit(&state->lookahead[i].value);
144 ustr_deinit(&state->lookahead[i].comment);
145 }
146
147 }
148
149 static enum ETokenType
150 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
151 {
152 enum ETokenType result;
153 uint32_t i;
154
155 result = state->lookahead[state->lookaheadPosition].type;
156
157 if (tokenValue != NULL)
158 {
159 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
160 }
161
162 if (linenumber != NULL)
163 {
164 *linenumber = state->lookahead[state->lookaheadPosition].line;
165 }
166
167 if (comment != NULL)
168 {
169 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
170 }
171
172 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
173 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
174 ustr_setlen(&state->lookahead[i].comment, 0, status);
175 ustr_setlen(&state->lookahead[i].value, 0, status);
176 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
177
178 /* printf("getToken, returning %s\n", tokenNames[result]); */
179
180 return result;
181 }
182
183 static enum ETokenType
184 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
185 {
186 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
187
188 if (U_FAILURE(*status))
189 {
190 return TOK_ERROR;
191 }
192
193 if (lookaheadCount >= MAX_LOOKAHEAD)
194 {
195 *status = U_INTERNAL_PROGRAM_ERROR;
196 return TOK_ERROR;
197 }
198
199 if (tokenValue != NULL)
200 {
201 *tokenValue = &state->lookahead[i].value;
202 }
203
204 if (linenumber != NULL)
205 {
206 *linenumber = state->lookahead[i].line;
207 }
208
209 if(comment != NULL){
210 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
211 }
212
213 return state->lookahead[i].type;
214 }
215
216 static void
217 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
218 {
219 uint32_t line;
220
221 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
222
223 if (linenumber != NULL)
224 {
225 *linenumber = line;
226 }
227
228 if (U_FAILURE(*status))
229 {
230 return;
231 }
232
233 if (token != expectedToken)
234 {
235 *status = U_INVALID_FORMAT_ERROR;
236 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
237 }
238 else
239 {
240 *status = U_ZERO_ERROR;
241 }
242 }
243
244 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
245 {
246 struct UString *tokenValue;
247 char *result;
248 uint32_t count;
249
250 expect(state, TOK_STRING, &tokenValue, comment, line, status);
251
252 if (U_FAILURE(*status))
253 {
254 return NULL;
255 }
256
257 count = u_strlen(tokenValue->fChars);
258 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
259 *status = U_INVALID_FORMAT_ERROR;
260 error(*line, "invariant characters required for table keys, binary data, etc.");
261 return NULL;
262 }
263
264 result = static_cast<char *>(uprv_malloc(count+1));
265
266 if (result == NULL)
267 {
268 *status = U_MEMORY_ALLOCATION_ERROR;
269 return NULL;
270 }
271
272 u_UCharsToChars(tokenValue->fChars, result, count+1);
273 return result;
274 }
275
276 static struct SResource *
277 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
278 {
279 struct SResource *result = NULL;
280 struct UString *tokenValue;
281 FileStream *file = NULL;
282 char filename[256] = { '\0' };
283 char cs[128] = { '\0' };
284 uint32_t line;
285 UBool quoted = FALSE;
286 UCHARBUF *ucbuf=NULL;
287 UChar32 c = 0;
288 const char* cp = NULL;
289 UChar *pTarget = NULL;
290 UChar *target = NULL;
291 UChar *targetLimit = NULL;
292 int32_t size = 0;
293
294 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
295
296 if(isVerbose()){
297 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
298 }
299
300 if (U_FAILURE(*status))
301 {
302 return NULL;
303 }
304 /* make the filename including the directory */
305 if (state->inputdir != NULL)
306 {
307 uprv_strcat(filename, state->inputdir);
308
309 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
310 {
311 uprv_strcat(filename, U_FILE_SEP_STRING);
312 }
313 }
314
315 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
316
317 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
318
319 if (U_FAILURE(*status))
320 {
321 return NULL;
322 }
323 uprv_strcat(filename, cs);
324
325 if(state->omitCollationRules) {
326 return res_none();
327 }
328
329 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
330
331 if (U_FAILURE(*status)) {
332 error(line, "An error occured while opening the input file %s\n", filename);
333 return NULL;
334 }
335
336 /* We allocate more space than actually required
337 * since the actual size needed for storing UChars
338 * is not known in UTF-8 byte stream
339 */
340 size = ucbuf_size(ucbuf) + 1;
341 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
342 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
343 target = pTarget;
344 targetLimit = pTarget+size;
345
346 /* read the rules into the buffer */
347 while (target < targetLimit)
348 {
349 c = ucbuf_getc(ucbuf, status);
350 if(c == QUOTE) {
351 quoted = (UBool)!quoted;
352 }
353 /* weiv (06/26/2002): adding the following:
354 * - preserving spaces in commands [...]
355 * - # comments until the end of line
356 */
357 if (c == STARTCOMMAND && !quoted)
358 {
359 /* preserve commands
360 * closing bracket will be handled by the
361 * append at the end of the loop
362 */
363 while(c != ENDCOMMAND) {
364 U_APPEND_CHAR32_ONLY(c, target);
365 c = ucbuf_getc(ucbuf, status);
366 }
367 }
368 else if (c == HASH && !quoted) {
369 /* skip comments */
370 while(c != CR && c != LF) {
371 c = ucbuf_getc(ucbuf, status);
372 }
373 continue;
374 }
375 else if (c == ESCAPE)
376 {
377 c = unescape(ucbuf, status);
378
379 if (c == (UChar32)U_ERR)
380 {
381 uprv_free(pTarget);
382 T_FileStream_close(file);
383 return NULL;
384 }
385 }
386 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
387 {
388 /* ignore spaces carriage returns
389 * and line feed unless in the form \uXXXX
390 */
391 continue;
392 }
393
394 /* Append UChar * after dissembling if c > 0xffff*/
395 if (c != (UChar32)U_EOF)
396 {
397 U_APPEND_CHAR32_ONLY(c, target);
398 }
399 else
400 {
401 break;
402 }
403 }
404
405 /* terminate the string */
406 if(target < targetLimit){
407 *target = 0x0000;
408 }
409
410 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
411
412
413 ucbuf_close(ucbuf);
414 uprv_free(pTarget);
415 T_FileStream_close(file);
416
417 return result;
418 }
419
420 static struct SResource *
421 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
422 {
423 struct SResource *result = NULL;
424 struct UString *tokenValue;
425 FileStream *file = NULL;
426 char filename[256] = { '\0' };
427 char cs[128] = { '\0' };
428 uint32_t line;
429 UCHARBUF *ucbuf=NULL;
430 const char* cp = NULL;
431 UChar *pTarget = NULL;
432 const UChar *pSource = NULL;
433 int32_t size = 0;
434
435 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
436
437 if(isVerbose()){
438 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
439 }
440
441 if (U_FAILURE(*status))
442 {
443 return NULL;
444 }
445 /* make the filename including the directory */
446 if (state->inputdir != NULL)
447 {
448 uprv_strcat(filename, state->inputdir);
449
450 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
451 {
452 uprv_strcat(filename, U_FILE_SEP_STRING);
453 }
454 }
455
456 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
457
458 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
459
460 if (U_FAILURE(*status))
461 {
462 return NULL;
463 }
464 uprv_strcat(filename, cs);
465
466
467 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
468
469 if (U_FAILURE(*status)) {
470 error(line, "An error occured while opening the input file %s\n", filename);
471 return NULL;
472 }
473
474 /* We allocate more space than actually required
475 * since the actual size needed for storing UChars
476 * is not known in UTF-8 byte stream
477 */
478 pSource = ucbuf_getBuffer(ucbuf, &size, status);
479 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
480 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
481
482 #if !UCONFIG_NO_TRANSLITERATION
483 size = utrans_stripRules(pSource, size, pTarget, status);
484 #else
485 size = 0;
486 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
487 #endif
488 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
489
490 ucbuf_close(ucbuf);
491 uprv_free(pTarget);
492 T_FileStream_close(file);
493
494 return result;
495 }
496 static struct SResource* dependencyArray = NULL;
497
498 static struct SResource *
499 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
500 {
501 struct SResource *result = NULL;
502 struct SResource *elem = NULL;
503 struct UString *tokenValue;
504 uint32_t line;
505 char filename[256] = { '\0' };
506 char cs[128] = { '\0' };
507
508 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
509
510 if(isVerbose()){
511 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
512 }
513
514 if (U_FAILURE(*status))
515 {
516 return NULL;
517 }
518 /* make the filename including the directory */
519 if (state->outputdir != NULL)
520 {
521 uprv_strcat(filename, state->outputdir);
522
523 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
524 {
525 uprv_strcat(filename, U_FILE_SEP_STRING);
526 }
527 }
528
529 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
530
531 if (U_FAILURE(*status))
532 {
533 return NULL;
534 }
535 uprv_strcat(filename, cs);
536 if(!T_FileStream_file_exists(filename)){
537 if(isStrict()){
538 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
539 }else{
540 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
541 }
542 }
543 if(dependencyArray==NULL){
544 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
545 }
546 if(tag!=NULL){
547 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
548 }
549 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
550
551 array_add(dependencyArray, elem, status);
552
553 if (U_FAILURE(*status))
554 {
555 return NULL;
556 }
557 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
558 return result;
559 }
560 static struct SResource *
561 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
562 {
563 struct UString *tokenValue;
564 struct SResource *result = NULL;
565
566 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
567 {
568 return parseUCARules(tag, startline, status);
569 }*/
570 if(isVerbose()){
571 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
572 }
573 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
574
575 if (U_SUCCESS(*status))
576 {
577 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
578 doesn't survive expect either) */
579
580 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
581 if(U_SUCCESS(*status) && result) {
582 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
583
584 if (U_FAILURE(*status))
585 {
586 res_close(result);
587 return NULL;
588 }
589 }
590 }
591
592 return result;
593 }
594
595 static struct SResource *
596 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
597 {
598 struct UString *tokenValue;
599 struct SResource *result = NULL;
600
601 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
602
603 if(isVerbose()){
604 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
605 }
606
607 if (U_SUCCESS(*status))
608 {
609 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
610 doesn't survive expect either) */
611
612 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
613
614 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
615
616 if (U_FAILURE(*status))
617 {
618 res_close(result);
619 return NULL;
620 }
621 }
622
623 return result;
624 }
625
/* Context handed to importFromDataFile through ucol_openRulesForImport. */
typedef struct {
    const char *inputDir;   /* directory imported locale files are read from; may be NULL */
    const char *outputDir;  /* forwarded unchanged to parse() */
} GenrbData;
630
631 static struct SResource* resLookup(struct SResource* res, const char* key){
632 struct SResource *current = NULL;
633 struct SResTable *list;
634 if (res == res_none()) {
635 return NULL;
636 }
637
638 list = &(res->u.fTable);
639
640 current = list->fFirst;
641 while (current != NULL) {
642 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
643 return current;
644 }
645 current = current->fNext;
646 }
647 return NULL;
648 }
649
650 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
651 struct SRBRoot *data = NULL;
652 UCHARBUF *ucbuf = NULL;
653 GenrbData* genrbdata = (GenrbData*) context;
654 int localeLength = strlen(locale);
655 char* filename = (char*)uprv_malloc(localeLength+5);
656 char *inputDirBuf = NULL;
657 char *openFileName = NULL;
658 const char* cp = "";
659 UChar* urules = NULL;
660 int32_t urulesLength = 0;
661 int32_t i = 0;
662 int32_t dirlen = 0;
663 int32_t filelen = 0;
664 struct SResource* root;
665 struct SResource* collations;
666 struct SResource* collation;
667 struct SResource* sequence;
668
669 memcpy(filename, locale, localeLength);
670 for(i = 0; i < localeLength; i++){
671 if(filename[i] == '-'){
672 filename[i] = '_';
673 }
674 }
675 filename[localeLength] = '.';
676 filename[localeLength+1] = 't';
677 filename[localeLength+2] = 'x';
678 filename[localeLength+3] = 't';
679 filename[localeLength+4] = 0;
680
681
682 if (status==NULL || U_FAILURE(*status)) {
683 return NULL;
684 }
685 if(filename==NULL){
686 *status=U_ILLEGAL_ARGUMENT_ERROR;
687 return NULL;
688 }else{
689 filelen = (int32_t)uprv_strlen(filename);
690 }
691 if(genrbdata->inputDir == NULL) {
692 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
693 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
694 openFileName[0] = '\0';
695 if (filenameBegin != NULL) {
696 /*
697 * When a filename ../../../data/root.txt is specified,
698 * we presume that the input directory is ../../../data
699 * This is very important when the resource file includes
700 * another file, like UCARules.txt or thaidict.brk.
701 */
702 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
703 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
704
705 /* test for NULL */
706 if(inputDirBuf == NULL) {
707 *status = U_MEMORY_ALLOCATION_ERROR;
708 goto finish;
709 }
710
711 inputDirBuf[filenameSize - 1] = 0;
712 genrbdata->inputDir = inputDirBuf;
713 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
714 }
715 }else{
716 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir);
717
718 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
719 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
720
721 /* test for NULL */
722 if(openFileName == NULL) {
723 *status = U_MEMORY_ALLOCATION_ERROR;
724 goto finish;
725 }
726
727 openFileName[0] = '\0';
728 /*
729 * append the input dir to openFileName if the first char in
730 * filename is not file seperation char and the last char input directory is not '.'.
731 * This is to support :
732 * genrb -s. /home/icu/data
733 * genrb -s. icu/data
734 * The user cannot mix notations like
735 * genrb -s. /icu/data --- the absolute path specified. -s redundant
736 * user should use
737 * genrb -s. icu/data --- start from CWD and look in icu/data dir
738 */
739 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
740 uprv_strcpy(openFileName, genrbdata->inputDir);
741 openFileName[dirlen] = U_FILE_SEP_CHAR;
742 }
743 openFileName[dirlen + 1] = '\0';
744 } else {
745 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
746
747 /* test for NULL */
748 if(openFileName == NULL) {
749 *status = U_MEMORY_ALLOCATION_ERROR;
750 goto finish;
751 }
752
753 uprv_strcpy(openFileName, genrbdata->inputDir);
754
755 }
756 }
757 uprv_strcat(openFileName, filename);
758 /* printf("%s\n", openFileName); */
759 *status = U_ZERO_ERROR;
760 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
761
762 if(*status == U_FILE_ACCESS_ERROR) {
763
764 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
765 goto finish;
766 }
767 if (ucbuf == NULL || U_FAILURE(*status)) {
768 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
769 goto finish;
770 }
771
772 /* Parse the data into an SRBRoot */
773 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, FALSE, status);
774
775 root = data->fRoot;
776 collations = resLookup(root, "collations");
777 if (collations != NULL) {
778 collation = resLookup(collations, type);
779 if (collation != NULL) {
780 sequence = resLookup(collation, "Sequence");
781 if (sequence != NULL) {
782 urules = sequence->u.fString.fChars;
783 urulesLength = sequence->u.fString.fLength;
784 *pLength = urulesLength;
785 }
786 }
787 }
788
789 finish:
790 if (inputDirBuf != NULL) {
791 uprv_free(inputDirBuf);
792 }
793
794 if (openFileName != NULL) {
795 uprv_free(openFileName);
796 }
797
798 if(ucbuf) {
799 ucbuf_close(ucbuf);
800 }
801
802 return urules;
803 }
804
805 // Quick-and-dirty escaping function.
806 // Assumes that we are on an ASCII-based platform.
807 static void
808 escape(const UChar *s, char *buffer) {
809 int32_t length = u_strlen(s);
810 int32_t i = 0;
811 for (;;) {
812 UChar32 c;
813 U16_NEXT(s, i, length, c);
814 if (c == 0) {
815 *buffer = 0;
816 return;
817 } else if (0x20 <= c && c <= 0x7e) {
818 // printable ASCII
819 *buffer++ = (char)c; // assumes ASCII-based platform
820 } else {
821 buffer += sprintf(buffer, "\\u%04X", (int)c);
822 }
823 }
824 }
825
826 static struct SResource *
827 addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status)
828 {
829 struct SResource *member = NULL;
830 struct UString *tokenValue;
831 struct UString comment;
832 enum ETokenType token;
833 char subtag[1024];
834 UVersionInfo version;
835 uint32_t line;
836 GenrbData genrbdata;
837 /* '{' . (name resource)* '}' */
838 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
839
840 for (;;)
841 {
842 ustr_init(&comment);
843 token = getToken(state, &tokenValue, &comment, &line, status);
844
845 if (token == TOK_CLOSE_BRACE)
846 {
847 return result;
848 }
849
850 if (token != TOK_STRING)
851 {
852 res_close(result);
853 *status = U_INVALID_FORMAT_ERROR;
854
855 if (token == TOK_EOF)
856 {
857 error(startline, "unterminated table");
858 }
859 else
860 {
861 error(line, "Unexpected token %s", tokenNames[token]);
862 }
863
864 return NULL;
865 }
866
867 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
868
869 if (U_FAILURE(*status))
870 {
871 res_close(result);
872 return NULL;
873 }
874
875 member = parseResource(state, subtag, NULL, status);
876
877 if (U_FAILURE(*status))
878 {
879 res_close(result);
880 return NULL;
881 }
882
883 if (uprv_strcmp(subtag, "Version") == 0)
884 {
885 char ver[40];
886 int32_t length = member->u.fString.fLength;
887
888 if (length >= (int32_t) sizeof(ver))
889 {
890 length = (int32_t) sizeof(ver) - 1;
891 }
892
893 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
894 u_versionFromString(version, ver);
895
896 table_add(result, member, line, status);
897
898 }
899 else if (uprv_strcmp(subtag, "Override") == 0)
900 {
901 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
902 table_add(result, member, line, status);
903
904 }
905 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
906 {
907 /* discard duplicate %%CollationBin if any*/
908 }
909 else if (uprv_strcmp(subtag, "Sequence") == 0)
910 {
911 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
912 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
913 #else
914 if(state->makeBinaryCollation) {
915
916 /* do the collation elements */
917 int32_t len = 0;
918 uint8_t *data = NULL;
919 UCollator *coll = NULL;
920 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
921 int32_t reorderCodeCount;
922 int32_t reorderCodeIndex;
923 UParseError parseError;
924
925 genrbdata.inputDir = state->inputdir;
926 genrbdata.outputDir = state->outputdir;
927
928 UErrorCode intStatus = U_ZERO_ERROR;
929 uprv_memset(&parseError, 0, sizeof(parseError));
930 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
931 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
932
933 if (U_SUCCESS(intStatus) && coll != NULL)
934 {
935 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
936 data = (uint8_t *)uprv_malloc(len);
937 intStatus = U_ZERO_ERROR;
938 len = ucol_cloneBinary(coll, data, len, &intStatus);
939
940 /* tailoring rules version */
941 /* This is wrong! */
942 /*coll->dataInfo.dataVersion[1] = version[0];*/
943 /* Copy tailoring version. Builder version already */
944 /* set in ucol_openRules */
945 ((UCATableHeader *)data)->version[1] = version[0];
946 ((UCATableHeader *)data)->version[2] = version[1];
947 ((UCATableHeader *)data)->version[3] = version[2];
948
949 if (U_SUCCESS(intStatus) && data != NULL)
950 {
951 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
952 table_add(result, collationBin, line, status);
953 uprv_free(data);
954
955 reorderCodeCount = ucol_getReorderCodes(
956 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
957 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
958 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
959 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
960 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
961 }
962 table_add(result, reorderCodeRes, line, status);
963 }
964 }
965 else
966 {
967 warning(line, "could not obtain rules from collator");
968 if(isStrict()){
969 *status = U_INVALID_FORMAT_ERROR;
970 return NULL;
971 }
972 }
973
974 ucol_close(coll);
975 }
976 else
977 {
978 if(intStatus == U_FILE_ACCESS_ERROR) {
979 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
980 *status = intStatus;
981 return NULL;
982 }
983 char preBuffer[100], postBuffer[100];
984 escape(parseError.preContext, preBuffer);
985 escape(parseError.postContext, postBuffer);
986 warning(line,
987 "%%%%CollationBin could not be constructed from CollationElements\n"
988 " check context, check that the FractionalUCA.txt UCA version "
989 "matches the current UCD version\n"
990 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }",
991 u_errorName(intStatus),
992 parseError.line,
993 parseError.offset,
994 preBuffer,
995 postBuffer);
996 if(isStrict()){
997 *status = intStatus;
998 return NULL;
999 }
1000 }
1001 } else {
1002 if(isVerbose()) {
1003 printf("Not building Collation binary\n");
1004 }
1005 }
1006 #endif
1007 /* in order to achieve smaller data files, we can direct genrb */
1008 /* to omit collation rules */
1009 if(state->omitCollationRules) {
1010 bundle_closeString(state->bundle, member);
1011 } else {
1012 table_add(result, member, line, status);
1013 }
1014 }
1015 if (U_FAILURE(*status))
1016 {
1017 res_close(result);
1018 return NULL;
1019 }
1020 }
1021
1022 // Reached the end without a TOK_CLOSE_BRACE. Should be an error.
1023 *status = U_INTERNAL_PROGRAM_ERROR;
1024 return NULL;
1025 }
1026
1027 static struct SResource *
1028 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1029 {
1030 struct SResource *result = NULL;
1031 struct SResource *member = NULL;
1032 struct SResource *collationRes = NULL;
1033 struct UString *tokenValue;
1034 struct UString comment;
1035 enum ETokenType token;
1036 char subtag[1024], typeKeyword[1024];
1037 uint32_t line;
1038
1039 result = table_open(state->bundle, tag, NULL, status);
1040
1041 if (result == NULL || U_FAILURE(*status))
1042 {
1043 return NULL;
1044 }
1045 if(isVerbose()){
1046 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1047 }
1048 if(!newCollation) {
1049 return addCollation(state, result, startline, status);
1050 }
1051 else {
1052 for(;;) {
1053 ustr_init(&comment);
1054 token = getToken(state, &tokenValue, &comment, &line, status);
1055
1056 if (token == TOK_CLOSE_BRACE)
1057 {
1058 return result;
1059 }
1060
1061 if (token != TOK_STRING)
1062 {
1063 res_close(result);
1064 *status = U_INVALID_FORMAT_ERROR;
1065
1066 if (token == TOK_EOF)
1067 {
1068 error(startline, "unterminated table");
1069 }
1070 else
1071 {
1072 error(line, "Unexpected token %s", tokenNames[token]);
1073 }
1074
1075 return NULL;
1076 }
1077
1078 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1079
1080 if (U_FAILURE(*status))
1081 {
1082 res_close(result);
1083 return NULL;
1084 }
1085
1086 if (uprv_strcmp(subtag, "default") == 0)
1087 {
1088 member = parseResource(state, subtag, NULL, status);
1089
1090 if (U_FAILURE(*status))
1091 {
1092 res_close(result);
1093 return NULL;
1094 }
1095
1096 table_add(result, member, line, status);
1097 }
1098 else
1099 {
1100 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1101 /* this probably needs to be refactored or recursively use the parser */
1102 /* first we assume that our collation table won't have the explicit type */
1103 /* then, we cannot handle aliases */
1104 if(token == TOK_OPEN_BRACE) {
1105 token = getToken(state, &tokenValue, &comment, &line, status);
1106 collationRes = table_open(state->bundle, subtag, NULL, status);
1107 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
1108 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
1109 table_add(result, collationRes, startline, status);
1110 }
1111 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1112 /* we could have a table too */
1113 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1114 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1115 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1116 member = parseResource(state, subtag, NULL, status);
1117 if (U_FAILURE(*status))
1118 {
1119 res_close(result);
1120 return NULL;
1121 }
1122
1123 table_add(result, member, line, status);
1124 } else {
1125 res_close(result);
1126 *status = U_INVALID_FORMAT_ERROR;
1127 return NULL;
1128 }
1129 } else {
1130 res_close(result);
1131 *status = U_INVALID_FORMAT_ERROR;
1132 return NULL;
1133 }
1134 }
1135
1136 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1137
1138 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1139
1140 if (U_FAILURE(*status))
1141 {
1142 res_close(result);
1143 return NULL;
1144 }
1145 }
1146 }
1147 }
1148
1149 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1150 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1151 static struct SResource *
1152 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1153 {
1154 struct SResource *member = NULL;
1155 struct UString *tokenValue=NULL;
1156 struct UString comment;
1157 enum ETokenType token;
1158 char subtag[1024];
1159 uint32_t line;
1160 UBool readToken = FALSE;
1161
1162 /* '{' . (name resource)* '}' */
1163
1164 if(isVerbose()){
1165 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1166 }
1167 for (;;)
1168 {
1169 ustr_init(&comment);
1170 token = getToken(state, &tokenValue, &comment, &line, status);
1171
1172 if (token == TOK_CLOSE_BRACE)
1173 {
1174 if (!readToken) {
1175 warning(startline, "Encountered empty table");
1176 }
1177 return table;
1178 }
1179
1180 if (token != TOK_STRING)
1181 {
1182 *status = U_INVALID_FORMAT_ERROR;
1183
1184 if (token == TOK_EOF)
1185 {
1186 error(startline, "unterminated table");
1187 }
1188 else
1189 {
1190 error(line, "unexpected token %s", tokenNames[token]);
1191 }
1192
1193 return NULL;
1194 }
1195
1196 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1197 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1198 } else {
1199 *status = U_INVALID_FORMAT_ERROR;
1200 error(line, "invariant characters required for table keys");
1201 return NULL;
1202 }
1203
1204 if (U_FAILURE(*status))
1205 {
1206 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1207 return NULL;
1208 }
1209
1210 member = parseResource(state, subtag, &comment, status);
1211
1212 if (member == NULL || U_FAILURE(*status))
1213 {
1214 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1215 return NULL;
1216 }
1217
1218 table_add(table, member, line, status);
1219
1220 if (U_FAILURE(*status))
1221 {
1222 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1223 return NULL;
1224 }
1225 readToken = TRUE;
1226 ustr_deinit(&comment);
1227 }
1228
1229 /* not reached */
1230 /* A compiler warning will appear if all paths don't contain a return statement. */
1231 /* *status = U_INTERNAL_PROGRAM_ERROR;
1232 return NULL;*/
1233 }
1234
1235 static struct SResource *
1236 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1237 {
1238 struct SResource *result;
1239
1240 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1241 {
1242 return parseCollationElements(state, tag, startline, FALSE, status);
1243 }
1244 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1245 {
1246 return parseCollationElements(state, tag, startline, TRUE, status);
1247 }
1248 if(isVerbose()){
1249 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1250 }
1251
1252 result = table_open(state->bundle, tag, comment, status);
1253
1254 if (result == NULL || U_FAILURE(*status))
1255 {
1256 return NULL;
1257 }
1258 return realParseTable(state, result, tag, startline, status);
1259 }
1260
1261 static struct SResource *
1262 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1263 {
1264 struct SResource *result = NULL;
1265 struct SResource *member = NULL;
1266 struct UString *tokenValue;
1267 struct UString memberComments;
1268 enum ETokenType token;
1269 UBool readToken = FALSE;
1270
1271 result = array_open(state->bundle, tag, comment, status);
1272
1273 if (result == NULL || U_FAILURE(*status))
1274 {
1275 return NULL;
1276 }
1277 if(isVerbose()){
1278 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1279 }
1280
1281 ustr_init(&memberComments);
1282
1283 /* '{' . resource [','] '}' */
1284 for (;;)
1285 {
1286 /* reset length */
1287 ustr_setlen(&memberComments, 0, status);
1288
1289 /* check for end of array, but don't consume next token unless it really is the end */
1290 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1291
1292
1293 if (token == TOK_CLOSE_BRACE)
1294 {
1295 getToken(state, NULL, NULL, NULL, status);
1296 if (!readToken) {
1297 warning(startline, "Encountered empty array");
1298 }
1299 break;
1300 }
1301
1302 if (token == TOK_EOF)
1303 {
1304 res_close(result);
1305 *status = U_INVALID_FORMAT_ERROR;
1306 error(startline, "unterminated array");
1307 return NULL;
1308 }
1309
1310 /* string arrays are a special case */
1311 if (token == TOK_STRING)
1312 {
1313 getToken(state, &tokenValue, &memberComments, NULL, status);
1314 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1315 }
1316 else
1317 {
1318 member = parseResource(state, NULL, &memberComments, status);
1319 }
1320
1321 if (member == NULL || U_FAILURE(*status))
1322 {
1323 res_close(result);
1324 return NULL;
1325 }
1326
1327 array_add(result, member, status);
1328
1329 if (U_FAILURE(*status))
1330 {
1331 res_close(result);
1332 return NULL;
1333 }
1334
1335 /* eat optional comma if present */
1336 token = peekToken(state, 0, NULL, NULL, NULL, status);
1337
1338 if (token == TOK_COMMA)
1339 {
1340 getToken(state, NULL, NULL, NULL, status);
1341 }
1342
1343 if (U_FAILURE(*status))
1344 {
1345 res_close(result);
1346 return NULL;
1347 }
1348 readToken = TRUE;
1349 }
1350
1351 ustr_deinit(&memberComments);
1352 return result;
1353 }
1354
1355 static struct SResource *
1356 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1357 {
1358 struct SResource *result = NULL;
1359 enum ETokenType token;
1360 char *string;
1361 int32_t value;
1362 UBool readToken = FALSE;
1363 char *stopstring;
1364 uint32_t len;
1365 struct UString memberComments;
1366
1367 result = intvector_open(state->bundle, tag, comment, status);
1368
1369 if (result == NULL || U_FAILURE(*status))
1370 {
1371 return NULL;
1372 }
1373
1374 if(isVerbose()){
1375 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1376 }
1377 ustr_init(&memberComments);
1378 /* '{' . string [','] '}' */
1379 for (;;)
1380 {
1381 ustr_setlen(&memberComments, 0, status);
1382
1383 /* check for end of array, but don't consume next token unless it really is the end */
1384 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1385
1386 if (token == TOK_CLOSE_BRACE)
1387 {
1388 /* it's the end, consume the close brace */
1389 getToken(state, NULL, NULL, NULL, status);
1390 if (!readToken) {
1391 warning(startline, "Encountered empty int vector");
1392 }
1393 ustr_deinit(&memberComments);
1394 return result;
1395 }
1396
1397 string = getInvariantString(state, NULL, NULL, status);
1398
1399 if (U_FAILURE(*status))
1400 {
1401 res_close(result);
1402 return NULL;
1403 }
1404
1405 /* For handling illegal char in the Intvector */
1406 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1407 len=(uint32_t)(stopstring-string);
1408
1409 if(len==uprv_strlen(string))
1410 {
1411 intvector_add(result, value, status);
1412 uprv_free(string);
1413 token = peekToken(state, 0, NULL, NULL, NULL, status);
1414 }
1415 else
1416 {
1417 uprv_free(string);
1418 *status=U_INVALID_CHAR_FOUND;
1419 }
1420
1421 if (U_FAILURE(*status))
1422 {
1423 res_close(result);
1424 return NULL;
1425 }
1426
1427 /* the comma is optional (even though it is required to prevent the reader from concatenating
1428 consecutive entries) so that a missing comma on the last entry isn't an error */
1429 if (token == TOK_COMMA)
1430 {
1431 getToken(state, NULL, NULL, NULL, status);
1432 }
1433 readToken = TRUE;
1434 }
1435
1436 /* not reached */
1437 /* A compiler warning will appear if all paths don't contain a return statement. */
1438 /* intvector_close(result, status);
1439 *status = U_INTERNAL_PROGRAM_ERROR;
1440 return NULL;*/
1441 }
1442
1443 static struct SResource *
1444 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1445 {
1446 struct SResource *result = NULL;
1447 uint8_t *value;
1448 char *string;
1449 char toConv[3] = {'\0', '\0', '\0'};
1450 uint32_t count;
1451 uint32_t i;
1452 uint32_t line;
1453 char *stopstring;
1454 uint32_t len;
1455
1456 string = getInvariantString(state, &line, NULL, status);
1457
1458 if (string == NULL || U_FAILURE(*status))
1459 {
1460 return NULL;
1461 }
1462
1463 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1464
1465 if (U_FAILURE(*status))
1466 {
1467 uprv_free(string);
1468 return NULL;
1469 }
1470
1471 if(isVerbose()){
1472 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1473 }
1474
1475 count = (uint32_t)uprv_strlen(string);
1476 if (count > 0){
1477 if((count % 2)==0){
1478 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1479
1480 if (value == NULL)
1481 {
1482 uprv_free(string);
1483 *status = U_MEMORY_ALLOCATION_ERROR;
1484 return NULL;
1485 }
1486
1487 for (i = 0; i < count; i += 2)
1488 {
1489 toConv[0] = string[i];
1490 toConv[1] = string[i + 1];
1491
1492 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1493 len=(uint32_t)(stopstring-toConv);
1494
1495 if(len!=uprv_strlen(toConv))
1496 {
1497 uprv_free(string);
1498 *status=U_INVALID_CHAR_FOUND;
1499 return NULL;
1500 }
1501 }
1502
1503 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1504
1505 uprv_free(value);
1506 }
1507 else
1508 {
1509 *status = U_INVALID_CHAR_FOUND;
1510 uprv_free(string);
1511 error(line, "Encountered invalid binary string");
1512 return NULL;
1513 }
1514 }
1515 else
1516 {
1517 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1518 warning(startline, "Encountered empty binary tag");
1519 }
1520 uprv_free(string);
1521
1522 return result;
1523 }
1524
1525 static struct SResource *
1526 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1527 {
1528 struct SResource *result = NULL;
1529 int32_t value;
1530 char *string;
1531 char *stopstring;
1532 uint32_t len;
1533
1534 string = getInvariantString(state, NULL, NULL, status);
1535
1536 if (string == NULL || U_FAILURE(*status))
1537 {
1538 return NULL;
1539 }
1540
1541 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1542
1543 if (U_FAILURE(*status))
1544 {
1545 uprv_free(string);
1546 return NULL;
1547 }
1548
1549 if(isVerbose()){
1550 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1551 }
1552
1553 if (uprv_strlen(string) <= 0)
1554 {
1555 warning(startline, "Encountered empty integer. Default value is 0.");
1556 }
1557
1558 /* Allow integer support for hexdecimal, octal digit and decimal*/
1559 /* and handle illegal char in the integer*/
1560 value = uprv_strtoul(string, &stopstring, 0);
1561 len=(uint32_t)(stopstring-string);
1562 if(len==uprv_strlen(string))
1563 {
1564 result = int_open(state->bundle, tag, value, comment, status);
1565 }
1566 else
1567 {
1568 *status=U_INVALID_CHAR_FOUND;
1569 }
1570 uprv_free(string);
1571
1572 return result;
1573 }
1574
1575 static struct SResource *
1576 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1577 {
1578 struct SResource *result;
1579 FileStream *file;
1580 int32_t len;
1581 uint8_t *data;
1582 char *filename;
1583 uint32_t line;
1584 char *fullname = NULL;
1585 filename = getInvariantString(state, &line, NULL, status);
1586
1587 if (U_FAILURE(*status))
1588 {
1589 return NULL;
1590 }
1591
1592 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1593
1594 if (U_FAILURE(*status))
1595 {
1596 uprv_free(filename);
1597 return NULL;
1598 }
1599
1600 if(isVerbose()){
1601 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1602 }
1603
1604 /* Open the input file for reading */
1605 if (state->inputdir == NULL)
1606 {
1607 #if 1
1608 /*
1609 * Always save file file name, even if there's
1610 * no input directory specified. MIGHT BREAK SOMETHING
1611 */
1612 int32_t filenameLength = uprv_strlen(filename);
1613
1614 fullname = (char *) uprv_malloc(filenameLength + 1);
1615 uprv_strcpy(fullname, filename);
1616 #endif
1617
1618 file = T_FileStream_open(filename, "rb");
1619 }
1620 else
1621 {
1622
1623 int32_t count = (int32_t)uprv_strlen(filename);
1624
1625 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1626 {
1627 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1628
1629 /* test for NULL */
1630 if(fullname == NULL)
1631 {
1632 *status = U_MEMORY_ALLOCATION_ERROR;
1633 return NULL;
1634 }
1635
1636 uprv_strcpy(fullname, state->inputdir);
1637
1638 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1639 fullname[state->inputdirLength + 1] = '\0';
1640
1641 uprv_strcat(fullname, filename);
1642 }
1643 else
1644 {
1645 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1646
1647 /* test for NULL */
1648 if(fullname == NULL)
1649 {
1650 *status = U_MEMORY_ALLOCATION_ERROR;
1651 return NULL;
1652 }
1653
1654 uprv_strcpy(fullname, state->inputdir);
1655 uprv_strcat(fullname, filename);
1656 }
1657
1658 file = T_FileStream_open(fullname, "rb");
1659
1660 }
1661
1662 if (file == NULL)
1663 {
1664 error(line, "couldn't open input file %s", filename);
1665 *status = U_FILE_ACCESS_ERROR;
1666 return NULL;
1667 }
1668
1669 len = T_FileStream_size(file);
1670 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1671 /* test for NULL */
1672 if(data == NULL)
1673 {
1674 *status = U_MEMORY_ALLOCATION_ERROR;
1675 T_FileStream_close (file);
1676 return NULL;
1677 }
1678
1679 /* int32_t numRead = */ T_FileStream_read (file, data, len);
1680 T_FileStream_close (file);
1681
1682 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1683
1684 uprv_free(data);
1685 uprv_free(filename);
1686 uprv_free(fullname);
1687
1688 return result;
1689 }
1690
1691 static struct SResource *
1692 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1693 {
1694 struct SResource *result;
1695 int32_t len=0;
1696 char *filename;
1697 uint32_t line;
1698 UChar *pTarget = NULL;
1699
1700 UCHARBUF *ucbuf;
1701 char *fullname = NULL;
1702 int32_t count = 0;
1703 const char* cp = NULL;
1704 const UChar* uBuffer = NULL;
1705
1706 filename = getInvariantString(state, &line, NULL, status);
1707 count = (int32_t)uprv_strlen(filename);
1708
1709 if (U_FAILURE(*status))
1710 {
1711 return NULL;
1712 }
1713
1714 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1715
1716 if (U_FAILURE(*status))
1717 {
1718 uprv_free(filename);
1719 return NULL;
1720 }
1721
1722 if(isVerbose()){
1723 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1724 }
1725
1726 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1727 /* test for NULL */
1728 if(fullname == NULL)
1729 {
1730 *status = U_MEMORY_ALLOCATION_ERROR;
1731 uprv_free(filename);
1732 return NULL;
1733 }
1734
1735 if(state->inputdir!=NULL){
1736 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1737 {
1738
1739 uprv_strcpy(fullname, state->inputdir);
1740
1741 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1742 fullname[state->inputdirLength + 1] = '\0';
1743
1744 uprv_strcat(fullname, filename);
1745 }
1746 else
1747 {
1748 uprv_strcpy(fullname, state->inputdir);
1749 uprv_strcat(fullname, filename);
1750 }
1751 }else{
1752 uprv_strcpy(fullname,filename);
1753 }
1754
1755 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1756
1757 if (U_FAILURE(*status)) {
1758 error(line, "couldn't open input file %s\n", filename);
1759 return NULL;
1760 }
1761
1762 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1763 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1764
1765 ucbuf_close(ucbuf);
1766
1767 uprv_free(pTarget);
1768
1769 uprv_free(filename);
1770 uprv_free(fullname);
1771
1772 return result;
1773 }
1774
1775
1776
1777
1778
1779 U_STRING_DECL(k_type_string, "string", 6);
1780 U_STRING_DECL(k_type_binary, "binary", 6);
1781 U_STRING_DECL(k_type_bin, "bin", 3);
1782 U_STRING_DECL(k_type_table, "table", 5);
1783 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1784 U_STRING_DECL(k_type_int, "int", 3);
1785 U_STRING_DECL(k_type_integer, "integer", 7);
1786 U_STRING_DECL(k_type_array, "array", 5);
1787 U_STRING_DECL(k_type_alias, "alias", 5);
1788 U_STRING_DECL(k_type_intvector, "intvector", 9);
1789 U_STRING_DECL(k_type_import, "import", 6);
1790 U_STRING_DECL(k_type_include, "include", 7);
1791
1792 /* Various non-standard processing plugins that create one or more special resources. */
1793 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1794 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1795 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1796 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1797
/*
 * Resource types known to the parser. The numeric value of each enumerator is
 * used as an index into gResourceTypes[], so the two must be kept in the same
 * order.
 */
typedef enum EResourceType
{
    RT_UNKNOWN                = 0,   /* no explicit type given */
    RT_STRING                 = 1,
    RT_BINARY                 = 2,
    RT_TABLE                  = 3,
    RT_TABLE_NO_FALLBACK      = 4,
    RT_INTEGER                = 5,
    RT_ARRAY                  = 6,
    RT_ALIAS                  = 7,
    RT_INTVECTOR              = 8,
    RT_IMPORT                 = 9,
    RT_INCLUDE                = 10,
    RT_PROCESS_UCA_RULES      = 11,
    RT_PROCESS_COLLATION      = 12,
    RT_PROCESS_TRANSLITERATOR = 13,
    RT_PROCESS_DEPENDENCY     = 14,
    RT_RESERVED               = 15   /* end marker, not a real type */
} EResourceType;
1817
1818 static struct {
1819 const char *nameChars; /* only used for debugging */
1820 const UChar *nameUChars;
1821 ParseResourceFunction *parseFunction;
1822 } gResourceTypes[] = {
1823 {"Unknown", NULL, NULL},
1824 {"string", k_type_string, parseString},
1825 {"binary", k_type_binary, parseBinary},
1826 {"table", k_type_table, parseTable},
1827 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1828 {"integer", k_type_integer, parseInteger},
1829 {"array", k_type_array, parseArray},
1830 {"alias", k_type_alias, parseAlias},
1831 {"intvector", k_type_intvector, parseIntVector},
1832 {"import", k_type_import, parseImport},
1833 {"include", k_type_include, parseInclude},
1834 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1835 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1836 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1837 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1838 {"reserved", NULL, NULL}
1839 };
1840
1841 void initParser()
1842 {
1843 U_STRING_INIT(k_type_string, "string", 6);
1844 U_STRING_INIT(k_type_binary, "binary", 6);
1845 U_STRING_INIT(k_type_bin, "bin", 3);
1846 U_STRING_INIT(k_type_table, "table", 5);
1847 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1848 U_STRING_INIT(k_type_int, "int", 3);
1849 U_STRING_INIT(k_type_integer, "integer", 7);
1850 U_STRING_INIT(k_type_array, "array", 5);
1851 U_STRING_INIT(k_type_alias, "alias", 5);
1852 U_STRING_INIT(k_type_intvector, "intvector", 9);
1853 U_STRING_INIT(k_type_import, "import", 6);
1854 U_STRING_INIT(k_type_include, "include", 7);
1855
1856 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1857 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1858 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1859 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1860 }
1861
1862 static inline UBool isTable(enum EResourceType type) {
1863 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1864 }
1865
1866 static enum EResourceType
1867 parseResourceType(ParseState* state, UErrorCode *status)
1868 {
1869 struct UString *tokenValue;
1870 struct UString comment;
1871 enum EResourceType result = RT_UNKNOWN;
1872 uint32_t line=0;
1873 ustr_init(&comment);
1874 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1875
1876 if (U_FAILURE(*status))
1877 {
1878 return RT_UNKNOWN;
1879 }
1880
1881 *status = U_ZERO_ERROR;
1882
1883 /* Search for normal types */
1884 result=RT_UNKNOWN;
1885 while ((result=(EResourceType)(result+1)) < RT_RESERVED) {
1886 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1887 break;
1888 }
1889 }
1890 /* Now search for the aliases */
1891 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1892 result = RT_INTEGER;
1893 }
1894 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1895 result = RT_BINARY;
1896 }
1897 else if (result == RT_RESERVED) {
1898 char tokenBuffer[1024];
1899 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1900 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1901 *status = U_INVALID_FORMAT_ERROR;
1902 error(line, "unknown resource type '%s'", tokenBuffer);
1903 }
1904
1905 return result;
1906 }
1907
1908 /* parse a non-top-level resource */
1909 static struct SResource *
1910 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1911 {
1912 enum ETokenType token;
1913 enum EResourceType resType = RT_UNKNOWN;
1914 ParseResourceFunction *parseFunction = NULL;
1915 struct UString *tokenValue;
1916 uint32_t startline;
1917 uint32_t line;
1918
1919
1920 token = getToken(state, &tokenValue, NULL, &startline, status);
1921
1922 if(isVerbose()){
1923 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1924 }
1925
1926 /* name . [ ':' type ] '{' resource '}' */
1927 /* This function parses from the colon onwards. If the colon is present, parse the
1928 type then try to parse a resource of that type. If there is no explicit type,
1929 work it out using the lookahead tokens. */
1930 switch (token)
1931 {
1932 case TOK_EOF:
1933 *status = U_INVALID_FORMAT_ERROR;
1934 error(startline, "Unexpected EOF encountered");
1935 return NULL;
1936
1937 case TOK_ERROR:
1938 *status = U_INVALID_FORMAT_ERROR;
1939 return NULL;
1940
1941 case TOK_COLON:
1942 resType = parseResourceType(state, status);
1943 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1944
1945 if (U_FAILURE(*status))
1946 {
1947 return NULL;
1948 }
1949
1950 break;
1951
1952 case TOK_OPEN_BRACE:
1953 break;
1954
1955 default:
1956 *status = U_INVALID_FORMAT_ERROR;
1957 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1958 return NULL;
1959 }
1960
1961
1962 if (resType == RT_UNKNOWN)
1963 {
1964 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1965 We could have any of the following:
1966 { { => array (nested)
1967 { :/} => array
1968 { string , => string array
1969
1970 { string { => table
1971
1972 { string :/{ => table
1973 { string } => string
1974 */
1975
1976 token = peekToken(state, 0, NULL, &line, NULL,status);
1977
1978 if (U_FAILURE(*status))
1979 {
1980 return NULL;
1981 }
1982
1983 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1984 {
1985 resType = RT_ARRAY;
1986 }
1987 else if (token == TOK_STRING)
1988 {
1989 token = peekToken(state, 1, NULL, &line, NULL, status);
1990
1991 if (U_FAILURE(*status))
1992 {
1993 return NULL;
1994 }
1995
1996 switch (token)
1997 {
1998 case TOK_COMMA: resType = RT_ARRAY; break;
1999 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
2000 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
2001 case TOK_COLON: resType = RT_TABLE; break;
2002 default:
2003 *status = U_INVALID_FORMAT_ERROR;
2004 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2005 return NULL;
2006 }
2007 }
2008 else
2009 {
2010 *status = U_INVALID_FORMAT_ERROR;
2011 error(line, "Unexpected token after '{'");
2012 return NULL;
2013 }
2014
2015 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2016 } else if(resType == RT_TABLE_NO_FALLBACK) {
2017 *status = U_INVALID_FORMAT_ERROR;
2018 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2019 return NULL;
2020 }
2021
2022
2023 /* We should now know what we need to parse next, so call the appropriate parser
2024 function and return. */
2025 parseFunction = gResourceTypes[resType].parseFunction;
2026 if (parseFunction != NULL) {
2027 return parseFunction(state, tag, startline, comment, status);
2028 }
2029 else {
2030 *status = U_INTERNAL_PROGRAM_ERROR;
2031 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2032 }
2033
2034 return NULL;
2035 }
2036
2037 /* parse the top-level resource */
2038 struct SRBRoot *
2039 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir,
2040 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
2041 {
2042 struct UString *tokenValue;
2043 struct UString comment;
2044 uint32_t line;
2045 enum EResourceType bundleType;
2046 enum ETokenType token;
2047 ParseState state;
2048 uint32_t i;
2049
2050
2051 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2052 {
2053 ustr_init(&state.lookahead[i].value);
2054 ustr_init(&state.lookahead[i].comment);
2055 }
2056
2057 initLookahead(&state, buf, status);
2058
2059 state.inputdir = inputDir;
2060 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2061 state.outputdir = outputDir;
2062 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2063 state.makeBinaryCollation = makeBinaryCollation;
2064 state.omitCollationRules = omitCollationRules;
2065
2066 ustr_init(&comment);
2067 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2068
2069 state.bundle = bundle_open(&comment, FALSE, status);
2070
2071 if (state.bundle == NULL || U_FAILURE(*status))
2072 {
2073 return NULL;
2074 }
2075
2076
2077 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2078
2079 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2080 token = getToken(&state, NULL, NULL, &line, status);
2081 if(token==TOK_COLON) {
2082 *status=U_ZERO_ERROR;
2083 bundleType=parseResourceType(&state, status);
2084
2085 if(isTable(bundleType))
2086 {
2087 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2088 }
2089 else
2090 {
2091 *status=U_PARSE_ERROR;
2092 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2093 }
2094 }
2095 else
2096 {
2097 /* not a colon */
2098 if(token==TOK_OPEN_BRACE)
2099 {
2100 *status=U_ZERO_ERROR;
2101 bundleType=RT_TABLE;
2102 }
2103 else
2104 {
2105 /* neither colon nor open brace */
2106 *status=U_PARSE_ERROR;
2107 bundleType=RT_UNKNOWN;
2108 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2109 }
2110 }
2111
2112 if (U_FAILURE(*status))
2113 {
2114 bundle_close(state.bundle, status);
2115 return NULL;
2116 }
2117
2118 if(bundleType==RT_TABLE_NO_FALLBACK) {
2119 /*
2120 * Parse a top-level table with the table(nofallback) declaration.
2121 * This is the same as a regular table, but also sets the
2122 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2123 */
2124 state.bundle->noFallback=TRUE;
2125 }
2126 /* top-level tables need not handle special table names like "collations" */
2127 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2128 if(dependencyArray!=NULL){
2129 table_add(state.bundle->fRoot, dependencyArray, 0, status);
2130 dependencyArray = NULL;
2131 }
2132 if (U_FAILURE(*status))
2133 {
2134 bundle_close(state.bundle, status);
2135 res_close(dependencyArray);
2136 return NULL;
2137 }
2138
2139 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2140 {
2141 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2142 if(isStrict()){
2143 *status = U_INVALID_FORMAT_ERROR;
2144 return NULL;
2145 }
2146 }
2147
2148 cleanupLookahead(&state);
2149 ustr_deinit(&comment);
2150 return state.bundle;
2151 }

mercurial