|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 1998-2013, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * |
|
9 * File parse.cpp |
|
10 * |
|
11 * Modification History: |
|
12 * |
|
13 * Date Name Description |
|
14 * 05/26/99 stephen Creation. |
|
15 * 02/25/00 weiv Overhaul to write udata |
|
16 * 5/10/01 Ram removed ustdio dependency |
|
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten |
|
18 ******************************************************************************* |
|
19 */ |
|
20 |
|
21 #include "ucol_imp.h" |
|
22 #include "parse.h" |
|
23 #include "errmsg.h" |
|
24 #include "uhash.h" |
|
25 #include "cmemory.h" |
|
26 #include "cstring.h" |
|
27 #include "uinvchar.h" |
|
28 #include "read.h" |
|
29 #include "ustr.h" |
|
30 #include "reslist.h" |
|
31 #include "rbt_pars.h" |
|
32 #include "genrb.h" |
|
33 #include "unicode/ustring.h" |
|
34 #include "unicode/uscript.h" |
|
35 #include "unicode/putil.h" |
|
36 #include <stdio.h> |
|
37 |
|
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/*
 * Code units that the rule-file scanner in this file compares against.
 * STARTCOMMAND/ENDCOMMAND deliberately share their values with the
 * square-bracket constants.
 */
#define CR              0x000D  /* carriage return */
#define LF              0x000A  /* line feed */
#define SPACE           0x0020  /* ' '  */
#define TAB             0x0009  /* '\t' */
#define ESCAPE          0x005C  /* '\\' */
#define HASH            0x0023  /* '#'  */
#define QUOTE           0x0027  /* '\'' */
#define ZERO            0x0030  /* '0'  */
#define STARTCOMMAND    0x005B  /* '['  */
#define ENDCOMMAND      0x005D  /* ']'  */
#define OPENSQBRACKET   0x005B  /* '['  */
#define CLOSESQBRACKET  0x005D  /* ']'  */
|
53 |
|
54 struct Lookahead |
|
55 { |
|
56 enum ETokenType type; |
|
57 struct UString value; |
|
58 struct UString comment; |
|
59 uint32_t line; |
|
60 }; |
|
61 |
|
62 /* keep in sync with token defines in read.h */ |
|
63 const char *tokenNames[TOK_TOKEN_COUNT] = |
|
64 { |
|
65 "string", /* A string token, such as "MonthNames" */ |
|
66 "'{'", /* An opening brace character */ |
|
67 "'}'", /* A closing brace character */ |
|
68 "','", /* A comma */ |
|
69 "':'", /* A colon */ |
|
70 |
|
71 "<end of file>", /* End of the file has been reached successfully */ |
|
72 "<end of line>" |
|
73 }; |
|
74 |
|
75 /* Just to store "TRUE" */ |
|
76 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; |
|
77 |
|
78 typedef struct { |
|
79 struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; |
|
80 uint32_t lookaheadPosition; |
|
81 UCHARBUF *buffer; |
|
82 struct SRBRoot *bundle; |
|
83 const char *inputdir; |
|
84 uint32_t inputdirLength; |
|
85 const char *outputdir; |
|
86 uint32_t outputdirLength; |
|
87 UBool makeBinaryCollation; |
|
88 UBool omitCollationRules; |
|
89 } ParseState; |
|
90 |
|
91 typedef struct SResource * |
|
92 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status); |
|
93 |
|
94 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status); |
|
95 |
|
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slot, which means that token values don't
   survive a call to getToken, i.e.

   struct UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);       bad - value is now a different string
*/
|
109 static void |
|
110 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status) |
|
111 { |
|
112 static uint32_t initTypeStrings = 0; |
|
113 uint32_t i; |
|
114 |
|
115 if (!initTypeStrings) |
|
116 { |
|
117 initTypeStrings = 1; |
|
118 } |
|
119 |
|
120 state->lookaheadPosition = 0; |
|
121 state->buffer = buf; |
|
122 |
|
123 resetLineNumber(); |
|
124 |
|
125 for (i = 0; i < MAX_LOOKAHEAD; i++) |
|
126 { |
|
127 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); |
|
128 if (U_FAILURE(*status)) |
|
129 { |
|
130 return; |
|
131 } |
|
132 } |
|
133 |
|
134 *status = U_ZERO_ERROR; |
|
135 } |
|
136 |
|
137 static void |
|
138 cleanupLookahead(ParseState* state) |
|
139 { |
|
140 uint32_t i; |
|
141 for (i = 0; i <= MAX_LOOKAHEAD; i++) |
|
142 { |
|
143 ustr_deinit(&state->lookahead[i].value); |
|
144 ustr_deinit(&state->lookahead[i].comment); |
|
145 } |
|
146 |
|
147 } |
|
148 |
|
149 static enum ETokenType |
|
150 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status) |
|
151 { |
|
152 enum ETokenType result; |
|
153 uint32_t i; |
|
154 |
|
155 result = state->lookahead[state->lookaheadPosition].type; |
|
156 |
|
157 if (tokenValue != NULL) |
|
158 { |
|
159 *tokenValue = &state->lookahead[state->lookaheadPosition].value; |
|
160 } |
|
161 |
|
162 if (linenumber != NULL) |
|
163 { |
|
164 *linenumber = state->lookahead[state->lookaheadPosition].line; |
|
165 } |
|
166 |
|
167 if (comment != NULL) |
|
168 { |
|
169 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); |
|
170 } |
|
171 |
|
172 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1); |
|
173 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1); |
|
174 ustr_setlen(&state->lookahead[i].comment, 0, status); |
|
175 ustr_setlen(&state->lookahead[i].value, 0, status); |
|
176 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); |
|
177 |
|
178 /* printf("getToken, returning %s\n", tokenNames[result]); */ |
|
179 |
|
180 return result; |
|
181 } |
|
182 |
|
183 static enum ETokenType |
|
184 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status) |
|
185 { |
|
186 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1); |
|
187 |
|
188 if (U_FAILURE(*status)) |
|
189 { |
|
190 return TOK_ERROR; |
|
191 } |
|
192 |
|
193 if (lookaheadCount >= MAX_LOOKAHEAD) |
|
194 { |
|
195 *status = U_INTERNAL_PROGRAM_ERROR; |
|
196 return TOK_ERROR; |
|
197 } |
|
198 |
|
199 if (tokenValue != NULL) |
|
200 { |
|
201 *tokenValue = &state->lookahead[i].value; |
|
202 } |
|
203 |
|
204 if (linenumber != NULL) |
|
205 { |
|
206 *linenumber = state->lookahead[i].line; |
|
207 } |
|
208 |
|
209 if(comment != NULL){ |
|
210 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); |
|
211 } |
|
212 |
|
213 return state->lookahead[i].type; |
|
214 } |
|
215 |
|
216 static void |
|
217 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status) |
|
218 { |
|
219 uint32_t line; |
|
220 |
|
221 enum ETokenType token = getToken(state, tokenValue, comment, &line, status); |
|
222 |
|
223 if (linenumber != NULL) |
|
224 { |
|
225 *linenumber = line; |
|
226 } |
|
227 |
|
228 if (U_FAILURE(*status)) |
|
229 { |
|
230 return; |
|
231 } |
|
232 |
|
233 if (token != expectedToken) |
|
234 { |
|
235 *status = U_INVALID_FORMAT_ERROR; |
|
236 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]); |
|
237 } |
|
238 else |
|
239 { |
|
240 *status = U_ZERO_ERROR; |
|
241 } |
|
242 } |
|
243 |
|
244 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status) |
|
245 { |
|
246 struct UString *tokenValue; |
|
247 char *result; |
|
248 uint32_t count; |
|
249 |
|
250 expect(state, TOK_STRING, &tokenValue, comment, line, status); |
|
251 |
|
252 if (U_FAILURE(*status)) |
|
253 { |
|
254 return NULL; |
|
255 } |
|
256 |
|
257 count = u_strlen(tokenValue->fChars); |
|
258 if(!uprv_isInvariantUString(tokenValue->fChars, count)) { |
|
259 *status = U_INVALID_FORMAT_ERROR; |
|
260 error(*line, "invariant characters required for table keys, binary data, etc."); |
|
261 return NULL; |
|
262 } |
|
263 |
|
264 result = static_cast<char *>(uprv_malloc(count+1)); |
|
265 |
|
266 if (result == NULL) |
|
267 { |
|
268 *status = U_MEMORY_ALLOCATION_ERROR; |
|
269 return NULL; |
|
270 } |
|
271 |
|
272 u_UCharsToChars(tokenValue->fChars, result, count+1); |
|
273 return result; |
|
274 } |
|
275 |
|
276 static struct SResource * |
|
277 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) |
|
278 { |
|
279 struct SResource *result = NULL; |
|
280 struct UString *tokenValue; |
|
281 FileStream *file = NULL; |
|
282 char filename[256] = { '\0' }; |
|
283 char cs[128] = { '\0' }; |
|
284 uint32_t line; |
|
285 UBool quoted = FALSE; |
|
286 UCHARBUF *ucbuf=NULL; |
|
287 UChar32 c = 0; |
|
288 const char* cp = NULL; |
|
289 UChar *pTarget = NULL; |
|
290 UChar *target = NULL; |
|
291 UChar *targetLimit = NULL; |
|
292 int32_t size = 0; |
|
293 |
|
294 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); |
|
295 |
|
296 if(isVerbose()){ |
|
297 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
298 } |
|
299 |
|
300 if (U_FAILURE(*status)) |
|
301 { |
|
302 return NULL; |
|
303 } |
|
304 /* make the filename including the directory */ |
|
305 if (state->inputdir != NULL) |
|
306 { |
|
307 uprv_strcat(filename, state->inputdir); |
|
308 |
|
309 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) |
|
310 { |
|
311 uprv_strcat(filename, U_FILE_SEP_STRING); |
|
312 } |
|
313 } |
|
314 |
|
315 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); |
|
316 |
|
317 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); |
|
318 |
|
319 if (U_FAILURE(*status)) |
|
320 { |
|
321 return NULL; |
|
322 } |
|
323 uprv_strcat(filename, cs); |
|
324 |
|
325 if(state->omitCollationRules) { |
|
326 return res_none(); |
|
327 } |
|
328 |
|
329 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); |
|
330 |
|
331 if (U_FAILURE(*status)) { |
|
332 error(line, "An error occured while opening the input file %s\n", filename); |
|
333 return NULL; |
|
334 } |
|
335 |
|
336 /* We allocate more space than actually required |
|
337 * since the actual size needed for storing UChars |
|
338 * is not known in UTF-8 byte stream |
|
339 */ |
|
340 size = ucbuf_size(ucbuf) + 1; |
|
341 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size); |
|
342 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); |
|
343 target = pTarget; |
|
344 targetLimit = pTarget+size; |
|
345 |
|
346 /* read the rules into the buffer */ |
|
347 while (target < targetLimit) |
|
348 { |
|
349 c = ucbuf_getc(ucbuf, status); |
|
350 if(c == QUOTE) { |
|
351 quoted = (UBool)!quoted; |
|
352 } |
|
353 /* weiv (06/26/2002): adding the following: |
|
354 * - preserving spaces in commands [...] |
|
355 * - # comments until the end of line |
|
356 */ |
|
357 if (c == STARTCOMMAND && !quoted) |
|
358 { |
|
359 /* preserve commands |
|
360 * closing bracket will be handled by the |
|
361 * append at the end of the loop |
|
362 */ |
|
363 while(c != ENDCOMMAND) { |
|
364 U_APPEND_CHAR32_ONLY(c, target); |
|
365 c = ucbuf_getc(ucbuf, status); |
|
366 } |
|
367 } |
|
368 else if (c == HASH && !quoted) { |
|
369 /* skip comments */ |
|
370 while(c != CR && c != LF) { |
|
371 c = ucbuf_getc(ucbuf, status); |
|
372 } |
|
373 continue; |
|
374 } |
|
375 else if (c == ESCAPE) |
|
376 { |
|
377 c = unescape(ucbuf, status); |
|
378 |
|
379 if (c == (UChar32)U_ERR) |
|
380 { |
|
381 uprv_free(pTarget); |
|
382 T_FileStream_close(file); |
|
383 return NULL; |
|
384 } |
|
385 } |
|
386 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF)) |
|
387 { |
|
388 /* ignore spaces carriage returns |
|
389 * and line feed unless in the form \uXXXX |
|
390 */ |
|
391 continue; |
|
392 } |
|
393 |
|
394 /* Append UChar * after dissembling if c > 0xffff*/ |
|
395 if (c != (UChar32)U_EOF) |
|
396 { |
|
397 U_APPEND_CHAR32_ONLY(c, target); |
|
398 } |
|
399 else |
|
400 { |
|
401 break; |
|
402 } |
|
403 } |
|
404 |
|
405 /* terminate the string */ |
|
406 if(target < targetLimit){ |
|
407 *target = 0x0000; |
|
408 } |
|
409 |
|
410 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status); |
|
411 |
|
412 |
|
413 ucbuf_close(ucbuf); |
|
414 uprv_free(pTarget); |
|
415 T_FileStream_close(file); |
|
416 |
|
417 return result; |
|
418 } |
|
419 |
|
420 static struct SResource * |
|
421 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) |
|
422 { |
|
423 struct SResource *result = NULL; |
|
424 struct UString *tokenValue; |
|
425 FileStream *file = NULL; |
|
426 char filename[256] = { '\0' }; |
|
427 char cs[128] = { '\0' }; |
|
428 uint32_t line; |
|
429 UCHARBUF *ucbuf=NULL; |
|
430 const char* cp = NULL; |
|
431 UChar *pTarget = NULL; |
|
432 const UChar *pSource = NULL; |
|
433 int32_t size = 0; |
|
434 |
|
435 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); |
|
436 |
|
437 if(isVerbose()){ |
|
438 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
439 } |
|
440 |
|
441 if (U_FAILURE(*status)) |
|
442 { |
|
443 return NULL; |
|
444 } |
|
445 /* make the filename including the directory */ |
|
446 if (state->inputdir != NULL) |
|
447 { |
|
448 uprv_strcat(filename, state->inputdir); |
|
449 |
|
450 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) |
|
451 { |
|
452 uprv_strcat(filename, U_FILE_SEP_STRING); |
|
453 } |
|
454 } |
|
455 |
|
456 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); |
|
457 |
|
458 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); |
|
459 |
|
460 if (U_FAILURE(*status)) |
|
461 { |
|
462 return NULL; |
|
463 } |
|
464 uprv_strcat(filename, cs); |
|
465 |
|
466 |
|
467 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); |
|
468 |
|
469 if (U_FAILURE(*status)) { |
|
470 error(line, "An error occured while opening the input file %s\n", filename); |
|
471 return NULL; |
|
472 } |
|
473 |
|
474 /* We allocate more space than actually required |
|
475 * since the actual size needed for storing UChars |
|
476 * is not known in UTF-8 byte stream |
|
477 */ |
|
478 pSource = ucbuf_getBuffer(ucbuf, &size, status); |
|
479 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1)); |
|
480 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); |
|
481 |
|
482 #if !UCONFIG_NO_TRANSLITERATION |
|
483 size = utrans_stripRules(pSource, size, pTarget, status); |
|
484 #else |
|
485 size = 0; |
|
486 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n"); |
|
487 #endif |
|
488 result = string_open(state->bundle, tag, pTarget, size, NULL, status); |
|
489 |
|
490 ucbuf_close(ucbuf); |
|
491 uprv_free(pTarget); |
|
492 T_FileStream_close(file); |
|
493 |
|
494 return result; |
|
495 } |
|
496 static struct SResource* dependencyArray = NULL; |
|
497 |
|
498 static struct SResource * |
|
499 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) |
|
500 { |
|
501 struct SResource *result = NULL; |
|
502 struct SResource *elem = NULL; |
|
503 struct UString *tokenValue; |
|
504 uint32_t line; |
|
505 char filename[256] = { '\0' }; |
|
506 char cs[128] = { '\0' }; |
|
507 |
|
508 expect(state, TOK_STRING, &tokenValue, NULL, &line, status); |
|
509 |
|
510 if(isVerbose()){ |
|
511 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
512 } |
|
513 |
|
514 if (U_FAILURE(*status)) |
|
515 { |
|
516 return NULL; |
|
517 } |
|
518 /* make the filename including the directory */ |
|
519 if (state->outputdir != NULL) |
|
520 { |
|
521 uprv_strcat(filename, state->outputdir); |
|
522 |
|
523 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR) |
|
524 { |
|
525 uprv_strcat(filename, U_FILE_SEP_STRING); |
|
526 } |
|
527 } |
|
528 |
|
529 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); |
|
530 |
|
531 if (U_FAILURE(*status)) |
|
532 { |
|
533 return NULL; |
|
534 } |
|
535 uprv_strcat(filename, cs); |
|
536 if(!T_FileStream_file_exists(filename)){ |
|
537 if(isStrict()){ |
|
538 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); |
|
539 }else{ |
|
540 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); |
|
541 } |
|
542 } |
|
543 if(dependencyArray==NULL){ |
|
544 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status); |
|
545 } |
|
546 if(tag!=NULL){ |
|
547 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); |
|
548 } |
|
549 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status); |
|
550 |
|
551 array_add(dependencyArray, elem, status); |
|
552 |
|
553 if (U_FAILURE(*status)) |
|
554 { |
|
555 return NULL; |
|
556 } |
|
557 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); |
|
558 return result; |
|
559 } |
|
560 static struct SResource * |
|
561 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) |
|
562 { |
|
563 struct UString *tokenValue; |
|
564 struct SResource *result = NULL; |
|
565 |
|
566 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0) |
|
567 { |
|
568 return parseUCARules(tag, startline, status); |
|
569 }*/ |
|
570 if(isVerbose()){ |
|
571 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
572 } |
|
573 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); |
|
574 |
|
575 if (U_SUCCESS(*status)) |
|
576 { |
|
577 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore |
|
578 doesn't survive expect either) */ |
|
579 |
|
580 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); |
|
581 if(U_SUCCESS(*status) && result) { |
|
582 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); |
|
583 |
|
584 if (U_FAILURE(*status)) |
|
585 { |
|
586 res_close(result); |
|
587 return NULL; |
|
588 } |
|
589 } |
|
590 } |
|
591 |
|
592 return result; |
|
593 } |
|
594 |
|
595 static struct SResource * |
|
596 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) |
|
597 { |
|
598 struct UString *tokenValue; |
|
599 struct SResource *result = NULL; |
|
600 |
|
601 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); |
|
602 |
|
603 if(isVerbose()){ |
|
604 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
605 } |
|
606 |
|
607 if (U_SUCCESS(*status)) |
|
608 { |
|
609 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore |
|
610 doesn't survive expect either) */ |
|
611 |
|
612 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); |
|
613 |
|
614 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); |
|
615 |
|
616 if (U_FAILURE(*status)) |
|
617 { |
|
618 res_close(result); |
|
619 return NULL; |
|
620 } |
|
621 } |
|
622 |
|
623 return result; |
|
624 } |
|
625 |
|
/* Context handed to importFromDataFile() through the collation import callback. */
typedef struct {
    const char *inputDir;   /* directory searched for imported locale files; may be NULL */
    const char *outputDir;  /* forwarded unchanged to parse() */
} GenrbData;
|
630 |
|
631 static struct SResource* resLookup(struct SResource* res, const char* key){ |
|
632 struct SResource *current = NULL; |
|
633 struct SResTable *list; |
|
634 if (res == res_none()) { |
|
635 return NULL; |
|
636 } |
|
637 |
|
638 list = &(res->u.fTable); |
|
639 |
|
640 current = list->fFirst; |
|
641 while (current != NULL) { |
|
642 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { |
|
643 return current; |
|
644 } |
|
645 current = current->fNext; |
|
646 } |
|
647 return NULL; |
|
648 } |
|
649 |
|
650 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){ |
|
651 struct SRBRoot *data = NULL; |
|
652 UCHARBUF *ucbuf = NULL; |
|
653 GenrbData* genrbdata = (GenrbData*) context; |
|
654 int localeLength = strlen(locale); |
|
655 char* filename = (char*)uprv_malloc(localeLength+5); |
|
656 char *inputDirBuf = NULL; |
|
657 char *openFileName = NULL; |
|
658 const char* cp = ""; |
|
659 UChar* urules = NULL; |
|
660 int32_t urulesLength = 0; |
|
661 int32_t i = 0; |
|
662 int32_t dirlen = 0; |
|
663 int32_t filelen = 0; |
|
664 struct SResource* root; |
|
665 struct SResource* collations; |
|
666 struct SResource* collation; |
|
667 struct SResource* sequence; |
|
668 |
|
669 memcpy(filename, locale, localeLength); |
|
670 for(i = 0; i < localeLength; i++){ |
|
671 if(filename[i] == '-'){ |
|
672 filename[i] = '_'; |
|
673 } |
|
674 } |
|
675 filename[localeLength] = '.'; |
|
676 filename[localeLength+1] = 't'; |
|
677 filename[localeLength+2] = 'x'; |
|
678 filename[localeLength+3] = 't'; |
|
679 filename[localeLength+4] = 0; |
|
680 |
|
681 |
|
682 if (status==NULL || U_FAILURE(*status)) { |
|
683 return NULL; |
|
684 } |
|
685 if(filename==NULL){ |
|
686 *status=U_ILLEGAL_ARGUMENT_ERROR; |
|
687 return NULL; |
|
688 }else{ |
|
689 filelen = (int32_t)uprv_strlen(filename); |
|
690 } |
|
691 if(genrbdata->inputDir == NULL) { |
|
692 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); |
|
693 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); |
|
694 openFileName[0] = '\0'; |
|
695 if (filenameBegin != NULL) { |
|
696 /* |
|
697 * When a filename ../../../data/root.txt is specified, |
|
698 * we presume that the input directory is ../../../data |
|
699 * This is very important when the resource file includes |
|
700 * another file, like UCARules.txt or thaidict.brk. |
|
701 */ |
|
702 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); |
|
703 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize); |
|
704 |
|
705 /* test for NULL */ |
|
706 if(inputDirBuf == NULL) { |
|
707 *status = U_MEMORY_ALLOCATION_ERROR; |
|
708 goto finish; |
|
709 } |
|
710 |
|
711 inputDirBuf[filenameSize - 1] = 0; |
|
712 genrbdata->inputDir = inputDirBuf; |
|
713 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); |
|
714 } |
|
715 }else{ |
|
716 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); |
|
717 |
|
718 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) { |
|
719 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); |
|
720 |
|
721 /* test for NULL */ |
|
722 if(openFileName == NULL) { |
|
723 *status = U_MEMORY_ALLOCATION_ERROR; |
|
724 goto finish; |
|
725 } |
|
726 |
|
727 openFileName[0] = '\0'; |
|
728 /* |
|
729 * append the input dir to openFileName if the first char in |
|
730 * filename is not file seperation char and the last char input directory is not '.'. |
|
731 * This is to support : |
|
732 * genrb -s. /home/icu/data |
|
733 * genrb -s. icu/data |
|
734 * The user cannot mix notations like |
|
735 * genrb -s. /icu/data --- the absolute path specified. -s redundant |
|
736 * user should use |
|
737 * genrb -s. icu/data --- start from CWD and look in icu/data dir |
|
738 */ |
|
739 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){ |
|
740 uprv_strcpy(openFileName, genrbdata->inputDir); |
|
741 openFileName[dirlen] = U_FILE_SEP_CHAR; |
|
742 } |
|
743 openFileName[dirlen + 1] = '\0'; |
|
744 } else { |
|
745 openFileName = (char *) uprv_malloc(dirlen + filelen + 1); |
|
746 |
|
747 /* test for NULL */ |
|
748 if(openFileName == NULL) { |
|
749 *status = U_MEMORY_ALLOCATION_ERROR; |
|
750 goto finish; |
|
751 } |
|
752 |
|
753 uprv_strcpy(openFileName, genrbdata->inputDir); |
|
754 |
|
755 } |
|
756 } |
|
757 uprv_strcat(openFileName, filename); |
|
758 /* printf("%s\n", openFileName); */ |
|
759 *status = U_ZERO_ERROR; |
|
760 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status); |
|
761 |
|
762 if(*status == U_FILE_ACCESS_ERROR) { |
|
763 |
|
764 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName); |
|
765 goto finish; |
|
766 } |
|
767 if (ucbuf == NULL || U_FAILURE(*status)) { |
|
768 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status)); |
|
769 goto finish; |
|
770 } |
|
771 |
|
772 /* Parse the data into an SRBRoot */ |
|
773 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, FALSE, status); |
|
774 |
|
775 root = data->fRoot; |
|
776 collations = resLookup(root, "collations"); |
|
777 if (collations != NULL) { |
|
778 collation = resLookup(collations, type); |
|
779 if (collation != NULL) { |
|
780 sequence = resLookup(collation, "Sequence"); |
|
781 if (sequence != NULL) { |
|
782 urules = sequence->u.fString.fChars; |
|
783 urulesLength = sequence->u.fString.fLength; |
|
784 *pLength = urulesLength; |
|
785 } |
|
786 } |
|
787 } |
|
788 |
|
789 finish: |
|
790 if (inputDirBuf != NULL) { |
|
791 uprv_free(inputDirBuf); |
|
792 } |
|
793 |
|
794 if (openFileName != NULL) { |
|
795 uprv_free(openFileName); |
|
796 } |
|
797 |
|
798 if(ucbuf) { |
|
799 ucbuf_close(ucbuf); |
|
800 } |
|
801 |
|
802 return urules; |
|
803 } |
|
804 |
|
805 // Quick-and-dirty escaping function. |
|
806 // Assumes that we are on an ASCII-based platform. |
|
807 static void |
|
808 escape(const UChar *s, char *buffer) { |
|
809 int32_t length = u_strlen(s); |
|
810 int32_t i = 0; |
|
811 for (;;) { |
|
812 UChar32 c; |
|
813 U16_NEXT(s, i, length, c); |
|
814 if (c == 0) { |
|
815 *buffer = 0; |
|
816 return; |
|
817 } else if (0x20 <= c && c <= 0x7e) { |
|
818 // printable ASCII |
|
819 *buffer++ = (char)c; // assumes ASCII-based platform |
|
820 } else { |
|
821 buffer += sprintf(buffer, "\\u%04X", (int)c); |
|
822 } |
|
823 } |
|
824 } |
|
825 |
|
826 static struct SResource * |
|
827 addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status) |
|
828 { |
|
829 struct SResource *member = NULL; |
|
830 struct UString *tokenValue; |
|
831 struct UString comment; |
|
832 enum ETokenType token; |
|
833 char subtag[1024]; |
|
834 UVersionInfo version; |
|
835 uint32_t line; |
|
836 GenrbData genrbdata; |
|
837 /* '{' . (name resource)* '}' */ |
|
838 version[0]=0; version[1]=0; version[2]=0; version[3]=0; |
|
839 |
|
840 for (;;) |
|
841 { |
|
842 ustr_init(&comment); |
|
843 token = getToken(state, &tokenValue, &comment, &line, status); |
|
844 |
|
845 if (token == TOK_CLOSE_BRACE) |
|
846 { |
|
847 return result; |
|
848 } |
|
849 |
|
850 if (token != TOK_STRING) |
|
851 { |
|
852 res_close(result); |
|
853 *status = U_INVALID_FORMAT_ERROR; |
|
854 |
|
855 if (token == TOK_EOF) |
|
856 { |
|
857 error(startline, "unterminated table"); |
|
858 } |
|
859 else |
|
860 { |
|
861 error(line, "Unexpected token %s", tokenNames[token]); |
|
862 } |
|
863 |
|
864 return NULL; |
|
865 } |
|
866 |
|
867 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); |
|
868 |
|
869 if (U_FAILURE(*status)) |
|
870 { |
|
871 res_close(result); |
|
872 return NULL; |
|
873 } |
|
874 |
|
875 member = parseResource(state, subtag, NULL, status); |
|
876 |
|
877 if (U_FAILURE(*status)) |
|
878 { |
|
879 res_close(result); |
|
880 return NULL; |
|
881 } |
|
882 |
|
883 if (uprv_strcmp(subtag, "Version") == 0) |
|
884 { |
|
885 char ver[40]; |
|
886 int32_t length = member->u.fString.fLength; |
|
887 |
|
888 if (length >= (int32_t) sizeof(ver)) |
|
889 { |
|
890 length = (int32_t) sizeof(ver) - 1; |
|
891 } |
|
892 |
|
893 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */ |
|
894 u_versionFromString(version, ver); |
|
895 |
|
896 table_add(result, member, line, status); |
|
897 |
|
898 } |
|
899 else if (uprv_strcmp(subtag, "Override") == 0) |
|
900 { |
|
901 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0); |
|
902 table_add(result, member, line, status); |
|
903 |
|
904 } |
|
905 else if(uprv_strcmp(subtag, "%%CollationBin")==0) |
|
906 { |
|
907 /* discard duplicate %%CollationBin if any*/ |
|
908 } |
|
909 else if (uprv_strcmp(subtag, "Sequence") == 0) |
|
910 { |
|
911 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO |
|
912 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); |
|
913 #else |
|
914 if(state->makeBinaryCollation) { |
|
915 |
|
916 /* do the collation elements */ |
|
917 int32_t len = 0; |
|
918 uint8_t *data = NULL; |
|
919 UCollator *coll = NULL; |
|
920 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)]; |
|
921 int32_t reorderCodeCount; |
|
922 int32_t reorderCodeIndex; |
|
923 UParseError parseError; |
|
924 |
|
925 genrbdata.inputDir = state->inputdir; |
|
926 genrbdata.outputDir = state->outputdir; |
|
927 |
|
928 UErrorCode intStatus = U_ZERO_ERROR; |
|
929 uprv_memset(&parseError, 0, sizeof(parseError)); |
|
930 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength, |
|
931 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus); |
|
932 |
|
933 if (U_SUCCESS(intStatus) && coll != NULL) |
|
934 { |
|
935 len = ucol_cloneBinary(coll, NULL, 0, &intStatus); |
|
936 data = (uint8_t *)uprv_malloc(len); |
|
937 intStatus = U_ZERO_ERROR; |
|
938 len = ucol_cloneBinary(coll, data, len, &intStatus); |
|
939 |
|
940 /* tailoring rules version */ |
|
941 /* This is wrong! */ |
|
942 /*coll->dataInfo.dataVersion[1] = version[0];*/ |
|
943 /* Copy tailoring version. Builder version already */ |
|
944 /* set in ucol_openRules */ |
|
945 ((UCATableHeader *)data)->version[1] = version[0]; |
|
946 ((UCATableHeader *)data)->version[2] = version[1]; |
|
947 ((UCATableHeader *)data)->version[3] = version[2]; |
|
948 |
|
949 if (U_SUCCESS(intStatus) && data != NULL) |
|
950 { |
|
951 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status); |
|
952 table_add(result, collationBin, line, status); |
|
953 uprv_free(data); |
|
954 |
|
955 reorderCodeCount = ucol_getReorderCodes( |
|
956 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus); |
|
957 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) { |
|
958 struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status); |
|
959 for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) { |
|
960 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status); |
|
961 } |
|
962 table_add(result, reorderCodeRes, line, status); |
|
963 } |
|
964 } |
|
965 else |
|
966 { |
|
967 warning(line, "could not obtain rules from collator"); |
|
968 if(isStrict()){ |
|
969 *status = U_INVALID_FORMAT_ERROR; |
|
970 return NULL; |
|
971 } |
|
972 } |
|
973 |
|
974 ucol_close(coll); |
|
975 } |
|
976 else |
|
977 { |
|
978 if(intStatus == U_FILE_ACCESS_ERROR) { |
|
979 error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly."); |
|
980 *status = intStatus; |
|
981 return NULL; |
|
982 } |
|
983 char preBuffer[100], postBuffer[100]; |
|
984 escape(parseError.preContext, preBuffer); |
|
985 escape(parseError.postContext, postBuffer); |
|
986 warning(line, |
|
987 "%%%%CollationBin could not be constructed from CollationElements\n" |
|
988 " check context, check that the FractionalUCA.txt UCA version " |
|
989 "matches the current UCD version\n" |
|
990 " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }", |
|
991 u_errorName(intStatus), |
|
992 parseError.line, |
|
993 parseError.offset, |
|
994 preBuffer, |
|
995 postBuffer); |
|
996 if(isStrict()){ |
|
997 *status = intStatus; |
|
998 return NULL; |
|
999 } |
|
1000 } |
|
1001 } else { |
|
1002 if(isVerbose()) { |
|
1003 printf("Not building Collation binary\n"); |
|
1004 } |
|
1005 } |
|
1006 #endif |
|
1007 /* in order to achieve smaller data files, we can direct genrb */ |
|
1008 /* to omit collation rules */ |
|
1009 if(state->omitCollationRules) { |
|
1010 bundle_closeString(state->bundle, member); |
|
1011 } else { |
|
1012 table_add(result, member, line, status); |
|
1013 } |
|
1014 } |
|
1015 if (U_FAILURE(*status)) |
|
1016 { |
|
1017 res_close(result); |
|
1018 return NULL; |
|
1019 } |
|
1020 } |
|
1021 |
|
1022 // Reached the end without a TOK_CLOSE_BRACE. Should be an error. |
|
1023 *status = U_INTERNAL_PROGRAM_ERROR; |
|
1024 return NULL; |
|
1025 } |
|
1026 |
|
1027 static struct SResource * |
|
1028 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status) |
|
1029 { |
|
1030 struct SResource *result = NULL; |
|
1031 struct SResource *member = NULL; |
|
1032 struct SResource *collationRes = NULL; |
|
1033 struct UString *tokenValue; |
|
1034 struct UString comment; |
|
1035 enum ETokenType token; |
|
1036 char subtag[1024], typeKeyword[1024]; |
|
1037 uint32_t line; |
|
1038 |
|
1039 result = table_open(state->bundle, tag, NULL, status); |
|
1040 |
|
1041 if (result == NULL || U_FAILURE(*status)) |
|
1042 { |
|
1043 return NULL; |
|
1044 } |
|
1045 if(isVerbose()){ |
|
1046 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
1047 } |
|
1048 if(!newCollation) { |
|
1049 return addCollation(state, result, startline, status); |
|
1050 } |
|
1051 else { |
|
1052 for(;;) { |
|
1053 ustr_init(&comment); |
|
1054 token = getToken(state, &tokenValue, &comment, &line, status); |
|
1055 |
|
1056 if (token == TOK_CLOSE_BRACE) |
|
1057 { |
|
1058 return result; |
|
1059 } |
|
1060 |
|
1061 if (token != TOK_STRING) |
|
1062 { |
|
1063 res_close(result); |
|
1064 *status = U_INVALID_FORMAT_ERROR; |
|
1065 |
|
1066 if (token == TOK_EOF) |
|
1067 { |
|
1068 error(startline, "unterminated table"); |
|
1069 } |
|
1070 else |
|
1071 { |
|
1072 error(line, "Unexpected token %s", tokenNames[token]); |
|
1073 } |
|
1074 |
|
1075 return NULL; |
|
1076 } |
|
1077 |
|
1078 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); |
|
1079 |
|
1080 if (U_FAILURE(*status)) |
|
1081 { |
|
1082 res_close(result); |
|
1083 return NULL; |
|
1084 } |
|
1085 |
|
1086 if (uprv_strcmp(subtag, "default") == 0) |
|
1087 { |
|
1088 member = parseResource(state, subtag, NULL, status); |
|
1089 |
|
1090 if (U_FAILURE(*status)) |
|
1091 { |
|
1092 res_close(result); |
|
1093 return NULL; |
|
1094 } |
|
1095 |
|
1096 table_add(result, member, line, status); |
|
1097 } |
|
1098 else |
|
1099 { |
|
1100 token = peekToken(state, 0, &tokenValue, &line, &comment, status); |
|
1101 /* this probably needs to be refactored or recursively use the parser */ |
|
1102 /* first we assume that our collation table won't have the explicit type */ |
|
1103 /* then, we cannot handle aliases */ |
|
1104 if(token == TOK_OPEN_BRACE) { |
|
1105 token = getToken(state, &tokenValue, &comment, &line, status); |
|
1106 collationRes = table_open(state->bundle, subtag, NULL, status); |
|
1107 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */ |
|
1108 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) { |
|
1109 table_add(result, collationRes, startline, status); |
|
1110 } |
|
1111 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ |
|
1112 /* we could have a table too */ |
|
1113 token = peekToken(state, 1, &tokenValue, &line, &comment, status); |
|
1114 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1); |
|
1115 if(uprv_strcmp(typeKeyword, "alias") == 0) { |
|
1116 member = parseResource(state, subtag, NULL, status); |
|
1117 if (U_FAILURE(*status)) |
|
1118 { |
|
1119 res_close(result); |
|
1120 return NULL; |
|
1121 } |
|
1122 |
|
1123 table_add(result, member, line, status); |
|
1124 } else { |
|
1125 res_close(result); |
|
1126 *status = U_INVALID_FORMAT_ERROR; |
|
1127 return NULL; |
|
1128 } |
|
1129 } else { |
|
1130 res_close(result); |
|
1131 *status = U_INVALID_FORMAT_ERROR; |
|
1132 return NULL; |
|
1133 } |
|
1134 } |
|
1135 |
|
1136 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ |
|
1137 |
|
1138 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ |
|
1139 |
|
1140 if (U_FAILURE(*status)) |
|
1141 { |
|
1142 res_close(result); |
|
1143 return NULL; |
|
1144 } |
|
1145 } |
|
1146 } |
|
1147 } |
|
1148 |
|
1149 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which, |
|
1150 if this weren't special-cased, wouldn't be set until the entire file had been processed. */ |
|
1151 static struct SResource * |
|
1152 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status) |
|
1153 { |
|
1154 struct SResource *member = NULL; |
|
1155 struct UString *tokenValue=NULL; |
|
1156 struct UString comment; |
|
1157 enum ETokenType token; |
|
1158 char subtag[1024]; |
|
1159 uint32_t line; |
|
1160 UBool readToken = FALSE; |
|
1161 |
|
1162 /* '{' . (name resource)* '}' */ |
|
1163 |
|
1164 if(isVerbose()){ |
|
1165 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
1166 } |
|
1167 for (;;) |
|
1168 { |
|
1169 ustr_init(&comment); |
|
1170 token = getToken(state, &tokenValue, &comment, &line, status); |
|
1171 |
|
1172 if (token == TOK_CLOSE_BRACE) |
|
1173 { |
|
1174 if (!readToken) { |
|
1175 warning(startline, "Encountered empty table"); |
|
1176 } |
|
1177 return table; |
|
1178 } |
|
1179 |
|
1180 if (token != TOK_STRING) |
|
1181 { |
|
1182 *status = U_INVALID_FORMAT_ERROR; |
|
1183 |
|
1184 if (token == TOK_EOF) |
|
1185 { |
|
1186 error(startline, "unterminated table"); |
|
1187 } |
|
1188 else |
|
1189 { |
|
1190 error(line, "unexpected token %s", tokenNames[token]); |
|
1191 } |
|
1192 |
|
1193 return NULL; |
|
1194 } |
|
1195 |
|
1196 if(uprv_isInvariantUString(tokenValue->fChars, -1)) { |
|
1197 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); |
|
1198 } else { |
|
1199 *status = U_INVALID_FORMAT_ERROR; |
|
1200 error(line, "invariant characters required for table keys"); |
|
1201 return NULL; |
|
1202 } |
|
1203 |
|
1204 if (U_FAILURE(*status)) |
|
1205 { |
|
1206 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status)); |
|
1207 return NULL; |
|
1208 } |
|
1209 |
|
1210 member = parseResource(state, subtag, &comment, status); |
|
1211 |
|
1212 if (member == NULL || U_FAILURE(*status)) |
|
1213 { |
|
1214 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status)); |
|
1215 return NULL; |
|
1216 } |
|
1217 |
|
1218 table_add(table, member, line, status); |
|
1219 |
|
1220 if (U_FAILURE(*status)) |
|
1221 { |
|
1222 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status)); |
|
1223 return NULL; |
|
1224 } |
|
1225 readToken = TRUE; |
|
1226 ustr_deinit(&comment); |
|
1227 } |
|
1228 |
|
1229 /* not reached */ |
|
1230 /* A compiler warning will appear if all paths don't contain a return statement. */ |
|
1231 /* *status = U_INTERNAL_PROGRAM_ERROR; |
|
1232 return NULL;*/ |
|
1233 } |
|
1234 |
|
1235 static struct SResource * |
|
1236 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) |
|
1237 { |
|
1238 struct SResource *result; |
|
1239 |
|
1240 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0) |
|
1241 { |
|
1242 return parseCollationElements(state, tag, startline, FALSE, status); |
|
1243 } |
|
1244 if (tag != NULL && uprv_strcmp(tag, "collations") == 0) |
|
1245 { |
|
1246 return parseCollationElements(state, tag, startline, TRUE, status); |
|
1247 } |
|
1248 if(isVerbose()){ |
|
1249 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
1250 } |
|
1251 |
|
1252 result = table_open(state->bundle, tag, comment, status); |
|
1253 |
|
1254 if (result == NULL || U_FAILURE(*status)) |
|
1255 { |
|
1256 return NULL; |
|
1257 } |
|
1258 return realParseTable(state, result, tag, startline, status); |
|
1259 } |
|
1260 |
|
1261 static struct SResource * |
|
1262 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) |
|
1263 { |
|
1264 struct SResource *result = NULL; |
|
1265 struct SResource *member = NULL; |
|
1266 struct UString *tokenValue; |
|
1267 struct UString memberComments; |
|
1268 enum ETokenType token; |
|
1269 UBool readToken = FALSE; |
|
1270 |
|
1271 result = array_open(state->bundle, tag, comment, status); |
|
1272 |
|
1273 if (result == NULL || U_FAILURE(*status)) |
|
1274 { |
|
1275 return NULL; |
|
1276 } |
|
1277 if(isVerbose()){ |
|
1278 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
1279 } |
|
1280 |
|
1281 ustr_init(&memberComments); |
|
1282 |
|
1283 /* '{' . resource [','] '}' */ |
|
1284 for (;;) |
|
1285 { |
|
1286 /* reset length */ |
|
1287 ustr_setlen(&memberComments, 0, status); |
|
1288 |
|
1289 /* check for end of array, but don't consume next token unless it really is the end */ |
|
1290 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status); |
|
1291 |
|
1292 |
|
1293 if (token == TOK_CLOSE_BRACE) |
|
1294 { |
|
1295 getToken(state, NULL, NULL, NULL, status); |
|
1296 if (!readToken) { |
|
1297 warning(startline, "Encountered empty array"); |
|
1298 } |
|
1299 break; |
|
1300 } |
|
1301 |
|
1302 if (token == TOK_EOF) |
|
1303 { |
|
1304 res_close(result); |
|
1305 *status = U_INVALID_FORMAT_ERROR; |
|
1306 error(startline, "unterminated array"); |
|
1307 return NULL; |
|
1308 } |
|
1309 |
|
1310 /* string arrays are a special case */ |
|
1311 if (token == TOK_STRING) |
|
1312 { |
|
1313 getToken(state, &tokenValue, &memberComments, NULL, status); |
|
1314 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status); |
|
1315 } |
|
1316 else |
|
1317 { |
|
1318 member = parseResource(state, NULL, &memberComments, status); |
|
1319 } |
|
1320 |
|
1321 if (member == NULL || U_FAILURE(*status)) |
|
1322 { |
|
1323 res_close(result); |
|
1324 return NULL; |
|
1325 } |
|
1326 |
|
1327 array_add(result, member, status); |
|
1328 |
|
1329 if (U_FAILURE(*status)) |
|
1330 { |
|
1331 res_close(result); |
|
1332 return NULL; |
|
1333 } |
|
1334 |
|
1335 /* eat optional comma if present */ |
|
1336 token = peekToken(state, 0, NULL, NULL, NULL, status); |
|
1337 |
|
1338 if (token == TOK_COMMA) |
|
1339 { |
|
1340 getToken(state, NULL, NULL, NULL, status); |
|
1341 } |
|
1342 |
|
1343 if (U_FAILURE(*status)) |
|
1344 { |
|
1345 res_close(result); |
|
1346 return NULL; |
|
1347 } |
|
1348 readToken = TRUE; |
|
1349 } |
|
1350 |
|
1351 ustr_deinit(&memberComments); |
|
1352 return result; |
|
1353 } |
|
1354 |
|
1355 static struct SResource * |
|
1356 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) |
|
1357 { |
|
1358 struct SResource *result = NULL; |
|
1359 enum ETokenType token; |
|
1360 char *string; |
|
1361 int32_t value; |
|
1362 UBool readToken = FALSE; |
|
1363 char *stopstring; |
|
1364 uint32_t len; |
|
1365 struct UString memberComments; |
|
1366 |
|
1367 result = intvector_open(state->bundle, tag, comment, status); |
|
1368 |
|
1369 if (result == NULL || U_FAILURE(*status)) |
|
1370 { |
|
1371 return NULL; |
|
1372 } |
|
1373 |
|
1374 if(isVerbose()){ |
|
1375 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
1376 } |
|
1377 ustr_init(&memberComments); |
|
1378 /* '{' . string [','] '}' */ |
|
1379 for (;;) |
|
1380 { |
|
1381 ustr_setlen(&memberComments, 0, status); |
|
1382 |
|
1383 /* check for end of array, but don't consume next token unless it really is the end */ |
|
1384 token = peekToken(state, 0, NULL, NULL,&memberComments, status); |
|
1385 |
|
1386 if (token == TOK_CLOSE_BRACE) |
|
1387 { |
|
1388 /* it's the end, consume the close brace */ |
|
1389 getToken(state, NULL, NULL, NULL, status); |
|
1390 if (!readToken) { |
|
1391 warning(startline, "Encountered empty int vector"); |
|
1392 } |
|
1393 ustr_deinit(&memberComments); |
|
1394 return result; |
|
1395 } |
|
1396 |
|
1397 string = getInvariantString(state, NULL, NULL, status); |
|
1398 |
|
1399 if (U_FAILURE(*status)) |
|
1400 { |
|
1401 res_close(result); |
|
1402 return NULL; |
|
1403 } |
|
1404 |
|
1405 /* For handling illegal char in the Intvector */ |
|
1406 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ |
|
1407 len=(uint32_t)(stopstring-string); |
|
1408 |
|
1409 if(len==uprv_strlen(string)) |
|
1410 { |
|
1411 intvector_add(result, value, status); |
|
1412 uprv_free(string); |
|
1413 token = peekToken(state, 0, NULL, NULL, NULL, status); |
|
1414 } |
|
1415 else |
|
1416 { |
|
1417 uprv_free(string); |
|
1418 *status=U_INVALID_CHAR_FOUND; |
|
1419 } |
|
1420 |
|
1421 if (U_FAILURE(*status)) |
|
1422 { |
|
1423 res_close(result); |
|
1424 return NULL; |
|
1425 } |
|
1426 |
|
1427 /* the comma is optional (even though it is required to prevent the reader from concatenating |
|
1428 consecutive entries) so that a missing comma on the last entry isn't an error */ |
|
1429 if (token == TOK_COMMA) |
|
1430 { |
|
1431 getToken(state, NULL, NULL, NULL, status); |
|
1432 } |
|
1433 readToken = TRUE; |
|
1434 } |
|
1435 |
|
1436 /* not reached */ |
|
1437 /* A compiler warning will appear if all paths don't contain a return statement. */ |
|
1438 /* intvector_close(result, status); |
|
1439 *status = U_INTERNAL_PROGRAM_ERROR; |
|
1440 return NULL;*/ |
|
1441 } |
|
1442 |
|
1443 static struct SResource * |
|
1444 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) |
|
1445 { |
|
1446 struct SResource *result = NULL; |
|
1447 uint8_t *value; |
|
1448 char *string; |
|
1449 char toConv[3] = {'\0', '\0', '\0'}; |
|
1450 uint32_t count; |
|
1451 uint32_t i; |
|
1452 uint32_t line; |
|
1453 char *stopstring; |
|
1454 uint32_t len; |
|
1455 |
|
1456 string = getInvariantString(state, &line, NULL, status); |
|
1457 |
|
1458 if (string == NULL || U_FAILURE(*status)) |
|
1459 { |
|
1460 return NULL; |
|
1461 } |
|
1462 |
|
1463 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); |
|
1464 |
|
1465 if (U_FAILURE(*status)) |
|
1466 { |
|
1467 uprv_free(string); |
|
1468 return NULL; |
|
1469 } |
|
1470 |
|
1471 if(isVerbose()){ |
|
1472 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
1473 } |
|
1474 |
|
1475 count = (uint32_t)uprv_strlen(string); |
|
1476 if (count > 0){ |
|
1477 if((count % 2)==0){ |
|
1478 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count)); |
|
1479 |
|
1480 if (value == NULL) |
|
1481 { |
|
1482 uprv_free(string); |
|
1483 *status = U_MEMORY_ALLOCATION_ERROR; |
|
1484 return NULL; |
|
1485 } |
|
1486 |
|
1487 for (i = 0; i < count; i += 2) |
|
1488 { |
|
1489 toConv[0] = string[i]; |
|
1490 toConv[1] = string[i + 1]; |
|
1491 |
|
1492 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); |
|
1493 len=(uint32_t)(stopstring-toConv); |
|
1494 |
|
1495 if(len!=uprv_strlen(toConv)) |
|
1496 { |
|
1497 uprv_free(string); |
|
1498 *status=U_INVALID_CHAR_FOUND; |
|
1499 return NULL; |
|
1500 } |
|
1501 } |
|
1502 |
|
1503 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status); |
|
1504 |
|
1505 uprv_free(value); |
|
1506 } |
|
1507 else |
|
1508 { |
|
1509 *status = U_INVALID_CHAR_FOUND; |
|
1510 uprv_free(string); |
|
1511 error(line, "Encountered invalid binary string"); |
|
1512 return NULL; |
|
1513 } |
|
1514 } |
|
1515 else |
|
1516 { |
|
1517 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status); |
|
1518 warning(startline, "Encountered empty binary tag"); |
|
1519 } |
|
1520 uprv_free(string); |
|
1521 |
|
1522 return result; |
|
1523 } |
|
1524 |
|
1525 static struct SResource * |
|
1526 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) |
|
1527 { |
|
1528 struct SResource *result = NULL; |
|
1529 int32_t value; |
|
1530 char *string; |
|
1531 char *stopstring; |
|
1532 uint32_t len; |
|
1533 |
|
1534 string = getInvariantString(state, NULL, NULL, status); |
|
1535 |
|
1536 if (string == NULL || U_FAILURE(*status)) |
|
1537 { |
|
1538 return NULL; |
|
1539 } |
|
1540 |
|
1541 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); |
|
1542 |
|
1543 if (U_FAILURE(*status)) |
|
1544 { |
|
1545 uprv_free(string); |
|
1546 return NULL; |
|
1547 } |
|
1548 |
|
1549 if(isVerbose()){ |
|
1550 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
1551 } |
|
1552 |
|
1553 if (uprv_strlen(string) <= 0) |
|
1554 { |
|
1555 warning(startline, "Encountered empty integer. Default value is 0."); |
|
1556 } |
|
1557 |
|
1558 /* Allow integer support for hexdecimal, octal digit and decimal*/ |
|
1559 /* and handle illegal char in the integer*/ |
|
1560 value = uprv_strtoul(string, &stopstring, 0); |
|
1561 len=(uint32_t)(stopstring-string); |
|
1562 if(len==uprv_strlen(string)) |
|
1563 { |
|
1564 result = int_open(state->bundle, tag, value, comment, status); |
|
1565 } |
|
1566 else |
|
1567 { |
|
1568 *status=U_INVALID_CHAR_FOUND; |
|
1569 } |
|
1570 uprv_free(string); |
|
1571 |
|
1572 return result; |
|
1573 } |
|
1574 |
|
1575 static struct SResource * |
|
1576 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) |
|
1577 { |
|
1578 struct SResource *result; |
|
1579 FileStream *file; |
|
1580 int32_t len; |
|
1581 uint8_t *data; |
|
1582 char *filename; |
|
1583 uint32_t line; |
|
1584 char *fullname = NULL; |
|
1585 filename = getInvariantString(state, &line, NULL, status); |
|
1586 |
|
1587 if (U_FAILURE(*status)) |
|
1588 { |
|
1589 return NULL; |
|
1590 } |
|
1591 |
|
1592 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); |
|
1593 |
|
1594 if (U_FAILURE(*status)) |
|
1595 { |
|
1596 uprv_free(filename); |
|
1597 return NULL; |
|
1598 } |
|
1599 |
|
1600 if(isVerbose()){ |
|
1601 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
1602 } |
|
1603 |
|
1604 /* Open the input file for reading */ |
|
1605 if (state->inputdir == NULL) |
|
1606 { |
|
1607 #if 1 |
|
1608 /* |
|
1609 * Always save file file name, even if there's |
|
1610 * no input directory specified. MIGHT BREAK SOMETHING |
|
1611 */ |
|
1612 int32_t filenameLength = uprv_strlen(filename); |
|
1613 |
|
1614 fullname = (char *) uprv_malloc(filenameLength + 1); |
|
1615 uprv_strcpy(fullname, filename); |
|
1616 #endif |
|
1617 |
|
1618 file = T_FileStream_open(filename, "rb"); |
|
1619 } |
|
1620 else |
|
1621 { |
|
1622 |
|
1623 int32_t count = (int32_t)uprv_strlen(filename); |
|
1624 |
|
1625 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) |
|
1626 { |
|
1627 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); |
|
1628 |
|
1629 /* test for NULL */ |
|
1630 if(fullname == NULL) |
|
1631 { |
|
1632 *status = U_MEMORY_ALLOCATION_ERROR; |
|
1633 return NULL; |
|
1634 } |
|
1635 |
|
1636 uprv_strcpy(fullname, state->inputdir); |
|
1637 |
|
1638 fullname[state->inputdirLength] = U_FILE_SEP_CHAR; |
|
1639 fullname[state->inputdirLength + 1] = '\0'; |
|
1640 |
|
1641 uprv_strcat(fullname, filename); |
|
1642 } |
|
1643 else |
|
1644 { |
|
1645 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1); |
|
1646 |
|
1647 /* test for NULL */ |
|
1648 if(fullname == NULL) |
|
1649 { |
|
1650 *status = U_MEMORY_ALLOCATION_ERROR; |
|
1651 return NULL; |
|
1652 } |
|
1653 |
|
1654 uprv_strcpy(fullname, state->inputdir); |
|
1655 uprv_strcat(fullname, filename); |
|
1656 } |
|
1657 |
|
1658 file = T_FileStream_open(fullname, "rb"); |
|
1659 |
|
1660 } |
|
1661 |
|
1662 if (file == NULL) |
|
1663 { |
|
1664 error(line, "couldn't open input file %s", filename); |
|
1665 *status = U_FILE_ACCESS_ERROR; |
|
1666 return NULL; |
|
1667 } |
|
1668 |
|
1669 len = T_FileStream_size(file); |
|
1670 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t)); |
|
1671 /* test for NULL */ |
|
1672 if(data == NULL) |
|
1673 { |
|
1674 *status = U_MEMORY_ALLOCATION_ERROR; |
|
1675 T_FileStream_close (file); |
|
1676 return NULL; |
|
1677 } |
|
1678 |
|
1679 /* int32_t numRead = */ T_FileStream_read (file, data, len); |
|
1680 T_FileStream_close (file); |
|
1681 |
|
1682 result = bin_open(state->bundle, tag, len, data, fullname, comment, status); |
|
1683 |
|
1684 uprv_free(data); |
|
1685 uprv_free(filename); |
|
1686 uprv_free(fullname); |
|
1687 |
|
1688 return result; |
|
1689 } |
|
1690 |
|
1691 static struct SResource * |
|
1692 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) |
|
1693 { |
|
1694 struct SResource *result; |
|
1695 int32_t len=0; |
|
1696 char *filename; |
|
1697 uint32_t line; |
|
1698 UChar *pTarget = NULL; |
|
1699 |
|
1700 UCHARBUF *ucbuf; |
|
1701 char *fullname = NULL; |
|
1702 int32_t count = 0; |
|
1703 const char* cp = NULL; |
|
1704 const UChar* uBuffer = NULL; |
|
1705 |
|
1706 filename = getInvariantString(state, &line, NULL, status); |
|
1707 count = (int32_t)uprv_strlen(filename); |
|
1708 |
|
1709 if (U_FAILURE(*status)) |
|
1710 { |
|
1711 return NULL; |
|
1712 } |
|
1713 |
|
1714 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); |
|
1715 |
|
1716 if (U_FAILURE(*status)) |
|
1717 { |
|
1718 uprv_free(filename); |
|
1719 return NULL; |
|
1720 } |
|
1721 |
|
1722 if(isVerbose()){ |
|
1723 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
1724 } |
|
1725 |
|
1726 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); |
|
1727 /* test for NULL */ |
|
1728 if(fullname == NULL) |
|
1729 { |
|
1730 *status = U_MEMORY_ALLOCATION_ERROR; |
|
1731 uprv_free(filename); |
|
1732 return NULL; |
|
1733 } |
|
1734 |
|
1735 if(state->inputdir!=NULL){ |
|
1736 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) |
|
1737 { |
|
1738 |
|
1739 uprv_strcpy(fullname, state->inputdir); |
|
1740 |
|
1741 fullname[state->inputdirLength] = U_FILE_SEP_CHAR; |
|
1742 fullname[state->inputdirLength + 1] = '\0'; |
|
1743 |
|
1744 uprv_strcat(fullname, filename); |
|
1745 } |
|
1746 else |
|
1747 { |
|
1748 uprv_strcpy(fullname, state->inputdir); |
|
1749 uprv_strcat(fullname, filename); |
|
1750 } |
|
1751 }else{ |
|
1752 uprv_strcpy(fullname,filename); |
|
1753 } |
|
1754 |
|
1755 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status); |
|
1756 |
|
1757 if (U_FAILURE(*status)) { |
|
1758 error(line, "couldn't open input file %s\n", filename); |
|
1759 return NULL; |
|
1760 } |
|
1761 |
|
1762 uBuffer = ucbuf_getBuffer(ucbuf,&len,status); |
|
1763 result = string_open(state->bundle, tag, uBuffer, len, comment, status); |
|
1764 |
|
1765 ucbuf_close(ucbuf); |
|
1766 |
|
1767 uprv_free(pTarget); |
|
1768 |
|
1769 uprv_free(filename); |
|
1770 uprv_free(fullname); |
|
1771 |
|
1772 return result; |
|
1773 } |
|
1774 |
|
1775 |
|
1776 |
|
1777 |
|
1778 |
|
1779 U_STRING_DECL(k_type_string, "string", 6); |
|
1780 U_STRING_DECL(k_type_binary, "binary", 6); |
|
1781 U_STRING_DECL(k_type_bin, "bin", 3); |
|
1782 U_STRING_DECL(k_type_table, "table", 5); |
|
1783 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17); |
|
1784 U_STRING_DECL(k_type_int, "int", 3); |
|
1785 U_STRING_DECL(k_type_integer, "integer", 7); |
|
1786 U_STRING_DECL(k_type_array, "array", 5); |
|
1787 U_STRING_DECL(k_type_alias, "alias", 5); |
|
1788 U_STRING_DECL(k_type_intvector, "intvector", 9); |
|
1789 U_STRING_DECL(k_type_import, "import", 6); |
|
1790 U_STRING_DECL(k_type_include, "include", 7); |
|
1791 |
|
1792 /* Various non-standard processing plugins that create one or more special resources. */ |
|
1793 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); |
|
1794 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); |
|
1795 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); |
|
1796 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); |
|
1797 |
|
/*
 * Kinds of resource the parser can produce. The values are used as indexes
 * into gResourceTypes, so the order here must match the rows of that table.
 */
typedef enum EResourceType
{
    RT_UNKNOWN = 0,             /* no explicit type given (yet) */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker: one past the last real type */
} EResourceType;
|
1817 |
|
1818 static struct { |
|
1819 const char *nameChars; /* only used for debugging */ |
|
1820 const UChar *nameUChars; |
|
1821 ParseResourceFunction *parseFunction; |
|
1822 } gResourceTypes[] = { |
|
1823 {"Unknown", NULL, NULL}, |
|
1824 {"string", k_type_string, parseString}, |
|
1825 {"binary", k_type_binary, parseBinary}, |
|
1826 {"table", k_type_table, parseTable}, |
|
1827 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */ |
|
1828 {"integer", k_type_integer, parseInteger}, |
|
1829 {"array", k_type_array, parseArray}, |
|
1830 {"alias", k_type_alias, parseAlias}, |
|
1831 {"intvector", k_type_intvector, parseIntVector}, |
|
1832 {"import", k_type_import, parseImport}, |
|
1833 {"include", k_type_include, parseInclude}, |
|
1834 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, |
|
1835 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */}, |
|
1836 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator}, |
|
1837 {"process(dependency)", k_type_plugin_dependency, parseDependency}, |
|
1838 {"reserved", NULL, NULL} |
|
1839 }; |
|
1840 |
|
1841 void initParser() |
|
1842 { |
|
1843 U_STRING_INIT(k_type_string, "string", 6); |
|
1844 U_STRING_INIT(k_type_binary, "binary", 6); |
|
1845 U_STRING_INIT(k_type_bin, "bin", 3); |
|
1846 U_STRING_INIT(k_type_table, "table", 5); |
|
1847 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17); |
|
1848 U_STRING_INIT(k_type_int, "int", 3); |
|
1849 U_STRING_INIT(k_type_integer, "integer", 7); |
|
1850 U_STRING_INIT(k_type_array, "array", 5); |
|
1851 U_STRING_INIT(k_type_alias, "alias", 5); |
|
1852 U_STRING_INIT(k_type_intvector, "intvector", 9); |
|
1853 U_STRING_INIT(k_type_import, "import", 6); |
|
1854 U_STRING_INIT(k_type_include, "include", 7); |
|
1855 |
|
1856 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18); |
|
1857 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18); |
|
1858 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23); |
|
1859 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19); |
|
1860 } |
|
1861 |
|
1862 static inline UBool isTable(enum EResourceType type) { |
|
1863 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK); |
|
1864 } |
|
1865 |
|
1866 static enum EResourceType |
|
1867 parseResourceType(ParseState* state, UErrorCode *status) |
|
1868 { |
|
1869 struct UString *tokenValue; |
|
1870 struct UString comment; |
|
1871 enum EResourceType result = RT_UNKNOWN; |
|
1872 uint32_t line=0; |
|
1873 ustr_init(&comment); |
|
1874 expect(state, TOK_STRING, &tokenValue, &comment, &line, status); |
|
1875 |
|
1876 if (U_FAILURE(*status)) |
|
1877 { |
|
1878 return RT_UNKNOWN; |
|
1879 } |
|
1880 |
|
1881 *status = U_ZERO_ERROR; |
|
1882 |
|
1883 /* Search for normal types */ |
|
1884 result=RT_UNKNOWN; |
|
1885 while ((result=(EResourceType)(result+1)) < RT_RESERVED) { |
|
1886 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) { |
|
1887 break; |
|
1888 } |
|
1889 } |
|
1890 /* Now search for the aliases */ |
|
1891 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { |
|
1892 result = RT_INTEGER; |
|
1893 } |
|
1894 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { |
|
1895 result = RT_BINARY; |
|
1896 } |
|
1897 else if (result == RT_RESERVED) { |
|
1898 char tokenBuffer[1024]; |
|
1899 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); |
|
1900 tokenBuffer[sizeof(tokenBuffer) - 1] = 0; |
|
1901 *status = U_INVALID_FORMAT_ERROR; |
|
1902 error(line, "unknown resource type '%s'", tokenBuffer); |
|
1903 } |
|
1904 |
|
1905 return result; |
|
1906 } |
|
1907 |
|
1908 /* parse a non-top-level resource */ |
|
1909 static struct SResource * |
|
1910 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status) |
|
1911 { |
|
1912 enum ETokenType token; |
|
1913 enum EResourceType resType = RT_UNKNOWN; |
|
1914 ParseResourceFunction *parseFunction = NULL; |
|
1915 struct UString *tokenValue; |
|
1916 uint32_t startline; |
|
1917 uint32_t line; |
|
1918 |
|
1919 |
|
1920 token = getToken(state, &tokenValue, NULL, &startline, status); |
|
1921 |
|
1922 if(isVerbose()){ |
|
1923 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); |
|
1924 } |
|
1925 |
|
1926 /* name . [ ':' type ] '{' resource '}' */ |
|
1927 /* This function parses from the colon onwards. If the colon is present, parse the |
|
1928 type then try to parse a resource of that type. If there is no explicit type, |
|
1929 work it out using the lookahead tokens. */ |
|
1930 switch (token) |
|
1931 { |
|
1932 case TOK_EOF: |
|
1933 *status = U_INVALID_FORMAT_ERROR; |
|
1934 error(startline, "Unexpected EOF encountered"); |
|
1935 return NULL; |
|
1936 |
|
1937 case TOK_ERROR: |
|
1938 *status = U_INVALID_FORMAT_ERROR; |
|
1939 return NULL; |
|
1940 |
|
1941 case TOK_COLON: |
|
1942 resType = parseResourceType(state, status); |
|
1943 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status); |
|
1944 |
|
1945 if (U_FAILURE(*status)) |
|
1946 { |
|
1947 return NULL; |
|
1948 } |
|
1949 |
|
1950 break; |
|
1951 |
|
1952 case TOK_OPEN_BRACE: |
|
1953 break; |
|
1954 |
|
1955 default: |
|
1956 *status = U_INVALID_FORMAT_ERROR; |
|
1957 error(startline, "syntax error while reading a resource, expected '{' or ':'"); |
|
1958 return NULL; |
|
1959 } |
|
1960 |
|
1961 |
|
1962 if (resType == RT_UNKNOWN) |
|
1963 { |
|
1964 /* No explicit type, so try to work it out. At this point, we've read the first '{'. |
|
1965 We could have any of the following: |
|
1966 { { => array (nested) |
|
1967 { :/} => array |
|
1968 { string , => string array |
|
1969 |
|
1970 { string { => table |
|
1971 |
|
1972 { string :/{ => table |
|
1973 { string } => string |
|
1974 */ |
|
1975 |
|
1976 token = peekToken(state, 0, NULL, &line, NULL,status); |
|
1977 |
|
1978 if (U_FAILURE(*status)) |
|
1979 { |
|
1980 return NULL; |
|
1981 } |
|
1982 |
|
1983 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE ) |
|
1984 { |
|
1985 resType = RT_ARRAY; |
|
1986 } |
|
1987 else if (token == TOK_STRING) |
|
1988 { |
|
1989 token = peekToken(state, 1, NULL, &line, NULL, status); |
|
1990 |
|
1991 if (U_FAILURE(*status)) |
|
1992 { |
|
1993 return NULL; |
|
1994 } |
|
1995 |
|
1996 switch (token) |
|
1997 { |
|
1998 case TOK_COMMA: resType = RT_ARRAY; break; |
|
1999 case TOK_OPEN_BRACE: resType = RT_TABLE; break; |
|
2000 case TOK_CLOSE_BRACE: resType = RT_STRING; break; |
|
2001 case TOK_COLON: resType = RT_TABLE; break; |
|
2002 default: |
|
2003 *status = U_INVALID_FORMAT_ERROR; |
|
2004 error(line, "Unexpected token after string, expected ',', '{' or '}'"); |
|
2005 return NULL; |
|
2006 } |
|
2007 } |
|
2008 else |
|
2009 { |
|
2010 *status = U_INVALID_FORMAT_ERROR; |
|
2011 error(line, "Unexpected token after '{'"); |
|
2012 return NULL; |
|
2013 } |
|
2014 |
|
2015 /* printf("Type guessed as %s\n", resourceNames[resType]); */ |
|
2016 } else if(resType == RT_TABLE_NO_FALLBACK) { |
|
2017 *status = U_INVALID_FORMAT_ERROR; |
|
2018 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); |
|
2019 return NULL; |
|
2020 } |
|
2021 |
|
2022 |
|
2023 /* We should now know what we need to parse next, so call the appropriate parser |
|
2024 function and return. */ |
|
2025 parseFunction = gResourceTypes[resType].parseFunction; |
|
2026 if (parseFunction != NULL) { |
|
2027 return parseFunction(state, tag, startline, comment, status); |
|
2028 } |
|
2029 else { |
|
2030 *status = U_INTERNAL_PROGRAM_ERROR; |
|
2031 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars); |
|
2032 } |
|
2033 |
|
2034 return NULL; |
|
2035 } |
|
2036 |
|
2037 /* parse the top-level resource */ |
|
2038 struct SRBRoot * |
|
2039 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, |
|
2040 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status) |
|
2041 { |
|
2042 struct UString *tokenValue; |
|
2043 struct UString comment; |
|
2044 uint32_t line; |
|
2045 enum EResourceType bundleType; |
|
2046 enum ETokenType token; |
|
2047 ParseState state; |
|
2048 uint32_t i; |
|
2049 |
|
2050 |
|
2051 for (i = 0; i < MAX_LOOKAHEAD + 1; i++) |
|
2052 { |
|
2053 ustr_init(&state.lookahead[i].value); |
|
2054 ustr_init(&state.lookahead[i].comment); |
|
2055 } |
|
2056 |
|
2057 initLookahead(&state, buf, status); |
|
2058 |
|
2059 state.inputdir = inputDir; |
|
2060 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0; |
|
2061 state.outputdir = outputDir; |
|
2062 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0; |
|
2063 state.makeBinaryCollation = makeBinaryCollation; |
|
2064 state.omitCollationRules = omitCollationRules; |
|
2065 |
|
2066 ustr_init(&comment); |
|
2067 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); |
|
2068 |
|
2069 state.bundle = bundle_open(&comment, FALSE, status); |
|
2070 |
|
2071 if (state.bundle == NULL || U_FAILURE(*status)) |
|
2072 { |
|
2073 return NULL; |
|
2074 } |
|
2075 |
|
2076 |
|
2077 bundle_setlocale(state.bundle, tokenValue->fChars, status); |
|
2078 |
|
2079 /* The following code is to make Empty bundle work no matter with :table specifer or not */ |
|
2080 token = getToken(&state, NULL, NULL, &line, status); |
|
2081 if(token==TOK_COLON) { |
|
2082 *status=U_ZERO_ERROR; |
|
2083 bundleType=parseResourceType(&state, status); |
|
2084 |
|
2085 if(isTable(bundleType)) |
|
2086 { |
|
2087 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status); |
|
2088 } |
|
2089 else |
|
2090 { |
|
2091 *status=U_PARSE_ERROR; |
|
2092 error(line, "parse error. Stopped parsing with %s", u_errorName(*status)); |
|
2093 } |
|
2094 } |
|
2095 else |
|
2096 { |
|
2097 /* not a colon */ |
|
2098 if(token==TOK_OPEN_BRACE) |
|
2099 { |
|
2100 *status=U_ZERO_ERROR; |
|
2101 bundleType=RT_TABLE; |
|
2102 } |
|
2103 else |
|
2104 { |
|
2105 /* neither colon nor open brace */ |
|
2106 *status=U_PARSE_ERROR; |
|
2107 bundleType=RT_UNKNOWN; |
|
2108 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); |
|
2109 } |
|
2110 } |
|
2111 |
|
2112 if (U_FAILURE(*status)) |
|
2113 { |
|
2114 bundle_close(state.bundle, status); |
|
2115 return NULL; |
|
2116 } |
|
2117 |
|
2118 if(bundleType==RT_TABLE_NO_FALLBACK) { |
|
2119 /* |
|
2120 * Parse a top-level table with the table(nofallback) declaration. |
|
2121 * This is the same as a regular table, but also sets the |
|
2122 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . |
|
2123 */ |
|
2124 state.bundle->noFallback=TRUE; |
|
2125 } |
|
2126 /* top-level tables need not handle special table names like "collations" */ |
|
2127 realParseTable(&state, state.bundle->fRoot, NULL, line, status); |
|
2128 if(dependencyArray!=NULL){ |
|
2129 table_add(state.bundle->fRoot, dependencyArray, 0, status); |
|
2130 dependencyArray = NULL; |
|
2131 } |
|
2132 if (U_FAILURE(*status)) |
|
2133 { |
|
2134 bundle_close(state.bundle, status); |
|
2135 res_close(dependencyArray); |
|
2136 return NULL; |
|
2137 } |
|
2138 |
|
2139 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF) |
|
2140 { |
|
2141 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)"); |
|
2142 if(isStrict()){ |
|
2143 *status = U_INVALID_FORMAT_ERROR; |
|
2144 return NULL; |
|
2145 } |
|
2146 } |
|
2147 |
|
2148 cleanupLookahead(&state); |
|
2149 ustr_deinit(&comment); |
|
2150 return state.bundle; |
|
2151 } |