intl/icu/source/tools/genrb/parse.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 1998-2013, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *
     9 * File parse.cpp
    10 *
    11 * Modification History:
    12 *
    13 *   Date          Name          Description
    14 *   05/26/99     stephen       Creation.
    15 *   02/25/00     weiv          Overhaul to write udata
    16 *   5/10/01      Ram           removed ustdio dependency
    17 *   06/10/2001  Dominic Ludlam <dom@recoil.org> Rewritten
    18 *******************************************************************************
    19 */
    21 #include "ucol_imp.h"
    22 #include "parse.h"
    23 #include "errmsg.h"
    24 #include "uhash.h"
    25 #include "cmemory.h"
    26 #include "cstring.h"
    27 #include "uinvchar.h"
    28 #include "read.h"
    29 #include "ustr.h"
    30 #include "reslist.h"
    31 #include "rbt_pars.h"
    32 #include "genrb.h"
    33 #include "unicode/ustring.h"
    34 #include "unicode/uscript.h"
    35 #include "unicode/putil.h"
    36 #include <stdio.h>
    38 /* Number of tokens to read ahead of the current stream position */
    39 #define MAX_LOOKAHEAD   3
    41 #define CR               0x000D
    42 #define LF               0x000A
    43 #define SPACE            0x0020
    44 #define TAB              0x0009
    45 #define ESCAPE           0x005C
    46 #define HASH             0x0023
    47 #define QUOTE            0x0027
    48 #define ZERO             0x0030
    49 #define STARTCOMMAND     0x005B
    50 #define ENDCOMMAND       0x005D
    51 #define OPENSQBRACKET    0x005B
    52 #define CLOSESQBRACKET   0x005D
    54 struct Lookahead
    55 {
    56      enum   ETokenType type;
    57      struct UString    value;
    58      struct UString    comment;
    59      uint32_t          line;
    60 };
    62 /* keep in sync with token defines in read.h */
    63 const char *tokenNames[TOK_TOKEN_COUNT] =
    64 {
    65      "string",             /* A string token, such as "MonthNames" */
    66      "'{'",                 /* An opening brace character */
    67      "'}'",                 /* A closing brace character */
    68      "','",                 /* A comma */
    69      "':'",                 /* A colon */
    71      "<end of file>",     /* End of the file has been reached successfully */
    72      "<end of line>"
    73 };
    75 /* Just to store "TRUE" */
    76 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
    78 typedef struct {
    79     struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
    80     uint32_t          lookaheadPosition;
    81     UCHARBUF         *buffer;
    82     struct SRBRoot *bundle;
    83     const char     *inputdir;
    84     uint32_t        inputdirLength;
    85     const char     *outputdir;
    86     uint32_t        outputdirLength;
    87     UBool           makeBinaryCollation;
    88     UBool           omitCollationRules;
    89 } ParseState;
    91 typedef struct SResource *
    92 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
    94 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
     /* The nature of the lookahead buffer:
        There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
        MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
        When getToken is called, the current pointer is moved to the next slot and the
        old slot is filled with the next token from the reader by calling getNextToken.
        The token values are stored in the slot, which means that token values don't
        survive a call to getToken, i.e.

        UString *value;

        getToken(state, &value, NULL, NULL, status);
        getToken(state, NULL,   NULL, NULL, status);       bad - value is now a different string
     */
   109 static void
   110 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
   111 {
   112     static uint32_t initTypeStrings = 0;
   113     uint32_t i;
   115     if (!initTypeStrings)
   116     {
   117         initTypeStrings = 1;
   118     }
   120     state->lookaheadPosition   = 0;
   121     state->buffer              = buf;
   123     resetLineNumber();
   125     for (i = 0; i < MAX_LOOKAHEAD; i++)
   126     {
   127         state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
   128         if (U_FAILURE(*status))
   129         {
   130             return;
   131         }
   132     }
   134     *status = U_ZERO_ERROR;
   135 }
   137 static void
   138 cleanupLookahead(ParseState* state)
   139 {
   140     uint32_t i;
   141     for (i = 0; i <= MAX_LOOKAHEAD; i++)
   142     {
   143         ustr_deinit(&state->lookahead[i].value);
   144         ustr_deinit(&state->lookahead[i].comment);
   145     }
   147 }
   149 static enum ETokenType
   150 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
   151 {
   152     enum ETokenType result;
   153     uint32_t          i;
   155     result = state->lookahead[state->lookaheadPosition].type;
   157     if (tokenValue != NULL)
   158     {
   159         *tokenValue = &state->lookahead[state->lookaheadPosition].value;
   160     }
   162     if (linenumber != NULL)
   163     {
   164         *linenumber = state->lookahead[state->lookaheadPosition].line;
   165     }
   167     if (comment != NULL)
   168     {
   169         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
   170     }
   172     i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
   173     state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
   174     ustr_setlen(&state->lookahead[i].comment, 0, status);
   175     ustr_setlen(&state->lookahead[i].value, 0, status);
   176     state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
   178     /* printf("getToken, returning %s\n", tokenNames[result]); */
   180     return result;
   181 }
   183 static enum ETokenType
   184 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
   185 {
   186     uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
   188     if (U_FAILURE(*status))
   189     {
   190         return TOK_ERROR;
   191     }
   193     if (lookaheadCount >= MAX_LOOKAHEAD)
   194     {
   195         *status = U_INTERNAL_PROGRAM_ERROR;
   196         return TOK_ERROR;
   197     }
   199     if (tokenValue != NULL)
   200     {
   201         *tokenValue = &state->lookahead[i].value;
   202     }
   204     if (linenumber != NULL)
   205     {
   206         *linenumber = state->lookahead[i].line;
   207     }
   209     if(comment != NULL){
   210         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
   211     }
   213     return state->lookahead[i].type;
   214 }
   216 static void
   217 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
   218 {
   219     uint32_t        line;
   221     enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
   223     if (linenumber != NULL)
   224     {
   225         *linenumber = line;
   226     }
   228     if (U_FAILURE(*status))
   229     {
   230         return;
   231     }
   233     if (token != expectedToken)
   234     {
   235         *status = U_INVALID_FORMAT_ERROR;
   236         error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
   237     }
   238     else
   239     {
   240         *status = U_ZERO_ERROR;
   241     }
   242 }
   244 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
   245 {
   246     struct UString *tokenValue;
   247     char           *result;
   248     uint32_t        count;
   250     expect(state, TOK_STRING, &tokenValue, comment, line, status);
   252     if (U_FAILURE(*status))
   253     {
   254         return NULL;
   255     }
   257     count = u_strlen(tokenValue->fChars);
   258     if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
   259         *status = U_INVALID_FORMAT_ERROR;
   260         error(*line, "invariant characters required for table keys, binary data, etc.");
   261         return NULL;
   262     }
   264     result = static_cast<char *>(uprv_malloc(count+1));
   266     if (result == NULL)
   267     {
   268         *status = U_MEMORY_ALLOCATION_ERROR;
   269         return NULL;
   270     }
   272     u_UCharsToChars(tokenValue->fChars, result, count+1);
   273     return result;
   274 }
   276 static struct SResource *
   277 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
   278 {
   279     struct SResource *result = NULL;
   280     struct UString   *tokenValue;
   281     FileStream       *file          = NULL;
   282     char              filename[256] = { '\0' };
   283     char              cs[128]       = { '\0' };
   284     uint32_t          line;
   285     UBool quoted = FALSE;
   286     UCHARBUF *ucbuf=NULL;
   287     UChar32   c     = 0;
   288     const char* cp  = NULL;
   289     UChar *pTarget     = NULL;
   290     UChar *target      = NULL;
   291     UChar *targetLimit = NULL;
   292     int32_t size = 0;
   294     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
   296     if(isVerbose()){
   297         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   298     }
   300     if (U_FAILURE(*status))
   301     {
   302         return NULL;
   303     }
   304     /* make the filename including the directory */
   305     if (state->inputdir != NULL)
   306     {
   307         uprv_strcat(filename, state->inputdir);
   309         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
   310         {
   311             uprv_strcat(filename, U_FILE_SEP_STRING);
   312         }
   313     }
   315     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
   317     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   319     if (U_FAILURE(*status))
   320     {
   321         return NULL;
   322     }
   323     uprv_strcat(filename, cs);
   325     if(state->omitCollationRules) {
   326         return res_none();
   327     }
   329     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
   331     if (U_FAILURE(*status)) {
   332         error(line, "An error occured while opening the input file %s\n", filename);
   333         return NULL;
   334     }
   336     /* We allocate more space than actually required
   337     * since the actual size needed for storing UChars
   338     * is not known in UTF-8 byte stream
   339     */
   340     size        = ucbuf_size(ucbuf) + 1;
   341     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
   342     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
   343     target      = pTarget;
   344     targetLimit = pTarget+size;
   346     /* read the rules into the buffer */
   347     while (target < targetLimit)
   348     {
   349         c = ucbuf_getc(ucbuf, status);
   350         if(c == QUOTE) {
   351             quoted = (UBool)!quoted;
   352         }
   353         /* weiv (06/26/2002): adding the following:
   354          * - preserving spaces in commands [...]
   355          * - # comments until the end of line
   356          */
   357         if (c == STARTCOMMAND && !quoted)
   358         {
   359             /* preserve commands
   360              * closing bracket will be handled by the
   361              * append at the end of the loop
   362              */
   363             while(c != ENDCOMMAND) {
   364                 U_APPEND_CHAR32_ONLY(c, target);
   365                 c = ucbuf_getc(ucbuf, status);
   366             }
   367         }
   368         else if (c == HASH && !quoted) {
   369             /* skip comments */
   370             while(c != CR && c != LF) {
   371                 c = ucbuf_getc(ucbuf, status);
   372             }
   373             continue;
   374         }
   375         else if (c == ESCAPE)
   376         {
   377             c = unescape(ucbuf, status);
   379             if (c == (UChar32)U_ERR)
   380             {
   381                 uprv_free(pTarget);
   382                 T_FileStream_close(file);
   383                 return NULL;
   384             }
   385         }
   386         else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
   387         {
   388             /* ignore spaces carriage returns
   389             * and line feed unless in the form \uXXXX
   390             */
   391             continue;
   392         }
   394         /* Append UChar * after dissembling if c > 0xffff*/
   395         if (c != (UChar32)U_EOF)
   396         {
   397             U_APPEND_CHAR32_ONLY(c, target);
   398         }
   399         else
   400         {
   401             break;
   402         }
   403     }
   405     /* terminate the string */
   406     if(target < targetLimit){
   407         *target = 0x0000;
   408     }
   410     result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
   413     ucbuf_close(ucbuf);
   414     uprv_free(pTarget);
   415     T_FileStream_close(file);
   417     return result;
   418 }
   420 static struct SResource *
   421 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
   422 {
   423     struct SResource *result = NULL;
   424     struct UString   *tokenValue;
   425     FileStream       *file          = NULL;
   426     char              filename[256] = { '\0' };
   427     char              cs[128]       = { '\0' };
   428     uint32_t          line;
   429     UCHARBUF *ucbuf=NULL;
   430     const char* cp  = NULL;
   431     UChar *pTarget     = NULL;
   432     const UChar *pSource     = NULL;
   433     int32_t size = 0;
   435     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
   437     if(isVerbose()){
   438         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   439     }
   441     if (U_FAILURE(*status))
   442     {
   443         return NULL;
   444     }
   445     /* make the filename including the directory */
   446     if (state->inputdir != NULL)
   447     {
   448         uprv_strcat(filename, state->inputdir);
   450         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
   451         {
   452             uprv_strcat(filename, U_FILE_SEP_STRING);
   453         }
   454     }
   456     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
   458     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   460     if (U_FAILURE(*status))
   461     {
   462         return NULL;
   463     }
   464     uprv_strcat(filename, cs);
   467     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
   469     if (U_FAILURE(*status)) {
   470         error(line, "An error occured while opening the input file %s\n", filename);
   471         return NULL;
   472     }
   474     /* We allocate more space than actually required
   475     * since the actual size needed for storing UChars
   476     * is not known in UTF-8 byte stream
   477     */
   478     pSource = ucbuf_getBuffer(ucbuf, &size, status);
   479     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
   480     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
   482 #if !UCONFIG_NO_TRANSLITERATION
   483     size = utrans_stripRules(pSource, size, pTarget, status);
   484 #else
   485     size = 0;
   486     fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
   487 #endif
   488     result = string_open(state->bundle, tag, pTarget, size, NULL, status);
   490     ucbuf_close(ucbuf);
   491     uprv_free(pTarget);
   492     T_FileStream_close(file);
   494     return result;
   495 }
   496 static struct SResource* dependencyArray = NULL;
   498 static struct SResource *
   499 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
   500 {
   501     struct SResource *result = NULL;
   502     struct SResource *elem = NULL;
   503     struct UString   *tokenValue;
   504     uint32_t          line;
   505     char              filename[256] = { '\0' };
   506     char              cs[128]       = { '\0' };
   508     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
   510     if(isVerbose()){
   511         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   512     }
   514     if (U_FAILURE(*status))
   515     {
   516         return NULL;
   517     }
   518     /* make the filename including the directory */
   519     if (state->outputdir != NULL)
   520     {
   521         uprv_strcat(filename, state->outputdir);
   523         if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
   524         {
   525             uprv_strcat(filename, U_FILE_SEP_STRING);
   526         }
   527     }
   529     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
   531     if (U_FAILURE(*status))
   532     {
   533         return NULL;
   534     }
   535     uprv_strcat(filename, cs);
   536     if(!T_FileStream_file_exists(filename)){
   537         if(isStrict()){
   538             error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
   539         }else{
   540             warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);       
   541         }
   542     }
   543     if(dependencyArray==NULL){
   544         dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
   545     }
   546     if(tag!=NULL){
   547         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
   548     }
   549     elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
   551     array_add(dependencyArray, elem, status);
   553     if (U_FAILURE(*status))
   554     {
   555         return NULL;
   556     }
   557     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   558     return result;
   559 }
   560 static struct SResource *
   561 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
   562 {
   563     struct UString   *tokenValue;
   564     struct SResource *result = NULL;
   566 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
   567     {
   568         return parseUCARules(tag, startline, status);
   569     }*/
   570     if(isVerbose()){
   571         printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   572     }
   573     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
   575     if (U_SUCCESS(*status))
   576     {
   577         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
   578         doesn't survive expect either) */
   580         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
   581         if(U_SUCCESS(*status) && result) {
   582             expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   584             if (U_FAILURE(*status))
   585             {
   586                 res_close(result);
   587                 return NULL;
   588             }
   589         }
   590     }
   592     return result;
   593 }
   595 static struct SResource *
   596 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   597 {
   598     struct UString   *tokenValue;
   599     struct SResource *result  = NULL;
   601     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
   603     if(isVerbose()){
   604         printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   605     }
   607     if (U_SUCCESS(*status))
   608     {
   609         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
   610         doesn't survive expect either) */
   612         result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
   614         expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   616         if (U_FAILURE(*status))
   617         {
   618             res_close(result);
   619             return NULL;
   620         }
   621     }
   623     return result;
   624 }
   626 typedef struct{
   627     const char* inputDir;
   628     const char* outputDir;
   629 } GenrbData;
   631 static struct SResource* resLookup(struct SResource* res, const char* key){
   632     struct SResource *current = NULL;
   633     struct SResTable *list;
   634     if (res == res_none()) {
   635         return NULL;
   636     }
   638     list = &(res->u.fTable);
   640     current = list->fFirst;
   641     while (current != NULL) {
   642         if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
   643             return current;
   644         }
   645         current = current->fNext;
   646     }
   647     return NULL;
   648 }
   650 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
   651     struct SRBRoot *data         = NULL;
   652     UCHARBUF       *ucbuf        = NULL;
   653     GenrbData* genrbdata = (GenrbData*) context;
   654     int localeLength = strlen(locale);
   655     char* filename = (char*)uprv_malloc(localeLength+5);
   656     char           *inputDirBuf  = NULL;
   657     char           *openFileName = NULL;
   658     const char* cp = "";
   659     UChar* urules = NULL;
   660     int32_t urulesLength = 0;
   661     int32_t i = 0;
   662     int32_t dirlen  = 0;
   663     int32_t filelen = 0;
   664     struct SResource* root;
   665     struct SResource* collations;
   666     struct SResource* collation;
   667     struct SResource* sequence;
   669     memcpy(filename, locale, localeLength);
   670     for(i = 0; i < localeLength; i++){
   671         if(filename[i] == '-'){
   672             filename[i] = '_';
   673         }
   674     }
   675     filename[localeLength]   = '.';
   676     filename[localeLength+1] = 't';
   677     filename[localeLength+2] = 'x';
   678     filename[localeLength+3] = 't';
   679     filename[localeLength+4] = 0;
   682     if (status==NULL || U_FAILURE(*status)) {
   683         return NULL;
   684     }
   685     if(filename==NULL){
   686         *status=U_ILLEGAL_ARGUMENT_ERROR;
   687         return NULL;
   688     }else{
   689         filelen = (int32_t)uprv_strlen(filename);
   690     }
   691     if(genrbdata->inputDir == NULL) {
   692         const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
   693         openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
   694         openFileName[0] = '\0';
   695         if (filenameBegin != NULL) {
   696             /*
   697              * When a filename ../../../data/root.txt is specified,
   698              * we presume that the input directory is ../../../data
   699              * This is very important when the resource file includes
   700              * another file, like UCARules.txt or thaidict.brk.
   701              */
   702             int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
   703             inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
   705             /* test for NULL */
   706             if(inputDirBuf == NULL) {
   707                 *status = U_MEMORY_ALLOCATION_ERROR;
   708                 goto finish;
   709             }
   711             inputDirBuf[filenameSize - 1] = 0;
   712             genrbdata->inputDir = inputDirBuf;
   713             dirlen  = (int32_t)uprv_strlen(genrbdata->inputDir);
   714         }
   715     }else{
   716         dirlen  = (int32_t)uprv_strlen(genrbdata->inputDir);
   718         if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
   719             openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
   721             /* test for NULL */
   722             if(openFileName == NULL) {
   723                 *status = U_MEMORY_ALLOCATION_ERROR;
   724                 goto finish;
   725             }
   727             openFileName[0] = '\0';
   728             /*
   729              * append the input dir to openFileName if the first char in
   730              * filename is not file seperation char and the last char input directory is  not '.'.
   731              * This is to support :
   732              * genrb -s. /home/icu/data
   733              * genrb -s. icu/data
   734              * The user cannot mix notations like
   735              * genrb -s. /icu/data --- the absolute path specified. -s redundant
   736              * user should use
   737              * genrb -s. icu/data  --- start from CWD and look in icu/data dir
   738              */
   739             if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
   740                 uprv_strcpy(openFileName, genrbdata->inputDir);
   741                 openFileName[dirlen]     = U_FILE_SEP_CHAR;
   742             }
   743             openFileName[dirlen + 1] = '\0';
   744         } else {
   745             openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
   747             /* test for NULL */
   748             if(openFileName == NULL) {
   749                 *status = U_MEMORY_ALLOCATION_ERROR;
   750                 goto finish;
   751             }
   753             uprv_strcpy(openFileName, genrbdata->inputDir);
   755         }
   756     }
   757     uprv_strcat(openFileName, filename);
   758     /* printf("%s\n", openFileName);  */
   759     *status = U_ZERO_ERROR;
   760     ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
   762     if(*status == U_FILE_ACCESS_ERROR) {
   764         fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
   765         goto finish;
   766     }
   767     if (ucbuf == NULL || U_FAILURE(*status)) {
   768         fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
   769         goto finish;
   770     }
   772     /* Parse the data into an SRBRoot */
   773     data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, FALSE, status);
   775     root = data->fRoot;
   776     collations = resLookup(root, "collations");
   777     if (collations != NULL) {
   778       collation = resLookup(collations, type);
   779       if (collation != NULL) {
   780         sequence = resLookup(collation, "Sequence");
   781         if (sequence != NULL) {
   782           urules = sequence->u.fString.fChars;
   783           urulesLength = sequence->u.fString.fLength;
   784           *pLength = urulesLength;
   785         }
   786       }
   787     }
   789 finish:
   790     if (inputDirBuf != NULL) {
   791         uprv_free(inputDirBuf);
   792     }
   794     if (openFileName != NULL) {
   795         uprv_free(openFileName);
   796     }
   798     if(ucbuf) {
   799         ucbuf_close(ucbuf);
   800     }
   802     return urules;
   803 }
   805 // Quick-and-dirty escaping function.
   806 // Assumes that we are on an ASCII-based platform.
   807 static void
   808 escape(const UChar *s, char *buffer) {
   809     int32_t length = u_strlen(s);
   810     int32_t i = 0;
   811     for (;;) {
   812         UChar32 c;
   813         U16_NEXT(s, i, length, c);
   814         if (c == 0) {
   815             *buffer = 0;
   816             return;
   817         } else if (0x20 <= c && c <= 0x7e) {
   818             // printable ASCII
   819             *buffer++ = (char)c;  // assumes ASCII-based platform
   820         } else {
   821             buffer += sprintf(buffer, "\\u%04X", (int)c);
   822         }
   823     }
   824 }
   826 static struct SResource *
   827 addCollation(ParseState* state, struct SResource  *result, uint32_t startline, UErrorCode *status)
   828 {
   829     struct SResource  *member = NULL;
   830     struct UString    *tokenValue;
   831     struct UString     comment;
   832     enum   ETokenType  token;
   833     char               subtag[1024];
   834     UVersionInfo       version;
   835     uint32_t           line;
   836     GenrbData genrbdata;
   837     /* '{' . (name resource)* '}' */
   838     version[0]=0; version[1]=0; version[2]=0; version[3]=0;
   840     for (;;)
   841     {
   842         ustr_init(&comment);
   843         token = getToken(state, &tokenValue, &comment, &line, status);
   845         if (token == TOK_CLOSE_BRACE)
   846         {
   847             return result;
   848         }
   850         if (token != TOK_STRING)
   851         {
   852             res_close(result);
   853             *status = U_INVALID_FORMAT_ERROR;
   855             if (token == TOK_EOF)
   856             {
   857                 error(startline, "unterminated table");
   858             }
   859             else
   860             {
   861                 error(line, "Unexpected token %s", tokenNames[token]);
   862             }
   864             return NULL;
   865         }
   867         u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
   869         if (U_FAILURE(*status))
   870         {
   871             res_close(result);
   872             return NULL;
   873         }
   875         member = parseResource(state, subtag, NULL, status);
   877         if (U_FAILURE(*status))
   878         {
   879             res_close(result);
   880             return NULL;
   881         }
   883         if (uprv_strcmp(subtag, "Version") == 0)
   884         {
   885             char     ver[40];
   886             int32_t length = member->u.fString.fLength;
   888             if (length >= (int32_t) sizeof(ver))
   889             {
   890                 length = (int32_t) sizeof(ver) - 1;
   891             }
   893             u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
   894             u_versionFromString(version, ver);
   896             table_add(result, member, line, status);
   898         }
   899         else if (uprv_strcmp(subtag, "Override") == 0)
   900         {
   901             // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
   902             table_add(result, member, line, status);
   904         }
   905         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
   906         {
   907             /* discard duplicate %%CollationBin if any*/
   908         }
   909         else if (uprv_strcmp(subtag, "Sequence") == 0)
   910         {
   911 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
   912             warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
   913 #else
   914             if(state->makeBinaryCollation) {
   916                 /* do the collation elements */
   917                 int32_t     len   = 0;
   918                 uint8_t   *data  = NULL;
   919                 UCollator *coll  = NULL;
   920                 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
   921                 int32_t reorderCodeCount;
   922                 int32_t reorderCodeIndex;
   923                 UParseError parseError;
   925                 genrbdata.inputDir = state->inputdir;
   926                 genrbdata.outputDir = state->outputdir;
   928                 UErrorCode intStatus = U_ZERO_ERROR;
   929                 uprv_memset(&parseError, 0, sizeof(parseError));
   930                 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
   931                                                UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
   933                 if (U_SUCCESS(intStatus) && coll != NULL)
   934                 {
   935                     len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
   936                     data = (uint8_t *)uprv_malloc(len);
   937                     intStatus = U_ZERO_ERROR;
   938                     len = ucol_cloneBinary(coll, data, len, &intStatus);
   940                     /* tailoring rules version */
   941                     /* This is wrong! */
   942                     /*coll->dataInfo.dataVersion[1] = version[0];*/
   943                     /* Copy tailoring version. Builder version already */
   944                     /* set in ucol_openRules */
   945                     ((UCATableHeader *)data)->version[1] = version[0];
   946                     ((UCATableHeader *)data)->version[2] = version[1];
   947                     ((UCATableHeader *)data)->version[3] = version[2];
   949                     if (U_SUCCESS(intStatus) && data != NULL)
   950                     {
   951                         struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
   952                         table_add(result, collationBin, line, status);
   953                         uprv_free(data);
   955                         reorderCodeCount = ucol_getReorderCodes(
   956                             coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
   957                         if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
   958                             struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
   959                             for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
   960                                 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
   961                             }
   962                             table_add(result, reorderCodeRes, line, status);
   963                         }
   964                     }
   965                     else
   966                     {
   967                         warning(line, "could not obtain rules from collator");
   968                         if(isStrict()){
   969                             *status = U_INVALID_FORMAT_ERROR;
   970                             return NULL;
   971                         }
   972                     }
   974                     ucol_close(coll);
   975                 }
   976                 else
   977                 {
   978                     if(intStatus == U_FILE_ACCESS_ERROR) {
   979                         error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
   980                         *status = intStatus;
   981                         return NULL;
   982                     }
   983                     char preBuffer[100], postBuffer[100];
   984                     escape(parseError.preContext, preBuffer);
   985                     escape(parseError.postContext, postBuffer);
   986                     warning(line,
   987                             "%%%%CollationBin could not be constructed from CollationElements\n"
   988                             "  check context, check that the FractionalUCA.txt UCA version "
   989                             "matches the current UCD version\n"
   990                             "  UErrorCode=%s  UParseError={ line=%d offset=%d pre=<> post=<> }",
   991                             u_errorName(intStatus),
   992                             parseError.line,
   993                             parseError.offset,
   994                             preBuffer,
   995                             postBuffer);
   996                     if(isStrict()){
   997                         *status = intStatus;
   998                         return NULL;
   999                     }
  1001             } else {
  1002                 if(isVerbose()) {
  1003                     printf("Not building Collation binary\n");
  1006 #endif
  1007             /* in order to achieve smaller data files, we can direct genrb */
  1008             /* to omit collation rules */
  1009             if(state->omitCollationRules) {
  1010                 bundle_closeString(state->bundle, member);
  1011             } else {
  1012                 table_add(result, member, line, status);
  1015         if (U_FAILURE(*status))
  1017             res_close(result);
  1018             return NULL;
  1022     // Reached the end without a TOK_CLOSE_BRACE.  Should be an error.
  1023     *status = U_INTERNAL_PROGRAM_ERROR;
  1024     return NULL;
  1027 static struct SResource *
  1028 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
  1030     struct SResource  *result = NULL;
  1031     struct SResource  *member = NULL;
  1032     struct SResource  *collationRes = NULL;
  1033     struct UString    *tokenValue;
  1034     struct UString     comment;
  1035     enum   ETokenType  token;
  1036     char               subtag[1024], typeKeyword[1024];
  1037     uint32_t           line;
  1039     result = table_open(state->bundle, tag, NULL, status);
  1041     if (result == NULL || U_FAILURE(*status))
  1043         return NULL;
  1045     if(isVerbose()){
  1046         printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
  1048     if(!newCollation) {
  1049         return addCollation(state, result, startline, status);
  1051     else {
  1052         for(;;) {
  1053             ustr_init(&comment);
  1054             token = getToken(state, &tokenValue, &comment, &line, status);
  1056             if (token == TOK_CLOSE_BRACE)
  1058                 return result;
  1061             if (token != TOK_STRING)
  1063                 res_close(result);
  1064                 *status = U_INVALID_FORMAT_ERROR;
  1066                 if (token == TOK_EOF)
  1068                     error(startline, "unterminated table");
  1070                 else
  1072                     error(line, "Unexpected token %s", tokenNames[token]);
  1075                 return NULL;
  1078             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
  1080             if (U_FAILURE(*status))
  1082                 res_close(result);
  1083                 return NULL;
  1086             if (uprv_strcmp(subtag, "default") == 0)
  1088                 member = parseResource(state, subtag, NULL, status);
  1090                 if (U_FAILURE(*status))
  1092                     res_close(result);
  1093                     return NULL;
  1096                 table_add(result, member, line, status);
  1098             else
  1100                 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
  1101                 /* this probably needs to be refactored or recursively use the parser */
  1102                 /* first we assume that our collation table won't have the explicit type */
  1103                 /* then, we cannot handle aliases */
  1104                 if(token == TOK_OPEN_BRACE) {
  1105                     token = getToken(state, &tokenValue, &comment, &line, status);
  1106                     collationRes = table_open(state->bundle, subtag, NULL, status);
  1107                     collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
  1108                     if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
  1109                         table_add(result, collationRes, startline, status);
  1111                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
  1112                     /* we could have a table too */
  1113                     token = peekToken(state, 1, &tokenValue, &line, &comment, status);
  1114                     u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
  1115                     if(uprv_strcmp(typeKeyword, "alias") == 0) {
  1116                         member = parseResource(state, subtag, NULL, status);
  1117                         if (U_FAILURE(*status))
  1119                             res_close(result);
  1120                             return NULL;
  1123                         table_add(result, member, line, status);
  1124                     } else {
  1125                         res_close(result);
  1126                         *status = U_INVALID_FORMAT_ERROR;
  1127                         return NULL;
  1129                 } else {
  1130                     res_close(result);
  1131                     *status = U_INVALID_FORMAT_ERROR;
  1132                     return NULL;
  1136             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
  1138             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
  1140             if (U_FAILURE(*status))
  1142                 res_close(result);
  1143                 return NULL;
  1149 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
  1150    if this weren't special-cased, wouldn't be set until the entire file had been processed. */
  1151 static struct SResource *
  1152 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
  1154     struct SResource  *member = NULL;
  1155     struct UString    *tokenValue=NULL;
  1156     struct UString    comment;
  1157     enum   ETokenType token;
  1158     char              subtag[1024];
  1159     uint32_t          line;
  1160     UBool             readToken = FALSE;
  1162     /* '{' . (name resource)* '}' */
  1164     if(isVerbose()){
  1165         printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
  1167     for (;;)
  1169         ustr_init(&comment);
  1170         token = getToken(state, &tokenValue, &comment, &line, status);
  1172         if (token == TOK_CLOSE_BRACE)
  1174             if (!readToken) {
  1175                 warning(startline, "Encountered empty table");
  1177             return table;
  1180         if (token != TOK_STRING)
  1182             *status = U_INVALID_FORMAT_ERROR;
  1184             if (token == TOK_EOF)
  1186                 error(startline, "unterminated table");
  1188             else
  1190                 error(line, "unexpected token %s", tokenNames[token]);
  1193             return NULL;
  1196         if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
  1197             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
  1198         } else {
  1199             *status = U_INVALID_FORMAT_ERROR;
  1200             error(line, "invariant characters required for table keys");
  1201             return NULL;
  1204         if (U_FAILURE(*status))
  1206             error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
  1207             return NULL;
  1210         member = parseResource(state, subtag, &comment, status);
  1212         if (member == NULL || U_FAILURE(*status))
  1214             error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
  1215             return NULL;
  1218         table_add(table, member, line, status);
  1220         if (U_FAILURE(*status))
  1222             error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
  1223             return NULL;
  1225         readToken = TRUE;
  1226         ustr_deinit(&comment);
  1229     /* not reached */
  1230     /* A compiler warning will appear if all paths don't contain a return statement. */
  1231 /*     *status = U_INTERNAL_PROGRAM_ERROR;
  1232      return NULL;*/
  1235 static struct SResource *
  1236 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
  1238     struct SResource *result;
  1240     if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
  1242         return parseCollationElements(state, tag, startline, FALSE, status);
  1244     if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
  1246         return parseCollationElements(state, tag, startline, TRUE, status);
  1248     if(isVerbose()){
  1249         printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
  1252     result = table_open(state->bundle, tag, comment, status);
  1254     if (result == NULL || U_FAILURE(*status))
  1256         return NULL;
  1258     return realParseTable(state, result, tag, startline,  status);
  1261 static struct SResource *
  1262 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
  1264     struct SResource  *result = NULL;
  1265     struct SResource  *member = NULL;
  1266     struct UString    *tokenValue;
  1267     struct UString    memberComments;
  1268     enum   ETokenType token;
  1269     UBool             readToken = FALSE;
  1271     result = array_open(state->bundle, tag, comment, status);
  1273     if (result == NULL || U_FAILURE(*status))
  1275         return NULL;
  1277     if(isVerbose()){
  1278         printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
  1281     ustr_init(&memberComments);
  1283     /* '{' . resource [','] '}' */
  1284     for (;;)
  1286         /* reset length */
  1287         ustr_setlen(&memberComments, 0, status);
  1289         /* check for end of array, but don't consume next token unless it really is the end */
  1290         token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
  1293         if (token == TOK_CLOSE_BRACE)
  1295             getToken(state, NULL, NULL, NULL, status);
  1296             if (!readToken) {
  1297                 warning(startline, "Encountered empty array");
  1299             break;
  1302         if (token == TOK_EOF)
  1304             res_close(result);
  1305             *status = U_INVALID_FORMAT_ERROR;
  1306             error(startline, "unterminated array");
  1307             return NULL;
  1310         /* string arrays are a special case */
  1311         if (token == TOK_STRING)
  1313             getToken(state, &tokenValue, &memberComments, NULL, status);
  1314             member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
  1316         else
  1318             member = parseResource(state, NULL, &memberComments, status);
  1321         if (member == NULL || U_FAILURE(*status))
  1323             res_close(result);
  1324             return NULL;
  1327         array_add(result, member, status);
  1329         if (U_FAILURE(*status))
  1331             res_close(result);
  1332             return NULL;
  1335         /* eat optional comma if present */
  1336         token = peekToken(state, 0, NULL, NULL, NULL, status);
  1338         if (token == TOK_COMMA)
  1340             getToken(state, NULL, NULL, NULL, status);
  1343         if (U_FAILURE(*status))
  1345             res_close(result);
  1346             return NULL;
  1348         readToken = TRUE;
  1351     ustr_deinit(&memberComments);
  1352     return result;
  1355 static struct SResource *
  1356 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
  1358     struct SResource  *result = NULL;
  1359     enum   ETokenType  token;
  1360     char              *string;
  1361     int32_t            value;
  1362     UBool              readToken = FALSE;
  1363     char              *stopstring;
  1364     uint32_t           len;
  1365     struct UString     memberComments;
  1367     result = intvector_open(state->bundle, tag, comment, status);
  1369     if (result == NULL || U_FAILURE(*status))
  1371         return NULL;
  1374     if(isVerbose()){
  1375         printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
  1377     ustr_init(&memberComments);
  1378     /* '{' . string [','] '}' */
  1379     for (;;)
  1381         ustr_setlen(&memberComments, 0, status);
  1383         /* check for end of array, but don't consume next token unless it really is the end */
  1384         token = peekToken(state, 0, NULL, NULL,&memberComments, status);
  1386         if (token == TOK_CLOSE_BRACE)
  1388             /* it's the end, consume the close brace */
  1389             getToken(state, NULL, NULL, NULL, status);
  1390             if (!readToken) {
  1391                 warning(startline, "Encountered empty int vector");
  1393             ustr_deinit(&memberComments);
  1394             return result;
  1397         string = getInvariantString(state, NULL, NULL, status);
  1399         if (U_FAILURE(*status))
  1401             res_close(result);
  1402             return NULL;
  1405         /* For handling illegal char in the Intvector */
  1406         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
  1407         len=(uint32_t)(stopstring-string);
  1409         if(len==uprv_strlen(string))
  1411             intvector_add(result, value, status);
  1412             uprv_free(string);
  1413             token = peekToken(state, 0, NULL, NULL, NULL, status);
  1415         else
  1417             uprv_free(string);
  1418             *status=U_INVALID_CHAR_FOUND;
  1421         if (U_FAILURE(*status))
  1423             res_close(result);
  1424             return NULL;
  1427         /* the comma is optional (even though it is required to prevent the reader from concatenating
  1428         consecutive entries) so that a missing comma on the last entry isn't an error */
  1429         if (token == TOK_COMMA)
  1431             getToken(state, NULL, NULL, NULL, status);
  1433         readToken = TRUE;
  1436     /* not reached */
  1437     /* A compiler warning will appear if all paths don't contain a return statement. */
  1438 /*    intvector_close(result, status);
  1439     *status = U_INTERNAL_PROGRAM_ERROR;
  1440     return NULL;*/
  1443 static struct SResource *
  1444 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
  1446     struct SResource *result = NULL;
  1447     uint8_t          *value;
  1448     char             *string;
  1449     char              toConv[3] = {'\0', '\0', '\0'};
  1450     uint32_t          count;
  1451     uint32_t          i;
  1452     uint32_t          line;
  1453     char             *stopstring;
  1454     uint32_t          len;
  1456     string = getInvariantString(state, &line, NULL, status);
  1458     if (string == NULL || U_FAILURE(*status))
  1460         return NULL;
  1463     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
  1465     if (U_FAILURE(*status))
  1467         uprv_free(string);
  1468         return NULL;
  1471     if(isVerbose()){
  1472         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
  1475     count = (uint32_t)uprv_strlen(string);
  1476     if (count > 0){
  1477         if((count % 2)==0){
  1478             value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
  1480             if (value == NULL)
  1482                 uprv_free(string);
  1483                 *status = U_MEMORY_ALLOCATION_ERROR;
  1484                 return NULL;
  1487             for (i = 0; i < count; i += 2)
  1489                 toConv[0] = string[i];
  1490                 toConv[1] = string[i + 1];
  1492                 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
  1493                 len=(uint32_t)(stopstring-toConv);
  1495                 if(len!=uprv_strlen(toConv))
  1497                     uprv_free(string);
  1498                     *status=U_INVALID_CHAR_FOUND;
  1499                     return NULL;
  1503             result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
  1505             uprv_free(value);
  1507         else
  1509             *status = U_INVALID_CHAR_FOUND;
  1510             uprv_free(string);
  1511             error(line, "Encountered invalid binary string");
  1512             return NULL;
  1515     else
  1517         result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
  1518         warning(startline, "Encountered empty binary tag");
  1520     uprv_free(string);
  1522     return result;
  1525 static struct SResource *
  1526 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
  1528     struct SResource *result = NULL;
  1529     int32_t           value;
  1530     char             *string;
  1531     char             *stopstring;
  1532     uint32_t          len;
  1534     string = getInvariantString(state, NULL, NULL, status);
  1536     if (string == NULL || U_FAILURE(*status))
  1538         return NULL;
  1541     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
  1543     if (U_FAILURE(*status))
  1545         uprv_free(string);
  1546         return NULL;
  1549     if(isVerbose()){
  1550         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
  1553     if (uprv_strlen(string) <= 0)
  1555         warning(startline, "Encountered empty integer. Default value is 0.");
  1558     /* Allow integer support for hexdecimal, octal digit and decimal*/
  1559     /* and handle illegal char in the integer*/
  1560     value = uprv_strtoul(string, &stopstring, 0);
  1561     len=(uint32_t)(stopstring-string);
  1562     if(len==uprv_strlen(string))
  1564         result = int_open(state->bundle, tag, value, comment, status);
  1566     else
  1568         *status=U_INVALID_CHAR_FOUND;
  1570     uprv_free(string);
  1572     return result;
  1575 static struct SResource *
  1576 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
  1578     struct SResource *result;
  1579     FileStream       *file;
  1580     int32_t           len;
  1581     uint8_t          *data;
  1582     char             *filename;
  1583     uint32_t          line;
  1584     char     *fullname = NULL;
  1585     filename = getInvariantString(state, &line, NULL, status);
  1587     if (U_FAILURE(*status))
  1589         return NULL;
  1592     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
  1594     if (U_FAILURE(*status))
  1596         uprv_free(filename);
  1597         return NULL;
  1600     if(isVerbose()){
  1601         printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
  1604     /* Open the input file for reading */
  1605     if (state->inputdir == NULL)
  1607 #if 1
  1608         /* 
  1609          * Always save file file name, even if there's
  1610          * no input directory specified. MIGHT BREAK SOMETHING
  1611          */
  1612         int32_t filenameLength = uprv_strlen(filename);
  1614         fullname = (char *) uprv_malloc(filenameLength + 1);
  1615         uprv_strcpy(fullname, filename);
  1616 #endif
  1618         file = T_FileStream_open(filename, "rb");
  1620     else
  1623         int32_t  count     = (int32_t)uprv_strlen(filename);
  1625         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
  1627             fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
  1629             /* test for NULL */
  1630             if(fullname == NULL)
  1632                 *status = U_MEMORY_ALLOCATION_ERROR;
  1633                 return NULL;
  1636             uprv_strcpy(fullname, state->inputdir);
  1638             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
  1639             fullname[state->inputdirLength + 1] = '\0';
  1641             uprv_strcat(fullname, filename);
  1643         else
  1645             fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
  1647             /* test for NULL */
  1648             if(fullname == NULL)
  1650                 *status = U_MEMORY_ALLOCATION_ERROR;
  1651                 return NULL;
  1654             uprv_strcpy(fullname, state->inputdir);
  1655             uprv_strcat(fullname, filename);
  1658         file = T_FileStream_open(fullname, "rb");
  1662     if (file == NULL)
  1664         error(line, "couldn't open input file %s", filename);
  1665         *status = U_FILE_ACCESS_ERROR;
  1666         return NULL;
  1669     len  = T_FileStream_size(file);
  1670     data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
  1671     /* test for NULL */
  1672     if(data == NULL)
  1674         *status = U_MEMORY_ALLOCATION_ERROR;
  1675         T_FileStream_close (file);
  1676         return NULL;
  1679     /* int32_t numRead = */ T_FileStream_read  (file, data, len);
  1680     T_FileStream_close (file);
  1682     result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
  1684     uprv_free(data);
  1685     uprv_free(filename);
  1686     uprv_free(fullname);
  1688     return result;
  1691 static struct SResource *
  1692 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
  1694     struct SResource *result;
  1695     int32_t           len=0;
  1696     char             *filename;
  1697     uint32_t          line;
  1698     UChar *pTarget     = NULL;
  1700     UCHARBUF *ucbuf;
  1701     char     *fullname = NULL;
  1702     int32_t  count     = 0;
  1703     const char* cp = NULL;
  1704     const UChar* uBuffer = NULL;
  1706     filename = getInvariantString(state, &line, NULL, status);
  1707     count     = (int32_t)uprv_strlen(filename);
  1709     if (U_FAILURE(*status))
  1711         return NULL;
  1714     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
  1716     if (U_FAILURE(*status))
  1718         uprv_free(filename);
  1719         return NULL;
  1722     if(isVerbose()){
  1723         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
  1726     fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
  1727     /* test for NULL */
  1728     if(fullname == NULL)
  1730         *status = U_MEMORY_ALLOCATION_ERROR;
  1731         uprv_free(filename);
  1732         return NULL;
  1735     if(state->inputdir!=NULL){
  1736         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
  1739             uprv_strcpy(fullname, state->inputdir);
  1741             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
  1742             fullname[state->inputdirLength + 1] = '\0';
  1744             uprv_strcat(fullname, filename);
  1746         else
  1748             uprv_strcpy(fullname, state->inputdir);
  1749             uprv_strcat(fullname, filename);
  1751     }else{
  1752         uprv_strcpy(fullname,filename);
  1755     ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
  1757     if (U_FAILURE(*status)) {
  1758         error(line, "couldn't open input file %s\n", filename);
  1759         return NULL;
  1762     uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
  1763     result = string_open(state->bundle, tag, uBuffer, len, comment, status);
  1765     ucbuf_close(ucbuf);
  1767     uprv_free(pTarget);
  1769     uprv_free(filename);
  1770     uprv_free(fullname);
  1772     return result;
  1779 U_STRING_DECL(k_type_string,    "string",    6);
  1780 U_STRING_DECL(k_type_binary,    "binary",    6);
  1781 U_STRING_DECL(k_type_bin,       "bin",       3);
  1782 U_STRING_DECL(k_type_table,     "table",     5);
  1783 U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
  1784 U_STRING_DECL(k_type_int,       "int",       3);
  1785 U_STRING_DECL(k_type_integer,   "integer",   7);
  1786 U_STRING_DECL(k_type_array,     "array",     5);
  1787 U_STRING_DECL(k_type_alias,     "alias",     5);
  1788 U_STRING_DECL(k_type_intvector, "intvector", 9);
  1789 U_STRING_DECL(k_type_import,    "import",    6);
  1790 U_STRING_DECL(k_type_include,   "include",   7);
  1792 /* Various non-standard processing plugins that create one or more special resources. */
  1793 U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
  1794 U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
  1795 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
  1796 U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
  1798 typedef enum EResourceType
  1800     RT_UNKNOWN,
  1801     RT_STRING,
  1802     RT_BINARY,
  1803     RT_TABLE,
  1804     RT_TABLE_NO_FALLBACK,
  1805     RT_INTEGER,
  1806     RT_ARRAY,
  1807     RT_ALIAS,
  1808     RT_INTVECTOR,
  1809     RT_IMPORT,
  1810     RT_INCLUDE,
  1811     RT_PROCESS_UCA_RULES,
  1812     RT_PROCESS_COLLATION,
  1813     RT_PROCESS_TRANSLITERATOR,
  1814     RT_PROCESS_DEPENDENCY,
  1815     RT_RESERVED
  1816 } EResourceType;
  1818 static struct {
  1819     const char *nameChars;   /* only used for debugging */
  1820     const UChar *nameUChars;
  1821     ParseResourceFunction *parseFunction;
  1822 } gResourceTypes[] = {
  1823     {"Unknown", NULL, NULL},
  1824     {"string", k_type_string, parseString},
  1825     {"binary", k_type_binary, parseBinary},
  1826     {"table", k_type_table, parseTable},
  1827     {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
  1828     {"integer", k_type_integer, parseInteger},
  1829     {"array", k_type_array, parseArray},
  1830     {"alias", k_type_alias, parseAlias},
  1831     {"intvector", k_type_intvector, parseIntVector},
  1832     {"import", k_type_import, parseImport},
  1833     {"include", k_type_include, parseInclude},
  1834     {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
  1835     {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
  1836     {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
  1837     {"process(dependency)", k_type_plugin_dependency, parseDependency},
  1838     {"reserved", NULL, NULL}
  1839 };
  1841 void initParser()
  1843     U_STRING_INIT(k_type_string,    "string",    6);
  1844     U_STRING_INIT(k_type_binary,    "binary",    6);
  1845     U_STRING_INIT(k_type_bin,       "bin",       3);
  1846     U_STRING_INIT(k_type_table,     "table",     5);
  1847     U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
  1848     U_STRING_INIT(k_type_int,       "int",       3);
  1849     U_STRING_INIT(k_type_integer,   "integer",   7);
  1850     U_STRING_INIT(k_type_array,     "array",     5);
  1851     U_STRING_INIT(k_type_alias,     "alias",     5);
  1852     U_STRING_INIT(k_type_intvector, "intvector", 9);
  1853     U_STRING_INIT(k_type_import,    "import",    6);
  1854     U_STRING_INIT(k_type_include,   "include",   7);
  1856     U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
  1857     U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
  1858     U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
  1859     U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
  1862 static inline UBool isTable(enum EResourceType type) {
  1863     return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
  1866 static enum EResourceType
  1867 parseResourceType(ParseState* state, UErrorCode *status)
  1869     struct UString        *tokenValue;
  1870     struct UString        comment;
  1871     enum   EResourceType  result = RT_UNKNOWN;
  1872     uint32_t              line=0;
  1873     ustr_init(&comment);
  1874     expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
  1876     if (U_FAILURE(*status))
  1878         return RT_UNKNOWN;
  1881     *status = U_ZERO_ERROR;
  1883     /* Search for normal types */
  1884     result=RT_UNKNOWN;
  1885     while ((result=(EResourceType)(result+1)) < RT_RESERVED) {
  1886         if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
  1887             break;
  1890     /* Now search for the aliases */
  1891     if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
  1892         result = RT_INTEGER;
  1894     else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
  1895         result = RT_BINARY;
  1897     else if (result == RT_RESERVED) {
  1898         char tokenBuffer[1024];
  1899         u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
  1900         tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
  1901         *status = U_INVALID_FORMAT_ERROR;
  1902         error(line, "unknown resource type '%s'", tokenBuffer);
  1905     return result;
  1908 /* parse a non-top-level resource */
  1909 static struct SResource *
  1910 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
  1912     enum   ETokenType      token;
  1913     enum   EResourceType  resType = RT_UNKNOWN;
  1914     ParseResourceFunction *parseFunction = NULL;
  1915     struct UString        *tokenValue;
  1916     uint32_t                 startline;
  1917     uint32_t                 line;
  1920     token = getToken(state, &tokenValue, NULL, &startline, status);
  1922     if(isVerbose()){
  1923         printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
  1926     /* name . [ ':' type ] '{' resource '}' */
  1927     /* This function parses from the colon onwards.  If the colon is present, parse the
  1928     type then try to parse a resource of that type.  If there is no explicit type,
  1929     work it out using the lookahead tokens. */
  1930     switch (token)
  1932     case TOK_EOF:
  1933         *status = U_INVALID_FORMAT_ERROR;
  1934         error(startline, "Unexpected EOF encountered");
  1935         return NULL;
  1937     case TOK_ERROR:
  1938         *status = U_INVALID_FORMAT_ERROR;
  1939         return NULL;
  1941     case TOK_COLON:
  1942         resType = parseResourceType(state, status);
  1943         expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
  1945         if (U_FAILURE(*status))
  1947             return NULL;
  1950         break;
  1952     case TOK_OPEN_BRACE:
  1953         break;
  1955     default:
  1956         *status = U_INVALID_FORMAT_ERROR;
  1957         error(startline, "syntax error while reading a resource, expected '{' or ':'");
  1958         return NULL;
  1962     if (resType == RT_UNKNOWN)
  1964         /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
  1965         We could have any of the following:
  1966         { {         => array (nested)
  1967         { :/}       => array
  1968         { string ,  => string array
  1970         { string {  => table
  1972         { string :/{    => table
  1973         { string }      => string
  1974         */
  1976         token = peekToken(state, 0, NULL, &line, NULL,status);
  1978         if (U_FAILURE(*status))
  1980             return NULL;
  1983         if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
  1985             resType = RT_ARRAY;
  1987         else if (token == TOK_STRING)
  1989             token = peekToken(state, 1, NULL, &line, NULL, status);
  1991             if (U_FAILURE(*status))
  1993                 return NULL;
  1996             switch (token)
  1998             case TOK_COMMA:         resType = RT_ARRAY;  break;
  1999             case TOK_OPEN_BRACE:    resType = RT_TABLE;  break;
  2000             case TOK_CLOSE_BRACE:   resType = RT_STRING; break;
  2001             case TOK_COLON:         resType = RT_TABLE;  break;
  2002             default:
  2003                 *status = U_INVALID_FORMAT_ERROR;
  2004                 error(line, "Unexpected token after string, expected ',', '{' or '}'");
  2005                 return NULL;
  2008         else
  2010             *status = U_INVALID_FORMAT_ERROR;
  2011             error(line, "Unexpected token after '{'");
  2012             return NULL;
  2015         /* printf("Type guessed as %s\n", resourceNames[resType]); */
  2016     } else if(resType == RT_TABLE_NO_FALLBACK) {
  2017         *status = U_INVALID_FORMAT_ERROR;
  2018         error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
  2019         return NULL;
  2023     /* We should now know what we need to parse next, so call the appropriate parser
  2024     function and return. */
  2025     parseFunction = gResourceTypes[resType].parseFunction;
  2026     if (parseFunction != NULL) {
  2027         return parseFunction(state, tag, startline, comment, status);
  2029     else {
  2030         *status = U_INTERNAL_PROGRAM_ERROR;
  2031         error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
  2034     return NULL;
  2037 /* parse the top-level resource */
  2038 struct SRBRoot *
  2039 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir,
  2040       UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
  2042     struct UString    *tokenValue;
  2043     struct UString    comment;
  2044     uint32_t           line;
  2045     enum EResourceType bundleType;
  2046     enum ETokenType    token;
  2047     ParseState state;
  2048     uint32_t i;
  2051     for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
  2053         ustr_init(&state.lookahead[i].value);
  2054         ustr_init(&state.lookahead[i].comment);
  2057     initLookahead(&state, buf, status);
  2059     state.inputdir       = inputDir;
  2060     state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
  2061     state.outputdir       = outputDir;
  2062     state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
  2063     state.makeBinaryCollation = makeBinaryCollation;
  2064     state.omitCollationRules = omitCollationRules;
  2066     ustr_init(&comment);
  2067     expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
  2069     state.bundle = bundle_open(&comment, FALSE, status);
  2071     if (state.bundle == NULL || U_FAILURE(*status))
  2073         return NULL;
  2077     bundle_setlocale(state.bundle, tokenValue->fChars, status);
  2079     /* The following code is to make Empty bundle work no matter with :table specifer or not */
  2080     token = getToken(&state, NULL, NULL, &line, status);
  2081     if(token==TOK_COLON) {
  2082         *status=U_ZERO_ERROR;
  2083         bundleType=parseResourceType(&state, status);
  2085         if(isTable(bundleType))
  2087             expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
  2089         else
  2091             *status=U_PARSE_ERROR;
  2092              error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
  2095     else
  2097         /* not a colon */
  2098         if(token==TOK_OPEN_BRACE)
  2100             *status=U_ZERO_ERROR;
  2101             bundleType=RT_TABLE;
  2103         else
  2105             /* neither colon nor open brace */
  2106             *status=U_PARSE_ERROR;
  2107             bundleType=RT_UNKNOWN;
  2108             error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
  2112     if (U_FAILURE(*status))
  2114         bundle_close(state.bundle, status);
  2115         return NULL;
  2118     if(bundleType==RT_TABLE_NO_FALLBACK) {
  2119         /*
  2120          * Parse a top-level table with the table(nofallback) declaration.
  2121          * This is the same as a regular table, but also sets the
  2122          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
  2123          */
  2124         state.bundle->noFallback=TRUE;
  2126     /* top-level tables need not handle special table names like "collations" */
  2127     realParseTable(&state, state.bundle->fRoot, NULL, line, status);
  2128     if(dependencyArray!=NULL){
  2129         table_add(state.bundle->fRoot, dependencyArray, 0, status);
  2130         dependencyArray = NULL;
  2132    if (U_FAILURE(*status))
  2134         bundle_close(state.bundle, status);
  2135         res_close(dependencyArray);
  2136         return NULL;
  2139     if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
  2141         warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
  2142         if(isStrict()){
  2143             *status = U_INVALID_FORMAT_ERROR;
  2144             return NULL;
  2148     cleanupLookahead(&state);
  2149     ustr_deinit(&comment);
  2150     return state.bundle;

mercurial