michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 1998-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * michael@0: * File parse.cpp michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 05/26/99 stephen Creation. michael@0: * 02/25/00 weiv Overhaul to write udata michael@0: * 5/10/01 Ram removed ustdio dependency michael@0: * 06/10/2001 Dominic Ludlam Rewritten michael@0: ******************************************************************************* michael@0: */ michael@0: michael@0: #include "ucol_imp.h" michael@0: #include "parse.h" michael@0: #include "errmsg.h" michael@0: #include "uhash.h" michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "uinvchar.h" michael@0: #include "read.h" michael@0: #include "ustr.h" michael@0: #include "reslist.h" michael@0: #include "rbt_pars.h" michael@0: #include "genrb.h" michael@0: #include "unicode/ustring.h" michael@0: #include "unicode/uscript.h" michael@0: #include "unicode/putil.h" michael@0: #include michael@0: michael@0: /* Number of tokens to read ahead of the current stream position */ michael@0: #define MAX_LOOKAHEAD 3 michael@0: michael@0: #define CR 0x000D michael@0: #define LF 0x000A michael@0: #define SPACE 0x0020 michael@0: #define TAB 0x0009 michael@0: #define ESCAPE 0x005C michael@0: #define HASH 0x0023 michael@0: #define QUOTE 0x0027 michael@0: #define ZERO 0x0030 michael@0: #define STARTCOMMAND 0x005B michael@0: #define ENDCOMMAND 0x005D michael@0: #define OPENSQBRACKET 0x005B michael@0: #define CLOSESQBRACKET 0x005D michael@0: michael@0: struct Lookahead michael@0: { michael@0: enum ETokenType type; michael@0: struct UString value; michael@0: struct UString comment; michael@0: uint32_t line; michael@0: }; michael@0: michael@0: /* keep in sync with token defines in read.h */ michael@0: const char *tokenNames[TOK_TOKEN_COUNT] = michael@0: { michael@0: "string", /* A string token, such as "MonthNames" */ michael@0: "'{'", /* An opening brace character */ michael@0: "'}'", /* A closing brace character */ michael@0: "','", /* A comma */ michael@0: "':'", /* A colon */ michael@0: michael@0: "", /* End of the file has been reached successfully */ michael@0: "" michael@0: }; michael@0: michael@0: /* Just to store "TRUE" */ michael@0: //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; michael@0: michael@0: typedef struct { michael@0: struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; michael@0: uint32_t lookaheadPosition; michael@0: UCHARBUF *buffer; michael@0: struct SRBRoot *bundle; michael@0: const char *inputdir; michael@0: uint32_t inputdirLength; michael@0: const char *outputdir; michael@0: uint32_t outputdirLength; michael@0: UBool makeBinaryCollation; michael@0: UBool omitCollationRules; michael@0: } ParseState; michael@0: michael@0: typedef struct SResource * michael@0: ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status); michael@0: michael@0: static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status); michael@0: michael@0: /* The nature of the lookahead buffer: michael@0: There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides michael@0: MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. michael@0: When getToken is called, the current pointer is moved to the next slot and the michael@0: old slot is filled with the next token from the reader by calling getNextToken. michael@0: The token values are stored in the slot, which means that token values don't michael@0: survive a call to getToken, ie. michael@0: michael@0: UString *value; michael@0: michael@0: getToken(&value, NULL, status); michael@0: getToken(NULL, NULL, status); bad - value is now a different string michael@0: */ michael@0: static void michael@0: initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status) michael@0: { michael@0: static uint32_t initTypeStrings = 0; michael@0: uint32_t i; michael@0: michael@0: if (!initTypeStrings) michael@0: { michael@0: initTypeStrings = 1; michael@0: } michael@0: michael@0: state->lookaheadPosition = 0; michael@0: state->buffer = buf; michael@0: michael@0: resetLineNumber(); michael@0: michael@0: for (i = 0; i < MAX_LOOKAHEAD; i++) michael@0: { michael@0: state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return; michael@0: } michael@0: } michael@0: michael@0: *status = U_ZERO_ERROR; michael@0: } michael@0: michael@0: static void michael@0: cleanupLookahead(ParseState* state) michael@0: { michael@0: uint32_t i; michael@0: for (i = 0; i <= MAX_LOOKAHEAD; i++) michael@0: { michael@0: ustr_deinit(&state->lookahead[i].value); michael@0: ustr_deinit(&state->lookahead[i].comment); michael@0: } michael@0: michael@0: } michael@0: michael@0: static enum ETokenType michael@0: getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status) michael@0: { michael@0: enum ETokenType result; michael@0: uint32_t i; michael@0: michael@0: result = state->lookahead[state->lookaheadPosition].type; michael@0: michael@0: if (tokenValue != NULL) michael@0: { michael@0: *tokenValue = &state->lookahead[state->lookaheadPosition].value; michael@0: } michael@0: michael@0: if (linenumber != NULL) michael@0: { michael@0: *linenumber = state->lookahead[state->lookaheadPosition].line; michael@0: } michael@0: michael@0: if (comment != NULL) michael@0: { michael@0: ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); michael@0: } michael@0: michael@0: i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1); michael@0: state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1); michael@0: ustr_setlen(&state->lookahead[i].comment, 0, status); michael@0: ustr_setlen(&state->lookahead[i].value, 0, status); michael@0: state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status); michael@0: michael@0: /* printf("getToken, returning %s\n", tokenNames[result]); */ michael@0: michael@0: return result; michael@0: } michael@0: michael@0: static enum ETokenType michael@0: peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status) michael@0: { michael@0: uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return TOK_ERROR; michael@0: } michael@0: michael@0: if (lookaheadCount >= MAX_LOOKAHEAD) michael@0: { michael@0: *status = U_INTERNAL_PROGRAM_ERROR; michael@0: return TOK_ERROR; michael@0: } michael@0: michael@0: if (tokenValue != NULL) michael@0: { michael@0: *tokenValue = &state->lookahead[i].value; michael@0: } michael@0: michael@0: if (linenumber != NULL) michael@0: { michael@0: *linenumber = state->lookahead[i].line; michael@0: } michael@0: michael@0: if(comment != NULL){ michael@0: ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status); michael@0: } michael@0: michael@0: return state->lookahead[i].type; michael@0: } michael@0: michael@0: static void michael@0: expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status) michael@0: { michael@0: uint32_t line; michael@0: michael@0: enum ETokenType token = getToken(state, tokenValue, comment, &line, status); michael@0: michael@0: if (linenumber != NULL) michael@0: { michael@0: *linenumber = line; michael@0: } michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return; michael@0: } michael@0: michael@0: if (token != expectedToken) michael@0: { michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]); michael@0: } michael@0: else michael@0: { michael@0: *status = U_ZERO_ERROR; michael@0: } michael@0: } michael@0: michael@0: static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status) michael@0: { michael@0: struct UString *tokenValue; michael@0: char *result; michael@0: uint32_t count; michael@0: michael@0: expect(state, TOK_STRING, &tokenValue, comment, line, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: michael@0: count = u_strlen(tokenValue->fChars); michael@0: if(!uprv_isInvariantUString(tokenValue->fChars, count)) { michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: error(*line, "invariant characters required for table keys, binary data, etc."); michael@0: return NULL; michael@0: } michael@0: michael@0: result = static_cast(uprv_malloc(count+1)); michael@0: michael@0: if (result == NULL) michael@0: { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: u_UCharsToChars(tokenValue->fChars, result, count+1); michael@0: return result; michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) michael@0: { michael@0: struct SResource *result = NULL; michael@0: struct UString *tokenValue; michael@0: FileStream *file = NULL; michael@0: char filename[256] = { '\0' }; michael@0: char cs[128] = { '\0' }; michael@0: uint32_t line; michael@0: UBool quoted = FALSE; michael@0: UCHARBUF *ucbuf=NULL; michael@0: UChar32 c = 0; michael@0: const char* cp = NULL; michael@0: UChar *pTarget = NULL; michael@0: UChar *target = NULL; michael@0: UChar *targetLimit = NULL; michael@0: int32_t size = 0; michael@0: michael@0: expect(state, TOK_STRING, &tokenValue, NULL, &line, status); michael@0: michael@0: if(isVerbose()){ michael@0: printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: /* make the filename including the directory */ michael@0: if (state->inputdir != NULL) michael@0: { michael@0: uprv_strcat(filename, state->inputdir); michael@0: michael@0: if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) michael@0: { michael@0: uprv_strcat(filename, U_FILE_SEP_STRING); michael@0: } michael@0: } michael@0: michael@0: u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); michael@0: michael@0: expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: uprv_strcat(filename, cs); michael@0: michael@0: if(state->omitCollationRules) { michael@0: return res_none(); michael@0: } michael@0: michael@0: ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); michael@0: michael@0: if (U_FAILURE(*status)) { michael@0: error(line, "An error occured while opening the input file %s\n", filename); michael@0: return NULL; michael@0: } michael@0: michael@0: /* We allocate more space than actually required michael@0: * since the actual size needed for storing UChars michael@0: * is not known in UTF-8 byte stream michael@0: */ michael@0: size = ucbuf_size(ucbuf) + 1; michael@0: pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size); michael@0: uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); michael@0: target = pTarget; michael@0: targetLimit = pTarget+size; michael@0: michael@0: /* read the rules into the buffer */ michael@0: while (target < targetLimit) michael@0: { michael@0: c = ucbuf_getc(ucbuf, status); michael@0: if(c == QUOTE) { michael@0: quoted = (UBool)!quoted; michael@0: } michael@0: /* weiv (06/26/2002): adding the following: michael@0: * - preserving spaces in commands [...] michael@0: * - # comments until the end of line michael@0: */ michael@0: if (c == STARTCOMMAND && !quoted) michael@0: { michael@0: /* preserve commands michael@0: * closing bracket will be handled by the michael@0: * append at the end of the loop michael@0: */ michael@0: while(c != ENDCOMMAND) { michael@0: U_APPEND_CHAR32_ONLY(c, target); michael@0: c = ucbuf_getc(ucbuf, status); michael@0: } michael@0: } michael@0: else if (c == HASH && !quoted) { michael@0: /* skip comments */ michael@0: while(c != CR && c != LF) { michael@0: c = ucbuf_getc(ucbuf, status); michael@0: } michael@0: continue; michael@0: } michael@0: else if (c == ESCAPE) michael@0: { michael@0: c = unescape(ucbuf, status); michael@0: michael@0: if (c == (UChar32)U_ERR) michael@0: { michael@0: uprv_free(pTarget); michael@0: T_FileStream_close(file); michael@0: return NULL; michael@0: } michael@0: } michael@0: else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF)) michael@0: { michael@0: /* ignore spaces carriage returns michael@0: * and line feed unless in the form \uXXXX michael@0: */ michael@0: continue; michael@0: } michael@0: michael@0: /* Append UChar * after dissembling if c > 0xffff*/ michael@0: if (c != (UChar32)U_EOF) michael@0: { michael@0: U_APPEND_CHAR32_ONLY(c, target); michael@0: } michael@0: else michael@0: { michael@0: break; michael@0: } michael@0: } michael@0: michael@0: /* terminate the string */ michael@0: if(target < targetLimit){ michael@0: *target = 0x0000; michael@0: } michael@0: michael@0: result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status); michael@0: michael@0: michael@0: ucbuf_close(ucbuf); michael@0: uprv_free(pTarget); michael@0: T_FileStream_close(file); michael@0: michael@0: return result; michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status) michael@0: { michael@0: struct SResource *result = NULL; michael@0: struct UString *tokenValue; michael@0: FileStream *file = NULL; michael@0: char filename[256] = { '\0' }; michael@0: char cs[128] = { '\0' }; michael@0: uint32_t line; michael@0: UCHARBUF *ucbuf=NULL; michael@0: const char* cp = NULL; michael@0: UChar *pTarget = NULL; michael@0: const UChar *pSource = NULL; michael@0: int32_t size = 0; michael@0: michael@0: expect(state, TOK_STRING, &tokenValue, NULL, &line, status); michael@0: michael@0: if(isVerbose()){ michael@0: printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: /* make the filename including the directory */ michael@0: if (state->inputdir != NULL) michael@0: { michael@0: uprv_strcat(filename, state->inputdir); michael@0: michael@0: if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) michael@0: { michael@0: uprv_strcat(filename, U_FILE_SEP_STRING); michael@0: } michael@0: } michael@0: michael@0: u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); michael@0: michael@0: expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: uprv_strcat(filename, cs); michael@0: michael@0: michael@0: ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); michael@0: michael@0: if (U_FAILURE(*status)) { michael@0: error(line, "An error occured while opening the input file %s\n", filename); michael@0: return NULL; michael@0: } michael@0: michael@0: /* We allocate more space than actually required michael@0: * since the actual size needed for storing UChars michael@0: * is not known in UTF-8 byte stream michael@0: */ michael@0: pSource = ucbuf_getBuffer(ucbuf, &size, status); michael@0: pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1)); michael@0: uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); michael@0: michael@0: #if !UCONFIG_NO_TRANSLITERATION michael@0: size = utrans_stripRules(pSource, size, pTarget, status); michael@0: #else michael@0: size = 0; michael@0: fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n"); michael@0: #endif michael@0: result = string_open(state->bundle, tag, pTarget, size, NULL, status); michael@0: michael@0: ucbuf_close(ucbuf); michael@0: uprv_free(pTarget); michael@0: T_FileStream_close(file); michael@0: michael@0: return result; michael@0: } michael@0: static struct SResource* dependencyArray = NULL; michael@0: michael@0: static struct SResource * michael@0: parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) michael@0: { michael@0: struct SResource *result = NULL; michael@0: struct SResource *elem = NULL; michael@0: struct UString *tokenValue; michael@0: uint32_t line; michael@0: char filename[256] = { '\0' }; michael@0: char cs[128] = { '\0' }; michael@0: michael@0: expect(state, TOK_STRING, &tokenValue, NULL, &line, status); michael@0: michael@0: if(isVerbose()){ michael@0: printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: /* make the filename including the directory */ michael@0: if (state->outputdir != NULL) michael@0: { michael@0: uprv_strcat(filename, state->outputdir); michael@0: michael@0: if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR) michael@0: { michael@0: uprv_strcat(filename, U_FILE_SEP_STRING); michael@0: } michael@0: } michael@0: michael@0: u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: uprv_strcat(filename, cs); michael@0: if(!T_FileStream_file_exists(filename)){ michael@0: if(isStrict()){ michael@0: error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); michael@0: }else{ michael@0: warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); michael@0: } michael@0: } michael@0: if(dependencyArray==NULL){ michael@0: dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status); michael@0: } michael@0: if(tag!=NULL){ michael@0: result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); michael@0: } michael@0: elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status); michael@0: michael@0: array_add(dependencyArray, elem, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); michael@0: return result; michael@0: } michael@0: static struct SResource * michael@0: parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) michael@0: { michael@0: struct UString *tokenValue; michael@0: struct SResource *result = NULL; michael@0: michael@0: /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0) michael@0: { michael@0: return parseUCARules(tag, startline, status); michael@0: }*/ michael@0: if(isVerbose()){ michael@0: printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); michael@0: michael@0: if (U_SUCCESS(*status)) michael@0: { michael@0: /* create the string now - tokenValue doesn't survive a call to getToken (and therefore michael@0: doesn't survive expect either) */ michael@0: michael@0: result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); michael@0: if(U_SUCCESS(*status) && result) { michael@0: expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: } michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) michael@0: { michael@0: struct UString *tokenValue; michael@0: struct SResource *result = NULL; michael@0: michael@0: expect(state, TOK_STRING, &tokenValue, NULL, NULL, status); michael@0: michael@0: if(isVerbose()){ michael@0: printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: if (U_SUCCESS(*status)) michael@0: { michael@0: /* create the string now - tokenValue doesn't survive a call to getToken (and therefore michael@0: doesn't survive expect either) */ michael@0: michael@0: result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); michael@0: michael@0: expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: typedef struct{ michael@0: const char* inputDir; michael@0: const char* outputDir; michael@0: } GenrbData; michael@0: michael@0: static struct SResource* resLookup(struct SResource* res, const char* key){ michael@0: struct SResource *current = NULL; michael@0: struct SResTable *list; michael@0: if (res == res_none()) { michael@0: return NULL; michael@0: } michael@0: michael@0: list = &(res->u.fTable); michael@0: michael@0: current = list->fFirst; michael@0: while (current != NULL) { michael@0: if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { michael@0: return current; michael@0: } michael@0: current = current->fNext; michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){ michael@0: struct SRBRoot *data = NULL; michael@0: UCHARBUF *ucbuf = NULL; michael@0: GenrbData* genrbdata = (GenrbData*) context; michael@0: int localeLength = strlen(locale); michael@0: char* filename = (char*)uprv_malloc(localeLength+5); michael@0: char *inputDirBuf = NULL; michael@0: char *openFileName = NULL; michael@0: const char* cp = ""; michael@0: UChar* urules = NULL; michael@0: int32_t urulesLength = 0; michael@0: int32_t i = 0; michael@0: int32_t dirlen = 0; michael@0: int32_t filelen = 0; michael@0: struct SResource* root; michael@0: struct SResource* collations; michael@0: struct SResource* collation; michael@0: struct SResource* sequence; michael@0: michael@0: memcpy(filename, locale, localeLength); michael@0: for(i = 0; i < localeLength; i++){ michael@0: if(filename[i] == '-'){ michael@0: filename[i] = '_'; michael@0: } michael@0: } michael@0: filename[localeLength] = '.'; michael@0: filename[localeLength+1] = 't'; michael@0: filename[localeLength+2] = 'x'; michael@0: filename[localeLength+3] = 't'; michael@0: filename[localeLength+4] = 0; michael@0: michael@0: michael@0: if (status==NULL || U_FAILURE(*status)) { michael@0: return NULL; michael@0: } michael@0: if(filename==NULL){ michael@0: *status=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: }else{ michael@0: filelen = (int32_t)uprv_strlen(filename); michael@0: } michael@0: if(genrbdata->inputDir == NULL) { michael@0: const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); michael@0: openFileName = (char *) uprv_malloc(dirlen + filelen + 2); michael@0: openFileName[0] = '\0'; michael@0: if (filenameBegin != NULL) { michael@0: /* michael@0: * When a filename ../../../data/root.txt is specified, michael@0: * we presume that the input directory is ../../../data michael@0: * This is very important when the resource file includes michael@0: * another file, like UCARules.txt or thaidict.brk. michael@0: */ michael@0: int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); michael@0: inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize); michael@0: michael@0: /* test for NULL */ michael@0: if(inputDirBuf == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: goto finish; michael@0: } michael@0: michael@0: inputDirBuf[filenameSize - 1] = 0; michael@0: genrbdata->inputDir = inputDirBuf; michael@0: dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); michael@0: } michael@0: }else{ michael@0: dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); michael@0: michael@0: if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) { michael@0: openFileName = (char *) uprv_malloc(dirlen + filelen + 2); michael@0: michael@0: /* test for NULL */ michael@0: if(openFileName == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: goto finish; michael@0: } michael@0: michael@0: openFileName[0] = '\0'; michael@0: /* michael@0: * append the input dir to openFileName if the first char in michael@0: * filename is not file seperation char and the last char input directory is not '.'. michael@0: * This is to support : michael@0: * genrb -s. /home/icu/data michael@0: * genrb -s. icu/data michael@0: * The user cannot mix notations like michael@0: * genrb -s. /icu/data --- the absolute path specified. -s redundant michael@0: * user should use michael@0: * genrb -s. icu/data --- start from CWD and look in icu/data dir michael@0: */ michael@0: if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){ michael@0: uprv_strcpy(openFileName, genrbdata->inputDir); michael@0: openFileName[dirlen] = U_FILE_SEP_CHAR; michael@0: } michael@0: openFileName[dirlen + 1] = '\0'; michael@0: } else { michael@0: openFileName = (char *) uprv_malloc(dirlen + filelen + 1); michael@0: michael@0: /* test for NULL */ michael@0: if(openFileName == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: goto finish; michael@0: } michael@0: michael@0: uprv_strcpy(openFileName, genrbdata->inputDir); michael@0: michael@0: } michael@0: } michael@0: uprv_strcat(openFileName, filename); michael@0: /* printf("%s\n", openFileName); */ michael@0: *status = U_ZERO_ERROR; michael@0: ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status); michael@0: michael@0: if(*status == U_FILE_ACCESS_ERROR) { michael@0: michael@0: fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName); michael@0: goto finish; michael@0: } michael@0: if (ucbuf == NULL || U_FAILURE(*status)) { michael@0: fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status)); michael@0: goto finish; michael@0: } michael@0: michael@0: /* Parse the data into an SRBRoot */ michael@0: data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, FALSE, status); michael@0: michael@0: root = data->fRoot; michael@0: collations = resLookup(root, "collations"); michael@0: if (collations != NULL) { michael@0: collation = resLookup(collations, type); michael@0: if (collation != NULL) { michael@0: sequence = resLookup(collation, "Sequence"); michael@0: if (sequence != NULL) { michael@0: urules = sequence->u.fString.fChars; michael@0: urulesLength = sequence->u.fString.fLength; michael@0: *pLength = urulesLength; michael@0: } michael@0: } michael@0: } michael@0: michael@0: finish: michael@0: if (inputDirBuf != NULL) { michael@0: uprv_free(inputDirBuf); michael@0: } michael@0: michael@0: if (openFileName != NULL) { michael@0: uprv_free(openFileName); michael@0: } michael@0: michael@0: if(ucbuf) { michael@0: ucbuf_close(ucbuf); michael@0: } michael@0: michael@0: return urules; michael@0: } michael@0: michael@0: // Quick-and-dirty escaping function. michael@0: // Assumes that we are on an ASCII-based platform. michael@0: static void michael@0: escape(const UChar *s, char *buffer) { michael@0: int32_t length = u_strlen(s); michael@0: int32_t i = 0; michael@0: for (;;) { michael@0: UChar32 c; michael@0: U16_NEXT(s, i, length, c); michael@0: if (c == 0) { michael@0: *buffer = 0; michael@0: return; michael@0: } else if (0x20 <= c && c <= 0x7e) { michael@0: // printable ASCII michael@0: *buffer++ = (char)c; // assumes ASCII-based platform michael@0: } else { michael@0: buffer += sprintf(buffer, "\\u%04X", (int)c); michael@0: } michael@0: } michael@0: } michael@0: michael@0: static struct SResource * michael@0: addCollation(ParseState* state, struct SResource *result, uint32_t startline, UErrorCode *status) michael@0: { michael@0: struct SResource *member = NULL; michael@0: struct UString *tokenValue; michael@0: struct UString comment; michael@0: enum ETokenType token; michael@0: char subtag[1024]; michael@0: UVersionInfo version; michael@0: uint32_t line; michael@0: GenrbData genrbdata; michael@0: /* '{' . (name resource)* '}' */ michael@0: version[0]=0; version[1]=0; version[2]=0; version[3]=0; michael@0: michael@0: for (;;) michael@0: { michael@0: ustr_init(&comment); michael@0: token = getToken(state, &tokenValue, &comment, &line, status); michael@0: michael@0: if (token == TOK_CLOSE_BRACE) michael@0: { michael@0: return result; michael@0: } michael@0: michael@0: if (token != TOK_STRING) michael@0: { michael@0: res_close(result); michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: michael@0: if (token == TOK_EOF) michael@0: { michael@0: error(startline, "unterminated table"); michael@0: } michael@0: else michael@0: { michael@0: error(line, "Unexpected token %s", tokenNames[token]); michael@0: } michael@0: michael@0: return NULL; michael@0: } michael@0: michael@0: u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: michael@0: member = parseResource(state, subtag, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: michael@0: if (uprv_strcmp(subtag, "Version") == 0) michael@0: { michael@0: char ver[40]; michael@0: int32_t length = member->u.fString.fLength; michael@0: michael@0: if (length >= (int32_t) sizeof(ver)) michael@0: { michael@0: length = (int32_t) sizeof(ver) - 1; michael@0: } michael@0: michael@0: u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */ michael@0: u_versionFromString(version, ver); michael@0: michael@0: table_add(result, member, line, status); michael@0: michael@0: } michael@0: else if (uprv_strcmp(subtag, "Override") == 0) michael@0: { michael@0: // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0); michael@0: table_add(result, member, line, status); michael@0: michael@0: } michael@0: else if(uprv_strcmp(subtag, "%%CollationBin")==0) michael@0: { michael@0: /* discard duplicate %%CollationBin if any*/ michael@0: } michael@0: else if (uprv_strcmp(subtag, "Sequence") == 0) michael@0: { michael@0: #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO michael@0: warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); michael@0: #else michael@0: if(state->makeBinaryCollation) { michael@0: michael@0: /* do the collation elements */ michael@0: int32_t len = 0; michael@0: uint8_t *data = NULL; michael@0: UCollator *coll = NULL; michael@0: int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)]; michael@0: int32_t reorderCodeCount; michael@0: int32_t reorderCodeIndex; michael@0: UParseError parseError; michael@0: michael@0: genrbdata.inputDir = state->inputdir; michael@0: genrbdata.outputDir = state->outputdir; michael@0: michael@0: UErrorCode intStatus = U_ZERO_ERROR; michael@0: uprv_memset(&parseError, 0, sizeof(parseError)); michael@0: coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength, michael@0: UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus); michael@0: michael@0: if (U_SUCCESS(intStatus) && coll != NULL) michael@0: { michael@0: len = ucol_cloneBinary(coll, NULL, 0, &intStatus); michael@0: data = (uint8_t *)uprv_malloc(len); michael@0: intStatus = U_ZERO_ERROR; michael@0: len = ucol_cloneBinary(coll, data, len, &intStatus); michael@0: michael@0: /* tailoring rules version */ michael@0: /* This is wrong! */ michael@0: /*coll->dataInfo.dataVersion[1] = version[0];*/ michael@0: /* Copy tailoring version. Builder version already */ michael@0: /* set in ucol_openRules */ michael@0: ((UCATableHeader *)data)->version[1] = version[0]; michael@0: ((UCATableHeader *)data)->version[2] = version[1]; michael@0: ((UCATableHeader *)data)->version[3] = version[2]; michael@0: michael@0: if (U_SUCCESS(intStatus) && data != NULL) michael@0: { michael@0: struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status); michael@0: table_add(result, collationBin, line, status); michael@0: uprv_free(data); michael@0: michael@0: reorderCodeCount = ucol_getReorderCodes( michael@0: coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus); michael@0: if (U_SUCCESS(intStatus) && reorderCodeCount > 0) { michael@0: struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status); michael@0: for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) { michael@0: intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status); michael@0: } michael@0: table_add(result, reorderCodeRes, line, status); michael@0: } michael@0: } michael@0: else michael@0: { michael@0: warning(line, "could not obtain rules from collator"); michael@0: if(isStrict()){ michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: ucol_close(coll); michael@0: } michael@0: else michael@0: { michael@0: if(intStatus == U_FILE_ACCESS_ERROR) { michael@0: error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly."); michael@0: *status = intStatus; michael@0: return NULL; michael@0: } michael@0: char preBuffer[100], postBuffer[100]; michael@0: escape(parseError.preContext, preBuffer); michael@0: escape(parseError.postContext, postBuffer); michael@0: warning(line, michael@0: "%%%%CollationBin could not be constructed from CollationElements\n" michael@0: " check context, check that the FractionalUCA.txt UCA version " michael@0: "matches the current UCD version\n" michael@0: " UErrorCode=%s UParseError={ line=%d offset=%d pre=<> post=<> }", michael@0: u_errorName(intStatus), michael@0: parseError.line, michael@0: parseError.offset, michael@0: preBuffer, michael@0: postBuffer); michael@0: if(isStrict()){ michael@0: *status = intStatus; michael@0: return NULL; michael@0: } michael@0: } michael@0: } else { michael@0: if(isVerbose()) { michael@0: printf("Not building Collation binary\n"); michael@0: } michael@0: } michael@0: #endif michael@0: /* in order to achieve smaller data files, we can direct genrb */ michael@0: /* to omit collation rules */ michael@0: if(state->omitCollationRules) { michael@0: bundle_closeString(state->bundle, member); michael@0: } else { michael@0: table_add(result, member, line, status); michael@0: } michael@0: } michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: // Reached the end without a TOK_CLOSE_BRACE. Should be an error. michael@0: *status = U_INTERNAL_PROGRAM_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status) michael@0: { michael@0: struct SResource *result = NULL; michael@0: struct SResource *member = NULL; michael@0: struct SResource *collationRes = NULL; michael@0: struct UString *tokenValue; michael@0: struct UString comment; michael@0: enum ETokenType token; michael@0: char subtag[1024], typeKeyword[1024]; michael@0: uint32_t line; michael@0: michael@0: result = table_open(state->bundle, tag, NULL, status); michael@0: michael@0: if (result == NULL || U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: if(isVerbose()){ michael@0: printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: if(!newCollation) { michael@0: return addCollation(state, result, startline, status); michael@0: } michael@0: else { michael@0: for(;;) { michael@0: ustr_init(&comment); michael@0: token = getToken(state, &tokenValue, &comment, &line, status); michael@0: michael@0: if (token == TOK_CLOSE_BRACE) michael@0: { michael@0: return result; michael@0: } michael@0: michael@0: if (token != TOK_STRING) michael@0: { michael@0: res_close(result); michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: michael@0: if (token == TOK_EOF) michael@0: { michael@0: error(startline, "unterminated table"); michael@0: } michael@0: else michael@0: { michael@0: error(line, "Unexpected token %s", tokenNames[token]); michael@0: } michael@0: michael@0: return NULL; michael@0: } michael@0: michael@0: u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: michael@0: if (uprv_strcmp(subtag, "default") == 0) michael@0: { michael@0: member = parseResource(state, subtag, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: michael@0: table_add(result, member, line, status); michael@0: } michael@0: else michael@0: { michael@0: token = peekToken(state, 0, &tokenValue, &line, &comment, status); michael@0: /* this probably needs to be refactored or recursively use the parser */ michael@0: /* first we assume that our collation table won't have the explicit type */ michael@0: /* then, we cannot handle aliases */ michael@0: if(token == TOK_OPEN_BRACE) { michael@0: token = getToken(state, &tokenValue, &comment, &line, status); michael@0: collationRes = table_open(state->bundle, subtag, NULL, status); michael@0: collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */ michael@0: if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) { michael@0: table_add(result, collationRes, startline, status); michael@0: } michael@0: } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ michael@0: /* we could have a table too */ michael@0: token = peekToken(state, 1, &tokenValue, &line, &comment, status); michael@0: u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1); michael@0: if(uprv_strcmp(typeKeyword, "alias") == 0) { michael@0: member = parseResource(state, subtag, NULL, status); michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: michael@0: table_add(result, member, line, status); michael@0: } else { michael@0: res_close(result); michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: return NULL; michael@0: } michael@0: } else { michael@0: res_close(result); michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ michael@0: michael@0: /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* Necessary, because CollationElements requires the bundle->fRoot member to be present which, michael@0: if this weren't special-cased, wouldn't be set until the entire file had been processed. */ michael@0: static struct SResource * michael@0: realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status) michael@0: { michael@0: struct SResource *member = NULL; michael@0: struct UString *tokenValue=NULL; michael@0: struct UString comment; michael@0: enum ETokenType token; michael@0: char subtag[1024]; michael@0: uint32_t line; michael@0: UBool readToken = FALSE; michael@0: michael@0: /* '{' . (name resource)* '}' */ michael@0: michael@0: if(isVerbose()){ michael@0: printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: for (;;) michael@0: { michael@0: ustr_init(&comment); michael@0: token = getToken(state, &tokenValue, &comment, &line, status); michael@0: michael@0: if (token == TOK_CLOSE_BRACE) michael@0: { michael@0: if (!readToken) { michael@0: warning(startline, "Encountered empty table"); michael@0: } michael@0: return table; michael@0: } michael@0: michael@0: if (token != TOK_STRING) michael@0: { michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: michael@0: if (token == TOK_EOF) michael@0: { michael@0: error(startline, "unterminated table"); michael@0: } michael@0: else michael@0: { michael@0: error(line, "unexpected token %s", tokenNames[token]); michael@0: } michael@0: michael@0: return NULL; michael@0: } michael@0: michael@0: if(uprv_isInvariantUString(tokenValue->fChars, -1)) { michael@0: u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); michael@0: } else { michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: error(line, "invariant characters required for table keys"); michael@0: return NULL; michael@0: } michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status)); michael@0: return NULL; michael@0: } michael@0: michael@0: member = parseResource(state, subtag, &comment, status); michael@0: michael@0: if (member == NULL || U_FAILURE(*status)) michael@0: { michael@0: error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status)); michael@0: return NULL; michael@0: } michael@0: michael@0: table_add(table, member, line, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: error(line, "parse error. Stopped parsing table with %s", u_errorName(*status)); michael@0: return NULL; michael@0: } michael@0: readToken = TRUE; michael@0: ustr_deinit(&comment); michael@0: } michael@0: michael@0: /* not reached */ michael@0: /* A compiler warning will appear if all paths don't contain a return statement. */ michael@0: /* *status = U_INTERNAL_PROGRAM_ERROR; michael@0: return NULL;*/ michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) michael@0: { michael@0: struct SResource *result; michael@0: michael@0: if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0) michael@0: { michael@0: return parseCollationElements(state, tag, startline, FALSE, status); michael@0: } michael@0: if (tag != NULL && uprv_strcmp(tag, "collations") == 0) michael@0: { michael@0: return parseCollationElements(state, tag, startline, TRUE, status); michael@0: } michael@0: if(isVerbose()){ michael@0: printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: result = table_open(state->bundle, tag, comment, status); michael@0: michael@0: if (result == NULL || U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: return realParseTable(state, result, tag, startline, status); michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) michael@0: { michael@0: struct SResource *result = NULL; michael@0: struct SResource *member = NULL; michael@0: struct UString *tokenValue; michael@0: struct UString memberComments; michael@0: enum ETokenType token; michael@0: UBool readToken = FALSE; michael@0: michael@0: result = array_open(state->bundle, tag, comment, status); michael@0: michael@0: if (result == NULL || U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: if(isVerbose()){ michael@0: printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: ustr_init(&memberComments); michael@0: michael@0: /* '{' . resource [','] '}' */ michael@0: for (;;) michael@0: { michael@0: /* reset length */ michael@0: ustr_setlen(&memberComments, 0, status); michael@0: michael@0: /* check for end of array, but don't consume next token unless it really is the end */ michael@0: token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status); michael@0: michael@0: michael@0: if (token == TOK_CLOSE_BRACE) michael@0: { michael@0: getToken(state, NULL, NULL, NULL, status); michael@0: if (!readToken) { michael@0: warning(startline, "Encountered empty array"); michael@0: } michael@0: break; michael@0: } michael@0: michael@0: if (token == TOK_EOF) michael@0: { michael@0: res_close(result); michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: error(startline, "unterminated array"); michael@0: return NULL; michael@0: } michael@0: michael@0: /* string arrays are a special case */ michael@0: if (token == TOK_STRING) michael@0: { michael@0: getToken(state, &tokenValue, &memberComments, NULL, status); michael@0: member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status); michael@0: } michael@0: else michael@0: { michael@0: member = parseResource(state, NULL, &memberComments, status); michael@0: } michael@0: michael@0: if (member == NULL || U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: michael@0: array_add(result, member, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: michael@0: /* eat optional comma if present */ michael@0: token = peekToken(state, 0, NULL, NULL, NULL, status); michael@0: michael@0: if (token == TOK_COMMA) michael@0: { michael@0: getToken(state, NULL, NULL, NULL, status); michael@0: } michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: readToken = TRUE; michael@0: } michael@0: michael@0: ustr_deinit(&memberComments); michael@0: return result; michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) michael@0: { michael@0: struct SResource *result = NULL; michael@0: enum ETokenType token; michael@0: char *string; michael@0: int32_t value; michael@0: UBool readToken = FALSE; michael@0: char *stopstring; michael@0: uint32_t len; michael@0: struct UString memberComments; michael@0: michael@0: result = intvector_open(state->bundle, tag, comment, status); michael@0: michael@0: if (result == NULL || U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: michael@0: if(isVerbose()){ michael@0: printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: ustr_init(&memberComments); michael@0: /* '{' . string [','] '}' */ michael@0: for (;;) michael@0: { michael@0: ustr_setlen(&memberComments, 0, status); michael@0: michael@0: /* check for end of array, but don't consume next token unless it really is the end */ michael@0: token = peekToken(state, 0, NULL, NULL,&memberComments, status); michael@0: michael@0: if (token == TOK_CLOSE_BRACE) michael@0: { michael@0: /* it's the end, consume the close brace */ michael@0: getToken(state, NULL, NULL, NULL, status); michael@0: if (!readToken) { michael@0: warning(startline, "Encountered empty int vector"); michael@0: } michael@0: ustr_deinit(&memberComments); michael@0: return result; michael@0: } michael@0: michael@0: string = getInvariantString(state, NULL, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: michael@0: /* For handling illegal char in the Intvector */ michael@0: value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ michael@0: len=(uint32_t)(stopstring-string); michael@0: michael@0: if(len==uprv_strlen(string)) michael@0: { michael@0: intvector_add(result, value, status); michael@0: uprv_free(string); michael@0: token = peekToken(state, 0, NULL, NULL, NULL, status); michael@0: } michael@0: else michael@0: { michael@0: uprv_free(string); michael@0: *status=U_INVALID_CHAR_FOUND; michael@0: } michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: res_close(result); michael@0: return NULL; michael@0: } michael@0: michael@0: /* the comma is optional (even though it is required to prevent the reader from concatenating michael@0: consecutive entries) so that a missing comma on the last entry isn't an error */ michael@0: if (token == TOK_COMMA) michael@0: { michael@0: getToken(state, NULL, NULL, NULL, status); michael@0: } michael@0: readToken = TRUE; michael@0: } michael@0: michael@0: /* not reached */ michael@0: /* A compiler warning will appear if all paths don't contain a return statement. */ michael@0: /* intvector_close(result, status); michael@0: *status = U_INTERNAL_PROGRAM_ERROR; michael@0: return NULL;*/ michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) michael@0: { michael@0: struct SResource *result = NULL; michael@0: uint8_t *value; michael@0: char *string; michael@0: char toConv[3] = {'\0', '\0', '\0'}; michael@0: uint32_t count; michael@0: uint32_t i; michael@0: uint32_t line; michael@0: char *stopstring; michael@0: uint32_t len; michael@0: michael@0: string = getInvariantString(state, &line, NULL, status); michael@0: michael@0: if (string == NULL || U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: michael@0: expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: uprv_free(string); michael@0: return NULL; michael@0: } michael@0: michael@0: if(isVerbose()){ michael@0: printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: count = (uint32_t)uprv_strlen(string); michael@0: if (count > 0){ michael@0: if((count % 2)==0){ michael@0: value = static_cast(uprv_malloc(sizeof(uint8_t) * count)); michael@0: michael@0: if (value == NULL) michael@0: { michael@0: uprv_free(string); michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: for (i = 0; i < count; i += 2) michael@0: { michael@0: toConv[0] = string[i]; michael@0: toConv[1] = string[i + 1]; michael@0: michael@0: value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); michael@0: len=(uint32_t)(stopstring-toConv); michael@0: michael@0: if(len!=uprv_strlen(toConv)) michael@0: { michael@0: uprv_free(string); michael@0: *status=U_INVALID_CHAR_FOUND; michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status); michael@0: michael@0: uprv_free(value); michael@0: } michael@0: else michael@0: { michael@0: *status = U_INVALID_CHAR_FOUND; michael@0: uprv_free(string); michael@0: error(line, "Encountered invalid binary string"); michael@0: return NULL; michael@0: } michael@0: } michael@0: else michael@0: { michael@0: result = bin_open(state->bundle, tag, 0, NULL, "",comment,status); michael@0: warning(startline, "Encountered empty binary tag"); michael@0: } michael@0: uprv_free(string); michael@0: michael@0: return result; michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) michael@0: { michael@0: struct SResource *result = NULL; michael@0: int32_t value; michael@0: char *string; michael@0: char *stopstring; michael@0: uint32_t len; michael@0: michael@0: string = getInvariantString(state, NULL, NULL, status); michael@0: michael@0: if (string == NULL || U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: michael@0: expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: uprv_free(string); michael@0: return NULL; michael@0: } michael@0: michael@0: if(isVerbose()){ michael@0: printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: if (uprv_strlen(string) <= 0) michael@0: { michael@0: warning(startline, "Encountered empty integer. Default value is 0."); michael@0: } michael@0: michael@0: /* Allow integer support for hexdecimal, octal digit and decimal*/ michael@0: /* and handle illegal char in the integer*/ michael@0: value = uprv_strtoul(string, &stopstring, 0); michael@0: len=(uint32_t)(stopstring-string); michael@0: if(len==uprv_strlen(string)) michael@0: { michael@0: result = int_open(state->bundle, tag, value, comment, status); michael@0: } michael@0: else michael@0: { michael@0: *status=U_INVALID_CHAR_FOUND; michael@0: } michael@0: uprv_free(string); michael@0: michael@0: return result; michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) michael@0: { michael@0: struct SResource *result; michael@0: FileStream *file; michael@0: int32_t len; michael@0: uint8_t *data; michael@0: char *filename; michael@0: uint32_t line; michael@0: char *fullname = NULL; michael@0: filename = getInvariantString(state, &line, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: michael@0: expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: uprv_free(filename); michael@0: return NULL; michael@0: } michael@0: michael@0: if(isVerbose()){ michael@0: printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: /* Open the input file for reading */ michael@0: if (state->inputdir == NULL) michael@0: { michael@0: #if 1 michael@0: /* michael@0: * Always save file file name, even if there's michael@0: * no input directory specified. MIGHT BREAK SOMETHING michael@0: */ michael@0: int32_t filenameLength = uprv_strlen(filename); michael@0: michael@0: fullname = (char *) uprv_malloc(filenameLength + 1); michael@0: uprv_strcpy(fullname, filename); michael@0: #endif michael@0: michael@0: file = T_FileStream_open(filename, "rb"); michael@0: } michael@0: else michael@0: { michael@0: michael@0: int32_t count = (int32_t)uprv_strlen(filename); michael@0: michael@0: if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) michael@0: { michael@0: fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); michael@0: michael@0: /* test for NULL */ michael@0: if(fullname == NULL) michael@0: { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: uprv_strcpy(fullname, state->inputdir); michael@0: michael@0: fullname[state->inputdirLength] = U_FILE_SEP_CHAR; michael@0: fullname[state->inputdirLength + 1] = '\0'; michael@0: michael@0: uprv_strcat(fullname, filename); michael@0: } michael@0: else michael@0: { michael@0: fullname = (char *) uprv_malloc(state->inputdirLength + count + 1); michael@0: michael@0: /* test for NULL */ michael@0: if(fullname == NULL) michael@0: { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: uprv_strcpy(fullname, state->inputdir); michael@0: uprv_strcat(fullname, filename); michael@0: } michael@0: michael@0: file = T_FileStream_open(fullname, "rb"); michael@0: michael@0: } michael@0: michael@0: if (file == NULL) michael@0: { michael@0: error(line, "couldn't open input file %s", filename); michael@0: *status = U_FILE_ACCESS_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: len = T_FileStream_size(file); michael@0: data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t)); michael@0: /* test for NULL */ michael@0: if(data == NULL) michael@0: { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: T_FileStream_close (file); michael@0: return NULL; michael@0: } michael@0: michael@0: /* int32_t numRead = */ T_FileStream_read (file, data, len); michael@0: T_FileStream_close (file); michael@0: michael@0: result = bin_open(state->bundle, tag, len, data, fullname, comment, status); michael@0: michael@0: uprv_free(data); michael@0: uprv_free(filename); michael@0: uprv_free(fullname); michael@0: michael@0: return result; michael@0: } michael@0: michael@0: static struct SResource * michael@0: parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) michael@0: { michael@0: struct SResource *result; michael@0: int32_t len=0; michael@0: char *filename; michael@0: uint32_t line; michael@0: UChar *pTarget = NULL; michael@0: michael@0: UCHARBUF *ucbuf; michael@0: char *fullname = NULL; michael@0: int32_t count = 0; michael@0: const char* cp = NULL; michael@0: const UChar* uBuffer = NULL; michael@0: michael@0: filename = getInvariantString(state, &line, NULL, status); michael@0: count = (int32_t)uprv_strlen(filename); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: michael@0: expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: uprv_free(filename); michael@0: return NULL; michael@0: } michael@0: michael@0: if(isVerbose()){ michael@0: printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: fullname = (char *) uprv_malloc(state->inputdirLength + count + 2); michael@0: /* test for NULL */ michael@0: if(fullname == NULL) michael@0: { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: uprv_free(filename); michael@0: return NULL; michael@0: } michael@0: michael@0: if(state->inputdir!=NULL){ michael@0: if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR) michael@0: { michael@0: michael@0: uprv_strcpy(fullname, state->inputdir); michael@0: michael@0: fullname[state->inputdirLength] = U_FILE_SEP_CHAR; michael@0: fullname[state->inputdirLength + 1] = '\0'; michael@0: michael@0: uprv_strcat(fullname, filename); michael@0: } michael@0: else michael@0: { michael@0: uprv_strcpy(fullname, state->inputdir); michael@0: uprv_strcat(fullname, filename); michael@0: } michael@0: }else{ michael@0: uprv_strcpy(fullname,filename); michael@0: } michael@0: michael@0: ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status); michael@0: michael@0: if (U_FAILURE(*status)) { michael@0: error(line, "couldn't open input file %s\n", filename); michael@0: return NULL; michael@0: } michael@0: michael@0: uBuffer = ucbuf_getBuffer(ucbuf,&len,status); michael@0: result = string_open(state->bundle, tag, uBuffer, len, comment, status); michael@0: michael@0: ucbuf_close(ucbuf); michael@0: michael@0: uprv_free(pTarget); michael@0: michael@0: uprv_free(filename); michael@0: uprv_free(fullname); michael@0: michael@0: return result; michael@0: } michael@0: michael@0: michael@0: michael@0: michael@0: michael@0: U_STRING_DECL(k_type_string, "string", 6); michael@0: U_STRING_DECL(k_type_binary, "binary", 6); michael@0: U_STRING_DECL(k_type_bin, "bin", 3); michael@0: U_STRING_DECL(k_type_table, "table", 5); michael@0: U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17); michael@0: U_STRING_DECL(k_type_int, "int", 3); michael@0: U_STRING_DECL(k_type_integer, "integer", 7); michael@0: U_STRING_DECL(k_type_array, "array", 5); michael@0: U_STRING_DECL(k_type_alias, "alias", 5); michael@0: U_STRING_DECL(k_type_intvector, "intvector", 9); michael@0: U_STRING_DECL(k_type_import, "import", 6); michael@0: U_STRING_DECL(k_type_include, "include", 7); michael@0: michael@0: /* Various non-standard processing plugins that create one or more special resources. */ michael@0: U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); michael@0: U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); michael@0: U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); michael@0: U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); michael@0: michael@0: typedef enum EResourceType michael@0: { michael@0: RT_UNKNOWN, michael@0: RT_STRING, michael@0: RT_BINARY, michael@0: RT_TABLE, michael@0: RT_TABLE_NO_FALLBACK, michael@0: RT_INTEGER, michael@0: RT_ARRAY, michael@0: RT_ALIAS, michael@0: RT_INTVECTOR, michael@0: RT_IMPORT, michael@0: RT_INCLUDE, michael@0: RT_PROCESS_UCA_RULES, michael@0: RT_PROCESS_COLLATION, michael@0: RT_PROCESS_TRANSLITERATOR, michael@0: RT_PROCESS_DEPENDENCY, michael@0: RT_RESERVED michael@0: } EResourceType; michael@0: michael@0: static struct { michael@0: const char *nameChars; /* only used for debugging */ michael@0: const UChar *nameUChars; michael@0: ParseResourceFunction *parseFunction; michael@0: } gResourceTypes[] = { michael@0: {"Unknown", NULL, NULL}, michael@0: {"string", k_type_string, parseString}, michael@0: {"binary", k_type_binary, parseBinary}, michael@0: {"table", k_type_table, parseTable}, michael@0: {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */ michael@0: {"integer", k_type_integer, parseInteger}, michael@0: {"array", k_type_array, parseArray}, michael@0: {"alias", k_type_alias, parseAlias}, michael@0: {"intvector", k_type_intvector, parseIntVector}, michael@0: {"import", k_type_import, parseImport}, michael@0: {"include", k_type_include, parseInclude}, michael@0: {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, michael@0: {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */}, michael@0: {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator}, michael@0: {"process(dependency)", k_type_plugin_dependency, parseDependency}, michael@0: {"reserved", NULL, NULL} michael@0: }; michael@0: michael@0: void initParser() michael@0: { michael@0: U_STRING_INIT(k_type_string, "string", 6); michael@0: U_STRING_INIT(k_type_binary, "binary", 6); michael@0: U_STRING_INIT(k_type_bin, "bin", 3); michael@0: U_STRING_INIT(k_type_table, "table", 5); michael@0: U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17); michael@0: U_STRING_INIT(k_type_int, "int", 3); michael@0: U_STRING_INIT(k_type_integer, "integer", 7); michael@0: U_STRING_INIT(k_type_array, "array", 5); michael@0: U_STRING_INIT(k_type_alias, "alias", 5); michael@0: U_STRING_INIT(k_type_intvector, "intvector", 9); michael@0: U_STRING_INIT(k_type_import, "import", 6); michael@0: U_STRING_INIT(k_type_include, "include", 7); michael@0: michael@0: U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18); michael@0: U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18); michael@0: U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23); michael@0: U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19); michael@0: } michael@0: michael@0: static inline UBool isTable(enum EResourceType type) { michael@0: return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK); michael@0: } michael@0: michael@0: static enum EResourceType michael@0: parseResourceType(ParseState* state, UErrorCode *status) michael@0: { michael@0: struct UString *tokenValue; michael@0: struct UString comment; michael@0: enum EResourceType result = RT_UNKNOWN; michael@0: uint32_t line=0; michael@0: ustr_init(&comment); michael@0: expect(state, TOK_STRING, &tokenValue, &comment, &line, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return RT_UNKNOWN; michael@0: } michael@0: michael@0: *status = U_ZERO_ERROR; michael@0: michael@0: /* Search for normal types */ michael@0: result=RT_UNKNOWN; michael@0: while ((result=(EResourceType)(result+1)) < RT_RESERVED) { michael@0: if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) { michael@0: break; michael@0: } michael@0: } michael@0: /* Now search for the aliases */ michael@0: if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { michael@0: result = RT_INTEGER; michael@0: } michael@0: else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { michael@0: result = RT_BINARY; michael@0: } michael@0: else if (result == RT_RESERVED) { michael@0: char tokenBuffer[1024]; michael@0: u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); michael@0: tokenBuffer[sizeof(tokenBuffer) - 1] = 0; michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: error(line, "unknown resource type '%s'", tokenBuffer); michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: /* parse a non-top-level resource */ michael@0: static struct SResource * michael@0: parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status) michael@0: { michael@0: enum ETokenType token; michael@0: enum EResourceType resType = RT_UNKNOWN; michael@0: ParseResourceFunction *parseFunction = NULL; michael@0: struct UString *tokenValue; michael@0: uint32_t startline; michael@0: uint32_t line; michael@0: michael@0: michael@0: token = getToken(state, &tokenValue, NULL, &startline, status); michael@0: michael@0: if(isVerbose()){ michael@0: printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); michael@0: } michael@0: michael@0: /* name . [ ':' type ] '{' resource '}' */ michael@0: /* This function parses from the colon onwards. If the colon is present, parse the michael@0: type then try to parse a resource of that type. If there is no explicit type, michael@0: work it out using the lookahead tokens. */ michael@0: switch (token) michael@0: { michael@0: case TOK_EOF: michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: error(startline, "Unexpected EOF encountered"); michael@0: return NULL; michael@0: michael@0: case TOK_ERROR: michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: return NULL; michael@0: michael@0: case TOK_COLON: michael@0: resType = parseResourceType(state, status); michael@0: expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: michael@0: break; michael@0: michael@0: case TOK_OPEN_BRACE: michael@0: break; michael@0: michael@0: default: michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: error(startline, "syntax error while reading a resource, expected '{' or ':'"); michael@0: return NULL; michael@0: } michael@0: michael@0: michael@0: if (resType == RT_UNKNOWN) michael@0: { michael@0: /* No explicit type, so try to work it out. At this point, we've read the first '{'. michael@0: We could have any of the following: michael@0: { { => array (nested) michael@0: { :/} => array michael@0: { string , => string array michael@0: michael@0: { string { => table michael@0: michael@0: { string :/{ => table michael@0: { string } => string michael@0: */ michael@0: michael@0: token = peekToken(state, 0, NULL, &line, NULL,status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: michael@0: if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE ) michael@0: { michael@0: resType = RT_ARRAY; michael@0: } michael@0: else if (token == TOK_STRING) michael@0: { michael@0: token = peekToken(state, 1, NULL, &line, NULL, status); michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: michael@0: switch (token) michael@0: { michael@0: case TOK_COMMA: resType = RT_ARRAY; break; michael@0: case TOK_OPEN_BRACE: resType = RT_TABLE; break; michael@0: case TOK_CLOSE_BRACE: resType = RT_STRING; break; michael@0: case TOK_COLON: resType = RT_TABLE; break; michael@0: default: michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: error(line, "Unexpected token after string, expected ',', '{' or '}'"); michael@0: return NULL; michael@0: } michael@0: } michael@0: else michael@0: { michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: error(line, "Unexpected token after '{'"); michael@0: return NULL; michael@0: } michael@0: michael@0: /* printf("Type guessed as %s\n", resourceNames[resType]); */ michael@0: } else if(resType == RT_TABLE_NO_FALLBACK) { michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); michael@0: return NULL; michael@0: } michael@0: michael@0: michael@0: /* We should now know what we need to parse next, so call the appropriate parser michael@0: function and return. */ michael@0: parseFunction = gResourceTypes[resType].parseFunction; michael@0: if (parseFunction != NULL) { michael@0: return parseFunction(state, tag, startline, comment, status); michael@0: } michael@0: else { michael@0: *status = U_INTERNAL_PROGRAM_ERROR; michael@0: error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars); michael@0: } michael@0: michael@0: return NULL; michael@0: } michael@0: michael@0: /* parse the top-level resource */ michael@0: struct SRBRoot * michael@0: parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, michael@0: UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status) michael@0: { michael@0: struct UString *tokenValue; michael@0: struct UString comment; michael@0: uint32_t line; michael@0: enum EResourceType bundleType; michael@0: enum ETokenType token; michael@0: ParseState state; michael@0: uint32_t i; michael@0: michael@0: michael@0: for (i = 0; i < MAX_LOOKAHEAD + 1; i++) michael@0: { michael@0: ustr_init(&state.lookahead[i].value); michael@0: ustr_init(&state.lookahead[i].comment); michael@0: } michael@0: michael@0: initLookahead(&state, buf, status); michael@0: michael@0: state.inputdir = inputDir; michael@0: state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0; michael@0: state.outputdir = outputDir; michael@0: state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0; michael@0: state.makeBinaryCollation = makeBinaryCollation; michael@0: state.omitCollationRules = omitCollationRules; michael@0: michael@0: ustr_init(&comment); michael@0: expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); michael@0: michael@0: state.bundle = bundle_open(&comment, FALSE, status); michael@0: michael@0: if (state.bundle == NULL || U_FAILURE(*status)) michael@0: { michael@0: return NULL; michael@0: } michael@0: michael@0: michael@0: bundle_setlocale(state.bundle, tokenValue->fChars, status); michael@0: michael@0: /* The following code is to make Empty bundle work no matter with :table specifer or not */ michael@0: token = getToken(&state, NULL, NULL, &line, status); michael@0: if(token==TOK_COLON) { michael@0: *status=U_ZERO_ERROR; michael@0: bundleType=parseResourceType(&state, status); michael@0: michael@0: if(isTable(bundleType)) michael@0: { michael@0: expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status); michael@0: } michael@0: else michael@0: { michael@0: *status=U_PARSE_ERROR; michael@0: error(line, "parse error. Stopped parsing with %s", u_errorName(*status)); michael@0: } michael@0: } michael@0: else michael@0: { michael@0: /* not a colon */ michael@0: if(token==TOK_OPEN_BRACE) michael@0: { michael@0: *status=U_ZERO_ERROR; michael@0: bundleType=RT_TABLE; michael@0: } michael@0: else michael@0: { michael@0: /* neither colon nor open brace */ michael@0: *status=U_PARSE_ERROR; michael@0: bundleType=RT_UNKNOWN; michael@0: error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); michael@0: } michael@0: } michael@0: michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: bundle_close(state.bundle, status); michael@0: return NULL; michael@0: } michael@0: michael@0: if(bundleType==RT_TABLE_NO_FALLBACK) { michael@0: /* michael@0: * Parse a top-level table with the table(nofallback) declaration. michael@0: * This is the same as a regular table, but also sets the michael@0: * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . michael@0: */ michael@0: state.bundle->noFallback=TRUE; michael@0: } michael@0: /* top-level tables need not handle special table names like "collations" */ michael@0: realParseTable(&state, state.bundle->fRoot, NULL, line, status); michael@0: if(dependencyArray!=NULL){ michael@0: table_add(state.bundle->fRoot, dependencyArray, 0, status); michael@0: dependencyArray = NULL; michael@0: } michael@0: if (U_FAILURE(*status)) michael@0: { michael@0: bundle_close(state.bundle, status); michael@0: res_close(dependencyArray); michael@0: return NULL; michael@0: } michael@0: michael@0: if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF) michael@0: { michael@0: warning(line, "extraneous text after resource bundle (perhaps unmatched braces)"); michael@0: if(isStrict()){ michael@0: *status = U_INVALID_FORMAT_ERROR; michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: cleanupLookahead(&state); michael@0: ustr_deinit(&comment); michael@0: return state.bundle; michael@0: }