intl/icu/source/tools/genrb/read.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 1998-2012, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *
     9 * File read.c
    10 *
    11 * Modification History:
    12 *
    13 *   Date        Name        Description
    14 *   05/26/99    stephen     Creation.
    15 *   5/10/01     Ram         removed ustdio dependency
    16 *******************************************************************************
    17 */
    19 #include "read.h"
    20 #include "errmsg.h"
    21 #include "unicode/ustring.h"
    22 #include "unicode/utf16.h"
    24 #define OPENBRACE    0x007B  /* '{' - returned as TOK_OPEN_BRACE */
    25 #define CLOSEBRACE   0x007D  /* '}' - returned as TOK_CLOSE_BRACE */
    26 #define COMMA        0x002C  /* ',' - returned as TOK_COMMA */
    27 #define QUOTE        0x0022  /* '"' - opens and closes a quoted string */
    28 #define ESCAPE       0x005C  /* backslash - handed to unescape() by the string reader */
    29 #define SLASH        0x002F  /* '/' - first character of both comment openers */
    30 #define ASTERISK     0x002A  /* '*' - second character of a block comment opener */
    31 #define SPACE        0x0020  /* ' ' - inserted between merged string pieces */
    32 #define COLON        0x003A  /* ':' - returned as TOK_COLON */
    33 #define BADBOM       0xFFFE  /* getNextToken() returns TOK_ERROR for it; presumably a byte-swapped U+FEFF - confirm */
    34 #define CR           0x000D  /* carriage return */
    35 #define LF           0x000A  /* line feed */
    37 static int32_t lineCount;  /* current input line: set to 1 by resetLineNumber(), advanced as line breaks are consumed */
    39 /* Forward declarations of the file-local helpers defined below */
    40 static enum ETokenType getStringToken(UCHARBUF *buf,
    41                                       UChar32 initialChar,
    42                                       struct UString *token,
    43                                       UErrorCode *status);
    45 static UChar32 getNextChar           (UCHARBUF *buf, UBool skipwhite, struct UString *token, UErrorCode *status);
    46 static void    seekUntilNewline      (UCHARBUF *buf, struct UString *token, UErrorCode *status);
    47 static void    seekUntilEndOfComment (UCHARBUF *buf, struct UString *token, UErrorCode *status);
    48 static UBool   isWhitespace          (UChar32 c);
    49 static UBool   isNewline             (UChar32 c);
    51 U_CFUNC void resetLineNumber() {
    52     lineCount = 1;
    53 }
    55 /* Read and return the next token from the stream.  If the token is of
    56    type eString, fill in the token parameter with the token.  If the
    57    token is eError, then the status parameter will contain the
    58    specific error.  This will be eItemNotFound at the end of file,
    59    indicating that all tokens have been returned.  This method will
    60    never return eString twice in a row; instead, multiple adjacent
    61    string tokens will be merged into one, with no intervening
    62    space. */
    63 U_CFUNC enum ETokenType
    64 getNextToken(UCHARBUF* buf,
    65              struct UString *token,
    66              uint32_t *linenumber, /* out: linenumber of token */
    67              struct UString *comment,
    68              UErrorCode *status) {
    69     enum ETokenType result;
    70     UChar32         c;
    72     if (U_FAILURE(*status)) {
    73         return TOK_ERROR;
    74     }
    76     /* Skip whitespace */
    77     c = getNextChar(buf, TRUE, comment, status);
    79     if (U_FAILURE(*status)) {
    80         return TOK_ERROR;
    81     }
    83     *linenumber = lineCount;
    85     switch(c) {
    86     case BADBOM:
    87         return TOK_ERROR;
    88     case OPENBRACE:
    89         return TOK_OPEN_BRACE;
    90     case CLOSEBRACE:
    91         return TOK_CLOSE_BRACE;
    92     case COMMA:
    93         return TOK_COMMA;
    94     case U_EOF:
    95         return TOK_EOF;
    96     case COLON:
    97         return TOK_COLON;
    99     default:
   100         result = getStringToken(buf, c, token, status);
   101     }
   103     *linenumber = lineCount;
   104     return result;
   105 }
   107 /* Copy a string token into the given UnicodeString.  Upon entry, we
   108    have already read the first character of the string token, which is
   109    not a whitespace character (but may be a QUOTE or ESCAPE). This
   110    function reads all subsequent characters that belong with this
   111    string, and copy them into the token parameter. The other
   112    important, and slightly convoluted purpose of this function is to
   113    merge adjacent strings.  It looks forward a bit, and if the next
   114    non comment, non whitespace item is a string, it reads it in as
   115    well.  If two adjacent strings are quoted, they are merged without
   116    intervening space.  Otherwise a single SPACE character is
   117    inserted. */
   118 static enum ETokenType getStringToken(UCHARBUF* buf,
   119                                       UChar32 initialChar,
   120                                       struct UString *token,
   121                                       UErrorCode *status) {
   122     UBool    lastStringWasQuoted;
   123     UChar32  c;
   124     UChar    target[3] = { '\0' };
   125     UChar    *pTarget   = target;
   126     int      len=0;
   127     UBool    isFollowingCharEscaped=FALSE;
   128     UBool    isNLUnescaped = FALSE;
   129     UChar32  prevC=0;
   131     /* We are guaranteed on entry that initialChar is not a whitespace
   132        character. If we are at the EOF, or have some other problem, it
   133        doesn't matter; we still want to validly return the initialChar
   134        (if nothing else) as a string token. */
   136     if (U_FAILURE(*status)) {
   137         return TOK_ERROR;
   138     }
   140     /* setup */
   141     lastStringWasQuoted = FALSE;
   142     c = initialChar;
   143     ustr_setlen(token, 0, status);
   145     if (U_FAILURE(*status)) {
   146         return TOK_ERROR;
   147     }
   149     for (;;) {
   150         if (c == QUOTE) {
   151             if (!lastStringWasQuoted && token->fLength > 0) {
   152                 ustr_ucat(token, SPACE, status);
   154                 if (U_FAILURE(*status)) {
   155                     return TOK_ERROR;
   156                 }
   157             }
   159             lastStringWasQuoted = TRUE;
   161             for (;;) {
   162                 c = ucbuf_getc(buf,status);
   164                 /* EOF reached */
   165                 if (c == U_EOF) {
   166                     return TOK_EOF;
   167                 }
   169                 /* Unterminated quoted strings */
   170                 if (U_FAILURE(*status)) {
   171                     return TOK_ERROR;
   172                 }
   174                 if (c == QUOTE && !isFollowingCharEscaped) {
   175                     break;
   176                 }
   178                 if (c == ESCAPE  && !isFollowingCharEscaped) {
   179                     pTarget = target;
   180                     c       = unescape(buf, status);
   182                     if (c == U_ERR) {
   183                         return TOK_ERROR;
   184                     }
   185                     if(c == CR || c == LF){
   186                         isNLUnescaped = TRUE;
   187                     }
   188                 }               
   190                 if(c==ESCAPE && !isFollowingCharEscaped){
   191                     isFollowingCharEscaped = TRUE;
   192                 }else{
   193                     U_APPEND_CHAR32(c, pTarget,len);
   194                     pTarget = target;
   195                     ustr_uscat(token, pTarget,len, status);
   196                     isFollowingCharEscaped = FALSE;
   197                     len=0;
   198                     if(c == CR || c == LF){
   199                         if(isNLUnescaped == FALSE && prevC!=CR){
   200                             lineCount++;
   201                         }
   202                         isNLUnescaped = FALSE;
   203                     }
   204                 }
   206                 if (U_FAILURE(*status)) {
   207                     return TOK_ERROR;
   208                 }
   209                 prevC = c;
   210             }
   211         } else {
   212             if (token->fLength > 0) {
   213                 ustr_ucat(token, SPACE, status);
   215                 if (U_FAILURE(*status)) {
   216                     return TOK_ERROR;
   217                 }
   218             }
   220             if(lastStringWasQuoted){
   221                 if(getShowWarning()){
   222                     warning(lineCount, "Mixing quoted and unquoted strings");
   223                 }
   224                 if(isStrict()){
   225                     return TOK_ERROR;
   226                 }
   228             }
   230             lastStringWasQuoted = FALSE;
   232             /* if we reach here we are mixing 
   233              * quoted and unquoted strings
   234              * warn in normal mode and error in
   235              * pedantic mode
   236              */
   238             if (c == ESCAPE) {
   239                 pTarget = target;
   240                 c       = unescape(buf, status);
   242                 /* EOF reached */
   243                 if (c == U_EOF) {
   244                     return TOK_ERROR;
   245                 }
   246             }
   248             U_APPEND_CHAR32(c, pTarget,len);
   249             pTarget = target;
   250             ustr_uscat(token, pTarget,len, status);
   251             len=0;
   253             if (U_FAILURE(*status)) {
   254                 return TOK_ERROR;
   255             }
   257             for (;;) {
   258                 /* DON'T skip whitespace */
   259                 c = getNextChar(buf, FALSE, NULL, status);
   261                 /* EOF reached */
   262                 if (c == U_EOF) {
   263                     ucbuf_ungetc(c, buf);
   264                     return TOK_STRING;
   265                 }
   267                 if (U_FAILURE(*status)) {
   268                     return TOK_STRING;
   269                 }
   271                 if (c == QUOTE
   272                         || c == OPENBRACE
   273                         || c == CLOSEBRACE
   274                         || c == COMMA
   275                         || c == COLON) {
   276                     ucbuf_ungetc(c, buf);
   277                     break;
   278                 }
   280                 if (isWhitespace(c)) {
   281                     break;
   282                 }
   284                 if (c == ESCAPE) {
   285                     pTarget = target;
   286                     c       = unescape(buf, status);
   288                     if (c == U_ERR) {
   289                         return TOK_ERROR;
   290                     }
   291                 }
   293                 U_APPEND_CHAR32(c, pTarget,len);
   294                 pTarget = target;
   295                 ustr_uscat(token, pTarget,len, status);
   296                 len=0;
   297                 if (U_FAILURE(*status)) {
   298                     return TOK_ERROR;
   299                 }
   300             }
   301         }
   303         /* DO skip whitespace */
   304         c = getNextChar(buf, TRUE, NULL, status);
   306         if (U_FAILURE(*status)) {
   307             return TOK_STRING;
   308         }
   310         if (c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) {
   311             ucbuf_ungetc(c, buf);
   312             return TOK_STRING;
   313         }
   314     }
   315 }
   317 /* Retrieve the next character.  If skipwhite is
   318    true, whitespace is skipped as well. */
   319 static UChar32 getNextChar(UCHARBUF* buf,
   320                            UBool skipwhite,
   321                            struct UString *token,
   322                            UErrorCode *status) {
   323     UChar32 c, c2;
   325     if (U_FAILURE(*status)) {
   326         return U_EOF;
   327     }
   329     for (;;) {
   330         c = ucbuf_getc(buf,status);
   332         if (c == U_EOF) {
   333             return U_EOF;
   334         }
   336         if (skipwhite && isWhitespace(c)) {
   337             continue;
   338         }
   340         /* This also handles the get() failing case */
   341         if (c != SLASH) {
   342             return c;
   343         }
   345         c = ucbuf_getc(buf,status); /* "/c" */
   347         if (c == U_EOF) {
   348             return U_EOF;
   349         }
   351         switch (c) {
   352         case SLASH:  /* "//" */
   353             seekUntilNewline(buf, NULL, status);
   354             break;
   356         case ASTERISK:  /* " / * " */
   357             c2 = ucbuf_getc(buf, status); /* "/ * c" */
   358             if(c2 == ASTERISK){  /* "/ * *" */
   359                 /* parse multi-line comment and store it in token*/
   360                 seekUntilEndOfComment(buf, token, status);
   361             } else {
   362                 ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/ *".  Include c2  back in buffer.  */
   363                 seekUntilEndOfComment(buf, NULL, status);
   364             }
   365             break;
   367         default:
   368             ucbuf_ungetc(c, buf); /* "/c" - put back the c */
   369             /* If get() failed this is a NOP */
   370             return SLASH;
   371         }
   373     }
   374 }
   376 static void seekUntilNewline(UCHARBUF* buf,
   377                              struct UString *token,
   378                              UErrorCode *status) {
   379     UChar32 c;
   381     if (U_FAILURE(*status)) {
   382         return;
   383     }
   385     do {
   386         c = ucbuf_getc(buf,status);
   387         /* add the char to token */
   388         if(token!=NULL){
   389             ustr_u32cat(token, c, status);
   390         }
   391     } while (!isNewline(c) && c != U_EOF && *status == U_ZERO_ERROR);
   392 }
   394 static void seekUntilEndOfComment(UCHARBUF *buf,
   395                                   struct UString *token,
   396                                   UErrorCode *status) {
   397     UChar32  c, d;
   398     uint32_t line;
   400     if (U_FAILURE(*status)) {
   401         return;
   402     }
   404     line = lineCount;
   406     do {
   407         c = ucbuf_getc(buf, status);
   409         if (c == ASTERISK) {
   410             d = ucbuf_getc(buf, status);
   412             if (d != SLASH) {
   413                 ucbuf_ungetc(d, buf);
   414             } else {
   415                 break;
   416             }
   417         }
   418         /* add the char to token */
   419         if(token!=NULL){
   420             ustr_u32cat(token, c, status);
   421         }
   422         /* increment the lineCount */
   423         isNewline(c);
   425     } while (c != U_EOF && *status == U_ZERO_ERROR);
   427     if (c == U_EOF) {
   428         *status = U_INVALID_FORMAT_ERROR;
   429         error(line, "unterminated comment detected");
   430     }
   431 }
   433 U_CFUNC UChar32 unescape(UCHARBUF *buf, UErrorCode *status) {
   434     if (U_FAILURE(*status)) {
   435         return U_EOF;
   436     }
   438     /* We expect to be called after the ESCAPE has been seen, but
   439      * u_fgetcx needs an ESCAPE to do its magic. */
   440     ucbuf_ungetc(ESCAPE, buf);
   442     return ucbuf_getcx32(buf, status);
   443 }
   445 static UBool isWhitespace(UChar32 c) {
   446     switch (c) {
   447         /* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */
   448     case 0x000A:
   449     case 0x2029:
   450         lineCount++;
   451     case 0x000D:
   452     case 0x0020:
   453     case 0x0009:
   454     case 0xFEFF:
   455         return TRUE;
   457     default:
   458         return FALSE;
   459     }
   460 }
   462 static UBool isNewline(UChar32 c) {
   463     switch (c) {
   464         /* '\n', '\r', 0x2029 */
   465     case 0x000A:
   466     case 0x2029:
   467         lineCount++;
   468     case 0x000D:
   469         return TRUE;
   471     default:
   472         return FALSE;
   473     }
   474 }

mercurial