The Tor Browser: intl/icu/source/tools/toolutil/uparse.c@6474c204b198

Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1
(revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f) for hacking purposes.

     1 /*

     2 *******************************************************************************

3 *

     4 *   Copyright (C) 2000-2012, International Business Machines

     5 *   Corporation and others.  All Rights Reserved.

6 *

     7 *******************************************************************************

     8 *   file name:  uparse.c

     9 *   encoding:   US-ASCII

    10 *   tab size:   8 (not used)

    11 *   indentation:4

    12 *

    13 *   created on: 2000apr18

    14 *   created by: Markus W. Scherer

    15 *

    16 *   This file provides a parser for files that are delimited by one single

    17 *   character like ';' or TAB. Example: the Unicode Character Properties files

    18 *   like UnicodeData.txt are semicolon-delimited.

    19 */

    21 #include "unicode/utypes.h"

    22 #include "unicode/uchar.h"

    23 #include "unicode/ustring.h"

    24 #include "unicode/utf16.h"

    25 #include "cstring.h"

    26 #include "filestrm.h"

    27 #include "uparse.h"

    28 #include "ustr_imp.h"

    30 #include <stdio.h>

    32 U_CAPI const char * U_EXPORT2

    33 u_skipWhitespace(const char *s) {

    34     while(U_IS_INV_WHITESPACE(*s)) {

    35         ++s;

    36     }

    37     return s;

    38 }

    40 U_CAPI char * U_EXPORT2

    41 u_rtrim(char *s) {

    42     char *end=uprv_strchr(s, 0);

    43     while(s<end && U_IS_INV_WHITESPACE(*(end-1))) {

    44         *--end = 0;

    45     }

    46     return end;

    47 }

    49 /*

    50  * If the string starts with # @missing: then return the pointer to the

    51  * following non-whitespace character.

    52  * Otherwise return the original pointer.

    53  * Unicode 5.0 adds such lines in some data files to document

    54  * default property values.

    55  * Poor man's regex for variable amounts of white space.

    56  */

    57 static const char *

    58 getMissingLimit(const char *s) {

    59     const char *s0=s;

    60     if(

    61         *(s=u_skipWhitespace(s))=='#' &&

    62         *(s=u_skipWhitespace(s+1))=='@' &&

    63         0==strncmp((s=u_skipWhitespace(s+1)), "missing", 7) &&

    64         *(s=u_skipWhitespace(s+7))==':'

    65     ) {

    66         return u_skipWhitespace(s+1);

    67     } else {

    68         return s0;

    69     }

    70 }

    72 U_CAPI void U_EXPORT2

    73 u_parseDelimitedFile(const char *filename, char delimiter,

    74                      char *fields[][2], int32_t fieldCount,

    75                      UParseLineFn *lineFn, void *context,

    76                      UErrorCode *pErrorCode) {

    77     FileStream *file;

    78     char line[300];

    79     char *start, *limit;

    80     int32_t i, length;

    82     if(U_FAILURE(*pErrorCode)) {

    83         return;

    84     }

    86     if(fields==NULL || lineFn==NULL || fieldCount<=0) {

    87         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

    88         return;

    89     }

    91     if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) {

    92         filename=NULL;

    93         file=T_FileStream_stdin();

    94     } else {

    95         file=T_FileStream_open(filename, "r");

    96     }

    97     if(file==NULL) {

    98         *pErrorCode=U_FILE_ACCESS_ERROR;

    99         return;

   100     }

   102     while(T_FileStream_readLine(file, line, sizeof(line))!=NULL) {

   103         /* remove trailing newline characters */

   104         length=(int32_t)(u_rtrim(line)-line);

   106         /*

   107          * detect a line with # @missing:

   108          * start parsing after that, or else from the beginning of the line

   109          * set the default warning for @missing lines

   110          */

   111         start=(char *)getMissingLimit(line);

   112         if(start==line) {

   113             *pErrorCode=U_ZERO_ERROR;

   114         } else {

   115             *pErrorCode=U_USING_DEFAULT_WARNING;

   116         }

   118         /* skip this line if it is empty or a comment */

   119         if(*start==0 || *start=='#') {

   120             continue;

   121         }

   123         /* remove in-line comments */

   124         limit=uprv_strchr(start, '#');

   125         if(limit!=NULL) {

   126             /* get white space before the pound sign */

   127             while(limit>start && U_IS_INV_WHITESPACE(*(limit-1))) {

   128                 --limit;

   129             }

   131             /* truncate the line */

   132             *limit=0;

   133         }

   135         /* skip lines with only whitespace */

   136         if(u_skipWhitespace(start)[0]==0) {

   137             continue;

   138         }

   140         /* for each field, call the corresponding field function */

   141         for(i=0; i<fieldCount; ++i) {

   142             /* set the limit pointer of this field */

   143             limit=start;

   144             while(*limit!=delimiter && *limit!=0) {

   145                 ++limit;

   146             }

   148             /* set the field start and limit in the fields array */

   149             fields[i][0]=start;

   150             fields[i][1]=limit;

   152             /* set start to the beginning of the next field, if any */

   153             start=limit;

   154             if(*start!=0) {

   155                 ++start;

   156             } else if(i+1<fieldCount) {

   157                 *pErrorCode=U_PARSE_ERROR;

   158                 limit=line+length;

   159                 i=fieldCount;

   160                 break;

   161             }

   162         }

   164         /* error in a field function? */

   165         if(U_FAILURE(*pErrorCode)) {

   166             break;

   167         }

   169         /* call the field function */

   170         lineFn(context, fields, fieldCount, pErrorCode);

   171         if(U_FAILURE(*pErrorCode)) {

   172             break;

   173         }

   174     }

   176     if(filename!=NULL) {

   177         T_FileStream_close(file);

   178     }

   179 }

   181 /*

   182  * parse a list of code points

   183  * store them as a UTF-32 string in dest[destCapacity]

   184  * return the number of code points

   185  */

   186 U_CAPI int32_t U_EXPORT2

   187 u_parseCodePoints(const char *s,

   188                   uint32_t *dest, int32_t destCapacity,

   189                   UErrorCode *pErrorCode) {

   190     char *end;

   191     uint32_t value;

   192     int32_t count;

   194     if(U_FAILURE(*pErrorCode)) {

   195         return 0;

   196     }

   197     if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {

   198         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

   199         return 0;

   200     }

   202     count=0;

   203     for(;;) {

   204         s=u_skipWhitespace(s);

   205         if(*s==';' || *s==0) {

   206             return count;

   207         }

   209         /* read one code point */

   210         value=(uint32_t)uprv_strtoul(s, &end, 16);

   211         if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {

   212             *pErrorCode=U_PARSE_ERROR;

   213             return 0;

   214         }

   216         /* append it to the destination array */

   217         if(count<destCapacity) {

   218             dest[count++]=value;

   219         } else {

   220             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

   221         }

   223         /* go to the following characters */

   224         s=end;

   225     }

   226 }

   228 /*

   229  * parse a list of code points

   230  * store them as a string in dest[destCapacity]

   231  * set the first code point in *pFirst

   232  * @return The length of the string in numbers of UChars.

   233  */

   234 U_CAPI int32_t U_EXPORT2

   235 u_parseString(const char *s,

   236               UChar *dest, int32_t destCapacity,

   237               uint32_t *pFirst,

   238               UErrorCode *pErrorCode) {

   239     char *end;

   240     uint32_t value;

   241     int32_t destLength;

   243     if(U_FAILURE(*pErrorCode)) {

   244         return 0;

   245     }

   246     if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {

   247         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

   248         return 0;

   249     }

   251     if(pFirst!=NULL) {

   252         *pFirst=0xffffffff;

   253     }

   255     destLength=0;

   256     for(;;) {

   257         s=u_skipWhitespace(s);

   258         if(*s==';' || *s==0) {

   259             if(destLength<destCapacity) {

   260                 dest[destLength]=0;

   261             } else if(destLength==destCapacity) {

   262                 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;

   263             } else {

   264                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;

   265             }

   266             return destLength;

   267         }

   269         /* read one code point */

   270         value=(uint32_t)uprv_strtoul(s, &end, 16);

   271         if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {

   272             *pErrorCode=U_PARSE_ERROR;

   273             return 0;

   274         }

   276         /* store the first code point */

   277         if(pFirst!=NULL) {

   278             *pFirst=value;

   279             pFirst=NULL;

   280         }

   282         /* append it to the destination array */

   283         if((destLength+U16_LENGTH(value))<=destCapacity) {

   284             U16_APPEND_UNSAFE(dest, destLength, value);

   285         } else {

   286             destLength+=U16_LENGTH(value);

   287         }

   289         /* go to the following characters */

   290         s=end;

   291     }

   292 }

   294 /* read a range like start or start..end */

   295 U_CAPI int32_t U_EXPORT2

   296 u_parseCodePointRangeAnyTerminator(const char *s,

   297                                    uint32_t *pStart, uint32_t *pEnd,

   298                                    const char **terminator,

   299                                    UErrorCode *pErrorCode) {

   300     char *end;

   301     uint32_t value;

   303     if(U_FAILURE(*pErrorCode)) {

   304         return 0;

   305     }

   306     if(s==NULL || pStart==NULL || pEnd==NULL) {

   307         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

   308         return 0;

   309     }

   311     /* read the start code point */

   312     s=u_skipWhitespace(s);

   313     value=(uint32_t)uprv_strtoul(s, &end, 16);

   314     if(end<=s || value>=0x110000) {

   315         *pErrorCode=U_PARSE_ERROR;

   316         return 0;

   317     }

   318     *pStart=*pEnd=value;

   320     /* is there a "..end"? */

   321     s=u_skipWhitespace(end);

   322     if(*s!='.' || s[1]!='.') {

   323         *terminator=end;

   324         return 1;

   325     }

   326     s=u_skipWhitespace(s+2);

   328     /* read the end code point */

   329     value=(uint32_t)uprv_strtoul(s, &end, 16);

   330     if(end<=s || value>=0x110000) {

   331         *pErrorCode=U_PARSE_ERROR;

   332         return 0;

   333     }

   334     *pEnd=value;

   336     /* is this a valid range? */

   337     if(value<*pStart) {

   338         *pErrorCode=U_PARSE_ERROR;

   339         return 0;

   340     }

   342     *terminator=end;

   343     return value-*pStart+1;

   344 }

   346 U_CAPI int32_t U_EXPORT2

   347 u_parseCodePointRange(const char *s,

   348                       uint32_t *pStart, uint32_t *pEnd,

   349                       UErrorCode *pErrorCode) {

   350     const char *terminator;

   351     int32_t rangeLength=

   352         u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorCode);

   353     if(U_SUCCESS(*pErrorCode)) {

   354         terminator=u_skipWhitespace(terminator);

   355         if(*terminator!=';' && *terminator!=0) {

   356             *pErrorCode=U_PARSE_ERROR;

   357             return 0;

   358         }

   359     }

   360     return rangeLength;

   361 }

   363 U_CAPI int32_t U_EXPORT2

   364 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) {

   365     const char *read = source;

   366     int32_t i = 0;

   367     unsigned int value = 0;

   368     if(sLen == -1) {

   369         sLen = (int32_t)strlen(source);

   370     }

   372     while(read < source+sLen) {

   373         sscanf(read, "%2x", &value);

   374         if(i < destCapacity) {

   375             dest[i] = (char)value;

   376         }

   377         i++;

   378         read += 2;

   379     }

   380     return u_terminateChars(dest, destCapacity, i, status);

   381 }

The Tor Browser / file revision

intl/icu/source/tools/toolutil/uparse.c@6474c204b198

intl/icu/source/tools/toolutil/uparse.c