The Tor Browser: intl/icu/source/common/util

     1 /*

     2 **********************************************************************

     3 *   Copyright (c) 2001-2011, International Business Machines

     4 *   Corporation and others.  All Rights Reserved.

     5 **********************************************************************

     6 *   Date        Name        Description

     7 *   11/19/2001  aliu        Creation.

     8 **********************************************************************

     9 */

    11 #include "unicode/uchar.h"

    12 #include "unicode/utf16.h"

    13 #include "patternprops.h"

    14 #include "util.h"

    16 U_NAMESPACE_BEGIN

    18 /**

    19  * Parse an integer at pos, either of the form \d+ or of the form

    20  * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,

    21  * or octal format.

    22  * @param pos INPUT-OUTPUT parameter.  On input, the first

    23  * character to parse.  On output, the character after the last

    24  * parsed character.

    25  */

    26 int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit) {

    27     int32_t count = 0;

    28     int32_t value = 0;

    29     int32_t p = pos;

    30     int8_t radix = 10;

    32     if (p < limit && rule.charAt(p) == 48 /*0*/) {

    33         if (p+1 < limit && (rule.charAt(p+1) == 0x78 /*x*/ || rule.charAt(p+1) == 0x58 /*X*/)) {

    34             p += 2;

    35             radix = 16;

    36         }

    37         else {

    38             p++;

    39             count = 1;

    40             radix = 8;

    41         }

    42     }

    44     while (p < limit) {

    45         int32_t d = u_digit(rule.charAt(p++), radix);

    46         if (d < 0) {

    47             --p;

    48             break;

    49         }

    50         ++count;

    51         int32_t v = (value * radix) + d;

    52         if (v <= value) {

    53             // If there are too many input digits, at some point

    54             // the value will go negative, e.g., if we have seen

    55             // "0x8000000" already and there is another '0', when

    56             // we parse the next 0 the value will go negative.

    57             return 0;

    58         }

    59         value = v;

    60     }

    61     if (count > 0) {

    62         pos = p;

    63     }

    64     return value;

    65 }

    67 /**

    68  * Parse a pattern string starting at offset pos.  Keywords are

    69  * matched case-insensitively.  Spaces may be skipped and may be

    70  * optional or required.  Integer values may be parsed, and if

    71  * they are, they will be returned in the given array.  If

    72  * successful, the offset of the next non-space character is

    73  * returned.  On failure, -1 is returned.

    74  * @param pattern must only contain lowercase characters, which

    75  * will match their uppercase equivalents as well.  A space

    76  * character matches one or more required spaces.  A '~' character

    77  * matches zero or more optional spaces.  A '#' character matches

    78  * an integer and stores it in parsedInts, which the caller must

    79  * ensure has enough capacity.

    80  * @param parsedInts array to receive parsed integers.  Caller

    81  * must ensure that parsedInts.length is >= the number of '#'

    82  * signs in 'pattern'.

    83  * @return the position after the last character parsed, or -1 if

    84  * the parse failed

    85  */

    86 int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,

    87                               const UnicodeString& pattern, int32_t* parsedInts) {

    88     // TODO Update this to handle surrogates

    89     int32_t p;

    90     int32_t intCount = 0; // number of integers parsed

    91     for (int32_t i=0; i<pattern.length(); ++i) {

    92         UChar cpat = pattern.charAt(i);

    93         UChar c;

    94         switch (cpat) {

    95         case 32 /*' '*/:

    96             if (pos >= limit) {

    97                 return -1;

    98             }

    99             c = rule.charAt(pos++);

   100             if (!PatternProps::isWhiteSpace(c)) {

   101                 return -1;

   102             }

   103             // FALL THROUGH to skipWhitespace

   104         case 126 /*'~'*/:

   105             pos = skipWhitespace(rule, pos);

   106             break;

   107         case 35 /*'#'*/:

   108             p = pos;

   109             parsedInts[intCount++] = parseInteger(rule, p, limit);

   110             if (p == pos) {

   111                 // Syntax error; failed to parse integer

   112                 return -1;

   113             }

   114             pos = p;

   115             break;

   116         default:

   117             if (pos >= limit) {

   118                 return -1;

   119             }

   120             c = (UChar) u_tolower(rule.charAt(pos++));

   121             if (c != cpat) {

   122                 return -1;

   123             }

   124             break;

   125         }

   126     }

   127     return pos;

   128 }

   130 /**

   131  * Parse a Unicode identifier from the given string at the given

   132  * position.  Return the identifier, or an empty string if there

   133  * is no identifier.

   134  * @param str the string to parse

   135  * @param pos INPUT-OUPUT parameter.  On INPUT, pos is the

   136  * first character to examine.  It must be less than str.length(),

   137  * and it must not point to a whitespace character.  That is, must

   138  * have pos < str.length().  On

   139  * OUTPUT, the position after the last parsed character.

   140  * @return the Unicode identifier, or an empty string if there is

   141  * no valid identifier at pos.

   142  */

   143 UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {

   144     // assert(pos < str.length());

   145     UnicodeString buf;

   146     int p = pos;

   147     while (p < str.length()) {

   148         UChar32 ch = str.char32At(p);

   149         if (buf.length() == 0) {

   150             if (u_isIDStart(ch)) {

   151                 buf.append(ch);

   152             } else {

   153                 buf.truncate(0);

   154                 return buf;

   155             }

   156         } else {

   157             if (u_isIDPart(ch)) {

   158                 buf.append(ch);

   159             } else {

   160                 break;

   161             }

   162         }

   163         p += U16_LENGTH(ch);

   164     }

   165     pos = p;

   166     return buf;

   167 }

   169 /**

   170  * Parse an unsigned 31-bit integer at the given offset.  Use

   171  * UCharacter.digit() to parse individual characters into digits.

   172  * @param text the text to be parsed

   173  * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the

   174  * offset within text at which to start parsing; it should point

   175  * to a valid digit.  On exit, pos[0] is the offset after the last

   176  * parsed character.  If the parse failed, it will be unchanged on

   177  * exit.  Must be >= 0 on entry.

   178  * @param radix the radix in which to parse; must be >= 2 and <=

   179  * 36.

   180  * @return a non-negative parsed number, or -1 upon parse failure.

   181  * Parse fails if there are no digits, that is, if pos[0] does not

   182  * point to a valid digit on entry, or if the number to be parsed

   183  * does not fit into a 31-bit unsigned integer.

   184  */

   185 int32_t ICU_Utility::parseNumber(const UnicodeString& text,

   186                                  int32_t& pos, int8_t radix) {

   187     // assert(pos[0] >= 0);

   188     // assert(radix >= 2);

   189     // assert(radix <= 36);

   190     int32_t n = 0;

   191     int32_t p = pos;

   192     while (p < text.length()) {

   193         UChar32 ch = text.char32At(p);

   194         int32_t d = u_digit(ch, radix);

   195         if (d < 0) {

   196             break;

   197         }

   198         n = radix*n + d;

   199         // ASSUME that when a 32-bit integer overflows it becomes

   200         // negative.  E.g., 214748364 * 10 + 8 => negative value.

   201         if (n < 0) {

   202             return -1;

   203         }

   204         ++p;

   205     }

   206     if (p == pos) {

   207         return -1;

   208     }

   209     pos = p;

   210     return n;

   211 }

   213 U_NAMESPACE_END

The Tor Browser / file revision

intl/icu/source/common/util_props.cpp@fc2d59ddac77

intl/icu/source/common/util_props.cpp