intl/icu/source/common/util.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/util.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,239 @@
     1.4 +/*
     1.5 + **********************************************************************
     1.6 + *   Copyright (c) 2001-2011, International Business Machines
     1.7 + *   Corporation and others.  All Rights Reserved.
     1.8 + **********************************************************************
     1.9 + *   Date        Name        Description
    1.10 + *   11/19/2001  aliu        Creation.
    1.11 + **********************************************************************
    1.12 + */
    1.13 +
    1.14 +#ifndef ICU_UTIL_H
    1.15 +#define ICU_UTIL_H
    1.16 +
    1.17 +#include "unicode/utypes.h"
    1.18 +#include "unicode/uobject.h"
    1.19 +#include "unicode/unistr.h"
    1.20 +
    1.21 +//--------------------------------------------------------------------
    1.22 +// class ICU_Utility
    1.23 +// i18n utility functions, scoped into the class ICU_Utility.
    1.24 +//--------------------------------------------------------------------
    1.25 +
    1.26 +U_NAMESPACE_BEGIN
    1.27 +
    1.28 +class UnicodeMatcher;
    1.29 +
    1.30 +class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
    1.31 + public:
    1.32 +
    1.33 +    /**
    1.34 +     * Append a number to the given UnicodeString in the given radix.
    1.35 +     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
    1.36 +     * radices 11 through 36.
    1.37 +     * @param result the digits of the number are appended here
    1.38 +     * @param n the number to be converted to digits; may be negative.
    1.39 +     * If negative, a '-' is prepended to the digits.
    1.40 +     * @param radix a radix from 2 to 36 inclusive.
    1.41 +     * @param minDigits the minimum number of digits, not including
    1.42 +     * any '-', to produce.  Values less than 2 have no effect.  One
    1.43 +     * digit is always emitted regardless of this parameter.
    1.44 +     * @return a reference to result
    1.45 +     */
    1.46 +    static UnicodeString& appendNumber(UnicodeString& result, int32_t n,
    1.47 +                                       int32_t radix = 10,
    1.48 +                                       int32_t minDigits = 1);
    1.49 +
    1.50 +    /**
    1.51 +     * Return true if the character is NOT printable ASCII.
    1.52 +     *
    1.53 +     * This method should really be in UnicodeString (or similar).  For
    1.54 +     * now, we implement it here and share it with friend classes.
    1.55 +     */
    1.56 +    static UBool isUnprintable(UChar32 c);
    1.57 +
    1.58 +    /**
    1.59 +     * Escape unprintable characters using \uxxxx notation for U+0000 to
    1.60 +     * U+FFFF and \Uxxxxxxxx for U+10000 and above.  If the character is
    1.61 +     * printable ASCII, then do nothing and return FALSE.  Otherwise,
    1.62 +     * append the escaped notation and return TRUE.
    1.63 +     */
    1.64 +    static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
    1.65 +
    1.66 +    /**
    1.67 +     * Returns the index of a character, ignoring quoted text.
    1.68 +     * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
    1.69 +     * found by a search for 'h'.
    1.70 +     * @param text text to be searched
    1.71 +     * @param start the beginning index, inclusive; <code>0 <= start
    1.72 +     * <= limit</code>.
    1.73 +     * @param limit the ending index, exclusive; <code>start <= limit
    1.74 +     * <= text.length()</code>.
    1.75 +     * @param c character to search for
    1.76 +     * @return Offset of the first instance of c, or -1 if not found.
    1.77 +     */
    1.78 +//?FOR FUTURE USE.  DISABLED FOR NOW for coverage reasons; the comment above documents this disabled declaration.
    1.79 +//    static int32_t quotedIndexOf(const UnicodeString& text,
    1.80 +//                                 int32_t start, int32_t limit,
    1.81 +//                                 UChar c);
    1.82 +
    1.83 +    /**
    1.84 +     * Skip over a sequence of zero or more white space characters at pos.
    1.85 +     * @param str the string to scan
    1.86 +     * @param pos index in str at which to start; modified only if advance is true
    1.87 +     * @param advance if true, advance pos to the returned index; otherwise leave pos unchanged
    1.88 +     * @return the index of the first non-white-space character at or
    1.89 +     * after pos, or str.length(), if there is none.
    1.90 +     */
    1.91 +    static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
    1.92 +                                  UBool advance = FALSE);
    1.93 +
    1.94 +    /**
    1.95 +     * Skip over Pattern_White_Space in a Replaceable.
    1.96 +     * Skipping may be done in the forward or
    1.97 +     * reverse direction.  In either case, the leftmost index will be
    1.98 +     * inclusive, and the rightmost index will be exclusive.  That is,
    1.99 +     * given a range defined as [start, limit), the call
   1.100 +     * skipWhitespace(text, start, limit) will advance start past leading
   1.101 +     * whitespace, whereas the call skipWhitespace(text, limit, start),
   1.102 +     * will back up limit past trailing whitespace.
   1.103 +     * @param text the text to be analyzed
   1.104 +     * @param pos either the start or limit of a range of 'text', to skip
   1.105 +     * leading or trailing whitespace, respectively
   1.106 +     * @param stop either the limit or start of a range of 'text', to skip
   1.107 +     * leading or trailing whitespace, respectively
   1.108 +     * @return the new start or limit, depending on what was passed in to
   1.109 +     * 'pos'
   1.110 +     */
   1.111 +//?FOR FUTURE USE.  DISABLED FOR NOW for coverage reasons; the comment above documents this disabled declaration.
   1.112 +//?    static int32_t skipWhitespace(const Replaceable& text,
   1.113 +//?                                  int32_t pos, int32_t stop);
   1.114 +
   1.115 +    /**
   1.116 +     * Parse a single non-whitespace character 'ch', optionally
   1.117 +     * preceded by whitespace.
   1.118 +     * @param id the string to be parsed
   1.119 +     * @param pos INPUT-OUTPUT parameter.  On input, pos is the
   1.120 +     * offset of the first character to be parsed.  On output, pos
   1.121 +     * is the index after the last parsed character.  If the parse
   1.122 +     * fails, pos will be unchanged.
   1.123 +     * @param ch the non-whitespace character to be parsed.
   1.124 +     * @return true if 'ch' is seen preceded by zero or more
   1.125 +     * whitespace characters.
   1.126 +     */
   1.127 +    static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch);
   1.128 +
   1.129 +    /**
   1.130 +     * Parse a pattern string starting at offset pos.  Keywords are
   1.131 +     * matched case-insensitively.  Spaces may be skipped and may be optional
   1.132 +     * or required.  Integer values may be parsed, and if they are, they will
   1.133 +     * be returned in the given array.  If successful, the offset of the next
   1.134 +     * non-space character is returned.  On failure, -1 is returned.
   1.135 +     * @param rule presumably the text to be parsed -- NOTE(review): inferred from the signature; confirm
   1.136 +     * @param pos offset at which parsing starts
   1.137 +     * @param limit presumably the offset at which parsing must stop -- TODO(review): confirm
   1.138 +     * @param pattern must only contain lowercase characters, which
   1.139 +     * will match their uppercase equivalents as well.  A space
   1.140 +     * character matches one or more required spaces.  A '~' character
   1.141 +     * matches zero or more optional spaces.  A '#' character matches
   1.142 +     * an integer and stores it in parsedInts.
   1.143 +     * @param parsedInts array to receive parsed integers.  Caller must
   1.144 +     * ensure it has room for at least as many elements as there are '#'
   1.145 +     * signs in 'pattern'.
   1.146 +     * @return the position after the last character parsed, or -1 if the parse failed
   1.147 +     */
   1.148 +    static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
   1.149 +                                const UnicodeString& pattern, int32_t* parsedInts);
   1.150 +        
   1.151 +    /**
   1.152 +     * Parse a pattern string within the given Replaceable and a parsing
   1.153 +     * pattern.  Characters are matched literally and case-sensitively
   1.154 +     * except for the following special characters:
   1.155 +     *
   1.156 +     * ~  zero or more Pattern_White_Space chars
   1.157 +     *
   1.158 +     * If end of pattern is reached with all matches along the way,
   1.159 +     * the first unparsed index is returned (index is passed by value,
   1.160 +     * so the caller's variable is not modified).  Otherwise -1 is returned.
   1.161 +     * @param pat pattern that controls parsing
   1.162 +     * @param text text to be parsed, starting at index
   1.163 +     * @param index offset to first character to parse
   1.164 +     * @param limit offset after last character to parse
   1.165 +     * @return index after last parsed character, or -1 on parse failure.
   1.166 +     */
   1.167 +    static int32_t parsePattern(const UnicodeString& pat,
   1.168 +                                const Replaceable& text,
   1.169 +                                int32_t index,
   1.170 +                                int32_t limit);
   1.171 +
   1.172 +    /**
   1.173 +     * Parse an integer at pos, either of the form \d+ or of the form
   1.174 +     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, or octal format.
   1.175 +     * @param rule presumably the string containing the integer -- NOTE(review): inferred from the signature; confirm
   1.176 +     * @param pos INPUT-OUTPUT parameter.  On input, the first character to
   1.177 +     * parse.  On output, the character after the last parsed character.
   1.178 +     * NOTE(review): limit and the return value are undocumented here; presumably limit bounds the parse and the parsed value is returned -- confirm.
   1.179 +     */
   1.180 +    static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit);
   1.181 +
   1.182 +    /**
   1.183 +     * Parse a Unicode identifier from the given string at the given
   1.184 +     * position.  Return the identifier, or an empty string if there
   1.185 +     * is no identifier.
   1.186 +     * @param str the string to parse
   1.187 +     * @param pos INPUT-OUTPUT parameter.  On INPUT, pos is the
   1.188 +     * first character to examine.  It must be less than str.length(),
   1.189 +     * and it must not point to a whitespace character.  That is, must
   1.190 +     * have pos < str.length() and
   1.191 +     * !UCharacter::isWhitespace(str.char32At(pos)).  On
   1.192 +     * OUTPUT, the position after the last parsed character.
   1.193 +     * @return the Unicode identifier, or an empty string if there is
   1.194 +     * no valid identifier at pos.
   1.195 +     */
   1.196 +    static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos);
   1.197 +
   1.198 +    /**
   1.199 +     * Parse an unsigned 31-bit integer at the given offset.  Use
   1.200 +     * UCharacter.digit() to parse individual characters into digits.
   1.201 +     * @param text the text to be parsed
   1.202 +     * @param pos INPUT-OUTPUT parameter.  On entry, pos is the
   1.203 +     * offset within text at which to start parsing; it should point
   1.204 +     * to a valid digit.  On exit, pos is the offset after the last
   1.205 +     * parsed character.  If the parse failed, it will be unchanged on
   1.206 +     * exit.  Must be >= 0 on entry.
   1.207 +     * @param radix the radix in which to parse; must be >= 2 and <=
   1.208 +     * 36.
   1.209 +     * @return a non-negative parsed number, or -1 upon parse failure.
   1.210 +     * Parse fails if there are no digits, that is, if pos does not
   1.211 +     * point to a valid digit on entry, or if the number to be parsed
   1.212 +     * does not fit into a 31-bit unsigned integer.
   1.213 +     */
   1.214 +    static int32_t parseNumber(const UnicodeString& text,
   1.215 +                               int32_t& pos, int8_t radix);
   1.216 +    // appendToRule, code point overload.  NOTE(review): undocumented in this header; confirm parameter semantics in the implementation.
   1.217 +    static void appendToRule(UnicodeString& rule,
   1.218 +                             UChar32 c,
   1.219 +                             UBool isLiteral,
   1.220 +                             UBool escapeUnprintable,
   1.221 +                             UnicodeString& quoteBuf);
   1.222 +    // appendToRule, string overload: takes a whole UnicodeString instead of a single code point.
   1.223 +    static void appendToRule(UnicodeString& rule,
   1.224 +                             const UnicodeString& text,
   1.225 +                             UBool isLiteral,
   1.226 +                             UBool escapeUnprintable,
   1.227 +                             UnicodeString& quoteBuf);
   1.228 +    // appendToRule, matcher overload: takes a UnicodeMatcher pointer and has no isLiteral parameter.
   1.229 +    static void appendToRule(UnicodeString& rule,
   1.230 +                             const UnicodeMatcher* matcher,
   1.231 +                             UBool escapeUnprintable,
   1.232 +                             UnicodeString& quoteBuf);
   1.233 +
   1.234 +private:
   1.235 +    // do not instantiate: the constructor is private because all methods are static
   1.236 +    ICU_Utility();
   1.237 +};
   1.238 +
   1.239 +U_NAMESPACE_END
   1.240 +
   1.241 +#endif
   1.242 +//eof

mercurial