1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/util.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,239 @@ 1.4 +/* 1.5 + ********************************************************************** 1.6 + * Copyright (c) 2001-2011, International Business Machines 1.7 + * Corporation and others. All Rights Reserved. 1.8 + ********************************************************************** 1.9 + * Date Name Description 1.10 + * 11/19/2001 aliu Creation. 1.11 + ********************************************************************** 1.12 + */ 1.13 + 1.14 +#ifndef ICU_UTIL_H 1.15 +#define ICU_UTIL_H 1.16 + 1.17 +#include "unicode/utypes.h" 1.18 +#include "unicode/uobject.h" 1.19 +#include "unicode/unistr.h" 1.20 + 1.21 +//-------------------------------------------------------------------- 1.22 +// class ICU_Utility 1.23 +// i18n utility functions, scoped into the class ICU_Utility. 1.24 +//-------------------------------------------------------------------- 1.25 + 1.26 +U_NAMESPACE_BEGIN 1.27 + 1.28 +class UnicodeMatcher; 1.29 + 1.30 +class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ { 1.31 + public: 1.32 + 1.33 + /** 1.34 + * Append a number to the given UnicodeString in the given radix. 1.35 + * Standard digits '0'-'9' are used and letters 'A'-'Z' for 1.36 + * radices 11 through 36. 1.37 + * @param result the digits of the number are appended here 1.38 + * @param n the number to be converted to digits; may be negative. 1.39 + * If negative, a '-' is prepended to the digits. 1.40 + * @param radix a radix from 2 to 36 inclusive. 1.41 + * @param minDigits the minimum number of digits, not including 1.42 + * any '-', to produce. Values less than 2 have no effect. One 1.43 + * digit is always emitted regardless of this parameter. 1.44 + * @return a reference to result 1.45 + */ 1.46 + static UnicodeString& appendNumber(UnicodeString& result, int32_t n, 1.47 + int32_t radix = 10, 1.48 + int32_t minDigits = 1); 1.49 + 1.50 + /** 1.51 + * Return true if the character is NOT printable ASCII. 1.52 + * 1.53 + * This method should really be in UnicodeString (or similar). For 1.54 + * now, we implement it here and share it with friend classes. 1.55 + */ 1.56 + static UBool isUnprintable(UChar32 c); 1.57 + 1.58 + /** 1.59 + * Escape unprintable characters using \uxxxx notation for U+0000 to 1.60 + * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is 1.61 + * printable ASCII, then do nothing and return FALSE. Otherwise, 1.62 + * append the escaped notation and return TRUE. 1.63 + */ 1.64 + static UBool escapeUnprintable(UnicodeString& result, UChar32 c); 1.65 + 1.66 + /** 1.67 + * Returns the index of a character, ignoring quoted text. 1.68 + * For example, in the string "abc'hide'h", the 'h' in "hide" will not be 1.69 + * found by a search for 'h'. 1.70 + * @param text text to be searched 1.71 + * @param start the beginning index, inclusive; <code>0 <= start 1.72 + * <= limit</code>. 1.73 + * @param limit the ending index, exclusive; <code>start <= limit 1.74 + * <= text.length()</code>. 1.75 + * @param c character to search for 1.76 + * @return Offset of the first instance of c, or -1 if not found. 1.77 + */ 1.78 +//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. 1.79 +// static int32_t quotedIndexOf(const UnicodeString& text, 1.80 +// int32_t start, int32_t limit, 1.81 +// UChar c); 1.82 + 1.83 + /** 1.84 + * Skip over a sequence of zero or more white space characters at pos. 1.85 + * @param advance if true, advance pos to the first non-white-space 1.86 + * character at or after pos, or str.length(), if there is none. 1.87 + * Otherwise leave pos unchanged. 1.88 + * @return the index of the first non-white-space character at or 1.89 + * after pos, or str.length(), if there is none. 1.90 + */ 1.91 + static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos, 1.92 + UBool advance = FALSE); 1.93 + 1.94 + /** 1.95 + * Skip over Pattern_White_Space in a Replaceable. 1.96 + * Skipping may be done in the forward or 1.97 + * reverse direction. In either case, the leftmost index will be 1.98 + * inclusive, and the rightmost index will be exclusive. That is, 1.99 + * given a range defined as [start, limit), the call 1.100 + * skipWhitespace(text, start, limit) will advance start past leading 1.101 + * whitespace, whereas the call skipWhitespace(text, limit, start), 1.102 + * will back up limit past trailing whitespace. 1.103 + * @param text the text to be analyzed 1.104 + * @param pos either the start or limit of a range of 'text', to skip 1.105 + * leading or trailing whitespace, respectively 1.106 + * @param stop either the limit or start of a range of 'text', to skip 1.107 + * leading or trailing whitespace, respectively 1.108 + * @return the new start or limit, depending on what was passed in to 1.109 + * 'pos' 1.110 + */ 1.111 +//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. 1.112 +//? static int32_t skipWhitespace(const Replaceable& text, 1.113 +//? int32_t pos, int32_t stop); 1.114 + 1.115 + /** 1.116 + * Parse a single non-whitespace character 'ch', optionally 1.117 + * preceded by whitespace. 1.118 + * @param id the string to be parsed 1.119 + * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the 1.120 + * offset of the first character to be parsed. On output, pos[0] 1.121 + * is the index after the last parsed character. If the parse 1.122 + * fails, pos[0] will be unchanged. 1.123 + * @param ch the non-whitespace character to be parsed. 1.124 + * @return true if 'ch' is seen preceded by zero or more 1.125 + * whitespace characters. 1.126 + */ 1.127 + static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch); 1.128 + 1.129 + /** 1.130 + * Parse a pattern string starting at offset pos. Keywords are 1.131 + * matched case-insensitively. Spaces may be skipped and may be 1.132 + * optional or required. Integer values may be parsed, and if 1.133 + * they are, they will be returned in the given array. If 1.134 + * successful, the offset of the next non-space character is 1.135 + * returned. On failure, -1 is returned. 1.136 + * @param pattern must only contain lowercase characters, which 1.137 + * will match their uppercase equivalents as well. A space 1.138 + * character matches one or more required spaces. A '~' character 1.139 + * matches zero or more optional spaces. A '#' character matches 1.140 + * an integer and stores it in parsedInts, which the caller must 1.141 + * ensure has enough capacity. 1.142 + * @param parsedInts array to receive parsed integers. Caller 1.143 + * must ensure that parsedInts.length is >= the number of '#' 1.144 + * signs in 'pattern'. 1.145 + * @return the position after the last character parsed, or -1 if 1.146 + * the parse failed 1.147 + */ 1.148 + static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit, 1.149 + const UnicodeString& pattern, int32_t* parsedInts); 1.150 + 1.151 + /** 1.152 + * Parse a pattern string within the given Replaceable and a parsing 1.153 + * pattern. Characters are matched literally and case-sensitively 1.154 + * except for the following special characters: 1.155 + * 1.156 + * ~ zero or more Pattern_White_Space chars 1.157 + * 1.158 + * If end of pattern is reached with all matches along the way, 1.159 + * pos is advanced to the first unparsed index and returned. 1.160 + * Otherwise -1 is returned. 1.161 + * @param pat pattern that controls parsing 1.162 + * @param text text to be parsed, starting at index 1.163 + * @param index offset to first character to parse 1.164 + * @param limit offset after last character to parse 1.165 + * @return index after last parsed character, or -1 on parse failure. 1.166 + */ 1.167 + static int32_t parsePattern(const UnicodeString& pat, 1.168 + const Replaceable& text, 1.169 + int32_t index, 1.170 + int32_t limit); 1.171 + 1.172 + /** 1.173 + * Parse an integer at pos, either of the form \d+ or of the form 1.174 + * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, 1.175 + * or octal format. 1.176 + * @param pos INPUT-OUTPUT parameter. On input, the first 1.177 + * character to parse. On output, the character after the last 1.178 + * parsed character. 1.179 + */ 1.180 + static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit); 1.181 + 1.182 + /** 1.183 + * Parse a Unicode identifier from the given string at the given 1.184 + * position. Return the identifier, or an empty string if there 1.185 + * is no identifier. 1.186 + * @param str the string to parse 1.187 + * @param pos INPUT-OUPUT parameter. On INPUT, pos is the 1.188 + * first character to examine. It must be less than str.length(), 1.189 + * and it must not point to a whitespace character. That is, must 1.190 + * have pos < str.length() and 1.191 + * !UCharacter::isWhitespace(str.char32At(pos)). On 1.192 + * OUTPUT, the position after the last parsed character. 1.193 + * @return the Unicode identifier, or an empty string if there is 1.194 + * no valid identifier at pos. 1.195 + */ 1.196 + static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos); 1.197 + 1.198 + /** 1.199 + * Parse an unsigned 31-bit integer at the given offset. Use 1.200 + * UCharacter.digit() to parse individual characters into digits. 1.201 + * @param text the text to be parsed 1.202 + * @param pos INPUT-OUTPUT parameter. On entry, pos is the 1.203 + * offset within text at which to start parsing; it should point 1.204 + * to a valid digit. On exit, pos is the offset after the last 1.205 + * parsed character. If the parse failed, it will be unchanged on 1.206 + * exit. Must be >= 0 on entry. 1.207 + * @param radix the radix in which to parse; must be >= 2 and <= 1.208 + * 36. 1.209 + * @return a non-negative parsed number, or -1 upon parse failure. 1.210 + * Parse fails if there are no digits, that is, if pos does not 1.211 + * point to a valid digit on entry, or if the number to be parsed 1.212 + * does not fit into a 31-bit unsigned integer. 1.213 + */ 1.214 + static int32_t parseNumber(const UnicodeString& text, 1.215 + int32_t& pos, int8_t radix); 1.216 + 1.217 + static void appendToRule(UnicodeString& rule, 1.218 + UChar32 c, 1.219 + UBool isLiteral, 1.220 + UBool escapeUnprintable, 1.221 + UnicodeString& quoteBuf); 1.222 + 1.223 + static void appendToRule(UnicodeString& rule, 1.224 + const UnicodeString& text, 1.225 + UBool isLiteral, 1.226 + UBool escapeUnprintable, 1.227 + UnicodeString& quoteBuf); 1.228 + 1.229 + static void appendToRule(UnicodeString& rule, 1.230 + const UnicodeMatcher* matcher, 1.231 + UBool escapeUnprintable, 1.232 + UnicodeString& quoteBuf); 1.233 + 1.234 +private: 1.235 + // do not instantiate 1.236 + ICU_Utility(); 1.237 +}; 1.238 + 1.239 +U_NAMESPACE_END 1.240 + 1.241 +#endif 1.242 +//eof