1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/uniset.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1691 @@ 1.4 +/* 1.5 +*************************************************************************** 1.6 +* Copyright (C) 1999-2013, International Business Machines Corporation 1.7 +* and others. All Rights Reserved. 1.8 +*************************************************************************** 1.9 +* Date Name Description 1.10 +* 10/20/99 alan Creation. 1.11 +*************************************************************************** 1.12 +*/ 1.13 + 1.14 +#ifndef UNICODESET_H 1.15 +#define UNICODESET_H 1.16 + 1.17 +#include "unicode/unifilt.h" 1.18 +#include "unicode/unistr.h" 1.19 +#include "unicode/uset.h" 1.20 + 1.21 +/** 1.22 + * \file 1.23 + * \brief C++ API: Unicode Set 1.24 + */ 1.25 + 1.26 +U_NAMESPACE_BEGIN 1.27 + 1.28 +// Forward Declarations. 1.29 +void UnicodeSet_initInclusion(int32_t src, UErrorCode &status); /**< @internal */ 1.30 + 1.31 +class BMPSet; 1.32 +class ParsePosition; 1.33 +class RBBIRuleScanner; 1.34 +class SymbolTable; 1.35 +class UnicodeSetStringSpan; 1.36 +class UVector; 1.37 +class RuleCharacterIterator; 1.38 + 1.39 +/** 1.40 + * A mutable set of Unicode characters and multicharacter strings. Objects of this class 1.41 + * represent <em>character classes</em> used in regular expressions. 1.42 + * A character specifies a subset of Unicode code points. Legal 1.43 + * code points are U+0000 to U+10FFFF, inclusive. 1.44 + * 1.45 + * <p>The UnicodeSet class is not designed to be subclassed. 1.46 + * 1.47 + * <p><code>UnicodeSet</code> supports two APIs. The first is the 1.48 + * <em>operand</em> API that allows the caller to modify the value of 1.49 + * a <code>UnicodeSet</code> object. It conforms to Java 2's 1.50 + * <code>java.util.Set</code> interface, although 1.51 + * <code>UnicodeSet</code> does not actually implement that 1.52 + * interface. 
All methods of <code>Set</code> are supported, with the 1.53 + * modification that they take a character range or single character 1.54 + * instead of an <code>Object</code>, and they take a 1.55 + * <code>UnicodeSet</code> instead of a <code>Collection</code>. The 1.56 + * operand API may be thought of in terms of boolean logic: a boolean 1.57 + * OR is implemented by <code>add</code>, a boolean AND is implemented 1.58 + * by <code>retain</code>, a boolean XOR is implemented by 1.59 + * <code>complement</code> taking an argument, and a boolean NOT is 1.60 + * implemented by <code>complement</code> with no argument. In terms 1.61 + * of traditional set theory function names, <code>add</code> is a 1.62 + * union, <code>retain</code> is an intersection, <code>remove</code> 1.63 + * is an asymmetric difference, and <code>complement</code> with no 1.64 + * argument is a set complement with respect to the superset range 1.65 + * <code>MIN_VALUE-MAX_VALUE</code> 1.66 + * 1.67 + * <p>The second API is the 1.68 + * <code>applyPattern()</code>/<code>toPattern()</code> API from the 1.69 + * <code>java.text.Format</code>-derived classes. Unlike the 1.70 + * methods that add characters, add categories, and control the logic 1.71 + * of the set, the method <code>applyPattern()</code> sets all 1.72 + * attributes of a <code>UnicodeSet</code> at once, based on a 1.73 + * string pattern. 1.74 + * 1.75 + * <p><b>Pattern syntax</b></p> 1.76 + * 1.77 + * Patterns are accepted by the constructors and the 1.78 + * <code>applyPattern()</code> methods and returned by the 1.79 + * <code>toPattern()</code> method. These patterns follow a syntax 1.80 + * similar to that employed by version 8 regular expression character 1.81 + * classes. 
Here are some simple examples: 1.82 + * 1.83 + * \htmlonly<blockquote>\endhtmlonly 1.84 + * <table> 1.85 + * <tr align="top"> 1.86 + * <td nowrap valign="top" align="left"><code>[]</code></td> 1.87 + * <td valign="top">No characters</td> 1.88 + * </tr><tr align="top"> 1.89 + * <td nowrap valign="top" align="left"><code>[a]</code></td> 1.90 + * <td valign="top">The character 'a'</td> 1.91 + * </tr><tr align="top"> 1.92 + * <td nowrap valign="top" align="left"><code>[ae]</code></td> 1.93 + * <td valign="top">The characters 'a' and 'e'</td> 1.94 + * </tr> 1.95 + * <tr> 1.96 + * <td nowrap valign="top" align="left"><code>[a-e]</code></td> 1.97 + * <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code 1.98 + * point order</td> 1.99 + * </tr> 1.100 + * <tr> 1.101 + * <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td> 1.102 + * <td valign="top">The character U+4E01</td> 1.103 + * </tr> 1.104 + * <tr> 1.105 + * <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td> 1.106 + * <td valign="top">The character 'a' and the multicharacter strings "ab" and 1.107 + * "ac"</td> 1.108 + * </tr> 1.109 + * <tr> 1.110 + * <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td> 1.111 + * <td valign="top">All characters in the general category Uppercase Letter</td> 1.112 + * </tr> 1.113 + * </table> 1.114 + * \htmlonly</blockquote>\endhtmlonly 1.115 + * 1.116 + * Any character may be preceded by a backslash in order to remove any special 1.117 + * meaning. White space characters, as defined by UCharacter.isWhitespace(), are 1.118 + * ignored, unless they are escaped. 1.119 + * 1.120 + * <p>Property patterns specify a set of characters having a certain 1.121 + * property as defined by the Unicode standard. Both the POSIX-like 1.122 + * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized. 
For a 1.123 + * complete list of supported property patterns, see the User's Guide 1.124 + * for UnicodeSet at 1.125 + * <a href="http://icu-project.org/userguide/unicodeSet.html"> 1.126 + * http://icu-project.org/userguide/unicodeSet.html</a>. 1.127 + * Actual determination of property data is defined by the underlying 1.128 + * Unicode database as implemented by UCharacter. 1.129 + * 1.130 + * <p>Patterns specify individual characters, ranges of characters, and 1.131 + * Unicode property sets. When elements are concatenated, they 1.132 + * specify their union. To complement a set, place a '^' immediately 1.133 + * after the opening '['. Property patterns are inverted by modifying 1.134 + * their delimiters; "[:^foo:]" and "\\P{foo}". In any other location, 1.135 + * '^' has no special meaning. 1.136 + * 1.137 + * <p>Ranges are indicated by placing a '-' between two 1.138 + * characters, as in "a-z". This specifies the range of all 1.139 + * characters from the left to the right, in Unicode order. If the 1.140 + * left character is greater than or equal to the 1.141 + * right character it is a syntax error. If a '-' occurs as the first 1.142 + * character after the opening '[' or '[^', or if it occurs as the 1.143 + * last character before the closing ']', then it is taken as a 1.144 + * literal. Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same 1.145 + * set of three characters, 'a', 'b', and '-'. 1.146 + * 1.147 + * <p>Sets may be intersected using the '&' operator or the asymmetric 1.148 + * set difference may be taken using the '-' operator, for example, 1.149 + * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters 1.150 + * with values less than 4096. Operators ('&' and '-') have equal 1.151 + * precedence and bind left-to-right. Thus 1.152 + * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to 1.153 + * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]". This only really matters for 1.154 + * difference; intersection is commutative. 
1.155 + * 1.156 + * <table> 1.157 + * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a' 1.158 + * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a' 1.159 + * through 'z' and all letters in between, in Unicode order 1.160 + * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing 1.161 + * all characters but 'a' through 'z', 1.162 + * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF 1.163 + * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code> 1.164 + * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em> 1.165 + * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code> 1.166 + * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em> 1.167 + * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code> 1.168 + * <td>The asymmetric difference of sets specified by <em>pat1</em> and 1.169 + * <em>pat2</em> 1.170 + * <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code> 1.171 + * <td>The set of characters having the specified 1.172 + * Unicode property; in 1.173 + * this case, Unicode uppercase letters 1.174 + * <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code> 1.175 + * <td>The set of characters <em>not</em> having the given 1.176 + * Unicode property 1.177 + * </table> 1.178 + * 1.179 + * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p> 1.180 + * 1.181 + * <p><b>Formal syntax</b></p> 1.182 + * 1.183 + * \htmlonly<blockquote>\endhtmlonly 1.184 + * <table> 1.185 + * <tr align="top"> 1.186 + * <td nowrap valign="top" align="right"><code>pattern := </code></td> 1.187 + * <td valign="top"><code>('[' '^'? 
item* ']') | 1.188 + * property</code></td> 1.189 + * </tr> 1.190 + * <tr align="top"> 1.191 + * <td nowrap valign="top" align="right"><code>item := </code></td> 1.192 + * <td valign="top"><code>char | (char '-' char) | pattern-expr<br> 1.193 + * </code></td> 1.194 + * </tr> 1.195 + * <tr align="top"> 1.196 + * <td nowrap valign="top" align="right"><code>pattern-expr := </code></td> 1.197 + * <td valign="top"><code>pattern | pattern-expr pattern | 1.198 + * pattern-expr op pattern<br> 1.199 + * </code></td> 1.200 + * </tr> 1.201 + * <tr align="top"> 1.202 + * <td nowrap valign="top" align="right"><code>op := </code></td> 1.203 + * <td valign="top"><code>'&' | '-'<br> 1.204 + * </code></td> 1.205 + * </tr> 1.206 + * <tr align="top"> 1.207 + * <td nowrap valign="top" align="right"><code>special := </code></td> 1.208 + * <td valign="top"><code>'[' | ']' | '-'<br> 1.209 + * </code></td> 1.210 + * </tr> 1.211 + * <tr align="top"> 1.212 + * <td nowrap valign="top" align="right"><code>char := </code></td> 1.213 + * <td valign="top"><em>any character that is not</em><code> special<br> 1.214 + * | ('\' </code><em>any character</em><code>)<br> 1.215 + * | ('\\u' hex hex hex hex)<br> 1.216 + * </code></td> 1.217 + * </tr> 1.218 + * <tr align="top"> 1.219 + * <td nowrap valign="top" align="right"><code>hex := </code></td> 1.220 + * <td valign="top"><em>any character for which 1.221 + * </em><code>Character.digit(c, 16)</code><em> 1.222 + * returns a non-negative result</em></td> 1.223 + * </tr> 1.224 + * <tr> 1.225 + * <td nowrap valign="top" align="right"><code>property := </code></td> 1.226 + * <td valign="top"><em>a Unicode property set pattern</em></td> 1.227 + * </tr> 1.228 + * </table> 1.229 + * <br> 1.230 + * <table border="1"> 1.231 + * <tr> 1.232 + * <td>Legend: <table> 1.233 + * <tr> 1.234 + * <td nowrap valign="top"><code>a := b</code></td> 1.235 + * <td width="20" valign="top"> </td> 1.236 + * <td valign="top"><code>a</code> may be replaced by <code>b</code> </td> 
1.237 + * </tr> 1.238 + * <tr> 1.239 + * <td nowrap valign="top"><code>a?</code></td> 1.240 + * <td valign="top"></td> 1.241 + * <td valign="top">zero or one instance of <code>a</code><br> 1.242 + * </td> 1.243 + * </tr> 1.244 + * <tr> 1.245 + * <td nowrap valign="top"><code>a*</code></td> 1.246 + * <td valign="top"></td> 1.247 + * <td valign="top">zero or more instances of <code>a</code><br> 1.248 + * </td> 1.249 + * </tr> 1.250 + * <tr> 1.251 + * <td nowrap valign="top"><code>a | b</code></td> 1.252 + * <td valign="top"></td> 1.253 + * <td valign="top">either <code>a</code> or <code>b</code><br> 1.254 + * </td> 1.255 + * </tr> 1.256 + * <tr> 1.257 + * <td nowrap valign="top"><code>'a'</code></td> 1.258 + * <td valign="top"></td> 1.259 + * <td valign="top">the literal string between the quotes </td> 1.260 + * </tr> 1.261 + * </table> 1.262 + * </td> 1.263 + * </tr> 1.264 + * </table> 1.265 + * \htmlonly</blockquote>\endhtmlonly 1.266 + * 1.267 + * <p>Note: 1.268 + * - Most UnicodeSet methods do not take a UErrorCode parameter because 1.269 + * there are usually very few opportunities for failure other than a shortage 1.270 + * of memory, error codes in low-level C++ string methods would be inconvenient, 1.271 + * and the error code as the last parameter (ICU convention) would prevent 1.272 + * the use of default parameter values. 1.273 + * Instead, such methods set the UnicodeSet into a "bogus" state 1.274 + * (see isBogus()) if an error occurs. 1.275 + * 1.276 + * @author Alan Liu 1.277 + * @stable ICU 2.0 1.278 + */ 1.279 +class U_COMMON_API UnicodeSet : public UnicodeFilter { 1.280 + 1.281 + int32_t len; // length of list used; 0 <= len <= capacity 1.282 + int32_t capacity; // capacity of list 1.283 + UChar32* list; // MUST be terminated with HIGH 1.284 + BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL. 
1.285 + UChar32* buffer; // internal buffer, may be NULL 1.286 + int32_t bufferCapacity; // capacity of buffer 1.287 + int32_t patLen; 1.288 + 1.289 + /** 1.290 + * The pattern representation of this set. This may not be the 1.291 + * most economical pattern. It is the pattern supplied to 1.292 + * applyPattern(), with variables substituted and whitespace 1.293 + * removed. For sets constructed without applyPattern(), or 1.294 + * modified using the non-pattern API, this string will be empty, 1.295 + * indicating that toPattern() must generate a pattern 1.296 + * representation from the inversion list. 1.297 + */ 1.298 + UChar *pat; 1.299 + UVector* strings; // maintained in sorted order 1.300 + UnicodeSetStringSpan *stringSpan; 1.301 + 1.302 +private: 1.303 + enum { // constants 1.304 + kIsBogus = 1 // This set is bogus (i.e. not valid) 1.305 + }; 1.306 + uint8_t fFlags; // Bit flag (see constants above) 1.307 +public: 1.308 + /** 1.309 + * Determine if this object contains a valid set. 1.310 + * A bogus set has no value. It is different from an empty set. 1.311 + * It can be used to indicate that no set value is available. 1.312 + * 1.313 + * @return TRUE if the set is bogus/invalid, FALSE otherwise 1.314 + * @see setToBogus() 1.315 + * @stable ICU 4.0 1.316 + */ 1.317 + inline UBool isBogus(void) const; 1.318 + 1.319 + /** 1.320 + * Make this UnicodeSet object invalid. 1.321 + * The set will test TRUE with isBogus(). 1.322 + * 1.323 + * A bogus set has no value. It is different from an empty set. 1.324 + * It can be used to indicate that no set value is available. 1.325 + * 1.326 + * This utility function is used throughout the UnicodeSet 1.327 + * implementation to indicate that a UnicodeSet operation failed, 1.328 + * and may be used in other functions, 1.329 + * especially but not exclusively when such functions do not 1.330 + * take a UErrorCode for simplicity. 
1.331 + * 1.332 + * @see isBogus() 1.333 + * @stable ICU 4.0 1.334 + */ 1.335 + void setToBogus(); 1.336 + 1.337 +public: 1.338 + 1.339 + enum { 1.340 + /** 1.341 + * Minimum value that can be stored in a UnicodeSet. 1.342 + * @stable ICU 2.4 1.343 + */ 1.344 + MIN_VALUE = 0, 1.345 + 1.346 + /** 1.347 + * Maximum value that can be stored in a UnicodeSet. 1.348 + * @stable ICU 2.4 1.349 + */ 1.350 + MAX_VALUE = 0x10ffff 1.351 + }; 1.352 + 1.353 + //---------------------------------------------------------------- 1.354 + // Constructors &c 1.355 + //---------------------------------------------------------------- 1.356 + 1.357 +public: 1.358 + 1.359 + /** 1.360 + * Constructs an empty set. 1.361 + * @stable ICU 2.0 1.362 + */ 1.363 + UnicodeSet(); 1.364 + 1.365 + /** 1.366 + * Constructs a set containing the given range. If <code>start > 1.367 + * end</code> then an empty set is created. 1.368 + * 1.369 + * @param start first character, inclusive, of range 1.370 + * @param end last character, inclusive, of range 1.371 + * @stable ICU 2.4 1.372 + */ 1.373 + UnicodeSet(UChar32 start, UChar32 end); 1.374 + 1.375 + /** 1.376 + * Constructs a set from the given pattern. See the class 1.377 + * description for the syntax of the pattern language. 1.378 + * @param pattern a string specifying what characters are in the set 1.379 + * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern 1.380 + * contains a syntax error. 1.381 + * @stable ICU 2.0 1.382 + */ 1.383 + UnicodeSet(const UnicodeString& pattern, 1.384 + UErrorCode& status); 1.385 + 1.386 +#ifndef U_HIDE_INTERNAL_API 1.387 + /** 1.388 + * Constructs a set from the given pattern. See the class 1.389 + * description for the syntax of the pattern language. 1.390 + * @param pattern a string specifying what characters are in the set 1.391 + * @param options bitmask for options to apply to the pattern. 1.392 + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. 
1.393 + * @param symbols a symbol table mapping variable names to values 1.394 + * and stand-in characters to UnicodeSets; may be NULL 1.395 + * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern 1.396 + * contains a syntax error. 1.397 + * @internal 1.398 + */ 1.399 + UnicodeSet(const UnicodeString& pattern, 1.400 + uint32_t options, 1.401 + const SymbolTable* symbols, 1.402 + UErrorCode& status); 1.403 +#endif /* U_HIDE_INTERNAL_API */ 1.404 + 1.405 + /** 1.406 + * Constructs a set from the given pattern. See the class description 1.407 + * for the syntax of the pattern language. 1.408 + * @param pattern a string specifying what characters are in the set 1.409 + * @param pos on input, the position in pattern at which to start parsing. 1.410 + * On output, the position after the last character parsed. 1.411 + * @param options bitmask for options to apply to the pattern. 1.412 + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. 1.413 + * @param symbols a symbol table mapping variable names to values 1.414 + * and stand-in characters to UnicodeSets; may be NULL 1.415 + * @param status input-output error code 1.416 + * @stable ICU 2.8 1.417 + */ 1.418 + UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, 1.419 + uint32_t options, 1.420 + const SymbolTable* symbols, 1.421 + UErrorCode& status); 1.422 + 1.423 + /** 1.424 + * Constructs a set that is identical to the given UnicodeSet. 1.425 + * @stable ICU 2.0 1.426 + */ 1.427 + UnicodeSet(const UnicodeSet& o); 1.428 + 1.429 + /** 1.430 + * Destructs the set. 1.431 + * @stable ICU 2.0 1.432 + */ 1.433 + virtual ~UnicodeSet(); 1.434 + 1.435 + /** 1.436 + * Assigns this object to be a copy of another. 1.437 + * A frozen set will not be modified. 1.438 + * @stable ICU 2.0 1.439 + */ 1.440 + UnicodeSet& operator=(const UnicodeSet& o); 1.441 + 1.442 + /** 1.443 + * Compares the specified object with this set for equality. 
Returns 1.444 + * <tt>true</tt> if the two sets 1.445 + * have the same size, and every member of the specified set is 1.446 + * contained in this set (or equivalently, every member of this set is 1.447 + * contained in the specified set). 1.448 + * 1.449 + * @param o set to be compared for equality with this set. 1.450 + * @return <tt>true</tt> if the specified set is equal to this set. 1.451 + * @stable ICU 2.0 1.452 + */ 1.453 + virtual UBool operator==(const UnicodeSet& o) const; 1.454 + 1.455 + /** 1.456 + * Compares the specified object with this set for inequality. Returns 1.457 + * <tt>true</tt> if the specified set is not equal to this set. 1.458 + * @stable ICU 2.0 1.459 + */ 1.460 + UBool operator!=(const UnicodeSet& o) const; 1.461 + 1.462 + /** 1.463 + * Returns a copy of this object. All UnicodeFunctor objects have 1.464 + * to support cloning in order to allow classes using 1.465 + * UnicodeFunctors, such as Transliterator, to implement cloning. 1.466 + * If this set is frozen, then the clone will be frozen as well. 1.467 + * Use cloneAsThawed() for a mutable clone of a frozen set. 1.468 + * @see cloneAsThawed 1.469 + * @stable ICU 2.0 1.470 + */ 1.471 + virtual UnicodeFunctor* clone() const; 1.472 + 1.473 + /** 1.474 + * Returns the hash code value for this set. 1.475 + * 1.476 + * @return the hash code value for this set. 1.477 + * @see Object#hashCode() 1.478 + * @stable ICU 2.0 1.479 + */ 1.480 + virtual int32_t hashCode(void) const; 1.481 + 1.482 + /** 1.483 + * Get a UnicodeSet pointer from a USet 1.484 + * 1.485 + * @param uset a USet (the ICU plain C type for UnicodeSet) 1.486 + * @return the corresponding UnicodeSet pointer. 1.487 + * 1.488 + * @stable ICU 4.2 1.489 + */ 1.490 + inline static UnicodeSet *fromUSet(USet *uset); 1.491 + 1.492 + /** 1.493 + * Get a UnicodeSet pointer from a const USet 1.494 + * 1.495 + * @param uset a const USet (the ICU plain C type for UnicodeSet) 1.496 + * @return the corresponding UnicodeSet pointer. 
1.497 + * 1.498 + * @stable ICU 4.2 1.499 + */ 1.500 + inline static const UnicodeSet *fromUSet(const USet *uset); 1.501 + 1.502 + /** 1.503 + * Produce a USet * pointer for this UnicodeSet. 1.504 + * USet is the plain C type for UnicodeSet 1.505 + * 1.506 + * @return a USet pointer for this UnicodeSet 1.507 + * @stable ICU 4.2 1.508 + */ 1.509 + inline USet *toUSet(); 1.510 + 1.511 + 1.512 + /** 1.513 + * Produce a const USet * pointer for this UnicodeSet. 1.514 + * USet is the plain C type for UnicodeSet 1.515 + * 1.516 + * @return a const USet pointer for this UnicodeSet 1.517 + * @stable ICU 4.2 1.518 + */ 1.519 + inline const USet * toUSet() const; 1.520 + 1.521 + 1.522 + //---------------------------------------------------------------- 1.523 + // Freezable API 1.524 + //---------------------------------------------------------------- 1.525 + 1.526 + /** 1.527 + * Determines whether the set has been frozen (made immutable) or not. 1.528 + * See the ICU4J Freezable interface for details. 1.529 + * @return TRUE/FALSE for whether the set has been frozen 1.530 + * @see freeze 1.531 + * @see cloneAsThawed 1.532 + * @stable ICU 3.8 1.533 + */ 1.534 + inline UBool isFrozen() const; 1.535 + 1.536 + /** 1.537 + * Freeze the set (make it immutable). 1.538 + * Once frozen, it cannot be unfrozen and is therefore thread-safe 1.539 + * until it is deleted. 1.540 + * See the ICU4J Freezable interface for details. 1.541 + * Freezing the set may also make some operations faster, for example 1.542 + * contains() and span(). 1.543 + * A frozen set will not be modified. (It remains frozen.) 1.544 + * @return this set. 1.545 + * @see isFrozen 1.546 + * @see cloneAsThawed 1.547 + * @stable ICU 3.8 1.548 + */ 1.549 + UnicodeFunctor *freeze(); 1.550 + 1.551 + /** 1.552 + * Clone the set and make the clone mutable. 1.553 + * See the ICU4J Freezable interface for details. 
1.554 + * @return the mutable clone 1.555 + * @see freeze 1.556 + * @see isFrozen 1.557 + * @stable ICU 3.8 1.558 + */ 1.559 + UnicodeFunctor *cloneAsThawed() const; 1.560 + 1.561 + //---------------------------------------------------------------- 1.562 + // Public API 1.563 + //---------------------------------------------------------------- 1.564 + 1.565 + /** 1.566 + * Make this object represent the range <code>start - end</code>. 1.567 + * If <code>start > end</code> then this object is set to an 1.568 + * empty range. 1.569 + * A frozen set will not be modified. 1.570 + * 1.571 + * @param start first character in the set, inclusive 1.572 + * @param end last character in the set, inclusive 1.573 + * @stable ICU 2.4 1.574 + */ 1.575 + UnicodeSet& set(UChar32 start, UChar32 end); 1.576 + 1.577 + /** 1.578 + * Return true if the given position, in the given pattern, appears 1.579 + * to be the start of a UnicodeSet pattern. 1.580 + * @stable ICU 2.4 1.581 + */ 1.582 + static UBool resemblesPattern(const UnicodeString& pattern, 1.583 + int32_t pos); 1.584 + 1.585 + /** 1.586 + * Modifies this set to represent the set specified by the given 1.587 + * pattern, ignoring Unicode Pattern_White_Space characters. 1.588 + * See the class description for the syntax of the pattern language. 1.589 + * A frozen set will not be modified. 1.590 + * @param pattern a string specifying what characters are in the set 1.591 + * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern 1.592 + * contains a syntax error. 1.593 + * <em> Empties the set passed before applying the pattern.</em> 1.594 + * @return a reference to this 1.595 + * @stable ICU 2.0 1.596 + */ 1.597 + UnicodeSet& applyPattern(const UnicodeString& pattern, 1.598 + UErrorCode& status); 1.599 + 1.600 +#ifndef U_HIDE_INTERNAL_API 1.601 + /** 1.602 + * Modifies this set to represent the set specified by the given 1.603 + * pattern, optionally ignoring Unicode Pattern_White_Space characters. 
1.604 + * See the class description for the syntax of the pattern language. 1.605 + * A frozen set will not be modified. 1.606 + * @param pattern a string specifying what characters are in the set 1.607 + * @param options bitmask for options to apply to the pattern. 1.608 + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. 1.609 + * @param symbols a symbol table mapping variable names to 1.610 + * values and stand-ins to UnicodeSets; may be NULL 1.611 + * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern 1.612 + * contains a syntax error. 1.613 + *<em> Empties the set passed before applying the pattern.</em> 1.614 + * @return a reference to this 1.615 + * @internal 1.616 + */ 1.617 + UnicodeSet& applyPattern(const UnicodeString& pattern, 1.618 + uint32_t options, 1.619 + const SymbolTable* symbols, 1.620 + UErrorCode& status); 1.621 +#endif /* U_HIDE_INTERNAL_API */ 1.622 + 1.623 + /** 1.624 + * Parses the given pattern, starting at the given position. The 1.625 + * character at pattern.charAt(pos.getIndex()) must be '[', or the 1.626 + * parse fails. Parsing continues until the corresponding closing 1.627 + * ']'. If a syntax error is encountered between the opening and 1.628 + * closing brace, the parse fails. Upon return from a successful 1.629 + * parse, the ParsePosition is updated to point to the character 1.630 + * following the closing ']', and a StringBuffer containing a 1.631 + * pairs list for the parsed pattern is returned. This method calls 1.632 + * itself recursively to parse embedded subpatterns. 1.633 + *<em> Empties the set passed before applying the pattern.</em> 1.634 + * A frozen set will not be modified. 1.635 + * 1.636 + * @param pattern the string containing the pattern to be parsed. 1.637 + * The portion of the string from pos.getIndex(), which must be a 1.638 + * '[', to the corresponding closing ']', is parsed. 1.639 + * @param pos upon entry, the position at which to begin parsing. 
1.640 + * The character at pattern.charAt(pos.getIndex()) must be a '['. 1.641 + * Upon return from a successful parse, pos.getIndex() is either 1.642 + * the character after the closing ']' of the parsed pattern, or 1.643 + * pattern.length() if the closing ']' is the last character of 1.644 + * the pattern string. 1.645 + * @param options bitmask for options to apply to the pattern. 1.646 + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. 1.647 + * @param symbols a symbol table mapping variable names to 1.648 + * values and stand-ins to UnicodeSets; may be NULL 1.649 + * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern 1.650 + * contains a syntax error. 1.651 + * @return a reference to this 1.652 + * @stable ICU 2.8 1.653 + */ 1.654 + UnicodeSet& applyPattern(const UnicodeString& pattern, 1.655 + ParsePosition& pos, 1.656 + uint32_t options, 1.657 + const SymbolTable* symbols, 1.658 + UErrorCode& status); 1.659 + 1.660 + /** 1.661 + * Returns a string representation of this set. If the result of 1.662 + * calling this function is passed to a UnicodeSet constructor, it 1.663 + * will produce another set that is equal to this one. 1.664 + * A frozen set will not be modified. 1.665 + * @param result the string to receive the rules. Previous 1.666 + * contents will be deleted. 1.667 + * @param escapeUnprintable if TRUE then convert unprintable 1.668 + * character to their hex escape representations, \\uxxxx or 1.669 + * \\Uxxxxxxxx. Unprintable characters are those other than 1.670 + * U+000A, U+0020..U+007E. 1.671 + * @stable ICU 2.0 1.672 + */ 1.673 + virtual UnicodeString& toPattern(UnicodeString& result, 1.674 + UBool escapeUnprintable = FALSE) const; 1.675 + 1.676 + /** 1.677 + * Modifies this set to contain those code points which have the given value 1.678 + * for the given binary or enumerated property, as returned by 1.679 + * u_getIntPropertyValue. Prior contents of this set are lost. 
1.680 + * A frozen set will not be modified. 1.681 + * 1.682 + * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 1.683 + * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 1.684 + * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. 1.685 + * 1.686 + * @param value a value in the range u_getIntPropertyMinValue(prop).. 1.687 + * u_getIntPropertyMaxValue(prop), with one exception. If prop is 1.688 + * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but 1.689 + * rather a mask value produced by U_GET_GC_MASK(). This allows grouped 1.690 + * categories such as [:L:] to be represented. 1.691 + * 1.692 + * @param ec error code input/output parameter 1.693 + * 1.694 + * @return a reference to this set 1.695 + * 1.696 + * @stable ICU 2.4 1.697 + */ 1.698 + UnicodeSet& applyIntPropertyValue(UProperty prop, 1.699 + int32_t value, 1.700 + UErrorCode& ec); 1.701 + 1.702 + /** 1.703 + * Modifies this set to contain those code points which have the 1.704 + * given value for the given property. Prior contents of this 1.705 + * set are lost. 1.706 + * A frozen set will not be modified. 1.707 + * 1.708 + * @param prop a property alias, either short or long. The name is matched 1.709 + * loosely. See PropertyAliases.txt for names and a description of loose 1.710 + * matching. If the value string is empty, then this string is interpreted 1.711 + * as either a General_Category value alias, a Script value alias, a binary 1.712 + * property alias, or a special ID. Special IDs are matched loosely and 1.713 + * correspond to the following sets: 1.714 + * 1.715 + * "ANY" = [\\u0000-\\U0010FFFF], 1.716 + * "ASCII" = [\\u0000-\\u007F], 1.717 + * "Assigned" = [:^Cn:]. 1.718 + * 1.719 + * @param value a value alias, either short or long. The name is matched 1.720 + * loosely. See PropertyValueAliases.txt for names and a description of 1.721 + * loose matching. 
In addition to aliases listed, numeric values and 1.722 + * canonical combining classes may be expressed numerically, e.g., ("nv", 1.723 + * "0.5") or ("ccc", "220"). The value string may also be empty. 1.724 + * 1.725 + * @param ec error code input/output parameter 1.726 + * 1.727 + * @return a reference to this set 1.728 + * 1.729 + * @stable ICU 2.4 1.730 + */ 1.731 + UnicodeSet& applyPropertyAlias(const UnicodeString& prop, 1.732 + const UnicodeString& value, 1.733 + UErrorCode& ec); 1.734 + 1.735 + /** 1.736 + * Returns the number of elements in this set (its cardinality). 1.737 + * Note that the elements of a set may include both individual 1.738 + * codepoints and strings. 1.739 + * 1.740 + * @return the number of elements in this set (its cardinality). 1.741 + * @stable ICU 2.0 1.742 + */ 1.743 + virtual int32_t size(void) const; 1.744 + 1.745 + /** 1.746 + * Returns <tt>true</tt> if this set contains no elements. 1.747 + * 1.748 + * @return <tt>true</tt> if this set contains no elements. 1.749 + * @stable ICU 2.0 1.750 + */ 1.751 + virtual UBool isEmpty(void) const; 1.752 + 1.753 + /** 1.754 + * Returns true if this set contains the given character. 1.755 + * This function works faster with a frozen set. 1.756 + * @param c character to be checked for containment 1.757 + * @return true if the test condition is met 1.758 + * @stable ICU 2.0 1.759 + */ 1.760 + virtual UBool contains(UChar32 c) const; 1.761 + 1.762 + /** 1.763 + * Returns true if this set contains every character 1.764 + * of the given range. 1.765 + * @param start first character, inclusive, of the range 1.766 + * @param end last character, inclusive, of the range 1.767 + * @return true if the test condition is met 1.768 + * @stable ICU 2.0 1.769 + */ 1.770 + virtual UBool contains(UChar32 start, UChar32 end) const; 1.771 + 1.772 + /** 1.773 + * Returns <tt>true</tt> if this set contains the given 1.774 + * multicharacter string. 
1.775 + * @param s string to be checked for containment 1.776 + * @return <tt>true</tt> if this set contains the specified string 1.777 + * @stable ICU 2.4 1.778 + */ 1.779 + UBool contains(const UnicodeString& s) const; 1.780 + 1.781 + /** 1.782 + * Returns true if this set contains all the characters and strings 1.783 + * of the given set. 1.784 + * @param c set to be checked for containment 1.785 + * @return true if the test condition is met 1.786 + * @stable ICU 2.4 1.787 + */ 1.788 + virtual UBool containsAll(const UnicodeSet& c) const; 1.789 + 1.790 + /** 1.791 + * Returns true if this set contains all the characters 1.792 + * of the given string. 1.793 + * @param s string containing characters to be checked for containment 1.794 + * @return true if the test condition is met 1.795 + * @stable ICU 2.4 1.796 + */ 1.797 + UBool containsAll(const UnicodeString& s) const; 1.798 + 1.799 + /** 1.800 + * Returns true if this set contains none of the characters 1.801 + * of the given range. 1.802 + * @param start first character, inclusive, of the range 1.803 + * @param end last character, inclusive, of the range 1.804 + * @return true if the test condition is met 1.805 + * @stable ICU 2.4 1.806 + */ 1.807 + UBool containsNone(UChar32 start, UChar32 end) const; 1.808 + 1.809 + /** 1.810 + * Returns true if this set contains none of the characters and strings 1.811 + * of the given set. 1.812 + * @param c set to be checked for containment 1.813 + * @return true if the test condition is met 1.814 + * @stable ICU 2.4 1.815 + */ 1.816 + UBool containsNone(const UnicodeSet& c) const; 1.817 + 1.818 + /** 1.819 + * Returns true if this set contains none of the characters 1.820 + * of the given string. 
1.821 + * @param s string containing characters to be checked for containment 1.822 + * @return true if the test condition is met 1.823 + * @stable ICU 2.4 1.824 + */ 1.825 + UBool containsNone(const UnicodeString& s) const; 1.826 + 1.827 + /** 1.828 + * Returns true if this set contains one or more of the characters 1.829 + * in the given range. 1.830 + * @param start first character, inclusive, of the range 1.831 + * @param end last character, inclusive, of the range 1.832 + * @return true if the condition is met 1.833 + * @stable ICU 2.4 1.834 + */ 1.835 + inline UBool containsSome(UChar32 start, UChar32 end) const; 1.836 + 1.837 + /** 1.838 + * Returns true if this set contains one or more of the characters 1.839 + * and strings of the given set. 1.840 + * @param s The set to be checked for containment 1.841 + * @return true if the condition is met 1.842 + * @stable ICU 2.4 1.843 + */ 1.844 + inline UBool containsSome(const UnicodeSet& s) const; 1.845 + 1.846 + /** 1.847 + * Returns true if this set contains one or more of the characters 1.848 + * of the given string. 1.849 + * @param s string containing characters to be checked for containment 1.850 + * @return true if the condition is met 1.851 + * @stable ICU 2.4 1.852 + */ 1.853 + inline UBool containsSome(const UnicodeString& s) const; 1.854 + 1.855 + /** 1.856 + * Returns the length of the initial substring of the input string which 1.857 + * consists only of characters and strings that are contained in this set 1.858 + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), 1.859 + * or only of characters and strings that are not contained 1.860 + * in this set (USET_SPAN_NOT_CONTAINED). 1.861 + * See USetSpanCondition for details. 1.862 + * Similar to the strspn() C library function. 1.863 + * Unpaired surrogates are treated according to contains() of their surrogate code points. 1.864 + * This function works faster with a frozen set and with a non-negative string length argument. 
1.865 + * @param s start of the string 1.866 + * @param length of the string; can be -1 for NUL-terminated 1.867 + * @param spanCondition specifies the containment condition 1.868 + * @return the length of the initial substring according to the spanCondition; 1.869 + * 0 if the start of the string does not fit the spanCondition 1.870 + * @stable ICU 3.8 1.871 + * @see USetSpanCondition 1.872 + */ 1.873 + int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; 1.874 + 1.875 + /** 1.876 + * Returns the end of the substring of the input string according to the USetSpanCondition. 1.877 + * Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code> 1.878 + * after pinning start to 0<=start<=s.length(). 1.879 + * @param s the string 1.880 + * @param start the start index in the string for the span operation 1.881 + * @param spanCondition specifies the containment condition 1.882 + * @return the exclusive end of the substring according to the spanCondition; 1.883 + * the substring s.tempSubStringBetween(start, end) fulfills the spanCondition 1.884 + * @stable ICU 4.4 1.885 + * @see USetSpanCondition 1.886 + */ 1.887 + inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const; 1.888 + 1.889 + /** 1.890 + * Returns the start of the trailing substring of the input string which 1.891 + * consists only of characters and strings that are contained in this set 1.892 + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), 1.893 + * or only of characters and strings that are not contained 1.894 + * in this set (USET_SPAN_NOT_CONTAINED). 1.895 + * See USetSpanCondition for details. 1.896 + * Unpaired surrogates are treated according to contains() of their surrogate code points. 1.897 + * This function works faster with a frozen set and with a non-negative string length argument. 
1.898 + * @param s start of the string 1.899 + * @param length of the string; can be -1 for NUL-terminated 1.900 + * @param spanCondition specifies the containment condition 1.901 + * @return the start of the trailing substring according to the spanCondition; 1.902 + * the string length if the end of the string does not fit the spanCondition 1.903 + * @stable ICU 3.8 1.904 + * @see USetSpanCondition 1.905 + */ 1.906 + int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; 1.907 + 1.908 + /** 1.909 + * Returns the start of the substring of the input string according to the USetSpanCondition. 1.910 + * Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code> 1.911 + * after pinning limit to 0<=limit<=s.length(). 1.912 + * @param s the string 1.913 + * @param limit the exclusive-end index in the string for the span operation 1.914 + * (use s.length() or INT32_MAX for spanning back from the end of the string) 1.915 + * @param spanCondition specifies the containment condition 1.916 + * @return the start of the substring according to the spanCondition; 1.917 + * the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition 1.918 + * @stable ICU 4.4 1.919 + * @see USetSpanCondition 1.920 + */ 1.921 + inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const; 1.922 + 1.923 + /** 1.924 + * Returns the length of the initial substring of the input string which 1.925 + * consists only of characters and strings that are contained in this set 1.926 + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), 1.927 + * or only of characters and strings that are not contained 1.928 + * in this set (USET_SPAN_NOT_CONTAINED). 1.929 + * See USetSpanCondition for details. 1.930 + * Similar to the strspn() C library function. 1.931 + * Malformed byte sequences are treated according to contains(0xfffd).
1.932 + * This function works faster with a frozen set and with a non-negative string length argument. 1.933 + * @param s start of the string (UTF-8) 1.934 + * @param length of the string; can be -1 for NUL-terminated 1.935 + * @param spanCondition specifies the containment condition 1.936 + * @return the length of the initial substring according to the spanCondition; 1.937 + * 0 if the start of the string does not fit the spanCondition 1.938 + * @stable ICU 3.8 1.939 + * @see USetSpanCondition 1.940 + */ 1.941 + int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const; 1.942 + 1.943 + /** 1.944 + * Returns the start of the trailing substring of the input string which 1.945 + * consists only of characters and strings that are contained in this set 1.946 + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), 1.947 + * or only of characters and strings that are not contained 1.948 + * in this set (USET_SPAN_NOT_CONTAINED). 1.949 + * See USetSpanCondition for details. 1.950 + * Malformed byte sequences are treated according to contains(0xfffd). 1.951 + * This function works faster with a frozen set and with a non-negative string length argument. 
1.952 + * @param s start of the string (UTF-8) 1.953 + * @param length of the string; can be -1 for NUL-terminated 1.954 + * @param spanCondition specifies the containment condition 1.955 + * @return the start of the trailing substring according to the spanCondition; 1.956 + * the string length if the end of the string does not fit the spanCondition 1.957 + * @stable ICU 3.8 1.958 + * @see USetSpanCondition 1.959 + */ 1.960 + int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const; 1.961 + 1.962 + /** 1.963 + * Implement UnicodeMatcher::matches() 1.964 + * @stable ICU 2.4 1.965 + */ 1.966 + virtual UMatchDegree matches(const Replaceable& text, 1.967 + int32_t& offset, 1.968 + int32_t limit, 1.969 + UBool incremental); 1.970 + 1.971 +private: 1.972 + /** 1.973 + * Returns the longest match for s in text at the given position. 1.974 + * If limit > start then match forward from start+1 to limit 1.975 + * matching all characters except s.charAt(0). If limit < start, 1.976 + * go backward starting from start-1 matching all characters 1.977 + * except s.charAt(s.length()-1). This method assumes that the 1.978 + * first character, text.charAt(start), matches s, so it does not 1.979 + * check it. 1.980 + * @param text the text to match 1.981 + * @param start the first character to match. In the forward 1.982 + * direction, text.charAt(start) is matched against s.charAt(0). 1.983 + * In the reverse direction, it is matched against 1.984 + * s.charAt(s.length()-1). 1.985 + * @param limit the limit offset for matching, either last+1 in 1.986 + * the forward direction, or last-1 in the reverse direction, 1.987 + * where last is the index of the last character to match. 1.988 + * @param s 1.989 + * @return If part of s matches up to the limit, return |limit - 1.990 + * start|. If all of s matches before reaching the limit, return 1.991 + * s.length(). 
If there is a mismatch between s and text, return 1.992 + * 0 1.993 + */ 1.994 + static int32_t matchRest(const Replaceable& text, 1.995 + int32_t start, int32_t limit, 1.996 + const UnicodeString& s); 1.997 + 1.998 + /** 1.999 + * Returns the smallest value i such that c < list[i]. Caller 1.1000 + * must ensure that c is a legal value or this method will enter 1.1001 + * an infinite loop. This method performs a binary search. 1.1002 + * @param c a character in the range MIN_VALUE..MAX_VALUE 1.1003 + * inclusive 1.1004 + * @return the smallest integer i in the range 0..len-1, 1.1005 + * inclusive, such that c < list[i] 1.1006 + */ 1.1007 + int32_t findCodePoint(UChar32 c) const; 1.1008 + 1.1009 +public: 1.1010 + 1.1011 + /** 1.1012 + * Implementation of UnicodeMatcher API. Union the set of all 1.1013 + * characters that may be matched by this object into the given 1.1014 + * set. 1.1015 + * @param toUnionTo the set into which to union the source characters 1.1016 + * @stable ICU 2.4 1.1017 + */ 1.1018 + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; 1.1019 + 1.1020 + /** 1.1021 + * Returns the index of the given character within this set, where 1.1022 + * the set is ordered by ascending code point. If the character 1.1023 + * is not in this set, return -1. The inverse of this method is 1.1024 + * <code>charAt()</code>. 1.1025 + * @return an index from 0..size()-1, or -1 1.1026 + * @stable ICU 2.4 1.1027 + */ 1.1028 + int32_t indexOf(UChar32 c) const; 1.1029 + 1.1030 + /** 1.1031 + * Returns the character at the given index within this set, where 1.1032 + * the set is ordered by ascending code point. If the index is 1.1033 + * out of range, return (UChar32)-1. The inverse of this method is 1.1034 + * <code>indexOf()</code>. 1.1035 + * @param index an index from 0..size()-1 1.1036 + * @return the character at the given index, or (UChar32)-1. 
1.1037 + * @stable ICU 2.4 1.1038 + */ 1.1039 + UChar32 charAt(int32_t index) const; 1.1040 + 1.1041 + /** 1.1042 + * Adds the specified range to this set if it is not already 1.1043 + * present. If this set already contains the specified range, 1.1044 + * the call leaves this set unchanged. If <code>start > end</code> 1.1045 + * then an empty range is added, leaving the set unchanged. 1.1046 + * This is equivalent to a boolean logic OR, or a set UNION. 1.1047 + * A frozen set will not be modified. 1.1048 + * 1.1049 + * @param start first character, inclusive, of range to be added 1.1050 + * to this set. 1.1051 + * @param end last character, inclusive, of range to be added 1.1052 + * to this set. 1.1053 + * @stable ICU 2.0 1.1054 + */ 1.1055 + virtual UnicodeSet& add(UChar32 start, UChar32 end); 1.1056 + 1.1057 + /** 1.1058 + * Adds the specified character to this set if it is not already 1.1059 + * present. If this set already contains the specified character, 1.1060 + * the call leaves this set unchanged. 1.1061 + * A frozen set will not be modified. 1.1062 + * @stable ICU 2.0 1.1063 + */ 1.1064 + UnicodeSet& add(UChar32 c); 1.1065 + 1.1066 + /** 1.1067 + * Adds the specified multicharacter string to this set if it is not already 1.1068 + * present. If this set already contains the multicharacter string, 1.1069 + * the call leaves this set unchanged. 1.1070 + * Thus "ch" => {"ch"} 1.1071 + * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 1.1072 + * A frozen set will not be modified. 1.1073 + * @param s the source string 1.1074 + * @return this object, for chaining 1.1075 + * @stable ICU 2.4 1.1076 + */ 1.1077 + UnicodeSet& add(const UnicodeString& s); 1.1078 + 1.1079 + private: 1.1080 + /** 1.1081 + * @return a code point IF the string consists of a single one. 1.1082 + * otherwise returns -1.
1.1083 + * @param s string to test 1.1084 + */ 1.1085 + static int32_t getSingleCP(const UnicodeString& s); 1.1086 + 1.1087 + void _add(const UnicodeString& s); 1.1088 + 1.1089 + public: 1.1090 + /** 1.1091 + * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} 1.1092 + * If this set already contains any particular character, it has no effect on that character. 1.1093 + * A frozen set will not be modified. 1.1094 + * @param s the source string 1.1095 + * @return this object, for chaining 1.1096 + * @stable ICU 2.4 1.1097 + */ 1.1098 + UnicodeSet& addAll(const UnicodeString& s); 1.1099 + 1.1100 + /** 1.1101 + * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} 1.1102 + * Characters of this set that do not occur in the string are removed. 1.1103 + * A frozen set will not be modified. 1.1104 + * @param s the source string 1.1105 + * @return this object, for chaining 1.1106 + * @stable ICU 2.4 1.1107 + */ 1.1108 + UnicodeSet& retainAll(const UnicodeString& s); 1.1109 + 1.1110 + /** 1.1111 + * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} 1.1112 + * Characters of the string that are in this set are removed; those that are not are added. 1.1113 + * A frozen set will not be modified. 1.1114 + * @param s the source string 1.1115 + * @return this object, for chaining 1.1116 + * @stable ICU 2.4 1.1117 + */ 1.1118 + UnicodeSet& complementAll(const UnicodeString& s); 1.1119 + 1.1120 + /** 1.1121 + * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} 1.1122 + * Characters of the string that are not in this set have no effect. 1.1123 + * A frozen set will not be modified. 1.1124 + * @param s the source string 1.1125 + * @return this object, for chaining 1.1126 + * @stable ICU 2.4 1.1127 + */ 1.1128 + UnicodeSet& removeAll(const UnicodeString& s); 1.1129 + 1.1130 + /** 1.1131 + * Makes a set from a multicharacter string.
Thus "ch" => {"ch"} 1.1132 + * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 1.1133 + * @param s the source string 1.1134 + * @return a newly created set containing the given string. 1.1135 + * The caller owns the return object and is responsible for deleting it. 1.1136 + * @stable ICU 2.4 1.1137 + */ 1.1138 + static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s); 1.1139 + 1.1140 + 1.1141 + /** 1.1142 + * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"} 1.1143 + * @param s the source string 1.1144 + * @return a newly created set containing the given characters. 1.1145 + * The caller owns the return object and is responsible for deleting it. 1.1146 + * @stable ICU 2.4 1.1147 + */ 1.1148 + static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s); 1.1149 + 1.1150 + /** 1.1151 + * Retain only the elements in this set that are contained in the 1.1152 + * specified range. If <code>start > end</code> then an empty range is 1.1153 + * retained, leaving the set empty. This is equivalent to 1.1154 + * a boolean logic AND, or a set INTERSECTION. 1.1155 + * A frozen set will not be modified. 1.1156 + * 1.1157 + * @param start first character, inclusive, of range to be retained 1.1158 + * in this set. 1.1159 + * @param end last character, inclusive, of range to be retained 1.1160 + * in this set. 1.1161 + * @stable ICU 2.0 1.1162 + */ 1.1163 + virtual UnicodeSet& retain(UChar32 start, UChar32 end); 1.1164 + 1.1165 + 1.1166 + /** 1.1167 + * Retain the specified character from this set if it is present. 1.1168 + * A frozen set will not be modified. 1.1169 + * @stable ICU 2.0 1.1170 + */ 1.1171 + UnicodeSet& retain(UChar32 c); 1.1172 + 1.1173 + /** 1.1174 + * Removes the specified range from this set if it is present. 1.1175 + * The set will not contain the specified range once the call 1.1176 + * returns. If <code>start > end</code> then an empty range is 1.1177 + * removed, leaving the set unchanged.
1.1178 + * A frozen set will not be modified. 1.1179 + * 1.1180 + * @param start first character, inclusive, of range to be removed 1.1181 + * from this set. 1.1182 + * @param end last character, inclusive, of range to be removed 1.1183 + * from this set. 1.1184 + * @stable ICU 2.0 1.1185 + */ 1.1186 + virtual UnicodeSet& remove(UChar32 start, UChar32 end); 1.1187 + 1.1188 + /** 1.1189 + * Removes the specified character from this set if it is present. 1.1190 + * The set will not contain the specified character once the call 1.1191 + * returns. 1.1192 + * A frozen set will not be modified. 1.1193 + * @stable ICU 2.0 1.1194 + */ 1.1195 + UnicodeSet& remove(UChar32 c); 1.1196 + 1.1197 + /** 1.1198 + * Removes the specified string from this set if it is present. 1.1199 + * The set will not contain the specified string once the call 1.1200 + * returns. 1.1201 + * A frozen set will not be modified. 1.1202 + * @param s the source string 1.1203 + * @return this object, for chaining 1.1204 + * @stable ICU 2.4 1.1205 + */ 1.1206 + UnicodeSet& remove(const UnicodeString& s); 1.1207 + 1.1208 + /** 1.1209 + * Inverts this set. This operation modifies this set so that 1.1210 + * its value is its complement. This is equivalent to 1.1211 + * <code>complement(MIN_VALUE, MAX_VALUE)</code>. 1.1212 + * A frozen set will not be modified. 1.1213 + * @stable ICU 2.0 1.1214 + */ 1.1215 + virtual UnicodeSet& complement(void); 1.1216 + 1.1217 + /** 1.1218 + * Complements the specified range in this set. Any character in 1.1219 + * the range will be removed if it is in this set, or will be 1.1220 + * added if it is not in this set. If <code>start > end</code> 1.1221 + * then an empty range is complemented, leaving the set unchanged. 1.1222 + * This is equivalent to a boolean logic XOR. 1.1223 + * A frozen set will not be modified. 1.1224 + * 1.1225 + * @param start first character, inclusive, of range to be complemented 1.1226 + * in this set.
1.1227 + * @param end last character, inclusive, of range to be complemented 1.1228 + * in this set. 1.1229 + * @stable ICU 2.0 1.1230 + */ 1.1231 + virtual UnicodeSet& complement(UChar32 start, UChar32 end); 1.1232 + 1.1233 + /** 1.1234 + * Complements the specified character in this set. The character 1.1235 + * will be removed if it is in this set, or will be added if it is 1.1236 + * not in this set. 1.1237 + * A frozen set will not be modified. 1.1238 + * @stable ICU 2.0 1.1239 + */ 1.1240 + UnicodeSet& complement(UChar32 c); 1.1241 + 1.1242 + /** 1.1243 + * Complement the specified string in this set. 1.1244 + * The string will be removed if it is in this set, or will be added 1.1245 + * if it is not in this set. 1.1246 + * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 1.1247 + * A frozen set will not be modified. 1.1248 + * @param s the string to complement 1.1249 + * @return this object, for chaining 1.1250 + * @stable ICU 2.4 1.1251 + */ 1.1252 + UnicodeSet& complement(const UnicodeString& s); 1.1253 + 1.1254 + /** 1.1255 + * Adds all of the elements in the specified set to this set if 1.1256 + * they're not already present. This operation effectively 1.1257 + * modifies this set so that its value is the <i>union</i> of the two 1.1258 + * sets. The behavior of this operation is unspecified if the specified 1.1259 + * collection is modified while the operation is in progress. 1.1260 + * A frozen set will not be modified. 1.1261 + * 1.1262 + * @param c set whose elements are to be added to this set. 1.1263 + * @see #add(UChar32, UChar32) 1.1264 + * @stable ICU 2.0 1.1265 + */ 1.1266 + virtual UnicodeSet& addAll(const UnicodeSet& c); 1.1267 + 1.1268 + /** 1.1269 + * Retains only the elements in this set that are contained in the 1.1270 + * specified set. In other words, removes from this set all of 1.1271 + * its elements that are not contained in the specified set.
This 1.1272 + * operation effectively modifies this set so that its value is 1.1273 + * the <i>intersection</i> of the two sets. 1.1274 + * A frozen set will not be modified. 1.1275 + * 1.1276 + * @param c set that defines which elements this set will retain. 1.1277 + * @stable ICU 2.0 1.1278 + */ 1.1279 + virtual UnicodeSet& retainAll(const UnicodeSet& c); 1.1280 + 1.1281 + /** 1.1282 + * Removes from this set all of its elements that are contained in the 1.1283 + * specified set. This operation effectively modifies this 1.1284 + * set so that its value is the <i>asymmetric set difference</i> of 1.1285 + * the two sets. 1.1286 + * A frozen set will not be modified. 1.1287 + * 1.1288 + * @param c set that defines which elements will be removed from 1.1289 + * this set. 1.1290 + * @stable ICU 2.0 1.1291 + */ 1.1292 + virtual UnicodeSet& removeAll(const UnicodeSet& c); 1.1293 + 1.1294 + /** 1.1295 + * Complements in this set all elements contained in the specified 1.1296 + * set. Any character in the other set will be removed if it is 1.1297 + * in this set, or will be added if it is not in this set. 1.1298 + * A frozen set will not be modified. 1.1299 + * 1.1300 + * @param c set that defines which elements will be xor'ed from 1.1301 + * this set. 1.1302 + * @stable ICU 2.4 1.1303 + */ 1.1304 + virtual UnicodeSet& complementAll(const UnicodeSet& c); 1.1305 + 1.1306 + /** 1.1307 + * Removes all of the elements from this set. This set will be 1.1308 + * empty after this call returns. 1.1309 + * A frozen set will not be modified. 1.1310 + * @stable ICU 2.0 1.1311 + */ 1.1312 + virtual UnicodeSet& clear(void); 1.1313 + 1.1314 + /** 1.1315 + * Close this set over the given attribute. For the attribute 1.1316 + * USET_CASE, the result is to modify this set so that: 1.1317 + * 1.1318 + * 1. For each character or string 'a' in this set, all strings or 1.1319 + * characters 'b' such that foldCase(a) == foldCase(b) are added 1.1320 + * to this set. 1.1321 + * 1.1322 + * 2. 
For each string 'e' in the resulting set, if e != 1.1323 + * foldCase(e), 'e' will be removed. 1.1324 + * 1.1325 + * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] 1.1326 + * 1.1327 + * (Here foldCase(x) refers to the operation u_strFoldCase, and a 1.1328 + * == b denotes that the contents are the same, not pointer 1.1329 + * comparison.) 1.1330 + * 1.1331 + * A frozen set will not be modified. 1.1332 + * 1.1333 + * @param attribute bitmask for attributes to close over. 1.1334 + * Currently only the USET_CASE bit is supported. Any undefined bits 1.1335 + * are ignored. 1.1336 + * @return a reference to this set. 1.1337 + * @stable ICU 4.2 1.1338 + */ 1.1339 + UnicodeSet& closeOver(int32_t attribute); 1.1340 + 1.1341 + /** 1.1342 + * Remove all strings from this set. 1.1343 + * 1.1344 + * @return a reference to this set. 1.1345 + * @stable ICU 4.2 1.1346 + */ 1.1347 + virtual UnicodeSet &removeAllStrings(); 1.1348 + 1.1349 + /** 1.1350 + * Iteration method that returns the number of ranges contained in 1.1351 + * this set. 1.1352 + * @see #getRangeStart 1.1353 + * @see #getRangeEnd 1.1354 + * @stable ICU 2.4 1.1355 + */ 1.1356 + virtual int32_t getRangeCount(void) const; 1.1357 + 1.1358 + /** 1.1359 + * Iteration method that returns the first character in the 1.1360 + * specified range of this set. 1.1361 + * @see #getRangeCount 1.1362 + * @see #getRangeEnd 1.1363 + * @stable ICU 2.4 1.1364 + */ 1.1365 + virtual UChar32 getRangeStart(int32_t index) const; 1.1366 + 1.1367 + /** 1.1368 + * Iteration method that returns the last character in the 1.1369 + * specified range of this set. 1.1370 + * @see #getRangeStart 1.1371 + * @see #getRangeCount 1.1372 + * @stable ICU 2.4 1.1373 + */ 1.1374 + virtual UChar32 getRangeEnd(int32_t index) const; 1.1375 + 1.1376 + /** 1.1377 + * Serializes this set into an array of 16-bit integers. Serialization 1.1378 + * (currently) only records the characters in the set; multicharacter 1.1379 + * strings are ignored.
1.1380 + * 1.1381 + * The array has following format (each line is one 16-bit 1.1382 + * integer): 1.1383 + * 1.1384 + * length = (n+2*m) | (m!=0?0x8000:0) 1.1385 + * bmpLength = n; present if m!=0 1.1386 + * bmp[0] 1.1387 + * bmp[1] 1.1388 + * ... 1.1389 + * bmp[n-1] 1.1390 + * supp-high[0] 1.1391 + * supp-low[0] 1.1392 + * supp-high[1] 1.1393 + * supp-low[1] 1.1394 + * ... 1.1395 + * supp-high[m-1] 1.1396 + * supp-low[m-1] 1.1397 + * 1.1398 + * The array starts with a header. After the header are n bmp 1.1399 + * code points, then m supplementary code points. Either n or m 1.1400 + * or both may be zero. n+2*m is always <= 0x7FFF. 1.1401 + * 1.1402 + * If there are no supplementary characters (if m==0) then the 1.1403 + * header is one 16-bit integer, 'length', with value n. 1.1404 + * 1.1405 + * If there are supplementary characters (if m!=0) then the header 1.1406 + * is two 16-bit integers. The first, 'length', has value 1.1407 + * (n+2*m)|0x8000. The second, 'bmpLength', has value n. 1.1408 + * 1.1409 + * After the header the code points are stored in ascending order. 1.1410 + * Supplementary code points are stored as most significant 16 1.1411 + * bits followed by least significant 16 bits. 1.1412 + * 1.1413 + * @param dest pointer to buffer of destCapacity 16-bit integers. 1.1414 + * May be NULL only if destCapacity is zero. 1.1415 + * @param destCapacity size of dest, or zero. Must not be negative. 1.1416 + * @param ec error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR 1.1417 + * if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if 1.1418 + * n+2*m+(m!=0?2:1) > destCapacity. 1.1419 + * @return the total length of the serialized format, including 1.1420 + * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other 1.1421 + * than U_BUFFER_OVERFLOW_ERROR. 
1.1422 + * @stable ICU 2.4 1.1423 + */ 1.1424 + int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const; 1.1425 + 1.1426 + /** 1.1427 + * Reallocate this object's internal structures to take up the least 1.1428 + * possible space, without changing this object's value. 1.1429 + * A frozen set will not be modified. 1.1430 + * @stable ICU 2.4 1.1431 + */ 1.1432 + virtual UnicodeSet& compact(); 1.1433 + 1.1434 + /** 1.1435 + * Return the class ID for this class. This is useful only for 1.1436 + * comparing to a return value from getDynamicClassID(). For example: 1.1437 + * <pre> 1.1438 + * . Base* polymorphic_pointer = createPolymorphicObject(); 1.1439 + * . if (polymorphic_pointer->getDynamicClassID() == 1.1440 + * . Derived::getStaticClassID()) ... 1.1441 + * </pre> 1.1442 + * @return The class ID for all objects of this class. 1.1443 + * @stable ICU 2.0 1.1444 + */ 1.1445 + static UClassID U_EXPORT2 getStaticClassID(void); 1.1446 + 1.1447 + /** 1.1448 + * Implement UnicodeFunctor API. 1.1449 + * 1.1450 + * @return The class ID for this object. All objects of a given 1.1451 + * class have the same class ID. Objects of other classes have 1.1452 + * different class IDs. 1.1453 + * @stable ICU 2.4 1.1454 + */ 1.1455 + virtual UClassID getDynamicClassID(void) const; 1.1456 + 1.1457 +private: 1.1458 + 1.1459 + // Private API for the USet API 1.1460 + 1.1461 + friend class USetAccess; 1.1462 + 1.1463 + int32_t getStringCount() const; 1.1464 + 1.1465 + const UnicodeString* getString(int32_t index) const; 1.1466 + 1.1467 + //---------------------------------------------------------------- 1.1468 + // RuleBasedTransliterator support 1.1469 + //---------------------------------------------------------------- 1.1470 + 1.1471 +private: 1.1472 + 1.1473 + /** 1.1474 + * Returns <tt>true</tt> if this set contains any character whose low byte 1.1475 + * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for 1.1476 + * indexing.
1.1477 + */ 1.1478 + virtual UBool matchesIndexValue(uint8_t v) const; 1.1479 + 1.1480 +private: 1.1481 + friend class RBBIRuleScanner; 1.1482 + 1.1483 + //---------------------------------------------------------------- 1.1484 + // Implementation: Clone as thawed (see ICU4J Freezable) 1.1485 + //---------------------------------------------------------------- 1.1486 + 1.1487 + UnicodeSet(const UnicodeSet& o, UBool /* asThawed */); 1.1488 + 1.1489 + //---------------------------------------------------------------- 1.1490 + // Implementation: Pattern parsing 1.1491 + //---------------------------------------------------------------- 1.1492 + 1.1493 + void applyPatternIgnoreSpace(const UnicodeString& pattern, 1.1494 + ParsePosition& pos, 1.1495 + const SymbolTable* symbols, 1.1496 + UErrorCode& status); 1.1497 + 1.1498 + void applyPattern(RuleCharacterIterator& chars, 1.1499 + const SymbolTable* symbols, 1.1500 + UnicodeString& rebuiltPat, 1.1501 + uint32_t options, 1.1502 + UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), 1.1503 + UErrorCode& ec); 1.1504 + 1.1505 + //---------------------------------------------------------------- 1.1506 + // Implementation: Utility methods 1.1507 + //---------------------------------------------------------------- 1.1508 + 1.1509 + void ensureCapacity(int32_t newLen, UErrorCode& ec); 1.1510 + 1.1511 + void ensureBufferCapacity(int32_t newLen, UErrorCode& ec); 1.1512 + 1.1513 + void swapBuffers(void); 1.1514 + 1.1515 + UBool allocateStrings(UErrorCode &status); 1.1516 + 1.1517 + UnicodeString& _toPattern(UnicodeString& result, 1.1518 + UBool escapeUnprintable) const; 1.1519 + 1.1520 + UnicodeString& _generatePattern(UnicodeString& result, 1.1521 + UBool escapeUnprintable) const; 1.1522 + 1.1523 + static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable); 1.1524 + 1.1525 + static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable); 1.1526 + 1.1527 + 
//----------------------------------------------------------------
// Implementation: Fundamental operators
//----------------------------------------------------------------

// The three operators below share one signature: a const UChar32 pointer
// with an accompanying length, plus an int8_t polarity.
// NOTE(review): the layout of the data behind `other` and the meaning of
// `polarity` are not visible in this header section -- confirm in the
// implementation.

void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);

void add(const UChar32* other, int32_t otherLen, int8_t polarity);

void retain(const UChar32* other, int32_t otherLen, int8_t polarity);

/**
 * Return true if the given position, in the given pattern, appears
 * to be the start of a property set pattern [:foo:], \\p{foo},
 * \\P{foo}, or \\N{name}.
 * @param pattern the pattern string
 * @param pos     the position within pattern to examine
 */
static UBool resemblesPropertyPattern(const UnicodeString& pattern,
                                      int32_t pos);

// Overload that reads from a RuleCharacterIterator instead of a string
// and position.
// NOTE(review): iterOpts is presumably a set of iterator option bits --
// confirm.
static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
                                      int32_t iterOpts);

/**
 * Parse the given property pattern at the given parse position
 * and set this UnicodeSet to the result.
 *
 * The original design document is out of date, but still useful.
 * Ignore the property and value names:
 * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html
 *
 * Recognized syntax:
 *
 * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
 * \\p{foo} \\P{foo}  - white space not allowed within "\\p" or "\\P"
 * \\N{name}         - white space not allowed within "\\N"
 *
 * Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
 * Case is ignored except in "\\p" and "\\P" and "\\N".  In 'name' leading
 * and trailing space is deleted, and internal runs of whitespace
 * are collapsed to a single space.
 *
 * We support binary properties, enumerated properties, and the
 * following non-enumerated properties:
 *
 *  Numeric_Value
 *  Name
 *  Unicode_1_Name
 *
 * @param pattern the pattern string
 * @param ppos on entry, the position at which to begin parsing.
 * This should be one of the locations marked '^':
 *
 *   [:blah:]     \\p{blah}     \\P{blah}     \\N{name}
 *   ^       %    ^       %     ^       %     ^       %
 *
 * On return, the position after the last character parsed, that is,
 * the locations marked '%'.  If the parse fails, ppos is returned
 * unchanged.
 * @param ec status
 * @return a reference to this.
 */
UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
                                 ParsePosition& ppos,
                                 UErrorCode &ec);

// Overload that reads the property pattern from a RuleCharacterIterator.
// NOTE(review): rebuiltPat is taken by non-const reference and presumably
// receives the regenerated pattern text -- confirm.
void applyPropertyPattern(RuleCharacterIterator& chars,
                          UnicodeString& rebuiltPat,
                          UErrorCode& ec);

// Befriends the namespace-level UnicodeSet_initInclusion() that is
// forward-declared (and marked @internal) at the top of this header.
friend void UnicodeSet_initInclusion(int32_t src, UErrorCode &status);
// Returns a pointer to a const UnicodeSet for the given src.
// NOTE(review): ownership and lifetime of the returned set are not shown
// here -- confirm before storing the pointer.
static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);

/**
 * A filter that returns TRUE if the given code point should be
 * included in the UnicodeSet being constructed.
 * The function receives the code point under test and an untyped
 * context pointer.
 */
typedef UBool (*Filter)(UChar32 codePoint, void* context);

/**
 * Given a filter, set this UnicodeSet to the code points
 * contained by that filter.  The filter MUST be
 * property-conformant.  That is, if it returns value v for one
 * code point, then it must return v for all affiliated code
 * points, as defined by the inclusions list.  See
 * getInclusions().
 * src is a UPropertySource value.
1.1612 + */ 1.1613 + void applyFilter(Filter filter, 1.1614 + void* context, 1.1615 + int32_t src, 1.1616 + UErrorCode &status); 1.1617 + 1.1618 + /** 1.1619 + * Set the new pattern to cache. 1.1620 + */ 1.1621 + void setPattern(const UnicodeString& newPat); 1.1622 + /** 1.1623 + * Release existing cached pattern. 1.1624 + */ 1.1625 + void releasePattern(); 1.1626 + 1.1627 + friend class UnicodeSetIterator; 1.1628 +}; 1.1629 + 1.1630 + 1.1631 + 1.1632 +inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const { 1.1633 + return !operator==(o); 1.1634 +} 1.1635 + 1.1636 +inline UBool UnicodeSet::isFrozen() const { 1.1637 + return (UBool)(bmpSet!=NULL || stringSpan!=NULL); 1.1638 +} 1.1639 + 1.1640 +inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const { 1.1641 + return !containsNone(start, end); 1.1642 +} 1.1643 + 1.1644 +inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const { 1.1645 + return !containsNone(s); 1.1646 +} 1.1647 + 1.1648 +inline UBool UnicodeSet::containsSome(const UnicodeString& s) const { 1.1649 + return !containsNone(s); 1.1650 +} 1.1651 + 1.1652 +inline UBool UnicodeSet::isBogus() const { 1.1653 + return (UBool)(fFlags & kIsBogus); 1.1654 +} 1.1655 + 1.1656 +inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) { 1.1657 + return reinterpret_cast<UnicodeSet *>(uset); 1.1658 +} 1.1659 + 1.1660 +inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) { 1.1661 + return reinterpret_cast<const UnicodeSet *>(uset); 1.1662 +} 1.1663 + 1.1664 +inline USet *UnicodeSet::toUSet() { 1.1665 + return reinterpret_cast<USet *>(this); 1.1666 +} 1.1667 + 1.1668 +inline const USet *UnicodeSet::toUSet() const { 1.1669 + return reinterpret_cast<const USet *>(this); 1.1670 +} 1.1671 + 1.1672 +inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const { 1.1673 + int32_t sLength=s.length(); 1.1674 + if(start<0) { 1.1675 + start=0; 1.1676 + } else if(start>sLength) { 1.1677 + 
start=sLength; 1.1678 + } 1.1679 + return start+span(s.getBuffer()+start, sLength-start, spanCondition); 1.1680 +} 1.1681 + 1.1682 +inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const { 1.1683 + int32_t sLength=s.length(); 1.1684 + if(limit<0) { 1.1685 + limit=0; 1.1686 + } else if(limit>sLength) { 1.1687 + limit=sLength; 1.1688 + } 1.1689 + return spanBack(s.getBuffer(), limit, spanCondition); 1.1690 +} 1.1691 + 1.1692 +U_NAMESPACE_END 1.1693 + 1.1694 +#endif