Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2002-2012, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: uset.h |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2002mar07 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | * |
michael@0 | 16 | * C version of UnicodeSet. |
michael@0 | 17 | */ |
michael@0 | 18 | |
michael@0 | 19 | |
michael@0 | 20 | /** |
michael@0 | 21 | * \file |
michael@0 | 22 | * \brief C API: Unicode Set |
michael@0 | 23 | * |
michael@0 | 24 | * <p>This is a C wrapper around the C++ UnicodeSet class.</p> |
michael@0 | 25 | */ |
michael@0 | 26 | |
michael@0 | 27 | #ifndef __USET_H__ |
michael@0 | 28 | #define __USET_H__ |
michael@0 | 29 | |
michael@0 | 30 | #include "unicode/utypes.h" |
michael@0 | 31 | #include "unicode/uchar.h" |
michael@0 | 32 | #include "unicode/localpointer.h" |
michael@0 | 33 | |
michael@0 | 34 | #ifndef UCNV_H |
michael@0 | 35 | struct USet; |
michael@0 | 36 | /** |
michael@0 | 37 | * A UnicodeSet. Use the uset_* API to manipulate. Create with |
michael@0 | 38 | * uset_open*, and destroy with uset_close. |
michael@0 | 39 | * @stable ICU 2.4 |
michael@0 | 40 | */ |
michael@0 | 41 | typedef struct USet USet; |
michael@0 | 42 | #endif |
michael@0 | 43 | |
michael@0 | 44 | /** |
michael@0 | 45 | * Bitmask values to be passed to uset_openPatternOptions() or |
michael@0 | 46 | * uset_applyPattern() taking an option parameter. |
michael@0 | 47 | * @stable ICU 2.4 |
michael@0 | 48 | */ |
michael@0 | 49 | enum { |
michael@0 | 50 | /** |
michael@0 | 51 | * Ignore white space within patterns unless quoted or escaped. |
michael@0 | 52 | * @stable ICU 2.4 |
michael@0 | 53 | */ |
michael@0 | 54 | USET_IGNORE_SPACE = 1, |
michael@0 | 55 | |
michael@0 | 56 | /** |
michael@0 | 57 | * Enable case insensitive matching. E.g., "[ab]" with this flag |
michael@0 | 58 | * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will |
michael@0 | 59 | * match all except 'a', 'A', 'b', and 'B'. This performs a full |
michael@0 | 60 | * closure over case mappings, e.g. U+017F for s. |
michael@0 | 61 | * |
michael@0 | 62 | * The resulting set is a superset of the input for the code points but |
michael@0 | 63 | * not for the strings. |
michael@0 | 64 | * It performs a case mapping closure of the code points and adds |
michael@0 | 65 | * full case folding strings for the code points, and reduces strings of |
michael@0 | 66 | * the original set to their full case folding equivalents. |
michael@0 | 67 | * |
michael@0 | 68 | * This is designed for case-insensitive matches, for example |
michael@0 | 69 | * in regular expressions. The full code point case closure allows checking of |
michael@0 | 70 | * an input character directly against the closure set. |
michael@0 | 71 | * Strings are matched by comparing the case-folded form from the closure |
michael@0 | 72 | * set with an incremental case folding of the string in question. |
michael@0 | 73 | * |
michael@0 | 74 | * The closure set will also contain single code points if the original |
michael@0 | 75 | * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). |
michael@0 | 76 | * This is not necessary (that is, redundant) for the above matching method |
michael@0 | 77 | * but results in the same closure sets regardless of whether the original |
michael@0 | 78 | * set contained the code point or a string. |
michael@0 | 79 | * |
michael@0 | 80 | * @stable ICU 2.4 |
michael@0 | 81 | */ |
michael@0 | 82 | USET_CASE_INSENSITIVE = 2, |
michael@0 | 83 | |
michael@0 | 84 | /** |
michael@0 | 85 | * Enable case insensitive matching. E.g., "[ab]" with this flag |
michael@0 | 86 | * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will |
michael@0 | 87 | * match all except 'a', 'A', 'b', and 'B'. This adds the lower-, |
michael@0 | 88 | * title-, and uppercase mappings as well as the case folding |
michael@0 | 89 | * of each existing element in the set. |
michael@0 | 90 | * @stable ICU 3.2 |
michael@0 | 91 | */ |
michael@0 | 92 | USET_ADD_CASE_MAPPINGS = 4 |
michael@0 | 93 | }; |
michael@0 | 94 | |
michael@0 | 95 | /** |
michael@0 | 96 | * Argument values for whether span() and similar functions continue while |
michael@0 | 97 | * the current character is contained vs. not contained in the set. |
michael@0 | 98 | * |
michael@0 | 99 | * The functionality is straightforward for sets with only single code points, |
michael@0 | 100 | * without strings (which is the common case): |
michael@0 | 101 | * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE |
michael@0 | 102 | * work the same. |
michael@0 | 103 | * - span() and spanBack() partition any string the same way when |
michael@0 | 104 | * alternating between span(USET_SPAN_NOT_CONTAINED) and |
michael@0 | 105 | * span(either "contained" condition). |
michael@0 | 106 | * - Using a complemented (inverted) set and the opposite span conditions |
michael@0 | 107 | * yields the same results. |
michael@0 | 108 | * |
michael@0 | 109 | * When a set contains multi-code point strings, then these statements may not |
michael@0 | 110 | * be true, depending on the strings in the set (for example, whether they |
michael@0 | 111 | * overlap with each other) and the string that is processed. |
michael@0 | 112 | * For a set with strings: |
michael@0 | 113 | * - The complement of the set contains the opposite set of code points, |
michael@0 | 114 | * but the same set of strings. |
michael@0 | 115 | * Therefore, complementing both the set and the span conditions |
michael@0 | 116 | * may yield different results. |
michael@0 | 117 | * - When starting spans at different positions in a string |
michael@0 | 118 | * (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different |
michael@0 | 119 | * because a set string may start before the later position. |
michael@0 | 120 | * - span(USET_SPAN_SIMPLE) may be shorter than |
michael@0 | 121 | * span(USET_SPAN_CONTAINED) because it will not recursively try |
michael@0 | 122 | * all possible paths. |
michael@0 | 123 | * For example, with a set which contains the three strings "xy", "xya" and "ax", |
michael@0 | 124 | * span("xyax", USET_SPAN_CONTAINED) will return 4 but |
michael@0 | 125 | * span("xyax", USET_SPAN_SIMPLE) will return 3. |
michael@0 | 126 | * span(USET_SPAN_SIMPLE) will never be longer than |
michael@0 | 127 | * span(USET_SPAN_CONTAINED). |
michael@0 | 128 | * - With either "contained" condition, span() and spanBack() may partition |
michael@0 | 129 | * a string in different ways. |
michael@0 | 130 | * For example, with a set which contains the two strings "ab" and "ba", |
michael@0 | 131 | * and when processing the string "aba", |
michael@0 | 132 | * span() will yield contained/not-contained boundaries of { 0, 2, 3 } |
michael@0 | 133 | * while spanBack() will yield boundaries of { 0, 1, 3 }. |
michael@0 | 134 | * |
michael@0 | 135 | * Note: If it is important to get the same boundaries whether iterating forward |
michael@0 | 136 | * or backward through a string, then either only span() should be used and |
michael@0 | 137 | * the boundaries cached for backward operation, or an ICU BreakIterator |
michael@0 | 138 | * could be used. |
michael@0 | 139 | * |
michael@0 | 140 | * Note: Unpaired surrogates are treated like surrogate code points. |
michael@0 | 141 | * Similarly, set strings match only on code point boundaries, |
michael@0 | 142 | * never in the middle of a surrogate pair. |
michael@0 | 143 | * Illegal UTF-8 sequences are treated like U+FFFD. |
michael@0 | 144 | * When processing UTF-8 strings, malformed set strings |
michael@0 | 145 | * (strings with unpaired surrogates which cannot be converted to UTF-8) |
michael@0 | 146 | * are ignored. |
michael@0 | 147 | * |
michael@0 | 148 | * @stable ICU 3.8 |
michael@0 | 149 | */ |
michael@0 | 150 | typedef enum USetSpanCondition { |
michael@0 | 151 | /** |
michael@0 | 152 | * Continue a span() while there is no set element at the current position. |
michael@0 | 153 | * Stops before the first set element (character or string). |
michael@0 | 154 | * (For code points only, this is like while contains(current)==FALSE). |
michael@0 | 155 | * |
michael@0 | 156 | * When span() returns, the substring between where it started and the position |
michael@0 | 157 | * it returned consists only of characters that are not in the set, |
michael@0 | 158 | * and none of its strings overlap with the span. |
michael@0 | 159 | * |
michael@0 | 160 | * @stable ICU 3.8 |
michael@0 | 161 | */ |
michael@0 | 162 | USET_SPAN_NOT_CONTAINED = 0, |
michael@0 | 163 | /** |
michael@0 | 164 | * Continue a span() while there is a set element at the current position. |
michael@0 | 165 | * (For characters only, this is like while contains(current)==TRUE). |
michael@0 | 166 | * |
michael@0 | 167 | * When span() returns, the substring between where it started and the position |
michael@0 | 168 | * it returned consists only of set elements (characters or strings) that are in the set. |
michael@0 | 169 | * |
michael@0 | 170 | * If a set contains strings, then the span will be the longest substring |
michael@0 | 171 | * matching any of the possible concatenations of set elements (characters or strings). |
michael@0 | 172 | * (There must be a single, non-overlapping concatenation of characters or strings.) |
michael@0 | 173 | * This is equivalent to a POSIX regular expression for (OR of each set element)*. |
michael@0 | 174 | * |
michael@0 | 175 | * @stable ICU 3.8 |
michael@0 | 176 | */ |
michael@0 | 177 | USET_SPAN_CONTAINED = 1, |
michael@0 | 178 | /** |
michael@0 | 179 | * Continue a span() while there is a set element at the current position. |
michael@0 | 180 | * (For characters only, this is like while contains(current)==TRUE). |
michael@0 | 181 | * |
michael@0 | 182 | * When span() returns, the substring between where it started and the position |
michael@0 | 183 | * it returned consists only of set elements (characters or strings) that are in the set. |
michael@0 | 184 | * |
michael@0 | 185 | * If a set only contains single characters, then this is the same |
michael@0 | 186 | * as USET_SPAN_CONTAINED. |
michael@0 | 187 | * |
michael@0 | 188 | * If a set contains strings, then the span will be the longest substring |
michael@0 | 189 | * with a match at each position with the longest single set element (character or string). |
michael@0 | 190 | * |
michael@0 | 191 | * Use this span condition together with other longest-match algorithms, |
michael@0 | 192 | * such as ICU converters (ucnv_getUnicodeSet()). |
michael@0 | 193 | * |
michael@0 | 194 | * @stable ICU 3.8 |
michael@0 | 195 | */ |
michael@0 | 196 | USET_SPAN_SIMPLE = 2, |
michael@0 | 197 | /** |
michael@0 | 198 | * One more than the last span condition. |
michael@0 | 199 | * @stable ICU 3.8 |
michael@0 | 200 | */ |
michael@0 | 201 | USET_SPAN_CONDITION_COUNT |
michael@0 | 202 | } USetSpanCondition; |
michael@0 | 203 | |
michael@0 | 204 | enum { |
michael@0 | 205 | /** |
michael@0 | 206 | * Capacity of USerializedSet::staticArray. |
michael@0 | 207 | * Enough for any single-code point set. |
michael@0 | 208 | * Also provides padding for nice sizeof(USerializedSet). |
michael@0 | 209 | * @stable ICU 2.4 |
michael@0 | 210 | */ |
michael@0 | 211 | USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 |
michael@0 | 212 | }; |
michael@0 | 213 | |
michael@0 | 214 | /** |
michael@0 | 215 | * A serialized form of a Unicode set. Limited manipulations are |
michael@0 | 216 | * possible directly on a serialized set. See below. |
michael@0 | 217 | * @stable ICU 2.4 |
michael@0 | 218 | */ |
michael@0 | 219 | typedef struct USerializedSet { |
michael@0 | 220 | /** |
michael@0 | 221 | * The serialized Unicode Set. |
michael@0 | 222 | * @stable ICU 2.4 |
michael@0 | 223 | */ |
michael@0 | 224 | const uint16_t *array; |
michael@0 | 225 | /** |
michael@0 | 226 | * The length of the array that contains BMP characters. |
michael@0 | 227 | * @stable ICU 2.4 |
michael@0 | 228 | */ |
michael@0 | 229 | int32_t bmpLength; |
michael@0 | 230 | /** |
michael@0 | 231 | * The total length of the array. |
michael@0 | 232 | * @stable ICU 2.4 |
michael@0 | 233 | */ |
michael@0 | 234 | int32_t length; |
michael@0 | 235 | /** |
michael@0 | 236 | * A small buffer for the array to reduce memory allocations. |
michael@0 | 237 | * @stable ICU 2.4 |
michael@0 | 238 | */ |
michael@0 | 239 | uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; |
michael@0 | 240 | } USerializedSet; |
michael@0 | 241 | |
michael@0 | 242 | /********************************************************************* |
michael@0 | 243 | * USet API |
michael@0 | 244 | *********************************************************************/ |
michael@0 | 245 | |
michael@0 | 246 | /** |
michael@0 | 247 | * Create an empty USet object. |
michael@0 | 248 | * Equivalent to uset_open(1, 0). |
michael@0 | 249 | * @return a newly created USet. The caller must call uset_close() on |
michael@0 | 250 | * it when done. |
michael@0 | 251 | * @stable ICU 4.2 |
michael@0 | 252 | */ |
michael@0 | 253 | U_STABLE USet* U_EXPORT2 |
michael@0 | 254 | uset_openEmpty(void); |
michael@0 | 255 | |
michael@0 | 256 | /** |
michael@0 | 257 | * Creates a USet object that contains the range of characters |
michael@0 | 258 | * start..end, inclusive. If <code>start > end</code> |
michael@0 | 259 | * then an empty set is created (same as using uset_openEmpty()). |
michael@0 | 260 | * @param start first character of the range, inclusive |
michael@0 | 261 | * @param end last character of the range, inclusive |
michael@0 | 262 | * @return a newly created USet. The caller must call uset_close() on |
michael@0 | 263 | * it when done. |
michael@0 | 264 | * @stable ICU 2.4 |
michael@0 | 265 | */ |
michael@0 | 266 | U_STABLE USet* U_EXPORT2 |
michael@0 | 267 | uset_open(UChar32 start, UChar32 end); |
michael@0 | 268 | |
michael@0 | 269 | /** |
michael@0 | 270 | * Creates a set from the given pattern. See the UnicodeSet class |
michael@0 | 271 | * description for the syntax of the pattern language. |
michael@0 | 272 | * @param pattern a string specifying what characters are in the set |
michael@0 | 273 | * @param patternLength the length of the pattern, or -1 if null |
michael@0 | 274 | * terminated |
michael@0 | 275 | * @param ec the error code |
michael@0 | 276 | * @stable ICU 2.4 |
michael@0 | 277 | */ |
michael@0 | 278 | U_STABLE USet* U_EXPORT2 |
michael@0 | 279 | uset_openPattern(const UChar* pattern, int32_t patternLength, |
michael@0 | 280 | UErrorCode* ec); |
michael@0 | 281 | |
michael@0 | 282 | /** |
michael@0 | 283 | * Creates a set from the given pattern. See the UnicodeSet class |
michael@0 | 284 | * description for the syntax of the pattern language. |
michael@0 | 285 | * @param pattern a string specifying what characters are in the set |
michael@0 | 286 | * @param patternLength the length of the pattern, or -1 if null |
michael@0 | 287 | * terminated |
michael@0 | 288 | * @param options bitmask for options to apply to the pattern. |
michael@0 | 289 | * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. |
michael@0 | 290 | * @param ec the error code |
michael@0 | 291 | * @stable ICU 2.4 |
michael@0 | 292 | */ |
michael@0 | 293 | U_STABLE USet* U_EXPORT2 |
michael@0 | 294 | uset_openPatternOptions(const UChar* pattern, int32_t patternLength, |
michael@0 | 295 | uint32_t options, |
michael@0 | 296 | UErrorCode* ec); |
michael@0 | 297 | |
michael@0 | 298 | /** |
michael@0 | 299 | * Disposes of the storage used by a USet object. This function should |
michael@0 | 300 | * be called exactly once for objects returned by the uset_open*() functions. |
michael@0 | 301 | * @param set the object to dispose of |
michael@0 | 302 | * @stable ICU 2.4 |
michael@0 | 303 | */ |
michael@0 | 304 | U_STABLE void U_EXPORT2 |
michael@0 | 305 | uset_close(USet* set); |
michael@0 | 306 | |
michael@0 | 307 | #if U_SHOW_CPLUSPLUS_API |
michael@0 | 308 | |
michael@0 | 309 | U_NAMESPACE_BEGIN |
michael@0 | 310 | |
michael@0 | 311 | /** |
michael@0 | 312 | * \class LocalUSetPointer |
michael@0 | 313 | * "Smart pointer" class, closes a USet via uset_close(). |
michael@0 | 314 | * For most methods see the LocalPointerBase base class. |
michael@0 | 315 | * |
michael@0 | 316 | * @see LocalPointerBase |
michael@0 | 317 | * @see LocalPointer |
michael@0 | 318 | * @stable ICU 4.4 |
michael@0 | 319 | */ |
michael@0 | 320 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close); |
michael@0 | 321 | |
michael@0 | 322 | U_NAMESPACE_END |
michael@0 | 323 | |
michael@0 | 324 | #endif |
michael@0 | 325 | |
michael@0 | 326 | /** |
michael@0 | 327 | * Returns a copy of this object. |
michael@0 | 328 | * If this set is frozen, then the clone will be frozen as well. |
michael@0 | 329 | * Use uset_cloneAsThawed() for a mutable clone of a frozen set. |
michael@0 | 330 | * @param set the original set |
michael@0 | 331 | * @return the newly allocated copy of the set |
michael@0 | 332 | * @see uset_cloneAsThawed |
michael@0 | 333 | * @stable ICU 3.8 |
michael@0 | 334 | */ |
michael@0 | 335 | U_STABLE USet * U_EXPORT2 |
michael@0 | 336 | uset_clone(const USet *set); |
michael@0 | 337 | |
michael@0 | 338 | /** |
michael@0 | 339 | * Determines whether the set has been frozen (made immutable) or not. |
michael@0 | 340 | * See the ICU4J Freezable interface for details. |
michael@0 | 341 | * @param set the set |
michael@0 | 342 | * @return TRUE/FALSE for whether the set has been frozen |
michael@0 | 343 | * @see uset_freeze |
michael@0 | 344 | * @see uset_cloneAsThawed |
michael@0 | 345 | * @stable ICU 3.8 |
michael@0 | 346 | */ |
michael@0 | 347 | U_STABLE UBool U_EXPORT2 |
michael@0 | 348 | uset_isFrozen(const USet *set); |
michael@0 | 349 | |
michael@0 | 350 | /** |
michael@0 | 351 | * Freeze the set (make it immutable). |
michael@0 | 352 | * Once frozen, it cannot be unfrozen and is therefore thread-safe |
michael@0 | 353 | * until it is deleted. |
michael@0 | 354 | * See the ICU4J Freezable interface for details. |
michael@0 | 355 | * Freezing the set may also make some operations faster, for example |
michael@0 | 356 | * uset_contains() and uset_span(). |
michael@0 | 357 | * A frozen set will not be modified. (It remains frozen.) |
michael@0 | 358 | * @param set the set to freeze |
michael@0 | 359 | * There is no return value; the set passed in is itself frozen. |
michael@0 | 360 | * @see uset_isFrozen |
michael@0 | 361 | * @see uset_cloneAsThawed |
michael@0 | 362 | * @stable ICU 3.8 |
michael@0 | 363 | */ |
michael@0 | 364 | U_STABLE void U_EXPORT2 |
michael@0 | 365 | uset_freeze(USet *set); |
michael@0 | 366 | |
michael@0 | 367 | /** |
michael@0 | 368 | * Clone the set and make the clone mutable. |
michael@0 | 369 | * See the ICU4J Freezable interface for details. |
michael@0 | 370 | * @param set the set |
michael@0 | 371 | * @return the mutable clone |
michael@0 | 372 | * @see uset_freeze |
michael@0 | 373 | * @see uset_isFrozen |
michael@0 | 374 | * @see uset_clone |
michael@0 | 375 | * @stable ICU 3.8 |
michael@0 | 376 | */ |
michael@0 | 377 | U_STABLE USet * U_EXPORT2 |
michael@0 | 378 | uset_cloneAsThawed(const USet *set); |
michael@0 | 379 | |
michael@0 | 380 | /** |
michael@0 | 381 | * Causes the USet object to represent the range <code>start - end</code>. |
michael@0 | 382 | * If <code>start > end</code> then this USet is set to an empty range. |
michael@0 | 383 | * A frozen set will not be modified. |
michael@0 | 384 | * @param set the object to set to the given range |
michael@0 | 385 | * @param start first character in the set, inclusive |
michael@0 | 386 | * @param end last character in the set, inclusive |
michael@0 | 387 | * @stable ICU 3.2 |
michael@0 | 388 | */ |
michael@0 | 389 | U_STABLE void U_EXPORT2 |
michael@0 | 390 | uset_set(USet* set, |
michael@0 | 391 | UChar32 start, UChar32 end); |
michael@0 | 392 | |
michael@0 | 393 | /** |
michael@0 | 394 | * Modifies the set to represent the set specified by the given |
michael@0 | 395 | * pattern. See the UnicodeSet class description for the syntax of |
michael@0 | 396 | * the pattern language. See also the User Guide chapter about UnicodeSet. |
michael@0 | 397 | * <em>Empties the set passed before applying the pattern.</em> |
michael@0 | 398 | * A frozen set will not be modified. |
michael@0 | 399 | * @param set The set to which the pattern is to be applied. |
michael@0 | 400 | * @param pattern A pointer to UChar string specifying what characters are in the set. |
michael@0 | 401 | * The character at pattern[0] must be a '['. |
michael@0 | 402 | * @param patternLength The length of the UChar string. -1 if NUL terminated. |
michael@0 | 403 | * @param options A bitmask for options to apply to the pattern. |
michael@0 | 404 | * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. |
michael@0 | 405 | * @param status Returns an error if the pattern cannot be parsed. |
michael@0 | 406 | * @return Upon successful parse, the value is |
michael@0 | 407 | * the index of the character after the closing ']' |
michael@0 | 408 | * of the parsed pattern. |
michael@0 | 409 | * If the status code indicates failure, then the return value |
michael@0 | 410 | * is the index of the error in the source. |
michael@0 | 411 | * |
michael@0 | 412 | * @stable ICU 2.8 |
michael@0 | 413 | */ |
michael@0 | 414 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 415 | uset_applyPattern(USet *set, |
michael@0 | 416 | const UChar *pattern, int32_t patternLength, |
michael@0 | 417 | uint32_t options, |
michael@0 | 418 | UErrorCode *status); |
michael@0 | 419 | |
michael@0 | 420 | /** |
michael@0 | 421 | * Modifies the set to contain those code points which have the given value |
michael@0 | 422 | * for the given binary or enumerated property, as returned by |
michael@0 | 423 | * u_getIntPropertyValue. Prior contents of this set are lost. |
michael@0 | 424 | * A frozen set will not be modified. |
michael@0 | 425 | * |
michael@0 | 426 | * @param set the object to contain the code points defined by the property |
michael@0 | 427 | * |
michael@0 | 428 | * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 |
michael@0 | 429 | * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 |
michael@0 | 430 | * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. |
michael@0 | 431 | * |
michael@0 | 432 | * @param value a value in the range u_getIntPropertyMinValue(prop).. |
michael@0 | 433 | * u_getIntPropertyMaxValue(prop), with one exception. If prop is |
michael@0 | 434 | * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but |
michael@0 | 435 | * rather a mask value produced by U_GET_GC_MASK(). This allows grouped |
michael@0 | 436 | * categories such as [:L:] to be represented. |
michael@0 | 437 | * |
michael@0 | 438 | * @param ec error code input/output parameter |
michael@0 | 439 | * |
michael@0 | 440 | * @stable ICU 3.2 |
michael@0 | 441 | */ |
michael@0 | 442 | U_STABLE void U_EXPORT2 |
michael@0 | 443 | uset_applyIntPropertyValue(USet* set, |
michael@0 | 444 | UProperty prop, int32_t value, UErrorCode* ec); |
michael@0 | 445 | |
michael@0 | 446 | /** |
michael@0 | 447 | * Modifies the set to contain those code points which have the |
michael@0 | 448 | * given value for the given property. Prior contents of this |
michael@0 | 449 | * set are lost. |
michael@0 | 450 | * A frozen set will not be modified. |
michael@0 | 451 | * |
michael@0 | 452 | * @param set the object to contain the code points defined by the given |
michael@0 | 453 | * property and value alias |
michael@0 | 454 | * |
michael@0 | 455 | * @param prop a string specifying a property alias, either short or long. |
michael@0 | 456 | * The name is matched loosely. See PropertyAliases.txt for names and a |
michael@0 | 457 | * description of loose matching. If the value string is empty, then this |
michael@0 | 458 | * string is interpreted as either a General_Category value alias, a Script |
michael@0 | 459 | * value alias, a binary property alias, or a special ID. Special IDs are |
michael@0 | 460 | * matched loosely and correspond to the following sets: |
michael@0 | 461 | * |
michael@0 | 462 | * "ANY" = [\\u0000-\\U0010FFFF], |
michael@0 | 463 | * "ASCII" = [\\u0000-\\u007F], |
michael@0 | 464 | * "Assigned" = [:^Cn:]. |
michael@0 | 465 | * |
michael@0 | 466 | * @param propLength the length of the prop, or -1 if NUL-terminated |
michael@0 | 467 | * |
michael@0 | 468 | * @param value a string specifying a value alias, either short or long. |
michael@0 | 469 | * The name is matched loosely. See PropertyValueAliases.txt for names |
michael@0 | 470 | * and a description of loose matching. In addition to aliases listed, |
michael@0 | 471 | * numeric values and canonical combining classes may be expressed |
michael@0 | 472 | * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string |
michael@0 | 473 | * may also be empty. |
michael@0 | 474 | * |
michael@0 | 475 | * @param valueLength the length of the value, or -1 if NUL-terminated |
michael@0 | 476 | * |
michael@0 | 477 | * @param ec error code input/output parameter |
michael@0 | 478 | * |
michael@0 | 479 | * @stable ICU 3.2 |
michael@0 | 480 | */ |
michael@0 | 481 | U_STABLE void U_EXPORT2 |
michael@0 | 482 | uset_applyPropertyAlias(USet* set, |
michael@0 | 483 | const UChar *prop, int32_t propLength, |
michael@0 | 484 | const UChar *value, int32_t valueLength, |
michael@0 | 485 | UErrorCode* ec); |
michael@0 | 486 | |
michael@0 | 487 | /** |
michael@0 | 488 | * Return true if the given position, in the given pattern, appears |
michael@0 | 489 | * to be the start of a UnicodeSet pattern. |
michael@0 | 490 | * |
michael@0 | 491 | * @param pattern a string specifying the pattern |
michael@0 | 492 | * @param patternLength the length of the pattern, or -1 if NUL-terminated |
michael@0 | 493 | * @param pos the given position |
michael@0 | 494 | * @stable ICU 3.2 |
michael@0 | 495 | */ |
michael@0 | 496 | U_STABLE UBool U_EXPORT2 |
michael@0 | 497 | uset_resemblesPattern(const UChar *pattern, int32_t patternLength, |
michael@0 | 498 | int32_t pos); |
michael@0 | 499 | |
michael@0 | 500 | /** |
michael@0 | 501 | * Returns a string representation of this set. If the result of |
michael@0 | 502 | * calling this function is passed to a uset_openPattern(), it |
michael@0 | 503 | * will produce another set that is equal to this one. |
michael@0 | 504 | * @param set the set |
michael@0 | 505 | * @param result the string to receive the rules, may be NULL |
michael@0 | 506 | * @param resultCapacity the capacity of result, may be 0 if result is NULL |
michael@0 | 507 | * @param escapeUnprintable if TRUE then convert unprintable |
michael@0 | 508 | * characters to their hex escape representations, \\uxxxx or |
michael@0 | 509 | * \\Uxxxxxxxx. Unprintable characters are those other than |
michael@0 | 510 | * U+000A, U+0020..U+007E. |
michael@0 | 511 | * @param ec error code. |
michael@0 | 512 | * @return length of string, possibly larger than resultCapacity |
michael@0 | 513 | * @stable ICU 2.4 |
michael@0 | 514 | */ |
michael@0 | 515 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 516 | uset_toPattern(const USet* set, |
michael@0 | 517 | UChar* result, int32_t resultCapacity, |
michael@0 | 518 | UBool escapeUnprintable, |
michael@0 | 519 | UErrorCode* ec); |
michael@0 | 520 | |
michael@0 | 521 | /** |
michael@0 | 522 | * Adds the given character to the given USet. After this call, |
michael@0 | 523 | * uset_contains(set, c) will return TRUE. |
michael@0 | 524 | * A frozen set will not be modified. |
michael@0 | 525 | * @param set the object to which to add the character |
michael@0 | 526 | * @param c the character to add |
michael@0 | 527 | * @stable ICU 2.4 |
michael@0 | 528 | */ |
michael@0 | 529 | U_STABLE void U_EXPORT2 |
michael@0 | 530 | uset_add(USet* set, UChar32 c); |
michael@0 | 531 | |
michael@0 | 532 | /** |
michael@0 | 533 | * Adds all of the elements in the specified set to this set if |
michael@0 | 534 | * they're not already present. This operation effectively |
michael@0 | 535 | * modifies this set so that its value is the <i>union</i> of the two |
michael@0 | 536 | * sets. The behavior of this operation is unspecified if the specified |
michael@0 | 537 | * collection is modified while the operation is in progress. |
michael@0 | 538 | * A frozen set will not be modified. |
michael@0 | 539 | * |
michael@0 | 540 | * @param set the object to which to add the set |
michael@0 | 541 | * @param additionalSet the source set whose elements are to be added to this set. |
michael@0 | 542 | * @stable ICU 2.6 |
michael@0 | 543 | */ |
michael@0 | 544 | U_STABLE void U_EXPORT2 |
michael@0 | 545 | uset_addAll(USet* set, const USet *additionalSet); |
michael@0 | 546 | |
michael@0 | 547 | /** |
michael@0 | 548 | * Adds the given range of characters to the given USet. After this call, |
michael@0 | 549 | * uset_containsRange(set, start, end) will return TRUE. |
michael@0 | 550 | * A frozen set will not be modified. |
michael@0 | 551 | * @param set the object to which to add the range |
michael@0 | 552 | * @param start the first character of the range to add, inclusive |
michael@0 | 553 | * @param end the last character of the range to add, inclusive |
michael@0 | 554 | * @stable ICU 2.2 |
michael@0 | 555 | */ |
michael@0 | 556 | U_STABLE void U_EXPORT2 |
michael@0 | 557 | uset_addRange(USet* set, UChar32 start, UChar32 end); |
michael@0 | 558 | |
michael@0 | 559 | /** |
michael@0 | 560 | * Adds the given string to the given USet. After this call, |
michael@0 | 561 | * uset_containsString(set, str, strLen) will return TRUE. |
michael@0 | 562 | * A frozen set will not be modified. |
michael@0 | 563 | * @param set the object to which to add the string |
michael@0 | 564 | * @param str the string to add |
michael@0 | 565 | * @param strLen the length of the string or -1 if null terminated. |
michael@0 | 566 | * @stable ICU 2.4 |
michael@0 | 567 | */ |
michael@0 | 568 | U_STABLE void U_EXPORT2 |
michael@0 | 569 | uset_addString(USet* set, const UChar* str, int32_t strLen); |
michael@0 | 570 | |
michael@0 | 571 | /** |
michael@0 | 572 | * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}. |
michael@0 | 573 | * If this set already contains any particular character, it has no effect on that character. |
michael@0 | 574 | * A frozen set will not be modified. |
michael@0 | 575 | * @param set the object to which to add the characters |
michael@0 | 576 | * @param str the source string |
michael@0 | 577 | * @param strLen the length of the string or -1 if null terminated. |
michael@0 | 578 | * @stable ICU 3.4 |
michael@0 | 579 | */ |
michael@0 | 580 | U_STABLE void U_EXPORT2 |
michael@0 | 581 | uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen); |
michael@0 | 582 | |
michael@0 | 583 | /** |
michael@0 | 584 | * Removes the given character from the given USet. After this call, |
michael@0 | 585 | * uset_contains(set, c) will return FALSE. |
michael@0 | 586 | * A frozen set will not be modified. |
michael@0 | 587 | * @param set the object from which to remove the character |
michael@0 | 588 | * @param c the character to remove |
michael@0 | 589 | * @stable ICU 2.4 |
michael@0 | 590 | */ |
michael@0 | 591 | U_STABLE void U_EXPORT2 |
michael@0 | 592 | uset_remove(USet* set, UChar32 c); |
michael@0 | 593 | |
michael@0 | 594 | /** |
michael@0 | 595 | * Removes the given range of characters from the given USet. After this call, |
michael@0 | 596 | * uset_contains(set, start, end) will return FALSE. |
michael@0 | 597 | * A frozen set will not be modified. |
michael@0 | 598 | * @param set the object from which to remove the range |
michael@0 | 599 | * @param start the first character of the range to remove, inclusive |
michael@0 | 600 | * @param end the last character of the range to remove, inclusive |
michael@0 | 601 | * @stable ICU 2.2 |
michael@0 | 602 | */ |
michael@0 | 603 | U_STABLE void U_EXPORT2 |
michael@0 | 604 | uset_removeRange(USet* set, UChar32 start, UChar32 end); |
michael@0 | 605 | |
michael@0 | 606 | /** |
michael@0 | 607 | * Removes the given string from the given USet. After this call, |
michael@0 | 608 | * uset_containsString(set, str, strLen) will return FALSE. |
michael@0 | 609 | * A frozen set will not be modified. |
michael@0 | 610 | * @param set the object from which to remove the string |
michael@0 | 611 | * @param str the string to remove |
michael@0 | 612 | * @param strLen the length of the string or -1 if null terminated. |
michael@0 | 613 | * @stable ICU 2.4 |
michael@0 | 614 | */ |
michael@0 | 615 | U_STABLE void U_EXPORT2 |
michael@0 | 616 | uset_removeString(USet* set, const UChar* str, int32_t strLen); |
michael@0 | 617 | |
michael@0 | 618 | /** |
michael@0 | 619 | * Removes from this set all of its elements that are contained in the |
michael@0 | 620 | * specified set. This operation effectively modifies this |
michael@0 | 621 | * set so that its value is the <i>asymmetric set difference</i> of |
michael@0 | 622 | * the two sets. |
michael@0 | 623 | * A frozen set will not be modified. |
michael@0 | 624 | * @param set the object from which the elements are to be removed |
michael@0 | 625 | * @param removeSet the object that defines which elements will be |
michael@0 | 626 | * removed from this set |
michael@0 | 627 | * @stable ICU 3.2 |
michael@0 | 628 | */ |
michael@0 | 629 | U_STABLE void U_EXPORT2 |
michael@0 | 630 | uset_removeAll(USet* set, const USet* removeSet); |
michael@0 | 631 | |
michael@0 | 632 | /** |
michael@0 | 633 | * Retain only the elements in this set that are contained in the |
michael@0 | 634 | * specified range. If <code>start > end</code> then an empty range is |
michael@0 | 635 | * retained, leaving the set empty. This is equivalent to |
michael@0 | 636 | * a boolean logic AND, or a set INTERSECTION. |
michael@0 | 637 | * A frozen set will not be modified. |
michael@0 | 638 | * |
michael@0 | 639 | * @param set the object for which to retain only the specified range |
michael@0 | 640 | * @param start first character, inclusive, of range to be retained |
michael@0 | 641 | * in this set. |
michael@0 | 642 | * @param end last character, inclusive, of range to be retained |
michael@0 | 643 | * in this set. |
michael@0 | 644 | * @stable ICU 3.2 |
michael@0 | 645 | */ |
michael@0 | 646 | U_STABLE void U_EXPORT2 |
michael@0 | 647 | uset_retain(USet* set, UChar32 start, UChar32 end); |
michael@0 | 648 | |
michael@0 | 649 | /** |
michael@0 | 650 | * Retains only the elements in this set that are contained in the |
michael@0 | 651 | * specified set. In other words, removes from this set all of |
michael@0 | 652 | * its elements that are not contained in the specified set. This |
michael@0 | 653 | * operation effectively modifies this set so that its value is |
michael@0 | 654 | * the <i>intersection</i> of the two sets. |
michael@0 | 655 | * A frozen set will not be modified. |
michael@0 | 656 | * |
michael@0 | 657 | * @param set the object on which to perform the retain |
michael@0 | 658 | * @param retain set that defines which elements this set will retain |
michael@0 | 659 | * @stable ICU 3.2 |
michael@0 | 660 | */ |
michael@0 | 661 | U_STABLE void U_EXPORT2 |
michael@0 | 662 | uset_retainAll(USet* set, const USet* retain); |
michael@0 | 663 | |
michael@0 | 664 | /** |
michael@0 | 665 | * Reallocate this object's internal structures to take up the least |
michael@0 | 666 | * possible space, without changing this object's value. |
michael@0 | 667 | * A frozen set will not be modified. |
michael@0 | 668 | * |
michael@0 | 669 | * @param set the object on which to perform the compact |
michael@0 | 670 | * @stable ICU 3.2 |
michael@0 | 671 | */ |
michael@0 | 672 | U_STABLE void U_EXPORT2 |
michael@0 | 673 | uset_compact(USet* set); |
michael@0 | 674 | |
michael@0 | 675 | /** |
michael@0 | 676 | * Inverts this set. This operation modifies this set so that |
michael@0 | 677 | * its value is its complement. This operation does not affect |
michael@0 | 678 | * the multicharacter strings, if any. |
michael@0 | 679 | * A frozen set will not be modified. |
michael@0 | 680 | * @param set the set |
michael@0 | 681 | * @stable ICU 2.4 |
michael@0 | 682 | */ |
michael@0 | 683 | U_STABLE void U_EXPORT2 |
michael@0 | 684 | uset_complement(USet* set); |
michael@0 | 685 | |
michael@0 | 686 | /** |
michael@0 | 687 | * Complements in this set all elements contained in the specified |
michael@0 | 688 | * set. Any character in the other set will be removed if it is |
michael@0 | 689 | * in this set, or will be added if it is not in this set. |
michael@0 | 690 | * A frozen set will not be modified. |
michael@0 | 691 | * |
michael@0 | 692 | * @param set the set to be modified |
michael@0 | 693 | * @param complement set that defines which elements will be xor'ed |
michael@0 | 694 | * with this set. |
michael@0 | 695 | * @stable ICU 3.2 |
michael@0 | 696 | */ |
michael@0 | 697 | U_STABLE void U_EXPORT2 |
michael@0 | 698 | uset_complementAll(USet* set, const USet* complement); |
michael@0 | 699 | |
michael@0 | 700 | /** |
michael@0 | 701 | * Removes all of the elements from this set. This set will be |
michael@0 | 702 | * empty after this call returns. |
michael@0 | 703 | * A frozen set will not be modified. |
michael@0 | 704 | * @param set the set |
michael@0 | 705 | * @stable ICU 2.4 |
michael@0 | 706 | */ |
michael@0 | 707 | U_STABLE void U_EXPORT2 |
michael@0 | 708 | uset_clear(USet* set); |
michael@0 | 709 | |
michael@0 | 710 | /** |
michael@0 | 711 | * Close this set over the given attribute. For the attribute |
michael@0 | 712 | * USET_CASE, the result is to modify this set so that: |
michael@0 | 713 | * |
michael@0 | 714 | * 1. For each character or string 'a' in this set, all strings or |
michael@0 | 715 | * characters 'b' such that foldCase(a) == foldCase(b) are added |
michael@0 | 716 | * to this set. |
michael@0 | 717 | * |
michael@0 | 718 | * 2. For each string 'e' in the resulting set, if e != |
michael@0 | 719 | * foldCase(e), 'e' will be removed. |
michael@0 | 720 | * |
michael@0 | 721 | * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] |
michael@0 | 722 | * |
michael@0 | 723 | * (Here foldCase(x) refers to the operation u_strFoldCase, and a |
michael@0 | 724 | * == b denotes that the contents are the same, not pointer |
michael@0 | 725 | * comparison.) |
michael@0 | 726 | * |
michael@0 | 727 | * A frozen set will not be modified. |
michael@0 | 728 | * |
michael@0 | 729 | * @param set the set |
michael@0 | 730 | * |
michael@0 | 731 | * @param attributes bitmask for attributes to close over. |
michael@0 | 732 | * Currently only the USET_CASE bit is supported. Any undefined bits |
michael@0 | 733 | * are ignored. |
michael@0 | 734 | * @stable ICU 4.2 |
michael@0 | 735 | */ |
michael@0 | 736 | U_STABLE void U_EXPORT2 |
michael@0 | 737 | uset_closeOver(USet* set, int32_t attributes); |
michael@0 | 738 | |
michael@0 | 739 | /** |
michael@0 | 740 | * Remove all strings from this set. |
michael@0 | 741 | * |
michael@0 | 742 | * @param set the set |
michael@0 | 743 | * @stable ICU 4.2 |
michael@0 | 744 | */ |
michael@0 | 745 | U_STABLE void U_EXPORT2 |
michael@0 | 746 | uset_removeAllStrings(USet* set); |
michael@0 | 747 | |
michael@0 | 748 | /** |
michael@0 | 749 | * Returns TRUE if the given USet contains no characters and no |
michael@0 | 750 | * strings. |
michael@0 | 751 | * @param set the set |
michael@0 | 752 | * @return true if set is empty |
michael@0 | 753 | * @stable ICU 2.4 |
michael@0 | 754 | */ |
michael@0 | 755 | U_STABLE UBool U_EXPORT2 |
michael@0 | 756 | uset_isEmpty(const USet* set); |
michael@0 | 757 | |
michael@0 | 758 | /** |
michael@0 | 759 | * Returns TRUE if the given USet contains the given character. |
michael@0 | 760 | * This function works faster with a frozen set. |
michael@0 | 761 | * @param set the set |
michael@0 | 762 | * @param c The codepoint to check for within the set |
michael@0 | 763 | * @return true if set contains c |
michael@0 | 764 | * @stable ICU 2.4 |
michael@0 | 765 | */ |
michael@0 | 766 | U_STABLE UBool U_EXPORT2 |
michael@0 | 767 | uset_contains(const USet* set, UChar32 c); |
michael@0 | 768 | |
michael@0 | 769 | /** |
michael@0 | 770 | * Returns TRUE if the given USet contains all characters c |
michael@0 | 771 | * where start <= c && c <= end. |
michael@0 | 772 | * @param set the set |
michael@0 | 773 | * @param start the first character of the range to test, inclusive |
michael@0 | 774 | * @param end the last character of the range to test, inclusive |
michael@0 | 775 | * @return TRUE if set contains the range |
michael@0 | 776 | * @stable ICU 2.2 |
michael@0 | 777 | */ |
michael@0 | 778 | U_STABLE UBool U_EXPORT2 |
michael@0 | 779 | uset_containsRange(const USet* set, UChar32 start, UChar32 end); |
michael@0 | 780 | |
michael@0 | 781 | /** |
michael@0 | 782 | * Returns TRUE if the given USet contains the given string. |
michael@0 | 783 | * @param set the set |
michael@0 | 784 | * @param str the string |
michael@0 | 785 | * @param strLen the length of the string or -1 if null terminated. |
michael@0 | 786 | * @return true if set contains str |
michael@0 | 787 | * @stable ICU 2.4 |
michael@0 | 788 | */ |
michael@0 | 789 | U_STABLE UBool U_EXPORT2 |
michael@0 | 790 | uset_containsString(const USet* set, const UChar* str, int32_t strLen); |
michael@0 | 791 | |
michael@0 | 792 | /** |
michael@0 | 793 | * Returns the index of the given character within this set, where |
michael@0 | 794 | * the set is ordered by ascending code point. If the character |
michael@0 | 795 | * is not in this set, return -1. The inverse of this method is |
michael@0 | 796 | * <code>uset_charAt()</code>. |
michael@0 | 797 | * @param set the set |
michael@0 | 798 | * @param c the character to obtain the index for |
michael@0 | 799 | * @return an index from 0..size()-1, or -1 |
michael@0 | 800 | * @stable ICU 3.2 |
michael@0 | 801 | */ |
michael@0 | 802 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 803 | uset_indexOf(const USet* set, UChar32 c); |
michael@0 | 804 | |
michael@0 | 805 | /** |
michael@0 | 806 | * Returns the character at the given index within this set, where |
michael@0 | 807 | * the set is ordered by ascending code point. If the index is |
michael@0 | 808 | * out of range, return (UChar32)-1. The inverse of this method is |
michael@0 | 809 | * <code>uset_indexOf()</code>. |
michael@0 | 810 | * @param set the set |
michael@0 | 811 | * @param charIndex an index from 0..size()-1 to obtain the char for |
michael@0 | 812 | * @return the character at the given index, or (UChar32)-1. |
michael@0 | 813 | * @stable ICU 3.2 |
michael@0 | 814 | */ |
michael@0 | 815 | U_STABLE UChar32 U_EXPORT2 |
michael@0 | 816 | uset_charAt(const USet* set, int32_t charIndex); |
michael@0 | 817 | |
michael@0 | 818 | /** |
michael@0 | 819 | * Returns the number of characters and strings contained in the given |
michael@0 | 820 | * USet. |
michael@0 | 821 | * @param set the set |
michael@0 | 822 | * @return a non-negative integer counting the characters and strings |
michael@0 | 823 | * contained in set |
michael@0 | 824 | * @stable ICU 2.4 |
michael@0 | 825 | */ |
michael@0 | 826 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 827 | uset_size(const USet* set); |
michael@0 | 828 | |
michael@0 | 829 | /** |
michael@0 | 830 | * Returns the number of items in this set. An item is either a range |
michael@0 | 831 | * of characters or a single multicharacter string. |
michael@0 | 832 | * @param set the set |
michael@0 | 833 | * @return a non-negative integer counting the character ranges |
michael@0 | 834 | * and/or strings contained in set |
michael@0 | 835 | * @stable ICU 2.4 |
michael@0 | 836 | */ |
michael@0 | 837 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 838 | uset_getItemCount(const USet* set); |
michael@0 | 839 | |
michael@0 | 840 | /** |
michael@0 | 841 | * Returns an item of this set. An item is either a range of |
michael@0 | 842 | * characters or a single multicharacter string. |
michael@0 | 843 | * @param set the set |
michael@0 | 844 | * @param itemIndex a non-negative integer in the range 0.. |
michael@0 | 845 | * uset_getItemCount(set)-1 |
michael@0 | 846 | * @param start pointer to variable to receive first character |
michael@0 | 847 | * in range, inclusive |
michael@0 | 848 | * @param end pointer to variable to receive last character in range, |
michael@0 | 849 | * inclusive |
michael@0 | 850 | * @param str buffer to receive the string, may be NULL |
michael@0 | 851 | * @param strCapacity capacity of str, or 0 if str is NULL |
michael@0 | 852 | * @param ec error code |
michael@0 | 853 | * @return the length of the string (>= 2), or 0 if the item is a |
michael@0 | 854 | * range, in which case it is the range *start..*end, or -1 if |
michael@0 | 855 | * itemIndex is out of range |
michael@0 | 856 | * @stable ICU 2.4 |
michael@0 | 857 | */ |
michael@0 | 858 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 859 | uset_getItem(const USet* set, int32_t itemIndex, |
michael@0 | 860 | UChar32* start, UChar32* end, |
michael@0 | 861 | UChar* str, int32_t strCapacity, |
michael@0 | 862 | UErrorCode* ec); |
michael@0 | 863 | |
michael@0 | 864 | /** |
michael@0 | 865 | * Returns true if set1 contains all the characters and strings |
michael@0 | 866 | * of set2. It answers the question, 'Is set1 a superset of set2?' |
michael@0 | 867 | * @param set1 set to be checked for containment |
michael@0 | 868 | * @param set2 set to be checked for containment |
michael@0 | 869 | * @return true if the test condition is met |
michael@0 | 870 | * @stable ICU 3.2 |
michael@0 | 871 | */ |
michael@0 | 872 | U_STABLE UBool U_EXPORT2 |
michael@0 | 873 | uset_containsAll(const USet* set1, const USet* set2); |
michael@0 | 874 | |
michael@0 | 875 | /** |
michael@0 | 876 | * Returns true if this set contains all the characters |
michael@0 | 877 | * of the given string. This does not check containment of grapheme |
michael@0 | 878 | * clusters, as uset_containsString does. |
michael@0 | 879 | * @param set set of characters to be checked for containment |
michael@0 | 880 | * @param str string containing codepoints to be checked for containment |
michael@0 | 881 | * @param strLen the length of the string or -1 if null terminated. |
michael@0 | 882 | * @return true if the test condition is met |
michael@0 | 883 | * @stable ICU 3.4 |
michael@0 | 884 | */ |
michael@0 | 885 | U_STABLE UBool U_EXPORT2 |
michael@0 | 886 | uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen); |
michael@0 | 887 | |
michael@0 | 888 | /** |
michael@0 | 889 | * Returns true if set1 contains none of the characters and strings |
michael@0 | 890 | * of set2. It answers the question, 'Are set1 and set2 disjoint?' |
michael@0 | 891 | * @param set1 set to be checked for containment |
michael@0 | 892 | * @param set2 set to be checked for containment |
michael@0 | 893 | * @return true if the test condition is met |
michael@0 | 894 | * @stable ICU 3.2 |
michael@0 | 895 | */ |
michael@0 | 896 | U_STABLE UBool U_EXPORT2 |
michael@0 | 897 | uset_containsNone(const USet* set1, const USet* set2); |
michael@0 | 898 | |
michael@0 | 899 | /** |
michael@0 | 900 | * Returns true if set1 contains some of the characters and strings |
michael@0 | 901 | * of set2. It answers the question, 'Do set1 and set2 have an intersection?' |
michael@0 | 902 | * @param set1 set to be checked for containment |
michael@0 | 903 | * @param set2 set to be checked for containment |
michael@0 | 904 | * @return true if the test condition is met |
michael@0 | 905 | * @stable ICU 3.2 |
michael@0 | 906 | */ |
michael@0 | 907 | U_STABLE UBool U_EXPORT2 |
michael@0 | 908 | uset_containsSome(const USet* set1, const USet* set2); |
michael@0 | 909 | |
michael@0 | 910 | /** |
michael@0 | 911 | * Returns the length of the initial substring of the input string which |
michael@0 | 912 | * consists only of characters and strings that are contained in this set |
michael@0 | 913 | * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), |
michael@0 | 914 | * or only of characters and strings that are not contained |
michael@0 | 915 | * in this set (USET_SPAN_NOT_CONTAINED). |
michael@0 | 916 | * See USetSpanCondition for details. |
michael@0 | 917 | * Similar to the strspn() C library function. |
michael@0 | 918 | * Unpaired surrogates are treated according to contains() of their surrogate code points. |
michael@0 | 919 | * This function works faster with a frozen set and with a non-negative string length argument. |
michael@0 | 920 | * @param set the set |
michael@0 | 921 | * @param s start of the string |
michael@0 | 922 | * @param length of the string; can be -1 for NUL-terminated |
michael@0 | 923 | * @param spanCondition specifies the containment condition |
michael@0 | 924 | * @return the length of the initial substring according to the spanCondition; |
michael@0 | 925 | * 0 if the start of the string does not fit the spanCondition |
michael@0 | 926 | * @stable ICU 3.8 |
michael@0 | 927 | * @see USetSpanCondition |
michael@0 | 928 | */ |
michael@0 | 929 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 930 | uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); |
michael@0 | 931 | |
michael@0 | 932 | /** |
michael@0 | 933 | * Returns the start of the trailing substring of the input string which |
michael@0 | 934 | * consists only of characters and strings that are contained in this set |
michael@0 | 935 | * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), |
michael@0 | 936 | * or only of characters and strings that are not contained |
michael@0 | 937 | * in this set (USET_SPAN_NOT_CONTAINED). |
michael@0 | 938 | * See USetSpanCondition for details. |
michael@0 | 939 | * Unpaired surrogates are treated according to contains() of their surrogate code points. |
michael@0 | 940 | * This function works faster with a frozen set and with a non-negative string length argument. |
michael@0 | 941 | * @param set the set |
michael@0 | 942 | * @param s start of the string |
michael@0 | 943 | * @param length of the string; can be -1 for NUL-terminated |
michael@0 | 944 | * @param spanCondition specifies the containment condition |
michael@0 | 945 | * @return the start of the trailing substring according to the spanCondition; |
michael@0 | 946 | * the string length if the end of the string does not fit the spanCondition |
michael@0 | 947 | * @stable ICU 3.8 |
michael@0 | 948 | * @see USetSpanCondition |
michael@0 | 949 | */ |
michael@0 | 950 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 951 | uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); |
michael@0 | 952 | |
michael@0 | 953 | /** |
michael@0 | 954 | * Returns the length of the initial substring of the input string which |
michael@0 | 955 | * consists only of characters and strings that are contained in this set |
michael@0 | 956 | * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), |
michael@0 | 957 | * or only of characters and strings that are not contained |
michael@0 | 958 | * in this set (USET_SPAN_NOT_CONTAINED). |
michael@0 | 959 | * See USetSpanCondition for details. |
michael@0 | 960 | * Similar to the strspn() C library function. |
michael@0 | 961 | * Malformed byte sequences are treated according to contains(0xfffd). |
michael@0 | 962 | * This function works faster with a frozen set and with a non-negative string length argument. |
michael@0 | 963 | * @param set the set |
michael@0 | 964 | * @param s start of the string (UTF-8) |
michael@0 | 965 | * @param length of the string; can be -1 for NUL-terminated |
michael@0 | 966 | * @param spanCondition specifies the containment condition |
michael@0 | 967 | * @return the length of the initial substring according to the spanCondition; |
michael@0 | 968 | * 0 if the start of the string does not fit the spanCondition |
michael@0 | 969 | * @stable ICU 3.8 |
michael@0 | 970 | * @see USetSpanCondition |
michael@0 | 971 | */ |
michael@0 | 972 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 973 | uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); |
michael@0 | 974 | |
michael@0 | 975 | /** |
michael@0 | 976 | * Returns the start of the trailing substring of the input string which |
michael@0 | 977 | * consists only of characters and strings that are contained in this set |
michael@0 | 978 | * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), |
michael@0 | 979 | * or only of characters and strings that are not contained |
michael@0 | 980 | * in this set (USET_SPAN_NOT_CONTAINED). |
michael@0 | 981 | * See USetSpanCondition for details. |
michael@0 | 982 | * Malformed byte sequences are treated according to contains(0xfffd). |
michael@0 | 983 | * This function works faster with a frozen set and with a non-negative string length argument. |
michael@0 | 984 | * @param set the set |
michael@0 | 985 | * @param s start of the string (UTF-8) |
michael@0 | 986 | * @param length of the string; can be -1 for NUL-terminated |
michael@0 | 987 | * @param spanCondition specifies the containment condition |
michael@0 | 988 | * @return the start of the trailing substring according to the spanCondition; |
michael@0 | 989 | * the string length if the end of the string does not fit the spanCondition |
michael@0 | 990 | * @stable ICU 3.8 |
michael@0 | 991 | * @see USetSpanCondition |
michael@0 | 992 | */ |
michael@0 | 993 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 994 | uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); |
michael@0 | 995 | |
michael@0 | 996 | /** |
michael@0 | 997 | * Returns true if set1 contains all of the characters and strings |
michael@0 | 998 | * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?' |
michael@0 | 999 | * @param set1 set to be checked for containment |
michael@0 | 1000 | * @param set2 set to be checked for containment |
michael@0 | 1001 | * @return true if the test condition is met |
michael@0 | 1002 | * @stable ICU 3.2 |
michael@0 | 1003 | */ |
michael@0 | 1004 | U_STABLE UBool U_EXPORT2 |
michael@0 | 1005 | uset_equals(const USet* set1, const USet* set2); |
michael@0 | 1006 | |
michael@0 | 1007 | /********************************************************************* |
michael@0 | 1008 | * Serialized set API |
michael@0 | 1009 | *********************************************************************/ |
michael@0 | 1010 | |
michael@0 | 1011 | /** |
michael@0 | 1012 | * Serializes this set into an array of 16-bit integers. Serialization |
michael@0 | 1013 | * (currently) only records the characters in the set; multicharacter |
michael@0 | 1014 | * strings are ignored. |
michael@0 | 1015 | * |
michael@0 | 1016 | * The array |
michael@0 | 1017 | * has the following format (each line is one 16-bit integer): |
michael@0 | 1018 | * |
michael@0 | 1019 | * length = (n+2*m) | (m!=0?0x8000:0) |
michael@0 | 1020 | * bmpLength = n; present if m!=0 |
michael@0 | 1021 | * bmp[0] |
michael@0 | 1022 | * bmp[1] |
michael@0 | 1023 | * ... |
michael@0 | 1024 | * bmp[n-1] |
michael@0 | 1025 | * supp-high[0] |
michael@0 | 1026 | * supp-low[0] |
michael@0 | 1027 | * supp-high[1] |
michael@0 | 1028 | * supp-low[1] |
michael@0 | 1029 | * ... |
michael@0 | 1030 | * supp-high[m-1] |
michael@0 | 1031 | * supp-low[m-1] |
michael@0 | 1032 | * |
michael@0 | 1033 | * The array starts with a header. After the header are n bmp |
michael@0 | 1034 | * code points, then m supplementary code points. Either n or m |
michael@0 | 1035 | * or both may be zero. n+2*m is always <= 0x7FFF. |
michael@0 | 1036 | * |
michael@0 | 1037 | * If there are no supplementary characters (if m==0) then the |
michael@0 | 1038 | * header is one 16-bit integer, 'length', with value n. |
michael@0 | 1039 | * |
michael@0 | 1040 | * If there are supplementary characters (if m!=0) then the header |
michael@0 | 1041 | * is two 16-bit integers. The first, 'length', has value |
michael@0 | 1042 | * (n+2*m)|0x8000. The second, 'bmpLength', has value n. |
michael@0 | 1043 | * |
michael@0 | 1044 | * After the header the code points are stored in ascending order. |
michael@0 | 1045 | * Supplementary code points are stored as most significant 16 |
michael@0 | 1046 | * bits followed by least significant 16 bits. |
michael@0 | 1047 | * |
michael@0 | 1048 | * @param set the set |
michael@0 | 1049 | * @param dest pointer to buffer of destCapacity 16-bit integers. |
michael@0 | 1050 | * May be NULL only if destCapacity is zero. |
michael@0 | 1051 | * @param destCapacity size of dest, or zero. Must not be negative. |
michael@0 | 1052 | * @param pErrorCode pointer to the error code. Will be set to |
michael@0 | 1053 | * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to |
michael@0 | 1054 | * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity. |
michael@0 | 1055 | * @return the total length of the serialized format, including |
michael@0 | 1056 | * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other |
michael@0 | 1057 | * than U_BUFFER_OVERFLOW_ERROR. |
michael@0 | 1058 | * @stable ICU 2.4 |
michael@0 | 1059 | */ |
michael@0 | 1060 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 1061 | uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode); |
michael@0 | 1062 | |
michael@0 | 1063 | /** |
michael@0 | 1064 | * Given a serialized array, fill in the given serialized set object. |
michael@0 | 1065 | * @param fillSet pointer to result |
michael@0 | 1066 | * @param src pointer to start of array |
michael@0 | 1067 | * @param srcLength length of array |
michael@0 | 1068 | * @return true if the given array is valid, otherwise false |
michael@0 | 1069 | * @stable ICU 2.4 |
michael@0 | 1070 | */ |
michael@0 | 1071 | U_STABLE UBool U_EXPORT2 |
michael@0 | 1072 | uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength); |
michael@0 | 1073 | |
michael@0 | 1074 | /** |
michael@0 | 1075 | * Set the USerializedSet to contain the given character (and nothing |
michael@0 | 1076 | * else). |
michael@0 | 1077 | * @param fillSet pointer to result |
michael@0 | 1078 | * @param c The codepoint to set |
michael@0 | 1079 | * @stable ICU 2.4 |
michael@0 | 1080 | */ |
michael@0 | 1081 | U_STABLE void U_EXPORT2 |
michael@0 | 1082 | uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c); |
michael@0 | 1083 | |
michael@0 | 1084 | /** |
michael@0 | 1085 | * Returns TRUE if the given USerializedSet contains the given |
michael@0 | 1086 | * character. |
michael@0 | 1087 | * @param set the serialized set |
michael@0 | 1088 | * @param c The codepoint to check for within the set |
michael@0 | 1089 | * @return true if set contains c |
michael@0 | 1090 | * @stable ICU 2.4 |
michael@0 | 1091 | */ |
michael@0 | 1092 | U_STABLE UBool U_EXPORT2 |
michael@0 | 1093 | uset_serializedContains(const USerializedSet* set, UChar32 c); |
michael@0 | 1094 | |
michael@0 | 1095 | /** |
michael@0 | 1096 | * Returns the number of disjoint ranges of characters contained in |
michael@0 | 1097 | * the given serialized set. Ignores any strings contained in the |
michael@0 | 1098 | * set. |
michael@0 | 1099 | * @param set the serialized set |
michael@0 | 1100 | * @return a non-negative integer counting the character ranges |
michael@0 | 1101 | * contained in set |
michael@0 | 1102 | * @stable ICU 2.4 |
michael@0 | 1103 | */ |
michael@0 | 1104 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 1105 | uset_getSerializedRangeCount(const USerializedSet* set); |
michael@0 | 1106 | |
michael@0 | 1107 | /** |
michael@0 | 1108 | * Returns a range of characters contained in the given serialized |
michael@0 | 1109 | * set. |
michael@0 | 1110 | * @param set the serialized set |
michael@0 | 1111 | * @param rangeIndex a non-negative integer in the range 0.. |
michael@0 | 1112 | * uset_getSerializedRangeCount(set)-1 |
michael@0 | 1113 | * @param pStart pointer to variable to receive first character |
michael@0 | 1114 | * in range, inclusive |
michael@0 | 1115 | * @param pEnd pointer to variable to receive last character in range, |
michael@0 | 1116 | * inclusive |
michael@0 | 1117 | * @return true if rangeIndex is valid, otherwise false |
michael@0 | 1118 | * @stable ICU 2.4 |
michael@0 | 1119 | */ |
michael@0 | 1120 | U_STABLE UBool U_EXPORT2 |
michael@0 | 1121 | uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, |
michael@0 | 1122 | UChar32* pStart, UChar32* pEnd); |
michael@0 | 1123 | |
michael@0 | 1124 | #endif |