intl/icu/source/common/unicode/uset.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unicode/uset.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1124 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2002-2012, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  uset.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2002mar07
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   C version of UnicodeSet.
    1.20 +*/
    1.21 +
    1.22 +
    1.23 +/**
    1.24 + * \file
    1.25 + * \brief C API: Unicode Set
    1.26 + *
    1.27 + * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
    1.28 + */
    1.29 +
    1.30 +#ifndef __USET_H__
    1.31 +#define __USET_H__
    1.32 +
    1.33 +#include "unicode/utypes.h"
    1.34 +#include "unicode/uchar.h"
    1.35 +#include "unicode/localpointer.h"
    1.36 +
    1.37 +#ifndef UCNV_H
    1.38 +struct USet;
    1.39 +/**
    1.40 + * A UnicodeSet.  Use the uset_* API to manipulate.  Create with
    1.41 + * uset_open*, and destroy with uset_close.
    1.42 + * @stable ICU 2.4
    1.43 + */
    1.44 +typedef struct USet USet;
    1.45 +#endif
    1.46 +
    1.47 +/**
    1.48 + * Bitmask values to be passed to uset_openPatternOptions() or
    1.49 + * uset_applyPattern() taking an option parameter.
    1.50 + * @stable ICU 2.4
    1.51 + */
    1.52 +enum {
    1.53 +    /**
    1.54 +     * Ignore white space within patterns unless quoted or escaped.
    1.55 +     * @stable ICU 2.4
    1.56 +     */
    1.57 +    USET_IGNORE_SPACE = 1,  
    1.58 +
    1.59 +    /**
    1.60 +     * Enable case insensitive matching.  E.g., "[ab]" with this flag
    1.61 +     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
    1.62 +     * match all except 'a', 'A', 'b', and 'B'. This performs a full
    1.63 +     * closure over case mappings, e.g. U+017F for s.
    1.64 +     *
    1.65 +     * The resulting set is a superset of the input for the code points but
    1.66 +     * not for the strings.
    1.67 +     * It performs a case mapping closure of the code points and adds
    1.68 +     * full case folding strings for the code points, and reduces strings of
    1.69 +     * the original set to their full case folding equivalents.
    1.70 +     *
    1.71 +     * This is designed for case-insensitive matches, for example
    1.72 +     * in regular expressions. The full code point case closure allows checking of
    1.73 +     * an input character directly against the closure set.
    1.74 +     * Strings are matched by comparing the case-folded form from the closure
    1.75 +     * set with an incremental case folding of the string in question.
    1.76 +     *
    1.77 +     * The closure set will also contain single code points if the original
    1.78 +     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
    1.79 +     * This is not necessary (that is, redundant) for the above matching method
    1.80 +     * but results in the same closure sets regardless of whether the original
    1.81 +     * set contained the code point or a string.
    1.82 +     *
    1.83 +     * @stable ICU 2.4
    1.84 +     */
    1.85 +    USET_CASE_INSENSITIVE = 2,  
    1.86 +
    1.87 +    /**
    1.88 +     * Enable case insensitive matching.  E.g., "[ab]" with this flag
    1.89 +     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
    1.90 +     * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
    1.91 +     * title-, and uppercase mappings as well as the case folding
    1.92 +     * of each existing element in the set.
    1.93 +     * @stable ICU 3.2
    1.94 +     */
    1.95 +    USET_ADD_CASE_MAPPINGS = 4
    1.96 +};
    1.97 +
    1.98 +/**
    1.99 + * Argument values for whether span() and similar functions continue while
   1.100 + * the current character is contained vs. not contained in the set.
   1.101 + *
   1.102 + * The functionality is straightforward for sets with only single code points,
   1.103 + * without strings (which is the common case):
   1.104 + * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE
   1.105 + *   work the same.
   1.106 + * - span() and spanBack() partition any string the same way when
   1.107 + *   alternating between span(USET_SPAN_NOT_CONTAINED) and
   1.108 + *   span(either "contained" condition).
   1.109 + * - Using a complemented (inverted) set and the opposite span conditions
   1.110 + *   yields the same results.
   1.111 + *
   1.112 + * When a set contains multi-code point strings, then these statements may not
   1.113 + * be true, depending on the strings in the set (for example, whether they
   1.114 + * overlap with each other) and the string that is processed.
   1.115 + * For a set with strings:
   1.116 + * - The complement of the set contains the opposite set of code points,
   1.117 + *   but the same set of strings.
   1.118 + *   Therefore, complementing both the set and the span conditions
   1.119 + *   may yield different results.
   1.120 + * - When starting spans at different positions in a string
   1.121 + *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
   1.122 + *   because a set string may start before the later position.
   1.123 + * - span(USET_SPAN_SIMPLE) may be shorter than
   1.124 + *   span(USET_SPAN_CONTAINED) because it will not recursively try
   1.125 + *   all possible paths.
   1.126 + *   For example, with a set which contains the three strings "xy", "xya" and "ax",
   1.127 + *   span("xyax", USET_SPAN_CONTAINED) will return 4 but
   1.128 + *   span("xyax", USET_SPAN_SIMPLE) will return 3.
   1.129 + *   span(USET_SPAN_SIMPLE) will never be longer than
   1.130 + *   span(USET_SPAN_CONTAINED).
   1.131 + * - With either "contained" condition, span() and spanBack() may partition
   1.132 + *   a string in different ways.
   1.133 + *   For example, with a set which contains the two strings "ab" and "ba",
   1.134 + *   and when processing the string "aba",
   1.135 + *   span() will yield contained/not-contained boundaries of { 0, 2, 3 }
   1.136 + *   while spanBack() will yield boundaries of { 0, 1, 3 }.
   1.137 + *
   1.138 + * Note: If it is important to get the same boundaries whether iterating forward
   1.139 + * or backward through a string, then either only span() should be used and
   1.140 + * the boundaries cached for backward operation, or an ICU BreakIterator
   1.141 + * could be used.
   1.142 + *
   1.143 + * Note: Unpaired surrogates are treated like surrogate code points.
   1.144 + * Similarly, set strings match only on code point boundaries,
   1.145 + * never in the middle of a surrogate pair.
   1.146 + * Illegal UTF-8 sequences are treated like U+FFFD.
   1.147 + * When processing UTF-8 strings, malformed set strings
   1.148 + * (strings with unpaired surrogates which cannot be converted to UTF-8)
   1.149 + * are ignored.
   1.150 + *
   1.151 + * @stable ICU 3.8
   1.152 + */
   1.153 +typedef enum USetSpanCondition {
   1.154 +    /**
   1.155 +     * Continue a span() while there is no set element at the current position.
   1.156 +     * Stops before the first set element (character or string).
   1.157 +     * (For code points only, this is like while contains(current)==FALSE).
   1.158 +     *
   1.159 +     * When span() returns, the substring between where it started and the position
   1.160 +     * it returned consists only of characters that are not in the set,
   1.161 +     * and none of its strings overlap with the span.
   1.162 +     *
   1.163 +     * @stable ICU 3.8
   1.164 +     */
   1.165 +    USET_SPAN_NOT_CONTAINED = 0,
   1.166 +    /**
   1.167 +     * Continue a span() while there is a set element at the current position.
   1.168 +     * (For characters only, this is like while contains(current)==TRUE).
   1.169 +     *
   1.170 +     * When span() returns, the substring between where it started and the position
   1.171 +     * it returned consists only of set elements (characters or strings) that are in the set.
   1.172 +     *
   1.173 +     * If a set contains strings, then the span will be the longest substring
   1.174 +     * matching any of the possible concatenations of set elements (characters or strings).
   1.175 +     * (There must be a single, non-overlapping concatenation of characters or strings.)
   1.176 +     * This is equivalent to a POSIX regular expression for (OR of each set element)*.
   1.177 +     *
   1.178 +     * @stable ICU 3.8
   1.179 +     */
   1.180 +    USET_SPAN_CONTAINED = 1,
   1.181 +    /**
   1.182 +     * Continue a span() while there is a set element at the current position.
   1.183 +     * (For characters only, this is like while contains(current)==TRUE).
   1.184 +     *
   1.185 +     * When span() returns, the substring between where it started and the position
   1.186 +     * it returned consists only of set elements (characters or strings) that are in the set.
   1.187 +     *
   1.188 +     * If a set only contains single characters, then this is the same
   1.189 +     * as USET_SPAN_CONTAINED.
   1.190 +     *
   1.191 +     * If a set contains strings, then the span will be the longest substring
   1.192 +     * with a match at each position with the longest single set element (character or string).
   1.193 +     *
   1.194 +     * Use this span condition together with other longest-match algorithms,
   1.195 +     * such as ICU converters (ucnv_getUnicodeSet()).
   1.196 +     *
   1.197 +     * @stable ICU 3.8
   1.198 +     */
   1.199 +    USET_SPAN_SIMPLE = 2,
   1.200 +    /**
   1.201 +     * One more than the last span condition.
   1.202 +     * @stable ICU 3.8
   1.203 +     */
   1.204 +    USET_SPAN_CONDITION_COUNT
   1.205 +} USetSpanCondition;
   1.206 +
   1.207 +enum {
   1.208 +    /**
   1.209 +     * Capacity of USerializedSet::staticArray.
   1.210 +     * Enough for any single-code point set.
   1.211 +     * Also provides padding for nice sizeof(USerializedSet).
   1.212 +     * @stable ICU 2.4
   1.213 +     */
   1.214 +    USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
   1.215 +};
   1.216 +
   1.217 +/**
   1.218 + * A serialized form of a Unicode set.  Limited manipulations are
   1.219 + * possible directly on a serialized set.  See below.
   1.220 + * @stable ICU 2.4
   1.221 + */
   1.222 +typedef struct USerializedSet {
   1.223 +    /**
   1.224 +     * The serialized Unicode Set.
   1.225 +     * @stable ICU 2.4
   1.226 +     */
   1.227 +    const uint16_t *array;
   1.228 +    /**
   1.229 +     * The length of the array that contains BMP characters.
   1.230 +     * @stable ICU 2.4
   1.231 +     */
   1.232 +    int32_t bmpLength;
   1.233 +    /**
   1.234 +     * The total length of the array.
   1.235 +     * @stable ICU 2.4
   1.236 +     */
   1.237 +    int32_t length;
   1.238 +    /**
   1.239 +     * A small buffer for the array to reduce memory allocations.
   1.240 +     * @stable ICU 2.4
   1.241 +     */
   1.242 +    uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
   1.243 +} USerializedSet;
   1.244 +
   1.245 +/*********************************************************************
   1.246 + * USet API
   1.247 + *********************************************************************/
   1.248 +
   1.249 +/**
   1.250 + * Create an empty USet object.
   1.251 + * Equivalent to uset_open(1, 0).
   1.252 + * @return a newly created USet.  The caller must call uset_close() on
   1.253 + * it when done.
   1.254 + * @stable ICU 4.2
   1.255 + */
   1.256 +U_STABLE USet* U_EXPORT2
   1.257 +uset_openEmpty(void);
   1.258 +
   1.259 +/**
   1.260 + * Creates a USet object that contains the range of characters
   1.261 + * start..end, inclusive.  If <code>start > end</code> 
   1.262 + * then an empty set is created (same as using uset_openEmpty()).
   1.263 + * @param start first character of the range, inclusive
   1.264 + * @param end last character of the range, inclusive
   1.265 + * @return a newly created USet.  The caller must call uset_close() on
   1.266 + * it when done.
   1.267 + * @stable ICU 2.4
   1.268 + */
   1.269 +U_STABLE USet* U_EXPORT2
   1.270 +uset_open(UChar32 start, UChar32 end);
   1.271 +
   1.272 +/**
   1.273 + * Creates a set from the given pattern.  See the UnicodeSet class
   1.274 + * description for the syntax of the pattern language.
   1.275 + * @param pattern a string specifying what characters are in the set
   1.276 + * @param patternLength the length of the pattern, or -1 if NUL-terminated
   1.277 + * @param ec the error code
   1.278 + * @return the newly created USet; destroy it with uset_close() when done
   1.279 + * @stable ICU 2.4
   1.280 + */
   1.281 +U_STABLE USet* U_EXPORT2
   1.282 +uset_openPattern(const UChar* pattern, int32_t patternLength,
   1.283 +                 UErrorCode* ec);
   1.284 +
   1.285 +/**
   1.286 + * Creates a set from the given pattern.  See the UnicodeSet class
   1.287 + * description for the syntax of the pattern language.
   1.288 + * @param pattern a string specifying what characters are in the set
   1.289 + * @param patternLength the length of the pattern, or -1 if NUL-terminated
   1.290 + * @param options bitmask for options to apply to the pattern.
   1.291 + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
   1.292 + * @param ec the error code
   1.293 + * @return the newly created USet; destroy it with uset_close() when done
   1.294 + * @stable ICU 2.4
   1.295 + */
   1.296 +U_STABLE USet* U_EXPORT2
   1.297 +uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
   1.298 +                 uint32_t options,
   1.299 +                 UErrorCode* ec);
   1.300 +
   1.301 +/**
   1.302 + * Disposes of the storage used by a USet object.  This function should
   1.303 + * be called exactly once for objects returned by uset_open().
   1.304 + * @param set the object to dispose of
   1.305 + * @stable ICU 2.4
   1.306 + */
   1.307 +U_STABLE void U_EXPORT2
   1.308 +uset_close(USet* set);
   1.309 +
   1.310 +#if U_SHOW_CPLUSPLUS_API
   1.311 +
   1.312 +U_NAMESPACE_BEGIN
   1.313 +
   1.314 +/**
   1.315 + * \class LocalUSetPointer
   1.316 + * "Smart pointer" class, closes a USet via uset_close().
   1.317 + * For most methods see the LocalPointerBase base class.
   1.318 + *
   1.319 + * @see LocalPointerBase
   1.320 + * @see LocalPointer
   1.321 + * @stable ICU 4.4
   1.322 + */
   1.323 +U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
   1.324 +
   1.325 +U_NAMESPACE_END
   1.326 +
   1.327 +#endif
   1.328 +
   1.329 +/**
   1.330 + * Returns a copy of this object.
   1.331 + * If this set is frozen, then the clone will be frozen as well.
   1.332 + * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
   1.333 + * @param set the original set
   1.334 + * @return the newly allocated copy of the set
   1.335 + * @see uset_cloneAsThawed
   1.336 + * @stable ICU 3.8
   1.337 + */
   1.338 +U_STABLE USet * U_EXPORT2
   1.339 +uset_clone(const USet *set);
   1.340 +
   1.341 +/**
   1.342 + * Determines whether the set has been frozen (made immutable) or not.
   1.343 + * See the ICU4J Freezable interface for details.
   1.344 + * @param set the set
   1.345 + * @return TRUE/FALSE for whether the set has been frozen
   1.346 + * @see uset_freeze
   1.347 + * @see uset_cloneAsThawed
   1.348 + * @stable ICU 3.8
   1.349 + */
   1.350 +U_STABLE UBool U_EXPORT2
   1.351 +uset_isFrozen(const USet *set);
   1.352 +
   1.353 +/**
   1.354 + * Freeze the set (make it immutable).
   1.355 + * Once frozen, it cannot be unfrozen and is therefore thread-safe
   1.356 + * until it is deleted.
   1.357 + * See the ICU4J Freezable interface for details.
   1.358 + * Freezing the set may also make some operations faster, for example
   1.359 + * uset_contains() and uset_span().
   1.360 + * A frozen set will not be modified. (It remains frozen.)
   1.361 + * The set is frozen in place; this function does not return a value.
   1.362 + * @param set the set to freeze
   1.363 + * @see uset_isFrozen
   1.364 + * @see uset_cloneAsThawed
   1.365 + * @stable ICU 3.8
   1.366 + */
   1.367 +U_STABLE void U_EXPORT2
   1.368 +uset_freeze(USet *set);
   1.369 +
   1.370 +/**
   1.371 + * Clone the set and make the clone mutable.
   1.372 + * See the ICU4J Freezable interface for details.
   1.373 + * @param set the set
   1.374 + * @return the mutable clone
   1.375 + * @see uset_freeze
   1.376 + * @see uset_isFrozen
   1.377 + * @see uset_clone
   1.378 + * @stable ICU 3.8
   1.379 + */
   1.380 +U_STABLE USet * U_EXPORT2
   1.381 +uset_cloneAsThawed(const USet *set);
   1.382 +
   1.383 +/**
   1.384 + * Causes the USet object to represent the range <code>start - end</code>.
   1.385 + * If <code>start > end</code> then this USet is set to an empty range.
   1.386 + * A frozen set will not be modified.
   1.387 + * @param set the object to set to the given range
   1.388 + * @param start first character in the set, inclusive
   1.389 + * @param end last character in the set, inclusive
   1.390 + * @stable ICU 3.2
   1.391 + */
   1.392 +U_STABLE void U_EXPORT2
   1.393 +uset_set(USet* set,
   1.394 +         UChar32 start, UChar32 end);
   1.395 +
   1.396 +/**
   1.397 + * Modifies the set to represent the set specified by the given
   1.398 + * pattern. See the UnicodeSet class description for the syntax of
   1.399 + * the pattern language. See also the User Guide chapter about UnicodeSet.
   1.400 + * <em>Empties the set passed before applying the pattern.</em>
   1.401 + * A frozen set will not be modified.
   1.402 + * @param set               The set to which the pattern is to be applied.
   1.403 + * @param pattern           A pointer to UChar string specifying what characters are in the set.
   1.404 + *                          The character at pattern[0] must be a '['.
   1.405 + * @param patternLength     The length of the UChar string. -1 if NUL-terminated.
   1.406 + * @param options           A bitmask for options to apply to the pattern.
   1.407 + *                          Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
   1.408 + * @param status            Returns an error if the pattern cannot be parsed.
   1.409 + * @return                  Upon a successful parse, the return value is
   1.410 + *                          the index of the character after the closing ']'
   1.411 + *                          of the parsed pattern.
   1.412 + *                          If the status code indicates failure, then the return value
   1.413 + *                          is the index of the error in the source.
   1.414 + *
   1.415 + * @stable ICU 2.8
   1.416 + */
   1.417 +U_STABLE int32_t U_EXPORT2 
   1.418 +uset_applyPattern(USet *set,
   1.419 +                  const UChar *pattern, int32_t patternLength,
   1.420 +                  uint32_t options,
   1.421 +                  UErrorCode *status);
   1.422 +
   1.423 +/**
   1.424 + * Modifies the set to contain those code points which have the given value
   1.425 + * for the given binary or enumerated property, as returned by
   1.426 + * u_getIntPropertyValue.  Prior contents of this set are lost.
   1.427 + * A frozen set will not be modified.
   1.428 + *
   1.429 + * @param set the object to contain the code points defined by the property
   1.430 + *
   1.431 + * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
   1.432 + * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
   1.433 + * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
   1.434 + *
   1.435 + * @param value a value in the range u_getIntPropertyMinValue(prop)..
   1.436 + * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
   1.437 + * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
   1.438 + * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
   1.439 + * categories such as [:L:] to be represented.
   1.440 + *
   1.441 + * @param ec error code input/output parameter
   1.442 + *
   1.443 + * @stable ICU 3.2
   1.444 + */
   1.445 +U_STABLE void U_EXPORT2
   1.446 +uset_applyIntPropertyValue(USet* set,
   1.447 +                           UProperty prop, int32_t value, UErrorCode* ec);
   1.448 +
   1.449 +/**
   1.450 + * Modifies the set to contain those code points which have the
   1.451 + * given value for the given property.  Prior contents of this
   1.452 + * set are lost.
   1.453 + * A frozen set will not be modified.
   1.454 + *
   1.455 + * @param set the object to contain the code points defined by the given
   1.456 + * property and value alias
   1.457 + *
   1.458 + * @param prop a string specifying a property alias, either short or long.
   1.459 + * The name is matched loosely.  See PropertyAliases.txt for names and a
   1.460 + * description of loose matching.  If the value string is empty, then this
   1.461 + * string is interpreted as either a General_Category value alias, a Script
   1.462 + * value alias, a binary property alias, or a special ID.  Special IDs are
   1.463 + * matched loosely and correspond to the following sets:
   1.464 + *
   1.465 + * "ANY" = [\\u0000-\\U0010FFFF],
   1.466 + * "ASCII" = [\\u0000-\\u007F],
   1.467 + * "Assigned" = [:^Cn:].
   1.468 + *
   1.469 + * @param propLength the length of the prop, or -1 if NUL-terminated
   1.470 + *
   1.471 + * @param value a string specifying a value alias, either short or long.
   1.472 + * The name is matched loosely.  See PropertyValueAliases.txt for names
   1.473 + * and a description of loose matching.  In addition to aliases listed,
   1.474 + * numeric values and canonical combining classes may be expressed
   1.475 + * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string
   1.476 + * may also be empty.
   1.477 + *
   1.478 + * @param valueLength the length of the value, or -1 if NUL-terminated
   1.479 + *
   1.480 + * @param ec error code input/output parameter
   1.481 + *
   1.482 + * @stable ICU 3.2
   1.483 + */
   1.484 +U_STABLE void U_EXPORT2
   1.485 +uset_applyPropertyAlias(USet* set,
   1.486 +                        const UChar *prop, int32_t propLength,
   1.487 +                        const UChar *value, int32_t valueLength,
   1.488 +                        UErrorCode* ec);
   1.489 +
   1.490 +/**
   1.491 + * Return true if the given position, in the given pattern, appears
   1.492 + * to be the start of a UnicodeSet pattern.
   1.493 + *
   1.494 + * @param pattern a string specifying the pattern
   1.495 + * @param patternLength the length of the pattern, or -1 if NUL-terminated
   1.496 + * @param pos the given position
   1.497 + * @stable ICU 3.2
   1.498 + */
   1.499 +U_STABLE UBool U_EXPORT2
   1.500 +uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
   1.501 +                      int32_t pos);
   1.502 +
   1.503 +/**
   1.504 + * Returns a string representation of this set.  If the result of
   1.505 + * calling this function is passed to a uset_openPattern(), it
   1.506 + * will produce another set that is equal to this one.
   1.507 + * @param set the set
   1.508 + * @param result the string to receive the rules, may be NULL
   1.509 + * @param resultCapacity the capacity of result, may be 0 if result is NULL
   1.510 + * @param escapeUnprintable if TRUE then convert unprintable
   1.511 + * characters to their hex escape representations, \\uxxxx or
   1.512 + * \\Uxxxxxxxx.  Unprintable characters are those other than
   1.513 + * U+000A, U+0020..U+007E.
   1.514 + * @param ec error code.
   1.515 + * @return length of string, possibly larger than resultCapacity
   1.516 + * @stable ICU 2.4
   1.517 + */
   1.518 +U_STABLE int32_t U_EXPORT2
   1.519 +uset_toPattern(const USet* set,
   1.520 +               UChar* result, int32_t resultCapacity,
   1.521 +               UBool escapeUnprintable,
   1.522 +               UErrorCode* ec);
   1.523 +
   1.524 +/**
   1.525 + * Adds the given character to the given USet.  After this call,
   1.526 + * uset_contains(set, c) will return TRUE.
   1.527 + * A frozen set will not be modified.
   1.528 + * @param set the object to which to add the character
   1.529 + * @param c the character to add
   1.530 + * @stable ICU 2.4
   1.531 + */
   1.532 +U_STABLE void U_EXPORT2
   1.533 +uset_add(USet* set, UChar32 c);
   1.534 +
   1.535 +/**
   1.536 + * Adds all of the elements in the specified set to this set if
   1.537 + * they're not already present.  This operation effectively
   1.538 + * modifies this set so that its value is the <i>union</i> of the two
   1.539 + * sets.  The behavior of this operation is unspecified if the specified
   1.540 + * collection is modified while the operation is in progress.
   1.541 + * A frozen set will not be modified.
   1.542 + *
   1.543 + * @param set the object to which to add the set
   1.544 + * @param additionalSet the source set whose elements are to be added to this set.
   1.545 + * @stable ICU 2.6
   1.546 + */
   1.547 +U_STABLE void U_EXPORT2
   1.548 +uset_addAll(USet* set, const USet *additionalSet);
   1.549 +
   1.550 +/**
   1.551 + * Adds the given range of characters to the given USet.  After this call,
   1.552 + * uset_contains(set, c) will return TRUE for every c in start..end.
   1.553 + * A frozen set will not be modified.
   1.554 + * @param set the object to which to add the range
   1.555 + * @param start the first character of the range to add, inclusive
   1.556 + * @param end the last character of the range to add, inclusive
   1.557 + * @stable ICU 2.2
   1.558 + */
   1.559 +U_STABLE void U_EXPORT2
   1.560 +uset_addRange(USet* set, UChar32 start, UChar32 end);
   1.561 +
   1.562 +/**
   1.563 + * Adds the given string to the given USet.  After this call,
   1.564 + * uset_containsString(set, str, strLen) will return TRUE.
   1.565 + * A frozen set will not be modified.
   1.566 + * @param set the object to which to add the string
   1.567 + * @param str the string to add
   1.568 + * @param strLen the length of the string or -1 if null terminated.
   1.569 + * @stable ICU 2.4
   1.570 + */
   1.571 +U_STABLE void U_EXPORT2
   1.572 +uset_addString(USet* set, const UChar* str, int32_t strLen);
   1.573 +
   1.574 +/**
   1.575 + * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}.
   1.576 + * If this set already contains any particular character, it has no effect on that character.
   1.577 + * A frozen set will not be modified.
   1.578 + * @param set the object to which to add the characters
   1.579 + * @param str the source string
   1.580 + * @param strLen the length of the string or -1 if null terminated.
   1.581 + * @stable ICU 3.4
   1.582 + */
   1.583 +U_STABLE void U_EXPORT2
   1.584 +uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
   1.585 +
   1.586 +/**
   1.587 + * Removes the given character from the given USet.  After this call,
   1.588 + * uset_contains(set, c) will return FALSE.
   1.589 + * A frozen set will not be modified.
   1.590 + * @param set the object from which to remove the character
   1.591 + * @param c the character to remove
   1.592 + * @stable ICU 2.4
   1.593 + */
   1.594 +U_STABLE void U_EXPORT2
   1.595 +uset_remove(USet* set, UChar32 c);
   1.596 +
   1.597 +/**
   1.598 + * Removes the given range of characters from the given USet.  After this call,
   1.599 + * uset_contains(set, c) will return FALSE for every c in start..end.
   1.600 + * A frozen set will not be modified.
   1.601 + * @param set the object from which to remove the range
   1.602 + * @param start the first character of the range to remove, inclusive
   1.603 + * @param end the last character of the range to remove, inclusive
   1.604 + * @stable ICU 2.2
   1.605 + */
   1.606 +U_STABLE void U_EXPORT2
   1.607 +uset_removeRange(USet* set, UChar32 start, UChar32 end);
   1.608 +
   1.609 +/**
   1.610 + * Removes the given string from the given USet.  After this call,
   1.611 + * uset_containsString(set, str, strLen) will return FALSE.
   1.612 + * A frozen set will not be modified.
   1.613 + * @param set the object from which to remove the string
   1.614 + * @param str the string to remove
   1.615 + * @param strLen the length of the string or -1 if null terminated.
   1.616 + * @stable ICU 2.4
   1.617 + */
   1.618 +U_STABLE void U_EXPORT2
   1.619 +uset_removeString(USet* set, const UChar* str, int32_t strLen);
   1.620 +
   1.621 +/**
   1.622 + * Removes from this set all of its elements that are contained in the
   1.623 + * specified set.  This operation effectively modifies this
   1.624 + * set so that its value is the <i>asymmetric set difference</i> of
   1.625 + * the two sets.
   1.626 + * A frozen set will not be modified.
   1.627 + * @param set the object from which the elements are to be removed
   1.628 + * @param removeSet the object that defines which elements will be
   1.629 + * removed from this set
   1.630 + * @stable ICU 3.2
   1.631 + */
   1.632 +U_STABLE void U_EXPORT2
   1.633 +uset_removeAll(USet* set, const USet* removeSet);
   1.634 +
   1.635 +/**
   1.636 + * Retain only the elements in this set that are contained in the
   1.637 + * specified range.  If <code>start > end</code> then an empty range is
   1.638 + * retained, leaving the set empty.  This is equivalent to
   1.639 + * a boolean logic AND, or a set INTERSECTION.
   1.640 + * A frozen set will not be modified.
   1.641 + *
   1.642 + * @param set the object for which to retain only the specified range
   1.643 + * @param start first character, inclusive, of the range to be retained
   1.644 + * in this set.
   1.645 + * @param end last character, inclusive, of the range to be retained
   1.646 + * in this set.
   1.647 + * @stable ICU 3.2
   1.648 + */
   1.649 +U_STABLE void U_EXPORT2
   1.650 +uset_retain(USet* set, UChar32 start, UChar32 end);
   1.651 +
   1.652 +/**
   1.653 + * Retains only the elements in this set that are contained in the
   1.654 + * specified set.  In other words, removes from this set all of
   1.655 + * its elements that are not contained in the specified set.  This
   1.656 + * operation effectively modifies this set so that its value is
   1.657 + * the <i>intersection</i> of the two sets.
   1.658 + * A frozen set will not be modified.
   1.659 + *
   1.660 + * @param set the object on which to perform the retain
   1.661 + * @param retain set that defines which elements this set will retain
   1.662 + * @stable ICU 3.2
   1.663 + */
   1.664 +U_STABLE void U_EXPORT2
   1.665 +uset_retainAll(USet* set, const USet* retain);
   1.666 +
   1.667 +/**
   1.668 + * Reallocate this object's internal structures to take up the least
   1.669 + * possible space, without changing this object's value.
   1.670 + * A frozen set will not be modified.
   1.671 + *
   1.672 + * @param set the object on which to perform the compact
   1.673 + * @stable ICU 3.2
   1.674 + */
   1.675 +U_STABLE void U_EXPORT2
   1.676 +uset_compact(USet* set);
   1.677 +
   1.678 +/**
   1.679 + * Inverts this set.  This operation modifies this set so that
   1.680 + * its value is its complement.  This operation does not affect
   1.681 + * the multicharacter strings, if any.
   1.682 + * A frozen set will not be modified.
   1.683 + * @param set the set
   1.684 + * @stable ICU 2.4
   1.685 + */
   1.686 +U_STABLE void U_EXPORT2
   1.687 +uset_complement(USet* set);
   1.688 +
   1.689 +/**
   1.690 + * Complements in this set all elements contained in the specified
   1.691 + * set.  Any character in the other set will be removed if it is
   1.692 + * in this set, or will be added if it is not in this set.
   1.693 + * A frozen set will not be modified.
   1.694 + *
   1.695 + * @param set the set in which to complement elements
   1.696 + * @param complement set that defines which elements will be xor'ed
   1.697 + * with this set.
   1.698 + * @stable ICU 3.2
   1.699 + */
   1.700 +U_STABLE void U_EXPORT2
   1.701 +uset_complementAll(USet* set, const USet* complement);
   1.702 +
   1.703 +/**
   1.704 + * Removes all of the elements from this set.  This set will be
   1.705 + * empty after this call returns.
   1.706 + * A frozen set will not be modified.
   1.707 + * @param set the set
   1.708 + * @stable ICU 2.4
   1.709 + */
   1.710 +U_STABLE void U_EXPORT2
   1.711 +uset_clear(USet* set);
   1.712 +
   1.713 +/**
   1.714 + * Close this set over the given attribute.  For the attribute
   1.715 + * USET_CASE, the result is to modify this set so that:
   1.716 + *
   1.717 + * 1. For each character or string 'a' in this set, all strings or
   1.718 + * characters 'b' such that foldCase(a) == foldCase(b) are added
   1.719 + * to this set.
   1.720 + *
   1.721 + * 2. For each string 'e' in the resulting set, if e !=
   1.722 + * foldCase(e), 'e' will be removed.
   1.723 + *
   1.724 + * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
   1.725 + *
   1.726 + * (Here foldCase(x) refers to the operation u_strFoldCase, and a
   1.727 + * == b denotes that the contents are the same, not pointer
   1.728 + * comparison.)
   1.729 + *
   1.730 + * A frozen set will not be modified.
   1.731 + *
   1.732 + * @param set the set
   1.733 + *
   1.734 + * @param attributes bitmask for attributes to close over.
   1.735 + * Currently only the USET_CASE bit is supported.  Any undefined bits
   1.736 + * are ignored.
   1.737 + * @stable ICU 4.2
   1.738 + */
   1.739 +U_STABLE void U_EXPORT2
   1.740 +uset_closeOver(USet* set, int32_t attributes);
   1.741 +
   1.742 +/**
   1.743 + * Remove all strings from this set.
   1.744 + *
   1.745 + * @param set the set
   1.746 + * @stable ICU 4.2
   1.747 + */
   1.748 +U_STABLE void U_EXPORT2
   1.749 +uset_removeAllStrings(USet* set);
   1.750 +
   1.751 +/**
   1.752 + * Returns TRUE if the given USet contains no characters and no
   1.753 + * strings.
   1.754 + * @param set the set
   1.755 + * @return true if set is empty
   1.756 + * @stable ICU 2.4
   1.757 + */
   1.758 +U_STABLE UBool U_EXPORT2
   1.759 +uset_isEmpty(const USet* set);
   1.760 +
   1.761 +/**
   1.762 + * Returns TRUE if the given USet contains the given character.
   1.763 + * This function works faster with a frozen set.
   1.764 + * @param set the set
   1.765 + * @param c The codepoint to check for within the set
   1.766 + * @return true if set contains c
   1.767 + * @stable ICU 2.4
   1.768 + */
   1.769 +U_STABLE UBool U_EXPORT2
   1.770 +uset_contains(const USet* set, UChar32 c);
   1.771 +
   1.772 +/**
   1.773 + * Returns TRUE if the given USet contains all characters c
   1.774 + * where start <= c && c <= end.
   1.775 + * @param set the set
   1.776 + * @param start the first character of the range to test, inclusive
   1.777 + * @param end the last character of the range to test, inclusive
   1.778 + * @return TRUE if set contains the range
   1.779 + * @stable ICU 2.2
   1.780 + */
   1.781 +U_STABLE UBool U_EXPORT2
   1.782 +uset_containsRange(const USet* set, UChar32 start, UChar32 end);
   1.783 +
   1.784 +/**
   1.785 + * Returns TRUE if the given USet contains the given string.
   1.786 + * @param set the set
   1.787 + * @param str the string
   1.788 + * @param strLen the length of the string or -1 if null terminated.
   1.789 + * @return true if set contains str
   1.790 + * @stable ICU 2.4
   1.791 + */
   1.792 +U_STABLE UBool U_EXPORT2
   1.793 +uset_containsString(const USet* set, const UChar* str, int32_t strLen);
   1.794 +
   1.795 +/**
   1.796 + * Returns the index of the given character within this set, where
   1.797 + * the set is ordered by ascending code point.  If the character
   1.798 + * is not in this set, return -1.  The inverse of this method is
   1.799 + * <code>uset_charAt()</code>.
   1.800 + * @param set the set
   1.801 + * @param c the character to obtain the index for
   1.802 + * @return an index from 0..size()-1, or -1
   1.803 + * @stable ICU 3.2
   1.804 + */
   1.805 +U_STABLE int32_t U_EXPORT2
   1.806 +uset_indexOf(const USet* set, UChar32 c);
   1.807 +
   1.808 +/**
   1.809 + * Returns the character at the given index within this set, where
   1.810 + * the set is ordered by ascending code point.  If the index is
   1.811 + * out of range, return (UChar32)-1.  The inverse of this method is
   1.812 + * <code>uset_indexOf()</code>.
   1.813 + * @param set the set
   1.814 + * @param charIndex an index from 0..size()-1 to obtain the char for
   1.815 + * @return the character at the given index, or (UChar32)-1.
   1.816 + * @stable ICU 3.2
   1.817 + */
   1.818 +U_STABLE UChar32 U_EXPORT2
   1.819 +uset_charAt(const USet* set, int32_t charIndex);
   1.820 +
   1.821 +/**
   1.822 + * Returns the number of characters and strings contained in the given
   1.823 + * USet.
   1.824 + * @param set the set
   1.825 + * @return a non-negative integer counting the characters and strings
   1.826 + * contained in set
   1.827 + * @stable ICU 2.4
   1.828 + */
   1.829 +U_STABLE int32_t U_EXPORT2
   1.830 +uset_size(const USet* set);
   1.831 +
   1.832 +/**
   1.833 + * Returns the number of items in this set.  An item is either a range
   1.834 + * of characters or a single multicharacter string.
   1.835 + * @param set the set
   1.836 + * @return a non-negative integer counting the character ranges
   1.837 + * and/or strings contained in set
   1.838 + * @stable ICU 2.4
   1.839 + */
   1.840 +U_STABLE int32_t U_EXPORT2
   1.841 +uset_getItemCount(const USet* set);
   1.842 +
   1.843 +/**
   1.844 + * Returns an item of this set.  An item is either a range of
   1.845 + * characters or a single multicharacter string.
   1.846 + * @param set the set
   1.847 + * @param itemIndex a non-negative integer in the range 0..
   1.848 + * uset_getItemCount(set)-1
   1.849 + * @param start pointer to variable to receive first character
   1.850 + * in range, inclusive
   1.851 + * @param end pointer to variable to receive last character in range,
   1.852 + * inclusive
   1.853 + * @param str buffer to receive the string, may be NULL
   1.854 + * @param strCapacity capacity of str, or 0 if str is NULL
   1.855 + * @param ec error code
   1.856 + * @return the length of the string (>= 2), or 0 if the item is a
   1.857 + * range, in which case it is the range *start..*end, or -1 if
   1.858 + * itemIndex is out of range
   1.859 + * @stable ICU 2.4
   1.860 + */
   1.861 +U_STABLE int32_t U_EXPORT2
   1.862 +uset_getItem(const USet* set, int32_t itemIndex,
   1.863 +             UChar32* start, UChar32* end,
   1.864 +             UChar* str, int32_t strCapacity,
   1.865 +             UErrorCode* ec);
   1.866 +
   1.867 +/**
   1.868 + * Returns true if set1 contains all the characters and strings
   1.869 + * of set2. It answers the question, 'Is set1 a superset of set2?'
   1.870 + * @param set1 set to be checked for containment
   1.871 + * @param set2 set to be checked for containment
   1.872 + * @return true if the test condition is met
   1.873 + * @stable ICU 3.2
   1.874 + */
   1.875 +U_STABLE UBool U_EXPORT2
   1.876 +uset_containsAll(const USet* set1, const USet* set2);
   1.877 +
   1.878 +/**
   1.879 + * Returns true if this set contains all the characters
   1.880 + * of the given string. This does not check containment of grapheme
   1.881 + * clusters, like uset_containsString.
   1.882 + * @param set set of characters to be checked for containment
   1.883 + * @param str string containing codepoints to be checked for containment
   1.884 + * @param strLen the length of the string or -1 if null terminated.
   1.885 + * @return true if the test condition is met
   1.886 + * @stable ICU 3.4
   1.887 + */
   1.888 +U_STABLE UBool U_EXPORT2
   1.889 +uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
   1.890 +
   1.891 +/**
   1.892 + * Returns true if set1 contains none of the characters and strings
   1.893 + * of set2. It answers the question, 'Are set1 and set2 disjoint?'
   1.894 + * @param set1 set to be checked for containment
   1.895 + * @param set2 set to be checked for containment
   1.896 + * @return true if the test condition is met
   1.897 + * @stable ICU 3.2
   1.898 + */
   1.899 +U_STABLE UBool U_EXPORT2
   1.900 +uset_containsNone(const USet* set1, const USet* set2);
   1.901 +
   1.902 +/**
   1.903 + * Returns true if set1 contains some of the characters and strings
   1.904 + * of set2. It answers the question, 'Do set1 and set2 have an intersection?'
   1.905 + * @param set1 set to be checked for containment
   1.906 + * @param set2 set to be checked for containment
   1.907 + * @return true if the test condition is met
   1.908 + * @stable ICU 3.2
   1.909 + */
   1.910 +U_STABLE UBool U_EXPORT2
   1.911 +uset_containsSome(const USet* set1, const USet* set2);
   1.912 +
   1.913 +/**
   1.914 + * Returns the length of the initial substring of the input string which
   1.915 + * consists only of characters and strings that are contained in this set
   1.916 + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
   1.917 + * or only of characters and strings that are not contained
   1.918 + * in this set (USET_SPAN_NOT_CONTAINED).
   1.919 + * See USetSpanCondition for details.
   1.920 + * Similar to the strspn() C library function.
   1.921 + * Unpaired surrogates are treated according to contains() of their surrogate code points.
   1.922 + * This function works faster with a frozen set and with a non-negative string length argument.
   1.923 + * @param set the set
   1.924 + * @param s start of the string
   1.925 + * @param length the length of the string; can be -1 for NUL-terminated
   1.926 + * @param spanCondition specifies the containment condition
   1.927 + * @return the length of the initial substring according to the spanCondition;
   1.928 + *         0 if the start of the string does not fit the spanCondition
   1.929 + * @stable ICU 3.8
   1.930 + * @see USetSpanCondition
   1.931 + */
   1.932 +U_STABLE int32_t U_EXPORT2
   1.933 +uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
   1.934 +
   1.935 +/**
   1.936 + * Returns the start of the trailing substring of the input string which
   1.937 + * consists only of characters and strings that are contained in this set
   1.938 + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
   1.939 + * or only of characters and strings that are not contained
   1.940 + * in this set (USET_SPAN_NOT_CONTAINED).
   1.941 + * See USetSpanCondition for details.
   1.942 + * Unpaired surrogates are treated according to contains() of their surrogate code points.
   1.943 + * This function works faster with a frozen set and with a non-negative string length argument.
   1.944 + * @param set the set
   1.945 + * @param s start of the string
   1.946 + * @param length the length of the string; can be -1 for NUL-terminated
   1.947 + * @param spanCondition specifies the containment condition
   1.948 + * @return the start of the trailing substring according to the spanCondition;
   1.949 + *         the string length if the end of the string does not fit the spanCondition
   1.950 + * @stable ICU 3.8
   1.951 + * @see USetSpanCondition
   1.952 + */
   1.953 +U_STABLE int32_t U_EXPORT2
   1.954 +uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
   1.955 +
   1.956 +/**
   1.957 + * Returns the length of the initial substring of the input string which
   1.958 + * consists only of characters and strings that are contained in this set
   1.959 + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
   1.960 + * or only of characters and strings that are not contained
   1.961 + * in this set (USET_SPAN_NOT_CONTAINED).
   1.962 + * See USetSpanCondition for details.
   1.963 + * Similar to the strspn() C library function.
   1.964 + * Malformed byte sequences are treated according to contains(0xfffd).
   1.965 + * This function works faster with a frozen set and with a non-negative string length argument.
   1.966 + * @param set the set
   1.967 + * @param s start of the string (UTF-8)
   1.968 + * @param length the length of the string; can be -1 for NUL-terminated
   1.969 + * @param spanCondition specifies the containment condition
   1.970 + * @return the length of the initial substring according to the spanCondition;
   1.971 + *         0 if the start of the string does not fit the spanCondition
   1.972 + * @stable ICU 3.8
   1.973 + * @see USetSpanCondition
   1.974 + */
   1.975 +U_STABLE int32_t U_EXPORT2
   1.976 +uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
   1.977 +
   1.978 +/**
   1.979 + * Returns the start of the trailing substring of the input string which
   1.980 + * consists only of characters and strings that are contained in this set
   1.981 + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
   1.982 + * or only of characters and strings that are not contained
   1.983 + * in this set (USET_SPAN_NOT_CONTAINED).
   1.984 + * See USetSpanCondition for details.
   1.985 + * Malformed byte sequences are treated according to contains(0xfffd).
   1.986 + * This function works faster with a frozen set and with a non-negative string length argument.
   1.987 + * @param set the set
   1.988 + * @param s start of the string (UTF-8)
   1.989 + * @param length the length of the string; can be -1 for NUL-terminated
   1.990 + * @param spanCondition specifies the containment condition
   1.991 + * @return the start of the trailing substring according to the spanCondition;
   1.992 + *         the string length if the end of the string does not fit the spanCondition
   1.993 + * @stable ICU 3.8
   1.994 + * @see USetSpanCondition
   1.995 + */
   1.996 +U_STABLE int32_t U_EXPORT2
   1.997 +uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
   1.998 +
   1.999 +/**
  1.1000 + * Returns true if set1 contains all of the characters and strings
  1.1001 + * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
  1.1002 + * @param set1 set to be checked for containment
  1.1003 + * @param set2 set to be checked for containment
  1.1004 + * @return true if the test condition is met
  1.1005 + * @stable ICU 3.2
  1.1006 + */
  1.1007 +U_STABLE UBool U_EXPORT2
  1.1008 +uset_equals(const USet* set1, const USet* set2);
  1.1009 +
  1.1010 +/*********************************************************************
  1.1011 + * Serialized set API
  1.1012 + *********************************************************************/
  1.1013 +
  1.1014 +/**
  1.1015 + * Serializes this set into an array of 16-bit integers.  Serialization
  1.1016 + * (currently) only records the characters in the set; multicharacter
  1.1017 + * strings are ignored.
  1.1018 + *
  1.1019 + * The array
  1.1020 + * has the following format (each line is one 16-bit integer):
  1.1021 + *
  1.1022 + *  length     = (n+2*m) | (m!=0?0x8000:0)
  1.1023 + *  bmpLength  = n; present if m!=0
  1.1024 + *  bmp[0]
  1.1025 + *  bmp[1]
  1.1026 + *  ...
  1.1027 + *  bmp[n-1]
  1.1028 + *  supp-high[0]
  1.1029 + *  supp-low[0]
  1.1030 + *  supp-high[1]
  1.1031 + *  supp-low[1]
  1.1032 + *  ...
  1.1033 + *  supp-high[m-1]
  1.1034 + *  supp-low[m-1]
  1.1035 + *
  1.1036 + * The array starts with a header.  After the header are n bmp
  1.1037 + * code points, then m supplementary code points.  Either n or m
  1.1038 + * or both may be zero.  n+2*m is always <= 0x7FFF.
  1.1039 + *
  1.1040 + * If there are no supplementary characters (if m==0) then the
  1.1041 + * header is one 16-bit integer, 'length', with value n.
  1.1042 + *
  1.1043 + * If there are supplementary characters (if m!=0) then the header
  1.1044 + * is two 16-bit integers.  The first, 'length', has value
  1.1045 + * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
  1.1046 + *
  1.1047 + * After the header the code points are stored in ascending order.
  1.1048 + * Supplementary code points are stored as most significant 16
  1.1049 + * bits followed by least significant 16 bits.
  1.1050 + *
  1.1051 + * @param set the set
  1.1052 + * @param dest pointer to buffer of destCapacity 16-bit integers.
  1.1053 + * May be NULL only if destCapacity is zero.
  1.1054 + * @param destCapacity size of dest, or zero.  Must not be negative.
  1.1055 + * @param pErrorCode pointer to the error code.  Will be set to
  1.1056 + * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to
  1.1057 + * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
  1.1058 + * @return the total length of the serialized format, including
  1.1059 + * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
  1.1060 + * than U_BUFFER_OVERFLOW_ERROR.
  1.1061 + * @stable ICU 2.4
  1.1062 + */
  1.1063 +U_STABLE int32_t U_EXPORT2
  1.1064 +uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
  1.1065 +
  1.1066 +/**
  1.1067 + * Given a serialized array, fill in the given serialized set object.
  1.1068 + * @param fillSet pointer to result
  1.1069 + * @param src pointer to start of array
  1.1070 + * @param srcLength length of array
  1.1071 + * @return true if the given array is valid, otherwise false
  1.1072 + * @stable ICU 2.4
  1.1073 + */
  1.1074 +U_STABLE UBool U_EXPORT2
  1.1075 +uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
  1.1076 +
  1.1077 +/**
  1.1078 + * Set the USerializedSet to contain the given character (and nothing
  1.1079 + * else).
  1.1080 + * @param fillSet pointer to result
  1.1081 + * @param c The codepoint to set
  1.1082 + * @stable ICU 2.4
  1.1083 + */
  1.1084 +U_STABLE void U_EXPORT2
  1.1085 +uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
  1.1086 +
  1.1087 +/**
  1.1088 + * Returns TRUE if the given USerializedSet contains the given
  1.1089 + * character.
  1.1090 + * @param set the serialized set
  1.1091 + * @param c The codepoint to check for within the set
  1.1092 + * @return true if set contains c
  1.1093 + * @stable ICU 2.4
  1.1094 + */
  1.1095 +U_STABLE UBool U_EXPORT2
  1.1096 +uset_serializedContains(const USerializedSet* set, UChar32 c);
  1.1097 +
  1.1098 +/**
  1.1099 + * Returns the number of disjoint ranges of characters contained in
  1.1100 + * the given serialized set.  Ignores any strings contained in the
  1.1101 + * set.
  1.1102 + * @param set the serialized set
  1.1103 + * @return a non-negative integer counting the character ranges
  1.1104 + * contained in set
  1.1105 + * @stable ICU 2.4
  1.1106 + */
  1.1107 +U_STABLE int32_t U_EXPORT2
  1.1108 +uset_getSerializedRangeCount(const USerializedSet* set);
  1.1109 +
  1.1110 +/**
  1.1111 + * Returns a range of characters contained in the given serialized
  1.1112 + * set.
  1.1113 + * @param set the serialized set
  1.1114 + * @param rangeIndex a non-negative integer in the range 0..
  1.1115 + * uset_getSerializedRangeCount(set)-1
  1.1116 + * @param pStart pointer to variable to receive first character
  1.1117 + * in range, inclusive
  1.1118 + * @param pEnd pointer to variable to receive last character in range,
  1.1119 + * inclusive
  1.1120 + * @return true if rangeIndex is valid, otherwise false
  1.1121 + * @stable ICU 2.4
  1.1122 + */
  1.1123 +U_STABLE UBool U_EXPORT2
  1.1124 +uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
  1.1125 +                        UChar32* pStart, UChar32* pEnd);
  1.1126 +
  1.1127 +#endif

mercurial