The Tor Browser: diff intl/icu/source/common/unicode/unistr.h

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unicode/unistr.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,4470 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (C) 1998-2013, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*
    1.10 +* File unistr.h
    1.11 +*
    1.12 +* Modification History:
    1.13 +*
    1.14 +*   Date        Name        Description
    1.15 +*   09/25/98    stephen     Creation.
    1.16 +*   11/11/98    stephen     Changed per 11/9 code review.
    1.17 +*   04/20/99    stephen     Overhauled per 4/16 code review.
    1.18 +*   11/18/99    aliu        Made to inherit from Replaceable.  Added method
    1.19 +*                           handleReplaceBetween(); other methods unchanged.
    1.20 +*   06/25/01    grhoten     Remove dependency on iostream.
    1.21 +******************************************************************************
    1.22 +*/
    1.23 +
    1.24 +#ifndef UNISTR_H
    1.25 +#define UNISTR_H
    1.26 +
    1.27 +/**
    1.28 + * \file 
    1.29 + * \brief C++ API: Unicode String 
    1.30 + */
    1.31 +
    1.32 +#include "unicode/utypes.h"
    1.33 +#include "unicode/rep.h"
    1.34 +#include "unicode/std_string.h"
    1.35 +#include "unicode/stringpiece.h"
    1.36 +#include "unicode/bytestream.h"
    1.37 +#include "unicode/ucasemap.h"
    1.38 +
    1.39 +struct UConverter;          // unicode/ucnv.h
    1.40 +class  StringThreadTest;
    1.41 +
    1.42 +#ifndef U_COMPARE_CODE_POINT_ORDER
    1.43 +/* see also ustring.h and unorm.h */
    1.44 +/**
    1.45 + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
    1.46 + * Compare strings in code point order instead of code unit order.
    1.47 + * @stable ICU 2.2
    1.48 + */
    1.49 +#define U_COMPARE_CODE_POINT_ORDER  0x8000
    1.50 +#endif
    1.51 +
    1.52 +#ifndef USTRING_H
    1.53 +/**
    1.54 + * \ingroup ustring_ustrlen
    1.55 + */
    1.56 +U_STABLE int32_t U_EXPORT2
    1.57 +u_strlen(const UChar *s);
    1.58 +#endif
    1.59 +
    1.60 +/**
    1.61 + * \def U_STRING_CASE_MAPPER_DEFINED
    1.62 + * @internal
    1.63 + */
    1.64 +#ifndef U_STRING_CASE_MAPPER_DEFINED
    1.65 +#define U_STRING_CASE_MAPPER_DEFINED
    1.66 +
    1.67 +/**
    1.68 + * Internal string case mapping function type.
    1.69 + * @internal
    1.70 + */
    1.71 +typedef int32_t U_CALLCONV
    1.72 +UStringCaseMapper(const UCaseMap *csm,
    1.73 +                  UChar *dest, int32_t destCapacity,
    1.74 +                  const UChar *src, int32_t srcLength,
    1.75 +                  UErrorCode *pErrorCode);
    1.76 +
    1.77 +#endif
    1.78 +
    1.79 +U_NAMESPACE_BEGIN
    1.80 +
    1.81 +class BreakIterator;        // unicode/brkiter.h
    1.82 +class Locale;               // unicode/locid.h
    1.83 +class StringCharacterIterator;
    1.84 +class UnicodeStringAppendable;  // unicode/appendable.h
    1.85 +
    1.86 +/* The <iostream> include has been moved to unicode/ustream.h */
    1.87 +
    1.88 +/**
    1.89 + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
    1.90 + * which constructs a Unicode string from an invariant-character char * string.
    1.91 + * About invariant characters see utypes.h.
    1.92 + * This constructor has no runtime dependency on conversion code and is
    1.93 + * therefore recommended over ones taking a charset name string
    1.94 + * (where the empty string "" indicates invariant-character conversion).
    1.95 + *
    1.96 + * @stable ICU 3.2
    1.97 + */
    1.98 +#define US_INV icu::UnicodeString::kInvariant
    1.99 +
   1.100 +/**
   1.101 + * Unicode String literals in C++.
   1.102 + * Dependent on the platform properties, different UnicodeString
   1.103 + * constructors should be used to create a UnicodeString object from
   1.104 + * a string literal.
   1.105 + * The macros are defined for maximum performance.
   1.106 + * They work only for strings that contain "invariant characters", i.e.,
   1.107 + * only latin letters, digits, and some punctuation.
   1.108 + * See utypes.h for details.
   1.109 + *
   1.110 + * The string parameter must be a C string literal.
   1.111 + * The length of the string, not including the terminating
   1.112 + * <code>NUL</code>, must be specified as a constant.
   1.113 + * The U_STRING_DECL macro should be invoked exactly once for one
   1.114 + * such string variable before it is used.
   1.115 + * @stable ICU 2.0
   1.116 + */
   1.117 +#if defined(U_DECLARE_UTF16)
   1.118 +#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
   1.119 +#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
   1.120 +#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
   1.121 +#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
   1.122 +#   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
   1.123 +#else
   1.124 +#   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
   1.125 +#endif
   1.126 +
   1.127 +/**
   1.128 + * Unicode String literals in C++.
   1.129 + * Dependent on the platform properties, different UnicodeString
   1.130 + * constructors should be used to create a UnicodeString object from
   1.131 + * a string literal.
   1.132 + * The macros are defined for improved performance.
   1.133 + * They work only for strings that contain "invariant characters", i.e.,
   1.134 + * only latin letters, digits, and some punctuation.
   1.135 + * See utypes.h for details.
   1.136 + *
   1.137 + * The string parameter must be a C string literal.
   1.138 + * @stable ICU 2.0
   1.139 + */
   1.140 +#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
   1.141 +
   1.142 +/**
   1.143 + * \def UNISTR_FROM_CHAR_EXPLICIT
   1.144 + * This can be defined to be empty or "explicit".
   1.145 + * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
   1.146 + * constructors are marked as explicit, preventing their inadvertent use.
   1.147 + * @stable ICU 49
   1.148 + */
   1.149 +#ifndef UNISTR_FROM_CHAR_EXPLICIT
   1.150 +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
   1.151 +    // Auto-"explicit" in ICU library code.
   1.152 +#   define UNISTR_FROM_CHAR_EXPLICIT explicit
   1.153 +# else
   1.154 +    // Empty by default for source code compatibility.
   1.155 +#   define UNISTR_FROM_CHAR_EXPLICIT
   1.156 +# endif
   1.157 +#endif
   1.158 +
   1.159 +/**
   1.160 + * \def UNISTR_FROM_STRING_EXPLICIT
   1.161 + * This can be defined to be empty or "explicit".
   1.162 + * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
   1.163 + * constructors are marked as explicit, preventing their inadvertent use.
   1.164 + *
   1.165 + * In particular, this helps prevent accidentally depending on ICU conversion code
   1.166 + * by passing a string literal into an API with a const UnicodeString & parameter.
   1.167 + * @stable ICU 49
   1.168 + */
   1.169 +#ifndef UNISTR_FROM_STRING_EXPLICIT
   1.170 +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
   1.171 +    // Auto-"explicit" in ICU library code.
   1.172 +#   define UNISTR_FROM_STRING_EXPLICIT explicit
   1.173 +# else
   1.174 +    // Empty by default for source code compatibility.
   1.175 +#   define UNISTR_FROM_STRING_EXPLICIT
   1.176 +# endif
   1.177 +#endif
   1.178 +
   1.179 +/**
   1.180 + * UnicodeString is a string class that stores Unicode characters directly and provides
   1.181 + * functionality similar to that of the Java String and StringBuffer classes.
   1.182 + * It is a concrete implementation of the abstract class Replaceable (for transliteration).
   1.183 + *
   1.184 + * The UnicodeString class is not suitable for subclassing.
   1.185 + *
   1.186 + * <p>For an overview of Unicode strings in C and C++ see the
   1.187 + * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
   1.188 + *
   1.189 + * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
   1.190 + * A Unicode character may be stored with either one code unit
   1.191 + * (the most common case) or with a matched pair of special code units
   1.192 + * ("surrogates"). The data type for code units is UChar. 
   1.193 + * For single-character handling, a Unicode character code <em>point</em> is a value
   1.194 + * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
   1.195 + *
   1.196 + * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
   1.197 + * This is the same as with multi-byte char* strings in traditional string handling.
   1.198 + * Operations on partial strings typically do not test for code point boundaries.
   1.199 + * If necessary, the user needs to take care of such boundaries by testing for the code unit
   1.200 + * values or by using functions like
   1.201 + * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
   1.202 + * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
   1.203 + *
   1.204 + * UnicodeString methods are more lenient with regard to input parameter values
   1.205 + * than other ICU APIs. In particular:
   1.206 + * - If indexes are out of bounds for a UnicodeString object
   1.207 + *   (<0 or >length()) then they are "pinned" to the nearest boundary.
   1.208 + * - If primitive string pointer values (e.g., const UChar * or char *)
   1.209 + *   for input strings are NULL, then those input string parameters are treated
   1.210 + *   as if they pointed to an empty string.
   1.211 + *   However, this is <em>not</em> the case for char * parameters for charset names
   1.212 + *   or other IDs.
   1.213 + * - Most UnicodeString methods do not take a UErrorCode parameter because
   1.214 + *   there are usually very few opportunities for failure other than a shortage
   1.215 + *   of memory, error codes in low-level C++ string methods would be inconvenient,
   1.216 + *   and the error code as the last parameter (ICU convention) would prevent
   1.217 + *   the use of default parameter values.
   1.218 + *   Instead, such methods set the UnicodeString into a "bogus" state
   1.219 + *   (see isBogus()) if an error occurs.
   1.220 + *
   1.221 + * In string comparisons, two UnicodeString objects that are both "bogus"
   1.222 + * compare equal (to be transitive and prevent endless loops in sorting),
   1.223 + * and a "bogus" string compares less than any non-"bogus" one.
   1.224 + *
   1.225 + * Const UnicodeString methods are thread-safe. Multiple threads can use
   1.226 + * const methods on the same UnicodeString object simultaneously,
   1.227 + * but non-const methods must not be called concurrently (in multiple threads)
   1.228 + * with any other (const or non-const) methods.
   1.229 + *
   1.230 + * Similarly, const UnicodeString & parameters are thread-safe.
   1.231 + * One object may be passed in as such a parameter concurrently in multiple threads.
   1.232 + * This includes the const UnicodeString & parameters for
   1.233 + * copy construction, assignment, and cloning.
   1.234 + *
   1.235 + * <p>UnicodeString uses several storage methods.
   1.236 + * String contents can be stored inside the UnicodeString object itself,
   1.237 + * in an allocated and shared buffer, or in an outside buffer that is "aliased".
   1.238 + * Most of this is done transparently, but careful aliasing in particular provides
   1.239 + * significant performance improvements.
   1.240 + * Also, the internal buffer is accessible via special functions.
   1.241 + * For details see the
   1.242 + * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
   1.243 + *
   1.244 + * @see utf.h
   1.245 + * @see CharacterIterator
   1.246 + * @stable ICU 2.0
   1.247 + */
   1.248 +class U_COMMON_API UnicodeString : public Replaceable
   1.249 +{
   1.250 +public:
   1.251 +
   1.252 +  /**
   1.253 +   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
   1.254 +   * which constructs a Unicode string from an invariant-character char * string.
   1.255 +   * Use the macro US_INV instead of the full qualification for this value.
   1.256 +   *
   1.257 +   * @see US_INV
   1.258 +   * @stable ICU 3.2
   1.259 +   */
   1.260 +  enum EInvariant {
   1.261 +    /** Sole enumerator: selects the invariant-character constructor (see US_INV).
   1.262 +     * @see EInvariant
   1.263 +     * @stable ICU 3.2
   1.264 +     */
   1.265 +    kInvariant
   1.266 +  };
   1.267 +
   1.268 +  //========================================
   1.269 +  // Read-only operations
   1.270 +  //========================================
   1.271 +
   1.272 +  /* Comparison - bitwise only - for international comparison use collation */
   1.273 +
   1.274 +  /**
   1.275 +   * Equality operator. Performs only bitwise comparison.
   1.276 +   * @param text The UnicodeString to compare to this one.
   1.277 +   * @return TRUE if <TT>text</TT> contains the same characters as this one,
   1.278 +   * FALSE otherwise.
   1.279 +   * @stable ICU 2.0
   1.280 +   */
   1.281 +  inline UBool operator== (const UnicodeString& text) const;
   1.282 +
   1.283 +  /**
   1.284 +   * Inequality operator. Performs only bitwise comparison.
   1.285 +   * @param text The UnicodeString to compare to this one.
   1.286 +   * @return FALSE if <TT>text</TT> contains the same characters as this one,
   1.287 +   * TRUE otherwise.
   1.288 +   * @stable ICU 2.0
   1.289 +   */
   1.290 +  inline UBool operator!= (const UnicodeString& text) const;
   1.291 +
   1.292 +  /**
   1.293 +   * Greater than operator. Performs only bitwise comparison.
   1.294 +   * @param text The UnicodeString to compare to this one.
   1.295 +   * @return TRUE if the characters in this are bitwise
   1.296 +   * greater than the characters in <code>text</code>, FALSE otherwise
   1.297 +   * @stable ICU 2.0
   1.298 +   */
   1.299 +  inline UBool operator> (const UnicodeString& text) const;
   1.300 +
   1.301 +  /**
   1.302 +   * Less than operator. Performs only bitwise comparison.
   1.303 +   * @param text The UnicodeString to compare to this one.
   1.304 +   * @return TRUE if the characters in this are bitwise
   1.305 +   * less than the characters in <code>text</code>, FALSE otherwise
   1.306 +   * @stable ICU 2.0
   1.307 +   */
   1.308 +  inline UBool operator< (const UnicodeString& text) const;
   1.309 +
   1.310 +  /**
   1.311 +   * Greater than or equal operator. Performs only bitwise comparison.
   1.312 +   * @param text The UnicodeString to compare to this one.
   1.313 +   * @return TRUE if the characters in this are bitwise
   1.314 +   * greater than or equal to the characters in <code>text</code>, FALSE otherwise
   1.315 +   * @stable ICU 2.0
   1.316 +   */
   1.317 +  inline UBool operator>= (const UnicodeString& text) const;
   1.318 +
   1.319 +  /**
   1.320 +   * Less than or equal operator. Performs only bitwise comparison.
   1.321 +   * @param text The UnicodeString to compare to this one.
   1.322 +   * @return TRUE if the characters in this are bitwise
   1.323 +   * less than or equal to the characters in <code>text</code>, FALSE otherwise
   1.324 +   * @stable ICU 2.0
   1.325 +   */
   1.326 +  inline UBool operator<= (const UnicodeString& text) const;
   1.327 +
   1.328 +  /**
   1.329 +   * Compare the characters bitwise in this UnicodeString to
   1.330 +   * the characters in <code>text</code>.
   1.331 +   * @param text The UnicodeString to compare to this one.
   1.332 +   * @return The result of bitwise character comparison: 0 if this
   1.333 +   * contains the same characters as <code>text</code>, -1 if the characters in
   1.334 +   * this are bitwise less than the characters in <code>text</code>, +1 if the
   1.335 +   * characters in this are bitwise greater than the characters
   1.336 +   * in <code>text</code>.
   1.337 +   * @stable ICU 2.0
   1.338 +   */
   1.339 +  inline int8_t compare(const UnicodeString& text) const;
   1.340 +
   1.341 +  /**
   1.342 +   * Compare the characters bitwise in the range
   1.343 +   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
   1.344 +   * in the <b>entire string</b> <TT>text</TT>.
   1.345 +   * (The parameters "start" and "length" are not applied to the other text "text".)
   1.346 +   * @param start the offset at which the compare operation begins
   1.347 +   * @param length the number of characters of text to compare.
   1.348 +   * @param text the other text to be compared against this string.
   1.349 +   * @return The result of bitwise character comparison: 0 if this
   1.350 +   * contains the same characters as <code>text</code>, -1 if the characters in
   1.351 +   * this are bitwise less than the characters in <code>text</code>, +1 if the
   1.352 +   * characters in this are bitwise greater than the characters
   1.353 +   * in <code>text</code>.
   1.354 +   * @stable ICU 2.0
   1.355 +   */
   1.356 +  inline int8_t compare(int32_t start,
   1.357 +         int32_t length,
   1.358 +         const UnicodeString& text) const;
   1.359 +
   1.360 +  /**
   1.361 +   * Compare the characters bitwise in the range
   1.362 +   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
   1.363 +   * in <TT>srcText</TT> in the range
   1.364 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   1.365 +   * @param start the offset in this string at which the compare operation begins
   1.366 +   * @param length the number of characters in this string to compare.
   1.367 +   * @param srcText the text to be compared
   1.368 +   * @param srcStart the offset into <TT>srcText</TT> to start comparison
   1.369 +   * @param srcLength the number of characters in <TT>srcText</TT> to compare
   1.370 +   * @return The result of bitwise character comparison: 0 if this
   1.371 +   * contains the same characters as <code>srcText</code>, -1 if the characters in
   1.372 +   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
   1.373 +   * characters in this are bitwise greater than the characters
   1.374 +   * in <code>srcText</code>.
   1.375 +   * @stable ICU 2.0
   1.376 +   */
   1.377 +   inline int8_t compare(int32_t start,
   1.378 +         int32_t length,
   1.379 +         const UnicodeString& srcText,
   1.380 +         int32_t srcStart,
   1.381 +         int32_t srcLength) const;
   1.382 +
   1.383 +  /**
   1.384 +   * Compare the characters bitwise in this UnicodeString with the first
   1.385 +   * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
   1.386 +   * @param srcChars The characters to compare to this UnicodeString.
   1.387 +   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
   1.388 +   * @return The result of bitwise character comparison: 0 if this
   1.389 +   * contains the same characters as <code>srcChars</code>, -1 if the characters in
   1.390 +   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
   1.391 +   * characters in this are bitwise greater than the characters
   1.392 +   * in <code>srcChars</code>.
   1.393 +   * @stable ICU 2.0
   1.394 +   */
   1.395 +  inline int8_t compare(const UChar *srcChars,
   1.396 +         int32_t srcLength) const;
   1.397 +
   1.398 +  /**
   1.399 +   * Compare the characters bitwise in the range
   1.400 +   * [<TT>start</TT>, <TT>start + length</TT>) with the first
   1.401 +   * <TT>length</TT> characters in <TT>srcChars</TT>.
   1.402 +   * @param start the offset at which the compare operation begins
   1.403 +   * @param length the number of characters to compare.
   1.404 +   * @param srcChars the characters to be compared
   1.405 +   * @return The result of bitwise character comparison: 0 if this
   1.406 +   * contains the same characters as <code>srcChars</code>, -1 if the characters in
   1.407 +   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
   1.408 +   * characters in this are bitwise greater than the characters
   1.409 +   * in <code>srcChars</code>.
   1.410 +   * @stable ICU 2.0
   1.411 +   */
   1.412 +  inline int8_t compare(int32_t start,
   1.413 +         int32_t length,
   1.414 +         const UChar *srcChars) const;
   1.415 +
   1.416 +  /**
   1.417 +   * Compare the characters bitwise in the range
   1.418 +   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
   1.419 +   * in <TT>srcChars</TT> in the range
   1.420 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   1.421 +   * @param start the offset at which the compare operation begins
   1.422 +   * @param length the number of characters in this to compare
   1.423 +   * @param srcChars the characters to be compared
   1.424 +   * @param srcStart the offset into <TT>srcChars</TT> to start comparison
   1.425 +   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
   1.426 +   * @return The result of bitwise character comparison: 0 if this
   1.427 +   * contains the same characters as <code>srcChars</code>, -1 if the characters in
   1.428 +   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
   1.429 +   * characters in this are bitwise greater than the characters
   1.430 +   * in <code>srcChars</code>.
   1.431 +   * @stable ICU 2.0
   1.432 +   */
   1.433 +  inline int8_t compare(int32_t start,
   1.434 +         int32_t length,
   1.435 +         const UChar *srcChars,
   1.436 +         int32_t srcStart,
   1.437 +         int32_t srcLength) const;
   1.438 +
   1.439 +  /**
   1.440 +   * Compare the characters bitwise in the range
   1.441 +   * [<TT>start</TT>, <TT>limit</TT>) with the characters
   1.442 +   * in <TT>srcText</TT> in the range
   1.443 +   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
   1.444 +   * @param start the offset at which the compare operation begins
   1.445 +   * @param limit the offset immediately following the compare operation
   1.446 +   * @param srcText the text to be compared
   1.447 +   * @param srcStart the offset into <TT>srcText</TT> to start comparison
   1.448 +   * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
   1.449 +   * @return The result of bitwise character comparison: 0 if this
   1.450 +   * contains the same characters as <code>srcText</code>, -1 if the characters in
   1.451 +   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
   1.452 +   * characters in this are bitwise greater than the characters
   1.453 +   * in <code>srcText</code>.
   1.454 +   * @stable ICU 2.0
   1.455 +   */
   1.456 +  inline int8_t compareBetween(int32_t start,
   1.457 +            int32_t limit,
   1.458 +            const UnicodeString& srcText,
   1.459 +            int32_t srcStart,
   1.460 +            int32_t srcLimit) const;
   1.461 +
   1.462 +  /**
   1.463 +   * Compare two Unicode strings in code point order.
   1.464 +   * The result may be different from the results of compare(), operator<, etc.
   1.465 +   * if supplementary characters are present:
   1.466 +   *
   1.467 +   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   1.468 +   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   1.469 +   * which means that they compare as less than some other BMP characters like U+feff.
   1.470 +   * This function compares Unicode strings in code point order.
   1.471 +   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   1.472 +   *
   1.473 +   * @param text Another string to compare this one to.
   1.474 +   * @return a negative/zero/positive integer corresponding to whether
   1.475 +   * this string is less than/equal to/greater than the second one
   1.476 +   * in code point order
   1.477 +   * @stable ICU 2.0
   1.478 +   */
   1.479 +  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
   1.480 +
   1.481 +  /**
   1.482 +   * Compare two Unicode strings in code point order.
   1.483 +   * The result may be different from the results of compare(), operator<, etc.
   1.484 +   * if supplementary characters are present:
   1.485 +   *
   1.486 +   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   1.487 +   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   1.488 +   * which means that they compare as less than some other BMP characters like U+feff.
   1.489 +   * This function compares Unicode strings in code point order.
   1.490 +   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   1.491 +   *
   1.492 +   * @param start The start offset in this string at which the compare operation begins.
   1.493 +   * @param length The number of code units from this string to compare.
   1.494 +   * @param srcText Another string to compare this one to.
   1.495 +   * @return a negative/zero/positive integer corresponding to whether
   1.496 +   * this string is less than/equal to/greater than the second one
   1.497 +   * in code point order
   1.498 +   * @stable ICU 2.0
   1.499 +   */
   1.500 +  inline int8_t compareCodePointOrder(int32_t start,
   1.501 +                                      int32_t length,
   1.502 +                                      const UnicodeString& srcText) const;
   1.503 +
   1.504 +  /**
   1.505 +   * Compare two Unicode strings in code point order.
   1.506 +   * The result may be different from the results of compare(), operator<, etc.
   1.507 +   * if supplementary characters are present:
   1.508 +   *
   1.509 +   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   1.510 +   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   1.511 +   * which means that they compare as less than some other BMP characters like U+feff.
   1.512 +   * This function compares Unicode strings in code point order.
   1.513 +   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   1.514 +   *
   1.515 +   * @param start The start offset in this string at which the compare operation begins.
   1.516 +   * @param length The number of code units from this string to compare.
   1.517 +   * @param srcText Another string to compare this one to.
   1.518 +   * @param srcStart The start offset in that string at which the compare operation begins.
   1.519 +   * @param srcLength The number of code units from that string to compare.
   1.520 +   * @return a negative/zero/positive integer corresponding to whether
   1.521 +   * this string is less than/equal to/greater than the second one
   1.522 +   * in code point order
   1.523 +   * @stable ICU 2.0
   1.524 +   */
   1.525 +   inline int8_t compareCodePointOrder(int32_t start,
   1.526 +                                       int32_t length,
   1.527 +                                       const UnicodeString& srcText,
   1.528 +                                       int32_t srcStart,
   1.529 +                                       int32_t srcLength) const;
   1.530 +
   1.531 +  /**
   1.532 +   * Compare two Unicode strings in code point order.
   1.533 +   * The result may be different from the results of compare(), operator<, etc.
   1.534 +   * if supplementary characters are present:
   1.535 +   *
   1.536 +   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   1.537 +   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   1.538 +   * which means that they compare as less than some other BMP characters like U+feff.
   1.539 +   * This function compares Unicode strings in code point order.
   1.540 +   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   1.541 +   *
   1.542 +   * @param srcChars A pointer to another string to compare this one to.
   1.543 +   * @param srcLength The number of code units from that string to compare.
   1.544 +   * @return a negative/zero/positive integer corresponding to whether
   1.545 +   * this string is less than/equal to/greater than the second one
   1.546 +   * in code point order
   1.547 +   * @stable ICU 2.0
   1.548 +   */
   1.549 +  inline int8_t compareCodePointOrder(const UChar *srcChars,
   1.550 +                                      int32_t srcLength) const;
   1.551 +
   1.552 +  /**
   1.553 +   * Compare two Unicode strings in code point order.
   1.554 +   * The result may be different from the results of compare(), operator<, etc.
   1.555 +   * if supplementary characters are present:
   1.556 +   *
   1.557 +   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   1.558 +   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   1.559 +   * which means that they compare as less than some other BMP characters like U+feff.
   1.560 +   * This function compares Unicode strings in code point order.
   1.561 +   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   1.562 +   *
   1.563 +   * @param start The start offset in this string at which the compare operation begins.
   1.564 +   * @param length The number of code units from this string to compare.
   1.565 +   * @param srcChars A pointer to another string to compare this one to.
   1.566 +   * @return a negative/zero/positive integer corresponding to whether
   1.567 +   * this string is less than/equal to/greater than the second one
   1.568 +   * in code point order
   1.569 +   * @stable ICU 2.0
   1.570 +   */
   1.571 +  inline int8_t compareCodePointOrder(int32_t start,
   1.572 +                                      int32_t length,
   1.573 +                                      const UChar *srcChars) const;
   1.574 +
   1.575 +  /**
   1.576 +   * Compare two Unicode strings in code point order.
   1.577 +   * The result may be different from the results of compare(), operator<, etc.
   1.578 +   * if supplementary characters are present:
   1.579 +   *
   1.580 +   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   1.581 +   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   1.582 +   * which means that they compare as less than some other BMP characters like U+feff.
   1.583 +   * This function compares Unicode strings in code point order.
   1.584 +   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   1.585 +   *
   1.586 +   * @param start The start offset in this string at which the compare operation begins.
   1.587 +   * @param length The number of code units from this string to compare.
   1.588 +   * @param srcChars A pointer to another string to compare this one to.
   1.589 +   * @param srcStart The start offset in that string at which the compare operation begins.
   1.590 +   * @param srcLength The number of code units from that string to compare.
   1.591 +   * @return a negative/zero/positive integer corresponding to whether
   1.592 +   * this string is less than/equal to/greater than the second one
   1.593 +   * in code point order
   1.594 +   * @stable ICU 2.0
   1.595 +   */
   1.596 +  inline int8_t compareCodePointOrder(int32_t start,
   1.597 +                                      int32_t length,
   1.598 +                                      const UChar *srcChars,
   1.599 +                                      int32_t srcStart,
   1.600 +                                      int32_t srcLength) const;
   1.601 +
   1.602 +  /**
   1.603 +   * Compare two Unicode strings in code point order.
   1.604 +   * The result may be different from the results of compare(), operator<, etc.
   1.605 +   * if supplementary characters are present:
   1.606 +   *
   1.607 +   * In UTF-16, supplementary characters (with code points U+10000 and above) are
   1.608 +   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   1.609 +   * which means that they compare as less than some other BMP characters like U+feff.
   1.610 +   * This function compares Unicode strings in code point order.
   1.611 +   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   1.612 +   *
   1.613 +   * @param start The start offset in this string at which the compare operation begins.
   1.614 +   * @param limit The offset after the last code unit from this string to compare.
   1.615 +   * @param srcText Another string to compare this one to.
   1.616 +   * @param srcStart The start offset in that string at which the compare operation begins.
   1.617 +   * @param srcLimit The offset after the last code unit from that string to compare.
   1.618 +   * @return a negative/zero/positive integer corresponding to whether
   1.619 +   * this string is less than/equal to/greater than the second one
   1.620 +   * in code point order
   1.621 +   * @stable ICU 2.0
   1.622 +   */
   1.623 +  inline int8_t compareCodePointOrderBetween(int32_t start,
   1.624 +                                             int32_t limit,
   1.625 +                                             const UnicodeString& srcText,
   1.626 +                                             int32_t srcStart,
   1.627 +                                             int32_t srcLimit) const;
   1.628 +
   1.629 +  /**
   1.630 +   * Compare two strings case-insensitively using full case folding.
   1.631 +   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
   1.632 +   *
   1.633 +   * @param text Another string to compare this one to.
   1.634 +   * @param options A bit set of options:
   1.635 +   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   1.636 +   *     Comparison in code unit order with default case folding.
   1.637 +   *
   1.638 +   *   - U_COMPARE_CODE_POINT_ORDER
   1.639 +   *     Set to choose code point order instead of code unit order
   1.640 +   *     (see u_strCompare for details).
   1.641 +   *
   1.642 +   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   1.643 +   *
   1.644 +   * @return A negative, zero, or positive integer indicating the comparison result.
   1.645 +   * @stable ICU 2.0
   1.646 +   */
   1.647 +  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
   1.648 +
   1.649 +  /**
   1.650 +   * Compare two strings case-insensitively using full case folding.
   1.651 +   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
   1.652 +   *
   1.653 +   * @param start The start offset in this string at which the compare operation begins.
   1.654 +   * @param length The number of code units from this string to compare.
   1.655 +   * @param srcText Another string to compare this one to.
   1.656 +   * @param options A bit set of options:
   1.657 +   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   1.658 +   *     Comparison in code unit order with default case folding.
   1.659 +   *
   1.660 +   *   - U_COMPARE_CODE_POINT_ORDER
   1.661 +   *     Set to choose code point order instead of code unit order
   1.662 +   *     (see u_strCompare for details).
   1.663 +   *
   1.664 +   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   1.665 +   *
   1.666 +   * @return A negative, zero, or positive integer indicating the comparison result.
   1.667 +   * @stable ICU 2.0
   1.668 +   */
   1.669 +  inline int8_t caseCompare(int32_t start,
   1.670 +         int32_t length,
   1.671 +         const UnicodeString& srcText,
   1.672 +         uint32_t options) const;
   1.673 +
   1.674 +  /**
   1.675 +   * Compare two strings case-insensitively using full case folding.
   1.676 +   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
   1.677 +   *
   1.678 +   * @param start The start offset in this string at which the compare operation begins.
   1.679 +   * @param length The number of code units from this string to compare.
   1.680 +   * @param srcText Another string to compare this one to.
   1.681 +   * @param srcStart The start offset in that string at which the compare operation begins.
   1.682 +   * @param srcLength The number of code units from that string to compare.
   1.683 +   * @param options A bit set of options:
   1.684 +   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   1.685 +   *     Comparison in code unit order with default case folding.
   1.686 +   *
   1.687 +   *   - U_COMPARE_CODE_POINT_ORDER
   1.688 +   *     Set to choose code point order instead of code unit order
   1.689 +   *     (see u_strCompare for details).
   1.690 +   *
   1.691 +   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   1.692 +   *
   1.693 +   * @return A negative, zero, or positive integer indicating the comparison result.
   1.694 +   * @stable ICU 2.0
   1.695 +   */
   1.696 +  inline int8_t caseCompare(int32_t start,
   1.697 +         int32_t length,
   1.698 +         const UnicodeString& srcText,
   1.699 +         int32_t srcStart,
   1.700 +         int32_t srcLength,
   1.701 +         uint32_t options) const;
   1.702 +
   1.703 +  /**
   1.704 +   * Compare two strings case-insensitively using full case folding.
   1.705 +   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   1.706 +   *
   1.707 +   * @param srcChars A pointer to another string to compare this one to.
   1.708 +   * @param srcLength The number of code units from that string to compare.
   1.709 +   * @param options A bit set of options:
   1.710 +   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   1.711 +   *     Comparison in code unit order with default case folding.
   1.712 +   *
   1.713 +   *   - U_COMPARE_CODE_POINT_ORDER
   1.714 +   *     Set to choose code point order instead of code unit order
   1.715 +   *     (see u_strCompare for details).
   1.716 +   *
   1.717 +   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   1.718 +   *
   1.719 +   * @return A negative, zero, or positive integer indicating the comparison result.
   1.720 +   * @stable ICU 2.0
   1.721 +   */
   1.722 +  inline int8_t caseCompare(const UChar *srcChars,
   1.723 +         int32_t srcLength,
   1.724 +         uint32_t options) const;
   1.725 +
   1.726 +  /**
   1.727 +   * Compare two strings case-insensitively using full case folding.
   1.728 +   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   1.729 +   *
   1.730 +   * @param start The start offset in this string at which the compare operation begins.
   1.731 +   * @param length The number of code units from this string to compare.
   1.732 +   * @param srcChars A pointer to another string to compare this one to.
   1.733 +   * @param options A bit set of options:
   1.734 +   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   1.735 +   *     Comparison in code unit order with default case folding.
   1.736 +   *
   1.737 +   *   - U_COMPARE_CODE_POINT_ORDER
   1.738 +   *     Set to choose code point order instead of code unit order
   1.739 +   *     (see u_strCompare for details).
   1.740 +   *
   1.741 +   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   1.742 +   *
   1.743 +   * @return A negative, zero, or positive integer indicating the comparison result.
   1.744 +   * @stable ICU 2.0
   1.745 +   */
   1.746 +  inline int8_t caseCompare(int32_t start,
   1.747 +         int32_t length,
   1.748 +         const UChar *srcChars,
   1.749 +         uint32_t options) const;
   1.750 +
   1.751 +  /**
   1.752 +   * Compare two strings case-insensitively using full case folding.
   1.753 +   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   1.754 +   *
   1.755 +   * @param start The start offset in this string at which the compare operation begins.
   1.756 +   * @param length The number of code units from this string to compare.
   1.757 +   * @param srcChars A pointer to another string to compare this one to.
   1.758 +   * @param srcStart The start offset in that string at which the compare operation begins.
   1.759 +   * @param srcLength The number of code units from that string to compare.
   1.760 +   * @param options A bit set of options:
   1.761 +   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   1.762 +   *     Comparison in code unit order with default case folding.
   1.763 +   *
   1.764 +   *   - U_COMPARE_CODE_POINT_ORDER
   1.765 +   *     Set to choose code point order instead of code unit order
   1.766 +   *     (see u_strCompare for details).
   1.767 +   *
   1.768 +   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   1.769 +   *
   1.770 +   * @return A negative, zero, or positive integer indicating the comparison result.
   1.771 +   * @stable ICU 2.0
   1.772 +   */
   1.773 +  inline int8_t caseCompare(int32_t start,
   1.774 +         int32_t length,
   1.775 +         const UChar *srcChars,
   1.776 +         int32_t srcStart,
   1.777 +         int32_t srcLength,
   1.778 +         uint32_t options) const;
   1.779 +
   1.780 +  /**
   1.781 +   * Compare two strings case-insensitively using full case folding.
   1.782 +   * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
   1.783 +   *
   1.784 +   * @param start The start offset in this string at which the compare operation begins.
   1.785 +   * @param limit The offset after the last code unit from this string to compare.
   1.786 +   * @param srcText Another string to compare this one to.
   1.787 +   * @param srcStart The start offset in that string at which the compare operation begins.
   1.788 +   * @param srcLimit The offset after the last code unit from that string to compare.
   1.789 +   * @param options A bit set of options:
   1.790 +   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   1.791 +   *     Comparison in code unit order with default case folding.
   1.792 +   *
   1.793 +   *   - U_COMPARE_CODE_POINT_ORDER
   1.794 +   *     Set to choose code point order instead of code unit order
   1.795 +   *     (see u_strCompare for details).
   1.796 +   *
   1.797 +   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   1.798 +   *
   1.799 +   * @return A negative, zero, or positive integer indicating the comparison result.
   1.800 +   * @stable ICU 2.0
   1.801 +   */
   1.802 +  inline int8_t caseCompareBetween(int32_t start,
   1.803 +            int32_t limit,
   1.804 +            const UnicodeString& srcText,
   1.805 +            int32_t srcStart,
   1.806 +            int32_t srcLimit,
   1.807 +            uint32_t options) const;
   1.808 +
   1.809 +  /**
   1.810 +   * Determine if this starts with the characters in <TT>text</TT>
   1.811 +   * @param text The text to match.
   1.812 +   * @return TRUE if this starts with the characters in <TT>text</TT>,
   1.813 +   * FALSE otherwise
   1.814 +   * @stable ICU 2.0
   1.815 +   */
   1.816 +  inline UBool startsWith(const UnicodeString& text) const;
   1.817 +
   1.818 +  /**
   1.819 +   * Determine if this starts with the characters in <TT>srcText</TT>
   1.820 +   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   1.821 +   * @param srcText The text to match.
   1.822 +   * @param srcStart the offset into <TT>srcText</TT> to start matching
   1.823 +   * @param srcLength the number of characters in <TT>srcText</TT> to match
   1.824 +   * @return TRUE if this starts with the characters in <TT>srcText</TT>,
   1.825 +   * FALSE otherwise
   1.826 +   * @stable ICU 2.0
   1.827 +   */
   1.828 +  inline UBool startsWith(const UnicodeString& srcText,
   1.829 +            int32_t srcStart,
   1.830 +            int32_t srcLength) const;
   1.831 +
   1.832 +  /**
   1.833 +   * Determine if this starts with the characters in <TT>srcChars</TT>
   1.834 +   * @param srcChars The characters to match.
   1.835 +   * @param srcLength the number of characters in <TT>srcChars</TT>
   1.836 +   * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
   1.837 +   * FALSE otherwise
   1.838 +   * @stable ICU 2.0
   1.839 +   */
   1.840 +  inline UBool startsWith(const UChar *srcChars,
   1.841 +            int32_t srcLength) const;
   1.842 +
   1.843 +  /**
   1.844 +   * Determine if this starts with the characters in <TT>srcChars</TT>
   1.845 +   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   1.846 +   * @param srcChars The characters to match.
   1.847 +   * @param srcStart the offset into <TT>srcChars</TT> to start matching
   1.848 +   * @param srcLength the number of characters in <TT>srcChars</TT> to match
   1.849 +   * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
   1.850 +   * @stable ICU 2.0
   1.851 +   */
   1.852 +  inline UBool startsWith(const UChar *srcChars,
   1.853 +            int32_t srcStart,
   1.854 +            int32_t srcLength) const;
   1.855 +
   1.856 +  /**
   1.857 +   * Determine if this ends with the characters in <TT>text</TT>
   1.858 +   * @param text The text to match.
   1.859 +   * @return TRUE if this ends with the characters in <TT>text</TT>,
   1.860 +   * FALSE otherwise
   1.861 +   * @stable ICU 2.0
   1.862 +   */
   1.863 +  inline UBool endsWith(const UnicodeString& text) const;
   1.864 +
   1.865 +  /**
   1.866 +   * Determine if this ends with the characters in <TT>srcText</TT>
   1.867 +   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   1.868 +   * @param srcText The text to match.
   1.869 +   * @param srcStart the offset into <TT>srcText</TT> to start matching
   1.870 +   * @param srcLength the number of characters in <TT>srcText</TT> to match
   1.871 +   * @return TRUE if this ends with the characters in <TT>srcText</TT>,
   1.872 +   * FALSE otherwise
   1.873 +   * @stable ICU 2.0
   1.874 +   */
   1.875 +  inline UBool endsWith(const UnicodeString& srcText,
   1.876 +          int32_t srcStart,
   1.877 +          int32_t srcLength) const;
   1.878 +
   1.879 +  /**
   1.880 +   * Determine if this ends with the characters in <TT>srcChars</TT>
   1.881 +   * @param srcChars The characters to match.
   1.882 +   * @param srcLength the number of characters in <TT>srcChars</TT>
   1.883 +   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
   1.884 +   * FALSE otherwise
   1.885 +   * @stable ICU 2.0
   1.886 +   */
   1.887 +  inline UBool endsWith(const UChar *srcChars,
   1.888 +          int32_t srcLength) const;
   1.889 +
   1.890 +  /**
   1.891 +   * Determine if this ends with the characters in <TT>srcChars</TT>
   1.892 +   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   1.893 +   * @param srcChars The characters to match.
   1.894 +   * @param srcStart the offset into <TT>srcChars</TT> to start matching
   1.895 +   * @param srcLength the number of characters in <TT>srcChars</TT> to match
   1.896 +   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
   1.897 +   * FALSE otherwise
   1.898 +   * @stable ICU 2.0
   1.899 +   */
   1.900 +  inline UBool endsWith(const UChar *srcChars,
   1.901 +          int32_t srcStart,
   1.902 +          int32_t srcLength) const;
   1.903 +
   1.904 +
   1.905 +  /* Searching - bitwise only */
   1.906 +
   1.907 +  /**
   1.908 +   * Locate in this the first occurrence of the characters in <TT>text</TT>,
   1.909 +   * using bitwise comparison.
   1.910 +   * @param text The text to search for.
   1.911 +   * @return The offset into this of the start of <TT>text</TT>,
   1.912 +   * or -1 if not found.
   1.913 +   * @stable ICU 2.0
   1.914 +   */
   1.915 +  inline int32_t indexOf(const UnicodeString& text) const;
   1.916 +
   1.917 +  /**
   1.918 +   * Locate in this the first occurrence of the characters in <TT>text</TT>
   1.919 +   * starting at offset <TT>start</TT>, using bitwise comparison.
   1.920 +   * @param text The text to search for.
   1.921 +   * @param start The offset at which searching will start.
   1.922 +   * @return The offset into this of the start of <TT>text</TT>,
   1.923 +   * or -1 if not found.
   1.924 +   * @stable ICU 2.0
   1.925 +   */
   1.926 +  inline int32_t indexOf(const UnicodeString& text,
   1.927 +              int32_t start) const;
   1.928 +
   1.929 +  /**
   1.930 +   * Locate in this the first occurrence in the range
   1.931 +   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1.932 +   * in <TT>text</TT>, using bitwise comparison.
   1.933 +   * @param text The text to search for.
   1.934 +   * @param start The offset at which searching will start.
   1.935 +   * @param length The number of characters to search
   1.936 +   * @return The offset into this of the start of <TT>text</TT>,
   1.937 +   * or -1 if not found.
   1.938 +   * @stable ICU 2.0
   1.939 +   */
   1.940 +  inline int32_t indexOf(const UnicodeString& text,
   1.941 +              int32_t start,
   1.942 +              int32_t length) const;
   1.943 +
   1.944 +  /**
   1.945 +   * Locate in this the first occurrence in the range
   1.946 +   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1.947 +   *  in <TT>srcText</TT> in the range
   1.948 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
   1.949 +   * using bitwise comparison.
   1.950 +   * @param srcText The text to search for.
   1.951 +   * @param srcStart the offset into <TT>srcText</TT> at which
   1.952 +   * to start matching
   1.953 +   * @param srcLength the number of characters in <TT>srcText</TT> to match
   1.954 +   * @param start the offset into this at which to start matching
   1.955 +   * @param length the number of characters in this to search
   1.956 +   * @return The offset into this of the start of <TT>srcText</TT>,
   1.957 +   * or -1 if not found.
   1.958 +   * @stable ICU 2.0
   1.959 +   */
   1.960 +  inline int32_t indexOf(const UnicodeString& srcText,
   1.961 +              int32_t srcStart,
   1.962 +              int32_t srcLength,
   1.963 +              int32_t start,
   1.964 +              int32_t length) const;
   1.965 +
   1.966 +  /**
   1.967 +   * Locate in this the first occurrence of the characters in
   1.968 +   * <TT>srcChars</TT>
   1.969 +   * starting at offset <TT>start</TT>, using bitwise comparison.
   1.970 +   * @param srcChars The text to search for.
   1.971 +   * @param srcLength the number of characters in <TT>srcChars</TT> to match
   1.972 +   * @param start the offset into this at which to start matching
   1.973 +   * @return The offset into this of the start of <TT>srcChars</TT>,
   1.974 +   * or -1 if not found.
   1.975 +   * @stable ICU 2.0
   1.976 +   */
   1.977 +  inline int32_t indexOf(const UChar *srcChars,
   1.978 +              int32_t srcLength,
   1.979 +              int32_t start) const;
   1.980 +
   1.981 +  /**
   1.982 +   * Locate in this the first occurrence in the range
   1.983 +   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1.984 +   * in <TT>srcChars</TT>, using bitwise comparison.
   1.985 +   * @param srcChars The text to search for.
   1.986 +   * @param srcLength the number of characters in <TT>srcChars</TT>
   1.987 +   * @param start The offset at which searching will start.
   1.988 +   * @param length The number of characters to search
   1.989 +   * @return The offset into this of the start of <TT>srcChars</TT>,
   1.990 +   * or -1 if not found.
   1.991 +   * @stable ICU 2.0
   1.992 +   */
   1.993 +  inline int32_t indexOf(const UChar *srcChars,
   1.994 +              int32_t srcLength,
   1.995 +              int32_t start,
   1.996 +              int32_t length) const;
   1.997 +
   1.998 +  /**
   1.999 +   * Locate in this the first occurrence in the range
  1.1000 +   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1.1001 +   * in <TT>srcChars</TT> in the range
  1.1002 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1.1003 +   * using bitwise comparison.
  1.1004 +   * @param srcChars The text to search for.
  1.1005 +   * @param srcStart the offset into <TT>srcChars</TT> at which
  1.1006 +   * to start matching
  1.1007 +   * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1.1008 +   * @param start the offset into this at which to start matching
  1.1009 +   * @param length the number of characters in this to search
  1.1010 +   * @return The offset into this of the start of <TT>srcChars</TT>,
  1.1011 +   * or -1 if not found.
  1.1012 +   * @stable ICU 2.0
  1.1013 +   */
  1.1014 +  int32_t indexOf(const UChar *srcChars,
  1.1015 +              int32_t srcStart,
  1.1016 +              int32_t srcLength,
  1.1017 +              int32_t start,
  1.1018 +              int32_t length) const;
  1.1019 +
  1.1020 +  /**
  1.1021 +   * Locate in this the first occurrence of the BMP code point <code>c</code>,
  1.1022 +   * using bitwise comparison.
  1.1023 +   * @param c The code unit to search for.
  1.1024 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1025 +   * @stable ICU 2.0
  1.1026 +   */
  1.1027 +  inline int32_t indexOf(UChar c) const;
  1.1028 +
  1.1029 +  /**
  1.1030 +   * Locate in this the first occurrence of the code point <TT>c</TT>,
  1.1031 +   * using bitwise comparison.
  1.1032 +   *
  1.1033 +   * @param c The code point to search for.
  1.1034 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1035 +   * @stable ICU 2.0
  1.1036 +   */
  1.1037 +  inline int32_t indexOf(UChar32 c) const;
  1.1038 +
  1.1039 +  /**
  1.1040 +   * Locate in this the first occurrence of the BMP code point <code>c</code>,
  1.1041 +   * starting at offset <TT>start</TT>, using bitwise comparison.
  1.1042 +   * @param c The code unit to search for.
  1.1043 +   * @param start The offset at which searching will start.
  1.1044 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1045 +   * @stable ICU 2.0
  1.1046 +   */
  1.1047 +  inline int32_t indexOf(UChar c,
  1.1048 +              int32_t start) const;
  1.1049 +
  1.1050 +  /**
  1.1051 +   * Locate in this the first occurrence of the code point <TT>c</TT>
  1.1052 +   * starting at offset <TT>start</TT>, using bitwise comparison.
  1.1053 +   *
  1.1054 +   * @param c The code point to search for.
  1.1055 +   * @param start The offset at which searching will start.
  1.1056 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1057 +   * @stable ICU 2.0
  1.1058 +   */
  1.1059 +  inline int32_t indexOf(UChar32 c,
  1.1060 +              int32_t start) const;
  1.1061 +
  1.1062 +  /**
  1.1063 +   * Locate in this the first occurrence of the BMP code point <code>c</code>
  1.1064 +   * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1.1065 +   * using bitwise comparison.
  1.1066 +   * @param c The code unit to search for.
  1.1067 +   * @param start the offset into this at which to start matching
  1.1068 +   * @param length the number of characters in this to search
  1.1069 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1070 +   * @stable ICU 2.0
  1.1071 +   */
  1.1072 +  inline int32_t indexOf(UChar c,
  1.1073 +              int32_t start,
  1.1074 +              int32_t length) const;
  1.1075 +
  1.1076 +  /**
  1.1077 +   * Locate in this the first occurrence of the code point <TT>c</TT>
  1.1078 +   * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1.1079 +   * using bitwise comparison.
  1.1080 +   *
  1.1081 +   * @param c The code point to search for.
  1.1082 +   * @param start the offset into this at which to start matching
  1.1083 +   * @param length the number of characters in this to search
  1.1084 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1085 +   * @stable ICU 2.0
  1.1086 +   */
  1.1087 +  inline int32_t indexOf(UChar32 c,
  1.1088 +              int32_t start,
  1.1089 +              int32_t length) const;
  1.1090 +
  1.1091 +  /**
  1.1092 +   * Locate in this the last occurrence of the characters in <TT>text</TT>,
  1.1093 +   * using bitwise comparison.
  1.1094 +   * @param text The text to search for.
  1.1095 +   * @return The offset into this of the start of <TT>text</TT>,
  1.1096 +   * or -1 if not found.
  1.1097 +   * @stable ICU 2.0
  1.1098 +   */
  1.1099 +  inline int32_t lastIndexOf(const UnicodeString& text) const;
  1.1100 +
  1.1101 +  /**
  1.1102 +   * Locate in this the last occurrence of the characters in <TT>text</TT>
  1.1103 +   * starting at offset <TT>start</TT>, using bitwise comparison.
  1.1104 +   * @param text The text to search for.
  1.1105 +   * @param start The offset at which searching will start.
  1.1106 +   * @return The offset into this of the start of <TT>text</TT>,
  1.1107 +   * or -1 if not found.
  1.1108 +   * @stable ICU 2.0
  1.1109 +   */
  1.1110 +  inline int32_t lastIndexOf(const UnicodeString& text,
  1.1111 +              int32_t start) const;
  1.1112 +
  1.1113 +  /**
  1.1114 +   * Locate in this the last occurrence in the range
  1.1115 +   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1.1116 +   * in <TT>text</TT>, using bitwise comparison.
  1.1117 +   * @param text The text to search for.
  1.1118 +   * @param start The offset at which searching will start.
  1.1119 +   * @param length The number of characters to search
  1.1120 +   * @return The offset into this of the start of <TT>text</TT>,
  1.1121 +   * or -1 if not found.
  1.1122 +   * @stable ICU 2.0
  1.1123 +   */
  1.1124 +  inline int32_t lastIndexOf(const UnicodeString& text,
  1.1125 +              int32_t start,
  1.1126 +              int32_t length) const;
  1.1127 +
  1.1128 +  /**
  1.1129 +   * Locate in this the last occurrence in the range
  1.1130 +   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1.1131 +   * in <TT>srcText</TT> in the range
  1.1132 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1.1133 +   * using bitwise comparison.
  1.1134 +   * @param srcText The text to search for.
  1.1135 +   * @param srcStart the offset into <TT>srcText</TT> at which
  1.1136 +   * to start matching
  1.1137 +   * @param srcLength the number of characters in <TT>srcText</TT> to match
  1.1138 +   * @param start the offset into this at which to start matching
  1.1139 +   * @param length the number of characters in this to search
  1.1140 +   * @return The offset into this of the start of <TT>srcText</TT>,
  1.1141 +   * or -1 if not found.
  1.1142 +   * @stable ICU 2.0
  1.1143 +   */
  1.1144 +  inline int32_t lastIndexOf(const UnicodeString& srcText,
  1.1145 +              int32_t srcStart,
  1.1146 +              int32_t srcLength,
  1.1147 +              int32_t start,
  1.1148 +              int32_t length) const;
  1.1149 +
  1.1150 +  /**
  1.1151 +   * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
  1.1152 +   * starting at offset <TT>start</TT>, using bitwise comparison.
  1.1153 +   * @param srcChars The text to search for.
  1.1154 +   * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1.1155 +   * @param start the offset into this at which to start matching
  1.1156 +   * @return The offset into this of the start of <TT>srcChars</TT>,
  1.1157 +   * or -1 if not found.
  1.1158 +   * @stable ICU 2.0
  1.1159 +   */
  1.1160 +  inline int32_t lastIndexOf(const UChar *srcChars,
  1.1161 +              int32_t srcLength,
  1.1162 +              int32_t start) const;
  1.1163 +
  1.1164 +  /**
  1.1165 +   * Locate in this the last occurrence in the range
  1.1166 +   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1.1167 +   * in <TT>srcChars</TT>, using bitwise comparison.
  1.1168 +   * @param srcChars The text to search for.
  1.1169 +   * @param srcLength the number of characters in <TT>srcChars</TT>
  1.1170 +   * @param start The offset at which searching will start.
  1.1171 +   * @param length The number of characters to search
  1.1172 +   * @return The offset into this of the start of <TT>srcChars</TT>,
  1.1173 +   * or -1 if not found.
  1.1174 +   * @stable ICU 2.0
  1.1175 +   */
  1.1176 +  inline int32_t lastIndexOf(const UChar *srcChars,
  1.1177 +              int32_t srcLength,
  1.1178 +              int32_t start,
  1.1179 +              int32_t length) const;
  1.1180 +
  1.1181 +  /**
  1.1182 +   * Locate in this the last occurrence in the range
  1.1183 +   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1.1184 +   * in <TT>srcChars</TT> in the range
  1.1185 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1.1186 +   * using bitwise comparison.
  1.1187 +   * @param srcChars The text to search for.
  1.1188 +   * @param srcStart the offset into <TT>srcChars</TT> at which
  1.1189 +   * to start matching
  1.1190 +   * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1.1191 +   * @param start the offset into this at which to start matching
  1.1192 +   * @param length the number of characters in this to search
  1.1193 +   * @return The offset into this of the start of <TT>srcChars</TT>,
  1.1194 +   * or -1 if not found.
  1.1195 +   * @stable ICU 2.0
  1.1196 +   */
  1.1197 +  int32_t lastIndexOf(const UChar *srcChars,
  1.1198 +              int32_t srcStart,
  1.1199 +              int32_t srcLength,
  1.1200 +              int32_t start,
  1.1201 +              int32_t length) const;
  1.1202 +
  1.1203 +  /**
  1.1204 +   * Locate in this the last occurrence of the BMP code point <code>c</code>,
  1.1205 +   * using bitwise comparison.
  1.1206 +   * @param c The code unit to search for.
  1.1207 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1208 +   * @stable ICU 2.0
  1.1209 +   */
  1.1210 +  inline int32_t lastIndexOf(UChar c) const;
  1.1211 +
  1.1212 +  /**
  1.1213 +   * Locate in this the last occurrence of the code point <TT>c</TT>,
  1.1214 +   * using bitwise comparison.
  1.1215 +   *
  1.1216 +   * @param c The code point to search for.
  1.1217 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1218 +   * @stable ICU 2.0
  1.1219 +   */
  1.1220 +  inline int32_t lastIndexOf(UChar32 c) const;
  1.1221 +
  1.1222 +  /**
  1.1223 +   * Locate in this the last occurrence of the BMP code point <code>c</code>
  1.1224 +   * starting at offset <TT>start</TT>, using bitwise comparison.
  1.1225 +   * @param c The code unit to search for.
  1.1226 +   * @param start The offset at which searching will start.
  1.1227 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1228 +   * @stable ICU 2.0
  1.1229 +   */
  1.1230 +  inline int32_t lastIndexOf(UChar c,
  1.1231 +              int32_t start) const;
  1.1232 +
  1.1233 +  /**
  1.1234 +   * Locate in this the last occurrence of the code point <TT>c</TT>
  1.1235 +   * starting at offset <TT>start</TT>, using bitwise comparison.
  1.1236 +   *
  1.1237 +   * @param c The code point to search for.
  1.1238 +   * @param start The offset at which searching will start.
  1.1239 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1240 +   * @stable ICU 2.0
  1.1241 +   */
  1.1242 +  inline int32_t lastIndexOf(UChar32 c,
  1.1243 +              int32_t start) const;
  1.1244 +
  1.1245 +  /**
  1.1246 +   * Locate in this the last occurrence of the BMP code point <code>c</code>
  1.1247 +   * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1.1248 +   * using bitwise comparison.
  1.1249 +   * @param c The code unit to search for.
  1.1250 +   * @param start the offset into this at which to start matching
  1.1251 +   * @param length the number of characters in this to search
  1.1252 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1253 +   * @stable ICU 2.0
  1.1254 +   */
  1.1255 +  inline int32_t lastIndexOf(UChar c,
  1.1256 +              int32_t start,
  1.1257 +              int32_t length) const;
  1.1258 +
  1.1259 +  /**
  1.1260 +   * Locate in this the last occurrence of the code point <TT>c</TT>
  1.1261 +   * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1.1262 +   * using bitwise comparison.
  1.1263 +   *
  1.1264 +   * @param c The code point to search for.
  1.1265 +   * @param start the offset into this at which to start matching
  1.1266 +   * @param length the number of characters in this to search
  1.1267 +   * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1.1268 +   * @stable ICU 2.0
  1.1269 +   */
  1.1270 +  inline int32_t lastIndexOf(UChar32 c,
  1.1271 +              int32_t start,
  1.1272 +              int32_t length) const;
  1.1273 +
  1.1274 +
  1.1275 +  /* Character access */
  1.1276 +
  1.1277 +  /**
  1.1278 +   * Return the code unit at offset <tt>offset</tt>.
  1.1279 +   * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1.1280 +   * @param offset a valid offset into the text
  1.1281 +   * @return the code unit at offset <tt>offset</tt>
  1.1282 +   *         or 0xffff if the offset is not valid for this string
  1.1283 +   * @stable ICU 2.0
  1.1284 +   */
  1.1285 +  inline UChar charAt(int32_t offset) const;
  1.1286 +
  1.1287 +  /**
  1.1288 +   * Return the code unit at offset <tt>offset</tt>.
  1.1289 +   * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1.1290 +   * @param offset a valid offset into the text
  1.1291 +   * @return the code unit at offset <tt>offset</tt>
  1.1292 +   * @stable ICU 2.0
  1.1293 +   */
  1.1294 +  inline UChar operator[] (int32_t offset) const;
  1.1295 +
  1.1296 +  /**
  1.1297 +   * Return the code point that contains the code unit
  1.1298 +   * at offset <tt>offset</tt>.
  1.1299 +   * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1.1300 +   * @param offset a valid offset into the text
  1.1301 +   * that indicates the text offset of any of the code units
  1.1302 +   * that will be assembled into a code point (21-bit value) and returned
  1.1303 +   * @return the code point of text at <tt>offset</tt>
  1.1304 +   *         or 0xffff if the offset is not valid for this string
  1.1305 +   * @stable ICU 2.0
  1.1306 +   */
  1.1307 +  UChar32 char32At(int32_t offset) const;
  1.1308 +
  1.1309 +  /**
  1.1310 +   * Adjust a random-access offset so that
  1.1311 +   * it points to the beginning of a Unicode character.
  1.1312 +   * The offset that is passed in points to
  1.1313 +   * any code unit of a code point,
  1.1314 +   * while the returned offset will point to the first code unit
  1.1315 +   * of the same code point.
  1.1316 +   * In UTF-16, if the input offset points to a second surrogate
  1.1317 +   * of a surrogate pair, then the returned offset will point
  1.1318 +   * to the first surrogate.
  1.1319 +   * @param offset a valid offset into one code point of the text
  1.1320 +   * @return offset of the first code unit of the same code point
  1.1321 +   * @see U16_SET_CP_START
  1.1322 +   * @stable ICU 2.0
  1.1323 +   */
  1.1324 +  int32_t getChar32Start(int32_t offset) const;
  1.1325 +
  1.1326 +  /**
  1.1327 +   * Adjust a random-access offset so that
  1.1328 +   * it points behind a Unicode character.
  1.1329 +   * The offset that is passed in points behind
  1.1330 +   * any code unit of a code point,
  1.1331 +   * while the returned offset will point behind the last code unit
  1.1332 +   * of the same code point.
  1.1333 +   * In UTF-16, if the input offset points behind the first surrogate
  1.1334 +   * (i.e., to the second surrogate)
  1.1335 +   * of a surrogate pair, then the returned offset will point
  1.1336 +   * behind the second surrogate (i.e., to the code unit following the pair).
  1.1337 +   * @param offset a valid offset after any code unit of a code point of the text
  1.1338 +   * @return offset of the first code unit after the same code point
  1.1339 +   * @see U16_SET_CP_LIMIT
  1.1340 +   * @stable ICU 2.0
  1.1341 +   */
  1.1342 +  int32_t getChar32Limit(int32_t offset) const;
  1.1343 +
  1.1344 +  /**
  1.1345 +   * Move the code unit index along the string by delta code points.
  1.1346 +   * Interpret the input index as a code unit-based offset into the string,
  1.1347 +   * move the index forward or backward by delta code points, and
  1.1348 +   * return the resulting index.
  1.1349 +   * The input index should point to the first code unit of a code point,
  1.1350 +   * if there is more than one.
  1.1351 +   *
  1.1352 +   * Both input and output indexes are code unit-based as for all
  1.1353 +   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
  1.1354 +   * If delta<0 then the index is moved backward (toward the start of the string).
  1.1355 +   * If delta>0 then the index is moved forward (toward the end of the string).
  1.1356 +   *
  1.1357 +   * This behaves like CharacterIterator::move32(delta, kCurrent).
  1.1358 +   *
  1.1359 +   * Behavior for out-of-bounds indexes:
  1.1360 +   * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
  1.1361 +   * if the input index<0 then it is pinned to 0;
  1.1362 +   * if it is index>length() then it is pinned to length().
  1.1363 +   * Afterwards, the index is moved by <code>delta</code> code points
  1.1364 +   * forward or backward,
  1.1365 +   * but no further backward than to 0 and no further forward than to length().
  1.1366 +   * The resulting index return value will be in between 0 and length(), inclusively.
  1.1367 +   *
  1.1368 +   * Examples:
  1.1369 +   * <pre>
  1.1370 +   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
  1.1371 +   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 28).unescape();
  1.1372 +   *
  1.1373 +   * // initial index: position of U+10000
  1.1374 +   * int32_t index=1;
  1.1375 +   *
  1.1376 +   * // the following examples will all result in index==4, position of U+10ffff
  1.1377 +   *
  1.1378 +   * // skip 2 code points from some position in the string
  1.1379 +   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
  1.1380 +   *
  1.1381 +   * // go to the 3rd code point from the start of s (0-based)
  1.1382 +   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
  1.1383 +   *
  1.1384 +   * // go to the next-to-last code point of s
  1.1385 +   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
  1.1386 +   * </pre>
  1.1387 +   *
  1.1388 +   * @param index input code unit index
  1.1389 +   * @param delta (signed) code point count to move the index forward or backward
  1.1390 +   *        in the string
  1.1391 +   * @return the resulting code unit index
  1.1392 +   * @stable ICU 2.0
  1.1393 +   */
  1.1394 +  int32_t moveIndex32(int32_t index, int32_t delta) const;
  1.1395 +
  1.1396 +  /* Substring extraction */
  1.1397 +
  1.1398 +  /**
  1.1399 +   * Copy the characters in the range
  1.1400 +   * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
  1.1401 +   * beginning at <tt>dstStart</tt>.
  1.1402 +   * If the string aliases to <code>dst</code> itself as an external buffer,
  1.1403 +   * then extract() will not copy the contents.
  1.1404 +   *
  1.1405 +   * @param start offset of first character which will be copied into the array
  1.1406 +   * @param length the number of characters to extract
  1.1407 +   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
  1.1408 +   * must be at least (<tt>dstStart + length</tt>).
  1.1409 +   * @param dstStart the offset in <TT>dst</TT> where the first character
  1.1410 +   * will be extracted
  1.1411 +   * @stable ICU 2.0
  1.1412 +   */
  1.1413 +  inline void extract(int32_t start,
  1.1414 +           int32_t length,
  1.1415 +           UChar *dst,
  1.1416 +           int32_t dstStart = 0) const;
  1.1417 +
  1.1418 +  /**
  1.1419 +   * Copy the contents of the string into dest.
  1.1420 +   * This is a convenience function that
  1.1421 +   * checks if there is enough space in dest,
  1.1422 +   * extracts the entire string if possible,
  1.1423 +   * and NUL-terminates dest if possible.
  1.1424 +   *
  1.1425 +   * If the string fits into dest but cannot be NUL-terminated
  1.1426 +   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
  1.1427 +   * If the string itself does not fit into dest
  1.1428 +   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
  1.1429 +   *
  1.1430 +   * If the string aliases to <code>dest</code> itself as an external buffer,
  1.1431 +   * then extract() will not copy the contents.
  1.1432 +   *
  1.1433 +   * @param dest Destination string buffer.
  1.1434 +   * @param destCapacity Number of UChars available at dest.
  1.1435 +   * @param errorCode ICU error code.
  1.1436 +   * @return length()
  1.1437 +   * @stable ICU 2.0
  1.1438 +   */
  1.1439 +  int32_t
  1.1440 +  extract(UChar *dest, int32_t destCapacity,
  1.1441 +          UErrorCode &errorCode) const;
  1.1442 +
  1.1443 +  /**
  1.1444 +   * Copy the characters in the range
  1.1445 +   * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
  1.1446 +   * <tt>target</tt>.
  1.1447 +   * @param start offset of first character which will be copied
  1.1448 +   * @param length the number of characters to extract
  1.1449 +   * @param target UnicodeString into which to copy characters.
  1.1450 +   *        This function returns nothing; the result is stored in <TT>target</TT>.
  1.1451 +   * @stable ICU 2.0
  1.1452 +   */
  1.1453 +  inline void extract(int32_t start,
  1.1454 +           int32_t length,
  1.1455 +           UnicodeString& target) const;
  1.1456 +
  1.1457 +  /**
  1.1458 +   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
  1.1459 +   * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
  1.1460 +   * @param start offset of first character which will be copied into the array
  1.1461 +   * @param limit offset immediately following the last character to be copied
  1.1462 +   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
  1.1463 +   * must be at least (<tt>dstStart + (limit - start)</tt>).
  1.1464 +   * @param dstStart the offset in <TT>dst</TT> where the first character
  1.1465 +   * will be extracted
  1.1466 +   * @stable ICU 2.0
  1.1467 +   */
  1.1468 +  inline void extractBetween(int32_t start,
  1.1469 +              int32_t limit,
  1.1470 +              UChar *dst,
  1.1471 +              int32_t dstStart = 0) const;
  1.1472 +
  1.1473 +  /**
  1.1474 +   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
  1.1475 +   * into the UnicodeString <tt>target</tt>.  Replaceable API.
  1.1476 +   * @param start offset of first character which will be copied
  1.1477 +   * @param limit offset immediately following the last character to be copied
  1.1478 +   * @param target UnicodeString into which to copy characters.
  1.1479 +   *        This function returns nothing; the result is stored in <TT>target</TT>.
  1.1480 +   * @stable ICU 2.0
  1.1481 +   */
  1.1482 +  virtual void extractBetween(int32_t start,
  1.1483 +              int32_t limit,
  1.1484 +              UnicodeString& target) const;
  1.1485 +
  1.1486 +  /**
  1.1487 +   * Copy the characters in the range 
  1.1488 +   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
  1.1489 +   * All characters must be invariant (see utypes.h).
  1.1490 +   * Use US_INV as the last, signature-distinguishing parameter.
  1.1491 +   *
  1.1492 +   * This function does not write any more than <code>targetCapacity</code>
  1.1493 +   * characters but returns the length of the entire output string
  1.1494 +   * so that one can allocate a larger buffer and call the function again
  1.1495 +   * if necessary.
  1.1496 +   * The output string is NUL-terminated if possible.
  1.1497 +   *
  1.1498 +   * @param start offset of first character which will be copied
  1.1499 +   * @param startLength the number of characters to extract
  1.1500 +   * @param target the target buffer for extraction, can be NULL
  1.1501 +   *               if targetCapacity is 0
  1.1502 +   * @param targetCapacity the length of the target buffer
  1.1503 +   * @param inv Signature-distinguishing parameter, use US_INV.
  1.1504 +   * @return the output string length, not including the terminating NUL
  1.1505 +   * @stable ICU 3.2
  1.1506 +   */
  1.1507 +  int32_t extract(int32_t start,
  1.1508 +           int32_t startLength,
  1.1509 +           char *target,
  1.1510 +           int32_t targetCapacity,
  1.1511 +           enum EInvariant inv) const;
  1.1512 +
  1.1513 +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  1.1514 +
  1.1515 +  /**
  1.1516 +   * Copy the characters in the range
  1.1517 +   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
  1.1518 +   * in the platform's default codepage.
  1.1519 +   * This function does not write any more than <code>targetLength</code>
  1.1520 +   * characters but returns the length of the entire output string
  1.1521 +   * so that one can allocate a larger buffer and call the function again
  1.1522 +   * if necessary.
  1.1523 +   * The output string is NUL-terminated if possible.
  1.1524 +   *
  1.1525 +   * @param start offset of first character which will be copied
  1.1526 +   * @param startLength the number of characters to extract
  1.1527 +   * @param target the target buffer for extraction
  1.1528 +   * @param targetLength the length of the target buffer
  1.1529 +   * If <TT>target</TT> is NULL, then the number of bytes required for
  1.1530 +   * <TT>target</TT> is returned.
  1.1531 +   * @return the output string length, not including the terminating NUL
  1.1532 +   * @stable ICU 2.0
  1.1533 +   */
  1.1534 +  int32_t extract(int32_t start,
  1.1535 +           int32_t startLength,
  1.1536 +           char *target,
  1.1537 +           uint32_t targetLength) const;
  1.1538 +
  1.1539 +#endif
  1.1540 +
  1.1541 +#if !UCONFIG_NO_CONVERSION
  1.1542 +
  1.1543 +  /**
  1.1544 +   * Copy the characters in the range
  1.1545 +   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
  1.1546 +   * in a specified codepage.
  1.1547 +   * The output string is NUL-terminated.
  1.1548 +   *
  1.1549 +   * Recommendation: For invariant-character strings use
  1.1550 +   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1.1551 +   * because it avoids object code dependencies of UnicodeString on
  1.1552 +   * the conversion code.
  1.1553 +   *
  1.1554 +   * @param start offset of first character which will be copied
  1.1555 +   * @param startLength the number of characters to extract
  1.1556 +   * @param target the target buffer for extraction
  1.1557 +   * @param codepage the desired codepage for the characters.  0 has
  1.1558 +   * the special meaning of the default codepage
  1.1559 +   * If <code>codepage</code> is an empty string (<code>""</code>),
  1.1560 +   * then a simple conversion is performed on the codepage-invariant
  1.1561 +   * subset ("invariant characters") of the platform encoding. See utypes.h.
  1.1562 +   * If <TT>target</TT> is NULL, then the number of bytes required for
  1.1563 +   * <TT>target</TT> is returned. It is assumed that the target is big enough
  1.1564 +   * to fit all of the characters.
  1.1565 +   * @return the output string length, not including the terminating NUL
  1.1566 +   * @stable ICU 2.0
  1.1567 +   */
  1.1568 +  inline int32_t extract(int32_t start,
  1.1569 +                 int32_t startLength,
  1.1570 +                 char *target,
  1.1571 +                 const char *codepage = 0) const;
  1.1572 +
  1.1573 +  /**
  1.1574 +   * Copy the characters in the range
  1.1575 +   * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
  1.1576 +   * in a specified codepage.
  1.1577 +   * This function does not write any more than <code>targetLength</code>
  1.1578 +   * characters but returns the length of the entire output string
  1.1579 +   * so that one can allocate a larger buffer and call the function again
  1.1580 +   * if necessary.
  1.1581 +   * The output string is NUL-terminated if possible.
  1.1582 +   *
  1.1583 +   * Recommendation: For invariant-character strings use
  1.1584 +   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1.1585 +   * because it avoids object code dependencies of UnicodeString on
  1.1586 +   * the conversion code.
  1.1587 +   *
  1.1588 +   * @param start offset of first character which will be copied
  1.1589 +   * @param startLength the number of characters to extract
  1.1590 +   * @param target the target buffer for extraction
  1.1591 +   * @param targetLength the length of the target buffer
  1.1592 +   * @param codepage the desired codepage for the characters.  0 has
  1.1593 +   * the special meaning of the default codepage
  1.1594 +   * If <code>codepage</code> is an empty string (<code>""</code>),
  1.1595 +   * then a simple conversion is performed on the codepage-invariant
  1.1596 +   * subset ("invariant characters") of the platform encoding. See utypes.h.
  1.1597 +   * If <TT>target</TT> is NULL, then the number of bytes required for
  1.1598 +   * <TT>target</TT> is returned.
  1.1599 +   * @return the output string length, not including the terminating NUL
  1.1600 +   * @stable ICU 2.0
  1.1601 +   */
  1.1602 +  int32_t extract(int32_t start,
  1.1603 +           int32_t startLength,
  1.1604 +           char *target,
  1.1605 +           uint32_t targetLength,
  1.1606 +           const char *codepage) const;
  1.1607 +
  1.1608 +  /**
  1.1609 +   * Convert the UnicodeString into a codepage string using an existing UConverter.
  1.1610 +   * The output string is NUL-terminated if possible.
  1.1611 +   *
  1.1612 +   * This function avoids the overhead of opening and closing a converter if
  1.1613 +   * multiple strings are extracted.
  1.1614 +   *
  1.1615 +   * @param dest destination string buffer, can be NULL if destCapacity==0
  1.1616 +   * @param destCapacity the number of chars available at dest
  1.1617 +   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
  1.1618 +   *        or NULL for the default converter
  1.1619 +   * @param errorCode normal ICU error code
  1.1620 +   * @return the length of the output string, not counting the terminating NUL;
  1.1621 +   *         if the length is greater than destCapacity, then the string will not fit
  1.1622 +   *         and a buffer of the indicated length would need to be passed in
  1.1623 +   * @stable ICU 2.0
  1.1624 +   */
  1.1625 +  int32_t extract(char *dest, int32_t destCapacity,
  1.1626 +                  UConverter *cnv,
  1.1627 +                  UErrorCode &errorCode) const;
  1.1628 +
  1.1629 +#endif
  1.1630 +
  1.1631 +  /**
  1.1632 +   * Create a temporary substring for the specified range.
  1.1633 +   * Unlike the substring constructor and setTo() functions,
  1.1634 +   * the object returned here will be a read-only alias (using getBuffer())
  1.1635 +   * rather than copying the text.
  1.1636 +   * As a result, this substring operation is much faster but requires
  1.1637 +   * that the original string not be modified or deleted during the lifetime
  1.1638 +   * of the returned substring object.
  1.1639 +   * @param start offset of the first character visible in the substring
  1.1640 +   * @param length length of the substring
  1.1641 +   * @return a read-only alias UnicodeString object for the substring
  1.1642 +   * @stable ICU 4.4
  1.1643 +   */
  1.1644 +  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
  1.1645 +
  1.1646 +  /**
  1.1647 +   * Create a temporary substring for the specified range.
  1.1648 +   * Same as tempSubString(start, length) except that the substring range
  1.1649 +   * is specified as a (start, limit) pair (with an exclusive limit index)
  1.1650 +   * rather than a (start, length) pair.
  1.1651 +   * @param start offset of the first character visible in the substring
  1.1652 +   * @param limit offset immediately following the last character visible in the substring
  1.1653 +   * @return a read-only alias UnicodeString object for the substring
  1.1654 +   * @stable ICU 4.4
  1.1655 +   */
  1.1656 +  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
  1.1657 +
  1.1658 +  /**
  1.1659 +   * Convert the UnicodeString to UTF-8 and write the result
  1.1660 +   * to a ByteSink. This is called by toUTF8String().
  1.1661 +   * Unpaired surrogates are replaced with U+FFFD.
  1.1662 +   * Calls u_strToUTF8WithSub().
  1.1663 +   *
  1.1664 +   * @param sink A ByteSink to which the UTF-8 version of the string is written.
  1.1665 +   *             sink.Flush() is called at the end.
  1.1666 +   * @stable ICU 4.2
  1.1667 +   * @see toUTF8String
  1.1668 +   */
  1.1669 +  void toUTF8(ByteSink &sink) const;
  1.1670 +
  1.1671 +#if U_HAVE_STD_STRING
  1.1672 +
  1.1673 +  /**
  1.1674 +   * Convert the UnicodeString to UTF-8 and append the result
  1.1675 +   * to a standard string.
  1.1676 +   * Unpaired surrogates are replaced with U+FFFD.
  1.1677 +   * Calls toUTF8().
  1.1678 +   *
  1.1679 +   * @param result A standard string (or a compatible object)
  1.1680 +   *        to which the UTF-8 version of the string is appended.
  1.1681 +   * @return The string object.
  1.1682 +   * @stable ICU 4.2
  1.1683 +   * @see toUTF8
  1.1684 +   */
  1.1685 +  template<typename StringClass>
  1.1686 +  StringClass &toUTF8String(StringClass &result) const {
  1.1687 +    StringByteSink<StringClass> utf8Sink(&result);  // sink writes into the caller's string
  1.1688 +    toUTF8(utf8Sink);
  1.1689 +    return result;
  1.1690 +  }
  1.1691 +
  1.1692 +#endif
  1.1693 +
  1.1694 +  /**
  1.1695 +   * Convert the UnicodeString to UTF-32.
  1.1696 +   * Unpaired surrogates are replaced with U+FFFD.
  1.1697 +   * Calls u_strToUTF32WithSub().
  1.1698 +   *
  1.1699 +   * @param utf32 destination string buffer, can be NULL if capacity==0
  1.1700 +   * @param capacity the number of UChar32s available at utf32
  1.1701 +   * @param errorCode Standard ICU error code. Its input value must
  1.1702 +   *                  pass the U_SUCCESS() test, or else the function returns
  1.1703 +   *                  immediately. Check for U_FAILURE() on output or use with
  1.1704 +   *                  function chaining. (See User Guide for details.)
  1.1705 +   * @return The length of the UTF-32 string.
  1.1706 +   * @see fromUTF32
  1.1707 +   * @stable ICU 4.2
  1.1708 +   */
  1.1709 +  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
  1.1710 +
  1.1711 +  /* Length operations */
  1.1712 +
  1.1713 +  /**
  1.1714 +   * Return the length of the UnicodeString object.
  1.1715 +   * The length is the number of UChar code units in the UnicodeString.
  1.1716 +   * If you want the number of code points, please use countChar32().
  1.1717 +   * @return the length of the UnicodeString object
  1.1718 +   * @see countChar32
  1.1719 +   * @stable ICU 2.0
  1.1720 +   */
  1.1721 +  inline int32_t length(void) const;
  1.1722 +
  1.1723 +  /**
  1.1724 +   * Count Unicode code points in the length UChar code units of the string.
  1.1725 +   * A code point may occupy either one or two UChar code units.
  1.1726 +   * Counting code points involves reading all code units.
  1.1727 +   *
  1.1728 +   * This function is basically the inverse of moveIndex32().
  1.1729 +   *
  1.1730 +   * @param start the index of the first code unit to check
  1.1731 +   * @param length the number of UChar code units to check
  1.1732 +   * @return the number of code points in the specified code units
  1.1733 +   * @see length
  1.1734 +   * @stable ICU 2.0
  1.1735 +   */
  1.1736 +  int32_t
  1.1737 +  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
  1.1738 +
  1.1739 +  /**
  1.1740 +   * Check if the length UChar code units of the string
  1.1741 +   * contain more Unicode code points than a certain number.
  1.1742 +   * This is more efficient than counting all code points in this part of the string
  1.1743 +   * and comparing that number with a threshold.
  1.1744 +   * This function may not need to scan the string at all if the length
  1.1745 +   * falls within a certain range, and
  1.1746 +   * never needs to count more than 'number+1' code points.
  1.1747 +   * Logically equivalent to (countChar32(start, length)>number).
  1.1748 +   * A Unicode code point may occupy either one or two UChar code units.
  1.1749 +   *
  1.1750 +   * @param start the index of the first code unit to check (0 for the entire string)
  1.1751 +   * @param length the number of UChar code units to check
  1.1752 +   *               (use INT32_MAX for the entire string; remember that start/length
  1.1753 +   *                values are pinned)
  1.1754 +   * @param number The number of code points in the (sub)string is compared against
  1.1755 +   *               the 'number' parameter.
  1.1756 +   * @return Boolean value for whether the string contains more Unicode code points
  1.1757 +   *         than 'number'. Same as (u_countChar32(s, length)>number).
  1.1758 +   * @see countChar32
  1.1759 +   * @see u_strHasMoreChar32Than
  1.1760 +   * @stable ICU 2.4
  1.1761 +   */
  1.1762 +  UBool
  1.1763 +  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
  1.1764 +
  1.1765 +  /**
  1.1766 +   * Determine if this string is empty.
  1.1767 +   * @return TRUE if this string contains 0 characters, FALSE otherwise.
  1.1768 +   * @stable ICU 2.0
  1.1769 +   */
  1.1770 +  inline UBool isEmpty(void) const;
  1.1771 +
  1.1772 +  /**
  1.1773 +   * Return the capacity of the internal buffer of the UnicodeString object.
  1.1774 +   * This is useful together with the getBuffer functions.
  1.1775 +   * See there for details.
  1.1776 +   *
  1.1777 +   * @return the number of UChars available in the internal buffer
  1.1778 +   * @see getBuffer
  1.1779 +   * @stable ICU 2.0
  1.1780 +   */
  1.1781 +  inline int32_t getCapacity(void) const;
  1.1782 +
  1.1783 +  /* Other operations */
  1.1784 +
  1.1785 +  /**
  1.1786 +   * Generate a hash code for this object.
  1.1787 +   * @return The hash code of this UnicodeString.
  1.1788 +   * @stable ICU 2.0
  1.1789 +   */
  1.1790 +  inline int32_t hashCode(void) const;
  1.1791 +
  1.1792 +  /**
  1.1793 +   * Determine if this object contains a valid string.
  1.1794 +   * A bogus string has no value. It is different from an empty string,
  1.1795 +   * although in both cases isEmpty() returns TRUE and length() returns 0.
  1.1796 +   * setToBogus() and isBogus() can be used to indicate that no string value is available.
  1.1797 +   * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
  1.1798 +   * length() returns 0.
  1.1799 +   *
  1.1800 +   * @return TRUE if the string is bogus/invalid, FALSE otherwise
  1.1801 +   * @see setToBogus()
  1.1802 +   * @stable ICU 2.0
  1.1803 +   */
  1.1804 +  inline UBool isBogus(void) const;
  1.1805 +
  1.1806 +
  1.1807 +  //========================================
  1.1808 +  // Write operations
  1.1809 +  //========================================
  1.1810 +
  1.1811 +  /* Assignment operations */
  1.1812 +
  1.1813 +  /**
  1.1814 +   * Assignment operator.  Replace the characters in this UnicodeString
  1.1815 +   * with the characters from <TT>srcText</TT>.
  1.1816 +   * @param srcText The text containing the characters to replace
  1.1817 +   * @return a reference to this
  1.1818 +   * @stable ICU 2.0
  1.1819 +   */
  1.1820 +  UnicodeString &operator=(const UnicodeString &srcText);
  1.1821 +
  1.1822 +  /**
  1.1823 +   * Almost the same as the assignment operator.
  1.1824 +   * Replace the characters in this UnicodeString
  1.1825 +   * with the characters from <code>srcText</code>.
  1.1826 +   *
  1.1827 +   * This function works the same as the assignment operator
  1.1828 +   * for all strings except for ones that are readonly aliases.
  1.1829 +   *
  1.1830 +   * Starting with ICU 2.4, the assignment operator and the copy constructor
  1.1831 +   * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1.1832 +   * This function implements the old, more efficient but less safe behavior
  1.1833 +   * of making this string also a readonly alias to the same buffer.
  1.1834 +   *
  1.1835 +   * The fastCopyFrom function must be used only if it is known that the lifetime of
  1.1836 +   * this UnicodeString does not exceed the lifetime of the aliased buffer
  1.1837 +   * including its contents, for example for strings from resource bundles
  1.1838 +   * or aliases to string constants.
  1.1839 +   *
  1.1840 +   * @param src The text containing the characters to replace.
  1.1841 +   * @return a reference to this
  1.1842 +   * @stable ICU 2.4
  1.1843 +   */
  1.1844 +  UnicodeString &fastCopyFrom(const UnicodeString &src);
  1.1845 +
  1.1846 +  /**
  1.1847 +   * Assignment operator.  Replace the characters in this UnicodeString
  1.1848 +   * with the code unit <TT>ch</TT>.
  1.1849 +   * @param ch the code unit to replace
  1.1850 +   * @return a reference to this
  1.1851 +   * @stable ICU 2.0
  1.1852 +   */
  1.1853 +  inline UnicodeString& operator= (UChar ch);
  1.1854 +
  1.1855 +  /**
  1.1856 +   * Assignment operator.  Replace the characters in this UnicodeString
  1.1857 +   * with the code point <TT>ch</TT>.
  1.1858 +   * @param ch the code point to replace
  1.1859 +   * @return a reference to this
  1.1860 +   * @stable ICU 2.0
  1.1861 +   */
  1.1862 +  inline UnicodeString& operator= (UChar32 ch);
  1.1863 +
  1.1864 +  /**
  1.1865 +   * Set the text in the UnicodeString object to the characters
  1.1866 +   * in <TT>srcText</TT> in the range
  1.1867 +   * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
  1.1868 +   * <TT>srcText</TT> is not modified.
  1.1869 +   * @param srcText the source for the new characters
  1.1870 +   * @param srcStart the offset into <TT>srcText</TT> where new characters
  1.1871 +   * will be obtained
  1.1872 +   * @return a reference to this
  1.1873 +   * @stable ICU 2.2
  1.1874 +   */
  1.1875 +  inline UnicodeString& setTo(const UnicodeString& srcText,
  1.1876 +               int32_t srcStart);
  1.1877 +
  1.1878 +  /**
  1.1879 +   * Set the text in the UnicodeString object to the characters
  1.1880 +   * in <TT>srcText</TT> in the range
  1.1881 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  1.1882 +   * <TT>srcText</TT> is not modified.
  1.1883 +   * @param srcText the source for the new characters
  1.1884 +   * @param srcStart the offset into <TT>srcText</TT> where new characters
  1.1885 +   * will be obtained
  1.1886 +   * @param srcLength the number of characters in <TT>srcText</TT> in the
  1.1887 +   * replace string.
  1.1888 +   * @return a reference to this
  1.1889 +   * @stable ICU 2.0
  1.1890 +   */
  1.1891 +  inline UnicodeString& setTo(const UnicodeString& srcText,
  1.1892 +               int32_t srcStart,
  1.1893 +               int32_t srcLength);
  1.1894 +
  1.1895 +  /**
  1.1896 +   * Set the text in the UnicodeString object to the characters in
  1.1897 +   * <TT>srcText</TT>.
  1.1898 +   * <TT>srcText</TT> is not modified.
  1.1899 +   * @param srcText the source for the new characters
  1.1900 +   * @return a reference to this
  1.1901 +   * @stable ICU 2.0
  1.1902 +   */
  1.1903 +  inline UnicodeString& setTo(const UnicodeString& srcText);
  1.1904 +
  1.1905 +  /**
  1.1906 +   * Set the characters in the UnicodeString object to the characters
  1.1907 +   * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
  1.1908 +   * @param srcChars the source for the new characters
  1.1909 +   * @param srcLength the number of Unicode characters in srcChars.
  1.1910 +   * @return a reference to this
  1.1911 +   * @stable ICU 2.0
  1.1912 +   */
  1.1913 +  inline UnicodeString& setTo(const UChar *srcChars,
  1.1914 +               int32_t srcLength);
  1.1915 +
  1.1916 +  /**
  1.1917 +   * Set the characters in the UnicodeString object to the code unit
  1.1918 +   * <TT>srcChar</TT>.
  1.1919 +   * @param srcChar the code unit which becomes the UnicodeString's character
  1.1920 +   * content
  1.1921 +   * @return a reference to this
  1.1922 +   * @stable ICU 2.0
  1.1923 +   */
  1.1924 +  UnicodeString& setTo(UChar srcChar);
  1.1925 +
  1.1926 +  /**
  1.1927 +   * Set the characters in the UnicodeString object to the code point
  1.1928 +   * <TT>srcChar</TT>.
  1.1929 +   * @param srcChar the code point which becomes the UnicodeString's character
  1.1930 +   * content
  1.1931 +   * @return a reference to this
  1.1932 +   * @stable ICU 2.0
  1.1933 +   */
  1.1934 +  UnicodeString& setTo(UChar32 srcChar);
  1.1935 +
  1.1936 +  /**
  1.1937 +   * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
  1.1938 +   * The text will be used for the UnicodeString object, but
  1.1939 +   * it will not be released when the UnicodeString is destroyed.
  1.1940 +   * This has copy-on-write semantics:
  1.1941 +   * When the string is modified, then the buffer is first copied into
  1.1942 +   * newly allocated memory.
  1.1943 +   * The aliased buffer is never modified.
  1.1944 +   *
  1.1945 +   * In an assignment to another UnicodeString, when using the copy constructor
  1.1946 +   * or the assignment operator, the text will be copied.
  1.1947 +   * When using fastCopyFrom(), the text will be aliased again,
  1.1948 +   * so that both strings then alias the same readonly-text.
  1.1949 +   *
  1.1950 +   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
  1.1951 +   *                     This must be true if <code>textLength==-1</code>.
  1.1952 +   * @param text The characters to alias for the UnicodeString.
  1.1953 +   * @param textLength The number of Unicode characters in <code>text</code> to alias.
  1.1954 +   *                   If -1, then this function will determine the length
  1.1955 +   *                   by calling <code>u_strlen()</code>.
  1.1956 +   * @return a reference to this
  1.1957 +   * @stable ICU 2.0
  1.1958 +   */
  1.1959 +  UnicodeString &setTo(UBool isTerminated,
  1.1960 +                       const UChar *text,
  1.1961 +                       int32_t textLength);
  1.1962 +
  1.1963 +  /**
  1.1964 +   * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
  1.1965 +   * The text will be used for the UnicodeString object, but
  1.1966 +   * it will not be released when the UnicodeString is destroyed.
  1.1967 +   * This has write-through semantics:
  1.1968 +   * For as long as the capacity of the buffer is sufficient, write operations
  1.1969 +   * will directly affect the buffer. When more capacity is necessary, then
  1.1970 +   * a new buffer will be allocated and the contents copied as with regularly
  1.1971 +   * constructed strings.
  1.1972 +   * In an assignment to another UnicodeString, the buffer will be copied.
  1.1973 +   * The extract(UChar *dst) function detects whether the dst pointer is the same
  1.1974 +   * as the string buffer itself and will in this case not copy the contents.
  1.1975 +   *
  1.1976 +   * @param buffer The characters to alias for the UnicodeString.
  1.1977 +   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
  1.1978 +   * @param buffCapacity The size of <code>buffer</code> in UChars.
  1.1979 +   * @return a reference to this
  1.1980 +   * @stable ICU 2.0
  1.1981 +   */
  1.1982 +  UnicodeString &setTo(UChar *buffer,
  1.1983 +                       int32_t buffLength,
  1.1984 +                       int32_t buffCapacity);
  1.1985 +
  1.1986 +  /**
  1.1987 +   * Make this UnicodeString object invalid.
  1.1988 +   * The string will test TRUE with isBogus().
  1.1989 +   *
  1.1990 +   * A bogus string has no value. It is different from an empty string.
  1.1991 +   * It can be used to indicate that no string value is available.
  1.1992 +   * getBuffer() and getTerminatedBuffer() return NULL, and
  1.1993 +   * length() returns 0.
  1.1994 +   *
  1.1995 +   * This utility function is used throughout the UnicodeString
  1.1996 +   * implementation to indicate that a UnicodeString operation failed,
  1.1997 +   * and may be used in other functions,
  1.1998 +   * especially but not exclusively when such functions do not
  1.1999 +   * take a UErrorCode for simplicity.
  1.2000 +   *
  1.2001 +   * The following methods, and no others, will clear a string object's bogus flag:
  1.2002 +   * - remove()
  1.2003 +   * - remove(0, INT32_MAX)
  1.2004 +   * - truncate(0)
  1.2005 +   * - operator=() (assignment operator)
  1.2006 +   * - setTo(...)
  1.2007 +   *
  1.2008 +   * The simplest way to turn a bogus string into an empty one
  1.2009 +   * is to use the remove() function.
  1.2010 +   * Examples for other functions that are equivalent to "set to empty string":
  1.2011 +   * \code
  1.2012 +   * if(s.isBogus()) {
  1.2013 +   *   s.remove();           // set to an empty string (remove all), or
  1.2014 +   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
  1.2015 +   *   s.truncate(0);        // set to an empty string (complete truncation), or
  1.2016 +   *   s=UnicodeString();    // assign an empty string, or
  1.2017 +   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
  1.2018 +   *   static const UChar nul=0;
  1.2019 +   *   s.setTo(&nul, 0);     // set to an empty C Unicode string
  1.2020 +   * }
  1.2021 +   * \endcode
  1.2022 +   *
  1.2023 +   * @see isBogus()
  1.2024 +   * @stable ICU 2.0
  1.2025 +   */
  1.2026 +  void setToBogus();
  1.2027 +
  1.2028 +  /**
  1.2029 +   * Set the character at the specified offset to the specified character.
  1.2030 +   * @param offset A valid offset into the text of the character to set
  1.2031 +   * @param ch The new character
  1.2032 +   * @return A reference to this
  1.2033 +   * @stable ICU 2.0
  1.2034 +   */
  1.2035 +  UnicodeString& setCharAt(int32_t offset,
  1.2036 +               UChar ch);
  1.2037 +
  1.2038 +
  1.2039 +  /* Append operations */
  1.2040 +
  1.2041 +  /**
  1.2042 +   * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
  1.2043 +   * object.
  1.2044 +   * @param ch the code unit to be appended
  1.2045 +   * @return a reference to this
  1.2046 +   * @stable ICU 2.0
  1.2047 +   */
  1.2048 + inline  UnicodeString& operator+= (UChar ch);
  1.2049 +
  1.2050 +  /**
  1.2051 +   * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
  1.2052 +   * object.
  1.2053 +   * @param ch the code point to be appended
  1.2054 +   * @return a reference to this
  1.2055 +   * @stable ICU 2.0
  1.2056 +   */
  1.2057 + inline  UnicodeString& operator+= (UChar32 ch);
  1.2058 +
  1.2059 +  /**
  1.2060 +   * Append operator. Append the characters in <TT>srcText</TT> to the
  1.2061 +   * UnicodeString object. <TT>srcText</TT> is not modified.
  1.2062 +   * @param srcText the source for the new characters
  1.2063 +   * @return a reference to this
  1.2064 +   * @stable ICU 2.0
  1.2065 +   */
  1.2066 +  inline UnicodeString& operator+= (const UnicodeString& srcText);
  1.2067 +
  1.2068 +  /**
  1.2069 +   * Append the characters
  1.2070 +   * in <TT>srcText</TT> in the range
  1.2071 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
  1.2072 +   * UnicodeString object. <TT>srcText</TT>
  1.2073 +   * is not modified.
  1.2074 +   * @param srcText the source for the new characters
  1.2075 +   * @param srcStart the offset into <TT>srcText</TT> where new characters
  1.2076 +   * will be obtained
  1.2077 +   * @param srcLength the number of characters in <TT>srcText</TT> in
  1.2078 +   * the append string
  1.2079 +   * @return a reference to this
  1.2080 +   * @stable ICU 2.0
  1.2081 +   */
  1.2082 +  inline UnicodeString& append(const UnicodeString& srcText,
  1.2083 +            int32_t srcStart,
  1.2084 +            int32_t srcLength);
  1.2085 +
  1.2086 +  /**
  1.2087 +   * Append the characters in <TT>srcText</TT> to the UnicodeString object.
  1.2088 +   * <TT>srcText</TT> is not modified.
  1.2089 +   * @param srcText the source for the new characters
  1.2090 +   * @return a reference to this
  1.2091 +   * @stable ICU 2.0
  1.2092 +   */
  1.2093 +  inline UnicodeString& append(const UnicodeString& srcText);
  1.2094 +
  1.2095 +  /**
  1.2096 +   * Append the characters in <TT>srcChars</TT> in the range
  1.2097 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
  1.2098 +   * object.
  1.2099 +   * <TT>srcChars</TT> is not modified.
  1.2100 +   * @param srcChars the source for the new characters
  1.2101 +   * @param srcStart the offset into <TT>srcChars</TT> where new characters
  1.2102 +   * will be obtained
  1.2103 +   * @param srcLength the number of characters in <TT>srcChars</TT> in
  1.2104 +   *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
  1.2105 +   * @return a reference to this
  1.2106 +   * @stable ICU 2.0
  1.2107 +   */
  1.2108 +  inline UnicodeString& append(const UChar *srcChars,
  1.2109 +            int32_t srcStart,
  1.2110 +            int32_t srcLength);
  1.2111 +
  1.2112 +  /**
  1.2113 +   * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
  1.2114 +   * <TT>srcChars</TT> is not modified.
  1.2115 +   * @param srcChars the source for the new characters
  1.2116 +   * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
  1.2117 +   *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
  1.2118 +   * @return a reference to this
  1.2119 +   * @stable ICU 2.0
  1.2120 +   */
  1.2121 +  inline UnicodeString& append(const UChar *srcChars,
  1.2122 +            int32_t srcLength);
  1.2123 +
  1.2124 +  /**
  1.2125 +   * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
  1.2126 +   * @param srcChar the code unit to append
  1.2127 +   * @return a reference to this
  1.2128 +   * @stable ICU 2.0
  1.2129 +   */
  1.2130 +  inline UnicodeString& append(UChar srcChar);
  1.2131 +
  1.2132 +  /**
  1.2133 +   * Append the code point <TT>srcChar</TT> to the UnicodeString object.
  1.2134 +   * @param srcChar the code point to append
  1.2135 +   * @return a reference to this
  1.2136 +   * @stable ICU 2.0
  1.2137 +   */
  1.2138 +  UnicodeString& append(UChar32 srcChar);
  1.2139 +
  1.2140 +
  1.2141 +  /* Insert operations */
  1.2142 +
  1.2143 +  /**
  1.2144 +   * Insert the characters in <TT>srcText</TT> in the range
  1.2145 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
  1.2146 +   * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
  1.2147 +   * @param start the offset where the insertion begins
  1.2148 +   * @param srcText the source for the new characters
  1.2149 +   * @param srcStart the offset into <TT>srcText</TT> where new characters
  1.2150 +   * will be obtained
  1.2151 +   * @param srcLength the number of characters in <TT>srcText</TT> in
  1.2152 +   * the insert string
  1.2153 +   * @return a reference to this
  1.2154 +   * @stable ICU 2.0
  1.2155 +   */
  1.2156 +  inline UnicodeString& insert(int32_t start,
  1.2157 +            const UnicodeString& srcText,
  1.2158 +            int32_t srcStart,
  1.2159 +            int32_t srcLength);
  1.2160 +
  1.2161 +  /**
  1.2162 +   * Insert the characters in <TT>srcText</TT> into the UnicodeString object
  1.2163 +   * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
  1.2164 +   * @param start the offset where the insertion begins
  1.2165 +   * @param srcText the source for the new characters
  1.2166 +   * @return a reference to this
  1.2167 +   * @stable ICU 2.0
  1.2168 +   */
  1.2169 +  inline UnicodeString& insert(int32_t start,
  1.2170 +            const UnicodeString& srcText);
  1.2171 +
  1.2172 +  /**
  1.2173 +   * Insert the characters in <TT>srcChars</TT> in the range
  1.2174 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
  1.2175 +   *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
  1.2176 +   * @param start the offset at which the insertion begins
  1.2177 +   * @param srcChars the source for the new characters
  1.2178 +   * @param srcStart the offset into <TT>srcChars</TT> where new characters
  1.2179 +   * will be obtained
  1.2180 +   * @param srcLength the number of characters in <TT>srcChars</TT>
  1.2181 +   * in the insert string
  1.2182 +   * @return a reference to this
  1.2183 +   * @stable ICU 2.0
  1.2184 +   */
  1.2185 +  inline UnicodeString& insert(int32_t start,
  1.2186 +            const UChar *srcChars,
  1.2187 +            int32_t srcStart,
  1.2188 +            int32_t srcLength);
  1.2189 +
  1.2190 +  /**
  1.2191 +   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
  1.2192 +   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
  1.2193 +   * @param start the offset where the insertion begins
  1.2194 +   * @param srcChars the source for the new characters
  1.2195 +   * @param srcLength the number of Unicode characters in srcChars.
  1.2196 +   * @return a reference to this
  1.2197 +   * @stable ICU 2.0
  1.2198 +   */
  1.2199 +  inline UnicodeString& insert(int32_t start,
  1.2200 +            const UChar *srcChars,
  1.2201 +            int32_t srcLength);
  1.2202 +
  1.2203 +  /**
  1.2204 +   * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
  1.2205 +   * offset <TT>start</TT>.
  1.2206 +   * @param start the offset at which the insertion occurs
  1.2207 +   * @param srcChar the code unit to insert
  1.2208 +   * @return a reference to this
  1.2209 +   * @stable ICU 2.0
  1.2210 +   */
  1.2211 +  inline UnicodeString& insert(int32_t start,
  1.2212 +            UChar srcChar);
  1.2213 +
  1.2214 +  /**
  1.2215 +   * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
  1.2216 +   * offset <TT>start</TT>.
  1.2217 +   * @param start the offset at which the insertion occurs
  1.2218 +   * @param srcChar the code point to insert
  1.2219 +   * @return a reference to this
  1.2220 +   * @stable ICU 2.0
  1.2221 +   */
  1.2222 +  inline UnicodeString& insert(int32_t start,
  1.2223 +            UChar32 srcChar);
  1.2224 +
  1.2225 +
  1.2226 +  /* Replace operations */
  1.2227 +
  1.2228 +  /**
  1.2229 +   * Replace the characters in the range
  1.2230 +   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  1.2231 +   * <TT>srcText</TT> in the range
  1.2232 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  1.2233 +   * <TT>srcText</TT> is not modified.
  1.2234 +   * @param start the offset at which the replace operation begins
  1.2235 +   * @param length the number of characters to replace. The character at
  1.2236 +   * <TT>start + length</TT> is not modified.
  1.2237 +   * @param srcText the source for the new characters
  1.2238 +   * @param srcStart the offset into <TT>srcText</TT> where new characters
  1.2239 +   * will be obtained
  1.2240 +   * @param srcLength the number of characters in <TT>srcText</TT> in
  1.2241 +   * the replace string
  1.2242 +   * @return a reference to this
  1.2243 +   * @stable ICU 2.0
  1.2244 +   */
  1.2245 +  UnicodeString& replace(int32_t start,
  1.2246 +             int32_t length,
  1.2247 +             const UnicodeString& srcText,
  1.2248 +             int32_t srcStart,
  1.2249 +             int32_t srcLength);
  1.2250 +
  1.2251 +  /**
  1.2252 +   * Replace the characters in the range
  1.2253 +   * [<TT>start</TT>, <TT>start + length</TT>)
  1.2254 +   * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
  1.2255 +   *  not modified.
  1.2256 +   * @param start the offset at which the replace operation begins
  1.2257 +   * @param length the number of characters to replace. The character at
  1.2258 +   * <TT>start + length</TT> is not modified.
  1.2259 +   * @param srcText the source for the new characters
  1.2260 +   * @return a reference to this
  1.2261 +   * @stable ICU 2.0
  1.2262 +   */
  1.2263 +  UnicodeString& replace(int32_t start,
  1.2264 +             int32_t length,
  1.2265 +             const UnicodeString& srcText);
  1.2266 +
  1.2267 +  /**
  1.2268 +   * Replace the characters in the range
  1.2269 +   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  1.2270 +   * <TT>srcChars</TT> in the range
  1.2271 +   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
  1.2272 +   * is not modified.
  1.2273 +   * @param start the offset at which the replace operation begins
  1.2274 +   * @param length the number of characters to replace.  The character at
  1.2275 +   * <TT>start + length</TT> is not modified.
  1.2276 +   * @param srcChars the source for the new characters
  1.2277 +   * @param srcStart the offset into <TT>srcChars</TT> where new characters
  1.2278 +   * will be obtained
  1.2279 +   * @param srcLength the number of characters in <TT>srcChars</TT>
  1.2280 +   * in the replace string
  1.2281 +   * @return a reference to this
  1.2282 +   * @stable ICU 2.0
  1.2283 +   */
  1.2284 +  UnicodeString& replace(int32_t start,
  1.2285 +             int32_t length,
  1.2286 +             const UChar *srcChars,
  1.2287 +             int32_t srcStart,
  1.2288 +             int32_t srcLength);
  1.2289 +
  1.2290 +  /**
  1.2291 +   * Replace the characters in the range
  1.2292 +   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  1.2293 +   * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
  1.2294 +   * @param start the offset at which the replace operation begins
  1.2295 +   * @param length number of characters to replace.  The character at
  1.2296 +   * <TT>start + length</TT> is not modified.
  1.2297 +   * @param srcChars the source for the new characters
  1.2298 +   * @param srcLength the number of Unicode characters in srcChars
  1.2299 +   * @return a reference to this
  1.2300 +   * @stable ICU 2.0
  1.2301 +   */
  1.2302 +  inline UnicodeString& replace(int32_t start,
  1.2303 +             int32_t length,
  1.2304 +             const UChar *srcChars,
  1.2305 +             int32_t srcLength);
  1.2306 +
  1.2307 +  /**
  1.2308 +   * Replace the characters in the range
  1.2309 +   * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
  1.2310 +   * <TT>srcChar</TT>.
  1.2311 +   * @param start the offset at which the replace operation begins
  1.2312 +   * @param length the number of characters to replace.  The character at
  1.2313 +   * <TT>start + length</TT> is not modified.
  1.2314 +   * @param srcChar the new code unit
  1.2315 +   * @return a reference to this
  1.2316 +   * @stable ICU 2.0
  1.2317 +   */
  1.2318 +  inline UnicodeString& replace(int32_t start,
  1.2319 +             int32_t length,
  1.2320 +             UChar srcChar);
  1.2321 +
  1.2322 +  /**
  1.2323 +   * Replace the characters in the range
  1.2324 +   * [<TT>start</TT>, <TT>start + length</TT>) with the code point
  1.2325 +   * <TT>srcChar</TT>.
  1.2326 +   * @param start the offset at which the replace operation begins
  1.2327 +   * @param length the number of characters to replace.  The character at
  1.2328 +   * <TT>start + length</TT> is not modified.
  1.2329 +   * @param srcChar the new code point
  1.2330 +   * @return a reference to this
  1.2331 +   * @stable ICU 2.0
  1.2332 +   */
  1.2333 +  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
  1.2334 +
  1.2335 +  /**
  1.2336 +   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
  1.2337 +   * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
  1.2338 +   * @param start the offset at which the replace operation begins
  1.2339 +   * @param limit the offset immediately following the replace range
  1.2340 +   * @param srcText the source for the new characters
  1.2341 +   * @return a reference to this
  1.2342 +   * @stable ICU 2.0
  1.2343 +   */
  1.2344 +  inline UnicodeString& replaceBetween(int32_t start,
  1.2345 +                int32_t limit,
  1.2346 +                const UnicodeString& srcText);
  1.2347 +
  1.2348 +  /**
  1.2349 +   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
  1.2350 +   * with the characters in <TT>srcText</TT> in the range
  1.2351 +   * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
  1.2352 +   * @param start the offset at which the replace operation begins
  1.2353 +   * @param limit the offset immediately following the replace range
  1.2354 +   * @param srcText the source for the new characters
  1.2355 +   * @param srcStart the offset into <TT>srcText</TT> where new characters
  1.2356 +   * will be obtained
  1.2357 +   * @param srcLimit the offset immediately following the range to copy
  1.2358 +   * in <TT>srcText</TT>
  1.2359 +   * @return a reference to this
  1.2360 +   * @stable ICU 2.0
  1.2361 +   */
  1.2362 +  inline UnicodeString& replaceBetween(int32_t start,
  1.2363 +                int32_t limit,
  1.2364 +                const UnicodeString& srcText,
  1.2365 +                int32_t srcStart,
  1.2366 +                int32_t srcLimit);
  1.2367 +
  1.2368 +  /**
  1.2369 +   * Replace a substring of this object with the given text.
  1.2370 +   * @param start the beginning index, inclusive; <code>0 <= start
  1.2371 +   * <= limit</code>.
  1.2372 +   * @param limit the ending index, exclusive; <code>start <= limit
  1.2373 +   * <= length()</code>.
  1.2374 +   * @param text the text to replace characters <code>start</code>
  1.2375 +   * to <code>limit - 1</code>
  1.2376 +   * @stable ICU 2.0
  1.2377 +   */
  1.2378 +  virtual void handleReplaceBetween(int32_t start,
  1.2379 +                                    int32_t limit,
  1.2380 +                                    const UnicodeString& text);
  1.2381 +
  1.2382 +  /**
  1.2383 +   * Replaceable API
  1.2384 +   * @return TRUE if it has MetaData
  1.2385 +   * @stable ICU 2.4
  1.2386 +   */
  1.2387 +  virtual UBool hasMetaData() const;
  1.2388 +
  1.2389 +  /**
  1.2390 +   * Copy a substring of this object, retaining attribute (out-of-band)
  1.2391 +   * information.  This method is used to duplicate or reorder substrings.
  1.2392 +   * The destination index must not overlap the source range.
  1.2393 +   *
  1.2394 +   * @param start the beginning index, inclusive; <code>0 <= start <=
  1.2395 +   * limit</code>.
  1.2396 +   * @param limit the ending index, exclusive; <code>start <= limit <=
  1.2397 +   * length()</code>.
  1.2398 +   * @param dest the destination index.  The characters from
  1.2399 +   * <code>start..limit-1</code> will be copied to <code>dest</code>.
  1.2400 +   * Implementations of this method may assume that <code>dest <= start ||
  1.2401 +   * dest >= limit</code>.
  1.2402 +   * @stable ICU 2.0
  1.2403 +   */
  1.2404 +  virtual void copy(int32_t start, int32_t limit, int32_t dest);
  1.2405 +
  1.2406 +  /* Search and replace operations */
  1.2407 +
  1.2408 +  /**
  1.2409 +   * Replace all occurrences of characters in oldText with the characters
  1.2410 +   * in newText
  1.2411 +   * @param oldText the text containing the search text
  1.2412 +   * @param newText the text containing the replacement text
  1.2413 +   * @return a reference to this
  1.2414 +   * @stable ICU 2.0
  1.2415 +   */
  1.2416 +  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
  1.2417 +                const UnicodeString& newText);
  1.2418 +
  1.2419 +  /**
  1.2420 +   * Replace all occurrences of characters in oldText with characters
  1.2421 +   * in newText
  1.2422 +   * in the range [<TT>start</TT>, <TT>start + length</TT>).
  1.2423 +   * @param start the start of the range in which replace will be performed
  1.2424 +   * @param length the length of the range in which replace will be performed
  1.2425 +   * @param oldText the text containing the search text
  1.2426 +   * @param newText the text containing the replacement text
  1.2427 +   * @return a reference to this
  1.2428 +   * @stable ICU 2.0
  1.2429 +   */
  1.2430 +  inline UnicodeString& findAndReplace(int32_t start,
  1.2431 +                int32_t length,
  1.2432 +                const UnicodeString& oldText,
  1.2433 +                const UnicodeString& newText);
  1.2434 +
  1.2435 +  /**
  1.2436 +   * Replace all occurrences of characters in oldText in the range
  1.2437 +   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
  1.2438 +   * in newText in the range
  1.2439 +   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
  1.2440 +   * in the range [<TT>start</TT>, <TT>start + length</TT>).
  1.2441 +   * @param start the start of the range in which replace will be performed
  1.2442 +   * @param length the length of the range in which replace will be performed
  1.2443 +   * @param oldText the text containing the search text
  1.2444 +   * @param oldStart the start of the search range in <TT>oldText</TT>
  1.2445 +   * @param oldLength the length of the search range in <TT>oldText</TT>
  1.2446 +   * @param newText the text containing the replacement text
  1.2447 +   * @param newStart the start of the replacement range in <TT>newText</TT>
  1.2448 +   * @param newLength the length of the replacement range in <TT>newText</TT>
  1.2449 +   * @return a reference to this
  1.2450 +   * @stable ICU 2.0
  1.2451 +   */
  1.2452 +  UnicodeString& findAndReplace(int32_t start,
  1.2453 +                int32_t length,
  1.2454 +                const UnicodeString& oldText,
  1.2455 +                int32_t oldStart,
  1.2456 +                int32_t oldLength,
  1.2457 +                const UnicodeString& newText,
  1.2458 +                int32_t newStart,
  1.2459 +                int32_t newLength);
  1.2460 +
  1.2461 +
  1.2462 +  /* Remove operations */
  1.2463 +
  1.2464 +  /**
  1.2465 +   * Remove all characters from the UnicodeString object.
  1.2466 +   * @return a reference to this
  1.2467 +   * @stable ICU 2.0
  1.2468 +   */
  1.2469 +  inline UnicodeString& remove(void);
  1.2470 +
  1.2471 +  /**
  1.2472 +   * Remove the characters in the range
  1.2473 +   * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
  1.2474 +   * @param start the offset of the first character to remove
  1.2475 +   * @param length the number of characters to remove
  1.2476 +   * @return a reference to this
  1.2477 +   * @stable ICU 2.0
  1.2478 +   */
  1.2479 +  inline UnicodeString& remove(int32_t start,
  1.2480 +                               int32_t length = (int32_t)INT32_MAX);
  1.2481 +
  1.2482 +  /**
  1.2483 +   * Remove the characters in the range
  1.2484 +   * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
  1.2485 +   * @param start the offset of the first character to remove
  1.2486 +   * @param limit the offset immediately following the range to remove
  1.2487 +   * @return a reference to this
  1.2488 +   * @stable ICU 2.0
  1.2489 +   */
  1.2490 +  inline UnicodeString& removeBetween(int32_t start,
  1.2491 +                                      int32_t limit = (int32_t)INT32_MAX);
  1.2492 +
  1.2493 +  /**
  1.2494 +   * Retain only the characters in the range
  1.2495 +   * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
  1.2496 +   * Removes characters before <code>start</code> and at and after <code>limit</code>.
  1.2497 +   * @param start the offset of the first character to retain
  1.2498 +   * @param limit the offset immediately following the range to retain
  1.2499 +   * @return a reference to this
  1.2500 +   * @stable ICU 4.4
  1.2501 +   */
  1.2502 +  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
  1.2503 +
  1.2504 +  /* Length operations */
  1.2505 +
  1.2506 +  /**
  1.2507 +   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
  1.2508 +   * If the length of this UnicodeString is less than targetLength,
  1.2509 +   * targetLength - length() copies of padChar will be added to the
  1.2510 +   * beginning of this UnicodeString.
  1.2511 +   * @param targetLength the desired length of the string
  1.2512 +   * @param padChar the character to use for padding. Defaults to
  1.2513 +   * space (U+0020)
  1.2514 +   * @return TRUE if the text was padded, FALSE otherwise.
  1.2515 +   * @stable ICU 2.0
  1.2516 +   */
  1.2517 +  UBool padLeading(int32_t targetLength,
  1.2518 +                    UChar padChar = 0x0020);
  1.2519 +
  1.2520 +  /**
  1.2521 +   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
  1.2522 +   * If the length of this UnicodeString is less than targetLength,
  1.2523 +   * targetLength - length() copies of padChar will be added to the
  1.2524 +   * end of this UnicodeString.
  1.2525 +   * @param targetLength the desired length of the string
  1.2526 +   * @param padChar the character to use for padding. Defaults to
  1.2527 +   * space (U+0020)
  1.2528 +   * @return TRUE if the text was padded, FALSE otherwise.
  1.2529 +   * @stable ICU 2.0
  1.2530 +   */
  1.2531 +  UBool padTrailing(int32_t targetLength,
  1.2532 +                     UChar padChar = 0x0020);
  1.2533 +
  1.2534 +  /**
  1.2535 +   * Truncate this UnicodeString to the <TT>targetLength</TT>.
  1.2536 +   * @param targetLength the desired length of this UnicodeString.
  1.2537 +   * @return TRUE if the text was truncated, FALSE otherwise
  1.2538 +   * @stable ICU 2.0
  1.2539 +   */
  1.2540 +  inline UBool truncate(int32_t targetLength);
  1.2541 +
  1.2542 +  /**
  1.2543 +   * Trims leading and trailing whitespace from this UnicodeString.
  1.2544 +   * @return a reference to this
  1.2545 +   * @stable ICU 2.0
  1.2546 +   */
  1.2547 +  UnicodeString& trim(void);
  1.2548 +
  1.2549 +
  1.2550 +  /* Miscellaneous operations */
  1.2551 +
  1.2552 +  /**
  1.2553 +   * Reverse this UnicodeString in place.
  1.2554 +   * @return a reference to this
  1.2555 +   * @stable ICU 2.0
  1.2556 +   */
  1.2557 +  inline UnicodeString& reverse(void);
  1.2558 +
  1.2559 +  /**
  1.2560 +   * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
  1.2561 +   * this UnicodeString.
  1.2562 +   * @param start the start of the range to reverse
  1.2563 +   * @param length the number of characters to reverse
  1.2564 +   * @return a reference to this
  1.2565 +   * @stable ICU 2.0
  1.2566 +   */
  1.2567 +  inline UnicodeString& reverse(int32_t start,
  1.2568 +             int32_t length);
  1.2569 +
  1.2570 +  /**
  1.2571 +   * Convert the characters in this to UPPER CASE following the conventions of
  1.2572 +   * the default locale.
  1.2573 +   * @return A reference to this.
  1.2574 +   * @stable ICU 2.0
  1.2575 +   */
  1.2576 +  UnicodeString& toUpper(void);
  1.2577 +
  1.2578 +  /**
  1.2579 +   * Convert the characters in this to UPPER CASE following the conventions of
  1.2580 +   * a specific locale.
  1.2581 +   * @param locale The locale containing the conventions to use.
  1.2582 +   * @return A reference to this.
  1.2583 +   * @stable ICU 2.0
  1.2584 +   */
  1.2585 +  UnicodeString& toUpper(const Locale& locale);
  1.2586 +
  1.2587 +  /**
  1.2588 +   * Convert the characters in this to lower case following the conventions of
  1.2589 +   * the default locale.
  1.2590 +   * @return A reference to this.
  1.2591 +   * @stable ICU 2.0
  1.2592 +   */
  1.2593 +  UnicodeString& toLower(void);
  1.2594 +
  1.2595 +  /**
  1.2596 +   * Convert the characters in this to lower case following the conventions of
  1.2597 +   * a specific locale.
  1.2598 +   * @param locale The locale containing the conventions to use.
  1.2599 +   * @return A reference to this.
  1.2600 +   * @stable ICU 2.0
  1.2601 +   */
  1.2602 +  UnicodeString& toLower(const Locale& locale);
  1.2603 +
  1.2604 +#if !UCONFIG_NO_BREAK_ITERATION
  1.2605 +
  1.2606 +  /**
  1.2607 +   * Titlecase this string, convenience function using the default locale.
  1.2608 +   *
  1.2609 +   * Casing is locale-dependent and context-sensitive.
  1.2610 +   * Titlecasing uses a break iterator to find the first characters of words
  1.2611 +   * that are to be titlecased. It titlecases those characters and lowercases
  1.2612 +   * all others.
  1.2613 +   *
  1.2614 +   * The titlecase break iterator can be provided to customize for arbitrary
  1.2615 +   * styles, using rules and dictionaries beyond the standard iterators.
  1.2616 +   * It may be more efficient to always provide an iterator to avoid
  1.2617 +   * opening and closing one for each string.
  1.2618 +   * The standard titlecase iterator for the root locale implements the
  1.2619 +   * algorithm of Unicode TR 21.
  1.2620 +   *
  1.2621 +   * This function uses only the setText(), first() and next() methods of the
  1.2622 +   * provided break iterator.
  1.2623 +   *
  1.2624 +   * @param titleIter A break iterator to find the first characters of words
  1.2625 +   *                  that are to be titlecased.
  1.2626 +   *                  If none is provided (0), then a standard titlecase
  1.2627 +   *                  break iterator is opened.
  1.2628 +   *                  Otherwise the provided iterator is set to the string's text.
  1.2629 +   * @return A reference to this.
  1.2630 +   * @stable ICU 2.1
  1.2631 +   */
  1.2632 +  UnicodeString &toTitle(BreakIterator *titleIter);
  1.2633 +
  1.2634 +  /**
  1.2635 +   * Titlecase this string.
  1.2636 +   *
  1.2637 +   * Casing is locale-dependent and context-sensitive.
  1.2638 +   * Titlecasing uses a break iterator to find the first characters of words
  1.2639 +   * that are to be titlecased. It titlecases those characters and lowercases
  1.2640 +   * all others.
  1.2641 +   *
  1.2642 +   * The titlecase break iterator can be provided to customize for arbitrary
  1.2643 +   * styles, using rules and dictionaries beyond the standard iterators.
  1.2644 +   * It may be more efficient to always provide an iterator to avoid
  1.2645 +   * opening and closing one for each string.
  1.2646 +   * The standard titlecase iterator for the root locale implements the
  1.2647 +   * algorithm of Unicode TR 21.
  1.2648 +   *
  1.2649 +   * This function uses only the setText(), first() and next() methods of the
  1.2650 +   * provided break iterator.
  1.2651 +   *
  1.2652 +   * @param titleIter A break iterator to find the first characters of words
  1.2653 +   *                  that are to be titlecased.
  1.2654 +   *                  If none is provided (0), then a standard titlecase
  1.2655 +   *                  break iterator is opened.
  1.2656 +   *                  Otherwise the provided iterator is set to the string's text.
  1.2657 +   * @param locale    The locale to consider.
  1.2658 +   * @return A reference to this.
  1.2659 +   * @stable ICU 2.1
  1.2660 +   */
  1.2661 +  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
  1.2662 +
  1.2663 +  /**
  1.2664 +   * Titlecase this string, with options.
  1.2665 +   *
  1.2666 +   * Casing is locale-dependent and context-sensitive.
  1.2667 +   * Titlecasing uses a break iterator to find the first characters of words
  1.2668 +   * that are to be titlecased. It titlecases those characters and lowercases
  1.2669 +   * all others. (This can be modified with options.)
  1.2670 +   *
  1.2671 +   * The titlecase break iterator can be provided to customize for arbitrary
  1.2672 +   * styles, using rules and dictionaries beyond the standard iterators.
  1.2673 +   * It may be more efficient to always provide an iterator to avoid
  1.2674 +   * opening and closing one for each string.
  1.2675 +   * The standard titlecase iterator for the root locale implements the
  1.2676 +   * algorithm of Unicode TR 21.
  1.2677 +   *
  1.2678 +   * This function uses only the setText(), first() and next() methods of the
  1.2679 +   * provided break iterator.
  1.2680 +   *
  1.2681 +   * @param titleIter A break iterator to find the first characters of words
  1.2682 +   *                  that are to be titlecased.
  1.2683 +   *                  If none is provided (0), then a standard titlecase
  1.2684 +   *                  break iterator is opened.
  1.2685 +   *                  Otherwise the provided iterator is set to the string's text.
  1.2686 +   * @param locale    The locale to consider.
  1.2687 +   * @param options Options bit set, see ucasemap_open().
  1.2688 +   * @return A reference to this.
  1.2689 +   * @see U_TITLECASE_NO_LOWERCASE
  1.2690 +   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
  1.2691 +   * @see ucasemap_open
  1.2692 +   * @stable ICU 3.8
  1.2693 +   */
  1.2694 +  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
  1.2695 +
  1.2696 +#endif
  1.2697 +
  1.2698 +  /**
  1.2699 +   * Case-folds the characters in this string.
  1.2700 +   *
  1.2701 +   * Case-folding is locale-independent and not context-sensitive,
  1.2702 +   * but there is an option for whether to include or exclude mappings for dotted I
  1.2703 +   * and dotless i that are marked with 'T' in CaseFolding.txt.
  1.2704 +   *
  1.2705 +   * The result may be longer or shorter than the original.
  1.2706 +   *
  1.2707 +   * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
  1.2708 +   * @return A reference to this.
  1.2709 +   * @stable ICU 2.0
  1.2710 +   */
  1.2711 +  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
  1.2712 +
  1.2713 +  //========================================
  1.2714 +  // Access to the internal buffer
  1.2715 +  //========================================
  1.2716 +
  1.2717 +  /**
  1.2718 +   * Get a read/write pointer to the internal buffer.
  1.2719 +   * The buffer is guaranteed to be large enough for at least minCapacity UChars,
  1.2720 +   * writable, and is still owned by the UnicodeString object.
  1.2721 +   * Calls to getBuffer(minCapacity) must not be nested, and
  1.2722 +   * must be matched with calls to releaseBuffer(newLength).
  1.2723 +   * If the string buffer was read-only or shared,
  1.2724 +   * then it will be reallocated and copied.
  1.2725 +   *
  1.2726 +   * An attempted nested call will return 0, and will not further modify the
  1.2727 +   * state of the UnicodeString object.
  1.2728 +   * It also returns 0 if the string is bogus.
  1.2729 +   *
  1.2730 +   * The actual capacity of the string buffer may be larger than minCapacity.
  1.2731 +   * getCapacity() returns the actual capacity.
  1.2732 +   * For many operations, the full capacity should be used to avoid reallocations.
  1.2733 +   *
  1.2734 +   * While the buffer is "open" between getBuffer(minCapacity)
  1.2735 +   * and releaseBuffer(newLength), the following applies:
  1.2736 +   * - The string length is set to 0.
  1.2737 +   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
  1.2738 +   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
  1.2739 +   * - You can read from and write to the returned buffer.
  1.2740 +   * - The previous string contents will still be in the buffer;
  1.2741 +   *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
  1.2742 +   *   If the length() was greater than minCapacity, then any contents after minCapacity
  1.2743 +   *   may be lost.
  1.2744 +   *   The buffer contents is not NUL-terminated by getBuffer().
  1.2745 +   *   If length()<getCapacity() then you can terminate it by writing a NUL
  1.2746 +   *   at index length().
  1.2747 +   * - You must call releaseBuffer(newLength) before and in order to
  1.2748 +   *   return to normal UnicodeString operation.
  1.2749 +   *
  1.2750 +   * @param minCapacity the minimum number of UChars that are to be available
  1.2751 +   *        in the buffer, starting at the returned pointer;
  1.2752 +   *        defaults to the current string capacity if minCapacity==-1
  1.2753 +   * @return a writable pointer to the internal string buffer,
  1.2754 +   *         or 0 if an error occurs (nested calls, out of memory)
  1.2755 +   *
  1.2756 +   * @see releaseBuffer
  1.2757 +   * @see getTerminatedBuffer()
  1.2758 +   * @stable ICU 2.0
  1.2759 +   */
  1.2760 +  UChar *getBuffer(int32_t minCapacity);
  1.2761 +
  1.2762 +  /**
  1.2763 +   * Release a read/write buffer on a UnicodeString object with an
  1.2764 +   * "open" getBuffer(minCapacity).
  1.2765 +   * This function must be called in a matched pair with getBuffer(minCapacity).
  1.2766 +   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
  1.2767 +   *
  1.2768 +   * It will set the string length to newLength, at most to the current capacity.
  1.2769 +   * If newLength==-1 then it will set the length according to the
  1.2770 +   * first NUL in the buffer, or to the capacity if there is no NUL.
  1.2771 +   *
  1.2772 +   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
  1.2773 +   *
  1.2774 +   * @param newLength the new length of the UnicodeString object;
  1.2775 +   *        defaults to the current capacity if newLength is greater than that;
  1.2776 +   *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
  1.2777 +   *        the current capacity of the string
  1.2778 +   *
  1.2779 +   * @see getBuffer(int32_t minCapacity)
  1.2780 +   * @stable ICU 2.0
  1.2781 +   */
  1.2782 +  void releaseBuffer(int32_t newLength=-1);
  1.2783 +
  1.2784 +  /**
  1.2785 +   * Get a read-only pointer to the internal buffer.
  1.2786 +   * This can be called at any time on a valid UnicodeString.
  1.2787 +   *
  1.2788 +   * It returns 0 if the string is bogus, or
  1.2789 +   * during an "open" getBuffer(minCapacity).
  1.2790 +   *
  1.2791 +   * It can be called as many times as desired.
  1.2792 +   * The pointer that it returns will remain valid until the UnicodeString object is modified,
  1.2793 +   * at which time the pointer is semantically invalidated and must not be used any more.
  1.2794 +   *
  1.2795 +   * The capacity of the buffer can be determined with getCapacity().
  1.2796 +   * The part after length() may or may not be initialized and valid,
  1.2797 +   * depending on the history of the UnicodeString object.
  1.2798 +   *
  1.2799 +   * The buffer contents is (probably) not NUL-terminated.
  1.2800 +   * You can check if it is with
  1.2801 +   * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
  1.2802 +   * (See getTerminatedBuffer().)
  1.2803 +   *
  1.2804 +   * The buffer may reside in read-only memory. Its contents must not
  1.2805 +   * be modified.
  1.2806 +   *
  1.2807 +   * @return a read-only pointer to the internal string buffer,
  1.2808 +   *         or 0 if the string is empty or bogus
  1.2809 +   *
  1.2810 +   * @see getBuffer(int32_t minCapacity)
  1.2811 +   * @see getTerminatedBuffer()
  1.2812 +   * @stable ICU 2.0
  1.2813 +   */
  1.2814 +  inline const UChar *getBuffer() const;
  1.2815 +
  1.2816 +  /**
  1.2817 +   * Get a read-only pointer to the internal buffer,
  1.2818 +   * making sure that it is NUL-terminated.
  1.2819 +   * This can be called at any time on a valid UnicodeString.
  1.2820 +   *
  1.2821 +   * It returns 0 if the string is bogus, or
  1.2822 +   * during an "open" getBuffer(minCapacity), or if the buffer cannot
  1.2823 +   * be NUL-terminated (because memory allocation failed).
  1.2824 +   *
  1.2825 +   * It can be called as many times as desired.
  1.2826 +   * The pointer that it returns will remain valid until the UnicodeString object is modified,
  1.2827 +   * at which time the pointer is semantically invalidated and must not be used any more.
  1.2828 +   *
  1.2829 +   * The capacity of the buffer can be determined with getCapacity().
  1.2830 +   * The part after length()+1 may or may not be initialized and valid,
  1.2831 +   * depending on the history of the UnicodeString object.
  1.2832 +   *
  1.2833 +   * The buffer contents is guaranteed to be NUL-terminated.
  1.2834 +   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
  1.2835 +   * is written.
  1.2836 +   * For this reason, this function is not const, unlike getBuffer().
  1.2837 +   * Note that a UnicodeString may also contain NUL characters as part of its contents.
  1.2838 +   *
  1.2839 +   * The buffer may reside in read-only memory. Its contents must not
  1.2840 +   * be modified.
  1.2841 +   *
  1.2842 +   * @return a read-only pointer to the internal string buffer,
  1.2843 +   *         or 0 if the string is empty or bogus
  1.2844 +   *
  1.2845 +   * @see getBuffer(int32_t minCapacity)
  1.2846 +   * @see getBuffer()
  1.2847 +   * @stable ICU 2.2
  1.2848 +   */
  1.2849 +  const UChar *getTerminatedBuffer();
  1.2850 +
  1.2851 +  //========================================
  1.2852 +  // Constructors
  1.2853 +  //========================================
  1.2854 +
  1.2855 +  /** Construct an empty UnicodeString.
  1.2856 +   * @stable ICU 2.0
  1.2857 +   */
  1.2858 +  inline UnicodeString();
  1.2859 +
  1.2860 +  /**
  1.2861 +   * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
  1.2862 +   * @param capacity the number of UChars this UnicodeString should hold
  1.2863 +   * before a resize is necessary; if count is greater than 0 and count
  1.2864 +   * code points c take up more space than capacity, then capacity is adjusted
  1.2865 +   * accordingly.
  1.2866 +   * @param c is used to initially fill the string
  1.2867 +   * @param count specifies how many code points c are to be written in the
  1.2868 +   *              string
  1.2869 +   * @stable ICU 2.0
  1.2870 +   */
  1.2871 +  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
  1.2872 +
  1.2873 +  /**
  1.2874 +   * Single UChar (code unit) constructor.
  1.2875 +   *
  1.2876 +   * It is recommended to mark this constructor "explicit" by
  1.2877 +   * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
  1.2878 +   * on the compiler command line or similar.
  1.2879 +   * @param ch the character to place in the UnicodeString
  1.2880 +   * @stable ICU 2.0
  1.2881 +   */
  1.2882 +  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
  1.2883 +
  1.2884 +  /**
  1.2885 +   * Single UChar32 (code point) constructor.
  1.2886 +   *
  1.2887 +   * It is recommended to mark this constructor "explicit" by
  1.2888 +   * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
  1.2889 +   * on the compiler command line or similar.
  1.2890 +   * @param ch the character to place in the UnicodeString
  1.2891 +   * @stable ICU 2.0
  1.2892 +   */
  1.2893 +  UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
  1.2894 +
  1.2895 +  /**
  1.2896 +   * UChar* constructor.
  1.2897 +   *
  1.2898 +   * It is recommended to mark this constructor "explicit" by
  1.2899 +   * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
  1.2900 +   * on the compiler command line or similar.
  1.2901 +   * @param text The characters to place in the UnicodeString.  <TT>text</TT>
  1.2902 +   * must be NUL (U+0000) terminated.
  1.2903 +   * @stable ICU 2.0
  1.2904 +   */
  1.2905 +  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
  1.2906 +
  1.2907 +  /**
  1.2908 +   * UChar* constructor.
  1.2909 +   * @param text The characters to place in the UnicodeString.
  1.2910 +   * @param textLength The number of Unicode characters in <TT>text</TT>
  1.2911 +   * to copy.
  1.2912 +   * @stable ICU 2.0
  1.2913 +   */
  1.2914 +  UnicodeString(const UChar *text,
  1.2915 +        int32_t textLength);
  1.2916 +
  1.2917 +  /**
  1.2918 +   * Readonly-aliasing UChar* constructor.
  1.2919 +   * The text will be used for the UnicodeString object, but
  1.2920 +   * it will not be released when the UnicodeString is destroyed.
  1.2921 +   * This has copy-on-write semantics:
  1.2922 +   * When the string is modified, then the buffer is first copied into
  1.2923 +   * newly allocated memory.
  1.2924 +   * The aliased buffer is never modified.
  1.2925 +   *
  1.2926 +   * In an assignment to another UnicodeString, when using the copy constructor
  1.2927 +   * or the assignment operator, the text will be copied.
  1.2928 +   * When using fastCopyFrom(), the text will be aliased again,
  1.2929 +   * so that both strings then alias the same readonly-text.
  1.2930 +   *
  1.2931 +   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
  1.2932 +   *                     This must be true if <code>textLength==-1</code>.
  1.2933 +   * @param text The characters to alias for the UnicodeString.
  1.2934 +   * @param textLength The number of Unicode characters in <code>text</code> to alias.
  1.2935 +   *                   If -1, then this constructor will determine the length
  1.2936 +   *                   by calling <code>u_strlen()</code>.
  1.2937 +   * @stable ICU 2.0
  1.2938 +   */
  1.2939 +  UnicodeString(UBool isTerminated,
  1.2940 +                const UChar *text,
  1.2941 +                int32_t textLength);
  1.2942 +
  1.2943 +  /**
  1.2944 +   * Writable-aliasing UChar* constructor.
  1.2945 +   * The text will be used for the UnicodeString object, but
  1.2946 +   * it will not be released when the UnicodeString is destroyed.
  1.2947 +   * This has write-through semantics:
  1.2948 +   * For as long as the capacity of the buffer is sufficient, write operations
  1.2949 +   * will directly affect the buffer. When more capacity is necessary, then
  1.2950 +   * a new buffer will be allocated and the contents copied as with regularly
  1.2951 +   * constructed strings.
  1.2952 +   * In an assignment to another UnicodeString, the buffer will be copied.
  1.2953 +   * The extract(UChar *dst) function detects whether the dst pointer is the same
  1.2954 +   * as the string buffer itself and will in this case not copy the contents.
  1.2955 +   *
  1.2956 +   * @param buffer The characters to alias for the UnicodeString.
  1.2957 +   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
  1.2958 +   * @param buffCapacity The size of <code>buffer</code> in UChars.
  1.2959 +   * @stable ICU 2.0
  1.2960 +   */
  1.2961 +  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
  1.2962 +
  1.2963 +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  1.2964 +
  1.2965 +  /**
  1.2966 +   * char* constructor.
  1.2967 +   * Uses the default converter (and thus depends on the ICU conversion code)
  1.2968 +   * unless U_CHARSET_IS_UTF8 is set to 1.
  1.2969 +   *
  1.2970 +   * For ASCII (really "invariant character") strings it is more efficient to use
  1.2971 +   * the constructor that takes a US_INV (for its enum EInvariant).
  1.2972 +   * For ASCII (invariant-character) string literals, see UNICODE_STRING and
  1.2973 +   * UNICODE_STRING_SIMPLE.
  1.2974 +   *
  1.2975 +   * It is recommended to mark this constructor "explicit" by
  1.2976 +   * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
  1.2977 +   * on the compiler command line or similar.
  1.2978 +   * @param codepageData an array of bytes, null-terminated,
  1.2979 +   *                     in the platform's default codepage.
  1.2980 +   * @stable ICU 2.0
  1.2981 +   * @see UNICODE_STRING
  1.2982 +   * @see UNICODE_STRING_SIMPLE
  1.2983 +   */
  1.2984 +  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
  1.2985 +
  1.2986 +  /**
  1.2987 +   * char* constructor.
  1.2988 +   * Uses the default converter (and thus depends on the ICU conversion code)
  1.2989 +   * unless U_CHARSET_IS_UTF8 is set to 1.
  1.2990 +   * @param codepageData an array of bytes in the platform's default codepage.
  1.2991 +   * @param dataLength The number of bytes in <TT>codepageData</TT>.
  1.2992 +   * @stable ICU 2.0
  1.2993 +   */
  1.2994 +  UnicodeString(const char *codepageData, int32_t dataLength);
  1.2995 +
  1.2996 +#endif
  1.2997 +
  1.2998 +#if !UCONFIG_NO_CONVERSION
  1.2999 +
  1.3000 +  /**
  1.3001 +   * char* constructor.
  1.3002 +   * @param codepageData an array of bytes, null-terminated
  1.3003 +   * @param codepage the encoding of <TT>codepageData</TT>.  The special
  1.3004 +   * value 0 for <TT>codepage</TT> indicates that the text is in the
  1.3005 +   * platform's default codepage.
  1.3006 +   *
  1.3007 +   * If <code>codepage</code> is an empty string (<code>""</code>),
  1.3008 +   * then a simple conversion is performed on the codepage-invariant
  1.3009 +   * subset ("invariant characters") of the platform encoding. See utypes.h.
  1.3010 +   * Recommendation: For invariant-character strings use the constructor
  1.3011 +   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  1.3012 +   * because it avoids object code dependencies of UnicodeString on
  1.3013 +   * the conversion code.
  1.3014 +   *
  1.3015 +   * @stable ICU 2.0
  1.3016 +   */
  1.3017 +  UnicodeString(const char *codepageData, const char *codepage);
  1.3018 +
  1.3019 +  /**
  1.3020 +   * char* constructor.
  1.3021 +   * @param codepageData an array of bytes.
  1.3022 +   * @param dataLength The number of bytes in <TT>codepageData</TT>.
  1.3023 +   * @param codepage the encoding of <TT>codepageData</TT>.  The special
  1.3024 +   * value 0 for <TT>codepage</TT> indicates that the text is in the
  1.3025 +   * platform's default codepage.
  1.3026 +   * If <code>codepage</code> is an empty string (<code>""</code>),
  1.3027 +   * then a simple conversion is performed on the codepage-invariant
  1.3028 +   * subset ("invariant characters") of the platform encoding. See utypes.h.
  1.3029 +   * Recommendation: For invariant-character strings use the constructor
  1.3030 +   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  1.3031 +   * because it avoids object code dependencies of UnicodeString on
  1.3032 +   * the conversion code.
  1.3033 +   *
  1.3034 +   * @stable ICU 2.0
  1.3035 +   */
  1.3036 +  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
  1.3037 +
  1.3038 +  /**
  1.3039 +   * char * / UConverter constructor.
  1.3040 +   * This constructor uses an existing UConverter object to
  1.3041 +   * convert the codepage string to Unicode and construct a UnicodeString
  1.3042 +   * from that.
  1.3043 +   *
  1.3044 +   * The converter is reset at first.
  1.3045 +   * If the error code indicates a failure before this constructor is called,
  1.3046 +   * or if an error occurs during conversion or construction,
  1.3047 +   * then the string will be bogus.
  1.3048 +   *
  1.3049 +   * This function avoids the overhead of opening and closing a converter if
  1.3050 +   * multiple strings are constructed.
  1.3051 +   *
  1.3052 +   * @param src input codepage string
  1.3053 +   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
  1.3054 +   * @param cnv converter object (ucnv_resetToUnicode() will be called),
  1.3055 +   *        can be NULL for the default converter
  1.3056 +   * @param errorCode normal ICU error code
  1.3057 +   * @stable ICU 2.0
  1.3058 +   */
  1.3059 +  UnicodeString(
  1.3060 +        const char *src, int32_t srcLength,
  1.3061 +        UConverter *cnv,
  1.3062 +        UErrorCode &errorCode);
  1.3063 +
  1.3064 +#endif
  1.3065 +
  1.3066 +  /**
  1.3067 +   * Constructs a Unicode string from an invariant-character char * string.
  1.3068 +   * About invariant characters see utypes.h.
  1.3069 +   * This constructor has no runtime dependency on conversion code and is
  1.3070 +   * therefore recommended over ones taking a charset name string
  1.3071 +   * (where the empty string "" indicates invariant-character conversion).
  1.3072 +   *
  1.3073 +   * Use the macro US_INV as the third, signature-distinguishing parameter.
  1.3074 +   *
  1.3075 +   * For example:
  1.3076 +   * \code
  1.3077 +   * void fn(const char *s) {
  1.3078 +   *   UnicodeString ustr(s, -1, US_INV);
  1.3079 +   *   // use ustr ...
  1.3080 +   * }
  1.3081 +   * \endcode
  1.3082 +   *
  1.3083 +   * @param src String using only invariant characters.
  1.3084 +   * @param length Length of src, or -1 if NUL-terminated.
  1.3085 +   * @param inv Signature-distinguishing parameter, use US_INV.
  1.3086 +   *
  1.3087 +   * @see US_INV
  1.3088 +   * @stable ICU 3.2
  1.3089 +   */
  1.3090 +  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
  1.3091 +
  1.3092 +
  1.3093 +  /**
  1.3094 +   * Copy constructor.
  1.3095 +   * @param that The UnicodeString object to copy.
  1.3096 +   * @stable ICU 2.0
  1.3097 +   */
  1.3098 +  UnicodeString(const UnicodeString& that);
  1.3099 +
  1.3100 +  /**
  1.3101 +   * 'Substring' constructor from tail of source string.
  1.3102 +   * @param src The UnicodeString object to copy.
  1.3103 +   * @param srcStart The offset into <tt>src</tt> at which to start copying.
  1.3104 +   * @stable ICU 2.2
  1.3105 +   */
  1.3106 +  UnicodeString(const UnicodeString& src, int32_t srcStart);
  1.3107 +
  1.3108 +  /**
  1.3109 +   * 'Substring' constructor from subrange of source string.
  1.3110 +   * @param src The UnicodeString object to copy.
  1.3111 +   * @param srcStart The offset into <tt>src</tt> at which to start copying.
  1.3112 +   * @param srcLength The number of characters from <tt>src</tt> to copy.
  1.3113 +   * @stable ICU 2.2
  1.3114 +   */
  1.3115 +  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  1.3116 +
  1.3117 +  /**
  1.3118 +   * Clone this object, an instance of a subclass of Replaceable.
  1.3119 +   * Clones can be used concurrently in multiple threads.
  1.3120 +   * If a subclass does not implement clone(), or if an error occurs,
  1.3121 +   * then NULL is returned.
  1.3122 +   * The clone functions in all subclasses return a pointer to a Replaceable
  1.3123 +   * because some compilers do not support covariant (same-as-this)
  1.3124 +   * return types; cast to the appropriate subclass if necessary.
  1.3125 +   * The caller must delete the clone.
  1.3126 +   *
  1.3127 +   * @return a clone of this object
  1.3128 +   *
  1.3129 +   * @see Replaceable::clone
  1.3130 +   * @see getDynamicClassID
  1.3131 +   * @stable ICU 2.6
  1.3132 +   */
  1.3133 +  virtual Replaceable *clone() const;
  1.3134 +
  1.3135 +  /** Destructor.
  1.3136 +   * @stable ICU 2.0
  1.3137 +   */
  1.3138 +  virtual ~UnicodeString();
  1.3139 +
  1.3140 +  /**
  1.3141 +   * Create a UnicodeString from a UTF-8 string.
  1.3142 +   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
  1.3143 +   * Calls u_strFromUTF8WithSub().
  1.3144 +   *
  1.3145 +   * @param utf8 UTF-8 input string.
  1.3146 +   *             Note that a StringPiece can be implicitly constructed
  1.3147 +   *             from a std::string or a NUL-terminated const char * string.
  1.3148 +   * @return A UnicodeString with equivalent UTF-16 contents.
  1.3149 +   * @see toUTF8
  1.3150 +   * @see toUTF8String
  1.3151 +   * @stable ICU 4.2
  1.3152 +   */
  1.3153 +  static UnicodeString fromUTF8(const StringPiece &utf8);
  1.3154 +
  1.3155 +  /**
  1.3156 +   * Create a UnicodeString from a UTF-32 string.
  1.3157 +   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
  1.3158 +   * Calls u_strFromUTF32WithSub().
  1.3159 +   *
  1.3160 +   * @param utf32 UTF-32 input string. Must not be NULL.
  1.3161 +   * @param length Length of the input string, or -1 if NUL-terminated.
  1.3162 +   * @return A UnicodeString with equivalent UTF-16 contents.
  1.3163 +   * @see toUTF32
  1.3164 +   * @stable ICU 4.2
  1.3165 +   */
  1.3166 +  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
  1.3167 +
  1.3168 +  /* Miscellaneous operations */
  1.3169 +
  1.3170 +  /**
  1.3171 +   * Unescape a string of characters and return a string containing
  1.3172 +   * the result.  The following escape sequences are recognized:
  1.3173 +   *
  1.3174 +   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
  1.3175 +   * \\Uhhhhhhhh   8 hex digits
  1.3176 +   * \\xhh         1-2 hex digits
  1.3177 +   * \\ooo         1-3 octal digits; o in [0-7]
  1.3178 +   * \\cX          control-X; X is masked with 0x1F
  1.3179 +   *
  1.3180 +   * as well as the standard ANSI C escapes:
  1.3181 +   *
  1.3182 +   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
  1.3183 +   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
  1.3184 +   * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
  1.3185 +   *
  1.3186 +   * Anything else following a backslash is generically escaped.  For
  1.3187 +   * example, "[a\\-z]" returns "[a-z]".
  1.3188 +   *
  1.3189 +   * If an escape sequence is ill-formed, this method returns an empty
  1.3190 +   * string.  An example of an ill-formed sequence is "\\u" followed by
  1.3191 +   * fewer than 4 hex digits.
  1.3192 +   *
  1.3193 +   * This function is similar to u_unescape() but not identical to it.
  1.3194 +   * The latter takes a source char*, so it does escape recognition
  1.3195 +   * and also invariant conversion.
  1.3196 +   *
  1.3197 +   * @return a string with backslash escapes interpreted, or an
  1.3198 +   * empty string on error.
  1.3199 +   * @see UnicodeString#unescapeAt()
  1.3200 +   * @see u_unescape()
  1.3201 +   * @see u_unescapeAt()
  1.3202 +   * @stable ICU 2.0
  1.3203 +   */
  1.3204 +  UnicodeString unescape() const;
  1.3205 +
  1.3206 +  /**
  1.3207 +   * Unescape a single escape sequence and return the represented
  1.3208 +   * character.  See unescape() for a listing of the recognized escape
  1.3209 +   * sequences.  The character at offset-1 is assumed (without
  1.3210 +   * checking) to be a backslash.  If the escape sequence is
  1.3211 +   * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
  1.3212 +   * returned.
  1.3213 +   *
  1.3214 +   * @param offset an input output parameter.  On input, it is the
  1.3215 +   * offset into this string where the escape sequence is located,
  1.3216 +   * after the initial backslash.  On output, it is advanced after the
  1.3217 +   * last character parsed.  On error, it is not advanced at all.
  1.3218 +   * @return the character represented by the escape sequence at
  1.3219 +   * offset, or U_SENTINEL=-1 on error.
  1.3220 +   * @see UnicodeString#unescape()
  1.3221 +   * @see u_unescape()
  1.3222 +   * @see u_unescapeAt()
  1.3223 +   * @stable ICU 2.0
  1.3224 +   */
  1.3225 +  UChar32 unescapeAt(int32_t &offset) const;
  1.3226 +
  1.3227 +  /**
  1.3228 +   * ICU "poor man's RTTI", returns a UClassID for this class.
  1.3229 +   *
  1.3230 +   * @stable ICU 2.2
  1.3231 +   */
  1.3232 +  static UClassID U_EXPORT2 getStaticClassID();
  1.3233 +
  1.3234 +  /**
  1.3235 +   * ICU "poor man's RTTI", returns a UClassID for the actual class.
  1.3236 +   *
  1.3237 +   * @stable ICU 2.2
  1.3238 +   */
  1.3239 +  virtual UClassID getDynamicClassID() const;
  1.3240 +
  1.3241 +  //========================================
  1.3242 +  // Implementation methods
  1.3243 +  //========================================
  1.3244 +
  1.3245 +protected:
  1.3246 +  /**
  1.3247 +   * Implement Replaceable::getLength() (see jitterbug 1027).
  1.3248 +   * @stable ICU 2.4
  1.3249 +   */
  1.3250 +  virtual int32_t getLength() const;
  1.3251 +
  1.3252 +  /**
  1.3253 +   * The change in Replaceable to use virtual getCharAt() allows
  1.3254 +   * UnicodeString::charAt() to be inline again (see jitterbug 709).
  1.3255 +   * @stable ICU 2.4
  1.3256 +   */
  1.3257 +  virtual UChar getCharAt(int32_t offset) const;
  1.3258 +
  1.3259 +  /**
  1.3260 +   * The change in Replaceable to use virtual getChar32At() allows
  1.3261 +   * UnicodeString::char32At() to be inline again (see jitterbug 709).
  1.3262 +   * @stable ICU 2.4
  1.3263 +   */
  1.3264 +  virtual UChar32 getChar32At(int32_t offset) const;
  1.3265 +
  1.3266 +private:
  1.3267 +  // For char* constructors. Could be made public.
  1.3268 +  UnicodeString &setToUTF8(const StringPiece &utf8);
  1.3269 +  // For extract(char*).
  1.3270 +  // We could make a toUTF8(target, capacity, errorCode) public but not
  1.3271 +  // this version: New API will be cleaner if we make callers create substrings
  1.3272 +  // rather than having start+length on every method,
  1.3273 +  // and it should take a UErrorCode&.
  1.3274 +  int32_t
  1.3275 +  toUTF8(int32_t start, int32_t len,
  1.3276 +         char *target, int32_t capacity) const;
  1.3277 +
  1.3278 +  /**
  1.3279 +   * Internal string contents comparison, called by operator==.
  1.3280 +   * Requires: this & text not bogus and have same lengths.
  1.3281 +   */
  1.3282 +  UBool doEquals(const UnicodeString &text, int32_t len) const;
  1.3283 +
  1.3284 +  inline int8_t
  1.3285 +  doCompare(int32_t start,
  1.3286 +           int32_t length,
  1.3287 +           const UnicodeString& srcText,
  1.3288 +           int32_t srcStart,
  1.3289 +           int32_t srcLength) const;
  1.3290 +
  1.3291 +  int8_t doCompare(int32_t start,
  1.3292 +           int32_t length,
  1.3293 +           const UChar *srcChars,
  1.3294 +           int32_t srcStart,
  1.3295 +           int32_t srcLength) const;
  1.3296 +
  1.3297 +  inline int8_t
  1.3298 +  doCompareCodePointOrder(int32_t start,
  1.3299 +                          int32_t length,
  1.3300 +                          const UnicodeString& srcText,
  1.3301 +                          int32_t srcStart,
  1.3302 +                          int32_t srcLength) const;
  1.3303 +
  1.3304 +  int8_t doCompareCodePointOrder(int32_t start,
  1.3305 +                                 int32_t length,
  1.3306 +                                 const UChar *srcChars,
  1.3307 +                                 int32_t srcStart,
  1.3308 +                                 int32_t srcLength) const;
  1.3309 +
  1.3310 +  inline int8_t
  1.3311 +  doCaseCompare(int32_t start,
  1.3312 +                int32_t length,
  1.3313 +                const UnicodeString &srcText,
  1.3314 +                int32_t srcStart,
  1.3315 +                int32_t srcLength,
  1.3316 +                uint32_t options) const;
  1.3317 +
  1.3318 +  int8_t
  1.3319 +  doCaseCompare(int32_t start,
  1.3320 +                int32_t length,
  1.3321 +                const UChar *srcChars,
  1.3322 +                int32_t srcStart,
  1.3323 +                int32_t srcLength,
  1.3324 +                uint32_t options) const;
  1.3325 +
  1.3326 +  int32_t doIndexOf(UChar c,
  1.3327 +            int32_t start,
  1.3328 +            int32_t length) const;
  1.3329 +
  1.3330 +  int32_t doIndexOf(UChar32 c,
  1.3331 +                        int32_t start,
  1.3332 +                        int32_t length) const;
  1.3333 +
  1.3334 +  int32_t doLastIndexOf(UChar c,
  1.3335 +                int32_t start,
  1.3336 +                int32_t length) const;
  1.3337 +
  1.3338 +  int32_t doLastIndexOf(UChar32 c,
  1.3339 +                            int32_t start,
  1.3340 +                            int32_t length) const;
  1.3341 +
  1.3342 +  void doExtract(int32_t start,
  1.3343 +         int32_t length,
  1.3344 +         UChar *dst,
  1.3345 +         int32_t dstStart) const;
  1.3346 +
  1.3347 +  inline void doExtract(int32_t start,
  1.3348 +         int32_t length,
  1.3349 +         UnicodeString& target) const;
  1.3350 +
  1.3351 +  inline UChar doCharAt(int32_t offset)  const;
  1.3352 +
  1.3353 +  UnicodeString& doReplace(int32_t start,
  1.3354 +               int32_t length,
  1.3355 +               const UnicodeString& srcText,
  1.3356 +               int32_t srcStart,
  1.3357 +               int32_t srcLength);
  1.3358 +
  1.3359 +  UnicodeString& doReplace(int32_t start,
  1.3360 +               int32_t length,
  1.3361 +               const UChar *srcChars,
  1.3362 +               int32_t srcStart,
  1.3363 +               int32_t srcLength);
  1.3364 +
  1.3365 +  UnicodeString& doReverse(int32_t start,
  1.3366 +               int32_t length);
  1.3367 +
  1.3368 +  // calculate hash code
  1.3369 +  int32_t doHashCode(void) const;
  1.3370 +
  1.3371 +  // get pointer to start of array
  1.3372 +  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
  1.3373 +  inline UChar* getArrayStart(void);
  1.3374 +  inline const UChar* getArrayStart(void) const;
  1.3375 +
  1.3376 +  // A UnicodeString object (not necessarily its current buffer)
  1.3377 +  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
  1.3378 +  inline UBool isWritable() const;
  1.3379 +
  1.3380 +  // Is the current buffer writable?
  1.3381 +  inline UBool isBufferWritable() const;
  1.3382 +
  1.3383 +  // None of the following does releaseArray().
  1.3384 +  inline void setLength(int32_t len);        // sets only fShortLength and fLength
  1.3385 +  inline void setToEmpty();                  // sets fFlags=kShortString
  1.3386 +  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
  1.3387 +
  1.3388 +  // allocate the array; result may be fStackBuffer
  1.3389 +  // sets refCount to 1 if appropriate
  1.3390 +  // sets fArray, fCapacity, and fFlags
  1.3391 +  // returns boolean for success or failure
  1.3392 +  UBool allocate(int32_t capacity);
  1.3393 +
  1.3394 +  // release the array if owned
  1.3395 +  void releaseArray(void);
  1.3396 +
  1.3397 +  // turn a bogus string into an empty one
  1.3398 +  void unBogus();
  1.3399 +
  1.3400 +  // implements assignment operator, copy constructor, and fastCopyFrom()
  1.3401 +  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
  1.3402 +
  1.3403 +  // Pin start and limit to acceptable values.
  1.3404 +  inline void pinIndex(int32_t& start) const;
  1.3405 +  inline void pinIndices(int32_t& start,
  1.3406 +                         int32_t& length) const;
  1.3407 +
  1.3408 +#if !UCONFIG_NO_CONVERSION
  1.3409 +
  1.3410 +  /* Internal extract() using UConverter. */
  1.3411 +  int32_t doExtract(int32_t start, int32_t length,
  1.3412 +                    char *dest, int32_t destCapacity,
  1.3413 +                    UConverter *cnv,
  1.3414 +                    UErrorCode &errorCode) const;
  1.3415 +
  1.3416 +  /*
  1.3417 +   * Real constructor for converting from codepage data.
  1.3418 +   * It assumes that it is called with !fRefCounted.
  1.3419 +   *
  1.3420 +   * If <code>codepage==0</code>, then the default converter
  1.3421 +   * is used for the platform encoding.
  1.3422 +   * If <code>codepage</code> is an empty string (<code>""</code>),
  1.3423 +   * then a simple conversion is performed on the codepage-invariant
  1.3424 +   * subset ("invariant characters") of the platform encoding. See utypes.h.
  1.3425 +   */
  1.3426 +  void doCodepageCreate(const char *codepageData,
  1.3427 +                        int32_t dataLength,
  1.3428 +                        const char *codepage);
  1.3429 +
  1.3430 +  /*
  1.3431 +   * Worker function for creating a UnicodeString from
  1.3432 +   * a codepage string using a UConverter.
  1.3433 +   */
  1.3434 +  void
  1.3435 +  doCodepageCreate(const char *codepageData,
  1.3436 +                   int32_t dataLength,
  1.3437 +                   UConverter *converter,
  1.3438 +                   UErrorCode &status);
  1.3439 +
  1.3440 +#endif
  1.3441 +
  1.3442 +  /*
  1.3443 +   * This function is called when write access to the array
  1.3444 +   * is necessary.
  1.3445 +   *
  1.3446 +   * We need to make a copy of the array if
  1.3447 +   * the buffer is read-only, or
  1.3448 +   * the buffer is refCounted (shared), and refCount>1, or
  1.3449 +   * the buffer is too small.
  1.3450 +   *
  1.3451 +   * Return FALSE if memory could not be allocated.
  1.3452 +   */
  1.3453 +  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
  1.3454 +                            int32_t growCapacity = -1,
  1.3455 +                            UBool doCopyArray = TRUE,
  1.3456 +                            int32_t **pBufferToDelete = 0,
  1.3457 +                            UBool forceClone = FALSE);
  1.3458 +
  1.3459 +  /**
  1.3460 +   * Common function for UnicodeString case mappings.
  1.3461 +   * The stringCaseMapper has the same type UStringCaseMapper
  1.3462 +   * as in ustr_imp.h for ustrcase_map().
  1.3463 +   */
  1.3464 +  UnicodeString &
  1.3465 +  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
  1.3466 +
  1.3467 +  // ref counting
  1.3468 +  void addRef(void);
  1.3469 +  int32_t removeRef(void);
  1.3470 +  int32_t refCount(void) const;
  1.3471 +
  1.3472 +  // constants
  1.3473 +  enum {
  1.3474 +    // Set the stack buffer size so that sizeof(UnicodeString) is,
  1.3475 +    // naturally (without padding), a multiple of sizeof(pointer).
  1.3476 +    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
  1.3477 +    kInvalidUChar=0xffff, // invalid UChar index
  1.3478 +    kGrowSize=128, // grow size for this buffer
  1.3479 +    kInvalidHashCode=0, // invalid hash code
  1.3480 +    kEmptyHashCode=1, // hash code for empty string
  1.3481 +
  1.3482 +    // bit flag values for fFlags
  1.3483 +    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
  1.3484 +    kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
  1.3485 +    kRefCounted=4,      // there is a refCount field before the characters in fArray
  1.3486 +    kBufferIsReadonly=8,// do not write to this buffer
  1.3487 +    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
  1.3488 +                        // and releaseBuffer(newLength) must be called
  1.3489 +
  1.3490 +    // combined values for convenience
  1.3491 +    kShortString=kUsingStackBuffer,
  1.3492 +    kLongString=kRefCounted,
  1.3493 +    kReadonlyAlias=kBufferIsReadonly,
  1.3494 +    kWritableAlias=0
  1.3495 +  };
  1.3496 +
  1.3497 +  friend class StringThreadTest;
  1.3498 +  friend class UnicodeStringAppendable;
  1.3499 +
  1.3500 +  union StackBufferOrFields;        // forward declaration necessary before friend declaration
  1.3501 +  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
  1.3502 +
  1.3503 +  /*
  1.3504 +   * The following are all the class fields that are stored
  1.3505 +   * in each UnicodeString object.
  1.3506 +   * Note that UnicodeString has virtual functions,
  1.3507 +   * therefore there is an implicit vtable pointer
  1.3508 +   * as the first real field.
  1.3509 +   * The fields should be aligned such that no padding is necessary.
  1.3510 +   * On 32-bit machines, the size should be 32 bytes,
  1.3511 +   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
  1.3512 +   *
  1.3513 +   * We use a hack to achieve this.
  1.3514 +   *
  1.3515 +   * With at least some compilers, each of the following is forced to
  1.3516 +   * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
  1.3517 +   * rounded up with additional padding if the fields do not already fit that requirement:
  1.3518 +   * - sizeof(class UnicodeString)
  1.3519 +   * - offsetof(UnicodeString, fUnion)
  1.3520 +   * - sizeof(fUnion)
  1.3521 +   * - sizeof(fFields)
  1.3522 +   *
  1.3523 +   * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
  1.3524 +   * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
  1.3525 +   * (Padding at the end of fFields is ok:
  1.3526 +   * As long as there is no padding after fStackBuffer, it is not wasted space.)
  1.3527 +   *
  1.3528 +   * We further assume that the compiler does not reorder the fields,
  1.3529 +   * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
  1.3530 +   * with at most some padding (but no other field) in between.
  1.3531 +   * (Padding there would be wasted space, but functionally harmless.)
  1.3532 +   *
  1.3533 +   * We use a few more sizeof(pointer)'s chunks of space with
  1.3534 +   * fRestOfStackBuffer, fShortLength and fFlags,
  1.3535 +   * to get up exactly to the intended sizeof(UnicodeString).
  1.3536 +   */
  1.3537 +  // (implicit) *vtable;
  1.3538 +  union StackBufferOrFields {
  1.3539 +    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
  1.3540 +    // else fFields is used
  1.3541 +    UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
  1.3542 +    struct {
  1.3543 +      UChar   *fArray;    // the Unicode data
  1.3544 +      int32_t fCapacity;  // capacity of fArray (in UChars)
  1.3545 +      int32_t fLength;    // number of characters in fArray if >127; else undefined
  1.3546 +    } fFields;
  1.3547 +  } fUnion;
  1.3548 +  UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
  1.3549 +  int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
  1.3550 +  uint8_t fFlags;       // bit flags: see constants above
  1.3551 +};
  1.3552 +
  1.3553 +/**
  1.3554 + * Create a new UnicodeString with the concatenation of two others.
  1.3555 + *
  1.3556 + * @param s1 The first string to be copied to the new one.
  1.3557 + * @param s2 The second string to be copied to the new one, after s1.
  1.3558 + * @return UnicodeString(s1).append(s2)
  1.3559 + * @stable ICU 2.8
  1.3560 + */
  1.3561 +U_COMMON_API UnicodeString U_EXPORT2
  1.3562 +operator+ (const UnicodeString &s1, const UnicodeString &s2);
  1.3563 +
  1.3564 +//========================================
  1.3565 +// Inline members
  1.3566 +//========================================
  1.3567 +
  1.3568 +//========================================
  1.3569 +// Privates
  1.3570 +//========================================
  1.3571 +// Clamps start in place so that it lies within [0, length()].
  1.3572 +inline void
  1.3573 +UnicodeString::pinIndex(int32_t& start) const
  1.3574 +{
  1.3575 +  // negative offsets snap to 0, offsets past the end snap to length()
  1.3576 +  if(start < 0) {
  1.3577 +    start = 0;
  1.3578 +  } else if(start > length()) {
  1.3579 +    start = length();
  1.3580 +  }
  1.3581 +}
  1.3582 +// Clamps a (start, _length) pair in place so that the range stays inside this string.
  1.3583 +inline void
  1.3584 +UnicodeString::pinIndices(int32_t& start,
  1.3585 +                          int32_t& _length) const
  1.3586 +{
  1.3587 +  // start is pinned to [0, length()] first
  1.3588 +  int32_t len = length();
  1.3589 +  if(start < 0) {
  1.3590 +    start = 0;
  1.3591 +  } else if(start > len) {
  1.3592 +    start = len;
  1.3593 +  }
  1.3594 +  if(_length < 0) {  // then _length is pinned to [0, len - start], using the already pinned start
  1.3595 +    _length = 0;
  1.3596 +  } else if(_length > (len - start)) {
  1.3597 +    _length = (len - start);
  1.3598 +  }
  1.3599 +}
  1.3600 +// Start of the character storage: fUnion.fStackBuffer when kUsingStackBuffer is set, else fUnion.fFields.fArray.
  1.3601 +inline UChar*
  1.3602 +UnicodeString::getArrayStart()
  1.3603 +{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
  1.3604 +// Const overload of getArrayStart(); performs the same buffer selection.
  1.3605 +inline const UChar*
  1.3606 +UnicodeString::getArrayStart() const
  1.3607 +{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
  1.3608 +
  1.3609 +//========================================
  1.3610 +// Default constructor
  1.3611 +//========================================
  1.3612 +// Constructs an empty string: short length 0 and fFlags=kShortString (i.e. kUsingStackBuffer).
  1.3613 +inline
  1.3614 +UnicodeString::UnicodeString()
  1.3615 +  : fShortLength(0),
  1.3616 +    fFlags(kShortString)
  1.3617 +{}
  1.3618 +
  1.3619 +//========================================
  1.3620 +// Read-only implementation methods
  1.3621 +//========================================
  1.3622 +inline int32_t
  1.3623 +UnicodeString::length() const
  1.3624 +{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }  // a negative fShortLength means the real length is kept in fFields.fLength
  1.3625 +// Capacity of the current buffer: US_STACKBUF_SIZE for the stack buffer, else fFields.fCapacity.
  1.3626 +inline int32_t
  1.3627 +UnicodeString::getCapacity() const
  1.3628 +{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
  1.3629 +// Public hash accessor; simply forwards to the private doHashCode().
  1.3630 +inline int32_t
  1.3631 +UnicodeString::hashCode() const
  1.3632 +{ return doHashCode(); }
  1.3633 +// Non-zero when the kIsBogus bit is set in fFlags.
  1.3634 +inline UBool
  1.3635 +UnicodeString::isBogus() const
  1.3636 +{ return (UBool)(fFlags & kIsBogus); }
  1.3637 +// True unless the string is bogus or has an "open" getBuffer(minCapacity) (kOpenGetBuffer set).
  1.3638 +inline UBool
  1.3639 +UnicodeString::isWritable() const
  1.3640 +{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
  1.3641 +// Reports whether the current buffer itself may be modified in place.
  1.3642 +inline UBool
  1.3643 +UnicodeString::isBufferWritable() const
  1.3644 +{
  1.3645 +  return (UBool)(
  1.3646 +      !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&  // not open, not bogus, not read-only
  1.3647 +      (!(fFlags&kRefCounted) || refCount()==1));  // a ref-counted buffer must have exactly one reference
  1.3648 +}
  1.3649 +// Read-only access to the character buffer; returns 0 when the string is bogus or a buffer is open.
  1.3650 +inline const UChar *
  1.3651 +UnicodeString::getBuffer() const {
  1.3652 +  if(fFlags&(kIsBogus|kOpenGetBuffer)) {  // bogus, or getBuffer(minCapacity) has not been released yet
  1.3653 +    return 0;
  1.3654 +  } else if(fFlags&kUsingStackBuffer) {  // short string stored inside the object
  1.3655 +    return fUnion.fStackBuffer;
  1.3656 +  } else {
  1.3657 +    return fUnion.fFields.fArray;
  1.3658 +  }
  1.3659 +}
  1.3660 +
  1.3661 +//========================================
  1.3662 +// Read-only alias methods
  1.3663 +//========================================
  1.3664 +inline int8_t  // UnicodeString-source overload; delegates to the UChar* overload of doCompare()
  1.3665 +UnicodeString::doCompare(int32_t start,
  1.3666 +              int32_t thisLength,
  1.3667 +              const UnicodeString& srcText,
  1.3668 +              int32_t srcStart,
  1.3669 +              int32_t srcLength) const
  1.3670 +{
  1.3671 +  if(srcText.isBogus()) {
  1.3672 +    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  1.3673 +  } else {
  1.3674 +    srcText.pinIndices(srcStart, srcLength);  // clamp the source range to srcText's bounds
  1.3675 +    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);  // start/thisLength are passed through unpinned here
  1.3676 +  }
  1.3677 +}
  1.3678 +// Equality: two bogus strings are equal; otherwise both must be non-bogus with equal length and doEquals() contents.
  1.3679 +inline UBool
  1.3680 +UnicodeString::operator== (const UnicodeString& text) const
  1.3681 +{
  1.3682 +  if(isBogus()) {
  1.3683 +    return text.isBogus();
  1.3684 +  } else {
  1.3685 +    int32_t len = length(), textLength = text.length();
  1.3686 +    return !text.isBogus() && len == textLength && doEquals(text, len);  // doEquals() is reached only after the bogus and length checks pass
  1.3687 +  }
  1.3688 +}
  1.3689 +// Inequality: the logical negation of operator==.
  1.3690 +inline UBool
  1.3691 +UnicodeString::operator!= (const UnicodeString& text) const
  1.3692 +{ return (! operator==(text)); }
  1.3693 +// True when the whole-string doCompare() against text yields exactly 1.
  1.3694 +inline UBool
  1.3695 +UnicodeString::operator> (const UnicodeString& text) const
  1.3696 +{ return doCompare(0, length(), text, 0, text.length()) == 1; }
  1.3697 +// True when the whole-string doCompare() against text yields exactly -1.
  1.3698 +inline UBool
  1.3699 +UnicodeString::operator< (const UnicodeString& text) const
  1.3700 +{ return doCompare(0, length(), text, 0, text.length()) == -1; }
  1.3701 +// True when the whole-string doCompare() against text yields anything other than -1.
  1.3702 +inline UBool
  1.3703 +UnicodeString::operator>= (const UnicodeString& text) const
  1.3704 +{ return doCompare(0, length(), text, 0, text.length()) != -1; }
  1.3705 +// True when the whole-string doCompare() against text yields anything other than 1.
  1.3706 +inline UBool
  1.3707 +UnicodeString::operator<= (const UnicodeString& text) const
  1.3708 +{ return doCompare(0, length(), text, 0, text.length()) != 1; }
  1.3709 +// compare(text): whole of this string against the whole of text, via doCompare().
  1.3710 +inline int8_t
  1.3711 +UnicodeString::compare(const UnicodeString& text) const
  1.3712 +{ return doCompare(0, length(), text, 0, text.length()); }
  1.3713 +// compare(start, _length, srcText): the given range of this string against the whole of srcText.
  1.3714 +inline int8_t
  1.3715 +UnicodeString::compare(int32_t start,
  1.3716 +               int32_t _length,
  1.3717 +               const UnicodeString& srcText) const
  1.3718 +{ return doCompare(start, _length, srcText, 0, srcText.length()); }
  1.3719 +// compare(srcChars, srcLength): whole of this string against srcChars, passing source start 0 and srcLength.
  1.3720 +inline int8_t
  1.3721 +UnicodeString::compare(const UChar *srcChars,
  1.3722 +               int32_t srcLength) const
  1.3723 +{ return doCompare(0, length(), srcChars, 0, srcLength); }
  1.3724 +// compare(start, _length, srcText, srcStart, srcLength): forwards both ranges unchanged to doCompare().
  1.3725 +inline int8_t
  1.3726 +UnicodeString::compare(int32_t start,
  1.3727 +               int32_t _length,
  1.3728 +               const UnicodeString& srcText,
  1.3729 +               int32_t srcStart,
  1.3730 +               int32_t srcLength) const
  1.3731 +{ return doCompare(start, _length, srcText, srcStart, srcLength); }
  1.3732 +// compare(start, _length, srcChars): _length is used as the length of both ranges; source start is 0.
  1.3733 +inline int8_t
  1.3734 +UnicodeString::compare(int32_t start,
  1.3735 +               int32_t _length,
  1.3736 +               const UChar *srcChars) const
  1.3737 +{ return doCompare(start, _length, srcChars, 0, _length); }
  1.3738 +// compare(start, _length, srcChars, srcStart, srcLength): forwards both ranges unchanged to doCompare().
  1.3739 +inline int8_t
  1.3740 +UnicodeString::compare(int32_t start,
  1.3741 +               int32_t _length,
  1.3742 +               const UChar *srcChars,
  1.3743 +               int32_t srcStart,
  1.3744 +               int32_t srcLength) const
  1.3745 +{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
  1.3746 +// Start/limit flavour of compare(): each limit is converted to a length (limit - start) before calling doCompare().
  1.3747 +inline int8_t
  1.3748 +UnicodeString::compareBetween(int32_t start,
  1.3749 +                  int32_t limit,
  1.3750 +                  const UnicodeString& srcText,
  1.3751 +                  int32_t srcStart,
  1.3752 +                  int32_t srcLimit) const
  1.3753 +{ return doCompare(start, limit - start,
  1.3754 +           srcText, srcStart, srcLimit - srcStart); }
  1.3755 +// UnicodeString-source overload; delegates to the UChar* overload of doCompareCodePointOrder().
  1.3756 +inline int8_t
  1.3757 +UnicodeString::doCompareCodePointOrder(int32_t start,
  1.3758 +                                       int32_t thisLength,
  1.3759 +                                       const UnicodeString& srcText,
  1.3760 +                                       int32_t srcStart,
  1.3761 +                                       int32_t srcLength) const
  1.3762 +{
  1.3763 +  if(srcText.isBogus()) {
  1.3764 +    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  1.3765 +  } else {
  1.3766 +    srcText.pinIndices(srcStart, srcLength);  // clamp the source range to srcText's bounds
  1.3767 +    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);  // start/thisLength are passed through unpinned here
  1.3768 +  }
  1.3769 +}
  1.3770 +// compareCodePointOrder(text): whole of this string against the whole of text.
  1.3771 +inline int8_t
  1.3772 +UnicodeString::compareCodePointOrder(const UnicodeString& text) const
  1.3773 +{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
  1.3774 +// compareCodePointOrder(start, _length, srcText): the given range of this string against the whole of srcText.
  1.3775 +inline int8_t
  1.3776 +UnicodeString::compareCodePointOrder(int32_t start,
  1.3777 +                                     int32_t _length,
  1.3778 +                                     const UnicodeString& srcText) const
  1.3779 +{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
  1.3780 +// compareCodePointOrder(srcChars, srcLength): whole of this string against srcChars, passing source start 0.
  1.3781 +inline int8_t
  1.3782 +UnicodeString::compareCodePointOrder(const UChar *srcChars,
  1.3783 +                                     int32_t srcLength) const
  1.3784 +{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
  1.3785 +// compareCodePointOrder(start, _length, srcText, srcStart, srcLength): forwards both ranges unchanged.
  1.3786 +inline int8_t
  1.3787 +UnicodeString::compareCodePointOrder(int32_t start,
  1.3788 +                                     int32_t _length,
  1.3789 +                                     const UnicodeString& srcText,
  1.3790 +                                     int32_t srcStart,
  1.3791 +                                     int32_t srcLength) const
  1.3792 +{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
  1.3793 +// compareCodePointOrder(start, _length, srcChars): _length is used as the length of both ranges; source start is 0.
  1.3794 +inline int8_t
  1.3795 +UnicodeString::compareCodePointOrder(int32_t start,
  1.3796 +                                     int32_t _length,
  1.3797 +                                     const UChar *srcChars) const
  1.3798 +{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
  1.3799 +// compareCodePointOrder(start, _length, srcChars, srcStart, srcLength): forwards both ranges unchanged.
  1.3800 +inline int8_t
  1.3801 +UnicodeString::compareCodePointOrder(int32_t start,
  1.3802 +                                     int32_t _length,
  1.3803 +                                     const UChar *srcChars,
  1.3804 +                                     int32_t srcStart,
  1.3805 +                                     int32_t srcLength) const
  1.3806 +{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
  1.3807 +// Start/limit flavour: each limit is converted to a length (limit - start) before calling doCompareCodePointOrder().
  1.3808 +inline int8_t
  1.3809 +UnicodeString::compareCodePointOrderBetween(int32_t start,
  1.3810 +                                            int32_t limit,
  1.3811 +                                            const UnicodeString& srcText,
  1.3812 +                                            int32_t srcStart,
  1.3813 +                                            int32_t srcLimit) const
  1.3814 +{ return doCompareCodePointOrder(start, limit - start,
  1.3815 +           srcText, srcStart, srcLimit - srcStart); }
  1.3816 +// UnicodeString-source overload; delegates to the UChar* overload of doCaseCompare(), passing options through.
  1.3817 +inline int8_t
  1.3818 +UnicodeString::doCaseCompare(int32_t start,
  1.3819 +                             int32_t thisLength,
  1.3820 +                             const UnicodeString &srcText,
  1.3821 +                             int32_t srcStart,
  1.3822 +                             int32_t srcLength,
  1.3823 +                             uint32_t options) const
  1.3824 +{
  1.3825 +  if(srcText.isBogus()) {
  1.3826 +    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  1.3827 +  } else {
  1.3828 +    srcText.pinIndices(srcStart, srcLength);  // clamp the source range to srcText's bounds
  1.3829 +    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);  // start/thisLength are passed through unpinned here
  1.3830 +  }
  1.3831 +}
  1.3832 +// caseCompare(text, options): whole of this string against the whole of text.
  1.3833 +inline int8_t
  1.3834 +UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
  1.3835 +  return doCaseCompare(0, length(), text, 0, text.length(), options);
  1.3836 +}
  1.3837 +// caseCompare(start, _length, srcText, options): the given range of this string against the whole of srcText.
  1.3838 +inline int8_t
  1.3839 +UnicodeString::caseCompare(int32_t start,
  1.3840 +                           int32_t _length,
  1.3841 +                           const UnicodeString &srcText,
  1.3842 +                           uint32_t options) const {
  1.3843 +  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
  1.3844 +}
  1.3845 +// caseCompare(srcChars, srcLength, options): whole of this string against srcChars, passing source start 0.
  1.3846 +inline int8_t
  1.3847 +UnicodeString::caseCompare(const UChar *srcChars,
  1.3848 +                           int32_t srcLength,
  1.3849 +                           uint32_t options) const {
  1.3850 +  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
  1.3851 +}
  1.3852 +// caseCompare(start, _length, srcText, srcStart, srcLength, options): forwards both ranges unchanged.
  1.3853 +inline int8_t
  1.3854 +UnicodeString::caseCompare(int32_t start,
  1.3855 +                           int32_t _length,
  1.3856 +                           const UnicodeString &srcText,
  1.3857 +                           int32_t srcStart,
  1.3858 +                           int32_t srcLength,
  1.3859 +                           uint32_t options) const {
  1.3860 +  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
  1.3861 +}
  1.3862 +// caseCompare(start, _length, srcChars, options): _length is used as the length of both ranges; source start is 0.
  1.3863 +inline int8_t
  1.3864 +UnicodeString::caseCompare(int32_t start,
  1.3865 +                           int32_t _length,
  1.3866 +                           const UChar *srcChars,
  1.3867 +                           uint32_t options) const {
  1.3868 +  return doCaseCompare(start, _length, srcChars, 0, _length, options);
  1.3869 +}
  1.3870 +// caseCompare(start, _length, srcChars, srcStart, srcLength, options): forwards both ranges unchanged.
  1.3871 +inline int8_t
  1.3872 +UnicodeString::caseCompare(int32_t start,
  1.3873 +                           int32_t _length,
  1.3874 +                           const UChar *srcChars,
  1.3875 +                           int32_t srcStart,
  1.3876 +                           int32_t srcLength,
  1.3877 +                           uint32_t options) const {
  1.3878 +  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
  1.3879 +}
  1.3880 +// Start/limit flavour: each limit is converted to a length (limit - start) before calling doCaseCompare().
  1.3881 +inline int8_t
  1.3882 +UnicodeString::caseCompareBetween(int32_t start,
  1.3883 +                                  int32_t limit,
  1.3884 +                                  const UnicodeString &srcText,
  1.3885 +                                  int32_t srcStart,
  1.3886 +                                  int32_t srcLimit,
  1.3887 +                                  uint32_t options) const {
  1.3888 +  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
  1.3889 +}
  1.3890 +// Searches for a range of srcText; yields -1 when srcText is bogus or the pinned source range is empty.
  1.3891 +inline int32_t
  1.3892 +UnicodeString::indexOf(const UnicodeString& srcText,
  1.3893 +               int32_t srcStart,
  1.3894 +               int32_t srcLength,
  1.3895 +               int32_t start,
  1.3896 +               int32_t _length) const
  1.3897 +{
  1.3898 +  if(!srcText.isBogus()) {
  1.3899 +    srcText.pinIndices(srcStart, srcLength);  // clamp the source range to srcText's bounds
  1.3900 +    if(srcLength > 0) {
  1.3901 +      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);  // delegates to the UChar* overload
  1.3902 +    }
  1.3903 +  }
  1.3904 +  return -1;
  1.3905 +}
  1.3906 +
  1.3907 +inline int32_t
  1.3908 +UnicodeString::indexOf(const UnicodeString& text) const
  1.3909 +{ return indexOf(text, 0, text.length(), 0, length()); }
  1.3910 +
  1.3911 +inline int32_t
  1.3912 +UnicodeString::indexOf(const UnicodeString& text,
  1.3913 +               int32_t start) const {
  1.3914 +  pinIndex(start);
  1.3915 +  return indexOf(text, 0, text.length(), start, length() - start);
  1.3916 +}
  1.3917 +
  1.3918 +inline int32_t
  1.3919 +UnicodeString::indexOf(const UnicodeString& text,
  1.3920 +               int32_t start,
  1.3921 +               int32_t _length) const
  1.3922 +{ return indexOf(text, 0, text.length(), start, _length); }
  1.3923 +
  1.3924 +inline int32_t
  1.3925 +UnicodeString::indexOf(const UChar *srcChars,
  1.3926 +               int32_t srcLength,
  1.3927 +               int32_t start) const {
  1.3928 +  pinIndex(start);
  1.3929 +  return indexOf(srcChars, 0, srcLength, start, length() - start);
  1.3930 +}
  1.3931 +
  1.3932 +inline int32_t
  1.3933 +UnicodeString::indexOf(const UChar *srcChars,
  1.3934 +               int32_t srcLength,
  1.3935 +               int32_t start,
  1.3936 +               int32_t _length) const
  1.3937 +{ return indexOf(srcChars, 0, srcLength, start, _length); }
  1.3938 +
  1.3939 +inline int32_t
  1.3940 +UnicodeString::indexOf(UChar c,
  1.3941 +               int32_t start,
  1.3942 +               int32_t _length) const
  1.3943 +{ return doIndexOf(c, start, _length); }
  1.3944 +
  1.3945 +// Forwards the UChar32 c and the range unchanged to doIndexOf().
  1.3946 +inline int32_t
  1.3947 +UnicodeString::indexOf(UChar32 c, int32_t start, int32_t _length) const {
  1.3948 +  return doIndexOf(c, start, _length);
  1.3949 +}
  1.3950 +
  1.3951 +inline int32_t UnicodeString::indexOf(UChar c) const {
  1.3952 +  return doIndexOf(c, 0, length());  // range (0, length()): the whole string
  1.3953 +}
  1.3954 +
  1.3955 +inline int32_t UnicodeString::indexOf(UChar32 c) const {
  1.3956 +  return indexOf(c, 0, length());  // (c, start, _length) form over the whole string
  1.3957 +}
  1.3958 +
  1.3959 +// Applies pinIndex() to start, then hands doIndexOf() the range (start, length() - start).
  1.3960 +inline int32_t UnicodeString::indexOf(UChar c, int32_t start) const {
  1.3961 +  pinIndex(start);
  1.3962 +  const int32_t remaining = length() - start;
  1.3963 +  return doIndexOf(c, start, remaining);
  1.3964 +}
  1.3965 +
  1.3966 +// Applies pinIndex() to start, then delegates with the range (start, length() - start).
  1.3967 +inline int32_t UnicodeString::indexOf(UChar32 c, int32_t start) const {
  1.3968 +  pinIndex(start);
  1.3969 +  const int32_t remaining = length() - start;
  1.3970 +  return indexOf(c, start, remaining);
  1.3971 +}
  1.3972 +
  1.3973 +// Delegates to the five-argument overload with srcStart fixed at 0.
  1.3974 +inline int32_t
  1.3975 +UnicodeString::lastIndexOf(const UChar *srcChars, int32_t srcLength,
  1.3976 +                           int32_t start, int32_t _length) const {
  1.3977 +  return lastIndexOf(srcChars, 0, srcLength, start, _length);
  1.3978 +}
  1.3979 +
  1.3980 +// Applies pinIndex() to start, then delegates with srcStart 0 and the range
  1.3981 +// (start, length() - start) of this string.
  1.3982 +inline int32_t UnicodeString::lastIndexOf(const UChar *srcChars, int32_t srcLength, int32_t start) const {
  1.3983 +  pinIndex(start);
  1.3984 +  const int32_t remaining = length() - start;
  1.3985 +  return lastIndexOf(srcChars, 0, srcLength, start, remaining);
  1.3986 +}
  1.3987 +
  1.3988 +// Returns -1 when srcText is bogus, or when srcLength is not positive after
  1.3989 +// srcText.pinIndices(); otherwise delegates to the UChar* overload.
  1.3990 +inline int32_t
  1.3991 +UnicodeString::lastIndexOf(const UnicodeString& srcText,
  1.3992 +                           int32_t srcStart, int32_t srcLength,
  1.3993 +                           int32_t start, int32_t _length) const {
  1.3994 +  if(srcText.isBogus()) {
  1.3995 +    return -1;
  1.3996 +  }
  1.3997 +  srcText.pinIndices(srcStart, srcLength);
  1.3998 +  if(srcLength <= 0) {
  1.3999 +    return -1;
  1.4000 +  }
  1.4001 +  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  1.4002 +}
  1.4003 +
  1.4004 +// Passes all of text (offset 0, text.length()) to the five-argument overload.
  1.4005 +inline int32_t
  1.4006 +UnicodeString::lastIndexOf(const UnicodeString& text, int32_t start, int32_t _length) const {
  1.4007 +  return lastIndexOf(text, 0, text.length(), start, _length);
  1.4008 +}
  1.4009 +
  1.4010 +// Applies pinIndex() to start, then delegates with all of text and the range (start, length() - start).
  1.4011 +inline int32_t UnicodeString::lastIndexOf(const UnicodeString& text, int32_t start) const {
  1.4012 +  pinIndex(start);
  1.4013 +  const int32_t remaining = length() - start;
  1.4014 +  return lastIndexOf(text, 0, text.length(), start, remaining);
  1.4015 +}
  1.4016 +
  1.4017 +inline int32_t UnicodeString::lastIndexOf(const UnicodeString& text) const {
  1.4018 +  return lastIndexOf(text, 0, text.length(), 0, length());  // all of text, all of this string
  1.4019 +}
  1.4020 +
  1.4021 +// Forwards the UChar c and the range unchanged to doLastIndexOf().
  1.4022 +inline int32_t
  1.4023 +UnicodeString::lastIndexOf(UChar c, int32_t start, int32_t _length) const {
  1.4024 +  return doLastIndexOf(c, start, _length);
  1.4025 +}
  1.4026 +
  1.4027 +// Forwards the UChar32 c and the range unchanged to doLastIndexOf().
  1.4028 +inline int32_t
  1.4029 +UnicodeString::lastIndexOf(UChar32 c, int32_t start, int32_t _length) const
  1.4030 +{
  1.4031 +  return doLastIndexOf(c, start, _length);
  1.4032 +}
  1.4033 +
  1.4034 +inline int32_t UnicodeString::lastIndexOf(UChar c) const {
  1.4035 +  return doLastIndexOf(c, 0, length());  // range (0, length()): the whole string
  1.4036 +}
  1.4037 +
  1.4038 +// Delegates to the (c, start, _length) form with the range (0, length()).
  1.4039 +inline int32_t
  1.4040 +UnicodeString::lastIndexOf(UChar32 c) const
  1.4041 +{ return lastIndexOf(c, 0, length()); }
  1.4042 +
  1.4043 +// Applies pinIndex() to start, then hands doLastIndexOf() the range (start, length() - start).
  1.4044 +inline int32_t UnicodeString::lastIndexOf(UChar c, int32_t start) const {
  1.4045 +  pinIndex(start);
  1.4046 +  const int32_t remaining = length() - start;
  1.4047 +  return doLastIndexOf(c, start, remaining);
  1.4048 +}
  1.4049 +
  1.4050 +// Applies pinIndex() to start, then delegates with the range (start, length() - start).
  1.4051 +inline int32_t UnicodeString::lastIndexOf(UChar32 c, int32_t start) const {
  1.4052 +  pinIndex(start);
  1.4053 +  const int32_t remaining = length() - start;
  1.4054 +  return lastIndexOf(c, start, remaining);
  1.4055 +}
  1.4056 +
  1.4057 +inline UBool UnicodeString::startsWith(const UnicodeString& text) const {
  1.4058 +  return compare(0, text.length(), text, 0, text.length()) == 0;  // range (0, text.length()) vs. all of text
  1.4059 +}
  1.4060 +
  1.4061 +// True when doCompare() yields 0 for this string's (0, srcLength) against srcText's (srcStart, srcLength).
  1.4062 +inline UBool
  1.4063 +UnicodeString::startsWith(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) const {
  1.4064 +  return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0;
  1.4065 +}
  1.4066 +
  1.4067 +// A negative srcLength is replaced by u_strlen(srcChars); this string's range
  1.4068 +// (0, length) is then compared with srcChars from offset 0.
  1.4069 +inline UBool UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const
  1.4070 +{
  1.4071 +  const int32_t patternLength = (srcLength < 0) ? u_strlen(srcChars) : srcLength;
  1.4072 +  return doCompare(0, patternLength, srcChars, 0, patternLength) == 0;
  1.4073 +}
  1.4074 +
  1.4075 +// A negative srcLength means the length is obtained with u_strlen(). It is
  1.4076 +// measured from srcChars + srcStart, the offset handed to doCompare() (as the
  1.4077 +// matching endsWith() overload does), so the range stays within the terminated text.
  1.4078 +inline UBool UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
  1.4079 +  if(srcLength < 0) { srcLength = u_strlen(srcChars + srcStart); }
  1.4080 +  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
  1.4081 +}
  1.4082 +
  1.4083 +inline UBool UnicodeString::endsWith(const UnicodeString& text) const {
  1.4084 +  const int32_t textLength = text.length();  // compared range starts at length() - textLength
  1.4085 +  return doCompare(length() - textLength, textLength, text, 0, textLength) == 0;
  1.4086 +}
  1.4087 +
  1.4088 +// srcText.pinIndices() is applied to srcStart/srcLength first, so the tail
  1.4089 +// offset length() - srcLength is computed from the value it leaves in srcLength.
  1.4090 +inline UBool
  1.4091 +UnicodeString::endsWith(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) const {
  1.4092 +  srcText.pinIndices(srcStart, srcLength);
  1.4093 +  const int32_t tailStart = length() - srcLength;
  1.4094 +  return doCompare(tailStart, srcLength, srcText, srcStart, srcLength) == 0;
  1.4095 +}
  1.4096 +
  1.4097 +// A negative srcLength is replaced by u_strlen(srcChars); the range of that
  1.4098 +// length at the end of this string is then compared with srcChars from offset 0.
  1.4099 +inline UBool
  1.4100 +UnicodeString::endsWith(const UChar *srcChars, int32_t srcLength) const
  1.4101 +{
  1.4102 +  const int32_t patternLength = (srcLength < 0) ? u_strlen(srcChars) : srcLength;
  1.4103 +  const int32_t tailStart = length() - patternLength;
  1.4104 +  return doCompare(tailStart, patternLength, srcChars, 0, patternLength) == 0;
  1.4105 +}
  1.4106 +
  1.4107 +// A negative srcLength is replaced by u_strlen(srcChars + srcStart), i.e. the
  1.4108 +// length is measured from the same offset that is handed to doCompare().
  1.4109 +inline UBool
  1.4110 +UnicodeString::endsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const
  1.4111 +{
  1.4112 +  const int32_t patternLength =
  1.4113 +      (srcLength < 0) ? u_strlen(srcChars + srcStart) : srcLength;
  1.4114 +  const int32_t tailStart = length() - patternLength;
  1.4115 +  return doCompare(tailStart, patternLength, srcChars, srcStart, patternLength) == 0;
  1.4116 +}
  1.4117 +
  1.4118 +//========================================
  1.4119 +// replace
  1.4120 +//========================================
  1.4121 +// Hands the range (start, _length) and all of srcText to doReplace().
  1.4122 +inline UnicodeString&
  1.4123 +UnicodeString::replace(int32_t start, int32_t _length, const UnicodeString& srcText) {
  1.4124 +  return doReplace(start, _length, srcText, 0, srcText.length());
  1.4125 +}
  1.4126 +
  1.4127 +// Pure forwarder: every argument is passed unchanged to doReplace().
  1.4128 +inline UnicodeString&
  1.4129 +UnicodeString::replace(int32_t start, int32_t _length,
  1.4130 +                       const UnicodeString& srcText,
  1.4131 +                       int32_t srcStart, int32_t srcLength) {
  1.4132 +  return doReplace(start, _length, srcText, srcStart, srcLength);
  1.4133 +}
  1.4134 +
  1.4135 +// Forwards to doReplace() with the source offset fixed at 0.
  1.4136 +inline UnicodeString&
  1.4137 +UnicodeString::replace(int32_t start, int32_t _length,
  1.4138 +                       const UChar *srcChars, int32_t srcLength) {
  1.4139 +  return doReplace(start, _length, srcChars, 0, srcLength);
  1.4140 +}
  1.4141 +
  1.4142 +// Pure forwarder: every argument is passed unchanged to doReplace().
  1.4143 +inline UnicodeString&
  1.4144 +UnicodeString::replace(int32_t start, int32_t _length,
  1.4145 +                       const UChar *srcChars,
  1.4146 +                       int32_t srcStart, int32_t srcLength) {
  1.4147 +  return doReplace(start, _length, srcChars, srcStart, srcLength);
  1.4148 +}
  1.4149 +
  1.4150 +// Passes the address of the by-value srcChar to doReplace() as a source of length 1.
  1.4151 +inline UnicodeString&
  1.4152 +UnicodeString::replace(int32_t start, int32_t _length, UChar srcChar) {
  1.4153 +  return doReplace(start, _length, &srcChar, 0, 1);
  1.4154 +}
  1.4155 +
  1.4156 +// Converts (start, limit) to (start, limit - start) and passes all of srcText to doReplace().
  1.4157 +inline UnicodeString&
  1.4158 +UnicodeString::replaceBetween(int32_t start, int32_t limit, const UnicodeString& srcText) {
  1.4159 +  return doReplace(start, limit - start, srcText, 0, srcText.length());
  1.4160 +}
  1.4161 +
  1.4162 +// Both start/limit pairs are converted to start/length pairs for doReplace().
  1.4163 +inline UnicodeString&
  1.4164 +UnicodeString::replaceBetween(int32_t start, int32_t limit,
  1.4165 +                              const UnicodeString& srcText,
  1.4166 +                              int32_t srcStart, int32_t srcLimit) {
  1.4167 +  return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart);
  1.4168 +}
  1.4169 +
  1.4170 +// Whole-range form: this string, oldText and newText are each passed in full.
  1.4171 +inline UnicodeString&
  1.4172 +UnicodeString::findAndReplace(const UnicodeString& oldText, const UnicodeString& newText) {
  1.4173 +  return findAndReplace(0, length(), oldText, 0, oldText.length(), newText, 0, newText.length());
  1.4174 +}
  1.4175 +
  1.4176 +// Passes the caller's (start, _length) for this string while oldText and
  1.4177 +// newText are each passed in full.
  1.4178 +inline UnicodeString&
  1.4179 +UnicodeString::findAndReplace(int32_t start, int32_t _length,
  1.4180 +                              const UnicodeString& oldText, const UnicodeString& newText) {
  1.4181 +  return findAndReplace(start, _length, oldText, 0, oldText.length(), newText, 0, newText.length());
  1.4182 +}
  1.4183 +
  1.4184 +// ============================
  1.4185 +// extract
  1.4186 +// ============================
  1.4187 +// Replaces target's range (0, target.length()) with this string's range (start, _length).
  1.4188 +inline void
  1.4189 +UnicodeString::doExtract(int32_t start, int32_t _length, UnicodeString& target) const {
  1.4190 +  target.replace(0, target.length(), *this, start, _length);
  1.4191 +}
  1.4192 +
  1.4193 +// Forwards all arguments unchanged to doExtract().
  1.4194 +inline void
  1.4195 +UnicodeString::extract(int32_t start, int32_t _length,
  1.4196 +                       UChar *target, int32_t targetStart) const {
  1.4197 +  doExtract(start, _length, target, targetStart);
  1.4198 +}
  1.4199 +
  1.4200 +// Forwards all arguments unchanged to the UnicodeString-target doExtract().
  1.4201 +inline void
  1.4202 +UnicodeString::extract(int32_t start, int32_t _length, UnicodeString& target) const {
  1.4203 +  doExtract(start, _length, target);
  1.4204 +}
  1.4205 +
  1.4206 +#if !UCONFIG_NO_CONVERSION
  1.4207 +
  1.4208 +// Form without a destination size. The size handed to the five-argument
  1.4209 +// extract() is 0xffffffff for a non-null dst and 0 for a null dst.
  1.4210 +inline int32_t
  1.4211 +UnicodeString::extract(int32_t start, int32_t _length,
  1.4212 +                       char *dst, const char *codepage) const
  1.4213 +{
  1.4214 +  // This dstSize value will be checked explicitly
  1.4215 +  const uint32_t dstSize = (dst != 0) ? 0xffffffff : 0;
  1.4216 +  return extract(start, _length, dst, dstSize, codepage);
  1.4217 +}
  1.4218 +
  1.4219 +#endif
  1.4220 +
  1.4221 +// pinIndex() is applied to both start and limit before the pair is converted
  1.4222 +// to the (start, limit - start) form passed to doExtract().
  1.4223 +inline void
  1.4224 +UnicodeString::extractBetween(int32_t start, int32_t limit,
  1.4225 +                              UChar *dst, int32_t dstStart) const {
  1.4226 +  pinIndex(start);
  1.4227 +  pinIndex(limit);
  1.4228 +  doExtract(start, limit - start, dst, dstStart);
  1.4229 +}
  1.4230 +
  1.4231 +// Start/limit form of tempSubString(): the length passed on is limit - start.
  1.4232 +inline UnicodeString
  1.4233 +UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const
  1.4234 +{ return tempSubString(start, limit - start); }
  1.4235 +
  1.4236 +// Bounds-checked read. Comparing as uint32_t makes a negative offset look huge,
  1.4237 +// so one test rejects offset < 0 and offset >= length(); both yield kInvalidUChar.
  1.4238 +inline UChar
  1.4239 +UnicodeString::doCharAt(int32_t offset) const {
  1.4240 +  if((uint32_t)offset >= (uint32_t)length()) {
  1.4241 +    return kInvalidUChar;
  1.4242 +  }
  1.4243 +  return getArrayStart()[offset];
  1.4244 +}
  1.4245 +
  1.4246 +inline UChar UnicodeString::charAt(int32_t offset) const {
  1.4247 +  return doCharAt(offset);  // the range check lives in doCharAt()
  1.4248 +}
  1.4249 +
  1.4250 +inline UChar UnicodeString::operator[] (int32_t offset) const {
  1.4251 +  return doCharAt(offset);  // same path as charAt(); the range check lives in doCharAt()
  1.4252 +}
  1.4253 +
  1.4254 +// Tests only fShortLength; setLength() stores -1 there for lengths above 127,
  1.4255 +// so such strings do not compare equal to 0 here.
  1.4256 +inline UBool UnicodeString::isEmpty() const
  1.4257 +{ return fShortLength == 0; }
  1.4258 +
  1.4259 +//========================================
  1.4260 +// Write implementation methods
  1.4261 +//========================================
  1.4262 +// len <= 127 is stored in fShortLength; larger values go to fUnion.fFields.fLength with fShortLength = -1.
  1.4263 +inline void UnicodeString::setLength(int32_t len) {
  1.4264 +  if(len > 127) {
  1.4265 +    fShortLength = (int8_t)-1;
  1.4266 +    fUnion.fFields.fLength = len;
  1.4267 +    return;
  1.4268 +  }
  1.4269 +  fShortLength = (int8_t)len;
  1.4270 +}
  1.4271 +
  1.4272 +// Sets fShortLength to 0 and fFlags to kShortString.
  1.4273 +inline void UnicodeString::setToEmpty() {
  1.4274 +  fFlags = kShortString;
  1.4275 +  fShortLength = 0;
  1.4276 +}
  1.4277 +
  1.4278 +// Stores the length via setLength() and records array/capacity in fUnion.fFields.
  1.4279 +inline void UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
  1.4280 +  setLength(len);
  1.4281 +  fUnion.fFields.fCapacity = capacity;
  1.4282 +  fUnion.fFields.fArray = array;
  1.4283 +}
  1.4284 +
  1.4285 +inline UnicodeString& UnicodeString::operator= (UChar ch) {
  1.4286 +  return doReplace(0, length(), &ch, 0, 1);  // range (0, length()) replaced by the single UChar ch
  1.4287 +}
  1.4288 +
  1.4289 +inline UnicodeString& UnicodeString::operator= (UChar32 ch) {
  1.4290 +  return replace(0, length(), ch);  // replace() over the range (0, length()) with the UChar32 ch
  1.4291 +}
  1.4292 +
  1.4293 +// unBogus() runs first; the range (0, length()) is then replaced by the
  1.4294 +// requested (srcStart, srcLength) range of srcText.
  1.4295 +inline UnicodeString&
  1.4296 +UnicodeString::setTo(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) {
  1.4297 +  unBogus();
  1.4298 +  const int32_t oldLength = length();
  1.4299 +  return doReplace(0, oldLength, srcText, srcStart, srcLength);
  1.4300 +}
  1.4301 +
  1.4302 +// unBogus() runs first; srcStart goes through srcText.pinIndex(), and the source
  1.4303 +// length passed to doReplace() is srcText.length() - srcStart.
  1.4304 +inline UnicodeString& UnicodeString::setTo(const UnicodeString& srcText, int32_t srcStart) {
  1.4305 +  unBogus();
  1.4306 +  srcText.pinIndex(srcStart);
  1.4307 +  const int32_t tailLength = srcText.length() - srcStart;
  1.4308 +  return doReplace(0, length(), srcText, srcStart, tailLength);
  1.4309 +}
  1.4310 +
  1.4311 +// Unlike the other setTo() overloads here, this one goes through copyFrom()
  1.4312 +// rather than unBogus() followed by a replace.
  1.4313 +inline UnicodeString& UnicodeString::setTo(const UnicodeString& srcText) {
  1.4314 +  return copyFrom(srcText);
  1.4315 +}
  1.4316 +
  1.4317 +// unBogus() runs first; the range (0, length()) is then replaced by srcChars
  1.4318 +// (source offset 0, source length srcLength).
  1.4319 +inline UnicodeString& UnicodeString::setTo(const UChar *srcChars, int32_t srcLength) {
  1.4320 +  unBogus();
  1.4321 +  const int32_t oldLength = length();
  1.4322 +  return doReplace(0, oldLength, srcChars, 0, srcLength);
  1.4323 +}
  1.4324 +
  1.4325 +// unBogus() runs first; the range (0, length()) is then replaced by the single UChar srcChar.
  1.4326 +inline UnicodeString&
  1.4327 +UnicodeString::setTo(UChar srcChar) {
  1.4328 +  unBogus();
  1.4329 +  return doReplace(0, length(), &srcChar, 0, 1);
  1.4330 +}
  1.4331 +
  1.4332 +// unBogus() runs first; replace() is then called over (0, length()) with the UChar32 srcChar.
  1.4333 +inline UnicodeString&
  1.4334 +UnicodeString::setTo(UChar32 srcChar) {
  1.4335 +  unBogus();
  1.4336 +  return replace(0, length(), srcChar);
  1.4337 +}
  1.4338 +
  1.4339 +// Appends via doReplace() on the empty range (length(), 0).
  1.4340 +inline UnicodeString&
  1.4341 +UnicodeString::append(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) {
  1.4342 +  return doReplace(length(), 0, srcText, srcStart, srcLength);
  1.4343 +}
  1.4344 +
  1.4345 +inline UnicodeString& UnicodeString::append(const UnicodeString& srcText) {
  1.4346 +  return doReplace(length(), 0, srcText, 0, srcText.length());  // all of srcText at (length(), 0)
  1.4347 +}
  1.4348 +
  1.4349 +// Appends via doReplace() on the empty range (length(), 0).
  1.4350 +inline UnicodeString&
  1.4351 +UnicodeString::append(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
  1.4352 +  return doReplace(length(), 0, srcChars, srcStart, srcLength);
  1.4353 +}
  1.4354 +
  1.4355 +// Appends via doReplace() on the empty range (length(), 0), source offset 0.
  1.4356 +inline UnicodeString& UnicodeString::append(const UChar *srcChars, int32_t srcLength) {
  1.4357 +  return doReplace(length(), 0, srcChars, 0, srcLength);
  1.4358 +}
  1.4359 +
  1.4360 +inline UnicodeString& UnicodeString::append(UChar srcChar) {
  1.4361 +  return doReplace(length(), 0, &srcChar, 0, 1);  // &srcChar used as a source of length 1
  1.4362 +}
  1.4363 +
  1.4364 +inline UnicodeString& UnicodeString::operator+= (UChar ch) {
  1.4365 +  return doReplace(length(), 0, &ch, 0, 1);  // &ch used as a source of length 1
  1.4366 +}
  1.4367 +
  1.4368 +// Calls append() with the UChar32 argument.
  1.4369 +inline UnicodeString&
  1.4370 +UnicodeString::operator+= (UChar32 ch)
  1.4371 +{ return append(ch); }
  1.4372 +
  1.4373 +inline UnicodeString& UnicodeString::operator+= (const UnicodeString& srcText) {
  1.4374 +  return doReplace(length(), 0, srcText, 0, srcText.length());  // all of srcText at (length(), 0)
  1.4375 +}
  1.4376 +
  1.4377 +// Inserts via doReplace() on the empty range (start, 0).
  1.4378 +inline UnicodeString&
  1.4379 +UnicodeString::insert(int32_t start, const UnicodeString& srcText,
  1.4380 +                      int32_t srcStart, int32_t srcLength) {
  1.4381 +  return doReplace(start, 0, srcText, srcStart, srcLength);
  1.4382 +}
  1.4383 +
  1.4384 +// Inserts all of srcText via doReplace() on the empty range (start, 0).
  1.4385 +inline UnicodeString& UnicodeString::insert(int32_t start, const UnicodeString& srcText) {
  1.4386 +  return doReplace(start, 0, srcText, 0, srcText.length());
  1.4387 +}
  1.4388 +
  1.4389 +// Inserts via doReplace() on the empty range (start, 0).
  1.4390 +inline UnicodeString&
  1.4391 +UnicodeString::insert(int32_t start, const UChar *srcChars,
  1.4392 +                      int32_t srcStart, int32_t srcLength) {
  1.4393 +  return doReplace(start, 0, srcChars, srcStart, srcLength);
  1.4394 +}
  1.4395 +
  1.4396 +// Inserts via doReplace() on the empty range (start, 0), source offset 0.
  1.4397 +inline UnicodeString&
  1.4398 +UnicodeString::insert(int32_t start, const UChar *srcChars, int32_t srcLength) {
  1.4399 +  return doReplace(start, 0, srcChars, 0, srcLength);
  1.4400 +}
  1.4401 +
  1.4402 +// Inserts via doReplace() on the empty range (start, 0), using &srcChar as a source of length 1.
  1.4403 +inline UnicodeString& UnicodeString::insert(int32_t start, UChar srcChar) {
  1.4404 +  return doReplace(start, 0, &srcChar, 0, 1);
  1.4405 +}
  1.4406 +
  1.4407 +// Inserts via replace() on the empty range (start, 0) with the UChar32 srcChar.
  1.4408 +inline UnicodeString& UnicodeString::insert(int32_t start, UChar32 srcChar) {
  1.4409 +  return replace(start, 0, srcChar);
  1.4410 +}
  1.4411 +
  1.4412 +
  1.4413 +// Empties the string. A bogus string is reset through setToEmpty(), which also
  1.4414 +// rewrites fFlags; a non-bogus string only has fShortLength set to 0.
  1.4415 +inline UnicodeString&
  1.4416 +UnicodeString::remove() {
  1.4417 +  // remove() of a bogus string makes the string empty and non-bogus
  1.4418 +  if(!isBogus()) {
  1.4419 +    fShortLength = 0;
  1.4420 +    return *this;
  1.4421 +  }
  1.4422 +  setToEmpty();
  1.4423 +  return *this;
  1.4424 +
  1.4425 +// (start <= 0, _length == INT32_MAX) is routed to the no-argument remove();
  1.4426 +// any other range goes to doReplace() with a NULL source of length 0.
  1.4427 +inline UnicodeString& UnicodeString::remove(int32_t start, int32_t _length) {
  1.4428 +  const UBool removesEverything = start <= 0 && _length == INT32_MAX;
  1.4429 +  if(!removesEverything) {
  1.4430 +    return doReplace(start, _length, NULL, 0, 0);
  1.4431 +  }
  1.4432 +  // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
  1.4433 +  return remove();
  1.4434 +}
  1.4435 +
  1.4436 +// Start/limit form: passes (start, limit - start) and a NULL source of length 0 to doReplace().
  1.4437 +inline UnicodeString& UnicodeString::removeBetween(int32_t start, int32_t limit) {
  1.4438 +  return doReplace(start, limit - start, NULL, 0, 0);
  1.4439 +}
  1.4440 +
  1.4441 +// Calls truncate(limit) first, then replaces the leading range (0, start) with nothing.
  1.4442 +inline UnicodeString & UnicodeString::retainBetween(int32_t start, int32_t limit) {
  1.4443 +  truncate(limit);
  1.4444 +  return doReplace(0, start, NULL, 0, 0);
  1.4445 +}
  1.4446 +
  1.4447 +// Returns TRUE only on the path that calls setLength(). The uint32_t comparison
  1.4448 +// sends a negative targetLength down the same FALSE path as a too-large one.
  1.4449 +inline UBool UnicodeString::truncate(int32_t targetLength) {
  1.4450 +  if(isBogus() && targetLength == 0) {
  1.4451 +    // truncate(0) of a bogus string makes the string empty and non-bogus
  1.4452 +    unBogus();
  1.4453 +    return FALSE;
  1.4454 +  }
  1.4455 +  if((uint32_t)targetLength >= (uint32_t)length()) {
  1.4456 +    return FALSE;
  1.4457 +  }
  1.4458 +  setLength(targetLength);
  1.4459 +  return TRUE;
  1.4460 +}
  1.4461 +
  1.4462 +inline UnicodeString& UnicodeString::reverse() {
  1.4463 +  return doReverse(0, length());  // range (0, length()): the whole string
  1.4464 +}
  1.4465 +
  1.4466 +// Forwards the range unchanged to doReverse().
  1.4467 +inline UnicodeString& UnicodeString::reverse(int32_t start, int32_t _length) {
  1.4468 +  return doReverse(start, _length);
  1.4469 +}
  1.4470 +
  1.4471 +U_NAMESPACE_END
  1.4472 +
  1.4473 +#endif
The Tor Browser / file diff

diff: intl/icu/source/common/unicode/unistr.h

intl/icu/source/common/unicode/unistr.h