intl/icu/source/common/unicode/unistr.h

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic, depending on private browsing
mode or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
There are some reservations about how to convince FindCookie users to
test the condition and pass a nullptr when disabling double key logic.

     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 1998-2013, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 *
     7 * File unistr.h
     8 *
     9 * Modification History:
    10 *
    11 *   Date        Name        Description
    12 *   09/25/98    stephen     Creation.
    13 *   11/11/98    stephen     Changed per 11/9 code review.
    14 *   04/20/99    stephen     Overhauled per 4/16 code review.
    15 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
    16 *                           handleReplaceBetween(); other methods unchanged.
    17 *   06/25/01    grhoten     Remove dependency on iostream.
    18 ******************************************************************************
    19 */
    21 #ifndef UNISTR_H
    22 #define UNISTR_H
    24 /**
    25  * \file 
    26  * \brief C++ API: Unicode String 
    27  */
    29 #include "unicode/utypes.h"
    30 #include "unicode/rep.h"
    31 #include "unicode/std_string.h"
    32 #include "unicode/stringpiece.h"
    33 #include "unicode/bytestream.h"
    34 #include "unicode/ucasemap.h"
    36 struct UConverter;          // unicode/ucnv.h
    37 class  StringThreadTest;
    39 #ifndef U_COMPARE_CODE_POINT_ORDER
    40 /* see also ustring.h and unorm.h */
    41 /**
    42  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
    43  * Compare strings in code point order instead of code unit order.
    44  * @stable ICU 2.2
    45  */
    46 #define U_COMPARE_CODE_POINT_ORDER  0x8000
    47 #endif
    49 #ifndef USTRING_H
    50 /**
    51  * \ingroup ustring_ustrlen
    52  */
    53 U_STABLE int32_t U_EXPORT2
    54 u_strlen(const UChar *s);
    55 #endif
    57 /**
    58  * \def U_STRING_CASE_MAPPER_DEFINED
    59  * @internal
    60  */
    61 #ifndef U_STRING_CASE_MAPPER_DEFINED
    62 #define U_STRING_CASE_MAPPER_DEFINED
    64 /**
    65  * Internal string case mapping function type.
    66  * @internal
    67  */
    68 typedef int32_t U_CALLCONV
    69 UStringCaseMapper(const UCaseMap *csm,
    70                   UChar *dest, int32_t destCapacity,
    71                   const UChar *src, int32_t srcLength,
    72                   UErrorCode *pErrorCode);
    74 #endif
    76 U_NAMESPACE_BEGIN
    78 class BreakIterator;        // unicode/brkiter.h
    79 class Locale;               // unicode/locid.h
    80 class StringCharacterIterator;
    81 class UnicodeStringAppendable;  // unicode/appendable.h
    83 /* The <iostream> include has been moved to unicode/ustream.h */
    85 /**
    86  * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
    87  * which constructs a Unicode string from an invariant-character char * string.
    88  * About invariant characters see utypes.h.
    89  * This constructor has no runtime dependency on conversion code and is
    90  * therefore recommended over ones taking a charset name string
    91  * (where the empty string "" indicates invariant-character conversion).
    92  *
    93  * @stable ICU 3.2
    94  */
    95 #define US_INV icu::UnicodeString::kInvariant
    97 /**
    98  * Unicode String literals in C++.
    99  * Dependent on the platform properties, different UnicodeString
   100  * constructors should be used to create a UnicodeString object from
   101  * a string literal.
   102  * The macros are defined for maximum performance.
   103  * They work only for strings that contain "invariant characters", i.e.,
   104  * only latin letters, digits, and some punctuation.
   105  * See utypes.h for details.
   106  *
   107  * The string parameter must be a C string literal.
   108  * The length of the string, not including the terminating
   109  * <code>NUL</code>, must be specified as a constant.
   110  * The U_STRING_DECL macro should be invoked exactly once for one
   111  * such string variable before it is used.
   112  * @stable ICU 2.0
   113  */
   114 #if defined(U_DECLARE_UTF16)  // U_DECLARE_UTF16 is available: wrap the literal with it
   115 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
   116 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))  // wchar_t is as wide as UChar (ASCII platform, or 2-byte UChar with UTF-16 wchar_t): paste L onto the literal
   117 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
   118 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY  // one-byte UChar on an ASCII platform: pass the narrow literal as is
   119 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
   120 #else  // none of the above: use the invariant-character constructor selected by US_INV
   121 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
   122 #endif  // UNICODE_STRING selection
   124 /**
   125  * Unicode String literals in C++.
   126  * Dependent on the platform properties, different UnicodeString
   127  * constructors should be used to create a UnicodeString object from
   128  * a string literal.
   129  * The macros are defined for improved performance.
   130  * They work only for strings that contain "invariant characters", i.e.,
   131  * only latin letters, digits, and some punctuation.
   132  * See utypes.h for details.
   133  *
   134  * The string parameter must be a C string literal.
   135  * @stable ICU 2.0
   136  */
   137 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
   139 /**
   140  * \def UNISTR_FROM_CHAR_EXPLICIT
   141  * This can be defined to be empty or "explicit".
   142  * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
   143  * constructors are marked as explicit, preventing their inadvertent use.
   144  * @stable ICU 49
   145  */
   146 #ifndef UNISTR_FROM_CHAR_EXPLICIT
   147 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
   148     // Auto-"explicit" in ICU library code.
   149 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
   150 # else
   151     // Empty by default for source code compatibility.
   152 #   define UNISTR_FROM_CHAR_EXPLICIT
   153 # endif
   154 #endif
   156 /**
   157  * \def UNISTR_FROM_STRING_EXPLICIT
   158  * This can be defined to be empty or "explicit".
   159  * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
   160  * constructors are marked as explicit, preventing their inadvertent use.
   161  *
   162  * In particular, this helps prevent accidentally depending on ICU conversion code
   163  * by passing a string literal into an API with a const UnicodeString & parameter.
   164  * @stable ICU 49
   165  */
   166 #ifndef UNISTR_FROM_STRING_EXPLICIT
   167 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
   168     // Auto-"explicit" in ICU library code.
   169 #   define UNISTR_FROM_STRING_EXPLICIT explicit
   170 # else
   171     // Empty by default for source code compatibility.
   172 #   define UNISTR_FROM_STRING_EXPLICIT
   173 # endif
   174 #endif
   176 /**
   177  * UnicodeString is a string class that stores Unicode characters directly and provides
   178  * functionality similar to that of the Java String and StringBuffer classes.
   179  * It is a concrete implementation of the abstract class Replaceable (for transliteration).
   180  *
   181  * The UnicodeString class is not suitable for subclassing.
   182  *
   183  * <p>For an overview of Unicode strings in C and C++ see the
   184  * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
   185  *
   186  * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
   187  * A Unicode character may be stored with either one code unit
   188  * (the most common case) or with a matched pair of special code units
   189  * ("surrogates"). The data type for code units is UChar. 
   190  * For single-character handling, a Unicode character code <em>point</em> is a value
   191  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
   192  *
   193  * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
   194  * This is the same as with multi-byte char* strings in traditional string handling.
   195  * Operations on partial strings typically do not test for code point boundaries.
   196  * If necessary, the user needs to take care of such boundaries by testing for the code unit
   197  * values or by using functions like
   198  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
   199  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
   200  *
   201  * UnicodeString methods are more lenient with regard to input parameter values
   202  * than other ICU APIs. In particular:
   203  * - If indexes are out of bounds for a UnicodeString object
   204  *   (<0 or >length()) then they are "pinned" to the nearest boundary.
   205  * - If primitive string pointer values (e.g., const UChar * or char *)
   206  *   for input strings are NULL, then those input string parameters are treated
   207  *   as if they pointed to an empty string.
   208  *   However, this is <em>not</em> the case for char * parameters for charset names
   209  *   or other IDs.
   210  * - Most UnicodeString methods do not take a UErrorCode parameter because
   211  *   there are usually very few opportunities for failure other than a shortage
   212  *   of memory, error codes in low-level C++ string methods would be inconvenient,
   213  *   and the error code as the last parameter (ICU convention) would prevent
   214  *   the use of default parameter values.
   215  *   Instead, such methods set the UnicodeString into a "bogus" state
   216  *   (see isBogus()) if an error occurs.
   217  *
   218  * In string comparisons, two UnicodeString objects that are both "bogus"
   219  * compare equal (to be transitive and prevent endless loops in sorting),
   220  * and a "bogus" string compares less than any non-"bogus" one.
   221  *
   222  * Const UnicodeString methods are thread-safe. Multiple threads can use
   223  * const methods on the same UnicodeString object simultaneously,
   224  * but non-const methods must not be called concurrently (in multiple threads)
   225  * with any other (const or non-const) methods.
   226  *
   227  * Similarly, const UnicodeString & parameters are thread-safe.
   228  * One object may be passed in as such a parameter concurrently in multiple threads.
   229  * This includes the const UnicodeString & parameters for
   230  * copy construction, assignment, and cloning.
   231  *
   232  * <p>UnicodeString uses several storage methods.
   233  * String contents can be stored inside the UnicodeString object itself,
   234  * in an allocated and shared buffer, or in an outside buffer that is "aliased".
   235  * Most of this is done transparently, but careful aliasing in particular provides
   236  * significant performance improvements.
   237  * Also, the internal buffer is accessible via special functions.
   238  * For details see the
   239  * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
   240  *
   241  * @see utf.h
   242  * @see CharacterIterator
   243  * @stable ICU 2.0
   244  */
   245 class U_COMMON_API UnicodeString : public Replaceable
   246 {
   247 public:
   249   /**
   250    * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
   251    * which constructs a Unicode string from an invariant-character char * string.
   252    * Use the macro US_INV instead of the full qualification for this value.
   253    *
   254    * @see US_INV
   255    * @stable ICU 3.2
   256    */
   257   enum EInvariant {
   258     /**
   259      * @see EInvariant
   260      * @stable ICU 3.2
   261      */
   262     kInvariant
   263   };
   265   //========================================
   266   // Read-only operations
   267   //========================================
   269   /* Comparison - bitwise only - for international comparison use collation */
   271   /**
   272    * Equality operator. Performs only bitwise comparison.
   273    * @param text The UnicodeString to compare to this one.
   274    * @return TRUE if <TT>text</TT> contains the same characters as this one,
   275    * FALSE otherwise.
   276    * @stable ICU 2.0
   277    */
   278   inline UBool operator== (const UnicodeString& text) const;
   280   /**
   281    * Inequality operator. Performs only bitwise comparison.
   282    * @param text The UnicodeString to compare to this one.
   283    * @return FALSE if <TT>text</TT> contains the same characters as this one,
   284    * TRUE otherwise.
   285    * @stable ICU 2.0
   286    */
   287   inline UBool operator!= (const UnicodeString& text) const;
   289   /**
   290    * Greater than operator. Performs only bitwise comparison.
   291    * @param text The UnicodeString to compare to this one.
   292    * @return TRUE if the characters in this are bitwise
   293    * greater than the characters in <code>text</code>, FALSE otherwise
   294    * @stable ICU 2.0
   295    */
   296   inline UBool operator> (const UnicodeString& text) const;
   298   /**
   299    * Less than operator. Performs only bitwise comparison.
   300    * @param text The UnicodeString to compare to this one.
   301    * @return TRUE if the characters in this are bitwise
   302    * less than the characters in <code>text</code>, FALSE otherwise
   303    * @stable ICU 2.0
   304    */
   305   inline UBool operator< (const UnicodeString& text) const;
   307   /**
   308    * Greater than or equal operator. Performs only bitwise comparison.
   309    * @param text The UnicodeString to compare to this one.
   310    * @return TRUE if the characters in this are bitwise
   311    * greater than or equal to the characters in <code>text</code>, FALSE otherwise
   312    * @stable ICU 2.0
   313    */
   314   inline UBool operator>= (const UnicodeString& text) const;
   316   /**
   317    * Less than or equal operator. Performs only bitwise comparison.
   318    * @param text The UnicodeString to compare to this one.
   319    * @return TRUE if the characters in this are bitwise
   320    * less than or equal to the characters in <code>text</code>, FALSE otherwise
   321    * @stable ICU 2.0
   322    */
   323   inline UBool operator<= (const UnicodeString& text) const;
   325   /**
   326    * Compare the characters bitwise in this UnicodeString to
   327    * the characters in <code>text</code>.
   328    * @param text The UnicodeString to compare to this one.
   329    * @return The result of bitwise character comparison: 0 if this
   330    * contains the same characters as <code>text</code>, -1 if the characters in
   331    * this are bitwise less than the characters in <code>text</code>, +1 if the
   332    * characters in this are bitwise greater than the characters
   333    * in <code>text</code>.
   334    * @stable ICU 2.0
   335    */
   336   inline int8_t compare(const UnicodeString& text) const;
   338   /**
   339    * Compare the characters bitwise in the range
   340    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
   341    * in the <b>entire string</b> <TT>text</TT>.
   342    * (The parameters "start" and "length" are not applied to the other text "text".)
   343    * @param start the offset at which the compare operation begins
   344    * @param length the number of characters of text to compare.
   345    * @param text the other text to be compared against this string.
   346    * @return The result of bitwise character comparison: 0 if this
   347    * contains the same characters as <code>text</code>, -1 if the characters in
   348    * this are bitwise less than the characters in <code>text</code>, +1 if the
   349    * characters in this are bitwise greater than the characters
   350    * in <code>text</code>.
   351    * @stable ICU 2.0
   352    */
   353   inline int8_t compare(int32_t start,
   354          int32_t length,
   355          const UnicodeString& text) const;
   357   /**
   358    * Compare the characters bitwise in the range
   359    * [<TT>start</TT>, <TT>start + length</TT>) of this string with the characters
   360    * in <TT>srcText</TT> in the range
   361    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   362    * @param start the offset in this string at which the compare operation begins
   363    * @param length the number of characters in this string to compare
   364    * @param srcText the text to be compared
   365    * @param srcStart the offset into <TT>srcText</TT> to start comparison
   366    * @param srcLength the number of characters in <TT>srcText</TT> to compare
   367    * @return The result of bitwise character comparison: 0 if this
   368    * contains the same characters as <code>srcText</code>, -1 if the characters in
   369    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
   370    * characters in this are bitwise greater than the characters
   371    * in <code>srcText</code>.
   372    * @stable ICU 2.0
   373    */
   374    inline int8_t compare(int32_t start,
   375          int32_t length,
   376          const UnicodeString& srcText,
   377          int32_t srcStart,
   378          int32_t srcLength) const;
   380   /**
   381    * Compare the characters bitwise in this UnicodeString with the first
   382    * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
   383    * @param srcChars The characters to compare to this UnicodeString.
   384    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
   385    * @return The result of bitwise character comparison: 0 if this
   386    * contains the same characters as <code>srcChars</code>, -1 if the characters in
   387    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
   388    * characters in this are bitwise greater than the characters
   389    * in <code>srcChars</code>.
   390    * @stable ICU 2.0
   391    */
   392   inline int8_t compare(const UChar *srcChars,
   393          int32_t srcLength) const;
   395   /**
   396    * Compare the characters bitwise in the range
   397    * [<TT>start</TT>, <TT>start + length</TT>) of this string with the first
   398    * <TT>length</TT> characters in <TT>srcChars</TT>.
   399    * @param start the offset in this string at which the compare operation begins
   400    * @param length the number of characters to compare, both in this string and in <TT>srcChars</TT>
   401    * @param srcChars the characters to be compared
   402    * @return The result of bitwise character comparison: 0 if this
   403    * contains the same characters as <code>srcChars</code>, -1 if the characters in
   404    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
   405    * characters in this are bitwise greater than the characters
   406    * in <code>srcChars</code>.
   407    * @stable ICU 2.0
   408    */
   409   inline int8_t compare(int32_t start,
   410          int32_t length,
   411          const UChar *srcChars) const;
   413   /**
   414    * Compare the characters bitwise in the range
   415    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
   416    * in <TT>srcChars</TT> in the range
   417    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   418    * @param start the offset at which the compare operation begins
   419    * @param length the number of characters in this to compare
   420    * @param srcChars the characters to be compared
   421    * @param srcStart the offset into <TT>srcChars</TT> to start comparison
   422    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
   423    * @return The result of bitwise character comparison: 0 if this
   424    * contains the same characters as <code>srcChars</code>, -1 if the characters in
   425    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
   426    * characters in this are bitwise greater than the characters
   427    * in <code>srcChars</code>.
   428    * @stable ICU 2.0
   429    */
   430   inline int8_t compare(int32_t start,
   431          int32_t length,
   432          const UChar *srcChars,
   433          int32_t srcStart,
   434          int32_t srcLength) const;
   436   /**
   437    * Compare the characters bitwise in the range
   438    * [<TT>start</TT>, <TT>limit</TT>) with the characters
   439    * in <TT>srcText</TT> in the range
   440    * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
   441    * @param start the offset at which the compare operation begins
   442    * @param limit the offset immediately following the compare operation
   443    * @param srcText the text to be compared
   444    * @param srcStart the offset into <TT>srcText</TT> to start comparison
   445    * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
   446    * @return The result of bitwise character comparison: 0 if this
   447    * contains the same characters as <code>srcText</code>, -1 if the characters in
   448    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
   449    * characters in this are bitwise greater than the characters
   450    * in <code>srcText</code>.
   451    * @stable ICU 2.0
   452    */
   453   inline int8_t compareBetween(int32_t start,
   454             int32_t limit,
   455             const UnicodeString& srcText,
   456             int32_t srcStart,
   457             int32_t srcLimit) const;
   459   /**
   460    * Compare two Unicode strings in code point order.
   461    * The result may be different from the results of compare(), operator<, etc.
   462    * if supplementary characters are present:
   463    *
   464    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   465    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   466    * which means that they compare as less than some other BMP characters like U+feff.
   467    * This function compares Unicode strings in code point order.
   468    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   469    *
   470    * @param text Another string to compare this one to.
   471    * @return a negative/zero/positive integer corresponding to whether
   472    * this string is less than/equal to/greater than the second one
   473    * in code point order
   474    * @stable ICU 2.0
   475    */
   476   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
   478   /**
   479    * Compare two Unicode strings in code point order.
   480    * The result may be different from the results of compare(), operator<, etc.
   481    * if supplementary characters are present:
   482    *
   483    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   484    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   485    * which means that they compare as less than some other BMP characters like U+feff.
   486    * This function compares Unicode strings in code point order.
   487    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   488    *
   489    * @param start The start offset in this string at which the compare operation begins.
   490    * @param length The number of code units from this string to compare.
   491    * @param srcText Another string to compare this one to.
   492    * @return a negative/zero/positive integer corresponding to whether
   493    * this string is less than/equal to/greater than the second one
   494    * in code point order
   495    * @stable ICU 2.0
   496    */
   497   inline int8_t compareCodePointOrder(int32_t start,
   498                                       int32_t length,
   499                                       const UnicodeString& srcText) const;
   501   /**
   502    * Compare two Unicode strings in code point order.
   503    * The result may be different from the results of compare(), operator<, etc.
   504    * if supplementary characters are present:
   505    *
   506    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   507    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   508    * which means that they compare as less than some other BMP characters like U+feff.
   509    * This function compares Unicode strings in code point order.
   510    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   511    *
   512    * @param start The start offset in this string at which the compare operation begins.
   513    * @param length The number of code units from this string to compare.
   514    * @param srcText Another string to compare this one to.
   515    * @param srcStart The start offset in that string at which the compare operation begins.
   516    * @param srcLength The number of code units from that string to compare.
   517    * @return a negative/zero/positive integer corresponding to whether
   518    * this string is less than/equal to/greater than the second one
   519    * in code point order
   520    * @stable ICU 2.0
   521    */
   522    inline int8_t compareCodePointOrder(int32_t start,
   523                                        int32_t length,
   524                                        const UnicodeString& srcText,
   525                                        int32_t srcStart,
   526                                        int32_t srcLength) const;
   528   /**
   529    * Compare two Unicode strings in code point order.
   530    * The result may be different from the results of compare(), operator<, etc.
   531    * if supplementary characters are present:
   532    *
   533    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   534    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   535    * which means that they compare as less than some other BMP characters like U+feff.
   536    * This function compares Unicode strings in code point order.
   537    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   538    *
   539    * @param srcChars A pointer to another string to compare this one to.
   540    * @param srcLength The number of code units from that string to compare.
   541    * @return a negative/zero/positive integer corresponding to whether
   542    * this string is less than/equal to/greater than the second one
   543    * in code point order
   544    * @stable ICU 2.0
   545    */
   546   inline int8_t compareCodePointOrder(const UChar *srcChars,
   547                                       int32_t srcLength) const;
   549   /**
   550    * Compare two Unicode strings in code point order.
   551    * The result may be different from the results of compare(), operator<, etc.
   552    * if supplementary characters are present:
   553    *
   554    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   555    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   556    * which means that they compare as less than some other BMP characters like U+feff.
   557    * This function compares Unicode strings in code point order.
   558    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   559    *
   560    * @param start The start offset in this string at which the compare operation begins.
   561    * @param length The number of code units from this string to compare.
   562    * @param srcChars A pointer to another string to compare this one to.
   563    * @return a negative/zero/positive integer corresponding to whether
   564    * this string is less than/equal to/greater than the second one
   565    * in code point order
   566    * @stable ICU 2.0
   567    */
   568   inline int8_t compareCodePointOrder(int32_t start,
   569                                       int32_t length,
   570                                       const UChar *srcChars) const;
   572   /**
   573    * Compare two Unicode strings in code point order.
   574    * The result may be different from the results of compare(), operator<, etc.
   575    * if supplementary characters are present:
   576    *
   577    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   578    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   579    * which means that they compare as less than some other BMP characters like U+feff.
   580    * This function compares Unicode strings in code point order.
   581    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   582    *
   583    * @param start The start offset in this string at which the compare operation begins.
   584    * @param length The number of code units from this string to compare.
   585    * @param srcChars A pointer to another string to compare this one to.
   586    * @param srcStart The start offset in that string at which the compare operation begins.
   587    * @param srcLength The number of code units from that string to compare.
   588    * @return a negative/zero/positive integer corresponding to whether
   589    * this string is less than/equal to/greater than the second one
   590    * in code point order
   591    * @stable ICU 2.0
   592    */
   593   inline int8_t compareCodePointOrder(int32_t start,
   594                                       int32_t length,
   595                                       const UChar *srcChars,
   596                                       int32_t srcStart,
   597                                       int32_t srcLength) const;
   599   /**
   600    * Compare two Unicode strings in code point order.
   601    * The result may be different from the results of compare(), operator<, etc.
   602    * if supplementary characters are present:
   603    *
   604    * In UTF-16, supplementary characters (with code points U+10000 and above) are
   605    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
   606    * which means that they compare as less than some other BMP characters like U+feff.
   607    * This function compares Unicode strings in code point order.
   608    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
   609    *
   610    * @param start The start offset in this string at which the compare operation begins.
   611    * @param limit The offset after the last code unit from this string to compare.
   612    * @param srcText Another string to compare this one to.
   613    * @param srcStart The start offset in that string at which the compare operation begins.
   614    * @param srcLimit The offset after the last code unit from that string to compare.
   615    * @return a negative/zero/positive integer corresponding to whether
   616    * this string is less than/equal to/greater than the second one
   617    * in code point order
   618    * @stable ICU 2.0
   619    */
   620   inline int8_t compareCodePointOrderBetween(int32_t start,
   621                                              int32_t limit,
   622                                              const UnicodeString& srcText,
   623                                              int32_t srcStart,
   624                                              int32_t srcLimit) const;
   626   /**
   627    * Compare two strings case-insensitively using full case folding.
   628    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
   629    *
   630    * @param text Another string to compare this one to.
   631    * @param options A bit set of options:
   632    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   633    *     Comparison in code unit order with default case folding.
   634    *
   635    *   - U_COMPARE_CODE_POINT_ORDER
   636    *     Set to choose code point order instead of code unit order
   637    *     (see u_strCompare for details).
   638    *
   639    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   640    *
   641    * @return A negative, zero, or positive integer indicating the comparison result.
   642    * @stable ICU 2.0
   643    */
   644   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
   646   /**
   647    * Compare two strings case-insensitively using full case folding.
   648    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
   649    *
   650    * @param start The start offset in this string at which the compare operation begins.
   651    * @param length The number of code units from this string to compare.
   652    * @param srcText Another string to compare this one to.
   653    * @param options A bit set of options:
   654    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   655    *     Comparison in code unit order with default case folding.
   656    *
   657    *   - U_COMPARE_CODE_POINT_ORDER
   658    *     Set to choose code point order instead of code unit order
   659    *     (see u_strCompare for details).
   660    *
   661    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   662    *
   663    * @return A negative, zero, or positive integer indicating the comparison result.
   664    * @stable ICU 2.0
   665    */
   666   inline int8_t caseCompare(int32_t start,
   667          int32_t length,
   668          const UnicodeString& srcText,
   669          uint32_t options) const;
   671   /**
   672    * Compare two strings case-insensitively using full case folding.
   673    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
   674    *
   675    * @param start The start offset in this string at which the compare operation begins.
   676    * @param length The number of code units from this string to compare.
   677    * @param srcText Another string to compare this one to.
   678    * @param srcStart The start offset in that string at which the compare operation begins.
   679    * @param srcLength The number of code units from that string to compare.
   680    * @param options A bit set of options:
   681    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   682    *     Comparison in code unit order with default case folding.
   683    *
   684    *   - U_COMPARE_CODE_POINT_ORDER
   685    *     Set to choose code point order instead of code unit order
   686    *     (see u_strCompare for details).
   687    *
   688    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   689    *
   690    * @return A negative, zero, or positive integer indicating the comparison result.
   691    * @stable ICU 2.0
   692    */
   693   inline int8_t caseCompare(int32_t start,
   694          int32_t length,
   695          const UnicodeString& srcText,
   696          int32_t srcStart,
   697          int32_t srcLength,
   698          uint32_t options) const;
   700   /**
   701    * Compare two strings case-insensitively using full case folding.
   702    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   703    *
   704    * @param srcChars A pointer to another string to compare this one to.
   705    * @param srcLength The number of code units from that string to compare.
   706    * @param options A bit set of options:
   707    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   708    *     Comparison in code unit order with default case folding.
   709    *
   710    *   - U_COMPARE_CODE_POINT_ORDER
   711    *     Set to choose code point order instead of code unit order
   712    *     (see u_strCompare for details).
   713    *
   714    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   715    *
   716    * @return A negative, zero, or positive integer indicating the comparison result.
   717    * @stable ICU 2.0
   718    */
   719   inline int8_t caseCompare(const UChar *srcChars,
   720          int32_t srcLength,
   721          uint32_t options) const;
   723   /**
   724    * Compare two strings case-insensitively using full case folding.
   725    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   726    *
   727    * @param start The start offset in this string at which the compare operation begins.
   728    * @param length The number of code units from this string to compare.
   729    * @param srcChars A pointer to another string to compare this one to.
   730    * @param options A bit set of options:
   731    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   732    *     Comparison in code unit order with default case folding.
   733    *
   734    *   - U_COMPARE_CODE_POINT_ORDER
   735    *     Set to choose code point order instead of code unit order
   736    *     (see u_strCompare for details).
   737    *
   738    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   739    *
   740    * @return A negative, zero, or positive integer indicating the comparison result.
   741    * @stable ICU 2.0
   742    */
   743   inline int8_t caseCompare(int32_t start,
   744          int32_t length,
   745          const UChar *srcChars,
   746          uint32_t options) const;
   748   /**
   749    * Compare two strings case-insensitively using full case folding.
   750    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
   751    *
   752    * @param start The start offset in this string at which the compare operation begins.
   753    * @param length The number of code units from this string to compare.
   754    * @param srcChars A pointer to another string to compare this one to.
   755    * @param srcStart The start offset in that string at which the compare operation begins.
   756    * @param srcLength The number of code units from that string to compare.
   757    * @param options A bit set of options:
   758    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   759    *     Comparison in code unit order with default case folding.
   760    *
   761    *   - U_COMPARE_CODE_POINT_ORDER
   762    *     Set to choose code point order instead of code unit order
   763    *     (see u_strCompare for details).
   764    *
   765    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   766    *
   767    * @return A negative, zero, or positive integer indicating the comparison result.
   768    * @stable ICU 2.0
   769    */
   770   inline int8_t caseCompare(int32_t start,
   771          int32_t length,
   772          const UChar *srcChars,
   773          int32_t srcStart,
   774          int32_t srcLength,
   775          uint32_t options) const;
   777   /**
   778    * Compare two strings case-insensitively using full case folding.
   779    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
   780    *
   781    * @param start The start offset in this string at which the compare operation begins.
   782    * @param limit The offset after the last code unit from this string to compare.
   783    * @param srcText Another string to compare this one to.
   784    * @param srcStart The start offset in that string at which the compare operation begins.
   785    * @param srcLimit The offset after the last code unit from that string to compare.
   786    * @param options A bit set of options:
   787    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   788    *     Comparison in code unit order with default case folding.
   789    *
   790    *   - U_COMPARE_CODE_POINT_ORDER
   791    *     Set to choose code point order instead of code unit order
   792    *     (see u_strCompare for details).
   793    *
   794    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   795    *
   796    * @return A negative, zero, or positive integer indicating the comparison result.
   797    * @stable ICU 2.0
   798    */
   799   inline int8_t caseCompareBetween(int32_t start,
   800             int32_t limit,
   801             const UnicodeString& srcText,
   802             int32_t srcStart,
   803             int32_t srcLimit,
   804             uint32_t options) const;
   806   /**
   807    * Determine if this starts with the characters in <TT>text</TT>
   808    * @param text The text to match.
   809    * @return TRUE if this starts with the characters in <TT>text</TT>,
   810    * FALSE otherwise
   811    * @stable ICU 2.0
   812    */
   813   inline UBool startsWith(const UnicodeString& text) const;
   815   /**
   816    * Determine if this starts with the characters in <TT>srcText</TT>
   817    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   818    * @param srcText The text to match.
   819    * @param srcStart the offset into <TT>srcText</TT> to start matching
   820    * @param srcLength the number of characters in <TT>srcText</TT> to match
   821    * @return TRUE if this starts with the characters in <TT>srcText</TT>,
   822    * FALSE otherwise
   823    * @stable ICU 2.0
   824    */
   825   inline UBool startsWith(const UnicodeString& srcText,
   826             int32_t srcStart,
   827             int32_t srcLength) const;
   829   /**
   830    * Determine if this starts with the characters in <TT>srcChars</TT>
   831    * @param srcChars The characters to match.
   832    * @param srcLength the number of characters in <TT>srcChars</TT>
   833    * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
   834    * FALSE otherwise
   835    * @stable ICU 2.0
   836    */
   837   inline UBool startsWith(const UChar *srcChars,
   838             int32_t srcLength) const;
   840   /**
   841    * Determine if this starts with the characters in <TT>srcChars</TT>
   842    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   843    * @param srcChars The characters to match.
   844    * @param srcStart the offset into <TT>srcChars</TT> to start matching
   845    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   846    * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
   847    * @stable ICU 2.0
   848    */
   849   inline UBool startsWith(const UChar *srcChars,
   850             int32_t srcStart,
   851             int32_t srcLength) const;
   853   /**
   854    * Determine if this ends with the characters in <TT>text</TT>
   855    * @param text The text to match.
   856    * @return TRUE if this ends with the characters in <TT>text</TT>,
   857    * FALSE otherwise
   858    * @stable ICU 2.0
   859    */
   860   inline UBool endsWith(const UnicodeString& text) const;
   862   /**
   863    * Determine if this ends with the characters in <TT>srcText</TT>
   864    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   865    * @param srcText The text to match.
   866    * @param srcStart the offset into <TT>srcText</TT> to start matching
   867    * @param srcLength the number of characters in <TT>srcText</TT> to match
   868    * @return TRUE if this ends with the characters in <TT>srcText</TT>,
   869    * FALSE otherwise
   870    * @stable ICU 2.0
   871    */
   872   inline UBool endsWith(const UnicodeString& srcText,
   873           int32_t srcStart,
   874           int32_t srcLength) const;
   876   /**
   877    * Determine if this ends with the characters in <TT>srcChars</TT>
   878    * @param srcChars The characters to match.
   879    * @param srcLength the number of characters in <TT>srcChars</TT>
   880    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
   881    * FALSE otherwise
   882    * @stable ICU 2.0
   883    */
   884   inline UBool endsWith(const UChar *srcChars,
   885           int32_t srcLength) const;
   887   /**
   888    * Determine if this ends with the characters in <TT>srcChars</TT>
   889    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   890    * @param srcChars The characters to match.
   891    * @param srcStart the offset into <TT>srcChars</TT> to start matching
   892    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   893    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
   894    * FALSE otherwise
   895    * @stable ICU 2.0
   896    */
   897   inline UBool endsWith(const UChar *srcChars,
   898           int32_t srcStart,
   899           int32_t srcLength) const;
   902   /* Searching - bitwise only */
   904   /**
   905    * Locate in this the first occurrence of the characters in <TT>text</TT>,
   906    * using bitwise comparison.
   907    * @param text The text to search for.
   908    * @return The offset into this of the start of <TT>text</TT>,
   909    * or -1 if not found.
   910    * @stable ICU 2.0
   911    */
   912   inline int32_t indexOf(const UnicodeString& text) const;
   914   /**
   915    * Locate in this the first occurrence of the characters in <TT>text</TT>
   916    * starting at offset <TT>start</TT>, using bitwise comparison.
   917    * @param text The text to search for.
   918    * @param start The offset at which searching will start.
   919    * @return The offset into this of the start of <TT>text</TT>,
   920    * or -1 if not found.
   921    * @stable ICU 2.0
   922    */
   923   inline int32_t indexOf(const UnicodeString& text,
   924               int32_t start) const;
   926   /**
   927    * Locate in this the first occurrence in the range
   928    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   929    * in <TT>text</TT>, using bitwise comparison.
   930    * @param text The text to search for.
   931    * @param start The offset at which searching will start.
   932    * @param length The number of characters to search
   933    * @return The offset into this of the start of <TT>text</TT>,
   934    * or -1 if not found.
   935    * @stable ICU 2.0
   936    */
   937   inline int32_t indexOf(const UnicodeString& text,
   938               int32_t start,
   939               int32_t length) const;
   941   /**
   942    * Locate in this the first occurrence in the range
   943    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   944    *  in <TT>srcText</TT> in the range
   945    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
   946    * using bitwise comparison.
   947    * @param srcText The text to search for.
   948    * @param srcStart the offset into <TT>srcText</TT> at which
   949    * to start matching
   950    * @param srcLength the number of characters in <TT>srcText</TT> to match
   951    * @param start the offset into this at which to start matching
   952    * @param length the number of characters in this to search
   953    * @return The offset into this of the start of <TT>srcText</TT>,
   954    * or -1 if not found.
   955    * @stable ICU 2.0
   956    */
   957   inline int32_t indexOf(const UnicodeString& srcText,
   958               int32_t srcStart,
   959               int32_t srcLength,
   960               int32_t start,
   961               int32_t length) const;
   963   /**
   964    * Locate in this the first occurrence of the characters in
   965    * <TT>srcChars</TT>
   966    * starting at offset <TT>start</TT>, using bitwise comparison.
   967    * @param srcChars The text to search for.
   968    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   969    * @param start the offset into this at which to start matching
   970    * @return The offset into this of the start of <TT>srcChars</TT>,
   971    * or -1 if not found.
   972    * @stable ICU 2.0
   973    */
   974   inline int32_t indexOf(const UChar *srcChars,
   975               int32_t srcLength,
   976               int32_t start) const;
   978   /**
   979    * Locate in this the first occurrence in the range
   980    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   981    * in <TT>srcChars</TT>, using bitwise comparison.
   982    * @param srcChars The text to search for.
   983    * @param srcLength the number of characters in <TT>srcChars</TT>
   984    * @param start The offset at which searching will start.
   985    * @param length The number of characters to search
   986    * @return The offset into this of the start of <TT>srcChars</TT>,
   987    * or -1 if not found.
   988    * @stable ICU 2.0
   989    */
   990   inline int32_t indexOf(const UChar *srcChars,
   991               int32_t srcLength,
   992               int32_t start,
   993               int32_t length) const;
   995   /**
   996    * Locate in this the first occurrence in the range
   997    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   998    * in <TT>srcChars</TT> in the range
   999    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1000    * using bitwise comparison.
  1001    * @param srcChars The text to search for.
  1002    * @param srcStart the offset into <TT>srcChars</TT> at which
  1003    * to start matching
  1004    * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1005    * @param start the offset into this at which to start matching
  1006    * @param length the number of characters in this to search
  1007    * @return The offset into this of the start of <TT>srcChars</TT>,
  1008    * or -1 if not found.
  1009    * @stable ICU 2.0
  1010    */
  1011   int32_t indexOf(const UChar *srcChars,
  1012               int32_t srcStart,
  1013               int32_t srcLength,
  1014               int32_t start,
  1015               int32_t length) const;
  1017   /**
  1018    * Locate in this the first occurrence of the BMP code point <code>c</code>,
  1019    * using bitwise comparison.
  1020    * @param c The code unit to search for.
  1021    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1022    * @stable ICU 2.0
  1023    */
  1024   inline int32_t indexOf(UChar c) const;
  1026   /**
  1027    * Locate in this the first occurrence of the code point <TT>c</TT>,
  1028    * using bitwise comparison.
  1030    * @param c The code point to search for.
  1031    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1032    * @stable ICU 2.0
  1033    */
  1034   inline int32_t indexOf(UChar32 c) const;
  1036   /**
  1037    * Locate in this the first occurrence of the BMP code point <code>c</code>,
  1038    * starting at offset <TT>start</TT>, using bitwise comparison.
  1039    * @param c The code unit to search for.
  1040    * @param start The offset at which searching will start.
  1041    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1042    * @stable ICU 2.0
  1043    */
  1044   inline int32_t indexOf(UChar c,
  1045               int32_t start) const;
  1047   /**
  1048    * Locate in this the first occurrence of the code point <TT>c</TT>
  1049    * starting at offset <TT>start</TT>, using bitwise comparison.
  1051    * @param c The code point to search for.
  1052    * @param start The offset at which searching will start.
  1053    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1054    * @stable ICU 2.0
  1055    */
  1056   inline int32_t indexOf(UChar32 c,
  1057               int32_t start) const;
  1059   /**
  1060    * Locate in this the first occurrence of the BMP code point <code>c</code>
  1061    * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1062    * using bitwise comparison.
  1063    * @param c The code unit to search for.
  1064    * @param start the offset into this at which to start matching
  1065    * @param length the number of characters in this to search
  1066    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1067    * @stable ICU 2.0
  1068    */
  1069   inline int32_t indexOf(UChar c,
  1070               int32_t start,
  1071               int32_t length) const;
  1073   /**
  1074    * Locate in this the first occurrence of the code point <TT>c</TT>
  1075    * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1076    * using bitwise comparison.
  1078    * @param c The code point to search for.
  1079    * @param start the offset into this at which to start matching
  1080    * @param length the number of characters in this to search
  1081    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1082    * @stable ICU 2.0
  1083    */
  1084   inline int32_t indexOf(UChar32 c,
  1085               int32_t start,
  1086               int32_t length) const;
  1088   /**
  1089    * Locate in this the last occurrence of the characters in <TT>text</TT>,
  1090    * using bitwise comparison.
  1091    * @param text The text to search for.
  1092    * @return The offset into this of the start of <TT>text</TT>,
  1093    * or -1 if not found.
  1094    * @stable ICU 2.0
  1095    */
  1096   inline int32_t lastIndexOf(const UnicodeString& text) const;
  1098   /**
  1099    * Locate in this the last occurrence of the characters in <TT>text</TT>
  1100    * starting at offset <TT>start</TT>, using bitwise comparison.
  1101    * @param text The text to search for.
  1102    * @param start The offset at which searching will start.
  1103    * @return The offset into this of the start of <TT>text</TT>,
  1104    * or -1 if not found.
  1105    * @stable ICU 2.0
  1106    */
  1107   inline int32_t lastIndexOf(const UnicodeString& text,
  1108               int32_t start) const;
  1110   /**
  1111    * Locate in this the last occurrence in the range
  1112    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1113    * in <TT>text</TT>, using bitwise comparison.
  1114    * @param text The text to search for.
  1115    * @param start The offset at which searching will start.
  1116    * @param length The number of characters to search
  1117    * @return The offset into this of the start of <TT>text</TT>,
  1118    * or -1 if not found.
  1119    * @stable ICU 2.0
  1120    */
  1121   inline int32_t lastIndexOf(const UnicodeString& text,
  1122               int32_t start,
  1123               int32_t length) const;
  1125   /**
  1126    * Locate in this the last occurrence in the range
  1127    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1128    * in <TT>srcText</TT> in the range
  1129    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1130    * using bitwise comparison.
  1131    * @param srcText The text to search for.
  1132    * @param srcStart the offset into <TT>srcText</TT> at which
  1133    * to start matching
  1134    * @param srcLength the number of characters in <TT>srcText</TT> to match
  1135    * @param start the offset into this at which to start matching
  1136    * @param length the number of characters in this to search
  1137    * @return The offset into this of the start of <TT>srcText</TT>,
  1138    * or -1 if not found.
  1139    * @stable ICU 2.0
  1140    */
  1141   inline int32_t lastIndexOf(const UnicodeString& srcText,
  1142               int32_t srcStart,
  1143               int32_t srcLength,
  1144               int32_t start,
  1145               int32_t length) const;
  1147   /**
  1148    * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
  1149    * starting at offset <TT>start</TT>, using bitwise comparison.
  1150    * @param srcChars The text to search for.
  1151    * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1152    * @param start the offset into this at which to start matching
  1153    * @return The offset into this of the start of <TT>srcChars</TT>,
  1154    * or -1 if not found.
  1155    * @stable ICU 2.0
  1156    */
  1157   inline int32_t lastIndexOf(const UChar *srcChars,
  1158               int32_t srcLength,
  1159               int32_t start) const;
  1161   /**
  1162    * Locate in this the last occurrence in the range
  1163    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1164    * in <TT>srcChars</TT>, using bitwise comparison.
  1165    * @param srcChars The text to search for.
  1166    * @param srcLength the number of characters in <TT>srcChars</TT>
  1167    * @param start The offset at which searching will start.
  1168    * @param length The number of characters to search
  1169    * @return The offset into this of the start of <TT>srcChars</TT>,
  1170    * or -1 if not found.
  1171    * @stable ICU 2.0
  1172    */
  1173   inline int32_t lastIndexOf(const UChar *srcChars,
  1174               int32_t srcLength,
  1175               int32_t start,
  1176               int32_t length) const;
  1178   /**
  1179    * Locate in this the last occurrence in the range
  1180    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1181    * in <TT>srcChars</TT> in the range
  1182    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1183    * using bitwise comparison.
  1184    * @param srcChars The text to search for.
  1185    * @param srcStart the offset into <TT>srcChars</TT> at which
  1186    * to start matching
  1187    * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1188    * @param start the offset into this at which to start matching
  1189    * @param length the number of characters in this to search
  1190    * @return The offset into this of the start of <TT>srcChars</TT>,
  1191    * or -1 if not found.
  1192    * @stable ICU 2.0
  1193    */
  1194   int32_t lastIndexOf(const UChar *srcChars,
  1195               int32_t srcStart,
  1196               int32_t srcLength,
  1197               int32_t start,
  1198               int32_t length) const;
  1200   /**
  1201    * Locate in this the last occurrence of the BMP code point <code>c</code>,
  1202    * using bitwise comparison.
  1203    * @param c The code unit to search for.
  1204    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1205    * @stable ICU 2.0
  1206    */
  1207   inline int32_t lastIndexOf(UChar c) const;
  1209   /**
  1210    * Locate in this the last occurrence of the code point <TT>c</TT>,
  1211    * using bitwise comparison.
  1213    * @param c The code point to search for.
  1214    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1215    * @stable ICU 2.0
  1216    */
  1217   inline int32_t lastIndexOf(UChar32 c) const;
  1219   /**
  1220    * Locate in this the last occurrence of the BMP code point <code>c</code>
  1221    * starting at offset <TT>start</TT>, using bitwise comparison.
  1222    * @param c The code unit to search for.
  1223    * @param start The offset at which searching will start.
  1224    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1225    * @stable ICU 2.0
  1226    */
  1227   inline int32_t lastIndexOf(UChar c,
  1228               int32_t start) const;
  1230   /**
  1231    * Locate in this the last occurrence of the code point <TT>c</TT>
  1232    * starting at offset <TT>start</TT>, using bitwise comparison.
  1234    * @param c The code point to search for.
  1235    * @param start The offset at which searching will start.
  1236    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1237    * @stable ICU 2.0
  1238    */
  1239   inline int32_t lastIndexOf(UChar32 c,
  1240               int32_t start) const;
  1242   /**
  1243    * Locate in this the last occurrence of the BMP code point <code>c</code>
  1244    * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1245    * using bitwise comparison.
  1246    * @param c The code unit to search for.
  1247    * @param start the offset into this at which to start matching
  1248    * @param length the number of characters in this to search
  1249    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1250    * @stable ICU 2.0
  1251    */
  1252   inline int32_t lastIndexOf(UChar c,
  1253               int32_t start,
  1254               int32_t length) const;
  1256   /**
  1257    * Locate in this the last occurrence of the code point <TT>c</TT>
  1258    * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1259    * using bitwise comparison.
  1261    * @param c The code point to search for.
  1262    * @param start the offset into this at which to start matching
  1263    * @param length the number of characters in this to search
  1264    * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1265    * @stable ICU 2.0
  1266    */
  1267   inline int32_t lastIndexOf(UChar32 c,
  1268               int32_t start,
  1269               int32_t length) const;
  1272   /* Character access */
  1274   /**
  1275    * Return the code unit at offset <tt>offset</tt>.
  1276    * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1277    * @param offset a valid offset into the text
  1278    * @return the code unit at offset <tt>offset</tt>
  1279    *         or 0xffff if the offset is not valid for this string
  1280    * @stable ICU 2.0
  1281    */
  1282   inline UChar charAt(int32_t offset) const;
  1284   /**
  1285    * Return the code unit at offset <tt>offset</tt>.
  1286    * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1287    * @param offset a valid offset into the text
  1288    * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
  1289    * @stable ICU 2.0
  1290    */
  1291   inline UChar operator[] (int32_t offset) const;
  1293   /**
  1294    * Return the code point that contains the code unit
  1295    * at offset <tt>offset</tt>.
  1296    * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1297    * @param offset a valid offset into the text
  1298    * that indicates the text offset of any of the code units
  1299    * that will be assembled into a code point (21-bit value) and returned
  1300    * @return the code point of text at <tt>offset</tt>
  1301    *         or 0xffff if the offset is not valid for this string
  1302    * @stable ICU 2.0
  1303    */
  1304   UChar32 char32At(int32_t offset) const;
  1306   /**
  1307    * Adjust a random-access offset so that
  1308    * it points to the beginning of a Unicode character.
  1309    * The offset that is passed in points to
  1310    * any code unit of a code point,
  1311    * while the returned offset will point to the first code unit
  1312    * of the same code point.
  1313    * In UTF-16, if the input offset points to a second surrogate
  1314    * of a surrogate pair, then the returned offset will point
  1315    * to the first surrogate.
  1316    * @param offset a valid offset into one code point of the text
  1317    * @return offset of the first code unit of the same code point
  1318    * @see U16_SET_CP_START
  1319    * @stable ICU 2.0
  1320    */
  1321   int32_t getChar32Start(int32_t offset) const;
  1323   /**
  1324    * Adjust a random-access offset so that
  1325    * it points behind a Unicode character.
  1326    * The offset that is passed in points behind
  1327    * any code unit of a code point,
  1328    * while the returned offset will point behind the last code unit
  1329    * of the same code point.
  1330    * In UTF-16, if the input offset points behind the first surrogate
  1331    * (i.e., to the second surrogate)
  1332    * of a surrogate pair, then the returned offset will point
  1333    * behind the second surrogate (i.e., to the first code unit after the pair).
  1334    * @param offset a valid offset after any code unit of a code point of the text
  1335    * @return offset of the first code unit after the same code point
  1336    * @see U16_SET_CP_LIMIT
  1337    * @stable ICU 2.0
  1338    */
  1339   int32_t getChar32Limit(int32_t offset) const;
  1341   /**
  1342    * Move the code unit index along the string by delta code points.
  1343    * Interpret the input index as a code unit-based offset into the string,
  1344    * move the index forward or backward by delta code points, and
  1345    * return the resulting index.
  1346    * The input index should point to the first code unit of a code point,
  1347    * if there is more than one.
  1349    * Both input and output indexes are code unit-based as for all
  1350    * string indexes/offsets in ICU (and other libraries, like MBCS char*).
  1351    * If delta<0 then the index is moved backward (toward the start of the string).
  1352    * If delta>0 then the index is moved forward (toward the end of the string).
  1354    * This behaves like CharacterIterator::move32(delta, kCurrent).
  1356    * Behavior for out-of-bounds indexes:
  1357    * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
  1358    * if the input index<0 then it is pinned to 0;
  1359    * if it is index>length() then it is pinned to length().
  1360    * Afterwards, the index is moved by <code>delta</code> code points
  1361    * forward or backward,
  1362    * but no further backward than to 0 and no further forward than to length().
  1363    * The resulting index return value will be in between 0 and length(), inclusively.
  1365    * Examples:
  1366    * <pre>
  1367    * // s has code points 'a' U+10000 'b' U+10ffff U+2029
  1368    * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
  1370    * // initial index: position of U+10000
  1371    * int32_t index=1;
  1373    * // the following examples will all result in index==4, position of U+10ffff
  1375    * // skip 2 code points from some position in the string
  1376    * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
  1378    * // go to the 3rd code point from the start of s (0-based)
  1379    * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
  1381    * // go to the next-to-last code point of s
  1382    * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
  1383    * </pre>
  1385    * @param index input code unit index
  1386    * @param delta (signed) code point count to move the index forward or backward
  1387    *        in the string
  1388    * @return the resulting code unit index
  1389    * @stable ICU 2.0
  1390    */
  1391   int32_t moveIndex32(int32_t index, int32_t delta) const;
  1393   /* Substring extraction */
  1395   /**
  1396    * Copy the characters in the range
  1397    * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
  1398    * beginning at <tt>dstStart</tt>.
  1399    * If the string aliases to <code>dst</code> itself as an external buffer,
  1400    * then extract() will not copy the contents.
  1402    * @param start offset of first character which will be copied into the array
  1403    * @param length the number of characters to extract
  1404    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
  1405    * must be at least (<tt>dstStart + length</tt>).
  1406    * @param dstStart the offset in <TT>dst</TT> where the first character
  1407    * will be extracted
  1408    * @stable ICU 2.0
  1409    */
  1410   inline void extract(int32_t start,
  1411            int32_t length,
  1412            UChar *dst,
  1413            int32_t dstStart = 0) const;
  1415   /**
  1416    * Copy the contents of the string into dest.
  1417    * This is a convenience function that
  1418    * checks if there is enough space in dest,
  1419    * extracts the entire string if possible,
  1420    * and NUL-terminates dest if possible.
  1422    * If the string fits into dest but cannot be NUL-terminated
  1423    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
  1424    * If the string itself does not fit into dest
  1425    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
  1427    * If the string aliases to <code>dest</code> itself as an external buffer,
  1428    * then extract() will not copy the contents.
  1430    * @param dest Destination string buffer.
  1431    * @param destCapacity Number of UChars available at dest.
  1432    * @param errorCode ICU error code.
  1433    * @return length()
  1434    * @stable ICU 2.0
  1435    */
  1436   int32_t
  1437   extract(UChar *dest, int32_t destCapacity,
  1438           UErrorCode &errorCode) const;
  1440   /**
  1441    * Copy the characters in the range
  1442    * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
  1443    * <tt>target</tt>.
  1444    * @param start offset of first character which will be copied
  1445    * @param length the number of characters to extract
  1446    * @param target UnicodeString into which to copy characters.
  1447    *        The result is delivered only through <TT>target</TT>; nothing is returned.
  1448    * @stable ICU 2.0
  1449    */
  1450   inline void extract(int32_t start,
  1451            int32_t length,
  1452            UnicodeString& target) const;
  1454   /**
  1455    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
  1456    * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
  1457    * @param start offset of first character which will be copied into the array
  1458    * @param limit offset immediately following the last character to be copied
  1459    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
  1460    * must be at least (<tt>dstStart + (limit - start)</tt>).
  1461    * @param dstStart the offset in <TT>dst</TT> where the first character
  1462    * will be extracted
  1463    * @stable ICU 2.0
  1464    */
  1465   inline void extractBetween(int32_t start,
  1466               int32_t limit,
  1467               UChar *dst,
  1468               int32_t dstStart = 0) const;
  1470   /**
  1471    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
  1472    * into the UnicodeString <tt>target</tt>.  Replaceable API.
  1473    * @param start offset of first character which will be copied
  1474    * @param limit offset immediately following the last character to be copied
  1475    * @param target UnicodeString into which to copy characters.
  1476    *        The result is delivered only through <TT>target</TT>; nothing is returned.
  1477    * @stable ICU 2.0
  1478    */
  1479   virtual void extractBetween(int32_t start,
  1480               int32_t limit,
  1481               UnicodeString& target) const;
  1483   /**
  1484    * Copy the characters in the range
  1485    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
  1486    * All characters must be invariant (see utypes.h).
  1487    * Use US_INV as the last, signature-distinguishing parameter.
  1489    * This function does not write any more than <code>targetCapacity</code>
  1490    * characters but returns the length of the entire output string
  1491    * so that one can allocate a larger buffer and call the function again
  1492    * if necessary.
  1493    * The output string is NUL-terminated if possible.
  1495    * @param start offset of first character which will be copied
  1496    * @param startLength the number of characters to extract
  1497    * @param target the target buffer for extraction, can be NULL
  1498    *               if targetCapacity is 0
  1499    * @param targetCapacity the length of the target buffer
  1500    * @param inv Signature-distinguishing parameter, use US_INV.
  1501    * @return the output string length, not including the terminating NUL
  1502    * @stable ICU 3.2
  1503    */
  1504   int32_t extract(int32_t start,
  1505            int32_t startLength,
  1506            char *target,
  1507            int32_t targetCapacity,
  1508            enum EInvariant inv) const;
  1510 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  1512   /**
  1513    * Copy the characters in the range
  1514    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
  1515    * in the platform's default codepage.
  1516    * This function does not write any more than <code>targetLength</code>
  1517    * characters but returns the length of the entire output string
  1518    * so that one can allocate a larger buffer and call the function again
  1519    * if necessary.
  1520    * The output string is NUL-terminated if possible.
  1522    * @param start offset of first character which will be copied
  1523    * @param startLength the number of characters to extract
  1524    * @param target the target buffer for extraction
  1525    * @param targetLength the length of the target buffer
  1526    * If <TT>target</TT> is NULL, then the number of bytes required for
  1527    * <TT>target</TT> is returned.
  1528    * @return the output string length, not including the terminating NUL
  1529    * @stable ICU 2.0
  1530    */
  1531   int32_t extract(int32_t start,
  1532            int32_t startLength,
  1533            char *target,
  1534            uint32_t targetLength) const;
  1536 #endif
  1538 #if !UCONFIG_NO_CONVERSION
  1540   /**
  1541    * Copy the characters in the range
  1542    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
  1543    * in a specified codepage.
  1544    * The output string is NUL-terminated.
  1546    * Recommendation: For invariant-character strings use
  1547    * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1548    * because it avoids object code dependencies of UnicodeString on
  1549    * the conversion code.
  1551    * @param start offset of first character which will be copied
  1552    * @param startLength the number of characters to extract
  1553    * @param target the target buffer for extraction
  1554    * @param codepage the desired codepage for the characters.  0 has
  1555    * the special meaning of the default codepage
  1556    * If <code>codepage</code> is an empty string (<code>""</code>),
  1557    * then a simple conversion is performed on the codepage-invariant
  1558    * subset ("invariant characters") of the platform encoding. See utypes.h.
  1559    * If <TT>target</TT> is NULL, then the number of bytes required for
  1560    * <TT>target</TT> is returned. It is assumed that the target is big enough
  1561    * to fit all of the characters.
  1562    * @return the output string length, not including the terminating NUL
  1563    * @stable ICU 2.0
  1564    */
  1565   inline int32_t extract(int32_t start,
  1566                  int32_t startLength,
  1567                  char *target,
  1568                  const char *codepage = 0) const;
  1570   /**
  1571    * Copy the characters in the range
  1572    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
  1573    * in a specified codepage.
  1574    * This function does not write any more than <code>targetLength</code>
  1575    * characters but returns the length of the entire output string
  1576    * so that one can allocate a larger buffer and call the function again
  1577    * if necessary.
  1578    * The output string is NUL-terminated if possible.
  1580    * Recommendation: For invariant-character strings use
  1581    * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1582    * because it avoids object code dependencies of UnicodeString on
  1583    * the conversion code.
  1585    * @param start offset of first character which will be copied
  1586    * @param startLength the number of characters to extract
  1587    * @param target the target buffer for extraction
  1588    * @param targetLength the length of the target buffer
  1589    * @param codepage the desired codepage for the characters.  0 has
  1590    * the special meaning of the default codepage
  1591    * If <code>codepage</code> is an empty string (<code>""</code>),
  1592    * then a simple conversion is performed on the codepage-invariant
  1593    * subset ("invariant characters") of the platform encoding. See utypes.h.
  1594    * If <TT>target</TT> is NULL, then the number of bytes required for
  1595    * <TT>target</TT> is returned.
  1596    * @return the output string length, not including the terminating NUL
  1597    * @stable ICU 2.0
  1598    */
  1599   int32_t extract(int32_t start,
  1600            int32_t startLength,
  1601            char *target,
  1602            uint32_t targetLength,
  1603            const char *codepage) const;
  1605   /**
  1606    * Convert the UnicodeString into a codepage string using an existing UConverter.
  1607    * The output string is NUL-terminated if possible.
  1609    * This function avoids the overhead of opening and closing a converter if
  1610    * multiple strings are extracted.
  1612    * @param dest destination string buffer, can be NULL if destCapacity==0
  1613    * @param destCapacity the number of chars available at dest
  1614    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
  1615    *        or NULL for the default converter
  1616    * @param errorCode normal ICU error code
  1617    * @return the length of the output string, not counting the terminating NUL;
  1618    *         if the length is greater than destCapacity, then the string will not fit
  1619    *         and a buffer of the indicated length would need to be passed in
  1620    * @stable ICU 2.0
  1621    */
  1622   int32_t extract(char *dest, int32_t destCapacity,
  1623                   UConverter *cnv,
  1624                   UErrorCode &errorCode) const;
  1626 #endif
  1628   /**
  1629    * Create a temporary substring for the specified range.
  1630    * Unlike the substring constructor and setTo() functions,
  1631    * the object returned here will be a read-only alias (using getBuffer())
  1632    * rather than copying the text.
  1633    * As a result, this substring operation is much faster but requires
  1634    * that the original string not be modified or deleted during the lifetime
  1635    * of the returned substring object.
  1636    * @param start offset of the first character visible in the substring
  1637    * @param length length of the substring
  1638    * @return a read-only alias UnicodeString object for the substring
  1639    * @stable ICU 4.4
  1640    */
  1641   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
  1643   /**
  1644    * Create a temporary substring for the specified range.
  1645    * Same as tempSubString(start, length) except that the substring range
  1646    * is specified as a (start, limit) pair (with an exclusive limit index)
  1647    * rather than a (start, length) pair.
  1648    * @param start offset of the first character visible in the substring
  1649    * @param limit offset immediately following the last character visible in the substring
  1650    * @return a read-only alias UnicodeString object for the substring
  1651    * @stable ICU 4.4
  1652    */
  1653   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
  1655   /**
  1656    * Convert the UnicodeString to UTF-8 and write the result
  1657    * to a ByteSink. This is called by toUTF8String().
  1658    * Unpaired surrogates are replaced with U+FFFD.
  1659    * Calls u_strToUTF8WithSub().
  1661    * @param sink A ByteSink to which the UTF-8 version of the string is written.
  1662    *             sink.Flush() is called at the end.
  1663    * @stable ICU 4.2
  1664    * @see toUTF8String
  1665    */
  1666   void toUTF8(ByteSink &sink) const;
  1668 #if U_HAVE_STD_STRING
  1670   /**
  1671    * Convert the UnicodeString to UTF-8 and append the result
  1672    * to a standard string.
  1673    * Unpaired surrogates are replaced with U+FFFD.
  1674    * Calls toUTF8().
  1676    * @param result A standard string (or a compatible object of type StringClass)
  1677    *        to which the UTF-8 version of the string is appended.
  1678    * @return The string object, i.e. <code>result</code> itself.
  1679    * @stable ICU 4.2
  1680    * @see toUTF8
  1681    */
  1682   template<typename StringClass>
  1683   StringClass &toUTF8String(StringClass &result) const {
  1684     StringByteSink<StringClass> sbs(&result);  // sink that writes into result
  1685     toUTF8(sbs);
  1686     return result;
  1687   }
  1689 #endif
  1691   /**
  1692    * Convert the UnicodeString to UTF-32.
  1693    * Unpaired surrogates are replaced with U+FFFD.
  1694    * Calls u_strToUTF32WithSub().
  1696    * @param utf32 destination string buffer, can be NULL if capacity==0
  1697    * @param capacity the number of UChar32s available at utf32
  1698    * @param errorCode Standard ICU error code. Its input value must
  1699    *                  pass the U_SUCCESS() test, or else the function returns
  1700    *                  immediately. Check for U_FAILURE() on output or use with
  1701    *                  function chaining. (See User Guide for details.)
  1702    * @return The length of the UTF-32 string.
  1703    * @see fromUTF32
  1704    * @stable ICU 4.2
  1705    */
  1706   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
  1708   /* Length operations */
  1710   /**
  1711    * Return the length of the UnicodeString object.
  1712    * The length is the number of UChar code units that are in the UnicodeString.
  1713    * If you want the number of code points, please use countChar32().
  1714    * @return the length of the UnicodeString object
  1715    * @see countChar32
  1716    * @stable ICU 2.0
  1717    */
  1718   inline int32_t length(void) const;
  1720   /**
  1721    * Count Unicode code points in the length UChar code units of the string.
  1722    * A code point may occupy either one or two UChar code units.
  1723    * Counting code points involves reading all code units.
  1725    * This function is basically the inverse of moveIndex32().
  1727    * @param start the index of the first code unit to check
  1728    * @param length the number of UChar code units to check
  1729    * @return the number of code points in the specified code units
  1730    * @see length
  1731    * @stable ICU 2.0
  1732    */
  1733   int32_t
  1734   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
  1736   /**
  1737    * Check if the length UChar code units of the string
  1738    * contain more Unicode code points than a certain number.
  1739    * This is more efficient than counting all code points in this part of the string
  1740    * and comparing that number with a threshold.
  1741    * This function may not need to scan the string at all if the length
  1742    * falls within a certain range, and
  1743    * never needs to count more than 'number+1' code points.
  1744    * Logically equivalent to (countChar32(start, length)>number).
  1745    * A Unicode code point may occupy either one or two UChar code units.
  1747    * @param start the index of the first code unit to check (0 for the entire string)
  1748    * @param length the number of UChar code units to check
  1749    *               (use INT32_MAX for the entire string; remember that start/length
  1750    *                values are pinned)
  1751    * @param number The number of code points in the (sub)string is compared against
  1752    *               the 'number' parameter.
  1753    * @return Boolean value for whether the string contains more Unicode code points
  1754    *         than 'number'. Same as (u_countChar32(s, length)>number).
  1755    * @see countChar32
  1756    * @see u_strHasMoreChar32Than
  1757    * @stable ICU 2.4
  1758    */
  1759   UBool
  1760   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
  1762   /**
  1763    * Determine if this string is empty.
  1764    * @return TRUE if this string contains 0 characters, FALSE otherwise.
  1765    * @stable ICU 2.0
  1766    */
  1767   inline UBool isEmpty(void) const;
  1769   /**
  1770    * Return the capacity of the internal buffer of the UnicodeString object.
  1771    * This is useful together with the getBuffer functions.
  1772    * See there for details.
  1774    * @return the number of UChars available in the internal buffer
  1775    * @see getBuffer
  1776    * @stable ICU 2.0
  1777    */
  1778   inline int32_t getCapacity(void) const;
  1780   /* Other operations */
  1782   /**
  1783    * Generate a hash code for this object.
  1784    * @return The hash code of this UnicodeString.
  1785    * @stable ICU 2.0
  1786    */
  1787   inline int32_t hashCode(void) const;
  1789   /**
  1790    * Determine if this object contains a valid string.
  1791    * A bogus string has no value. It is different from an empty string,
  1792    * although in both cases isEmpty() returns TRUE and length() returns 0.
  1793    * setToBogus() and isBogus() can be used to indicate that no string value is available.
  1794    * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
  1795    * length() returns 0.
  1797    * @return TRUE if the string is bogus/invalid, FALSE otherwise
  1798    * @see setToBogus()
  1799    * @stable ICU 2.0
  1800    */
  1801   inline UBool isBogus(void) const;
  1804   //========================================
  1805   // Write operations
  1806   //========================================
  1808   /* Assignment operations */
  1810   /**
  1811    * Assignment operator.  Replace the characters in this UnicodeString
  1812    * with the characters from <TT>srcText</TT>.
  1813    * @param srcText The text containing the characters to replace
  1814    * @return a reference to this
  1815    * @stable ICU 2.0
  1816    */
  1817   UnicodeString &operator=(const UnicodeString &srcText);
  1819   /**
  1820    * Almost the same as the assignment operator.
  1821    * Replace the characters in this UnicodeString
  1822    * with the characters from <code>srcText</code>.
  1824    * This function works the same as the assignment operator
  1825    * for all strings except for ones that are readonly aliases.
  1827    * Starting with ICU 2.4, the assignment operator and the copy constructor
  1828    * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1829    * This function implements the old, more efficient but less safe behavior
  1830    * of making this string also a readonly alias to the same buffer.
  1832    * The fastCopyFrom function must be used only if it is known that the lifetime of
  1833    * this UnicodeString does not exceed the lifetime of the aliased buffer
  1834    * including its contents, for example for strings from resource bundles
  1835    * or aliases to string constants.
  1837    * @param src The text containing the characters to replace.
  1838    * @return a reference to this
  1839    * @stable ICU 2.4
  1840    */
  1841   UnicodeString &fastCopyFrom(const UnicodeString &src);
  1843   /**
  1844    * Assignment operator.  Replace the characters in this UnicodeString
  1845    * with the code unit <TT>ch</TT>.
  1846    * @param ch the code unit to replace
  1847    * @return a reference to this
  1848    * @stable ICU 2.0
  1849    */
  1850   inline UnicodeString& operator= (UChar ch);
  1852   /**
  1853    * Assignment operator.  Replace the characters in this UnicodeString
  1854    * with the code point <TT>ch</TT>.
  1855    * @param ch the code point to replace
  1856    * @return a reference to this
  1857    * @stable ICU 2.0
  1858    */
  1859   inline UnicodeString& operator= (UChar32 ch);
  1861   /**
  1862    * Set the text in the UnicodeString object to the characters
  1863    * in <TT>srcText</TT> in the range
  1864    * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
  1865    * <TT>srcText</TT> is not modified.
  1866    * @param srcText the source for the new characters
  1867    * @param srcStart the offset into <TT>srcText</TT> where new characters
  1868    * will be obtained
  1869    * @return a reference to this
  1870    * @stable ICU 2.2
  1871    */
  1872   inline UnicodeString& setTo(const UnicodeString& srcText,
  1873                int32_t srcStart);
  1875   /**
  1876    * Set the text in the UnicodeString object to the characters
  1877    * in <TT>srcText</TT> in the range
  1878    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  1879    * <TT>srcText</TT> is not modified.
  1880    * @param srcText the source for the new characters
  1881    * @param srcStart the offset into <TT>srcText</TT> where new characters
  1882    * will be obtained
  1883    * @param srcLength the number of characters in <TT>srcText</TT> in the
  1884    * replace string.
  1885    * @return a reference to this
  1886    * @stable ICU 2.0
  1887    */
  1888   inline UnicodeString& setTo(const UnicodeString& srcText,
  1889                int32_t srcStart,
  1890                int32_t srcLength);
  1892   /**
  1893    * Set the text in the UnicodeString object to the characters in
  1894    * <TT>srcText</TT>.
  1895    * <TT>srcText</TT> is not modified.
  1896    * @param srcText the source for the new characters
  1897    * @return a reference to this
  1898    * @stable ICU 2.0
  1899    */
  1900   inline UnicodeString& setTo(const UnicodeString& srcText);
  1902   /**
  1903    * Set the characters in the UnicodeString object to the characters
  1904    * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
  1905    * @param srcChars the source for the new characters
  1906    * @param srcLength the number of Unicode characters in srcChars.
  1907    * @return a reference to this
  1908    * @stable ICU 2.0
  1909    */
  1910   inline UnicodeString& setTo(const UChar *srcChars,
  1911                int32_t srcLength);
  1913   /**
  1914    * Set the characters in the UnicodeString object to the code unit
  1915    * <TT>srcChar</TT>.
  1916    * @param srcChar the code unit which becomes the UnicodeString's character
  1917    * content
  1918    * @return a reference to this
  1919    * @stable ICU 2.0
  1920    */
  1921   UnicodeString& setTo(UChar srcChar);
  1923   /**
  1924    * Set the characters in the UnicodeString object to the code point
  1925    * <TT>srcChar</TT>.
  1926    * @param srcChar the code point which becomes the UnicodeString's character
  1927    * content
  1928    * @return a reference to this
  1929    * @stable ICU 2.0
  1930    */
  1931   UnicodeString& setTo(UChar32 srcChar);
  1933   /**
  1934    * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
  1935    * The text will be used for the UnicodeString object, but
  1936    * it will not be released when the UnicodeString is destroyed.
  1937    * This has copy-on-write semantics:
  1938    * When the string is modified, then the buffer is first copied into
  1939    * newly allocated memory.
  1940    * The aliased buffer is never modified.
  1942    * In an assignment to another UnicodeString, when using the copy constructor
  1943    * or the assignment operator, the text will be copied.
  1944    * When using fastCopyFrom(), the text will be aliased again,
  1945    * so that both strings then alias the same readonly-text.
  1947    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
  1948    *                     This must be true if <code>textLength==-1</code>.
  1949    * @param text The characters to alias for the UnicodeString.
  1950    * @param textLength The number of Unicode characters in <code>text</code> to alias.
  1951    *                   If -1, then this function will determine the length
  1952    *                   by calling <code>u_strlen()</code>.
  1953    * @return a reference to this
  1954    * @stable ICU 2.0
  1955    */
  1956   UnicodeString &setTo(UBool isTerminated,
  1957                        const UChar *text,
  1958                        int32_t textLength);
  1960   /**
  1961    * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
  1962    * The text will be used for the UnicodeString object, but
  1963    * it will not be released when the UnicodeString is destroyed.
  1964    * This has write-through semantics:
  1965    * For as long as the capacity of the buffer is sufficient, write operations
  1966    * will directly affect the buffer. When more capacity is necessary, then
  1967    * a new buffer will be allocated and the contents copied as with regularly
  1968    * constructed strings.
  1969    * In an assignment to another UnicodeString, the buffer will be copied.
  1970    * The extract(UChar *dst) function detects whether the dst pointer is the same
  1971    * as the string buffer itself and will in this case not copy the contents.
  1973    * @param buffer The characters to alias for the UnicodeString.
  1974    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
  1975    * @param buffCapacity The size of <code>buffer</code> in UChars.
  1976    * @return a reference to this
  1977    * @stable ICU 2.0
  1978    */
  1979   UnicodeString &setTo(UChar *buffer,
  1980                        int32_t buffLength,
  1981                        int32_t buffCapacity);
  1983   /**
  1984    * Make this UnicodeString object invalid.
  1985    * The string will test TRUE with isBogus().
  1987    * A bogus string has no value. It is different from an empty string.
  1988    * It can be used to indicate that no string value is available.
  1989    * getBuffer() and getTerminatedBuffer() return NULL, and
  1990    * length() returns 0.
  1992    * This utility function is used throughout the UnicodeString
  1993    * implementation to indicate that a UnicodeString operation failed,
  1994    * and may be used in other functions,
  1995    * especially but not exclusively when such functions do not
  1996    * take a UErrorCode for simplicity.
  1998    * The following methods, and no others, will clear a string object's bogus flag:
  1999    * - remove()
  2000    * - remove(0, INT32_MAX)
  2001    * - truncate(0)
  2002    * - operator=() (assignment operator)
  2003    * - setTo(...)
  2005    * The simplest way to turn a bogus string into an empty one
  2006    * is to use the remove() function.
  2007    * Examples for other functions that are equivalent to "set to empty string":
  2008    * \code
  2009    * if(s.isBogus()) {
  2010    *   s.remove();           // set to an empty string (remove all), or
  2011    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
  2012    *   s.truncate(0);        // set to an empty string (complete truncation), or
  2013    *   s=UnicodeString();    // assign an empty string, or
  2014    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
  2015    *   static const UChar nul=0;
  2016    *   s.setTo(&nul, 0);     // set to an empty C Unicode string
  2017    * }
  2018    * \endcode
  2020    * @see isBogus()
  2021    * @stable ICU 2.0
  2022    */
  2023   void setToBogus();
  2025   /**
  2026    * Set the character at the specified offset to the specified character.
  2027    * @param offset A valid offset into the text of the character to set
  2028    * @param ch The new character
  2029    * @return A reference to this
  2030    * @stable ICU 2.0
  2031    */
  2032   UnicodeString& setCharAt(int32_t offset,
  2033                UChar ch);
  2036   /* Append operations */
  2038   /**
  2039    * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
  2040    * object.
  2041    * @param ch the code unit to be appended
  2042    * @return a reference to this
  2043    * @stable ICU 2.0
  2044    */
  2045  inline  UnicodeString& operator+= (UChar ch);
  2047   /**
  2048    * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
  2049    * object.
  2050    * @param ch the code point to be appended
  2051    * @return a reference to this
  2052    * @stable ICU 2.0
  2053    */
  2054  inline  UnicodeString& operator+= (UChar32 ch);
  2056   /**
  2057    * Append operator. Append the characters in <TT>srcText</TT> to the
  2058    * UnicodeString object. <TT>srcText</TT> is not modified.
  2059    * @param srcText the source for the new characters
  2060    * @return a reference to this
  2061    * @stable ICU 2.0
  2062    */
  2063   inline UnicodeString& operator+= (const UnicodeString& srcText);
  2065   /**
  2066    * Append the characters
  2067    * in <TT>srcText</TT> in the range
  2068    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
  2069    * end of the UnicodeString object. <TT>srcText</TT>
  2070    * is not modified.
  2071    * @param srcText the source for the new characters
  2072    * @param srcStart the offset into <TT>srcText</TT> where new characters
  2073    * will be obtained
  2074    * @param srcLength the number of characters in <TT>srcText</TT> in
  2075    * the append string
  2076    * @return a reference to this
  2077    * @stable ICU 2.0
  2078    */
  2079   inline UnicodeString& append(const UnicodeString& srcText,
  2080             int32_t srcStart,
  2081             int32_t srcLength);
  2083   /**
  2084    * Append the characters in <TT>srcText</TT> to the UnicodeString object.
  2085    * <TT>srcText</TT> is not modified.
  2086    * @param srcText the source for the new characters
  2087    * @return a reference to this
  2088    * @stable ICU 2.0
  2089    */
  2090   inline UnicodeString& append(const UnicodeString& srcText);
  2092   /**
  2093    * Append the characters in <TT>srcChars</TT> in the range
  2094    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the
  2095    * UnicodeString object.
  2096    * <TT>srcChars</TT> is not modified.
  2097    * @param srcChars the source for the new characters
  2098    * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2099    * will be obtained
  2100    * @param srcLength the number of characters in <TT>srcChars</TT> in
  2101    *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
  2102    * @return a reference to this
  2103    * @stable ICU 2.0
  2104    */
  2105   inline UnicodeString& append(const UChar *srcChars,
  2106             int32_t srcStart,
  2107             int32_t srcLength);
  2109   /**
  2110    * Append the characters in <TT>srcChars</TT> to the end of the
  2111    * UnicodeString object. <TT>srcChars</TT> is not modified.
  2112    * @param srcChars the source for the new characters
  2113    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
  2114    *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
  2115    * @return a reference to this
  2116    * @stable ICU 2.0
  2117    */
  2118   inline UnicodeString& append(const UChar *srcChars,
  2119             int32_t srcLength);
  2121   /**
  2122    * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
  2123    * @param srcChar the code unit to append
  2124    * @return a reference to this
  2125    * @stable ICU 2.0
  2126    */
  2127   inline UnicodeString& append(UChar srcChar);
  2129   /**
  2130    * Append the code point <TT>srcChar</TT> to the UnicodeString object.
  2131    * @param srcChar the code point to append
  2132    * @return a reference to this
  2133    * @stable ICU 2.0
  2134    */
  2135   UnicodeString& append(UChar32 srcChar);
  2138   /* Insert operations */
  2140   /**
  2141    * Insert the characters in <TT>srcText</TT> in the range
  2142    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
  2143    * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
  2144    * @param start the offset where the insertion begins
  2145    * @param srcText the source for the new characters
  2146    * @param srcStart the offset into <TT>srcText</TT> where new characters
  2147    * will be obtained
  2148    * @param srcLength the number of characters in <TT>srcText</TT> in
  2149    * the insert string
  2150    * @return a reference to this
  2151    * @stable ICU 2.0
  2152    */
  2153   inline UnicodeString& insert(int32_t start,
  2154             const UnicodeString& srcText,
  2155             int32_t srcStart,
  2156             int32_t srcLength);
  2158   /**
  2159    * Insert the characters in <TT>srcText</TT> into the UnicodeString object
  2160    * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
  2161    * @param start the offset where the insertion begins
  2162    * @param srcText the source for the new characters
  2163    * @return a reference to this
  2164    * @stable ICU 2.0
  2165    */
  2166   inline UnicodeString& insert(int32_t start,
  2167             const UnicodeString& srcText);
  2169   /**
  2170    * Insert the characters in <TT>srcChars</TT> in the range
  2171    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
  2172    *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
  2173    * @param start the offset at which the insertion begins
  2174    * @param srcChars the source for the new characters
  2175    * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2176    * will be obtained
  2177    * @param srcLength the number of characters in <TT>srcChars</TT>
  2178    * in the insert string
  2179    * @return a reference to this
  2180    * @stable ICU 2.0
  2181    */
  2182   inline UnicodeString& insert(int32_t start,
  2183             const UChar *srcChars,
  2184             int32_t srcStart,
  2185             int32_t srcLength);
  2187   /**
  2188    * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
  2189    * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
  2190    * @param start the offset where the insertion begins
  2191    * @param srcChars the source for the new characters
  2192    * @param srcLength the number of Unicode characters in srcChars.
  2193    * @return a reference to this
  2194    * @stable ICU 2.0
  2195    */
  2196   inline UnicodeString& insert(int32_t start,
  2197             const UChar *srcChars,
  2198             int32_t srcLength);
  2200   /**
  2201    * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
  2202    * offset <TT>start</TT>.
  2203    * @param start the offset at which the insertion occurs
  2204    * @param srcChar the code unit to insert
  2205    * @return a reference to this
  2206    * @stable ICU 2.0
  2207    */
  2208   inline UnicodeString& insert(int32_t start,
  2209             UChar srcChar);
  2211   /**
  2212    * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
  2213    * offset <TT>start</TT>.
  2214    * @param start the offset at which the insertion occurs
  2215    * @param srcChar the code point to insert
  2216    * @return a reference to this
  2217    * @stable ICU 2.0
  2218    */
  2219   inline UnicodeString& insert(int32_t start,
  2220             UChar32 srcChar);
  2223   /* Replace operations */
  2225   /**
  2226    * Replace the characters in the range
  2227    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2228    * <TT>srcText</TT> in the range
  2229    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  2230    * <TT>srcText</TT> is not modified.
  2231    * @param start the offset at which the replace operation begins
  2232    * @param length the number of characters to replace. The character at
  2233    * <TT>start + length</TT> is not modified.
  2234    * @param srcText the source for the new characters
  2235    * @param srcStart the offset into <TT>srcText</TT> where new characters
  2236    * will be obtained
  2237    * @param srcLength the number of characters in <TT>srcText</TT> in
  2238    * the replace string
  2239    * @return a reference to this
  2240    * @stable ICU 2.0
  2241    */
  2242   UnicodeString& replace(int32_t start,
  2243              int32_t length,
  2244              const UnicodeString& srcText,
  2245              int32_t srcStart,
  2246              int32_t srcLength);
  2248   /**
  2249    * Replace the characters in the range
  2250    * [<TT>start</TT>, <TT>start + length</TT>)
  2251    * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
  2252    *  not modified.
  2253    * @param start the offset at which the replace operation begins
  2254    * @param length the number of characters to replace. The character at
  2255    * <TT>start + length</TT> is not modified.
  2256    * @param srcText the source for the new characters
  2257    * @return a reference to this
  2258    * @stable ICU 2.0
  2259    */
  2260   UnicodeString& replace(int32_t start,
  2261              int32_t length,
  2262              const UnicodeString& srcText);
  2264   /**
  2265    * Replace the characters in the range
  2266    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2267    * <TT>srcChars</TT> in the range
  2268    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
  2269    * is not modified.
  2270    * @param start the offset at which the replace operation begins
  2271    * @param length the number of characters to replace.  The character at
  2272    * <TT>start + length</TT> is not modified.
  2273    * @param srcChars the source for the new characters
  2274    * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2275    * will be obtained
  2276    * @param srcLength the number of characters in <TT>srcChars</TT>
  2277    * in the replace string
  2278    * @return a reference to this
  2279    * @stable ICU 2.0
  2280    */
  2281   UnicodeString& replace(int32_t start,
  2282              int32_t length,
  2283              const UChar *srcChars,
  2284              int32_t srcStart,
  2285              int32_t srcLength);
  2287   /**
  2288    * Replace the characters in the range
  2289    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2290    * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
  2291    * @param start the offset at which the replace operation begins
  2292    * @param length number of characters to replace.  The character at
  2293    * <TT>start + length</TT> is not modified.
  2294    * @param srcChars the source for the new characters
  2295    * @param srcLength the number of Unicode characters in srcChars
  2296    * @return a reference to this
  2297    * @stable ICU 2.0
  2298    */
  2299   inline UnicodeString& replace(int32_t start,
  2300              int32_t length,
  2301              const UChar *srcChars,
  2302              int32_t srcLength);
  2304   /**
  2305    * Replace the characters in the range
  2306    * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
  2307    * <TT>srcChar</TT>.
  2308    * @param start the offset at which the replace operation begins
  2309    * @param length the number of characters to replace.  The character at
  2310    * <TT>start + length</TT> is not modified.
  2311    * @param srcChar the new code unit
  2312    * @return a reference to this
  2313    * @stable ICU 2.0
  2314    */
  2315   inline UnicodeString& replace(int32_t start,
  2316              int32_t length,
  2317              UChar srcChar);
  2319   /**
  2320    * Replace the characters in the range
  2321    * [<TT>start</TT>, <TT>start + length</TT>) with the code point
  2322    * <TT>srcChar</TT>.
  2323    * @param start the offset at which the replace operation begins
  2324    * @param length the number of characters to replace.  The character at
  2325    * <TT>start + length</TT> is not modified.
  2326    * @param srcChar the new code point
  2327    * @return a reference to this
  2328    * @stable ICU 2.0
  2329    */
  2330   UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
  2332   /**
  2333    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
  2334    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
  2335    * @param start the offset at which the replace operation begins
  2336    * @param limit the offset immediately following the replace range
  2337    * @param srcText the source for the new characters
  2338    * @return a reference to this
  2339    * @stable ICU 2.0
  2340    */
  2341   inline UnicodeString& replaceBetween(int32_t start,
  2342                 int32_t limit,
  2343                 const UnicodeString& srcText);
  2345   /**
  2346    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
  2347    * with the characters in <TT>srcText</TT> in the range
  2348    * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
  2349    * @param start the offset at which the replace operation begins
  2350    * @param limit the offset immediately following the replace range
  2351    * @param srcText the source for the new characters
  2352    * @param srcStart the offset into <TT>srcText</TT> where new characters
  2353    * will be obtained
  2354    * @param srcLimit the offset immediately following the range to copy
  2355    * in <TT>srcText</TT>
  2356    * @return a reference to this
  2357    * @stable ICU 2.0
  2358    */
  2359   inline UnicodeString& replaceBetween(int32_t start,
  2360                 int32_t limit,
  2361                 const UnicodeString& srcText,
  2362                 int32_t srcStart,
  2363                 int32_t srcLimit);
  2365   /**
  2366    * Replace a substring of this object with the given text.
  2367    * @param start the beginning index, inclusive; <code>0 <= start
  2368    * <= limit</code>.
  2369    * @param limit the ending index, exclusive; <code>start <= limit
  2370    * <= length()</code>.
  2371    * @param text the text to replace characters <code>start</code>
  2372    * to <code>limit - 1</code>
  2373    * @stable ICU 2.0
  2374    */
  2375   virtual void handleReplaceBetween(int32_t start,
  2376                                     int32_t limit,
  2377                                     const UnicodeString& text);
  2379   /**
  2380    * Replaceable API
  2381    * @return TRUE if it has MetaData
  2382    * @stable ICU 2.4
  2383    */
  2384   virtual UBool hasMetaData() const;
  2386   /**
  2387    * Copy a substring of this object, retaining attribute (out-of-band)
  2388    * information.  This method is used to duplicate or reorder substrings.
  2389    * The destination index must not overlap the source range.
  2391    * @param start the beginning index, inclusive; <code>0 <= start <=
  2392    * limit</code>.
  2393    * @param limit the ending index, exclusive; <code>start <= limit <=
  2394    * length()</code>.
  2395    * @param dest the destination index.  The characters from
  2396    * <code>start..limit-1</code> will be copied to <code>dest</code>.
  2397    * Implementations of this method may assume that <code>dest <= start ||
  2398    * dest >= limit</code>.
  2399    * @stable ICU 2.0
  2400    */
  2401   virtual void copy(int32_t start, int32_t limit, int32_t dest);
  2403   /* Search and replace operations */
  2405   /**
  2406    * Replace all occurrences of characters in oldText with the characters
  2407    * in newText
  2408    * @param oldText the text containing the search text
  2409    * @param newText the text containing the replacement text
  2410    * @return a reference to this
  2411    * @stable ICU 2.0
  2412    */
  2413   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
  2414                 const UnicodeString& newText);
  2416   /**
  2417    * Replace all occurrences of characters in oldText with characters
  2418    * in newText
  2419    * in the range [<TT>start</TT>, <TT>start + length</TT>).
  2420    * @param start the start of the range in which replace will be performed
  2421    * @param length the length of the range in which replace will be performed
  2422    * @param oldText the text containing the search text
  2423    * @param newText the text containing the replacement text
  2424    * @return a reference to this
  2425    * @stable ICU 2.0
  2426    */
  2427   inline UnicodeString& findAndReplace(int32_t start,
  2428                 int32_t length,
  2429                 const UnicodeString& oldText,
  2430                 const UnicodeString& newText);
  2432   /**
  2433    * Replace all occurrences of characters in oldText in the range
  2434    * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
  2435    * in newText in the range
  2436    * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
  2437    * in the range [<TT>start</TT>, <TT>start + length</TT>).
  2438    * @param start the start of the range in which replace will be performed
  2439    * @param length the length of the range in which replace will be performed
  2440    * @param oldText the text containing the search text
  2441    * @param oldStart the start of the search range in <TT>oldText</TT>
  2442    * @param oldLength the length of the search range in <TT>oldText</TT>
  2443    * @param newText the text containing the replacement text
  2444    * @param newStart the start of the replacement range in <TT>newText</TT>
  2445    * @param newLength the length of the replacement range in <TT>newText</TT>
  2446    * @return a reference to this
  2447    * @stable ICU 2.0
  2448    */
  2449   UnicodeString& findAndReplace(int32_t start,
  2450                 int32_t length,
  2451                 const UnicodeString& oldText,
  2452                 int32_t oldStart,
  2453                 int32_t oldLength,
  2454                 const UnicodeString& newText,
  2455                 int32_t newStart,
  2456                 int32_t newLength);
  2459   /* Remove operations */
  2461   /**
  2462    * Remove all characters from the UnicodeString object.
  2463    * @return a reference to this
  2464    * @stable ICU 2.0
  2465    */
  2466   inline UnicodeString& remove(void);
  2468   /**
  2469    * Remove the characters in the range
  2470    * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
  2471    * @param start the offset of the first character to remove
  2472    * @param length the number of characters to remove; defaults to INT32_MAX
  2473    * @return a reference to this
  2474    * @stable ICU 2.0
  2475    */
  2476   inline UnicodeString& remove(int32_t start,
  2477                                int32_t length = (int32_t)INT32_MAX);
  2479   /**
  2480    * Remove the characters in the range
  2481    * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
  2482    * @param start the offset of the first character to remove
  2483    * @param limit the offset immediately following the range to remove; defaults to INT32_MAX
  2484    * @return a reference to this
  2485    * @stable ICU 2.0
  2486    */
  2487   inline UnicodeString& removeBetween(int32_t start,
  2488                                       int32_t limit = (int32_t)INT32_MAX);
  2490   /**
  2491    * Retain only the characters in the range
  2492    * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
  2493    * Removes characters before <code>start</code> and at and after <code>limit</code>.
  2494    * @param start the offset of the first character to retain
  2495    * @param limit the offset immediately following the range to retain; defaults to INT32_MAX
  2496    * @return a reference to this
  2497    * @stable ICU 4.4
  2498    */
  2499   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
  2501   /* Length operations */
  2503   /**
  2504    * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
  2505    * If the length of this UnicodeString is less than targetLength,
  2506    * targetLength - length() copies of padChar will be added to the
  2507    * beginning of this UnicodeString.
  2508    * @param targetLength the desired length of the string
  2509    * @param padChar the character to use for padding. Defaults to
  2510    * space (U+0020)
  2511    * @return TRUE if the text was padded, FALSE otherwise.
  2512    * @stable ICU 2.0
  2513    */
  2514   UBool padLeading(int32_t targetLength,
  2515                     UChar padChar = 0x0020);
  2517   /**
  2518    * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
  2519    * If the length of this UnicodeString is less than targetLength,
  2520    * targetLength - length() copies of padChar will be added to the
  2521    * end of this UnicodeString.
  2522    * @param targetLength the desired length of the string
  2523    * @param padChar the character to use for padding. Defaults to
  2524    * space (U+0020)
  2525    * @return TRUE if the text was padded, FALSE otherwise.
  2526    * @stable ICU 2.0
  2527    */
  2528   UBool padTrailing(int32_t targetLength,
  2529                      UChar padChar = 0x0020);
  2531   /**
  2532    * Truncate this UnicodeString to the <TT>targetLength</TT>.
  2533    * @param targetLength the desired length of this UnicodeString.
  2534    * @return TRUE if the text was truncated, FALSE otherwise
  2535    * @stable ICU 2.0
  2536    */
  2537   inline UBool truncate(int32_t targetLength);
  2539   /**
  2540    * Trims leading and trailing whitespace from this UnicodeString.
  2541    * @return a reference to this
  2542    * @stable ICU 2.0
  2543    */
  2544   UnicodeString& trim(void);
  2547   /* Miscellaneous operations */
  2549   /**
  2550    * Reverse this UnicodeString in place.
  2551    * @return a reference to this
  2552    * @stable ICU 2.0
  2553    */
  2554   inline UnicodeString& reverse(void);
  2556   /**
  2557    * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
  2558    * this UnicodeString.
  2559    * @param start the start of the range to reverse
  2560    * @param length the number of characters to reverse
  2561    * @return a reference to this
  2562    * @stable ICU 2.0
  2563    */
  2564   inline UnicodeString& reverse(int32_t start,
  2565              int32_t length);
  2567   /**
  2568    * Convert the characters in this to UPPER CASE following the conventions of
  2569    * the default locale.
  2570    * @return A reference to this.
  2571    * @stable ICU 2.0
  2572    */
  2573   UnicodeString& toUpper(void);
  2575   /**
  2576    * Convert the characters in this to UPPER CASE following the conventions of
  2577    * a specific locale.
  2578    * @param locale The locale containing the conventions to use.
  2579    * @return A reference to this.
  2580    * @stable ICU 2.0
  2581    */
  2582   UnicodeString& toUpper(const Locale& locale);
  2584   /**
  2585    * Convert the characters in this to lower case following the conventions of
  2586    * the default locale.
  2587    * @return A reference to this.
  2588    * @stable ICU 2.0
  2589    */
  2590   UnicodeString& toLower(void);
  2592   /**
  2593    * Convert the characters in this to lower case following the conventions of
  2594    * a specific locale.
  2595    * @param locale The locale containing the conventions to use.
  2596    * @return A reference to this.
  2597    * @stable ICU 2.0
  2598    */
  2599   UnicodeString& toLower(const Locale& locale);
  2601 #if !UCONFIG_NO_BREAK_ITERATION
  2603   /**
  2604    * Titlecase this string, convenience function using the default locale.
  2606    * Casing is locale-dependent and context-sensitive.
  2607    * Titlecasing uses a break iterator to find the first characters of words
  2608    * that are to be titlecased. It titlecases those characters and lowercases
  2609    * all others.
  2611    * The titlecase break iterator can be provided to customize for arbitrary
  2612    * styles, using rules and dictionaries beyond the standard iterators.
  2613    * It may be more efficient to always provide an iterator to avoid
  2614    * opening and closing one for each string.
  2615    * The standard titlecase iterator for the root locale implements the
  2616    * algorithm of Unicode TR 21.
  2618    * This function uses only the setText(), first() and next() methods of the
  2619    * provided break iterator.
  2621    * @param titleIter A break iterator to find the first characters of words
  2622    *                  that are to be titlecased.
  2623    *                  If none is provided (0), then a standard titlecase
  2624    *                  break iterator is opened.
  2625    *                  Otherwise the provided iterator is set to the string's text.
  2626    * @return A reference to this.
  2627    * @stable ICU 2.1
  2628    */
  2629   UnicodeString &toTitle(BreakIterator *titleIter);
  2631   /**
  2632    * Titlecase this string.
  2634    * Casing is locale-dependent and context-sensitive.
  2635    * Titlecasing uses a break iterator to find the first characters of words
  2636    * that are to be titlecased. It titlecases those characters and lowercases
  2637    * all others.
  2639    * The titlecase break iterator can be provided to customize for arbitrary
  2640    * styles, using rules and dictionaries beyond the standard iterators.
  2641    * It may be more efficient to always provide an iterator to avoid
  2642    * opening and closing one for each string.
  2643    * The standard titlecase iterator for the root locale implements the
  2644    * algorithm of Unicode TR 21.
  2646    * This function uses only the setText(), first() and next() methods of the
  2647    * provided break iterator.
  2649    * @param titleIter A break iterator to find the first characters of words
  2650    *                  that are to be titlecased.
  2651    *                  If none is provided (0), then a standard titlecase
  2652    *                  break iterator is opened.
  2653    *                  Otherwise the provided iterator is set to the string's text.
  2654    * @param locale    The locale to consider.
  2655    * @return A reference to this.
  2656    * @stable ICU 2.1
  2657    */
  2658   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
  2660   /**
  2661    * Titlecase this string, with options.
  2663    * Casing is locale-dependent and context-sensitive.
  2664    * Titlecasing uses a break iterator to find the first characters of words
  2665    * that are to be titlecased. It titlecases those characters and lowercases
  2666    * all others. (This can be modified with options.)
  2668    * The titlecase break iterator can be provided to customize for arbitrary
  2669    * styles, using rules and dictionaries beyond the standard iterators.
  2670    * It may be more efficient to always provide an iterator to avoid
  2671    * opening and closing one for each string.
  2672    * The standard titlecase iterator for the root locale implements the
  2673    * algorithm of Unicode TR 21.
  2675    * This function uses only the setText(), first() and next() methods of the
  2676    * provided break iterator.
  2678    * @param titleIter A break iterator to find the first characters of words
  2679    *                  that are to be titlecased.
  2680    *                  If none is provided (0), then a standard titlecase
  2681    *                  break iterator is opened.
  2682    *                  Otherwise the provided iterator is set to the string's text.
  2683    * @param locale    The locale to consider.
  2684    * @param options Options bit set, see ucasemap_open().
  2685    * @return A reference to this.
  2686    * @see U_TITLECASE_NO_LOWERCASE
  2687    * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
  2688    * @see ucasemap_open
  2689    * @stable ICU 3.8
  2690    */
  2691   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
  2693 #endif
  2695   /**
  2696    * Case-folds the characters in this string.
  2698    * Case-folding is locale-independent and not context-sensitive,
  2699    * but there is an option for whether to include or exclude mappings for dotted I
  2700    * and dotless i that are marked with 'T' in CaseFolding.txt.
  2702    * The result may be longer or shorter than the original.
  2704    * @param options Either U_FOLD_CASE_DEFAULT (the default) or U_FOLD_CASE_EXCLUDE_SPECIAL_I
  2705    * @return A reference to this.
  2706    * @stable ICU 2.0
  2707    */
  2708   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
  2710   //========================================
  2711   // Access to the internal buffer
  2712   //========================================
  2714   /**
  2715    * Get a read/write pointer to the internal buffer.
  2716    * The buffer is guaranteed to be large enough for at least minCapacity UChars,
  2717    * writable, and is still owned by the UnicodeString object.
  2718    * Calls to getBuffer(minCapacity) must not be nested, and
  2719    * must be matched with calls to releaseBuffer(newLength).
  2720    * If the string buffer was read-only or shared,
  2721    * then it will be reallocated and copied.
  2723    * An attempted nested call will return 0, and will not further modify the
  2724    * state of the UnicodeString object.
  2725    * It also returns 0 if the string is bogus.
  2727    * The actual capacity of the string buffer may be larger than minCapacity.
  2728    * getCapacity() returns the actual capacity.
  2729    * For many operations, the full capacity should be used to avoid reallocations.
  2731    * While the buffer is "open" between getBuffer(minCapacity)
  2732    * and releaseBuffer(newLength), the following applies:
  2733    * - The string length is set to 0.
  2734    * - Any read API call on the UnicodeString object will behave like on a 0-length string.
  2735    * - Any write API call on the UnicodeString object is disallowed and will have no effect.
  2736    * - You can read from and write to the returned buffer.
  2737    * - The previous string contents will still be in the buffer;
  2738    *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
  2739    *   If the length() was greater than minCapacity, then any contents after minCapacity
  2740    *   may be lost.
  2741    *   The buffer contents are not NUL-terminated by getBuffer().
  2742    *   If length()<getCapacity() then you can terminate it by writing a NUL
  2743    *   at index length().
  2744    * - You must call releaseBuffer(newLength) in order to
  2745    *   return to normal UnicodeString operation.
  2747    * @param minCapacity the minimum number of UChars that are to be available
  2748    *        in the buffer, starting at the returned pointer;
  2749    *        defaults to the current string capacity if minCapacity==-1
  2750    * @return a writable pointer to the internal string buffer,
  2751    *         or 0 if an error occurs (nested calls, bogus string, out of memory)
  2753    * @see releaseBuffer
  2754    * @see getTerminatedBuffer()
  2755    * @stable ICU 2.0
  2756    */
  2757   UChar *getBuffer(int32_t minCapacity);
  2759   /**
  2760    * Release a read/write buffer on a UnicodeString object with an
  2761    * "open" getBuffer(minCapacity).
  2762    * This function must be called in a matched pair with getBuffer(minCapacity).
  2763    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
  2765    * It will set the string length to newLength, at most to the current capacity.
  2766    * If newLength==-1 then it will set the length according to the
  2767    * first NUL in the buffer, or to the capacity if there is no NUL.
  2769    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
  2771    * @param newLength the new length of the UnicodeString object;
  2772    *        defaults to the current capacity if newLength is greater than that;
  2773    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
  2774    *        the current capacity of the string
  2776    * @see getBuffer(int32_t minCapacity)
  2777    * @stable ICU 2.0
  2778    */
  2779   void releaseBuffer(int32_t newLength=-1);
  2781   /**
  2782    * Get a read-only pointer to the internal buffer.
  2783    * This can be called at any time on a valid UnicodeString.
  2785    * It returns 0 if the string is bogus, or
  2786    * during an "open" getBuffer(minCapacity).
  2788    * It can be called as many times as desired.
  2789    * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2790    * at which time the pointer is semantically invalidated and must not be used any more.
  2792    * The capacity of the buffer can be determined with getCapacity().
  2793    * The part after length() may or may not be initialized and valid,
  2794    * depending on the history of the UnicodeString object.
  2796    * The buffer contents are (probably) not NUL-terminated.
  2797    * You can check if they are with
  2798    * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
  2799    * (See getTerminatedBuffer().)
  2801    * The buffer may reside in read-only memory. Its contents must not
  2802    * be modified.
  2804    * @return a read-only pointer to the internal string buffer,
  2805    *         or 0 if the string is bogus or a getBuffer(minCapacity) is "open"
  2807    * @see getBuffer(int32_t minCapacity)
  2808    * @see getTerminatedBuffer()
  2809    * @stable ICU 2.0
  2810    */
  2811   inline const UChar *getBuffer() const;
  2813   /**
  2814    * Get a read-only pointer to the internal buffer,
  2815    * making sure that it is NUL-terminated.
  2816    * This can be called at any time on a valid UnicodeString.
  2818    * It returns 0 if the string is bogus, or
  2819    * during an "open" getBuffer(minCapacity), or if the buffer cannot
  2820    * be NUL-terminated (because memory allocation failed).
  2822    * It can be called as many times as desired.
  2823    * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2824    * at which time the pointer is semantically invalidated and must not be used any more.
  2826    * The capacity of the buffer can be determined with getCapacity().
  2827    * The part after length()+1 may or may not be initialized and valid,
  2828    * depending on the history of the UnicodeString object.
  2830    * The buffer contents are guaranteed to be NUL-terminated.
  2831    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
  2832    * is written.
  2833    * For this reason, this function is not const, unlike getBuffer().
  2834    * Note that a UnicodeString may also contain NUL characters as part of its contents.
  2836    * The buffer may reside in read-only memory. Its contents must not
  2837    * be modified.
  2839    * @return a read-only pointer to the internal string buffer, or 0 if the string is bogus,
  2840    *         a getBuffer(minCapacity) is "open", or the buffer cannot be NUL-terminated
  2842    * @see getBuffer(int32_t minCapacity)
  2843    * @see getBuffer()
  2844    * @stable ICU 2.2
  2845    */
  2846   const UChar *getTerminatedBuffer();
  2848   //========================================
  2849   // Constructors
  2850   //========================================
  2852   /** Construct an empty UnicodeString.
  2853    * @stable ICU 2.0
  2854    */
  2855   inline UnicodeString();
  2857   /**
  2858    * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
  2859    * @param capacity the number of UChars this UnicodeString should hold
  2860    * before a resize is necessary; if count is greater than 0 and count
  2861    * code points c take up more space than capacity, then capacity is adjusted
  2862    * accordingly.
  2863    * @param c is used to initially fill the string
  2864    * @param count specifies how many code points c are to be written in the
  2865    *              string
  2866    * @stable ICU 2.0
  2867    */
  2868   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
  2870   /**
  2871    * Single UChar (code unit) constructor.
  2873    * It is recommended to mark this constructor "explicit" by
  2874    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
  2875    * on the compiler command line or similar.
  2876    * @param ch the character to place in the UnicodeString
  2877    * @stable ICU 2.0
  2878    */
  2879   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
  2881   /**
  2882    * Single UChar32 (code point) constructor.
  2884    * It is recommended to mark this constructor "explicit" by
  2885    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
  2886    * on the compiler command line or similar.
  2887    * @param ch the character to place in the UnicodeString
  2888    * @stable ICU 2.0
  2889    */
  2890   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
  2892   /**
  2893    * UChar* constructor.
  2895    * It is recommended to mark this constructor "explicit" by
  2896    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
  2897    * on the compiler command line or similar.
  2898    * @param text The characters to place in the UnicodeString.  <TT>text</TT>
  2899    * must be NUL (U+0000) terminated.
  2900    * @stable ICU 2.0
  2901    */
  2902   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
  2904   /**
  2905    * UChar* constructor.
  2906    * @param text The characters to place in the UnicodeString.
  2907    * @param textLength The number of Unicode characters in <TT>text</TT>
  2908    * to copy.
  2909    * @stable ICU 2.0
  2910    */
  2911   UnicodeString(const UChar *text,
  2912         int32_t textLength);
  2914   /**
  2915    * Readonly-aliasing UChar* constructor.
  2916    * The text will be used for the UnicodeString object, but
  2917    * it will not be released when the UnicodeString is destroyed.
  2918    * This has copy-on-write semantics:
  2919    * When the string is modified, then the buffer is first copied into
  2920    * newly allocated memory.
  2921    * The aliased buffer is never modified.
  2923    * In an assignment to another UnicodeString, when using the copy constructor
  2924    * or the assignment operator, the text will be copied.
  2925    * When using fastCopyFrom(), the text will be aliased again,
  2926    * so that both strings then alias the same readonly-text.
  2928    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
  2929    *                     This must be true if <code>textLength==-1</code>.
  2930    * @param text The characters to alias for the UnicodeString.
  2931    * @param textLength The number of Unicode characters in <code>text</code> to alias.
  2932    *                   If -1, then this constructor will determine the length
  2933    *                   by calling <code>u_strlen()</code>.
  2934    * @stable ICU 2.0
  2935    */
  2936   UnicodeString(UBool isTerminated,
  2937                 const UChar *text,
  2938                 int32_t textLength);
  2940   /**
  2941    * Writable-aliasing UChar* constructor.
  2942    * The text will be used for the UnicodeString object, but
  2943    * it will not be released when the UnicodeString is destroyed.
  2944    * This has write-through semantics:
  2945    * For as long as the capacity of the buffer is sufficient, write operations
  2946    * will directly affect the buffer. When more capacity is necessary, then
  2947    * a new buffer will be allocated and the contents copied as with regularly
  2948    * constructed strings.
  2949    * In an assignment to another UnicodeString, the buffer will be copied.
  2950    * The extract(UChar *dst) function detects whether the dst pointer is the same
  2951    * as the string buffer itself and will in this case not copy the contents.
  2953    * @param buffer The characters to alias for the UnicodeString.
  2954    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
  2955    * @param buffCapacity The size of <code>buffer</code> in UChars.
  2956    * @stable ICU 2.0
  2957    */
  2958   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
  2960 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  2962   /**
  2963    * char* constructor.
  2964    * Uses the default converter (and thus depends on the ICU conversion code)
  2965    * unless U_CHARSET_IS_UTF8 is set to 1.
  2967    * For ASCII (really "invariant character") strings it is more efficient to use
  2968    * the constructor that takes a US_INV (for its enum EInvariant).
  2969    * For ASCII (invariant-character) string literals, see UNICODE_STRING and
  2970    * UNICODE_STRING_SIMPLE.
  2972    * It is recommended to mark this constructor "explicit" by
  2973    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
  2974    * on the compiler command line or similar.
  2975    * @param codepageData an array of bytes, null-terminated,
  2976    *                     in the platform's default codepage.
  2977    * @stable ICU 2.0
  2978    * @see UNICODE_STRING
  2979    * @see UNICODE_STRING_SIMPLE
  2980    */
  2981   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
  2983   /**
  2984    * char* constructor.
  2985    * Uses the default converter (and thus depends on the ICU conversion code)
  2986    * unless U_CHARSET_IS_UTF8 is set to 1.
  2987    * @param codepageData an array of bytes in the platform's default codepage.
  2988    * @param dataLength The number of bytes in <TT>codepageData</TT>.
  2989    * @stable ICU 2.0
  2990    */
  2991   UnicodeString(const char *codepageData, int32_t dataLength);
  2993 #endif
  2995 #if !UCONFIG_NO_CONVERSION
  2997   /**
  2998    * char* constructor.
  2999    * @param codepageData an array of bytes, null-terminated
  3000    * @param codepage the encoding of <TT>codepageData</TT>.  The special
  3001    * value 0 for <TT>codepage</TT> indicates that the text is in the
  3002    * platform's default codepage.
  3004    * If <code>codepage</code> is an empty string (<code>""</code>),
  3005    * then a simple conversion is performed on the codepage-invariant
  3006    * subset ("invariant characters") of the platform encoding. See utypes.h.
  3007    * Recommendation: For invariant-character strings use the constructor
  3008    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  3009    * because it avoids object code dependencies of UnicodeString on
  3010    * the conversion code.
  3012    * @stable ICU 2.0
  3013    */
  3014   UnicodeString(const char *codepageData, const char *codepage);
  3016   /**
  3017    * char* constructor.
  3018    * @param codepageData an array of bytes.
  3019    * @param dataLength The number of bytes in <TT>codepageData</TT>.
  3020    * @param codepage the encoding of <TT>codepageData</TT>.  The special
  3021    * value 0 for <TT>codepage</TT> indicates that the text is in the
  3022    * platform's default codepage.
  3023    * If <code>codepage</code> is an empty string (<code>""</code>),
  3024    * then a simple conversion is performed on the codepage-invariant
  3025    * subset ("invariant characters") of the platform encoding. See utypes.h.
  3026    * Recommendation: For invariant-character strings use the constructor
  3027    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  3028    * because it avoids object code dependencies of UnicodeString on
  3029    * the conversion code.
  3031    * @stable ICU 2.0
  3032    */
  3033   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
  3035   /**
  3036    * char * / UConverter constructor.
  3037    * This constructor uses an existing UConverter object to
  3038    * convert the codepage string to Unicode and construct a UnicodeString
  3039    * from that.
  3041    * The converter is reset at first.
  3042    * If the error code indicates a failure before this constructor is called,
  3043    * or if an error occurs during conversion or construction,
  3044    * then the string will be bogus.
  3046    * This function avoids the overhead of opening and closing a converter if
  3047    * multiple strings are constructed.
  3049    * @param src input codepage string
  3050    * @param srcLength length of the input string, can be -1 for NUL-terminated strings
  3051    * @param cnv converter object (ucnv_resetToUnicode() will be called),
  3052    *        can be NULL for the default converter
  3053    * @param errorCode normal ICU error code
  3054    * @stable ICU 2.0
  3055    */
  3056   UnicodeString(
  3057         const char *src, int32_t srcLength,
  3058         UConverter *cnv,
  3059         UErrorCode &errorCode);
  3061 #endif
  3063   /**
  3064    * Constructs a Unicode string from an invariant-character char * string.
  3065    * About invariant characters see utypes.h.
  3066    * This constructor has no runtime dependency on conversion code and is
  3067    * therefore recommended over ones taking a charset name string
  3068    * (where the empty string "" indicates invariant-character conversion).
  3070    * Use the macro US_INV as the third, signature-distinguishing parameter.
  3072    * For example:
  3073    * \code
  3074    * void fn(const char *s) {
  3075    *   UnicodeString ustr(s, -1, US_INV);
  3076    *   // use ustr ...
  3077    * }
  3078    * \endcode
  3080    * @param src String using only invariant characters.
  3081    * @param length Length of src, or -1 if NUL-terminated.
  3082    * @param inv Signature-distinguishing parameter, use US_INV.
  3084    * @see US_INV
  3085    * @stable ICU 3.2
  3086    */
  3087   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
  3090   /**
  3091    * Copy constructor.
  3092    * @param that The UnicodeString object to copy.
  3093    * @stable ICU 2.0
  3094    */
  3095   UnicodeString(const UnicodeString& that);
  3097   /**
  3098    * 'Substring' constructor from tail of source string.
  3099    * @param src The UnicodeString object to copy.
  3100    * @param srcStart The offset into <tt>src</tt> at which to start copying.
  3101    * @stable ICU 2.2
  3102    */
  3103   UnicodeString(const UnicodeString& src, int32_t srcStart);
  3105   /**
  3106    * 'Substring' constructor from subrange of source string.
  3107    * @param src The UnicodeString object to copy.
  3108    * @param srcStart The offset into <tt>src</tt> at which to start copying.
  3109    * @param srcLength The number of characters from <tt>src</tt> to copy.
  3110    * @stable ICU 2.2
  3111    */
  3112   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  3114   /**
  3115    * Clone this object, an instance of a subclass of Replaceable.
  3116    * Clones can be used concurrently in multiple threads.
  3117    * If a subclass does not implement clone(), or if an error occurs,
  3118    * then NULL is returned.
  3119    * The clone functions in all subclasses return a pointer to a Replaceable
  3120    * because some compilers do not support covariant (same-as-this)
  3121    * return types; cast to the appropriate subclass if necessary.
  3122    * The caller must delete the clone.
  3124    * @return a clone of this object
  3126    * @see Replaceable::clone
  3127    * @see getDynamicClassID
  3128    * @stable ICU 2.6
  3129    */
  3130   virtual Replaceable *clone() const;
  3132   /** Destructor.
  3133    * @stable ICU 2.0
  3134    */
  3135   virtual ~UnicodeString();
  3137   /**
  3138    * Create a UnicodeString from a UTF-8 string.
  3139    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
  3140    * Calls u_strFromUTF8WithSub().
  3142    * @param utf8 UTF-8 input string.
  3143    *             Note that a StringPiece can be implicitly constructed
  3144    *             from a std::string or a NUL-terminated const char * string.
  3145    * @return A UnicodeString with equivalent UTF-16 contents.
  3146    * @see toUTF8
  3147    * @see toUTF8String
  3148    * @stable ICU 4.2
  3149    */
  3150   static UnicodeString fromUTF8(const StringPiece &utf8);
  3152   /**
  3153    * Create a UnicodeString from a UTF-32 string.
  3154    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
  3155    * Calls u_strFromUTF32WithSub().
  3157    * @param utf32 UTF-32 input string. Must not be NULL.
  3158    * @param length Length of the input string, or -1 if NUL-terminated.
  3159    * @return A UnicodeString with equivalent UTF-16 contents.
  3160    * @see toUTF32
  3161    * @stable ICU 4.2
  3162    */
  3163   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
  3165   /* Miscellaneous operations */
  3167   /**
  3168    * Unescape a string of characters and return a string containing
  3169    * the result.  The following escape sequences are recognized:
  3171    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
  3172    * \\Uhhhhhhhh   8 hex digits
  3173    * \\xhh         1-2 hex digits
  3174    * \\ooo         1-3 octal digits; o in [0-7]
  3175    * \\cX          control-X; X is masked with 0x1F
  3177    * as well as the standard ANSI C escapes:
  3179    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
  3180    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
  3181    * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
  3183    * Anything else following a backslash is generically escaped.  For
  3184    * example, "[a\\-z]" returns "[a-z]".
  3186    * If an escape sequence is ill-formed, this method returns an empty
  3187    * string.  An example of an ill-formed sequence is "\\u" followed by
  3188    * fewer than 4 hex digits.
  3190    * This function is similar to u_unescape() but not identical to it.
  3191    * The latter takes a source char*, so it does escape recognition
  3192    * and also invariant conversion.
  3194    * @return a string with backslash escapes interpreted, or an
  3195    * empty string on error.
  3196    * @see UnicodeString#unescapeAt()
  3197    * @see u_unescape()
  3198    * @see u_unescapeAt()
  3199    * @stable ICU 2.0
  3200    */
  3201   UnicodeString unescape() const;
  3203   /**
  3204    * Unescape a single escape sequence and return the represented
  3205    * character.  See unescape() for a listing of the recognized escape
  3206    * sequences.  The character at offset-1 is assumed (without
  3207    * checking) to be a backslash.  If the escape sequence is
  3208    * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
  3209    * returned.
  3211    * @param offset an input output parameter.  On input, it is the
  3212    * offset into this string where the escape sequence is located,
  3213    * after the initial backslash.  On output, it is advanced after the
  3214    * last character parsed.  On error, it is not advanced at all.
  3215    * @return the character represented by the escape sequence at
  3216    * offset, or U_SENTINEL=-1 on error.
  3217    * @see UnicodeString#unescape()
  3218    * @see u_unescape()
  3219    * @see u_unescapeAt()
  3220    * @stable ICU 2.0
  3221    */
  3222   UChar32 unescapeAt(int32_t &offset) const;
  3224   /**
  3225    * ICU "poor man's RTTI", returns a UClassID for this class.
  3227    * @stable ICU 2.2
  3228    */
  3229   static UClassID U_EXPORT2 getStaticClassID();
  3231   /**
  3232    * ICU "poor man's RTTI", returns a UClassID for the actual class.
  3234    * @stable ICU 2.2
  3235    */
  3236   virtual UClassID getDynamicClassID() const;
  3238   //========================================
  3239   // Implementation methods
  3240   //========================================
  3242 protected:
  3243   /**
  3244    * Implement Replaceable::getLength() (see jitterbug 1027).
  3245    * @stable ICU 2.4
  3246    */
  3247   virtual int32_t getLength() const;
  3249   /**
  3250    * The change in Replaceable to use virtual getCharAt() allows
  3251    * UnicodeString::charAt() to be inline again (see jitterbug 709).
  3252    * @stable ICU 2.4
  3253    */
  3254   virtual UChar getCharAt(int32_t offset) const;
  3256   /**
  3257    * The change in Replaceable to use virtual getChar32At() allows
  3258    * UnicodeString::char32At() to be inline again (see jitterbug 709).
  3259    * @stable ICU 2.4
  3260    */
  3261   virtual UChar32 getChar32At(int32_t offset) const;
  3263 private:
  3264   // For char* constructors. Could be made public.
  3265   UnicodeString &setToUTF8(const StringPiece &utf8);
  3266   // For extract(char*).
  3267   // We could make a toUTF8(target, capacity, errorCode) public but not
  3268   // this version: New API will be cleaner if we make callers create substrings
  3269   // rather than having start+length on every method,
  3270   // and it should take a UErrorCode&.
  3271   int32_t
  3272   toUTF8(int32_t start, int32_t len,
  3273          char *target, int32_t capacity) const;
  3275   /**
  3276    * Internal string contents comparison, called by operator==.
  3277    * Requires: this & text not bogus and have same lengths.
  3278    */
  3279   UBool doEquals(const UnicodeString &text, int32_t len) const;
  3281   inline int8_t
  3282   doCompare(int32_t start,
  3283            int32_t length,
  3284            const UnicodeString& srcText,
  3285            int32_t srcStart,
  3286            int32_t srcLength) const;
  3288   int8_t doCompare(int32_t start,
  3289            int32_t length,
  3290            const UChar *srcChars,
  3291            int32_t srcStart,
  3292            int32_t srcLength) const;
  3294   inline int8_t
  3295   doCompareCodePointOrder(int32_t start,
  3296                           int32_t length,
  3297                           const UnicodeString& srcText,
  3298                           int32_t srcStart,
  3299                           int32_t srcLength) const;
  3301   int8_t doCompareCodePointOrder(int32_t start,
  3302                                  int32_t length,
  3303                                  const UChar *srcChars,
  3304                                  int32_t srcStart,
  3305                                  int32_t srcLength) const;
  3307   inline int8_t
  3308   doCaseCompare(int32_t start,
  3309                 int32_t length,
  3310                 const UnicodeString &srcText,
  3311                 int32_t srcStart,
  3312                 int32_t srcLength,
  3313                 uint32_t options) const;
  3315   int8_t
  3316   doCaseCompare(int32_t start,
  3317                 int32_t length,
  3318                 const UChar *srcChars,
  3319                 int32_t srcStart,
  3320                 int32_t srcLength,
  3321                 uint32_t options) const;
  3323   int32_t doIndexOf(UChar c,
  3324             int32_t start,
  3325             int32_t length) const;
  3327   int32_t doIndexOf(UChar32 c,
  3328                         int32_t start,
  3329                         int32_t length) const;
  3331   int32_t doLastIndexOf(UChar c,
  3332                 int32_t start,
  3333                 int32_t length) const;
  3335   int32_t doLastIndexOf(UChar32 c,
  3336                             int32_t start,
  3337                             int32_t length) const;
  3339   void doExtract(int32_t start,
  3340          int32_t length,
  3341          UChar *dst,
  3342          int32_t dstStart) const;
  3344   inline void doExtract(int32_t start,
  3345          int32_t length,
  3346          UnicodeString& target) const;
  3348   inline UChar doCharAt(int32_t offset)  const;
  3350   UnicodeString& doReplace(int32_t start,
  3351                int32_t length,
  3352                const UnicodeString& srcText,
  3353                int32_t srcStart,
  3354                int32_t srcLength);
  3356   UnicodeString& doReplace(int32_t start,
  3357                int32_t length,
  3358                const UChar *srcChars,
  3359                int32_t srcStart,
  3360                int32_t srcLength);
  3362   UnicodeString& doReverse(int32_t start,
  3363                int32_t length);
  3365   // calculate hash code
  3366   int32_t doHashCode(void) const;
  3368   // get pointer to start of array
  3369   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
  3370   inline UChar* getArrayStart(void);
  3371   inline const UChar* getArrayStart(void) const;
  3373   // A UnicodeString object (not necessarily its current buffer)
  3374   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
  3375   inline UBool isWritable() const;
  3377   // Is the current buffer writable?
  3378   inline UBool isBufferWritable() const;
  3380   // None of the following does releaseArray().
  3381   inline void setLength(int32_t len);        // sets only fShortLength and fLength
  3382   inline void setToEmpty();                  // sets fFlags=kShortString
  3383   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
  3385   // allocate the array; result may be fStackBuffer
  3386   // sets refCount to 1 if appropriate
  3387   // sets fArray, fCapacity, and fFlags
  3388   // returns boolean for success or failure
  3389   UBool allocate(int32_t capacity);
  3391   // release the array if owned
  3392   void releaseArray(void);
  3394   // turn a bogus string into an empty one
  3395   void unBogus();
  3397   // implements assignment operator, copy constructor, and fastCopyFrom()
  3398   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
  3400   // Pin start and length to acceptable values.
  3401   inline void pinIndex(int32_t& start) const;
  3402   inline void pinIndices(int32_t& start,
  3403                          int32_t& length) const;
  3405 #if !UCONFIG_NO_CONVERSION
  3407   /* Internal extract() using UConverter. */
  3408   int32_t doExtract(int32_t start, int32_t length,
  3409                     char *dest, int32_t destCapacity,
  3410                     UConverter *cnv,
  3411                     UErrorCode &errorCode) const;
  3413   /*
  3414    * Real constructor for converting from codepage data.
  3415    * It assumes that it is called with !fRefCounted.
  3417    * If <code>codepage==0</code>, then the default converter
  3418    * is used for the platform encoding.
  3419    * If <code>codepage</code> is an empty string (<code>""</code>),
  3420    * then a simple conversion is performed on the codepage-invariant
  3421    * subset ("invariant characters") of the platform encoding. See utypes.h.
  3422    */
  3423   void doCodepageCreate(const char *codepageData,
  3424                         int32_t dataLength,
  3425                         const char *codepage);
  3427   /*
  3428    * Worker function for creating a UnicodeString from
  3429    * a codepage string using a UConverter.
  3430    */
  3431   void
  3432   doCodepageCreate(const char *codepageData,
  3433                    int32_t dataLength,
  3434                    UConverter *converter,
  3435                    UErrorCode &status);
  3437 #endif
  3439   /*
  3440    * This function is called when write access to the array
  3441    * is necessary.
  3443    * We need to make a copy of the array if
  3444    * the buffer is read-only, or
  3445    * the buffer is refCounted (shared), and refCount>1, or
  3446    * the buffer is too small.
  3448    * Return FALSE if memory could not be allocated.
  3449    */
  3450   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
  3451                             int32_t growCapacity = -1,
  3452                             UBool doCopyArray = TRUE,
  3453                             int32_t **pBufferToDelete = 0,
  3454                             UBool forceClone = FALSE);
  3456   /**
  3457    * Common function for UnicodeString case mappings.
  3458    * The stringCaseMapper has the same type UStringCaseMapper
  3459    * as in ustr_imp.h for ustrcase_map().
  3460    */
  3461   UnicodeString &
  3462   caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
  3464   // ref counting
  3465   void addRef(void);
  3466   int32_t removeRef(void);
  3467   int32_t refCount(void) const;
  3469   // constants
  3470   enum {
  3471     // Set the stack buffer size so that sizeof(UnicodeString) is,
  3472     // naturally (without padding), a multiple of sizeof(pointer).
  3473     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
  3474     kInvalidUChar=0xffff, // invalid UChar index
  3475     kGrowSize=128, // grow size for this buffer
  3476     kInvalidHashCode=0, // invalid hash code
  3477     kEmptyHashCode=1, // hash code for empty string
  3479     // bit flag values for fFlags
  3480     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
  3481     kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
  3482     kRefCounted=4,      // there is a refCount field before the characters in fArray
  3483     kBufferIsReadonly=8,// do not write to this buffer
  3484     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
  3485                         // and releaseBuffer(newLength) must be called
  3487     // combined values for convenience
  3488     kShortString=kUsingStackBuffer,
  3489     kLongString=kRefCounted,
  3490     kReadonlyAlias=kBufferIsReadonly,
  3491     kWritableAlias=0
  3492   };
  3494   friend class StringThreadTest;
  3495   friend class UnicodeStringAppendable;
  3497   union StackBufferOrFields;        // forward declaration necessary before friend declaration
  3498   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
  3500   /*
  3501    * The following are all the class fields that are stored
  3502    * in each UnicodeString object.
  3503    * Note that UnicodeString has virtual functions,
  3504    * therefore there is an implicit vtable pointer
  3505    * as the first real field.
  3506    * The fields should be aligned such that no padding is necessary.
  3507    * On 32-bit machines, the size should be 32 bytes,
  3508    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
  3510    * We use a hack to achieve this.
  3512    * With at least some compilers, each of the following is forced to
  3513    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
  3514    * rounded up with additional padding if the fields do not already fit that requirement:
  3515    * - sizeof(class UnicodeString)
  3516    * - offsetof(UnicodeString, fUnion)
  3517    * - sizeof(fUnion)
  3518    * - sizeof(fFields)
  3520    * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
  3521    * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
  3522    * (Padding at the end of fFields is ok:
  3523    * As long as there is no padding after fStackBuffer, it is not wasted space.)
  3525    * We further assume that the compiler does not reorder the fields,
  3526    * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
  3527    * with at most some padding (but no other field) in between.
  3528    * (Padding there would be wasted space, but functionally harmless.)
  3530    * We use a few more sizeof(pointer)'s chunks of space with
  3531    * fRestOfStackBuffer, fShortLength and fFlags,
  3532    * to get up exactly to the intended sizeof(UnicodeString).
  3533    */
  3534   // (implicit) *vtable;
  3535   union StackBufferOrFields {
  3536     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
  3537     // else fFields is used
  3538     UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
  3539     struct {
  3540       UChar   *fArray;    // the Unicode data
  3541       int32_t fCapacity;  // capacity of fArray (in UChars)
  3542       int32_t fLength;    // number of characters in fArray if >127; else undefined
  3543     } fFields;
  3544   } fUnion;
  3545   UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
  3546   int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
  3547   uint8_t fFlags;       // bit flags: see constants above
  3548 };
/**
 * Create a new UnicodeString with the concatenation of two others.
 * Both operands are taken by const reference; the result is returned by value.
 *
 * @param s1 The first string to be copied to the new one.
 * @param s2 The second string to be copied to the new one, after s1.
 * @return UnicodeString(s1).append(s2)
 * @stable ICU 2.8
 */
U_COMMON_API UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2);
  3561 //========================================
  3562 // Inline members
  3563 //========================================
  3565 //========================================
  3566 // Privates
  3567 //========================================
  3569 inline void
  3570 UnicodeString::pinIndex(int32_t& start) const
  3572   // pin index
  3573   if(start < 0) {
  3574     start = 0;
  3575   } else if(start > length()) {
  3576     start = length();
  3580 inline void
  3581 UnicodeString::pinIndices(int32_t& start,
  3582                           int32_t& _length) const
  3584   // pin indices
  3585   int32_t len = length();
  3586   if(start < 0) {
  3587     start = 0;
  3588   } else if(start > len) {
  3589     start = len;
  3591   if(_length < 0) {
  3592     _length = 0;
  3593   } else if(_length > (len - start)) {
  3594     _length = (len - start);
  3598 inline UChar*
  3599 UnicodeString::getArrayStart()
  3600 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
  3602 inline const UChar*
  3603 UnicodeString::getArrayStart() const
  3604 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
  3606 //========================================
  3607 // Default constructor
  3608 //========================================
  3610 inline
  3611 UnicodeString::UnicodeString()
  3612   : fShortLength(0),
  3613     fFlags(kShortString)
  3614 {}
  3616 //========================================
  3617 // Read-only implementation methods
  3618 //========================================
  3619 inline int32_t
  3620 UnicodeString::length() const
  3621 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
  3623 inline int32_t
  3624 UnicodeString::getCapacity() const
  3625 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
  3627 inline int32_t
  3628 UnicodeString::hashCode() const
  3629 { return doHashCode(); }
  3631 inline UBool
  3632 UnicodeString::isBogus() const
  3633 { return (UBool)(fFlags & kIsBogus); }
  3635 inline UBool
  3636 UnicodeString::isWritable() const
  3637 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
  3639 inline UBool
  3640 UnicodeString::isBufferWritable() const
  3642   return (UBool)(
  3643       !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
  3644       (!(fFlags&kRefCounted) || refCount()==1));
  3647 inline const UChar *
  3648 UnicodeString::getBuffer() const {
  3649   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
  3650     return 0;
  3651   } else if(fFlags&kUsingStackBuffer) {
  3652     return fUnion.fStackBuffer;
  3653   } else {
  3654     return fUnion.fFields.fArray;
  3658 //========================================
  3659 // Read-only alias methods
  3660 //========================================
  3661 inline int8_t
  3662 UnicodeString::doCompare(int32_t start,
  3663               int32_t thisLength,
  3664               const UnicodeString& srcText,
  3665               int32_t srcStart,
  3666               int32_t srcLength) const
  3668   if(srcText.isBogus()) {
  3669     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3670   } else {
  3671     srcText.pinIndices(srcStart, srcLength);
  3672     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  3676 inline UBool
  3677 UnicodeString::operator== (const UnicodeString& text) const
  3679   if(isBogus()) {
  3680     return text.isBogus();
  3681   } else {
  3682     int32_t len = length(), textLength = text.length();
  3683     return !text.isBogus() && len == textLength && doEquals(text, len);
  3687 inline UBool
  3688 UnicodeString::operator!= (const UnicodeString& text) const
  3689 { return (! operator==(text)); }
  3691 inline UBool
  3692 UnicodeString::operator> (const UnicodeString& text) const
  3693 { return doCompare(0, length(), text, 0, text.length()) == 1; }
  3695 inline UBool
  3696 UnicodeString::operator< (const UnicodeString& text) const
  3697 { return doCompare(0, length(), text, 0, text.length()) == -1; }
  3699 inline UBool
  3700 UnicodeString::operator>= (const UnicodeString& text) const
  3701 { return doCompare(0, length(), text, 0, text.length()) != -1; }
  3703 inline UBool
  3704 UnicodeString::operator<= (const UnicodeString& text) const
  3705 { return doCompare(0, length(), text, 0, text.length()) != 1; }
  3707 inline int8_t
  3708 UnicodeString::compare(const UnicodeString& text) const
  3709 { return doCompare(0, length(), text, 0, text.length()); }
  3711 inline int8_t
  3712 UnicodeString::compare(int32_t start,
  3713                int32_t _length,
  3714                const UnicodeString& srcText) const
  3715 { return doCompare(start, _length, srcText, 0, srcText.length()); }
  3717 inline int8_t
  3718 UnicodeString::compare(const UChar *srcChars,
  3719                int32_t srcLength) const
  3720 { return doCompare(0, length(), srcChars, 0, srcLength); }
  3722 inline int8_t
  3723 UnicodeString::compare(int32_t start,
  3724                int32_t _length,
  3725                const UnicodeString& srcText,
  3726                int32_t srcStart,
  3727                int32_t srcLength) const
  3728 { return doCompare(start, _length, srcText, srcStart, srcLength); }
  3730 inline int8_t
  3731 UnicodeString::compare(int32_t start,
  3732                int32_t _length,
  3733                const UChar *srcChars) const
  3734 { return doCompare(start, _length, srcChars, 0, _length); }
  3736 inline int8_t
  3737 UnicodeString::compare(int32_t start,
  3738                int32_t _length,
  3739                const UChar *srcChars,
  3740                int32_t srcStart,
  3741                int32_t srcLength) const
  3742 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
  3744 inline int8_t
  3745 UnicodeString::compareBetween(int32_t start,
  3746                   int32_t limit,
  3747                   const UnicodeString& srcText,
  3748                   int32_t srcStart,
  3749                   int32_t srcLimit) const
  3750 { return doCompare(start, limit - start,
  3751            srcText, srcStart, srcLimit - srcStart); }
  3753 inline int8_t
  3754 UnicodeString::doCompareCodePointOrder(int32_t start,
  3755                                        int32_t thisLength,
  3756                                        const UnicodeString& srcText,
  3757                                        int32_t srcStart,
  3758                                        int32_t srcLength) const
  3760   if(srcText.isBogus()) {
  3761     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3762   } else {
  3763     srcText.pinIndices(srcStart, srcLength);
  3764     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  3768 inline int8_t
  3769 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
  3770 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
  3772 inline int8_t
  3773 UnicodeString::compareCodePointOrder(int32_t start,
  3774                                      int32_t _length,
  3775                                      const UnicodeString& srcText) const
  3776 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
  3778 inline int8_t
  3779 UnicodeString::compareCodePointOrder(const UChar *srcChars,
  3780                                      int32_t srcLength) const
  3781 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
  3783 inline int8_t
  3784 UnicodeString::compareCodePointOrder(int32_t start,
  3785                                      int32_t _length,
  3786                                      const UnicodeString& srcText,
  3787                                      int32_t srcStart,
  3788                                      int32_t srcLength) const
  3789 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
  3791 inline int8_t
  3792 UnicodeString::compareCodePointOrder(int32_t start,
  3793                                      int32_t _length,
  3794                                      const UChar *srcChars) const
  3795 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
  3797 inline int8_t
  3798 UnicodeString::compareCodePointOrder(int32_t start,
  3799                                      int32_t _length,
  3800                                      const UChar *srcChars,
  3801                                      int32_t srcStart,
  3802                                      int32_t srcLength) const
  3803 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
  3805 inline int8_t
  3806 UnicodeString::compareCodePointOrderBetween(int32_t start,
  3807                                             int32_t limit,
  3808                                             const UnicodeString& srcText,
  3809                                             int32_t srcStart,
  3810                                             int32_t srcLimit) const
  3811 { return doCompareCodePointOrder(start, limit - start,
  3812            srcText, srcStart, srcLimit - srcStart); }
  3814 inline int8_t
  3815 UnicodeString::doCaseCompare(int32_t start,
  3816                              int32_t thisLength,
  3817                              const UnicodeString &srcText,
  3818                              int32_t srcStart,
  3819                              int32_t srcLength,
  3820                              uint32_t options) const
  3822   if(srcText.isBogus()) {
  3823     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3824   } else {
  3825     srcText.pinIndices(srcStart, srcLength);
  3826     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
  3830 inline int8_t
  3831 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
  3832   return doCaseCompare(0, length(), text, 0, text.length(), options);
  3835 inline int8_t
  3836 UnicodeString::caseCompare(int32_t start,
  3837                            int32_t _length,
  3838                            const UnicodeString &srcText,
  3839                            uint32_t options) const {
  3840   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
  3843 inline int8_t
  3844 UnicodeString::caseCompare(const UChar *srcChars,
  3845                            int32_t srcLength,
  3846                            uint32_t options) const {
  3847   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
  3850 inline int8_t
  3851 UnicodeString::caseCompare(int32_t start,
  3852                            int32_t _length,
  3853                            const UnicodeString &srcText,
  3854                            int32_t srcStart,
  3855                            int32_t srcLength,
  3856                            uint32_t options) const {
  3857   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
  3860 inline int8_t
  3861 UnicodeString::caseCompare(int32_t start,
  3862                            int32_t _length,
  3863                            const UChar *srcChars,
  3864                            uint32_t options) const {
  3865   return doCaseCompare(start, _length, srcChars, 0, _length, options);
  3868 inline int8_t
  3869 UnicodeString::caseCompare(int32_t start,
  3870                            int32_t _length,
  3871                            const UChar *srcChars,
  3872                            int32_t srcStart,
  3873                            int32_t srcLength,
  3874                            uint32_t options) const {
  3875   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
  3878 inline int8_t
  3879 UnicodeString::caseCompareBetween(int32_t start,
  3880                                   int32_t limit,
  3881                                   const UnicodeString &srcText,
  3882                                   int32_t srcStart,
  3883                                   int32_t srcLimit,
  3884                                   uint32_t options) const {
  3885   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
  3888 inline int32_t
  3889 UnicodeString::indexOf(const UnicodeString& srcText,
  3890                int32_t srcStart,
  3891                int32_t srcLength,
  3892                int32_t start,
  3893                int32_t _length) const
  3895   if(!srcText.isBogus()) {
  3896     srcText.pinIndices(srcStart, srcLength);
  3897     if(srcLength > 0) {
  3898       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  3901   return -1;
  3904 inline int32_t
  3905 UnicodeString::indexOf(const UnicodeString& text) const
  3906 { return indexOf(text, 0, text.length(), 0, length()); }
  3908 inline int32_t
  3909 UnicodeString::indexOf(const UnicodeString& text,
  3910                int32_t start) const {
  3911   pinIndex(start);
  3912   return indexOf(text, 0, text.length(), start, length() - start);
  3915 inline int32_t
  3916 UnicodeString::indexOf(const UnicodeString& text,
  3917                int32_t start,
  3918                int32_t _length) const
  3919 { return indexOf(text, 0, text.length(), start, _length); }
  3921 inline int32_t
  3922 UnicodeString::indexOf(const UChar *srcChars,
  3923                int32_t srcLength,
  3924                int32_t start) const {
  3925   pinIndex(start);
  3926   return indexOf(srcChars, 0, srcLength, start, length() - start);
  3929 inline int32_t
  3930 UnicodeString::indexOf(const UChar *srcChars,
  3931                int32_t srcLength,
  3932                int32_t start,
  3933                int32_t _length) const
  3934 { return indexOf(srcChars, 0, srcLength, start, _length); }
  3936 inline int32_t
  3937 UnicodeString::indexOf(UChar c,
  3938                int32_t start,
  3939                int32_t _length) const
  3940 { return doIndexOf(c, start, _length); }
  3942 inline int32_t
  3943 UnicodeString::indexOf(UChar32 c,
  3944                int32_t start,
  3945                int32_t _length) const
  3946 { return doIndexOf(c, start, _length); }
  3948 inline int32_t
  3949 UnicodeString::indexOf(UChar c) const
  3950 { return doIndexOf(c, 0, length()); }
  3952 inline int32_t
  3953 UnicodeString::indexOf(UChar32 c) const
  3954 { return indexOf(c, 0, length()); }
  3956 inline int32_t
  3957 UnicodeString::indexOf(UChar c,
  3958                int32_t start) const {
  3959   pinIndex(start);
  3960   return doIndexOf(c, start, length() - start);
  3963 inline int32_t
  3964 UnicodeString::indexOf(UChar32 c,
  3965                int32_t start) const {
  3966   pinIndex(start);
  3967   return indexOf(c, start, length() - start);
  3970 inline int32_t
  3971 UnicodeString::lastIndexOf(const UChar *srcChars,
  3972                int32_t srcLength,
  3973                int32_t start,
  3974                int32_t _length) const
  3975 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
  3977 inline int32_t
  3978 UnicodeString::lastIndexOf(const UChar *srcChars,
  3979                int32_t srcLength,
  3980                int32_t start) const {
  3981   pinIndex(start);
  3982   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
  3985 inline int32_t
  3986 UnicodeString::lastIndexOf(const UnicodeString& srcText,
  3987                int32_t srcStart,
  3988                int32_t srcLength,
  3989                int32_t start,
  3990                int32_t _length) const
  3992   if(!srcText.isBogus()) {
  3993     srcText.pinIndices(srcStart, srcLength);
  3994     if(srcLength > 0) {
  3995       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  3998   return -1;
  4001 inline int32_t
  4002 UnicodeString::lastIndexOf(const UnicodeString& text,
  4003                int32_t start,
  4004                int32_t _length) const
  4005 { return lastIndexOf(text, 0, text.length(), start, _length); }
  4007 inline int32_t
  4008 UnicodeString::lastIndexOf(const UnicodeString& text,
  4009                int32_t start) const {
  4010   pinIndex(start);
  4011   return lastIndexOf(text, 0, text.length(), start, length() - start);
  4014 inline int32_t
  4015 UnicodeString::lastIndexOf(const UnicodeString& text) const
  4016 { return lastIndexOf(text, 0, text.length(), 0, length()); }
  4018 inline int32_t
  4019 UnicodeString::lastIndexOf(UChar c,
  4020                int32_t start,
  4021                int32_t _length) const
  4022 { return doLastIndexOf(c, start, _length); }
  4024 inline int32_t
  4025 UnicodeString::lastIndexOf(UChar32 c,
  4026                int32_t start,
  4027                int32_t _length) const {
  4028   return doLastIndexOf(c, start, _length);
  4031 inline int32_t
  4032 UnicodeString::lastIndexOf(UChar c) const
  4033 { return doLastIndexOf(c, 0, length()); }
  4035 inline int32_t
  4036 UnicodeString::lastIndexOf(UChar32 c) const {
  4037   return lastIndexOf(c, 0, length());
  4040 inline int32_t
  4041 UnicodeString::lastIndexOf(UChar c,
  4042                int32_t start) const {
  4043   pinIndex(start);
  4044   return doLastIndexOf(c, start, length() - start);
  4047 inline int32_t
  4048 UnicodeString::lastIndexOf(UChar32 c,
  4049                int32_t start) const {
  4050   pinIndex(start);
  4051   return lastIndexOf(c, start, length() - start);
  4054 inline UBool
  4055 UnicodeString::startsWith(const UnicodeString& text) const
  4056 { return compare(0, text.length(), text, 0, text.length()) == 0; }
  4058 inline UBool
  4059 UnicodeString::startsWith(const UnicodeString& srcText,
  4060               int32_t srcStart,
  4061               int32_t srcLength) const
  4062 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
  4064 inline UBool
  4065 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
  4066   if(srcLength < 0) {
  4067     srcLength = u_strlen(srcChars);
  4069   return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
  4072 inline UBool
  4073 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
  4074   if(srcLength < 0) {
  4075     srcLength = u_strlen(srcChars);
  4077   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
  4080 inline UBool
  4081 UnicodeString::endsWith(const UnicodeString& text) const
  4082 { return doCompare(length() - text.length(), text.length(),
  4083            text, 0, text.length()) == 0; }
  4085 inline UBool
  4086 UnicodeString::endsWith(const UnicodeString& srcText,
  4087             int32_t srcStart,
  4088             int32_t srcLength) const {
  4089   srcText.pinIndices(srcStart, srcLength);
  4090   return doCompare(length() - srcLength, srcLength,
  4091                    srcText, srcStart, srcLength) == 0;
  4094 inline UBool
  4095 UnicodeString::endsWith(const UChar *srcChars,
  4096             int32_t srcLength) const {
  4097   if(srcLength < 0) {
  4098     srcLength = u_strlen(srcChars);
  4100   return doCompare(length() - srcLength, srcLength,
  4101                    srcChars, 0, srcLength) == 0;
  4104 inline UBool
  4105 UnicodeString::endsWith(const UChar *srcChars,
  4106             int32_t srcStart,
  4107             int32_t srcLength) const {
  4108   if(srcLength < 0) {
  4109     srcLength = u_strlen(srcChars + srcStart);
  4111   return doCompare(length() - srcLength, srcLength,
  4112                    srcChars, srcStart, srcLength) == 0;
  4115 //========================================
  4116 // replace
  4117 //========================================
  4118 inline UnicodeString&
  4119 UnicodeString::replace(int32_t start,
  4120                int32_t _length,
  4121                const UnicodeString& srcText)
  4122 { return doReplace(start, _length, srcText, 0, srcText.length()); }
  4124 inline UnicodeString&
  4125 UnicodeString::replace(int32_t start,
  4126                int32_t _length,
  4127                const UnicodeString& srcText,
  4128                int32_t srcStart,
  4129                int32_t srcLength)
  4130 { return doReplace(start, _length, srcText, srcStart, srcLength); }
  4132 inline UnicodeString&
  4133 UnicodeString::replace(int32_t start,
  4134                int32_t _length,
  4135                const UChar *srcChars,
  4136                int32_t srcLength)
  4137 { return doReplace(start, _length, srcChars, 0, srcLength); }
  4139 inline UnicodeString&
  4140 UnicodeString::replace(int32_t start,
  4141                int32_t _length,
  4142                const UChar *srcChars,
  4143                int32_t srcStart,
  4144                int32_t srcLength)
  4145 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
  4147 inline UnicodeString&
  4148 UnicodeString::replace(int32_t start,
  4149                int32_t _length,
  4150                UChar srcChar)
  4151 { return doReplace(start, _length, &srcChar, 0, 1); }
  4153 inline UnicodeString&
  4154 UnicodeString::replaceBetween(int32_t start,
  4155                   int32_t limit,
  4156                   const UnicodeString& srcText)
  4157 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
  4159 inline UnicodeString&
  4160 UnicodeString::replaceBetween(int32_t start,
  4161                   int32_t limit,
  4162                   const UnicodeString& srcText,
  4163                   int32_t srcStart,
  4164                   int32_t srcLimit)
  4165 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
  4167 inline UnicodeString&
  4168 UnicodeString::findAndReplace(const UnicodeString& oldText,
  4169                   const UnicodeString& newText)
  4170 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
  4171             newText, 0, newText.length()); }
  4173 inline UnicodeString&
  4174 UnicodeString::findAndReplace(int32_t start,
  4175                   int32_t _length,
  4176                   const UnicodeString& oldText,
  4177                   const UnicodeString& newText)
  4178 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
  4179             newText, 0, newText.length()); }
  4181 // ============================
  4182 // extract
  4183 // ============================
  4184 inline void
  4185 UnicodeString::doExtract(int32_t start,
  4186              int32_t _length,
  4187              UnicodeString& target) const
  4188 { target.replace(0, target.length(), *this, start, _length); }
  4190 inline void
  4191 UnicodeString::extract(int32_t start,
  4192                int32_t _length,
  4193                UChar *target,
  4194                int32_t targetStart) const
  4195 { doExtract(start, _length, target, targetStart); }
  4197 inline void
  4198 UnicodeString::extract(int32_t start,
  4199                int32_t _length,
  4200                UnicodeString& target) const
  4201 { doExtract(start, _length, target); }
  4203 #if !UCONFIG_NO_CONVERSION
  4205 inline int32_t
  4206 UnicodeString::extract(int32_t start,
  4207                int32_t _length,
  4208                char *dst,
  4209                const char *codepage) const
  4212   // This dstSize value will be checked explicitly
  4213   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
  4216 #endif
  4218 inline void
  4219 UnicodeString::extractBetween(int32_t start,
  4220                   int32_t limit,
  4221                   UChar *dst,
  4222                   int32_t dstStart) const {
  4223   pinIndex(start);
  4224   pinIndex(limit);
  4225   doExtract(start, limit - start, dst, dstStart);
  4228 inline UnicodeString
  4229 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
  4230     return tempSubString(start, limit - start);
  4233 inline UChar
  4234 UnicodeString::doCharAt(int32_t offset) const
  4236   if((uint32_t)offset < (uint32_t)length()) {
  4237     return getArrayStart()[offset];
  4238   } else {
  4239     return kInvalidUChar;
  4243 inline UChar
  4244 UnicodeString::charAt(int32_t offset) const
  4245 { return doCharAt(offset); }
  4247 inline UChar
  4248 UnicodeString::operator[] (int32_t offset) const
  4249 { return doCharAt(offset); }
  4251 inline UBool
  4252 UnicodeString::isEmpty() const {
  4253   return fShortLength == 0;
  4256 //========================================
  4257 // Write implementation methods
  4258 //========================================
  4259 inline void
  4260 UnicodeString::setLength(int32_t len) {
  4261   if(len <= 127) {
  4262     fShortLength = (int8_t)len;
  4263   } else {
  4264     fShortLength = (int8_t)-1;
  4265     fUnion.fFields.fLength = len;
  4269 inline void
  4270 UnicodeString::setToEmpty() {
  4271   fShortLength = 0;
  4272   fFlags = kShortString;
  4275 inline void
  4276 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
  4277   setLength(len);
  4278   fUnion.fFields.fArray = array;
  4279   fUnion.fFields.fCapacity = capacity;
  4282 inline UnicodeString&
  4283 UnicodeString::operator= (UChar ch)
  4284 { return doReplace(0, length(), &ch, 0, 1); }
  4286 inline UnicodeString&
  4287 UnicodeString::operator= (UChar32 ch)
  4288 { return replace(0, length(), ch); }
  4290 inline UnicodeString&
  4291 UnicodeString::setTo(const UnicodeString& srcText,
  4292              int32_t srcStart,
  4293              int32_t srcLength)
  4295   unBogus();
  4296   return doReplace(0, length(), srcText, srcStart, srcLength);
  4299 inline UnicodeString&
  4300 UnicodeString::setTo(const UnicodeString& srcText,
  4301              int32_t srcStart)
  4303   unBogus();
  4304   srcText.pinIndex(srcStart);
  4305   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
  4308 inline UnicodeString&
  4309 UnicodeString::setTo(const UnicodeString& srcText)
  4311   return copyFrom(srcText);
  4314 inline UnicodeString&
  4315 UnicodeString::setTo(const UChar *srcChars,
  4316              int32_t srcLength)
  4318   unBogus();
  4319   return doReplace(0, length(), srcChars, 0, srcLength);
  4322 inline UnicodeString&
  4323 UnicodeString::setTo(UChar srcChar)
  4325   unBogus();
  4326   return doReplace(0, length(), &srcChar, 0, 1);
  4329 inline UnicodeString&
  4330 UnicodeString::setTo(UChar32 srcChar)
  4332   unBogus();
  4333   return replace(0, length(), srcChar);
  4336 inline UnicodeString&
  4337 UnicodeString::append(const UnicodeString& srcText,
  4338               int32_t srcStart,
  4339               int32_t srcLength)
  4340 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
  4342 inline UnicodeString&
  4343 UnicodeString::append(const UnicodeString& srcText)
  4344 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
  4346 inline UnicodeString&
  4347 UnicodeString::append(const UChar *srcChars,
  4348               int32_t srcStart,
  4349               int32_t srcLength)
  4350 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
  4352 inline UnicodeString&
  4353 UnicodeString::append(const UChar *srcChars,
  4354               int32_t srcLength)
  4355 { return doReplace(length(), 0, srcChars, 0, srcLength); }
  4357 inline UnicodeString&
  4358 UnicodeString::append(UChar srcChar)
  4359 { return doReplace(length(), 0, &srcChar, 0, 1); }
  4361 inline UnicodeString&
  4362 UnicodeString::operator+= (UChar ch)
  4363 { return doReplace(length(), 0, &ch, 0, 1); }
  4365 inline UnicodeString&
  4366 UnicodeString::operator+= (UChar32 ch) {
  4367   return append(ch);
  4370 inline UnicodeString&
  4371 UnicodeString::operator+= (const UnicodeString& srcText)
  4372 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
  4374 inline UnicodeString&
  4375 UnicodeString::insert(int32_t start,
  4376               const UnicodeString& srcText,
  4377               int32_t srcStart,
  4378               int32_t srcLength)
  4379 { return doReplace(start, 0, srcText, srcStart, srcLength); }
  4381 inline UnicodeString&
  4382 UnicodeString::insert(int32_t start,
  4383               const UnicodeString& srcText)
  4384 { return doReplace(start, 0, srcText, 0, srcText.length()); }
  4386 inline UnicodeString&
  4387 UnicodeString::insert(int32_t start,
  4388               const UChar *srcChars,
  4389               int32_t srcStart,
  4390               int32_t srcLength)
  4391 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
  4393 inline UnicodeString&
  4394 UnicodeString::insert(int32_t start,
  4395               const UChar *srcChars,
  4396               int32_t srcLength)
  4397 { return doReplace(start, 0, srcChars, 0, srcLength); }
  4399 inline UnicodeString&
  4400 UnicodeString::insert(int32_t start,
  4401               UChar srcChar)
  4402 { return doReplace(start, 0, &srcChar, 0, 1); }
  4404 inline UnicodeString&
  4405 UnicodeString::insert(int32_t start,
  4406               UChar32 srcChar)
  4407 { return replace(start, 0, srcChar); }
  4410 inline UnicodeString&
  4411 UnicodeString::remove()
  4413   // remove() of a bogus string makes the string empty and non-bogus
  4414   if(isBogus()) {
  4415     setToEmpty();
  4416   } else {
  4417     fShortLength = 0;
  4419   return *this;
  4422 inline UnicodeString&
  4423 UnicodeString::remove(int32_t start,
  4424              int32_t _length)
  4426     if(start <= 0 && _length == INT32_MAX) {
  4427         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
  4428         return remove();
  4430     return doReplace(start, _length, NULL, 0, 0);
  4433 inline UnicodeString&
  4434 UnicodeString::removeBetween(int32_t start,
  4435                 int32_t limit)
  4436 { return doReplace(start, limit - start, NULL, 0, 0); }
  4438 inline UnicodeString &
  4439 UnicodeString::retainBetween(int32_t start, int32_t limit) {
  4440   truncate(limit);
  4441   return doReplace(0, start, NULL, 0, 0);
  4444 inline UBool
  4445 UnicodeString::truncate(int32_t targetLength)
  4447   if(isBogus() && targetLength == 0) {
  4448     // truncate(0) of a bogus string makes the string empty and non-bogus
  4449     unBogus();
  4450     return FALSE;
  4451   } else if((uint32_t)targetLength < (uint32_t)length()) {
  4452     setLength(targetLength);
  4453     return TRUE;
  4454   } else {
  4455     return FALSE;
  4459 inline UnicodeString&
  4460 UnicodeString::reverse()
  4461 { return doReverse(0, length()); }
  4463 inline UnicodeString&
  4464 UnicodeString::reverse(int32_t start,
  4465                int32_t _length)
  4466 { return doReverse(start, _length); }
  4468 U_NAMESPACE_END
  4470 #endif

mercurial