The Tor Browser: intl/icu/source/common/unicode/unistr.h@129ffea94266

Conditionally enable double-key logic depending on private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Reservation: it is unclear how to convince FindCookie callers to test the
condition and pass nullptr when double-key logic is disabled.

     1 /*

     2 **********************************************************************

     3 *   Copyright (C) 1998-2013, International Business Machines

     4 *   Corporation and others.  All Rights Reserved.

     5 **********************************************************************

     6 *

     7 * File unistr.h

     8 *

     9 * Modification History:

    10 *

    11 *   Date        Name        Description

    12 *   09/25/98    stephen     Creation.

    13 *   11/11/98    stephen     Changed per 11/9 code review.

    14 *   04/20/99    stephen     Overhauled per 4/16 code review.

    15 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method

    16 *                           handleReplaceBetween(); other methods unchanged.

    17 *   06/25/01    grhoten     Remove dependency on iostream.

    18 ******************************************************************************

    19 */

    21 #ifndef UNISTR_H

    22 #define UNISTR_H

    24 /**

    25  * \file

    26  * \brief C++ API: Unicode String

    27  */

    29 #include "unicode/utypes.h"

    30 #include "unicode/rep.h"

    31 #include "unicode/std_string.h"

    32 #include "unicode/stringpiece.h"

    33 #include "unicode/bytestream.h"

    34 #include "unicode/ucasemap.h"

    36 struct UConverter;          // unicode/ucnv.h

    37 class  StringThreadTest;

    39 #ifndef U_COMPARE_CODE_POINT_ORDER

    40 /* see also ustring.h and unorm.h */

    41 /**

    42  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:

    43  * Compare strings in code point order instead of code unit order.

    44  * @stable ICU 2.2

    45  */

    46 #define U_COMPARE_CODE_POINT_ORDER  0x8000

    47 #endif

    49 #ifndef USTRING_H

    50 /**

    51  * \ingroup ustring_ustrlen

        *
        * Forward declaration of u_strlen(), presumably so that code in this
        * header can call it without including ustring.h (confirm).
        * The declaration is skipped when USTRING_H is already defined
        * (presumably by ustring.h itself - confirm), which avoids declaring
        * the function a second time.
        * See ustring.h for the function's documented contract.

    52  */

    53 U_STABLE int32_t U_EXPORT2

    54 u_strlen(const UChar *s);

    55 #endif

    57 /**

    58  * \def U_STRING_CASE_MAPPER_DEFINED

        * Guard macro for the UStringCaseMapper typedef below: the typedef is
        * declared only if this macro is not yet defined, and the macro is
        * defined immediately before it (presumably because another header
        * carries the same guarded typedef - confirm).

    59  * @internal

    60  */

    61 #ifndef U_STRING_CASE_MAPPER_DEFINED

    62 #define U_STRING_CASE_MAPPER_DEFINED

    64 /**

    65  * Internal string case mapping function type.

        *
        * This typedef names a function type (not a pointer type), so a mapper
        * is held or passed as a pointer to UStringCaseMapper.
        *
        * The parameter descriptions below are inferred from the names and types
        * in the signature only; the contract is not spelled out here
        * (TODO confirm against the implementations).
        * @param csm          UCaseMap object supplied to the mapper.
        * @param dest         Output buffer of UChars.
        * @param destCapacity Capacity of dest, presumably counted in UChars.
        * @param src          Input UChars to be case-mapped.
        * @param srcLength    Length of src, presumably counted in UChars.
        * @param pErrorCode   Pointer to a UErrorCode; presumably the usual ICU
        *                     in/out error code.
        * @return An int32_t; presumably the length of the mapped string.

    66  * @internal

    67  */

    68 typedef int32_t U_CALLCONV

    69 UStringCaseMapper(const UCaseMap *csm,

    70                   UChar *dest, int32_t destCapacity,

    71                   const UChar *src, int32_t srcLength,

    72                   UErrorCode *pErrorCode);

    74 #endif

    76 U_NAMESPACE_BEGIN

    78 class BreakIterator;        // unicode/brkiter.h

    79 class Locale;               // unicode/locid.h

    80 class StringCharacterIterator;

    81 class UnicodeStringAppendable;  // unicode/appendable.h

    83 /* The <iostream> include has been moved to unicode/ustream.h */

    85 /**

    86  * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor

    87  * which constructs a Unicode string from an invariant-character char * string.

    88  * About invariant characters see utypes.h.

    89  * This constructor has no runtime dependency on conversion code and is

    90  * therefore recommended over ones taking a charset name string

    91  * (where the empty string "" indicates invariant-character conversion).

    92  *

    93  * @stable ICU 3.2

    94  */

    95 #define US_INV icu::UnicodeString::kInvariant

    97 /**

    98  * Unicode String literals in C++.

    99  * Dependent on the platform properties, different UnicodeString

   100  * constructors should be used to create a UnicodeString object from

   101  * a string literal.

   102  * The macros are defined for maximum performance.

   103  * They work only for strings that contain "invariant characters", i.e.,

   104  * only latin letters, digits, and some punctuation.

   105  * See utypes.h for details.

   106  *

   107  * The string parameter must be a C string literal.

   108  * The length of the string, not including the terminating

   109  * <code>NUL</code>, must be specified as a constant.

   110  * The U_STRING_DECL macro should be invoked exactly once for one

   111  * such string variable before it is used.

        *
        * Which expansion is chosen (the first matching case wins):
        * - U_DECLARE_UTF16 is defined: the literal is passed through
        *   U_DECLARE_UTF16() and the result is cast to a const UChar pointer.
        * - wchar_t has the same size as UChar, and either the charset family
        *   is ASCII or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined: the
        *   literal is turned into a wide literal (L ## cs) and cast to a
        *   const UChar pointer.
        * - UChar is 1 byte and the charset family is ASCII: the char literal
        *   itself is cast to a const UChar pointer.
        * - Otherwise: the (cs, _length, US_INV) invariant-character
        *   constructor is used.
        *
        * The first three cases call the UnicodeString constructor taking
        * (TRUE, const UChar pointer, length), which is declared outside this
        * part of the file (NOTE(review): presumably the read-only aliasing
        * constructor - confirm).

   112  * @stable ICU 2.0

   113  */

   114 #if defined(U_DECLARE_UTF16)

   115 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)

   116 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))

   117 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)

   118 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY

   119 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)

   120 #else

   121 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)

   122 #endif

   124 /**

   125  * Unicode String literals in C++.

   126  * Dependent on the platform properties, different UnicodeString

   127  * constructors should be used to create a UnicodeString object from

   128  * a string literal.

   129  * The macros are defined for improved performance.

   130  * They work only for strings that contain "invariant characters", i.e.,

   131  * only latin letters, digits, and some punctuation.

   132  * See utypes.h for details.

   133  *

   134  * The string parameter must be a C string literal.

   135  * @stable ICU 2.0

   136  */

   137 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)

   139 /**

   140  * \def UNISTR_FROM_CHAR_EXPLICIT

   141  * This can be defined to be empty or "explicit".

   142  * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)

   143  * constructors are marked as explicit, preventing their inadvertent use.

   144  * @stable ICU 49

   145  */

   146 #ifndef UNISTR_FROM_CHAR_EXPLICIT

   147 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)

   148     // Auto-"explicit" in ICU library code.

   149 #   define UNISTR_FROM_CHAR_EXPLICIT explicit

   150 # else

   151     // Empty by default for source code compatibility.

   152 #   define UNISTR_FROM_CHAR_EXPLICIT

   153 # endif

   154 #endif

   156 /**

   157  * \def UNISTR_FROM_STRING_EXPLICIT

   158  * This can be defined to be empty or "explicit".

   159  * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)

   160  * constructors are marked as explicit, preventing their inadvertent use.

   161  *

   162  * In particular, this helps prevent accidentally depending on ICU conversion code

   163  * by passing a string literal into an API with a const UnicodeString & parameter.

   164  * @stable ICU 49

   165  */

   166 #ifndef UNISTR_FROM_STRING_EXPLICIT

   167 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)

   168     // Auto-"explicit" in ICU library code.

   169 #   define UNISTR_FROM_STRING_EXPLICIT explicit

   170 # else

   171     // Empty by default for source code compatibility.

   172 #   define UNISTR_FROM_STRING_EXPLICIT

   173 # endif

   174 #endif

   176 /**

   177  * UnicodeString is a string class that stores Unicode characters directly and provides

   178  * functionality similar to that of the Java String and StringBuffer classes.

   179  * It is a concrete implementation of the abstract class Replaceable (for transliteration).

   180  *

   181  * The UnicodeString class is not suitable for subclassing.

   182  *

   183  * <p>For an overview of Unicode strings in C and C++ see the

   184  * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>

   185  *

   186  * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.

   187  * A Unicode character may be stored with either one code unit

   188  * (the most common case) or with a matched pair of special code units

   189  * ("surrogates"). The data type for code units is UChar.

   190  * For single-character handling, a Unicode character code <em>point</em> is a value

   191  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>

   192  *

   193  * <p>Indexes and offsets into and lengths of strings always count code units, not code points.

   194  * This is the same as with multi-byte char* strings in traditional string handling.

   195  * Operations on partial strings typically do not test for code point boundaries.

   196  * If necessary, the user needs to take care of such boundaries by testing for the code unit

   197  * values or by using functions like

   198  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()

   199  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>

   200  *

   201  * UnicodeString methods are more lenient with regard to input parameter values

   202  * than other ICU APIs. In particular:

   203  * - If indexes are out of bounds for a UnicodeString object

   204  *   (<0 or >length()) then they are "pinned" to the nearest boundary.

   205  * - If primitive string pointer values (e.g., const UChar * or char *)

   206  *   for input strings are NULL, then those input string parameters are treated

   207  *   as if they pointed to an empty string.

   208  *   However, this is <em>not</em> the case for char * parameters for charset names

   209  *   or other IDs.

   210  * - Most UnicodeString methods do not take a UErrorCode parameter because

   211  *   there are usually very few opportunities for failure other than a shortage

   212  *   of memory, error codes in low-level C++ string methods would be inconvenient,

   213  *   and the error code as the last parameter (ICU convention) would prevent

   214  *   the use of default parameter values.

   215  *   Instead, such methods set the UnicodeString into a "bogus" state

   216  *   (see isBogus()) if an error occurs.

   217  *

   218  * In string comparisons, two UnicodeString objects that are both "bogus"

   219  * compare equal (to be transitive and prevent endless loops in sorting),

   220  * and a "bogus" string compares less than any non-"bogus" one.

   221  *

   222  * Const UnicodeString methods are thread-safe. Multiple threads can use

   223  * const methods on the same UnicodeString object simultaneously,

   224  * but non-const methods must not be called concurrently (in multiple threads)

   225  * with any other (const or non-const) methods.

   226  *

   227  * Similarly, const UnicodeString & parameters are thread-safe.

   228  * One object may be passed in as such a parameter concurrently in multiple threads.

   229  * This includes the const UnicodeString & parameters for

   230  * copy construction, assignment, and cloning.

   231  *

   232  * <p>UnicodeString uses several storage methods.

   233  * String contents can be stored inside the UnicodeString object itself,

   234  * in an allocated and shared buffer, or in an outside buffer that is "aliased".

   235  * Most of this is done transparently, but careful aliasing in particular provides

   236  * significant performance improvements.

   237  * Also, the internal buffer is accessible via special functions.

   238  * For details see the

   239  * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>

   240  *

   241  * @see utf.h

   242  * @see CharacterIterator

   243  * @stable ICU 2.0

   244  */

   245 class U_COMMON_API UnicodeString : public Replaceable

   246 {

   247 public:

   249   /**

   250    * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor

   251    * which constructs a Unicode string from an invariant-character char * string.

   252    * Use the macro US_INV instead of the full qualification for this value.

   253    *

   254    * @see US_INV

   255    * @stable ICU 3.2

   256    */

   257   enum EInvariant {

   258     /**

   259      * @see EInvariant

   260      * @stable ICU 3.2

   261      */

   262     kInvariant

   263   };

   265   //========================================

   266   // Read-only operations

   267   //========================================

   269   /* Comparison - bitwise only - for international comparison use collation */

   271   /**

   272    * Equality operator. Performs only bitwise comparison.

   273    * @param text The UnicodeString to compare to this one.

   274    * @return TRUE if <TT>text</TT> contains the same characters as this one,

   275    * FALSE otherwise.

   276    * @stable ICU 2.0

   277    */

   278   inline UBool operator== (const UnicodeString& text) const;

   280   /**

   281    * Inequality operator. Performs only bitwise comparison.

   282    * @param text The UnicodeString to compare to this one.

   283    * @return FALSE if <TT>text</TT> contains the same characters as this one,

   284    * TRUE otherwise.

   285    * @stable ICU 2.0

   286    */

   287   inline UBool operator!= (const UnicodeString& text) const;

   289   /**

   290    * Greater than operator. Performs only bitwise comparison.

   291    * @param text The UnicodeString to compare to this one.

   292    * @return TRUE if the characters in this are bitwise

   293    * greater than the characters in <code>text</code>, FALSE otherwise

   294    * @stable ICU 2.0

   295    */

   296   inline UBool operator> (const UnicodeString& text) const;

   298   /**

   299    * Less than operator. Performs only bitwise comparison.

   300    * @param text The UnicodeString to compare to this one.

   301    * @return TRUE if the characters in this are bitwise

   302    * less than the characters in <code>text</code>, FALSE otherwise

   303    * @stable ICU 2.0

   304    */

   305   inline UBool operator< (const UnicodeString& text) const;

   307   /**

   308    * Greater than or equal operator. Performs only bitwise comparison.

   309    * @param text The UnicodeString to compare to this one.

   310    * @return TRUE if the characters in this are bitwise

   311    * greater than or equal to the characters in <code>text</code>, FALSE otherwise

   312    * @stable ICU 2.0

   313    */

   314   inline UBool operator>= (const UnicodeString& text) const;

   316   /**

   317    * Less than or equal operator. Performs only bitwise comparison.

   318    * @param text The UnicodeString to compare to this one.

   319    * @return TRUE if the characters in this are bitwise

   320    * less than or equal to the characters in <code>text</code>, FALSE otherwise

   321    * @stable ICU 2.0

   322    */

   323   inline UBool operator<= (const UnicodeString& text) const;

   325   /**

   326    * Compare the characters bitwise in this UnicodeString to

   327    * the characters in <code>text</code>.

   328    * @param text The UnicodeString to compare to this one.

   329    * @return The result of bitwise character comparison: 0 if this

   330    * contains the same characters as <code>text</code>, -1 if the characters in

   331    * this are bitwise less than the characters in <code>text</code>, +1 if the

   332    * characters in this are bitwise greater than the characters

   333    * in <code>text</code>.

   334    * @stable ICU 2.0

   335    */

   336   inline int8_t compare(const UnicodeString& text) const;

   338   /**

   339    * Compare the characters bitwise in the range

   340    * [<TT>start</TT>, <TT>start + length</TT>) with the characters

   341    * in the <b>entire string</b> <TT>text</TT>.

   342    * (The parameters "start" and "length" are not applied to the other text "text".)

   343    * @param start the offset at which the compare operation begins

   344    * @param length the number of characters of text to compare.

   345    * @param text the other text to be compared against this string.

   346    * @return The result of bitwise character comparison: 0 if this

   347    * contains the same characters as <code>text</code>, -1 if the characters in

   348    * this are bitwise less than the characters in <code>text</code>, +1 if the

   349    * characters in this are bitwise greater than the characters

   350    * in <code>text</code>.

   351    * @stable ICU 2.0

   352    */

   353   inline int8_t compare(int32_t start,

   354          int32_t length,

   355          const UnicodeString& text) const;

   357   /**

   358    * Compare the characters bitwise in the range

   359    * [<TT>start</TT>, <TT>start + length</TT>) with the characters

   360    * in <TT>srcText</TT> in the range

   361    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   362    * @param start the offset at which the compare operation begins

   363    * @param length the number of characters in this to compare.

   364    * @param srcText the text to be compared

   365    * @param srcStart the offset into <TT>srcText</TT> to start comparison

   366    * @param srcLength the number of characters in <TT>srcText</TT> to compare

   367    * @return The result of bitwise character comparison: 0 if this

   368    * contains the same characters as <code>srcText</code>, -1 if the characters in

   369    * this are bitwise less than the characters in <code>srcText</code>, +1 if the

   370    * characters in this are bitwise greater than the characters

   371    * in <code>srcText</code>.

   372    * @stable ICU 2.0

   373    */

   374    inline int8_t compare(int32_t start,

   375          int32_t length,

   376          const UnicodeString& srcText,

   377          int32_t srcStart,

   378          int32_t srcLength) const;

   380   /**

   381    * Compare the characters bitwise in this UnicodeString with the first

   382    * <TT>srcLength</TT> characters in <TT>srcChars</TT>.

   383    * @param srcChars The characters to compare to this UnicodeString.

   384    * @param srcLength the number of characters in <TT>srcChars</TT> to compare

   385    * @return The result of bitwise character comparison: 0 if this

   386    * contains the same characters as <code>srcChars</code>, -1 if the characters in

   387    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

   388    * characters in this are bitwise greater than the characters

   389    * in <code>srcChars</code>.

   390    * @stable ICU 2.0

   391    */

   392   inline int8_t compare(const UChar *srcChars,

   393          int32_t srcLength) const;

   395   /**

   396    * Compare the characters bitwise in the range

   397    * [<TT>start</TT>, <TT>start + length</TT>) with the first

   398    * <TT>length</TT> characters in <TT>srcChars</TT>

   399    * @param start the offset at which the compare operation begins

   400    * @param length the number of characters to compare.

   401    * @param srcChars the characters to be compared

   402    * @return The result of bitwise character comparison: 0 if this

   403    * contains the same characters as <code>srcChars</code>, -1 if the characters in

   404    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

   405    * characters in this are bitwise greater than the characters

   406    * in <code>srcChars</code>.

   407    * @stable ICU 2.0

   408    */

   409   inline int8_t compare(int32_t start,

   410          int32_t length,

   411          const UChar *srcChars) const;

   413   /**

   414    * Compare the characters bitwise in the range

   415    * [<TT>start</TT>, <TT>start + length</TT>) with the characters

   416    * in <TT>srcChars</TT> in the range

   417    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   418    * @param start the offset at which the compare operation begins

   419    * @param length the number of characters in this to compare

   420    * @param srcChars the characters to be compared

   421    * @param srcStart the offset into <TT>srcChars</TT> to start comparison

   422    * @param srcLength the number of characters in <TT>srcChars</TT> to compare

   423    * @return The result of bitwise character comparison: 0 if this

   424    * contains the same characters as <code>srcChars</code>, -1 if the characters in

   425    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the

   426    * characters in this are bitwise greater than the characters

   427    * in <code>srcChars</code>.

   428    * @stable ICU 2.0

   429    */

   430   inline int8_t compare(int32_t start,

   431          int32_t length,

   432          const UChar *srcChars,

   433          int32_t srcStart,

   434          int32_t srcLength) const;

   436   /**

   437    * Compare the characters bitwise in the range

   438    * [<TT>start</TT>, <TT>limit</TT>) with the characters

   439    * in <TT>srcText</TT> in the range

   440    * [<TT>srcStart</TT>, <TT>srcLimit</TT>).

   441    * @param start the offset at which the compare operation begins

   442    * @param limit the offset immediately following the compare operation

   443    * @param srcText the text to be compared

   444    * @param srcStart the offset into <TT>srcText</TT> to start comparison

   445    * @param srcLimit the offset into <TT>srcText</TT> to limit comparison

   446    * @return The result of bitwise character comparison: 0 if this

   447    * contains the same characters as <code>srcText</code>, -1 if the characters in

   448    * this are bitwise less than the characters in <code>srcText</code>, +1 if the

   449    * characters in this are bitwise greater than the characters

   450    * in <code>srcText</code>.

   451    * @stable ICU 2.0

   452    */

   453   inline int8_t compareBetween(int32_t start,

   454             int32_t limit,

   455             const UnicodeString& srcText,

   456             int32_t srcStart,

   457             int32_t srcLimit) const;

   459   /**

   460    * Compare two Unicode strings in code point order.

   461    * The result may be different from the results of compare(), operator<, etc.

   462    * if supplementary characters are present:

   463    *

   464    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   465    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   466    * which means that they compare as less than some other BMP characters like U+feff.

   467    * This function compares Unicode strings in code point order.

   468    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   469    *

   470    * @param text Another string to compare this one to.

   471    * @return a negative/zero/positive integer corresponding to whether

   472    * this string is less than/equal to/greater than the second one

   473    * in code point order

   474    * @stable ICU 2.0

   475    */

   476   inline int8_t compareCodePointOrder(const UnicodeString& text) const;

   478   /**

   479    * Compare two Unicode strings in code point order.

   480    * The result may be different from the results of compare(), operator<, etc.

   481    * if supplementary characters are present:

   482    *

   483    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   484    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   485    * which means that they compare as less than some other BMP characters like U+feff.

   486    * This function compares Unicode strings in code point order.

   487    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   488    *

   489    * @param start The start offset in this string at which the compare operation begins.

   490    * @param length The number of code units from this string to compare.

   491    * @param srcText Another string to compare this one to.

   492    * @return a negative/zero/positive integer corresponding to whether

   493    * this string is less than/equal to/greater than the second one

   494    * in code point order

   495    * @stable ICU 2.0

   496    */

   497   inline int8_t compareCodePointOrder(int32_t start,

   498                                       int32_t length,

   499                                       const UnicodeString& srcText) const;

   501   /**

   502    * Compare two Unicode strings in code point order.

   503    * The result may be different from the results of compare(), operator<, etc.

   504    * if supplementary characters are present:

   505    *

   506    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   507    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   508    * which means that they compare as less than some other BMP characters like U+feff.

   509    * This function compares Unicode strings in code point order.

   510    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   511    *

   512    * @param start The start offset in this string at which the compare operation begins.

   513    * @param length The number of code units from this string to compare.

   514    * @param srcText Another string to compare this one to.

   515    * @param srcStart The start offset in that string at which the compare operation begins.

   516    * @param srcLength The number of code units from that string to compare.

   517    * @return a negative/zero/positive integer corresponding to whether

   518    * this string is less than/equal to/greater than the second one

   519    * in code point order

   520    * @stable ICU 2.0

   521    */

   522    inline int8_t compareCodePointOrder(int32_t start,

   523                                        int32_t length,

   524                                        const UnicodeString& srcText,

   525                                        int32_t srcStart,

   526                                        int32_t srcLength) const;

   528   /**

   529    * Compare two Unicode strings in code point order.

   530    * The result may be different from the results of compare(), operator<, etc.

   531    * if supplementary characters are present:

   532    *

   533    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   534    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   535    * which means that they compare as less than some other BMP characters like U+feff.

   536    * This function compares Unicode strings in code point order.

   537    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   538    *

   539    * @param srcChars A pointer to another string to compare this one to.

   540    * @param srcLength The number of code units from that string to compare.

   541    * @return a negative/zero/positive integer corresponding to whether

   542    * this string is less than/equal to/greater than the second one

   543    * in code point order

   544    * @stable ICU 2.0

   545    */

   546   inline int8_t compareCodePointOrder(const UChar *srcChars,

   547                                       int32_t srcLength) const;

   549   /**

   550    * Compare two Unicode strings in code point order.

   551    * The result may be different from the results of compare(), operator<, etc.

   552    * if supplementary characters are present:

   553    *

   554    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   555    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   556    * which means that they compare as less than some other BMP characters like U+feff.

   557    * This function compares Unicode strings in code point order.

   558    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   559    *

   560    * @param start The start offset in this string at which the compare operation begins.

   561    * @param length The number of code units from this string to compare.

   562    * @param srcChars A pointer to another string to compare this one to.

   563    * @return a negative/zero/positive integer corresponding to whether

   564    * this string is less than/equal to/greater than the second one

   565    * in code point order

   566    * @stable ICU 2.0

   567    */

   568   inline int8_t compareCodePointOrder(int32_t start,

   569                                       int32_t length,

   570                                       const UChar *srcChars) const;

   572   /**

   573    * Compare two Unicode strings in code point order.

   574    * The result may be different from the results of compare(), operator<, etc.

   575    * if supplementary characters are present:

   576    *

   577    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   578    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   579    * which means that they compare as less than some other BMP characters like U+feff.

   580    * This function compares Unicode strings in code point order.

   581    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   582    *

   583    * @param start The start offset in this string at which the compare operation begins.

   584    * @param length The number of code units from this string to compare.

   585    * @param srcChars A pointer to another string to compare this one to.

   586    * @param srcStart The start offset in that string at which the compare operation begins.

   587    * @param srcLength The number of code units from that string to compare.

   588    * @return a negative/zero/positive integer corresponding to whether

   589    * this string is less than/equal to/greater than the second one

   590    * in code point order

   591    * @stable ICU 2.0

   592    */

   593   inline int8_t compareCodePointOrder(int32_t start,

   594                                       int32_t length,

   595                                       const UChar *srcChars,

   596                                       int32_t srcStart,

   597                                       int32_t srcLength) const;

   599   /**

   600    * Compare two Unicode strings in code point order.

   601    * The result may be different from the results of compare(), operator<, etc.

   602    * if supplementary characters are present:

   603    *

   604    * In UTF-16, supplementary characters (with code points U+10000 and above) are

   605    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,

   606    * which means that they compare as less than some other BMP characters like U+feff.

   607    * This function compares Unicode strings in code point order.

   608    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

   609    *

   610    * @param start The start offset in this string at which the compare operation begins.

   611    * @param limit The offset after the last code unit from this string to compare.

   612    * @param srcText Another string to compare this one to.

   613    * @param srcStart The start offset in that string at which the compare operation begins.

   614    * @param srcLimit The offset after the last code unit from that string to compare.

   615    * @return a negative/zero/positive integer corresponding to whether

   616    * this string is less than/equal to/greater than the second one

   617    * in code point order

   618    * @stable ICU 2.0

   619    */

   620   inline int8_t compareCodePointOrderBetween(int32_t start,

   621                                              int32_t limit,

   622                                              const UnicodeString& srcText,

   623                                              int32_t srcStart,

   624                                              int32_t srcLimit) const;

   626   /**

   627    * Compare two strings case-insensitively using full case folding.

   628    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).

   629    *

   630    * @param text Another string to compare this one to.

   631    * @param options A bit set of options:

   632    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   633    *     Comparison in code unit order with default case folding.

   634    *

   635    *   - U_COMPARE_CODE_POINT_ORDER

   636    *     Set to choose code point order instead of code unit order

   637    *     (see u_strCompare for details).

   638    *

   639    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   640    *

   641    * @return A negative, zero, or positive integer indicating the comparison result.

   642    * @stable ICU 2.0

   643    */

   644   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;

   646   /**

   647    * Compare two strings case-insensitively using full case folding.

   648    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).

   649    *

   650    * @param start The start offset in this string at which the compare operation begins.

   651    * @param length The number of code units from this string to compare.

   652    * @param srcText Another string to compare this one to.

   653    * @param options A bit set of options:

   654    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   655    *     Comparison in code unit order with default case folding.

   656    *

   657    *   - U_COMPARE_CODE_POINT_ORDER

   658    *     Set to choose code point order instead of code unit order

   659    *     (see u_strCompare for details).

   660    *

   661    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   662    *

   663    * @return A negative, zero, or positive integer indicating the comparison result.

   664    * @stable ICU 2.0

   665    */

   666   inline int8_t caseCompare(int32_t start,

   667          int32_t length,

   668          const UnicodeString& srcText,

   669          uint32_t options) const;

   671   /**

   672    * Compare two strings case-insensitively using full case folding.

   673    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).

   674    *

   675    * @param start The start offset in this string at which the compare operation begins.

   676    * @param length The number of code units from this string to compare.

   677    * @param srcText Another string to compare this one to.

   678    * @param srcStart The start offset in that string at which the compare operation begins.

   679    * @param srcLength The number of code units from that string to compare.

   680    * @param options A bit set of options:

   681    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   682    *     Comparison in code unit order with default case folding.

   683    *

   684    *   - U_COMPARE_CODE_POINT_ORDER

   685    *     Set to choose code point order instead of code unit order

   686    *     (see u_strCompare for details).

   687    *

   688    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   689    *

   690    * @return A negative, zero, or positive integer indicating the comparison result.

   691    * @stable ICU 2.0

   692    */

   693   inline int8_t caseCompare(int32_t start,

   694          int32_t length,

   695          const UnicodeString& srcText,

   696          int32_t srcStart,

   697          int32_t srcLength,

   698          uint32_t options) const;

   700   /**

   701    * Compare two strings case-insensitively using full case folding.

   702    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

   703    *

   704    * @param srcChars A pointer to another string to compare this one to.

   705    * @param srcLength The number of code units from that string to compare.

   706    * @param options A bit set of options:

   707    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   708    *     Comparison in code unit order with default case folding.

   709    *

   710    *   - U_COMPARE_CODE_POINT_ORDER

   711    *     Set to choose code point order instead of code unit order

   712    *     (see u_strCompare for details).

   713    *

   714    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   715    *

   716    * @return A negative, zero, or positive integer indicating the comparison result.

   717    * @stable ICU 2.0

   718    */

   719   inline int8_t caseCompare(const UChar *srcChars,

   720          int32_t srcLength,

   721          uint32_t options) const;

   723   /**

   724    * Compare two strings case-insensitively using full case folding.

   725    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

   726    *

   727    * @param start The start offset in this string at which the compare operation begins.

   728    * @param length The number of code units from this string to compare.

   729    * @param srcChars A pointer to another string to compare this one to.

   730    * @param options A bit set of options:

   731    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   732    *     Comparison in code unit order with default case folding.

   733    *

   734    *   - U_COMPARE_CODE_POINT_ORDER

   735    *     Set to choose code point order instead of code unit order

   736    *     (see u_strCompare for details).

   737    *

   738    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   739    *

   740    * @return A negative, zero, or positive integer indicating the comparison result.

   741    * @stable ICU 2.0

   742    */

   743   inline int8_t caseCompare(int32_t start,

   744          int32_t length,

   745          const UChar *srcChars,

   746          uint32_t options) const;

   748   /**

   749    * Compare two strings case-insensitively using full case folding.

   750    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

   751    *

   752    * @param start The start offset in this string at which the compare operation begins.

   753    * @param length The number of code units from this string to compare.

   754    * @param srcChars A pointer to another string to compare this one to.

   755    * @param srcStart The start offset in that string at which the compare operation begins.

   756    * @param srcLength The number of code units from that string to compare.

   757    * @param options A bit set of options:

   758    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   759    *     Comparison in code unit order with default case folding.

   760    *

   761    *   - U_COMPARE_CODE_POINT_ORDER

   762    *     Set to choose code point order instead of code unit order

   763    *     (see u_strCompare for details).

   764    *

   765    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   766    *

   767    * @return A negative, zero, or positive integer indicating the comparison result.

   768    * @stable ICU 2.0

   769    */

   770   inline int8_t caseCompare(int32_t start,

   771          int32_t length,

   772          const UChar *srcChars,

   773          int32_t srcStart,

   774          int32_t srcLength,

   775          uint32_t options) const;

   777   /**

   778    * Compare two strings case-insensitively using full case folding.

   779    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).

   780    *

   781    * @param start The start offset in this string at which the compare operation begins.

   782    * @param limit The offset after the last code unit from this string to compare.

   783    * @param srcText Another string to compare this one to.

   784    * @param srcStart The start offset in that string at which the compare operation begins.

   785    * @param srcLimit The offset after the last code unit from that string to compare.

   786    * @param options A bit set of options:

   787    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:

   788    *     Comparison in code unit order with default case folding.

   789    *

   790    *   - U_COMPARE_CODE_POINT_ORDER

   791    *     Set to choose code point order instead of code unit order

   792    *     (see u_strCompare for details).

   793    *

   794    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I

   795    *

   796    * @return A negative, zero, or positive integer indicating the comparison result.

   797    * @stable ICU 2.0

   798    */

   799   inline int8_t caseCompareBetween(int32_t start,

   800             int32_t limit,

   801             const UnicodeString& srcText,

   802             int32_t srcStart,

   803             int32_t srcLimit,

   804             uint32_t options) const;

   806   /**

   807    * Determine if this starts with the characters in <TT>text</TT>

   808    * @param text The text to match.

   809    * @return TRUE if this starts with the characters in <TT>text</TT>,

   810    * FALSE otherwise

   811    * @stable ICU 2.0

   812    */

   813   inline UBool startsWith(const UnicodeString& text) const;

   815   /**

   816    * Determine if this starts with the characters in <TT>srcText</TT>

   817    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   818    * @param srcText The text to match.

   819    * @param srcStart the offset into <TT>srcText</TT> to start matching

   820    * @param srcLength the number of characters in <TT>srcText</TT> to match

   821    * @return TRUE if this starts with the characters in <TT>srcText</TT>,

   822    * FALSE otherwise

   823    * @stable ICU 2.0

   824    */

   825   inline UBool startsWith(const UnicodeString& srcText,

   826             int32_t srcStart,

   827             int32_t srcLength) const;

   829   /**

   830    * Determine if this starts with the characters in <TT>srcChars</TT>

   831    * @param srcChars The characters to match.

   832    * @param srcLength the number of characters in <TT>srcChars</TT>

   833    * @return TRUE if this starts with the characters in <TT>srcChars</TT>,

   834    * FALSE otherwise

   835    * @stable ICU 2.0

   836    */

   837   inline UBool startsWith(const UChar *srcChars,

   838             int32_t srcLength) const;

   840   /**

   841    * Determine if this starts with the characters in <TT>srcChars</TT>

   842    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   843    * @param srcChars The characters to match.

   844    * @param srcStart the offset into <TT>srcChars</TT> to start matching

   845    * @param srcLength the number of characters in <TT>srcChars</TT> to match

   846    * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise

   847    * @stable ICU 2.0

   848    */

   849   inline UBool startsWith(const UChar *srcChars,

   850             int32_t srcStart,

   851             int32_t srcLength) const;

   853   /**

   854    * Determine if this ends with the characters in <TT>text</TT>

   855    * @param text The text to match.

   856    * @return TRUE if this ends with the characters in <TT>text</TT>,

   857    * FALSE otherwise

   858    * @stable ICU 2.0

   859    */

   860   inline UBool endsWith(const UnicodeString& text) const;

   862   /**

   863    * Determine if this ends with the characters in <TT>srcText</TT>

   864    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   865    * @param srcText The text to match.

   866    * @param srcStart the offset into <TT>srcText</TT> to start matching

   867    * @param srcLength the number of characters in <TT>srcText</TT> to match

   868    * @return TRUE if this ends with the characters in <TT>srcText</TT>,

   869    * FALSE otherwise

   870    * @stable ICU 2.0

   871    */

   872   inline UBool endsWith(const UnicodeString& srcText,

   873           int32_t srcStart,

   874           int32_t srcLength) const;

   876   /**

   877    * Determine if this ends with the characters in <TT>srcChars</TT>

   878    * @param srcChars The characters to match.

   879    * @param srcLength the number of characters in <TT>srcChars</TT>

   880    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,

   881    * FALSE otherwise

   882    * @stable ICU 2.0

   883    */

   884   inline UBool endsWith(const UChar *srcChars,

   885           int32_t srcLength) const;

   887   /**

   888    * Determine if this ends with the characters in <TT>srcChars</TT>

   889    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

   890    * @param srcChars The characters to match.

   891    * @param srcStart the offset into <TT>srcChars</TT> to start matching

   892    * @param srcLength the number of characters in <TT>srcChars</TT> to match

   893    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,

   894    * FALSE otherwise

   895    * @stable ICU 2.0

   896    */

   897   inline UBool endsWith(const UChar *srcChars,

   898           int32_t srcStart,

   899           int32_t srcLength) const;

   902   /* Searching - bitwise only */

   904   /**

   905    * Locate in this the first occurrence of the characters in <TT>text</TT>,

   906    * using bitwise comparison.

   907    * @param text The text to search for.

   908    * @return The offset into this of the start of <TT>text</TT>,

   909    * or -1 if not found.

   910    * @stable ICU 2.0

   911    */

   912   inline int32_t indexOf(const UnicodeString& text) const;

   914   /**

   915    * Locate in this the first occurrence of the characters in <TT>text</TT>

   916    * starting at offset <TT>start</TT>, using bitwise comparison.

   917    * @param text The text to search for.

   918    * @param start The offset at which searching will start.

   919    * @return The offset into this of the start of <TT>text</TT>,

   920    * or -1 if not found.

   921    * @stable ICU 2.0

   922    */

   923   inline int32_t indexOf(const UnicodeString& text,

   924               int32_t start) const;

   926   /**

   927    * Locate in this the first occurrence in the range

   928    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

   929    * in <TT>text</TT>, using bitwise comparison.

   930    * @param text The text to search for.

   931    * @param start The offset at which searching will start.

   932    * @param length The number of characters to search

   933    * @return The offset into this of the start of <TT>text</TT>,

   934    * or -1 if not found.

   935    * @stable ICU 2.0

   936    */

   937   inline int32_t indexOf(const UnicodeString& text,

   938               int32_t start,

   939               int32_t length) const;

   941   /**

   942    * Locate in this the first occurrence in the range

   943    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

   944    *  in <TT>srcText</TT> in the range

   945    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

   946    * using bitwise comparison.

   947    * @param srcText The text to search for.

   948    * @param srcStart the offset into <TT>srcText</TT> at which

   949    * to start matching

   950    * @param srcLength the number of characters in <TT>srcText</TT> to match

   951    * @param start the offset into this at which to start matching

   952    * @param length the number of characters in this to search

   953    * @return The offset into this of the start of <TT>srcText</TT>,

   954    * or -1 if not found.

   955    * @stable ICU 2.0

   956    */

   957   inline int32_t indexOf(const UnicodeString& srcText,

   958               int32_t srcStart,

   959               int32_t srcLength,

   960               int32_t start,

   961               int32_t length) const;

   963   /**

   964    * Locate in this the first occurrence of the characters in

   965    * <TT>srcChars</TT>

   966    * starting at offset <TT>start</TT>, using bitwise comparison.

   967    * @param srcChars The text to search for.

   968    * @param srcLength the number of characters in <TT>srcChars</TT> to match

   969    * @param start the offset into this at which to start matching

   970    * @return The offset into this of the start of <TT>srcChars</TT>,

   971    * or -1 if not found.

   972    * @stable ICU 2.0

   973    */

   974   inline int32_t indexOf(const UChar *srcChars,

   975               int32_t srcLength,

   976               int32_t start) const;

   978   /**

   979    * Locate in this the first occurrence in the range

   980    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

   981    * in <TT>srcChars</TT>, using bitwise comparison.

   982    * @param srcChars The text to search for.

   983    * @param srcLength the number of characters in <TT>srcChars</TT>

   984    * @param start The offset at which searching will start.

   985    * @param length The number of characters to search

   986    * @return The offset into this of the start of <TT>srcChars</TT>,

   987    * or -1 if not found.

   988    * @stable ICU 2.0

   989    */

   990   inline int32_t indexOf(const UChar *srcChars,

   991               int32_t srcLength,

   992               int32_t start,

   993               int32_t length) const;

   995   /**

   996    * Locate in this the first occurrence in the range

   997    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

   998    * in <TT>srcChars</TT> in the range

   999    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

  1000    * using bitwise comparison.

  1001    * @param srcChars The text to search for.

  1002    * @param srcStart the offset into <TT>srcChars</TT> at which

  1003    * to start matching

  1004    * @param srcLength the number of characters in <TT>srcChars</TT> to match

  1005    * @param start the offset into this at which to start matching

  1006    * @param length the number of characters in this to search

  1007    * @return The offset into this of the start of <TT>srcChars</TT>,

  1008    * or -1 if not found.

  1009    * @stable ICU 2.0

  1010    */

  1011   int32_t indexOf(const UChar *srcChars,

  1012               int32_t srcStart,

  1013               int32_t srcLength,

  1014               int32_t start,

  1015               int32_t length) const;

  1017   /**

  1018    * Locate in this the first occurrence of the BMP code point <code>c</code>,

  1019    * using bitwise comparison.

  1020    * @param c The code unit to search for.

  1021    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1022    * @stable ICU 2.0

  1023    */

  1024   inline int32_t indexOf(UChar c) const;

  1026   /**

  1027    * Locate in this the first occurrence of the code point <TT>c</TT>,

  1028    * using bitwise comparison.

  1029    *

  1030    * @param c The code point to search for.

  1031    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1032    * @stable ICU 2.0

  1033    */

  1034   inline int32_t indexOf(UChar32 c) const;

  1036   /**

  1037    * Locate in this the first occurrence of the BMP code point <code>c</code>,

  1038    * starting at offset <TT>start</TT>, using bitwise comparison.

  1039    * @param c The code unit to search for.

  1040    * @param start The offset at which searching will start.

  1041    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1042    * @stable ICU 2.0

  1043    */

  1044   inline int32_t indexOf(UChar c,

  1045               int32_t start) const;

  1047   /**

  1048    * Locate in this the first occurrence of the code point <TT>c</TT>

  1049    * starting at offset <TT>start</TT>, using bitwise comparison.

  1050    *

  1051    * @param c The code point to search for.

  1052    * @param start The offset at which searching will start.

  1053    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1054    * @stable ICU 2.0

  1055    */

  1056   inline int32_t indexOf(UChar32 c,

  1057               int32_t start) const;

  1059   /**

  1060    * Locate in this the first occurrence of the BMP code point <code>c</code>

  1061    * in the range [<TT>start</TT>, <TT>start + length</TT>),

  1062    * using bitwise comparison.

  1063    * @param c The code unit to search for.

  1064    * @param start the offset into this at which to start matching

  1065    * @param length the number of characters in this to search

  1066    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1067    * @stable ICU 2.0

  1068    */

  1069   inline int32_t indexOf(UChar c,

  1070               int32_t start,

  1071               int32_t length) const;

  1073   /**

  1074    * Locate in this the first occurrence of the code point <TT>c</TT>

  1075    * in the range [<TT>start</TT>, <TT>start + length</TT>),

  1076    * using bitwise comparison.

  1077    *

  1078    * @param c The code point to search for.

  1079    * @param start the offset into this at which to start matching

  1080    * @param length the number of characters in this to search

  1081    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1082    * @stable ICU 2.0

  1083    */

  1084   inline int32_t indexOf(UChar32 c,

  1085               int32_t start,

  1086               int32_t length) const;

  1088   /**

  1089    * Locate in this the last occurrence of the characters in <TT>text</TT>,

  1090    * using bitwise comparison.

  1091    * @param text The text to search for.

  1092    * @return The offset into this of the start of <TT>text</TT>,

  1093    * or -1 if not found.

  1094    * @stable ICU 2.0

  1095    */

  1096   inline int32_t lastIndexOf(const UnicodeString& text) const;

  1098   /**

  1099    * Locate in this the last occurrence of the characters in <TT>text</TT>

  1100    * starting at offset <TT>start</TT>, using bitwise comparison.

  1101    * @param text The text to search for.

  1102    * @param start The offset at which searching will start.

  1103    * @return The offset into this of the start of <TT>text</TT>,

  1104    * or -1 if not found.

  1105    * @stable ICU 2.0

  1106    */

  1107   inline int32_t lastIndexOf(const UnicodeString& text,

  1108               int32_t start) const;

  1110   /**

  1111    * Locate in this the last occurrence in the range

  1112    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

  1113    * in <TT>text</TT>, using bitwise comparison.

  1114    * @param text The text to search for.

  1115    * @param start The offset at which searching will start.

  1116    * @param length The number of characters to search

  1117    * @return The offset into this of the start of <TT>text</TT>,

  1118    * or -1 if not found.

  1119    * @stable ICU 2.0

  1120    */

  1121   inline int32_t lastIndexOf(const UnicodeString& text,

  1122               int32_t start,

  1123               int32_t length) const;

  1125   /**

  1126    * Locate in this the last occurrence in the range

  1127    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

  1128    * in <TT>srcText</TT> in the range

  1129    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

  1130    * using bitwise comparison.

  1131    * @param srcText The text to search for.

  1132    * @param srcStart the offset into <TT>srcText</TT> at which

  1133    * to start matching

  1134    * @param srcLength the number of characters in <TT>srcText</TT> to match

  1135    * @param start the offset into this at which to start matching

  1136    * @param length the number of characters in this to search

  1137    * @return The offset into this of the start of <TT>srcText</TT>,

  1138    * or -1 if not found.

  1139    * @stable ICU 2.0

  1140    */

  1141   inline int32_t lastIndexOf(const UnicodeString& srcText,

  1142               int32_t srcStart,

  1143               int32_t srcLength,

  1144               int32_t start,

  1145               int32_t length) const;

  1147   /**

  1148    * Locate in this the last occurrence of the characters in <TT>srcChars</TT>

  1149    * starting at offset <TT>start</TT>, using bitwise comparison.

  1150    * @param srcChars The text to search for.

  1151    * @param srcLength the number of characters in <TT>srcChars</TT> to match

  1152    * @param start the offset into this at which to start matching

  1153    * @return The offset into this of the start of <TT>srcChars</TT>,

  1154    * or -1 if not found.

  1155    * @stable ICU 2.0

  1156    */

  1157   inline int32_t lastIndexOf(const UChar *srcChars,

  1158               int32_t srcLength,

  1159               int32_t start) const;

  1161   /**

  1162    * Locate in this the last occurrence in the range

  1163    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

  1164    * in <TT>srcChars</TT>, using bitwise comparison.

  1165    * @param srcChars The text to search for.

  1166    * @param srcLength the number of characters in <TT>srcChars</TT>

  1167    * @param start The offset at which searching will start.

  1168    * @param length The number of characters to search

  1169    * @return The offset into this of the start of <TT>srcChars</TT>,

  1170    * or -1 if not found.

  1171    * @stable ICU 2.0

  1172    */

  1173   inline int32_t lastIndexOf(const UChar *srcChars,

  1174               int32_t srcLength,

  1175               int32_t start,

  1176               int32_t length) const;

  1178   /**

  1179    * Locate in this the last occurrence in the range

  1180    * [<TT>start</TT>, <TT>start + length</TT>) of the characters

  1181    * in <TT>srcChars</TT> in the range

  1182    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),

  1183    * using bitwise comparison.

  1184    * @param srcChars The text to search for.

  1185    * @param srcStart the offset into <TT>srcChars</TT> at which

  1186    * to start matching

  1187    * @param srcLength the number of characters in <TT>srcChars</TT> to match

  1188    * @param start the offset into this at which to start matching

  1189    * @param length the number of characters in this to search

  1190    * @return The offset into this of the start of <TT>srcChars</TT>,

  1191    * or -1 if not found.

  1192    * @stable ICU 2.0

  1193    */

  1194   int32_t lastIndexOf(const UChar *srcChars,

  1195               int32_t srcStart,

  1196               int32_t srcLength,

  1197               int32_t start,

  1198               int32_t length) const;

  1200   /**

  1201    * Locate in this the last occurrence of the BMP code point <code>c</code>,

  1202    * using bitwise comparison.

  1203    * @param c The code unit to search for.

  1204    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1205    * @stable ICU 2.0

  1206    */

  1207   inline int32_t lastIndexOf(UChar c) const;

  1209   /**

  1210    * Locate in this the last occurrence of the code point <TT>c</TT>,

  1211    * using bitwise comparison.

  1212    *

  1213    * @param c The code point to search for.

  1214    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1215    * @stable ICU 2.0

  1216    */

  1217   inline int32_t lastIndexOf(UChar32 c) const;

  1219   /**

  1220    * Locate in this the last occurrence of the BMP code point <code>c</code>

  1221    * starting at offset <TT>start</TT>, using bitwise comparison.

  1222    * @param c The code unit to search for.

  1223    * @param start The offset at which searching will start.

  1224    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1225    * @stable ICU 2.0

  1226    */

  1227   inline int32_t lastIndexOf(UChar c,

  1228               int32_t start) const;

  1230   /**

  1231    * Locate in this the last occurrence of the code point <TT>c</TT>

  1232    * starting at offset <TT>start</TT>, using bitwise comparison.

  1233    *

  1234    * @param c The code point to search for.

  1235    * @param start The offset at which searching will start.

  1236    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1237    * @stable ICU 2.0

  1238    */

  1239   inline int32_t lastIndexOf(UChar32 c,

  1240               int32_t start) const;

  1242   /**

  1243    * Locate in this the last occurrence of the BMP code point <code>c</code>

  1244    * in the range [<TT>start</TT>, <TT>start + length</TT>),

  1245    * using bitwise comparison.

  1246    * @param c The code unit to search for.

  1247    * @param start the offset into this at which to start matching

  1248    * @param length the number of characters in this to search

  1249    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1250    * @stable ICU 2.0

  1251    */

  1252   inline int32_t lastIndexOf(UChar c,

  1253               int32_t start,

  1254               int32_t length) const;

  1256   /**

  1257    * Locate in this the last occurrence of the code point <TT>c</TT>

  1258    * in the range [<TT>start</TT>, <TT>start + length</TT>),

  1259    * using bitwise comparison.

  1260    *

  1261    * @param c The code point to search for.

  1262    * @param start the offset into this at which to start matching

  1263    * @param length the number of characters in this to search

  1264    * @return The offset into this of <TT>c</TT>, or -1 if not found.

  1265    * @stable ICU 2.0

  1266    */

  1267   inline int32_t lastIndexOf(UChar32 c,

  1268               int32_t start,

  1269               int32_t length) const;

  1272   /* Character access */

  1274   /**

  1275    * Return the code unit at offset <tt>offset</tt>.

  1276    * If the offset is not valid (0..length()-1) then U+ffff is returned.

  1277    * @param offset a valid offset into the text

  1278    * @return the code unit at offset <tt>offset</tt>

  1279    *         or 0xffff if the offset is not valid for this string

  1280    * @stable ICU 2.0

  1281    */

  1282   inline UChar charAt(int32_t offset) const;

  1284   /**

  1285    * Return the code unit at offset <tt>offset</tt>.

  1286    * If the offset is not valid (0..length()-1) then U+ffff is returned.

  1287    * @param offset a valid offset into the text

  1288    * @return the code unit at offset <tt>offset</tt>

  1289    * @stable ICU 2.0

  1290    */

  1291   inline UChar operator[] (int32_t offset) const;

  1293   /**

  1294    * Return the code point that contains the code unit

  1295    * at offset <tt>offset</tt>.

  1296    * If the offset is not valid (0..length()-1) then U+ffff is returned.

  1297    * @param offset a valid offset into the text

  1298    * that indicates the text offset of any of the code units

  1299    * that will be assembled into a code point (21-bit value) and returned

  1300    * @return the code point of text at <tt>offset</tt>

  1301    *         or 0xffff if the offset is not valid for this string

  1302    * @stable ICU 2.0

  1303    */

  1304   UChar32 char32At(int32_t offset) const;

  1306   /**

  1307    * Adjust a random-access offset so that

  1308    * it points to the beginning of a Unicode character.

  1309    * The offset that is passed in points to

  1310    * any code unit of a code point,

  1311    * while the returned offset will point to the first code unit

  1312    * of the same code point.

  1313    * In UTF-16, if the input offset points to a second surrogate

  1314    * of a surrogate pair, then the returned offset will point

  1315    * to the first surrogate.

  1316    * @param offset a valid offset into one code point of the text

  1317    * @return offset of the first code unit of the same code point

  1318    * @see U16_SET_CP_START

  1319    * @stable ICU 2.0

  1320    */

  1321   int32_t getChar32Start(int32_t offset) const;

  1323   /**

  1324    * Adjust a random-access offset so that

  1325    * it points behind a Unicode character.

  1326    * The offset that is passed in points behind

  1327    * any code unit of a code point,

  1328    * while the returned offset will point behind the last code unit

  1329    * of the same code point.

  1330    * In UTF-16, if the input offset points behind the first surrogate

  1331    * (i.e., to the second surrogate)

  1332    * of a surrogate pair, then the returned offset will point

  1333    * behind the second surrogate (i.e., to the first code unit after the surrogate pair).

  1334    * @param offset a valid offset after any code unit of a code point of the text

  1335    * @return offset of the first code unit after the same code point

  1336    * @see U16_SET_CP_LIMIT

  1337    * @stable ICU 2.0

  1338    */

  1339   int32_t getChar32Limit(int32_t offset) const;

  1341   /**

  1342    * Move the code unit index along the string by delta code points.

  1343    * Interpret the input index as a code unit-based offset into the string,

  1344    * move the index forward or backward by delta code points, and

  1345    * return the resulting index.

  1346    * The input index should point to the first code unit of a code point,

  1347    * if there is more than one.

  1348    *

  1349    * Both input and output indexes are code unit-based as for all

  1350    * string indexes/offsets in ICU (and other libraries, like MBCS char*).

  1351    * If delta<0 then the index is moved backward (toward the start of the string).

  1352    * If delta>0 then the index is moved forward (toward the end of the string).

  1353    *

  1354    * This behaves like CharacterIterator::move32(delta, kCurrent).

  1355    *

  1356    * Behavior for out-of-bounds indexes:

  1357    * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,

  1358    * if the input index<0 then it is pinned to 0;

  1359    * if it is index>length() then it is pinned to length().

  1360    * Afterwards, the index is moved by <code>delta</code> code points

  1361    * forward or backward,

  1362    * but no further backward than to 0 and no further forward than to length().

  1363    * The resulting index return value will be in between 0 and length(), inclusively.

  1364    *

  1365    * Examples:

  1366    * <pre>

  1367    * // s has code points 'a' U+10000 'b' U+10ffff U+2029

  1368    * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();

  1369    *

  1370    * // initial index: position of U+10000

  1371    * int32_t index=1;

  1372    *

  1373    * // the following examples will all result in index==4, position of U+10ffff

  1374    *

  1375    * // skip 2 code points from some position in the string

  1376    * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'

  1377    *

  1378    * // go to the 3rd code point from the start of s (0-based)

  1379    * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'

  1380    *

  1381    * // go to the next-to-last code point of s

  1382    * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff

  1383    * </pre>

  1384    *

  1385    * @param index input code unit index

  1386    * @param delta (signed) code point count to move the index forward or backward

  1387    *        in the string

  1388    * @return the resulting code unit index

  1389    * @stable ICU 2.0

  1390    */

  1391   int32_t moveIndex32(int32_t index, int32_t delta) const;

  1393   /* Substring extraction */

  1395   /**

  1396    * Copy the characters in the range

  1397    * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,

  1398    * beginning at <tt>dstStart</tt>.

  1399    * If the string aliases to <code>dst</code> itself as an external buffer,

  1400    * then extract() will not copy the contents.

  1401    *

  1402    * @param start offset of first character which will be copied into the array

  1403    * @param length the number of characters to extract

  1404    * @param dst array in which to copy characters.  The length of <tt>dst</tt>

  1405    * must be at least (<tt>dstStart + length</tt>).

  1406    * @param dstStart the offset in <TT>dst</TT> where the first character

  1407    * will be extracted

  1408    * @stable ICU 2.0

  1409    */

  1410   inline void extract(int32_t start,

  1411            int32_t length,

  1412            UChar *dst,

  1413            int32_t dstStart = 0) const;

  1415   /**

  1416    * Copy the contents of the string into dest.

  1417    * This is a convenience function that

  1418    * checks if there is enough space in dest,

  1419    * extracts the entire string if possible,

  1420    * and NUL-terminates dest if possible.

  1421    *

  1422    * If the string fits into dest but cannot be NUL-terminated

  1423    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.

  1424    * If the string itself does not fit into dest

  1425    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.

  1426    *

  1427    * If the string aliases to <code>dest</code> itself as an external buffer,

  1428    * then extract() will not copy the contents.

  1429    *

  1430    * @param dest Destination string buffer.

  1431    * @param destCapacity Number of UChars available at dest.

  1432    * @param errorCode ICU error code.

  1433    * @return length()

  1434    * @stable ICU 2.0

  1435    */

  1436   int32_t

  1437   extract(UChar *dest, int32_t destCapacity,

  1438           UErrorCode &errorCode) const;

  1440   /**

  1441    * Copy the characters in the range

  1442    * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString

  1443    * <tt>target</tt>.

  1444    * @param start offset of first character which will be copied

  1445    * @param length the number of characters to extract

  1446    * @param target UnicodeString into which to copy characters.

  1447    * (This function returns void; the extracted text is delivered in <TT>target</TT>.)

  1448    * @stable ICU 2.0

  1449    */

  1450   inline void extract(int32_t start,

  1451            int32_t length,

  1452            UnicodeString& target) const;

  1454   /**

  1455    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)

  1456    * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.

  1457    * @param start offset of first character which will be copied into the array

  1458    * @param limit offset immediately following the last character to be copied

  1459    * @param dst array in which to copy characters.  The length of <tt>dst</tt>

  1460    * must be at least (<tt>dstStart + (limit - start)</tt>).

  1461    * @param dstStart the offset in <TT>dst</TT> where the first character

  1462    * will be extracted

  1463    * @stable ICU 2.0

  1464    */

  1465   inline void extractBetween(int32_t start,

  1466               int32_t limit,

  1467               UChar *dst,

  1468               int32_t dstStart = 0) const;

  1470   /**

  1471    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)

  1472    * into the UnicodeString <tt>target</tt>.  Replaceable API.

  1473    * @param start offset of first character which will be copied

  1474    * @param limit offset immediately following the last character to be copied

  1475    * @param target UnicodeString into which to copy characters.

  1476    * (This function returns void; the extracted text is delivered in <TT>target</TT>.)

  1477    * @stable ICU 2.0

  1478    */

  1479   virtual void extractBetween(int32_t start,

  1480               int32_t limit,

  1481               UnicodeString& target) const;

  1483   /**

  1484    * Copy the characters in the range

  1485    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.

  1486    * All characters must be invariant (see utypes.h).

  1487    * Use US_INV as the last, signature-distinguishing parameter.

  1488    *

  1489    * This function does not write any more than <code>targetLength</code>

  1490    * characters but returns the length of the entire output string

  1491    * so that one can allocate a larger buffer and call the function again

  1492    * if necessary.

  1493    * The output string is NUL-terminated if possible.

  1494    *

  1495    * @param start offset of first character which will be copied

  1496    * @param startLength the number of characters to extract

  1497    * @param target the target buffer for extraction, can be NULL

  1498    *               if targetCapacity is 0

  1499    * @param targetCapacity the length of the target buffer

  1500    * @param inv Signature-distinguishing parameter, use US_INV.

  1501    * @return the output string length, not including the terminating NUL

  1502    * @stable ICU 3.2

  1503    */

  1504   int32_t extract(int32_t start,

  1505            int32_t startLength,

  1506            char *target,

  1507            int32_t targetCapacity,

  1508            enum EInvariant inv) const;

  1510 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION

  1512   /**

  1513    * Copy the characters in the range

  1514    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters

  1515    * in the platform's default codepage.

  1516    * This function does not write any more than <code>targetLength</code>

  1517    * characters but returns the length of the entire output string

  1518    * so that one can allocate a larger buffer and call the function again

  1519    * if necessary.

  1520    * The output string is NUL-terminated if possible.

  1521    *

  1522    * @param start offset of first character which will be copied

  1523    * @param startLength the number of characters to extract

  1524    * @param target the target buffer for extraction

  1525    * @param targetLength the length of the target buffer

  1526    * If <TT>target</TT> is NULL, then the number of bytes required for

  1527    * <TT>target</TT> is returned.

  1528    * @return the output string length, not including the terminating NUL

  1529    * @stable ICU 2.0

  1530    */

  1531   int32_t extract(int32_t start,

  1532            int32_t startLength,

  1533            char *target,

  1534            uint32_t targetLength) const;

  1536 #endif

  1538 #if !UCONFIG_NO_CONVERSION

  1540   /**

  1541    * Copy the characters in the range

  1542    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters

  1543    * in a specified codepage.

  1544    * The output string is NUL-terminated.

  1545    *

  1546    * Recommendation: For invariant-character strings use

  1547    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const

  1548    * because it avoids object code dependencies of UnicodeString on

  1549    * the conversion code.

  1550    *

  1551    * @param start offset of first character which will be copied

  1552    * @param startLength the number of characters to extract

  1553    * @param target the target buffer for extraction

  1554    * @param codepage the desired codepage for the characters.  0 has

  1555    * the special meaning of the default codepage

  1556    * If <code>codepage</code> is an empty string (<code>""</code>),

  1557    * then a simple conversion is performed on the codepage-invariant

  1558    * subset ("invariant characters") of the platform encoding. See utypes.h.

  1559    * If <TT>target</TT> is NULL, then the number of bytes required for

  1560    * <TT>target</TT> is returned. It is assumed that the target is big enough

  1561    * to fit all of the characters.

  1562    * @return the output string length, not including the terminating NUL

  1563    * @stable ICU 2.0

  1564    */

  1565   inline int32_t extract(int32_t start,

  1566                  int32_t startLength,

  1567                  char *target,

  1568                  const char *codepage = 0) const;

  1570   /**

  1571    * Copy the characters in the range

  1572    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters

  1573    * in a specified codepage.

  1574    * This function does not write any more than <code>targetLength</code>

  1575    * characters but returns the length of the entire output string

  1576    * so that one can allocate a larger buffer and call the function again

  1577    * if necessary.

  1578    * The output string is NUL-terminated if possible.

  1579    *

  1580    * Recommendation: For invariant-character strings use

  1581    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const

  1582    * because it avoids object code dependencies of UnicodeString on

  1583    * the conversion code.

  1584    *

  1585    * @param start offset of first character which will be copied

  1586    * @param startLength the number of characters to extract

  1587    * @param target the target buffer for extraction

  1588    * @param targetLength the length of the target buffer

  1589    * @param codepage the desired codepage for the characters.  0 has

  1590    * the special meaning of the default codepage

  1591    * If <code>codepage</code> is an empty string (<code>""</code>),

  1592    * then a simple conversion is performed on the codepage-invariant

  1593    * subset ("invariant characters") of the platform encoding. See utypes.h.

  1594    * If <TT>target</TT> is NULL, then the number of bytes required for

  1595    * <TT>target</TT> is returned.

  1596    * @return the output string length, not including the terminating NUL

  1597    * @stable ICU 2.0

  1598    */

  1599   int32_t extract(int32_t start,

  1600            int32_t startLength,

  1601            char *target,

  1602            uint32_t targetLength,

  1603            const char *codepage) const;

  1605   /**

  1606    * Convert the UnicodeString into a codepage string using an existing UConverter.

  1607    * The output string is NUL-terminated if possible.

  1608    *

  1609    * This function avoids the overhead of opening and closing a converter if

  1610    * multiple strings are extracted.

  1611    *

  1612    * @param dest destination string buffer, can be NULL if destCapacity==0

  1613    * @param destCapacity the number of chars available at dest

  1614    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),

  1615    *        or NULL for the default converter

  1616    * @param errorCode normal ICU error code

  1617    * @return the length of the output string, not counting the terminating NUL;

  1618    *         if the length is greater than destCapacity, then the string will not fit

  1619    *         and a buffer of the indicated length would need to be passed in

  1620    * @stable ICU 2.0

  1621    */

  1622   int32_t extract(char *dest, int32_t destCapacity,

  1623                   UConverter *cnv,

  1624                   UErrorCode &errorCode) const;

  1626 #endif

  1628   /**

  1629    * Create a temporary substring for the specified range.

  1630    * Unlike the substring constructor and setTo() functions,

  1631    * the object returned here will be a read-only alias (using getBuffer())

  1632    * rather than copying the text.

  1633    * As a result, this substring operation is much faster but requires

  1634    * that the original string not be modified or deleted during the lifetime

  1635    * of the returned substring object.

  1636    * @param start offset of the first character visible in the substring

  1637    * @param length length of the substring

  1638    * @return a read-only alias UnicodeString object for the substring

  1639    * @stable ICU 4.4

  1640    */

  1641   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;

  1643   /**

  1644    * Create a temporary substring for the specified range.

  1645    * Same as tempSubString(start, length) except that the substring range

  1646    * is specified as a (start, limit) pair (with an exclusive limit index)

  1647    * rather than a (start, length) pair.

  1648    * @param start offset of the first character visible in the substring

  1649    * @param limit offset immediately following the last character visible in the substring

  1650    * @return a read-only alias UnicodeString object for the substring

  1651    * @stable ICU 4.4

  1652    */

  1653   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;

  1655   /**

  1656    * Convert the UnicodeString to UTF-8 and write the result

  1657    * to a ByteSink. This is called by toUTF8String().

  1658    * Unpaired surrogates are replaced with U+FFFD.

  1659    * Calls u_strToUTF8WithSub().

  1660    *

  1661    * @param sink A ByteSink to which the UTF-8 version of the string is written.

  1662    *             sink.Flush() is called at the end.

  1663    * @stable ICU 4.2

  1664    * @see toUTF8String

  1665    */

  1666   void toUTF8(ByteSink &sink) const;

  1668 #if U_HAVE_STD_STRING

  1670   /**

  1671    * Convert the UnicodeString to UTF-8 and append the result

  1672    * to a standard string.

  1673    * Unpaired surrogates are replaced with U+FFFD.

  1674    * Calls toUTF8().

  1675    *

  1676    * @param result A standard string (or a compatible object)

  1677    *        to which the UTF-8 version of the string is appended.

  1678    * @return A reference to <code>result</code>, the string object that was passed in.

  1679    * @stable ICU 4.2

  1680    * @see toUTF8

  1681    */

  1682   template<typename StringClass>  // StringClass: the caller's string type; also used to instantiate StringByteSink

  1683   StringClass &toUTF8String(StringClass &result) const {

  1684     StringByteSink<StringClass> sbs(&result);  // sink constructed over the caller's string; handed to toUTF8() below

  1685     toUTF8(sbs);  // writes this string as UTF-8 to the sink (see toUTF8(ByteSink &) declared above)

  1686     return result;  // returns the same object that was passed in

  1687   }

  1689 #endif

  1691   /**

  1692    * Convert the UnicodeString to UTF-32.

  1693    * Unpaired surrogates are replaced with U+FFFD.

  1694    * Calls u_strToUTF32WithSub().

  1695    *

  1696    * @param utf32 destination string buffer, can be NULL if capacity==0

  1697    * @param capacity the number of UChar32s available at utf32

  1698    * @param errorCode Standard ICU error code. Its input value must

  1699    *                  pass the U_SUCCESS() test, or else the function returns

  1700    *                  immediately. Check for U_FAILURE() on output or use with

  1701    *                  function chaining. (See User Guide for details.)

  1702    * @return The length of the UTF-32 string.

  1703    * @see fromUTF32

  1704    * @stable ICU 4.2

  1705    */

  1706   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;

  1708   /* Length operations */

  1710   /**

  1711    * Return the length of the UnicodeString object.

  1712    * The length is the number of UChar code units in the UnicodeString.

  1713    * If you want the number of code points, please use countChar32().

  1714    * @return the length of the UnicodeString object

  1715    * @see countChar32

  1716    * @stable ICU 2.0

  1717    */

  1718   inline int32_t length(void) const;

  1720   /**

  1721    * Count Unicode code points in the length UChar code units of the string.

  1722    * A code point may occupy either one or two UChar code units.

  1723    * Counting code points involves reading all code units.

  1724    *

  1725    * This function is basically the inverse of moveIndex32().

  1726    *

  1727    * @param start the index of the first code unit to check

  1728    * @param length the number of UChar code units to check

  1729    * @return the number of code points in the specified code units

  1730    * @see length

  1731    * @stable ICU 2.0

  1732    */

  1733   int32_t

  1734   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;

  1736   /**

  1737    * Check if the length UChar code units of the string

  1738    * contain more Unicode code points than a certain number.

  1739    * This is more efficient than counting all code points in this part of the string

  1740    * and comparing that number with a threshold.

  1741    * This function may not need to scan the string at all if the length

  1742    * falls within a certain range, and

  1743    * never needs to count more than 'number+1' code points.

  1744    * Logically equivalent to (countChar32(start, length)>number).

  1745    * A Unicode code point may occupy either one or two UChar code units.

  1746    *

  1747    * @param start the index of the first code unit to check (0 for the entire string)

  1748    * @param length the number of UChar code units to check

  1749    *               (use INT32_MAX for the entire string; remember that start/length

  1750    *                values are pinned)

  1751    * @param number The number of code points in the (sub)string is compared against

  1752    *               the 'number' parameter.

  1753    * @return Boolean value for whether the string contains more Unicode code points

  1754    *         than 'number'. Same as (u_countChar32(s, length)>number).

  1755    * @see countChar32

  1756    * @see u_strHasMoreChar32Than

  1757    * @stable ICU 2.4

  1758    */

  1759   UBool

  1760   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;

  1762   /**

  1763    * Determine if this string is empty.

  1764    * @return TRUE if this string contains 0 characters, FALSE otherwise.

  1765    * @stable ICU 2.0

  1766    */

  1767   inline UBool isEmpty(void) const;

  1769   /**

  1770    * Return the capacity of the internal buffer of the UnicodeString object.

  1771    * This is useful together with the getBuffer functions.

  1772    * See there for details.

  1773    *

  1774    * @return the number of UChars available in the internal buffer

  1775    * @see getBuffer

  1776    * @stable ICU 2.0

  1777    */

  1778   inline int32_t getCapacity(void) const;

  1780   /* Other operations */

  1782   /**

  1783    * Generate a hash code for this object.

  1784    * @return The hash code of this UnicodeString.

  1785    * @stable ICU 2.0

  1786    */

  1787   inline int32_t hashCode(void) const;

  1789   /**

  1790    * Determine if this object contains a valid string.

  1791    * A bogus string has no value. It is different from an empty string,

  1792    * although in both cases isEmpty() returns TRUE and length() returns 0.

  1793    * setToBogus() and isBogus() can be used to indicate that no string value is available.

  1794    * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and

  1795    * length() returns 0.

  1796    *

  1797    * @return TRUE if the string is bogus/invalid, FALSE otherwise

  1798    * @see setToBogus()

  1799    * @stable ICU 2.0

  1800    */

  1801   inline UBool isBogus(void) const;

  1804   //========================================

  1805   // Write operations

  1806   //========================================

  1808   /* Assignment operations */

  1810   /**

  1811    * Assignment operator.  Replace the characters in this UnicodeString

  1812    * with the characters from <TT>srcText</TT>.

  1813    * @param srcText The text containing the characters to replace

  1814    * @return a reference to this

  1815    * @stable ICU 2.0

  1816    */

  1817   UnicodeString &operator=(const UnicodeString &srcText);

  1819   /**

  1820    * Almost the same as the assignment operator.

  1821    * Replace the characters in this UnicodeString

  1822    * with the characters from <code>srcText</code>.

  1823    *

  1824    * This function works the same as the assignment operator

  1825    * for all strings except for ones that are readonly aliases.

  1826    *

  1827    * Starting with ICU 2.4, the assignment operator and the copy constructor

  1828    * allocate a new buffer and copy the buffer contents even for readonly aliases.

  1829    * This function implements the old, more efficient but less safe behavior

  1830    * of making this string also a readonly alias to the same buffer.

  1831    *

  1832    * The fastCopyFrom function must be used only if it is known that the lifetime of

  1833    * this UnicodeString does not exceed the lifetime of the aliased buffer

  1834    * including its contents, for example for strings from resource bundles

  1835    * or aliases to string constants.

  1836    *

  1837    * @param src The text containing the characters to replace.

  1838    * @return a reference to this

  1839    * @stable ICU 2.4

  1840    */

  1841   UnicodeString &fastCopyFrom(const UnicodeString &src);

  1843   /**

  1844    * Assignment operator.  Replace the characters in this UnicodeString

  1845    * with the code unit <TT>ch</TT>.

  1846    * @param ch the code unit to replace

  1847    * @return a reference to this

  1848    * @stable ICU 2.0

  1849    */

  1850   inline UnicodeString& operator= (UChar ch);

  1852   /**

  1853    * Assignment operator.  Replace the characters in this UnicodeString

  1854    * with the code point <TT>ch</TT>.

  1855    * @param ch the code point to replace

  1856    * @return a reference to this

  1857    * @stable ICU 2.0

  1858    */

  1859   inline UnicodeString& operator= (UChar32 ch);

  1861   /**

  1862    * Set the text in the UnicodeString object to the characters

  1863    * in <TT>srcText</TT> in the range

  1864    * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).

  1865    * <TT>srcText</TT> is not modified.

  1866    * @param srcText the source for the new characters

  1867    * @param srcStart the offset into <TT>srcText</TT> where new characters

  1868    * will be obtained

  1869    * @return a reference to this

  1870    * @stable ICU 2.2

  1871    */

  1872   inline UnicodeString& setTo(const UnicodeString& srcText,

  1873                int32_t srcStart);

  1875   /**

  1876    * Set the text in the UnicodeString object to the characters

  1877    * in <TT>srcText</TT> in the range

  1878    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

  1879    * <TT>srcText</TT> is not modified.

  1880    * @param srcText the source for the new characters

  1881    * @param srcStart the offset into <TT>srcText</TT> where new characters

  1882    * will be obtained

  1883    * @param srcLength the number of characters in <TT>srcText</TT> in the

  1884    * replace string.

  1885    * @return a reference to this

  1886    * @stable ICU 2.0

  1887    */

  1888   inline UnicodeString& setTo(const UnicodeString& srcText,

  1889                int32_t srcStart,

  1890                int32_t srcLength);

  1892   /**

  1893    * Set the text in the UnicodeString object to the characters in

  1894    * <TT>srcText</TT>.

  1895    * <TT>srcText</TT> is not modified.

  1896    * @param srcText the source for the new characters

  1897    * @return a reference to this

  1898    * @stable ICU 2.0

  1899    */

  1900   inline UnicodeString& setTo(const UnicodeString& srcText);

  1902   /**

  1903    * Set the characters in the UnicodeString object to the characters

  1904    * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.

  1905    * @param srcChars the source for the new characters

  1906    * @param srcLength the number of Unicode characters in srcChars.

  1907    * @return a reference to this

  1908    * @stable ICU 2.0

  1909    */

  1910   inline UnicodeString& setTo(const UChar *srcChars,

  1911                int32_t srcLength);

  1913   /**

  1914    * Set the characters in the UnicodeString object to the code unit

  1915    * <TT>srcChar</TT>.

  1916    * @param srcChar the code unit which becomes the UnicodeString's character

  1917    * content

  1918    * @return a reference to this

  1919    * @stable ICU 2.0

  1920    */

  1921   UnicodeString& setTo(UChar srcChar);

  1923   /**

  1924    * Set the characters in the UnicodeString object to the code point

  1925    * <TT>srcChar</TT>.

  1926    * @param srcChar the code point which becomes the UnicodeString's character

  1927    * content

  1928    * @return a reference to this

  1929    * @stable ICU 2.0

  1930    */

  1931   UnicodeString& setTo(UChar32 srcChar);

  1933   /**

  1934    * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.

  1935    * The text will be used for the UnicodeString object, but

  1936    * it will not be released when the UnicodeString is destroyed.

  1937    * This has copy-on-write semantics:

  1938    * When the string is modified, then the buffer is first copied into

  1939    * newly allocated memory.

  1940    * The aliased buffer is never modified.

  1941    *

  1942    * In an assignment to another UnicodeString, when using the copy constructor

  1943    * or the assignment operator, the text will be copied.

  1944    * When using fastCopyFrom(), the text will be aliased again,

  1945    * so that both strings then alias the same readonly-text.

  1946    *

  1947    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.

  1948    *                     This must be true if <code>textLength==-1</code>.

  1949    * @param text The characters to alias for the UnicodeString.

  1950    * @param textLength The number of Unicode characters in <code>text</code> to alias.

  1951    *                   If -1, then this function will determine the length

  1952    *                   by calling <code>u_strlen()</code>.

  1953    * @return a reference to this

  1954    * @stable ICU 2.0

  1955    */

  1956   UnicodeString &setTo(UBool isTerminated,

  1957                        const UChar *text,

  1958                        int32_t textLength);

  1960   /**

  1961    * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.

  1962    * The text will be used for the UnicodeString object, but

  1963    * it will not be released when the UnicodeString is destroyed.

  1964    * This has write-through semantics:

  1965    * For as long as the capacity of the buffer is sufficient, write operations

  1966    * will directly affect the buffer. When more capacity is necessary, then

  1967    * a new buffer will be allocated and the contents copied as with regularly

  1968    * constructed strings.

  1969    * In an assignment to another UnicodeString, the buffer will be copied.

  1970    * The extract(UChar *dst) function detects whether the dst pointer is the same

  1971    * as the string buffer itself and will in this case not copy the contents.

  1972    *

  1973    * @param buffer The characters to alias for the UnicodeString.

  1974    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.

  1975    * @param buffCapacity The size of <code>buffer</code> in UChars.

  1976    * @return a reference to this

  1977    * @stable ICU 2.0

  1978    */

  1979   UnicodeString &setTo(UChar *buffer,

  1980                        int32_t buffLength,

  1981                        int32_t buffCapacity);

  1983   /**

  1984    * Make this UnicodeString object invalid.

  1985    * The string will test TRUE with isBogus().

  1986    *

  1987    * A bogus string has no value. It is different from an empty string.

  1988    * It can be used to indicate that no string value is available.

  1989    * getBuffer() and getTerminatedBuffer() return NULL, and

  1990    * length() returns 0.

  1991    *

  1992    * This utility function is used throughout the UnicodeString

  1993    * implementation to indicate that a UnicodeString operation failed,

  1994    * and may be used in other functions,

  1995    * especially but not exclusively when such functions do not

  1996    * take a UErrorCode for simplicity.

  1997    *

  1998    * The following methods, and no others, will clear a string object's bogus flag:

  1999    * - remove()

  2000    * - remove(0, INT32_MAX)

  2001    * - truncate(0)

  2002    * - operator=() (assignment operator)

  2003    * - setTo(...)

  2004    *

  2005    * The simplest way to turn a bogus string into an empty one

  2006    * is to use the remove() function.

  2007    * Examples for other functions that are equivalent to "set to empty string":

  2008    * \code

  2009    * if(s.isBogus()) {

  2010    *   s.remove();           // set to an empty string (remove all), or

  2011    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or

  2012    *   s.truncate(0);        // set to an empty string (complete truncation), or

  2013    *   s=UnicodeString();    // assign an empty string, or

  2014    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or

  2015    *   static const UChar nul=0;

  2016    *   s.setTo(&nul, 0);     // set to an empty C Unicode string

  2017    * }

  2018    * \endcode

  2019    *

  2020    * @see isBogus()

  2021    * @stable ICU 2.0

  2022    */

  2023   void setToBogus();

  2025   /**

  2026    * Set the character at the specified offset to the specified character.

  2027    * @param offset A valid offset into the text of the character to set

  2028    * @param ch The new character

  2029    * @return A reference to this

  2030    * @stable ICU 2.0

  2031    */

  2032   UnicodeString& setCharAt(int32_t offset,

  2033                UChar ch);

  2036   /* Append operations */

  2038   /**

  2039    * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString

  2040    * object.

  2041    * @param ch the code unit to be appended

  2042    * @return a reference to this

  2043    * @stable ICU 2.0

  2044    */

  2045  inline  UnicodeString& operator+= (UChar ch);

  2047   /**

  2048    * Append operator. Append the code point <TT>ch</TT> to the UnicodeString

  2049    * object.

  2050    * @param ch the code point to be appended

  2051    * @return a reference to this

  2052    * @stable ICU 2.0

  2053    */

  2054  inline  UnicodeString& operator+= (UChar32 ch);

  2056   /**

  2057    * Append operator. Append the characters in <TT>srcText</TT> to the

  2058    * UnicodeString object. <TT>srcText</TT> is not modified.

  2059    * @param srcText the source for the new characters

  2060    * @return a reference to this

  2061    * @stable ICU 2.0

  2062    */

  2063   inline UnicodeString& operator+= (const UnicodeString& srcText);

  2065   /**

  2066    * Append the characters

  2067    * in <TT>srcText</TT> in the range

  2068    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the

  2069    * UnicodeString object. <TT>srcText</TT>

  2070    * is not modified.

  2071    * @param srcText the source for the new characters

  2072    * @param srcStart the offset into <TT>srcText</TT> where new characters

  2073    * will be obtained

  2074    * @param srcLength the number of characters in <TT>srcText</TT> in

  2075    * the append string

  2076    * @return a reference to this

  2077    * @stable ICU 2.0

  2078    */

  2079   inline UnicodeString& append(const UnicodeString& srcText,

  2080             int32_t srcStart,

  2081             int32_t srcLength);

  2083   /**

  2084    * Append the characters in <TT>srcText</TT> to the UnicodeString object.

  2085    * <TT>srcText</TT> is not modified.

  2086    * @param srcText the source for the new characters

  2087    * @return a reference to this

  2088    * @stable ICU 2.0

  2089    */

  2090   inline UnicodeString& append(const UnicodeString& srcText);

  2092   /**

  2093    * Append the characters in <TT>srcChars</TT> in the range

  2094    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the

  2095    * UnicodeString object.

  2096    * <TT>srcChars</TT> is not modified.

  2097    * @param srcChars the source for the new characters

  2098    * @param srcStart the offset into <TT>srcChars</TT> where new characters

  2099    * will be obtained

  2100    * @param srcLength the number of characters in <TT>srcChars</TT> in

  2101    *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated

  2102    * @return a reference to this

  2103    * @stable ICU 2.0

  2104    */

  2105   inline UnicodeString& append(const UChar *srcChars,

  2106             int32_t srcStart,

  2107             int32_t srcLength);

  2109   /**

  2110    * Append the characters in <TT>srcChars</TT> to the end of the

  2111    * UnicodeString object. <TT>srcChars</TT> is not modified.

  2112    * @param srcChars the source for the new characters

  2113    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;

  2114    *                  can be -1 if <TT>srcChars</TT> is NUL-terminated

  2115    * @return a reference to this

  2116    * @stable ICU 2.0

  2117    */

  2118   inline UnicodeString& append(const UChar *srcChars,

  2119             int32_t srcLength);

  2121   /**

  2122    * Append the code unit <TT>srcChar</TT> to the UnicodeString object.

  2123    * @param srcChar the code unit to append

  2124    * @return a reference to this

  2125    * @stable ICU 2.0

  2126    */

  2127   inline UnicodeString& append(UChar srcChar);

  2129   /**

  2130    * Append the code point <TT>srcChar</TT> to the UnicodeString object.

  2131    * @param srcChar the code point to append

  2132    * @return a reference to this

  2133    * @stable ICU 2.0

  2134    */

  2135   UnicodeString& append(UChar32 srcChar);

  2138   /* Insert operations */

  2140   /**

  2141    * Insert the characters in <TT>srcText</TT> in the range

  2142    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString

  2143    * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.

  2144    * @param start the offset where the insertion begins

  2145    * @param srcText the source for the new characters

  2146    * @param srcStart the offset into <TT>srcText</TT> where new characters

  2147    * will be obtained

  2148    * @param srcLength the number of characters in <TT>srcText</TT> in

  2149    * the insert string

  2150    * @return a reference to this

  2151    * @stable ICU 2.0

  2152    */

  2153   inline UnicodeString& insert(int32_t start,

  2154             const UnicodeString& srcText,

  2155             int32_t srcStart,

  2156             int32_t srcLength);

  2158   /**

  2159    * Insert the characters in <TT>srcText</TT> into the UnicodeString object

  2160    * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.

  2161    * @param start the offset where the insertion begins

  2162    * @param srcText the source for the new characters

  2163    * @return a reference to this

  2164    * @stable ICU 2.0

  2165    */

  2166   inline UnicodeString& insert(int32_t start,

  2167             const UnicodeString& srcText);

  2169   /**

  2170    * Insert the characters in <TT>srcChars</TT> in the range

  2171    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString

  2172    *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.

  2173    * @param start the offset at which the insertion begins

  2174    * @param srcChars the source for the new characters

  2175    * @param srcStart the offset into <TT>srcChars</TT> where new characters

  2176    * will be obtained

  2177    * @param srcLength the number of characters in <TT>srcChars</TT>

  2178    * in the insert string

  2179    * @return a reference to this

  2180    * @stable ICU 2.0

  2181    */

  2182   inline UnicodeString& insert(int32_t start,

  2183             const UChar *srcChars,

  2184             int32_t srcStart,

  2185             int32_t srcLength);

  2187   /**

  2188    * Insert the characters in <TT>srcChars</TT> into the UnicodeString object

  2189    * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.

  2190    * @param start the offset where the insertion begins

  2191    * @param srcChars the source for the new characters

  2192    * @param srcLength the number of Unicode characters in srcChars.

  2193    * @return a reference to this

  2194    * @stable ICU 2.0

  2195    */

  2196   inline UnicodeString& insert(int32_t start,

  2197             const UChar *srcChars,

  2198             int32_t srcLength);

  2200   /**

  2201    * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at

  2202    * offset <TT>start</TT>.

  2203    * @param start the offset at which the insertion occurs

  2204    * @param srcChar the code unit to insert

  2205    * @return a reference to this

  2206    * @stable ICU 2.0

  2207    */

  2208   inline UnicodeString& insert(int32_t start,

  2209             UChar srcChar);

  2211   /**

  2212    * Insert the code point <TT>srcChar</TT> into the UnicodeString object at

  2213    * offset <TT>start</TT>.

  2214    * @param start the offset at which the insertion occurs

  2215    * @param srcChar the code point to insert

  2216    * @return a reference to this

  2217    * @stable ICU 2.0

  2218    */

  2219   inline UnicodeString& insert(int32_t start,

  2220             UChar32 srcChar);

  2223   /* Replace operations */

  2225   /**

  2226    * Replace the characters in the range

  2227    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

  2228    * <TT>srcText</TT> in the range

  2229    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).

  2230    * <TT>srcText</TT> is not modified.

  2231    * @param start the offset at which the replace operation begins

  2232    * @param length the number of characters to replace. The character at

  2233    * <TT>start + length</TT> is not modified.

  2234    * @param srcText the source for the new characters

  2235    * @param srcStart the offset into <TT>srcText</TT> where new characters

  2236    * will be obtained

  2237    * @param srcLength the number of characters in <TT>srcText</TT> in

  2238    * the replace string

  2239    * @return a reference to this

  2240    * @stable ICU 2.0

  2241    */

  2242   UnicodeString& replace(int32_t start,

  2243              int32_t length,

  2244              const UnicodeString& srcText,

  2245              int32_t srcStart,

  2246              int32_t srcLength);

  2248   /**

  2249    * Replace the characters in the range

  2250    * [<TT>start</TT>, <TT>start + length</TT>)

  2251    * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is

  2252    *  not modified.

  2253    * @param start the offset at which the replace operation begins

  2254    * @param length the number of characters to replace. The character at

  2255    * <TT>start + length</TT> is not modified.

  2256    * @param srcText the source for the new characters

  2257    * @return a reference to this

  2258    * @stable ICU 2.0

  2259    */

  2260   UnicodeString& replace(int32_t start,

  2261              int32_t length,

  2262              const UnicodeString& srcText);

  2264   /**

  2265    * Replace the characters in the range

  2266    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

  2267    * <TT>srcChars</TT> in the range

  2268    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>

  2269    * is not modified.

  2270    * @param start the offset at which the replace operation begins

  2271    * @param length the number of characters to replace.  The character at

  2272    * <TT>start + length</TT> is not modified.

  2273    * @param srcChars the source for the new characters

  2274    * @param srcStart the offset into <TT>srcChars</TT> where new characters

  2275    * will be obtained

  2276    * @param srcLength the number of characters in <TT>srcChars</TT>

  2277    * in the replace string

  2278    * @return a reference to this

  2279    * @stable ICU 2.0

  2280    */

  2281   UnicodeString& replace(int32_t start,

  2282              int32_t length,

  2283              const UChar *srcChars,

  2284              int32_t srcStart,

  2285              int32_t srcLength);

  2287   /**

  2288    * Replace the characters in the range

  2289    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in

  2290    * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.

  2291    * @param start the offset at which the replace operation begins

  2292    * @param length number of characters to replace.  The character at

  2293    * <TT>start + length</TT> is not modified.

  2294    * @param srcChars the source for the new characters

  2295    * @param srcLength the number of Unicode characters in srcChars

  2296    * @return a reference to this

  2297    * @stable ICU 2.0

  2298    */

  2299   inline UnicodeString& replace(int32_t start,

  2300              int32_t length,

  2301              const UChar *srcChars,

  2302              int32_t srcLength);

  2304   /**

  2305    * Replace the characters in the range

  2306    * [<TT>start</TT>, <TT>start + length</TT>) with the code unit

  2307    * <TT>srcChar</TT>.

  2308    * @param start the offset at which the replace operation begins

  2309    * @param length the number of characters to replace.  The character at

  2310    * <TT>start + length</TT> is not modified.

  2311    * @param srcChar the new code unit

  2312    * @return a reference to this

  2313    * @stable ICU 2.0

  2314    */

  2315   inline UnicodeString& replace(int32_t start,

  2316              int32_t length,

  2317              UChar srcChar);

  2319   /**

  2320    * Replace the characters in the range

  2321    * [<TT>start</TT>, <TT>start + length</TT>) with the code point

  2322    * <TT>srcChar</TT>.

  2323    * @param start the offset at which the replace operation begins

  2324    * @param length the number of characters to replace.  The character at

  2325    * <TT>start + length</TT> is not modified.

  2326    * @param srcChar the new code point

  2327    * @return a reference to this

  2328    * @stable ICU 2.0

  2329    */

  2330   UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);

  2332   /**

  2333    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)

  2334    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.

  2335    * @param start the offset at which the replace operation begins

  2336    * @param limit the offset immediately following the replace range

  2337    * @param srcText the source for the new characters

  2338    * @return a reference to this

  2339    * @stable ICU 2.0

  2340    */

  2341   inline UnicodeString& replaceBetween(int32_t start,

  2342                 int32_t limit,

  2343                 const UnicodeString& srcText);

  2345   /**

  2346    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)

  2347    * with the characters in <TT>srcText</TT> in the range

  2348    * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.

  2349    * @param start the offset at which the replace operation begins

  2350    * @param limit the offset immediately following the replace range

  2351    * @param srcText the source for the new characters

  2352    * @param srcStart the offset into <TT>srcText</TT> where new characters

  2353    * will be obtained

  2354    * @param srcLimit the offset immediately following the range to copy

  2355    * in <TT>srcText</TT>

  2356    * @return a reference to this

  2357    * @stable ICU 2.0

  2358    */

  2359   inline UnicodeString& replaceBetween(int32_t start,

  2360                 int32_t limit,

  2361                 const UnicodeString& srcText,

  2362                 int32_t srcStart,

  2363                 int32_t srcLimit);

  2365   /**

  2366    * Replace a substring of this object with the given text.

  2367    * @param start the beginning index, inclusive; <code>0 <= start

  2368    * <= limit</code>.

  2369    * @param limit the ending index, exclusive; <code>start <= limit

  2370    * <= length()</code>.

  2371    * @param text the text to replace characters <code>start</code>

  2372    * to <code>limit - 1</code>

  2373    * @stable ICU 2.0

  2374    */

  2375   virtual void handleReplaceBetween(int32_t start,

  2376                                     int32_t limit,

  2377                                     const UnicodeString& text);

  2379   /**

  2380    * Replaceable API

  2381    * @return TRUE if it has MetaData

  2382    * @stable ICU 2.4

  2383    */

  2384   virtual UBool hasMetaData() const;

  2386   /**

  2387    * Copy a substring of this object, retaining attribute (out-of-band)

  2388    * information.  This method is used to duplicate or reorder substrings.

  2389    * The destination index must not overlap the source range.

  2390    *

  2391    * @param start the beginning index, inclusive; <code>0 <= start <=

  2392    * limit</code>.

  2393    * @param limit the ending index, exclusive; <code>start <= limit <=

  2394    * length()</code>.

  2395    * @param dest the destination index.  The characters from

  2396    * <code>start..limit-1</code> will be copied to <code>dest</code>.

  2397    * Implementations of this method may assume that <code>dest <= start ||

  2398    * dest >= limit</code>.

  2399    * @stable ICU 2.0

  2400    */

  2401   virtual void copy(int32_t start, int32_t limit, int32_t dest);

  2403   /* Search and replace operations */

  2405   /**

  2406    * Replace all occurrences of characters in oldText with the characters

  2407    * in newText

  2408    * @param oldText the text containing the search text

  2409    * @param newText the text containing the replacement text

  2410    * @return a reference to this

  2411    * @stable ICU 2.0

  2412    */

  2413   inline UnicodeString& findAndReplace(const UnicodeString& oldText,

  2414                 const UnicodeString& newText);

  2416   /**

  2417    * Replace all occurrences of characters in oldText with characters

  2418    * in newText

  2419    * in the range [<TT>start</TT>, <TT>start + length</TT>).

  2420    * @param start the start of the range in which replace will be performed

  2421    * @param length the length of the range in which replace will be performed

  2422    * @param oldText the text containing the search text

  2423    * @param newText the text containing the replacement text

  2424    * @return a reference to this

  2425    * @stable ICU 2.0

  2426    */

  2427   inline UnicodeString& findAndReplace(int32_t start,

  2428                 int32_t length,

  2429                 const UnicodeString& oldText,

  2430                 const UnicodeString& newText);

  2432   /**

  2433    * Replace all occurrences of characters in oldText in the range

  2434    * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters

  2435    * in newText in the range

  2436    * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)

  2437    * in the range [<TT>start</TT>, <TT>start + length</TT>).

  2438    * @param start the start of the range in which replace will be performed

  2439    * @param length the length of the range in which replace will be performed

  2440    * @param oldText the text containing the search text

  2441    * @param oldStart the start of the search range in <TT>oldText</TT>

  2442    * @param oldLength the length of the search range in <TT>oldText</TT>

  2443    * @param newText the text containing the replacement text

  2444    * @param newStart the start of the replacement range in <TT>newText</TT>

  2445    * @param newLength the length of the replacement range in <TT>newText</TT>

  2446    * @return a reference to this

  2447    * @stable ICU 2.0

  2448    */

  2449   UnicodeString& findAndReplace(int32_t start,

  2450                 int32_t length,

  2451                 const UnicodeString& oldText,

  2452                 int32_t oldStart,

  2453                 int32_t oldLength,

  2454                 const UnicodeString& newText,

  2455                 int32_t newStart,

  2456                 int32_t newLength);

  2459   /* Remove operations */

  2461   /**

  2462    * Remove all characters from the UnicodeString object.

  2463    * @return a reference to this

  2464    * @stable ICU 2.0

  2465    */

  2466   inline UnicodeString& remove(void);

  2468   /**

  2469    * Remove the characters in the range

  2470    * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.

  2471    * @param start the offset of the first character to remove

  2472    * @param length the number of characters to remove

  2473    * @return a reference to this

  2474    * @stable ICU 2.0

  2475    */

  2476   inline UnicodeString& remove(int32_t start,

  2477                                int32_t length = (int32_t)INT32_MAX);

  2479   /**

  2480    * Remove the characters in the range

  2481    * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.

  2482    * @param start the offset of the first character to remove

  2483    * @param limit the offset immediately following the range to remove

  2484    * @return a reference to this

  2485    * @stable ICU 2.0

  2486    */

  2487   inline UnicodeString& removeBetween(int32_t start,

  2488                                       int32_t limit = (int32_t)INT32_MAX);

  2490   /**

  2491    * Retain only the characters in the range

  2492    * [<code>start</code>, <code>limit</code>) from the UnicodeString object.

  2493    * Removes characters before <code>start</code> and at and after <code>limit</code>.

  2494    * @param start the offset of the first character to retain

  2495    * @param limit the offset immediately following the range to retain

  2496    * @return a reference to this

  2497    * @stable ICU 4.4

  2498    */

  2499   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);

  2501   /* Length operations */

  2503   /**

  2504    * Pad the start of this UnicodeString with the character <TT>padChar</TT>.

  2505    * If the length of this UnicodeString is less than targetLength,

  2506    * targetLength - length() copies of padChar will be added to the

  2507    * beginning of this UnicodeString.

  2508    * @param targetLength the desired length of the string

  2509    * @param padChar the character to use for padding. Defaults to

  2510    * space (U+0020)

  2511    * @return TRUE if the text was padded, FALSE otherwise.

  2512    * @stable ICU 2.0

  2513    */

  2514   UBool padLeading(int32_t targetLength,

  2515                     UChar padChar = 0x0020);

  2517   /**

  2518    * Pad the end of this UnicodeString with the character <TT>padChar</TT>.

  2519    * If the length of this UnicodeString is less than targetLength,

  2520    * targetLength - length() copies of padChar will be added to the

  2521    * end of this UnicodeString.

  2522    * @param targetLength the desired length of the string

  2523    * @param padChar the character to use for padding. Defaults to

  2524    * space (U+0020)

  2525    * @return TRUE if the text was padded, FALSE otherwise.

  2526    * @stable ICU 2.0

  2527    */

  2528   UBool padTrailing(int32_t targetLength,

  2529                      UChar padChar = 0x0020);

  2531   /**

  2532    * Truncate this UnicodeString to the <TT>targetLength</TT>.

  2533    * @param targetLength the desired length of this UnicodeString.

  2534    * @return TRUE if the text was truncated, FALSE otherwise

  2535    * @stable ICU 2.0

  2536    */

  2537   inline UBool truncate(int32_t targetLength);

  2539   /**

  2540    * Trims leading and trailing whitespace from this UnicodeString.

  2541    * @return a reference to this

  2542    * @stable ICU 2.0

  2543    */

  2544   UnicodeString& trim(void);

  2547   /* Miscellaneous operations */

  2549   /**

  2550    * Reverse this UnicodeString in place.

  2551    * @return a reference to this

  2552    * @stable ICU 2.0

  2553    */

  2554   inline UnicodeString& reverse(void);

  2556   /**

  2557    * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in

  2558    * this UnicodeString.

  2559    * @param start the start of the range to reverse

  2560    * @param length the number of characters to reverse

  2561    * @return a reference to this

  2562    * @stable ICU 2.0

  2563    */

  2564   inline UnicodeString& reverse(int32_t start,

  2565              int32_t length);

  2567   /**

  2568    * Convert the characters in this to UPPER CASE following the conventions of

  2569    * the default locale.

  2570    * @return A reference to this.

  2571    * @stable ICU 2.0

  2572    */

  2573   UnicodeString& toUpper(void);

  2575   /**

  2576    * Convert the characters in this to UPPER CASE following the conventions of

  2577    * a specific locale.

  2578    * @param locale The locale containing the conventions to use.

  2579    * @return A reference to this.

  2580    * @stable ICU 2.0

  2581    */

  2582   UnicodeString& toUpper(const Locale& locale);

  2584   /**

  2585    * Convert the characters in this to lower case following the conventions of

  2586    * the default locale.

  2587    * @return A reference to this.

  2588    * @stable ICU 2.0

  2589    */

  2590   UnicodeString& toLower(void);

  2592   /**

  2593    * Convert the characters in this to lower case following the conventions of

  2594    * a specific locale.

  2595    * @param locale The locale containing the conventions to use.

  2596    * @return A reference to this.

  2597    * @stable ICU 2.0

  2598    */

  2599   UnicodeString& toLower(const Locale& locale);

  2601 #if !UCONFIG_NO_BREAK_ITERATION

  2603   /**

  2604    * Titlecase this string, convenience function using the default locale.

  2605    *

  2606    * Casing is locale-dependent and context-sensitive.

  2607    * Titlecasing uses a break iterator to find the first characters of words

  2608    * that are to be titlecased. It titlecases those characters and lowercases

  2609    * all others.

  2610    *

  2611    * The titlecase break iterator can be provided to customize for arbitrary

  2612    * styles, using rules and dictionaries beyond the standard iterators.

  2613    * It may be more efficient to always provide an iterator to avoid

  2614    * opening and closing one for each string.

  2615    * The standard titlecase iterator for the root locale implements the

  2616    * algorithm of Unicode TR 21.

  2617    *

  2618    * This function uses only the setText(), first() and next() methods of the

  2619    * provided break iterator.

  2620    *

  2621    * @param titleIter A break iterator to find the first characters of words

  2622    *                  that are to be titlecased.

  2623    *                  If none is provided (0), then a standard titlecase

  2624    *                  break iterator is opened.

  2625    *                  Otherwise the provided iterator is set to the string's text.

  2626    * @return A reference to this.

  2627    * @stable ICU 2.1

  2628    */

  2629   UnicodeString &toTitle(BreakIterator *titleIter);

  2631   /**

  2632    * Titlecase this string.

  2633    *

  2634    * Casing is locale-dependent and context-sensitive.

  2635    * Titlecasing uses a break iterator to find the first characters of words

  2636    * that are to be titlecased. It titlecases those characters and lowercases

  2637    * all others.

  2638    *

  2639    * The titlecase break iterator can be provided to customize for arbitrary

  2640    * styles, using rules and dictionaries beyond the standard iterators.

  2641    * It may be more efficient to always provide an iterator to avoid

  2642    * opening and closing one for each string.

  2643    * The standard titlecase iterator for the root locale implements the

  2644    * algorithm of Unicode TR 21.

  2645    *

  2646    * This function uses only the setText(), first() and next() methods of the

  2647    * provided break iterator.

  2648    *

  2649    * @param titleIter A break iterator to find the first characters of words

  2650    *                  that are to be titlecased.

  2651    *                  If none is provided (0), then a standard titlecase

  2652    *                  break iterator is opened.

  2653    *                  Otherwise the provided iterator is set to the string's text.

  2654    * @param locale    The locale to consider.

  2655    * @return A reference to this.

  2656    * @stable ICU 2.1

  2657    */

  2658   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);

  2660   /**

  2661    * Titlecase this string, with options.

  2662    *

  2663    * Casing is locale-dependent and context-sensitive.

  2664    * Titlecasing uses a break iterator to find the first characters of words

  2665    * that are to be titlecased. It titlecases those characters and lowercases

  2666    * all others. (This can be modified with options.)

  2667    *

  2668    * The titlecase break iterator can be provided to customize for arbitrary

  2669    * styles, using rules and dictionaries beyond the standard iterators.

  2670    * It may be more efficient to always provide an iterator to avoid

  2671    * opening and closing one for each string.

  2672    * The standard titlecase iterator for the root locale implements the

  2673    * algorithm of Unicode TR 21.

  2674    *

  2675    * This function uses only the setText(), first() and next() methods of the

  2676    * provided break iterator.

  2677    *

  2678    * @param titleIter A break iterator to find the first characters of words

  2679    *                  that are to be titlecased.

  2680    *                  If none is provided (0), then a standard titlecase

  2681    *                  break iterator is opened.

  2682    *                  Otherwise the provided iterator is set to the string's text.

  2683    * @param locale    The locale to consider.

  2684    * @param options Options bit set, see ucasemap_open().

  2685    * @return A reference to this.

  2686    * @see U_TITLECASE_NO_LOWERCASE

  2687    * @see U_TITLECASE_NO_BREAK_ADJUSTMENT

  2688    * @see ucasemap_open

  2689    * @stable ICU 3.8

  2690    */

  2691   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);

  2693 #endif

  2695   /**

  2696    * Case-folds the characters in this string.

  2697    *

  2698    * Case-folding is locale-independent and not context-sensitive,

  2699    * but there is an option for whether to include or exclude mappings for dotted I

  2700    * and dotless i that are marked with 'T' in CaseFolding.txt.

  2701    *

  2702    * The result may be longer or shorter than the original.

  2703    *

  2704    * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I

  2705    * @return A reference to this.

  2706    * @stable ICU 2.0

  2707    */

  2708   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);

  2710   //========================================

  2711   // Access to the internal buffer

  2712   //========================================

  2714   /**

  2715    * Get a read/write pointer to the internal buffer.

  2716    * The buffer is guaranteed to be large enough for at least minCapacity UChars,

  2717    * writable, and is still owned by the UnicodeString object.

  2718    * Calls to getBuffer(minCapacity) must not be nested, and

  2719    * must be matched with calls to releaseBuffer(newLength).

  2720    * If the string buffer was read-only or shared,

  2721    * then it will be reallocated and copied.

  2722    *

  2723    * An attempted nested call will return 0, and will not further modify the

  2724    * state of the UnicodeString object.

  2725    * It also returns 0 if the string is bogus.

  2726    *

  2727    * The actual capacity of the string buffer may be larger than minCapacity.

  2728    * getCapacity() returns the actual capacity.

  2729    * For many operations, the full capacity should be used to avoid reallocations.

  2730    *

  2731    * While the buffer is "open" between getBuffer(minCapacity)

  2732    * and releaseBuffer(newLength), the following applies:

  2733    * - The string length is set to 0.

  2734    * - Any read API call on the UnicodeString object will behave like on a 0-length string.

  2735    * - Any write API call on the UnicodeString object is disallowed and will have no effect.

  2736    * - You can read from and write to the returned buffer.

  2737    * - The previous string contents will still be in the buffer;

  2738    *   if you want to use it, then you need to call length() before getBuffer(minCapacity).

  2739    *   If the length() was greater than minCapacity, then any contents after minCapacity

  2740    *   may be lost.

  2741    *   The buffer contents is not NUL-terminated by getBuffer().

  2742    *   If length()<getCapacity() then you can terminate it by writing a NUL

  2743    *   at index length().

  2744    * - You must call releaseBuffer(newLength) before and in order to

  2745    *   return to normal UnicodeString operation.

  2746    *

  2747    * @param minCapacity the minimum number of UChars that are to be available

  2748    *        in the buffer, starting at the returned pointer;

  2749    *        default to the current string capacity if minCapacity==-1

  2750    * @return a writable pointer to the internal string buffer,

  2751    *         or 0 if an error occurs (nested calls, out of memory)

  2752    *

  2753    * @see releaseBuffer

  2754    * @see getTerminatedBuffer()

  2755    * @stable ICU 2.0

  2756    */

  2757   UChar *getBuffer(int32_t minCapacity);

  2759   /**

  2760    * Release a read/write buffer on a UnicodeString object with an

  2761    * "open" getBuffer(minCapacity).

  2762    * This function must be called in a matched pair with getBuffer(minCapacity).

  2763    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".

  2764    *

  2765    * It will set the string length to newLength, at most to the current capacity.

  2766    * If newLength==-1 then it will set the length according to the

  2767    * first NUL in the buffer, or to the capacity if there is no NUL.

  2768    *

  2769    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.

  2770    *

  2771    * @param newLength the new length of the UnicodeString object;

  2772    *        defaults to the current capacity if newLength is greater than that;

  2773    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than

  2774    *        the current capacity of the string

  2775    *

  2776    * @see getBuffer(int32_t minCapacity)

  2777    * @stable ICU 2.0

  2778    */

  2779   void releaseBuffer(int32_t newLength=-1);

  2781   /**

  2782    * Get a read-only pointer to the internal buffer.

  2783    * This can be called at any time on a valid UnicodeString.

  2784    *

  2785    * It returns 0 if the string is bogus, or

  2786    * during an "open" getBuffer(minCapacity).

  2787    *

  2788    * It can be called as many times as desired.

  2789    * The pointer that it returns will remain valid until the UnicodeString object is modified,

  2790    * at which time the pointer is semantically invalidated and must not be used any more.

  2791    *

  2792    * The capacity of the buffer can be determined with getCapacity().

  2793    * The part after length() may or may not be initialized and valid,

  2794    * depending on the history of the UnicodeString object.

  2795    *

  2796    * The buffer contents is (probably) not NUL-terminated.

  2797    * You can check if it is with

  2798    * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.

  2799    * (See getTerminatedBuffer().)

  2800    *

  2801    * The buffer may reside in read-only memory. Its contents must not

  2802    * be modified.

  2803    *

  2804    * @return a read-only pointer to the internal string buffer,

  2805    *         or 0 if the string is bogus or during an "open" getBuffer(minCapacity)

  2806    *

  2807    * @see getBuffer(int32_t minCapacity)

  2808    * @see getTerminatedBuffer()

  2809    * @stable ICU 2.0

  2810    */

  2811   inline const UChar *getBuffer() const;

  2813   /**

  2814    * Get a read-only pointer to the internal buffer,

  2815    * making sure that it is NUL-terminated.

  2816    * This can be called at any time on a valid UnicodeString.

  2817    *

  2818    * It returns 0 if the string is bogus, or

  2819    * during an "open" getBuffer(minCapacity), or if the buffer cannot

  2820    * be NUL-terminated (because memory allocation failed).

  2821    *

  2822    * It can be called as many times as desired.

  2823    * The pointer that it returns will remain valid until the UnicodeString object is modified,

  2824    * at which time the pointer is semantically invalidated and must not be used any more.

  2825    *

  2826    * The capacity of the buffer can be determined with getCapacity().

  2827    * The part after length()+1 may or may not be initialized and valid,

  2828    * depending on the history of the UnicodeString object.

  2829    *

  2830    * The buffer contents is guaranteed to be NUL-terminated.

  2831    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL

  2832    * is written.

  2833    * For this reason, this function is not const, unlike getBuffer().

  2834    * Note that a UnicodeString may also contain NUL characters as part of its contents.

  2835    *

  2836    * The buffer may reside in read-only memory. Its contents must not

  2837    * be modified.

  2838    *

  2839    * @return a read-only pointer to the NUL-terminated internal string buffer,

  2840    *         or 0 in the cases listed above (bogus, "open" getBuffer(minCapacity), allocation failure)

  2841    *

  2842    * @see getBuffer(int32_t minCapacity)

  2843    * @see getBuffer()

  2844    * @stable ICU 2.2

  2845    */

  2846   const UChar *getTerminatedBuffer();

  2848   //========================================

  2849   // Constructors

  2850   //========================================

  2852   /** Construct an empty UnicodeString.

  2853    * @stable ICU 2.0

  2854    */

  2855   inline UnicodeString();

  2857   /**

  2858    * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars

  2859    * @param capacity the number of UChars this UnicodeString should hold

  2860    * before a resize is necessary; if count is greater than 0 and count

  2861    * code points c take up more space than capacity, then capacity is adjusted

  2862    * accordingly.

  2863    * @param c is used to initially fill the string

  2864    * @param count specifies how many code points c are to be written in the

  2865    *              string

  2866    * @stable ICU 2.0

  2867    */

  2868   UnicodeString(int32_t capacity, UChar32 c, int32_t count);

  2870   /**

  2871    * Single UChar (code unit) constructor.

  2872    *

  2873    * It is recommended to mark this constructor "explicit" by

  2874    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>

  2875    * on the compiler command line or similar.

  2876    * @param ch the character to place in the UnicodeString

  2877    * @stable ICU 2.0

  2878    */

  2879   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);

  2881   /**

  2882    * Single UChar32 (code point) constructor.

  2883    *

  2884    * It is recommended to mark this constructor "explicit" by

  2885    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>

  2886    * on the compiler command line or similar.

  2887    * @param ch the character to place in the UnicodeString

  2888    * @stable ICU 2.0

  2889    */

  2890   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);

  2892   /**

  2893    * UChar* constructor.

  2894    *

  2895    * It is recommended to mark this constructor "explicit" by

  2896    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>

  2897    * on the compiler command line or similar.

  2898    * @param text The characters to place in the UnicodeString.  <TT>text</TT>

  2899    * must be NUL (U+0000) terminated.

  2900    * @stable ICU 2.0

  2901    */

  2902   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);

  2904   /**

  2905    * UChar* constructor.

  2906    * @param text The characters to place in the UnicodeString.

  2907    * @param textLength The number of Unicode characters in <TT>text</TT>

  2908    * to copy.

  2909    * @stable ICU 2.0

  2910    */

  2911   UnicodeString(const UChar *text,

  2912         int32_t textLength);

  2914   /**

  2915    * Readonly-aliasing UChar* constructor.

  2916    * The text will be used for the UnicodeString object, but

  2917    * it will not be released when the UnicodeString is destroyed.

  2918    * This has copy-on-write semantics:

  2919    * When the string is modified, then the buffer is first copied into

  2920    * newly allocated memory.

  2921    * The aliased buffer is never modified.

  2922    *

  2923    * In an assignment to another UnicodeString, when using the copy constructor

  2924    * or the assignment operator, the text will be copied.

  2925    * When using fastCopyFrom(), the text will be aliased again,

  2926    * so that both strings then alias the same readonly-text.

  2927    *

  2928    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.

  2929    *                     This must be true if <code>textLength==-1</code>.

  2930    * @param text The characters to alias for the UnicodeString.

  2931    * @param textLength The number of Unicode characters in <code>text</code> to alias.

  2932    *                   If -1, then this constructor will determine the length

  2933    *                   by calling <code>u_strlen()</code>.

  2934    * @stable ICU 2.0

  2935    */

  2936   UnicodeString(UBool isTerminated,

  2937                 const UChar *text,

  2938                 int32_t textLength);

  2940   /**

  2941    * Writable-aliasing UChar* constructor.

  2942    * The text will be used for the UnicodeString object, but

  2943    * it will not be released when the UnicodeString is destroyed.

  2944    * This has write-through semantics:

  2945    * For as long as the capacity of the buffer is sufficient, write operations

  2946    * will directly affect the buffer. When more capacity is necessary, then

  2947    * a new buffer will be allocated and the contents copied as with regularly

  2948    * constructed strings.

  2949    * In an assignment to another UnicodeString, the buffer will be copied.

  2950    * The extract(UChar *dst) function detects whether the dst pointer is the same

  2951    * as the string buffer itself and will in this case not copy the contents.

  2952    *

  2953    * @param buffer The characters to alias for the UnicodeString.

  2954    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.

  2955    * @param buffCapacity The size of <code>buffer</code> in UChars.

  2956    * @stable ICU 2.0

  2957    */

  2958   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);

  2960 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION

  2962   /**

  2963    * char* constructor.

  2964    * Uses the default converter (and thus depends on the ICU conversion code)

  2965    * unless U_CHARSET_IS_UTF8 is set to 1.

  2966    *

  2967    * For ASCII (really "invariant character") strings it is more efficient to use

  2968    * the constructor that takes a US_INV (for its enum EInvariant).

  2969    * For ASCII (invariant-character) string literals, see UNICODE_STRING and

  2970    * UNICODE_STRING_SIMPLE.

  2971    *

  2972    * It is recommended to mark this constructor "explicit" by

  2973    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>

  2974    * on the compiler command line or similar.

  2975    * @param codepageData an array of bytes, null-terminated,

  2976    *                     in the platform's default codepage.

  2977    * @stable ICU 2.0

  2978    * @see UNICODE_STRING

  2979    * @see UNICODE_STRING_SIMPLE

  2980    */

  2981   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);

  2983   /**

  2984    * char* constructor.

  2985    * Uses the default converter (and thus depends on the ICU conversion code)

  2986    * unless U_CHARSET_IS_UTF8 is set to 1.

  2987    * @param codepageData an array of bytes in the platform's default codepage.

  2988    * @param dataLength The number of bytes in <TT>codepageData</TT>.

  2989    * @stable ICU 2.0

  2990    */

  2991   UnicodeString(const char *codepageData, int32_t dataLength);

  2993 #endif

  2995 #if !UCONFIG_NO_CONVERSION

  2997   /**

  2998    * char* constructor.

  2999    * @param codepageData an array of bytes, null-terminated

  3000    * @param codepage the encoding of <TT>codepageData</TT>.  The special

  3001    * value 0 for <TT>codepage</TT> indicates that the text is in the

  3002    * platform's default codepage.

  3003    *

  3004    * If <code>codepage</code> is an empty string (<code>""</code>),

  3005    * then a simple conversion is performed on the codepage-invariant

  3006    * subset ("invariant characters") of the platform encoding. See utypes.h.

  3007    * Recommendation: For invariant-character strings use the constructor

  3008    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)

  3009    * because it avoids object code dependencies of UnicodeString on

  3010    * the conversion code.

  3011    *

  3012    * @stable ICU 2.0

  3013    */

  3014   UnicodeString(const char *codepageData, const char *codepage);

  3016   /**

  3017    * char* constructor.

  3018    * @param codepageData an array of bytes.

  3019    * @param dataLength The number of bytes in <TT>codepageData</TT>.

  3020    * @param codepage the encoding of <TT>codepageData</TT>.  The special

  3021    * value 0 for <TT>codepage</TT> indicates that the text is in the

  3022    * platform's default codepage.

  3023    * If <code>codepage</code> is an empty string (<code>""</code>),

  3024    * then a simple conversion is performed on the codepage-invariant

  3025    * subset ("invariant characters") of the platform encoding. See utypes.h.

  3026    * Recommendation: For invariant-character strings use the constructor

  3027    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)

  3028    * because it avoids object code dependencies of UnicodeString on

  3029    * the conversion code.

  3030    *

  3031    * @stable ICU 2.0

  3032    */

  3033   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);

  3035   /**

  3036    * char * / UConverter constructor.

  3037    * This constructor uses an existing UConverter object to

  3038    * convert the codepage string to Unicode and construct a UnicodeString

  3039    * from that.

  3040    *

  3041    * The converter is reset at first.

  3042    * If the error code indicates a failure before this constructor is called,

  3043    * or if an error occurs during conversion or construction,

  3044    * then the string will be bogus.

  3045    *

  3046    * This function avoids the overhead of opening and closing a converter if

  3047    * multiple strings are constructed.

  3048    *

  3049    * @param src input codepage string

  3050    * @param srcLength length of the input string, can be -1 for NUL-terminated strings

  3051    * @param cnv converter object (ucnv_resetToUnicode() will be called),

  3052    *        can be NULL for the default converter

  3053    * @param errorCode normal ICU error code

  3054    * @stable ICU 2.0

  3055    */

  3056   UnicodeString(

  3057         const char *src, int32_t srcLength,

  3058         UConverter *cnv,

  3059         UErrorCode &errorCode);

  3061 #endif

  3063   /**

  3064    * Constructs a Unicode string from an invariant-character char * string.

  3065    * About invariant characters see utypes.h.

  3066    * This constructor has no runtime dependency on conversion code and is

  3067    * therefore recommended over ones taking a charset name string

  3068    * (where the empty string "" indicates invariant-character conversion).

  3069    *

  3070    * Use the macro US_INV as the third, signature-distinguishing parameter.

  3071    *

  3072    * For example:

  3073    * \code

  3074    * void fn(const char *s) {

  3075    *   UnicodeString ustr(s, -1, US_INV);

  3076    *   // use ustr ...

  3077    * }

  3078    * \endcode

  3079    *

  3080    * @param src String using only invariant characters.

  3081    * @param length Length of src, or -1 if NUL-terminated.

  3082    * @param inv Signature-distinguishing parameter, use US_INV.

  3083    *

  3084    * @see US_INV

  3085    * @stable ICU 3.2

  3086    */

  3087   UnicodeString(const char *src, int32_t length, enum EInvariant inv);

  3090   /**

  3091    * Copy constructor.

  3092    * @param that The UnicodeString object to copy.

  3093    * @stable ICU 2.0

  3094    */

  3095   UnicodeString(const UnicodeString& that);

  3097   /**

  3098    * 'Substring' constructor from tail of source string.

  3099    * @param src The UnicodeString object to copy.

  3100    * @param srcStart The offset into <tt>src</tt> at which to start copying.

  3101    * @stable ICU 2.2

  3102    */

  3103   UnicodeString(const UnicodeString& src, int32_t srcStart);

  3105   /**

  3106    * 'Substring' constructor from subrange of source string.

  3107    * @param src The UnicodeString object to copy.

  3108    * @param srcStart The offset into <tt>src</tt> at which to start copying.

  3109    * @param srcLength The number of characters from <tt>src</tt> to copy.

  3110    * @stable ICU 2.2

  3111    */

  3112   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);

  3114   /**

  3115    * Clone this object, an instance of a subclass of Replaceable.

  3116    * Clones can be used concurrently in multiple threads.

  3117    * If a subclass does not implement clone(), or if an error occurs,

  3118    * then NULL is returned.

  3119    * The clone functions in all subclasses return a pointer to a Replaceable

  3120    * because some compilers do not support covariant (same-as-this)

  3121    * return types; cast to the appropriate subclass if necessary.

  3122    * The caller must delete the clone.

  3123    *

  3124    * @return a clone of this object

  3125    *

  3126    * @see Replaceable::clone

  3127    * @see getDynamicClassID

  3128    * @stable ICU 2.6

  3129    */

  3130   virtual Replaceable *clone() const;

  3132   /** Destructor.

  3133    * @stable ICU 2.0

  3134    */

  3135   virtual ~UnicodeString();

  3137   /**

  3138    * Create a UnicodeString from a UTF-8 string.

  3139    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.

  3140    * Calls u_strFromUTF8WithSub().

  3141    *

  3142    * @param utf8 UTF-8 input string.

  3143    *             Note that a StringPiece can be implicitly constructed

  3144    *             from a std::string or a NUL-terminated const char * string.

  3145    * @return A UnicodeString with equivalent UTF-16 contents.

  3146    * @see toUTF8

  3147    * @see toUTF8String

  3148    * @stable ICU 4.2

  3149    */

  3150   static UnicodeString fromUTF8(const StringPiece &utf8);

  3152   /**

  3153    * Create a UnicodeString from a UTF-32 string.

  3154    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.

  3155    * Calls u_strFromUTF32WithSub().

  3156    *

  3157    * @param utf32 UTF-32 input string. Must not be NULL.

  3158    * @param length Length of the input string, or -1 if NUL-terminated.

  3159    * @return A UnicodeString with equivalent UTF-16 contents.

  3160    * @see toUTF32

  3161    * @stable ICU 4.2

  3162    */

  3163   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);

  3165   /* Miscellaneous operations */

  3167   /**

  3168    * Unescape a string of characters and return a string containing

  3169    * the result.  The following escape sequences are recognized:

  3170    *

  3171    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]

  3172    * \\Uhhhhhhhh   8 hex digits

  3173    * \\xhh         1-2 hex digits

  3174    * \\ooo         1-3 octal digits; o in [0-7]

  3175    * \\cX          control-X; X is masked with 0x1F

  3176    *

  3177    * as well as the standard ANSI C escapes:

  3178    *

  3179    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,

  3180    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,

  3181    * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C

  3182    *

  3183    * Anything else following a backslash is generically escaped.  For

  3184    * example, "[a\\-z]" returns "[a-z]".

  3185    *

  3186    * If an escape sequence is ill-formed, this method returns an empty

  3187    * string.  An example of an ill-formed sequence is "\\u" followed by

  3188    * fewer than 4 hex digits.

  3189    *

  3190    * This function is similar to u_unescape() but not identical to it.

  3191    * The latter takes a source char*, so it does escape recognition

  3192    * and also invariant conversion.

  3193    *

  3194    * @return a string with backslash escapes interpreted, or an

  3195    * empty string on error.

  3196    * @see UnicodeString#unescapeAt()

  3197    * @see u_unescape()

  3198    * @see u_unescapeAt()

  3199    * @stable ICU 2.0

  3200    */

  3201   UnicodeString unescape() const;

  3203   /**

  3204    * Unescape a single escape sequence and return the represented

  3205    * character.  See unescape() for a listing of the recognized escape

  3206    * sequences.  The character at offset-1 is assumed (without

  3207    * checking) to be a backslash.  If the escape sequence is

  3208    * ill-formed, or the offset is out of range, U_SENTINEL=-1 is

  3209    * returned.

  3210    *

  3211    * @param offset an input output parameter.  On input, it is the

  3212    * offset into this string where the escape sequence is located,

  3213    * after the initial backslash.  On output, it is advanced after the

  3214    * last character parsed.  On error, it is not advanced at all.

  3215    * @return the character represented by the escape sequence at

  3216    * offset, or U_SENTINEL=-1 on error.

  3217    * @see UnicodeString#unescape()

  3218    * @see u_unescape()

  3219    * @see u_unescapeAt()

  3220    * @stable ICU 2.0

  3221    */

  3222   UChar32 unescapeAt(int32_t &offset) const;

  3224   /**

  3225    * ICU "poor man's RTTI", returns a UClassID for this class.

  3226    *

  3227    * @stable ICU 2.2

  3228    */

  3229   static UClassID U_EXPORT2 getStaticClassID();

  3231   /**

  3232    * ICU "poor man's RTTI", returns a UClassID for the actual class.

  3233    *

  3234    * @stable ICU 2.2

  3235    */

  3236   virtual UClassID getDynamicClassID() const;

  3238   //========================================

  3239   // Implementation methods

  3240   //========================================

  3242 protected:

  3243   /**

  3244    * Implement Replaceable::getLength() (see jitterbug 1027).

  3245    * @stable ICU 2.4

  3246    */

  3247   virtual int32_t getLength() const;

  3249   /**

  3250    * The change in Replaceable to use virtual getCharAt() allows

  3251    * UnicodeString::charAt() to be inline again (see jitterbug 709).

  3252    * @stable ICU 2.4

  3253    */

  3254   virtual UChar getCharAt(int32_t offset) const;

  3256   /**

  3257    * The change in Replaceable to use virtual getChar32At() allows

  3258    * UnicodeString::char32At() to be inline again (see jitterbug 709).

  3259    * @stable ICU 2.4

  3260    */

  3261   virtual UChar32 getChar32At(int32_t offset) const;

  3263 private:

  3264   // For char* constructors. Could be made public.

  3265   UnicodeString &setToUTF8(const StringPiece &utf8);

  3266   // For extract(char*).

  3267   // We could make a toUTF8(target, capacity, errorCode) public but not

  3268   // this version: New API will be cleaner if we make callers create substrings

  3269   // rather than having start+length on every method,

  3270   // and it should take a UErrorCode&.

  3271   int32_t

  3272   toUTF8(int32_t start, int32_t len,

  3273          char *target, int32_t capacity) const;

  3275   /**

  3276    * Internal string contents comparison, called by operator==.

  3277    * Requires: this & text not bogus and have same lengths.

  3278    */

  3279   UBool doEquals(const UnicodeString &text, int32_t len) const;

  3281   inline int8_t

  3282   doCompare(int32_t start,

  3283            int32_t length,

  3284            const UnicodeString& srcText,

  3285            int32_t srcStart,

  3286            int32_t srcLength) const;

  3288   int8_t doCompare(int32_t start,

  3289            int32_t length,

  3290            const UChar *srcChars,

  3291            int32_t srcStart,

  3292            int32_t srcLength) const;

  3294   inline int8_t

  3295   doCompareCodePointOrder(int32_t start,

  3296                           int32_t length,

  3297                           const UnicodeString& srcText,

  3298                           int32_t srcStart,

  3299                           int32_t srcLength) const;

  3301   int8_t doCompareCodePointOrder(int32_t start,

  3302                                  int32_t length,

  3303                                  const UChar *srcChars,

  3304                                  int32_t srcStart,

  3305                                  int32_t srcLength) const;

  3307   inline int8_t

  3308   doCaseCompare(int32_t start,

  3309                 int32_t length,

  3310                 const UnicodeString &srcText,

  3311                 int32_t srcStart,

  3312                 int32_t srcLength,

  3313                 uint32_t options) const;

  3315   int8_t

  3316   doCaseCompare(int32_t start,

  3317                 int32_t length,

  3318                 const UChar *srcChars,

  3319                 int32_t srcStart,

  3320                 int32_t srcLength,

  3321                 uint32_t options) const;

  3323   int32_t doIndexOf(UChar c,   // UChar overload: c is a single code unit

  3324             int32_t start,

  3325             int32_t length) const;

  3327   int32_t doIndexOf(UChar32 c,   // UChar32 overload: c is a code point

  3328                         int32_t start,

  3329                         int32_t length) const;

  3331   int32_t doLastIndexOf(UChar c,   // UChar overload: c is a single code unit

  3332                 int32_t start,

  3333                 int32_t length) const;

  3335   int32_t doLastIndexOf(UChar32 c,   // UChar32 overload: c is a code point

  3336                             int32_t start,

  3337                             int32_t length) const;

  3339   void doExtract(int32_t start,

  3340          int32_t length,

  3341          UChar *dst,

  3342          int32_t dstStart) const;

  3344   inline void doExtract(int32_t start,

  3345          int32_t length,

  3346          UnicodeString& target) const;

  3348   inline UChar doCharAt(int32_t offset)  const;

  3350   UnicodeString& doReplace(int32_t start,

  3351                int32_t length,

  3352                const UnicodeString& srcText,

  3353                int32_t srcStart,

  3354                int32_t srcLength);

  3356   UnicodeString& doReplace(int32_t start,

  3357                int32_t length,

  3358                const UChar *srcChars,

  3359                int32_t srcStart,

  3360                int32_t srcLength);

  3362   UnicodeString& doReverse(int32_t start,

  3363                int32_t length);

  3365   // calculate hash code

  3366   int32_t doHashCode(void) const;

  3368   // Get a pointer to the start of the array (non-const and const overloads).

  3369   // Unlike the public getBuffer() function, these do not check for kOpenGetBuffer.

  3370   inline UChar* getArrayStart(void);

  3371   inline const UChar* getArrayStart(void) const;

  3373   // A UnicodeString object (not necessarily its current buffer)

  3374   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).

  3375   inline UBool isWritable() const;

  3377   // Is the current buffer writable?

  3378   inline UBool isBufferWritable() const;

  3380   // None of the following does releaseArray().

  3381   inline void setLength(int32_t len);        // sets only fShortLength and fLength

  3382   inline void setToEmpty();                  // sets fFlags=kShortString

  3383   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags

  3385   // allocate the array; result may be fStackBuffer

  3386   // sets refCount to 1 if appropriate

  3387   // sets fArray, fCapacity, and fFlags

  3388   // returns boolean for success or failure

  3389   UBool allocate(int32_t capacity);

  3391   // release the array if owned

  3392   void releaseArray(void);

  3394   // turn a bogus string into an empty one

  3395   void unBogus();

  3397   // Implements the assignment operator, the copy constructor, and fastCopyFrom().

  3398   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); // NOTE(review): fastCopy=TRUE is presumably the fastCopyFrom() path -- confirm

  3400   // Pin start (and, for pinIndices(), length) to acceptable values; both are passed by non-const reference.

  3401   inline void pinIndex(int32_t& start) const;

  3402   inline void pinIndices(int32_t& start,

  3403                          int32_t& length) const;

  3405 #if !UCONFIG_NO_CONVERSION

  3407   /* Internal extract() using UConverter. */

  3408   int32_t doExtract(int32_t start, int32_t length,

  3409                     char *dest, int32_t destCapacity,

  3410                     UConverter *cnv,

  3411                     UErrorCode &errorCode) const;

  3413   /*

  3414    * Real constructor for converting from codepage data.

  3415    * It assumes that it is called with !fRefCounted.

  3416    *

  3417    * If <code>codepage==0</code>, then the default converter

  3418    * is used for the platform encoding.

  3419    * If <code>codepage</code> is an empty string (<code>""</code>),

  3420    * then a simple conversion is performed on the codepage-invariant

  3421    * subset ("invariant characters") of the platform encoding. See utypes.h.

  3422    */

  3423   void doCodepageCreate(const char *codepageData,

  3424                         int32_t dataLength,

  3425                         const char *codepage);

  3427   /*

  3428    * Worker function for creating a UnicodeString from

  3429    * a codepage string using a UConverter.

  3430    */

  3431   void

  3432   doCodepageCreate(const char *codepageData,

  3433                    int32_t dataLength,

  3434                    UConverter *converter,

  3435                    UErrorCode &status);

  3437 #endif

  3439   /*

  3440    * This function is called when write access to the array

  3441    * is necessary.

  3442    *

  3443    * We need to make a copy of the array if

  3444    * the buffer is read-only, or

  3445    * the buffer is refCounted (shared), and refCount>1, or

  3446    * the buffer is too small.

  3447    *

  3448    * Return FALSE if memory could not be allocated.

  3449    */

  3450   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,

  3451                             int32_t growCapacity = -1,

  3452                             UBool doCopyArray = TRUE,

  3453                             int32_t **pBufferToDelete = 0,

  3454                             UBool forceClone = FALSE);

  3456   /**

  3457    * Common function for UnicodeString case mappings.

  3458    * The stringCaseMapper has the same type UStringCaseMapper

  3459    * as in ustr_imp.h for ustrcase_map().

  3460    */

  3461   UnicodeString &

  3462   caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);

  3464   // ref counting

  3465   void addRef(void);

  3466   int32_t removeRef(void);

  3467   int32_t refCount(void) const;

  3469   // constants

  3470   enum {

  3471     // Set the stack buffer size so that sizeof(UnicodeString) is,

  3472     // naturally (without padding), a multiple of sizeof(pointer).

  3473     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings

  3474     kInvalidUChar=0xffff, // invalid UChar index

  3475     kGrowSize=128, // grow size for this buffer

  3476     kInvalidHashCode=0, // invalid hash code

  3477     kEmptyHashCode=1, // hash code for empty string

  3479     // bit flag values for fFlags

  3480     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL

  3481     kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields

  3482     kRefCounted=4,      // there is a refCount field before the characters in fArray

  3483     kBufferIsReadonly=8,// do not write to this buffer

  3484     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),

  3485                         // and releaseBuffer(newLength) must be called

  3487     // combined values for convenience

  3488     kShortString=kUsingStackBuffer,

  3489     kLongString=kRefCounted,

  3490     kReadonlyAlias=kBufferIsReadonly,

  3491     kWritableAlias=0

  3492   };

  3494   friend class StringThreadTest;

  3495   friend class UnicodeStringAppendable;

  3497   union StackBufferOrFields;        // forward declaration necessary before friend declaration

  3498   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion

  3500   /*

  3501    * The following are all the class fields that are stored

  3502    * in each UnicodeString object.

  3503    * Note that UnicodeString has virtual functions,

  3504    * therefore there is an implicit vtable pointer

  3505    * as the first real field.

  3506    * The fields should be aligned such that no padding is necessary.

  3507    * On 32-bit machines, the size should be 32 bytes,

  3508    * on 64-bit machines (8-byte pointers), it should be 40 bytes.

  3509    *

  3510    * We use a hack to achieve this.

  3511    *

  3512    * With at least some compilers, each of the following is forced to

  3513    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],

  3514    * rounded up with additional padding if the fields do not already fit that requirement:

  3515    * - sizeof(class UnicodeString)

  3516    * - offsetof(UnicodeString, fUnion)

  3517    * - sizeof(fUnion)

  3518    * - sizeof(fFields)

  3519    *

  3520    * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)

  3521    * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.

  3522    * (Padding at the end of fFields is ok:

  3523    * As long as there is no padding after fStackBuffer, it is not wasted space.)

  3524    *

  3525    * We further assume that the compiler does not reorder the fields,

  3526    * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,

  3527    * with at most some padding (but no other field) in between.

  3528    * (Padding there would be wasted space, but functionally harmless.)

  3529    *

  3530    * We use a few more sizeof(pointer)'s chunks of space with

  3531    * fRestOfStackBuffer, fShortLength and fFlags,

  3532    * to get up exactly to the intended sizeof(UnicodeString).

  3533    */

  3534   // (implicit) *vtable;

  3535   union StackBufferOrFields {

  3536     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)

  3537     // else fFields is used

  3538     UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer

  3539     struct {

  3540       UChar   *fArray;    // the Unicode data

  3541       int32_t fCapacity;  // capacity of fArray (in UChars)

  3542       int32_t fLength;    // number of characters in fArray if >127; else undefined

  3543     } fFields;

  3544   } fUnion;

  3545   UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];

  3546   int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength

  3547   uint8_t fFlags;       // bit flags: see constants above

  3548 };

  3550 /**

  3551  * Create a new UnicodeString with the concatenation of two others.

  3552  *

  3553  * @param s1 The first string to be copied to the new one.

  3554  * @param s2 The second string to be copied to the new one, after s1.

  3555  * @return UnicodeString(s1).append(s2)

  3556  * @stable ICU 2.8

  3557  */

  3558 U_COMMON_API UnicodeString U_EXPORT2

  3559 operator+ (const UnicodeString &s1, const UnicodeString &s2);

  3561 //========================================

  3562 // Inline members

  3563 //========================================

  3565 //========================================

  3566 // Privates

  3567 //========================================

  3569 inline void

  3570 UnicodeString::pinIndex(int32_t& start) const

  3571 {

  3572   // pin index

  3573   if(start < 0) {

  3574     start = 0;

  3575   } else if(start > length()) {

  3576     start = length();

  3577   }

  3578 }

  3580 inline void

  3581 UnicodeString::pinIndices(int32_t& start,

  3582                           int32_t& _length) const

  3583 {

  3584   // pin indices

  3585   int32_t len = length();

  3586   if(start < 0) {

  3587     start = 0;

  3588   } else if(start > len) {

  3589     start = len;

  3590   }

  3591   if(_length < 0) {

  3592     _length = 0;

  3593   } else if(_length > (len - start)) {

  3594     _length = (len - start);

  3595   }

  3596 }

  3598 inline UChar*

  3599 UnicodeString::getArrayStart()

  3600 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }

  3602 inline const UChar*

  3603 UnicodeString::getArrayStart() const

  3604 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }

  3606 //========================================

  3607 // Default constructor

  3608 //========================================

  3610 inline

  3611 UnicodeString::UnicodeString()

  3612   : fShortLength(0),

  3613     fFlags(kShortString)

  3614 {}

  3616 //========================================

  3617 // Read-only implementation methods

  3618 //========================================

  3619 inline int32_t

  3620 UnicodeString::length() const

  3621 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }

  3623 inline int32_t

  3624 UnicodeString::getCapacity() const

  3625 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }

  3627 inline int32_t

  3628 UnicodeString::hashCode() const

  3629 { return doHashCode(); }

  3631 inline UBool

  3632 UnicodeString::isBogus() const

  3633 { return (UBool)(fFlags & kIsBogus); }

  3635 inline UBool

  3636 UnicodeString::isWritable() const

  3637 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }

  3639 inline UBool

  3640 UnicodeString::isBufferWritable() const

  3641 {

  3642   return (UBool)(

  3643       !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&

  3644       (!(fFlags&kRefCounted) || refCount()==1));

  3645 }

  3647 inline const UChar *

  3648 UnicodeString::getBuffer() const {

  3649   if(fFlags&(kIsBogus|kOpenGetBuffer)) {

  3650     return 0;

  3651   } else if(fFlags&kUsingStackBuffer) {

  3652     return fUnion.fStackBuffer;

  3653   } else {

  3654     return fUnion.fFields.fArray;

  3655   }

  3656 }

  3658 //========================================

  3659 // Read-only alias methods

  3660 //========================================

  3661 inline int8_t

  3662 UnicodeString::doCompare(int32_t start,

  3663               int32_t thisLength,

  3664               const UnicodeString& srcText,

  3665               int32_t srcStart,

  3666               int32_t srcLength) const

  3667 {

  3668   if(srcText.isBogus()) {

  3669     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

  3670   } else {

  3671     srcText.pinIndices(srcStart, srcLength);

  3672     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);

  3673   }

  3674 }

  3676 inline UBool

  3677 UnicodeString::operator== (const UnicodeString& text) const

  3678 {

  3679   if(isBogus()) {

  3680     return text.isBogus();

  3681   } else {

  3682     int32_t len = length(), textLength = text.length();

  3683     return !text.isBogus() && len == textLength && doEquals(text, len);

  3684   }

  3685 }

  3687 inline UBool

  3688 UnicodeString::operator!= (const UnicodeString& text) const

  3689 { return (! operator==(text)); }

  3691 inline UBool

  3692 UnicodeString::operator> (const UnicodeString& text) const

  3693 { return doCompare(0, length(), text, 0, text.length()) == 1; }

  3695 inline UBool

  3696 UnicodeString::operator< (const UnicodeString& text) const

  3697 { return doCompare(0, length(), text, 0, text.length()) == -1; }

  3699 inline UBool

  3700 UnicodeString::operator>= (const UnicodeString& text) const

  3701 { return doCompare(0, length(), text, 0, text.length()) != -1; }

  3703 inline UBool

  3704 UnicodeString::operator<= (const UnicodeString& text) const

  3705 { return doCompare(0, length(), text, 0, text.length()) != 1; }

  3707 inline int8_t

  3708 UnicodeString::compare(const UnicodeString& text) const

  3709 { return doCompare(0, length(), text, 0, text.length()); }

  3711 inline int8_t

  3712 UnicodeString::compare(int32_t start,

  3713                int32_t _length,

  3714                const UnicodeString& srcText) const

  3715 { return doCompare(start, _length, srcText, 0, srcText.length()); }

  3717 inline int8_t

  3718 UnicodeString::compare(const UChar *srcChars,

  3719                int32_t srcLength) const

  3720 { return doCompare(0, length(), srcChars, 0, srcLength); }

  3722 inline int8_t

  3723 UnicodeString::compare(int32_t start,

  3724                int32_t _length,

  3725                const UnicodeString& srcText,

  3726                int32_t srcStart,

  3727                int32_t srcLength) const

  3728 { return doCompare(start, _length, srcText, srcStart, srcLength); }

  3730 inline int8_t

  3731 UnicodeString::compare(int32_t start,

  3732                int32_t _length,

  3733                const UChar *srcChars) const

  3734 { return doCompare(start, _length, srcChars, 0, _length); }

  3736 inline int8_t

  3737 UnicodeString::compare(int32_t start,

  3738                int32_t _length,

  3739                const UChar *srcChars,

  3740                int32_t srcStart,

  3741                int32_t srcLength) const

  3742 { return doCompare(start, _length, srcChars, srcStart, srcLength); }

  3744 inline int8_t

  3745 UnicodeString::compareBetween(int32_t start,

  3746                   int32_t limit,

  3747                   const UnicodeString& srcText,

  3748                   int32_t srcStart,

  3749                   int32_t srcLimit) const

  3750 { return doCompare(start, limit - start,

  3751            srcText, srcStart, srcLimit - srcStart); }

  3753 inline int8_t

  3754 UnicodeString::doCompareCodePointOrder(int32_t start,

  3755                                        int32_t thisLength,

  3756                                        const UnicodeString& srcText,

  3757                                        int32_t srcStart,

  3758                                        int32_t srcLength) const

  3759 {

  3760   if(srcText.isBogus()) {

  3761     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

  3762   } else {

  3763     srcText.pinIndices(srcStart, srcLength);

  3764     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);

  3765   }

  3766 }

  3768 inline int8_t

  3769 UnicodeString::compareCodePointOrder(const UnicodeString& text) const

  3770 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }

  3772 inline int8_t

  3773 UnicodeString::compareCodePointOrder(int32_t start,

  3774                                      int32_t _length,

  3775                                      const UnicodeString& srcText) const

  3776 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }

  3778 inline int8_t

  3779 UnicodeString::compareCodePointOrder(const UChar *srcChars,

  3780                                      int32_t srcLength) const

  3781 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }

  3783 inline int8_t

  3784 UnicodeString::compareCodePointOrder(int32_t start,

  3785                                      int32_t _length,

  3786                                      const UnicodeString& srcText,

  3787                                      int32_t srcStart,

  3788                                      int32_t srcLength) const

  3789 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }

  3791 inline int8_t

  3792 UnicodeString::compareCodePointOrder(int32_t start,

  3793                                      int32_t _length,

  3794                                      const UChar *srcChars) const

  3795 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }

  3797 inline int8_t

  3798 UnicodeString::compareCodePointOrder(int32_t start,

  3799                                      int32_t _length,

  3800                                      const UChar *srcChars,

  3801                                      int32_t srcStart,

  3802                                      int32_t srcLength) const

  3803 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }

  3805 inline int8_t

  3806 UnicodeString::compareCodePointOrderBetween(int32_t start,

  3807                                             int32_t limit,

  3808                                             const UnicodeString& srcText,

  3809                                             int32_t srcStart,

  3810                                             int32_t srcLimit) const

  3811 { return doCompareCodePointOrder(start, limit - start,

  3812            srcText, srcStart, srcLimit - srcStart); }

  3814 inline int8_t

  3815 UnicodeString::doCaseCompare(int32_t start,

  3816                              int32_t thisLength,

  3817                              const UnicodeString &srcText,

  3818                              int32_t srcStart,

  3819                              int32_t srcLength,

  3820                              uint32_t options) const

  3821 {

  3822   if(srcText.isBogus()) {

  3823     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise

  3824   } else {

  3825     srcText.pinIndices(srcStart, srcLength);

  3826     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);

  3827   }

  3828 }

  3830 inline int8_t

  3831 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {

  3832   return doCaseCompare(0, length(), text, 0, text.length(), options);

  3833 }

  3835 inline int8_t

  3836 UnicodeString::caseCompare(int32_t start,

  3837                            int32_t _length,

  3838                            const UnicodeString &srcText,

  3839                            uint32_t options) const {

  3840   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);

  3841 }

  3843 inline int8_t

  3844 UnicodeString::caseCompare(const UChar *srcChars,

  3845                            int32_t srcLength,

  3846                            uint32_t options) const {

  3847   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);

  3848 }

  3850 inline int8_t

  3851 UnicodeString::caseCompare(int32_t start,

  3852                            int32_t _length,

  3853                            const UnicodeString &srcText,

  3854                            int32_t srcStart,

  3855                            int32_t srcLength,

  3856                            uint32_t options) const {

  3857   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);

  3858 }

  3860 inline int8_t

  3861 UnicodeString::caseCompare(int32_t start,

  3862                            int32_t _length,

  3863                            const UChar *srcChars,

  3864                            uint32_t options) const {

  3865   return doCaseCompare(start, _length, srcChars, 0, _length, options);

  3866 }

  3868 inline int8_t

  3869 UnicodeString::caseCompare(int32_t start,

  3870                            int32_t _length,

  3871                            const UChar *srcChars,

  3872                            int32_t srcStart,

  3873                            int32_t srcLength,

  3874                            uint32_t options) const {

  3875   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);

  3876 }

  3878 inline int8_t

  3879 UnicodeString::caseCompareBetween(int32_t start,

  3880                                   int32_t limit,

  3881                                   const UnicodeString &srcText,

  3882                                   int32_t srcStart,

  3883                                   int32_t srcLimit,

  3884                                   uint32_t options) const {

  3885   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);

  3886 }

  3888 inline int32_t

  3889 UnicodeString::indexOf(const UnicodeString& srcText,

  3890                int32_t srcStart,

  3891                int32_t srcLength,

  3892                int32_t start,

  3893                int32_t _length) const

  3894 {

  3895   if(!srcText.isBogus()) {

  3896     srcText.pinIndices(srcStart, srcLength);

  3897     if(srcLength > 0) {

  3898       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);

  3899     }

  3900   }

  3901   return -1;

  3902 }

  3904 inline int32_t

  3905 UnicodeString::indexOf(const UnicodeString& text) const

  3906 { return indexOf(text, 0, text.length(), 0, length()); }

  3908 inline int32_t

  3909 UnicodeString::indexOf(const UnicodeString& text,

  3910                int32_t start) const {

  3911   pinIndex(start);

  3912   return indexOf(text, 0, text.length(), start, length() - start);

  3913 }

  3915 inline int32_t

  3916 UnicodeString::indexOf(const UnicodeString& text,

  3917                int32_t start,

  3918                int32_t _length) const

  3919 { return indexOf(text, 0, text.length(), start, _length); }

  3921 inline int32_t

  3922 UnicodeString::indexOf(const UChar *srcChars,

  3923                int32_t srcLength,

  3924                int32_t start) const {

  3925   pinIndex(start);

  3926   return indexOf(srcChars, 0, srcLength, start, length() - start);

  3927 }

  3929 inline int32_t

  3930 UnicodeString::indexOf(const UChar *srcChars,

  3931                int32_t srcLength,

  3932                int32_t start,

  3933                int32_t _length) const

  3934 { return indexOf(srcChars, 0, srcLength, start, _length); }

  3936 inline int32_t

  3937 UnicodeString::indexOf(UChar c,

  3938                int32_t start,

  3939                int32_t _length) const

  3940 { return doIndexOf(c, start, _length); }

  3942 inline int32_t

  3943 UnicodeString::indexOf(UChar32 c,

  3944                int32_t start,

  3945                int32_t _length) const

  3946 { return doIndexOf(c, start, _length); }

  3948 inline int32_t

  3949 UnicodeString::indexOf(UChar c) const

  3950 { return doIndexOf(c, 0, length()); }

  3952 inline int32_t

  3953 UnicodeString::indexOf(UChar32 c) const

  3954 { return indexOf(c, 0, length()); }

  3956 inline int32_t

  3957 UnicodeString::indexOf(UChar c,

  3958                int32_t start) const {

  3959   pinIndex(start);

  3960   return doIndexOf(c, start, length() - start);

  3961 }

  3963 inline int32_t

  3964 UnicodeString::indexOf(UChar32 c,

  3965                int32_t start) const {

  3966   pinIndex(start);

  3967   return indexOf(c, start, length() - start);

  3968 }

  3970 inline int32_t

  3971 UnicodeString::lastIndexOf(const UChar *srcChars,

  3972                int32_t srcLength,

  3973                int32_t start,

  3974                int32_t _length) const

  3975 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }

  3977 inline int32_t

  3978 UnicodeString::lastIndexOf(const UChar *srcChars,

  3979                int32_t srcLength,

  3980                int32_t start) const {

  3981   pinIndex(start);

  3982   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);

  3983 }

  3985 inline int32_t

  3986 UnicodeString::lastIndexOf(const UnicodeString& srcText,

  3987                int32_t srcStart,

  3988                int32_t srcLength,

  3989                int32_t start,

  3990                int32_t _length) const

  3991 {

  3992   if(!srcText.isBogus()) {

  3993     srcText.pinIndices(srcStart, srcLength);

  3994     if(srcLength > 0) {

  3995       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);

  3996     }

  3997   }

  3998   return -1;

  3999 }

  4001 inline int32_t

  4002 UnicodeString::lastIndexOf(const UnicodeString& text,

  4003                int32_t start,

  4004                int32_t _length) const

  4005 { return lastIndexOf(text, 0, text.length(), start, _length); }

  4007 inline int32_t

  4008 UnicodeString::lastIndexOf(const UnicodeString& text,

  4009                int32_t start) const {

  4010   pinIndex(start);

  4011   return lastIndexOf(text, 0, text.length(), start, length() - start);

  4012 }

  4014 inline int32_t

  4015 UnicodeString::lastIndexOf(const UnicodeString& text) const

  4016 { return lastIndexOf(text, 0, text.length(), 0, length()); }

  4018 inline int32_t

  4019 UnicodeString::lastIndexOf(UChar c,

  4020                int32_t start,

  4021                int32_t _length) const

  4022 { return doLastIndexOf(c, start, _length); }

  4024 inline int32_t

  4025 UnicodeString::lastIndexOf(UChar32 c,

  4026                int32_t start,

  4027                int32_t _length) const {

  4028   return doLastIndexOf(c, start, _length);

  4029 }

  4031 inline int32_t

  4032 UnicodeString::lastIndexOf(UChar c) const

  4033 { return doLastIndexOf(c, 0, length()); }

  4035 inline int32_t

  4036 UnicodeString::lastIndexOf(UChar32 c) const {

  4037   return lastIndexOf(c, 0, length());

  4038 }

  4040 inline int32_t

  4041 UnicodeString::lastIndexOf(UChar c,

  4042                int32_t start) const {

  4043   pinIndex(start);

  4044   return doLastIndexOf(c, start, length() - start);

  4045 }

  4047 inline int32_t

  4048 UnicodeString::lastIndexOf(UChar32 c,

  4049                int32_t start) const {

  4050   pinIndex(start);

  4051   return lastIndexOf(c, start, length() - start);

  4052 }

  4054 inline UBool

  4055 UnicodeString::startsWith(const UnicodeString& text) const

  4056 { return compare(0, text.length(), text, 0, text.length()) == 0; }

  4058 inline UBool

  4059 UnicodeString::startsWith(const UnicodeString& srcText,

  4060               int32_t srcStart,

  4061               int32_t srcLength) const

  4062 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }

  4064 inline UBool

  4065 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {

  4066   if(srcLength < 0) {

  4067     srcLength = u_strlen(srcChars);

  4068   }

  4069   return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;

  4070 }

  4072 inline UBool

  4073 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {

  4074   if(srcLength < 0) {

  4075     srcLength = u_strlen(srcChars);

  4076   }

  4077   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;

  4078 }

  4080 inline UBool

  4081 UnicodeString::endsWith(const UnicodeString& text) const

  4082 { return doCompare(length() - text.length(), text.length(),

  4083            text, 0, text.length()) == 0; }

  4085 inline UBool

  4086 UnicodeString::endsWith(const UnicodeString& srcText,

  4087             int32_t srcStart,

  4088             int32_t srcLength) const {

  4089   srcText.pinIndices(srcStart, srcLength);

  4090   return doCompare(length() - srcLength, srcLength,

  4091                    srcText, srcStart, srcLength) == 0;

  4092 }

  4094 inline UBool

  4095 UnicodeString::endsWith(const UChar *srcChars,

  4096             int32_t srcLength) const {

  4097   if(srcLength < 0) {

  4098     srcLength = u_strlen(srcChars);

  4099   }

  4100   return doCompare(length() - srcLength, srcLength,

  4101                    srcChars, 0, srcLength) == 0;

  4102 }

  4104 inline UBool

  4105 UnicodeString::endsWith(const UChar *srcChars,

  4106             int32_t srcStart,

  4107             int32_t srcLength) const {

  4108   if(srcLength < 0) {

  4109     srcLength = u_strlen(srcChars + srcStart);

  4110   }

  4111   return doCompare(length() - srcLength, srcLength,

  4112                    srcChars, srcStart, srcLength) == 0;

  4113 }

  4115 //========================================

  4116 // replace

  4117 //========================================

  4118 inline UnicodeString&

  4119 UnicodeString::replace(int32_t start,

  4120                int32_t _length,

  4121                const UnicodeString& srcText)

  4122 { return doReplace(start, _length, srcText, 0, srcText.length()); }

  4124 inline UnicodeString&

  4125 UnicodeString::replace(int32_t start,

  4126                int32_t _length,

  4127                const UnicodeString& srcText,

  4128                int32_t srcStart,

  4129                int32_t srcLength)

  4130 { return doReplace(start, _length, srcText, srcStart, srcLength); }

  4132 inline UnicodeString&

  4133 UnicodeString::replace(int32_t start,

  4134                int32_t _length,

  4135                const UChar *srcChars,

  4136                int32_t srcLength)

  4137 { return doReplace(start, _length, srcChars, 0, srcLength); }

  4139 inline UnicodeString&

  4140 UnicodeString::replace(int32_t start,

  4141                int32_t _length,

  4142                const UChar *srcChars,

  4143                int32_t srcStart,

  4144                int32_t srcLength)

  4145 { return doReplace(start, _length, srcChars, srcStart, srcLength); }

  4147 inline UnicodeString&

  4148 UnicodeString::replace(int32_t start,

  4149                int32_t _length,

  4150                UChar srcChar)

  4151 { return doReplace(start, _length, &srcChar, 0, 1); }

  4153 inline UnicodeString&

  4154 UnicodeString::replaceBetween(int32_t start,

  4155                   int32_t limit,

  4156                   const UnicodeString& srcText)

  4157 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }

  4159 inline UnicodeString&

  4160 UnicodeString::replaceBetween(int32_t start,

  4161                   int32_t limit,

  4162                   const UnicodeString& srcText,

  4163                   int32_t srcStart,

  4164                   int32_t srcLimit)

  4165 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }

  4167 inline UnicodeString&

  4168 UnicodeString::findAndReplace(const UnicodeString& oldText,

  4169                   const UnicodeString& newText)

  4170 { return findAndReplace(0, length(), oldText, 0, oldText.length(),

  4171             newText, 0, newText.length()); }

  4173 inline UnicodeString&

  4174 UnicodeString::findAndReplace(int32_t start,

  4175                   int32_t _length,

  4176                   const UnicodeString& oldText,

  4177                   const UnicodeString& newText)

  4178 { return findAndReplace(start, _length, oldText, 0, oldText.length(),

  4179             newText, 0, newText.length()); }

  4181 // ============================

  4182 // extract

  4183 // ============================

  4184 inline void

  4185 UnicodeString::doExtract(int32_t start,

  4186              int32_t _length,

  4187              UnicodeString& target) const

  4188 { target.replace(0, target.length(), *this, start, _length); }

  4190 inline void

  4191 UnicodeString::extract(int32_t start,

  4192                int32_t _length,

  4193                UChar *target,

  4194                int32_t targetStart) const

  4195 { doExtract(start, _length, target, targetStart); }

  4197 inline void

  4198 UnicodeString::extract(int32_t start,

  4199                int32_t _length,

  4200                UnicodeString& target) const

  4201 { doExtract(start, _length, target); }

  4203 #if !UCONFIG_NO_CONVERSION

  4205 inline int32_t

  4206 UnicodeString::extract(int32_t start,

  4207                int32_t _length,

  4208                char *dst,

  4209                const char *codepage) const

  4211 {

  4212   // This dstSize value will be checked explicitly

  4213   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);

  4214 }

  4216 #endif

  4218 inline void

  4219 UnicodeString::extractBetween(int32_t start,

  4220                   int32_t limit,

  4221                   UChar *dst,

  4222                   int32_t dstStart) const {

  4223   pinIndex(start);

  4224   pinIndex(limit);

  4225   doExtract(start, limit - start, dst, dstStart);

  4226 }

  4228 inline UnicodeString

  4229 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {

  4230     return tempSubString(start, limit - start);

  4231 }

  4233 inline UChar

  4234 UnicodeString::doCharAt(int32_t offset) const

  4235 {

  4236   if((uint32_t)offset < (uint32_t)length()) {

  4237     return getArrayStart()[offset];

  4238   } else {

  4239     return kInvalidUChar;

  4240   }

  4241 }

  4243 inline UChar

  4244 UnicodeString::charAt(int32_t offset) const

  4245 { return doCharAt(offset); }

  4247 inline UChar

  4248 UnicodeString::operator[] (int32_t offset) const

  4249 { return doCharAt(offset); }

  4251 inline UBool

  4252 UnicodeString::isEmpty() const {

  4253   return fShortLength == 0;

  4254 }

  4256 //========================================

  4257 // Write implementation methods

  4258 //========================================

  4259 inline void

  4260 UnicodeString::setLength(int32_t len) {

  4261   if(len <= 127) {

  4262     fShortLength = (int8_t)len;

  4263   } else {

  4264     fShortLength = (int8_t)-1;

  4265     fUnion.fFields.fLength = len;

  4266   }

  4267 }

  4269 inline void

  4270 UnicodeString::setToEmpty() {

  4271   fShortLength = 0;

  4272   fFlags = kShortString;

  4273 }

  4275 inline void

  4276 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {

  4277   setLength(len);

  4278   fUnion.fFields.fArray = array;

  4279   fUnion.fFields.fCapacity = capacity;

  4280 }

  4282 inline UnicodeString&

  4283 UnicodeString::operator= (UChar ch)

  4284 { return doReplace(0, length(), &ch, 0, 1); }

  4286 inline UnicodeString&

  4287 UnicodeString::operator= (UChar32 ch)

  4288 { return replace(0, length(), ch); }

  4290 inline UnicodeString&

  4291 UnicodeString::setTo(const UnicodeString& srcText,

  4292              int32_t srcStart,

  4293              int32_t srcLength)

  4294 {

  4295   unBogus();

  4296   return doReplace(0, length(), srcText, srcStart, srcLength);

  4297 }

  4299 inline UnicodeString&

  4300 UnicodeString::setTo(const UnicodeString& srcText,

  4301              int32_t srcStart)

  4302 {

  4303   unBogus();

  4304   srcText.pinIndex(srcStart);

  4305   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);

  4306 }

  4308 inline UnicodeString&

  4309 UnicodeString::setTo(const UnicodeString& srcText)

  4310 {

  4311   return copyFrom(srcText);

  4312 }

  4314 inline UnicodeString&

  4315 UnicodeString::setTo(const UChar *srcChars,

  4316              int32_t srcLength)

  4317 {

  4318   unBogus();

  4319   return doReplace(0, length(), srcChars, 0, srcLength);

  4320 }

  4322 inline UnicodeString&

  4323 UnicodeString::setTo(UChar srcChar)

  4324 {

  4325   unBogus();

  4326   return doReplace(0, length(), &srcChar, 0, 1);

  4327 }

  4329 inline UnicodeString&

  4330 UnicodeString::setTo(UChar32 srcChar)

  4331 {

  4332   unBogus();

  4333   return replace(0, length(), srcChar);

  4334 }

  4336 inline UnicodeString&

  4337 UnicodeString::append(const UnicodeString& srcText,

  4338               int32_t srcStart,

  4339               int32_t srcLength)

  4340 { return doReplace(length(), 0, srcText, srcStart, srcLength); }

  4342 inline UnicodeString&

  4343 UnicodeString::append(const UnicodeString& srcText)

  4344 { return doReplace(length(), 0, srcText, 0, srcText.length()); }

  4346 inline UnicodeString&

  4347 UnicodeString::append(const UChar *srcChars,

  4348               int32_t srcStart,

  4349               int32_t srcLength)

  4350 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }

  4352 inline UnicodeString&

  4353 UnicodeString::append(const UChar *srcChars,

  4354               int32_t srcLength)

  4355 { return doReplace(length(), 0, srcChars, 0, srcLength); }

  4357 inline UnicodeString&

  4358 UnicodeString::append(UChar srcChar)

  4359 { return doReplace(length(), 0, &srcChar, 0, 1); }

  4361 inline UnicodeString&

  4362 UnicodeString::operator+= (UChar ch)

  4363 { return doReplace(length(), 0, &ch, 0, 1); }

  4365 inline UnicodeString&

  4366 UnicodeString::operator+= (UChar32 ch) {

  4367   return append(ch);

  4368 }

  4370 inline UnicodeString&

  4371 UnicodeString::operator+= (const UnicodeString& srcText)

  4372 { return doReplace(length(), 0, srcText, 0, srcText.length()); }

  4374 inline UnicodeString&

  4375 UnicodeString::insert(int32_t start,

  4376               const UnicodeString& srcText,

  4377               int32_t srcStart,

  4378               int32_t srcLength)

  4379 { return doReplace(start, 0, srcText, srcStart, srcLength); }

  4381 inline UnicodeString&

  4382 UnicodeString::insert(int32_t start,

  4383               const UnicodeString& srcText)

  4384 { return doReplace(start, 0, srcText, 0, srcText.length()); }

  4386 inline UnicodeString&

  4387 UnicodeString::insert(int32_t start,

  4388               const UChar *srcChars,

  4389               int32_t srcStart,

  4390               int32_t srcLength)

  4391 { return doReplace(start, 0, srcChars, srcStart, srcLength); }

  4393 inline UnicodeString&

  4394 UnicodeString::insert(int32_t start,

  4395               const UChar *srcChars,

  4396               int32_t srcLength)

  4397 { return doReplace(start, 0, srcChars, 0, srcLength); }

  4399 inline UnicodeString&

  4400 UnicodeString::insert(int32_t start,

  4401               UChar srcChar)

  4402 { return doReplace(start, 0, &srcChar, 0, 1); }

  4404 inline UnicodeString&

  4405 UnicodeString::insert(int32_t start,

  4406               UChar32 srcChar)

  4407 { return replace(start, 0, srcChar); }

  4410 inline UnicodeString&

  4411 UnicodeString::remove()

  4412 {

  4413   // remove() of a bogus string makes the string empty and non-bogus

  4414   if(isBogus()) {

  4415     setToEmpty();

  4416   } else {

  4417     fShortLength = 0;

  4418   }

  4419   return *this;

  4420 }

  4422 inline UnicodeString&

  4423 UnicodeString::remove(int32_t start,

  4424              int32_t _length)

  4425 {

  4426     if(start <= 0 && _length == INT32_MAX) {

  4427         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus

  4428         return remove();

  4429     }

  4430     return doReplace(start, _length, NULL, 0, 0);

  4431 }

  4433 inline UnicodeString&

  4434 UnicodeString::removeBetween(int32_t start,

  4435                 int32_t limit)

  4436 { return doReplace(start, limit - start, NULL, 0, 0); }

  4438 inline UnicodeString &

  4439 UnicodeString::retainBetween(int32_t start, int32_t limit) {

  4440   truncate(limit);

  4441   return doReplace(0, start, NULL, 0, 0);

  4442 }

  4444 inline UBool

  4445 UnicodeString::truncate(int32_t targetLength)

  4446 {

  4447   if(isBogus() && targetLength == 0) {

  4448     // truncate(0) of a bogus string makes the string empty and non-bogus

  4449     unBogus();

  4450     return FALSE;

  4451   } else if((uint32_t)targetLength < (uint32_t)length()) {

  4452     setLength(targetLength);

  4453     return TRUE;

  4454   } else {

  4455     return FALSE;

  4456   }

  4457 }

  4459 inline UnicodeString&

  4460 UnicodeString::reverse()

  4461 { return doReverse(0, length()); }

  4463 inline UnicodeString&

  4464 UnicodeString::reverse(int32_t start,

  4465                int32_t _length)

  4466 { return doReverse(start, _length); }

  4468 U_NAMESPACE_END

  4470 #endif

The Tor Browser / file revision

intl/icu/source/common/unicode/unistr.h@129ffea94266

intl/icu/source/common/unicode/unistr.h