1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/unistr.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,4470 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 1998-2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* 1.10 +* File unistr.h 1.11 +* 1.12 +* Modification History: 1.13 +* 1.14 +* Date Name Description 1.15 +* 09/25/98 stephen Creation. 1.16 +* 11/11/98 stephen Changed per 11/9 code review. 1.17 +* 04/20/99 stephen Overhauled per 4/16 code review. 1.18 +* 11/18/99 aliu Made to inherit from Replaceable. Added method 1.19 +* handleReplaceBetween(); other methods unchanged. 1.20 +* 06/25/01 grhoten Remove dependency on iostream. 1.21 +****************************************************************************** 1.22 +*/ 1.23 + 1.24 +#ifndef UNISTR_H 1.25 +#define UNISTR_H 1.26 + 1.27 +/** 1.28 + * \file 1.29 + * \brief C++ API: Unicode String 1.30 + */ 1.31 + 1.32 +#include "unicode/utypes.h" 1.33 +#include "unicode/rep.h" 1.34 +#include "unicode/std_string.h" 1.35 +#include "unicode/stringpiece.h" 1.36 +#include "unicode/bytestream.h" 1.37 +#include "unicode/ucasemap.h" 1.38 + 1.39 +struct UConverter; // unicode/ucnv.h 1.40 +class StringThreadTest; 1.41 + 1.42 +#ifndef U_COMPARE_CODE_POINT_ORDER 1.43 +/* see also ustring.h and unorm.h */ 1.44 +/** 1.45 + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: 1.46 + * Compare strings in code point order instead of code unit order. 
1.47 + * @stable ICU 2.2 1.48 + */ 1.49 +#define U_COMPARE_CODE_POINT_ORDER 0x8000 1.50 +#endif 1.51 + 1.52 +#ifndef USTRING_H 1.53 +/** 1.54 + * Forward declaration of u_strlen(); emitted only when USTRING_H is not defined (presumably the include guard of unicode/ustring.h -- TODO confirm). \ingroup ustring_ustrlen 1.55 + */ 1.56 +U_STABLE int32_t U_EXPORT2 1.57 +u_strlen(const UChar *s); 1.58 +#endif 1.59 + 1.60 +/** 1.61 + * \def U_STRING_CASE_MAPPER_DEFINED 1.62 + * @internal 1.63 + */ 1.64 +#ifndef U_STRING_CASE_MAPPER_DEFINED 1.65 +#define U_STRING_CASE_MAPPER_DEFINED 1.66 + 1.67 +/** 1.68 + * Internal string case mapping function type. 1.69 + * @internal 1.70 + */ 1.71 +typedef int32_t U_CALLCONV 1.72 +UStringCaseMapper(const UCaseMap *csm, 1.73 + UChar *dest, int32_t destCapacity, 1.74 + const UChar *src, int32_t srcLength, 1.75 + UErrorCode *pErrorCode); 1.76 + 1.77 +#endif 1.78 + 1.79 +U_NAMESPACE_BEGIN 1.80 + 1.81 +class BreakIterator; // unicode/brkiter.h 1.82 +class Locale; // unicode/locid.h 1.83 +class StringCharacterIterator; 1.84 +class UnicodeStringAppendable; // unicode/appendable.h 1.85 + 1.86 +/* The <iostream> include has been moved to unicode/ustream.h */ 1.87 + 1.88 +/** 1.89 + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 1.90 + * which constructs a Unicode string from an invariant-character char * string. 1.91 + * About invariant characters see utypes.h. 1.92 + * This constructor has no runtime dependency on conversion code and is 1.93 + * therefore recommended over ones taking a charset name string 1.94 + * (where the empty string "" indicates invariant-character conversion). 1.95 + * 1.96 + * @stable ICU 3.2 1.97 + */ 1.98 +#define US_INV icu::UnicodeString::kInvariant 1.99 + 1.100 +/** 1.101 + * Unicode String literals in C++. 1.102 + * Dependent on the platform properties, different UnicodeString 1.103 + * constructors should be used to create a UnicodeString object from 1.104 + * a string literal. 1.105 + * The macros are defined for maximum performance.
1.106 + * They work only for strings that contain "invariant characters", i.e., 1.107 + * only Latin letters, digits, and some punctuation. 1.108 + * See utypes.h for details. 1.109 + * 1.110 + * The string parameter must be a C string literal. 1.111 + * The length of the string, not including the terminating 1.112 + * <code>NUL</code>, must be specified as a constant. 1.113 + * The U_STRING_DECL macro should be invoked exactly once for one 1.114 + * such string variable before it is used. 1.115 + * @stable ICU 2.0 1.116 + */ 1.117 +#if defined(U_DECLARE_UTF16) 1.118 +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 1.119 +#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) 1.120 +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) 1.121 +#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 1.122 +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) 1.123 +#else 1.124 +# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) 1.125 +#endif 1.126 + 1.127 +/** 1.128 + * Unicode String literals in C++. 1.129 + * Dependent on the platform properties, different UnicodeString 1.130 + * constructors should be used to create a UnicodeString object from 1.131 + * a string literal. 1.132 + * The macros are defined for improved performance. 1.133 + * They work only for strings that contain "invariant characters", i.e., 1.134 + * only Latin letters, digits, and some punctuation. 1.135 + * See utypes.h for details. 1.136 + * 1.137 + * The string parameter must be a C string literal. 1.138 + * @stable ICU 2.0 1.139 + */ 1.140 +#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 1.141 + 1.142 +/** 1.143 + * \def UNISTR_FROM_CHAR_EXPLICIT 1.144 + * This can be defined to be empty or "explicit".
1.145 + * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) 1.146 + * constructors are marked as explicit, preventing their inadvertent use. 1.147 + * @stable ICU 49 1.148 + */ 1.149 +#ifndef UNISTR_FROM_CHAR_EXPLICIT 1.150 +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 1.151 + // Auto-"explicit" in ICU library code. 1.152 +# define UNISTR_FROM_CHAR_EXPLICIT explicit 1.153 +# else 1.154 + // Empty by default for source code compatibility. 1.155 +# define UNISTR_FROM_CHAR_EXPLICIT 1.156 +# endif 1.157 +#endif 1.158 + 1.159 +/** 1.160 + * \def UNISTR_FROM_STRING_EXPLICIT 1.161 + * This can be defined to be empty or "explicit". 1.162 + * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) 1.163 + * constructors are marked as explicit, preventing their inadvertent use. 1.164 + * 1.165 + * In particular, this helps prevent accidentally depending on ICU conversion code 1.166 + * by passing a string literal into an API with a const UnicodeString & parameter. 1.167 + * @stable ICU 49 1.168 + */ 1.169 +#ifndef UNISTR_FROM_STRING_EXPLICIT 1.170 +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 1.171 + // Auto-"explicit" in ICU library code. 1.172 +# define UNISTR_FROM_STRING_EXPLICIT explicit 1.173 +# else 1.174 + // Empty by default for source code compatibility. 1.175 +# define UNISTR_FROM_STRING_EXPLICIT 1.176 +# endif 1.177 +#endif 1.178 + 1.179 +/** 1.180 + * UnicodeString is a string class that stores Unicode characters directly and provides 1.181 + * similar functionality as the Java String and StringBuffer classes. 1.182 + * It is a concrete implementation of the abstract class Replaceable (for transliteration). 1.183 + * 1.184 + * The UnicodeString class is not suitable for subclassing. 
1.185 + * 1.186 + * <p>For an overview of Unicode strings in C and C++ see the 1.187 + * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> 1.188 + * 1.189 + * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>. 1.190 + * A Unicode character may be stored with either one code unit 1.191 + * (the most common case) or with a matched pair of special code units 1.192 + * ("surrogates"). The data type for code units is UChar. 1.193 + * For single-character handling, a Unicode character code <em>point</em> is a value 1.194 + * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p> 1.195 + * 1.196 + * <p>Indexes and offsets into and lengths of strings always count code units, not code points. 1.197 + * This is the same as with multi-byte char* strings in traditional string handling. 1.198 + * Operations on partial strings typically do not test for code point boundaries. 1.199 + * If necessary, the user needs to take care of such boundaries by testing for the code unit 1.200 + * values or by using functions like 1.201 + * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() 1.202 + * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p> 1.203 + * 1.204 + * UnicodeString methods are more lenient with regard to input parameter values 1.205 + * than other ICU APIs. In particular: 1.206 + * - If indexes are out of bounds for a UnicodeString object 1.207 + * (<0 or >length()) then they are "pinned" to the nearest boundary. 1.208 + * - If primitive string pointer values (e.g., const UChar * or char *) 1.209 + * for input strings are NULL, then those input string parameters are treated 1.210 + * as if they pointed to an empty string. 1.211 + * However, this is <em>not</em> the case for char * parameters for charset names 1.212 + * or other IDs. 
1.213 + * - Most UnicodeString methods do not take a UErrorCode parameter because 1.214 + * there are usually very few opportunities for failure other than a shortage 1.215 + * of memory, error codes in low-level C++ string methods would be inconvenient, 1.216 + * and the error code as the last parameter (ICU convention) would prevent 1.217 + * the use of default parameter values. 1.218 + * Instead, such methods set the UnicodeString into a "bogus" state 1.219 + * (see isBogus()) if an error occurs. 1.220 + * 1.221 + * In string comparisons, two UnicodeString objects that are both "bogus" 1.222 + * compare equal (to be transitive and prevent endless loops in sorting), 1.223 + * and a "bogus" string compares less than any non-"bogus" one. 1.224 + * 1.225 + * Const UnicodeString methods are thread-safe. Multiple threads can use 1.226 + * const methods on the same UnicodeString object simultaneously, 1.227 + * but non-const methods must not be called concurrently (in multiple threads) 1.228 + * with any other (const or non-const) methods. 1.229 + * 1.230 + * Similarly, const UnicodeString & parameters are thread-safe. 1.231 + * One object may be passed in as such a parameter concurrently in multiple threads. 1.232 + * This includes the const UnicodeString & parameters for 1.233 + * copy construction, assignment, and cloning. 1.234 + * 1.235 + * <p>UnicodeString uses several storage methods. 1.236 + * String contents can be stored inside the UnicodeString object itself, 1.237 + * in an allocated and shared buffer, or in an outside buffer that is "aliased". 1.238 + * Most of this is done transparently, but careful aliasing in particular provides 1.239 + * significant performance improvements. 1.240 + * Also, the internal buffer is accessible via special functions. 
1.241 + * For details see the 1.242 + * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> 1.243 + * 1.244 + * @see utf.h 1.245 + * @see CharacterIterator 1.246 + * @stable ICU 2.0 1.247 + */ 1.248 +class U_COMMON_API UnicodeString : public Replaceable 1.249 +{ 1.250 +public: 1.251 + 1.252 + /** 1.253 + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 1.254 + * which constructs a Unicode string from an invariant-character char * string. 1.255 + * Use the macro US_INV instead of the full qualification for this value. 1.256 + * 1.257 + * @see US_INV 1.258 + * @stable ICU 3.2 1.259 + */ 1.260 + enum EInvariant { 1.261 + /** 1.262 + * @see EInvariant 1.263 + * @stable ICU 3.2 1.264 + */ 1.265 + kInvariant 1.266 + }; 1.267 + 1.268 + //======================================== 1.269 + // Read-only operations 1.270 + //======================================== 1.271 + 1.272 + /* Comparison - bitwise only - for international comparison use collation */ 1.273 + 1.274 + /** 1.275 + * Equality operator. Performs only bitwise comparison. 1.276 + * @param text The UnicodeString to compare to this one. 1.277 + * @return TRUE if <TT>text</TT> contains the same characters as this one, 1.278 + * FALSE otherwise. 1.279 + * @stable ICU 2.0 1.280 + */ 1.281 + inline UBool operator== (const UnicodeString& text) const; 1.282 + 1.283 + /** 1.284 + * Inequality operator. Performs only bitwise comparison. 1.285 + * @param text The UnicodeString to compare to this one. 1.286 + * @return FALSE if <TT>text</TT> contains the same characters as this one, 1.287 + * TRUE otherwise. 1.288 + * @stable ICU 2.0 1.289 + */ 1.290 + inline UBool operator!= (const UnicodeString& text) const; 1.291 + 1.292 + /** 1.293 + * Greater than operator. Performs only bitwise comparison. 1.294 + * @param text The UnicodeString to compare to this one. 
1.295 + * @return TRUE if the characters in this are bitwise 1.296 + * greater than the characters in <code>text</code>, FALSE otherwise 1.297 + * @stable ICU 2.0 1.298 + */ 1.299 + inline UBool operator> (const UnicodeString& text) const; 1.300 + 1.301 + /** 1.302 + * Less than operator. Performs only bitwise comparison. 1.303 + * @param text The UnicodeString to compare to this one. 1.304 + * @return TRUE if the characters in this are bitwise 1.305 + * less than the characters in <code>text</code>, FALSE otherwise 1.306 + * @stable ICU 2.0 1.307 + */ 1.308 + inline UBool operator< (const UnicodeString& text) const; 1.309 + 1.310 + /** 1.311 + * Greater than or equal operator. Performs only bitwise comparison. 1.312 + * @param text The UnicodeString to compare to this one. 1.313 + * @return TRUE if the characters in this are bitwise 1.314 + * greater than or equal to the characters in <code>text</code>, FALSE otherwise 1.315 + * @stable ICU 2.0 1.316 + */ 1.317 + inline UBool operator>= (const UnicodeString& text) const; 1.318 + 1.319 + /** 1.320 + * Less than or equal operator. Performs only bitwise comparison. 1.321 + * @param text The UnicodeString to compare to this one. 1.322 + * @return TRUE if the characters in this are bitwise 1.323 + * less than or equal to the characters in <code>text</code>, FALSE otherwise 1.324 + * @stable ICU 2.0 1.325 + */ 1.326 + inline UBool operator<= (const UnicodeString& text) const; 1.327 + 1.328 + /** 1.329 + * Compare the characters bitwise in this UnicodeString to 1.330 + * the characters in <code>text</code>. 1.331 + * @param text The UnicodeString to compare to this one. 1.332 + * @return The result of bitwise character comparison: 0 if this 1.333 + * contains the same characters as <code>text</code>, -1 if the characters in 1.334 + * this are bitwise less than the characters in <code>text</code>, +1 if the 1.335 + * characters in this are bitwise greater than the characters 1.336 + * in <code>text</code>. 
1.337 + * @stable ICU 2.0 1.338 + */ 1.339 + inline int8_t compare(const UnicodeString& text) const; 1.340 + 1.341 + /** 1.342 + * Compare the characters bitwise in the range 1.343 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters 1.344 + * in the <b>entire string</b> <TT>text</TT>. 1.345 + * (The parameters "start" and "length" are not applied to the other text "text".) 1.346 + * @param start the offset at which the compare operation begins 1.347 + * @param length the number of characters of text to compare. 1.348 + * @param text the other text to be compared against this string. 1.349 + * @return The result of bitwise character comparison: 0 if this 1.350 + * contains the same characters as <code>text</code>, -1 if the characters in 1.351 + * this are bitwise less than the characters in <code>text</code>, +1 if the 1.352 + * characters in this are bitwise greater than the characters 1.353 + * in <code>text</code>. 1.354 + * @stable ICU 2.0 1.355 + */ 1.356 + inline int8_t compare(int32_t start, 1.357 + int32_t length, 1.358 + const UnicodeString& text) const; 1.359 + 1.360 + /** 1.361 + * Compare the characters bitwise in the range 1.362 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters 1.363 + * in <TT>srcText</TT> in the range 1.364 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1.365 + * @param start the offset at which the compare operation begins 1.366 + * @param length the number of characters in this to compare. 
1.367 + * @param srcText the text to be compared 1.368 + * @param srcStart the offset into <TT>srcText</TT> to start comparison 1.369 + * @param srcLength the number of characters in <TT>srcText</TT> to compare 1.370 + * @return The result of bitwise character comparison: 0 if this 1.371 + * contains the same characters as <code>srcText</code>, -1 if the characters in 1.372 + * this are bitwise less than the characters in <code>srcText</code>, +1 if the 1.373 + * characters in this are bitwise greater than the characters 1.374 + * in <code>srcText</code>. 1.375 + * @stable ICU 2.0 1.376 + */ 1.377 + inline int8_t compare(int32_t start, 1.378 + int32_t length, 1.379 + const UnicodeString& srcText, 1.380 + int32_t srcStart, 1.381 + int32_t srcLength) const; 1.382 + 1.383 + /** 1.384 + * Compare the characters bitwise in this UnicodeString with the first 1.385 + * <TT>srcLength</TT> characters in <TT>srcChars</TT>. 1.386 + * @param srcChars The characters to compare to this UnicodeString. 1.387 + * @param srcLength the number of characters in <TT>srcChars</TT> to compare 1.388 + * @return The result of bitwise character comparison: 0 if this 1.389 + * contains the same characters as <code>srcChars</code>, -1 if the characters in 1.390 + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 1.391 + * characters in this are bitwise greater than the characters 1.392 + * in <code>srcChars</code>. 1.393 + * @stable ICU 2.0 1.394 + */ 1.395 + inline int8_t compare(const UChar *srcChars, 1.396 + int32_t srcLength) const; 1.397 + 1.398 + /** 1.399 + * Compare the characters bitwise in the range 1.400 + * [<TT>start</TT>, <TT>start + length</TT>) with the first 1.401 + * <TT>length</TT> characters in <TT>srcChars</TT>. 1.402 + * @param start the offset at which the compare operation begins 1.403 + * @param length the number of characters to compare.
1.404 + * @param srcChars the characters to be compared 1.405 + * @return The result of bitwise character comparison: 0 if this 1.406 + * contains the same characters as <code>srcChars</code>, -1 if the characters in 1.407 + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 1.408 + * characters in this are bitwise greater than the characters 1.409 + * in <code>srcChars</code>. 1.410 + * @stable ICU 2.0 1.411 + */ 1.412 + inline int8_t compare(int32_t start, 1.413 + int32_t length, 1.414 + const UChar *srcChars) const; 1.415 + 1.416 + /** 1.417 + * Compare the characters bitwise in the range 1.418 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters 1.419 + * in <TT>srcChars</TT> in the range 1.420 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1.421 + * @param start the offset at which the compare operation begins 1.422 + * @param length the number of characters in this to compare 1.423 + * @param srcChars the characters to be compared 1.424 + * @param srcStart the offset into <TT>srcChars</TT> to start comparison 1.425 + * @param srcLength the number of characters in <TT>srcChars</TT> to compare 1.426 + * @return The result of bitwise character comparison: 0 if this 1.427 + * contains the same characters as <code>srcChars</code>, -1 if the characters in 1.428 + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 1.429 + * characters in this are bitwise greater than the characters 1.430 + * in <code>srcChars</code>. 1.431 + * @stable ICU 2.0 1.432 + */ 1.433 + inline int8_t compare(int32_t start, 1.434 + int32_t length, 1.435 + const UChar *srcChars, 1.436 + int32_t srcStart, 1.437 + int32_t srcLength) const; 1.438 + 1.439 + /** 1.440 + * Compare the characters bitwise in the range 1.441 + * [<TT>start</TT>, <TT>limit</TT>) with the characters 1.442 + * in <TT>srcText</TT> in the range 1.443 + * [<TT>srcStart</TT>, <TT>srcLimit</TT>). 
1.444 + * @param start the offset at which the compare operation begins 1.445 + * @param limit the offset immediately following the compare operation 1.446 + * @param srcText the text to be compared 1.447 + * @param srcStart the offset into <TT>srcText</TT> to start comparison 1.448 + * @param srcLimit the offset into <TT>srcText</TT> to limit comparison 1.449 + * @return The result of bitwise character comparison: 0 if this 1.450 + * contains the same characters as <code>srcText</code>, -1 if the characters in 1.451 + * this are bitwise less than the characters in <code>srcText</code>, +1 if the 1.452 + * characters in this are bitwise greater than the characters 1.453 + * in <code>srcText</code>. 1.454 + * @stable ICU 2.0 1.455 + */ 1.456 + inline int8_t compareBetween(int32_t start, 1.457 + int32_t limit, 1.458 + const UnicodeString& srcText, 1.459 + int32_t srcStart, 1.460 + int32_t srcLimit) const; 1.461 + 1.462 + /** 1.463 + * Compare two Unicode strings in code point order. 1.464 + * The result may be different from the results of compare(), operator<, etc. 1.465 + * if supplementary characters are present: 1.466 + * 1.467 + * In UTF-16, supplementary characters (with code points U+10000 and above) are 1.468 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 1.469 + * which means that they compare as less than some other BMP characters like U+feff. 1.470 + * This function compares Unicode strings in code point order. 1.471 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 1.472 + * 1.473 + * @param text Another string to compare this one to. 
1.474 + * @return a negative/zero/positive integer corresponding to whether 1.475 + * this string is less than/equal to/greater than the second one 1.476 + * in code point order 1.477 + * @stable ICU 2.0 1.478 + */ 1.479 + inline int8_t compareCodePointOrder(const UnicodeString& text) const; 1.480 + 1.481 + /** 1.482 + * Compare two Unicode strings in code point order. 1.483 + * The result may be different from the results of compare(), operator<, etc. 1.484 + * if supplementary characters are present: 1.485 + * 1.486 + * In UTF-16, supplementary characters (with code points U+10000 and above) are 1.487 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 1.488 + * which means that they compare as less than some other BMP characters like U+feff. 1.489 + * This function compares Unicode strings in code point order. 1.490 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 1.491 + * 1.492 + * @param start The start offset in this string at which the compare operation begins. 1.493 + * @param length The number of code units from this string to compare. 1.494 + * @param srcText Another string to compare this one to. 1.495 + * @return a negative/zero/positive integer corresponding to whether 1.496 + * this string is less than/equal to/greater than the second one 1.497 + * in code point order 1.498 + * @stable ICU 2.0 1.499 + */ 1.500 + inline int8_t compareCodePointOrder(int32_t start, 1.501 + int32_t length, 1.502 + const UnicodeString& srcText) const; 1.503 + 1.504 + /** 1.505 + * Compare two Unicode strings in code point order. 1.506 + * The result may be different from the results of compare(), operator<, etc. 1.507 + * if supplementary characters are present: 1.508 + * 1.509 + * In UTF-16, supplementary characters (with code points U+10000 and above) are 1.510 + * stored with pairs of surrogate code units. 
These have values from 0xd800 to 0xdfff, 1.511 + * which means that they compare as less than some other BMP characters like U+feff. 1.512 + * This function compares Unicode strings in code point order. 1.513 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 1.514 + * 1.515 + * @param start The start offset in this string at which the compare operation begins. 1.516 + * @param length The number of code units from this string to compare. 1.517 + * @param srcText Another string to compare this one to. 1.518 + * @param srcStart The start offset in that string at which the compare operation begins. 1.519 + * @param srcLength The number of code units from that string to compare. 1.520 + * @return a negative/zero/positive integer corresponding to whether 1.521 + * this string is less than/equal to/greater than the second one 1.522 + * in code point order 1.523 + * @stable ICU 2.0 1.524 + */ 1.525 + inline int8_t compareCodePointOrder(int32_t start, 1.526 + int32_t length, 1.527 + const UnicodeString& srcText, 1.528 + int32_t srcStart, 1.529 + int32_t srcLength) const; 1.530 + 1.531 + /** 1.532 + * Compare two Unicode strings in code point order. 1.533 + * The result may be different from the results of compare(), operator<, etc. 1.534 + * if supplementary characters are present: 1.535 + * 1.536 + * In UTF-16, supplementary characters (with code points U+10000 and above) are 1.537 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 1.538 + * which means that they compare as less than some other BMP characters like U+feff. 1.539 + * This function compares Unicode strings in code point order. 1.540 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 1.541 + * 1.542 + * @param srcChars A pointer to another string to compare this one to. 
1.543 + * @param srcLength The number of code units from that string to compare. 1.544 + * @return a negative/zero/positive integer corresponding to whether 1.545 + * this string is less than/equal to/greater than the second one 1.546 + * in code point order 1.547 + * @stable ICU 2.0 1.548 + */ 1.549 + inline int8_t compareCodePointOrder(const UChar *srcChars, 1.550 + int32_t srcLength) const; 1.551 + 1.552 + /** 1.553 + * Compare two Unicode strings in code point order. 1.554 + * The result may be different from the results of compare(), operator<, etc. 1.555 + * if supplementary characters are present: 1.556 + * 1.557 + * In UTF-16, supplementary characters (with code points U+10000 and above) are 1.558 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 1.559 + * which means that they compare as less than some other BMP characters like U+feff. 1.560 + * This function compares Unicode strings in code point order. 1.561 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 1.562 + * 1.563 + * @param start The start offset in this string at which the compare operation begins. 1.564 + * @param length The number of code units from this string to compare. 1.565 + * @param srcChars A pointer to another string to compare this one to. 1.566 + * @return a negative/zero/positive integer corresponding to whether 1.567 + * this string is less than/equal to/greater than the second one 1.568 + * in code point order 1.569 + * @stable ICU 2.0 1.570 + */ 1.571 + inline int8_t compareCodePointOrder(int32_t start, 1.572 + int32_t length, 1.573 + const UChar *srcChars) const; 1.574 + 1.575 + /** 1.576 + * Compare two Unicode strings in code point order. 1.577 + * The result may be different from the results of compare(), operator<, etc. 
1.578 + * if supplementary characters are present: 1.579 + * 1.580 + * In UTF-16, supplementary characters (with code points U+10000 and above) are 1.581 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 1.582 + * which means that they compare as less than some other BMP characters like U+feff. 1.583 + * This function compares Unicode strings in code point order. 1.584 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 1.585 + * 1.586 + * @param start The start offset in this string at which the compare operation begins. 1.587 + * @param length The number of code units from this string to compare. 1.588 + * @param srcChars A pointer to another string to compare this one to. 1.589 + * @param srcStart The start offset in that string at which the compare operation begins. 1.590 + * @param srcLength The number of code units from that string to compare. 1.591 + * @return a negative/zero/positive integer corresponding to whether 1.592 + * this string is less than/equal to/greater than the second one 1.593 + * in code point order 1.594 + * @stable ICU 2.0 1.595 + */ 1.596 + inline int8_t compareCodePointOrder(int32_t start, 1.597 + int32_t length, 1.598 + const UChar *srcChars, 1.599 + int32_t srcStart, 1.600 + int32_t srcLength) const; 1.601 + 1.602 + /** 1.603 + * Compare two Unicode strings in code point order. 1.604 + * The result may be different from the results of compare(), operator<, etc. 1.605 + * if supplementary characters are present: 1.606 + * 1.607 + * In UTF-16, supplementary characters (with code points U+10000 and above) are 1.608 + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 1.609 + * which means that they compare as less than some other BMP characters like U+feff. 1.610 + * This function compares Unicode strings in code point order. 
1.611 + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 1.612 + * 1.613 + * @param start The start offset in this string at which the compare operation begins. 1.614 + * @param limit The offset after the last code unit from this string to compare. 1.615 + * @param srcText Another string to compare this one to. 1.616 + * @param srcStart The start offset in that string at which the compare operation begins. 1.617 + * @param srcLimit The offset after the last code unit from that string to compare. 1.618 + * @return a negative/zero/positive integer corresponding to whether 1.619 + * this string is less than/equal to/greater than the second one 1.620 + * in code point order 1.621 + * @stable ICU 2.0 1.622 + */ 1.623 + inline int8_t compareCodePointOrderBetween(int32_t start, 1.624 + int32_t limit, 1.625 + const UnicodeString& srcText, 1.626 + int32_t srcStart, 1.627 + int32_t srcLimit) const; 1.628 + 1.629 + /** 1.630 + * Compare two strings case-insensitively using full case folding. 1.631 + * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). 1.632 + * 1.633 + * @param text Another string to compare this one to. 1.634 + * @param options A bit set of options: 1.635 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 1.636 + * Comparison in code unit order with default case folding. 1.637 + * 1.638 + * - U_COMPARE_CODE_POINT_ORDER 1.639 + * Set to choose code point order instead of code unit order 1.640 + * (see u_strCompare for details). 1.641 + * 1.642 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.643 + * 1.644 + * @return A negative, zero, or positive integer indicating the comparison result. 1.645 + * @stable ICU 2.0 1.646 + */ 1.647 + inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 1.648 + 1.649 + /** 1.650 + * Compare two strings case-insensitively using full case folding. 
1.651 + * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 1.652 + * 1.653 + * @param start The start offset in this string at which the compare operation begins. 1.654 + * @param length The number of code units from this string to compare. 1.655 + * @param srcText Another string to compare this one to. 1.656 + * @param options A bit set of options: 1.657 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 1.658 + * Comparison in code unit order with default case folding. 1.659 + * 1.660 + * - U_COMPARE_CODE_POINT_ORDER 1.661 + * Set to choose code point order instead of code unit order 1.662 + * (see u_strCompare for details). 1.663 + * 1.664 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.665 + * 1.666 + * @return A negative, zero, or positive integer indicating the comparison result. 1.667 + * @stable ICU 2.0 1.668 + */ 1.669 + inline int8_t caseCompare(int32_t start, 1.670 + int32_t length, 1.671 + const UnicodeString& srcText, 1.672 + uint32_t options) const; 1.673 + 1.674 + /** 1.675 + * Compare two strings case-insensitively using full case folding. 1.676 + * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 1.677 + * 1.678 + * @param start The start offset in this string at which the compare operation begins. 1.679 + * @param length The number of code units from this string to compare. 1.680 + * @param srcText Another string to compare this one to. 1.681 + * @param srcStart The start offset in that string at which the compare operation begins. 1.682 + * @param srcLength The number of code units from that string to compare. 1.683 + * @param options A bit set of options: 1.684 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 1.685 + * Comparison in code unit order with default case folding. 1.686 + * 1.687 + * - U_COMPARE_CODE_POINT_ORDER 1.688 + * Set to choose code point order instead of code unit order 1.689 + * (see u_strCompare for details). 
1.690 + * 1.691 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.692 + * 1.693 + * @return A negative, zero, or positive integer indicating the comparison result. 1.694 + * @stable ICU 2.0 1.695 + */ 1.696 + inline int8_t caseCompare(int32_t start, 1.697 + int32_t length, 1.698 + const UnicodeString& srcText, 1.699 + int32_t srcStart, 1.700 + int32_t srcLength, 1.701 + uint32_t options) const; 1.702 + 1.703 + /** 1.704 + * Compare two strings case-insensitively using full case folding. 1.705 + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 1.706 + * 1.707 + * @param srcChars A pointer to another string to compare this one to. 1.708 + * @param srcLength The number of code units from that string to compare. 1.709 + * @param options A bit set of options: 1.710 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 1.711 + * Comparison in code unit order with default case folding. 1.712 + * 1.713 + * - U_COMPARE_CODE_POINT_ORDER 1.714 + * Set to choose code point order instead of code unit order 1.715 + * (see u_strCompare for details). 1.716 + * 1.717 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.718 + * 1.719 + * @return A negative, zero, or positive integer indicating the comparison result. 1.720 + * @stable ICU 2.0 1.721 + */ 1.722 + inline int8_t caseCompare(const UChar *srcChars, 1.723 + int32_t srcLength, 1.724 + uint32_t options) const; 1.725 + 1.726 + /** 1.727 + * Compare two strings case-insensitively using full case folding. 1.728 + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 1.729 + * 1.730 + * @param start The start offset in this string at which the compare operation begins. 1.731 + * @param length The number of code units from this string to compare. 1.732 + * @param srcChars A pointer to another string to compare this one to. 
1.733 + * @param options A bit set of options: 1.734 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 1.735 + * Comparison in code unit order with default case folding. 1.736 + * 1.737 + * - U_COMPARE_CODE_POINT_ORDER 1.738 + * Set to choose code point order instead of code unit order 1.739 + * (see u_strCompare for details). 1.740 + * 1.741 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.742 + * 1.743 + * @return A negative, zero, or positive integer indicating the comparison result. 1.744 + * @stable ICU 2.0 1.745 + */ 1.746 + inline int8_t caseCompare(int32_t start, 1.747 + int32_t length, 1.748 + const UChar *srcChars, 1.749 + uint32_t options) const; 1.750 + 1.751 + /** 1.752 + * Compare two strings case-insensitively using full case folding. 1.753 + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 1.754 + * 1.755 + * @param start The start offset in this string at which the compare operation begins. 1.756 + * @param length The number of code units from this string to compare. 1.757 + * @param srcChars A pointer to another string to compare this one to. 1.758 + * @param srcStart The start offset in that string at which the compare operation begins. 1.759 + * @param srcLength The number of code units from that string to compare. 1.760 + * @param options A bit set of options: 1.761 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 1.762 + * Comparison in code unit order with default case folding. 1.763 + * 1.764 + * - U_COMPARE_CODE_POINT_ORDER 1.765 + * Set to choose code point order instead of code unit order 1.766 + * (see u_strCompare for details). 1.767 + * 1.768 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.769 + * 1.770 + * @return A negative, zero, or positive integer indicating the comparison result. 
1.771 + * @stable ICU 2.0 1.772 + */ 1.773 + inline int8_t caseCompare(int32_t start, 1.774 + int32_t length, 1.775 + const UChar *srcChars, 1.776 + int32_t srcStart, 1.777 + int32_t srcLength, 1.778 + uint32_t options) const; 1.779 + 1.780 + /** 1.781 + * Compare two strings case-insensitively using full case folding. 1.782 + * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)). 1.783 + * 1.784 + * @param start The start offset in this string at which the compare operation begins. 1.785 + * @param limit The offset after the last code unit from this string to compare. 1.786 + * @param srcText Another string to compare this one to. 1.787 + * @param srcStart The start offset in that string at which the compare operation begins. 1.788 + * @param srcLimit The offset after the last code unit from that string to compare. 1.789 + * @param options A bit set of options: 1.790 + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 1.791 + * Comparison in code unit order with default case folding. 1.792 + * 1.793 + * - U_COMPARE_CODE_POINT_ORDER 1.794 + * Set to choose code point order instead of code unit order 1.795 + * (see u_strCompare for details). 1.796 + * 1.797 + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.798 + * 1.799 + * @return A negative, zero, or positive integer indicating the comparison result. 1.800 + * @stable ICU 2.0 1.801 + */ 1.802 + inline int8_t caseCompareBetween(int32_t start, 1.803 + int32_t limit, 1.804 + const UnicodeString& srcText, 1.805 + int32_t srcStart, 1.806 + int32_t srcLimit, 1.807 + uint32_t options) const; 1.808 + 1.809 + /** 1.810 + * Determine if this starts with the characters in <TT>text</TT> 1.811 + * @param text The text to match.
1.812 + * @return TRUE if this starts with the characters in <TT>text</TT>, 1.813 + * FALSE otherwise 1.814 + * @stable ICU 2.0 1.815 + */ 1.816 + inline UBool startsWith(const UnicodeString& text) const; 1.817 + 1.818 + /** 1.819 + * Determine if this starts with the characters in <TT>srcText</TT> 1.820 + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1.821 + * @param srcText The text to match. 1.822 + * @param srcStart the offset into <TT>srcText</TT> to start matching 1.823 + * @param srcLength the number of characters in <TT>srcText</TT> to match 1.824 + * @return TRUE if this starts with the characters in <TT>srcText</TT>, 1.825 + * FALSE otherwise 1.826 + * @stable ICU 2.0 1.827 + */ 1.828 + inline UBool startsWith(const UnicodeString& srcText, 1.829 + int32_t srcStart, 1.830 + int32_t srcLength) const; 1.831 + 1.832 + /** 1.833 + * Determine if this starts with the characters in <TT>srcChars</TT> 1.834 + * @param srcChars The characters to match. 1.835 + * @param srcLength the number of characters in <TT>srcChars</TT> 1.836 + * @return TRUE if this starts with the characters in <TT>srcChars</TT>, 1.837 + * FALSE otherwise 1.838 + * @stable ICU 2.0 1.839 + */ 1.840 + inline UBool startsWith(const UChar *srcChars, 1.841 + int32_t srcLength) const; 1.842 + 1.843 + /** 1.844 + * Determine if this starts with the characters in <TT>srcChars</TT> 1.845 + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1.846 + * @param srcChars The characters to match.
1.847 + * @param srcStart the offset into <TT>srcChars</TT> to start matching 1.848 + * @param srcLength the number of characters in <TT>srcChars</TT> to match 1.849 + * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise 1.850 + * @stable ICU 2.0 1.851 + */ 1.852 + inline UBool startsWith(const UChar *srcChars, 1.853 + int32_t srcStart, 1.854 + int32_t srcLength) const; 1.855 + 1.856 + /** 1.857 + * Determine if this ends with the characters in <TT>text</TT> 1.858 + * @param text The text to match. 1.859 + * @return TRUE if this ends with the characters in <TT>text</TT>, 1.860 + * FALSE otherwise 1.861 + * @stable ICU 2.0 1.862 + */ 1.863 + inline UBool endsWith(const UnicodeString& text) const; 1.864 + 1.865 + /** 1.866 + * Determine if this ends with the characters in <TT>srcText</TT> 1.867 + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1.868 + * @param srcText The text to match. 1.869 + * @param srcStart the offset into <TT>srcText</TT> to start matching 1.870 + * @param srcLength the number of characters in <TT>srcText</TT> to match 1.871 + * @return TRUE if this ends with the characters in <TT>srcText</TT>, 1.872 + * FALSE otherwise 1.873 + * @stable ICU 2.0 1.874 + */ 1.875 + inline UBool endsWith(const UnicodeString& srcText, 1.876 + int32_t srcStart, 1.877 + int32_t srcLength) const; 1.878 + 1.879 + /** 1.880 + * Determine if this ends with the characters in <TT>srcChars</TT> 1.881 + * @param srcChars The characters to match. 1.882 + * @param srcLength the number of characters in <TT>srcChars</TT> 1.883 + * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 1.884 + * FALSE otherwise 1.885 + * @stable ICU 2.0 1.886 + */ 1.887 + inline UBool endsWith(const UChar *srcChars, 1.888 + int32_t srcLength) const; 1.889 + 1.890 + /** 1.891 + * Determine if this ends with the characters in <TT>srcChars</TT> 1.892 + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1.893 + * @param srcChars The characters to match. 1.894 + * @param srcStart the offset into <TT>srcChars</TT> to start matching 1.895 + * @param srcLength the number of characters in <TT>srcChars</TT> to match 1.896 + * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 1.897 + * FALSE otherwise 1.898 + * @stable ICU 2.0 1.899 + */ 1.900 + inline UBool endsWith(const UChar *srcChars, 1.901 + int32_t srcStart, 1.902 + int32_t srcLength) const; 1.903 + 1.904 + 1.905 + /* Searching - bitwise only */ 1.906 + 1.907 + /** 1.908 + * Locate in this the first occurrence of the characters in <TT>text</TT>, 1.909 + * using bitwise comparison. 1.910 + * @param text The text to search for. 1.911 + * @return The offset into this of the start of <TT>text</TT>, 1.912 + * or -1 if not found. 1.913 + * @stable ICU 2.0 1.914 + */ 1.915 + inline int32_t indexOf(const UnicodeString& text) const; 1.916 + 1.917 + /** 1.918 + * Locate in this the first occurrence of the characters in <TT>text</TT> 1.919 + * starting at offset <TT>start</TT>, using bitwise comparison. 1.920 + * @param text The text to search for. 1.921 + * @param start The offset at which searching will start. 1.922 + * @return The offset into this of the start of <TT>text</TT>, 1.923 + * or -1 if not found. 1.924 + * @stable ICU 2.0 1.925 + */ 1.926 + inline int32_t indexOf(const UnicodeString& text, 1.927 + int32_t start) const; 1.928 + 1.929 + /** 1.930 + * Locate in this the first occurrence in the range 1.931 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1.932 + * in <TT>text</TT>, using bitwise comparison. 1.933 + * @param text The text to search for. 1.934 + * @param start The offset at which searching will start. 1.935 + * @param length The number of characters to search 1.936 + * @return The offset into this of the start of <TT>text</TT>, 1.937 + * or -1 if not found.
1.938 + * @stable ICU 2.0 1.939 + */ 1.940 + inline int32_t indexOf(const UnicodeString& text, 1.941 + int32_t start, 1.942 + int32_t length) const; 1.943 + 1.944 + /** 1.945 + * Locate in this the first occurrence in the range 1.946 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1.947 + * in <TT>srcText</TT> in the range 1.948 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1.949 + * using bitwise comparison. 1.950 + * @param srcText The text to search for. 1.951 + * @param srcStart the offset into <TT>srcText</TT> at which 1.952 + * to start matching 1.953 + * @param srcLength the number of characters in <TT>srcText</TT> to match 1.954 + * @param start the offset into this at which to start matching 1.955 + * @param length the number of characters in this to search 1.956 + * @return The offset into this of the start of <TT>srcText</TT>, 1.957 + * or -1 if not found. 1.958 + * @stable ICU 2.0 1.959 + */ 1.960 + inline int32_t indexOf(const UnicodeString& srcText, 1.961 + int32_t srcStart, 1.962 + int32_t srcLength, 1.963 + int32_t start, 1.964 + int32_t length) const; 1.965 + 1.966 + /** 1.967 + * Locate in this the first occurrence of the characters in 1.968 + * <TT>srcChars</TT> 1.969 + * starting at offset <TT>start</TT>, using bitwise comparison. 1.970 + * @param srcChars The text to search for. 1.971 + * @param srcLength the number of characters in <TT>srcChars</TT> to match 1.972 + * @param start the offset into this at which to start matching 1.973 + * @return The offset into this of the start of <TT>srcChars</TT>, 1.974 + * or -1 if not found. 1.975 + * @stable ICU 2.0 1.976 + */ 1.977 + inline int32_t indexOf(const UChar *srcChars, 1.978 + int32_t srcLength, 1.979 + int32_t start) const; 1.980 + 1.981 + /** 1.982 + * Locate in this the first occurrence in the range 1.983 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1.984 + * in <TT>srcChars</TT>, using bitwise comparison.
1.985 + * @param srcChars The text to search for. 1.986 + * @param srcLength the number of characters in <TT>srcChars</TT> 1.987 + * @param start The offset at which searching will start. 1.988 + * @param length The number of characters to search 1.989 + * @return The offset into this of the start of <TT>srcChars</TT>, 1.990 + * or -1 if not found. 1.991 + * @stable ICU 2.0 1.992 + */ 1.993 + inline int32_t indexOf(const UChar *srcChars, 1.994 + int32_t srcLength, 1.995 + int32_t start, 1.996 + int32_t length) const; 1.997 + 1.998 + /** 1.999 + * Locate in this the first occurrence in the range 1.1000 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1.1001 + * in <TT>srcChars</TT> in the range 1.1002 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1.1003 + * using bitwise comparison. 1.1004 + * @param srcChars The text to search for. 1.1005 + * @param srcStart the offset into <TT>srcChars</TT> at which 1.1006 + * to start matching 1.1007 + * @param srcLength the number of characters in <TT>srcChars</TT> to match 1.1008 + * @param start the offset into this at which to start matching 1.1009 + * @param length the number of characters in this to search 1.1010 + * @return The offset into this of the start of <TT>srcChars</TT>, 1.1011 + * or -1 if not found. 1.1012 + * @stable ICU 2.0 1.1013 + */ 1.1014 + int32_t indexOf(const UChar *srcChars, 1.1015 + int32_t srcStart, 1.1016 + int32_t srcLength, 1.1017 + int32_t start, 1.1018 + int32_t length) const; 1.1019 + 1.1020 + /** 1.1021 + * Locate in this the first occurrence of the BMP code point <code>c</code>, 1.1022 + * using bitwise comparison. 1.1023 + * @param c The code unit to search for. 1.1024 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 1.1025 + * @stable ICU 2.0 1.1026 + */ 1.1027 + inline int32_t indexOf(UChar c) const; 1.1028 + 1.1029 + /** 1.1030 + * Locate in this the first occurrence of the code point <TT>c</TT>, 1.1031 + * using bitwise comparison.
1.1032 + * 1.1033 + * @param c The code point to search for. 1.1034 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 1.1035 + * @stable ICU 2.0 1.1036 + */ 1.1037 + inline int32_t indexOf(UChar32 c) const; 1.1038 + 1.1039 + /** 1.1040 + * Locate in this the first occurrence of the BMP code point <code>c</code>, 1.1041 + * starting at offset <TT>start</TT>, using bitwise comparison. 1.1042 + * @param c The code unit to search for. 1.1043 + * @param start The offset at which searching will start. 1.1044 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 1.1045 + * @stable ICU 2.0 1.1046 + */ 1.1047 + inline int32_t indexOf(UChar c, 1.1048 + int32_t start) const; 1.1049 + 1.1050 + /** 1.1051 + * Locate in this the first occurrence of the code point <TT>c</TT> 1.1052 + * starting at offset <TT>start</TT>, using bitwise comparison. 1.1053 + * 1.1054 + * @param c The code point to search for. 1.1055 + * @param start The offset at which searching will start. 1.1056 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 1.1057 + * @stable ICU 2.0 1.1058 + */ 1.1059 + inline int32_t indexOf(UChar32 c, 1.1060 + int32_t start) const; 1.1061 + 1.1062 + /** 1.1063 + * Locate in this the first occurrence of the BMP code point <code>c</code> 1.1064 + * in the range [<TT>start</TT>, <TT>start + length</TT>), 1.1065 + * using bitwise comparison. 1.1066 + * @param c The code unit to search for. 1.1067 + * @param start the offset into this at which to start matching 1.1068 + * @param length the number of characters in this to search 1.1069 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 
1.1070 + * @stable ICU 2.0 1.1071 + */ 1.1072 + inline int32_t indexOf(UChar c, 1.1073 + int32_t start, 1.1074 + int32_t length) const; 1.1075 + 1.1076 + /** 1.1077 + * Locate in this the first occurrence of the code point <TT>c</TT> 1.1078 + * in the range [<TT>start</TT>, <TT>start + length</TT>), 1.1079 + * using bitwise comparison. 1.1080 + * 1.1081 + * @param c The code point to search for. 1.1082 + * @param start the offset into this at which to start matching 1.1083 + * @param length the number of characters in this to search 1.1084 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 1.1085 + * @stable ICU 2.0 1.1086 + */ 1.1087 + inline int32_t indexOf(UChar32 c, 1.1088 + int32_t start, 1.1089 + int32_t length) const; 1.1090 + 1.1091 + /** 1.1092 + * Locate in this the last occurrence of the characters in <TT>text</TT>, 1.1093 + * using bitwise comparison. 1.1094 + * @param text The text to search for. 1.1095 + * @return The offset into this of the start of <TT>text</TT>, 1.1096 + * or -1 if not found. 1.1097 + * @stable ICU 2.0 1.1098 + */ 1.1099 + inline int32_t lastIndexOf(const UnicodeString& text) const; 1.1100 + 1.1101 + /** 1.1102 + * Locate in this the last occurrence of the characters in <TT>text</TT> 1.1103 + * starting at offset <TT>start</TT>, using bitwise comparison. 1.1104 + * @param text The text to search for. 1.1105 + * @param start The offset at which searching will start. 1.1106 + * @return The offset into this of the start of <TT>text</TT>, 1.1107 + * or -1 if not found. 1.1108 + * @stable ICU 2.0 1.1109 + */ 1.1110 + inline int32_t lastIndexOf(const UnicodeString& text, 1.1111 + int32_t start) const; 1.1112 + 1.1113 + /** 1.1114 + * Locate in this the last occurrence in the range 1.1115 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1.1116 + * in <TT>text</TT>, using bitwise comparison. 1.1117 + * @param text The text to search for. 1.1118 + * @param start The offset at which searching will start. 
1.1119 + * @param length The number of characters to search 1.1120 + * @return The offset into this of the start of <TT>text</TT>, 1.1121 + * or -1 if not found. 1.1122 + * @stable ICU 2.0 1.1123 + */ 1.1124 + inline int32_t lastIndexOf(const UnicodeString& text, 1.1125 + int32_t start, 1.1126 + int32_t length) const; 1.1127 + 1.1128 + /** 1.1129 + * Locate in this the last occurrence in the range 1.1130 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1.1131 + * in <TT>srcText</TT> in the range 1.1132 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1.1133 + * using bitwise comparison. 1.1134 + * @param srcText The text to search for. 1.1135 + * @param srcStart the offset into <TT>srcText</TT> at which 1.1136 + * to start matching 1.1137 + * @param srcLength the number of characters in <TT>srcText</TT> to match 1.1138 + * @param start the offset into this at which to start matching 1.1139 + * @param length the number of characters in this to search 1.1140 + * @return The offset into this of the start of <TT>srcText</TT>, 1.1141 + * or -1 if not found. 1.1142 + * @stable ICU 2.0 1.1143 + */ 1.1144 + inline int32_t lastIndexOf(const UnicodeString& srcText, 1.1145 + int32_t srcStart, 1.1146 + int32_t srcLength, 1.1147 + int32_t start, 1.1148 + int32_t length) const; 1.1149 + 1.1150 + /** 1.1151 + * Locate in this the last occurrence of the characters in <TT>srcChars</TT> 1.1152 + * starting at offset <TT>start</TT>, using bitwise comparison. 1.1153 + * @param srcChars The text to search for. 1.1154 + * @param srcLength the number of characters in <TT>srcChars</TT> to match 1.1155 + * @param start the offset into this at which to start matching 1.1156 + * @return The offset into this of the start of <TT>srcChars</TT>, 1.1157 + * or -1 if not found.
1.1158 + * @stable ICU 2.0 1.1159 + */ 1.1160 + inline int32_t lastIndexOf(const UChar *srcChars, 1.1161 + int32_t srcLength, 1.1162 + int32_t start) const; 1.1163 + 1.1164 + /** 1.1165 + * Locate in this the last occurrence in the range 1.1166 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1.1167 + * in <TT>srcChars</TT>, using bitwise comparison. 1.1168 + * @param srcChars The text to search for. 1.1169 + * @param srcLength the number of characters in <TT>srcChars</TT> 1.1170 + * @param start The offset at which searching will start. 1.1171 + * @param length The number of characters to search 1.1172 + * @return The offset into this of the start of <TT>srcChars</TT>, 1.1173 + * or -1 if not found. 1.1174 + * @stable ICU 2.0 1.1175 + */ 1.1176 + inline int32_t lastIndexOf(const UChar *srcChars, 1.1177 + int32_t srcLength, 1.1178 + int32_t start, 1.1179 + int32_t length) const; 1.1180 + 1.1181 + /** 1.1182 + * Locate in this the last occurrence in the range 1.1183 + * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1.1184 + * in <TT>srcChars</TT> in the range 1.1185 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1.1186 + * using bitwise comparison. 1.1187 + * @param srcChars The text to search for. 1.1188 + * @param srcStart the offset into <TT>srcChars</TT> at which 1.1189 + * to start matching 1.1190 + * @param srcLength the number of characters in <TT>srcChars</TT> to match 1.1191 + * @param start the offset into this at which to start matching 1.1192 + * @param length the number of characters in this to search 1.1193 + * @return The offset into this of the start of <TT>srcChars</TT>, 1.1194 + * or -1 if not found.
1.1195 + * @stable ICU 2.0 1.1196 + */ 1.1197 + int32_t lastIndexOf(const UChar *srcChars, 1.1198 + int32_t srcStart, 1.1199 + int32_t srcLength, 1.1200 + int32_t start, 1.1201 + int32_t length) const; 1.1202 + 1.1203 + /** 1.1204 + * Locate in this the last occurrence of the BMP code point <code>c</code>, 1.1205 + * using bitwise comparison. 1.1206 + * @param c The code unit to search for. 1.1207 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 1.1208 + * @stable ICU 2.0 1.1209 + */ 1.1210 + inline int32_t lastIndexOf(UChar c) const; 1.1211 + 1.1212 + /** 1.1213 + * Locate in this the last occurrence of the code point <TT>c</TT>, 1.1214 + * using bitwise comparison. 1.1215 + * 1.1216 + * @param c The code point to search for. 1.1217 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 1.1218 + * @stable ICU 2.0 1.1219 + */ 1.1220 + inline int32_t lastIndexOf(UChar32 c) const; 1.1221 + 1.1222 + /** 1.1223 + * Locate in this the last occurrence of the BMP code point <code>c</code> 1.1224 + * starting at offset <TT>start</TT>, using bitwise comparison. 1.1225 + * @param c The code unit to search for. 1.1226 + * @param start The offset at which searching will start. 1.1227 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 1.1228 + * @stable ICU 2.0 1.1229 + */ 1.1230 + inline int32_t lastIndexOf(UChar c, 1.1231 + int32_t start) const; 1.1232 + 1.1233 + /** 1.1234 + * Locate in this the last occurrence of the code point <TT>c</TT> 1.1235 + * starting at offset <TT>start</TT>, using bitwise comparison. 1.1236 + * 1.1237 + * @param c The code point to search for. 1.1238 + * @param start The offset at which searching will start. 1.1239 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 
1.1240 + * @stable ICU 2.0 1.1241 + */ 1.1242 + inline int32_t lastIndexOf(UChar32 c, 1.1243 + int32_t start) const; 1.1244 + 1.1245 + /** 1.1246 + * Locate in this the last occurrence of the BMP code point <code>c</code> 1.1247 + * in the range [<TT>start</TT>, <TT>start + length</TT>), 1.1248 + * using bitwise comparison. 1.1249 + * @param c The code unit to search for. 1.1250 + * @param start the offset into this at which to start matching 1.1251 + * @param length the number of characters in this to search 1.1252 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 1.1253 + * @stable ICU 2.0 1.1254 + */ 1.1255 + inline int32_t lastIndexOf(UChar c, 1.1256 + int32_t start, 1.1257 + int32_t length) const; 1.1258 + 1.1259 + /** 1.1260 + * Locate in this the last occurrence of the code point <TT>c</TT> 1.1261 + * in the range [<TT>start</TT>, <TT>start + length</TT>), 1.1262 + * using bitwise comparison. 1.1263 + * 1.1264 + * @param c The code point to search for. 1.1265 + * @param start the offset into this at which to start matching 1.1266 + * @param length the number of characters in this to search 1.1267 + * @return The offset into this of <TT>c</TT>, or -1 if not found. 1.1268 + * @stable ICU 2.0 1.1269 + */ 1.1270 + inline int32_t lastIndexOf(UChar32 c, 1.1271 + int32_t start, 1.1272 + int32_t length) const; 1.1273 + 1.1274 + 1.1275 + /* Character access */ 1.1276 + 1.1277 + /** 1.1278 + * Return the code unit at offset <tt>offset</tt>. 1.1279 + * If the offset is not valid (0..length()-1) then U+ffff is returned. 1.1280 + * @param offset a valid offset into the text 1.1281 + * @return the code unit at offset <tt>offset</tt> 1.1282 + * or 0xffff if the offset is not valid for this string 1.1283 + * @stable ICU 2.0 1.1284 + */ 1.1285 + inline UChar charAt(int32_t offset) const; 1.1286 + 1.1287 + /** 1.1288 + * Return the code unit at offset <tt>offset</tt>. 1.1289 + * If the offset is not valid (0..length()-1) then U+ffff is returned. 
1.1290 + * @param offset a valid offset into the text 1.1291 + * @return the code unit at offset <tt>offset</tt> 1.1292 + * @stable ICU 2.0 1.1293 + */ 1.1294 + inline UChar operator[] (int32_t offset) const; 1.1295 + 1.1296 + /** 1.1297 + * Return the code point that contains the code unit 1.1298 + * at offset <tt>offset</tt>. 1.1299 + * If the offset is not valid (0..length()-1) then U+ffff is returned. 1.1300 + * @param offset a valid offset into the text 1.1301 + * that indicates the text offset of any of the code units 1.1302 + * that will be assembled into a code point (21-bit value) and returned 1.1303 + * @return the code point of text at <tt>offset</tt> 1.1304 + * or 0xffff if the offset is not valid for this string 1.1305 + * @stable ICU 2.0 1.1306 + */ 1.1307 + UChar32 char32At(int32_t offset) const; 1.1308 + 1.1309 + /** 1.1310 + * Adjust a random-access offset so that 1.1311 + * it points to the beginning of a Unicode character. 1.1312 + * The offset that is passed in points to 1.1313 + * any code unit of a code point, 1.1314 + * while the returned offset will point to the first code unit 1.1315 + * of the same code point. 1.1316 + * In UTF-16, if the input offset points to a second surrogate 1.1317 + * of a surrogate pair, then the returned offset will point 1.1318 + * to the first surrogate. 1.1319 + * @param offset a valid offset into one code point of the text 1.1320 + * @return offset of the first code unit of the same code point 1.1321 + * @see U16_SET_CP_START 1.1322 + * @stable ICU 2.0 1.1323 + */ 1.1324 + int32_t getChar32Start(int32_t offset) const; 1.1325 + 1.1326 + /** 1.1327 + * Adjust a random-access offset so that 1.1328 + * it points behind a Unicode character. 1.1329 + * The offset that is passed in points behind 1.1330 + * any code unit of a code point, 1.1331 + * while the returned offset will point behind the last code unit 1.1332 + * of the same code point.
1.1333 + * In UTF-16, if the input offset points behind the first surrogate 1.1334 + * (i.e., to the second surrogate) 1.1335 + * of a surrogate pair, then the returned offset will point 1.1336 + * behind the second surrogate (i.e., to the first code unit after the surrogate pair). 1.1337 + * @param offset a valid offset after any code unit of a code point of the text 1.1338 + * @return offset of the first code unit after the same code point 1.1339 + * @see U16_SET_CP_LIMIT 1.1340 + * @stable ICU 2.0 1.1341 + */ 1.1342 + int32_t getChar32Limit(int32_t offset) const; 1.1343 + 1.1344 + /** 1.1345 + * Move the code unit index along the string by delta code points. 1.1346 + * Interpret the input index as a code unit-based offset into the string, 1.1347 + * move the index forward or backward by delta code points, and 1.1348 + * return the resulting index. 1.1349 + * The input index should point to the first code unit of a code point, 1.1350 + * if there is more than one. 1.1351 + * 1.1352 + * Both input and output indexes are code unit-based as for all 1.1353 + * string indexes/offsets in ICU (and other libraries, like MBCS char*). 1.1354 + * If delta<0 then the index is moved backward (toward the start of the string). 1.1355 + * If delta>0 then the index is moved forward (toward the end of the string). 1.1356 + * 1.1357 + * This behaves like CharacterIterator::move32(delta, kCurrent). 1.1358 + * 1.1359 + * Behavior for out-of-bounds indexes: 1.1360 + * <code>moveIndex32</code> pins the input index to 0..length(), i.e., 1.1361 + * if the input index<0 then it is pinned to 0; 1.1362 + * if the input index>length() then it is pinned to length(). 1.1363 + * Afterwards, the index is moved by <code>delta</code> code points 1.1364 + * forward or backward, 1.1365 + * but no further backward than to 0 and no further forward than to length(). 1.1366 + * The resulting index return value will be in between 0 and length(), inclusively.
1.1367 + * 1.1368 + * Examples: 1.1369 + * <pre> 1.1370 + * // s has code points 'a' U+10000 'b' U+10ffff U+2029 1.1371 + * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); 1.1372 + * 1.1373 + * // initial index: position of U+10000 1.1374 + * int32_t index=1; 1.1375 + * 1.1376 + * // the following examples will all result in index==4, position of U+10ffff 1.1377 + * 1.1378 + * // skip 2 code points from some position in the string 1.1379 + * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' 1.1380 + * 1.1381 + * // go to the 3rd code point from the start of s (0-based) 1.1382 + * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' 1.1383 + * 1.1384 + * // go to the next-to-last code point of s 1.1385 + * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff 1.1386 + * </pre> 1.1387 + * 1.1388 + * @param index input code unit index 1.1389 + * @param delta (signed) code point count to move the index forward or backward 1.1390 + * in the string 1.1391 + * @return the resulting code unit index 1.1392 + * @stable ICU 2.0 1.1393 + */ 1.1394 + int32_t moveIndex32(int32_t index, int32_t delta) const; 1.1395 + 1.1396 + /* Substring extraction */ 1.1397 + 1.1398 + /** 1.1399 + * Copy the characters in the range 1.1400 + * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, 1.1401 + * beginning at <tt>dstStart</tt>. 1.1402 + * If the string aliases to <code>dst</code> itself as an external buffer, 1.1403 + * then extract() will not copy the contents. 1.1404 + * 1.1405 + * @param start offset of first character which will be copied into the array 1.1406 + * @param length the number of characters to extract 1.1407 + * @param dst array in which to copy characters. The length of <tt>dst</tt> 1.1408 + * must be at least (<tt>dstStart + length</tt>). 
1.1409 + * @param dstStart the offset in <TT>dst</TT> where the first character 1.1410 + * will be extracted 1.1411 + * @stable ICU 2.0 1.1412 + */ 1.1413 + inline void extract(int32_t start, 1.1414 + int32_t length, 1.1415 + UChar *dst, 1.1416 + int32_t dstStart = 0) const; 1.1417 + 1.1418 + /** 1.1419 + * Copy the contents of the string into dest. 1.1420 + * This is a convenience function that 1.1421 + * checks if there is enough space in dest, 1.1422 + * extracts the entire string if possible, 1.1423 + * and NUL-terminates dest if possible. 1.1424 + * 1.1425 + * If the string fits into dest but cannot be NUL-terminated 1.1426 + * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. 1.1427 + * If the string itself does not fit into dest 1.1428 + * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. 1.1429 + * 1.1430 + * If the string aliases to <code>dest</code> itself as an external buffer, 1.1431 + * then extract() will not copy the contents. 1.1432 + * 1.1433 + * @param dest Destination string buffer. 1.1434 + * @param destCapacity Number of UChars available at dest. 1.1435 + * @param errorCode ICU error code. 1.1436 + * @return length() 1.1437 + * @stable ICU 2.0 1.1438 + */ 1.1439 + int32_t 1.1440 + extract(UChar *dest, int32_t destCapacity, 1.1441 + UErrorCode &errorCode) const; 1.1442 + 1.1443 + /** 1.1444 + * Copy the characters in the range 1.1445 + * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString 1.1446 + * <tt>target</tt>. 1.1447 + * @param start offset of first character which will be copied 1.1448 + * @param length the number of characters to extract 1.1449 + * @param target UnicodeString into which to copy characters.
1.1450 + * (No value is returned; the result is delivered through <TT>target</TT>.) 1.1451 + * @stable ICU 2.0 1.1452 + */ 1.1453 + inline void extract(int32_t start, 1.1454 + int32_t length, 1.1455 + UnicodeString& target) const; 1.1456 + 1.1457 + /** 1.1458 + * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1.1459 + * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. 1.1460 + * @param start offset of first character which will be copied into the array 1.1461 + * @param limit offset immediately following the last character to be copied 1.1462 + * @param dst array in which to copy characters. The length of <tt>dst</tt> 1.1463 + * must be at least (<tt>dstStart + (limit - start)</tt>). 1.1464 + * @param dstStart the offset in <TT>dst</TT> where the first character 1.1465 + * will be extracted 1.1466 + * @stable ICU 2.0 1.1467 + */ 1.1468 + inline void extractBetween(int32_t start, 1.1469 + int32_t limit, 1.1470 + UChar *dst, 1.1471 + int32_t dstStart = 0) const; 1.1472 + 1.1473 + /** 1.1474 + * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1.1475 + * into the UnicodeString <tt>target</tt>. Replaceable API. 1.1476 + * @param start offset of first character which will be copied 1.1477 + * @param limit offset immediately following the last character to be copied 1.1478 + * @param target UnicodeString into which to copy characters. 1.1479 + * (No value is returned; the result is delivered through <TT>target</TT>.) 1.1480 + * @stable ICU 2.0 1.1481 + */ 1.1482 + virtual void extractBetween(int32_t start, 1.1483 + int32_t limit, 1.1484 + UnicodeString& target) const; 1.1485 + 1.1486 + /** 1.1487 + * Copy the characters in the range 1.1488 + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters. 1.1489 + * All characters must be invariant (see utypes.h). 1.1490 + * Use US_INV as the last, signature-distinguishing parameter.
1.1491 + * 1.1492 + * This function does not write any more than <code>targetCapacity</code> 1.1493 + * characters but returns the length of the entire output string 1.1494 + * so that one can allocate a larger buffer and call the function again 1.1495 + * if necessary. 1.1496 + * The output string is NUL-terminated if possible. 1.1497 + * 1.1498 + * @param start offset of first character which will be copied 1.1499 + * @param startLength the number of characters to extract 1.1500 + * @param target the target buffer for extraction, can be NULL 1.1501 + * if targetCapacity is 0 1.1502 + * @param targetCapacity the length of the target buffer 1.1503 + * @param inv Signature-distinguishing parameter, use US_INV. 1.1504 + * @return the output string length, not including the terminating NUL 1.1505 + * @stable ICU 3.2 1.1506 + */ 1.1507 + int32_t extract(int32_t start, 1.1508 + int32_t startLength, 1.1509 + char *target, 1.1510 + int32_t targetCapacity, 1.1511 + enum EInvariant inv) const; 1.1512 + 1.1513 +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 1.1514 + 1.1515 + /** 1.1516 + * Copy the characters in the range 1.1517 + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1.1518 + * in the platform's default codepage. 1.1519 + * This function does not write any more than <code>targetLength</code> 1.1520 + * characters but returns the length of the entire output string 1.1521 + * so that one can allocate a larger buffer and call the function again 1.1522 + * if necessary. 1.1523 + * The output string is NUL-terminated if possible. 1.1524 + * 1.1525 + * @param start offset of first character which will be copied 1.1526 + * @param startLength the number of characters to extract 1.1527 + * @param target the target buffer for extraction 1.1528 + * @param targetLength the length of the target buffer 1.1529 + * If <TT>target</TT> is NULL, then the number of bytes required for 1.1530 + * <TT>target</TT> is returned.
1.1531 + * @return the output string length, not including the terminating NUL 1.1532 + * @stable ICU 2.0 1.1533 + */ 1.1534 + int32_t extract(int32_t start, 1.1535 + int32_t startLength, 1.1536 + char *target, 1.1537 + uint32_t targetLength) const; 1.1538 + 1.1539 +#endif 1.1540 + 1.1541 +#if !UCONFIG_NO_CONVERSION 1.1542 + 1.1543 + /** 1.1544 + * Copy the characters in the range 1.1545 + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1.1546 + * in a specified codepage. 1.1547 + * The output string is NUL-terminated. 1.1548 + * 1.1549 + * Recommendation: For invariant-character strings use 1.1550 + * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1.1551 + * because it avoids object code dependencies of UnicodeString on 1.1552 + * the conversion code. 1.1553 + * 1.1554 + * @param start offset of first character which will be copied 1.1555 + * @param startLength the number of characters to extract 1.1556 + * @param target the target buffer for extraction 1.1557 + * @param codepage the desired codepage for the characters. 0 has 1.1558 + * the special meaning of the default codepage 1.1559 + * If <code>codepage</code> is an empty string (<code>""</code>), 1.1560 + * then a simple conversion is performed on the codepage-invariant 1.1561 + * subset ("invariant characters") of the platform encoding. See utypes.h. 1.1562 + * If <TT>target</TT> is NULL, then the number of bytes required for 1.1563 + * <TT>target</TT> is returned. It is assumed that the target is big enough 1.1564 + * to fit all of the characters. 
1.1565 + * @return the output string length, not including the terminating NUL 1.1566 + * @stable ICU 2.0 1.1567 + */ 1.1568 + inline int32_t extract(int32_t start, 1.1569 + int32_t startLength, 1.1570 + char *target, 1.1571 + const char *codepage = 0) const; 1.1572 + 1.1573 + /** 1.1574 + * Copy the characters in the range 1.1575 + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1.1576 + * in a specified codepage. 1.1577 + * This function does not write any more than <code>targetLength</code> 1.1578 + * characters but returns the length of the entire output string 1.1579 + * so that one can allocate a larger buffer and call the function again 1.1580 + * if necessary. 1.1581 + * The output string is NUL-terminated if possible. 1.1582 + * 1.1583 + * Recommendation: For invariant-character strings use 1.1584 + * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1.1585 + * because it avoids object code dependencies of UnicodeString on 1.1586 + * the conversion code. 1.1587 + * 1.1588 + * @param start offset of first character which will be copied 1.1589 + * @param startLength the number of characters to extract 1.1590 + * @param target the target buffer for extraction 1.1591 + * @param targetLength the length of the target buffer 1.1592 + * @param codepage the desired codepage for the characters. 0 has 1.1593 + * the special meaning of the default codepage 1.1594 + * If <code>codepage</code> is an empty string (<code>""</code>), 1.1595 + * then a simple conversion is performed on the codepage-invariant 1.1596 + * subset ("invariant characters") of the platform encoding. See utypes.h. 1.1597 + * If <TT>target</TT> is NULL, then the number of bytes required for 1.1598 + * <TT>target</TT> is returned. 
1.1599 + * @return the output string length, not including the terminating NUL 1.1600 + * @stable ICU 2.0 1.1601 + */ 1.1602 + int32_t extract(int32_t start, 1.1603 + int32_t startLength, 1.1604 + char *target, 1.1605 + uint32_t targetLength, 1.1606 + const char *codepage) const; 1.1607 + 1.1608 + /** 1.1609 + * Convert the UnicodeString into a codepage string using an existing UConverter. 1.1610 + * The output string is NUL-terminated if possible. 1.1611 + * 1.1612 + * This function avoids the overhead of opening and closing a converter if 1.1613 + * multiple strings are extracted. 1.1614 + * 1.1615 + * @param dest destination string buffer, can be NULL if destCapacity==0 1.1616 + * @param destCapacity the number of chars available at dest 1.1617 + * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), 1.1618 + * or NULL for the default converter 1.1619 + * @param errorCode normal ICU error code 1.1620 + * @return the length of the output string, not counting the terminating NUL; 1.1621 + * if the length is greater than destCapacity, then the string will not fit 1.1622 + * and a buffer of the indicated length would need to be passed in 1.1623 + * @stable ICU 2.0 1.1624 + */ 1.1625 + int32_t extract(char *dest, int32_t destCapacity, 1.1626 + UConverter *cnv, 1.1627 + UErrorCode &errorCode) const; 1.1628 + 1.1629 +#endif 1.1630 + 1.1631 + /** 1.1632 + * Create a temporary substring for the specified range. 1.1633 + * Unlike the substring constructor and setTo() functions, 1.1634 + * the object returned here will be a read-only alias (using getBuffer()) 1.1635 + * rather than copying the text. 1.1636 + * As a result, this substring operation is much faster but requires 1.1637 + * that the original string not be modified or deleted during the lifetime 1.1638 + * of the returned substring object. 
1.1639 + * @param start offset of the first character visible in the substring 1.1640 + * @param length length of the substring 1.1641 + * @return a read-only alias UnicodeString object for the substring 1.1642 + * @stable ICU 4.4 1.1643 + */ 1.1644 + UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 1.1645 + 1.1646 + /** 1.1647 + * Create a temporary substring for the specified range. 1.1648 + * Same as tempSubString(start, length) except that the substring range 1.1649 + * is specified as a (start, limit) pair (with an exclusive limit index) 1.1650 + * rather than a (start, length) pair. 1.1651 + * @param start offset of the first character visible in the substring 1.1652 + * @param limit offset immediately following the last character visible in the substring 1.1653 + * @return a read-only alias UnicodeString object for the substring 1.1654 + * @stable ICU 4.4 1.1655 + */ 1.1656 + inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 1.1657 + 1.1658 + /** 1.1659 + * Convert the UnicodeString to UTF-8 and write the result 1.1660 + * to a ByteSink. This is called by toUTF8String(). 1.1661 + * Unpaired surrogates are replaced with U+FFFD. 1.1662 + * Calls u_strToUTF8WithSub(). 1.1663 + * 1.1664 + * @param sink A ByteSink to which the UTF-8 version of the string is written. 1.1665 + * sink.Flush() is called at the end. 1.1666 + * @stable ICU 4.2 1.1667 + * @see toUTF8String 1.1668 + */ 1.1669 + void toUTF8(ByteSink &sink) const; 1.1670 + 1.1671 +#if U_HAVE_STD_STRING 1.1672 + 1.1673 + /** 1.1674 + * Convert the UnicodeString to UTF-8 and append the result 1.1675 + * to a standard string. 1.1676 + * Unpaired surrogates are replaced with U+FFFD. 1.1677 + * Calls toUTF8(). 1.1678 + * 1.1679 + * @param result A standard string (or a compatible object) 1.1680 + * to which the UTF-8 version of the string is appended. 1.1681 + * @return The string object. 
1.1682 + * @stable ICU 4.2 1.1683 + * @see toUTF8 1.1684 + */ 1.1685 + template<typename StringClass> 1.1686 + StringClass &toUTF8String(StringClass &result) const { 1.1687 + StringByteSink<StringClass> sbs(&result); 1.1688 + toUTF8(sbs); 1.1689 + return result; 1.1690 + } 1.1691 + 1.1692 +#endif 1.1693 + 1.1694 + /** 1.1695 + * Convert the UnicodeString to UTF-32. 1.1696 + * Unpaired surrogates are replaced with U+FFFD. 1.1697 + * Calls u_strToUTF32WithSub(). 1.1698 + * 1.1699 + * @param utf32 destination string buffer, can be NULL if capacity==0 1.1700 + * @param capacity the number of UChar32s available at utf32 1.1701 + * @param errorCode Standard ICU error code. Its input value must 1.1702 + * pass the U_SUCCESS() test, or else the function returns 1.1703 + * immediately. Check for U_FAILURE() on output or use with 1.1704 + * function chaining. (See User Guide for details.) 1.1705 + * @return The length of the UTF-32 string. 1.1706 + * @see fromUTF32 1.1707 + * @stable ICU 4.2 1.1708 + */ 1.1709 + int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 1.1710 + 1.1711 + /* Length operations */ 1.1712 + 1.1713 + /** 1.1714 + * Return the length of the UnicodeString object. 1.1715 + * The length is the number of UChar code units that are in the UnicodeString. 1.1716 + * If you want the number of code points, please use countChar32(). 1.1717 + * @return the length of the UnicodeString object 1.1718 + * @see countChar32 1.1719 + * @stable ICU 2.0 1.1720 + */ 1.1721 + inline int32_t length(void) const; 1.1722 + 1.1723 + /** 1.1724 + * Count Unicode code points in the length UChar code units of the string. 1.1725 + * A code point may occupy either one or two UChar code units. 1.1726 + * Counting code points involves reading all code units. 1.1727 + * 1.1728 + * This function is basically the inverse of moveIndex32().
1.1729 + * 1.1730 + * @param start the index of the first code unit to check 1.1731 + * @param length the number of UChar code units to check 1.1732 + * @return the number of code points in the specified code units 1.1733 + * @see length 1.1734 + * @stable ICU 2.0 1.1735 + */ 1.1736 + int32_t 1.1737 + countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 1.1738 + 1.1739 + /** 1.1740 + * Check if the length UChar code units of the string 1.1741 + * contain more Unicode code points than a certain number. 1.1742 + * This is more efficient than counting all code points in this part of the string 1.1743 + * and comparing that number with a threshold. 1.1744 + * This function may not need to scan the string at all if the length 1.1745 + * falls within a certain range, and 1.1746 + * never needs to count more than 'number+1' code points. 1.1747 + * Logically equivalent to (countChar32(start, length)>number). 1.1748 + * A Unicode code point may occupy either one or two UChar code units. 1.1749 + * 1.1750 + * @param start the index of the first code unit to check (0 for the entire string) 1.1751 + * @param length the number of UChar code units to check 1.1752 + * (use INT32_MAX for the entire string; remember that start/length 1.1753 + * values are pinned) 1.1754 + * @param number The number of code points in the (sub)string is compared against 1.1755 + * the 'number' parameter. 1.1756 + * @return Boolean value for whether the string contains more Unicode code points 1.1757 + * than 'number'. Same as (u_countChar32(s, length)>number). 1.1758 + * @see countChar32 1.1759 + * @see u_strHasMoreChar32Than 1.1760 + * @stable ICU 2.4 1.1761 + */ 1.1762 + UBool 1.1763 + hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 1.1764 + 1.1765 + /** 1.1766 + * Determine if this string is empty. 1.1767 + * @return TRUE if this string contains 0 characters, FALSE otherwise.
1.1768 + * @stable ICU 2.0 1.1769 + */ 1.1770 + inline UBool isEmpty(void) const; 1.1771 + 1.1772 + /** 1.1773 + * Return the capacity of the internal buffer of the UnicodeString object. 1.1774 + * This is useful together with the getBuffer functions. 1.1775 + * See there for details. 1.1776 + * 1.1777 + * @return the number of UChars available in the internal buffer 1.1778 + * @see getBuffer 1.1779 + * @stable ICU 2.0 1.1780 + */ 1.1781 + inline int32_t getCapacity(void) const; 1.1782 + 1.1783 + /* Other operations */ 1.1784 + 1.1785 + /** 1.1786 + * Generate a hash code for this object. 1.1787 + * @return The hash code of this UnicodeString. 1.1788 + * @stable ICU 2.0 1.1789 + */ 1.1790 + inline int32_t hashCode(void) const; 1.1791 + 1.1792 + /** 1.1793 + * Determine if this object contains a valid string. 1.1794 + * A bogus string has no value. It is different from an empty string, 1.1795 + * although in both cases isEmpty() returns TRUE and length() returns 0. 1.1796 + * setToBogus() and isBogus() can be used to indicate that no string value is available. 1.1797 + * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and 1.1798 + * length() returns 0. 1.1799 + * 1.1800 + * @return TRUE if the string is bogus/invalid, FALSE otherwise 1.1801 + * @see setToBogus() 1.1802 + * @stable ICU 2.0 1.1803 + */ 1.1804 + inline UBool isBogus(void) const; 1.1805 + 1.1806 + 1.1807 + //======================================== 1.1808 + // Write operations 1.1809 + //======================================== 1.1810 + 1.1811 + /* Assignment operations */ 1.1812 + 1.1813 + /** 1.1814 + * Assignment operator. Replace the characters in this UnicodeString 1.1815 + * with the characters from <TT>srcText</TT>. 
1.1816 + * @param srcText The text containing the characters to replace 1.1817 + * @return a reference to this 1.1818 + * @stable ICU 2.0 1.1819 + */ 1.1820 + UnicodeString &operator=(const UnicodeString &srcText); 1.1821 + 1.1822 + /** 1.1823 + * Almost the same as the assignment operator. 1.1824 + * Replace the characters in this UnicodeString 1.1825 + * with the characters from <code>srcText</code>. 1.1826 + * 1.1827 + * This function works the same as the assignment operator 1.1828 + * for all strings except for ones that are readonly aliases. 1.1829 + * 1.1830 + * Starting with ICU 2.4, the assignment operator and the copy constructor 1.1831 + * allocate a new buffer and copy the buffer contents even for readonly aliases. 1.1832 + * This function implements the old, more efficient but less safe behavior 1.1833 + * of making this string also a readonly alias to the same buffer. 1.1834 + * 1.1835 + * The fastCopyFrom function must be used only if it is known that the lifetime of 1.1836 + * this UnicodeString does not exceed the lifetime of the aliased buffer 1.1837 + * including its contents, for example for strings from resource bundles 1.1838 + * or aliases to string constants. 1.1839 + * 1.1840 + * @param src The text containing the characters to replace. 1.1841 + * @return a reference to this 1.1842 + * @stable ICU 2.4 1.1843 + */ 1.1844 + UnicodeString &fastCopyFrom(const UnicodeString &src); 1.1845 + 1.1846 + /** 1.1847 + * Assignment operator. Replace the characters in this UnicodeString 1.1848 + * with the code unit <TT>ch</TT>. 1.1849 + * @param ch the code unit to replace 1.1850 + * @return a reference to this 1.1851 + * @stable ICU 2.0 1.1852 + */ 1.1853 + inline UnicodeString& operator= (UChar ch); 1.1854 + 1.1855 + /** 1.1856 + * Assignment operator. Replace the characters in this UnicodeString 1.1857 + * with the code point <TT>ch</TT>. 
1.1858 + * @param ch the code point to replace 1.1859 + * @return a reference to this 1.1860 + * @stable ICU 2.0 1.1861 + */ 1.1862 + inline UnicodeString& operator= (UChar32 ch); 1.1863 + 1.1864 + /** 1.1865 + * Set the text in the UnicodeString object to the characters 1.1866 + * in <TT>srcText</TT> in the range 1.1867 + * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). 1.1868 + * <TT>srcText</TT> is not modified. 1.1869 + * @param srcText the source for the new characters 1.1870 + * @param srcStart the offset into <TT>srcText</TT> where new characters 1.1871 + * will be obtained 1.1872 + * @return a reference to this 1.1873 + * @stable ICU 2.2 1.1874 + */ 1.1875 + inline UnicodeString& setTo(const UnicodeString& srcText, 1.1876 + int32_t srcStart); 1.1877 + 1.1878 + /** 1.1879 + * Set the text in the UnicodeString object to the characters 1.1880 + * in <TT>srcText</TT> in the range 1.1881 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1.1882 + * <TT>srcText</TT> is not modified. 1.1883 + * @param srcText the source for the new characters 1.1884 + * @param srcStart the offset into <TT>srcText</TT> where new characters 1.1885 + * will be obtained 1.1886 + * @param srcLength the number of characters in <TT>srcText</TT> in the 1.1887 + * replace string. 1.1888 + * @return a reference to this 1.1889 + * @stable ICU 2.0 1.1890 + */ 1.1891 + inline UnicodeString& setTo(const UnicodeString& srcText, 1.1892 + int32_t srcStart, 1.1893 + int32_t srcLength); 1.1894 + 1.1895 + /** 1.1896 + * Set the text in the UnicodeString object to the characters in 1.1897 + * <TT>srcText</TT>. 1.1898 + * <TT>srcText</TT> is not modified. 1.1899 + * @param srcText the source for the new characters 1.1900 + * @return a reference to this 1.1901 + * @stable ICU 2.0 1.1902 + */ 1.1903 + inline UnicodeString& setTo(const UnicodeString& srcText); 1.1904 + 1.1905 + /** 1.1906 + * Set the characters in the UnicodeString object to the characters 1.1907 + * in <TT>srcChars</TT>. 
<TT>srcChars</TT> is not modified. 1.1908 + * @param srcChars the source for the new characters 1.1909 + * @param srcLength the number of Unicode characters in srcChars. 1.1910 + * @return a reference to this 1.1911 + * @stable ICU 2.0 1.1912 + */ 1.1913 + inline UnicodeString& setTo(const UChar *srcChars, 1.1914 + int32_t srcLength); 1.1915 + 1.1916 + /** 1.1917 + * Set the characters in the UnicodeString object to the code unit 1.1918 + * <TT>srcChar</TT>. 1.1919 + * @param srcChar the code unit which becomes the UnicodeString's character 1.1920 + * content 1.1921 + * @return a reference to this 1.1922 + * @stable ICU 2.0 1.1923 + */ 1.1924 + UnicodeString& setTo(UChar srcChar); 1.1925 + 1.1926 + /** 1.1927 + * Set the characters in the UnicodeString object to the code point 1.1928 + * <TT>srcChar</TT>. 1.1929 + * @param srcChar the code point which becomes the UnicodeString's character 1.1930 + * content 1.1931 + * @return a reference to this 1.1932 + * @stable ICU 2.0 1.1933 + */ 1.1934 + UnicodeString& setTo(UChar32 srcChar); 1.1935 + 1.1936 + /** 1.1937 + * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. 1.1938 + * The text will be used for the UnicodeString object, but 1.1939 + * it will not be released when the UnicodeString is destroyed. 1.1940 + * This has copy-on-write semantics: 1.1941 + * When the string is modified, then the buffer is first copied into 1.1942 + * newly allocated memory. 1.1943 + * The aliased buffer is never modified. 1.1944 + * 1.1945 + * In an assignment to another UnicodeString, when using the copy constructor 1.1946 + * or the assignment operator, the text will be copied. 1.1947 + * When using fastCopyFrom(), the text will be aliased again, 1.1948 + * so that both strings then alias the same readonly-text. 1.1949 + * 1.1950 + * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 1.1951 + * This must be true if <code>textLength==-1</code>.
1.1952 + * @param text The characters to alias for the UnicodeString. 1.1953 + * @param textLength The number of Unicode characters in <code>text</code> to alias. 1.1954 + * If -1, then this function will determine the length 1.1955 + * by calling <code>u_strlen()</code>. 1.1956 + * @return a reference to this 1.1957 + * @stable ICU 2.0 1.1958 + */ 1.1959 + UnicodeString &setTo(UBool isTerminated, 1.1960 + const UChar *text, 1.1961 + int32_t textLength); 1.1962 + 1.1963 + /** 1.1964 + * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. 1.1965 + * The text will be used for the UnicodeString object, but 1.1966 + * it will not be released when the UnicodeString is destroyed. 1.1967 + * This has write-through semantics: 1.1968 + * For as long as the capacity of the buffer is sufficient, write operations 1.1969 + * will directly affect the buffer. When more capacity is necessary, then 1.1970 + * a new buffer will be allocated and the contents copied as with regularly 1.1971 + * constructed strings. 1.1972 + * In an assignment to another UnicodeString, the buffer will be copied. 1.1973 + * The extract(UChar *dst) function detects whether the dst pointer is the same 1.1974 + * as the string buffer itself and will in this case not copy the contents. 1.1975 + * 1.1976 + * @param buffer The characters to alias for the UnicodeString. 1.1977 + * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 1.1978 + * @param buffCapacity The size of <code>buffer</code> in UChars. 1.1979 + * @return a reference to this 1.1980 + * @stable ICU 2.0 1.1981 + */ 1.1982 + UnicodeString &setTo(UChar *buffer, 1.1983 + int32_t buffLength, 1.1984 + int32_t buffCapacity); 1.1985 + 1.1986 + /** 1.1987 + * Make this UnicodeString object invalid. 1.1988 + * The string will test TRUE with isBogus(). 1.1989 + * 1.1990 + * A bogus string has no value. It is different from an empty string.
1.1991 + * It can be used to indicate that no string value is available. 1.1992 + * getBuffer() and getTerminatedBuffer() return NULL, and 1.1993 + * length() returns 0. 1.1994 + * 1.1995 + * This utility function is used throughout the UnicodeString 1.1996 + * implementation to indicate that a UnicodeString operation failed, 1.1997 + * and may be used in other functions, 1.1998 + * especially but not exclusively when such functions do not 1.1999 + * take a UErrorCode for simplicity. 1.2000 + * 1.2001 + * The following methods, and no others, will clear a string object's bogus flag: 1.2002 + * - remove() 1.2003 + * - remove(0, INT32_MAX) 1.2004 + * - truncate(0) 1.2005 + * - operator=() (assignment operator) 1.2006 + * - setTo(...) 1.2007 + * 1.2008 + * The simplest way to turn a bogus string into an empty one 1.2009 + * is to use the remove() function. 1.2010 + * Examples for other functions that are equivalent to "set to empty string": 1.2011 + * \code 1.2012 + * if(s.isBogus()) { 1.2013 + * s.remove(); // set to an empty string (remove all), or 1.2014 + * s.remove(0, INT32_MAX); // set to an empty string (remove all), or 1.2015 + * s.truncate(0); // set to an empty string (complete truncation), or 1.2016 + * s=UnicodeString(); // assign an empty string, or 1.2017 + * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or 1.2018 + * static const UChar nul=0; 1.2019 + * s.setTo(&nul, 0); // set to an empty C Unicode string 1.2020 + * } 1.2021 + * \endcode 1.2022 + * 1.2023 + * @see isBogus() 1.2024 + * @stable ICU 2.0 1.2025 + */ 1.2026 + void setToBogus(); 1.2027 + 1.2028 + /** 1.2029 + * Set the character at the specified offset to the specified character.
1.2030 + * @param offset A valid offset into the text of the character to set 1.2031 + * @param ch The new character 1.2032 + * @return A reference to this 1.2033 + * @stable ICU 2.0 1.2034 + */ 1.2035 + UnicodeString& setCharAt(int32_t offset, 1.2036 + UChar ch); 1.2037 + 1.2038 + 1.2039 + /* Append operations */ 1.2040 + 1.2041 + /** 1.2042 + * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString 1.2043 + * object. 1.2044 + * @param ch the code unit to be appended 1.2045 + * @return a reference to this 1.2046 + * @stable ICU 2.0 1.2047 + */ 1.2048 + inline UnicodeString& operator+= (UChar ch); 1.2049 + 1.2050 + /** 1.2051 + * Append operator. Append the code point <TT>ch</TT> to the UnicodeString 1.2052 + * object. 1.2053 + * @param ch the code point to be appended 1.2054 + * @return a reference to this 1.2055 + * @stable ICU 2.0 1.2056 + */ 1.2057 + inline UnicodeString& operator+= (UChar32 ch); 1.2058 + 1.2059 + /** 1.2060 + * Append operator. Append the characters in <TT>srcText</TT> to the 1.2061 + * UnicodeString object. <TT>srcText</TT> is not modified. 1.2062 + * @param srcText the source for the new characters 1.2063 + * @return a reference to this 1.2064 + * @stable ICU 2.0 1.2065 + */ 1.2066 + inline UnicodeString& operator+= (const UnicodeString& srcText); 1.2067 + 1.2068 + /** 1.2069 + * Append the characters 1.2070 + * in <TT>srcText</TT> in the range 1.2071 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the 1.2072 + * UnicodeString object. <TT>srcText</TT> 1.2073 + * is not modified.
1.2074 + * @param srcText the source for the new characters 1.2075 + * @param srcStart the offset into <TT>srcText</TT> where new characters 1.2076 + * will be obtained 1.2077 + * @param srcLength the number of characters in <TT>srcText</TT> in 1.2078 + * the append string 1.2079 + * @return a reference to this 1.2080 + * @stable ICU 2.0 1.2081 + */ 1.2082 + inline UnicodeString& append(const UnicodeString& srcText, 1.2083 + int32_t srcStart, 1.2084 + int32_t srcLength); 1.2085 + 1.2086 + /** 1.2087 + * Append the characters in <TT>srcText</TT> to the UnicodeString object. 1.2088 + * <TT>srcText</TT> is not modified. 1.2089 + * @param srcText the source for the new characters 1.2090 + * @return a reference to this 1.2091 + * @stable ICU 2.0 1.2092 + */ 1.2093 + inline UnicodeString& append(const UnicodeString& srcText); 1.2094 + 1.2095 + /** 1.2096 + * Append the characters in <TT>srcChars</TT> in the range 1.2097 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString 1.2098 + * object. 1.2099 + * <TT>srcChars</TT> is not modified. 1.2100 + * @param srcChars the source for the new characters 1.2101 + * @param srcStart the offset into <TT>srcChars</TT> where new characters 1.2102 + * will be obtained 1.2103 + * @param srcLength the number of characters in <TT>srcChars</TT> in 1.2104 + * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated 1.2105 + * @return a reference to this 1.2106 + * @stable ICU 2.0 1.2107 + */ 1.2108 + inline UnicodeString& append(const UChar *srcChars, 1.2109 + int32_t srcStart, 1.2110 + int32_t srcLength); 1.2111 + 1.2112 + /** 1.2113 + * Append the characters in <TT>srcChars</TT> to the UnicodeString object. 1.2114 + * <TT>srcChars</TT> is not modified.
1.2115 + * @param srcChars the source for the new characters 1.2116 + * @param srcLength the number of Unicode characters in <TT>srcChars</TT>; 1.2117 + * can be -1 if <TT>srcChars</TT> is NUL-terminated 1.2118 + * @return a reference to this 1.2119 + * @stable ICU 2.0 1.2120 + */ 1.2121 + inline UnicodeString& append(const UChar *srcChars, 1.2122 + int32_t srcLength); 1.2123 + 1.2124 + /** 1.2125 + * Append the code unit <TT>srcChar</TT> to the UnicodeString object. 1.2126 + * @param srcChar the code unit to append 1.2127 + * @return a reference to this 1.2128 + * @stable ICU 2.0 1.2129 + */ 1.2130 + inline UnicodeString& append(UChar srcChar); 1.2131 + 1.2132 + /** 1.2133 + * Append the code point <TT>srcChar</TT> to the UnicodeString object. 1.2134 + * @param srcChar the code point to append 1.2135 + * @return a reference to this 1.2136 + * @stable ICU 2.0 1.2137 + */ 1.2138 + UnicodeString& append(UChar32 srcChar); 1.2139 + 1.2140 + 1.2141 + /* Insert operations */ 1.2142 + 1.2143 + /** 1.2144 + * Insert the characters in <TT>srcText</TT> in the range 1.2145 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 1.2146 + * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 1.2147 + * @param start the offset where the insertion begins 1.2148 + * @param srcText the source for the new characters 1.2149 + * @param srcStart the offset into <TT>srcText</TT> where new characters 1.2150 + * will be obtained 1.2151 + * @param srcLength the number of characters in <TT>srcText</TT> in 1.2152 + * the insert string 1.2153 + * @return a reference to this 1.2154 + * @stable ICU 2.0 1.2155 + */ 1.2156 + inline UnicodeString& insert(int32_t start, 1.2157 + const UnicodeString& srcText, 1.2158 + int32_t srcStart, 1.2159 + int32_t srcLength); 1.2160 + 1.2161 + /** 1.2162 + * Insert the characters in <TT>srcText</TT> into the UnicodeString object 1.2163 + * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
1.2164 + * @param start the offset where the insertion begins 1.2165 + * @param srcText the source for the new characters 1.2166 + * @return a reference to this 1.2167 + * @stable ICU 2.0 1.2168 + */ 1.2169 + inline UnicodeString& insert(int32_t start, 1.2170 + const UnicodeString& srcText); 1.2171 + 1.2172 + /** 1.2173 + * Insert the characters in <TT>srcChars</TT> in the range 1.2174 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 1.2175 + * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 1.2176 + * @param start the offset at which the insertion begins 1.2177 + * @param srcChars the source for the new characters 1.2178 + * @param srcStart the offset into <TT>srcChars</TT> where new characters 1.2179 + * will be obtained 1.2180 + * @param srcLength the number of characters in <TT>srcChars</TT> 1.2181 + * in the insert string 1.2182 + * @return a reference to this 1.2183 + * @stable ICU 2.0 1.2184 + */ 1.2185 + inline UnicodeString& insert(int32_t start, 1.2186 + const UChar *srcChars, 1.2187 + int32_t srcStart, 1.2188 + int32_t srcLength); 1.2189 + 1.2190 + /** 1.2191 + * Insert the characters in <TT>srcChars</TT> into the UnicodeString object 1.2192 + * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 1.2193 + * @param start the offset where the insertion begins 1.2194 + * @param srcChars the source for the new characters 1.2195 + * @param srcLength the number of Unicode characters in srcChars. 1.2196 + * @return a reference to this 1.2197 + * @stable ICU 2.0 1.2198 + */ 1.2199 + inline UnicodeString& insert(int32_t start, 1.2200 + const UChar *srcChars, 1.2201 + int32_t srcLength); 1.2202 + 1.2203 + /** 1.2204 + * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at 1.2205 + * offset <TT>start</TT>. 
1.2206 + * @param start the offset at which the insertion occurs 1.2207 + * @param srcChar the code unit to insert 1.2208 + * @return a reference to this 1.2209 + * @stable ICU 2.0 1.2210 + */ 1.2211 + inline UnicodeString& insert(int32_t start, 1.2212 + UChar srcChar); 1.2213 + 1.2214 + /** 1.2215 + * Insert the code point <TT>srcChar</TT> into the UnicodeString object at 1.2216 + * offset <TT>start</TT>. 1.2217 + * @param start the offset at which the insertion occurs 1.2218 + * @param srcChar the code point to insert 1.2219 + * @return a reference to this 1.2220 + * @stable ICU 2.0 1.2221 + */ 1.2222 + inline UnicodeString& insert(int32_t start, 1.2223 + UChar32 srcChar); 1.2224 + 1.2225 + 1.2226 + /* Replace operations */ 1.2227 + 1.2228 + /** 1.2229 + * Replace the characters in the range 1.2230 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 1.2231 + * <TT>srcText</TT> in the range 1.2232 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1.2233 + * <TT>srcText</TT> is not modified. 1.2234 + * @param start the offset at which the replace operation begins 1.2235 + * @param length the number of characters to replace. The character at 1.2236 + * <TT>start + length</TT> is not modified. 1.2237 + * @param srcText the source for the new characters 1.2238 + * @param srcStart the offset into <TT>srcText</TT> where new characters 1.2239 + * will be obtained 1.2240 + * @param srcLength the number of characters in <TT>srcText</TT> in 1.2241 + * the replace string 1.2242 + * @return a reference to this 1.2243 + * @stable ICU 2.0 1.2244 + */ 1.2245 + UnicodeString& replace(int32_t start, 1.2246 + int32_t length, 1.2247 + const UnicodeString& srcText, 1.2248 + int32_t srcStart, 1.2249 + int32_t srcLength); 1.2250 + 1.2251 + /** 1.2252 + * Replace the characters in the range 1.2253 + * [<TT>start</TT>, <TT>start + length</TT>) 1.2254 + * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is 1.2255 + * not modified. 
1.2256 + * @param start the offset at which the replace operation begins 1.2257 + * @param length the number of characters to replace. The character at 1.2258 + * <TT>start + length</TT> is not modified. 1.2259 + * @param srcText the source for the new characters 1.2260 + * @return a reference to this 1.2261 + * @stable ICU 2.0 1.2262 + */ 1.2263 + UnicodeString& replace(int32_t start, 1.2264 + int32_t length, 1.2265 + const UnicodeString& srcText); 1.2266 + 1.2267 + /** 1.2268 + * Replace the characters in the range 1.2269 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 1.2270 + * <TT>srcChars</TT> in the range 1.2271 + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> 1.2272 + * is not modified. 1.2273 + * @param start the offset at which the replace operation begins 1.2274 + * @param length the number of characters to replace. The character at 1.2275 + * <TT>start + length</TT> is not modified. 1.2276 + * @param srcChars the source for the new characters 1.2277 + * @param srcStart the offset into <TT>srcChars</TT> where new characters 1.2278 + * will be obtained 1.2279 + * @param srcLength the number of characters in <TT>srcChars</TT> 1.2280 + * in the replace string 1.2281 + * @return a reference to this 1.2282 + * @stable ICU 2.0 1.2283 + */ 1.2284 + UnicodeString& replace(int32_t start, 1.2285 + int32_t length, 1.2286 + const UChar *srcChars, 1.2287 + int32_t srcStart, 1.2288 + int32_t srcLength); 1.2289 + 1.2290 + /** 1.2291 + * Replace the characters in the range 1.2292 + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 1.2293 + * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 1.2294 + * @param start the offset at which the replace operation begins 1.2295 + * @param length number of characters to replace. The character at 1.2296 + * <TT>start + length</TT> is not modified. 
1.2297 + * @param srcChars the source for the new characters 1.2298 + * @param srcLength the number of Unicode characters in srcChars 1.2299 + * @return a reference to this 1.2300 + * @stable ICU 2.0 1.2301 + */ 1.2302 + inline UnicodeString& replace(int32_t start, 1.2303 + int32_t length, 1.2304 + const UChar *srcChars, 1.2305 + int32_t srcLength); 1.2306 + 1.2307 + /** 1.2308 + * Replace the characters in the range 1.2309 + * [<TT>start</TT>, <TT>start + length</TT>) with the code unit 1.2310 + * <TT>srcChar</TT>. 1.2311 + * @param start the offset at which the replace operation begins 1.2312 + * @param length the number of characters to replace. The character at 1.2313 + * <TT>start + length</TT> is not modified. 1.2314 + * @param srcChar the new code unit 1.2315 + * @return a reference to this 1.2316 + * @stable ICU 2.0 1.2317 + */ 1.2318 + inline UnicodeString& replace(int32_t start, 1.2319 + int32_t length, 1.2320 + UChar srcChar); 1.2321 + 1.2322 + /** 1.2323 + * Replace the characters in the range 1.2324 + * [<TT>start</TT>, <TT>start + length</TT>) with the code point 1.2325 + * <TT>srcChar</TT>. 1.2326 + * @param start the offset at which the replace operation begins 1.2327 + * @param length the number of characters to replace. The character at 1.2328 + * <TT>start + length</TT> is not modified. 1.2329 + * @param srcChar the new code point 1.2330 + * @return a reference to this 1.2331 + * @stable ICU 2.0 1.2332 + */ 1.2333 + UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); 1.2334 + 1.2335 + /** 1.2336 + * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 1.2337 + * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. 
1.2338 + * @param start the offset at which the replace operation begins 1.2339 + * @param limit the offset immediately following the replace range 1.2340 + * @param srcText the source for the new characters 1.2341 + * @return a reference to this 1.2342 + * @stable ICU 2.0 1.2343 + */ 1.2344 + inline UnicodeString& replaceBetween(int32_t start, 1.2345 + int32_t limit, 1.2346 + const UnicodeString& srcText); 1.2347 + 1.2348 + /** 1.2349 + * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 1.2350 + * with the characters in <TT>srcText</TT> in the range 1.2351 + * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified. 1.2352 + * @param start the offset at which the replace operation begins 1.2353 + * @param limit the offset immediately following the replace range 1.2354 + * @param srcText the source for the new characters 1.2355 + * @param srcStart the offset into <TT>srcText</TT> where new characters 1.2356 + * will be obtained 1.2357 + * @param srcLimit the offset immediately following the range to copy 1.2358 + * in <TT>srcText</TT> 1.2359 + * @return a reference to this 1.2360 + * @stable ICU 2.0 1.2361 + */ 1.2362 + inline UnicodeString& replaceBetween(int32_t start, 1.2363 + int32_t limit, 1.2364 + const UnicodeString& srcText, 1.2365 + int32_t srcStart, 1.2366 + int32_t srcLimit); 1.2367 + 1.2368 + /** 1.2369 + * Replace a substring of this object with the given text. 1.2370 + * @param start the beginning index, inclusive; <code>0 <= start 1.2371 + * <= limit</code>. 1.2372 + * @param limit the ending index, exclusive; <code>start <= limit 1.2373 + * <= length()</code>.
1.2374 + * @param text the text to replace characters <code>start</code> 1.2375 + * to <code>limit - 1</code> 1.2376 + * @stable ICU 2.0 1.2377 + */ 1.2378 + virtual void handleReplaceBetween(int32_t start, 1.2379 + int32_t limit, 1.2380 + const UnicodeString& text); 1.2381 + 1.2382 + /** 1.2383 + * Replaceable API 1.2384 + * @return TRUE if it has MetaData 1.2385 + * @stable ICU 2.4 1.2386 + */ 1.2387 + virtual UBool hasMetaData() const; 1.2388 + 1.2389 + /** 1.2390 + * Copy a substring of this object, retaining attribute (out-of-band) 1.2391 + * information. This method is used to duplicate or reorder substrings. 1.2392 + * The destination index must not overlap the source range. 1.2393 + * 1.2394 + * @param start the beginning index, inclusive; <code>0 <= start <= 1.2395 + * limit</code>. 1.2396 + * @param limit the ending index, exclusive; <code>start <= limit <= 1.2397 + * length()</code>. 1.2398 + * @param dest the destination index. The characters from 1.2399 + * <code>start..limit-1</code> will be copied to <code>dest</code>. 1.2400 + * Implementations of this method may assume that <code>dest <= start || 1.2401 + * dest >= limit</code>. 1.2402 + * @stable ICU 2.0 1.2403 + */ 1.2404 + virtual void copy(int32_t start, int32_t limit, int32_t dest); 1.2405 + 1.2406 + /* Search and replace operations */ 1.2407 + 1.2408 + /** 1.2409 + * Replace all occurrences of characters in oldText with the characters 1.2410 + * in newText 1.2411 + * @param oldText the text containing the search text 1.2412 + * @param newText the text containing the replacement text 1.2413 + * @return a reference to this 1.2414 + * @stable ICU 2.0 1.2415 + */ 1.2416 + inline UnicodeString& findAndReplace(const UnicodeString& oldText, 1.2417 + const UnicodeString& newText); 1.2418 + 1.2419 + /** 1.2420 + * Replace all occurrences of characters in oldText with characters 1.2421 + * in newText 1.2422 + * in the range [<TT>start</TT>, <TT>start + length</TT>). 
1.2423 + * @param start the start of the range in which replace will be performed 1.2424 + * @param length the length of the range in which replace will be performed 1.2425 + * @param oldText the text containing the search text 1.2426 + * @param newText the text containing the replacement text 1.2427 + * @return a reference to this 1.2428 + * @stable ICU 2.0 1.2429 + */ 1.2430 + inline UnicodeString& findAndReplace(int32_t start, 1.2431 + int32_t length, 1.2432 + const UnicodeString& oldText, 1.2433 + const UnicodeString& newText); 1.2434 + 1.2435 + /** 1.2436 + * Replace all occurrences of characters in oldText in the range 1.2437 + * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters 1.2438 + * in newText in the range 1.2439 + * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) 1.2440 + * in the range [<TT>start</TT>, <TT>start + length</TT>). 1.2441 + * @param start the start of the range in which replace will be performed 1.2442 + * @param length the length of the range in which replace will be performed 1.2443 + * @param oldText the text containing the search text 1.2444 + * @param oldStart the start of the search range in <TT>oldText</TT> 1.2445 + * @param oldLength the length of the search range in <TT>oldText</TT> 1.2446 + * @param newText the text containing the replacement text 1.2447 + * @param newStart the start of the replacement range in <TT>newText</TT> 1.2448 + * @param newLength the length of the replacement range in <TT>newText</TT> 1.2449 + * @return a reference to this 1.2450 + * @stable ICU 2.0 1.2451 + */ 1.2452 + UnicodeString& findAndReplace(int32_t start, 1.2453 + int32_t length, 1.2454 + const UnicodeString& oldText, 1.2455 + int32_t oldStart, 1.2456 + int32_t oldLength, 1.2457 + const UnicodeString& newText, 1.2458 + int32_t newStart, 1.2459 + int32_t newLength); 1.2460 + 1.2461 + 1.2462 + /* Remove operations */ 1.2463 + 1.2464 + /** 1.2465 + * Remove all characters from the UnicodeString object.
1.2466 + * @return a reference to this 1.2467 + * @stable ICU 2.0 1.2468 + */ 1.2469 + inline UnicodeString& remove(void); 1.2470 + 1.2471 + /** 1.2472 + * Remove the characters in the range 1.2473 + * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object. 1.2474 + * @param start the offset of the first character to remove 1.2475 + * @param length the number of characters to remove 1.2476 + * @return a reference to this 1.2477 + * @stable ICU 2.0 1.2478 + */ 1.2479 + inline UnicodeString& remove(int32_t start, 1.2480 + int32_t length = (int32_t)INT32_MAX); 1.2481 + 1.2482 + /** 1.2483 + * Remove the characters in the range 1.2484 + * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object. 1.2485 + * @param start the offset of the first character to remove 1.2486 + * @param limit the offset immediately following the range to remove 1.2487 + * @return a reference to this 1.2488 + * @stable ICU 2.0 1.2489 + */ 1.2490 + inline UnicodeString& removeBetween(int32_t start, 1.2491 + int32_t limit = (int32_t)INT32_MAX); 1.2492 + 1.2493 + /** 1.2494 + * Retain only the characters in the range 1.2495 + * [<code>start</code>, <code>limit</code>) from the UnicodeString object. 1.2496 + * Removes characters before <code>start</code> and at and after <code>limit</code>. 1.2497 + * @param start the offset of the first character to retain 1.2498 + * @param limit the offset immediately following the range to retain 1.2499 + * @return a reference to this 1.2500 + * @stable ICU 4.4 1.2501 + */ 1.2502 + inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 1.2503 + 1.2504 + /* Length operations */ 1.2505 + 1.2506 + /** 1.2507 + * Pad the start of this UnicodeString with the character <TT>padChar</TT>. 1.2508 + * If the length of this UnicodeString is less than targetLength, 1.2509 + * targetLength - length() copies of padChar will be added to the 1.2510 + * beginning of this UnicodeString.
1.2511 + * @param targetLength the desired length of the string 1.2512 + * @param padChar the character to use for padding. Defaults to 1.2513 + * space (U+0020) 1.2514 + * @return TRUE if the text was padded, FALSE otherwise. 1.2515 + * @stable ICU 2.0 1.2516 + */ 1.2517 + UBool padLeading(int32_t targetLength, 1.2518 + UChar padChar = 0x0020); 1.2519 + 1.2520 + /** 1.2521 + * Pad the end of this UnicodeString with the character <TT>padChar</TT>. 1.2522 + * If the length of this UnicodeString is less than targetLength, 1.2523 + * targetLength - length() copies of padChar will be added to the 1.2524 + * end of this UnicodeString. 1.2525 + * @param targetLength the desired length of the string 1.2526 + * @param padChar the character to use for padding. Defaults to 1.2527 + * space (U+0020) 1.2528 + * @return TRUE if the text was padded, FALSE otherwise. 1.2529 + * @stable ICU 2.0 1.2530 + */ 1.2531 + UBool padTrailing(int32_t targetLength, 1.2532 + UChar padChar = 0x0020); 1.2533 + 1.2534 + /** 1.2535 + * Truncate this UnicodeString to the <TT>targetLength</TT>. 1.2536 + * @param targetLength the desired length of this UnicodeString. 1.2537 + * @return TRUE if the text was truncated, FALSE otherwise 1.2538 + * @stable ICU 2.0 1.2539 + */ 1.2540 + inline UBool truncate(int32_t targetLength); 1.2541 + 1.2542 + /** 1.2543 + * Trims leading and trailing whitespace from this UnicodeString. 1.2544 + * @return a reference to this 1.2545 + * @stable ICU 2.0 1.2546 + */ 1.2547 + UnicodeString& trim(void); 1.2548 + 1.2549 + 1.2550 + /* Miscellaneous operations */ 1.2551 + 1.2552 + /** 1.2553 + * Reverse this UnicodeString in place. 1.2554 + * @return a reference to this 1.2555 + * @stable ICU 2.0 1.2556 + */ 1.2557 + inline UnicodeString& reverse(void); 1.2558 + 1.2559 + /** 1.2560 + * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in 1.2561 + * this UnicodeString.
1.2562 + * @param start the start of the range to reverse 1.2563 + * @param length the number of characters to reverse 1.2564 + * @return a reference to this 1.2565 + * @stable ICU 2.0 1.2566 + */ 1.2567 + inline UnicodeString& reverse(int32_t start, 1.2568 + int32_t length); 1.2569 + 1.2570 + /** 1.2571 + * Convert the characters in this to UPPER CASE following the conventions of 1.2572 + * the default locale. 1.2573 + * @return A reference to this. 1.2574 + * @stable ICU 2.0 1.2575 + */ 1.2576 + UnicodeString& toUpper(void); 1.2577 + 1.2578 + /** 1.2579 + * Convert the characters in this to UPPER CASE following the conventions of 1.2580 + * a specific locale. 1.2581 + * @param locale The locale containing the conventions to use. 1.2582 + * @return A reference to this. 1.2583 + * @stable ICU 2.0 1.2584 + */ 1.2585 + UnicodeString& toUpper(const Locale& locale); 1.2586 + 1.2587 + /** 1.2588 + * Convert the characters in this to lower case following the conventions of 1.2589 + * the default locale. 1.2590 + * @return A reference to this. 1.2591 + * @stable ICU 2.0 1.2592 + */ 1.2593 + UnicodeString& toLower(void); 1.2594 + 1.2595 + /** 1.2596 + * Convert the characters in this to lower case following the conventions of 1.2597 + * a specific locale. 1.2598 + * @param locale The locale containing the conventions to use. 1.2599 + * @return A reference to this. 1.2600 + * @stable ICU 2.0 1.2601 + */ 1.2602 + UnicodeString& toLower(const Locale& locale); 1.2603 + 1.2604 +#if !UCONFIG_NO_BREAK_ITERATION 1.2605 + 1.2606 + /** 1.2607 + * Titlecase this string, convenience function using the default locale. 1.2608 + * 1.2609 + * Casing is locale-dependent and context-sensitive. 1.2610 + * Titlecasing uses a break iterator to find the first characters of words 1.2611 + * that are to be titlecased. It titlecases those characters and lowercases 1.2612 + * all others.
1.2613 + * 1.2614 + * The titlecase break iterator can be provided to customize for arbitrary 1.2615 + * styles, using rules and dictionaries beyond the standard iterators. 1.2616 + * It may be more efficient to always provide an iterator to avoid 1.2617 + * opening and closing one for each string. 1.2618 + * The standard titlecase iterator for the root locale implements the 1.2619 + * algorithm of Unicode TR 21. 1.2620 + * 1.2621 + * This function uses only the setText(), first() and next() methods of the 1.2622 + * provided break iterator. 1.2623 + * 1.2624 + * @param titleIter A break iterator to find the first characters of words 1.2625 + * that are to be titlecased. 1.2626 + * If none is provided (0), then a standard titlecase 1.2627 + * break iterator is opened. 1.2628 + * Otherwise the provided iterator is set to the string's text. 1.2629 + * @return A reference to this. 1.2630 + * @stable ICU 2.1 1.2631 + */ 1.2632 + UnicodeString &toTitle(BreakIterator *titleIter); 1.2633 + 1.2634 + /** 1.2635 + * Titlecase this string. 1.2636 + * 1.2637 + * Casing is locale-dependent and context-sensitive. 1.2638 + * Titlecasing uses a break iterator to find the first characters of words 1.2639 + * that are to be titlecased. It titlecases those characters and lowercases 1.2640 + * all others. 1.2641 + * 1.2642 + * The titlecase break iterator can be provided to customize for arbitrary 1.2643 + * styles, using rules and dictionaries beyond the standard iterators. 1.2644 + * It may be more efficient to always provide an iterator to avoid 1.2645 + * opening and closing one for each string. 1.2646 + * The standard titlecase iterator for the root locale implements the 1.2647 + * algorithm of Unicode TR 21. 1.2648 + * 1.2649 + * This function uses only the setText(), first() and next() methods of the 1.2650 + * provided break iterator. 1.2651 + * 1.2652 + * @param titleIter A break iterator to find the first characters of words 1.2653 + * that are to be titlecased. 
1.2654 + * If none is provided (0), then a standard titlecase 1.2655 + * break iterator is opened. 1.2656 + * Otherwise the provided iterator is set to the string's text. 1.2657 + * @param locale The locale to consider. 1.2658 + * @return A reference to this. 1.2659 + * @stable ICU 2.1 1.2660 + */ 1.2661 + UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 1.2662 + 1.2663 + /** 1.2664 + * Titlecase this string, with options. 1.2665 + * 1.2666 + * Casing is locale-dependent and context-sensitive. 1.2667 + * Titlecasing uses a break iterator to find the first characters of words 1.2668 + * that are to be titlecased. It titlecases those characters and lowercases 1.2669 + * all others. (This can be modified with options.) 1.2670 + * 1.2671 + * The titlecase break iterator can be provided to customize for arbitrary 1.2672 + * styles, using rules and dictionaries beyond the standard iterators. 1.2673 + * It may be more efficient to always provide an iterator to avoid 1.2674 + * opening and closing one for each string. 1.2675 + * The standard titlecase iterator for the root locale implements the 1.2676 + * algorithm of Unicode TR 21. 1.2677 + * 1.2678 + * This function uses only the setText(), first() and next() methods of the 1.2679 + * provided break iterator. 1.2680 + * 1.2681 + * @param titleIter A break iterator to find the first characters of words 1.2682 + * that are to be titlecased. 1.2683 + * If none is provided (0), then a standard titlecase 1.2684 + * break iterator is opened. 1.2685 + * Otherwise the provided iterator is set to the string's text. 1.2686 + * @param locale The locale to consider. 1.2687 + * @param options Options bit set, see ucasemap_open(). 1.2688 + * @return A reference to this. 
1.2689 + * @see U_TITLECASE_NO_LOWERCASE 1.2690 + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT 1.2691 + * @see ucasemap_open 1.2692 + * @stable ICU 3.8 1.2693 + */ 1.2694 + UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 1.2695 + 1.2696 +#endif 1.2697 + 1.2698 + /** 1.2699 + * Case-folds the characters in this string. 1.2700 + * 1.2701 + * Case-folding is locale-independent and not context-sensitive, 1.2702 + * but there is an option for whether to include or exclude mappings for dotted I 1.2703 + * and dotless i that are marked with 'T' in CaseFolding.txt. 1.2704 + * 1.2705 + * The result may be longer or shorter than the original. 1.2706 + * 1.2707 + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.2708 + * @return A reference to this. 1.2709 + * @stable ICU 2.0 1.2710 + */ 1.2711 + UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 1.2712 + 1.2713 + //======================================== 1.2714 + // Access to the internal buffer 1.2715 + //======================================== 1.2716 + 1.2717 + /** 1.2718 + * Get a read/write pointer to the internal buffer. 1.2719 + * The buffer is guaranteed to be large enough for at least minCapacity UChars, 1.2720 + * writable, and is still owned by the UnicodeString object. 1.2721 + * Calls to getBuffer(minCapacity) must not be nested, and 1.2722 + * must be matched with calls to releaseBuffer(newLength). 1.2723 + * If the string buffer was read-only or shared, 1.2724 + * then it will be reallocated and copied. 1.2725 + * 1.2726 + * An attempted nested call will return 0, and will not further modify the 1.2727 + * state of the UnicodeString object. 1.2728 + * It also returns 0 if the string is bogus. 1.2729 + * 1.2730 + * The actual capacity of the string buffer may be larger than minCapacity. 1.2731 + * getCapacity() returns the actual capacity. 1.2732 + * For many operations, the full capacity should be used to avoid reallocations. 
1.2733 + * 1.2734 + * While the buffer is "open" between getBuffer(minCapacity) 1.2735 + * and releaseBuffer(newLength), the following applies: 1.2736 + * - The string length is set to 0. 1.2737 + * - Any read API call on the UnicodeString object will behave like on a 0-length string. 1.2738 + * - Any write API call on the UnicodeString object is disallowed and will have no effect. 1.2739 + * - You can read from and write to the returned buffer. 1.2740 + * - The previous string contents will still be in the buffer; 1.2741 + * if you want to use it, then you need to call length() before getBuffer(minCapacity). 1.2742 + * If the length() was greater than minCapacity, then any contents after minCapacity 1.2743 + * may be lost. 1.2744 + * The buffer contents is not NUL-terminated by getBuffer(). 1.2745 + * If length()<getCapacity() then you can terminate it by writing a NUL 1.2746 + * at index length(). 1.2747 + * - You must call releaseBuffer(newLength) before and in order to 1.2748 + * return to normal UnicodeString operation. 1.2749 + * 1.2750 + * @param minCapacity the minimum number of UChars that are to be available 1.2751 + * in the buffer, starting at the returned pointer; 1.2752 + * default to the current string capacity if minCapacity==-1 1.2753 + * @return a writable pointer to the internal string buffer, 1.2754 + * or 0 if an error occurs (nested calls, out of memory) 1.2755 + * 1.2756 + * @see releaseBuffer 1.2757 + * @see getTerminatedBuffer() 1.2758 + * @stable ICU 2.0 1.2759 + */ 1.2760 + UChar *getBuffer(int32_t minCapacity); 1.2761 + 1.2762 + /** 1.2763 + * Release a read/write buffer on a UnicodeString object with an 1.2764 + * "open" getBuffer(minCapacity). 1.2765 + * This function must be called in a matched pair with getBuffer(minCapacity). 1.2766 + * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". 1.2767 + * 1.2768 + * It will set the string length to newLength, at most to the current capacity. 
1.2769 + * If newLength==-1 then it will set the length according to the 1.2770 + * first NUL in the buffer, or to the capacity if there is no NUL. 1.2771 + * 1.2772 + * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. 1.2773 + * 1.2774 + * @param newLength the new length of the UnicodeString object; 1.2775 + * defaults to the current capacity if newLength is greater than that; 1.2776 + * if newLength==-1, it defaults to u_strlen(buffer) but not more than 1.2777 + * the current capacity of the string 1.2778 + * 1.2779 + * @see getBuffer(int32_t minCapacity) 1.2780 + * @stable ICU 2.0 1.2781 + */ 1.2782 + void releaseBuffer(int32_t newLength=-1); 1.2783 + 1.2784 + /** 1.2785 + * Get a read-only pointer to the internal buffer. 1.2786 + * This can be called at any time on a valid UnicodeString. 1.2787 + * 1.2788 + * It returns 0 if the string is bogus, or 1.2789 + * during an "open" getBuffer(minCapacity). 1.2790 + * 1.2791 + * It can be called as many times as desired. 1.2792 + * The pointer that it returns will remain valid until the UnicodeString object is modified, 1.2793 + * at which time the pointer is semantically invalidated and must not be used any more. 1.2794 + * 1.2795 + * The capacity of the buffer can be determined with getCapacity(). 1.2796 + * The part after length() may or may not be initialized and valid, 1.2797 + * depending on the history of the UnicodeString object. 1.2798 + * 1.2799 + * The buffer contents is (probably) not NUL-terminated. 1.2800 + * You can check if it is with 1.2801 + * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. 1.2802 + * (See getTerminatedBuffer().) 1.2803 + * 1.2804 + * The buffer may reside in read-only memory. Its contents must not 1.2805 + * be modified. 
1.2806 + * 1.2807 + * @return a read-only pointer to the internal string buffer, 1.2808 + * or 0 if the string is empty or bogus 1.2809 + * 1.2810 + * @see getBuffer(int32_t minCapacity) 1.2811 + * @see getTerminatedBuffer() 1.2812 + * @stable ICU 2.0 1.2813 + */ 1.2814 + inline const UChar *getBuffer() const; 1.2815 + 1.2816 + /** 1.2817 + * Get a read-only pointer to the internal buffer, 1.2818 + * making sure that it is NUL-terminated. 1.2819 + * This can be called at any time on a valid UnicodeString. 1.2820 + * 1.2821 + * It returns 0 if the string is bogus, or 1.2822 + * during an "open" getBuffer(minCapacity), or if the buffer cannot 1.2823 + * be NUL-terminated (because memory allocation failed). 1.2824 + * 1.2825 + * It can be called as many times as desired. 1.2826 + * The pointer that it returns will remain valid until the UnicodeString object is modified, 1.2827 + * at which time the pointer is semantically invalidated and must not be used any more. 1.2828 + * 1.2829 + * The capacity of the buffer can be determined with getCapacity(). 1.2830 + * The part after length()+1 may or may not be initialized and valid, 1.2831 + * depending on the history of the UnicodeString object. 1.2832 + * 1.2833 + * The buffer contents is guaranteed to be NUL-terminated. 1.2834 + * getTerminatedBuffer() may reallocate the buffer if a terminating NUL 1.2835 + * is written. 1.2836 + * For this reason, this function is not const, unlike getBuffer(). 1.2837 + * Note that a UnicodeString may also contain NUL characters as part of its contents. 1.2838 + * 1.2839 + * The buffer may reside in read-only memory. Its contents must not 1.2840 + * be modified. 
1.2841 + * 1.2842 + * @return a read-only pointer to the internal string buffer, 1.2843 + * or 0 if the string is empty or bogus 1.2844 + * 1.2845 + * @see getBuffer(int32_t minCapacity) 1.2846 + * @see getBuffer() 1.2847 + * @stable ICU 2.2 1.2848 + */ 1.2849 + const UChar *getTerminatedBuffer(); 1.2850 + 1.2851 + //======================================== 1.2852 + // Constructors 1.2853 + //======================================== 1.2854 + 1.2855 + /** Construct an empty UnicodeString. 1.2856 + * @stable ICU 2.0 1.2857 + */ 1.2858 + inline UnicodeString(); 1.2859 + 1.2860 + /** 1.2861 + * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars 1.2862 + * @param capacity the number of UChars this UnicodeString should hold 1.2863 + * before a resize is necessary; if count is greater than 0 and count 1.2864 + * code points c take up more space than capacity, then capacity is adjusted 1.2865 + * accordingly. 1.2866 + * @param c is used to initially fill the string 1.2867 + * @param count specifies how many code points c are to be written in the 1.2868 + * string 1.2869 + * @stable ICU 2.0 1.2870 + */ 1.2871 + UnicodeString(int32_t capacity, UChar32 c, int32_t count); 1.2872 + 1.2873 + /** 1.2874 + * Single UChar (code unit) constructor. 1.2875 + * 1.2876 + * It is recommended to mark this constructor "explicit" by 1.2877 + * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 1.2878 + * on the compiler command line or similar. 1.2879 + * @param ch the character to place in the UnicodeString 1.2880 + * @stable ICU 2.0 1.2881 + */ 1.2882 + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); 1.2883 + 1.2884 + /** 1.2885 + * Single UChar32 (code point) constructor. 1.2886 + * 1.2887 + * It is recommended to mark this constructor "explicit" by 1.2888 + * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 1.2889 + * on the compiler command line or similar. 
1.2890 + * @param ch the character to place in the UnicodeString 1.2891 + * @stable ICU 2.0 1.2892 + */ 1.2893 + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); 1.2894 + 1.2895 + /** 1.2896 + * UChar* constructor. 1.2897 + * 1.2898 + * It is recommended to mark this constructor "explicit" by 1.2899 + * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 1.2900 + * on the compiler command line or similar. 1.2901 + * @param text The characters to place in the UnicodeString. <TT>text</TT> 1.2902 + * must be NUL (U+0000) terminated. 1.2903 + * @stable ICU 2.0 1.2904 + */ 1.2905 + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); 1.2906 + 1.2907 + /** 1.2908 + * UChar* constructor. 1.2909 + * @param text The characters to place in the UnicodeString. 1.2910 + * @param textLength The number of Unicode characters in <TT>text</TT> 1.2911 + * to copy. 1.2912 + * @stable ICU 2.0 1.2913 + */ 1.2914 + UnicodeString(const UChar *text, 1.2915 + int32_t textLength); 1.2916 + 1.2917 + /** 1.2918 + * Readonly-aliasing UChar* constructor. 1.2919 + * The text will be used for the UnicodeString object, but 1.2920 + * it will not be released when the UnicodeString is destroyed. 1.2921 + * This has copy-on-write semantics: 1.2922 + * When the string is modified, then the buffer is first copied into 1.2923 + * newly allocated memory. 1.2924 + * The aliased buffer is never modified. 1.2925 + * 1.2926 + * In an assignment to another UnicodeString, when using the copy constructor 1.2927 + * or the assignment operator, the text will be copied. 1.2928 + * When using fastCopyFrom(), the text will be aliased again, 1.2929 + * so that both strings then alias the same readonly-text. 1.2930 + * 1.2931 + * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 1.2932 + * This must be true if <code>textLength==-1</code>. 1.2933 + * @param text The characters to alias for the UnicodeString.
1.2934 + * @param textLength The number of Unicode characters in <code>text</code> to alias. 1.2935 + * If -1, then this constructor will determine the length 1.2936 + * by calling <code>u_strlen()</code>. 1.2937 + * @stable ICU 2.0 1.2938 + */ 1.2939 + UnicodeString(UBool isTerminated, 1.2940 + const UChar *text, 1.2941 + int32_t textLength); 1.2942 + 1.2943 + /** 1.2944 + * Writable-aliasing UChar* constructor. 1.2945 + * The text will be used for the UnicodeString object, but 1.2946 + * it will not be released when the UnicodeString is destroyed. 1.2947 + * This has write-through semantics: 1.2948 + * For as long as the capacity of the buffer is sufficient, write operations 1.2949 + * will directly affect the buffer. When more capacity is necessary, then 1.2950 + * a new buffer will be allocated and the contents copied as with regularly 1.2951 + * constructed strings. 1.2952 + * In an assignment to another UnicodeString, the buffer will be copied. 1.2953 + * The extract(UChar *dst) function detects whether the dst pointer is the same 1.2954 + * as the string buffer itself and will in this case not copy the contents. 1.2955 + * 1.2956 + * @param buffer The characters to alias for the UnicodeString. 1.2957 + * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 1.2958 + * @param buffCapacity The size of <code>buffer</code> in UChars. 1.2959 + * @stable ICU 2.0 1.2960 + */ 1.2961 + UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 1.2962 + 1.2963 +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 1.2964 + 1.2965 + /** 1.2966 + * char* constructor. 1.2967 + * Uses the default converter (and thus depends on the ICU conversion code) 1.2968 + * unless U_CHARSET_IS_UTF8 is set to 1. 1.2969 + * 1.2970 + * For ASCII (really "invariant character") strings it is more efficient to use 1.2971 + * the constructor that takes a US_INV (for its enum EInvariant). 
1.2972 + * For ASCII (invariant-character) string literals, see UNICODE_STRING and 1.2973 + * UNICODE_STRING_SIMPLE. 1.2974 + * 1.2975 + * It is recommended to mark this constructor "explicit" by 1.2976 + * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 1.2977 + * on the compiler command line or similar. 1.2978 + * @param codepageData an array of bytes, null-terminated, 1.2979 + * in the platform's default codepage. 1.2980 + * @stable ICU 2.0 1.2981 + * @see UNICODE_STRING 1.2982 + * @see UNICODE_STRING_SIMPLE 1.2983 + */ 1.2984 + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); 1.2985 + 1.2986 + /** 1.2987 + * char* constructor. 1.2988 + * Uses the default converter (and thus depends on the ICU conversion code) 1.2989 + * unless U_CHARSET_IS_UTF8 is set to 1. 1.2990 + * @param codepageData an array of bytes in the platform's default codepage. 1.2991 + * @param dataLength The number of bytes in <TT>codepageData</TT>. 1.2992 + * @stable ICU 2.0 1.2993 + */ 1.2994 + UnicodeString(const char *codepageData, int32_t dataLength); 1.2995 + 1.2996 +#endif 1.2997 + 1.2998 +#if !UCONFIG_NO_CONVERSION 1.2999 + 1.3000 + /** 1.3001 + * char* constructor. 1.3002 + * @param codepageData an array of bytes, null-terminated 1.3003 + * @param codepage the encoding of <TT>codepageData</TT>. The special 1.3004 + * value 0 for <TT>codepage</TT> indicates that the text is in the 1.3005 + * platform's default codepage. 1.3006 + * 1.3007 + * If <code>codepage</code> is an empty string (<code>""</code>), 1.3008 + * then a simple conversion is performed on the codepage-invariant 1.3009 + * subset ("invariant characters") of the platform encoding. See utypes.h. 1.3010 + * Recommendation: For invariant-character strings use the constructor 1.3011 + * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 1.3012 + * because it avoids object code dependencies of UnicodeString on 1.3013 + * the conversion code. 
1.3014 + * 1.3015 + * @stable ICU 2.0 1.3016 + */ 1.3017 + UnicodeString(const char *codepageData, const char *codepage); 1.3018 + 1.3019 + /** 1.3020 + * char* constructor. 1.3021 + * @param codepageData an array of bytes. 1.3022 + * @param dataLength The number of bytes in <TT>codepageData</TT>. 1.3023 + * @param codepage the encoding of <TT>codepageData</TT>. The special 1.3024 + * value 0 for <TT>codepage</TT> indicates that the text is in the 1.3025 + * platform's default codepage. 1.3026 + * If <code>codepage</code> is an empty string (<code>""</code>), 1.3027 + * then a simple conversion is performed on the codepage-invariant 1.3028 + * subset ("invariant characters") of the platform encoding. See utypes.h. 1.3029 + * Recommendation: For invariant-character strings use the constructor 1.3030 + * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 1.3031 + * because it avoids object code dependencies of UnicodeString on 1.3032 + * the conversion code. 1.3033 + * 1.3034 + * @stable ICU 2.0 1.3035 + */ 1.3036 + UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 1.3037 + 1.3038 + /** 1.3039 + * char * / UConverter constructor. 1.3040 + * This constructor uses an existing UConverter object to 1.3041 + * convert the codepage string to Unicode and construct a UnicodeString 1.3042 + * from that. 1.3043 + * 1.3044 + * The converter is reset at first. 1.3045 + * If the error code indicates a failure before this constructor is called, 1.3046 + * or if an error occurs during conversion or construction, 1.3047 + * then the string will be bogus. 1.3048 + * 1.3049 + * This function avoids the overhead of opening and closing a converter if 1.3050 + * multiple strings are constructed. 
 1.3051 + * 1.3052 + * @param src input codepage string 1.3053 + * @param srcLength length of the input string, can be -1 for NUL-terminated strings 1.3054 + * @param cnv converter object (ucnv_resetToUnicode() will be called), 1.3055 + * can be NULL for the default converter 1.3056 + * @param errorCode normal ICU error code 1.3057 + * @stable ICU 2.0 1.3058 + */ 1.3059 + UnicodeString( 1.3060 + const char *src, int32_t srcLength, 1.3061 + UConverter *cnv, 1.3062 + UErrorCode &errorCode); 1.3063 + 1.3064 +#endif 1.3065 + 1.3066 + /** 1.3067 + * Constructs a Unicode string from an invariant-character char * string. 1.3068 + * About invariant characters see utypes.h. 1.3069 + * This constructor has no runtime dependency on conversion code and is 1.3070 + * therefore recommended over ones taking a charset name string 1.3071 + * (where the empty string "" indicates invariant-character conversion). 1.3072 + * 1.3073 + * Use the macro US_INV as the third, signature-distinguishing parameter. 1.3074 + * 1.3075 + * For example: 1.3076 + * \code 1.3077 + * void fn(const char *s) { 1.3078 + * UnicodeString ustr(s, -1, US_INV); 1.3079 + * // use ustr ... 1.3080 + * } 1.3081 + * \endcode 1.3082 + * 1.3083 + * @param src String using only invariant characters. 1.3084 + * @param length Length of src, or -1 if NUL-terminated. 1.3085 + * @param inv Signature-distinguishing parameter, use US_INV. 1.3086 + * 1.3087 + * @see US_INV 1.3088 + * @stable ICU 3.2 1.3089 + */ 1.3090 + UnicodeString(const char *src, int32_t length, enum EInvariant inv); 1.3091 + 1.3092 + 1.3093 + /** 1.3094 + * Copy constructor. 1.3095 + * @param that The UnicodeString object to copy. 1.3096 + * @stable ICU 2.0 1.3097 + */ 1.3098 + UnicodeString(const UnicodeString& that); 1.3099 + 1.3100 + /** 1.3101 + * 'Substring' constructor from tail of source string. 1.3102 + * @param src The UnicodeString object to copy. 1.3103 + * @param srcStart The offset into <tt>src</tt> at which to start copying. 
1.3104 + * @stable ICU 2.2 1.3105 + */ 1.3106 + UnicodeString(const UnicodeString& src, int32_t srcStart); 1.3107 + 1.3108 + /** 1.3109 + * 'Substring' constructor from subrange of source string. 1.3110 + * @param src The UnicodeString object to copy. 1.3111 + * @param srcStart The offset into <tt>src</tt> at which to start copying. 1.3112 + * @param srcLength The number of characters from <tt>src</tt> to copy. 1.3113 + * @stable ICU 2.2 1.3114 + */ 1.3115 + UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 1.3116 + 1.3117 + /** 1.3118 + * Clone this object, an instance of a subclass of Replaceable. 1.3119 + * Clones can be used concurrently in multiple threads. 1.3120 + * If a subclass does not implement clone(), or if an error occurs, 1.3121 + * then NULL is returned. 1.3122 + * The clone functions in all subclasses return a pointer to a Replaceable 1.3123 + * because some compilers do not support covariant (same-as-this) 1.3124 + * return types; cast to the appropriate subclass if necessary. 1.3125 + * The caller must delete the clone. 1.3126 + * 1.3127 + * @return a clone of this object 1.3128 + * 1.3129 + * @see Replaceable::clone 1.3130 + * @see getDynamicClassID 1.3131 + * @stable ICU 2.6 1.3132 + */ 1.3133 + virtual Replaceable *clone() const; 1.3134 + 1.3135 + /** Destructor. 1.3136 + * @stable ICU 2.0 1.3137 + */ 1.3138 + virtual ~UnicodeString(); 1.3139 + 1.3140 + /** 1.3141 + * Create a UnicodeString from a UTF-8 string. 1.3142 + * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 1.3143 + * Calls u_strFromUTF8WithSub(). 1.3144 + * 1.3145 + * @param utf8 UTF-8 input string. 1.3146 + * Note that a StringPiece can be implicitly constructed 1.3147 + * from a std::string or a NUL-terminated const char * string. 1.3148 + * @return A UnicodeString with equivalent UTF-16 contents. 
1.3149 + * @see toUTF8 1.3150 + * @see toUTF8String 1.3151 + * @stable ICU 4.2 1.3152 + */ 1.3153 + static UnicodeString fromUTF8(const StringPiece &utf8); 1.3154 + 1.3155 + /** 1.3156 + * Create a UnicodeString from a UTF-32 string. 1.3157 + * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 1.3158 + * Calls u_strFromUTF32WithSub(). 1.3159 + * 1.3160 + * @param utf32 UTF-32 input string. Must not be NULL. 1.3161 + * @param length Length of the input string, or -1 if NUL-terminated. 1.3162 + * @return A UnicodeString with equivalent UTF-16 contents. 1.3163 + * @see toUTF32 1.3164 + * @stable ICU 4.2 1.3165 + */ 1.3166 + static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 1.3167 + 1.3168 + /* Miscellaneous operations */ 1.3169 + 1.3170 + /** 1.3171 + * Unescape a string of characters and return a string containing 1.3172 + * the result. The following escape sequences are recognized: 1.3173 + * 1.3174 + * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] 1.3175 + * \\Uhhhhhhhh 8 hex digits 1.3176 + * \\xhh 1-2 hex digits 1.3177 + * \\ooo 1-3 octal digits; o in [0-7] 1.3178 + * \\cX control-X; X is masked with 0x1F 1.3179 + * 1.3180 + * as well as the standard ANSI C escapes: 1.3181 + * 1.3182 + * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, 1.3183 + * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, 1.3184 + * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C 1.3185 + * 1.3186 + * Anything else following a backslash is generically escaped. For 1.3187 + * example, "[a\\-z]" returns "[a-z]". 1.3188 + * 1.3189 + * If an escape sequence is ill-formed, this method returns an empty 1.3190 + * string. An example of an ill-formed sequence is "\\u" followed by 1.3191 + * fewer than 4 hex digits. 1.3192 + * 1.3193 + * This function is similar to u_unescape() but not identical to it. 1.3194 + * The latter takes a source char*, so it does escape recognition 1.3195 + * and also invariant conversion. 
1.3196 + * 1.3197 + * @return a string with backslash escapes interpreted, or an 1.3198 + * empty string on error. 1.3199 + * @see UnicodeString#unescapeAt() 1.3200 + * @see u_unescape() 1.3201 + * @see u_unescapeAt() 1.3202 + * @stable ICU 2.0 1.3203 + */ 1.3204 + UnicodeString unescape() const; 1.3205 + 1.3206 + /** 1.3207 + * Unescape a single escape sequence and return the represented 1.3208 + * character. See unescape() for a listing of the recognized escape 1.3209 + * sequences. The character at offset-1 is assumed (without 1.3210 + * checking) to be a backslash. If the escape sequence is 1.3211 + * ill-formed, or the offset is out of range, U_SENTINEL=-1 is 1.3212 + * returned. 1.3213 + * 1.3214 + * @param offset an input output parameter. On input, it is the 1.3215 + * offset into this string where the escape sequence is located, 1.3216 + * after the initial backslash. On output, it is advanced after the 1.3217 + * last character parsed. On error, it is not advanced at all. 1.3218 + * @return the character represented by the escape sequence at 1.3219 + * offset, or U_SENTINEL=-1 on error. 1.3220 + * @see UnicodeString#unescape() 1.3221 + * @see u_unescape() 1.3222 + * @see u_unescapeAt() 1.3223 + * @stable ICU 2.0 1.3224 + */ 1.3225 + UChar32 unescapeAt(int32_t &offset) const; 1.3226 + 1.3227 + /** 1.3228 + * ICU "poor man's RTTI", returns a UClassID for this class. 1.3229 + * 1.3230 + * @stable ICU 2.2 1.3231 + */ 1.3232 + static UClassID U_EXPORT2 getStaticClassID(); 1.3233 + 1.3234 + /** 1.3235 + * ICU "poor man's RTTI", returns a UClassID for the actual class. 1.3236 + * 1.3237 + * @stable ICU 2.2 1.3238 + */ 1.3239 + virtual UClassID getDynamicClassID() const; 1.3240 + 1.3241 + //======================================== 1.3242 + // Implementation methods 1.3243 + //======================================== 1.3244 + 1.3245 +protected: 1.3246 + /** 1.3247 + * Implement Replaceable::getLength() (see jitterbug 1027). 
1.3248 + * @stable ICU 2.4 1.3249 + */ 1.3250 + virtual int32_t getLength() const; 1.3251 + 1.3252 + /** 1.3253 + * The change in Replaceable to use virtual getCharAt() allows 1.3254 + * UnicodeString::charAt() to be inline again (see jitterbug 709). 1.3255 + * @stable ICU 2.4 1.3256 + */ 1.3257 + virtual UChar getCharAt(int32_t offset) const; 1.3258 + 1.3259 + /** 1.3260 + * The change in Replaceable to use virtual getChar32At() allows 1.3261 + * UnicodeString::char32At() to be inline again (see jitterbug 709). 1.3262 + * @stable ICU 2.4 1.3263 + */ 1.3264 + virtual UChar32 getChar32At(int32_t offset) const; 1.3265 + 1.3266 +private: 1.3267 + // For char* constructors. Could be made public. 1.3268 + UnicodeString &setToUTF8(const StringPiece &utf8); 1.3269 + // For extract(char*). 1.3270 + // We could make a toUTF8(target, capacity, errorCode) public but not 1.3271 + // this version: New API will be cleaner if we make callers create substrings 1.3272 + // rather than having start+length on every method, 1.3273 + // and it should take a UErrorCode&. 1.3274 + int32_t 1.3275 + toUTF8(int32_t start, int32_t len, 1.3276 + char *target, int32_t capacity) const; 1.3277 + 1.3278 + /** 1.3279 + * Internal string contents comparison, called by operator==. 1.3280 + * Requires: this & text not bogus and have same lengths. 
1.3281 + */ 1.3282 + UBool doEquals(const UnicodeString &text, int32_t len) const; 1.3283 + 1.3284 + inline int8_t 1.3285 + doCompare(int32_t start, 1.3286 + int32_t length, 1.3287 + const UnicodeString& srcText, 1.3288 + int32_t srcStart, 1.3289 + int32_t srcLength) const; 1.3290 + 1.3291 + int8_t doCompare(int32_t start, 1.3292 + int32_t length, 1.3293 + const UChar *srcChars, 1.3294 + int32_t srcStart, 1.3295 + int32_t srcLength) const; 1.3296 + 1.3297 + inline int8_t 1.3298 + doCompareCodePointOrder(int32_t start, 1.3299 + int32_t length, 1.3300 + const UnicodeString& srcText, 1.3301 + int32_t srcStart, 1.3302 + int32_t srcLength) const; 1.3303 + 1.3304 + int8_t doCompareCodePointOrder(int32_t start, 1.3305 + int32_t length, 1.3306 + const UChar *srcChars, 1.3307 + int32_t srcStart, 1.3308 + int32_t srcLength) const; 1.3309 + 1.3310 + inline int8_t 1.3311 + doCaseCompare(int32_t start, 1.3312 + int32_t length, 1.3313 + const UnicodeString &srcText, 1.3314 + int32_t srcStart, 1.3315 + int32_t srcLength, 1.3316 + uint32_t options) const; 1.3317 + 1.3318 + int8_t 1.3319 + doCaseCompare(int32_t start, 1.3320 + int32_t length, 1.3321 + const UChar *srcChars, 1.3322 + int32_t srcStart, 1.3323 + int32_t srcLength, 1.3324 + uint32_t options) const; 1.3325 + 1.3326 + int32_t doIndexOf(UChar c, 1.3327 + int32_t start, 1.3328 + int32_t length) const; 1.3329 + 1.3330 + int32_t doIndexOf(UChar32 c, 1.3331 + int32_t start, 1.3332 + int32_t length) const; 1.3333 + 1.3334 + int32_t doLastIndexOf(UChar c, 1.3335 + int32_t start, 1.3336 + int32_t length) const; 1.3337 + 1.3338 + int32_t doLastIndexOf(UChar32 c, 1.3339 + int32_t start, 1.3340 + int32_t length) const; 1.3341 + 1.3342 + void doExtract(int32_t start, 1.3343 + int32_t length, 1.3344 + UChar *dst, 1.3345 + int32_t dstStart) const; 1.3346 + 1.3347 + inline void doExtract(int32_t start, 1.3348 + int32_t length, 1.3349 + UnicodeString& target) const; 1.3350 + 1.3351 + inline UChar doCharAt(int32_t offset) const; 1.3352 + 
1.3353 + UnicodeString& doReplace(int32_t start, 1.3354 + int32_t length, 1.3355 + const UnicodeString& srcText, 1.3356 + int32_t srcStart, 1.3357 + int32_t srcLength); 1.3358 + 1.3359 + UnicodeString& doReplace(int32_t start, 1.3360 + int32_t length, 1.3361 + const UChar *srcChars, 1.3362 + int32_t srcStart, 1.3363 + int32_t srcLength); 1.3364 + 1.3365 + UnicodeString& doReverse(int32_t start, 1.3366 + int32_t length); 1.3367 + 1.3368 + // calculate hash code 1.3369 + int32_t doHashCode(void) const; 1.3370 + 1.3371 + // get pointer to start of array 1.3372 + // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 1.3373 + inline UChar* getArrayStart(void); 1.3374 + inline const UChar* getArrayStart(void) const; 1.3375 + 1.3376 + // A UnicodeString object (not necessarily its current buffer) 1.3377 + // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 1.3378 + inline UBool isWritable() const; 1.3379 + 1.3380 + // Is the current buffer writable? 1.3381 + inline UBool isBufferWritable() const; 1.3382 + 1.3383 + // None of the following does releaseArray(). 
 1.3384 + inline void setLength(int32_t len); // sets only fShortLength and fLength 1.3385 + inline void setToEmpty(); // sets fFlags=kShortString 1.3386 + inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags 1.3387 + 1.3388 + // allocate the array; result may be fStackBuffer 1.3389 + // sets refCount to 1 if appropriate 1.3390 + // sets fArray, fCapacity, and fFlags 1.3391 + // returns boolean for success or failure 1.3392 + UBool allocate(int32_t capacity); 1.3393 + 1.3394 + // release the array if owned 1.3395 + void releaseArray(void); 1.3396 + 1.3397 + // turn a bogus string into an empty one 1.3398 + void unBogus(); 1.3399 + 1.3400 + // implements assignment operator, copy constructor, and fastCopyFrom() 1.3401 + UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); 1.3402 + 1.3403 + // Pin start and limit to acceptable values. 1.3404 + inline void pinIndex(int32_t& start) const; 1.3405 + inline void pinIndices(int32_t& start, 1.3406 + int32_t& length) const; 1.3407 + 1.3408 +#if !UCONFIG_NO_CONVERSION 1.3409 + 1.3410 + /* Internal extract() using UConverter. */ 1.3411 + int32_t doExtract(int32_t start, int32_t length, 1.3412 + char *dest, int32_t destCapacity, 1.3413 + UConverter *cnv, 1.3414 + UErrorCode &errorCode) const; 1.3415 + 1.3416 + /* 1.3417 + * Real constructor for converting from codepage data. 1.3418 + * It assumes that it is called with !fRefCounted. 1.3419 + * 1.3420 + * If <code>codepage==0</code>, then the default converter 1.3421 + * is used for the platform encoding. 1.3422 + * If <code>codepage</code> is an empty string (<code>""</code>), 1.3423 + * then a simple conversion is performed on the codepage-invariant 1.3424 + * subset ("invariant characters") of the platform encoding. See utypes.h. 
1.3425 + */ 1.3426 + void doCodepageCreate(const char *codepageData, 1.3427 + int32_t dataLength, 1.3428 + const char *codepage); 1.3429 + 1.3430 + /* 1.3431 + * Worker function for creating a UnicodeString from 1.3432 + * a codepage string using a UConverter. 1.3433 + */ 1.3434 + void 1.3435 + doCodepageCreate(const char *codepageData, 1.3436 + int32_t dataLength, 1.3437 + UConverter *converter, 1.3438 + UErrorCode &status); 1.3439 + 1.3440 +#endif 1.3441 + 1.3442 + /* 1.3443 + * This function is called when write access to the array 1.3444 + * is necessary. 1.3445 + * 1.3446 + * We need to make a copy of the array if 1.3447 + * the buffer is read-only, or 1.3448 + * the buffer is refCounted (shared), and refCount>1, or 1.3449 + * the buffer is too small. 1.3450 + * 1.3451 + * Return FALSE if memory could not be allocated. 1.3452 + */ 1.3453 + UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 1.3454 + int32_t growCapacity = -1, 1.3455 + UBool doCopyArray = TRUE, 1.3456 + int32_t **pBufferToDelete = 0, 1.3457 + UBool forceClone = FALSE); 1.3458 + 1.3459 + /** 1.3460 + * Common function for UnicodeString case mappings. 1.3461 + * The stringCaseMapper has the same type UStringCaseMapper 1.3462 + * as in ustr_imp.h for ustrcase_map(). 1.3463 + */ 1.3464 + UnicodeString & 1.3465 + caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); 1.3466 + 1.3467 + // ref counting 1.3468 + void addRef(void); 1.3469 + int32_t removeRef(void); 1.3470 + int32_t refCount(void) const; 1.3471 + 1.3472 + // constants 1.3473 + enum { 1.3474 + // Set the stack buffer size so that sizeof(UnicodeString) is, 1.3475 + // naturally (without padding), a multiple of sizeof(pointer). 1.3476 + US_STACKBUF_SIZE= sizeof(void *)==4 ? 
13 : 15, // Size of stack buffer for short strings 1.3477 + kInvalidUChar=0xffff, // invalid UChar index 1.3478 + kGrowSize=128, // grow size for this buffer 1.3479 + kInvalidHashCode=0, // invalid hash code 1.3480 + kEmptyHashCode=1, // hash code for empty string 1.3481 + 1.3482 + // bit flag values for fFlags 1.3483 + kIsBogus=1, // this string is bogus, i.e., not valid or NULL 1.3484 + kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields 1.3485 + kRefCounted=4, // there is a refCount field before the characters in fArray 1.3486 + kBufferIsReadonly=8,// do not write to this buffer 1.3487 + kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 1.3488 + // and releaseBuffer(newLength) must be called 1.3489 + 1.3490 + // combined values for convenience 1.3491 + kShortString=kUsingStackBuffer, 1.3492 + kLongString=kRefCounted, 1.3493 + kReadonlyAlias=kBufferIsReadonly, 1.3494 + kWritableAlias=0 1.3495 + }; 1.3496 + 1.3497 + friend class StringThreadTest; 1.3498 + friend class UnicodeStringAppendable; 1.3499 + 1.3500 + union StackBufferOrFields; // forward declaration necessary before friend declaration 1.3501 + friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 1.3502 + 1.3503 + /* 1.3504 + * The following are all the class fields that are stored 1.3505 + * in each UnicodeString object. 1.3506 + * Note that UnicodeString has virtual functions, 1.3507 + * therefore there is an implicit vtable pointer 1.3508 + * as the first real field. 1.3509 + * The fields should be aligned such that no padding is necessary. 1.3510 + * On 32-bit machines, the size should be 32 bytes, 1.3511 + * on 64-bit machines (8-byte pointers), it should be 40 bytes. 1.3512 + * 1.3513 + * We use a hack to achieve this. 
1.3514 + * 1.3515 + * With at least some compilers, each of the following is forced to 1.3516 + * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 1.3517 + * rounded up with additional padding if the fields do not already fit that requirement: 1.3518 + * - sizeof(class UnicodeString) 1.3519 + * - offsetof(UnicodeString, fUnion) 1.3520 + * - sizeof(fUnion) 1.3521 + * - sizeof(fFields) 1.3522 + * 1.3523 + * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars) 1.3524 + * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines. 1.3525 + * (Padding at the end of fFields is ok: 1.3526 + * As long as there is no padding after fStackBuffer, it is not wasted space.) 1.3527 + * 1.3528 + * We further assume that the compiler does not reorder the fields, 1.3529 + * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion, 1.3530 + * with at most some padding (but no other field) in between. 1.3531 + * (Padding there would be wasted space, but functionally harmless.) 1.3532 + * 1.3533 + * We use a few more sizeof(pointer)'s chunks of space with 1.3534 + * fRestOfStackBuffer, fShortLength and fFlags, 1.3535 + * to get up exactly to the intended sizeof(UnicodeString). 
1.3536 + */ 1.3537 + // (implicit) *vtable; 1.3538 + union StackBufferOrFields { 1.3539 + // fStackBuffer is used iff (fFlags&kUsingStackBuffer) 1.3540 + // else fFields is used 1.3541 + UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer 1.3542 + struct { 1.3543 + UChar *fArray; // the Unicode data 1.3544 + int32_t fCapacity; // capacity of fArray (in UChars) 1.3545 + int32_t fLength; // number of characters in fArray if >127; else undefined 1.3546 + } fFields; 1.3547 + } fUnion; 1.3548 + UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; 1.3549 + int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength 1.3550 + uint8_t fFlags; // bit flags: see constants above 1.3551 +}; 1.3552 + 1.3553 +/** 1.3554 + * Create a new UnicodeString with the concatenation of two others. 1.3555 + * 1.3556 + * @param s1 The first string to be copied to the new one. 1.3557 + * @param s2 The second string to be copied to the new one, after s1. 1.3558 + * @return UnicodeString(s1).append(s2) 1.3559 + * @stable ICU 2.8 1.3560 + */ 1.3561 +U_COMMON_API UnicodeString U_EXPORT2 1.3562 +operator+ (const UnicodeString &s1, const UnicodeString &s2); 1.3563 + 1.3564 +//======================================== 1.3565 +// Inline members 1.3566 +//======================================== 1.3567 + 1.3568 +//======================================== 1.3569 +// Privates 1.3570 +//======================================== 1.3571 + 1.3572 +inline void 1.3573 +UnicodeString::pinIndex(int32_t& start) const 1.3574 +{ 1.3575 + // pin index 1.3576 + if(start < 0) { 1.3577 + start = 0; 1.3578 + } else if(start > length()) { 1.3579 + start = length(); 1.3580 + } 1.3581 +} 1.3582 + 1.3583 +inline void 1.3584 +UnicodeString::pinIndices(int32_t& start, 1.3585 + int32_t& _length) const 1.3586 +{ 1.3587 + // pin indices 1.3588 + int32_t len = length(); 1.3589 + if(start < 0) { 1.3590 + start = 0; 1.3591 + } else if(start > len) { 1.3592 + start = len; 1.3593 + } 
1.3594 + if(_length < 0) { 1.3595 + _length = 0; 1.3596 + } else if(_length > (len - start)) { 1.3597 + _length = (len - start); 1.3598 + } 1.3599 +} 1.3600 + 1.3601 +inline UChar* 1.3602 +UnicodeString::getArrayStart() 1.3603 +{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 1.3604 + 1.3605 +inline const UChar* 1.3606 +UnicodeString::getArrayStart() const 1.3607 +{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 1.3608 + 1.3609 +//======================================== 1.3610 +// Default constructor 1.3611 +//======================================== 1.3612 + 1.3613 +inline 1.3614 +UnicodeString::UnicodeString() 1.3615 + : fShortLength(0), 1.3616 + fFlags(kShortString) 1.3617 +{} 1.3618 + 1.3619 +//======================================== 1.3620 +// Read-only implementation methods 1.3621 +//======================================== 1.3622 +inline int32_t 1.3623 +UnicodeString::length() const 1.3624 +{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } 1.3625 + 1.3626 +inline int32_t 1.3627 +UnicodeString::getCapacity() const 1.3628 +{ return (fFlags&kUsingStackBuffer) ? 
US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } 1.3629 + 1.3630 +inline int32_t 1.3631 +UnicodeString::hashCode() const 1.3632 +{ return doHashCode(); } 1.3633 + 1.3634 +inline UBool 1.3635 +UnicodeString::isBogus() const 1.3636 +{ return (UBool)(fFlags & kIsBogus); } 1.3637 + 1.3638 +inline UBool 1.3639 +UnicodeString::isWritable() const 1.3640 +{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } 1.3641 + 1.3642 +inline UBool 1.3643 +UnicodeString::isBufferWritable() const 1.3644 +{ 1.3645 + return (UBool)( 1.3646 + !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 1.3647 + (!(fFlags&kRefCounted) || refCount()==1)); 1.3648 +} 1.3649 + 1.3650 +inline const UChar * 1.3651 +UnicodeString::getBuffer() const { 1.3652 + if(fFlags&(kIsBogus|kOpenGetBuffer)) { 1.3653 + return 0; 1.3654 + } else if(fFlags&kUsingStackBuffer) { 1.3655 + return fUnion.fStackBuffer; 1.3656 + } else { 1.3657 + return fUnion.fFields.fArray; 1.3658 + } 1.3659 +} 1.3660 + 1.3661 +//======================================== 1.3662 +// Read-only alias methods 1.3663 +//======================================== 1.3664 +inline int8_t 1.3665 +UnicodeString::doCompare(int32_t start, 1.3666 + int32_t thisLength, 1.3667 + const UnicodeString& srcText, 1.3668 + int32_t srcStart, 1.3669 + int32_t srcLength) const 1.3670 +{ 1.3671 + if(srcText.isBogus()) { 1.3672 + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 1.3673 + } else { 1.3674 + srcText.pinIndices(srcStart, srcLength); 1.3675 + return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 1.3676 + } 1.3677 +} 1.3678 + 1.3679 +inline UBool 1.3680 +UnicodeString::operator== (const UnicodeString& text) const 1.3681 +{ 1.3682 + if(isBogus()) { 1.3683 + return text.isBogus(); 1.3684 + } else { 1.3685 + int32_t len = length(), textLength = text.length(); 1.3686 + return !text.isBogus() && len == textLength && doEquals(text, len); 1.3687 + } 1.3688 +} 1.3689 + 1.3690 +inline UBool 1.3691 
+UnicodeString::operator!= (const UnicodeString& text) const 1.3692 +{ return (! operator==(text)); } 1.3693 + 1.3694 +inline UBool 1.3695 +UnicodeString::operator> (const UnicodeString& text) const 1.3696 +{ return doCompare(0, length(), text, 0, text.length()) == 1; } 1.3697 + 1.3698 +inline UBool 1.3699 +UnicodeString::operator< (const UnicodeString& text) const 1.3700 +{ return doCompare(0, length(), text, 0, text.length()) == -1; } 1.3701 + 1.3702 +inline UBool 1.3703 +UnicodeString::operator>= (const UnicodeString& text) const 1.3704 +{ return doCompare(0, length(), text, 0, text.length()) != -1; } 1.3705 + 1.3706 +inline UBool 1.3707 +UnicodeString::operator<= (const UnicodeString& text) const 1.3708 +{ return doCompare(0, length(), text, 0, text.length()) != 1; } 1.3709 + 1.3710 +inline int8_t 1.3711 +UnicodeString::compare(const UnicodeString& text) const 1.3712 +{ return doCompare(0, length(), text, 0, text.length()); } 1.3713 + 1.3714 +inline int8_t 1.3715 +UnicodeString::compare(int32_t start, 1.3716 + int32_t _length, 1.3717 + const UnicodeString& srcText) const 1.3718 +{ return doCompare(start, _length, srcText, 0, srcText.length()); } 1.3719 + 1.3720 +inline int8_t 1.3721 +UnicodeString::compare(const UChar *srcChars, 1.3722 + int32_t srcLength) const 1.3723 +{ return doCompare(0, length(), srcChars, 0, srcLength); } 1.3724 + 1.3725 +inline int8_t 1.3726 +UnicodeString::compare(int32_t start, 1.3727 + int32_t _length, 1.3728 + const UnicodeString& srcText, 1.3729 + int32_t srcStart, 1.3730 + int32_t srcLength) const 1.3731 +{ return doCompare(start, _length, srcText, srcStart, srcLength); } 1.3732 + 1.3733 +inline int8_t 1.3734 +UnicodeString::compare(int32_t start, 1.3735 + int32_t _length, 1.3736 + const UChar *srcChars) const 1.3737 +{ return doCompare(start, _length, srcChars, 0, _length); } 1.3738 + 1.3739 +inline int8_t 1.3740 +UnicodeString::compare(int32_t start, 1.3741 + int32_t _length, 1.3742 + const UChar *srcChars, 1.3743 + int32_t 
srcStart, 1.3744 + int32_t srcLength) const 1.3745 +{ return doCompare(start, _length, srcChars, srcStart, srcLength); } 1.3746 + 1.3747 +inline int8_t 1.3748 +UnicodeString::compareBetween(int32_t start, 1.3749 + int32_t limit, 1.3750 + const UnicodeString& srcText, 1.3751 + int32_t srcStart, 1.3752 + int32_t srcLimit) const 1.3753 +{ return doCompare(start, limit - start, 1.3754 + srcText, srcStart, srcLimit - srcStart); } 1.3755 + 1.3756 +inline int8_t 1.3757 +UnicodeString::doCompareCodePointOrder(int32_t start, 1.3758 + int32_t thisLength, 1.3759 + const UnicodeString& srcText, 1.3760 + int32_t srcStart, 1.3761 + int32_t srcLength) const 1.3762 +{ 1.3763 + if(srcText.isBogus()) { 1.3764 + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 1.3765 + } else { 1.3766 + srcText.pinIndices(srcStart, srcLength); 1.3767 + return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 1.3768 + } 1.3769 +} 1.3770 + 1.3771 +inline int8_t 1.3772 +UnicodeString::compareCodePointOrder(const UnicodeString& text) const 1.3773 +{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 1.3774 + 1.3775 +inline int8_t 1.3776 +UnicodeString::compareCodePointOrder(int32_t start, 1.3777 + int32_t _length, 1.3778 + const UnicodeString& srcText) const 1.3779 +{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 1.3780 + 1.3781 +inline int8_t 1.3782 +UnicodeString::compareCodePointOrder(const UChar *srcChars, 1.3783 + int32_t srcLength) const 1.3784 +{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 1.3785 + 1.3786 +inline int8_t 1.3787 +UnicodeString::compareCodePointOrder(int32_t start, 1.3788 + int32_t _length, 1.3789 + const UnicodeString& srcText, 1.3790 + int32_t srcStart, 1.3791 + int32_t srcLength) const 1.3792 +{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 1.3793 + 1.3794 +inline int8_t 1.3795 
+UnicodeString::compareCodePointOrder(int32_t start, 1.3796 + int32_t _length, 1.3797 + const UChar *srcChars) const 1.3798 +{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 1.3799 + 1.3800 +inline int8_t 1.3801 +UnicodeString::compareCodePointOrder(int32_t start, 1.3802 + int32_t _length, 1.3803 + const UChar *srcChars, 1.3804 + int32_t srcStart, 1.3805 + int32_t srcLength) const 1.3806 +{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 1.3807 + 1.3808 +inline int8_t 1.3809 +UnicodeString::compareCodePointOrderBetween(int32_t start, 1.3810 + int32_t limit, 1.3811 + const UnicodeString& srcText, 1.3812 + int32_t srcStart, 1.3813 + int32_t srcLimit) const 1.3814 +{ return doCompareCodePointOrder(start, limit - start, 1.3815 + srcText, srcStart, srcLimit - srcStart); } 1.3816 + 1.3817 +inline int8_t 1.3818 +UnicodeString::doCaseCompare(int32_t start, 1.3819 + int32_t thisLength, 1.3820 + const UnicodeString &srcText, 1.3821 + int32_t srcStart, 1.3822 + int32_t srcLength, 1.3823 + uint32_t options) const 1.3824 +{ 1.3825 + if(srcText.isBogus()) { 1.3826 + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 1.3827 + } else { 1.3828 + srcText.pinIndices(srcStart, srcLength); 1.3829 + return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 1.3830 + } 1.3831 +} 1.3832 + 1.3833 +inline int8_t 1.3834 +UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 1.3835 + return doCaseCompare(0, length(), text, 0, text.length(), options); 1.3836 +} 1.3837 + 1.3838 +inline int8_t 1.3839 +UnicodeString::caseCompare(int32_t start, 1.3840 + int32_t _length, 1.3841 + const UnicodeString &srcText, 1.3842 + uint32_t options) const { 1.3843 + return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 1.3844 +} 1.3845 + 1.3846 +inline int8_t 1.3847 +UnicodeString::caseCompare(const UChar *srcChars, 1.3848 + int32_t srcLength, 1.3849 + 
uint32_t options) const { 1.3850 + return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 1.3851 +} 1.3852 + 1.3853 +inline int8_t 1.3854 +UnicodeString::caseCompare(int32_t start, 1.3855 + int32_t _length, 1.3856 + const UnicodeString &srcText, 1.3857 + int32_t srcStart, 1.3858 + int32_t srcLength, 1.3859 + uint32_t options) const { 1.3860 + return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 1.3861 +} 1.3862 + 1.3863 +inline int8_t 1.3864 +UnicodeString::caseCompare(int32_t start, 1.3865 + int32_t _length, 1.3866 + const UChar *srcChars, 1.3867 + uint32_t options) const { 1.3868 + return doCaseCompare(start, _length, srcChars, 0, _length, options); 1.3869 +} 1.3870 + 1.3871 +inline int8_t 1.3872 +UnicodeString::caseCompare(int32_t start, 1.3873 + int32_t _length, 1.3874 + const UChar *srcChars, 1.3875 + int32_t srcStart, 1.3876 + int32_t srcLength, 1.3877 + uint32_t options) const { 1.3878 + return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 1.3879 +} 1.3880 + 1.3881 +inline int8_t 1.3882 +UnicodeString::caseCompareBetween(int32_t start, 1.3883 + int32_t limit, 1.3884 + const UnicodeString &srcText, 1.3885 + int32_t srcStart, 1.3886 + int32_t srcLimit, 1.3887 + uint32_t options) const { 1.3888 + return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 1.3889 +} 1.3890 + 1.3891 +inline int32_t 1.3892 +UnicodeString::indexOf(const UnicodeString& srcText, 1.3893 + int32_t srcStart, 1.3894 + int32_t srcLength, 1.3895 + int32_t start, 1.3896 + int32_t _length) const 1.3897 +{ 1.3898 + if(!srcText.isBogus()) { 1.3899 + srcText.pinIndices(srcStart, srcLength); 1.3900 + if(srcLength > 0) { 1.3901 + return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 1.3902 + } 1.3903 + } 1.3904 + return -1; 1.3905 +} 1.3906 + 1.3907 +inline int32_t 1.3908 +UnicodeString::indexOf(const UnicodeString& text) const 1.3909 +{ return indexOf(text, 0, text.length(), 0, 
length()); } 1.3910 + 1.3911 +inline int32_t 1.3912 +UnicodeString::indexOf(const UnicodeString& text, 1.3913 + int32_t start) const { 1.3914 + pinIndex(start); 1.3915 + return indexOf(text, 0, text.length(), start, length() - start); 1.3916 +} 1.3917 + 1.3918 +inline int32_t 1.3919 +UnicodeString::indexOf(const UnicodeString& text, 1.3920 + int32_t start, 1.3921 + int32_t _length) const 1.3922 +{ return indexOf(text, 0, text.length(), start, _length); } 1.3923 + 1.3924 +inline int32_t 1.3925 +UnicodeString::indexOf(const UChar *srcChars, 1.3926 + int32_t srcLength, 1.3927 + int32_t start) const { 1.3928 + pinIndex(start); 1.3929 + return indexOf(srcChars, 0, srcLength, start, length() - start); 1.3930 +} 1.3931 + 1.3932 +inline int32_t 1.3933 +UnicodeString::indexOf(const UChar *srcChars, 1.3934 + int32_t srcLength, 1.3935 + int32_t start, 1.3936 + int32_t _length) const 1.3937 +{ return indexOf(srcChars, 0, srcLength, start, _length); } 1.3938 + 1.3939 +inline int32_t 1.3940 +UnicodeString::indexOf(UChar c, 1.3941 + int32_t start, 1.3942 + int32_t _length) const 1.3943 +{ return doIndexOf(c, start, _length); } 1.3944 + 1.3945 +inline int32_t 1.3946 +UnicodeString::indexOf(UChar32 c, 1.3947 + int32_t start, 1.3948 + int32_t _length) const 1.3949 +{ return doIndexOf(c, start, _length); } 1.3950 + 1.3951 +inline int32_t 1.3952 +UnicodeString::indexOf(UChar c) const 1.3953 +{ return doIndexOf(c, 0, length()); } 1.3954 + 1.3955 +inline int32_t 1.3956 +UnicodeString::indexOf(UChar32 c) const 1.3957 +{ return indexOf(c, 0, length()); } 1.3958 + 1.3959 +inline int32_t 1.3960 +UnicodeString::indexOf(UChar c, 1.3961 + int32_t start) const { 1.3962 + pinIndex(start); 1.3963 + return doIndexOf(c, start, length() - start); 1.3964 +} 1.3965 + 1.3966 +inline int32_t 1.3967 +UnicodeString::indexOf(UChar32 c, 1.3968 + int32_t start) const { 1.3969 + pinIndex(start); 1.3970 + return indexOf(c, start, length() - start); 1.3971 +} 1.3972 + 1.3973 +inline int32_t 1.3974 
+UnicodeString::lastIndexOf(const UChar *srcChars, 1.3975 + int32_t srcLength, 1.3976 + int32_t start, 1.3977 + int32_t _length) const 1.3978 +{ return lastIndexOf(srcChars, 0, srcLength, start, _length); } 1.3979 + 1.3980 +inline int32_t 1.3981 +UnicodeString::lastIndexOf(const UChar *srcChars, 1.3982 + int32_t srcLength, 1.3983 + int32_t start) const { 1.3984 + pinIndex(start); 1.3985 + return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 1.3986 +} 1.3987 + 1.3988 +inline int32_t 1.3989 +UnicodeString::lastIndexOf(const UnicodeString& srcText, 1.3990 + int32_t srcStart, 1.3991 + int32_t srcLength, 1.3992 + int32_t start, 1.3993 + int32_t _length) const 1.3994 +{ 1.3995 + if(!srcText.isBogus()) { 1.3996 + srcText.pinIndices(srcStart, srcLength); 1.3997 + if(srcLength > 0) { 1.3998 + return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 1.3999 + } 1.4000 + } 1.4001 + return -1; 1.4002 +} 1.4003 + 1.4004 +inline int32_t 1.4005 +UnicodeString::lastIndexOf(const UnicodeString& text, 1.4006 + int32_t start, 1.4007 + int32_t _length) const 1.4008 +{ return lastIndexOf(text, 0, text.length(), start, _length); } 1.4009 + 1.4010 +inline int32_t 1.4011 +UnicodeString::lastIndexOf(const UnicodeString& text, 1.4012 + int32_t start) const { 1.4013 + pinIndex(start); 1.4014 + return lastIndexOf(text, 0, text.length(), start, length() - start); 1.4015 +} 1.4016 + 1.4017 +inline int32_t 1.4018 +UnicodeString::lastIndexOf(const UnicodeString& text) const 1.4019 +{ return lastIndexOf(text, 0, text.length(), 0, length()); } 1.4020 + 1.4021 +inline int32_t 1.4022 +UnicodeString::lastIndexOf(UChar c, 1.4023 + int32_t start, 1.4024 + int32_t _length) const 1.4025 +{ return doLastIndexOf(c, start, _length); } 1.4026 + 1.4027 +inline int32_t 1.4028 +UnicodeString::lastIndexOf(UChar32 c, 1.4029 + int32_t start, 1.4030 + int32_t _length) const { 1.4031 + return doLastIndexOf(c, start, _length); 1.4032 +} 1.4033 + 1.4034 +inline int32_t 1.4035 
+UnicodeString::lastIndexOf(UChar c) const 1.4036 +{ return doLastIndexOf(c, 0, length()); } 1.4037 + 1.4038 +inline int32_t 1.4039 +UnicodeString::lastIndexOf(UChar32 c) const { 1.4040 + return lastIndexOf(c, 0, length()); 1.4041 +} 1.4042 + 1.4043 +inline int32_t 1.4044 +UnicodeString::lastIndexOf(UChar c, 1.4045 + int32_t start) const { 1.4046 + pinIndex(start); 1.4047 + return doLastIndexOf(c, start, length() - start); 1.4048 +} 1.4049 + 1.4050 +inline int32_t 1.4051 +UnicodeString::lastIndexOf(UChar32 c, 1.4052 + int32_t start) const { 1.4053 + pinIndex(start); 1.4054 + return lastIndexOf(c, start, length() - start); 1.4055 +} 1.4056 + 1.4057 +inline UBool 1.4058 +UnicodeString::startsWith(const UnicodeString& text) const 1.4059 +{ return compare(0, text.length(), text, 0, text.length()) == 0; } 1.4060 + 1.4061 +inline UBool 1.4062 +UnicodeString::startsWith(const UnicodeString& srcText, 1.4063 + int32_t srcStart, 1.4064 + int32_t srcLength) const 1.4065 +{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 1.4066 + 1.4067 +inline UBool 1.4068 +UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { 1.4069 + if(srcLength < 0) { 1.4070 + srcLength = u_strlen(srcChars); 1.4071 + } 1.4072 + return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; 1.4073 +} 1.4074 + 1.4075 +inline UBool 1.4076 +UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { 1.4077 + if(srcLength < 0) { 1.4078 + srcLength = u_strlen(srcChars); 1.4079 + } 1.4080 + return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; 1.4081 +} 1.4082 + 1.4083 +inline UBool 1.4084 +UnicodeString::endsWith(const UnicodeString& text) const 1.4085 +{ return doCompare(length() - text.length(), text.length(), 1.4086 + text, 0, text.length()) == 0; } 1.4087 + 1.4088 +inline UBool 1.4089 +UnicodeString::endsWith(const UnicodeString& srcText, 1.4090 + int32_t srcStart, 1.4091 + int32_t srcLength) const { 1.4092 + 
srcText.pinIndices(srcStart, srcLength); 1.4093 + return doCompare(length() - srcLength, srcLength, 1.4094 + srcText, srcStart, srcLength) == 0; 1.4095 +} 1.4096 + 1.4097 +inline UBool 1.4098 +UnicodeString::endsWith(const UChar *srcChars, 1.4099 + int32_t srcLength) const { 1.4100 + if(srcLength < 0) { 1.4101 + srcLength = u_strlen(srcChars); 1.4102 + } 1.4103 + return doCompare(length() - srcLength, srcLength, 1.4104 + srcChars, 0, srcLength) == 0; 1.4105 +} 1.4106 + 1.4107 +inline UBool 1.4108 +UnicodeString::endsWith(const UChar *srcChars, 1.4109 + int32_t srcStart, 1.4110 + int32_t srcLength) const { 1.4111 + if(srcLength < 0) { 1.4112 + srcLength = u_strlen(srcChars + srcStart); 1.4113 + } 1.4114 + return doCompare(length() - srcLength, srcLength, 1.4115 + srcChars, srcStart, srcLength) == 0; 1.4116 +} 1.4117 + 1.4118 +//======================================== 1.4119 +// replace 1.4120 +//======================================== 1.4121 +inline UnicodeString& 1.4122 +UnicodeString::replace(int32_t start, 1.4123 + int32_t _length, 1.4124 + const UnicodeString& srcText) 1.4125 +{ return doReplace(start, _length, srcText, 0, srcText.length()); } 1.4126 + 1.4127 +inline UnicodeString& 1.4128 +UnicodeString::replace(int32_t start, 1.4129 + int32_t _length, 1.4130 + const UnicodeString& srcText, 1.4131 + int32_t srcStart, 1.4132 + int32_t srcLength) 1.4133 +{ return doReplace(start, _length, srcText, srcStart, srcLength); } 1.4134 + 1.4135 +inline UnicodeString& 1.4136 +UnicodeString::replace(int32_t start, 1.4137 + int32_t _length, 1.4138 + const UChar *srcChars, 1.4139 + int32_t srcLength) 1.4140 +{ return doReplace(start, _length, srcChars, 0, srcLength); } 1.4141 + 1.4142 +inline UnicodeString& 1.4143 +UnicodeString::replace(int32_t start, 1.4144 + int32_t _length, 1.4145 + const UChar *srcChars, 1.4146 + int32_t srcStart, 1.4147 + int32_t srcLength) 1.4148 +{ return doReplace(start, _length, srcChars, srcStart, srcLength); } 1.4149 + 1.4150 +inline 
UnicodeString& 1.4151 +UnicodeString::replace(int32_t start, 1.4152 + int32_t _length, 1.4153 + UChar srcChar) 1.4154 +{ return doReplace(start, _length, &srcChar, 0, 1); } 1.4155 + 1.4156 +inline UnicodeString& 1.4157 +UnicodeString::replaceBetween(int32_t start, 1.4158 + int32_t limit, 1.4159 + const UnicodeString& srcText) 1.4160 +{ return doReplace(start, limit - start, srcText, 0, srcText.length()); } 1.4161 + 1.4162 +inline UnicodeString& 1.4163 +UnicodeString::replaceBetween(int32_t start, 1.4164 + int32_t limit, 1.4165 + const UnicodeString& srcText, 1.4166 + int32_t srcStart, 1.4167 + int32_t srcLimit) 1.4168 +{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 1.4169 + 1.4170 +inline UnicodeString& 1.4171 +UnicodeString::findAndReplace(const UnicodeString& oldText, 1.4172 + const UnicodeString& newText) 1.4173 +{ return findAndReplace(0, length(), oldText, 0, oldText.length(), 1.4174 + newText, 0, newText.length()); } 1.4175 + 1.4176 +inline UnicodeString& 1.4177 +UnicodeString::findAndReplace(int32_t start, 1.4178 + int32_t _length, 1.4179 + const UnicodeString& oldText, 1.4180 + const UnicodeString& newText) 1.4181 +{ return findAndReplace(start, _length, oldText, 0, oldText.length(), 1.4182 + newText, 0, newText.length()); } 1.4183 + 1.4184 +// ============================ 1.4185 +// extract 1.4186 +// ============================ 1.4187 +inline void 1.4188 +UnicodeString::doExtract(int32_t start, 1.4189 + int32_t _length, 1.4190 + UnicodeString& target) const 1.4191 +{ target.replace(0, target.length(), *this, start, _length); } 1.4192 + 1.4193 +inline void 1.4194 +UnicodeString::extract(int32_t start, 1.4195 + int32_t _length, 1.4196 + UChar *target, 1.4197 + int32_t targetStart) const 1.4198 +{ doExtract(start, _length, target, targetStart); } 1.4199 + 1.4200 +inline void 1.4201 +UnicodeString::extract(int32_t start, 1.4202 + int32_t _length, 1.4203 + UnicodeString& target) const 1.4204 +{ doExtract(start, _length, 
target); } 1.4205 + 1.4206 +#if !UCONFIG_NO_CONVERSION 1.4207 + 1.4208 +inline int32_t 1.4209 +UnicodeString::extract(int32_t start, 1.4210 + int32_t _length, 1.4211 + char *dst, 1.4212 + const char *codepage) const 1.4213 + 1.4214 +{ 1.4215 + // This dstSize value will be checked explicitly 1.4216 + return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); 1.4217 +} 1.4218 + 1.4219 +#endif 1.4220 + 1.4221 +inline void 1.4222 +UnicodeString::extractBetween(int32_t start, 1.4223 + int32_t limit, 1.4224 + UChar *dst, 1.4225 + int32_t dstStart) const { 1.4226 + pinIndex(start); 1.4227 + pinIndex(limit); 1.4228 + doExtract(start, limit - start, dst, dstStart); 1.4229 +} 1.4230 + 1.4231 +inline UnicodeString 1.4232 +UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 1.4233 + return tempSubString(start, limit - start); 1.4234 +} 1.4235 + 1.4236 +inline UChar 1.4237 +UnicodeString::doCharAt(int32_t offset) const 1.4238 +{ 1.4239 + if((uint32_t)offset < (uint32_t)length()) { 1.4240 + return getArrayStart()[offset]; 1.4241 + } else { 1.4242 + return kInvalidUChar; 1.4243 + } 1.4244 +} 1.4245 + 1.4246 +inline UChar 1.4247 +UnicodeString::charAt(int32_t offset) const 1.4248 +{ return doCharAt(offset); } 1.4249 + 1.4250 +inline UChar 1.4251 +UnicodeString::operator[] (int32_t offset) const 1.4252 +{ return doCharAt(offset); } 1.4253 + 1.4254 +inline UBool 1.4255 +UnicodeString::isEmpty() const { 1.4256 + return fShortLength == 0; 1.4257 +} 1.4258 + 1.4259 +//======================================== 1.4260 +// Write implementation methods 1.4261 +//======================================== 1.4262 +inline void 1.4263 +UnicodeString::setLength(int32_t len) { 1.4264 + if(len <= 127) { 1.4265 + fShortLength = (int8_t)len; 1.4266 + } else { 1.4267 + fShortLength = (int8_t)-1; 1.4268 + fUnion.fFields.fLength = len; 1.4269 + } 1.4270 +} 1.4271 + 1.4272 +inline void 1.4273 +UnicodeString::setToEmpty() { 1.4274 + fShortLength = 0; 1.4275 + fFlags = 
kShortString; 1.4276 +} 1.4277 + 1.4278 +inline void 1.4279 +UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 1.4280 + setLength(len); 1.4281 + fUnion.fFields.fArray = array; 1.4282 + fUnion.fFields.fCapacity = capacity; 1.4283 +} 1.4284 + 1.4285 +inline UnicodeString& 1.4286 +UnicodeString::operator= (UChar ch) 1.4287 +{ return doReplace(0, length(), &ch, 0, 1); } 1.4288 + 1.4289 +inline UnicodeString& 1.4290 +UnicodeString::operator= (UChar32 ch) 1.4291 +{ return replace(0, length(), ch); } 1.4292 + 1.4293 +inline UnicodeString& 1.4294 +UnicodeString::setTo(const UnicodeString& srcText, 1.4295 + int32_t srcStart, 1.4296 + int32_t srcLength) 1.4297 +{ 1.4298 + unBogus(); 1.4299 + return doReplace(0, length(), srcText, srcStart, srcLength); 1.4300 +} 1.4301 + 1.4302 +inline UnicodeString& 1.4303 +UnicodeString::setTo(const UnicodeString& srcText, 1.4304 + int32_t srcStart) 1.4305 +{ 1.4306 + unBogus(); 1.4307 + srcText.pinIndex(srcStart); 1.4308 + return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 1.4309 +} 1.4310 + 1.4311 +inline UnicodeString& 1.4312 +UnicodeString::setTo(const UnicodeString& srcText) 1.4313 +{ 1.4314 + return copyFrom(srcText); 1.4315 +} 1.4316 + 1.4317 +inline UnicodeString& 1.4318 +UnicodeString::setTo(const UChar *srcChars, 1.4319 + int32_t srcLength) 1.4320 +{ 1.4321 + unBogus(); 1.4322 + return doReplace(0, length(), srcChars, 0, srcLength); 1.4323 +} 1.4324 + 1.4325 +inline UnicodeString& 1.4326 +UnicodeString::setTo(UChar srcChar) 1.4327 +{ 1.4328 + unBogus(); 1.4329 + return doReplace(0, length(), &srcChar, 0, 1); 1.4330 +} 1.4331 + 1.4332 +inline UnicodeString& 1.4333 +UnicodeString::setTo(UChar32 srcChar) 1.4334 +{ 1.4335 + unBogus(); 1.4336 + return replace(0, length(), srcChar); 1.4337 +} 1.4338 + 1.4339 +inline UnicodeString& 1.4340 +UnicodeString::append(const UnicodeString& srcText, 1.4341 + int32_t srcStart, 1.4342 + int32_t srcLength) 1.4343 +{ return doReplace(length(), 0, 
srcText, srcStart, srcLength); } 1.4344 + 1.4345 +inline UnicodeString& 1.4346 +UnicodeString::append(const UnicodeString& srcText) 1.4347 +{ return doReplace(length(), 0, srcText, 0, srcText.length()); } 1.4348 + 1.4349 +inline UnicodeString& 1.4350 +UnicodeString::append(const UChar *srcChars, 1.4351 + int32_t srcStart, 1.4352 + int32_t srcLength) 1.4353 +{ return doReplace(length(), 0, srcChars, srcStart, srcLength); } 1.4354 + 1.4355 +inline UnicodeString& 1.4356 +UnicodeString::append(const UChar *srcChars, 1.4357 + int32_t srcLength) 1.4358 +{ return doReplace(length(), 0, srcChars, 0, srcLength); } 1.4359 + 1.4360 +inline UnicodeString& 1.4361 +UnicodeString::append(UChar srcChar) 1.4362 +{ return doReplace(length(), 0, &srcChar, 0, 1); } 1.4363 + 1.4364 +inline UnicodeString& 1.4365 +UnicodeString::operator+= (UChar ch) 1.4366 +{ return doReplace(length(), 0, &ch, 0, 1); } 1.4367 + 1.4368 +inline UnicodeString& 1.4369 +UnicodeString::operator+= (UChar32 ch) { 1.4370 + return append(ch); 1.4371 +} 1.4372 + 1.4373 +inline UnicodeString& 1.4374 +UnicodeString::operator+= (const UnicodeString& srcText) 1.4375 +{ return doReplace(length(), 0, srcText, 0, srcText.length()); } 1.4376 + 1.4377 +inline UnicodeString& 1.4378 +UnicodeString::insert(int32_t start, 1.4379 + const UnicodeString& srcText, 1.4380 + int32_t srcStart, 1.4381 + int32_t srcLength) 1.4382 +{ return doReplace(start, 0, srcText, srcStart, srcLength); } 1.4383 + 1.4384 +inline UnicodeString& 1.4385 +UnicodeString::insert(int32_t start, 1.4386 + const UnicodeString& srcText) 1.4387 +{ return doReplace(start, 0, srcText, 0, srcText.length()); } 1.4388 + 1.4389 +inline UnicodeString& 1.4390 +UnicodeString::insert(int32_t start, 1.4391 + const UChar *srcChars, 1.4392 + int32_t srcStart, 1.4393 + int32_t srcLength) 1.4394 +{ return doReplace(start, 0, srcChars, srcStart, srcLength); } 1.4395 + 1.4396 +inline UnicodeString& 1.4397 +UnicodeString::insert(int32_t start, 1.4398 + const UChar *srcChars, 
1.4399 + int32_t srcLength) 1.4400 +{ return doReplace(start, 0, srcChars, 0, srcLength); } 1.4401 + 1.4402 +inline UnicodeString& 1.4403 +UnicodeString::insert(int32_t start, 1.4404 + UChar srcChar) 1.4405 +{ return doReplace(start, 0, &srcChar, 0, 1); } 1.4406 + 1.4407 +inline UnicodeString& 1.4408 +UnicodeString::insert(int32_t start, 1.4409 + UChar32 srcChar) 1.4410 +{ return replace(start, 0, srcChar); } 1.4411 + 1.4412 + 1.4413 +inline UnicodeString& 1.4414 +UnicodeString::remove() 1.4415 +{ 1.4416 + // remove() of a bogus string makes the string empty and non-bogus 1.4417 + if(isBogus()) { 1.4418 + setToEmpty(); 1.4419 + } else { 1.4420 + fShortLength = 0; 1.4421 + } 1.4422 + return *this; 1.4423 +} 1.4424 + 1.4425 +inline UnicodeString& 1.4426 +UnicodeString::remove(int32_t start, 1.4427 + int32_t _length) 1.4428 +{ 1.4429 + if(start <= 0 && _length == INT32_MAX) { 1.4430 + // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 1.4431 + return remove(); 1.4432 + } 1.4433 + return doReplace(start, _length, NULL, 0, 0); 1.4434 +} 1.4435 + 1.4436 +inline UnicodeString& 1.4437 +UnicodeString::removeBetween(int32_t start, 1.4438 + int32_t limit) 1.4439 +{ return doReplace(start, limit - start, NULL, 0, 0); } 1.4440 + 1.4441 +inline UnicodeString & 1.4442 +UnicodeString::retainBetween(int32_t start, int32_t limit) { 1.4443 + truncate(limit); 1.4444 + return doReplace(0, start, NULL, 0, 0); 1.4445 +} 1.4446 + 1.4447 +inline UBool 1.4448 +UnicodeString::truncate(int32_t targetLength) 1.4449 +{ 1.4450 + if(isBogus() && targetLength == 0) { 1.4451 + // truncate(0) of a bogus string makes the string empty and non-bogus 1.4452 + unBogus(); 1.4453 + return FALSE; 1.4454 + } else if((uint32_t)targetLength < (uint32_t)length()) { 1.4455 + setLength(targetLength); 1.4456 + return TRUE; 1.4457 + } else { 1.4458 + return FALSE; 1.4459 + } 1.4460 +} 1.4461 + 1.4462 +inline UnicodeString& 1.4463 +UnicodeString::reverse() 1.4464 +{ return 
doReverse(0, length()); } 1.4465 + 1.4466 +inline UnicodeString& 1.4467 +UnicodeString::reverse(int32_t start, 1.4468 + int32_t _length) 1.4469 +{ return doReverse(start, _length); } 1.4470 + 1.4471 +U_NAMESPACE_END 1.4472 + 1.4473 +#endif