The Tor Browser: intl/icu/source/common/unistr.cpp@129ffea94266

Conditionally enable double-key logic depending on private browsing mode or
the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Open question: how to convince FindCookie callers to test the condition and
pass a nullptr when double-key logic is disabled.

     1 /*

     2 ******************************************************************************

     3 * Copyright (C) 1999-2013, International Business Machines Corporation and

     4 * others. All Rights Reserved.

     5 ******************************************************************************

6 *

     7 * File unistr.cpp

8 *

     9 * Modification History:

    10 *

    11 *   Date        Name        Description

    12 *   09/25/98    stephen     Creation.

    13 *   04/20/99    stephen     Overhauled per 4/16 code review.

    14 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX

    15 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from

    16 *                           Replaceable.

    17 *   06/25/01    grhoten     Removed the dependency on iostream

    18 ******************************************************************************

    19 */

    21 #include "unicode/utypes.h"

    22 #include "unicode/appendable.h"

    23 #include "unicode/putil.h"

    24 #include "cstring.h"

    25 #include "cmemory.h"

    26 #include "unicode/ustring.h"

    27 #include "unicode/unistr.h"

    28 #include "unicode/utf.h"

    29 #include "unicode/utf16.h"

    30 #include "uelement.h"

    31 #include "ustr_imp.h"

    32 #include "umutex.h"

    33 #include "uassert.h"

    35 #if 0

    37 #include <iostream>

    38 using namespace std;

    40 //DEBUGGING

    41 void

    42 print(const UnicodeString& s,

    43       const char *name)

    44 {

    45   UChar c;

    46   cout << name << ":|";

    47   for(int i = 0; i < s.length(); ++i) {

    48     c = s[i];

    49     if(c>= 0x007E || c < 0x0020)

    50       cout << "[0x" << hex << s[i] << "]";

    51     else

    52       cout << (char) s[i];

    53   }

    54   cout << '|' << endl;

    55 }

    57 void

    58 print(const UChar *s,

    59       int32_t len,

    60       const char *name)

    61 {

    62   UChar c;

    63   cout << name << ":|";

    64   for(int i = 0; i < len; ++i) {

    65     c = s[i];

    66     if(c>= 0x007E || c < 0x0020)

    67       cout << "[0x" << hex << s[i] << "]";

    68     else

    69       cout << (char) s[i];

    70   }

    71   cout << '|' << endl;

    72 }

    73 // END DEBUGGING

    74 #endif

    76 // Local function definitions for now

    78 // need to copy areas that may overlap

    79 static

    80 inline void

    81 us_arrayCopy(const UChar *src, int32_t srcStart,

    82          UChar *dst, int32_t dstStart, int32_t count)

    83 {

    84   if(count>0) {

    85     uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));

    86   }

    87 }

    89 // u_unescapeAt() callback to get a UChar from a UnicodeString

    90 U_CDECL_BEGIN

    91 static UChar U_CALLCONV

    92 UnicodeString_charAt(int32_t offset, void *context) {

    93     return ((icu::UnicodeString*) context)->charAt(offset);

    94 }

    95 U_CDECL_END

    97 U_NAMESPACE_BEGIN

    99 /* The Replaceable virtual destructor can't be defined in the header

   100    due to how AIX works with multiple definitions of virtual functions.

   101 */

   102 Replaceable::~Replaceable() {}

   104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)

   106 UnicodeString U_EXPORT2

   107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {

   108     return

   109         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).

   110             append(s1).

   111                 append(s2);

   112 }

   114 //========================================

   115 // Reference Counting functions, put at top of file so that optimizing compilers

   116 //                               have a chance to automatically inline.

   117 //========================================

   119 void

   120 UnicodeString::addRef() {

   121   umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);

   122 }

   124 int32_t

   125 UnicodeString::removeRef() {

   126   return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);

   127 }

   129 int32_t

   130 UnicodeString::refCount() const {

   131   return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));

   132 }

   134 void

   135 UnicodeString::releaseArray() {

   136   if((fFlags & kRefCounted) && removeRef() == 0) {

   137     uprv_free((int32_t *)fUnion.fFields.fArray - 1);

   138   }

   139 }

   143 //========================================

   144 // Constructors

   145 //========================================

   147 // The default constructor is inline in unistr.h.

   149 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)

   150   : fShortLength(0),

   151     fFlags(0)

   152 {

   153   if(count <= 0 || (uint32_t)c > 0x10ffff) {

   154     // just allocate and do not do anything else

   155     allocate(capacity);

   156   } else {

   157     // count > 0, allocate and fill the new string with count c's

   158     int32_t unitCount = U16_LENGTH(c), length = count * unitCount;

   159     if(capacity < length) {

   160       capacity = length;

   161     }

   162     if(allocate(capacity)) {

   163       UChar *array = getArrayStart();

   164       int32_t i = 0;

   166       // fill the new string with c

   167       if(unitCount == 1) {

   168         // fill with length UChars

   169         while(i < length) {

   170           array[i++] = (UChar)c;

   171         }

   172       } else {

   173         // get the code units for c

   174         UChar units[U16_MAX_LENGTH];

   175         U16_APPEND_UNSAFE(units, i, c);

   177         // now it must be i==unitCount

   178         i = 0;

   180         // for Unicode, unitCount can only be 1, 2, 3, or 4

   181         // 1 is handled above

   182         while(i < length) {

   183           int32_t unitIdx = 0;

   184           while(unitIdx < unitCount) {

   185             array[i++]=units[unitIdx++];

   186           }

   187         }

   188       }

   189     }

   190     setLength(length);

   191   }

   192 }

   194 UnicodeString::UnicodeString(UChar ch)

   195   : fShortLength(1),

   196     fFlags(kShortString)

   197 {

   198   fUnion.fStackBuffer[0] = ch;

   199 }

   201 UnicodeString::UnicodeString(UChar32 ch)

   202   : fShortLength(0),

   203     fFlags(kShortString)

   204 {

   205   int32_t i = 0;

   206   UBool isError = FALSE;

   207   U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);

   208   // We test isError so that the compiler does not complain that we don't.

   209   // If isError then i==0 which is what we want anyway.

   210   if(!isError) {

   211     fShortLength = (int8_t)i;

   212   }

   213 }

   215 UnicodeString::UnicodeString(const UChar *text)

   216   : fShortLength(0),

   217     fFlags(kShortString)

   218 {

   219   doReplace(0, 0, text, 0, -1);

   220 }

   222 UnicodeString::UnicodeString(const UChar *text,

   223                              int32_t textLength)

   224   : fShortLength(0),

   225     fFlags(kShortString)

   226 {

   227   doReplace(0, 0, text, 0, textLength);

   228 }

   230 UnicodeString::UnicodeString(UBool isTerminated,

   231                              const UChar *text,

   232                              int32_t textLength)

   233   : fShortLength(0),

   234     fFlags(kReadonlyAlias)

   235 {

   236   if(text == NULL) {

   237     // treat as an empty string, do not alias

   238     setToEmpty();

   239   } else if(textLength < -1 ||

   240             (textLength == -1 && !isTerminated) ||

   241             (textLength >= 0 && isTerminated && text[textLength] != 0)

   242   ) {

   243     setToBogus();

   244   } else {

   245     if(textLength == -1) {

   246       // text is terminated, or else it would have failed the above test

   247       textLength = u_strlen(text);

   248     }

   249     setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);

   250   }

   251 }

   253 UnicodeString::UnicodeString(UChar *buff,

   254                              int32_t buffLength,

   255                              int32_t buffCapacity)

   256   : fShortLength(0),

   257     fFlags(kWritableAlias)

   258 {

   259   if(buff == NULL) {

   260     // treat as an empty string, do not alias

   261     setToEmpty();

   262   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {

   263     setToBogus();

   264   } else {

   265     if(buffLength == -1) {

   266       // fLength = u_strlen(buff); but do not look beyond buffCapacity

   267       const UChar *p = buff, *limit = buff + buffCapacity;

   268       while(p != limit && *p != 0) {

   269         ++p;

   270       }

   271       buffLength = (int32_t)(p - buff);

   272     }

   273     setArray(buff, buffLength, buffCapacity);

   274   }

   275 }

   277 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)

   278   : fShortLength(0),

   279     fFlags(kShortString)

   280 {

   281   if(src==NULL) {

   282     // treat as an empty string

   283   } else {

   284     if(length<0) {

   285       length=(int32_t)uprv_strlen(src);

   286     }

   287     if(cloneArrayIfNeeded(length, length, FALSE)) {

   288       u_charsToUChars(src, getArrayStart(), length);

   289       setLength(length);

   290     } else {

   291       setToBogus();

   292     }

   293   }

   294 }

   296 #if U_CHARSET_IS_UTF8

   298 UnicodeString::UnicodeString(const char *codepageData)

   299   : fShortLength(0),

   300     fFlags(kShortString) {

   301   if(codepageData != 0) {

   302     setToUTF8(codepageData);

   303   }

   304 }

   306 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)

   307   : fShortLength(0),

   308     fFlags(kShortString) {

   309   // if there's nothing to convert, do nothing

   310   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {

   311     return;

   312   }

   313   if(dataLength == -1) {

   314     dataLength = (int32_t)uprv_strlen(codepageData);

   315   }

   316   setToUTF8(StringPiece(codepageData, dataLength));

   317 }

   319 // else see unistr_cnv.cpp

   320 #endif

   322 UnicodeString::UnicodeString(const UnicodeString& that)

   323   : Replaceable(),

   324     fShortLength(0),

   325     fFlags(kShortString)

   326 {

   327   copyFrom(that);

   328 }

   330 UnicodeString::UnicodeString(const UnicodeString& that,

   331                              int32_t srcStart)

   332   : Replaceable(),

   333     fShortLength(0),

   334     fFlags(kShortString)

   335 {

   336   setTo(that, srcStart);

   337 }

   339 UnicodeString::UnicodeString(const UnicodeString& that,

   340                              int32_t srcStart,

   341                              int32_t srcLength)

   342   : Replaceable(),

   343     fShortLength(0),

   344     fFlags(kShortString)

   345 {

   346   setTo(that, srcStart, srcLength);

   347 }

   349 // Replaceable base class clone() default implementation, does not clone

   350 Replaceable *

   351 Replaceable::clone() const {

   352   return NULL;

   353 }

   355 // UnicodeString overrides clone() with a real implementation

   356 Replaceable *

   357 UnicodeString::clone() const {

   358   return new UnicodeString(*this);

   359 }

   361 //========================================

   362 // array allocation

   363 //========================================

   365 UBool

   366 UnicodeString::allocate(int32_t capacity) {

   367   if(capacity <= US_STACKBUF_SIZE) {

   368     fFlags = kShortString;

   369   } else {

   370     // count bytes for the refCounter and the string capacity, and

   371     // round up to a multiple of 16; then divide by 4 and allocate int32_t's

   372     // to be safely aligned for the refCount

   373     // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()

   374     int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);

   375     int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );

   376     if(array != 0) {

   377       // set initial refCount and point behind the refCount

   378       *array++ = 1;

   380       // have fArray point to the first UChar

   381       fUnion.fFields.fArray = (UChar *)array;

   382       fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));

   383       fFlags = kLongString;

   384     } else {

   385       fShortLength = 0;

   386       fUnion.fFields.fArray = 0;

   387       fUnion.fFields.fCapacity = 0;

   388       fFlags = kIsBogus;

   389       return FALSE;

   390     }

   391   }

   392   return TRUE;

   393 }

   395 //========================================

   396 // Destructor

   397 //========================================

   398 UnicodeString::~UnicodeString()

   399 {

   400   releaseArray();

   401 }

   403 //========================================

   404 // Factory methods

   405 //========================================

   407 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {

   408   UnicodeString result;

   409   result.setToUTF8(utf8);

   410   return result;

   411 }

   413 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {

   414   UnicodeString result;

   415   int32_t capacity;

   416   // Most UTF-32 strings will be BMP-only and result in a same-length

   417   // UTF-16 string. We overestimate the capacity just slightly,

   418   // just in case there are a few supplementary characters.

   419   if(length <= US_STACKBUF_SIZE) {

   420     capacity = US_STACKBUF_SIZE;

   421   } else {

   422     capacity = length + (length >> 4) + 4;

   423   }

   424   do {

   425     UChar *utf16 = result.getBuffer(capacity);

   426     int32_t length16;

   427     UErrorCode errorCode = U_ZERO_ERROR;

   428     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,

   429         utf32, length,

   430         0xfffd,  // Substitution character.

   431         NULL,    // Don't care about number of substitutions.

   432         &errorCode);

   433     result.releaseBuffer(length16);

   434     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {

   435       capacity = length16 + 1;  // +1 for the terminating NUL.

   436       continue;

   437     } else if(U_FAILURE(errorCode)) {

   438       result.setToBogus();

   439     }

   440     break;

   441   } while(TRUE);

   442   return result;

   443 }

   445 //========================================

   446 // Assignment

   447 //========================================

   449 UnicodeString &

   450 UnicodeString::operator=(const UnicodeString &src) {

   451   return copyFrom(src);

   452 }

   454 UnicodeString &

   455 UnicodeString::fastCopyFrom(const UnicodeString &src) {

   456   return copyFrom(src, TRUE);

   457 }

   459 UnicodeString &

   460 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {

   461   // if assigning to ourselves, do nothing

   462   if(this == 0 || this == &src) {

   463     return *this;

   464   }

   466   // is the right side bogus?

   467   if(&src == 0 || src.isBogus()) {

   468     setToBogus();

   469     return *this;

   470   }

   472   // delete the current contents

   473   releaseArray();

   475   if(src.isEmpty()) {

   476     // empty string - use the stack buffer

   477     setToEmpty();

   478     return *this;

   479   }

   481   // we always copy the length

   482   int32_t srcLength = src.length();

   483   setLength(srcLength);

   485   // fLength>0 and not an "open" src.getBuffer(minCapacity)

   486   switch(src.fFlags) {

   487   case kShortString:

   488     // short string using the stack buffer, do the same

   489     fFlags = kShortString;

   490     uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);

   491     break;

   492   case kLongString:

   493     // src uses a refCounted string buffer, use that buffer with refCount

   494     // src is const, use a cast - we don't really change it

   495     ((UnicodeString &)src).addRef();

   496     // copy all fields, share the reference-counted buffer

   497     fUnion.fFields.fArray = src.fUnion.fFields.fArray;

   498     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;

   499     fFlags = src.fFlags;

   500     break;

   501   case kReadonlyAlias:

   502     if(fastCopy) {

   503       // src is a readonly alias, do the same

   504       // -> maintain the readonly alias as such

   505       fUnion.fFields.fArray = src.fUnion.fFields.fArray;

   506       fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;

   507       fFlags = src.fFlags;

   508       break;

   509     }

   510     // else if(!fastCopy) fall through to case kWritableAlias

   511     // -> allocate a new buffer and copy the contents

   512   case kWritableAlias:

   513     // src is a writable alias; we make a copy of that instead

   514     if(allocate(srcLength)) {

   515       uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);

   516       break;

   517     }

   518     // if there is not enough memory, then fall through to setting to bogus

   519   default:

   520     // if src is bogus, set ourselves to bogus

   521     // do not call setToBogus() here because fArray and fFlags are not consistent here

   522     fShortLength = 0;

   523     fUnion.fFields.fArray = 0;

   524     fUnion.fFields.fCapacity = 0;

   525     fFlags = kIsBogus;

   526     break;

   527   }

   529   return *this;

   530 }

   532 //========================================

   533 // Miscellaneous operations

   534 //========================================

   536 UnicodeString UnicodeString::unescape() const {

   537     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity

   538     const UChar *array = getBuffer();

   539     int32_t len = length();

   540     int32_t prev = 0;

   541     for (int32_t i=0;;) {

   542         if (i == len) {

   543             result.append(array, prev, len - prev);

   544             break;

   545         }

   546         if (array[i++] == 0x5C /*'\\'*/) {

   547             result.append(array, prev, (i - 1) - prev);

   548             UChar32 c = unescapeAt(i); // advances i

   549             if (c < 0) {

   550                 result.remove(); // return empty string

   551                 break; // invalid escape sequence

   552             }

   553             result.append(c);

   554             prev = i;

   555         }

   556     }

   557     return result;

   558 }

   560 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {

   561     return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);

   562 }

   564 //========================================

   565 // Read-only implementation

   566 //========================================

   567 UBool

   568 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {

   569   // Requires: this & text not bogus and have same lengths.

   570   // Byte-wise comparison works for equality regardless of endianness.

   571   return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;

   572 }

   574 int8_t

   575 UnicodeString::doCompare( int32_t start,

   576               int32_t length,

   577               const UChar *srcChars,

   578               int32_t srcStart,

   579               int32_t srcLength) const

   580 {

   581   // compare illegal string values

   582   if(isBogus()) {

   583     return -1;

   584   }

   586   // pin indices to legal values

   587   pinIndices(start, length);

   589   if(srcChars == NULL) {

   590     // treat const UChar *srcChars==NULL as an empty string

   591     return length == 0 ? 0 : 1;

   592   }

   594   // get the correct pointer

   595   const UChar *chars = getArrayStart();

   597   chars += start;

   598   srcChars += srcStart;

   600   int32_t minLength;

   601   int8_t lengthResult;

   603   // get the srcLength if necessary

   604   if(srcLength < 0) {

   605     srcLength = u_strlen(srcChars + srcStart);

   606   }

   608   // are we comparing different lengths?

   609   if(length != srcLength) {

   610     if(length < srcLength) {

   611       minLength = length;

   612       lengthResult = -1;

   613     } else {

   614       minLength = srcLength;

   615       lengthResult = 1;

   616     }

   617   } else {

   618     minLength = length;

   619     lengthResult = 0;

   620   }

   622   /*

   623    * note that uprv_memcmp() returns an int but we return an int8_t;

   624    * we need to take care not to truncate the result -

   625    * one way to do this is to right-shift the value to

   626    * move the sign bit into the lower 8 bits and making sure that this

   627    * does not become 0 itself

   628    */

   630   if(minLength > 0 && chars != srcChars) {

   631     int32_t result;

   633 #   if U_IS_BIG_ENDIAN

   634       // big-endian: byte comparison works

   635       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));

   636       if(result != 0) {

   637         return (int8_t)(result >> 15 | 1);

   638       }

   639 #   else

   640       // little-endian: compare UChar units

   641       do {

   642         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));

   643         if(result != 0) {

   644           return (int8_t)(result >> 15 | 1);

   645         }

   646       } while(--minLength > 0);

   647 #   endif

   648   }

   649   return lengthResult;

   650 }

   652 /* String compare in code point order - doCompare() compares in code unit order. */

   653 int8_t

   654 UnicodeString::doCompareCodePointOrder(int32_t start,

   655                                        int32_t length,

   656                                        const UChar *srcChars,

   657                                        int32_t srcStart,

   658                                        int32_t srcLength) const

   659 {

   660   // compare illegal string values

   661   // treat const UChar *srcChars==NULL as an empty string

   662   if(isBogus()) {

   663     return -1;

   664   }

   666   // pin indices to legal values

   667   pinIndices(start, length);

   669   if(srcChars == NULL) {

   670     srcStart = srcLength = 0;

   671   }

   673   int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);

   674   /* translate the 32-bit result into an 8-bit one */

   675   if(diff!=0) {

   676     return (int8_t)(diff >> 15 | 1);

   677   } else {

   678     return 0;

   679   }

   680 }

   682 int32_t

   683 UnicodeString::getLength() const {

   684     return length();

   685 }

   687 UChar

   688 UnicodeString::getCharAt(int32_t offset) const {

   689   return charAt(offset);

   690 }

   692 UChar32

   693 UnicodeString::getChar32At(int32_t offset) const {

   694   return char32At(offset);

   695 }

   697 UChar32

   698 UnicodeString::char32At(int32_t offset) const

   699 {

   700   int32_t len = length();

   701   if((uint32_t)offset < (uint32_t)len) {

   702     const UChar *array = getArrayStart();

   703     UChar32 c;

   704     U16_GET(array, 0, offset, len, c);

   705     return c;

   706   } else {

   707     return kInvalidUChar;

   708   }

   709 }

   711 int32_t

   712 UnicodeString::getChar32Start(int32_t offset) const {

   713   if((uint32_t)offset < (uint32_t)length()) {

   714     const UChar *array = getArrayStart();

   715     U16_SET_CP_START(array, 0, offset);

   716     return offset;

   717   } else {

   718     return 0;

   719   }

   720 }

   722 int32_t

   723 UnicodeString::getChar32Limit(int32_t offset) const {

   724   int32_t len = length();

   725   if((uint32_t)offset < (uint32_t)len) {

   726     const UChar *array = getArrayStart();

   727     U16_SET_CP_LIMIT(array, 0, offset, len);

   728     return offset;

   729   } else {

   730     return len;

   731   }

   732 }

   734 int32_t

   735 UnicodeString::countChar32(int32_t start, int32_t length) const {

   736   pinIndices(start, length);

   737   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL

   738   return u_countChar32(getArrayStart()+start, length);

   739 }

   741 UBool

   742 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {

   743   pinIndices(start, length);

   744   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL

   745   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);

   746 }

   748 int32_t

   749 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {

   750   // pin index

   751   int32_t len = length();

   752   if(index<0) {

   753     index=0;

   754   } else if(index>len) {

   755     index=len;

   756   }

   758   const UChar *array = getArrayStart();

   759   if(delta>0) {

   760     U16_FWD_N(array, index, len, delta);

   761   } else {

   762     U16_BACK_N(array, 0, index, -delta);

   763   }

   765   return index;

   766 }

   768 void

   769 UnicodeString::doExtract(int32_t start,

   770              int32_t length,

   771              UChar *dst,

   772              int32_t dstStart) const

   773 {

   774   // pin indices to legal values

   775   pinIndices(start, length);

   777   // do not copy anything if we alias dst itself

   778   const UChar *array = getArrayStart();

   779   if(array + start != dst + dstStart) {

   780     us_arrayCopy(array, start, dst, dstStart, length);

   781   }

   782 }

   784 int32_t

   785 UnicodeString::extract(UChar *dest, int32_t destCapacity,

   786                        UErrorCode &errorCode) const {

   787   int32_t len = length();

   788   if(U_SUCCESS(errorCode)) {

   789     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {

   790       errorCode=U_ILLEGAL_ARGUMENT_ERROR;

   791     } else {

   792       const UChar *array = getArrayStart();

   793       if(len>0 && len<=destCapacity && array!=dest) {

   794         uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);

   795       }

   796       return u_terminateUChars(dest, destCapacity, len, &errorCode);

   797     }

   798   }

   800   return len;

   801 }

   803 int32_t

   804 UnicodeString::extract(int32_t start,

   805                        int32_t length,

   806                        char *target,

   807                        int32_t targetCapacity,

   808                        enum EInvariant) const

   809 {

   810   // if the arguments are illegal, then do nothing

   811   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {

   812     return 0;

   813   }

   815   // pin the indices to legal values

   816   pinIndices(start, length);

   818   if(length <= targetCapacity) {

   819     u_UCharsToChars(getArrayStart() + start, target, length);

   820   }

   821   UErrorCode status = U_ZERO_ERROR;

   822   return u_terminateChars(target, targetCapacity, length, &status);

   823 }

   825 UnicodeString

   826 UnicodeString::tempSubString(int32_t start, int32_t len) const {

   827   pinIndices(start, len);

   828   const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer

   829   if(array==NULL) {

   830     array=fUnion.fStackBuffer;  // anything not NULL because that would make an empty string

   831     len=-2;  // bogus result string

   832   }

   833   return UnicodeString(FALSE, array + start, len);

   834 }

   836 int32_t

   837 UnicodeString::toUTF8(int32_t start, int32_t len,

   838                       char *target, int32_t capacity) const {

   839   pinIndices(start, len);

   840   int32_t length8;

   841   UErrorCode errorCode = U_ZERO_ERROR;

   842   u_strToUTF8WithSub(target, capacity, &length8,

   843                      getBuffer() + start, len,

   844                      0xFFFD,  // Standard substitution character.

   845                      NULL,    // Don't care about number of substitutions.

   846                      &errorCode);

   847   return length8;

   848 }

   850 #if U_CHARSET_IS_UTF8

   852 int32_t

   853 UnicodeString::extract(int32_t start, int32_t len,

   854                        char *target, uint32_t dstSize) const {

   855   // if the arguments are illegal, then do nothing

   856   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {

   857     return 0;

   858   }

   859   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);

   860 }

   862 // else see unistr_cnv.cpp

   863 #endif

   865 void

   866 UnicodeString::extractBetween(int32_t start,

   867                   int32_t limit,

   868                   UnicodeString& target) const {

   869   pinIndex(start);

   870   pinIndex(limit);

   871   doExtract(start, limit - start, target);

   872 }

   874 // When converting from UTF-16 to UTF-8, the result will have at most 3 times

   875 // as many bytes as the source has UChars.

   876 // The "worst cases" are writing systems like Indic, Thai and CJK with

   877 // 3:1 bytes:UChars.

   878 void

   879 UnicodeString::toUTF8(ByteSink &sink) const {

   880   int32_t length16 = length();

   881   if(length16 != 0) {

   882     char stackBuffer[1024];

   883     int32_t capacity = (int32_t)sizeof(stackBuffer);

   884     UBool utf8IsOwned = FALSE;

   885     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,

   886                                       3*length16,

   887                                       stackBuffer, capacity,

   888                                       &capacity);

   889     int32_t length8 = 0;

   890     UErrorCode errorCode = U_ZERO_ERROR;

   891     u_strToUTF8WithSub(utf8, capacity, &length8,

   892                        getBuffer(), length16,

   893                        0xFFFD,  // Standard substitution character.

   894                        NULL,    // Don't care about number of substitutions.

   895                        &errorCode);

   896     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {

   897       utf8 = (char *)uprv_malloc(length8);

   898       if(utf8 != NULL) {

   899         utf8IsOwned = TRUE;

   900         errorCode = U_ZERO_ERROR;

   901         u_strToUTF8WithSub(utf8, length8, &length8,

   902                            getBuffer(), length16,

   903                            0xFFFD,  // Standard substitution character.

   904                            NULL,    // Don't care about number of substitutions.

   905                            &errorCode);

   906       } else {

   907         errorCode = U_MEMORY_ALLOCATION_ERROR;

   908       }

   909     }

   910     if(U_SUCCESS(errorCode)) {

   911       sink.Append(utf8, length8);

   912       sink.Flush();

   913     }

   914     if(utf8IsOwned) {

   915       uprv_free(utf8);

   916     }

   917   }

   918 }

   920 int32_t

   921 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {

   922   int32_t length32=0;

   923   if(U_SUCCESS(errorCode)) {

   924     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.

   925     u_strToUTF32WithSub(utf32, capacity, &length32,

   926         getBuffer(), length(),

   927         0xfffd,  // Substitution character.

   928         NULL,    // Don't care about number of substitutions.

   929         &errorCode);

   930   }

   931   return length32;

   932 }

   934 int32_t

   935 UnicodeString::indexOf(const UChar *srcChars,

   936                int32_t srcStart,

   937                int32_t srcLength,

   938                int32_t start,

   939                int32_t length) const

   940 {

   941   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {

   942     return -1;

   943   }

   945   // UnicodeString does not find empty substrings

   946   if(srcLength < 0 && srcChars[srcStart] == 0) {

   947     return -1;

   948   }

   950   // get the indices within bounds

   951   pinIndices(start, length);

   953   // find the first occurrence of the substring

   954   const UChar *array = getArrayStart();

   955   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);

   956   if(match == NULL) {

   957     return -1;

   958   } else {

   959     return (int32_t)(match - array);

   960   }

   961 }

   963 int32_t

   964 UnicodeString::doIndexOf(UChar c,

   965              int32_t start,

   966              int32_t length) const

   967 {

   968   // pin indices

   969   pinIndices(start, length);

   971   // find the first occurrence of c

   972   const UChar *array = getArrayStart();

   973   const UChar *match = u_memchr(array + start, c, length);

   974   if(match == NULL) {

   975     return -1;

   976   } else {

   977     return (int32_t)(match - array);

   978   }

   979 }

   981 int32_t

   982 UnicodeString::doIndexOf(UChar32 c,

   983                          int32_t start,

   984                          int32_t length) const {

   985   // pin indices

   986   pinIndices(start, length);

   988   // find the first occurrence of c

   989   const UChar *array = getArrayStart();

   990   const UChar *match = u_memchr32(array + start, c, length);

   991   if(match == NULL) {

   992     return -1;

   993   } else {

   994     return (int32_t)(match - array);

   995   }

   996 }

   998 int32_t

   999 UnicodeString::lastIndexOf(const UChar *srcChars,

  1000                int32_t srcStart,

  1001                int32_t srcLength,

  1002                int32_t start,

  1003                int32_t length) const

  1004 {

  1005   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {

  1006     return -1;

  1007   }

  1009   // UnicodeString does not find empty substrings

  1010   if(srcLength < 0 && srcChars[srcStart] == 0) {

  1011     return -1;

  1012   }

  1014   // get the indices within bounds

  1015   pinIndices(start, length);

  1017   // find the last occurrence of the substring

  1018   const UChar *array = getArrayStart();

  1019   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);

  1020   if(match == NULL) {

  1021     return -1;

  1022   } else {

  1023     return (int32_t)(match - array);

  1024   }

  1025 }

  1027 int32_t

  1028 UnicodeString::doLastIndexOf(UChar c,

  1029                  int32_t start,

  1030                  int32_t length) const

  1031 {

  1032   if(isBogus()) {

  1033     return -1;

  1034   }

  1036   // pin indices

  1037   pinIndices(start, length);

  1039   // find the last occurrence of c

  1040   const UChar *array = getArrayStart();

  1041   const UChar *match = u_memrchr(array + start, c, length);

  1042   if(match == NULL) {

  1043     return -1;

  1044   } else {

  1045     return (int32_t)(match - array);

  1046   }

  1047 }

  1049 int32_t

  1050 UnicodeString::doLastIndexOf(UChar32 c,

  1051                              int32_t start,

  1052                              int32_t length) const {

  1053   // pin indices

  1054   pinIndices(start, length);

  1056   // find the last occurrence of c

  1057   const UChar *array = getArrayStart();

  1058   const UChar *match = u_memrchr32(array + start, c, length);

  1059   if(match == NULL) {

  1060     return -1;

  1061   } else {

  1062     return (int32_t)(match - array);

  1063   }

  1064 }

  1066 //========================================

  1067 // Write implementation

  1068 //========================================

  1070 UnicodeString&

  1071 UnicodeString::findAndReplace(int32_t start,

  1072                   int32_t length,

  1073                   const UnicodeString& oldText,

  1074                   int32_t oldStart,

  1075                   int32_t oldLength,

  1076                   const UnicodeString& newText,

  1077                   int32_t newStart,

  1078                   int32_t newLength)

  1079 {

  1080   if(isBogus() || oldText.isBogus() || newText.isBogus()) {

  1081     return *this;

  1082   }

  1084   pinIndices(start, length);

  1085   oldText.pinIndices(oldStart, oldLength);

  1086   newText.pinIndices(newStart, newLength);

  1088   if(oldLength == 0) {

  1089     return *this;

  1090   }

  1092   while(length > 0 && length >= oldLength) {

  1093     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);

  1094     if(pos < 0) {

  1095       // no more oldText's here: done

  1096       break;

  1097     } else {

  1098       // we found oldText, replace it by newText and go beyond it

  1099       replace(pos, oldLength, newText, newStart, newLength);

  1100       length -= pos + oldLength - start;

  1101       start = pos + newLength;

  1102     }

  1103   }

  1105   return *this;

  1106 }

  1109 void

  1110 UnicodeString::setToBogus()

  1111 {

  1112   releaseArray();

  1114   fShortLength = 0;

  1115   fUnion.fFields.fArray = 0;

  1116   fUnion.fFields.fCapacity = 0;

  1117   fFlags = kIsBogus;

  1118 }

  1120 // turn a bogus string into an empty one

  1121 void

  1122 UnicodeString::unBogus() {

  1123   if(fFlags & kIsBogus) {

  1124     setToEmpty();

  1125   }

  1126 }

  1128 const UChar *

  1129 UnicodeString::getTerminatedBuffer() {

  1130   if(!isWritable()) {

  1131     return 0;

  1132   }

  1133   UChar *array = getArrayStart();

  1134   int32_t len = length();

  1135   if(len < getCapacity()) {

  1136     if(fFlags & kBufferIsReadonly) {

  1137       // If len<capacity on a read-only alias, then array[len] is

  1138       // either the original NUL (if constructed with (TRUE, s, length))

  1139       // or one of the original string contents characters (if later truncated),

  1140       // therefore we can assume that array[len] is initialized memory.

  1141       if(array[len] == 0) {

  1142         return array;

  1143       }

  1144     } else if(((fFlags & kRefCounted) == 0 || refCount() == 1)) {

  1145       // kRefCounted: Do not write the NUL if the buffer is shared.

  1146       // That is mostly safe, except when the length of one copy was modified

  1147       // without copy-on-write, e.g., via truncate(newLength) or remove(void).

  1148       // Then the NUL would be written into the middle of another copy's string.

  1150       // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.

  1151       // Do not test if there is a NUL already because it might be uninitialized memory.

  1152       // (That would be safe, but tools like valgrind & Purify would complain.)

  1153       array[len] = 0;

  1154       return array;

  1155     }

  1156   }

  1157   if(cloneArrayIfNeeded(len+1)) {

  1158     array = getArrayStart();

  1159     array[len] = 0;

  1160     return array;

  1161   } else {

  1162     return NULL;

  1163   }

  1164 }

  1166 // setTo() analogous to the readonly-aliasing constructor with the same signature

  1167 UnicodeString &

  1168 UnicodeString::setTo(UBool isTerminated,

  1169                      const UChar *text,

  1170                      int32_t textLength)

  1171 {

  1172   if(fFlags & kOpenGetBuffer) {

  1173     // do not modify a string that has an "open" getBuffer(minCapacity)

  1174     return *this;

  1175   }

  1177   if(text == NULL) {

  1178     // treat as an empty string, do not alias

  1179     releaseArray();

  1180     setToEmpty();

  1181     return *this;

  1182   }

  1184   if( textLength < -1 ||

  1185       (textLength == -1 && !isTerminated) ||

  1186       (textLength >= 0 && isTerminated && text[textLength] != 0)

  1187   ) {

  1188     setToBogus();

  1189     return *this;

  1190   }

  1192   releaseArray();

  1194   if(textLength == -1) {

  1195     // text is terminated, or else it would have failed the above test

  1196     textLength = u_strlen(text);

  1197   }

  1198   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);

  1200   fFlags = kReadonlyAlias;

  1201   return *this;

  1202 }

  1204 // setTo() analogous to the writable-aliasing constructor with the same signature

  1205 UnicodeString &

  1206 UnicodeString::setTo(UChar *buffer,

  1207                      int32_t buffLength,

  1208                      int32_t buffCapacity) {

  1209   if(fFlags & kOpenGetBuffer) {

  1210     // do not modify a string that has an "open" getBuffer(minCapacity)

  1211     return *this;

  1212   }

  1214   if(buffer == NULL) {

  1215     // treat as an empty string, do not alias

  1216     releaseArray();

  1217     setToEmpty();

  1218     return *this;

  1219   }

  1221   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {

  1222     setToBogus();

  1223     return *this;

  1224   } else if(buffLength == -1) {

  1225     // buffLength = u_strlen(buff); but do not look beyond buffCapacity

  1226     const UChar *p = buffer, *limit = buffer + buffCapacity;

  1227     while(p != limit && *p != 0) {

  1228       ++p;

  1229     }

  1230     buffLength = (int32_t)(p - buffer);

  1231   }

  1233   releaseArray();

  1235   setArray(buffer, buffLength, buffCapacity);

  1236   fFlags = kWritableAlias;

  1237   return *this;

  1238 }

  1240 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {

  1241   unBogus();

  1242   int32_t length = utf8.length();

  1243   int32_t capacity;

  1244   // The UTF-16 string will be at most as long as the UTF-8 string.

  1245   if(length <= US_STACKBUF_SIZE) {

  1246     capacity = US_STACKBUF_SIZE;

  1247   } else {

  1248     capacity = length + 1;  // +1 for the terminating NUL.

  1249   }

  1250   UChar *utf16 = getBuffer(capacity);

  1251   int32_t length16;

  1252   UErrorCode errorCode = U_ZERO_ERROR;

  1253   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,

  1254       utf8.data(), length,

  1255       0xfffd,  // Substitution character.

  1256       NULL,    // Don't care about number of substitutions.

  1257       &errorCode);

  1258   releaseBuffer(length16);

  1259   if(U_FAILURE(errorCode)) {

  1260     setToBogus();

  1261   }

  1262   return *this;

  1263 }

  1265 UnicodeString&

  1266 UnicodeString::setCharAt(int32_t offset,

  1267              UChar c)

  1268 {

  1269   int32_t len = length();

  1270   if(cloneArrayIfNeeded() && len > 0) {

  1271     if(offset < 0) {

  1272       offset = 0;

  1273     } else if(offset >= len) {

  1274       offset = len - 1;

  1275     }

  1277     getArrayStart()[offset] = c;

  1278   }

  1279   return *this;

  1280 }

  1282 UnicodeString&

  1283 UnicodeString::replace(int32_t start,

  1284                int32_t _length,

  1285                UChar32 srcChar) {

  1286   UChar buffer[U16_MAX_LENGTH];

  1287   int32_t count = 0;

  1288   UBool isError = FALSE;

  1289   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);

  1290   // We test isError so that the compiler does not complain that we don't.

  1291   // If isError (srcChar is not a valid code point) then count==0 which means

  1292   // we remove the source segment rather than replacing it with srcChar.

  1293   return doReplace(start, _length, buffer, 0, isError ? 0 : count);

  1294 }

  1296 UnicodeString&

  1297 UnicodeString::append(UChar32 srcChar) {

  1298   UChar buffer[U16_MAX_LENGTH];

  1299   int32_t _length = 0;

  1300   UBool isError = FALSE;

  1301   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);

  1302   // We test isError so that the compiler does not complain that we don't.

  1303   // If isError then _length==0 which turns the doReplace() into a no-op anyway.

  1304   return isError ? *this : doReplace(length(), 0, buffer, 0, _length);

  1305 }

  1307 UnicodeString&

  1308 UnicodeString::doReplace( int32_t start,

  1309               int32_t length,

  1310               const UnicodeString& src,

  1311               int32_t srcStart,

  1312               int32_t srcLength)

  1313 {

  1314   if(!src.isBogus()) {

  1315     // pin the indices to legal values

  1316     src.pinIndices(srcStart, srcLength);

  1318     // get the characters from src

  1319     // and replace the range in ourselves with them

  1320     return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);

  1321   } else {

  1322     // remove the range

  1323     return doReplace(start, length, 0, 0, 0);

  1324   }

  1325 }

  // Core replace primitive: substitutes the code units [start, start+length)
  // of this string with srcLength code units read from srcChars+srcStart.
  //
  // @param start      first index of the range to replace; a start at or
  //                   beyond the current length is handled as an append
  // @param length     number of code units to replace (pinned with start)
  // @param srcChars   source characters; 0 means "insert nothing"
  // @param srcStart   offset into srcChars
  // @param srcLength  number of source code units; a negative value means
  //                   u_strlen(srcChars + srcStart)
  // @return *this; the string is returned without the replacement if it is
  //         not writable or if cloneArrayIfNeeded() fails
  1327 UnicodeString&

  1328 UnicodeString::doReplace(int32_t start,

  1329              int32_t length,

  1330              const UChar *srcChars,

  1331              int32_t srcStart,

  1332              int32_t srcLength)

  1333 {

  // do not touch a string that reports itself as not writable
  1334   if(!isWritable()) {

  1335     return *this;

  1336   }

  1338   int32_t oldLength = this->length();

  1340   // optimize (read-only alias).remove(0, start) and .remove(start, end)

  // Both shortcuts below only adjust the alias fields (array pointer, capacity,
  // length); no characters are copied and no buffer is allocated.
  1341   if((fFlags&kBufferIsReadonly) && srcLength == 0) {

  1342     if(start == 0) {

  1343       // remove prefix by adjusting the array pointer

  1344       pinIndex(length);

  1345       fUnion.fFields.fArray += length;

  1346       fUnion.fFields.fCapacity -= length;

  1347       setLength(oldLength - length);

  1348       return *this;

  1349     } else {

  1350       pinIndex(start);

  1351       if(length >= (oldLength - start)) {

  1352         // remove suffix by reducing the length (like truncate())

  1353         setLength(start);

  1354         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more

  1355         return *this;

  1356       }

  1357     }

  1358   }

  // normalize the source arguments: a null source inserts nothing
  1360   if(srcChars == 0) {

  1361     srcStart = srcLength = 0;

  1362   } else if(srcLength < 0) {

  1363     // get the srcLength if necessary

  1364     srcLength = u_strlen(srcChars + srcStart);

  1365   }

  1367   // calculate the size of the string after the replace

  1368   int32_t newLength;

  1370   // optimize append() onto a large-enough, owned string

  1371   if(start >= oldLength) {

  // appending nothing is a no-op
  1372     if(srcLength == 0) {

  1373       return *this;

  1374     }

  1375     newLength = oldLength + srcLength;

  // fast path: the existing buffer is writable and already has enough capacity
  1376     if(newLength <= getCapacity() && isBufferWritable()) {

  1377       UChar *oldArray = getArrayStart();

  1378       // Do not copy characters when

  1379       //   UChar *buffer=str.getAppendBuffer(...);

  1380       // is followed by

  1381       //   str.append(buffer, length);

  1382       // or

  1383       //   str.appendString(buffer, length)

  1384       // or similar.

  1385       if(srcChars + srcStart != oldArray + start || start > oldLength) {

  1386         us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);

  1387       }

  1388       setLength(newLength);

  1389       return *this;

  1390     } else {

  1391       // pin the indices to legal values

  1392       start = oldLength;

  1393       length = 0;

  1394     }

  1395   } else {

  1396     // pin the indices to legal values

  1397     pinIndices(start, length);

  1399     newLength = oldLength - length + srcLength;

  1400   }

  1402   // the following may change fArray but will not copy the current contents;

  1403   // therefore we need to keep the current fArray

  1404   UChar oldStackBuffer[US_STACKBUF_SIZE];

  1405   UChar *oldArray;

  1406   if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {

  1407     // copy the stack buffer contents because it will be overwritten with

  1408     // fUnion.fFields values

  1409     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);

  1410     oldArray = oldStackBuffer;

  1411   } else {

  1412     oldArray = getArrayStart();

  1413   }

  1415   // clone our array and allocate a bigger array if needed

  // Arguments: required capacity newLength; preferred (grown) capacity
  // newLength + newLength/4 + kGrowSize; FALSE = do not copy the old contents
  // (they are copied selectively below); &bufferToDelete = receives a released
  // old buffer so that it can be freed at the end of this function.
  1416   int32_t *bufferToDelete = 0;

  1417   if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,

  1418                          FALSE, &bufferToDelete)

  1419   ) {

  1420     return *this;

  1421   }

  1423   // now do the replace

  1425   UChar *newArray = getArrayStart();

  1426   if(newArray != oldArray) {

  1427     // if fArray changed, then we need to copy everything except what will change

  // first the prefix [0, start), then the tail that follows the replaced range
  1428     us_arrayCopy(oldArray, 0, newArray, 0, start);

  1429     us_arrayCopy(oldArray, start + length,

  1430                  newArray, start + srcLength,

  1431                  oldLength - (start + length));

  1432   } else if(length != srcLength) {

  1433     // fArray did not change; copy only the portion that isn't changing, leaving a hole

  1434     us_arrayCopy(oldArray, start + length,

  1435                  newArray, start + srcLength,

  1436                  oldLength - (start + length));

  1437   }

  1439   // now fill in the hole with the new string

  1440   us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);

  1442   setLength(newLength);

  1444   // delayed delete in case srcChars == fArray when we started, and

  1445   // to keep oldArray alive for the above operations

  1446   if (bufferToDelete) {

  1447     uprv_free(bufferToDelete);

  1448   }

  1450   return *this;

  1451 }

  1453 /**

  1454  * Replaceable API

  1455  */

  1456 void

  1457 UnicodeString::handleReplaceBetween(int32_t start,

  1458                                     int32_t limit,

  1459                                     const UnicodeString& text) {

  1460     replaceBetween(start, limit, text);

  1461 }

  1463 /**

  1464  * Replaceable API

  1465  */

  1466 void

  1467 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {

  1468     if (limit <= start) {

  1469         return; // Nothing to do; avoid bogus malloc call

  1470     }

  1471     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );

  1472     // Check to make sure text is not null.

  1473     if (text != NULL) {

  1474 	    extractBetween(start, limit, text, 0);

  1475 	    insert(dest, text, 0, limit - start);

  1476 	    uprv_free(text);

  1477     }

  1478 }

  1480 /**

  1481  * Replaceable API

  1482  *

  1483  * NOTE: This is for the Replaceable class.  There is no rep.cpp,

  1484  * so we implement this function here.

  1485  */

  1486 UBool Replaceable::hasMetaData() const {

  1487     return TRUE;

  1488 }

  1490 /**

  1491  * Replaceable API

  1492  */

  1493 UBool UnicodeString::hasMetaData() const {

  1494     return FALSE;

  1495 }

  1497 UnicodeString&

  1498 UnicodeString::doReverse(int32_t start, int32_t length) {

  1499   if(length <= 1 || !cloneArrayIfNeeded()) {

  1500     return *this;

  1501   }

  1503   // pin the indices to legal values

  1504   pinIndices(start, length);

  1505   if(length <= 1) {  // pinIndices() might have shrunk the length

  1506     return *this;

  1507   }

  1509   UChar *left = getArrayStart() + start;

  1510   UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)

  1511   UChar swap;

  1512   UBool hasSupplementary = FALSE;

  1514   // Before the loop we know left<right because length>=2.

  1515   do {

  1516     hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);

  1517     hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);

  1518     *right-- = swap;

  1519   } while(left < right);

  1520   // Make sure to test the middle code unit of an odd-length string.

  1521   // Redundant if the length is even.

  1522   hasSupplementary |= (UBool)U16_IS_LEAD(*left);

  1524   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */

  1525   if(hasSupplementary) {

  1526     UChar swap2;

  1528     left = getArrayStart() + start;

  1529     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right

  1530     while(left < right) {

  1531       if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {

  1532         *left++ = swap2;

  1533         *left++ = swap;

  1534       } else {

  1535         ++left;

  1536       }

  1537     }

  1538   }

  1540   return *this;

  1541 }

  1543 UBool

  1544 UnicodeString::padLeading(int32_t targetLength,

  1545                           UChar padChar)

  1546 {

  1547   int32_t oldLength = length();

  1548   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {

  1549     return FALSE;

  1550   } else {

  1551     // move contents up by padding width

  1552     UChar *array = getArrayStart();

  1553     int32_t start = targetLength - oldLength;

  1554     us_arrayCopy(array, 0, array, start, oldLength);

  1556     // fill in padding character

  1557     while(--start >= 0) {

  1558       array[start] = padChar;

  1559     }

  1560     setLength(targetLength);

  1561     return TRUE;

  1562   }

  1563 }

  1565 UBool

  1566 UnicodeString::padTrailing(int32_t targetLength,

  1567                            UChar padChar)

  1568 {

  1569   int32_t oldLength = length();

  1570   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {

  1571     return FALSE;

  1572   } else {

  1573     // fill in padding character

  1574     UChar *array = getArrayStart();

  1575     int32_t length = targetLength;

  1576     while(--length >= oldLength) {

  1577       array[length] = padChar;

  1578     }

  1579     setLength(targetLength);

  1580     return TRUE;

  1581   }

  1582 }

  1584 //========================================

  1585 // Hashing

  1586 //========================================

  1587 int32_t

  1588 UnicodeString::doHashCode() const

  1589 {

  1590     /* Delegate hash computation to uhash.  This makes UnicodeString

  1591      * hashing consistent with UChar* hashing.  */

  1592     int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());

  1593     if (hashCode == kInvalidHashCode) {

  1594         hashCode = kEmptyHashCode;

  1595     }

  1596     return hashCode;

  1597 }

  1599 //========================================

  1600 // External Buffer

  1601 //========================================

  1603 UChar *

  1604 UnicodeString::getBuffer(int32_t minCapacity) {

  1605   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {

  1606     fFlags|=kOpenGetBuffer;

  1607     fShortLength=0;

  1608     return getArrayStart();

  1609   } else {

  1610     return 0;

  1611   }

  1612 }

  1614 void

  1615 UnicodeString::releaseBuffer(int32_t newLength) {

  1616   if(fFlags&kOpenGetBuffer && newLength>=-1) {

  1617     // set the new fLength

  1618     int32_t capacity=getCapacity();

  1619     if(newLength==-1) {

  1620       // the new length is the string length, capped by fCapacity

  1621       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;

  1622       while(p<limit && *p!=0) {

  1623         ++p;

  1624       }

  1625       newLength=(int32_t)(p-array);

  1626     } else if(newLength>capacity) {

  1627       newLength=capacity;

  1628     }

  1629     setLength(newLength);

  1630     fFlags&=~kOpenGetBuffer;

  1631   }

  1632 }

  1634 //========================================

  1635 // Miscellaneous

  1636 //========================================

  // Makes sure that this string has a buffer that may be written to and that
  // holds at least newCapacity code units, allocating a new one if necessary.
  //
  // @param newCapacity      required capacity; -1 means the current capacity
  // @param growCapacity     capacity to try first when allocating; a negative
  //                         value means newCapacity; it is reduced to
  //                         US_STACKBUF_SIZE when newCapacity fits there
  // @param doCopyArray      if TRUE, the old contents are copied into a newly
  //                         allocated buffer; otherwise the length is reset to 0
  //                         when a new buffer is allocated
  // @param pBufferToDelete  if not 0 and a reference-counted old buffer loses
  //                         its last reference here, the buffer is not freed
  //                         but handed to the caller through this pointer
  // @param forceClone       if TRUE, a new buffer is allocated unconditionally
  // @return TRUE if the buffer can be used (nothing is changed when no new
  //         buffer was needed); FALSE if the string is not writable, or if
  //         allocation failed, in which case the string is set to bogus
  1637 UBool

  1638 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,

  1639                                   int32_t growCapacity,

  1640                                   UBool doCopyArray,

  1641                                   int32_t **pBufferToDelete,

  1642                                   UBool forceClone) {

  1643   // default parameters need to be static, therefore

  1644   // the defaults are -1 to have convenience defaults

  1645   if(newCapacity == -1) {

  1646     newCapacity = getCapacity();

  1647   }

  1649   // while a getBuffer(minCapacity) is "open",

  1650   // prevent any modifications of the string by returning FALSE here

  1651   // if the string is bogus, then only an assignment or similar can revive it

  1652   if(!isWritable()) {

  1653     return FALSE;

  1654   }

  1656   /*

  1657    * We need to make a copy of the array if

  1658    * the buffer is read-only, or

  1659    * the buffer is refCounted (shared), and refCount>1, or

  1660    * the buffer is too small.

  1661    * Return FALSE if memory could not be allocated.

  1662    */

  1663   if(forceClone ||

  1664      fFlags & kBufferIsReadonly ||

  1665      (fFlags & kRefCounted && refCount() > 1) ||

  1666      newCapacity > getCapacity()

  1667   ) {

  1668     // check growCapacity for default value and use of the stack buffer

  1669     if(growCapacity < 0) {

  1670       growCapacity = newCapacity;

  1671     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {

  1672       growCapacity = US_STACKBUF_SIZE;

  1673     }

  1675     // save old values

  // (the flags and the array location are captured before allocate() runs, so
  // that they can be used for copying/releasing or restored on failure below)
  1676     UChar oldStackBuffer[US_STACKBUF_SIZE];

  1677     UChar *oldArray;

  1678     uint8_t flags = fFlags;

  1680     if(flags&kUsingStackBuffer) {

  1681       U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */

  1682       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {

  1683         // copy the stack buffer contents because it will be overwritten with

  1684         // fUnion.fFields values

  1685         us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);

  1686         oldArray = oldStackBuffer;

  1687       } else {

  1688         oldArray = 0; // no need to copy from stack buffer to itself

  1689       }

  1690     } else {

  1691       oldArray = fUnion.fFields.fArray;

  1692       U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */

  1693     }

  1695     // allocate a new array

  // try the larger growCapacity first, then fall back to the minimum newCapacity
  1696     if(allocate(growCapacity) ||

  1697        (newCapacity < growCapacity && allocate(newCapacity))

  1698     ) {

  1699       if(doCopyArray && oldArray != 0) {

  1700         // copy the contents

  1701         // do not copy more than what fits - it may be smaller than before

  1702         int32_t minLength = length();

  1703         newCapacity = getCapacity();

  1704         if(newCapacity < minLength) {

  1705           minLength = newCapacity;

  1706           setLength(minLength);

  1707         }

  1708         us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);

  1709       } else {

  1710         fShortLength = 0;

  1711       }

  1713       // release the old array

  // (uses the saved flags, i.e. the state from before allocate() was called)
  1714       if(flags & kRefCounted) {

  1715         // the array is refCounted; decrement and release if 0

  // the reference counter is located directly in front of the array
  1716         u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);

  1717         if(umtx_atomic_dec(pRefCount) == 0) {

  1718           if(pBufferToDelete == 0) {

  1719               // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t

  1720               // is defined as volatile. (Volatile has useful non-standard behavior

  1721               //   with this compiler.)

  1722             uprv_free((void *)pRefCount);

  1723           } else {

  1724             // the caller requested to delete it himself

  1725             *pBufferToDelete = (int32_t *)pRefCount;

  1726           }

  1727         }

  1728       }

  1729     } else {

  1730       // not enough memory for growCapacity and not even for the smaller newCapacity

  1731       // reset the old values for setToBogus() to release the array

  1732       if(!(flags&kUsingStackBuffer)) {

  1733         fUnion.fFields.fArray = oldArray;

  1734       }

  1735       fFlags = flags;

  1736       setToBogus();

  1737       return FALSE;

  1738     }

  1739   }

  1740   return TRUE;

  1741 }

  1743 // UnicodeStringAppendable ------------------------------------------------- ***

  1745 UnicodeStringAppendable::~UnicodeStringAppendable() {}

  1747 UBool

  1748 UnicodeStringAppendable::appendCodeUnit(UChar c) {

  1749   return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();

  1750 }

  1752 UBool

  1753 UnicodeStringAppendable::appendCodePoint(UChar32 c) {

  1754   UChar buffer[U16_MAX_LENGTH];

  1755   int32_t cLength = 0;

  1756   UBool isError = FALSE;

  1757   U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);

  1758   return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();

  1759 }

  1761 UBool

  1762 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {

  1763   return str.doReplace(str.length(), 0, s, 0, length).isWritable();

  1764 }

  1766 UBool

  1767 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {

  1768   return str.cloneArrayIfNeeded(str.length() + appendCapacity);

  1769 }

  1771 UChar *

  1772 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,

  1773                                          int32_t desiredCapacityHint,

  1774                                          UChar *scratch, int32_t scratchCapacity,

  1775                                          int32_t *resultCapacity) {

  1776   if(minCapacity < 1 || scratchCapacity < minCapacity) {

  1777     *resultCapacity = 0;

  1778     return NULL;

  1779   }

  1780   int32_t oldLength = str.length();

  1781   if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {

  1782     *resultCapacity = str.getCapacity() - oldLength;

  1783     return str.getArrayStart() + oldLength;

  1784   }

  1785   *resultCapacity = scratchCapacity;

  1786   return scratch;

  1787 }

  1789 U_NAMESPACE_END

  1791 U_NAMESPACE_USE

  1793 U_CAPI int32_t U_EXPORT2

  1794 uhash_hashUnicodeString(const UElement key) {

  1795     const UnicodeString *str = (const UnicodeString*) key.pointer;

  1796     return (str == NULL) ? 0 : str->hashCode();

  1797 }

  1799 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*

  1800 // does not depend on hashtable code.

  1801 U_CAPI UBool U_EXPORT2

  1802 uhash_compareUnicodeString(const UElement key1, const UElement key2) {

  1803     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;

  1804     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;

  1805     if (str1 == str2) {

  1806         return TRUE;

  1807     }

  1808     if (str1 == NULL || str2 == NULL) {

  1809         return FALSE;

  1810     }

  1811     return *str1 == *str2;

  1812 }

  #ifdef U_STATIC_IMPLEMENTATION
  /*
  This function should never be called. It exists only so that the virtual
  vector deleting destructor is defined within unistr.cpp.
  The vector deleting destructor is already a part of UObject,
  but defining it here makes sure that it is included with this object file,
  which keeps static library dependencies to a minimum.
  */
  static void uprv_UnicodeStringDummy(void) {
      UnicodeString *dummyArray = new UnicodeString[2];
      delete [] dummyArray;
  }
  #endif

The Tor Browser / file revision

intl/icu/source/common/unistr.cpp@129ffea94266

intl/icu/source/common/unistr.cpp