intl/icu/source/common/unistr.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double-key logic when private browsing mode or the
privacy.thirdparty.isolate preference is active, and implement it in
GetCookieStringCommon and FindCookie, where it counts. One reservation
remains: how to convince FindCookie users to test the condition and pass
a nullptr when double-key logic is disabled.

     1 /*
     2 ******************************************************************************
     3 * Copyright (C) 1999-2013, International Business Machines Corporation and
     4 * others. All Rights Reserved.
     5 ******************************************************************************
     6 *
     7 * File unistr.cpp
     8 *
     9 * Modification History:
    10 *
    11 *   Date        Name        Description
    12 *   09/25/98    stephen     Creation.
    13 *   04/20/99    stephen     Overhauled per 4/16 code review.
    14 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
    15 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
    16 *                           Replaceable.
    17 *   06/25/01    grhoten     Removed the dependency on iostream
    18 ******************************************************************************
    19 */
    21 #include "unicode/utypes.h"
    22 #include "unicode/appendable.h"
    23 #include "unicode/putil.h"
    24 #include "cstring.h"
    25 #include "cmemory.h"
    26 #include "unicode/ustring.h"
    27 #include "unicode/unistr.h"
    28 #include "unicode/utf.h"
    29 #include "unicode/utf16.h"
    30 #include "uelement.h"
    31 #include "ustr_imp.h"
    32 #include "umutex.h"
    33 #include "uassert.h"
    35 #if 0
    37 #include <iostream>
    38 using namespace std;
    40 //DEBUGGING
    41 void
    42 print(const UnicodeString& s,
    43       const char *name)
    44 {
    45   UChar c;
    46   cout << name << ":|";
    47   for(int i = 0; i < s.length(); ++i) {
    48     c = s[i];
    49     if(c>= 0x007E || c < 0x0020)
    50       cout << "[0x" << hex << s[i] << "]";
    51     else
    52       cout << (char) s[i];
    53   }
    54   cout << '|' << endl;
    55 }
    57 void
    58 print(const UChar *s,
    59       int32_t len,
    60       const char *name)
    61 {
    62   UChar c;
    63   cout << name << ":|";
    64   for(int i = 0; i < len; ++i) {
    65     c = s[i];
    66     if(c>= 0x007E || c < 0x0020)
    67       cout << "[0x" << hex << s[i] << "]";
    68     else
    69       cout << (char) s[i];
    70   }
    71   cout << '|' << endl;
    72 }
    73 // END DEBUGGING
    74 #endif
    76 // Local function definitions for now
    78 // need to copy areas that may overlap
    79 static
    80 inline void
    81 us_arrayCopy(const UChar *src, int32_t srcStart,
    82          UChar *dst, int32_t dstStart, int32_t count)
    83 {
    84   if(count>0) {
    85     uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
    86   }
    87 }
    89 // u_unescapeAt() callback to get a UChar from a UnicodeString
    90 U_CDECL_BEGIN
    91 static UChar U_CALLCONV
    92 UnicodeString_charAt(int32_t offset, void *context) {
    93     return ((icu::UnicodeString*) context)->charAt(offset);
    94 }
    95 U_CDECL_END
    97 U_NAMESPACE_BEGIN
    99 /* The Replaceable virtual destructor can't be defined in the header
   100    due to how AIX works with multiple definitions of virtual functions.
   101 */
   102 Replaceable::~Replaceable() {}
   104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
   106 UnicodeString U_EXPORT2
   107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
   108     return
   109         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
   110             append(s1).
   111                 append(s2);
   112 }
   114 //========================================
   115 // Reference Counting functions, put at top of file so that optimizing compilers
   116 //                               have a chance to automatically inline.
   117 //========================================
   119 void
   120 UnicodeString::addRef() {
   121   umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
   122 }
   124 int32_t
   125 UnicodeString::removeRef() {
   126   return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
   127 }
   129 int32_t
   130 UnicodeString::refCount() const {
   131   return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
   132 }
   134 void
   135 UnicodeString::releaseArray() {
   136   if((fFlags & kRefCounted) && removeRef() == 0) {
   137     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
   138   }
   139 }
   143 //========================================
   144 // Constructors
   145 //========================================
   147 // The default constructor is inline in unistr.h.
   149 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
   150   : fShortLength(0),
   151     fFlags(0)
   152 {
   153   if(count <= 0 || (uint32_t)c > 0x10ffff) {
   154     // just allocate and do not do anything else
   155     allocate(capacity);
   156   } else {
   157     // count > 0, allocate and fill the new string with count c's
   158     int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
   159     if(capacity < length) {
   160       capacity = length;
   161     }
   162     if(allocate(capacity)) {
   163       UChar *array = getArrayStart();
   164       int32_t i = 0;
   166       // fill the new string with c
   167       if(unitCount == 1) {
   168         // fill with length UChars
   169         while(i < length) {
   170           array[i++] = (UChar)c;
   171         }
   172       } else {
   173         // get the code units for c
   174         UChar units[U16_MAX_LENGTH];
   175         U16_APPEND_UNSAFE(units, i, c);
   177         // now it must be i==unitCount
   178         i = 0;
   180         // for Unicode, unitCount can only be 1, 2, 3, or 4
   181         // 1 is handled above
   182         while(i < length) {
   183           int32_t unitIdx = 0;
   184           while(unitIdx < unitCount) {
   185             array[i++]=units[unitIdx++];
   186           }
   187         }
   188       }
   189     }
   190     setLength(length);
   191   }
   192 }
   194 UnicodeString::UnicodeString(UChar ch)
   195   : fShortLength(1),
   196     fFlags(kShortString)
   197 {
   198   fUnion.fStackBuffer[0] = ch;
   199 }
   201 UnicodeString::UnicodeString(UChar32 ch)
   202   : fShortLength(0),
   203     fFlags(kShortString)
   204 {
   205   int32_t i = 0;
   206   UBool isError = FALSE;
   207   U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
   208   // We test isError so that the compiler does not complain that we don't.
   209   // If isError then i==0 which is what we want anyway.
   210   if(!isError) {
   211     fShortLength = (int8_t)i;
   212   }
   213 }
   215 UnicodeString::UnicodeString(const UChar *text)
   216   : fShortLength(0),
   217     fFlags(kShortString)
   218 {
   219   doReplace(0, 0, text, 0, -1);
   220 }
   222 UnicodeString::UnicodeString(const UChar *text,
   223                              int32_t textLength)
   224   : fShortLength(0),
   225     fFlags(kShortString)
   226 {
   227   doReplace(0, 0, text, 0, textLength);
   228 }
   230 UnicodeString::UnicodeString(UBool isTerminated,
   231                              const UChar *text,
   232                              int32_t textLength)
   233   : fShortLength(0),
   234     fFlags(kReadonlyAlias)
   235 {
   236   if(text == NULL) {
   237     // treat as an empty string, do not alias
   238     setToEmpty();
   239   } else if(textLength < -1 ||
   240             (textLength == -1 && !isTerminated) ||
   241             (textLength >= 0 && isTerminated && text[textLength] != 0)
   242   ) {
   243     setToBogus();
   244   } else {
   245     if(textLength == -1) {
   246       // text is terminated, or else it would have failed the above test
   247       textLength = u_strlen(text);
   248     }
   249     setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
   250   }
   251 }
   253 UnicodeString::UnicodeString(UChar *buff,
   254                              int32_t buffLength,
   255                              int32_t buffCapacity)
   256   : fShortLength(0),
   257     fFlags(kWritableAlias)
   258 {
   259   if(buff == NULL) {
   260     // treat as an empty string, do not alias
   261     setToEmpty();
   262   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
   263     setToBogus();
   264   } else {
   265     if(buffLength == -1) {
   266       // fLength = u_strlen(buff); but do not look beyond buffCapacity
   267       const UChar *p = buff, *limit = buff + buffCapacity;
   268       while(p != limit && *p != 0) {
   269         ++p;
   270       }
   271       buffLength = (int32_t)(p - buff);
   272     }
   273     setArray(buff, buffLength, buffCapacity);
   274   }
   275 }
   277 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
   278   : fShortLength(0),
   279     fFlags(kShortString)
   280 {
   281   if(src==NULL) {
   282     // treat as an empty string
   283   } else {
   284     if(length<0) {
   285       length=(int32_t)uprv_strlen(src);
   286     }
   287     if(cloneArrayIfNeeded(length, length, FALSE)) {
   288       u_charsToUChars(src, getArrayStart(), length);
   289       setLength(length);
   290     } else {
   291       setToBogus();
   292     }
   293   }
   294 }
   296 #if U_CHARSET_IS_UTF8
   298 UnicodeString::UnicodeString(const char *codepageData)
   299   : fShortLength(0),
   300     fFlags(kShortString) {
   301   if(codepageData != 0) {
   302     setToUTF8(codepageData);
   303   }
   304 }
   306 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
   307   : fShortLength(0),
   308     fFlags(kShortString) {
   309   // if there's nothing to convert, do nothing
   310   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
   311     return;
   312   }
   313   if(dataLength == -1) {
   314     dataLength = (int32_t)uprv_strlen(codepageData);
   315   }
   316   setToUTF8(StringPiece(codepageData, dataLength));
   317 }
   319 // else see unistr_cnv.cpp
   320 #endif
   322 UnicodeString::UnicodeString(const UnicodeString& that)
   323   : Replaceable(),
   324     fShortLength(0),
   325     fFlags(kShortString)
   326 {
   327   copyFrom(that);
   328 }
   330 UnicodeString::UnicodeString(const UnicodeString& that,
   331                              int32_t srcStart)
   332   : Replaceable(),
   333     fShortLength(0),
   334     fFlags(kShortString)
   335 {
   336   setTo(that, srcStart);
   337 }
   339 UnicodeString::UnicodeString(const UnicodeString& that,
   340                              int32_t srcStart,
   341                              int32_t srcLength)
   342   : Replaceable(),
   343     fShortLength(0),
   344     fFlags(kShortString)
   345 {
   346   setTo(that, srcStart, srcLength);
   347 }
   349 // Replaceable base class clone() default implementation, does not clone
   350 Replaceable *
   351 Replaceable::clone() const {
   352   return NULL;
   353 }
   355 // UnicodeString overrides clone() with a real implementation
   356 Replaceable *
   357 UnicodeString::clone() const {
   358   return new UnicodeString(*this);
   359 }
   361 //========================================
   362 // array allocation
   363 //========================================
   365 UBool
   366 UnicodeString::allocate(int32_t capacity) {
   367   if(capacity <= US_STACKBUF_SIZE) {
   368     fFlags = kShortString;
   369   } else {
   370     // count bytes for the refCounter and the string capacity, and
   371     // round up to a multiple of 16; then divide by 4 and allocate int32_t's
   372     // to be safely aligned for the refCount
   373     // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
   374     int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
   375     int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
   376     if(array != 0) {
   377       // set initial refCount and point behind the refCount
   378       *array++ = 1;
   380       // have fArray point to the first UChar
   381       fUnion.fFields.fArray = (UChar *)array;
   382       fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
   383       fFlags = kLongString;
   384     } else {
   385       fShortLength = 0;
   386       fUnion.fFields.fArray = 0;
   387       fUnion.fFields.fCapacity = 0;
   388       fFlags = kIsBogus;
   389       return FALSE;
   390     }
   391   }
   392   return TRUE;
   393 }
   395 //========================================
   396 // Destructor
   397 //========================================
   398 UnicodeString::~UnicodeString()
   399 {
   400   releaseArray();
   401 }
   403 //========================================
   404 // Factory methods
   405 //========================================
   407 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
   408   UnicodeString result;
   409   result.setToUTF8(utf8);
   410   return result;
   411 }
   413 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
   414   UnicodeString result;
   415   int32_t capacity;
   416   // Most UTF-32 strings will be BMP-only and result in a same-length
   417   // UTF-16 string. We overestimate the capacity just slightly,
   418   // just in case there are a few supplementary characters.
   419   if(length <= US_STACKBUF_SIZE) {
   420     capacity = US_STACKBUF_SIZE;
   421   } else {
   422     capacity = length + (length >> 4) + 4;
   423   }
   424   do {
   425     UChar *utf16 = result.getBuffer(capacity);
   426     int32_t length16;
   427     UErrorCode errorCode = U_ZERO_ERROR;
   428     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
   429         utf32, length,
   430         0xfffd,  // Substitution character.
   431         NULL,    // Don't care about number of substitutions.
   432         &errorCode);
   433     result.releaseBuffer(length16);
   434     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
   435       capacity = length16 + 1;  // +1 for the terminating NUL.
   436       continue;
   437     } else if(U_FAILURE(errorCode)) {
   438       result.setToBogus();
   439     }
   440     break;
   441   } while(TRUE);
   442   return result;
   443 }
   445 //========================================
   446 // Assignment
   447 //========================================
   449 UnicodeString &
   450 UnicodeString::operator=(const UnicodeString &src) {
   451   return copyFrom(src);
   452 }
   454 UnicodeString &
   455 UnicodeString::fastCopyFrom(const UnicodeString &src) {
   456   return copyFrom(src, TRUE);
   457 }
   459 UnicodeString &
   460 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
   461   // if assigning to ourselves, do nothing
   462   if(this == 0 || this == &src) {
   463     return *this;
   464   }
   466   // is the right side bogus?
   467   if(&src == 0 || src.isBogus()) {
   468     setToBogus();
   469     return *this;
   470   }
   472   // delete the current contents
   473   releaseArray();
   475   if(src.isEmpty()) {
   476     // empty string - use the stack buffer
   477     setToEmpty();
   478     return *this;
   479   }
   481   // we always copy the length
   482   int32_t srcLength = src.length();
   483   setLength(srcLength);
   485   // fLength>0 and not an "open" src.getBuffer(minCapacity)
   486   switch(src.fFlags) {
   487   case kShortString:
   488     // short string using the stack buffer, do the same
   489     fFlags = kShortString;
   490     uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
   491     break;
   492   case kLongString:
   493     // src uses a refCounted string buffer, use that buffer with refCount
   494     // src is const, use a cast - we don't really change it
   495     ((UnicodeString &)src).addRef();
   496     // copy all fields, share the reference-counted buffer
   497     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
   498     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
   499     fFlags = src.fFlags;
   500     break;
   501   case kReadonlyAlias:
   502     if(fastCopy) {
   503       // src is a readonly alias, do the same
   504       // -> maintain the readonly alias as such
   505       fUnion.fFields.fArray = src.fUnion.fFields.fArray;
   506       fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
   507       fFlags = src.fFlags;
   508       break;
   509     }
   510     // else if(!fastCopy) fall through to case kWritableAlias
   511     // -> allocate a new buffer and copy the contents
   512   case kWritableAlias:
   513     // src is a writable alias; we make a copy of that instead
   514     if(allocate(srcLength)) {
   515       uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
   516       break;
   517     }
   518     // if there is not enough memory, then fall through to setting to bogus
   519   default:
   520     // if src is bogus, set ourselves to bogus
   521     // do not call setToBogus() here because fArray and fFlags are not consistent here
   522     fShortLength = 0;
   523     fUnion.fFields.fArray = 0;
   524     fUnion.fFields.fCapacity = 0;
   525     fFlags = kIsBogus;
   526     break;
   527   }
   529   return *this;
   530 }
   532 //========================================
   533 // Miscellaneous operations
   534 //========================================
   536 UnicodeString UnicodeString::unescape() const {
   537     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
   538     const UChar *array = getBuffer();
   539     int32_t len = length();
   540     int32_t prev = 0;
   541     for (int32_t i=0;;) {
   542         if (i == len) {
   543             result.append(array, prev, len - prev);
   544             break;
   545         }
   546         if (array[i++] == 0x5C /*'\\'*/) {
   547             result.append(array, prev, (i - 1) - prev);
   548             UChar32 c = unescapeAt(i); // advances i
   549             if (c < 0) {
   550                 result.remove(); // return empty string
   551                 break; // invalid escape sequence
   552             }
   553             result.append(c);
   554             prev = i;
   555         }
   556     }
   557     return result;
   558 }
   560 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
   561     return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
   562 }
   564 //========================================
   565 // Read-only implementation
   566 //========================================
   567 UBool
   568 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
   569   // Requires: this & text not bogus and have same lengths.
   570   // Byte-wise comparison works for equality regardless of endianness.
   571   return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
   572 }
   574 int8_t
   575 UnicodeString::doCompare( int32_t start,
   576               int32_t length,
   577               const UChar *srcChars,
   578               int32_t srcStart,
   579               int32_t srcLength) const
   580 {
   581   // compare illegal string values
   582   if(isBogus()) {
   583     return -1;
   584   }
   586   // pin indices to legal values
   587   pinIndices(start, length);
   589   if(srcChars == NULL) {
   590     // treat const UChar *srcChars==NULL as an empty string
   591     return length == 0 ? 0 : 1;
   592   }
   594   // get the correct pointer
   595   const UChar *chars = getArrayStart();
   597   chars += start;
   598   srcChars += srcStart;
   600   int32_t minLength;
   601   int8_t lengthResult;
   603   // get the srcLength if necessary
   604   if(srcLength < 0) {
   605     srcLength = u_strlen(srcChars + srcStart);
   606   }
   608   // are we comparing different lengths?
   609   if(length != srcLength) {
   610     if(length < srcLength) {
   611       minLength = length;
   612       lengthResult = -1;
   613     } else {
   614       minLength = srcLength;
   615       lengthResult = 1;
   616     }
   617   } else {
   618     minLength = length;
   619     lengthResult = 0;
   620   }
   622   /*
   623    * note that uprv_memcmp() returns an int but we return an int8_t;
   624    * we need to take care not to truncate the result -
   625    * one way to do this is to right-shift the value to
   626    * move the sign bit into the lower 8 bits and making sure that this
   627    * does not become 0 itself
   628    */
   630   if(minLength > 0 && chars != srcChars) {
   631     int32_t result;
   633 #   if U_IS_BIG_ENDIAN 
   634       // big-endian: byte comparison works
   635       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
   636       if(result != 0) {
   637         return (int8_t)(result >> 15 | 1);
   638       }
   639 #   else
   640       // little-endian: compare UChar units
   641       do {
   642         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
   643         if(result != 0) {
   644           return (int8_t)(result >> 15 | 1);
   645         }
   646       } while(--minLength > 0);
   647 #   endif
   648   }
   649   return lengthResult;
   650 }
   652 /* String compare in code point order - doCompare() compares in code unit order. */
   653 int8_t
   654 UnicodeString::doCompareCodePointOrder(int32_t start,
   655                                        int32_t length,
   656                                        const UChar *srcChars,
   657                                        int32_t srcStart,
   658                                        int32_t srcLength) const
   659 {
   660   // compare illegal string values
   661   // treat const UChar *srcChars==NULL as an empty string
   662   if(isBogus()) {
   663     return -1;
   664   }
   666   // pin indices to legal values
   667   pinIndices(start, length);
   669   if(srcChars == NULL) {
   670     srcStart = srcLength = 0;
   671   }
   673   int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
   674   /* translate the 32-bit result into an 8-bit one */
   675   if(diff!=0) {
   676     return (int8_t)(diff >> 15 | 1);
   677   } else {
   678     return 0;
   679   }
   680 }
   682 int32_t
   683 UnicodeString::getLength() const {
   684     return length();
   685 }
   687 UChar
   688 UnicodeString::getCharAt(int32_t offset) const {
   689   return charAt(offset);
   690 }
   692 UChar32
   693 UnicodeString::getChar32At(int32_t offset) const {
   694   return char32At(offset);
   695 }
   697 UChar32
   698 UnicodeString::char32At(int32_t offset) const
   699 {
   700   int32_t len = length();
   701   if((uint32_t)offset < (uint32_t)len) {
   702     const UChar *array = getArrayStart();
   703     UChar32 c;
   704     U16_GET(array, 0, offset, len, c);
   705     return c;
   706   } else {
   707     return kInvalidUChar;
   708   }
   709 }
   711 int32_t
   712 UnicodeString::getChar32Start(int32_t offset) const {
   713   if((uint32_t)offset < (uint32_t)length()) {
   714     const UChar *array = getArrayStart();
   715     U16_SET_CP_START(array, 0, offset);
   716     return offset;
   717   } else {
   718     return 0;
   719   }
   720 }
   722 int32_t
   723 UnicodeString::getChar32Limit(int32_t offset) const {
   724   int32_t len = length();
   725   if((uint32_t)offset < (uint32_t)len) {
   726     const UChar *array = getArrayStart();
   727     U16_SET_CP_LIMIT(array, 0, offset, len);
   728     return offset;
   729   } else {
   730     return len;
   731   }
   732 }
   734 int32_t
   735 UnicodeString::countChar32(int32_t start, int32_t length) const {
   736   pinIndices(start, length);
   737   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
   738   return u_countChar32(getArrayStart()+start, length);
   739 }
   741 UBool
   742 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
   743   pinIndices(start, length);
   744   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
   745   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
   746 }
   748 int32_t
   749 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
   750   // pin index
   751   int32_t len = length();
   752   if(index<0) {
   753     index=0;
   754   } else if(index>len) {
   755     index=len;
   756   }
   758   const UChar *array = getArrayStart();
   759   if(delta>0) {
   760     U16_FWD_N(array, index, len, delta);
   761   } else {
   762     U16_BACK_N(array, 0, index, -delta);
   763   }
   765   return index;
   766 }
   768 void
   769 UnicodeString::doExtract(int32_t start,
   770              int32_t length,
   771              UChar *dst,
   772              int32_t dstStart) const
   773 {
   774   // pin indices to legal values
   775   pinIndices(start, length);
   777   // do not copy anything if we alias dst itself
   778   const UChar *array = getArrayStart();
   779   if(array + start != dst + dstStart) {
   780     us_arrayCopy(array, start, dst, dstStart, length);
   781   }
   782 }
   784 int32_t
   785 UnicodeString::extract(UChar *dest, int32_t destCapacity,
   786                        UErrorCode &errorCode) const {
   787   int32_t len = length();
   788   if(U_SUCCESS(errorCode)) {
   789     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
   790       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   791     } else {
   792       const UChar *array = getArrayStart();
   793       if(len>0 && len<=destCapacity && array!=dest) {
   794         uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
   795       }
   796       return u_terminateUChars(dest, destCapacity, len, &errorCode);
   797     }
   798   }
   800   return len;
   801 }
   803 int32_t
   804 UnicodeString::extract(int32_t start,
   805                        int32_t length,
   806                        char *target,
   807                        int32_t targetCapacity,
   808                        enum EInvariant) const
   809 {
   810   // if the arguments are illegal, then do nothing
   811   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
   812     return 0;
   813   }
   815   // pin the indices to legal values
   816   pinIndices(start, length);
   818   if(length <= targetCapacity) {
   819     u_UCharsToChars(getArrayStart() + start, target, length);
   820   }
   821   UErrorCode status = U_ZERO_ERROR;
   822   return u_terminateChars(target, targetCapacity, length, &status);
   823 }
   825 UnicodeString
   826 UnicodeString::tempSubString(int32_t start, int32_t len) const {
   827   pinIndices(start, len);
   828   const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
   829   if(array==NULL) {
   830     array=fUnion.fStackBuffer;  // anything not NULL because that would make an empty string
   831     len=-2;  // bogus result string
   832   }
   833   return UnicodeString(FALSE, array + start, len);
   834 }
   836 int32_t
   837 UnicodeString::toUTF8(int32_t start, int32_t len,
   838                       char *target, int32_t capacity) const {
   839   pinIndices(start, len);
   840   int32_t length8;
   841   UErrorCode errorCode = U_ZERO_ERROR;
   842   u_strToUTF8WithSub(target, capacity, &length8,
   843                      getBuffer() + start, len,
   844                      0xFFFD,  // Standard substitution character.
   845                      NULL,    // Don't care about number of substitutions.
   846                      &errorCode);
   847   return length8;
   848 }
   850 #if U_CHARSET_IS_UTF8
   852 int32_t
   853 UnicodeString::extract(int32_t start, int32_t len,
   854                        char *target, uint32_t dstSize) const {
   855   // if the arguments are illegal, then do nothing
   856   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
   857     return 0;
   858   }
   859   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
   860 }
   862 // else see unistr_cnv.cpp
   863 #endif
   865 void 
   866 UnicodeString::extractBetween(int32_t start,
   867                   int32_t limit,
   868                   UnicodeString& target) const {
   869   pinIndex(start);
   870   pinIndex(limit);
   871   doExtract(start, limit - start, target);
   872 }
   874 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
   875 // as many bytes as the source has UChars.
   876 // The "worst cases" are writing systems like Indic, Thai and CJK with
   877 // 3:1 bytes:UChars.
   878 void
   879 UnicodeString::toUTF8(ByteSink &sink) const {
   880   int32_t length16 = length();
   881   if(length16 != 0) {
   882     char stackBuffer[1024];
   883     int32_t capacity = (int32_t)sizeof(stackBuffer);
   884     UBool utf8IsOwned = FALSE;
   885     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
   886                                       3*length16,
   887                                       stackBuffer, capacity,
   888                                       &capacity);
   889     int32_t length8 = 0;
   890     UErrorCode errorCode = U_ZERO_ERROR;
   891     u_strToUTF8WithSub(utf8, capacity, &length8,
   892                        getBuffer(), length16,
   893                        0xFFFD,  // Standard substitution character.
   894                        NULL,    // Don't care about number of substitutions.
   895                        &errorCode);
   896     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
   897       utf8 = (char *)uprv_malloc(length8);
   898       if(utf8 != NULL) {
   899         utf8IsOwned = TRUE;
   900         errorCode = U_ZERO_ERROR;
   901         u_strToUTF8WithSub(utf8, length8, &length8,
   902                            getBuffer(), length16,
   903                            0xFFFD,  // Standard substitution character.
   904                            NULL,    // Don't care about number of substitutions.
   905                            &errorCode);
   906       } else {
   907         errorCode = U_MEMORY_ALLOCATION_ERROR;
   908       }
   909     }
   910     if(U_SUCCESS(errorCode)) {
   911       sink.Append(utf8, length8);
   912       sink.Flush();
   913     }
   914     if(utf8IsOwned) {
   915       uprv_free(utf8);
   916     }
   917   }
   918 }
   920 int32_t
   921 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
   922   int32_t length32=0;
   923   if(U_SUCCESS(errorCode)) {
   924     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
   925     u_strToUTF32WithSub(utf32, capacity, &length32,
   926         getBuffer(), length(),
   927         0xfffd,  // Substitution character.
   928         NULL,    // Don't care about number of substitutions.
   929         &errorCode);
   930   }
   931   return length32;
   932 }
   934 int32_t 
   935 UnicodeString::indexOf(const UChar *srcChars,
   936                int32_t srcStart,
   937                int32_t srcLength,
   938                int32_t start,
   939                int32_t length) const
   940 {
   941   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
   942     return -1;
   943   }
   945   // UnicodeString does not find empty substrings
   946   if(srcLength < 0 && srcChars[srcStart] == 0) {
   947     return -1;
   948   }
   950   // get the indices within bounds
   951   pinIndices(start, length);
   953   // find the first occurrence of the substring
   954   const UChar *array = getArrayStart();
   955   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
   956   if(match == NULL) {
   957     return -1;
   958   } else {
   959     return (int32_t)(match - array);
   960   }
   961 }
   963 int32_t
   964 UnicodeString::doIndexOf(UChar c,
   965              int32_t start,
   966              int32_t length) const
   967 {
   968   // pin indices
   969   pinIndices(start, length);
   971   // find the first occurrence of c
   972   const UChar *array = getArrayStart();
   973   const UChar *match = u_memchr(array + start, c, length);
   974   if(match == NULL) {
   975     return -1;
   976   } else {
   977     return (int32_t)(match - array);
   978   }
   979 }
   981 int32_t
   982 UnicodeString::doIndexOf(UChar32 c,
   983                          int32_t start,
   984                          int32_t length) const {
   985   // pin indices
   986   pinIndices(start, length);
   988   // find the first occurrence of c
   989   const UChar *array = getArrayStart();
   990   const UChar *match = u_memchr32(array + start, c, length);
   991   if(match == NULL) {
   992     return -1;
   993   } else {
   994     return (int32_t)(match - array);
   995   }
   996 }
   998 int32_t 
   999 UnicodeString::lastIndexOf(const UChar *srcChars,
  1000                int32_t srcStart,
  1001                int32_t srcLength,
  1002                int32_t start,
  1003                int32_t length) const
  1005   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
  1006     return -1;
  1009   // UnicodeString does not find empty substrings
  1010   if(srcLength < 0 && srcChars[srcStart] == 0) {
  1011     return -1;
  1014   // get the indices within bounds
  1015   pinIndices(start, length);
  1017   // find the last occurrence of the substring
  1018   const UChar *array = getArrayStart();
  1019   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
  1020   if(match == NULL) {
  1021     return -1;
  1022   } else {
  1023     return (int32_t)(match - array);
  1027 int32_t
  1028 UnicodeString::doLastIndexOf(UChar c,
  1029                  int32_t start,
  1030                  int32_t length) const
  1032   if(isBogus()) {
  1033     return -1;
  1036   // pin indices
  1037   pinIndices(start, length);
  1039   // find the last occurrence of c
  1040   const UChar *array = getArrayStart();
  1041   const UChar *match = u_memrchr(array + start, c, length);
  1042   if(match == NULL) {
  1043     return -1;
  1044   } else {
  1045     return (int32_t)(match - array);
  1049 int32_t
  1050 UnicodeString::doLastIndexOf(UChar32 c,
  1051                              int32_t start,
  1052                              int32_t length) const {
  1053   // pin indices
  1054   pinIndices(start, length);
  1056   // find the last occurrence of c
  1057   const UChar *array = getArrayStart();
  1058   const UChar *match = u_memrchr32(array + start, c, length);
  1059   if(match == NULL) {
  1060     return -1;
  1061   } else {
  1062     return (int32_t)(match - array);
  1066 //========================================
  1067 // Write implementation
  1068 //========================================
  1070 UnicodeString& 
  1071 UnicodeString::findAndReplace(int32_t start,
  1072                   int32_t length,
  1073                   const UnicodeString& oldText,
  1074                   int32_t oldStart,
  1075                   int32_t oldLength,
  1076                   const UnicodeString& newText,
  1077                   int32_t newStart,
  1078                   int32_t newLength)
  1080   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
  1081     return *this;
  1084   pinIndices(start, length);
  1085   oldText.pinIndices(oldStart, oldLength);
  1086   newText.pinIndices(newStart, newLength);
  1088   if(oldLength == 0) {
  1089     return *this;
  1092   while(length > 0 && length >= oldLength) {
  1093     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
  1094     if(pos < 0) {
  1095       // no more oldText's here: done
  1096       break;
  1097     } else {
  1098       // we found oldText, replace it by newText and go beyond it
  1099       replace(pos, oldLength, newText, newStart, newLength);
  1100       length -= pos + oldLength - start;
  1101       start = pos + newLength;
  1105   return *this;
  1109 void
  1110 UnicodeString::setToBogus()
  1112   releaseArray();
  1114   fShortLength = 0;
  1115   fUnion.fFields.fArray = 0;
  1116   fUnion.fFields.fCapacity = 0;
  1117   fFlags = kIsBogus;
  1120 // turn a bogus string into an empty one
  1121 void
  1122 UnicodeString::unBogus() {
  1123   if(fFlags & kIsBogus) {
  1124     setToEmpty();
  1128 const UChar *
  1129 UnicodeString::getTerminatedBuffer() {
  1130   if(!isWritable()) {
  1131     return 0;
  1133   UChar *array = getArrayStart();
  1134   int32_t len = length();
  1135   if(len < getCapacity()) {
  1136     if(fFlags & kBufferIsReadonly) {
  1137       // If len<capacity on a read-only alias, then array[len] is
  1138       // either the original NUL (if constructed with (TRUE, s, length))
  1139       // or one of the original string contents characters (if later truncated),
  1140       // therefore we can assume that array[len] is initialized memory.
  1141       if(array[len] == 0) {
  1142         return array;
  1144     } else if(((fFlags & kRefCounted) == 0 || refCount() == 1)) {
  1145       // kRefCounted: Do not write the NUL if the buffer is shared.
  1146       // That is mostly safe, except when the length of one copy was modified
  1147       // without copy-on-write, e.g., via truncate(newLength) or remove(void).
  1148       // Then the NUL would be written into the middle of another copy's string.
  1150       // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
  1151       // Do not test if there is a NUL already because it might be uninitialized memory.
  1152       // (That would be safe, but tools like valgrind & Purify would complain.)
  1153       array[len] = 0;
  1154       return array;
  1157   if(cloneArrayIfNeeded(len+1)) {
  1158     array = getArrayStart();
  1159     array[len] = 0;
  1160     return array;
  1161   } else {
  1162     return NULL;
  1166 // setTo() analogous to the readonly-aliasing constructor with the same signature
  1167 UnicodeString &
  1168 UnicodeString::setTo(UBool isTerminated,
  1169                      const UChar *text,
  1170                      int32_t textLength)
  1172   if(fFlags & kOpenGetBuffer) {
  1173     // do not modify a string that has an "open" getBuffer(minCapacity)
  1174     return *this;
  1177   if(text == NULL) {
  1178     // treat as an empty string, do not alias
  1179     releaseArray();
  1180     setToEmpty();
  1181     return *this;
  1184   if( textLength < -1 ||
  1185       (textLength == -1 && !isTerminated) ||
  1186       (textLength >= 0 && isTerminated && text[textLength] != 0)
  1187   ) {
  1188     setToBogus();
  1189     return *this;
  1192   releaseArray();
  1194   if(textLength == -1) {
  1195     // text is terminated, or else it would have failed the above test
  1196     textLength = u_strlen(text);
  1198   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
  1200   fFlags = kReadonlyAlias;
  1201   return *this;
  1204 // setTo() analogous to the writable-aliasing constructor with the same signature
  1205 UnicodeString &
  1206 UnicodeString::setTo(UChar *buffer,
  1207                      int32_t buffLength,
  1208                      int32_t buffCapacity) {
  1209   if(fFlags & kOpenGetBuffer) {
  1210     // do not modify a string that has an "open" getBuffer(minCapacity)
  1211     return *this;
  1214   if(buffer == NULL) {
  1215     // treat as an empty string, do not alias
  1216     releaseArray();
  1217     setToEmpty();
  1218     return *this;
  1221   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
  1222     setToBogus();
  1223     return *this;
  1224   } else if(buffLength == -1) {
  1225     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
  1226     const UChar *p = buffer, *limit = buffer + buffCapacity;
  1227     while(p != limit && *p != 0) {
  1228       ++p;
  1230     buffLength = (int32_t)(p - buffer);
  1233   releaseArray();
  1235   setArray(buffer, buffLength, buffCapacity);
  1236   fFlags = kWritableAlias;
  1237   return *this;
  1240 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
  1241   unBogus();
  1242   int32_t length = utf8.length();
  1243   int32_t capacity;
  1244   // The UTF-16 string will be at most as long as the UTF-8 string.
  1245   if(length <= US_STACKBUF_SIZE) {
  1246     capacity = US_STACKBUF_SIZE;
  1247   } else {
  1248     capacity = length + 1;  // +1 for the terminating NUL.
  1250   UChar *utf16 = getBuffer(capacity);
  1251   int32_t length16;
  1252   UErrorCode errorCode = U_ZERO_ERROR;
  1253   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
  1254       utf8.data(), length,
  1255       0xfffd,  // Substitution character.
  1256       NULL,    // Don't care about number of substitutions.
  1257       &errorCode);
  1258   releaseBuffer(length16);
  1259   if(U_FAILURE(errorCode)) {
  1260     setToBogus();
  1262   return *this;
  1265 UnicodeString&
  1266 UnicodeString::setCharAt(int32_t offset,
  1267              UChar c)
  1269   int32_t len = length();
  1270   if(cloneArrayIfNeeded() && len > 0) {
  1271     if(offset < 0) {
  1272       offset = 0;
  1273     } else if(offset >= len) {
  1274       offset = len - 1;
  1277     getArrayStart()[offset] = c;
  1279   return *this;
  1282 UnicodeString&
  1283 UnicodeString::replace(int32_t start,
  1284                int32_t _length,
  1285                UChar32 srcChar) {
  1286   UChar buffer[U16_MAX_LENGTH];
  1287   int32_t count = 0;
  1288   UBool isError = FALSE;
  1289   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
  1290   // We test isError so that the compiler does not complain that we don't.
  1291   // If isError (srcChar is not a valid code point) then count==0 which means
  1292   // we remove the source segment rather than replacing it with srcChar.
  1293   return doReplace(start, _length, buffer, 0, isError ? 0 : count);
  1296 UnicodeString&
  1297 UnicodeString::append(UChar32 srcChar) {
  1298   UChar buffer[U16_MAX_LENGTH];
  1299   int32_t _length = 0;
  1300   UBool isError = FALSE;
  1301   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
  1302   // We test isError so that the compiler does not complain that we don't.
  1303   // If isError then _length==0 which turns the doReplace() into a no-op anyway.
  1304   return isError ? *this : doReplace(length(), 0, buffer, 0, _length);
  1307 UnicodeString&
  1308 UnicodeString::doReplace( int32_t start,
  1309               int32_t length,
  1310               const UnicodeString& src,
  1311               int32_t srcStart,
  1312               int32_t srcLength)
  1314   if(!src.isBogus()) {
  1315     // pin the indices to legal values
  1316     src.pinIndices(srcStart, srcLength);
  1318     // get the characters from src
  1319     // and replace the range in ourselves with them
  1320     return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
  1321   } else {
  1322     // remove the range
  1323     return doReplace(start, length, 0, 0, 0);
  1327 UnicodeString&
  1328 UnicodeString::doReplace(int32_t start,
  1329              int32_t length,
  1330              const UChar *srcChars,
  1331              int32_t srcStart,
  1332              int32_t srcLength)
  1334   if(!isWritable()) {
  1335     return *this;
  1338   int32_t oldLength = this->length();
  1340   // optimize (read-only alias).remove(0, start) and .remove(start, end)
  1341   if((fFlags&kBufferIsReadonly) && srcLength == 0) {
  1342     if(start == 0) {
  1343       // remove prefix by adjusting the array pointer
  1344       pinIndex(length);
  1345       fUnion.fFields.fArray += length;
  1346       fUnion.fFields.fCapacity -= length;
  1347       setLength(oldLength - length);
  1348       return *this;
  1349     } else {
  1350       pinIndex(start);
  1351       if(length >= (oldLength - start)) {
  1352         // remove suffix by reducing the length (like truncate())
  1353         setLength(start);
  1354         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
  1355         return *this;
  1360   if(srcChars == 0) {
  1361     srcStart = srcLength = 0;
  1362   } else if(srcLength < 0) {
  1363     // get the srcLength if necessary
  1364     srcLength = u_strlen(srcChars + srcStart);
  1367   // calculate the size of the string after the replace
  1368   int32_t newLength;
  1370   // optimize append() onto a large-enough, owned string
  1371   if(start >= oldLength) {
  1372     if(srcLength == 0) {
  1373       return *this;
  1375     newLength = oldLength + srcLength;
  1376     if(newLength <= getCapacity() && isBufferWritable()) {
  1377       UChar *oldArray = getArrayStart();
  1378       // Do not copy characters when
  1379       //   UChar *buffer=str.getAppendBuffer(...);
  1380       // is followed by
  1381       //   str.append(buffer, length);
  1382       // or
  1383       //   str.appendString(buffer, length)
  1384       // or similar.
  1385       if(srcChars + srcStart != oldArray + start || start > oldLength) {
  1386         us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
  1388       setLength(newLength);
  1389       return *this;
  1390     } else {
  1391       // pin the indices to legal values
  1392       start = oldLength;
  1393       length = 0;
  1395   } else {
  1396     // pin the indices to legal values
  1397     pinIndices(start, length);
  1399     newLength = oldLength - length + srcLength;
  1402   // the following may change fArray but will not copy the current contents;
  1403   // therefore we need to keep the current fArray
  1404   UChar oldStackBuffer[US_STACKBUF_SIZE];
  1405   UChar *oldArray;
  1406   if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
  1407     // copy the stack buffer contents because it will be overwritten with
  1408     // fUnion.fFields values
  1409     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
  1410     oldArray = oldStackBuffer;
  1411   } else {
  1412     oldArray = getArrayStart();
  1415   // clone our array and allocate a bigger array if needed
  1416   int32_t *bufferToDelete = 0;
  1417   if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
  1418                          FALSE, &bufferToDelete)
  1419   ) {
  1420     return *this;
  1423   // now do the replace
  1425   UChar *newArray = getArrayStart();
  1426   if(newArray != oldArray) {
  1427     // if fArray changed, then we need to copy everything except what will change
  1428     us_arrayCopy(oldArray, 0, newArray, 0, start);
  1429     us_arrayCopy(oldArray, start + length,
  1430                  newArray, start + srcLength,
  1431                  oldLength - (start + length));
  1432   } else if(length != srcLength) {
  1433     // fArray did not change; copy only the portion that isn't changing, leaving a hole
  1434     us_arrayCopy(oldArray, start + length,
  1435                  newArray, start + srcLength,
  1436                  oldLength - (start + length));
  1439   // now fill in the hole with the new string
  1440   us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
  1442   setLength(newLength);
  1444   // delayed delete in case srcChars == fArray when we started, and
  1445   // to keep oldArray alive for the above operations
  1446   if (bufferToDelete) {
  1447     uprv_free(bufferToDelete);
  1450   return *this;
  1453 /**
  1454  * Replaceable API
  1455  */
  1456 void
  1457 UnicodeString::handleReplaceBetween(int32_t start,
  1458                                     int32_t limit,
  1459                                     const UnicodeString& text) {
  1460     replaceBetween(start, limit, text);
  1463 /**
  1464  * Replaceable API
  1465  */
  1466 void 
  1467 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
  1468     if (limit <= start) {
  1469         return; // Nothing to do; avoid bogus malloc call
  1471     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
  1472     // Check to make sure text is not null.
  1473     if (text != NULL) {
  1474 	    extractBetween(start, limit, text, 0);
  1475 	    insert(dest, text, 0, limit - start);    
  1476 	    uprv_free(text);
  1480 /**
  1481  * Replaceable API
  1483  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
  1484  * so we implement this function here.
  1485  */
  1486 UBool Replaceable::hasMetaData() const {
  1487     return TRUE;
  1490 /**
  1491  * Replaceable API
  1492  */
  1493 UBool UnicodeString::hasMetaData() const {
  1494     return FALSE;
  1497 UnicodeString&
  1498 UnicodeString::doReverse(int32_t start, int32_t length) {
  1499   if(length <= 1 || !cloneArrayIfNeeded()) {
  1500     return *this;
  1503   // pin the indices to legal values
  1504   pinIndices(start, length);
  1505   if(length <= 1) {  // pinIndices() might have shrunk the length
  1506     return *this;
  1509   UChar *left = getArrayStart() + start;
  1510   UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
  1511   UChar swap;
  1512   UBool hasSupplementary = FALSE;
  1514   // Before the loop we know left<right because length>=2.
  1515   do {
  1516     hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
  1517     hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
  1518     *right-- = swap;
  1519   } while(left < right);
  1520   // Make sure to test the middle code unit of an odd-length string.
  1521   // Redundant if the length is even.
  1522   hasSupplementary |= (UBool)U16_IS_LEAD(*left);
  1524   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
  1525   if(hasSupplementary) {
  1526     UChar swap2;
  1528     left = getArrayStart() + start;
  1529     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
  1530     while(left < right) {
  1531       if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
  1532         *left++ = swap2;
  1533         *left++ = swap;
  1534       } else {
  1535         ++left;
  1540   return *this;
  1543 UBool 
  1544 UnicodeString::padLeading(int32_t targetLength,
  1545                           UChar padChar)
  1547   int32_t oldLength = length();
  1548   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
  1549     return FALSE;
  1550   } else {
  1551     // move contents up by padding width
  1552     UChar *array = getArrayStart();
  1553     int32_t start = targetLength - oldLength;
  1554     us_arrayCopy(array, 0, array, start, oldLength);
  1556     // fill in padding character
  1557     while(--start >= 0) {
  1558       array[start] = padChar;
  1560     setLength(targetLength);
  1561     return TRUE;
  1565 UBool 
  1566 UnicodeString::padTrailing(int32_t targetLength,
  1567                            UChar padChar)
  1569   int32_t oldLength = length();
  1570   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
  1571     return FALSE;
  1572   } else {
  1573     // fill in padding character
  1574     UChar *array = getArrayStart();
  1575     int32_t length = targetLength;
  1576     while(--length >= oldLength) {
  1577       array[length] = padChar;
  1579     setLength(targetLength);
  1580     return TRUE;
  1584 //========================================
  1585 // Hashing
  1586 //========================================
  1587 int32_t
  1588 UnicodeString::doHashCode() const
  1590     /* Delegate hash computation to uhash.  This makes UnicodeString
  1591      * hashing consistent with UChar* hashing.  */
  1592     int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
  1593     if (hashCode == kInvalidHashCode) {
  1594         hashCode = kEmptyHashCode;
  1596     return hashCode;
  1599 //========================================
  1600 // External Buffer
  1601 //========================================
  1603 UChar *
  1604 UnicodeString::getBuffer(int32_t minCapacity) {
  1605   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
  1606     fFlags|=kOpenGetBuffer;
  1607     fShortLength=0;
  1608     return getArrayStart();
  1609   } else {
  1610     return 0;
  1614 void
  1615 UnicodeString::releaseBuffer(int32_t newLength) {
  1616   if(fFlags&kOpenGetBuffer && newLength>=-1) {
  1617     // set the new fLength
  1618     int32_t capacity=getCapacity();
  1619     if(newLength==-1) {
  1620       // the new length is the string length, capped by fCapacity
  1621       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
  1622       while(p<limit && *p!=0) {
  1623         ++p;
  1625       newLength=(int32_t)(p-array);
  1626     } else if(newLength>capacity) {
  1627       newLength=capacity;
  1629     setLength(newLength);
  1630     fFlags&=~kOpenGetBuffer;
  1634 //========================================
  1635 // Miscellaneous
  1636 //========================================
  1637 UBool
  1638 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
  1639                                   int32_t growCapacity,
  1640                                   UBool doCopyArray,
  1641                                   int32_t **pBufferToDelete,
  1642                                   UBool forceClone) {
  1643   // default parameters need to be static, therefore
  1644   // the defaults are -1 to have convenience defaults
  1645   if(newCapacity == -1) {
  1646     newCapacity = getCapacity();
  1649   // while a getBuffer(minCapacity) is "open",
  1650   // prevent any modifications of the string by returning FALSE here
  1651   // if the string is bogus, then only an assignment or similar can revive it
  1652   if(!isWritable()) {
  1653     return FALSE;
  1656   /*
  1657    * We need to make a copy of the array if
  1658    * the buffer is read-only, or
  1659    * the buffer is refCounted (shared), and refCount>1, or
  1660    * the buffer is too small.
  1661    * Return FALSE if memory could not be allocated.
  1662    */
  1663   if(forceClone ||
  1664      fFlags & kBufferIsReadonly ||
  1665      (fFlags & kRefCounted && refCount() > 1) ||
  1666      newCapacity > getCapacity()
  1667   ) {
  1668     // check growCapacity for default value and use of the stack buffer
  1669     if(growCapacity < 0) {
  1670       growCapacity = newCapacity;
  1671     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
  1672       growCapacity = US_STACKBUF_SIZE;
  1675     // save old values
  1676     UChar oldStackBuffer[US_STACKBUF_SIZE];
  1677     UChar *oldArray;
  1678     uint8_t flags = fFlags;
  1680     if(flags&kUsingStackBuffer) {
  1681       U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
  1682       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
  1683         // copy the stack buffer contents because it will be overwritten with
  1684         // fUnion.fFields values
  1685         us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
  1686         oldArray = oldStackBuffer;
  1687       } else {
  1688         oldArray = 0; // no need to copy from stack buffer to itself
  1690     } else {
  1691       oldArray = fUnion.fFields.fArray;
  1692       U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
  1695     // allocate a new array
  1696     if(allocate(growCapacity) ||
  1697        (newCapacity < growCapacity && allocate(newCapacity))
  1698     ) {
  1699       if(doCopyArray && oldArray != 0) {
  1700         // copy the contents
  1701         // do not copy more than what fits - it may be smaller than before
  1702         int32_t minLength = length();
  1703         newCapacity = getCapacity();
  1704         if(newCapacity < minLength) {
  1705           minLength = newCapacity;
  1706           setLength(minLength);
  1708         us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
  1709       } else {
  1710         fShortLength = 0;
  1713       // release the old array
  1714       if(flags & kRefCounted) {
  1715         // the array is refCounted; decrement and release if 0
  1716         u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
  1717         if(umtx_atomic_dec(pRefCount) == 0) {
  1718           if(pBufferToDelete == 0) {
  1719               // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
  1720               // is defined as volatile. (Volatile has useful non-standard behavior
  1721               //   with this compiler.)
  1722             uprv_free((void *)pRefCount);
  1723           } else {
  1724             // the caller requested to delete it himself
  1725             *pBufferToDelete = (int32_t *)pRefCount;
  1729     } else {
  1730       // not enough memory for growCapacity and not even for the smaller newCapacity
  1731       // reset the old values for setToBogus() to release the array
  1732       if(!(flags&kUsingStackBuffer)) {
  1733         fUnion.fFields.fArray = oldArray;
  1735       fFlags = flags;
  1736       setToBogus();
  1737       return FALSE;
  1740   return TRUE;
  1743 // UnicodeStringAppendable ------------------------------------------------- ***
  1745 UnicodeStringAppendable::~UnicodeStringAppendable() {}
  1747 UBool
  1748 UnicodeStringAppendable::appendCodeUnit(UChar c) {
  1749   return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
  1752 UBool
  1753 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
  1754   UChar buffer[U16_MAX_LENGTH];
  1755   int32_t cLength = 0;
  1756   UBool isError = FALSE;
  1757   U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
  1758   return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
  1761 UBool
  1762 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
  1763   return str.doReplace(str.length(), 0, s, 0, length).isWritable();
  1766 UBool
  1767 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
  1768   return str.cloneArrayIfNeeded(str.length() + appendCapacity);
  1771 UChar *
  1772 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
  1773                                          int32_t desiredCapacityHint,
  1774                                          UChar *scratch, int32_t scratchCapacity,
  1775                                          int32_t *resultCapacity) {
  1776   if(minCapacity < 1 || scratchCapacity < minCapacity) {
  1777     *resultCapacity = 0;
  1778     return NULL;
  1780   int32_t oldLength = str.length();
  1781   if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
  1782     *resultCapacity = str.getCapacity() - oldLength;
  1783     return str.getArrayStart() + oldLength;
  1785   *resultCapacity = scratchCapacity;
  1786   return scratch;
  1789 U_NAMESPACE_END
  1791 U_NAMESPACE_USE
  1793 U_CAPI int32_t U_EXPORT2
  1794 uhash_hashUnicodeString(const UElement key) {
  1795     const UnicodeString *str = (const UnicodeString*) key.pointer;
  1796     return (str == NULL) ? 0 : str->hashCode();
  1799 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
  1800 // does not depend on hashtable code.
  1801 U_CAPI UBool U_EXPORT2
  1802 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
  1803     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
  1804     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
  1805     if (str1 == str2) {
  1806         return TRUE;
  1808     if (str1 == NULL || str2 == NULL) {
  1809         return FALSE;
  1811     return *str1 == *str2;
  1814 #ifdef U_STATIC_IMPLEMENTATION
  1815 /*
  1816 This should never be called. It is defined here to make sure that the
  1817 virtual vector deleting destructor is defined within unistr.cpp.
  1818 The vector deleting destructor is already a part of UObject,
  1819 but defining it here makes sure that it is included with this object file.
  1820 This makes sure that static library dependencies are kept to a minimum.
  1821 */
  1822 static void uprv_UnicodeStringDummy(void) {
  1823     delete [] (new UnicodeString[2]);
  1825 #endif

mercurial