intl/icu/source/common/unistr.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unistr.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1825 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +* Copyright (C) 1999-2013, International Business Machines Corporation and
     1.7 +* others. All Rights Reserved.
     1.8 +******************************************************************************
     1.9 +*
    1.10 +* File unistr.cpp
    1.11 +*
    1.12 +* Modification History:
    1.13 +*
    1.14 +*   Date        Name        Description
    1.15 +*   09/25/98    stephen     Creation.
    1.16 +*   04/20/99    stephen     Overhauled per 4/16 code review.
    1.17 +*   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
    1.18 +*   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
    1.19 +*                           Replaceable.
    1.20 +*   06/25/01    grhoten     Removed the dependency on iostream
    1.21 +******************************************************************************
    1.22 +*/
    1.23 +
    1.24 +#include "unicode/utypes.h"
    1.25 +#include "unicode/appendable.h"
    1.26 +#include "unicode/putil.h"
    1.27 +#include "cstring.h"
    1.28 +#include "cmemory.h"
    1.29 +#include "unicode/ustring.h"
    1.30 +#include "unicode/unistr.h"
    1.31 +#include "unicode/utf.h"
    1.32 +#include "unicode/utf16.h"
    1.33 +#include "uelement.h"
    1.34 +#include "ustr_imp.h"
    1.35 +#include "umutex.h"
    1.36 +#include "uassert.h"
    1.37 +
    1.38 +#if 0
    1.39 +
    1.40 +#include <iostream>
    1.41 +using namespace std;
    1.42 +
    1.43 +//DEBUGGING
    1.44 +void
    1.45 +print(const UnicodeString& s,
    1.46 +      const char *name)
    1.47 +{
    1.48 +  UChar c;
    1.49 +  cout << name << ":|";
    1.50 +  for(int i = 0; i < s.length(); ++i) {
    1.51 +    c = s[i];
    1.52 +    if(c>= 0x007E || c < 0x0020)
    1.53 +      cout << "[0x" << hex << s[i] << "]";
    1.54 +    else
    1.55 +      cout << (char) s[i];
    1.56 +  }
    1.57 +  cout << '|' << endl;
    1.58 +}
    1.59 +
    1.60 +void
    1.61 +print(const UChar *s,
    1.62 +      int32_t len,
    1.63 +      const char *name)
    1.64 +{
    1.65 +  UChar c;
    1.66 +  cout << name << ":|";
    1.67 +  for(int i = 0; i < len; ++i) {
    1.68 +    c = s[i];
    1.69 +    if(c>= 0x007E || c < 0x0020)
    1.70 +      cout << "[0x" << hex << s[i] << "]";
    1.71 +    else
    1.72 +      cout << (char) s[i];
    1.73 +  }
    1.74 +  cout << '|' << endl;
    1.75 +}
    1.76 +// END DEBUGGING
    1.77 +#endif
    1.78 +
    1.79 +// Local function definitions for now
    1.80 +
    1.81 +// need to copy areas that may overlap
    1.82 +static
    1.83 +inline void
    1.84 +us_arrayCopy(const UChar *src, int32_t srcStart,
    1.85 +         UChar *dst, int32_t dstStart, int32_t count)
    1.86 +{
    1.87 +  if(count>0) {
    1.88 +    uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
    1.89 +  }
    1.90 +}
    1.91 +
    1.92 +// u_unescapeAt() callback to get a UChar from a UnicodeString
    1.93 +U_CDECL_BEGIN
    1.94 +static UChar U_CALLCONV
    1.95 +UnicodeString_charAt(int32_t offset, void *context) {
    1.96 +    return ((icu::UnicodeString*) context)->charAt(offset);
    1.97 +}
    1.98 +U_CDECL_END
    1.99 +
   1.100 +U_NAMESPACE_BEGIN
   1.101 +
   1.102 +/* The Replaceable virtual destructor can't be defined in the header
   1.103 +   due to how AIX works with multiple definitions of virtual functions.
   1.104 +*/
   1.105 +Replaceable::~Replaceable() {}
   1.106 +
   1.107 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
   1.108 +
   1.109 +UnicodeString U_EXPORT2
   1.110 +operator+ (const UnicodeString &s1, const UnicodeString &s2) {
   1.111 +    return
   1.112 +        UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
   1.113 +            append(s1).
   1.114 +                append(s2);
   1.115 +}
   1.116 +
   1.117 +//========================================
   1.118 +// Reference Counting functions, put at top of file so that optimizing compilers
   1.119 +//                               have a chance to automatically inline.
   1.120 +//========================================
   1.121 +
   1.122 +void
   1.123 +UnicodeString::addRef() {
   1.124 +  umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
   1.125 +}
   1.126 +
   1.127 +int32_t
   1.128 +UnicodeString::removeRef() {
   1.129 +  return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
   1.130 +}
   1.131 +
   1.132 +int32_t
   1.133 +UnicodeString::refCount() const {
   1.134 +  return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
   1.135 +}
   1.136 +
   1.137 +void
   1.138 +UnicodeString::releaseArray() {
   1.139 +  if((fFlags & kRefCounted) && removeRef() == 0) {
   1.140 +    uprv_free((int32_t *)fUnion.fFields.fArray - 1);
   1.141 +  }
   1.142 +}
   1.143 +
   1.144 +
   1.145 +
   1.146 +//========================================
   1.147 +// Constructors
   1.148 +//========================================
   1.149 +
   1.150 +// The default constructor is inline in unistr.h.
   1.151 +
   1.152 +UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
   1.153 +  : fShortLength(0),
   1.154 +    fFlags(0)
   1.155 +{
   1.156 +  if(count <= 0 || (uint32_t)c > 0x10ffff) {
   1.157 +    // just allocate and do not do anything else
   1.158 +    allocate(capacity);
   1.159 +  } else {
   1.160 +    // count > 0, allocate and fill the new string with count c's
   1.161 +    int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
   1.162 +    if(capacity < length) {
   1.163 +      capacity = length;
   1.164 +    }
   1.165 +    if(allocate(capacity)) {
   1.166 +      UChar *array = getArrayStart();
   1.167 +      int32_t i = 0;
   1.168 +
   1.169 +      // fill the new string with c
   1.170 +      if(unitCount == 1) {
   1.171 +        // fill with length UChars
   1.172 +        while(i < length) {
   1.173 +          array[i++] = (UChar)c;
   1.174 +        }
   1.175 +      } else {
   1.176 +        // get the code units for c
   1.177 +        UChar units[U16_MAX_LENGTH];
   1.178 +        U16_APPEND_UNSAFE(units, i, c);
   1.179 +
   1.180 +        // now it must be i==unitCount
   1.181 +        i = 0;
   1.182 +
   1.183 +        // for Unicode, unitCount can only be 1, 2, 3, or 4
   1.184 +        // 1 is handled above
   1.185 +        while(i < length) {
   1.186 +          int32_t unitIdx = 0;
   1.187 +          while(unitIdx < unitCount) {
   1.188 +            array[i++]=units[unitIdx++];
   1.189 +          }
   1.190 +        }
   1.191 +      }
   1.192 +    }
   1.193 +    setLength(length);
   1.194 +  }
   1.195 +}
   1.196 +
   1.197 +UnicodeString::UnicodeString(UChar ch)
   1.198 +  : fShortLength(1),
   1.199 +    fFlags(kShortString)
   1.200 +{
   1.201 +  fUnion.fStackBuffer[0] = ch;
   1.202 +}
   1.203 +
   1.204 +UnicodeString::UnicodeString(UChar32 ch)
   1.205 +  : fShortLength(0),
   1.206 +    fFlags(kShortString)
   1.207 +{
   1.208 +  int32_t i = 0;
   1.209 +  UBool isError = FALSE;
   1.210 +  U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
   1.211 +  // We test isError so that the compiler does not complain that we don't.
   1.212 +  // If isError then i==0 which is what we want anyway.
   1.213 +  if(!isError) {
   1.214 +    fShortLength = (int8_t)i;
   1.215 +  }
   1.216 +}
   1.217 +
   1.218 +UnicodeString::UnicodeString(const UChar *text)
   1.219 +  : fShortLength(0),
   1.220 +    fFlags(kShortString)
   1.221 +{
   1.222 +  doReplace(0, 0, text, 0, -1);
   1.223 +}
   1.224 +
   1.225 +UnicodeString::UnicodeString(const UChar *text,
   1.226 +                             int32_t textLength)
   1.227 +  : fShortLength(0),
   1.228 +    fFlags(kShortString)
   1.229 +{
   1.230 +  doReplace(0, 0, text, 0, textLength);
   1.231 +}
   1.232 +
   1.233 +UnicodeString::UnicodeString(UBool isTerminated,
   1.234 +                             const UChar *text,
   1.235 +                             int32_t textLength)
   1.236 +  : fShortLength(0),
   1.237 +    fFlags(kReadonlyAlias)
   1.238 +{
   1.239 +  if(text == NULL) {
   1.240 +    // treat as an empty string, do not alias
   1.241 +    setToEmpty();
   1.242 +  } else if(textLength < -1 ||
   1.243 +            (textLength == -1 && !isTerminated) ||
   1.244 +            (textLength >= 0 && isTerminated && text[textLength] != 0)
   1.245 +  ) {
   1.246 +    setToBogus();
   1.247 +  } else {
   1.248 +    if(textLength == -1) {
   1.249 +      // text is terminated, or else it would have failed the above test
   1.250 +      textLength = u_strlen(text);
   1.251 +    }
   1.252 +    setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
   1.253 +  }
   1.254 +}
   1.255 +
   1.256 +UnicodeString::UnicodeString(UChar *buff,
   1.257 +                             int32_t buffLength,
   1.258 +                             int32_t buffCapacity)
   1.259 +  : fShortLength(0),
   1.260 +    fFlags(kWritableAlias)
   1.261 +{
   1.262 +  if(buff == NULL) {
   1.263 +    // treat as an empty string, do not alias
   1.264 +    setToEmpty();
   1.265 +  } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
   1.266 +    setToBogus();
   1.267 +  } else {
   1.268 +    if(buffLength == -1) {
   1.269 +      // fLength = u_strlen(buff); but do not look beyond buffCapacity
   1.270 +      const UChar *p = buff, *limit = buff + buffCapacity;
   1.271 +      while(p != limit && *p != 0) {
   1.272 +        ++p;
   1.273 +      }
   1.274 +      buffLength = (int32_t)(p - buff);
   1.275 +    }
   1.276 +    setArray(buff, buffLength, buffCapacity);
   1.277 +  }
   1.278 +}
   1.279 +
   1.280 +UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
   1.281 +  : fShortLength(0),
   1.282 +    fFlags(kShortString)
   1.283 +{
   1.284 +  if(src==NULL) {
   1.285 +    // treat as an empty string
   1.286 +  } else {
   1.287 +    if(length<0) {
   1.288 +      length=(int32_t)uprv_strlen(src);
   1.289 +    }
   1.290 +    if(cloneArrayIfNeeded(length, length, FALSE)) {
   1.291 +      u_charsToUChars(src, getArrayStart(), length);
   1.292 +      setLength(length);
   1.293 +    } else {
   1.294 +      setToBogus();
   1.295 +    }
   1.296 +  }
   1.297 +}
   1.298 +
   1.299 +#if U_CHARSET_IS_UTF8
   1.300 +
   1.301 +UnicodeString::UnicodeString(const char *codepageData)
   1.302 +  : fShortLength(0),
   1.303 +    fFlags(kShortString) {
   1.304 +  if(codepageData != 0) {
   1.305 +    setToUTF8(codepageData);
   1.306 +  }
   1.307 +}
   1.308 +
   1.309 +UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
   1.310 +  : fShortLength(0),
   1.311 +    fFlags(kShortString) {
   1.312 +  // if there's nothing to convert, do nothing
   1.313 +  if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
   1.314 +    return;
   1.315 +  }
   1.316 +  if(dataLength == -1) {
   1.317 +    dataLength = (int32_t)uprv_strlen(codepageData);
   1.318 +  }
   1.319 +  setToUTF8(StringPiece(codepageData, dataLength));
   1.320 +}
   1.321 +
   1.322 +// else see unistr_cnv.cpp
   1.323 +#endif
   1.324 +
   1.325 +UnicodeString::UnicodeString(const UnicodeString& that)
   1.326 +  : Replaceable(),
   1.327 +    fShortLength(0),
   1.328 +    fFlags(kShortString)
   1.329 +{
   1.330 +  copyFrom(that);
   1.331 +}
   1.332 +
   1.333 +UnicodeString::UnicodeString(const UnicodeString& that,
   1.334 +                             int32_t srcStart)
   1.335 +  : Replaceable(),
   1.336 +    fShortLength(0),
   1.337 +    fFlags(kShortString)
   1.338 +{
   1.339 +  setTo(that, srcStart);
   1.340 +}
   1.341 +
   1.342 +UnicodeString::UnicodeString(const UnicodeString& that,
   1.343 +                             int32_t srcStart,
   1.344 +                             int32_t srcLength)
   1.345 +  : Replaceable(),
   1.346 +    fShortLength(0),
   1.347 +    fFlags(kShortString)
   1.348 +{
   1.349 +  setTo(that, srcStart, srcLength);
   1.350 +}
   1.351 +
   1.352 +// Replaceable base class clone() default implementation, does not clone
   1.353 +Replaceable *
   1.354 +Replaceable::clone() const {
   1.355 +  return NULL;
   1.356 +}
   1.357 +
   1.358 +// UnicodeString overrides clone() with a real implementation
   1.359 +Replaceable *
   1.360 +UnicodeString::clone() const {
   1.361 +  return new UnicodeString(*this);
   1.362 +}
   1.363 +
   1.364 +//========================================
   1.365 +// array allocation
   1.366 +//========================================
   1.367 +
   1.368 +UBool
   1.369 +UnicodeString::allocate(int32_t capacity) {
   1.370 +  if(capacity <= US_STACKBUF_SIZE) {
   1.371 +    fFlags = kShortString;
   1.372 +  } else {
   1.373 +    // count bytes for the refCounter and the string capacity, and
   1.374 +    // round up to a multiple of 16; then divide by 4 and allocate int32_t's
   1.375 +    // to be safely aligned for the refCount
   1.376 +    // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
   1.377 +    int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
   1.378 +    int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
   1.379 +    if(array != 0) {
   1.380 +      // set initial refCount and point behind the refCount
   1.381 +      *array++ = 1;
   1.382 +
   1.383 +      // have fArray point to the first UChar
   1.384 +      fUnion.fFields.fArray = (UChar *)array;
   1.385 +      fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
   1.386 +      fFlags = kLongString;
   1.387 +    } else {
   1.388 +      fShortLength = 0;
   1.389 +      fUnion.fFields.fArray = 0;
   1.390 +      fUnion.fFields.fCapacity = 0;
   1.391 +      fFlags = kIsBogus;
   1.392 +      return FALSE;
   1.393 +    }
   1.394 +  }
   1.395 +  return TRUE;
   1.396 +}
   1.397 +
   1.398 +//========================================
   1.399 +// Destructor
   1.400 +//========================================
   1.401 +UnicodeString::~UnicodeString()
   1.402 +{
   1.403 +  releaseArray();
   1.404 +}
   1.405 +
   1.406 +//========================================
   1.407 +// Factory methods
   1.408 +//========================================
   1.409 +
   1.410 +UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
   1.411 +  UnicodeString result;
   1.412 +  result.setToUTF8(utf8);
   1.413 +  return result;
   1.414 +}
   1.415 +
   1.416 +UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
   1.417 +  UnicodeString result;
   1.418 +  int32_t capacity;
   1.419 +  // Most UTF-32 strings will be BMP-only and result in a same-length
   1.420 +  // UTF-16 string. We overestimate the capacity just slightly,
   1.421 +  // just in case there are a few supplementary characters.
   1.422 +  if(length <= US_STACKBUF_SIZE) {
   1.423 +    capacity = US_STACKBUF_SIZE;
   1.424 +  } else {
   1.425 +    capacity = length + (length >> 4) + 4;
   1.426 +  }
   1.427 +  do {
   1.428 +    UChar *utf16 = result.getBuffer(capacity);
   1.429 +    int32_t length16;
   1.430 +    UErrorCode errorCode = U_ZERO_ERROR;
   1.431 +    u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
   1.432 +        utf32, length,
   1.433 +        0xfffd,  // Substitution character.
   1.434 +        NULL,    // Don't care about number of substitutions.
   1.435 +        &errorCode);
   1.436 +    result.releaseBuffer(length16);
   1.437 +    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
   1.438 +      capacity = length16 + 1;  // +1 for the terminating NUL.
   1.439 +      continue;
   1.440 +    } else if(U_FAILURE(errorCode)) {
   1.441 +      result.setToBogus();
   1.442 +    }
   1.443 +    break;
   1.444 +  } while(TRUE);
   1.445 +  return result;
   1.446 +}
   1.447 +
   1.448 +//========================================
   1.449 +// Assignment
   1.450 +//========================================
   1.451 +
   1.452 +UnicodeString &
   1.453 +UnicodeString::operator=(const UnicodeString &src) {
   1.454 +  return copyFrom(src);
   1.455 +}
   1.456 +
   1.457 +UnicodeString &
   1.458 +UnicodeString::fastCopyFrom(const UnicodeString &src) {
   1.459 +  return copyFrom(src, TRUE);
   1.460 +}
   1.461 +
   1.462 +UnicodeString &
   1.463 +UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
   1.464 +  // if assigning to ourselves, do nothing
   1.465 +  if(this == 0 || this == &src) {
   1.466 +    return *this;
   1.467 +  }
   1.468 +
   1.469 +  // is the right side bogus?
   1.470 +  if(&src == 0 || src.isBogus()) {
   1.471 +    setToBogus();
   1.472 +    return *this;
   1.473 +  }
   1.474 +
   1.475 +  // delete the current contents
   1.476 +  releaseArray();
   1.477 +
   1.478 +  if(src.isEmpty()) {
   1.479 +    // empty string - use the stack buffer
   1.480 +    setToEmpty();
   1.481 +    return *this;
   1.482 +  }
   1.483 +
   1.484 +  // we always copy the length
   1.485 +  int32_t srcLength = src.length();
   1.486 +  setLength(srcLength);
   1.487 +
   1.488 +  // fLength>0 and not an "open" src.getBuffer(minCapacity)
   1.489 +  switch(src.fFlags) {
   1.490 +  case kShortString:
   1.491 +    // short string using the stack buffer, do the same
   1.492 +    fFlags = kShortString;
   1.493 +    uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
   1.494 +    break;
   1.495 +  case kLongString:
   1.496 +    // src uses a refCounted string buffer, use that buffer with refCount
   1.497 +    // src is const, use a cast - we don't really change it
   1.498 +    ((UnicodeString &)src).addRef();
   1.499 +    // copy all fields, share the reference-counted buffer
   1.500 +    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
   1.501 +    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
   1.502 +    fFlags = src.fFlags;
   1.503 +    break;
   1.504 +  case kReadonlyAlias:
   1.505 +    if(fastCopy) {
   1.506 +      // src is a readonly alias, do the same
   1.507 +      // -> maintain the readonly alias as such
   1.508 +      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
   1.509 +      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
   1.510 +      fFlags = src.fFlags;
   1.511 +      break;
   1.512 +    }
   1.513 +    // else if(!fastCopy) fall through to case kWritableAlias
   1.514 +    // -> allocate a new buffer and copy the contents
   1.515 +  case kWritableAlias:
   1.516 +    // src is a writable alias; we make a copy of that instead
   1.517 +    if(allocate(srcLength)) {
   1.518 +      uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
   1.519 +      break;
   1.520 +    }
   1.521 +    // if there is not enough memory, then fall through to setting to bogus
   1.522 +  default:
   1.523 +    // if src is bogus, set ourselves to bogus
   1.524 +    // do not call setToBogus() here because fArray and fFlags are not consistent here
   1.525 +    fShortLength = 0;
   1.526 +    fUnion.fFields.fArray = 0;
   1.527 +    fUnion.fFields.fCapacity = 0;
   1.528 +    fFlags = kIsBogus;
   1.529 +    break;
   1.530 +  }
   1.531 +
   1.532 +  return *this;
   1.533 +}
   1.534 +
   1.535 +//========================================
   1.536 +// Miscellaneous operations
   1.537 +//========================================
   1.538 +
   1.539 +UnicodeString UnicodeString::unescape() const {
   1.540 +    UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
   1.541 +    const UChar *array = getBuffer();
   1.542 +    int32_t len = length();
   1.543 +    int32_t prev = 0;
   1.544 +    for (int32_t i=0;;) {
   1.545 +        if (i == len) {
   1.546 +            result.append(array, prev, len - prev);
   1.547 +            break;
   1.548 +        }
   1.549 +        if (array[i++] == 0x5C /*'\\'*/) {
   1.550 +            result.append(array, prev, (i - 1) - prev);
   1.551 +            UChar32 c = unescapeAt(i); // advances i
   1.552 +            if (c < 0) {
   1.553 +                result.remove(); // return empty string
   1.554 +                break; // invalid escape sequence
   1.555 +            }
   1.556 +            result.append(c);
   1.557 +            prev = i;
   1.558 +        }
   1.559 +    }
   1.560 +    return result;
   1.561 +}
   1.562 +
   1.563 +UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
   1.564 +    return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
   1.565 +}
   1.566 +
   1.567 +//========================================
   1.568 +// Read-only implementation
   1.569 +//========================================
   1.570 +UBool
   1.571 +UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
   1.572 +  // Requires: this & text not bogus and have same lengths.
   1.573 +  // Byte-wise comparison works for equality regardless of endianness.
   1.574 +  return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
   1.575 +}
   1.576 +
   1.577 +int8_t
   1.578 +UnicodeString::doCompare( int32_t start,
   1.579 +              int32_t length,
   1.580 +              const UChar *srcChars,
   1.581 +              int32_t srcStart,
   1.582 +              int32_t srcLength) const
   1.583 +{
   1.584 +  // compare illegal string values
   1.585 +  if(isBogus()) {
   1.586 +    return -1;
   1.587 +  }
   1.588 +  
   1.589 +  // pin indices to legal values
   1.590 +  pinIndices(start, length);
   1.591 +
   1.592 +  if(srcChars == NULL) {
   1.593 +    // treat const UChar *srcChars==NULL as an empty string
   1.594 +    return length == 0 ? 0 : 1;
   1.595 +  }
   1.596 +
   1.597 +  // get the correct pointer
   1.598 +  const UChar *chars = getArrayStart();
   1.599 +
   1.600 +  chars += start;
   1.601 +  srcChars += srcStart;
   1.602 +
   1.603 +  int32_t minLength;
   1.604 +  int8_t lengthResult;
   1.605 +
   1.606 +  // get the srcLength if necessary
   1.607 +  if(srcLength < 0) {
   1.608 +    srcLength = u_strlen(srcChars + srcStart);
   1.609 +  }
   1.610 +
   1.611 +  // are we comparing different lengths?
   1.612 +  if(length != srcLength) {
   1.613 +    if(length < srcLength) {
   1.614 +      minLength = length;
   1.615 +      lengthResult = -1;
   1.616 +    } else {
   1.617 +      minLength = srcLength;
   1.618 +      lengthResult = 1;
   1.619 +    }
   1.620 +  } else {
   1.621 +    minLength = length;
   1.622 +    lengthResult = 0;
   1.623 +  }
   1.624 +
   1.625 +  /*
   1.626 +   * note that uprv_memcmp() returns an int but we return an int8_t;
   1.627 +   * we need to take care not to truncate the result -
   1.628 +   * one way to do this is to right-shift the value to
   1.629 +   * move the sign bit into the lower 8 bits and making sure that this
   1.630 +   * does not become 0 itself
   1.631 +   */
   1.632 +
   1.633 +  if(minLength > 0 && chars != srcChars) {
   1.634 +    int32_t result;
   1.635 +
   1.636 +#   if U_IS_BIG_ENDIAN 
   1.637 +      // big-endian: byte comparison works
   1.638 +      result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
   1.639 +      if(result != 0) {
   1.640 +        return (int8_t)(result >> 15 | 1);
   1.641 +      }
   1.642 +#   else
   1.643 +      // little-endian: compare UChar units
   1.644 +      do {
   1.645 +        result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
   1.646 +        if(result != 0) {
   1.647 +          return (int8_t)(result >> 15 | 1);
   1.648 +        }
   1.649 +      } while(--minLength > 0);
   1.650 +#   endif
   1.651 +  }
   1.652 +  return lengthResult;
   1.653 +}
   1.654 +
   1.655 +/* String compare in code point order - doCompare() compares in code unit order. */
   1.656 +int8_t
   1.657 +UnicodeString::doCompareCodePointOrder(int32_t start,
   1.658 +                                       int32_t length,
   1.659 +                                       const UChar *srcChars,
   1.660 +                                       int32_t srcStart,
   1.661 +                                       int32_t srcLength) const
   1.662 +{
   1.663 +  // compare illegal string values
   1.664 +  // treat const UChar *srcChars==NULL as an empty string
   1.665 +  if(isBogus()) {
   1.666 +    return -1;
   1.667 +  }
   1.668 +
   1.669 +  // pin indices to legal values
   1.670 +  pinIndices(start, length);
   1.671 +
   1.672 +  if(srcChars == NULL) {
   1.673 +    srcStart = srcLength = 0;
   1.674 +  }
   1.675 +
   1.676 +  int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
   1.677 +  /* translate the 32-bit result into an 8-bit one */
   1.678 +  if(diff!=0) {
   1.679 +    return (int8_t)(diff >> 15 | 1);
   1.680 +  } else {
   1.681 +    return 0;
   1.682 +  }
   1.683 +}
   1.684 +
   1.685 +int32_t
   1.686 +UnicodeString::getLength() const {
   1.687 +    return length();
   1.688 +}
   1.689 +
   1.690 +UChar
   1.691 +UnicodeString::getCharAt(int32_t offset) const {
   1.692 +  return charAt(offset);
   1.693 +}
   1.694 +
   1.695 +UChar32
   1.696 +UnicodeString::getChar32At(int32_t offset) const {
   1.697 +  return char32At(offset);
   1.698 +}
   1.699 +
   1.700 +UChar32
   1.701 +UnicodeString::char32At(int32_t offset) const
   1.702 +{
   1.703 +  int32_t len = length();
   1.704 +  if((uint32_t)offset < (uint32_t)len) {
   1.705 +    const UChar *array = getArrayStart();
   1.706 +    UChar32 c;
   1.707 +    U16_GET(array, 0, offset, len, c);
   1.708 +    return c;
   1.709 +  } else {
   1.710 +    return kInvalidUChar;
   1.711 +  }
   1.712 +}
   1.713 +
   1.714 +int32_t
   1.715 +UnicodeString::getChar32Start(int32_t offset) const {
   1.716 +  if((uint32_t)offset < (uint32_t)length()) {
   1.717 +    const UChar *array = getArrayStart();
   1.718 +    U16_SET_CP_START(array, 0, offset);
   1.719 +    return offset;
   1.720 +  } else {
   1.721 +    return 0;
   1.722 +  }
   1.723 +}
   1.724 +
   1.725 +int32_t
   1.726 +UnicodeString::getChar32Limit(int32_t offset) const {
   1.727 +  int32_t len = length();
   1.728 +  if((uint32_t)offset < (uint32_t)len) {
   1.729 +    const UChar *array = getArrayStart();
   1.730 +    U16_SET_CP_LIMIT(array, 0, offset, len);
   1.731 +    return offset;
   1.732 +  } else {
   1.733 +    return len;
   1.734 +  }
   1.735 +}
   1.736 +
   1.737 +int32_t
   1.738 +UnicodeString::countChar32(int32_t start, int32_t length) const {
   1.739 +  pinIndices(start, length);
   1.740 +  // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
   1.741 +  return u_countChar32(getArrayStart()+start, length);
   1.742 +}
   1.743 +
   1.744 +UBool
   1.745 +UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
   1.746 +  pinIndices(start, length);
   1.747 +  // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
   1.748 +  return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
   1.749 +}
   1.750 +
   1.751 +int32_t
   1.752 +UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
   1.753 +  // pin index
   1.754 +  int32_t len = length();
   1.755 +  if(index<0) {
   1.756 +    index=0;
   1.757 +  } else if(index>len) {
   1.758 +    index=len;
   1.759 +  }
   1.760 +
   1.761 +  const UChar *array = getArrayStart();
   1.762 +  if(delta>0) {
   1.763 +    U16_FWD_N(array, index, len, delta);
   1.764 +  } else {
   1.765 +    U16_BACK_N(array, 0, index, -delta);
   1.766 +  }
   1.767 +
   1.768 +  return index;
   1.769 +}
   1.770 +
   1.771 +void
   1.772 +UnicodeString::doExtract(int32_t start,
   1.773 +             int32_t length,
   1.774 +             UChar *dst,
   1.775 +             int32_t dstStart) const
   1.776 +{
   1.777 +  // pin indices to legal values
   1.778 +  pinIndices(start, length);
   1.779 +
   1.780 +  // do not copy anything if we alias dst itself
   1.781 +  const UChar *array = getArrayStart();
   1.782 +  if(array + start != dst + dstStart) {
   1.783 +    us_arrayCopy(array, start, dst, dstStart, length);
   1.784 +  }
   1.785 +}
   1.786 +
   1.787 +int32_t
   1.788 +UnicodeString::extract(UChar *dest, int32_t destCapacity,
   1.789 +                       UErrorCode &errorCode) const {
   1.790 +  int32_t len = length();
   1.791 +  if(U_SUCCESS(errorCode)) {
   1.792 +    if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
   1.793 +      errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.794 +    } else {
   1.795 +      const UChar *array = getArrayStart();
   1.796 +      if(len>0 && len<=destCapacity && array!=dest) {
   1.797 +        uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
   1.798 +      }
   1.799 +      return u_terminateUChars(dest, destCapacity, len, &errorCode);
   1.800 +    }
   1.801 +  }
   1.802 +
   1.803 +  return len;
   1.804 +}
   1.805 +
   1.806 +int32_t
   1.807 +UnicodeString::extract(int32_t start,
   1.808 +                       int32_t length,
   1.809 +                       char *target,
   1.810 +                       int32_t targetCapacity,
   1.811 +                       enum EInvariant) const
   1.812 +{
   1.813 +  // if the arguments are illegal, then do nothing
   1.814 +  if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
   1.815 +    return 0;
   1.816 +  }
   1.817 +
   1.818 +  // pin the indices to legal values
   1.819 +  pinIndices(start, length);
   1.820 +
   1.821 +  if(length <= targetCapacity) {
   1.822 +    u_UCharsToChars(getArrayStart() + start, target, length);
   1.823 +  }
   1.824 +  UErrorCode status = U_ZERO_ERROR;
   1.825 +  return u_terminateChars(target, targetCapacity, length, &status);
   1.826 +}
   1.827 +
   1.828 +UnicodeString
   1.829 +UnicodeString::tempSubString(int32_t start, int32_t len) const {
   1.830 +  pinIndices(start, len);
   1.831 +  const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
   1.832 +  if(array==NULL) {
   1.833 +    array=fUnion.fStackBuffer;  // anything not NULL because that would make an empty string
   1.834 +    len=-2;  // bogus result string
   1.835 +  }
   1.836 +  return UnicodeString(FALSE, array + start, len);
   1.837 +}
   1.838 +
   1.839 +int32_t
   1.840 +UnicodeString::toUTF8(int32_t start, int32_t len,
   1.841 +                      char *target, int32_t capacity) const {
   1.842 +  pinIndices(start, len);
   1.843 +  int32_t length8;
   1.844 +  UErrorCode errorCode = U_ZERO_ERROR;
   1.845 +  u_strToUTF8WithSub(target, capacity, &length8,
   1.846 +                     getBuffer() + start, len,
   1.847 +                     0xFFFD,  // Standard substitution character.
   1.848 +                     NULL,    // Don't care about number of substitutions.
   1.849 +                     &errorCode);
   1.850 +  return length8;
   1.851 +}
   1.852 +
   1.853 +#if U_CHARSET_IS_UTF8
   1.854 +
   1.855 +int32_t
   1.856 +UnicodeString::extract(int32_t start, int32_t len,
   1.857 +                       char *target, uint32_t dstSize) const {
   1.858 +  // if the arguments are illegal, then do nothing
   1.859 +  if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
   1.860 +    return 0;
   1.861 +  }
   1.862 +  return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
   1.863 +}
   1.864 +
   1.865 +// else see unistr_cnv.cpp
   1.866 +#endif
   1.867 +
   1.868 +void 
   1.869 +UnicodeString::extractBetween(int32_t start,
   1.870 +                  int32_t limit,
   1.871 +                  UnicodeString& target) const {
   1.872 +  pinIndex(start);
   1.873 +  pinIndex(limit);
   1.874 +  doExtract(start, limit - start, target);
   1.875 +}
   1.876 +
   1.877 +// When converting from UTF-16 to UTF-8, the result will have at most 3 times
   1.878 +// as many bytes as the source has UChars.
   1.879 +// The "worst cases" are writing systems like Indic, Thai and CJK with
   1.880 +// 3:1 bytes:UChars.
   1.881 +void
   1.882 +UnicodeString::toUTF8(ByteSink &sink) const {
   1.883 +  int32_t length16 = length();
   1.884 +  if(length16 != 0) {
   1.885 +    char stackBuffer[1024];
   1.886 +    int32_t capacity = (int32_t)sizeof(stackBuffer);
   1.887 +    UBool utf8IsOwned = FALSE;
   1.888 +    char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
   1.889 +                                      3*length16,
   1.890 +                                      stackBuffer, capacity,
   1.891 +                                      &capacity);
   1.892 +    int32_t length8 = 0;
   1.893 +    UErrorCode errorCode = U_ZERO_ERROR;
   1.894 +    u_strToUTF8WithSub(utf8, capacity, &length8,
   1.895 +                       getBuffer(), length16,
   1.896 +                       0xFFFD,  // Standard substitution character.
   1.897 +                       NULL,    // Don't care about number of substitutions.
   1.898 +                       &errorCode);
   1.899 +    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
   1.900 +      utf8 = (char *)uprv_malloc(length8);
   1.901 +      if(utf8 != NULL) {
   1.902 +        utf8IsOwned = TRUE;
   1.903 +        errorCode = U_ZERO_ERROR;
   1.904 +        u_strToUTF8WithSub(utf8, length8, &length8,
   1.905 +                           getBuffer(), length16,
   1.906 +                           0xFFFD,  // Standard substitution character.
   1.907 +                           NULL,    // Don't care about number of substitutions.
   1.908 +                           &errorCode);
   1.909 +      } else {
   1.910 +        errorCode = U_MEMORY_ALLOCATION_ERROR;
   1.911 +      }
   1.912 +    }
   1.913 +    if(U_SUCCESS(errorCode)) {
   1.914 +      sink.Append(utf8, length8);
   1.915 +      sink.Flush();
   1.916 +    }
   1.917 +    if(utf8IsOwned) {
   1.918 +      uprv_free(utf8);
   1.919 +    }
   1.920 +  }
   1.921 +}
   1.922 +
   1.923 +int32_t
   1.924 +UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
   1.925 +  int32_t length32=0;
   1.926 +  if(U_SUCCESS(errorCode)) {
   1.927 +    // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
   1.928 +    u_strToUTF32WithSub(utf32, capacity, &length32,
   1.929 +        getBuffer(), length(),
   1.930 +        0xfffd,  // Substitution character.
   1.931 +        NULL,    // Don't care about number of substitutions.
   1.932 +        &errorCode);
   1.933 +  }
   1.934 +  return length32;
   1.935 +}
   1.936 +
   1.937 +int32_t 
   1.938 +UnicodeString::indexOf(const UChar *srcChars,
   1.939 +               int32_t srcStart,
   1.940 +               int32_t srcLength,
   1.941 +               int32_t start,
   1.942 +               int32_t length) const
   1.943 +{
   1.944 +  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
   1.945 +    return -1;
   1.946 +  }
   1.947 +
   1.948 +  // UnicodeString does not find empty substrings
   1.949 +  if(srcLength < 0 && srcChars[srcStart] == 0) {
   1.950 +    return -1;
   1.951 +  }
   1.952 +
   1.953 +  // get the indices within bounds
   1.954 +  pinIndices(start, length);
   1.955 +
   1.956 +  // find the first occurrence of the substring
   1.957 +  const UChar *array = getArrayStart();
   1.958 +  const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
   1.959 +  if(match == NULL) {
   1.960 +    return -1;
   1.961 +  } else {
   1.962 +    return (int32_t)(match - array);
   1.963 +  }
   1.964 +}
   1.965 +
   1.966 +int32_t
   1.967 +UnicodeString::doIndexOf(UChar c,
   1.968 +             int32_t start,
   1.969 +             int32_t length) const
   1.970 +{
   1.971 +  // pin indices
   1.972 +  pinIndices(start, length);
   1.973 +
   1.974 +  // find the first occurrence of c
   1.975 +  const UChar *array = getArrayStart();
   1.976 +  const UChar *match = u_memchr(array + start, c, length);
   1.977 +  if(match == NULL) {
   1.978 +    return -1;
   1.979 +  } else {
   1.980 +    return (int32_t)(match - array);
   1.981 +  }
   1.982 +}
   1.983 +
   1.984 +int32_t
   1.985 +UnicodeString::doIndexOf(UChar32 c,
   1.986 +                         int32_t start,
   1.987 +                         int32_t length) const {
   1.988 +  // pin indices
   1.989 +  pinIndices(start, length);
   1.990 +
   1.991 +  // find the first occurrence of c
   1.992 +  const UChar *array = getArrayStart();
   1.993 +  const UChar *match = u_memchr32(array + start, c, length);
   1.994 +  if(match == NULL) {
   1.995 +    return -1;
   1.996 +  } else {
   1.997 +    return (int32_t)(match - array);
   1.998 +  }
   1.999 +}
  1.1000 +
  1.1001 +int32_t 
  1.1002 +UnicodeString::lastIndexOf(const UChar *srcChars,
  1.1003 +               int32_t srcStart,
  1.1004 +               int32_t srcLength,
  1.1005 +               int32_t start,
  1.1006 +               int32_t length) const
  1.1007 +{
  1.1008 +  if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
  1.1009 +    return -1;
  1.1010 +  }
  1.1011 +
  1.1012 +  // UnicodeString does not find empty substrings
  1.1013 +  if(srcLength < 0 && srcChars[srcStart] == 0) {
  1.1014 +    return -1;
  1.1015 +  }
  1.1016 +
  1.1017 +  // get the indices within bounds
  1.1018 +  pinIndices(start, length);
  1.1019 +
  1.1020 +  // find the last occurrence of the substring
  1.1021 +  const UChar *array = getArrayStart();
  1.1022 +  const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
  1.1023 +  if(match == NULL) {
  1.1024 +    return -1;
  1.1025 +  } else {
  1.1026 +    return (int32_t)(match - array);
  1.1027 +  }
  1.1028 +}
  1.1029 +
  1.1030 +int32_t
  1.1031 +UnicodeString::doLastIndexOf(UChar c,
  1.1032 +                 int32_t start,
  1.1033 +                 int32_t length) const
  1.1034 +{
  1.1035 +  if(isBogus()) {
  1.1036 +    return -1;
  1.1037 +  }
  1.1038 +
  1.1039 +  // pin indices
  1.1040 +  pinIndices(start, length);
  1.1041 +
  1.1042 +  // find the last occurrence of c
  1.1043 +  const UChar *array = getArrayStart();
  1.1044 +  const UChar *match = u_memrchr(array + start, c, length);
  1.1045 +  if(match == NULL) {
  1.1046 +    return -1;
  1.1047 +  } else {
  1.1048 +    return (int32_t)(match - array);
  1.1049 +  }
  1.1050 +}
  1.1051 +
  1.1052 +int32_t
  1.1053 +UnicodeString::doLastIndexOf(UChar32 c,
  1.1054 +                             int32_t start,
  1.1055 +                             int32_t length) const {
  1.1056 +  // pin indices
  1.1057 +  pinIndices(start, length);
  1.1058 +
  1.1059 +  // find the last occurrence of c
  1.1060 +  const UChar *array = getArrayStart();
  1.1061 +  const UChar *match = u_memrchr32(array + start, c, length);
  1.1062 +  if(match == NULL) {
  1.1063 +    return -1;
  1.1064 +  } else {
  1.1065 +    return (int32_t)(match - array);
  1.1066 +  }
  1.1067 +}
  1.1068 +
  1.1069 +//========================================
  1.1070 +// Write implementation
  1.1071 +//========================================
  1.1072 +
  1.1073 +UnicodeString& 
  1.1074 +UnicodeString::findAndReplace(int32_t start,
  1.1075 +                  int32_t length,
  1.1076 +                  const UnicodeString& oldText,
  1.1077 +                  int32_t oldStart,
  1.1078 +                  int32_t oldLength,
  1.1079 +                  const UnicodeString& newText,
  1.1080 +                  int32_t newStart,
  1.1081 +                  int32_t newLength)
  1.1082 +{
  1.1083 +  if(isBogus() || oldText.isBogus() || newText.isBogus()) {
  1.1084 +    return *this;
  1.1085 +  }
  1.1086 +
  1.1087 +  pinIndices(start, length);
  1.1088 +  oldText.pinIndices(oldStart, oldLength);
  1.1089 +  newText.pinIndices(newStart, newLength);
  1.1090 +
  1.1091 +  if(oldLength == 0) {
  1.1092 +    return *this;
  1.1093 +  }
  1.1094 +
  1.1095 +  while(length > 0 && length >= oldLength) {
  1.1096 +    int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
  1.1097 +    if(pos < 0) {
  1.1098 +      // no more oldText's here: done
  1.1099 +      break;
  1.1100 +    } else {
  1.1101 +      // we found oldText, replace it by newText and go beyond it
  1.1102 +      replace(pos, oldLength, newText, newStart, newLength);
  1.1103 +      length -= pos + oldLength - start;
  1.1104 +      start = pos + newLength;
  1.1105 +    }
  1.1106 +  }
  1.1107 +
  1.1108 +  return *this;
  1.1109 +}
  1.1110 +
  1.1111 +
  1.1112 +void
  1.1113 +UnicodeString::setToBogus()
  1.1114 +{
  1.1115 +  releaseArray();
  1.1116 +
  1.1117 +  fShortLength = 0;
  1.1118 +  fUnion.fFields.fArray = 0;
  1.1119 +  fUnion.fFields.fCapacity = 0;
  1.1120 +  fFlags = kIsBogus;
  1.1121 +}
  1.1122 +
  1.1123 +// turn a bogus string into an empty one
  1.1124 +void
  1.1125 +UnicodeString::unBogus() {
  1.1126 +  if(fFlags & kIsBogus) {
  1.1127 +    setToEmpty();
  1.1128 +  }
  1.1129 +}
  1.1130 +
  1.1131 +const UChar *
  1.1132 +UnicodeString::getTerminatedBuffer() {
  1.1133 +  if(!isWritable()) {
  1.1134 +    return 0;
  1.1135 +  }
  1.1136 +  UChar *array = getArrayStart();
  1.1137 +  int32_t len = length();
  1.1138 +  if(len < getCapacity()) {
  1.1139 +    if(fFlags & kBufferIsReadonly) {
  1.1140 +      // If len<capacity on a read-only alias, then array[len] is
  1.1141 +      // either the original NUL (if constructed with (TRUE, s, length))
  1.1142 +      // or one of the original string contents characters (if later truncated),
  1.1143 +      // therefore we can assume that array[len] is initialized memory.
  1.1144 +      if(array[len] == 0) {
  1.1145 +        return array;
  1.1146 +      }
  1.1147 +    } else if(((fFlags & kRefCounted) == 0 || refCount() == 1)) {
  1.1148 +      // kRefCounted: Do not write the NUL if the buffer is shared.
  1.1149 +      // That is mostly safe, except when the length of one copy was modified
  1.1150 +      // without copy-on-write, e.g., via truncate(newLength) or remove(void).
  1.1151 +      // Then the NUL would be written into the middle of another copy's string.
  1.1152 +
  1.1153 +      // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
  1.1154 +      // Do not test if there is a NUL already because it might be uninitialized memory.
  1.1155 +      // (That would be safe, but tools like valgrind & Purify would complain.)
  1.1156 +      array[len] = 0;
  1.1157 +      return array;
  1.1158 +    }
  1.1159 +  }
  1.1160 +  if(cloneArrayIfNeeded(len+1)) {
  1.1161 +    array = getArrayStart();
  1.1162 +    array[len] = 0;
  1.1163 +    return array;
  1.1164 +  } else {
  1.1165 +    return NULL;
  1.1166 +  }
  1.1167 +}
  1.1168 +
  1.1169 +// setTo() analogous to the readonly-aliasing constructor with the same signature
  1.1170 +UnicodeString &
  1.1171 +UnicodeString::setTo(UBool isTerminated,
  1.1172 +                     const UChar *text,
  1.1173 +                     int32_t textLength)
  1.1174 +{
  1.1175 +  if(fFlags & kOpenGetBuffer) {
  1.1176 +    // do not modify a string that has an "open" getBuffer(minCapacity)
  1.1177 +    return *this;
  1.1178 +  }
  1.1179 +
  1.1180 +  if(text == NULL) {
  1.1181 +    // treat as an empty string, do not alias
  1.1182 +    releaseArray();
  1.1183 +    setToEmpty();
  1.1184 +    return *this;
  1.1185 +  }
  1.1186 +
  1.1187 +  if( textLength < -1 ||
  1.1188 +      (textLength == -1 && !isTerminated) ||
  1.1189 +      (textLength >= 0 && isTerminated && text[textLength] != 0)
  1.1190 +  ) {
  1.1191 +    setToBogus();
  1.1192 +    return *this;
  1.1193 +  }
  1.1194 +
  1.1195 +  releaseArray();
  1.1196 +
  1.1197 +  if(textLength == -1) {
  1.1198 +    // text is terminated, or else it would have failed the above test
  1.1199 +    textLength = u_strlen(text);
  1.1200 +  }
  1.1201 +  setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
  1.1202 +
  1.1203 +  fFlags = kReadonlyAlias;
  1.1204 +  return *this;
  1.1205 +}
  1.1206 +
  1.1207 +// setTo() analogous to the writable-aliasing constructor with the same signature
  1.1208 +UnicodeString &
  1.1209 +UnicodeString::setTo(UChar *buffer,
  1.1210 +                     int32_t buffLength,
  1.1211 +                     int32_t buffCapacity) {
  1.1212 +  if(fFlags & kOpenGetBuffer) {
  1.1213 +    // do not modify a string that has an "open" getBuffer(minCapacity)
  1.1214 +    return *this;
  1.1215 +  }
  1.1216 +
  1.1217 +  if(buffer == NULL) {
  1.1218 +    // treat as an empty string, do not alias
  1.1219 +    releaseArray();
  1.1220 +    setToEmpty();
  1.1221 +    return *this;
  1.1222 +  }
  1.1223 +
  1.1224 +  if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
  1.1225 +    setToBogus();
  1.1226 +    return *this;
  1.1227 +  } else if(buffLength == -1) {
  1.1228 +    // buffLength = u_strlen(buff); but do not look beyond buffCapacity
  1.1229 +    const UChar *p = buffer, *limit = buffer + buffCapacity;
  1.1230 +    while(p != limit && *p != 0) {
  1.1231 +      ++p;
  1.1232 +    }
  1.1233 +    buffLength = (int32_t)(p - buffer);
  1.1234 +  }
  1.1235 +
  1.1236 +  releaseArray();
  1.1237 +
  1.1238 +  setArray(buffer, buffLength, buffCapacity);
  1.1239 +  fFlags = kWritableAlias;
  1.1240 +  return *this;
  1.1241 +}
  1.1242 +
  1.1243 +UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
  1.1244 +  unBogus();
  1.1245 +  int32_t length = utf8.length();
  1.1246 +  int32_t capacity;
  1.1247 +  // The UTF-16 string will be at most as long as the UTF-8 string.
  1.1248 +  if(length <= US_STACKBUF_SIZE) {
  1.1249 +    capacity = US_STACKBUF_SIZE;
  1.1250 +  } else {
  1.1251 +    capacity = length + 1;  // +1 for the terminating NUL.
  1.1252 +  }
  1.1253 +  UChar *utf16 = getBuffer(capacity);
  1.1254 +  int32_t length16;
  1.1255 +  UErrorCode errorCode = U_ZERO_ERROR;
  1.1256 +  u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
  1.1257 +      utf8.data(), length,
  1.1258 +      0xfffd,  // Substitution character.
  1.1259 +      NULL,    // Don't care about number of substitutions.
  1.1260 +      &errorCode);
  1.1261 +  releaseBuffer(length16);
  1.1262 +  if(U_FAILURE(errorCode)) {
  1.1263 +    setToBogus();
  1.1264 +  }
  1.1265 +  return *this;
  1.1266 +}
  1.1267 +
  1.1268 +UnicodeString&
  1.1269 +UnicodeString::setCharAt(int32_t offset,
  1.1270 +             UChar c)
  1.1271 +{
  1.1272 +  int32_t len = length();
  1.1273 +  if(cloneArrayIfNeeded() && len > 0) {
  1.1274 +    if(offset < 0) {
  1.1275 +      offset = 0;
  1.1276 +    } else if(offset >= len) {
  1.1277 +      offset = len - 1;
  1.1278 +    }
  1.1279 +
  1.1280 +    getArrayStart()[offset] = c;
  1.1281 +  }
  1.1282 +  return *this;
  1.1283 +}
  1.1284 +
  1.1285 +UnicodeString&
  1.1286 +UnicodeString::replace(int32_t start,
  1.1287 +               int32_t _length,
  1.1288 +               UChar32 srcChar) {
  1.1289 +  UChar buffer[U16_MAX_LENGTH];
  1.1290 +  int32_t count = 0;
  1.1291 +  UBool isError = FALSE;
  1.1292 +  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
  1.1293 +  // We test isError so that the compiler does not complain that we don't.
  1.1294 +  // If isError (srcChar is not a valid code point) then count==0 which means
  1.1295 +  // we remove the source segment rather than replacing it with srcChar.
  1.1296 +  return doReplace(start, _length, buffer, 0, isError ? 0 : count);
  1.1297 +}
  1.1298 +
  1.1299 +UnicodeString&
  1.1300 +UnicodeString::append(UChar32 srcChar) {
  1.1301 +  UChar buffer[U16_MAX_LENGTH];
  1.1302 +  int32_t _length = 0;
  1.1303 +  UBool isError = FALSE;
  1.1304 +  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
  1.1305 +  // We test isError so that the compiler does not complain that we don't.
  1.1306 +  // If isError then _length==0 which turns the doReplace() into a no-op anyway.
  1.1307 +  return isError ? *this : doReplace(length(), 0, buffer, 0, _length);
  1.1308 +}
  1.1309 +
  1.1310 +UnicodeString&
  1.1311 +UnicodeString::doReplace( int32_t start,
  1.1312 +              int32_t length,
  1.1313 +              const UnicodeString& src,
  1.1314 +              int32_t srcStart,
  1.1315 +              int32_t srcLength)
  1.1316 +{
  1.1317 +  if(!src.isBogus()) {
  1.1318 +    // pin the indices to legal values
  1.1319 +    src.pinIndices(srcStart, srcLength);
  1.1320 +
  1.1321 +    // get the characters from src
  1.1322 +    // and replace the range in ourselves with them
  1.1323 +    return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
  1.1324 +  } else {
  1.1325 +    // remove the range
  1.1326 +    return doReplace(start, length, 0, 0, 0);
  1.1327 +  }
  1.1328 +}
  1.1329 +
  1.1330 +UnicodeString&
  1.1331 +UnicodeString::doReplace(int32_t start,
  1.1332 +             int32_t length,
  1.1333 +             const UChar *srcChars,
  1.1334 +             int32_t srcStart,
  1.1335 +             int32_t srcLength)
  1.1336 +{
  1.1337 +  if(!isWritable()) {
  1.1338 +    return *this;
  1.1339 +  }
  1.1340 +
  1.1341 +  int32_t oldLength = this->length();
  1.1342 +
  1.1343 +  // optimize (read-only alias).remove(0, start) and .remove(start, end)
  1.1344 +  if((fFlags&kBufferIsReadonly) && srcLength == 0) {
  1.1345 +    if(start == 0) {
  1.1346 +      // remove prefix by adjusting the array pointer
  1.1347 +      pinIndex(length);
  1.1348 +      fUnion.fFields.fArray += length;
  1.1349 +      fUnion.fFields.fCapacity -= length;
  1.1350 +      setLength(oldLength - length);
  1.1351 +      return *this;
  1.1352 +    } else {
  1.1353 +      pinIndex(start);
  1.1354 +      if(length >= (oldLength - start)) {
  1.1355 +        // remove suffix by reducing the length (like truncate())
  1.1356 +        setLength(start);
  1.1357 +        fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
  1.1358 +        return *this;
  1.1359 +      }
  1.1360 +    }
  1.1361 +  }
  1.1362 +
  1.1363 +  if(srcChars == 0) {
  1.1364 +    srcStart = srcLength = 0;
  1.1365 +  } else if(srcLength < 0) {
  1.1366 +    // get the srcLength if necessary
  1.1367 +    srcLength = u_strlen(srcChars + srcStart);
  1.1368 +  }
  1.1369 +
  1.1370 +  // calculate the size of the string after the replace
  1.1371 +  int32_t newLength;
  1.1372 +
  1.1373 +  // optimize append() onto a large-enough, owned string
  1.1374 +  if(start >= oldLength) {
  1.1375 +    if(srcLength == 0) {
  1.1376 +      return *this;
  1.1377 +    }
  1.1378 +    newLength = oldLength + srcLength;
  1.1379 +    if(newLength <= getCapacity() && isBufferWritable()) {
  1.1380 +      UChar *oldArray = getArrayStart();
  1.1381 +      // Do not copy characters when
  1.1382 +      //   UChar *buffer=str.getAppendBuffer(...);
  1.1383 +      // is followed by
  1.1384 +      //   str.append(buffer, length);
  1.1385 +      // or
  1.1386 +      //   str.appendString(buffer, length)
  1.1387 +      // or similar.
  1.1388 +      if(srcChars + srcStart != oldArray + start || start > oldLength) {
  1.1389 +        us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
  1.1390 +      }
  1.1391 +      setLength(newLength);
  1.1392 +      return *this;
  1.1393 +    } else {
  1.1394 +      // pin the indices to legal values
  1.1395 +      start = oldLength;
  1.1396 +      length = 0;
  1.1397 +    }
  1.1398 +  } else {
  1.1399 +    // pin the indices to legal values
  1.1400 +    pinIndices(start, length);
  1.1401 +
  1.1402 +    newLength = oldLength - length + srcLength;
  1.1403 +  }
  1.1404 +
  1.1405 +  // the following may change fArray but will not copy the current contents;
  1.1406 +  // therefore we need to keep the current fArray
  1.1407 +  UChar oldStackBuffer[US_STACKBUF_SIZE];
  1.1408 +  UChar *oldArray;
  1.1409 +  if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
  1.1410 +    // copy the stack buffer contents because it will be overwritten with
  1.1411 +    // fUnion.fFields values
  1.1412 +    u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
  1.1413 +    oldArray = oldStackBuffer;
  1.1414 +  } else {
  1.1415 +    oldArray = getArrayStart();
  1.1416 +  }
  1.1417 +
  1.1418 +  // clone our array and allocate a bigger array if needed
  1.1419 +  int32_t *bufferToDelete = 0;
  1.1420 +  if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
  1.1421 +                         FALSE, &bufferToDelete)
  1.1422 +  ) {
  1.1423 +    return *this;
  1.1424 +  }
  1.1425 +
  1.1426 +  // now do the replace
  1.1427 +
  1.1428 +  UChar *newArray = getArrayStart();
  1.1429 +  if(newArray != oldArray) {
  1.1430 +    // if fArray changed, then we need to copy everything except what will change
  1.1431 +    us_arrayCopy(oldArray, 0, newArray, 0, start);
  1.1432 +    us_arrayCopy(oldArray, start + length,
  1.1433 +                 newArray, start + srcLength,
  1.1434 +                 oldLength - (start + length));
  1.1435 +  } else if(length != srcLength) {
  1.1436 +    // fArray did not change; copy only the portion that isn't changing, leaving a hole
  1.1437 +    us_arrayCopy(oldArray, start + length,
  1.1438 +                 newArray, start + srcLength,
  1.1439 +                 oldLength - (start + length));
  1.1440 +  }
  1.1441 +
  1.1442 +  // now fill in the hole with the new string
  1.1443 +  us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
  1.1444 +
  1.1445 +  setLength(newLength);
  1.1446 +
  1.1447 +  // delayed delete in case srcChars == fArray when we started, and
  1.1448 +  // to keep oldArray alive for the above operations
  1.1449 +  if (bufferToDelete) {
  1.1450 +    uprv_free(bufferToDelete);
  1.1451 +  }
  1.1452 +
  1.1453 +  return *this;
  1.1454 +}
  1.1455 +
  1.1456 +/**
  1.1457 + * Replaceable API
  1.1458 + */
  1.1459 +void
  1.1460 +UnicodeString::handleReplaceBetween(int32_t start,
  1.1461 +                                    int32_t limit,
  1.1462 +                                    const UnicodeString& text) {
  1.1463 +    replaceBetween(start, limit, text);
  1.1464 +}
  1.1465 +
  1.1466 +/**
  1.1467 + * Replaceable API
  1.1468 + */
  1.1469 +void 
  1.1470 +UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
  1.1471 +    if (limit <= start) {
  1.1472 +        return; // Nothing to do; avoid bogus malloc call
  1.1473 +    }
  1.1474 +    UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
  1.1475 +    // Check to make sure text is not null.
  1.1476 +    if (text != NULL) {
  1.1477 +	    extractBetween(start, limit, text, 0);
  1.1478 +	    insert(dest, text, 0, limit - start);    
  1.1479 +	    uprv_free(text);
  1.1480 +    }
  1.1481 +}
  1.1482 +
  1.1483 +/**
  1.1484 + * Replaceable API
  1.1485 + *
  1.1486 + * NOTE: This is for the Replaceable class.  There is no rep.cpp,
  1.1487 + * so we implement this function here.
  1.1488 + */
  1.1489 +UBool Replaceable::hasMetaData() const {
  1.1490 +    return TRUE;
  1.1491 +}
  1.1492 +
  1.1493 +/**
  1.1494 + * Replaceable API
  1.1495 + */
  1.1496 +UBool UnicodeString::hasMetaData() const {
  1.1497 +    return FALSE;
  1.1498 +}
  1.1499 +
  1.1500 +UnicodeString&
  1.1501 +UnicodeString::doReverse(int32_t start, int32_t length) {
  1.1502 +  if(length <= 1 || !cloneArrayIfNeeded()) {
  1.1503 +    return *this;
  1.1504 +  }
  1.1505 +
  1.1506 +  // pin the indices to legal values
  1.1507 +  pinIndices(start, length);
  1.1508 +  if(length <= 1) {  // pinIndices() might have shrunk the length
  1.1509 +    return *this;
  1.1510 +  }
  1.1511 +
  1.1512 +  UChar *left = getArrayStart() + start;
  1.1513 +  UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
  1.1514 +  UChar swap;
  1.1515 +  UBool hasSupplementary = FALSE;
  1.1516 +
  1.1517 +  // Before the loop we know left<right because length>=2.
  1.1518 +  do {
  1.1519 +    hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
  1.1520 +    hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
  1.1521 +    *right-- = swap;
  1.1522 +  } while(left < right);
  1.1523 +  // Make sure to test the middle code unit of an odd-length string.
  1.1524 +  // Redundant if the length is even.
  1.1525 +  hasSupplementary |= (UBool)U16_IS_LEAD(*left);
  1.1526 +
  1.1527 +  /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
  1.1528 +  if(hasSupplementary) {
  1.1529 +    UChar swap2;
  1.1530 +
  1.1531 +    left = getArrayStart() + start;
  1.1532 +    right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
  1.1533 +    while(left < right) {
  1.1534 +      if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
  1.1535 +        *left++ = swap2;
  1.1536 +        *left++ = swap;
  1.1537 +      } else {
  1.1538 +        ++left;
  1.1539 +      }
  1.1540 +    }
  1.1541 +  }
  1.1542 +
  1.1543 +  return *this;
  1.1544 +}
  1.1545 +
  1.1546 +UBool 
  1.1547 +UnicodeString::padLeading(int32_t targetLength,
  1.1548 +                          UChar padChar)
  1.1549 +{
  1.1550 +  int32_t oldLength = length();
  1.1551 +  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
  1.1552 +    return FALSE;
  1.1553 +  } else {
  1.1554 +    // move contents up by padding width
  1.1555 +    UChar *array = getArrayStart();
  1.1556 +    int32_t start = targetLength - oldLength;
  1.1557 +    us_arrayCopy(array, 0, array, start, oldLength);
  1.1558 +
  1.1559 +    // fill in padding character
  1.1560 +    while(--start >= 0) {
  1.1561 +      array[start] = padChar;
  1.1562 +    }
  1.1563 +    setLength(targetLength);
  1.1564 +    return TRUE;
  1.1565 +  }
  1.1566 +}
  1.1567 +
  1.1568 +UBool 
  1.1569 +UnicodeString::padTrailing(int32_t targetLength,
  1.1570 +                           UChar padChar)
  1.1571 +{
  1.1572 +  int32_t oldLength = length();
  1.1573 +  if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
  1.1574 +    return FALSE;
  1.1575 +  } else {
  1.1576 +    // fill in padding character
  1.1577 +    UChar *array = getArrayStart();
  1.1578 +    int32_t length = targetLength;
  1.1579 +    while(--length >= oldLength) {
  1.1580 +      array[length] = padChar;
  1.1581 +    }
  1.1582 +    setLength(targetLength);
  1.1583 +    return TRUE;
  1.1584 +  }
  1.1585 +}
  1.1586 +
  1.1587 +//========================================
  1.1588 +// Hashing
  1.1589 +//========================================
  1.1590 +int32_t
  1.1591 +UnicodeString::doHashCode() const
  1.1592 +{
  1.1593 +    /* Delegate hash computation to uhash.  This makes UnicodeString
  1.1594 +     * hashing consistent with UChar* hashing.  */
  1.1595 +    int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
  1.1596 +    if (hashCode == kInvalidHashCode) {
  1.1597 +        hashCode = kEmptyHashCode;
  1.1598 +    }
  1.1599 +    return hashCode;
  1.1600 +}
  1.1601 +
  1.1602 +//========================================
  1.1603 +// External Buffer
  1.1604 +//========================================
  1.1605 +
  1.1606 +UChar *
  1.1607 +UnicodeString::getBuffer(int32_t minCapacity) {
  1.1608 +  if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
  1.1609 +    fFlags|=kOpenGetBuffer;
  1.1610 +    fShortLength=0;
  1.1611 +    return getArrayStart();
  1.1612 +  } else {
  1.1613 +    return 0;
  1.1614 +  }
  1.1615 +}
  1.1616 +
  1.1617 +void
  1.1618 +UnicodeString::releaseBuffer(int32_t newLength) {
  1.1619 +  if(fFlags&kOpenGetBuffer && newLength>=-1) {
  1.1620 +    // set the new fLength
  1.1621 +    int32_t capacity=getCapacity();
  1.1622 +    if(newLength==-1) {
  1.1623 +      // the new length is the string length, capped by fCapacity
  1.1624 +      const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
  1.1625 +      while(p<limit && *p!=0) {
  1.1626 +        ++p;
  1.1627 +      }
  1.1628 +      newLength=(int32_t)(p-array);
  1.1629 +    } else if(newLength>capacity) {
  1.1630 +      newLength=capacity;
  1.1631 +    }
  1.1632 +    setLength(newLength);
  1.1633 +    fFlags&=~kOpenGetBuffer;
  1.1634 +  }
  1.1635 +}
  1.1636 +
  1.1637 +//========================================
  1.1638 +// Miscellaneous
  1.1639 +//========================================
  1.1640 +UBool
  1.1641 +UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
  1.1642 +                                  int32_t growCapacity,
  1.1643 +                                  UBool doCopyArray,
  1.1644 +                                  int32_t **pBufferToDelete,
  1.1645 +                                  UBool forceClone) {
  1.1646 +  // default parameters need to be static, therefore
  1.1647 +  // the defaults are -1 to have convenience defaults
  1.1648 +  if(newCapacity == -1) {
  1.1649 +    newCapacity = getCapacity();
  1.1650 +  }
  1.1651 +
  1.1652 +  // while a getBuffer(minCapacity) is "open",
  1.1653 +  // prevent any modifications of the string by returning FALSE here
  1.1654 +  // if the string is bogus, then only an assignment or similar can revive it
  1.1655 +  if(!isWritable()) {
  1.1656 +    return FALSE;
  1.1657 +  }
  1.1658 +
  1.1659 +  /*
  1.1660 +   * We need to make a copy of the array if
  1.1661 +   * the buffer is read-only, or
  1.1662 +   * the buffer is refCounted (shared), and refCount>1, or
  1.1663 +   * the buffer is too small.
  1.1664 +   * Return FALSE if memory could not be allocated.
  1.1665 +   */
  1.1666 +  if(forceClone ||
  1.1667 +     fFlags & kBufferIsReadonly ||
  1.1668 +     (fFlags & kRefCounted && refCount() > 1) ||
  1.1669 +     newCapacity > getCapacity()
  1.1670 +  ) {
  1.1671 +    // check growCapacity for default value and use of the stack buffer
  1.1672 +    if(growCapacity < 0) {
  1.1673 +      growCapacity = newCapacity;
  1.1674 +    } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
  1.1675 +      growCapacity = US_STACKBUF_SIZE;
  1.1676 +    }
  1.1677 +
  1.1678 +    // save old values
  1.1679 +    UChar oldStackBuffer[US_STACKBUF_SIZE];
  1.1680 +    UChar *oldArray;
  1.1681 +    uint8_t flags = fFlags;
  1.1682 +
  1.1683 +    if(flags&kUsingStackBuffer) {
  1.1684 +      U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
  1.1685 +      if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
  1.1686 +        // copy the stack buffer contents because it will be overwritten with
  1.1687 +        // fUnion.fFields values
  1.1688 +        us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
  1.1689 +        oldArray = oldStackBuffer;
  1.1690 +      } else {
  1.1691 +        oldArray = 0; // no need to copy from stack buffer to itself
  1.1692 +      }
  1.1693 +    } else {
  1.1694 +      oldArray = fUnion.fFields.fArray;
  1.1695 +      U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
  1.1696 +    }
  1.1697 +
  1.1698 +    // allocate a new array
  1.1699 +    if(allocate(growCapacity) ||
  1.1700 +       (newCapacity < growCapacity && allocate(newCapacity))
  1.1701 +    ) {
  1.1702 +      if(doCopyArray && oldArray != 0) {
  1.1703 +        // copy the contents
  1.1704 +        // do not copy more than what fits - it may be smaller than before
  1.1705 +        int32_t minLength = length();
  1.1706 +        newCapacity = getCapacity();
  1.1707 +        if(newCapacity < minLength) {
  1.1708 +          minLength = newCapacity;
  1.1709 +          setLength(minLength);
  1.1710 +        }
  1.1711 +        us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
  1.1712 +      } else {
  1.1713 +        fShortLength = 0;
  1.1714 +      }
  1.1715 +
  1.1716 +      // release the old array
  1.1717 +      if(flags & kRefCounted) {
  1.1718 +        // the array is refCounted; decrement and release if 0
  1.1719 +        u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
  1.1720 +        if(umtx_atomic_dec(pRefCount) == 0) {
  1.1721 +          if(pBufferToDelete == 0) {
  1.1722 +              // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
  1.1723 +              // is defined as volatile. (Volatile has useful non-standard behavior
  1.1724 +              //   with this compiler.)
  1.1725 +            uprv_free((void *)pRefCount);
  1.1726 +          } else {
  1.1727 +            // the caller requested to delete it himself
  1.1728 +            *pBufferToDelete = (int32_t *)pRefCount;
  1.1729 +          }
  1.1730 +        }
  1.1731 +      }
  1.1732 +    } else {
  1.1733 +      // not enough memory for growCapacity and not even for the smaller newCapacity
  1.1734 +      // reset the old values for setToBogus() to release the array
  1.1735 +      if(!(flags&kUsingStackBuffer)) {
  1.1736 +        fUnion.fFields.fArray = oldArray;
  1.1737 +      }
  1.1738 +      fFlags = flags;
  1.1739 +      setToBogus();
  1.1740 +      return FALSE;
  1.1741 +    }
  1.1742 +  }
  1.1743 +  return TRUE;
  1.1744 +}
  1.1745 +
  1.1746 +// UnicodeStringAppendable ------------------------------------------------- ***
  1.1747 +
  1.1748 +UnicodeStringAppendable::~UnicodeStringAppendable() {}
  1.1749 +
  1.1750 +UBool
  1.1751 +UnicodeStringAppendable::appendCodeUnit(UChar c) {
  1.1752 +  return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
  1.1753 +}
  1.1754 +
  1.1755 +UBool
  1.1756 +UnicodeStringAppendable::appendCodePoint(UChar32 c) {
  1.1757 +  UChar buffer[U16_MAX_LENGTH];
  1.1758 +  int32_t cLength = 0;
  1.1759 +  UBool isError = FALSE;
  1.1760 +  U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
  1.1761 +  return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
  1.1762 +}
  1.1763 +
  1.1764 +UBool
  1.1765 +UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
  1.1766 +  return str.doReplace(str.length(), 0, s, 0, length).isWritable();
  1.1767 +}
  1.1768 +
  1.1769 +UBool
  1.1770 +UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
  1.1771 +  return str.cloneArrayIfNeeded(str.length() + appendCapacity);
  1.1772 +}
  1.1773 +
  1.1774 +UChar *
  1.1775 +UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
  1.1776 +                                         int32_t desiredCapacityHint,
  1.1777 +                                         UChar *scratch, int32_t scratchCapacity,
  1.1778 +                                         int32_t *resultCapacity) {
  1.1779 +  if(minCapacity < 1 || scratchCapacity < minCapacity) {
  1.1780 +    *resultCapacity = 0;
  1.1781 +    return NULL;
  1.1782 +  }
  1.1783 +  int32_t oldLength = str.length();
  1.1784 +  if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
  1.1785 +    *resultCapacity = str.getCapacity() - oldLength;
  1.1786 +    return str.getArrayStart() + oldLength;
  1.1787 +  }
  1.1788 +  *resultCapacity = scratchCapacity;
  1.1789 +  return scratch;
  1.1790 +}
  1.1791 +
  1.1792 +U_NAMESPACE_END
  1.1793 +
  1.1794 +U_NAMESPACE_USE
  1.1795 +
  1.1796 +U_CAPI int32_t U_EXPORT2
  1.1797 +uhash_hashUnicodeString(const UElement key) {
  1.1798 +    const UnicodeString *str = (const UnicodeString*) key.pointer;
  1.1799 +    return (str == NULL) ? 0 : str->hashCode();
  1.1800 +}
  1.1801 +
  1.1802 +// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
  1.1803 +// does not depend on hashtable code.
  1.1804 +U_CAPI UBool U_EXPORT2
  1.1805 +uhash_compareUnicodeString(const UElement key1, const UElement key2) {
  1.1806 +    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
  1.1807 +    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
  1.1808 +    if (str1 == str2) {
  1.1809 +        return TRUE;
  1.1810 +    }
  1.1811 +    if (str1 == NULL || str2 == NULL) {
  1.1812 +        return FALSE;
  1.1813 +    }
  1.1814 +    return *str1 == *str2;
  1.1815 +}
  1.1816 +
  1.1817 +#ifdef U_STATIC_IMPLEMENTATION
  1.1818 +/*
  1.1819 +This should never be called. It is defined here to make sure that the
  1.1820 +virtual vector deleting destructor is defined within unistr.cpp.
  1.1821 +The vector deleting destructor is already a part of UObject,
  1.1822 +but defining it here makes sure that it is included with this object file.
  1.1823 +This makes sure that static library dependencies are kept to a minimum.
  1.1824 +*/
  1.1825 +static void uprv_UnicodeStringDummy(void) {
  1.1826 +    delete [] (new UnicodeString[2]);
  1.1827 +}
  1.1828 +#endif

mercurial