intl/icu/source/common/unistr.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 * Copyright (C) 1999-2013, International Business Machines Corporation and
michael@0 4 * others. All Rights Reserved.
michael@0 5 ******************************************************************************
michael@0 6 *
michael@0 7 * File unistr.cpp
michael@0 8 *
michael@0 9 * Modification History:
michael@0 10 *
michael@0 11 * Date Name Description
michael@0 12 * 09/25/98 stephen Creation.
michael@0 13 * 04/20/99 stephen Overhauled per 4/16 code review.
michael@0 14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
michael@0 15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
michael@0 16 * Replaceable.
michael@0 17 * 06/25/01 grhoten Removed the dependency on iostream
michael@0 18 ******************************************************************************
michael@0 19 */
michael@0 20
michael@0 21 #include "unicode/utypes.h"
michael@0 22 #include "unicode/appendable.h"
michael@0 23 #include "unicode/putil.h"
michael@0 24 #include "cstring.h"
michael@0 25 #include "cmemory.h"
michael@0 26 #include "unicode/ustring.h"
michael@0 27 #include "unicode/unistr.h"
michael@0 28 #include "unicode/utf.h"
michael@0 29 #include "unicode/utf16.h"
michael@0 30 #include "uelement.h"
michael@0 31 #include "ustr_imp.h"
michael@0 32 #include "umutex.h"
michael@0 33 #include "uassert.h"
michael@0 34
michael@0 35 #if 0
michael@0 36
michael@0 37 #include <iostream>
michael@0 38 using namespace std;
michael@0 39
michael@0 40 //DEBUGGING
michael@0 41 void
michael@0 42 print(const UnicodeString& s,
michael@0 43 const char *name)
michael@0 44 {
michael@0 45 UChar c;
michael@0 46 cout << name << ":|";
michael@0 47 for(int i = 0; i < s.length(); ++i) {
michael@0 48 c = s[i];
michael@0 49 if(c>= 0x007E || c < 0x0020)
michael@0 50 cout << "[0x" << hex << s[i] << "]";
michael@0 51 else
michael@0 52 cout << (char) s[i];
michael@0 53 }
michael@0 54 cout << '|' << endl;
michael@0 55 }
michael@0 56
michael@0 57 void
michael@0 58 print(const UChar *s,
michael@0 59 int32_t len,
michael@0 60 const char *name)
michael@0 61 {
michael@0 62 UChar c;
michael@0 63 cout << name << ":|";
michael@0 64 for(int i = 0; i < len; ++i) {
michael@0 65 c = s[i];
michael@0 66 if(c>= 0x007E || c < 0x0020)
michael@0 67 cout << "[0x" << hex << s[i] << "]";
michael@0 68 else
michael@0 69 cout << (char) s[i];
michael@0 70 }
michael@0 71 cout << '|' << endl;
michael@0 72 }
michael@0 73 // END DEBUGGING
michael@0 74 #endif
michael@0 75
michael@0 76 // Local function definitions for now
michael@0 77
michael@0 78 // need to copy areas that may overlap
michael@0 79 static
michael@0 80 inline void
michael@0 81 us_arrayCopy(const UChar *src, int32_t srcStart,
michael@0 82 UChar *dst, int32_t dstStart, int32_t count)
michael@0 83 {
michael@0 84 if(count>0) {
michael@0 85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
michael@0 86 }
michael@0 87 }
michael@0 88
michael@0 89 // u_unescapeAt() callback to get a UChar from a UnicodeString
michael@0 90 U_CDECL_BEGIN
michael@0 91 static UChar U_CALLCONV
michael@0 92 UnicodeString_charAt(int32_t offset, void *context) {
michael@0 93 return ((icu::UnicodeString*) context)->charAt(offset);
michael@0 94 }
michael@0 95 U_CDECL_END
michael@0 96
michael@0 97 U_NAMESPACE_BEGIN
michael@0 98
michael@0 99 /* The Replaceable virtual destructor can't be defined in the header
michael@0 100 due to how AIX works with multiple definitions of virtual functions.
michael@0 101 */
michael@0 102 Replaceable::~Replaceable() {}
michael@0 103
michael@0 104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
michael@0 105
michael@0 106 UnicodeString U_EXPORT2
michael@0 107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
michael@0 108 return
michael@0 109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
michael@0 110 append(s1).
michael@0 111 append(s2);
michael@0 112 }
michael@0 113
michael@0 114 //========================================
michael@0 115 // Reference Counting functions, put at top of file so that optimizing compilers
michael@0 116 // have a chance to automatically inline.
michael@0 117 //========================================
michael@0 118
michael@0 119 void
michael@0 120 UnicodeString::addRef() {
michael@0 121 umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
michael@0 122 }
michael@0 123
michael@0 124 int32_t
michael@0 125 UnicodeString::removeRef() {
michael@0 126 return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
michael@0 127 }
michael@0 128
michael@0 129 int32_t
michael@0 130 UnicodeString::refCount() const {
michael@0 131 return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
michael@0 132 }
michael@0 133
michael@0 134 void
michael@0 135 UnicodeString::releaseArray() {
michael@0 136 if((fFlags & kRefCounted) && removeRef() == 0) {
michael@0 137 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
michael@0 138 }
michael@0 139 }
michael@0 140
michael@0 141
michael@0 142
michael@0 143 //========================================
michael@0 144 // Constructors
michael@0 145 //========================================
michael@0 146
michael@0 147 // The default constructor is inline in unistr.h.
michael@0 148
michael@0 149 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
michael@0 150 : fShortLength(0),
michael@0 151 fFlags(0)
michael@0 152 {
michael@0 153 if(count <= 0 || (uint32_t)c > 0x10ffff) {
michael@0 154 // just allocate and do not do anything else
michael@0 155 allocate(capacity);
michael@0 156 } else {
michael@0 157 // count > 0, allocate and fill the new string with count c's
michael@0 158 int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
michael@0 159 if(capacity < length) {
michael@0 160 capacity = length;
michael@0 161 }
michael@0 162 if(allocate(capacity)) {
michael@0 163 UChar *array = getArrayStart();
michael@0 164 int32_t i = 0;
michael@0 165
michael@0 166 // fill the new string with c
michael@0 167 if(unitCount == 1) {
michael@0 168 // fill with length UChars
michael@0 169 while(i < length) {
michael@0 170 array[i++] = (UChar)c;
michael@0 171 }
michael@0 172 } else {
michael@0 173 // get the code units for c
michael@0 174 UChar units[U16_MAX_LENGTH];
michael@0 175 U16_APPEND_UNSAFE(units, i, c);
michael@0 176
michael@0 177 // now it must be i==unitCount
michael@0 178 i = 0;
michael@0 179
michael@0 180 // for Unicode, unitCount can only be 1, 2, 3, or 4
michael@0 181 // 1 is handled above
michael@0 182 while(i < length) {
michael@0 183 int32_t unitIdx = 0;
michael@0 184 while(unitIdx < unitCount) {
michael@0 185 array[i++]=units[unitIdx++];
michael@0 186 }
michael@0 187 }
michael@0 188 }
michael@0 189 }
michael@0 190 setLength(length);
michael@0 191 }
michael@0 192 }
michael@0 193
michael@0 194 UnicodeString::UnicodeString(UChar ch)
michael@0 195 : fShortLength(1),
michael@0 196 fFlags(kShortString)
michael@0 197 {
michael@0 198 fUnion.fStackBuffer[0] = ch;
michael@0 199 }
michael@0 200
michael@0 201 UnicodeString::UnicodeString(UChar32 ch)
michael@0 202 : fShortLength(0),
michael@0 203 fFlags(kShortString)
michael@0 204 {
michael@0 205 int32_t i = 0;
michael@0 206 UBool isError = FALSE;
michael@0 207 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
michael@0 208 // We test isError so that the compiler does not complain that we don't.
michael@0 209 // If isError then i==0 which is what we want anyway.
michael@0 210 if(!isError) {
michael@0 211 fShortLength = (int8_t)i;
michael@0 212 }
michael@0 213 }
michael@0 214
michael@0 215 UnicodeString::UnicodeString(const UChar *text)
michael@0 216 : fShortLength(0),
michael@0 217 fFlags(kShortString)
michael@0 218 {
michael@0 219 doReplace(0, 0, text, 0, -1);
michael@0 220 }
michael@0 221
michael@0 222 UnicodeString::UnicodeString(const UChar *text,
michael@0 223 int32_t textLength)
michael@0 224 : fShortLength(0),
michael@0 225 fFlags(kShortString)
michael@0 226 {
michael@0 227 doReplace(0, 0, text, 0, textLength);
michael@0 228 }
michael@0 229
michael@0 230 UnicodeString::UnicodeString(UBool isTerminated,
michael@0 231 const UChar *text,
michael@0 232 int32_t textLength)
michael@0 233 : fShortLength(0),
michael@0 234 fFlags(kReadonlyAlias)
michael@0 235 {
michael@0 236 if(text == NULL) {
michael@0 237 // treat as an empty string, do not alias
michael@0 238 setToEmpty();
michael@0 239 } else if(textLength < -1 ||
michael@0 240 (textLength == -1 && !isTerminated) ||
michael@0 241 (textLength >= 0 && isTerminated && text[textLength] != 0)
michael@0 242 ) {
michael@0 243 setToBogus();
michael@0 244 } else {
michael@0 245 if(textLength == -1) {
michael@0 246 // text is terminated, or else it would have failed the above test
michael@0 247 textLength = u_strlen(text);
michael@0 248 }
michael@0 249 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
michael@0 250 }
michael@0 251 }
michael@0 252
michael@0 253 UnicodeString::UnicodeString(UChar *buff,
michael@0 254 int32_t buffLength,
michael@0 255 int32_t buffCapacity)
michael@0 256 : fShortLength(0),
michael@0 257 fFlags(kWritableAlias)
michael@0 258 {
michael@0 259 if(buff == NULL) {
michael@0 260 // treat as an empty string, do not alias
michael@0 261 setToEmpty();
michael@0 262 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
michael@0 263 setToBogus();
michael@0 264 } else {
michael@0 265 if(buffLength == -1) {
michael@0 266 // fLength = u_strlen(buff); but do not look beyond buffCapacity
michael@0 267 const UChar *p = buff, *limit = buff + buffCapacity;
michael@0 268 while(p != limit && *p != 0) {
michael@0 269 ++p;
michael@0 270 }
michael@0 271 buffLength = (int32_t)(p - buff);
michael@0 272 }
michael@0 273 setArray(buff, buffLength, buffCapacity);
michael@0 274 }
michael@0 275 }
michael@0 276
michael@0 277 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
michael@0 278 : fShortLength(0),
michael@0 279 fFlags(kShortString)
michael@0 280 {
michael@0 281 if(src==NULL) {
michael@0 282 // treat as an empty string
michael@0 283 } else {
michael@0 284 if(length<0) {
michael@0 285 length=(int32_t)uprv_strlen(src);
michael@0 286 }
michael@0 287 if(cloneArrayIfNeeded(length, length, FALSE)) {
michael@0 288 u_charsToUChars(src, getArrayStart(), length);
michael@0 289 setLength(length);
michael@0 290 } else {
michael@0 291 setToBogus();
michael@0 292 }
michael@0 293 }
michael@0 294 }
michael@0 295
michael@0 296 #if U_CHARSET_IS_UTF8
michael@0 297
michael@0 298 UnicodeString::UnicodeString(const char *codepageData)
michael@0 299 : fShortLength(0),
michael@0 300 fFlags(kShortString) {
michael@0 301 if(codepageData != 0) {
michael@0 302 setToUTF8(codepageData);
michael@0 303 }
michael@0 304 }
michael@0 305
michael@0 306 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
michael@0 307 : fShortLength(0),
michael@0 308 fFlags(kShortString) {
michael@0 309 // if there's nothing to convert, do nothing
michael@0 310 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
michael@0 311 return;
michael@0 312 }
michael@0 313 if(dataLength == -1) {
michael@0 314 dataLength = (int32_t)uprv_strlen(codepageData);
michael@0 315 }
michael@0 316 setToUTF8(StringPiece(codepageData, dataLength));
michael@0 317 }
michael@0 318
michael@0 319 // else see unistr_cnv.cpp
michael@0 320 #endif
michael@0 321
michael@0 322 UnicodeString::UnicodeString(const UnicodeString& that)
michael@0 323 : Replaceable(),
michael@0 324 fShortLength(0),
michael@0 325 fFlags(kShortString)
michael@0 326 {
michael@0 327 copyFrom(that);
michael@0 328 }
michael@0 329
michael@0 330 UnicodeString::UnicodeString(const UnicodeString& that,
michael@0 331 int32_t srcStart)
michael@0 332 : Replaceable(),
michael@0 333 fShortLength(0),
michael@0 334 fFlags(kShortString)
michael@0 335 {
michael@0 336 setTo(that, srcStart);
michael@0 337 }
michael@0 338
michael@0 339 UnicodeString::UnicodeString(const UnicodeString& that,
michael@0 340 int32_t srcStart,
michael@0 341 int32_t srcLength)
michael@0 342 : Replaceable(),
michael@0 343 fShortLength(0),
michael@0 344 fFlags(kShortString)
michael@0 345 {
michael@0 346 setTo(that, srcStart, srcLength);
michael@0 347 }
michael@0 348
michael@0 349 // Replaceable base class clone() default implementation, does not clone
michael@0 350 Replaceable *
michael@0 351 Replaceable::clone() const {
michael@0 352 return NULL;
michael@0 353 }
michael@0 354
michael@0 355 // UnicodeString overrides clone() with a real implementation
michael@0 356 Replaceable *
michael@0 357 UnicodeString::clone() const {
michael@0 358 return new UnicodeString(*this);
michael@0 359 }
michael@0 360
michael@0 361 //========================================
michael@0 362 // array allocation
michael@0 363 //========================================
michael@0 364
michael@0 365 UBool
michael@0 366 UnicodeString::allocate(int32_t capacity) {
michael@0 367 if(capacity <= US_STACKBUF_SIZE) {
michael@0 368 fFlags = kShortString;
michael@0 369 } else {
michael@0 370 // count bytes for the refCounter and the string capacity, and
michael@0 371 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
michael@0 372 // to be safely aligned for the refCount
michael@0 373 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
michael@0 374 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
michael@0 375 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
michael@0 376 if(array != 0) {
michael@0 377 // set initial refCount and point behind the refCount
michael@0 378 *array++ = 1;
michael@0 379
michael@0 380 // have fArray point to the first UChar
michael@0 381 fUnion.fFields.fArray = (UChar *)array;
michael@0 382 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
michael@0 383 fFlags = kLongString;
michael@0 384 } else {
michael@0 385 fShortLength = 0;
michael@0 386 fUnion.fFields.fArray = 0;
michael@0 387 fUnion.fFields.fCapacity = 0;
michael@0 388 fFlags = kIsBogus;
michael@0 389 return FALSE;
michael@0 390 }
michael@0 391 }
michael@0 392 return TRUE;
michael@0 393 }
michael@0 394
michael@0 395 //========================================
michael@0 396 // Destructor
michael@0 397 //========================================
michael@0 398 UnicodeString::~UnicodeString()
michael@0 399 {
michael@0 400 releaseArray();
michael@0 401 }
michael@0 402
michael@0 403 //========================================
michael@0 404 // Factory methods
michael@0 405 //========================================
michael@0 406
michael@0 407 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
michael@0 408 UnicodeString result;
michael@0 409 result.setToUTF8(utf8);
michael@0 410 return result;
michael@0 411 }
michael@0 412
michael@0 413 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
michael@0 414 UnicodeString result;
michael@0 415 int32_t capacity;
michael@0 416 // Most UTF-32 strings will be BMP-only and result in a same-length
michael@0 417 // UTF-16 string. We overestimate the capacity just slightly,
michael@0 418 // just in case there are a few supplementary characters.
michael@0 419 if(length <= US_STACKBUF_SIZE) {
michael@0 420 capacity = US_STACKBUF_SIZE;
michael@0 421 } else {
michael@0 422 capacity = length + (length >> 4) + 4;
michael@0 423 }
michael@0 424 do {
michael@0 425 UChar *utf16 = result.getBuffer(capacity);
michael@0 426 int32_t length16;
michael@0 427 UErrorCode errorCode = U_ZERO_ERROR;
michael@0 428 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
michael@0 429 utf32, length,
michael@0 430 0xfffd, // Substitution character.
michael@0 431 NULL, // Don't care about number of substitutions.
michael@0 432 &errorCode);
michael@0 433 result.releaseBuffer(length16);
michael@0 434 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
michael@0 435 capacity = length16 + 1; // +1 for the terminating NUL.
michael@0 436 continue;
michael@0 437 } else if(U_FAILURE(errorCode)) {
michael@0 438 result.setToBogus();
michael@0 439 }
michael@0 440 break;
michael@0 441 } while(TRUE);
michael@0 442 return result;
michael@0 443 }
michael@0 444
michael@0 445 //========================================
michael@0 446 // Assignment
michael@0 447 //========================================
michael@0 448
michael@0 449 UnicodeString &
michael@0 450 UnicodeString::operator=(const UnicodeString &src) {
michael@0 451 return copyFrom(src);
michael@0 452 }
michael@0 453
michael@0 454 UnicodeString &
michael@0 455 UnicodeString::fastCopyFrom(const UnicodeString &src) {
michael@0 456 return copyFrom(src, TRUE);
michael@0 457 }
michael@0 458
michael@0 459 UnicodeString &
michael@0 460 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
michael@0 461 // if assigning to ourselves, do nothing
michael@0 462 if(this == 0 || this == &src) {
michael@0 463 return *this;
michael@0 464 }
michael@0 465
michael@0 466 // is the right side bogus?
michael@0 467 if(&src == 0 || src.isBogus()) {
michael@0 468 setToBogus();
michael@0 469 return *this;
michael@0 470 }
michael@0 471
michael@0 472 // delete the current contents
michael@0 473 releaseArray();
michael@0 474
michael@0 475 if(src.isEmpty()) {
michael@0 476 // empty string - use the stack buffer
michael@0 477 setToEmpty();
michael@0 478 return *this;
michael@0 479 }
michael@0 480
michael@0 481 // we always copy the length
michael@0 482 int32_t srcLength = src.length();
michael@0 483 setLength(srcLength);
michael@0 484
michael@0 485 // fLength>0 and not an "open" src.getBuffer(minCapacity)
michael@0 486 switch(src.fFlags) {
michael@0 487 case kShortString:
michael@0 488 // short string using the stack buffer, do the same
michael@0 489 fFlags = kShortString;
michael@0 490 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
michael@0 491 break;
michael@0 492 case kLongString:
michael@0 493 // src uses a refCounted string buffer, use that buffer with refCount
michael@0 494 // src is const, use a cast - we don't really change it
michael@0 495 ((UnicodeString &)src).addRef();
michael@0 496 // copy all fields, share the reference-counted buffer
michael@0 497 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
michael@0 498 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
michael@0 499 fFlags = src.fFlags;
michael@0 500 break;
michael@0 501 case kReadonlyAlias:
michael@0 502 if(fastCopy) {
michael@0 503 // src is a readonly alias, do the same
michael@0 504 // -> maintain the readonly alias as such
michael@0 505 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
michael@0 506 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
michael@0 507 fFlags = src.fFlags;
michael@0 508 break;
michael@0 509 }
michael@0 510 // else if(!fastCopy) fall through to case kWritableAlias
michael@0 511 // -> allocate a new buffer and copy the contents
michael@0 512 case kWritableAlias:
michael@0 513 // src is a writable alias; we make a copy of that instead
michael@0 514 if(allocate(srcLength)) {
michael@0 515 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
michael@0 516 break;
michael@0 517 }
michael@0 518 // if there is not enough memory, then fall through to setting to bogus
michael@0 519 default:
michael@0 520 // if src is bogus, set ourselves to bogus
michael@0 521 // do not call setToBogus() here because fArray and fFlags are not consistent here
michael@0 522 fShortLength = 0;
michael@0 523 fUnion.fFields.fArray = 0;
michael@0 524 fUnion.fFields.fCapacity = 0;
michael@0 525 fFlags = kIsBogus;
michael@0 526 break;
michael@0 527 }
michael@0 528
michael@0 529 return *this;
michael@0 530 }
michael@0 531
michael@0 532 //========================================
michael@0 533 // Miscellaneous operations
michael@0 534 //========================================
michael@0 535
michael@0 536 UnicodeString UnicodeString::unescape() const {
michael@0 537 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
michael@0 538 const UChar *array = getBuffer();
michael@0 539 int32_t len = length();
michael@0 540 int32_t prev = 0;
michael@0 541 for (int32_t i=0;;) {
michael@0 542 if (i == len) {
michael@0 543 result.append(array, prev, len - prev);
michael@0 544 break;
michael@0 545 }
michael@0 546 if (array[i++] == 0x5C /*'\\'*/) {
michael@0 547 result.append(array, prev, (i - 1) - prev);
michael@0 548 UChar32 c = unescapeAt(i); // advances i
michael@0 549 if (c < 0) {
michael@0 550 result.remove(); // return empty string
michael@0 551 break; // invalid escape sequence
michael@0 552 }
michael@0 553 result.append(c);
michael@0 554 prev = i;
michael@0 555 }
michael@0 556 }
michael@0 557 return result;
michael@0 558 }
michael@0 559
michael@0 560 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
michael@0 561 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
michael@0 562 }
michael@0 563
michael@0 564 //========================================
michael@0 565 // Read-only implementation
michael@0 566 //========================================
michael@0 567 UBool
michael@0 568 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
michael@0 569 // Requires: this & text not bogus and have same lengths.
michael@0 570 // Byte-wise comparison works for equality regardless of endianness.
michael@0 571 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
michael@0 572 }
michael@0 573
michael@0 574 int8_t
michael@0 575 UnicodeString::doCompare( int32_t start,
michael@0 576 int32_t length,
michael@0 577 const UChar *srcChars,
michael@0 578 int32_t srcStart,
michael@0 579 int32_t srcLength) const
michael@0 580 {
michael@0 581 // compare illegal string values
michael@0 582 if(isBogus()) {
michael@0 583 return -1;
michael@0 584 }
michael@0 585
michael@0 586 // pin indices to legal values
michael@0 587 pinIndices(start, length);
michael@0 588
michael@0 589 if(srcChars == NULL) {
michael@0 590 // treat const UChar *srcChars==NULL as an empty string
michael@0 591 return length == 0 ? 0 : 1;
michael@0 592 }
michael@0 593
michael@0 594 // get the correct pointer
michael@0 595 const UChar *chars = getArrayStart();
michael@0 596
michael@0 597 chars += start;
michael@0 598 srcChars += srcStart;
michael@0 599
michael@0 600 int32_t minLength;
michael@0 601 int8_t lengthResult;
michael@0 602
michael@0 603 // get the srcLength if necessary
michael@0 604 if(srcLength < 0) {
michael@0 605 srcLength = u_strlen(srcChars + srcStart);
michael@0 606 }
michael@0 607
michael@0 608 // are we comparing different lengths?
michael@0 609 if(length != srcLength) {
michael@0 610 if(length < srcLength) {
michael@0 611 minLength = length;
michael@0 612 lengthResult = -1;
michael@0 613 } else {
michael@0 614 minLength = srcLength;
michael@0 615 lengthResult = 1;
michael@0 616 }
michael@0 617 } else {
michael@0 618 minLength = length;
michael@0 619 lengthResult = 0;
michael@0 620 }
michael@0 621
michael@0 622 /*
michael@0 623 * note that uprv_memcmp() returns an int but we return an int8_t;
michael@0 624 * we need to take care not to truncate the result -
michael@0 625 * one way to do this is to right-shift the value to
michael@0 626 * move the sign bit into the lower 8 bits and making sure that this
michael@0 627 * does not become 0 itself
michael@0 628 */
michael@0 629
michael@0 630 if(minLength > 0 && chars != srcChars) {
michael@0 631 int32_t result;
michael@0 632
michael@0 633 # if U_IS_BIG_ENDIAN
michael@0 634 // big-endian: byte comparison works
michael@0 635 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
michael@0 636 if(result != 0) {
michael@0 637 return (int8_t)(result >> 15 | 1);
michael@0 638 }
michael@0 639 # else
michael@0 640 // little-endian: compare UChar units
michael@0 641 do {
michael@0 642 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
michael@0 643 if(result != 0) {
michael@0 644 return (int8_t)(result >> 15 | 1);
michael@0 645 }
michael@0 646 } while(--minLength > 0);
michael@0 647 # endif
michael@0 648 }
michael@0 649 return lengthResult;
michael@0 650 }
michael@0 651
michael@0 652 /* String compare in code point order - doCompare() compares in code unit order. */
michael@0 653 int8_t
michael@0 654 UnicodeString::doCompareCodePointOrder(int32_t start,
michael@0 655 int32_t length,
michael@0 656 const UChar *srcChars,
michael@0 657 int32_t srcStart,
michael@0 658 int32_t srcLength) const
michael@0 659 {
michael@0 660 // compare illegal string values
michael@0 661 // treat const UChar *srcChars==NULL as an empty string
michael@0 662 if(isBogus()) {
michael@0 663 return -1;
michael@0 664 }
michael@0 665
michael@0 666 // pin indices to legal values
michael@0 667 pinIndices(start, length);
michael@0 668
michael@0 669 if(srcChars == NULL) {
michael@0 670 srcStart = srcLength = 0;
michael@0 671 }
michael@0 672
michael@0 673 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
michael@0 674 /* translate the 32-bit result into an 8-bit one */
michael@0 675 if(diff!=0) {
michael@0 676 return (int8_t)(diff >> 15 | 1);
michael@0 677 } else {
michael@0 678 return 0;
michael@0 679 }
michael@0 680 }
michael@0 681
michael@0 682 int32_t
michael@0 683 UnicodeString::getLength() const {
michael@0 684 return length();
michael@0 685 }
michael@0 686
michael@0 687 UChar
michael@0 688 UnicodeString::getCharAt(int32_t offset) const {
michael@0 689 return charAt(offset);
michael@0 690 }
michael@0 691
michael@0 692 UChar32
michael@0 693 UnicodeString::getChar32At(int32_t offset) const {
michael@0 694 return char32At(offset);
michael@0 695 }
michael@0 696
michael@0 697 UChar32
michael@0 698 UnicodeString::char32At(int32_t offset) const
michael@0 699 {
michael@0 700 int32_t len = length();
michael@0 701 if((uint32_t)offset < (uint32_t)len) {
michael@0 702 const UChar *array = getArrayStart();
michael@0 703 UChar32 c;
michael@0 704 U16_GET(array, 0, offset, len, c);
michael@0 705 return c;
michael@0 706 } else {
michael@0 707 return kInvalidUChar;
michael@0 708 }
michael@0 709 }
michael@0 710
michael@0 711 int32_t
michael@0 712 UnicodeString::getChar32Start(int32_t offset) const {
michael@0 713 if((uint32_t)offset < (uint32_t)length()) {
michael@0 714 const UChar *array = getArrayStart();
michael@0 715 U16_SET_CP_START(array, 0, offset);
michael@0 716 return offset;
michael@0 717 } else {
michael@0 718 return 0;
michael@0 719 }
michael@0 720 }
michael@0 721
michael@0 722 int32_t
michael@0 723 UnicodeString::getChar32Limit(int32_t offset) const {
michael@0 724 int32_t len = length();
michael@0 725 if((uint32_t)offset < (uint32_t)len) {
michael@0 726 const UChar *array = getArrayStart();
michael@0 727 U16_SET_CP_LIMIT(array, 0, offset, len);
michael@0 728 return offset;
michael@0 729 } else {
michael@0 730 return len;
michael@0 731 }
michael@0 732 }
michael@0 733
michael@0 734 int32_t
michael@0 735 UnicodeString::countChar32(int32_t start, int32_t length) const {
michael@0 736 pinIndices(start, length);
michael@0 737 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
michael@0 738 return u_countChar32(getArrayStart()+start, length);
michael@0 739 }
michael@0 740
michael@0 741 UBool
michael@0 742 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
michael@0 743 pinIndices(start, length);
michael@0 744 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
michael@0 745 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
michael@0 746 }
michael@0 747
michael@0 748 int32_t
michael@0 749 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
michael@0 750 // pin index
michael@0 751 int32_t len = length();
michael@0 752 if(index<0) {
michael@0 753 index=0;
michael@0 754 } else if(index>len) {
michael@0 755 index=len;
michael@0 756 }
michael@0 757
michael@0 758 const UChar *array = getArrayStart();
michael@0 759 if(delta>0) {
michael@0 760 U16_FWD_N(array, index, len, delta);
michael@0 761 } else {
michael@0 762 U16_BACK_N(array, 0, index, -delta);
michael@0 763 }
michael@0 764
michael@0 765 return index;
michael@0 766 }
michael@0 767
michael@0 768 void
michael@0 769 UnicodeString::doExtract(int32_t start,
michael@0 770 int32_t length,
michael@0 771 UChar *dst,
michael@0 772 int32_t dstStart) const
michael@0 773 {
michael@0 774 // pin indices to legal values
michael@0 775 pinIndices(start, length);
michael@0 776
michael@0 777 // do not copy anything if we alias dst itself
michael@0 778 const UChar *array = getArrayStart();
michael@0 779 if(array + start != dst + dstStart) {
michael@0 780 us_arrayCopy(array, start, dst, dstStart, length);
michael@0 781 }
michael@0 782 }
michael@0 783
michael@0 784 int32_t
michael@0 785 UnicodeString::extract(UChar *dest, int32_t destCapacity,
michael@0 786 UErrorCode &errorCode) const {
michael@0 787 int32_t len = length();
michael@0 788 if(U_SUCCESS(errorCode)) {
michael@0 789 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
michael@0 790 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 791 } else {
michael@0 792 const UChar *array = getArrayStart();
michael@0 793 if(len>0 && len<=destCapacity && array!=dest) {
michael@0 794 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
michael@0 795 }
michael@0 796 return u_terminateUChars(dest, destCapacity, len, &errorCode);
michael@0 797 }
michael@0 798 }
michael@0 799
michael@0 800 return len;
michael@0 801 }
michael@0 802
michael@0 803 int32_t
michael@0 804 UnicodeString::extract(int32_t start,
michael@0 805 int32_t length,
michael@0 806 char *target,
michael@0 807 int32_t targetCapacity,
michael@0 808 enum EInvariant) const
michael@0 809 {
michael@0 810 // if the arguments are illegal, then do nothing
michael@0 811 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
michael@0 812 return 0;
michael@0 813 }
michael@0 814
michael@0 815 // pin the indices to legal values
michael@0 816 pinIndices(start, length);
michael@0 817
michael@0 818 if(length <= targetCapacity) {
michael@0 819 u_UCharsToChars(getArrayStart() + start, target, length);
michael@0 820 }
michael@0 821 UErrorCode status = U_ZERO_ERROR;
michael@0 822 return u_terminateChars(target, targetCapacity, length, &status);
michael@0 823 }
michael@0 824
michael@0 825 UnicodeString
michael@0 826 UnicodeString::tempSubString(int32_t start, int32_t len) const {
michael@0 827 pinIndices(start, len);
michael@0 828 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
michael@0 829 if(array==NULL) {
michael@0 830 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string
michael@0 831 len=-2; // bogus result string
michael@0 832 }
michael@0 833 return UnicodeString(FALSE, array + start, len);
michael@0 834 }
michael@0 835
michael@0 836 int32_t
michael@0 837 UnicodeString::toUTF8(int32_t start, int32_t len,
michael@0 838 char *target, int32_t capacity) const {
michael@0 839 pinIndices(start, len);
michael@0 840 int32_t length8;
michael@0 841 UErrorCode errorCode = U_ZERO_ERROR;
michael@0 842 u_strToUTF8WithSub(target, capacity, &length8,
michael@0 843 getBuffer() + start, len,
michael@0 844 0xFFFD, // Standard substitution character.
michael@0 845 NULL, // Don't care about number of substitutions.
michael@0 846 &errorCode);
michael@0 847 return length8;
michael@0 848 }
michael@0 849
michael@0 850 #if U_CHARSET_IS_UTF8
michael@0 851
michael@0 852 int32_t
michael@0 853 UnicodeString::extract(int32_t start, int32_t len,
michael@0 854 char *target, uint32_t dstSize) const {
michael@0 855 // if the arguments are illegal, then do nothing
michael@0 856 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
michael@0 857 return 0;
michael@0 858 }
michael@0 859 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
michael@0 860 }
michael@0 861
michael@0 862 // else see unistr_cnv.cpp
michael@0 863 #endif
michael@0 864
michael@0 865 void
michael@0 866 UnicodeString::extractBetween(int32_t start,
michael@0 867 int32_t limit,
michael@0 868 UnicodeString& target) const {
michael@0 869 pinIndex(start);
michael@0 870 pinIndex(limit);
michael@0 871 doExtract(start, limit - start, target);
michael@0 872 }
michael@0 873
michael@0 874 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
michael@0 875 // as many bytes as the source has UChars.
michael@0 876 // The "worst cases" are writing systems like Indic, Thai and CJK with
michael@0 877 // 3:1 bytes:UChars.
michael@0 878 void
michael@0 879 UnicodeString::toUTF8(ByteSink &sink) const {
michael@0 880 int32_t length16 = length();
michael@0 881 if(length16 != 0) {
michael@0 882 char stackBuffer[1024];
michael@0 883 int32_t capacity = (int32_t)sizeof(stackBuffer);
michael@0 884 UBool utf8IsOwned = FALSE;
michael@0 885 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
michael@0 886 3*length16,
michael@0 887 stackBuffer, capacity,
michael@0 888 &capacity);
michael@0 889 int32_t length8 = 0;
michael@0 890 UErrorCode errorCode = U_ZERO_ERROR;
michael@0 891 u_strToUTF8WithSub(utf8, capacity, &length8,
michael@0 892 getBuffer(), length16,
michael@0 893 0xFFFD, // Standard substitution character.
michael@0 894 NULL, // Don't care about number of substitutions.
michael@0 895 &errorCode);
michael@0 896 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
michael@0 897 utf8 = (char *)uprv_malloc(length8);
michael@0 898 if(utf8 != NULL) {
michael@0 899 utf8IsOwned = TRUE;
michael@0 900 errorCode = U_ZERO_ERROR;
michael@0 901 u_strToUTF8WithSub(utf8, length8, &length8,
michael@0 902 getBuffer(), length16,
michael@0 903 0xFFFD, // Standard substitution character.
michael@0 904 NULL, // Don't care about number of substitutions.
michael@0 905 &errorCode);
michael@0 906 } else {
michael@0 907 errorCode = U_MEMORY_ALLOCATION_ERROR;
michael@0 908 }
michael@0 909 }
michael@0 910 if(U_SUCCESS(errorCode)) {
michael@0 911 sink.Append(utf8, length8);
michael@0 912 sink.Flush();
michael@0 913 }
michael@0 914 if(utf8IsOwned) {
michael@0 915 uprv_free(utf8);
michael@0 916 }
michael@0 917 }
michael@0 918 }
michael@0 919
michael@0 920 int32_t
michael@0 921 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
michael@0 922 int32_t length32=0;
michael@0 923 if(U_SUCCESS(errorCode)) {
michael@0 924 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
michael@0 925 u_strToUTF32WithSub(utf32, capacity, &length32,
michael@0 926 getBuffer(), length(),
michael@0 927 0xfffd, // Substitution character.
michael@0 928 NULL, // Don't care about number of substitutions.
michael@0 929 &errorCode);
michael@0 930 }
michael@0 931 return length32;
michael@0 932 }
michael@0 933
michael@0 934 int32_t
michael@0 935 UnicodeString::indexOf(const UChar *srcChars,
michael@0 936 int32_t srcStart,
michael@0 937 int32_t srcLength,
michael@0 938 int32_t start,
michael@0 939 int32_t length) const
michael@0 940 {
michael@0 941 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
michael@0 942 return -1;
michael@0 943 }
michael@0 944
michael@0 945 // UnicodeString does not find empty substrings
michael@0 946 if(srcLength < 0 && srcChars[srcStart] == 0) {
michael@0 947 return -1;
michael@0 948 }
michael@0 949
michael@0 950 // get the indices within bounds
michael@0 951 pinIndices(start, length);
michael@0 952
michael@0 953 // find the first occurrence of the substring
michael@0 954 const UChar *array = getArrayStart();
michael@0 955 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
michael@0 956 if(match == NULL) {
michael@0 957 return -1;
michael@0 958 } else {
michael@0 959 return (int32_t)(match - array);
michael@0 960 }
michael@0 961 }
michael@0 962
michael@0 963 int32_t
michael@0 964 UnicodeString::doIndexOf(UChar c,
michael@0 965 int32_t start,
michael@0 966 int32_t length) const
michael@0 967 {
michael@0 968 // pin indices
michael@0 969 pinIndices(start, length);
michael@0 970
michael@0 971 // find the first occurrence of c
michael@0 972 const UChar *array = getArrayStart();
michael@0 973 const UChar *match = u_memchr(array + start, c, length);
michael@0 974 if(match == NULL) {
michael@0 975 return -1;
michael@0 976 } else {
michael@0 977 return (int32_t)(match - array);
michael@0 978 }
michael@0 979 }
michael@0 980
michael@0 981 int32_t
michael@0 982 UnicodeString::doIndexOf(UChar32 c,
michael@0 983 int32_t start,
michael@0 984 int32_t length) const {
michael@0 985 // pin indices
michael@0 986 pinIndices(start, length);
michael@0 987
michael@0 988 // find the first occurrence of c
michael@0 989 const UChar *array = getArrayStart();
michael@0 990 const UChar *match = u_memchr32(array + start, c, length);
michael@0 991 if(match == NULL) {
michael@0 992 return -1;
michael@0 993 } else {
michael@0 994 return (int32_t)(match - array);
michael@0 995 }
michael@0 996 }
michael@0 997
michael@0 998 int32_t
michael@0 999 UnicodeString::lastIndexOf(const UChar *srcChars,
michael@0 1000 int32_t srcStart,
michael@0 1001 int32_t srcLength,
michael@0 1002 int32_t start,
michael@0 1003 int32_t length) const
michael@0 1004 {
michael@0 1005 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
michael@0 1006 return -1;
michael@0 1007 }
michael@0 1008
michael@0 1009 // UnicodeString does not find empty substrings
michael@0 1010 if(srcLength < 0 && srcChars[srcStart] == 0) {
michael@0 1011 return -1;
michael@0 1012 }
michael@0 1013
michael@0 1014 // get the indices within bounds
michael@0 1015 pinIndices(start, length);
michael@0 1016
michael@0 1017 // find the last occurrence of the substring
michael@0 1018 const UChar *array = getArrayStart();
michael@0 1019 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
michael@0 1020 if(match == NULL) {
michael@0 1021 return -1;
michael@0 1022 } else {
michael@0 1023 return (int32_t)(match - array);
michael@0 1024 }
michael@0 1025 }
michael@0 1026
michael@0 1027 int32_t
michael@0 1028 UnicodeString::doLastIndexOf(UChar c,
michael@0 1029 int32_t start,
michael@0 1030 int32_t length) const
michael@0 1031 {
michael@0 1032 if(isBogus()) {
michael@0 1033 return -1;
michael@0 1034 }
michael@0 1035
michael@0 1036 // pin indices
michael@0 1037 pinIndices(start, length);
michael@0 1038
michael@0 1039 // find the last occurrence of c
michael@0 1040 const UChar *array = getArrayStart();
michael@0 1041 const UChar *match = u_memrchr(array + start, c, length);
michael@0 1042 if(match == NULL) {
michael@0 1043 return -1;
michael@0 1044 } else {
michael@0 1045 return (int32_t)(match - array);
michael@0 1046 }
michael@0 1047 }
michael@0 1048
michael@0 1049 int32_t
michael@0 1050 UnicodeString::doLastIndexOf(UChar32 c,
michael@0 1051 int32_t start,
michael@0 1052 int32_t length) const {
michael@0 1053 // pin indices
michael@0 1054 pinIndices(start, length);
michael@0 1055
michael@0 1056 // find the last occurrence of c
michael@0 1057 const UChar *array = getArrayStart();
michael@0 1058 const UChar *match = u_memrchr32(array + start, c, length);
michael@0 1059 if(match == NULL) {
michael@0 1060 return -1;
michael@0 1061 } else {
michael@0 1062 return (int32_t)(match - array);
michael@0 1063 }
michael@0 1064 }
michael@0 1065
michael@0 1066 //========================================
michael@0 1067 // Write implementation
michael@0 1068 //========================================
michael@0 1069
michael@0 1070 UnicodeString&
michael@0 1071 UnicodeString::findAndReplace(int32_t start,
michael@0 1072 int32_t length,
michael@0 1073 const UnicodeString& oldText,
michael@0 1074 int32_t oldStart,
michael@0 1075 int32_t oldLength,
michael@0 1076 const UnicodeString& newText,
michael@0 1077 int32_t newStart,
michael@0 1078 int32_t newLength)
michael@0 1079 {
michael@0 1080 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
michael@0 1081 return *this;
michael@0 1082 }
michael@0 1083
michael@0 1084 pinIndices(start, length);
michael@0 1085 oldText.pinIndices(oldStart, oldLength);
michael@0 1086 newText.pinIndices(newStart, newLength);
michael@0 1087
michael@0 1088 if(oldLength == 0) {
michael@0 1089 return *this;
michael@0 1090 }
michael@0 1091
michael@0 1092 while(length > 0 && length >= oldLength) {
michael@0 1093 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
michael@0 1094 if(pos < 0) {
michael@0 1095 // no more oldText's here: done
michael@0 1096 break;
michael@0 1097 } else {
michael@0 1098 // we found oldText, replace it by newText and go beyond it
michael@0 1099 replace(pos, oldLength, newText, newStart, newLength);
michael@0 1100 length -= pos + oldLength - start;
michael@0 1101 start = pos + newLength;
michael@0 1102 }
michael@0 1103 }
michael@0 1104
michael@0 1105 return *this;
michael@0 1106 }
michael@0 1107
michael@0 1108
michael@0 1109 void
michael@0 1110 UnicodeString::setToBogus()
michael@0 1111 {
michael@0 1112 releaseArray();
michael@0 1113
michael@0 1114 fShortLength = 0;
michael@0 1115 fUnion.fFields.fArray = 0;
michael@0 1116 fUnion.fFields.fCapacity = 0;
michael@0 1117 fFlags = kIsBogus;
michael@0 1118 }
michael@0 1119
michael@0 1120 // turn a bogus string into an empty one
michael@0 1121 void
michael@0 1122 UnicodeString::unBogus() {
michael@0 1123 if(fFlags & kIsBogus) {
michael@0 1124 setToEmpty();
michael@0 1125 }
michael@0 1126 }
michael@0 1127
michael@0 1128 const UChar *
michael@0 1129 UnicodeString::getTerminatedBuffer() {
michael@0 1130 if(!isWritable()) {
michael@0 1131 return 0;
michael@0 1132 }
michael@0 1133 UChar *array = getArrayStart();
michael@0 1134 int32_t len = length();
michael@0 1135 if(len < getCapacity()) {
michael@0 1136 if(fFlags & kBufferIsReadonly) {
michael@0 1137 // If len<capacity on a read-only alias, then array[len] is
michael@0 1138 // either the original NUL (if constructed with (TRUE, s, length))
michael@0 1139 // or one of the original string contents characters (if later truncated),
michael@0 1140 // therefore we can assume that array[len] is initialized memory.
michael@0 1141 if(array[len] == 0) {
michael@0 1142 return array;
michael@0 1143 }
michael@0 1144 } else if(((fFlags & kRefCounted) == 0 || refCount() == 1)) {
michael@0 1145 // kRefCounted: Do not write the NUL if the buffer is shared.
michael@0 1146 // That is mostly safe, except when the length of one copy was modified
michael@0 1147 // without copy-on-write, e.g., via truncate(newLength) or remove(void).
michael@0 1148 // Then the NUL would be written into the middle of another copy's string.
michael@0 1149
michael@0 1150 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
michael@0 1151 // Do not test if there is a NUL already because it might be uninitialized memory.
michael@0 1152 // (That would be safe, but tools like valgrind & Purify would complain.)
michael@0 1153 array[len] = 0;
michael@0 1154 return array;
michael@0 1155 }
michael@0 1156 }
michael@0 1157 if(cloneArrayIfNeeded(len+1)) {
michael@0 1158 array = getArrayStart();
michael@0 1159 array[len] = 0;
michael@0 1160 return array;
michael@0 1161 } else {
michael@0 1162 return NULL;
michael@0 1163 }
michael@0 1164 }
michael@0 1165
michael@0 1166 // setTo() analogous to the readonly-aliasing constructor with the same signature
michael@0 1167 UnicodeString &
michael@0 1168 UnicodeString::setTo(UBool isTerminated,
michael@0 1169 const UChar *text,
michael@0 1170 int32_t textLength)
michael@0 1171 {
michael@0 1172 if(fFlags & kOpenGetBuffer) {
michael@0 1173 // do not modify a string that has an "open" getBuffer(minCapacity)
michael@0 1174 return *this;
michael@0 1175 }
michael@0 1176
michael@0 1177 if(text == NULL) {
michael@0 1178 // treat as an empty string, do not alias
michael@0 1179 releaseArray();
michael@0 1180 setToEmpty();
michael@0 1181 return *this;
michael@0 1182 }
michael@0 1183
michael@0 1184 if( textLength < -1 ||
michael@0 1185 (textLength == -1 && !isTerminated) ||
michael@0 1186 (textLength >= 0 && isTerminated && text[textLength] != 0)
michael@0 1187 ) {
michael@0 1188 setToBogus();
michael@0 1189 return *this;
michael@0 1190 }
michael@0 1191
michael@0 1192 releaseArray();
michael@0 1193
michael@0 1194 if(textLength == -1) {
michael@0 1195 // text is terminated, or else it would have failed the above test
michael@0 1196 textLength = u_strlen(text);
michael@0 1197 }
michael@0 1198 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
michael@0 1199
michael@0 1200 fFlags = kReadonlyAlias;
michael@0 1201 return *this;
michael@0 1202 }
michael@0 1203
michael@0 1204 // setTo() analogous to the writable-aliasing constructor with the same signature
michael@0 1205 UnicodeString &
michael@0 1206 UnicodeString::setTo(UChar *buffer,
michael@0 1207 int32_t buffLength,
michael@0 1208 int32_t buffCapacity) {
michael@0 1209 if(fFlags & kOpenGetBuffer) {
michael@0 1210 // do not modify a string that has an "open" getBuffer(minCapacity)
michael@0 1211 return *this;
michael@0 1212 }
michael@0 1213
michael@0 1214 if(buffer == NULL) {
michael@0 1215 // treat as an empty string, do not alias
michael@0 1216 releaseArray();
michael@0 1217 setToEmpty();
michael@0 1218 return *this;
michael@0 1219 }
michael@0 1220
michael@0 1221 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
michael@0 1222 setToBogus();
michael@0 1223 return *this;
michael@0 1224 } else if(buffLength == -1) {
michael@0 1225 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
michael@0 1226 const UChar *p = buffer, *limit = buffer + buffCapacity;
michael@0 1227 while(p != limit && *p != 0) {
michael@0 1228 ++p;
michael@0 1229 }
michael@0 1230 buffLength = (int32_t)(p - buffer);
michael@0 1231 }
michael@0 1232
michael@0 1233 releaseArray();
michael@0 1234
michael@0 1235 setArray(buffer, buffLength, buffCapacity);
michael@0 1236 fFlags = kWritableAlias;
michael@0 1237 return *this;
michael@0 1238 }
michael@0 1239
michael@0 1240 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
michael@0 1241 unBogus();
michael@0 1242 int32_t length = utf8.length();
michael@0 1243 int32_t capacity;
michael@0 1244 // The UTF-16 string will be at most as long as the UTF-8 string.
michael@0 1245 if(length <= US_STACKBUF_SIZE) {
michael@0 1246 capacity = US_STACKBUF_SIZE;
michael@0 1247 } else {
michael@0 1248 capacity = length + 1; // +1 for the terminating NUL.
michael@0 1249 }
michael@0 1250 UChar *utf16 = getBuffer(capacity);
michael@0 1251 int32_t length16;
michael@0 1252 UErrorCode errorCode = U_ZERO_ERROR;
michael@0 1253 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
michael@0 1254 utf8.data(), length,
michael@0 1255 0xfffd, // Substitution character.
michael@0 1256 NULL, // Don't care about number of substitutions.
michael@0 1257 &errorCode);
michael@0 1258 releaseBuffer(length16);
michael@0 1259 if(U_FAILURE(errorCode)) {
michael@0 1260 setToBogus();
michael@0 1261 }
michael@0 1262 return *this;
michael@0 1263 }
michael@0 1264
michael@0 1265 UnicodeString&
michael@0 1266 UnicodeString::setCharAt(int32_t offset,
michael@0 1267 UChar c)
michael@0 1268 {
michael@0 1269 int32_t len = length();
michael@0 1270 if(cloneArrayIfNeeded() && len > 0) {
michael@0 1271 if(offset < 0) {
michael@0 1272 offset = 0;
michael@0 1273 } else if(offset >= len) {
michael@0 1274 offset = len - 1;
michael@0 1275 }
michael@0 1276
michael@0 1277 getArrayStart()[offset] = c;
michael@0 1278 }
michael@0 1279 return *this;
michael@0 1280 }
michael@0 1281
michael@0 1282 UnicodeString&
michael@0 1283 UnicodeString::replace(int32_t start,
michael@0 1284 int32_t _length,
michael@0 1285 UChar32 srcChar) {
michael@0 1286 UChar buffer[U16_MAX_LENGTH];
michael@0 1287 int32_t count = 0;
michael@0 1288 UBool isError = FALSE;
michael@0 1289 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
michael@0 1290 // We test isError so that the compiler does not complain that we don't.
michael@0 1291 // If isError (srcChar is not a valid code point) then count==0 which means
michael@0 1292 // we remove the source segment rather than replacing it with srcChar.
michael@0 1293 return doReplace(start, _length, buffer, 0, isError ? 0 : count);
michael@0 1294 }
michael@0 1295
michael@0 1296 UnicodeString&
michael@0 1297 UnicodeString::append(UChar32 srcChar) {
michael@0 1298 UChar buffer[U16_MAX_LENGTH];
michael@0 1299 int32_t _length = 0;
michael@0 1300 UBool isError = FALSE;
michael@0 1301 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
michael@0 1302 // We test isError so that the compiler does not complain that we don't.
michael@0 1303 // If isError then _length==0 which turns the doReplace() into a no-op anyway.
michael@0 1304 return isError ? *this : doReplace(length(), 0, buffer, 0, _length);
michael@0 1305 }
michael@0 1306
michael@0 1307 UnicodeString&
michael@0 1308 UnicodeString::doReplace( int32_t start,
michael@0 1309 int32_t length,
michael@0 1310 const UnicodeString& src,
michael@0 1311 int32_t srcStart,
michael@0 1312 int32_t srcLength)
michael@0 1313 {
michael@0 1314 if(!src.isBogus()) {
michael@0 1315 // pin the indices to legal values
michael@0 1316 src.pinIndices(srcStart, srcLength);
michael@0 1317
michael@0 1318 // get the characters from src
michael@0 1319 // and replace the range in ourselves with them
michael@0 1320 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
michael@0 1321 } else {
michael@0 1322 // remove the range
michael@0 1323 return doReplace(start, length, 0, 0, 0);
michael@0 1324 }
michael@0 1325 }
michael@0 1326
michael@0 1327 UnicodeString&
michael@0 1328 UnicodeString::doReplace(int32_t start,
michael@0 1329 int32_t length,
michael@0 1330 const UChar *srcChars,
michael@0 1331 int32_t srcStart,
michael@0 1332 int32_t srcLength)
michael@0 1333 {
michael@0 1334 if(!isWritable()) {
michael@0 1335 return *this;
michael@0 1336 }
michael@0 1337
michael@0 1338 int32_t oldLength = this->length();
michael@0 1339
michael@0 1340 // optimize (read-only alias).remove(0, start) and .remove(start, end)
michael@0 1341 if((fFlags&kBufferIsReadonly) && srcLength == 0) {
michael@0 1342 if(start == 0) {
michael@0 1343 // remove prefix by adjusting the array pointer
michael@0 1344 pinIndex(length);
michael@0 1345 fUnion.fFields.fArray += length;
michael@0 1346 fUnion.fFields.fCapacity -= length;
michael@0 1347 setLength(oldLength - length);
michael@0 1348 return *this;
michael@0 1349 } else {
michael@0 1350 pinIndex(start);
michael@0 1351 if(length >= (oldLength - start)) {
michael@0 1352 // remove suffix by reducing the length (like truncate())
michael@0 1353 setLength(start);
michael@0 1354 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
michael@0 1355 return *this;
michael@0 1356 }
michael@0 1357 }
michael@0 1358 }
michael@0 1359
michael@0 1360 if(srcChars == 0) {
michael@0 1361 srcStart = srcLength = 0;
michael@0 1362 } else if(srcLength < 0) {
michael@0 1363 // get the srcLength if necessary
michael@0 1364 srcLength = u_strlen(srcChars + srcStart);
michael@0 1365 }
michael@0 1366
michael@0 1367 // calculate the size of the string after the replace
michael@0 1368 int32_t newLength;
michael@0 1369
michael@0 1370 // optimize append() onto a large-enough, owned string
michael@0 1371 if(start >= oldLength) {
michael@0 1372 if(srcLength == 0) {
michael@0 1373 return *this;
michael@0 1374 }
michael@0 1375 newLength = oldLength + srcLength;
michael@0 1376 if(newLength <= getCapacity() && isBufferWritable()) {
michael@0 1377 UChar *oldArray = getArrayStart();
michael@0 1378 // Do not copy characters when
michael@0 1379 // UChar *buffer=str.getAppendBuffer(...);
michael@0 1380 // is followed by
michael@0 1381 // str.append(buffer, length);
michael@0 1382 // or
michael@0 1383 // str.appendString(buffer, length)
michael@0 1384 // or similar.
michael@0 1385 if(srcChars + srcStart != oldArray + start || start > oldLength) {
michael@0 1386 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
michael@0 1387 }
michael@0 1388 setLength(newLength);
michael@0 1389 return *this;
michael@0 1390 } else {
michael@0 1391 // pin the indices to legal values
michael@0 1392 start = oldLength;
michael@0 1393 length = 0;
michael@0 1394 }
michael@0 1395 } else {
michael@0 1396 // pin the indices to legal values
michael@0 1397 pinIndices(start, length);
michael@0 1398
michael@0 1399 newLength = oldLength - length + srcLength;
michael@0 1400 }
michael@0 1401
michael@0 1402 // the following may change fArray but will not copy the current contents;
michael@0 1403 // therefore we need to keep the current fArray
michael@0 1404 UChar oldStackBuffer[US_STACKBUF_SIZE];
michael@0 1405 UChar *oldArray;
michael@0 1406 if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
michael@0 1407 // copy the stack buffer contents because it will be overwritten with
michael@0 1408 // fUnion.fFields values
michael@0 1409 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
michael@0 1410 oldArray = oldStackBuffer;
michael@0 1411 } else {
michael@0 1412 oldArray = getArrayStart();
michael@0 1413 }
michael@0 1414
michael@0 1415 // clone our array and allocate a bigger array if needed
michael@0 1416 int32_t *bufferToDelete = 0;
michael@0 1417 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
michael@0 1418 FALSE, &bufferToDelete)
michael@0 1419 ) {
michael@0 1420 return *this;
michael@0 1421 }
michael@0 1422
michael@0 1423 // now do the replace
michael@0 1424
michael@0 1425 UChar *newArray = getArrayStart();
michael@0 1426 if(newArray != oldArray) {
michael@0 1427 // if fArray changed, then we need to copy everything except what will change
michael@0 1428 us_arrayCopy(oldArray, 0, newArray, 0, start);
michael@0 1429 us_arrayCopy(oldArray, start + length,
michael@0 1430 newArray, start + srcLength,
michael@0 1431 oldLength - (start + length));
michael@0 1432 } else if(length != srcLength) {
michael@0 1433 // fArray did not change; copy only the portion that isn't changing, leaving a hole
michael@0 1434 us_arrayCopy(oldArray, start + length,
michael@0 1435 newArray, start + srcLength,
michael@0 1436 oldLength - (start + length));
michael@0 1437 }
michael@0 1438
michael@0 1439 // now fill in the hole with the new string
michael@0 1440 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
michael@0 1441
michael@0 1442 setLength(newLength);
michael@0 1443
michael@0 1444 // delayed delete in case srcChars == fArray when we started, and
michael@0 1445 // to keep oldArray alive for the above operations
michael@0 1446 if (bufferToDelete) {
michael@0 1447 uprv_free(bufferToDelete);
michael@0 1448 }
michael@0 1449
michael@0 1450 return *this;
michael@0 1451 }
michael@0 1452
michael@0 1453 /**
michael@0 1454 * Replaceable API
michael@0 1455 */
michael@0 1456 void
michael@0 1457 UnicodeString::handleReplaceBetween(int32_t start,
michael@0 1458 int32_t limit,
michael@0 1459 const UnicodeString& text) {
michael@0 1460 replaceBetween(start, limit, text);
michael@0 1461 }
michael@0 1462
michael@0 1463 /**
michael@0 1464 * Replaceable API
michael@0 1465 */
michael@0 1466 void
michael@0 1467 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
michael@0 1468 if (limit <= start) {
michael@0 1469 return; // Nothing to do; avoid bogus malloc call
michael@0 1470 }
michael@0 1471 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
michael@0 1472 // Check to make sure text is not null.
michael@0 1473 if (text != NULL) {
michael@0 1474 extractBetween(start, limit, text, 0);
michael@0 1475 insert(dest, text, 0, limit - start);
michael@0 1476 uprv_free(text);
michael@0 1477 }
michael@0 1478 }
michael@0 1479
michael@0 1480 /**
michael@0 1481 * Replaceable API
michael@0 1482 *
michael@0 1483 * NOTE: This is for the Replaceable class. There is no rep.cpp,
michael@0 1484 * so we implement this function here.
michael@0 1485 */
michael@0 1486 UBool Replaceable::hasMetaData() const {
michael@0 1487 return TRUE;
michael@0 1488 }
michael@0 1489
michael@0 1490 /**
michael@0 1491 * Replaceable API
michael@0 1492 */
michael@0 1493 UBool UnicodeString::hasMetaData() const {
michael@0 1494 return FALSE;
michael@0 1495 }
michael@0 1496
michael@0 1497 UnicodeString&
michael@0 1498 UnicodeString::doReverse(int32_t start, int32_t length) {
michael@0 1499 if(length <= 1 || !cloneArrayIfNeeded()) {
michael@0 1500 return *this;
michael@0 1501 }
michael@0 1502
michael@0 1503 // pin the indices to legal values
michael@0 1504 pinIndices(start, length);
michael@0 1505 if(length <= 1) { // pinIndices() might have shrunk the length
michael@0 1506 return *this;
michael@0 1507 }
michael@0 1508
michael@0 1509 UChar *left = getArrayStart() + start;
michael@0 1510 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
michael@0 1511 UChar swap;
michael@0 1512 UBool hasSupplementary = FALSE;
michael@0 1513
michael@0 1514 // Before the loop we know left<right because length>=2.
michael@0 1515 do {
michael@0 1516 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
michael@0 1517 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
michael@0 1518 *right-- = swap;
michael@0 1519 } while(left < right);
michael@0 1520 // Make sure to test the middle code unit of an odd-length string.
michael@0 1521 // Redundant if the length is even.
michael@0 1522 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
michael@0 1523
michael@0 1524 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
michael@0 1525 if(hasSupplementary) {
michael@0 1526 UChar swap2;
michael@0 1527
michael@0 1528 left = getArrayStart() + start;
michael@0 1529 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
michael@0 1530 while(left < right) {
michael@0 1531 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
michael@0 1532 *left++ = swap2;
michael@0 1533 *left++ = swap;
michael@0 1534 } else {
michael@0 1535 ++left;
michael@0 1536 }
michael@0 1537 }
michael@0 1538 }
michael@0 1539
michael@0 1540 return *this;
michael@0 1541 }
michael@0 1542
michael@0 1543 UBool
michael@0 1544 UnicodeString::padLeading(int32_t targetLength,
michael@0 1545 UChar padChar)
michael@0 1546 {
michael@0 1547 int32_t oldLength = length();
michael@0 1548 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
michael@0 1549 return FALSE;
michael@0 1550 } else {
michael@0 1551 // move contents up by padding width
michael@0 1552 UChar *array = getArrayStart();
michael@0 1553 int32_t start = targetLength - oldLength;
michael@0 1554 us_arrayCopy(array, 0, array, start, oldLength);
michael@0 1555
michael@0 1556 // fill in padding character
michael@0 1557 while(--start >= 0) {
michael@0 1558 array[start] = padChar;
michael@0 1559 }
michael@0 1560 setLength(targetLength);
michael@0 1561 return TRUE;
michael@0 1562 }
michael@0 1563 }
michael@0 1564
michael@0 1565 UBool
michael@0 1566 UnicodeString::padTrailing(int32_t targetLength,
michael@0 1567 UChar padChar)
michael@0 1568 {
michael@0 1569 int32_t oldLength = length();
michael@0 1570 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
michael@0 1571 return FALSE;
michael@0 1572 } else {
michael@0 1573 // fill in padding character
michael@0 1574 UChar *array = getArrayStart();
michael@0 1575 int32_t length = targetLength;
michael@0 1576 while(--length >= oldLength) {
michael@0 1577 array[length] = padChar;
michael@0 1578 }
michael@0 1579 setLength(targetLength);
michael@0 1580 return TRUE;
michael@0 1581 }
michael@0 1582 }
michael@0 1583
michael@0 1584 //========================================
michael@0 1585 // Hashing
michael@0 1586 //========================================
michael@0 1587 int32_t
michael@0 1588 UnicodeString::doHashCode() const
michael@0 1589 {
michael@0 1590 /* Delegate hash computation to uhash. This makes UnicodeString
michael@0 1591 * hashing consistent with UChar* hashing. */
michael@0 1592 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
michael@0 1593 if (hashCode == kInvalidHashCode) {
michael@0 1594 hashCode = kEmptyHashCode;
michael@0 1595 }
michael@0 1596 return hashCode;
michael@0 1597 }
michael@0 1598
michael@0 1599 //========================================
michael@0 1600 // External Buffer
michael@0 1601 //========================================
michael@0 1602
michael@0 1603 UChar *
michael@0 1604 UnicodeString::getBuffer(int32_t minCapacity) {
michael@0 1605 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
michael@0 1606 fFlags|=kOpenGetBuffer;
michael@0 1607 fShortLength=0;
michael@0 1608 return getArrayStart();
michael@0 1609 } else {
michael@0 1610 return 0;
michael@0 1611 }
michael@0 1612 }
michael@0 1613
michael@0 1614 void
michael@0 1615 UnicodeString::releaseBuffer(int32_t newLength) {
michael@0 1616 if(fFlags&kOpenGetBuffer && newLength>=-1) {
michael@0 1617 // set the new fLength
michael@0 1618 int32_t capacity=getCapacity();
michael@0 1619 if(newLength==-1) {
michael@0 1620 // the new length is the string length, capped by fCapacity
michael@0 1621 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
michael@0 1622 while(p<limit && *p!=0) {
michael@0 1623 ++p;
michael@0 1624 }
michael@0 1625 newLength=(int32_t)(p-array);
michael@0 1626 } else if(newLength>capacity) {
michael@0 1627 newLength=capacity;
michael@0 1628 }
michael@0 1629 setLength(newLength);
michael@0 1630 fFlags&=~kOpenGetBuffer;
michael@0 1631 }
michael@0 1632 }
michael@0 1633
michael@0 1634 //========================================
michael@0 1635 // Miscellaneous
michael@0 1636 //========================================
michael@0 1637 UBool
michael@0 1638 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
michael@0 1639 int32_t growCapacity,
michael@0 1640 UBool doCopyArray,
michael@0 1641 int32_t **pBufferToDelete,
michael@0 1642 UBool forceClone) {
michael@0 1643 // default parameters need to be static, therefore
michael@0 1644 // the defaults are -1 to have convenience defaults
michael@0 1645 if(newCapacity == -1) {
michael@0 1646 newCapacity = getCapacity();
michael@0 1647 }
michael@0 1648
michael@0 1649 // while a getBuffer(minCapacity) is "open",
michael@0 1650 // prevent any modifications of the string by returning FALSE here
michael@0 1651 // if the string is bogus, then only an assignment or similar can revive it
michael@0 1652 if(!isWritable()) {
michael@0 1653 return FALSE;
michael@0 1654 }
michael@0 1655
michael@0 1656 /*
michael@0 1657 * We need to make a copy of the array if
michael@0 1658 * the buffer is read-only, or
michael@0 1659 * the buffer is refCounted (shared), and refCount>1, or
michael@0 1660 * the buffer is too small.
michael@0 1661 * Return FALSE if memory could not be allocated.
michael@0 1662 */
michael@0 1663 if(forceClone ||
michael@0 1664 fFlags & kBufferIsReadonly ||
michael@0 1665 (fFlags & kRefCounted && refCount() > 1) ||
michael@0 1666 newCapacity > getCapacity()
michael@0 1667 ) {
michael@0 1668 // check growCapacity for default value and use of the stack buffer
michael@0 1669 if(growCapacity < 0) {
michael@0 1670 growCapacity = newCapacity;
michael@0 1671 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
michael@0 1672 growCapacity = US_STACKBUF_SIZE;
michael@0 1673 }
michael@0 1674
michael@0 1675 // save old values
michael@0 1676 UChar oldStackBuffer[US_STACKBUF_SIZE];
michael@0 1677 UChar *oldArray;
michael@0 1678 uint8_t flags = fFlags;
michael@0 1679
michael@0 1680 if(flags&kUsingStackBuffer) {
michael@0 1681 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
michael@0 1682 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
michael@0 1683 // copy the stack buffer contents because it will be overwritten with
michael@0 1684 // fUnion.fFields values
michael@0 1685 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
michael@0 1686 oldArray = oldStackBuffer;
michael@0 1687 } else {
michael@0 1688 oldArray = 0; // no need to copy from stack buffer to itself
michael@0 1689 }
michael@0 1690 } else {
michael@0 1691 oldArray = fUnion.fFields.fArray;
michael@0 1692 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
michael@0 1693 }
michael@0 1694
michael@0 1695 // allocate a new array
michael@0 1696 if(allocate(growCapacity) ||
michael@0 1697 (newCapacity < growCapacity && allocate(newCapacity))
michael@0 1698 ) {
michael@0 1699 if(doCopyArray && oldArray != 0) {
michael@0 1700 // copy the contents
michael@0 1701 // do not copy more than what fits - it may be smaller than before
michael@0 1702 int32_t minLength = length();
michael@0 1703 newCapacity = getCapacity();
michael@0 1704 if(newCapacity < minLength) {
michael@0 1705 minLength = newCapacity;
michael@0 1706 setLength(minLength);
michael@0 1707 }
michael@0 1708 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
michael@0 1709 } else {
michael@0 1710 fShortLength = 0;
michael@0 1711 }
michael@0 1712
michael@0 1713 // release the old array
michael@0 1714 if(flags & kRefCounted) {
michael@0 1715 // the array is refCounted; decrement and release if 0
michael@0 1716 u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
michael@0 1717 if(umtx_atomic_dec(pRefCount) == 0) {
michael@0 1718 if(pBufferToDelete == 0) {
michael@0 1719 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
michael@0 1720 // is defined as volatile. (Volatile has useful non-standard behavior
michael@0 1721 // with this compiler.)
michael@0 1722 uprv_free((void *)pRefCount);
michael@0 1723 } else {
michael@0 1724 // the caller requested to delete it himself
michael@0 1725 *pBufferToDelete = (int32_t *)pRefCount;
michael@0 1726 }
michael@0 1727 }
michael@0 1728 }
michael@0 1729 } else {
michael@0 1730 // not enough memory for growCapacity and not even for the smaller newCapacity
michael@0 1731 // reset the old values for setToBogus() to release the array
michael@0 1732 if(!(flags&kUsingStackBuffer)) {
michael@0 1733 fUnion.fFields.fArray = oldArray;
michael@0 1734 }
michael@0 1735 fFlags = flags;
michael@0 1736 setToBogus();
michael@0 1737 return FALSE;
michael@0 1738 }
michael@0 1739 }
michael@0 1740 return TRUE;
michael@0 1741 }
michael@0 1742
michael@0 1743 // UnicodeStringAppendable ------------------------------------------------- ***
michael@0 1744
michael@0 1745 UnicodeStringAppendable::~UnicodeStringAppendable() {}
michael@0 1746
michael@0 1747 UBool
michael@0 1748 UnicodeStringAppendable::appendCodeUnit(UChar c) {
michael@0 1749 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
michael@0 1750 }
michael@0 1751
michael@0 1752 UBool
michael@0 1753 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
michael@0 1754 UChar buffer[U16_MAX_LENGTH];
michael@0 1755 int32_t cLength = 0;
michael@0 1756 UBool isError = FALSE;
michael@0 1757 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
michael@0 1758 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
michael@0 1759 }
michael@0 1760
michael@0 1761 UBool
michael@0 1762 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
michael@0 1763 return str.doReplace(str.length(), 0, s, 0, length).isWritable();
michael@0 1764 }
michael@0 1765
michael@0 1766 UBool
michael@0 1767 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
michael@0 1768 return str.cloneArrayIfNeeded(str.length() + appendCapacity);
michael@0 1769 }
michael@0 1770
michael@0 1771 UChar *
michael@0 1772 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
michael@0 1773 int32_t desiredCapacityHint,
michael@0 1774 UChar *scratch, int32_t scratchCapacity,
michael@0 1775 int32_t *resultCapacity) {
michael@0 1776 if(minCapacity < 1 || scratchCapacity < minCapacity) {
michael@0 1777 *resultCapacity = 0;
michael@0 1778 return NULL;
michael@0 1779 }
michael@0 1780 int32_t oldLength = str.length();
michael@0 1781 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
michael@0 1782 *resultCapacity = str.getCapacity() - oldLength;
michael@0 1783 return str.getArrayStart() + oldLength;
michael@0 1784 }
michael@0 1785 *resultCapacity = scratchCapacity;
michael@0 1786 return scratch;
michael@0 1787 }
michael@0 1788
michael@0 1789 U_NAMESPACE_END
michael@0 1790
michael@0 1791 U_NAMESPACE_USE
michael@0 1792
michael@0 1793 U_CAPI int32_t U_EXPORT2
michael@0 1794 uhash_hashUnicodeString(const UElement key) {
michael@0 1795 const UnicodeString *str = (const UnicodeString*) key.pointer;
michael@0 1796 return (str == NULL) ? 0 : str->hashCode();
michael@0 1797 }
michael@0 1798
michael@0 1799 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
michael@0 1800 // does not depend on hashtable code.
michael@0 1801 U_CAPI UBool U_EXPORT2
michael@0 1802 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
michael@0 1803 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
michael@0 1804 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
michael@0 1805 if (str1 == str2) {
michael@0 1806 return TRUE;
michael@0 1807 }
michael@0 1808 if (str1 == NULL || str2 == NULL) {
michael@0 1809 return FALSE;
michael@0 1810 }
michael@0 1811 return *str1 == *str2;
michael@0 1812 }
michael@0 1813
michael@0 1814 #ifdef U_STATIC_IMPLEMENTATION
michael@0 1815 /*
michael@0 1816 This should never be called. It is defined here to make sure that the
michael@0 1817 virtual vector deleting destructor is defined within unistr.cpp.
michael@0 1818 The vector deleting destructor is already a part of UObject,
michael@0 1819 but defining it here makes sure that it is included with this object file.
michael@0 1820 This makes sure that static library dependencies are kept to a minimum.
michael@0 1821 */
michael@0 1822 static void uprv_UnicodeStringDummy(void) {
michael@0 1823 delete [] (new UnicodeString[2]);
michael@0 1824 }
michael@0 1825 #endif

mercurial