Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double-key logic according to
private browsing mode or the privacy.thirdparty.isolate preference, and
implement it in GetCookieStringCommon and FindCookie, where it counts.
There are some reservations about how to convince FindCookie users to test
the condition and pass a nullptr when disabling double-key logic.
1 /*
2 ******************************************************************************
3 * Copyright (C) 1999-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 *
7 * File unistr.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
16 * Replaceable.
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
19 */
21 #include "unicode/utypes.h"
22 #include "unicode/appendable.h"
23 #include "unicode/putil.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "unicode/utf.h"
29 #include "unicode/utf16.h"
30 #include "uelement.h"
31 #include "ustr_imp.h"
32 #include "umutex.h"
33 #include "uassert.h"
#if 0
// Debugging helpers. This whole section is compiled out by the "#if 0" above.

#include <iostream>
using namespace std;

// Writes "<name>:|<contents>|" to cout. Code units below 0x20 or at/above 0x7E
// are written as "[0x<hex>]"; all others are written as a plain char.
void
print(const UnicodeString& s,
      const char *name)
{
  cout << name << ":|";
  for(int i = 0; i < s.length(); ++i) {
    UChar c = s[i];
    if(c < 0x0020 || c >= 0x007E) {
      cout << "[0x" << hex << s[i] << "]";
    } else {
      cout << (char) s[i];
    }
  }
  cout << '|' << endl;
}

// Same output format as above, for a raw array of len UChars.
void
print(const UChar *s,
      int32_t len,
      const char *name)
{
  cout << name << ":|";
  for(int i = 0; i < len; ++i) {
    UChar c = s[i];
    if(c < 0x0020 || c >= 0x007E) {
      cout << "[0x" << hex << s[i] << "]";
    } else {
      cout << (char) s[i];
    }
  }
  cout << '|' << endl;
}
// End of debugging helpers
#endif
76 // Local function definitions for now
78 // need to copy areas that may overlap
79 static
80 inline void
81 us_arrayCopy(const UChar *src, int32_t srcStart,
82 UChar *dst, int32_t dstStart, int32_t count)
83 {
84 if(count>0) {
85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
86 }
87 }
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
90 U_CDECL_BEGIN
91 static UChar U_CALLCONV
92 UnicodeString_charAt(int32_t offset, void *context) {
93 return ((icu::UnicodeString*) context)->charAt(offset);
94 }
95 U_CDECL_END
97 U_NAMESPACE_BEGIN
99 /* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
101 */
102 Replaceable::~Replaceable() {}
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
108 return
109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
110 append(s1).
111 append(s2);
112 }
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
119 void
120 UnicodeString::addRef() {
121 umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
122 }
124 int32_t
125 UnicodeString::removeRef() {
126 return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
127 }
129 int32_t
130 UnicodeString::refCount() const {
131 return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
132 }
134 void
135 UnicodeString::releaseArray() {
136 if((fFlags & kRefCounted) && removeRef() == 0) {
137 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
138 }
139 }
143 //========================================
144 // Constructors
145 //========================================
147 // The default constructor is inline in unistr.h.
149 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
150 : fShortLength(0),
151 fFlags(0)
152 {
153 if(count <= 0 || (uint32_t)c > 0x10ffff) {
154 // just allocate and do not do anything else
155 allocate(capacity);
156 } else {
157 // count > 0, allocate and fill the new string with count c's
158 int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
159 if(capacity < length) {
160 capacity = length;
161 }
162 if(allocate(capacity)) {
163 UChar *array = getArrayStart();
164 int32_t i = 0;
166 // fill the new string with c
167 if(unitCount == 1) {
168 // fill with length UChars
169 while(i < length) {
170 array[i++] = (UChar)c;
171 }
172 } else {
173 // get the code units for c
174 UChar units[U16_MAX_LENGTH];
175 U16_APPEND_UNSAFE(units, i, c);
177 // now it must be i==unitCount
178 i = 0;
180 // for Unicode, unitCount can only be 1, 2, 3, or 4
181 // 1 is handled above
182 while(i < length) {
183 int32_t unitIdx = 0;
184 while(unitIdx < unitCount) {
185 array[i++]=units[unitIdx++];
186 }
187 }
188 }
189 }
190 setLength(length);
191 }
192 }
194 UnicodeString::UnicodeString(UChar ch)
195 : fShortLength(1),
196 fFlags(kShortString)
197 {
198 fUnion.fStackBuffer[0] = ch;
199 }
201 UnicodeString::UnicodeString(UChar32 ch)
202 : fShortLength(0),
203 fFlags(kShortString)
204 {
205 int32_t i = 0;
206 UBool isError = FALSE;
207 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
208 // We test isError so that the compiler does not complain that we don't.
209 // If isError then i==0 which is what we want anyway.
210 if(!isError) {
211 fShortLength = (int8_t)i;
212 }
213 }
215 UnicodeString::UnicodeString(const UChar *text)
216 : fShortLength(0),
217 fFlags(kShortString)
218 {
219 doReplace(0, 0, text, 0, -1);
220 }
222 UnicodeString::UnicodeString(const UChar *text,
223 int32_t textLength)
224 : fShortLength(0),
225 fFlags(kShortString)
226 {
227 doReplace(0, 0, text, 0, textLength);
228 }
230 UnicodeString::UnicodeString(UBool isTerminated,
231 const UChar *text,
232 int32_t textLength)
233 : fShortLength(0),
234 fFlags(kReadonlyAlias)
235 {
236 if(text == NULL) {
237 // treat as an empty string, do not alias
238 setToEmpty();
239 } else if(textLength < -1 ||
240 (textLength == -1 && !isTerminated) ||
241 (textLength >= 0 && isTerminated && text[textLength] != 0)
242 ) {
243 setToBogus();
244 } else {
245 if(textLength == -1) {
246 // text is terminated, or else it would have failed the above test
247 textLength = u_strlen(text);
248 }
249 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
250 }
251 }
253 UnicodeString::UnicodeString(UChar *buff,
254 int32_t buffLength,
255 int32_t buffCapacity)
256 : fShortLength(0),
257 fFlags(kWritableAlias)
258 {
259 if(buff == NULL) {
260 // treat as an empty string, do not alias
261 setToEmpty();
262 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
263 setToBogus();
264 } else {
265 if(buffLength == -1) {
266 // fLength = u_strlen(buff); but do not look beyond buffCapacity
267 const UChar *p = buff, *limit = buff + buffCapacity;
268 while(p != limit && *p != 0) {
269 ++p;
270 }
271 buffLength = (int32_t)(p - buff);
272 }
273 setArray(buff, buffLength, buffCapacity);
274 }
275 }
277 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
278 : fShortLength(0),
279 fFlags(kShortString)
280 {
281 if(src==NULL) {
282 // treat as an empty string
283 } else {
284 if(length<0) {
285 length=(int32_t)uprv_strlen(src);
286 }
287 if(cloneArrayIfNeeded(length, length, FALSE)) {
288 u_charsToUChars(src, getArrayStart(), length);
289 setLength(length);
290 } else {
291 setToBogus();
292 }
293 }
294 }
296 #if U_CHARSET_IS_UTF8
298 UnicodeString::UnicodeString(const char *codepageData)
299 : fShortLength(0),
300 fFlags(kShortString) {
301 if(codepageData != 0) {
302 setToUTF8(codepageData);
303 }
304 }
306 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
307 : fShortLength(0),
308 fFlags(kShortString) {
309 // if there's nothing to convert, do nothing
310 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
311 return;
312 }
313 if(dataLength == -1) {
314 dataLength = (int32_t)uprv_strlen(codepageData);
315 }
316 setToUTF8(StringPiece(codepageData, dataLength));
317 }
319 // else see unistr_cnv.cpp
320 #endif
322 UnicodeString::UnicodeString(const UnicodeString& that)
323 : Replaceable(),
324 fShortLength(0),
325 fFlags(kShortString)
326 {
327 copyFrom(that);
328 }
330 UnicodeString::UnicodeString(const UnicodeString& that,
331 int32_t srcStart)
332 : Replaceable(),
333 fShortLength(0),
334 fFlags(kShortString)
335 {
336 setTo(that, srcStart);
337 }
339 UnicodeString::UnicodeString(const UnicodeString& that,
340 int32_t srcStart,
341 int32_t srcLength)
342 : Replaceable(),
343 fShortLength(0),
344 fFlags(kShortString)
345 {
346 setTo(that, srcStart, srcLength);
347 }
349 // Replaceable base class clone() default implementation, does not clone
350 Replaceable *
351 Replaceable::clone() const {
352 return NULL;
353 }
355 // UnicodeString overrides clone() with a real implementation
356 Replaceable *
357 UnicodeString::clone() const {
358 return new UnicodeString(*this);
359 }
361 //========================================
362 // array allocation
363 //========================================
365 UBool
366 UnicodeString::allocate(int32_t capacity) {
367 if(capacity <= US_STACKBUF_SIZE) {
368 fFlags = kShortString;
369 } else {
370 // count bytes for the refCounter and the string capacity, and
371 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
372 // to be safely aligned for the refCount
373 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
374 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
375 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
376 if(array != 0) {
377 // set initial refCount and point behind the refCount
378 *array++ = 1;
380 // have fArray point to the first UChar
381 fUnion.fFields.fArray = (UChar *)array;
382 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
383 fFlags = kLongString;
384 } else {
385 fShortLength = 0;
386 fUnion.fFields.fArray = 0;
387 fUnion.fFields.fCapacity = 0;
388 fFlags = kIsBogus;
389 return FALSE;
390 }
391 }
392 return TRUE;
393 }
395 //========================================
396 // Destructor
397 //========================================
398 UnicodeString::~UnicodeString()
399 {
400 releaseArray();
401 }
403 //========================================
404 // Factory methods
405 //========================================
407 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
408 UnicodeString result;
409 result.setToUTF8(utf8);
410 return result;
411 }
413 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
414 UnicodeString result;
415 int32_t capacity;
416 // Most UTF-32 strings will be BMP-only and result in a same-length
417 // UTF-16 string. We overestimate the capacity just slightly,
418 // just in case there are a few supplementary characters.
419 if(length <= US_STACKBUF_SIZE) {
420 capacity = US_STACKBUF_SIZE;
421 } else {
422 capacity = length + (length >> 4) + 4;
423 }
424 do {
425 UChar *utf16 = result.getBuffer(capacity);
426 int32_t length16;
427 UErrorCode errorCode = U_ZERO_ERROR;
428 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
429 utf32, length,
430 0xfffd, // Substitution character.
431 NULL, // Don't care about number of substitutions.
432 &errorCode);
433 result.releaseBuffer(length16);
434 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
435 capacity = length16 + 1; // +1 for the terminating NUL.
436 continue;
437 } else if(U_FAILURE(errorCode)) {
438 result.setToBogus();
439 }
440 break;
441 } while(TRUE);
442 return result;
443 }
445 //========================================
446 // Assignment
447 //========================================
449 UnicodeString &
450 UnicodeString::operator=(const UnicodeString &src) {
451 return copyFrom(src);
452 }
454 UnicodeString &
455 UnicodeString::fastCopyFrom(const UnicodeString &src) {
456 return copyFrom(src, TRUE);
457 }
459 UnicodeString &
460 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
461 // if assigning to ourselves, do nothing
462 if(this == 0 || this == &src) {
463 return *this;
464 }
466 // is the right side bogus?
467 if(&src == 0 || src.isBogus()) {
468 setToBogus();
469 return *this;
470 }
472 // delete the current contents
473 releaseArray();
475 if(src.isEmpty()) {
476 // empty string - use the stack buffer
477 setToEmpty();
478 return *this;
479 }
481 // we always copy the length
482 int32_t srcLength = src.length();
483 setLength(srcLength);
485 // fLength>0 and not an "open" src.getBuffer(minCapacity)
486 switch(src.fFlags) {
487 case kShortString:
488 // short string using the stack buffer, do the same
489 fFlags = kShortString;
490 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
491 break;
492 case kLongString:
493 // src uses a refCounted string buffer, use that buffer with refCount
494 // src is const, use a cast - we don't really change it
495 ((UnicodeString &)src).addRef();
496 // copy all fields, share the reference-counted buffer
497 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
498 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
499 fFlags = src.fFlags;
500 break;
501 case kReadonlyAlias:
502 if(fastCopy) {
503 // src is a readonly alias, do the same
504 // -> maintain the readonly alias as such
505 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
506 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
507 fFlags = src.fFlags;
508 break;
509 }
510 // else if(!fastCopy) fall through to case kWritableAlias
511 // -> allocate a new buffer and copy the contents
512 case kWritableAlias:
513 // src is a writable alias; we make a copy of that instead
514 if(allocate(srcLength)) {
515 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
516 break;
517 }
518 // if there is not enough memory, then fall through to setting to bogus
519 default:
520 // if src is bogus, set ourselves to bogus
521 // do not call setToBogus() here because fArray and fFlags are not consistent here
522 fShortLength = 0;
523 fUnion.fFields.fArray = 0;
524 fUnion.fFields.fCapacity = 0;
525 fFlags = kIsBogus;
526 break;
527 }
529 return *this;
530 }
532 //========================================
533 // Miscellaneous operations
534 //========================================
536 UnicodeString UnicodeString::unescape() const {
537 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
538 const UChar *array = getBuffer();
539 int32_t len = length();
540 int32_t prev = 0;
541 for (int32_t i=0;;) {
542 if (i == len) {
543 result.append(array, prev, len - prev);
544 break;
545 }
546 if (array[i++] == 0x5C /*'\\'*/) {
547 result.append(array, prev, (i - 1) - prev);
548 UChar32 c = unescapeAt(i); // advances i
549 if (c < 0) {
550 result.remove(); // return empty string
551 break; // invalid escape sequence
552 }
553 result.append(c);
554 prev = i;
555 }
556 }
557 return result;
558 }
560 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
561 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
562 }
564 //========================================
565 // Read-only implementation
566 //========================================
567 UBool
568 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
569 // Requires: this & text not bogus and have same lengths.
570 // Byte-wise comparison works for equality regardless of endianness.
571 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
572 }
574 int8_t
575 UnicodeString::doCompare( int32_t start,
576 int32_t length,
577 const UChar *srcChars,
578 int32_t srcStart,
579 int32_t srcLength) const
580 {
581 // compare illegal string values
582 if(isBogus()) {
583 return -1;
584 }
586 // pin indices to legal values
587 pinIndices(start, length);
589 if(srcChars == NULL) {
590 // treat const UChar *srcChars==NULL as an empty string
591 return length == 0 ? 0 : 1;
592 }
594 // get the correct pointer
595 const UChar *chars = getArrayStart();
597 chars += start;
598 srcChars += srcStart;
600 int32_t minLength;
601 int8_t lengthResult;
603 // get the srcLength if necessary
604 if(srcLength < 0) {
605 srcLength = u_strlen(srcChars + srcStart);
606 }
608 // are we comparing different lengths?
609 if(length != srcLength) {
610 if(length < srcLength) {
611 minLength = length;
612 lengthResult = -1;
613 } else {
614 minLength = srcLength;
615 lengthResult = 1;
616 }
617 } else {
618 minLength = length;
619 lengthResult = 0;
620 }
622 /*
623 * note that uprv_memcmp() returns an int but we return an int8_t;
624 * we need to take care not to truncate the result -
625 * one way to do this is to right-shift the value to
626 * move the sign bit into the lower 8 bits and making sure that this
627 * does not become 0 itself
628 */
630 if(minLength > 0 && chars != srcChars) {
631 int32_t result;
633 # if U_IS_BIG_ENDIAN
634 // big-endian: byte comparison works
635 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
636 if(result != 0) {
637 return (int8_t)(result >> 15 | 1);
638 }
639 # else
640 // little-endian: compare UChar units
641 do {
642 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
643 if(result != 0) {
644 return (int8_t)(result >> 15 | 1);
645 }
646 } while(--minLength > 0);
647 # endif
648 }
649 return lengthResult;
650 }
652 /* String compare in code point order - doCompare() compares in code unit order. */
653 int8_t
654 UnicodeString::doCompareCodePointOrder(int32_t start,
655 int32_t length,
656 const UChar *srcChars,
657 int32_t srcStart,
658 int32_t srcLength) const
659 {
660 // compare illegal string values
661 // treat const UChar *srcChars==NULL as an empty string
662 if(isBogus()) {
663 return -1;
664 }
666 // pin indices to legal values
667 pinIndices(start, length);
669 if(srcChars == NULL) {
670 srcStart = srcLength = 0;
671 }
673 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
674 /* translate the 32-bit result into an 8-bit one */
675 if(diff!=0) {
676 return (int8_t)(diff >> 15 | 1);
677 } else {
678 return 0;
679 }
680 }
682 int32_t
683 UnicodeString::getLength() const {
684 return length();
685 }
687 UChar
688 UnicodeString::getCharAt(int32_t offset) const {
689 return charAt(offset);
690 }
692 UChar32
693 UnicodeString::getChar32At(int32_t offset) const {
694 return char32At(offset);
695 }
697 UChar32
698 UnicodeString::char32At(int32_t offset) const
699 {
700 int32_t len = length();
701 if((uint32_t)offset < (uint32_t)len) {
702 const UChar *array = getArrayStart();
703 UChar32 c;
704 U16_GET(array, 0, offset, len, c);
705 return c;
706 } else {
707 return kInvalidUChar;
708 }
709 }
711 int32_t
712 UnicodeString::getChar32Start(int32_t offset) const {
713 if((uint32_t)offset < (uint32_t)length()) {
714 const UChar *array = getArrayStart();
715 U16_SET_CP_START(array, 0, offset);
716 return offset;
717 } else {
718 return 0;
719 }
720 }
722 int32_t
723 UnicodeString::getChar32Limit(int32_t offset) const {
724 int32_t len = length();
725 if((uint32_t)offset < (uint32_t)len) {
726 const UChar *array = getArrayStart();
727 U16_SET_CP_LIMIT(array, 0, offset, len);
728 return offset;
729 } else {
730 return len;
731 }
732 }
734 int32_t
735 UnicodeString::countChar32(int32_t start, int32_t length) const {
736 pinIndices(start, length);
737 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
738 return u_countChar32(getArrayStart()+start, length);
739 }
741 UBool
742 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
743 pinIndices(start, length);
744 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
745 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
746 }
748 int32_t
749 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
750 // pin index
751 int32_t len = length();
752 if(index<0) {
753 index=0;
754 } else if(index>len) {
755 index=len;
756 }
758 const UChar *array = getArrayStart();
759 if(delta>0) {
760 U16_FWD_N(array, index, len, delta);
761 } else {
762 U16_BACK_N(array, 0, index, -delta);
763 }
765 return index;
766 }
768 void
769 UnicodeString::doExtract(int32_t start,
770 int32_t length,
771 UChar *dst,
772 int32_t dstStart) const
773 {
774 // pin indices to legal values
775 pinIndices(start, length);
777 // do not copy anything if we alias dst itself
778 const UChar *array = getArrayStart();
779 if(array + start != dst + dstStart) {
780 us_arrayCopy(array, start, dst, dstStart, length);
781 }
782 }
784 int32_t
785 UnicodeString::extract(UChar *dest, int32_t destCapacity,
786 UErrorCode &errorCode) const {
787 int32_t len = length();
788 if(U_SUCCESS(errorCode)) {
789 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
790 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
791 } else {
792 const UChar *array = getArrayStart();
793 if(len>0 && len<=destCapacity && array!=dest) {
794 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
795 }
796 return u_terminateUChars(dest, destCapacity, len, &errorCode);
797 }
798 }
800 return len;
801 }
803 int32_t
804 UnicodeString::extract(int32_t start,
805 int32_t length,
806 char *target,
807 int32_t targetCapacity,
808 enum EInvariant) const
809 {
810 // if the arguments are illegal, then do nothing
811 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
812 return 0;
813 }
815 // pin the indices to legal values
816 pinIndices(start, length);
818 if(length <= targetCapacity) {
819 u_UCharsToChars(getArrayStart() + start, target, length);
820 }
821 UErrorCode status = U_ZERO_ERROR;
822 return u_terminateChars(target, targetCapacity, length, &status);
823 }
825 UnicodeString
826 UnicodeString::tempSubString(int32_t start, int32_t len) const {
827 pinIndices(start, len);
828 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
829 if(array==NULL) {
830 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string
831 len=-2; // bogus result string
832 }
833 return UnicodeString(FALSE, array + start, len);
834 }
836 int32_t
837 UnicodeString::toUTF8(int32_t start, int32_t len,
838 char *target, int32_t capacity) const {
839 pinIndices(start, len);
840 int32_t length8;
841 UErrorCode errorCode = U_ZERO_ERROR;
842 u_strToUTF8WithSub(target, capacity, &length8,
843 getBuffer() + start, len,
844 0xFFFD, // Standard substitution character.
845 NULL, // Don't care about number of substitutions.
846 &errorCode);
847 return length8;
848 }
850 #if U_CHARSET_IS_UTF8
852 int32_t
853 UnicodeString::extract(int32_t start, int32_t len,
854 char *target, uint32_t dstSize) const {
855 // if the arguments are illegal, then do nothing
856 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
857 return 0;
858 }
859 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
860 }
862 // else see unistr_cnv.cpp
863 #endif
865 void
866 UnicodeString::extractBetween(int32_t start,
867 int32_t limit,
868 UnicodeString& target) const {
869 pinIndex(start);
870 pinIndex(limit);
871 doExtract(start, limit - start, target);
872 }
874 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
875 // as many bytes as the source has UChars.
876 // The "worst cases" are writing systems like Indic, Thai and CJK with
877 // 3:1 bytes:UChars.
878 void
879 UnicodeString::toUTF8(ByteSink &sink) const {
880 int32_t length16 = length();
881 if(length16 != 0) {
882 char stackBuffer[1024];
883 int32_t capacity = (int32_t)sizeof(stackBuffer);
884 UBool utf8IsOwned = FALSE;
885 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
886 3*length16,
887 stackBuffer, capacity,
888 &capacity);
889 int32_t length8 = 0;
890 UErrorCode errorCode = U_ZERO_ERROR;
891 u_strToUTF8WithSub(utf8, capacity, &length8,
892 getBuffer(), length16,
893 0xFFFD, // Standard substitution character.
894 NULL, // Don't care about number of substitutions.
895 &errorCode);
896 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
897 utf8 = (char *)uprv_malloc(length8);
898 if(utf8 != NULL) {
899 utf8IsOwned = TRUE;
900 errorCode = U_ZERO_ERROR;
901 u_strToUTF8WithSub(utf8, length8, &length8,
902 getBuffer(), length16,
903 0xFFFD, // Standard substitution character.
904 NULL, // Don't care about number of substitutions.
905 &errorCode);
906 } else {
907 errorCode = U_MEMORY_ALLOCATION_ERROR;
908 }
909 }
910 if(U_SUCCESS(errorCode)) {
911 sink.Append(utf8, length8);
912 sink.Flush();
913 }
914 if(utf8IsOwned) {
915 uprv_free(utf8);
916 }
917 }
918 }
920 int32_t
921 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
922 int32_t length32=0;
923 if(U_SUCCESS(errorCode)) {
924 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
925 u_strToUTF32WithSub(utf32, capacity, &length32,
926 getBuffer(), length(),
927 0xfffd, // Substitution character.
928 NULL, // Don't care about number of substitutions.
929 &errorCode);
930 }
931 return length32;
932 }
934 int32_t
935 UnicodeString::indexOf(const UChar *srcChars,
936 int32_t srcStart,
937 int32_t srcLength,
938 int32_t start,
939 int32_t length) const
940 {
941 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
942 return -1;
943 }
945 // UnicodeString does not find empty substrings
946 if(srcLength < 0 && srcChars[srcStart] == 0) {
947 return -1;
948 }
950 // get the indices within bounds
951 pinIndices(start, length);
953 // find the first occurrence of the substring
954 const UChar *array = getArrayStart();
955 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
956 if(match == NULL) {
957 return -1;
958 } else {
959 return (int32_t)(match - array);
960 }
961 }
963 int32_t
964 UnicodeString::doIndexOf(UChar c,
965 int32_t start,
966 int32_t length) const
967 {
968 // pin indices
969 pinIndices(start, length);
971 // find the first occurrence of c
972 const UChar *array = getArrayStart();
973 const UChar *match = u_memchr(array + start, c, length);
974 if(match == NULL) {
975 return -1;
976 } else {
977 return (int32_t)(match - array);
978 }
979 }
981 int32_t
982 UnicodeString::doIndexOf(UChar32 c,
983 int32_t start,
984 int32_t length) const {
985 // pin indices
986 pinIndices(start, length);
988 // find the first occurrence of c
989 const UChar *array = getArrayStart();
990 const UChar *match = u_memchr32(array + start, c, length);
991 if(match == NULL) {
992 return -1;
993 } else {
994 return (int32_t)(match - array);
995 }
996 }
998 int32_t
999 UnicodeString::lastIndexOf(const UChar *srcChars,
1000 int32_t srcStart,
1001 int32_t srcLength,
1002 int32_t start,
1003 int32_t length) const
1004 {
1005 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1006 return -1;
1007 }
1009 // UnicodeString does not find empty substrings
1010 if(srcLength < 0 && srcChars[srcStart] == 0) {
1011 return -1;
1012 }
1014 // get the indices within bounds
1015 pinIndices(start, length);
1017 // find the last occurrence of the substring
1018 const UChar *array = getArrayStart();
1019 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1020 if(match == NULL) {
1021 return -1;
1022 } else {
1023 return (int32_t)(match - array);
1024 }
1025 }
1027 int32_t
1028 UnicodeString::doLastIndexOf(UChar c,
1029 int32_t start,
1030 int32_t length) const
1031 {
1032 if(isBogus()) {
1033 return -1;
1034 }
1036 // pin indices
1037 pinIndices(start, length);
1039 // find the last occurrence of c
1040 const UChar *array = getArrayStart();
1041 const UChar *match = u_memrchr(array + start, c, length);
1042 if(match == NULL) {
1043 return -1;
1044 } else {
1045 return (int32_t)(match - array);
1046 }
1047 }
1049 int32_t
1050 UnicodeString::doLastIndexOf(UChar32 c,
1051 int32_t start,
1052 int32_t length) const {
1053 // pin indices
1054 pinIndices(start, length);
1056 // find the last occurrence of c
1057 const UChar *array = getArrayStart();
1058 const UChar *match = u_memrchr32(array + start, c, length);
1059 if(match == NULL) {
1060 return -1;
1061 } else {
1062 return (int32_t)(match - array);
1063 }
1064 }
1066 //========================================
1067 // Write implementation
1068 //========================================
1070 UnicodeString&
1071 UnicodeString::findAndReplace(int32_t start,
1072 int32_t length,
1073 const UnicodeString& oldText,
1074 int32_t oldStart,
1075 int32_t oldLength,
1076 const UnicodeString& newText,
1077 int32_t newStart,
1078 int32_t newLength)
1079 {
1080 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1081 return *this;
1082 }
1084 pinIndices(start, length);
1085 oldText.pinIndices(oldStart, oldLength);
1086 newText.pinIndices(newStart, newLength);
1088 if(oldLength == 0) {
1089 return *this;
1090 }
1092 while(length > 0 && length >= oldLength) {
1093 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1094 if(pos < 0) {
1095 // no more oldText's here: done
1096 break;
1097 } else {
1098 // we found oldText, replace it by newText and go beyond it
1099 replace(pos, oldLength, newText, newStart, newLength);
1100 length -= pos + oldLength - start;
1101 start = pos + newLength;
1102 }
1103 }
1105 return *this;
1106 }
1109 void
1110 UnicodeString::setToBogus()
1111 {
1112 releaseArray();
1114 fShortLength = 0;
1115 fUnion.fFields.fArray = 0;
1116 fUnion.fFields.fCapacity = 0;
1117 fFlags = kIsBogus;
1118 }
1120 // turn a bogus string into an empty one
1121 void
1122 UnicodeString::unBogus() {
1123 if(fFlags & kIsBogus) {
1124 setToEmpty();
1125 }
1126 }
1128 const UChar *
1129 UnicodeString::getTerminatedBuffer() {
1130 if(!isWritable()) {
1131 return 0;
1132 }
1133 UChar *array = getArrayStart();
1134 int32_t len = length();
1135 if(len < getCapacity()) {
1136 if(fFlags & kBufferIsReadonly) {
1137 // If len<capacity on a read-only alias, then array[len] is
1138 // either the original NUL (if constructed with (TRUE, s, length))
1139 // or one of the original string contents characters (if later truncated),
1140 // therefore we can assume that array[len] is initialized memory.
1141 if(array[len] == 0) {
1142 return array;
1143 }
1144 } else if(((fFlags & kRefCounted) == 0 || refCount() == 1)) {
1145 // kRefCounted: Do not write the NUL if the buffer is shared.
1146 // That is mostly safe, except when the length of one copy was modified
1147 // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1148 // Then the NUL would be written into the middle of another copy's string.
1150 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1151 // Do not test if there is a NUL already because it might be uninitialized memory.
1152 // (That would be safe, but tools like valgrind & Purify would complain.)
1153 array[len] = 0;
1154 return array;
1155 }
1156 }
1157 if(cloneArrayIfNeeded(len+1)) {
1158 array = getArrayStart();
1159 array[len] = 0;
1160 return array;
1161 } else {
1162 return NULL;
1163 }
1164 }
1166 // setTo() analogous to the readonly-aliasing constructor with the same signature
1167 UnicodeString &
1168 UnicodeString::setTo(UBool isTerminated,
1169 const UChar *text,
1170 int32_t textLength)
1171 {
1172 if(fFlags & kOpenGetBuffer) {
1173 // do not modify a string that has an "open" getBuffer(minCapacity)
1174 return *this;
1175 }
1177 if(text == NULL) {
1178 // treat as an empty string, do not alias
1179 releaseArray();
1180 setToEmpty();
1181 return *this;
1182 }
1184 if( textLength < -1 ||
1185 (textLength == -1 && !isTerminated) ||
1186 (textLength >= 0 && isTerminated && text[textLength] != 0)
1187 ) {
1188 setToBogus();
1189 return *this;
1190 }
1192 releaseArray();
1194 if(textLength == -1) {
1195 // text is terminated, or else it would have failed the above test
1196 textLength = u_strlen(text);
1197 }
1198 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1200 fFlags = kReadonlyAlias;
1201 return *this;
1202 }
1204 // setTo() analogous to the writable-aliasing constructor with the same signature
1205 UnicodeString &
1206 UnicodeString::setTo(UChar *buffer,
1207 int32_t buffLength,
1208 int32_t buffCapacity) {
1209 if(fFlags & kOpenGetBuffer) {
1210 // do not modify a string that has an "open" getBuffer(minCapacity)
1211 return *this;
1212 }
1214 if(buffer == NULL) {
1215 // treat as an empty string, do not alias
1216 releaseArray();
1217 setToEmpty();
1218 return *this;
1219 }
1221 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1222 setToBogus();
1223 return *this;
1224 } else if(buffLength == -1) {
1225 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1226 const UChar *p = buffer, *limit = buffer + buffCapacity;
1227 while(p != limit && *p != 0) {
1228 ++p;
1229 }
1230 buffLength = (int32_t)(p - buffer);
1231 }
1233 releaseArray();
1235 setArray(buffer, buffLength, buffCapacity);
1236 fFlags = kWritableAlias;
1237 return *this;
1238 }
1240 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1241 unBogus();
1242 int32_t length = utf8.length();
1243 int32_t capacity;
1244 // The UTF-16 string will be at most as long as the UTF-8 string.
1245 if(length <= US_STACKBUF_SIZE) {
1246 capacity = US_STACKBUF_SIZE;
1247 } else {
1248 capacity = length + 1; // +1 for the terminating NUL.
1249 }
1250 UChar *utf16 = getBuffer(capacity);
1251 int32_t length16;
1252 UErrorCode errorCode = U_ZERO_ERROR;
1253 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1254 utf8.data(), length,
1255 0xfffd, // Substitution character.
1256 NULL, // Don't care about number of substitutions.
1257 &errorCode);
1258 releaseBuffer(length16);
1259 if(U_FAILURE(errorCode)) {
1260 setToBogus();
1261 }
1262 return *this;
1263 }
1265 UnicodeString&
1266 UnicodeString::setCharAt(int32_t offset,
1267 UChar c)
1268 {
1269 int32_t len = length();
1270 if(cloneArrayIfNeeded() && len > 0) {
1271 if(offset < 0) {
1272 offset = 0;
1273 } else if(offset >= len) {
1274 offset = len - 1;
1275 }
1277 getArrayStart()[offset] = c;
1278 }
1279 return *this;
1280 }
1282 UnicodeString&
1283 UnicodeString::replace(int32_t start,
1284 int32_t _length,
1285 UChar32 srcChar) {
1286 UChar buffer[U16_MAX_LENGTH];
1287 int32_t count = 0;
1288 UBool isError = FALSE;
1289 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1290 // We test isError so that the compiler does not complain that we don't.
1291 // If isError (srcChar is not a valid code point) then count==0 which means
1292 // we remove the source segment rather than replacing it with srcChar.
1293 return doReplace(start, _length, buffer, 0, isError ? 0 : count);
1294 }
1296 UnicodeString&
1297 UnicodeString::append(UChar32 srcChar) {
1298 UChar buffer[U16_MAX_LENGTH];
1299 int32_t _length = 0;
1300 UBool isError = FALSE;
1301 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1302 // We test isError so that the compiler does not complain that we don't.
1303 // If isError then _length==0 which turns the doReplace() into a no-op anyway.
1304 return isError ? *this : doReplace(length(), 0, buffer, 0, _length);
1305 }
1307 UnicodeString&
1308 UnicodeString::doReplace( int32_t start,
1309 int32_t length,
1310 const UnicodeString& src,
1311 int32_t srcStart,
1312 int32_t srcLength)
1313 {
1314 if(!src.isBogus()) {
1315 // pin the indices to legal values
1316 src.pinIndices(srcStart, srcLength);
1318 // get the characters from src
1319 // and replace the range in ourselves with them
1320 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1321 } else {
1322 // remove the range
1323 return doReplace(start, length, 0, 0, 0);
1324 }
1325 }
1327 UnicodeString&
1328 UnicodeString::doReplace(int32_t start,
1329 int32_t length,
1330 const UChar *srcChars,
1331 int32_t srcStart,
1332 int32_t srcLength)
1333 {
1334 if(!isWritable()) {
1335 return *this;
1336 }
1338 int32_t oldLength = this->length();
1340 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1341 if((fFlags&kBufferIsReadonly) && srcLength == 0) {
1342 if(start == 0) {
1343 // remove prefix by adjusting the array pointer
1344 pinIndex(length);
1345 fUnion.fFields.fArray += length;
1346 fUnion.fFields.fCapacity -= length;
1347 setLength(oldLength - length);
1348 return *this;
1349 } else {
1350 pinIndex(start);
1351 if(length >= (oldLength - start)) {
1352 // remove suffix by reducing the length (like truncate())
1353 setLength(start);
1354 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1355 return *this;
1356 }
1357 }
1358 }
1360 if(srcChars == 0) {
1361 srcStart = srcLength = 0;
1362 } else if(srcLength < 0) {
1363 // get the srcLength if necessary
1364 srcLength = u_strlen(srcChars + srcStart);
1365 }
1367 // calculate the size of the string after the replace
1368 int32_t newLength;
1370 // optimize append() onto a large-enough, owned string
1371 if(start >= oldLength) {
1372 if(srcLength == 0) {
1373 return *this;
1374 }
1375 newLength = oldLength + srcLength;
1376 if(newLength <= getCapacity() && isBufferWritable()) {
1377 UChar *oldArray = getArrayStart();
1378 // Do not copy characters when
1379 // UChar *buffer=str.getAppendBuffer(...);
1380 // is followed by
1381 // str.append(buffer, length);
1382 // or
1383 // str.appendString(buffer, length)
1384 // or similar.
1385 if(srcChars + srcStart != oldArray + start || start > oldLength) {
1386 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
1387 }
1388 setLength(newLength);
1389 return *this;
1390 } else {
1391 // pin the indices to legal values
1392 start = oldLength;
1393 length = 0;
1394 }
1395 } else {
1396 // pin the indices to legal values
1397 pinIndices(start, length);
1399 newLength = oldLength - length + srcLength;
1400 }
1402 // the following may change fArray but will not copy the current contents;
1403 // therefore we need to keep the current fArray
1404 UChar oldStackBuffer[US_STACKBUF_SIZE];
1405 UChar *oldArray;
1406 if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1407 // copy the stack buffer contents because it will be overwritten with
1408 // fUnion.fFields values
1409 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1410 oldArray = oldStackBuffer;
1411 } else {
1412 oldArray = getArrayStart();
1413 }
1415 // clone our array and allocate a bigger array if needed
1416 int32_t *bufferToDelete = 0;
1417 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
1418 FALSE, &bufferToDelete)
1419 ) {
1420 return *this;
1421 }
1423 // now do the replace
1425 UChar *newArray = getArrayStart();
1426 if(newArray != oldArray) {
1427 // if fArray changed, then we need to copy everything except what will change
1428 us_arrayCopy(oldArray, 0, newArray, 0, start);
1429 us_arrayCopy(oldArray, start + length,
1430 newArray, start + srcLength,
1431 oldLength - (start + length));
1432 } else if(length != srcLength) {
1433 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1434 us_arrayCopy(oldArray, start + length,
1435 newArray, start + srcLength,
1436 oldLength - (start + length));
1437 }
1439 // now fill in the hole with the new string
1440 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1442 setLength(newLength);
1444 // delayed delete in case srcChars == fArray when we started, and
1445 // to keep oldArray alive for the above operations
1446 if (bufferToDelete) {
1447 uprv_free(bufferToDelete);
1448 }
1450 return *this;
1451 }
1453 /**
1454 * Replaceable API
1455 */
1456 void
1457 UnicodeString::handleReplaceBetween(int32_t start,
1458 int32_t limit,
1459 const UnicodeString& text) {
1460 replaceBetween(start, limit, text);
1461 }
1463 /**
1464 * Replaceable API
1465 */
1466 void
1467 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1468 if (limit <= start) {
1469 return; // Nothing to do; avoid bogus malloc call
1470 }
1471 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1472 // Check to make sure text is not null.
1473 if (text != NULL) {
1474 extractBetween(start, limit, text, 0);
1475 insert(dest, text, 0, limit - start);
1476 uprv_free(text);
1477 }
1478 }
1480 /**
1481 * Replaceable API
1482 *
1483 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1484 * so we implement this function here.
1485 */
1486 UBool Replaceable::hasMetaData() const {
1487 return TRUE;
1488 }
1490 /**
1491 * Replaceable API
1492 */
1493 UBool UnicodeString::hasMetaData() const {
1494 return FALSE;
1495 }
1497 UnicodeString&
1498 UnicodeString::doReverse(int32_t start, int32_t length) {
1499 if(length <= 1 || !cloneArrayIfNeeded()) {
1500 return *this;
1501 }
1503 // pin the indices to legal values
1504 pinIndices(start, length);
1505 if(length <= 1) { // pinIndices() might have shrunk the length
1506 return *this;
1507 }
1509 UChar *left = getArrayStart() + start;
1510 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
1511 UChar swap;
1512 UBool hasSupplementary = FALSE;
1514 // Before the loop we know left<right because length>=2.
1515 do {
1516 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1517 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1518 *right-- = swap;
1519 } while(left < right);
1520 // Make sure to test the middle code unit of an odd-length string.
1521 // Redundant if the length is even.
1522 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1524 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1525 if(hasSupplementary) {
1526 UChar swap2;
1528 left = getArrayStart() + start;
1529 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1530 while(left < right) {
1531 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1532 *left++ = swap2;
1533 *left++ = swap;
1534 } else {
1535 ++left;
1536 }
1537 }
1538 }
1540 return *this;
1541 }
1543 UBool
1544 UnicodeString::padLeading(int32_t targetLength,
1545 UChar padChar)
1546 {
1547 int32_t oldLength = length();
1548 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1549 return FALSE;
1550 } else {
1551 // move contents up by padding width
1552 UChar *array = getArrayStart();
1553 int32_t start = targetLength - oldLength;
1554 us_arrayCopy(array, 0, array, start, oldLength);
1556 // fill in padding character
1557 while(--start >= 0) {
1558 array[start] = padChar;
1559 }
1560 setLength(targetLength);
1561 return TRUE;
1562 }
1563 }
1565 UBool
1566 UnicodeString::padTrailing(int32_t targetLength,
1567 UChar padChar)
1568 {
1569 int32_t oldLength = length();
1570 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1571 return FALSE;
1572 } else {
1573 // fill in padding character
1574 UChar *array = getArrayStart();
1575 int32_t length = targetLength;
1576 while(--length >= oldLength) {
1577 array[length] = padChar;
1578 }
1579 setLength(targetLength);
1580 return TRUE;
1581 }
1582 }
1584 //========================================
1585 // Hashing
1586 //========================================
1587 int32_t
1588 UnicodeString::doHashCode() const
1589 {
1590 /* Delegate hash computation to uhash. This makes UnicodeString
1591 * hashing consistent with UChar* hashing. */
1592 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1593 if (hashCode == kInvalidHashCode) {
1594 hashCode = kEmptyHashCode;
1595 }
1596 return hashCode;
1597 }
1599 //========================================
1600 // External Buffer
1601 //========================================
1603 UChar *
1604 UnicodeString::getBuffer(int32_t minCapacity) {
1605 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1606 fFlags|=kOpenGetBuffer;
1607 fShortLength=0;
1608 return getArrayStart();
1609 } else {
1610 return 0;
1611 }
1612 }
1614 void
1615 UnicodeString::releaseBuffer(int32_t newLength) {
1616 if(fFlags&kOpenGetBuffer && newLength>=-1) {
1617 // set the new fLength
1618 int32_t capacity=getCapacity();
1619 if(newLength==-1) {
1620 // the new length is the string length, capped by fCapacity
1621 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1622 while(p<limit && *p!=0) {
1623 ++p;
1624 }
1625 newLength=(int32_t)(p-array);
1626 } else if(newLength>capacity) {
1627 newLength=capacity;
1628 }
1629 setLength(newLength);
1630 fFlags&=~kOpenGetBuffer;
1631 }
1632 }
1634 //========================================
1635 // Miscellaneous
1636 //========================================
1637 UBool
1638 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1639 int32_t growCapacity,
1640 UBool doCopyArray,
1641 int32_t **pBufferToDelete,
1642 UBool forceClone) {
1643 // default parameters need to be static, therefore
1644 // the defaults are -1 to have convenience defaults
1645 if(newCapacity == -1) {
1646 newCapacity = getCapacity();
1647 }
1649 // while a getBuffer(minCapacity) is "open",
1650 // prevent any modifications of the string by returning FALSE here
1651 // if the string is bogus, then only an assignment or similar can revive it
1652 if(!isWritable()) {
1653 return FALSE;
1654 }
1656 /*
1657 * We need to make a copy of the array if
1658 * the buffer is read-only, or
1659 * the buffer is refCounted (shared), and refCount>1, or
1660 * the buffer is too small.
1661 * Return FALSE if memory could not be allocated.
1662 */
1663 if(forceClone ||
1664 fFlags & kBufferIsReadonly ||
1665 (fFlags & kRefCounted && refCount() > 1) ||
1666 newCapacity > getCapacity()
1667 ) {
1668 // check growCapacity for default value and use of the stack buffer
1669 if(growCapacity < 0) {
1670 growCapacity = newCapacity;
1671 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1672 growCapacity = US_STACKBUF_SIZE;
1673 }
1675 // save old values
1676 UChar oldStackBuffer[US_STACKBUF_SIZE];
1677 UChar *oldArray;
1678 uint8_t flags = fFlags;
1680 if(flags&kUsingStackBuffer) {
1681 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1682 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1683 // copy the stack buffer contents because it will be overwritten with
1684 // fUnion.fFields values
1685 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1686 oldArray = oldStackBuffer;
1687 } else {
1688 oldArray = 0; // no need to copy from stack buffer to itself
1689 }
1690 } else {
1691 oldArray = fUnion.fFields.fArray;
1692 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1693 }
1695 // allocate a new array
1696 if(allocate(growCapacity) ||
1697 (newCapacity < growCapacity && allocate(newCapacity))
1698 ) {
1699 if(doCopyArray && oldArray != 0) {
1700 // copy the contents
1701 // do not copy more than what fits - it may be smaller than before
1702 int32_t minLength = length();
1703 newCapacity = getCapacity();
1704 if(newCapacity < minLength) {
1705 minLength = newCapacity;
1706 setLength(minLength);
1707 }
1708 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1709 } else {
1710 fShortLength = 0;
1711 }
1713 // release the old array
1714 if(flags & kRefCounted) {
1715 // the array is refCounted; decrement and release if 0
1716 u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
1717 if(umtx_atomic_dec(pRefCount) == 0) {
1718 if(pBufferToDelete == 0) {
1719 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1720 // is defined as volatile. (Volatile has useful non-standard behavior
1721 // with this compiler.)
1722 uprv_free((void *)pRefCount);
1723 } else {
1724 // the caller requested to delete it himself
1725 *pBufferToDelete = (int32_t *)pRefCount;
1726 }
1727 }
1728 }
1729 } else {
1730 // not enough memory for growCapacity and not even for the smaller newCapacity
1731 // reset the old values for setToBogus() to release the array
1732 if(!(flags&kUsingStackBuffer)) {
1733 fUnion.fFields.fArray = oldArray;
1734 }
1735 fFlags = flags;
1736 setToBogus();
1737 return FALSE;
1738 }
1739 }
1740 return TRUE;
1741 }
1743 // UnicodeStringAppendable ------------------------------------------------- ***
1745 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1747 UBool
1748 UnicodeStringAppendable::appendCodeUnit(UChar c) {
1749 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
1750 }
1752 UBool
1753 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1754 UChar buffer[U16_MAX_LENGTH];
1755 int32_t cLength = 0;
1756 UBool isError = FALSE;
1757 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1758 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
1759 }
1761 UBool
1762 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
1763 return str.doReplace(str.length(), 0, s, 0, length).isWritable();
1764 }
1766 UBool
1767 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1768 return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1769 }
1771 UChar *
1772 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1773 int32_t desiredCapacityHint,
1774 UChar *scratch, int32_t scratchCapacity,
1775 int32_t *resultCapacity) {
1776 if(minCapacity < 1 || scratchCapacity < minCapacity) {
1777 *resultCapacity = 0;
1778 return NULL;
1779 }
1780 int32_t oldLength = str.length();
1781 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1782 *resultCapacity = str.getCapacity() - oldLength;
1783 return str.getArrayStart() + oldLength;
1784 }
1785 *resultCapacity = scratchCapacity;
1786 return scratch;
1787 }
1789 U_NAMESPACE_END
1791 U_NAMESPACE_USE
1793 U_CAPI int32_t U_EXPORT2
1794 uhash_hashUnicodeString(const UElement key) {
1795 const UnicodeString *str = (const UnicodeString*) key.pointer;
1796 return (str == NULL) ? 0 : str->hashCode();
1797 }
1799 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1800 // does not depend on hashtable code.
1801 U_CAPI UBool U_EXPORT2
1802 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
1803 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1804 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1805 if (str1 == str2) {
1806 return TRUE;
1807 }
1808 if (str1 == NULL || str2 == NULL) {
1809 return FALSE;
1810 }
1811 return *str1 == *str2;
1812 }
#ifdef U_STATIC_IMPLEMENTATION
/*
This function is never meant to be called. It exists only so that the
virtual vector deleting destructor gets defined within unistr.cpp.
The vector deleting destructor is already a part of UObject,
but defining it here makes sure that it is included with this object file.
This keeps static library dependencies to a minimum.
*/
static void uprv_UnicodeStringDummy(void) {
    UnicodeString *pair = new UnicodeString[2];
    delete [] pair;
}
#endif