|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 1999-2011, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * file name: unistr_case.cpp |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:2 |
|
12 * |
|
13 * created on: 2004aug19 |
|
14 * created by: Markus W. Scherer |
|
15 * |
|
16 * Case-mapping functions moved here from unistr.cpp |
|
17 */ |
|
18 |
|
19 #include "unicode/utypes.h" |
|
20 #include "unicode/putil.h" |
|
21 #include "cstring.h" |
|
22 #include "cmemory.h" |
|
23 #include "unicode/ustring.h" |
|
24 #include "unicode/unistr.h" |
|
25 #include "unicode/uchar.h" |
|
26 #include "uelement.h" |
|
27 #include "ustr_imp.h" |
|
28 |
|
29 U_NAMESPACE_BEGIN |
|
30 |
|
31 //======================================== |
|
32 // Read-only implementation |
|
33 //======================================== |
|
34 |
|
35 int8_t |
|
36 UnicodeString::doCaseCompare(int32_t start, |
|
37 int32_t length, |
|
38 const UChar *srcChars, |
|
39 int32_t srcStart, |
|
40 int32_t srcLength, |
|
41 uint32_t options) const |
|
42 { |
|
43 // compare illegal string values |
|
44 // treat const UChar *srcChars==NULL as an empty string |
|
45 if(isBogus()) { |
|
46 return -1; |
|
47 } |
|
48 |
|
49 // pin indices to legal values |
|
50 pinIndices(start, length); |
|
51 |
|
52 if(srcChars == NULL) { |
|
53 srcStart = srcLength = 0; |
|
54 } |
|
55 |
|
56 // get the correct pointer |
|
57 const UChar *chars = getArrayStart(); |
|
58 |
|
59 chars += start; |
|
60 if(srcStart!=0) { |
|
61 srcChars += srcStart; |
|
62 } |
|
63 |
|
64 if(chars != srcChars) { |
|
65 UErrorCode errorCode=U_ZERO_ERROR; |
|
66 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, |
|
67 options|U_COMPARE_IGNORE_CASE, &errorCode); |
|
68 if(result!=0) { |
|
69 return (int8_t)(result >> 24 | 1); |
|
70 } |
|
71 } else { |
|
72 // get the srcLength if necessary |
|
73 if(srcLength < 0) { |
|
74 srcLength = u_strlen(srcChars + srcStart); |
|
75 } |
|
76 if(length != srcLength) { |
|
77 return (int8_t)((length - srcLength) >> 24 | 1); |
|
78 } |
|
79 } |
|
80 return 0; |
|
81 } |
|
82 |
|
83 //======================================== |
|
84 // Write implementation |
|
85 //======================================== |
|
86 |
|
87 UnicodeString & |
|
88 UnicodeString::caseMap(const UCaseMap *csm, |
|
89 UStringCaseMapper *stringCaseMapper) { |
|
90 if(isEmpty() || !isWritable()) { |
|
91 // nothing to do |
|
92 return *this; |
|
93 } |
|
94 |
|
95 // We need to allocate a new buffer for the internal string case mapping function. |
|
96 // This is very similar to how doReplace() keeps the old array pointer |
|
97 // and deletes the old array itself after it is done. |
|
98 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. |
|
99 UChar oldStackBuffer[US_STACKBUF_SIZE]; |
|
100 UChar *oldArray; |
|
101 int32_t oldLength; |
|
102 |
|
103 if(fFlags&kUsingStackBuffer) { |
|
104 // copy the stack buffer contents because it will be overwritten |
|
105 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength); |
|
106 oldArray = oldStackBuffer; |
|
107 oldLength = fShortLength; |
|
108 } else { |
|
109 oldArray = getArrayStart(); |
|
110 oldLength = length(); |
|
111 } |
|
112 |
|
113 int32_t capacity; |
|
114 if(oldLength <= US_STACKBUF_SIZE) { |
|
115 capacity = US_STACKBUF_SIZE; |
|
116 } else { |
|
117 capacity = oldLength + 20; |
|
118 } |
|
119 int32_t *bufferToDelete = 0; |
|
120 if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { |
|
121 return *this; |
|
122 } |
|
123 |
|
124 // Case-map, and if the result is too long, then reallocate and repeat. |
|
125 UErrorCode errorCode; |
|
126 int32_t newLength; |
|
127 do { |
|
128 errorCode = U_ZERO_ERROR; |
|
129 newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(), |
|
130 oldArray, oldLength, &errorCode); |
|
131 setLength(newLength); |
|
132 } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE)); |
|
133 |
|
134 if (bufferToDelete) { |
|
135 uprv_free(bufferToDelete); |
|
136 } |
|
137 if(U_FAILURE(errorCode)) { |
|
138 setToBogus(); |
|
139 } |
|
140 return *this; |
|
141 } |
|
142 |
|
143 UnicodeString & |
|
144 UnicodeString::foldCase(uint32_t options) { |
|
145 UCaseMap csm=UCASEMAP_INITIALIZER; |
|
146 csm.csp=ucase_getSingleton(); |
|
147 csm.options=options; |
|
148 return caseMap(&csm, ustrcase_internalFold); |
|
149 } |
|
150 |
|
151 U_NAMESPACE_END |
|
152 |
|
153 // Defined here to reduce dependencies on break iterator |
|
154 U_CAPI int32_t U_EXPORT2 |
|
155 uhash_hashCaselessUnicodeString(const UElement key) { |
|
156 U_NAMESPACE_USE |
|
157 const UnicodeString *str = (const UnicodeString*) key.pointer; |
|
158 if (str == NULL) { |
|
159 return 0; |
|
160 } |
|
161 // Inefficient; a better way would be to have a hash function in |
|
162 // UnicodeString that does case folding on the fly. |
|
163 UnicodeString copy(*str); |
|
164 return copy.foldCase().hashCode(); |
|
165 } |
|
166 |
|
167 // Defined here to reduce dependencies on break iterator |
|
168 U_CAPI UBool U_EXPORT2 |
|
169 uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) { |
|
170 U_NAMESPACE_USE |
|
171 const UnicodeString *str1 = (const UnicodeString*) key1.pointer; |
|
172 const UnicodeString *str2 = (const UnicodeString*) key2.pointer; |
|
173 if (str1 == str2) { |
|
174 return TRUE; |
|
175 } |
|
176 if (str1 == NULL || str2 == NULL) { |
|
177 return FALSE; |
|
178 } |
|
179 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; |
|
180 } |