|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 1996-2012, International Business Machines Corporation and |
|
4 * others. All Rights Reserved. |
|
5 ******************************************************************************* |
|
6 */ |
|
7 //=============================================================================== |
|
8 // |
|
9 // File sortkey.cpp |
|
10 // |
|
11 // |
|
12 // |
|
13 // Created by: Helena Shih |
|
14 // |
|
15 // Modification History: |
|
16 // |
|
17 // Date Name Description |
|
18 // |
|
19 // 6/20/97 helena Java class name change. |
|
20 // 6/23/97 helena Added comments to make code more readable. |
|
21 // 6/26/98 erm Changed to use byte arrays instead of UnicodeString |
|
22 // 7/31/98 erm hashCode: minimum inc should be 2 not 1, |
|
23 // Cleaned up operator= |
|
24 // 07/12/99 helena HPUX 11 CC port. |
|
25 // 03/06/01 synwee Modified compareTo, to handle the result of |
|
26 // 2 string similar in contents, but one is longer |
|
27 // than the other |
|
28 //=============================================================================== |
|
29 |
|
30 #include "unicode/utypes.h" |
|
31 |
|
32 #if !UCONFIG_NO_COLLATION |
|
33 |
|
34 #include "unicode/sortkey.h" |
|
35 #include "cmemory.h" |
|
36 #include "uelement.h" |
|
37 #include "ustr_imp.h" |
|
38 |
|
39 U_NAMESPACE_BEGIN |
|
40 |
|
// fHashCode serves both as a cached hash value and as a state marker, so
// three values are reserved and never returned as an ordinary computed hash.
//
// A hash code of kInvalidHashCode indicates that the hash code needs
// to be computed; hashCode() computes and caches it on demand.
// A hash code of kEmptyHashCode is used for empty keys and for any key
// whose computed hash code collides with one of the other reserved values
// (see computeHashCode()).
static const int32_t kInvalidHashCode = 0;
static const int32_t kEmptyHashCode = 1;
// The "bogus hash code" replaces a separate fBogus flag: setToBogus()
// stores it in fHashCode to mark the key as invalid.
static const int32_t kBogusHashCode = 2;

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
|
50 |
|
51 CollationKey::CollationKey() |
|
52 : UObject(), fFlagAndLength(0), |
|
53 fHashCode(kEmptyHashCode) |
|
54 { |
|
55 } |
|
56 |
|
57 // Create a collation key from a bit array. |
|
58 CollationKey::CollationKey(const uint8_t* newValues, int32_t count) |
|
59 : UObject(), fFlagAndLength(count), |
|
60 fHashCode(kInvalidHashCode) |
|
61 { |
|
62 if (count < 0 || (newValues == NULL && count != 0) || |
|
63 (count > getCapacity() && reallocate(count, 0) == NULL)) { |
|
64 setToBogus(); |
|
65 return; |
|
66 } |
|
67 |
|
68 if (count > 0) { |
|
69 uprv_memcpy(getBytes(), newValues, count); |
|
70 } |
|
71 } |
|
72 |
|
73 CollationKey::CollationKey(const CollationKey& other) |
|
74 : UObject(other), fFlagAndLength(other.getLength()), |
|
75 fHashCode(other.fHashCode) |
|
76 { |
|
77 if (other.isBogus()) |
|
78 { |
|
79 setToBogus(); |
|
80 return; |
|
81 } |
|
82 |
|
83 int32_t length = fFlagAndLength; |
|
84 if (length > getCapacity() && reallocate(length, 0) == NULL) { |
|
85 setToBogus(); |
|
86 return; |
|
87 } |
|
88 |
|
89 if (length > 0) { |
|
90 uprv_memcpy(getBytes(), other.getBytes(), length); |
|
91 } |
|
92 } |
|
93 |
|
94 CollationKey::~CollationKey() |
|
95 { |
|
96 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } |
|
97 } |
|
98 |
|
99 uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { |
|
100 uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); |
|
101 if(newBytes == NULL) { return NULL; } |
|
102 if(length > 0) { |
|
103 uprv_memcpy(newBytes, getBytes(), length); |
|
104 } |
|
105 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } |
|
106 fUnion.fFields.fBytes = newBytes; |
|
107 fUnion.fFields.fCapacity = newCapacity; |
|
108 fFlagAndLength |= 0x80000000; |
|
109 return newBytes; |
|
110 } |
|
111 |
|
112 void CollationKey::setLength(int32_t newLength) { |
|
113 // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); |
|
114 fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; |
|
115 fHashCode = kInvalidHashCode; |
|
116 } |
|
117 |
|
118 // set the key to an empty state |
|
119 CollationKey& |
|
120 CollationKey::reset() |
|
121 { |
|
122 fFlagAndLength &= 0x80000000; |
|
123 fHashCode = kEmptyHashCode; |
|
124 |
|
125 return *this; |
|
126 } |
|
127 |
|
128 // set the key to a "bogus" or invalid state |
|
129 CollationKey& |
|
130 CollationKey::setToBogus() |
|
131 { |
|
132 fFlagAndLength &= 0x80000000; |
|
133 fHashCode = kBogusHashCode; |
|
134 |
|
135 return *this; |
|
136 } |
|
137 |
|
138 UBool |
|
139 CollationKey::operator==(const CollationKey& source) const |
|
140 { |
|
141 return getLength() == source.getLength() && |
|
142 (this == &source || |
|
143 uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); |
|
144 } |
|
145 |
|
146 const CollationKey& |
|
147 CollationKey::operator=(const CollationKey& other) |
|
148 { |
|
149 if (this != &other) |
|
150 { |
|
151 if (other.isBogus()) |
|
152 { |
|
153 return setToBogus(); |
|
154 } |
|
155 |
|
156 int32_t length = other.getLength(); |
|
157 if (length > getCapacity() && reallocate(length, 0) == NULL) { |
|
158 return setToBogus(); |
|
159 } |
|
160 if (length > 0) { |
|
161 uprv_memcpy(getBytes(), other.getBytes(), length); |
|
162 } |
|
163 fFlagAndLength = (fFlagAndLength & 0x80000000) | length; |
|
164 fHashCode = other.fHashCode; |
|
165 } |
|
166 |
|
167 return *this; |
|
168 } |
|
169 |
|
170 // Bitwise comparison for the collation keys. |
|
171 Collator::EComparisonResult |
|
172 CollationKey::compareTo(const CollationKey& target) const |
|
173 { |
|
174 UErrorCode errorCode = U_ZERO_ERROR; |
|
175 return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); |
|
176 } |
|
177 |
|
178 // Bitwise comparison for the collation keys. |
|
179 UCollationResult |
|
180 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const |
|
181 { |
|
182 if(U_SUCCESS(status)) { |
|
183 const uint8_t *src = getBytes(); |
|
184 const uint8_t *tgt = target.getBytes(); |
|
185 |
|
186 // are we comparing the same string |
|
187 if (src == tgt) |
|
188 return UCOL_EQUAL; |
|
189 |
|
190 UCollationResult result; |
|
191 |
|
192 // are we comparing different lengths? |
|
193 int32_t minLength = getLength(); |
|
194 int32_t targetLength = target.getLength(); |
|
195 if (minLength < targetLength) { |
|
196 result = UCOL_LESS; |
|
197 } else if (minLength == targetLength) { |
|
198 result = UCOL_EQUAL; |
|
199 } else { |
|
200 minLength = targetLength; |
|
201 result = UCOL_GREATER; |
|
202 } |
|
203 |
|
204 if (minLength > 0) { |
|
205 int diff = uprv_memcmp(src, tgt, minLength); |
|
206 if (diff > 0) { |
|
207 return UCOL_GREATER; |
|
208 } |
|
209 else |
|
210 if (diff < 0) { |
|
211 return UCOL_LESS; |
|
212 } |
|
213 } |
|
214 |
|
215 return result; |
|
216 } else { |
|
217 return UCOL_EQUAL; |
|
218 } |
|
219 } |
|
220 |
|
#ifdef U_USE_COLLATION_KEY_DEPRECATES
// Create a copy of the byte array.
//
// count: output; receives the number of bytes in the returned buffer, or 0
//        if the allocation failed.
//
// Returns a buffer obtained from uprv_malloc() holding a copy of the key
// bytes, or NULL if the allocation failed.
// NOTE(review): the caller presumably releases the buffer with uprv_free();
// confirm against the public API documentation.
//
// The length and bytes are read through getLength()/getBytes(), like every
// other method in this file, rather than through the fCount/fBytes members
// that this block used to reference.
uint8_t*
CollationKey::toByteArray(int32_t& count) const
{
    const int32_t length = getLength();
    uint8_t *result = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * length));

    if (result == NULL)
    {
        count = 0;
    }
    else
    {
        count = length;
        if (length > 0) {
            uprv_memcpy(result, getBytes(), length);
        }
    }

    return result;
}
#endif
|
243 |
|
244 static int32_t |
|
245 computeHashCode(const uint8_t *key, int32_t length) { |
|
246 const char *s = reinterpret_cast<const char *>(key); |
|
247 int32_t hash; |
|
248 if (s == NULL || length == 0) { |
|
249 hash = kEmptyHashCode; |
|
250 } else { |
|
251 hash = ustr_hashCharsN(s, length); |
|
252 if (hash == kInvalidHashCode || hash == kBogusHashCode) { |
|
253 hash = kEmptyHashCode; |
|
254 } |
|
255 } |
|
256 return hash; |
|
257 } |
|
258 |
|
259 int32_t |
|
260 CollationKey::hashCode() const |
|
261 { |
|
262 // (Cribbed from UnicodeString) |
|
263 // We cache the hashCode; when it becomes invalid, due to any change to the |
|
264 // string, we note this by setting it to kInvalidHashCode. [LIU] |
|
265 |
|
266 // Note: This method is semantically const, but physically non-const. |
|
267 |
|
268 if (fHashCode == kInvalidHashCode) |
|
269 { |
|
270 fHashCode = computeHashCode(getBytes(), getLength()); |
|
271 } |
|
272 |
|
273 return fHashCode; |
|
274 } |
|
275 |
|
276 U_NAMESPACE_END |
|
277 |
|
278 U_CAPI int32_t U_EXPORT2 |
|
279 ucol_keyHashCode(const uint8_t *key, |
|
280 int32_t length) |
|
281 { |
|
282 return icu::computeHashCode(key, length); |
|
283 } |
|
284 |
|
285 #endif /* #if !UCONFIG_NO_COLLATION */ |