|
1 /* |
|
2 ****************************************************************************** |
|
3 * Copyright (C) 1996-2013, International Business Machines Corporation and |
|
4 * others. All Rights Reserved. |
|
5 ****************************************************************************** |
|
6 */ |
|
7 |
|
8 /** |
|
9 * File tblcoll.cpp |
|
10 * |
|
11 * Created by: Helena Shih |
|
12 * |
|
13 * Modification History: |
|
14 * |
|
15 * Date Name Description |
|
16 * 2/5/97 aliu Added streamIn and streamOut methods. Added |
|
17 * constructor which reads RuleBasedCollator object from |
|
18 * a binary file. Added writeToFile method which streams |
|
19 * RuleBasedCollator out to a binary file. The streamIn |
|
20 * and streamOut methods use istream and ostream objects |
|
21 * in binary mode. |
|
22 * 2/11/97 aliu Moved declarations out of for loop initializer. |
|
23 * Added Mac compatibility #ifdef for ios::nocreate. |
|
24 * 2/12/97 aliu Modified to use TableCollationData sub-object to |
|
25 * hold invariant data. |
|
26 * 2/13/97 aliu Moved several methods into this class from Collation. |
|
27 * Added a private RuleBasedCollator(Locale&) constructor, |
|
28 * to be used by Collator::getInstance(). General |
|
29 * clean up. Made use of UErrorCode variables consistent. |
|
30 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy |
|
31 * constructor and getDynamicClassID. |
|
32 * 3/5/97 aliu Changed compaction cycle to improve performance. We |
|
33 * use the maximum allowable value which is kBlockCount. |
|
34 * Modified getRules() to load rules dynamically. Changed |
|
35 * constructFromFile() call to accommodate this (added |
|
36 * parameter to specify whether binary loading is to |
|
37 * take place). |
|
38 * 05/06/97 helena Added memory allocation error check. |
|
39 * 6/20/97 helena Java class name change. |
|
40 * 6/23/97 helena Adding comments to make code more readable. |
|
41 * 09/03/97 helena Added createCollationKeyValues(). |
|
42 * 06/26/98 erm Changes for CollationKeys using byte arrays. |
|
43 * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java |
|
44 * 04/23/99 stephen Removed EDecompositionMode, merged with |
|
45 * Normalizer::EMode |
|
46 * 06/14/99 stephen Removed kResourceBundleSuffix |
|
47 * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx |
|
48 * files are no longer used. |
|
49 * 11/02/99 helena Collator performance enhancements. Special case |
|
50 * for NO_OP situations. |
|
51 * 11/17/99 srl More performance enhancements. Inlined some internal functions. |
|
52 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator |
|
53 * to implementation file. |
|
54 * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h) |
|
55 */ |
|
56 |
|
57 #include "unicode/utypes.h" |
|
58 |
|
59 #if !UCONFIG_NO_COLLATION |
|
60 |
|
61 #include "unicode/tblcoll.h" |
|
62 #include "unicode/coleitr.h" |
|
63 #include "unicode/ures.h" |
|
64 #include "unicode/uset.h" |
|
65 #include "ucol_imp.h" |
|
66 #include "uresimp.h" |
|
67 #include "uhash.h" |
|
68 #include "cmemory.h" |
|
69 #include "cstring.h" |
|
70 #include "putilimp.h" |
|
71 #include "ustr_imp.h" |
|
72 |
|
73 /* public RuleBasedCollator constructor ---------------------------------- */ |
|
74 |
|
75 U_NAMESPACE_BEGIN |
|
76 |
|
77 /** |
|
78 * Copy constructor, aliasing, not write-through |
|
79 */ |
|
80 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) |
|
81 : Collator(that) |
|
82 , dataIsOwned(FALSE) |
|
83 , isWriteThroughAlias(FALSE) |
|
84 , ucollator(NULL) |
|
85 { |
|
86 RuleBasedCollator::operator=(that); |
|
87 } |
|
88 |
|
89 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
|
90 UErrorCode& status) : |
|
91 dataIsOwned(FALSE) |
|
92 { |
|
93 construct(rules, |
|
94 UCOL_DEFAULT_STRENGTH, |
|
95 UCOL_DEFAULT, |
|
96 status); |
|
97 } |
|
98 |
|
99 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
|
100 ECollationStrength collationStrength, |
|
101 UErrorCode& status) : dataIsOwned(FALSE) |
|
102 { |
|
103 construct(rules, |
|
104 (UColAttributeValue)collationStrength, |
|
105 UCOL_DEFAULT, |
|
106 status); |
|
107 } |
|
108 |
|
109 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
|
110 UColAttributeValue decompositionMode, |
|
111 UErrorCode& status) : |
|
112 dataIsOwned(FALSE) |
|
113 { |
|
114 construct(rules, |
|
115 UCOL_DEFAULT_STRENGTH, |
|
116 decompositionMode, |
|
117 status); |
|
118 } |
|
119 |
|
120 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
|
121 ECollationStrength collationStrength, |
|
122 UColAttributeValue decompositionMode, |
|
123 UErrorCode& status) : dataIsOwned(FALSE) |
|
124 { |
|
125 construct(rules, |
|
126 (UColAttributeValue)collationStrength, |
|
127 decompositionMode, |
|
128 status); |
|
129 } |
|
130 RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, |
|
131 const RuleBasedCollator *base, |
|
132 UErrorCode &status) : |
|
133 dataIsOwned(TRUE), |
|
134 isWriteThroughAlias(FALSE) |
|
135 { |
|
136 ucollator = ucol_openBinary(bin, length, base->ucollator, &status); |
|
137 } |
|
138 |
|
139 void |
|
140 RuleBasedCollator::setRuleStringFromCollator() |
|
141 { |
|
142 int32_t length; |
|
143 const UChar *r = ucol_getRules(ucollator, &length); |
|
144 |
|
145 if (r && length > 0) { |
|
146 // alias the rules string |
|
147 urulestring.setTo(TRUE, r, length); |
|
148 } |
|
149 else { |
|
150 urulestring.truncate(0); // Clear string. |
|
151 } |
|
152 } |
|
153 |
|
154 // not aliasing, not write-through |
|
155 void |
|
156 RuleBasedCollator::construct(const UnicodeString& rules, |
|
157 UColAttributeValue collationStrength, |
|
158 UColAttributeValue decompositionMode, |
|
159 UErrorCode& status) |
|
160 { |
|
161 ucollator = ucol_openRules(rules.getBuffer(), rules.length(), |
|
162 decompositionMode, collationStrength, |
|
163 NULL, &status); |
|
164 |
|
165 dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it |
|
166 isWriteThroughAlias = FALSE; |
|
167 |
|
168 if(ucollator == NULL) { |
|
169 if(U_SUCCESS(status)) { |
|
170 status = U_MEMORY_ALLOCATION_ERROR; |
|
171 } |
|
172 return; // Failure |
|
173 } |
|
174 |
|
175 setRuleStringFromCollator(); |
|
176 } |
|
177 |
|
178 /* RuleBasedCollator public destructor ----------------------------------- */ |
|
179 |
|
180 RuleBasedCollator::~RuleBasedCollator() |
|
181 { |
|
182 if (dataIsOwned) |
|
183 { |
|
184 ucol_close(ucollator); |
|
185 } |
|
186 ucollator = 0; |
|
187 } |
|
188 |
|
189 /* RuleBasedCollator public methods --------------------------------------- */ |
|
190 |
|
191 UBool RuleBasedCollator::operator==(const Collator& that) const |
|
192 { |
|
193 /* only checks for address equals here */ |
|
194 if (this == &that) { |
|
195 return TRUE; |
|
196 } |
|
197 if (!Collator::operator==(that)) { |
|
198 return FALSE; /* not the same class */ |
|
199 } |
|
200 |
|
201 RuleBasedCollator& thatAlias = (RuleBasedCollator&)that; |
|
202 |
|
203 return ucol_equals(this->ucollator, thatAlias.ucollator); |
|
204 } |
|
205 |
|
206 // aliasing, not write-through |
|
207 RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that) |
|
208 { |
|
209 if (this == &that) { return *this; } |
|
210 |
|
211 UErrorCode intStatus = U_ZERO_ERROR; |
|
212 UCollator *ucol = ucol_safeClone(that.ucollator, NULL, NULL, &intStatus); |
|
213 if (U_FAILURE(intStatus)) { return *this; } |
|
214 |
|
215 if (dataIsOwned) { |
|
216 ucol_close(ucollator); |
|
217 } |
|
218 ucollator = ucol; |
|
219 dataIsOwned = TRUE; |
|
220 isWriteThroughAlias = FALSE; |
|
221 setRuleStringFromCollator(); |
|
222 return *this; |
|
223 } |
|
224 |
|
225 // aliasing, not write-through |
|
226 Collator* RuleBasedCollator::clone() const |
|
227 { |
|
228 RuleBasedCollator* coll = new RuleBasedCollator(*this); |
|
229 // There is a small chance that the internal ucol_safeClone() call fails. |
|
230 if (coll != NULL && coll->ucollator == NULL) { |
|
231 delete coll; |
|
232 return NULL; |
|
233 } |
|
234 return coll; |
|
235 } |
|
236 |
|
237 |
|
238 CollationElementIterator* RuleBasedCollator::createCollationElementIterator |
|
239 (const UnicodeString& source) const |
|
240 { |
|
241 UErrorCode status = U_ZERO_ERROR; |
|
242 CollationElementIterator *result = new CollationElementIterator(source, this, |
|
243 status); |
|
244 if (U_FAILURE(status)) { |
|
245 delete result; |
|
246 return NULL; |
|
247 } |
|
248 |
|
249 return result; |
|
250 } |
|
251 |
|
252 /** |
|
253 * Create a CollationElementIterator object that will iterate over the |
|
254 * elements in a string, using the collation rules defined in this |
|
255 * RuleBasedCollator |
|
256 */ |
|
257 CollationElementIterator* RuleBasedCollator::createCollationElementIterator |
|
258 (const CharacterIterator& source) const |
|
259 { |
|
260 UErrorCode status = U_ZERO_ERROR; |
|
261 CollationElementIterator *result = new CollationElementIterator(source, this, |
|
262 status); |
|
263 |
|
264 if (U_FAILURE(status)) { |
|
265 delete result; |
|
266 return NULL; |
|
267 } |
|
268 |
|
269 return result; |
|
270 } |
|
271 |
|
272 /** |
|
273 * Return a string representation of this collator's rules. The string can |
|
274 * later be passed to the constructor that takes a UnicodeString argument, |
|
275 * which will construct a collator that's functionally identical to this one. |
|
276 * You can also allow users to edit the string in order to change the collation |
|
277 * data, or you can print it out for inspection, or whatever. |
|
278 */ |
|
279 const UnicodeString& RuleBasedCollator::getRules() const |
|
280 { |
|
281 return urulestring; |
|
282 } |
|
283 |
|
284 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) |
|
285 { |
|
286 int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1); |
|
287 |
|
288 if (rulesize > 0) { |
|
289 UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) ); |
|
290 if(rules != NULL) { |
|
291 ucol_getRulesEx(ucollator, delta, rules, rulesize); |
|
292 buffer.setTo(rules, rulesize); |
|
293 uprv_free(rules); |
|
294 } else { // couldn't allocate |
|
295 buffer.remove(); |
|
296 } |
|
297 } |
|
298 else { |
|
299 buffer.remove(); |
|
300 } |
|
301 } |
|
302 |
|
303 UnicodeSet * |
|
304 RuleBasedCollator::getTailoredSet(UErrorCode &status) const |
|
305 { |
|
306 if(U_FAILURE(status)) { |
|
307 return NULL; |
|
308 } |
|
309 return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status); |
|
310 } |
|
311 |
|
312 |
|
313 void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const |
|
314 { |
|
315 if (versionInfo!=NULL){ |
|
316 ucol_getVersion(ucollator, versionInfo); |
|
317 } |
|
318 } |
|
319 |
|
320 /** |
|
321 * Compare two strings using this collator |
|
322 */ |
|
323 UCollationResult RuleBasedCollator::compare( |
|
324 const UnicodeString& source, |
|
325 const UnicodeString& target, |
|
326 int32_t length, |
|
327 UErrorCode &status) const |
|
328 { |
|
329 return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status); |
|
330 } |
|
331 |
|
332 UCollationResult RuleBasedCollator::compare(const UChar* source, |
|
333 int32_t sourceLength, |
|
334 const UChar* target, |
|
335 int32_t targetLength, |
|
336 UErrorCode &status) const |
|
337 { |
|
338 if(U_SUCCESS(status)) { |
|
339 return ucol_strcoll(ucollator, source, sourceLength, target, targetLength); |
|
340 } else { |
|
341 return UCOL_EQUAL; |
|
342 } |
|
343 } |
|
344 |
|
345 UCollationResult RuleBasedCollator::compare( |
|
346 const UnicodeString& source, |
|
347 const UnicodeString& target, |
|
348 UErrorCode &status) const |
|
349 { |
|
350 if(U_SUCCESS(status)) { |
|
351 return ucol_strcoll(ucollator, source.getBuffer(), source.length(), |
|
352 target.getBuffer(), target.length()); |
|
353 } else { |
|
354 return UCOL_EQUAL; |
|
355 } |
|
356 } |
|
357 |
|
358 UCollationResult RuleBasedCollator::compare(UCharIterator &sIter, |
|
359 UCharIterator &tIter, |
|
360 UErrorCode &status) const { |
|
361 if(U_SUCCESS(status)) { |
|
362 return ucol_strcollIter(ucollator, &sIter, &tIter, &status); |
|
363 } else { |
|
364 return UCOL_EQUAL; |
|
365 } |
|
366 } |
|
367 |
|
368 /** |
|
369 * Retrieve a collation key for the specified string. The key can be compared |
|
370 * with other collation keys using a bitwise comparison (e.g. memcmp) to find |
|
371 * the ordering of their respective source strings. This is handy when doing a |
|
372 * sort, where each sort key must be compared many times. |
|
373 * |
|
374 * The basic algorithm here is to find all of the collation elements for each |
|
375 * character in the source string, convert them to an ASCII representation, and |
|
376 * put them into the collation key. But it's trickier than that. Each |
|
377 * collation element in a string has three components: primary ('A' vs 'B'), |
|
378 * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference |
|
379 * at the end of a string takes precedence over a secondary or tertiary |
|
380 * difference earlier in the string. |
|
381 * |
|
382 * To account for this, we put all of the primary orders at the beginning of |
|
383 * the string, followed by the secondary and tertiary orders. Each set of |
|
384 * orders is terminated by nulls so that a key for a string which is a initial |
|
385 * substring of another key will compare less without any special case. |
|
386 * |
|
387 * Here's a hypothetical example, with the collation element represented as a |
|
388 * three-digit number, one digit for primary, one for secondary, etc. |
|
389 * |
|
390 * String: A a B \u00C9 |
|
391 * Collation Elements: 101 100 201 511 |
|
392 * Collation Key: 1125<null>0001<null>1011<null> |
|
393 * |
|
394 * To make things even trickier, secondary differences (accent marks) are |
|
395 * compared starting at the *end* of the string in languages with French |
|
396 * secondary ordering. But when comparing the accent marks on a single base |
|
397 * character, they are compared from the beginning. To handle this, we reverse |
|
398 * all of the accents that belong to each base character, then we reverse the |
|
399 * entire string of secondary orderings at the end. |
|
400 */ |
|
401 CollationKey& RuleBasedCollator::getCollationKey( |
|
402 const UnicodeString& source, |
|
403 CollationKey& sortkey, |
|
404 UErrorCode& status) const |
|
405 { |
|
406 return getCollationKey(source.getBuffer(), source.length(), sortkey, status); |
|
407 } |
|
408 |
|
409 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source, |
|
410 int32_t sourceLen, |
|
411 CollationKey& sortkey, |
|
412 UErrorCode& status) const |
|
413 { |
|
414 if (U_FAILURE(status)) { |
|
415 return sortkey.setToBogus(); |
|
416 } |
|
417 if (sourceLen < -1 || (source == NULL && sourceLen != 0)) { |
|
418 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
419 return sortkey.setToBogus(); |
|
420 } |
|
421 |
|
422 if (sourceLen < 0) { |
|
423 sourceLen = u_strlen(source); |
|
424 } |
|
425 if (sourceLen == 0) { |
|
426 return sortkey.reset(); |
|
427 } |
|
428 |
|
429 int32_t resultLen = ucol_getCollationKey(ucollator, source, sourceLen, sortkey, status); |
|
430 |
|
431 if (U_SUCCESS(status)) { |
|
432 sortkey.setLength(resultLen); |
|
433 } else { |
|
434 sortkey.setToBogus(); |
|
435 } |
|
436 return sortkey; |
|
437 } |
|
438 |
|
439 /** |
|
440 * Return the maximum length of any expansion sequences that end with the |
|
441 * specified comparison order. |
|
442 * @param order a collation order returned by previous or next. |
|
443 * @return the maximum length of any expansion seuences ending with the |
|
444 * specified order or 1 if collation order does not occur at the end of any |
|
445 * expansion sequence. |
|
446 * @see CollationElementIterator#getMaxExpansion |
|
447 */ |
|
448 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const |
|
449 { |
|
450 uint8_t result; |
|
451 UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result); |
|
452 return result; |
|
453 } |
|
454 |
|
455 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length, |
|
456 UErrorCode &status) |
|
457 { |
|
458 if (U_FAILURE(status)) { return NULL; } |
|
459 LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000)); |
|
460 if (buffer.isNull()) { |
|
461 status = U_MEMORY_ALLOCATION_ERROR; |
|
462 return NULL; |
|
463 } |
|
464 length = cloneBinary(buffer.getAlias(), 20000, status); |
|
465 if (status == U_BUFFER_OVERFLOW_ERROR) { |
|
466 if (buffer.allocateInsteadAndCopy(length, 0) == NULL) { |
|
467 status = U_MEMORY_ALLOCATION_ERROR; |
|
468 return NULL; |
|
469 } |
|
470 status = U_ZERO_ERROR; |
|
471 length = cloneBinary(buffer.getAlias(), length, status); |
|
472 } |
|
473 if (U_FAILURE(status)) { return NULL; } |
|
474 return buffer.orphan(); |
|
475 } |
|
476 |
|
477 |
|
478 int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) |
|
479 { |
|
480 return ucol_cloneBinary(ucollator, buffer, capacity, &status); |
|
481 } |
|
482 |
|
483 void RuleBasedCollator::setAttribute(UColAttribute attr, |
|
484 UColAttributeValue value, |
|
485 UErrorCode &status) |
|
486 { |
|
487 if (U_FAILURE(status)) |
|
488 return; |
|
489 checkOwned(); |
|
490 ucol_setAttribute(ucollator, attr, value, &status); |
|
491 } |
|
492 |
|
493 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, |
|
494 UErrorCode &status) const |
|
495 { |
|
496 if (U_FAILURE(status)) |
|
497 return UCOL_DEFAULT; |
|
498 return ucol_getAttribute(ucollator, attr, &status); |
|
499 } |
|
500 |
|
501 uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) { |
|
502 checkOwned(); |
|
503 return ucol_setVariableTop(ucollator, varTop, len, &status); |
|
504 } |
|
505 |
|
506 uint32_t RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &status) { |
|
507 checkOwned(); |
|
508 return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status); |
|
509 } |
|
510 |
|
511 void RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &status) { |
|
512 checkOwned(); |
|
513 ucol_restoreVariableTop(ucollator, varTop, &status); |
|
514 } |
|
515 |
|
516 uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const { |
|
517 return ucol_getVariableTop(ucollator, &status); |
|
518 } |
|
519 |
|
520 int32_t RuleBasedCollator::getSortKey(const UnicodeString& source, |
|
521 uint8_t *result, int32_t resultLength) |
|
522 const |
|
523 { |
|
524 return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength); |
|
525 } |
|
526 |
|
527 int32_t RuleBasedCollator::getSortKey(const UChar *source, |
|
528 int32_t sourceLength, uint8_t *result, |
|
529 int32_t resultLength) const |
|
530 { |
|
531 return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength); |
|
532 } |
|
533 |
|
534 int32_t RuleBasedCollator::getReorderCodes(int32_t *dest, |
|
535 int32_t destCapacity, |
|
536 UErrorCode& status) const |
|
537 { |
|
538 return ucol_getReorderCodes(ucollator, dest, destCapacity, &status); |
|
539 } |
|
540 |
|
541 void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, |
|
542 int32_t reorderCodesLength, |
|
543 UErrorCode& status) |
|
544 { |
|
545 checkOwned(); |
|
546 ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status); |
|
547 } |
|
548 |
|
549 int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode, |
|
550 int32_t* dest, |
|
551 int32_t destCapacity, |
|
552 UErrorCode& status) |
|
553 { |
|
554 return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status); |
|
555 } |
|
556 |
|
557 /** |
|
558 * Create a hash code for this collation. Just hash the main rule table -- that |
|
559 * should be good enough for almost any use. |
|
560 */ |
|
561 int32_t RuleBasedCollator::hashCode() const |
|
562 { |
|
563 int32_t length; |
|
564 const UChar *rules = ucol_getRules(ucollator, &length); |
|
565 return ustr_hashUCharsN(rules, length); |
|
566 } |
|
567 |
|
568 /** |
|
569 * return the locale of this collator |
|
570 */ |
|
571 Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const { |
|
572 const char *result = ucol_getLocaleByType(ucollator, type, &status); |
|
573 if(result == NULL) { |
|
574 Locale res(""); |
|
575 res.setToBogus(); |
|
576 return res; |
|
577 } else { |
|
578 return Locale(result); |
|
579 } |
|
580 } |
|
581 |
|
582 void |
|
583 RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) { |
|
584 checkOwned(); |
|
585 char* rloc = uprv_strdup(requestedLocale.getName()); |
|
586 if (rloc) { |
|
587 char* vloc = uprv_strdup(validLocale.getName()); |
|
588 if (vloc) { |
|
589 char* aloc = uprv_strdup(actualLocale.getName()); |
|
590 if (aloc) { |
|
591 ucol_setReqValidLocales(ucollator, rloc, vloc, aloc); |
|
592 return; |
|
593 } |
|
594 uprv_free(vloc); |
|
595 } |
|
596 uprv_free(rloc); |
|
597 } |
|
598 } |
|
599 |
|
600 // RuleBasedCollator private constructors ---------------------------------- |
|
601 |
|
602 RuleBasedCollator::RuleBasedCollator() |
|
603 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) |
|
604 { |
|
605 } |
|
606 |
|
607 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale, |
|
608 UErrorCode& status) |
|
609 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) |
|
610 { |
|
611 if (U_FAILURE(status)) |
|
612 return; |
|
613 |
|
614 /* |
|
615 Try to load, in order: |
|
616 1. The desired locale's collation. |
|
617 2. A fallback of the desired locale. |
|
618 3. The default locale's collation. |
|
619 4. A fallback of the default locale. |
|
620 5. The default collation rules, which contains en_US collation rules. |
|
621 |
|
622 To reiterate, we try: |
|
623 Specific: |
|
624 language+country+variant |
|
625 language+country |
|
626 language |
|
627 Default: |
|
628 language+country+variant |
|
629 language+country |
|
630 language |
|
631 Root: (aka DEFAULTRULES) |
|
632 steps 1-5 are handled by resource bundle fallback mechanism. |
|
633 however, in a very unprobable situation that no resource bundle |
|
634 data exists, step 5 is repeated with hardcoded default rules. |
|
635 */ |
|
636 |
|
637 setUCollator(desiredLocale, status); |
|
638 |
|
639 if (U_FAILURE(status)) |
|
640 { |
|
641 status = U_ZERO_ERROR; |
|
642 |
|
643 setUCollator(kRootLocaleName, status); |
|
644 if (status == U_ZERO_ERROR) { |
|
645 status = U_USING_DEFAULT_WARNING; |
|
646 } |
|
647 } |
|
648 |
|
649 if (U_SUCCESS(status)) |
|
650 { |
|
651 setRuleStringFromCollator(); |
|
652 } |
|
653 } |
|
654 |
|
655 void |
|
656 RuleBasedCollator::setUCollator(const char *locale, |
|
657 UErrorCode &status) |
|
658 { |
|
659 if (U_FAILURE(status)) { |
|
660 return; |
|
661 } |
|
662 if (ucollator && dataIsOwned) |
|
663 ucol_close(ucollator); |
|
664 ucollator = ucol_open_internal(locale, &status); |
|
665 dataIsOwned = TRUE; |
|
666 isWriteThroughAlias = FALSE; |
|
667 } |
|
668 |
|
669 |
|
670 void |
|
671 RuleBasedCollator::checkOwned() { |
|
672 if (!(dataIsOwned || isWriteThroughAlias)) { |
|
673 UErrorCode status = U_ZERO_ERROR; |
|
674 ucollator = ucol_safeClone(ucollator, NULL, NULL, &status); |
|
675 setRuleStringFromCollator(); |
|
676 dataIsOwned = TRUE; |
|
677 isWriteThroughAlias = FALSE; |
|
678 } |
|
679 } |
|
680 |
|
681 |
|
682 int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale, |
|
683 char *buffer, |
|
684 int32_t capacity, |
|
685 UErrorCode &status) const { |
|
686 /* simply delegate */ |
|
687 return ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &status); |
|
688 } |
|
689 |
|
690 |
|
691 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) |
|
692 |
|
693 U_NAMESPACE_END |
|
694 |
|
695 #endif /* #if !UCONFIG_NO_COLLATION */ |