1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/tblcoll.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,695 @@ 1.4 +/* 1.5 + ****************************************************************************** 1.6 + * Copyright (C) 1996-2013, International Business Machines Corporation and 1.7 + * others. All Rights Reserved. 1.8 + ****************************************************************************** 1.9 + */ 1.10 + 1.11 +/** 1.12 + * File tblcoll.cpp 1.13 + * 1.14 + * Created by: Helena Shih 1.15 + * 1.16 + * Modification History: 1.17 + * 1.18 + * Date Name Description 1.19 + * 2/5/97 aliu Added streamIn and streamOut methods. Added 1.20 + * constructor which reads RuleBasedCollator object from 1.21 + * a binary file. Added writeToFile method which streams 1.22 + * RuleBasedCollator out to a binary file. The streamIn 1.23 + * and streamOut methods use istream and ostream objects 1.24 + * in binary mode. 1.25 + * 2/11/97 aliu Moved declarations out of for loop initializer. 1.26 + * Added Mac compatibility #ifdef for ios::nocreate. 1.27 + * 2/12/97 aliu Modified to use TableCollationData sub-object to 1.28 + * hold invariant data. 1.29 + * 2/13/97 aliu Moved several methods into this class from Collation. 1.30 + * Added a private RuleBasedCollator(Locale&) constructor, 1.31 + * to be used by Collator::getInstance(). General 1.32 + * clean up. Made use of UErrorCode variables consistent. 1.33 + * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy 1.34 + * constructor and getDynamicClassID. 1.35 + * 3/5/97 aliu Changed compaction cycle to improve performance. We 1.36 + * use the maximum allowable value which is kBlockCount. 1.37 + * Modified getRules() to load rules dynamically. Changed 1.38 + * constructFromFile() call to accomodate this (added 1.39 + * parameter to specify whether binary loading is to 1.40 + * take place). 1.41 + * 05/06/97 helena Added memory allocation error check. 1.42 + * 6/20/97 helena Java class name change. 1.43 + * 6/23/97 helena Adding comments to make code more readable. 1.44 + * 09/03/97 helena Added createCollationKeyValues(). 1.45 + * 06/26/98 erm Changes for CollationKeys using byte arrays. 1.46 + * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java 1.47 + * 04/23/99 stephen Removed EDecompositionMode, merged with 1.48 + * Normalizer::EMode 1.49 + * 06/14/99 stephen Removed kResourceBundleSuffix 1.50 + * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx 1.51 + * files are no longer used. 1.52 + * 11/02/99 helena Collator performance enhancements. Special case 1.53 + * for NO_OP situations. 1.54 + * 11/17/99 srl More performance enhancements. Inlined some internal functions. 1.55 + * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator 1.56 + * to implementation file. 1.57 + * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h) 1.58 + */ 1.59 + 1.60 +#include "unicode/utypes.h" 1.61 + 1.62 +#if !UCONFIG_NO_COLLATION 1.63 + 1.64 +#include "unicode/tblcoll.h" 1.65 +#include "unicode/coleitr.h" 1.66 +#include "unicode/ures.h" 1.67 +#include "unicode/uset.h" 1.68 +#include "ucol_imp.h" 1.69 +#include "uresimp.h" 1.70 +#include "uhash.h" 1.71 +#include "cmemory.h" 1.72 +#include "cstring.h" 1.73 +#include "putilimp.h" 1.74 +#include "ustr_imp.h" 1.75 + 1.76 +/* public RuleBasedCollator constructor ---------------------------------- */ 1.77 + 1.78 +U_NAMESPACE_BEGIN 1.79 + 1.80 +/** 1.81 +* Copy constructor, aliasing, not write-through 1.82 +*/ 1.83 +RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) 1.84 +: Collator(that) 1.85 +, dataIsOwned(FALSE) 1.86 +, isWriteThroughAlias(FALSE) 1.87 +, ucollator(NULL) 1.88 +{ 1.89 + RuleBasedCollator::operator=(that); 1.90 +} 1.91 + 1.92 +RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, 1.93 + UErrorCode& status) : 1.94 +dataIsOwned(FALSE) 1.95 +{ 1.96 + construct(rules, 1.97 + UCOL_DEFAULT_STRENGTH, 1.98 + UCOL_DEFAULT, 1.99 + status); 1.100 +} 1.101 + 1.102 +RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, 1.103 + ECollationStrength collationStrength, 1.104 + UErrorCode& status) : dataIsOwned(FALSE) 1.105 +{ 1.106 + construct(rules, 1.107 + (UColAttributeValue)collationStrength, 1.108 + UCOL_DEFAULT, 1.109 + status); 1.110 +} 1.111 + 1.112 +RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, 1.113 + UColAttributeValue decompositionMode, 1.114 + UErrorCode& status) : 1.115 +dataIsOwned(FALSE) 1.116 +{ 1.117 + construct(rules, 1.118 + UCOL_DEFAULT_STRENGTH, 1.119 + decompositionMode, 1.120 + status); 1.121 +} 1.122 + 1.123 +RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, 1.124 + ECollationStrength collationStrength, 1.125 + UColAttributeValue decompositionMode, 1.126 + UErrorCode& status) : dataIsOwned(FALSE) 1.127 +{ 1.128 + construct(rules, 1.129 + (UColAttributeValue)collationStrength, 1.130 + decompositionMode, 1.131 + status); 1.132 +} 1.133 +RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, 1.134 + const RuleBasedCollator *base, 1.135 + UErrorCode &status) : 1.136 +dataIsOwned(TRUE), 1.137 +isWriteThroughAlias(FALSE) 1.138 +{ 1.139 + ucollator = ucol_openBinary(bin, length, base->ucollator, &status); 1.140 +} 1.141 + 1.142 +void 1.143 +RuleBasedCollator::setRuleStringFromCollator() 1.144 +{ 1.145 + int32_t length; 1.146 + const UChar *r = ucol_getRules(ucollator, &length); 1.147 + 1.148 + if (r && length > 0) { 1.149 + // alias the rules string 1.150 + urulestring.setTo(TRUE, r, length); 1.151 + } 1.152 + else { 1.153 + urulestring.truncate(0); // Clear string. 1.154 + } 1.155 +} 1.156 + 1.157 +// not aliasing, not write-through 1.158 +void 1.159 +RuleBasedCollator::construct(const UnicodeString& rules, 1.160 + UColAttributeValue collationStrength, 1.161 + UColAttributeValue decompositionMode, 1.162 + UErrorCode& status) 1.163 +{ 1.164 + ucollator = ucol_openRules(rules.getBuffer(), rules.length(), 1.165 + decompositionMode, collationStrength, 1.166 + NULL, &status); 1.167 + 1.168 + dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it 1.169 + isWriteThroughAlias = FALSE; 1.170 + 1.171 + if(ucollator == NULL) { 1.172 + if(U_SUCCESS(status)) { 1.173 + status = U_MEMORY_ALLOCATION_ERROR; 1.174 + } 1.175 + return; // Failure 1.176 + } 1.177 + 1.178 + setRuleStringFromCollator(); 1.179 +} 1.180 + 1.181 +/* RuleBasedCollator public destructor ----------------------------------- */ 1.182 + 1.183 +RuleBasedCollator::~RuleBasedCollator() 1.184 +{ 1.185 + if (dataIsOwned) 1.186 + { 1.187 + ucol_close(ucollator); 1.188 + } 1.189 + ucollator = 0; 1.190 +} 1.191 + 1.192 +/* RuleBaseCollator public methods --------------------------------------- */ 1.193 + 1.194 +UBool RuleBasedCollator::operator==(const Collator& that) const 1.195 +{ 1.196 + /* only checks for address equals here */ 1.197 + if (this == &that) { 1.198 + return TRUE; 1.199 + } 1.200 + if (!Collator::operator==(that)) { 1.201 + return FALSE; /* not the same class */ 1.202 + } 1.203 + 1.204 + RuleBasedCollator& thatAlias = (RuleBasedCollator&)that; 1.205 + 1.206 + return ucol_equals(this->ucollator, thatAlias.ucollator); 1.207 +} 1.208 + 1.209 +// aliasing, not write-through 1.210 +RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that) 1.211 +{ 1.212 + if (this == &that) { return *this; } 1.213 + 1.214 + UErrorCode intStatus = U_ZERO_ERROR; 1.215 + UCollator *ucol = ucol_safeClone(that.ucollator, NULL, NULL, &intStatus); 1.216 + if (U_FAILURE(intStatus)) { return *this; } 1.217 + 1.218 + if (dataIsOwned) { 1.219 + ucol_close(ucollator); 1.220 + } 1.221 + ucollator = ucol; 1.222 + dataIsOwned = TRUE; 1.223 + isWriteThroughAlias = FALSE; 1.224 + setRuleStringFromCollator(); 1.225 + return *this; 1.226 +} 1.227 + 1.228 +// aliasing, not write-through 1.229 +Collator* RuleBasedCollator::clone() const 1.230 +{ 1.231 + RuleBasedCollator* coll = new RuleBasedCollator(*this); 1.232 + // There is a small chance that the internal ucol_safeClone() call fails. 1.233 + if (coll != NULL && coll->ucollator == NULL) { 1.234 + delete coll; 1.235 + return NULL; 1.236 + } 1.237 + return coll; 1.238 +} 1.239 + 1.240 + 1.241 +CollationElementIterator* RuleBasedCollator::createCollationElementIterator 1.242 + (const UnicodeString& source) const 1.243 +{ 1.244 + UErrorCode status = U_ZERO_ERROR; 1.245 + CollationElementIterator *result = new CollationElementIterator(source, this, 1.246 + status); 1.247 + if (U_FAILURE(status)) { 1.248 + delete result; 1.249 + return NULL; 1.250 + } 1.251 + 1.252 + return result; 1.253 +} 1.254 + 1.255 +/** 1.256 +* Create a CollationElementIterator object that will iterate over the 1.257 +* elements in a string, using the collation rules defined in this 1.258 +* RuleBasedCollator 1.259 +*/ 1.260 +CollationElementIterator* RuleBasedCollator::createCollationElementIterator 1.261 + (const CharacterIterator& source) const 1.262 +{ 1.263 + UErrorCode status = U_ZERO_ERROR; 1.264 + CollationElementIterator *result = new CollationElementIterator(source, this, 1.265 + status); 1.266 + 1.267 + if (U_FAILURE(status)) { 1.268 + delete result; 1.269 + return NULL; 1.270 + } 1.271 + 1.272 + return result; 1.273 +} 1.274 + 1.275 +/** 1.276 +* Return a string representation of this collator's rules. The string can 1.277 +* later be passed to the constructor that takes a UnicodeString argument, 1.278 +* which will construct a collator that's functionally identical to this one. 1.279 +* You can also allow users to edit the string in order to change the collation 1.280 +* data, or you can print it out for inspection, or whatever. 1.281 +*/ 1.282 +const UnicodeString& RuleBasedCollator::getRules() const 1.283 +{ 1.284 + return urulestring; 1.285 +} 1.286 + 1.287 +void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) 1.288 +{ 1.289 + int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1); 1.290 + 1.291 + if (rulesize > 0) { 1.292 + UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) ); 1.293 + if(rules != NULL) { 1.294 + ucol_getRulesEx(ucollator, delta, rules, rulesize); 1.295 + buffer.setTo(rules, rulesize); 1.296 + uprv_free(rules); 1.297 + } else { // couldn't allocate 1.298 + buffer.remove(); 1.299 + } 1.300 + } 1.301 + else { 1.302 + buffer.remove(); 1.303 + } 1.304 +} 1.305 + 1.306 +UnicodeSet * 1.307 +RuleBasedCollator::getTailoredSet(UErrorCode &status) const 1.308 +{ 1.309 + if(U_FAILURE(status)) { 1.310 + return NULL; 1.311 + } 1.312 + return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status); 1.313 +} 1.314 + 1.315 + 1.316 +void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const 1.317 +{ 1.318 + if (versionInfo!=NULL){ 1.319 + ucol_getVersion(ucollator, versionInfo); 1.320 + } 1.321 +} 1.322 + 1.323 +/** 1.324 +* Compare two strings using this collator 1.325 +*/ 1.326 +UCollationResult RuleBasedCollator::compare( 1.327 + const UnicodeString& source, 1.328 + const UnicodeString& target, 1.329 + int32_t length, 1.330 + UErrorCode &status) const 1.331 +{ 1.332 + return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status); 1.333 +} 1.334 + 1.335 +UCollationResult RuleBasedCollator::compare(const UChar* source, 1.336 + int32_t sourceLength, 1.337 + const UChar* target, 1.338 + int32_t targetLength, 1.339 + UErrorCode &status) const 1.340 +{ 1.341 + if(U_SUCCESS(status)) { 1.342 + return ucol_strcoll(ucollator, source, sourceLength, target, targetLength); 1.343 + } else { 1.344 + return UCOL_EQUAL; 1.345 + } 1.346 +} 1.347 + 1.348 +UCollationResult RuleBasedCollator::compare( 1.349 + const UnicodeString& source, 1.350 + const UnicodeString& target, 1.351 + UErrorCode &status) const 1.352 +{ 1.353 + if(U_SUCCESS(status)) { 1.354 + return ucol_strcoll(ucollator, source.getBuffer(), source.length(), 1.355 + target.getBuffer(), target.length()); 1.356 + } else { 1.357 + return UCOL_EQUAL; 1.358 + } 1.359 +} 1.360 + 1.361 +UCollationResult RuleBasedCollator::compare(UCharIterator &sIter, 1.362 + UCharIterator &tIter, 1.363 + UErrorCode &status) const { 1.364 + if(U_SUCCESS(status)) { 1.365 + return ucol_strcollIter(ucollator, &sIter, &tIter, &status); 1.366 + } else { 1.367 + return UCOL_EQUAL; 1.368 + } 1.369 +} 1.370 + 1.371 +/** 1.372 +* Retrieve a collation key for the specified string. The key can be compared 1.373 +* with other collation keys using a bitwise comparison (e.g. memcmp) to find 1.374 +* the ordering of their respective source strings. This is handy when doing a 1.375 +* sort, where each sort key must be compared many times. 1.376 +* 1.377 +* The basic algorithm here is to find all of the collation elements for each 1.378 +* character in the source string, convert them to an ASCII representation, and 1.379 +* put them into the collation key. But it's trickier than that. Each 1.380 +* collation element in a string has three components: primary ('A' vs 'B'), 1.381 +* secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference 1.382 +* at the end of a string takes precedence over a secondary or tertiary 1.383 +* difference earlier in the string. 1.384 +* 1.385 +* To account for this, we put all of the primary orders at the beginning of 1.386 +* the string, followed by the secondary and tertiary orders. Each set of 1.387 +* orders is terminated by nulls so that a key for a string which is a initial 1.388 +* substring of another key will compare less without any special case. 1.389 +* 1.390 +* Here's a hypothetical example, with the collation element represented as a 1.391 +* three-digit number, one digit for primary, one for secondary, etc. 1.392 +* 1.393 +* String: A a B \u00C9 1.394 +* Collation Elements: 101 100 201 511 1.395 +* Collation Key: 1125<null>0001<null>1011<null> 1.396 +* 1.397 +* To make things even trickier, secondary differences (accent marks) are 1.398 +* compared starting at the *end* of the string in languages with French 1.399 +* secondary ordering. But when comparing the accent marks on a single base 1.400 +* character, they are compared from the beginning. To handle this, we reverse 1.401 +* all of the accents that belong to each base character, then we reverse the 1.402 +* entire string of secondary orderings at the end. 1.403 +*/ 1.404 +CollationKey& RuleBasedCollator::getCollationKey( 1.405 + const UnicodeString& source, 1.406 + CollationKey& sortkey, 1.407 + UErrorCode& status) const 1.408 +{ 1.409 + return getCollationKey(source.getBuffer(), source.length(), sortkey, status); 1.410 +} 1.411 + 1.412 +CollationKey& RuleBasedCollator::getCollationKey(const UChar* source, 1.413 + int32_t sourceLen, 1.414 + CollationKey& sortkey, 1.415 + UErrorCode& status) const 1.416 +{ 1.417 + if (U_FAILURE(status)) { 1.418 + return sortkey.setToBogus(); 1.419 + } 1.420 + if (sourceLen < -1 || (source == NULL && sourceLen != 0)) { 1.421 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.422 + return sortkey.setToBogus(); 1.423 + } 1.424 + 1.425 + if (sourceLen < 0) { 1.426 + sourceLen = u_strlen(source); 1.427 + } 1.428 + if (sourceLen == 0) { 1.429 + return sortkey.reset(); 1.430 + } 1.431 + 1.432 + int32_t resultLen = ucol_getCollationKey(ucollator, source, sourceLen, sortkey, status); 1.433 + 1.434 + if (U_SUCCESS(status)) { 1.435 + sortkey.setLength(resultLen); 1.436 + } else { 1.437 + sortkey.setToBogus(); 1.438 + } 1.439 + return sortkey; 1.440 +} 1.441 + 1.442 +/** 1.443 + * Return the maximum length of any expansion sequences that end with the 1.444 + * specified comparison order. 1.445 + * @param order a collation order returned by previous or next. 1.446 + * @return the maximum length of any expansion seuences ending with the 1.447 + * specified order or 1 if collation order does not occur at the end of any 1.448 + * expansion sequence. 1.449 + * @see CollationElementIterator#getMaxExpansion 1.450 + */ 1.451 +int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const 1.452 +{ 1.453 + uint8_t result; 1.454 + UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result); 1.455 + return result; 1.456 +} 1.457 + 1.458 +uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length, 1.459 + UErrorCode &status) 1.460 +{ 1.461 + if (U_FAILURE(status)) { return NULL; } 1.462 + LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000)); 1.463 + if (buffer.isNull()) { 1.464 + status = U_MEMORY_ALLOCATION_ERROR; 1.465 + return NULL; 1.466 + } 1.467 + length = cloneBinary(buffer.getAlias(), 20000, status); 1.468 + if (status == U_BUFFER_OVERFLOW_ERROR) { 1.469 + if (buffer.allocateInsteadAndCopy(length, 0) == NULL) { 1.470 + status = U_MEMORY_ALLOCATION_ERROR; 1.471 + return NULL; 1.472 + } 1.473 + status = U_ZERO_ERROR; 1.474 + length = cloneBinary(buffer.getAlias(), length, status); 1.475 + } 1.476 + if (U_FAILURE(status)) { return NULL; } 1.477 + return buffer.orphan(); 1.478 +} 1.479 + 1.480 + 1.481 +int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) 1.482 +{ 1.483 + return ucol_cloneBinary(ucollator, buffer, capacity, &status); 1.484 +} 1.485 + 1.486 +void RuleBasedCollator::setAttribute(UColAttribute attr, 1.487 + UColAttributeValue value, 1.488 + UErrorCode &status) 1.489 +{ 1.490 + if (U_FAILURE(status)) 1.491 + return; 1.492 + checkOwned(); 1.493 + ucol_setAttribute(ucollator, attr, value, &status); 1.494 +} 1.495 + 1.496 +UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, 1.497 + UErrorCode &status) const 1.498 +{ 1.499 + if (U_FAILURE(status)) 1.500 + return UCOL_DEFAULT; 1.501 + return ucol_getAttribute(ucollator, attr, &status); 1.502 +} 1.503 + 1.504 +uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) { 1.505 + checkOwned(); 1.506 + return ucol_setVariableTop(ucollator, varTop, len, &status); 1.507 +} 1.508 + 1.509 +uint32_t RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &status) { 1.510 + checkOwned(); 1.511 + return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status); 1.512 +} 1.513 + 1.514 +void RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &status) { 1.515 + checkOwned(); 1.516 + ucol_restoreVariableTop(ucollator, varTop, &status); 1.517 +} 1.518 + 1.519 +uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const { 1.520 + return ucol_getVariableTop(ucollator, &status); 1.521 +} 1.522 + 1.523 +int32_t RuleBasedCollator::getSortKey(const UnicodeString& source, 1.524 + uint8_t *result, int32_t resultLength) 1.525 + const 1.526 +{ 1.527 + return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength); 1.528 +} 1.529 + 1.530 +int32_t RuleBasedCollator::getSortKey(const UChar *source, 1.531 + int32_t sourceLength, uint8_t *result, 1.532 + int32_t resultLength) const 1.533 +{ 1.534 + return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength); 1.535 +} 1.536 + 1.537 +int32_t RuleBasedCollator::getReorderCodes(int32_t *dest, 1.538 + int32_t destCapacity, 1.539 + UErrorCode& status) const 1.540 +{ 1.541 + return ucol_getReorderCodes(ucollator, dest, destCapacity, &status); 1.542 +} 1.543 + 1.544 +void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, 1.545 + int32_t reorderCodesLength, 1.546 + UErrorCode& status) 1.547 +{ 1.548 + checkOwned(); 1.549 + ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status); 1.550 +} 1.551 + 1.552 +int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode, 1.553 + int32_t* dest, 1.554 + int32_t destCapacity, 1.555 + UErrorCode& status) 1.556 +{ 1.557 + return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status); 1.558 +} 1.559 + 1.560 +/** 1.561 +* Create a hash code for this collation. Just hash the main rule table -- that 1.562 +* should be good enough for almost any use. 1.563 +*/ 1.564 +int32_t RuleBasedCollator::hashCode() const 1.565 +{ 1.566 + int32_t length; 1.567 + const UChar *rules = ucol_getRules(ucollator, &length); 1.568 + return ustr_hashUCharsN(rules, length); 1.569 +} 1.570 + 1.571 +/** 1.572 +* return the locale of this collator 1.573 +*/ 1.574 +Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const { 1.575 + const char *result = ucol_getLocaleByType(ucollator, type, &status); 1.576 + if(result == NULL) { 1.577 + Locale res(""); 1.578 + res.setToBogus(); 1.579 + return res; 1.580 + } else { 1.581 + return Locale(result); 1.582 + } 1.583 +} 1.584 + 1.585 +void 1.586 +RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) { 1.587 + checkOwned(); 1.588 + char* rloc = uprv_strdup(requestedLocale.getName()); 1.589 + if (rloc) { 1.590 + char* vloc = uprv_strdup(validLocale.getName()); 1.591 + if (vloc) { 1.592 + char* aloc = uprv_strdup(actualLocale.getName()); 1.593 + if (aloc) { 1.594 + ucol_setReqValidLocales(ucollator, rloc, vloc, aloc); 1.595 + return; 1.596 + } 1.597 + uprv_free(vloc); 1.598 + } 1.599 + uprv_free(rloc); 1.600 + } 1.601 +} 1.602 + 1.603 +// RuleBaseCollatorNew private constructor ---------------------------------- 1.604 + 1.605 +RuleBasedCollator::RuleBasedCollator() 1.606 + : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) 1.607 +{ 1.608 +} 1.609 + 1.610 +RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale, 1.611 + UErrorCode& status) 1.612 + : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) 1.613 +{ 1.614 + if (U_FAILURE(status)) 1.615 + return; 1.616 + 1.617 + /* 1.618 + Try to load, in order: 1.619 + 1. The desired locale's collation. 1.620 + 2. A fallback of the desired locale. 1.621 + 3. The default locale's collation. 1.622 + 4. A fallback of the default locale. 1.623 + 5. The default collation rules, which contains en_US collation rules. 1.624 + 1.625 + To reiterate, we try: 1.626 + Specific: 1.627 + language+country+variant 1.628 + language+country 1.629 + language 1.630 + Default: 1.631 + language+country+variant 1.632 + language+country 1.633 + language 1.634 + Root: (aka DEFAULTRULES) 1.635 + steps 1-5 are handled by resource bundle fallback mechanism. 1.636 + however, in a very unprobable situation that no resource bundle 1.637 + data exists, step 5 is repeated with hardcoded default rules. 1.638 + */ 1.639 + 1.640 + setUCollator(desiredLocale, status); 1.641 + 1.642 + if (U_FAILURE(status)) 1.643 + { 1.644 + status = U_ZERO_ERROR; 1.645 + 1.646 + setUCollator(kRootLocaleName, status); 1.647 + if (status == U_ZERO_ERROR) { 1.648 + status = U_USING_DEFAULT_WARNING; 1.649 + } 1.650 + } 1.651 + 1.652 + if (U_SUCCESS(status)) 1.653 + { 1.654 + setRuleStringFromCollator(); 1.655 + } 1.656 +} 1.657 + 1.658 +void 1.659 +RuleBasedCollator::setUCollator(const char *locale, 1.660 + UErrorCode &status) 1.661 +{ 1.662 + if (U_FAILURE(status)) { 1.663 + return; 1.664 + } 1.665 + if (ucollator && dataIsOwned) 1.666 + ucol_close(ucollator); 1.667 + ucollator = ucol_open_internal(locale, &status); 1.668 + dataIsOwned = TRUE; 1.669 + isWriteThroughAlias = FALSE; 1.670 +} 1.671 + 1.672 + 1.673 +void 1.674 +RuleBasedCollator::checkOwned() { 1.675 + if (!(dataIsOwned || isWriteThroughAlias)) { 1.676 + UErrorCode status = U_ZERO_ERROR; 1.677 + ucollator = ucol_safeClone(ucollator, NULL, NULL, &status); 1.678 + setRuleStringFromCollator(); 1.679 + dataIsOwned = TRUE; 1.680 + isWriteThroughAlias = FALSE; 1.681 + } 1.682 +} 1.683 + 1.684 + 1.685 +int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale, 1.686 + char *buffer, 1.687 + int32_t capacity, 1.688 + UErrorCode &status) const { 1.689 + /* simply delegate */ 1.690 + return ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &status); 1.691 +} 1.692 + 1.693 + 1.694 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) 1.695 + 1.696 +U_NAMESPACE_END 1.697 + 1.698 +#endif /* #if !UCONFIG_NO_COLLATION */