intl/icu/source/i18n/tblcoll.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 * Copyright (C) 1996-2013, International Business Machines Corporation and
michael@0 4 * others. All Rights Reserved.
michael@0 5 ******************************************************************************
michael@0 6 */
michael@0 7
michael@0 8 /**
michael@0 9 * File tblcoll.cpp
michael@0 10 *
michael@0 11 * Created by: Helena Shih
michael@0 12 *
michael@0 13 * Modification History:
michael@0 14 *
michael@0 15 * Date Name Description
michael@0 16 * 2/5/97 aliu Added streamIn and streamOut methods. Added
michael@0 17 * constructor which reads RuleBasedCollator object from
michael@0 18 * a binary file. Added writeToFile method which streams
michael@0 19 * RuleBasedCollator out to a binary file. The streamIn
michael@0 20 * and streamOut methods use istream and ostream objects
michael@0 21 * in binary mode.
michael@0 22 * 2/11/97 aliu Moved declarations out of for loop initializer.
michael@0 23 * Added Mac compatibility #ifdef for ios::nocreate.
michael@0 24 * 2/12/97 aliu Modified to use TableCollationData sub-object to
michael@0 25 * hold invariant data.
michael@0 26 * 2/13/97 aliu Moved several methods into this class from Collation.
michael@0 27 * Added a private RuleBasedCollator(Locale&) constructor,
michael@0 28 * to be used by Collator::getInstance(). General
michael@0 29 * clean up. Made use of UErrorCode variables consistent.
michael@0 30 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
michael@0 31 * constructor and getDynamicClassID.
michael@0 32 * 3/5/97 aliu Changed compaction cycle to improve performance. We
michael@0 33 * use the maximum allowable value which is kBlockCount.
michael@0 34 * Modified getRules() to load rules dynamically. Changed
michael@0 35 * constructFromFile() call to accommodate this (added
michael@0 36 * parameter to specify whether binary loading is to
michael@0 37 * take place).
michael@0 38 * 05/06/97 helena Added memory allocation error check.
michael@0 39 * 6/20/97 helena Java class name change.
michael@0 40 * 6/23/97 helena Adding comments to make code more readable.
michael@0 41 * 09/03/97 helena Added createCollationKeyValues().
michael@0 42 * 06/26/98 erm Changes for CollationKeys using byte arrays.
michael@0 43 * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java
michael@0 44 * 04/23/99 stephen Removed EDecompositionMode, merged with
michael@0 45 * Normalizer::EMode
michael@0 46 * 06/14/99 stephen Removed kResourceBundleSuffix
michael@0 47 * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx
michael@0 48 * files are no longer used.
michael@0 49 * 11/02/99 helena Collator performance enhancements. Special case
michael@0 50 * for NO_OP situations.
michael@0 51 * 11/17/99 srl More performance enhancements. Inlined some internal functions.
michael@0 52 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
michael@0 53 * to implementation file.
michael@0 54 * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
michael@0 55 */
michael@0 56
michael@0 57 #include "unicode/utypes.h"
michael@0 58
michael@0 59 #if !UCONFIG_NO_COLLATION
michael@0 60
michael@0 61 #include "unicode/tblcoll.h"
michael@0 62 #include "unicode/coleitr.h"
michael@0 63 #include "unicode/ures.h"
michael@0 64 #include "unicode/uset.h"
michael@0 65 #include "ucol_imp.h"
michael@0 66 #include "uresimp.h"
michael@0 67 #include "uhash.h"
michael@0 68 #include "cmemory.h"
michael@0 69 #include "cstring.h"
michael@0 70 #include "putilimp.h"
michael@0 71 #include "ustr_imp.h"
michael@0 72
michael@0 73 /* public RuleBasedCollator constructor ---------------------------------- */
michael@0 74
michael@0 75 U_NAMESPACE_BEGIN
michael@0 76
michael@0 77 /**
michael@0 78 * Copy constructor, aliasing, not write-through
michael@0 79 */
michael@0 80 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
michael@0 81 : Collator(that)
michael@0 82 , dataIsOwned(FALSE)
michael@0 83 , isWriteThroughAlias(FALSE)
michael@0 84 , ucollator(NULL)
michael@0 85 {
michael@0 86 RuleBasedCollator::operator=(that);
michael@0 87 }
michael@0 88
michael@0 89 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
michael@0 90 UErrorCode& status) :
michael@0 91 dataIsOwned(FALSE)
michael@0 92 {
michael@0 93 construct(rules,
michael@0 94 UCOL_DEFAULT_STRENGTH,
michael@0 95 UCOL_DEFAULT,
michael@0 96 status);
michael@0 97 }
michael@0 98
michael@0 99 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
michael@0 100 ECollationStrength collationStrength,
michael@0 101 UErrorCode& status) : dataIsOwned(FALSE)
michael@0 102 {
michael@0 103 construct(rules,
michael@0 104 (UColAttributeValue)collationStrength,
michael@0 105 UCOL_DEFAULT,
michael@0 106 status);
michael@0 107 }
michael@0 108
michael@0 109 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
michael@0 110 UColAttributeValue decompositionMode,
michael@0 111 UErrorCode& status) :
michael@0 112 dataIsOwned(FALSE)
michael@0 113 {
michael@0 114 construct(rules,
michael@0 115 UCOL_DEFAULT_STRENGTH,
michael@0 116 decompositionMode,
michael@0 117 status);
michael@0 118 }
michael@0 119
michael@0 120 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
michael@0 121 ECollationStrength collationStrength,
michael@0 122 UColAttributeValue decompositionMode,
michael@0 123 UErrorCode& status) : dataIsOwned(FALSE)
michael@0 124 {
michael@0 125 construct(rules,
michael@0 126 (UColAttributeValue)collationStrength,
michael@0 127 decompositionMode,
michael@0 128 status);
michael@0 129 }
michael@0 130 RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
michael@0 131 const RuleBasedCollator *base,
michael@0 132 UErrorCode &status) :
michael@0 133 dataIsOwned(TRUE),
michael@0 134 isWriteThroughAlias(FALSE)
michael@0 135 {
michael@0 136 ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
michael@0 137 }
michael@0 138
michael@0 139 void
michael@0 140 RuleBasedCollator::setRuleStringFromCollator()
michael@0 141 {
michael@0 142 int32_t length;
michael@0 143 const UChar *r = ucol_getRules(ucollator, &length);
michael@0 144
michael@0 145 if (r && length > 0) {
michael@0 146 // alias the rules string
michael@0 147 urulestring.setTo(TRUE, r, length);
michael@0 148 }
michael@0 149 else {
michael@0 150 urulestring.truncate(0); // Clear string.
michael@0 151 }
michael@0 152 }
michael@0 153
michael@0 154 // not aliasing, not write-through
michael@0 155 void
michael@0 156 RuleBasedCollator::construct(const UnicodeString& rules,
michael@0 157 UColAttributeValue collationStrength,
michael@0 158 UColAttributeValue decompositionMode,
michael@0 159 UErrorCode& status)
michael@0 160 {
michael@0 161 ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
michael@0 162 decompositionMode, collationStrength,
michael@0 163 NULL, &status);
michael@0 164
michael@0 165 dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
michael@0 166 isWriteThroughAlias = FALSE;
michael@0 167
michael@0 168 if(ucollator == NULL) {
michael@0 169 if(U_SUCCESS(status)) {
michael@0 170 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 171 }
michael@0 172 return; // Failure
michael@0 173 }
michael@0 174
michael@0 175 setRuleStringFromCollator();
michael@0 176 }
michael@0 177
michael@0 178 /* RuleBasedCollator public destructor ----------------------------------- */
michael@0 179
michael@0 180 RuleBasedCollator::~RuleBasedCollator()
michael@0 181 {
michael@0 182 if (dataIsOwned)
michael@0 183 {
michael@0 184 ucol_close(ucollator);
michael@0 185 }
michael@0 186 ucollator = 0;
michael@0 187 }
michael@0 188
michael@0 189 /* RuleBasedCollator public methods -------------------------------------- */
michael@0 190
michael@0 191 UBool RuleBasedCollator::operator==(const Collator& that) const
michael@0 192 {
michael@0 193 /* only checks for address equals here */
michael@0 194 if (this == &that) {
michael@0 195 return TRUE;
michael@0 196 }
michael@0 197 if (!Collator::operator==(that)) {
michael@0 198 return FALSE; /* not the same class */
michael@0 199 }
michael@0 200
michael@0 201 RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;
michael@0 202
michael@0 203 return ucol_equals(this->ucollator, thatAlias.ucollator);
michael@0 204 }
michael@0 205
michael@0 206 // aliasing, not write-through
michael@0 207 RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
michael@0 208 {
michael@0 209 if (this == &that) { return *this; }
michael@0 210
michael@0 211 UErrorCode intStatus = U_ZERO_ERROR;
michael@0 212 UCollator *ucol = ucol_safeClone(that.ucollator, NULL, NULL, &intStatus);
michael@0 213 if (U_FAILURE(intStatus)) { return *this; }
michael@0 214
michael@0 215 if (dataIsOwned) {
michael@0 216 ucol_close(ucollator);
michael@0 217 }
michael@0 218 ucollator = ucol;
michael@0 219 dataIsOwned = TRUE;
michael@0 220 isWriteThroughAlias = FALSE;
michael@0 221 setRuleStringFromCollator();
michael@0 222 return *this;
michael@0 223 }
michael@0 224
michael@0 225 // aliasing, not write-through
michael@0 226 Collator* RuleBasedCollator::clone() const
michael@0 227 {
michael@0 228 RuleBasedCollator* coll = new RuleBasedCollator(*this);
michael@0 229 // There is a small chance that the internal ucol_safeClone() call fails.
michael@0 230 if (coll != NULL && coll->ucollator == NULL) {
michael@0 231 delete coll;
michael@0 232 return NULL;
michael@0 233 }
michael@0 234 return coll;
michael@0 235 }
michael@0 236
michael@0 237
michael@0 238 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
michael@0 239 (const UnicodeString& source) const
michael@0 240 {
michael@0 241 UErrorCode status = U_ZERO_ERROR;
michael@0 242 CollationElementIterator *result = new CollationElementIterator(source, this,
michael@0 243 status);
michael@0 244 if (U_FAILURE(status)) {
michael@0 245 delete result;
michael@0 246 return NULL;
michael@0 247 }
michael@0 248
michael@0 249 return result;
michael@0 250 }
michael@0 251
michael@0 252 /**
michael@0 253 * Create a CollationElementIterator object that will iterate over the
michael@0 254 * elements in a string, using the collation rules defined in this
michael@0 255 * RuleBasedCollator
michael@0 256 */
michael@0 257 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
michael@0 258 (const CharacterIterator& source) const
michael@0 259 {
michael@0 260 UErrorCode status = U_ZERO_ERROR;
michael@0 261 CollationElementIterator *result = new CollationElementIterator(source, this,
michael@0 262 status);
michael@0 263
michael@0 264 if (U_FAILURE(status)) {
michael@0 265 delete result;
michael@0 266 return NULL;
michael@0 267 }
michael@0 268
michael@0 269 return result;
michael@0 270 }
michael@0 271
michael@0 272 /**
michael@0 273 * Return a string representation of this collator's rules. The string can
michael@0 274 * later be passed to the constructor that takes a UnicodeString argument,
michael@0 275 * which will construct a collator that's functionally identical to this one.
michael@0 276 * You can also allow users to edit the string in order to change the collation
michael@0 277 * data, or you can print it out for inspection, or whatever.
michael@0 278 */
michael@0 279 const UnicodeString& RuleBasedCollator::getRules() const
michael@0 280 {
michael@0 281 return urulestring;
michael@0 282 }
michael@0 283
michael@0 284 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
michael@0 285 {
michael@0 286 int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1);
michael@0 287
michael@0 288 if (rulesize > 0) {
michael@0 289 UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) );
michael@0 290 if(rules != NULL) {
michael@0 291 ucol_getRulesEx(ucollator, delta, rules, rulesize);
michael@0 292 buffer.setTo(rules, rulesize);
michael@0 293 uprv_free(rules);
michael@0 294 } else { // couldn't allocate
michael@0 295 buffer.remove();
michael@0 296 }
michael@0 297 }
michael@0 298 else {
michael@0 299 buffer.remove();
michael@0 300 }
michael@0 301 }
michael@0 302
michael@0 303 UnicodeSet *
michael@0 304 RuleBasedCollator::getTailoredSet(UErrorCode &status) const
michael@0 305 {
michael@0 306 if(U_FAILURE(status)) {
michael@0 307 return NULL;
michael@0 308 }
michael@0 309 return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
michael@0 310 }
michael@0 311
michael@0 312
michael@0 313 void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const
michael@0 314 {
michael@0 315 if (versionInfo!=NULL){
michael@0 316 ucol_getVersion(ucollator, versionInfo);
michael@0 317 }
michael@0 318 }
michael@0 319
michael@0 320 /**
michael@0 321 * Compare two strings using this collator
michael@0 322 */
michael@0 323 UCollationResult RuleBasedCollator::compare(
michael@0 324 const UnicodeString& source,
michael@0 325 const UnicodeString& target,
michael@0 326 int32_t length,
michael@0 327 UErrorCode &status) const
michael@0 328 {
michael@0 329 return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
michael@0 330 }
michael@0 331
michael@0 332 UCollationResult RuleBasedCollator::compare(const UChar* source,
michael@0 333 int32_t sourceLength,
michael@0 334 const UChar* target,
michael@0 335 int32_t targetLength,
michael@0 336 UErrorCode &status) const
michael@0 337 {
michael@0 338 if(U_SUCCESS(status)) {
michael@0 339 return ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
michael@0 340 } else {
michael@0 341 return UCOL_EQUAL;
michael@0 342 }
michael@0 343 }
michael@0 344
michael@0 345 UCollationResult RuleBasedCollator::compare(
michael@0 346 const UnicodeString& source,
michael@0 347 const UnicodeString& target,
michael@0 348 UErrorCode &status) const
michael@0 349 {
michael@0 350 if(U_SUCCESS(status)) {
michael@0 351 return ucol_strcoll(ucollator, source.getBuffer(), source.length(),
michael@0 352 target.getBuffer(), target.length());
michael@0 353 } else {
michael@0 354 return UCOL_EQUAL;
michael@0 355 }
michael@0 356 }
michael@0 357
michael@0 358 UCollationResult RuleBasedCollator::compare(UCharIterator &sIter,
michael@0 359 UCharIterator &tIter,
michael@0 360 UErrorCode &status) const {
michael@0 361 if(U_SUCCESS(status)) {
michael@0 362 return ucol_strcollIter(ucollator, &sIter, &tIter, &status);
michael@0 363 } else {
michael@0 364 return UCOL_EQUAL;
michael@0 365 }
michael@0 366 }
michael@0 367
michael@0 368 /**
michael@0 369 * Retrieve a collation key for the specified string. The key can be compared
michael@0 370 * with other collation keys using a bitwise comparison (e.g. memcmp) to find
michael@0 371 * the ordering of their respective source strings. This is handy when doing a
michael@0 372 * sort, where each sort key must be compared many times.
michael@0 373 *
michael@0 374 * The basic algorithm here is to find all of the collation elements for each
michael@0 375 * character in the source string, convert them to an ASCII representation, and
michael@0 376 * put them into the collation key. But it's trickier than that. Each
michael@0 377 * collation element in a string has three components: primary ('A' vs 'B'),
michael@0 378 * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference
michael@0 379 * at the end of a string takes precedence over a secondary or tertiary
michael@0 380 * difference earlier in the string.
michael@0 381 *
michael@0 382 * To account for this, we put all of the primary orders at the beginning of
michael@0 383 * the string, followed by the secondary and tertiary orders. Each set of
michael@0 384 * orders is terminated by nulls so that a key for a string which is a initial
michael@0 385 * substring of another key will compare less without any special case.
michael@0 386 *
michael@0 387 * Here's a hypothetical example, with the collation element represented as a
michael@0 388 * three-digit number, one digit for primary, one for secondary, etc.
michael@0 389 *
michael@0 390 * String: A a B \u00C9
michael@0 391 * Collation Elements: 101 100 201 511
michael@0 392 * Collation Key: 1125<null>0001<null>1011<null>
michael@0 393 *
michael@0 394 * To make things even trickier, secondary differences (accent marks) are
michael@0 395 * compared starting at the *end* of the string in languages with French
michael@0 396 * secondary ordering. But when comparing the accent marks on a single base
michael@0 397 * character, they are compared from the beginning. To handle this, we reverse
michael@0 398 * all of the accents that belong to each base character, then we reverse the
michael@0 399 * entire string of secondary orderings at the end.
michael@0 400 */
michael@0 401 CollationKey& RuleBasedCollator::getCollationKey(
michael@0 402 const UnicodeString& source,
michael@0 403 CollationKey& sortkey,
michael@0 404 UErrorCode& status) const
michael@0 405 {
michael@0 406 return getCollationKey(source.getBuffer(), source.length(), sortkey, status);
michael@0 407 }
michael@0 408
michael@0 409 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
michael@0 410 int32_t sourceLen,
michael@0 411 CollationKey& sortkey,
michael@0 412 UErrorCode& status) const
michael@0 413 {
michael@0 414 if (U_FAILURE(status)) {
michael@0 415 return sortkey.setToBogus();
michael@0 416 }
michael@0 417 if (sourceLen < -1 || (source == NULL && sourceLen != 0)) {
michael@0 418 status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 419 return sortkey.setToBogus();
michael@0 420 }
michael@0 421
michael@0 422 if (sourceLen < 0) {
michael@0 423 sourceLen = u_strlen(source);
michael@0 424 }
michael@0 425 if (sourceLen == 0) {
michael@0 426 return sortkey.reset();
michael@0 427 }
michael@0 428
michael@0 429 int32_t resultLen = ucol_getCollationKey(ucollator, source, sourceLen, sortkey, status);
michael@0 430
michael@0 431 if (U_SUCCESS(status)) {
michael@0 432 sortkey.setLength(resultLen);
michael@0 433 } else {
michael@0 434 sortkey.setToBogus();
michael@0 435 }
michael@0 436 return sortkey;
michael@0 437 }
michael@0 438
michael@0 439 /**
michael@0 440 * Return the maximum length of any expansion sequences that end with the
michael@0 441 * specified comparison order.
michael@0 442 * @param order a collation order returned by previous or next.
michael@0 443 * @return the maximum length of any expansion seuences ending with the
michael@0 444 * specified order or 1 if collation order does not occur at the end of any
michael@0 445 * expansion sequence.
michael@0 446 * @see CollationElementIterator#getMaxExpansion
michael@0 447 */
michael@0 448 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
michael@0 449 {
michael@0 450 uint8_t result;
michael@0 451 UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
michael@0 452 return result;
michael@0 453 }
michael@0 454
michael@0 455 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
michael@0 456 UErrorCode &status)
michael@0 457 {
michael@0 458 if (U_FAILURE(status)) { return NULL; }
michael@0 459 LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000));
michael@0 460 if (buffer.isNull()) {
michael@0 461 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 462 return NULL;
michael@0 463 }
michael@0 464 length = cloneBinary(buffer.getAlias(), 20000, status);
michael@0 465 if (status == U_BUFFER_OVERFLOW_ERROR) {
michael@0 466 if (buffer.allocateInsteadAndCopy(length, 0) == NULL) {
michael@0 467 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 468 return NULL;
michael@0 469 }
michael@0 470 status = U_ZERO_ERROR;
michael@0 471 length = cloneBinary(buffer.getAlias(), length, status);
michael@0 472 }
michael@0 473 if (U_FAILURE(status)) { return NULL; }
michael@0 474 return buffer.orphan();
michael@0 475 }
michael@0 476
michael@0 477
michael@0 478 int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
michael@0 479 {
michael@0 480 return ucol_cloneBinary(ucollator, buffer, capacity, &status);
michael@0 481 }
michael@0 482
michael@0 483 void RuleBasedCollator::setAttribute(UColAttribute attr,
michael@0 484 UColAttributeValue value,
michael@0 485 UErrorCode &status)
michael@0 486 {
michael@0 487 if (U_FAILURE(status))
michael@0 488 return;
michael@0 489 checkOwned();
michael@0 490 ucol_setAttribute(ucollator, attr, value, &status);
michael@0 491 }
michael@0 492
michael@0 493 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
michael@0 494 UErrorCode &status) const
michael@0 495 {
michael@0 496 if (U_FAILURE(status))
michael@0 497 return UCOL_DEFAULT;
michael@0 498 return ucol_getAttribute(ucollator, attr, &status);
michael@0 499 }
michael@0 500
michael@0 501 uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) {
michael@0 502 checkOwned();
michael@0 503 return ucol_setVariableTop(ucollator, varTop, len, &status);
michael@0 504 }
michael@0 505
michael@0 506 uint32_t RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &status) {
michael@0 507 checkOwned();
michael@0 508 return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
michael@0 509 }
michael@0 510
michael@0 511 void RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &status) {
michael@0 512 checkOwned();
michael@0 513 ucol_restoreVariableTop(ucollator, varTop, &status);
michael@0 514 }
michael@0 515
michael@0 516 uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
michael@0 517 return ucol_getVariableTop(ucollator, &status);
michael@0 518 }
michael@0 519
michael@0 520 int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
michael@0 521 uint8_t *result, int32_t resultLength)
michael@0 522 const
michael@0 523 {
michael@0 524 return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
michael@0 525 }
michael@0 526
michael@0 527 int32_t RuleBasedCollator::getSortKey(const UChar *source,
michael@0 528 int32_t sourceLength, uint8_t *result,
michael@0 529 int32_t resultLength) const
michael@0 530 {
michael@0 531 return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
michael@0 532 }
michael@0 533
michael@0 534 int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
michael@0 535 int32_t destCapacity,
michael@0 536 UErrorCode& status) const
michael@0 537 {
michael@0 538 return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
michael@0 539 }
michael@0 540
michael@0 541 void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
michael@0 542 int32_t reorderCodesLength,
michael@0 543 UErrorCode& status)
michael@0 544 {
michael@0 545 checkOwned();
michael@0 546 ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
michael@0 547 }
michael@0 548
michael@0 549 int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode,
michael@0 550 int32_t* dest,
michael@0 551 int32_t destCapacity,
michael@0 552 UErrorCode& status)
michael@0 553 {
michael@0 554 return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status);
michael@0 555 }
michael@0 556
michael@0 557 /**
michael@0 558 * Create a hash code for this collation. Just hash the main rule table -- that
michael@0 559 * should be good enough for almost any use.
michael@0 560 */
michael@0 561 int32_t RuleBasedCollator::hashCode() const
michael@0 562 {
michael@0 563 int32_t length;
michael@0 564 const UChar *rules = ucol_getRules(ucollator, &length);
michael@0 565 return ustr_hashUCharsN(rules, length);
michael@0 566 }
michael@0 567
michael@0 568 /**
michael@0 569 * return the locale of this collator
michael@0 570 */
michael@0 571 Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
michael@0 572 const char *result = ucol_getLocaleByType(ucollator, type, &status);
michael@0 573 if(result == NULL) {
michael@0 574 Locale res("");
michael@0 575 res.setToBogus();
michael@0 576 return res;
michael@0 577 } else {
michael@0 578 return Locale(result);
michael@0 579 }
michael@0 580 }
michael@0 581
michael@0 582 void
michael@0 583 RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
michael@0 584 checkOwned();
michael@0 585 char* rloc = uprv_strdup(requestedLocale.getName());
michael@0 586 if (rloc) {
michael@0 587 char* vloc = uprv_strdup(validLocale.getName());
michael@0 588 if (vloc) {
michael@0 589 char* aloc = uprv_strdup(actualLocale.getName());
michael@0 590 if (aloc) {
michael@0 591 ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
michael@0 592 return;
michael@0 593 }
michael@0 594 uprv_free(vloc);
michael@0 595 }
michael@0 596 uprv_free(rloc);
michael@0 597 }
michael@0 598 }
michael@0 599
michael@0 600 // RuleBasedCollator private constructors -----------------------------------
michael@0 601
michael@0 602 RuleBasedCollator::RuleBasedCollator()
michael@0 603 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
michael@0 604 {
michael@0 605 }
michael@0 606
michael@0 607 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
michael@0 608 UErrorCode& status)
michael@0 609 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
michael@0 610 {
michael@0 611 if (U_FAILURE(status))
michael@0 612 return;
michael@0 613
michael@0 614 /*
michael@0 615 Try to load, in order:
michael@0 616 1. The desired locale's collation.
michael@0 617 2. A fallback of the desired locale.
michael@0 618 3. The default locale's collation.
michael@0 619 4. A fallback of the default locale.
michael@0 620 5. The default collation rules, which contains en_US collation rules.
michael@0 621
michael@0 622 To reiterate, we try:
michael@0 623 Specific:
michael@0 624 language+country+variant
michael@0 625 language+country
michael@0 626 language
michael@0 627 Default:
michael@0 628 language+country+variant
michael@0 629 language+country
michael@0 630 language
michael@0 631 Root: (aka DEFAULTRULES)
michael@0 632 steps 1-5 are handled by resource bundle fallback mechanism.
michael@0 633 however, in a very unprobable situation that no resource bundle
michael@0 634 data exists, step 5 is repeated with hardcoded default rules.
michael@0 635 */
michael@0 636
michael@0 637 setUCollator(desiredLocale, status);
michael@0 638
michael@0 639 if (U_FAILURE(status))
michael@0 640 {
michael@0 641 status = U_ZERO_ERROR;
michael@0 642
michael@0 643 setUCollator(kRootLocaleName, status);
michael@0 644 if (status == U_ZERO_ERROR) {
michael@0 645 status = U_USING_DEFAULT_WARNING;
michael@0 646 }
michael@0 647 }
michael@0 648
michael@0 649 if (U_SUCCESS(status))
michael@0 650 {
michael@0 651 setRuleStringFromCollator();
michael@0 652 }
michael@0 653 }
michael@0 654
michael@0 655 void
michael@0 656 RuleBasedCollator::setUCollator(const char *locale,
michael@0 657 UErrorCode &status)
michael@0 658 {
michael@0 659 if (U_FAILURE(status)) {
michael@0 660 return;
michael@0 661 }
michael@0 662 if (ucollator && dataIsOwned)
michael@0 663 ucol_close(ucollator);
michael@0 664 ucollator = ucol_open_internal(locale, &status);
michael@0 665 dataIsOwned = TRUE;
michael@0 666 isWriteThroughAlias = FALSE;
michael@0 667 }
michael@0 668
michael@0 669
michael@0 670 void
michael@0 671 RuleBasedCollator::checkOwned() {
michael@0 672 if (!(dataIsOwned || isWriteThroughAlias)) {
michael@0 673 UErrorCode status = U_ZERO_ERROR;
michael@0 674 ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
michael@0 675 setRuleStringFromCollator();
michael@0 676 dataIsOwned = TRUE;
michael@0 677 isWriteThroughAlias = FALSE;
michael@0 678 }
michael@0 679 }
michael@0 680
michael@0 681
michael@0 682 int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
michael@0 683 char *buffer,
michael@0 684 int32_t capacity,
michael@0 685 UErrorCode &status) const {
michael@0 686 /* simply delegate */
michael@0 687 return ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &status);
michael@0 688 }
michael@0 689
michael@0 690
michael@0 691 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
michael@0 692
michael@0 693 U_NAMESPACE_END
michael@0 694
michael@0 695 #endif /* #if !UCONFIG_NO_COLLATION */

mercurial