intl/icu/source/i18n/ucoleitr.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 * Copyright (C) 2001-2011, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 ******************************************************************************
michael@0 6 *
michael@0 7 * File ucoleitr.cpp
michael@0 8 *
michael@0 9 * Modification History:
michael@0 10 *
michael@0 11 * Date Name Description
michael@0 12 * 02/15/2001 synwee Modified all methods to process its own function
michael@0 13 * instead of calling the equivalent c++ api (coleitr.h)
michael@0 14 ******************************************************************************/
michael@0 15
michael@0 16 #include "unicode/utypes.h"
michael@0 17
michael@0 18 #if !UCONFIG_NO_COLLATION
michael@0 19
michael@0 20 #include "unicode/ucoleitr.h"
michael@0 21 #include "unicode/ustring.h"
michael@0 22 #include "unicode/sortkey.h"
michael@0 23 #include "unicode/uobject.h"
michael@0 24 #include "ucol_imp.h"
michael@0 25 #include "cmemory.h"
michael@0 26
michael@0 27 U_NAMESPACE_USE
michael@0 28
// General-purpose buffer length (not referenced by the code visible in this file).
#define BUFFER_LENGTH 100

// Capacity of the in-object arrays used by RCEBuffer / PCEBuffer, and the
// number of slots added each time one of those buffers has to grow.
#define DEFAULT_BUFFER_SIZE 16
#define BUFFER_GROW 8

// Element count of a true array (do not pass a pointer).
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

// Copies `count` elements from src to dst; element size is taken from src.
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof((src)[0]))

// Allocates uninitialized storage for `count` objects of `type`.
#define NEW_ARRAY(type, count) ((type *) uprv_malloc((count) * sizeof(type)))

// Resizes `array` to hold `newSize` elements; yields the uprv_realloc() result.
#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof((array)[0]))

// Releases storage obtained through NEW_ARRAY / GROW_ARRAY.
#define DELETE_ARRAY(array) uprv_free((void *) (array))
michael@0 43
michael@0 44 typedef struct icu::collIterate collIterator;
michael@0 45
/**
 * One raw (unprocessed) 32-bit collation element together with the pair of
 * iterator offsets recorded around the call that produced it.
 */
struct RCEI
{
    uint32_t ce;    // raw collation element
    int32_t  low;   // lower offset bound recorded for this element
    int32_t  high;  // upper offset bound recorded for this element
};
michael@0 52
michael@0 53 U_NAMESPACE_BEGIN
michael@0 54
michael@0 55 struct RCEBuffer
michael@0 56 {
michael@0 57 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
michael@0 58 RCEI *buffer;
michael@0 59 int32_t bufferIndex;
michael@0 60 int32_t bufferSize;
michael@0 61
michael@0 62 RCEBuffer();
michael@0 63 ~RCEBuffer();
michael@0 64
michael@0 65 UBool empty() const;
michael@0 66 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh);
michael@0 67 const RCEI *get();
michael@0 68 };
michael@0 69
michael@0 70 RCEBuffer::RCEBuffer()
michael@0 71 {
michael@0 72 buffer = defaultBuffer;
michael@0 73 bufferIndex = 0;
michael@0 74 bufferSize = DEFAULT_BUFFER_SIZE;
michael@0 75 }
michael@0 76
michael@0 77 RCEBuffer::~RCEBuffer()
michael@0 78 {
michael@0 79 if (buffer != defaultBuffer) {
michael@0 80 DELETE_ARRAY(buffer);
michael@0 81 }
michael@0 82 }
michael@0 83
michael@0 84 UBool RCEBuffer::empty() const
michael@0 85 {
michael@0 86 return bufferIndex <= 0;
michael@0 87 }
michael@0 88
michael@0 89 void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh)
michael@0 90 {
michael@0 91 if (bufferIndex >= bufferSize) {
michael@0 92 RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
michael@0 93
michael@0 94 ARRAY_COPY(newBuffer, buffer, bufferSize);
michael@0 95
michael@0 96 if (buffer != defaultBuffer) {
michael@0 97 DELETE_ARRAY(buffer);
michael@0 98 }
michael@0 99
michael@0 100 buffer = newBuffer;
michael@0 101 bufferSize += BUFFER_GROW;
michael@0 102 }
michael@0 103
michael@0 104 buffer[bufferIndex].ce = ce;
michael@0 105 buffer[bufferIndex].low = ixLow;
michael@0 106 buffer[bufferIndex].high = ixHigh;
michael@0 107
michael@0 108 bufferIndex += 1;
michael@0 109 }
michael@0 110
michael@0 111 const RCEI *RCEBuffer::get()
michael@0 112 {
michael@0 113 if (bufferIndex > 0) {
michael@0 114 return &buffer[--bufferIndex];
michael@0 115 }
michael@0 116
michael@0 117 return NULL;
michael@0 118 }
michael@0 119
/**
 * One processed 64-bit collation element together with the pair of source
 * offsets that were recorded for the raw element it was derived from.
 */
struct PCEI
{
    uint64_t ce;    // processed collation element
    int32_t  low;   // lower offset bound recorded for this element
    int32_t  high;  // upper offset bound recorded for this element
};
michael@0 126
michael@0 127 struct PCEBuffer
michael@0 128 {
michael@0 129 PCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
michael@0 130 PCEI *buffer;
michael@0 131 int32_t bufferIndex;
michael@0 132 int32_t bufferSize;
michael@0 133
michael@0 134 PCEBuffer();
michael@0 135 ~PCEBuffer();
michael@0 136
michael@0 137 void reset();
michael@0 138 UBool empty() const;
michael@0 139 void put(uint64_t ce, int32_t ixLow, int32_t ixHigh);
michael@0 140 const PCEI *get();
michael@0 141 };
michael@0 142
michael@0 143 PCEBuffer::PCEBuffer()
michael@0 144 {
michael@0 145 buffer = defaultBuffer;
michael@0 146 bufferIndex = 0;
michael@0 147 bufferSize = DEFAULT_BUFFER_SIZE;
michael@0 148 }
michael@0 149
michael@0 150 PCEBuffer::~PCEBuffer()
michael@0 151 {
michael@0 152 if (buffer != defaultBuffer) {
michael@0 153 DELETE_ARRAY(buffer);
michael@0 154 }
michael@0 155 }
michael@0 156
michael@0 157 void PCEBuffer::reset()
michael@0 158 {
michael@0 159 bufferIndex = 0;
michael@0 160 }
michael@0 161
michael@0 162 UBool PCEBuffer::empty() const
michael@0 163 {
michael@0 164 return bufferIndex <= 0;
michael@0 165 }
michael@0 166
michael@0 167 void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh)
michael@0 168 {
michael@0 169 if (bufferIndex >= bufferSize) {
michael@0 170 PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
michael@0 171
michael@0 172 ARRAY_COPY(newBuffer, buffer, bufferSize);
michael@0 173
michael@0 174 if (buffer != defaultBuffer) {
michael@0 175 DELETE_ARRAY(buffer);
michael@0 176 }
michael@0 177
michael@0 178 buffer = newBuffer;
michael@0 179 bufferSize += BUFFER_GROW;
michael@0 180 }
michael@0 181
michael@0 182 buffer[bufferIndex].ce = ce;
michael@0 183 buffer[bufferIndex].low = ixLow;
michael@0 184 buffer[bufferIndex].high = ixHigh;
michael@0 185
michael@0 186 bufferIndex += 1;
michael@0 187 }
michael@0 188
michael@0 189 const PCEI *PCEBuffer::get()
michael@0 190 {
michael@0 191 if (bufferIndex > 0) {
michael@0 192 return &buffer[--bufferIndex];
michael@0 193 }
michael@0 194
michael@0 195 return NULL;
michael@0 196 }
michael@0 197
michael@0 198 /*
michael@0 199 * This inherits from UObject so that
michael@0 200 * it can be allocated by new and the
michael@0 201 * constructor for PCEBuffer is called.
michael@0 202 */
michael@0 203 struct UCollationPCE : public UObject
michael@0 204 {
michael@0 205 PCEBuffer pceBuffer;
michael@0 206 UCollationStrength strength;
michael@0 207 UBool toShift;
michael@0 208 UBool isShifted;
michael@0 209 uint32_t variableTop;
michael@0 210
michael@0 211 UCollationPCE(UCollationElements *elems);
michael@0 212 ~UCollationPCE();
michael@0 213
michael@0 214 void init(const UCollator *coll);
michael@0 215
michael@0 216 virtual UClassID getDynamicClassID() const;
michael@0 217 static UClassID getStaticClassID();
michael@0 218 };
michael@0 219
michael@0 220 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE)
michael@0 221
michael@0 222 UCollationPCE::UCollationPCE(UCollationElements *elems)
michael@0 223 {
michael@0 224 init(elems->iteratordata_.coll);
michael@0 225 }
michael@0 226
michael@0 227 void UCollationPCE::init(const UCollator *coll)
michael@0 228 {
michael@0 229 UErrorCode status = U_ZERO_ERROR;
michael@0 230
michael@0 231 strength = ucol_getStrength(coll);
michael@0 232 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
michael@0 233 isShifted = FALSE;
michael@0 234 variableTop = coll->variableTopValue << 16;
michael@0 235 }
michael@0 236
michael@0 237 UCollationPCE::~UCollationPCE()
michael@0 238 {
michael@0 239 // nothing to do
michael@0 240 }
michael@0 241
michael@0 242
michael@0 243 U_NAMESPACE_END
michael@0 244
michael@0 245
michael@0 246 inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
michael@0 247 {
michael@0 248 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
michael@0 249
michael@0 250 // This is clean, but somewhat slow...
michael@0 251 // We could apply the mask to ce and then
michael@0 252 // just get all three orders...
michael@0 253 switch(elems->pce->strength) {
michael@0 254 default:
michael@0 255 tertiary = ucol_tertiaryOrder(ce);
michael@0 256 /* note fall-through */
michael@0 257
michael@0 258 case UCOL_SECONDARY:
michael@0 259 secondary = ucol_secondaryOrder(ce);
michael@0 260 /* note fall-through */
michael@0 261
michael@0 262 case UCOL_PRIMARY:
michael@0 263 primary = ucol_primaryOrder(ce);
michael@0 264 }
michael@0 265
michael@0 266 // **** This should probably handle continuations too. ****
michael@0 267 // **** That means that we need 24 bits for the primary ****
michael@0 268 // **** instead of the 16 that we're currently using. ****
michael@0 269 // **** So we can lay out the 64 bits as: 24.12.12.16. ****
michael@0 270 // **** Another complication with continuations is that ****
michael@0 271 // **** the *second* CE is marked as a continuation, so ****
michael@0 272 // **** we always have to peek ahead to know how long ****
michael@0 273 // **** the primary is... ****
michael@0 274 if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0)
michael@0 275 || (elems->pce->isShifted && primary == 0)) {
michael@0 276
michael@0 277 if (primary == 0) {
michael@0 278 return UCOL_IGNORABLE;
michael@0 279 }
michael@0 280
michael@0 281 if (elems->pce->strength >= UCOL_QUATERNARY) {
michael@0 282 quaternary = primary;
michael@0 283 }
michael@0 284
michael@0 285 primary = secondary = tertiary = 0;
michael@0 286 elems->pce->isShifted = TRUE;
michael@0 287 } else {
michael@0 288 if (elems->pce->strength >= UCOL_QUATERNARY) {
michael@0 289 quaternary = 0xFFFF;
michael@0 290 }
michael@0 291
michael@0 292 elems->pce->isShifted = FALSE;
michael@0 293 }
michael@0 294
michael@0 295 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
michael@0 296 }
michael@0 297
michael@0 298 U_CAPI void U_EXPORT2
michael@0 299 uprv_init_pce(const UCollationElements *elems)
michael@0 300 {
michael@0 301 if (elems->pce != NULL) {
michael@0 302 elems->pce->init(elems->iteratordata_.coll);
michael@0 303 }
michael@0 304 }
michael@0 305
michael@0 306
michael@0 307
michael@0 308 /* public methods ---------------------------------------------------- */
michael@0 309
michael@0 310 U_CAPI UCollationElements* U_EXPORT2
michael@0 311 ucol_openElements(const UCollator *coll,
michael@0 312 const UChar *text,
michael@0 313 int32_t textLength,
michael@0 314 UErrorCode *status)
michael@0 315 {
michael@0 316 if (U_FAILURE(*status)) {
michael@0 317 return NULL;
michael@0 318 }
michael@0 319
michael@0 320 UCollationElements *result = new UCollationElements;
michael@0 321 if (result == NULL) {
michael@0 322 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 323 return NULL;
michael@0 324 }
michael@0 325
michael@0 326 result->reset_ = TRUE;
michael@0 327 result->isWritable = FALSE;
michael@0 328 result->pce = NULL;
michael@0 329
michael@0 330 if (text == NULL) {
michael@0 331 textLength = 0;
michael@0 332 }
michael@0 333 uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status);
michael@0 334
michael@0 335 return result;
michael@0 336 }
michael@0 337
michael@0 338
michael@0 339 U_CAPI void U_EXPORT2
michael@0 340 ucol_closeElements(UCollationElements *elems)
michael@0 341 {
michael@0 342 if (elems != NULL) {
michael@0 343 collIterate *ci = &elems->iteratordata_;
michael@0 344
michael@0 345 if (ci->extendCEs) {
michael@0 346 uprv_free(ci->extendCEs);
michael@0 347 }
michael@0 348
michael@0 349 if (ci->offsetBuffer) {
michael@0 350 uprv_free(ci->offsetBuffer);
michael@0 351 }
michael@0 352
michael@0 353 if (elems->isWritable && elems->iteratordata_.string != NULL)
michael@0 354 {
michael@0 355 uprv_free((UChar *)elems->iteratordata_.string);
michael@0 356 }
michael@0 357
michael@0 358 if (elems->pce != NULL) {
michael@0 359 delete elems->pce;
michael@0 360 }
michael@0 361
michael@0 362 delete elems;
michael@0 363 }
michael@0 364 }
michael@0 365
michael@0 366 U_CAPI void U_EXPORT2
michael@0 367 ucol_reset(UCollationElements *elems)
michael@0 368 {
michael@0 369 collIterate *ci = &(elems->iteratordata_);
michael@0 370 elems->reset_ = TRUE;
michael@0 371 ci->pos = ci->string;
michael@0 372 if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
michael@0 373 ci->endp = ci->string + u_strlen(ci->string);
michael@0 374 }
michael@0 375 ci->CEpos = ci->toReturn = ci->CEs;
michael@0 376 ci->flags = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN;
michael@0 377 if (ci->coll->normalizationMode == UCOL_ON) {
michael@0 378 ci->flags |= UCOL_ITER_NORM;
michael@0 379 }
michael@0 380
michael@0 381 ci->writableBuffer.remove();
michael@0 382 ci->fcdPosition = NULL;
michael@0 383
michael@0 384 //ci->offsetReturn = ci->offsetStore = NULL;
michael@0 385 ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
michael@0 386 }
michael@0 387
michael@0 388 U_CAPI void U_EXPORT2
michael@0 389 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status)
michael@0 390 {
michael@0 391 if (U_FAILURE(*status)) {
michael@0 392 return;
michael@0 393 }
michael@0 394
michael@0 395 if (elems == NULL) {
michael@0 396 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 397 return;
michael@0 398 }
michael@0 399
michael@0 400 elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT;
michael@0 401 }
michael@0 402
michael@0 403 U_CAPI int32_t U_EXPORT2
michael@0 404 ucol_next(UCollationElements *elems,
michael@0 405 UErrorCode *status)
michael@0 406 {
michael@0 407 int32_t result;
michael@0 408 if (U_FAILURE(*status)) {
michael@0 409 return UCOL_NULLORDER;
michael@0 410 }
michael@0 411
michael@0 412 elems->reset_ = FALSE;
michael@0 413
michael@0 414 result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll,
michael@0 415 &elems->iteratordata_,
michael@0 416 status);
michael@0 417
michael@0 418 if (result == UCOL_NO_MORE_CES) {
michael@0 419 result = UCOL_NULLORDER;
michael@0 420 }
michael@0 421 return result;
michael@0 422 }
michael@0 423
michael@0 424 U_CAPI int64_t U_EXPORT2
michael@0 425 ucol_nextProcessed(UCollationElements *elems,
michael@0 426 int32_t *ixLow,
michael@0 427 int32_t *ixHigh,
michael@0 428 UErrorCode *status)
michael@0 429 {
michael@0 430 const UCollator *coll = elems->iteratordata_.coll;
michael@0 431 int64_t result = UCOL_IGNORABLE;
michael@0 432 uint32_t low = 0, high = 0;
michael@0 433
michael@0 434 if (U_FAILURE(*status)) {
michael@0 435 return UCOL_PROCESSED_NULLORDER;
michael@0 436 }
michael@0 437
michael@0 438 if (elems->pce == NULL) {
michael@0 439 elems->pce = new UCollationPCE(elems);
michael@0 440 } else {
michael@0 441 elems->pce->pceBuffer.reset();
michael@0 442 }
michael@0 443
michael@0 444 elems->reset_ = FALSE;
michael@0 445
michael@0 446 do {
michael@0 447 low = ucol_getOffset(elems);
michael@0 448 uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status);
michael@0 449 high = ucol_getOffset(elems);
michael@0 450
michael@0 451 if (ce == UCOL_NO_MORE_CES) {
michael@0 452 result = UCOL_PROCESSED_NULLORDER;
michael@0 453 break;
michael@0 454 }
michael@0 455
michael@0 456 result = processCE(elems, ce);
michael@0 457 } while (result == UCOL_IGNORABLE);
michael@0 458
michael@0 459 if (ixLow != NULL) {
michael@0 460 *ixLow = low;
michael@0 461 }
michael@0 462
michael@0 463 if (ixHigh != NULL) {
michael@0 464 *ixHigh = high;
michael@0 465 }
michael@0 466
michael@0 467 return result;
michael@0 468 }
michael@0 469
michael@0 470 U_CAPI int32_t U_EXPORT2
michael@0 471 ucol_previous(UCollationElements *elems,
michael@0 472 UErrorCode *status)
michael@0 473 {
michael@0 474 if(U_FAILURE(*status)) {
michael@0 475 return UCOL_NULLORDER;
michael@0 476 }
michael@0 477 else
michael@0 478 {
michael@0 479 int32_t result;
michael@0 480
michael@0 481 if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) {
michael@0 482 if (elems->iteratordata_.endp == NULL) {
michael@0 483 elems->iteratordata_.endp = elems->iteratordata_.string +
michael@0 484 u_strlen(elems->iteratordata_.string);
michael@0 485 elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
michael@0 486 }
michael@0 487 elems->iteratordata_.pos = elems->iteratordata_.endp;
michael@0 488 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
michael@0 489 }
michael@0 490
michael@0 491 elems->reset_ = FALSE;
michael@0 492
michael@0 493 result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
michael@0 494 &(elems->iteratordata_),
michael@0 495 status);
michael@0 496
michael@0 497 if (result == UCOL_NO_MORE_CES) {
michael@0 498 result = UCOL_NULLORDER;
michael@0 499 }
michael@0 500
michael@0 501 return result;
michael@0 502 }
michael@0 503 }
michael@0 504
michael@0 505 U_CAPI int64_t U_EXPORT2
michael@0 506 ucol_previousProcessed(UCollationElements *elems,
michael@0 507 int32_t *ixLow,
michael@0 508 int32_t *ixHigh,
michael@0 509 UErrorCode *status)
michael@0 510 {
michael@0 511 const UCollator *coll = elems->iteratordata_.coll;
michael@0 512 int64_t result = UCOL_IGNORABLE;
michael@0 513 // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
michael@0 514 // UCollationStrength strength = ucol_getStrength(coll);
michael@0 515 // UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
michael@0 516 // uint32_t variableTop = coll->variableTopValue;
michael@0 517 int32_t low = 0, high = 0;
michael@0 518
michael@0 519 if (U_FAILURE(*status)) {
michael@0 520 return UCOL_PROCESSED_NULLORDER;
michael@0 521 }
michael@0 522
michael@0 523 if (elems->reset_ &&
michael@0 524 (elems->iteratordata_.pos == elems->iteratordata_.string)) {
michael@0 525 if (elems->iteratordata_.endp == NULL) {
michael@0 526 elems->iteratordata_.endp = elems->iteratordata_.string +
michael@0 527 u_strlen(elems->iteratordata_.string);
michael@0 528 elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
michael@0 529 }
michael@0 530
michael@0 531 elems->iteratordata_.pos = elems->iteratordata_.endp;
michael@0 532 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
michael@0 533 }
michael@0 534
michael@0 535 if (elems->pce == NULL) {
michael@0 536 elems->pce = new UCollationPCE(elems);
michael@0 537 } else {
michael@0 538 //elems->pce->pceBuffer.reset();
michael@0 539 }
michael@0 540
michael@0 541 elems->reset_ = FALSE;
michael@0 542
michael@0 543 while (elems->pce->pceBuffer.empty()) {
michael@0 544 // buffer raw CEs up to non-ignorable primary
michael@0 545 RCEBuffer rceb;
michael@0 546 uint32_t ce;
michael@0 547
michael@0 548 // **** do we need to reset rceb, or will it always be empty at this point ****
michael@0 549 do {
michael@0 550 high = ucol_getOffset(elems);
michael@0 551 ce = ucol_getPrevCE(coll, &elems->iteratordata_, status);
michael@0 552 low = ucol_getOffset(elems);
michael@0 553
michael@0 554 if (ce == UCOL_NO_MORE_CES) {
michael@0 555 if (! rceb.empty()) {
michael@0 556 break;
michael@0 557 }
michael@0 558
michael@0 559 goto finish;
michael@0 560 }
michael@0 561
michael@0 562 rceb.put(ce, low, high);
michael@0 563 } while ((ce & UCOL_PRIMARYMASK) == 0);
michael@0 564
michael@0 565 // process the raw CEs
michael@0 566 while (! rceb.empty()) {
michael@0 567 const RCEI *rcei = rceb.get();
michael@0 568
michael@0 569 result = processCE(elems, rcei->ce);
michael@0 570
michael@0 571 if (result != UCOL_IGNORABLE) {
michael@0 572 elems->pce->pceBuffer.put(result, rcei->low, rcei->high);
michael@0 573 }
michael@0 574 }
michael@0 575 }
michael@0 576
michael@0 577 finish:
michael@0 578 if (elems->pce->pceBuffer.empty()) {
michael@0 579 // **** Is -1 the right value for ixLow, ixHigh? ****
michael@0 580 if (ixLow != NULL) {
michael@0 581 *ixLow = -1;
michael@0 582 }
michael@0 583
michael@0 584 if (ixHigh != NULL) {
michael@0 585 *ixHigh = -1
michael@0 586 ;
michael@0 587 }
michael@0 588 return UCOL_PROCESSED_NULLORDER;
michael@0 589 }
michael@0 590
michael@0 591 const PCEI *pcei = elems->pce->pceBuffer.get();
michael@0 592
michael@0 593 if (ixLow != NULL) {
michael@0 594 *ixLow = pcei->low;
michael@0 595 }
michael@0 596
michael@0 597 if (ixHigh != NULL) {
michael@0 598 *ixHigh = pcei->high;
michael@0 599 }
michael@0 600
michael@0 601 return pcei->ce;
michael@0 602 }
michael@0 603
michael@0 604 U_CAPI int32_t U_EXPORT2
michael@0 605 ucol_getMaxExpansion(const UCollationElements *elems,
michael@0 606 int32_t order)
michael@0 607 {
michael@0 608 uint8_t result;
michael@0 609
michael@0 610 #if 0
michael@0 611 UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
michael@0 612 #else
michael@0 613 const UCollator *coll = elems->iteratordata_.coll;
michael@0 614 const uint32_t *start;
michael@0 615 const uint32_t *limit;
michael@0 616 const uint32_t *mid;
michael@0 617 uint32_t strengthMask = 0;
michael@0 618 uint32_t mOrder = (uint32_t) order;
michael@0 619
michael@0 620 switch (coll->strength)
michael@0 621 {
michael@0 622 default:
michael@0 623 strengthMask |= UCOL_TERTIARYORDERMASK;
michael@0 624 /* fall through */
michael@0 625
michael@0 626 case UCOL_SECONDARY:
michael@0 627 strengthMask |= UCOL_SECONDARYORDERMASK;
michael@0 628 /* fall through */
michael@0 629
michael@0 630 case UCOL_PRIMARY:
michael@0 631 strengthMask |= UCOL_PRIMARYORDERMASK;
michael@0 632 }
michael@0 633
michael@0 634 mOrder &= strengthMask;
michael@0 635 start = (coll)->endExpansionCE;
michael@0 636 limit = (coll)->lastEndExpansionCE;
michael@0 637
michael@0 638 while (start < limit - 1) {
michael@0 639 mid = start + ((limit - start) >> 1);
michael@0 640 if (mOrder <= (*mid & strengthMask)) {
michael@0 641 limit = mid;
michael@0 642 } else {
michael@0 643 start = mid;
michael@0 644 }
michael@0 645 }
michael@0 646
michael@0 647 // FIXME: with a masked search, there might be more than one hit,
michael@0 648 // so we need to look forward and backward from the match to find all
michael@0 649 // of the hits...
michael@0 650 if ((*start & strengthMask) == mOrder) {
michael@0 651 result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE));
michael@0 652 } else if ((*limit & strengthMask) == mOrder) {
michael@0 653 result = *(coll->expansionCESize + (limit - coll->endExpansionCE));
michael@0 654 } else if ((mOrder & 0xFFFF) == 0x00C0) {
michael@0 655 result = 2;
michael@0 656 } else {
michael@0 657 result = 1;
michael@0 658 }
michael@0 659 #endif
michael@0 660
michael@0 661 return result;
michael@0 662 }
michael@0 663
michael@0 664 U_CAPI void U_EXPORT2
michael@0 665 ucol_setText( UCollationElements *elems,
michael@0 666 const UChar *text,
michael@0 667 int32_t textLength,
michael@0 668 UErrorCode *status)
michael@0 669 {
michael@0 670 if (U_FAILURE(*status)) {
michael@0 671 return;
michael@0 672 }
michael@0 673
michael@0 674 if (elems->isWritable && elems->iteratordata_.string != NULL)
michael@0 675 {
michael@0 676 uprv_free((UChar *)elems->iteratordata_.string);
michael@0 677 }
michael@0 678
michael@0 679 if (text == NULL) {
michael@0 680 textLength = 0;
michael@0 681 }
michael@0 682
michael@0 683 elems->isWritable = FALSE;
michael@0 684
michael@0 685 /* free offset buffer to avoid memory leak before initializing. */
michael@0 686 ucol_freeOffsetBuffer(&(elems->iteratordata_));
michael@0 687 /* Ensure that previously allocated extendCEs is freed before setting to NULL. */
michael@0 688 if (elems->iteratordata_.extendCEs != NULL) {
michael@0 689 uprv_free(elems->iteratordata_.extendCEs);
michael@0 690 }
michael@0 691 uprv_init_collIterate(elems->iteratordata_.coll, text, textLength,
michael@0 692 &elems->iteratordata_, status);
michael@0 693
michael@0 694 elems->reset_ = TRUE;
michael@0 695 }
michael@0 696
michael@0 697 U_CAPI int32_t U_EXPORT2
michael@0 698 ucol_getOffset(const UCollationElements *elems)
michael@0 699 {
michael@0 700 const collIterate *ci = &(elems->iteratordata_);
michael@0 701
michael@0 702 if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) {
michael@0 703 return ci->offsetRepeatValue;
michael@0 704 }
michael@0 705
michael@0 706 if (ci->offsetReturn != NULL) {
michael@0 707 return *ci->offsetReturn;
michael@0 708 }
michael@0 709
michael@0 710 // while processing characters in normalization buffer getOffset will
michael@0 711 // return the next non-normalized character.
michael@0 712 // should be inline with the old implementation since the old codes uses
michael@0 713 // nextDecomp in normalizer which also decomposes the string till the
michael@0 714 // first base character is found.
michael@0 715 if (ci->flags & UCOL_ITER_INNORMBUF) {
michael@0 716 if (ci->fcdPosition == NULL) {
michael@0 717 return 0;
michael@0 718 }
michael@0 719 return (int32_t)(ci->fcdPosition - ci->string);
michael@0 720 }
michael@0 721 else {
michael@0 722 return (int32_t)(ci->pos - ci->string);
michael@0 723 }
michael@0 724 }
michael@0 725
michael@0 726 U_CAPI void U_EXPORT2
michael@0 727 ucol_setOffset(UCollationElements *elems,
michael@0 728 int32_t offset,
michael@0 729 UErrorCode *status)
michael@0 730 {
michael@0 731 if (U_FAILURE(*status)) {
michael@0 732 return;
michael@0 733 }
michael@0 734
michael@0 735 // this methods will clean up any use of the writable buffer and points to
michael@0 736 // the original string
michael@0 737 collIterate *ci = &(elems->iteratordata_);
michael@0 738 ci->pos = ci->string + offset;
michael@0 739 ci->CEpos = ci->toReturn = ci->CEs;
michael@0 740 if (ci->flags & UCOL_ITER_INNORMBUF) {
michael@0 741 ci->flags = ci->origFlags;
michael@0 742 }
michael@0 743 if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
michael@0 744 ci->endp = ci->string + u_strlen(ci->string);
michael@0 745 ci->flags |= UCOL_ITER_HASLEN;
michael@0 746 }
michael@0 747 ci->fcdPosition = NULL;
michael@0 748 elems->reset_ = FALSE;
michael@0 749
michael@0 750 ci->offsetReturn = NULL;
michael@0 751 ci->offsetStore = ci->offsetBuffer;
michael@0 752 ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
michael@0 753 }
michael@0 754
michael@0 755 U_CAPI int32_t U_EXPORT2
michael@0 756 ucol_primaryOrder (int32_t order)
michael@0 757 {
michael@0 758 order &= UCOL_PRIMARYMASK;
michael@0 759 return (order >> UCOL_PRIMARYORDERSHIFT);
michael@0 760 }
michael@0 761
michael@0 762 U_CAPI int32_t U_EXPORT2
michael@0 763 ucol_secondaryOrder (int32_t order)
michael@0 764 {
michael@0 765 order &= UCOL_SECONDARYMASK;
michael@0 766 return (order >> UCOL_SECONDARYORDERSHIFT);
michael@0 767 }
michael@0 768
michael@0 769 U_CAPI int32_t U_EXPORT2
michael@0 770 ucol_tertiaryOrder (int32_t order)
michael@0 771 {
michael@0 772 return (order & UCOL_TERTIARYMASK);
michael@0 773 }
michael@0 774
michael@0 775
michael@0 776 void ucol_freeOffsetBuffer(collIterate *s) {
michael@0 777 if (s != NULL && s->offsetBuffer != NULL) {
michael@0 778 uprv_free(s->offsetBuffer);
michael@0 779 s->offsetBuffer = NULL;
michael@0 780 s->offsetBufferSize = 0;
michael@0 781 }
michael@0 782 }
michael@0 783
michael@0 784 #endif /* #if !UCONFIG_NO_COLLATION */

mercurial