1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/ucoleitr.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,784 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* Copyright (C) 2001-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +****************************************************************************** 1.9 +* 1.10 +* File ucoleitr.cpp 1.11 +* 1.12 +* Modification History: 1.13 +* 1.14 +* Date Name Description 1.15 +* 02/15/2001 synwee Modified all methods to process its own function 1.16 +* instead of calling the equivalent c++ api (coleitr.h) 1.17 +******************************************************************************/ 1.18 + 1.19 +#include "unicode/utypes.h" 1.20 + 1.21 +#if !UCONFIG_NO_COLLATION 1.22 + 1.23 +#include "unicode/ucoleitr.h" 1.24 +#include "unicode/ustring.h" 1.25 +#include "unicode/sortkey.h" 1.26 +#include "unicode/uobject.h" 1.27 +#include "ucol_imp.h" 1.28 +#include "cmemory.h" 1.29 + 1.30 +U_NAMESPACE_USE 1.31 + 1.32 +#define BUFFER_LENGTH 100 1.33 + 1.34 +#define DEFAULT_BUFFER_SIZE 16 1.35 +#define BUFFER_GROW 8 1.36 + 1.37 +#define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) 1.38 + 1.39 +#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) 1.40 + 1.41 +#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) 1.42 + 1.43 +#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0]) 1.44 + 1.45 +#define DELETE_ARRAY(array) uprv_free((void *) (array)) 1.46 + 1.47 +typedef struct icu::collIterate collIterator; 1.48 + 1.49 +struct RCEI 1.50 +{ 1.51 + uint32_t ce; 1.52 + int32_t low; 1.53 + int32_t high; 1.54 +}; 1.55 + 1.56 +U_NAMESPACE_BEGIN 1.57 + 1.58 +struct RCEBuffer 1.59 +{ 1.60 + RCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; 1.61 + RCEI *buffer; 1.62 + int32_t bufferIndex; 1.63 + int32_t bufferSize; 1.64 + 1.65 + RCEBuffer(); 1.66 + ~RCEBuffer(); 1.67 + 1.68 + UBool empty() const; 1.69 + void put(uint32_t ce, int32_t ixLow, int32_t ixHigh); 1.70 + const RCEI *get(); 1.71 +}; 1.72 + 1.73 +RCEBuffer::RCEBuffer() 1.74 +{ 1.75 + buffer = defaultBuffer; 1.76 + bufferIndex = 0; 1.77 + bufferSize = DEFAULT_BUFFER_SIZE; 1.78 +} 1.79 + 1.80 +RCEBuffer::~RCEBuffer() 1.81 +{ 1.82 + if (buffer != defaultBuffer) { 1.83 + DELETE_ARRAY(buffer); 1.84 + } 1.85 +} 1.86 + 1.87 +UBool RCEBuffer::empty() const 1.88 +{ 1.89 + return bufferIndex <= 0; 1.90 +} 1.91 + 1.92 +void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh) 1.93 +{ 1.94 + if (bufferIndex >= bufferSize) { 1.95 + RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW); 1.96 + 1.97 + ARRAY_COPY(newBuffer, buffer, bufferSize); 1.98 + 1.99 + if (buffer != defaultBuffer) { 1.100 + DELETE_ARRAY(buffer); 1.101 + } 1.102 + 1.103 + buffer = newBuffer; 1.104 + bufferSize += BUFFER_GROW; 1.105 + } 1.106 + 1.107 + buffer[bufferIndex].ce = ce; 1.108 + buffer[bufferIndex].low = ixLow; 1.109 + buffer[bufferIndex].high = ixHigh; 1.110 + 1.111 + bufferIndex += 1; 1.112 +} 1.113 + 1.114 +const RCEI *RCEBuffer::get() 1.115 +{ 1.116 + if (bufferIndex > 0) { 1.117 + return &buffer[--bufferIndex]; 1.118 + } 1.119 + 1.120 + return NULL; 1.121 +} 1.122 + 1.123 +struct PCEI 1.124 +{ 1.125 + uint64_t ce; 1.126 + int32_t low; 1.127 + int32_t high; 1.128 +}; 1.129 + 1.130 +struct PCEBuffer 1.131 +{ 1.132 + PCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; 1.133 + PCEI *buffer; 1.134 + int32_t bufferIndex; 1.135 + int32_t bufferSize; 1.136 + 1.137 + PCEBuffer(); 1.138 + ~PCEBuffer(); 1.139 + 1.140 + void reset(); 1.141 + UBool empty() const; 1.142 + void put(uint64_t ce, int32_t ixLow, int32_t ixHigh); 1.143 + const PCEI *get(); 1.144 +}; 1.145 + 1.146 +PCEBuffer::PCEBuffer() 1.147 +{ 1.148 + buffer = defaultBuffer; 1.149 + bufferIndex = 0; 1.150 + bufferSize = DEFAULT_BUFFER_SIZE; 1.151 +} 1.152 + 1.153 +PCEBuffer::~PCEBuffer() 1.154 +{ 1.155 + if (buffer != defaultBuffer) { 1.156 + DELETE_ARRAY(buffer); 1.157 + } 1.158 +} 1.159 + 1.160 +void PCEBuffer::reset() 1.161 +{ 1.162 + bufferIndex = 0; 1.163 +} 1.164 + 1.165 +UBool PCEBuffer::empty() const 1.166 +{ 1.167 + return bufferIndex <= 0; 1.168 +} 1.169 + 1.170 +void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh) 1.171 +{ 1.172 + if (bufferIndex >= bufferSize) { 1.173 + PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW); 1.174 + 1.175 + ARRAY_COPY(newBuffer, buffer, bufferSize); 1.176 + 1.177 + if (buffer != defaultBuffer) { 1.178 + DELETE_ARRAY(buffer); 1.179 + } 1.180 + 1.181 + buffer = newBuffer; 1.182 + bufferSize += BUFFER_GROW; 1.183 + } 1.184 + 1.185 + buffer[bufferIndex].ce = ce; 1.186 + buffer[bufferIndex].low = ixLow; 1.187 + buffer[bufferIndex].high = ixHigh; 1.188 + 1.189 + bufferIndex += 1; 1.190 +} 1.191 + 1.192 +const PCEI *PCEBuffer::get() 1.193 +{ 1.194 + if (bufferIndex > 0) { 1.195 + return &buffer[--bufferIndex]; 1.196 + } 1.197 + 1.198 + return NULL; 1.199 +} 1.200 + 1.201 +/* 1.202 + * This inherits from UObject so that 1.203 + * it can be allocated by new and the 1.204 + * constructor for PCEBuffer is called. 1.205 + */ 1.206 +struct UCollationPCE : public UObject 1.207 +{ 1.208 + PCEBuffer pceBuffer; 1.209 + UCollationStrength strength; 1.210 + UBool toShift; 1.211 + UBool isShifted; 1.212 + uint32_t variableTop; 1.213 + 1.214 + UCollationPCE(UCollationElements *elems); 1.215 + ~UCollationPCE(); 1.216 + 1.217 + void init(const UCollator *coll); 1.218 + 1.219 + virtual UClassID getDynamicClassID() const; 1.220 + static UClassID getStaticClassID(); 1.221 +}; 1.222 + 1.223 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE) 1.224 + 1.225 +UCollationPCE::UCollationPCE(UCollationElements *elems) 1.226 +{ 1.227 + init(elems->iteratordata_.coll); 1.228 +} 1.229 + 1.230 +void UCollationPCE::init(const UCollator *coll) 1.231 +{ 1.232 + UErrorCode status = U_ZERO_ERROR; 1.233 + 1.234 + strength = ucol_getStrength(coll); 1.235 + toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; 1.236 + isShifted = FALSE; 1.237 + variableTop = coll->variableTopValue << 16; 1.238 +} 1.239 + 1.240 +UCollationPCE::~UCollationPCE() 1.241 +{ 1.242 + // nothing to do 1.243 +} 1.244 + 1.245 + 1.246 +U_NAMESPACE_END 1.247 + 1.248 + 1.249 +inline uint64_t processCE(UCollationElements *elems, uint32_t ce) 1.250 +{ 1.251 + uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; 1.252 + 1.253 + // This is clean, but somewhat slow... 1.254 + // We could apply the mask to ce and then 1.255 + // just get all three orders... 1.256 + switch(elems->pce->strength) { 1.257 + default: 1.258 + tertiary = ucol_tertiaryOrder(ce); 1.259 + /* note fall-through */ 1.260 + 1.261 + case UCOL_SECONDARY: 1.262 + secondary = ucol_secondaryOrder(ce); 1.263 + /* note fall-through */ 1.264 + 1.265 + case UCOL_PRIMARY: 1.266 + primary = ucol_primaryOrder(ce); 1.267 + } 1.268 + 1.269 + // **** This should probably handle continuations too. **** 1.270 + // **** That means that we need 24 bits for the primary **** 1.271 + // **** instead of the 16 that we're currently using. **** 1.272 + // **** So we can lay out the 64 bits as: 24.12.12.16. **** 1.273 + // **** Another complication with continuations is that **** 1.274 + // **** the *second* CE is marked as a continuation, so **** 1.275 + // **** we always have to peek ahead to know how long **** 1.276 + // **** the primary is... **** 1.277 + if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0) 1.278 + || (elems->pce->isShifted && primary == 0)) { 1.279 + 1.280 + if (primary == 0) { 1.281 + return UCOL_IGNORABLE; 1.282 + } 1.283 + 1.284 + if (elems->pce->strength >= UCOL_QUATERNARY) { 1.285 + quaternary = primary; 1.286 + } 1.287 + 1.288 + primary = secondary = tertiary = 0; 1.289 + elems->pce->isShifted = TRUE; 1.290 + } else { 1.291 + if (elems->pce->strength >= UCOL_QUATERNARY) { 1.292 + quaternary = 0xFFFF; 1.293 + } 1.294 + 1.295 + elems->pce->isShifted = FALSE; 1.296 + } 1.297 + 1.298 + return primary << 48 | secondary << 32 | tertiary << 16 | quaternary; 1.299 +} 1.300 + 1.301 +U_CAPI void U_EXPORT2 1.302 +uprv_init_pce(const UCollationElements *elems) 1.303 +{ 1.304 + if (elems->pce != NULL) { 1.305 + elems->pce->init(elems->iteratordata_.coll); 1.306 + } 1.307 +} 1.308 + 1.309 + 1.310 + 1.311 +/* public methods ---------------------------------------------------- */ 1.312 + 1.313 +U_CAPI UCollationElements* U_EXPORT2 1.314 +ucol_openElements(const UCollator *coll, 1.315 + const UChar *text, 1.316 + int32_t textLength, 1.317 + UErrorCode *status) 1.318 +{ 1.319 + if (U_FAILURE(*status)) { 1.320 + return NULL; 1.321 + } 1.322 + 1.323 + UCollationElements *result = new UCollationElements; 1.324 + if (result == NULL) { 1.325 + *status = U_MEMORY_ALLOCATION_ERROR; 1.326 + return NULL; 1.327 + } 1.328 + 1.329 + result->reset_ = TRUE; 1.330 + result->isWritable = FALSE; 1.331 + result->pce = NULL; 1.332 + 1.333 + if (text == NULL) { 1.334 + textLength = 0; 1.335 + } 1.336 + uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status); 1.337 + 1.338 + return result; 1.339 +} 1.340 + 1.341 + 1.342 +U_CAPI void U_EXPORT2 1.343 +ucol_closeElements(UCollationElements *elems) 1.344 +{ 1.345 + if (elems != NULL) { 1.346 + collIterate *ci = &elems->iteratordata_; 1.347 + 1.348 + if (ci->extendCEs) { 1.349 + uprv_free(ci->extendCEs); 1.350 + } 1.351 + 1.352 + if (ci->offsetBuffer) { 1.353 + uprv_free(ci->offsetBuffer); 1.354 + } 1.355 + 1.356 + if (elems->isWritable && elems->iteratordata_.string != NULL) 1.357 + { 1.358 + uprv_free((UChar *)elems->iteratordata_.string); 1.359 + } 1.360 + 1.361 + if (elems->pce != NULL) { 1.362 + delete elems->pce; 1.363 + } 1.364 + 1.365 + delete elems; 1.366 + } 1.367 +} 1.368 + 1.369 +U_CAPI void U_EXPORT2 1.370 +ucol_reset(UCollationElements *elems) 1.371 +{ 1.372 + collIterate *ci = &(elems->iteratordata_); 1.373 + elems->reset_ = TRUE; 1.374 + ci->pos = ci->string; 1.375 + if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) { 1.376 + ci->endp = ci->string + u_strlen(ci->string); 1.377 + } 1.378 + ci->CEpos = ci->toReturn = ci->CEs; 1.379 + ci->flags = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN; 1.380 + if (ci->coll->normalizationMode == UCOL_ON) { 1.381 + ci->flags |= UCOL_ITER_NORM; 1.382 + } 1.383 + 1.384 + ci->writableBuffer.remove(); 1.385 + ci->fcdPosition = NULL; 1.386 + 1.387 + //ci->offsetReturn = ci->offsetStore = NULL; 1.388 + ci->offsetRepeatCount = ci->offsetRepeatValue = 0; 1.389 +} 1.390 + 1.391 +U_CAPI void U_EXPORT2 1.392 +ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status) 1.393 +{ 1.394 + if (U_FAILURE(*status)) { 1.395 + return; 1.396 + } 1.397 + 1.398 + if (elems == NULL) { 1.399 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.400 + return; 1.401 + } 1.402 + 1.403 + elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT; 1.404 +} 1.405 + 1.406 +U_CAPI int32_t U_EXPORT2 1.407 +ucol_next(UCollationElements *elems, 1.408 + UErrorCode *status) 1.409 +{ 1.410 + int32_t result; 1.411 + if (U_FAILURE(*status)) { 1.412 + return UCOL_NULLORDER; 1.413 + } 1.414 + 1.415 + elems->reset_ = FALSE; 1.416 + 1.417 + result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll, 1.418 + &elems->iteratordata_, 1.419 + status); 1.420 + 1.421 + if (result == UCOL_NO_MORE_CES) { 1.422 + result = UCOL_NULLORDER; 1.423 + } 1.424 + return result; 1.425 +} 1.426 + 1.427 +U_CAPI int64_t U_EXPORT2 1.428 +ucol_nextProcessed(UCollationElements *elems, 1.429 + int32_t *ixLow, 1.430 + int32_t *ixHigh, 1.431 + UErrorCode *status) 1.432 +{ 1.433 + const UCollator *coll = elems->iteratordata_.coll; 1.434 + int64_t result = UCOL_IGNORABLE; 1.435 + uint32_t low = 0, high = 0; 1.436 + 1.437 + if (U_FAILURE(*status)) { 1.438 + return UCOL_PROCESSED_NULLORDER; 1.439 + } 1.440 + 1.441 + if (elems->pce == NULL) { 1.442 + elems->pce = new UCollationPCE(elems); 1.443 + } else { 1.444 + elems->pce->pceBuffer.reset(); 1.445 + } 1.446 + 1.447 + elems->reset_ = FALSE; 1.448 + 1.449 + do { 1.450 + low = ucol_getOffset(elems); 1.451 + uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status); 1.452 + high = ucol_getOffset(elems); 1.453 + 1.454 + if (ce == UCOL_NO_MORE_CES) { 1.455 + result = UCOL_PROCESSED_NULLORDER; 1.456 + break; 1.457 + } 1.458 + 1.459 + result = processCE(elems, ce); 1.460 + } while (result == UCOL_IGNORABLE); 1.461 + 1.462 + if (ixLow != NULL) { 1.463 + *ixLow = low; 1.464 + } 1.465 + 1.466 + if (ixHigh != NULL) { 1.467 + *ixHigh = high; 1.468 + } 1.469 + 1.470 + return result; 1.471 +} 1.472 + 1.473 +U_CAPI int32_t U_EXPORT2 1.474 +ucol_previous(UCollationElements *elems, 1.475 + UErrorCode *status) 1.476 +{ 1.477 + if(U_FAILURE(*status)) { 1.478 + return UCOL_NULLORDER; 1.479 + } 1.480 + else 1.481 + { 1.482 + int32_t result; 1.483 + 1.484 + if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) { 1.485 + if (elems->iteratordata_.endp == NULL) { 1.486 + elems->iteratordata_.endp = elems->iteratordata_.string + 1.487 + u_strlen(elems->iteratordata_.string); 1.488 + elems->iteratordata_.flags |= UCOL_ITER_HASLEN; 1.489 + } 1.490 + elems->iteratordata_.pos = elems->iteratordata_.endp; 1.491 + elems->iteratordata_.fcdPosition = elems->iteratordata_.endp; 1.492 + } 1.493 + 1.494 + elems->reset_ = FALSE; 1.495 + 1.496 + result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll, 1.497 + &(elems->iteratordata_), 1.498 + status); 1.499 + 1.500 + if (result == UCOL_NO_MORE_CES) { 1.501 + result = UCOL_NULLORDER; 1.502 + } 1.503 + 1.504 + return result; 1.505 + } 1.506 +} 1.507 + 1.508 +U_CAPI int64_t U_EXPORT2 1.509 +ucol_previousProcessed(UCollationElements *elems, 1.510 + int32_t *ixLow, 1.511 + int32_t *ixHigh, 1.512 + UErrorCode *status) 1.513 +{ 1.514 + const UCollator *coll = elems->iteratordata_.coll; 1.515 + int64_t result = UCOL_IGNORABLE; 1.516 + // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; 1.517 + // UCollationStrength strength = ucol_getStrength(coll); 1.518 + // UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED; 1.519 + // uint32_t variableTop = coll->variableTopValue; 1.520 + int32_t low = 0, high = 0; 1.521 + 1.522 + if (U_FAILURE(*status)) { 1.523 + return UCOL_PROCESSED_NULLORDER; 1.524 + } 1.525 + 1.526 + if (elems->reset_ && 1.527 + (elems->iteratordata_.pos == elems->iteratordata_.string)) { 1.528 + if (elems->iteratordata_.endp == NULL) { 1.529 + elems->iteratordata_.endp = elems->iteratordata_.string + 1.530 + u_strlen(elems->iteratordata_.string); 1.531 + elems->iteratordata_.flags |= UCOL_ITER_HASLEN; 1.532 + } 1.533 + 1.534 + elems->iteratordata_.pos = elems->iteratordata_.endp; 1.535 + elems->iteratordata_.fcdPosition = elems->iteratordata_.endp; 1.536 + } 1.537 + 1.538 + if (elems->pce == NULL) { 1.539 + elems->pce = new UCollationPCE(elems); 1.540 + } else { 1.541 + //elems->pce->pceBuffer.reset(); 1.542 + } 1.543 + 1.544 + elems->reset_ = FALSE; 1.545 + 1.546 + while (elems->pce->pceBuffer.empty()) { 1.547 + // buffer raw CEs up to non-ignorable primary 1.548 + RCEBuffer rceb; 1.549 + uint32_t ce; 1.550 + 1.551 + // **** do we need to reset rceb, or will it always be empty at this point **** 1.552 + do { 1.553 + high = ucol_getOffset(elems); 1.554 + ce = ucol_getPrevCE(coll, &elems->iteratordata_, status); 1.555 + low = ucol_getOffset(elems); 1.556 + 1.557 + if (ce == UCOL_NO_MORE_CES) { 1.558 + if (! rceb.empty()) { 1.559 + break; 1.560 + } 1.561 + 1.562 + goto finish; 1.563 + } 1.564 + 1.565 + rceb.put(ce, low, high); 1.566 + } while ((ce & UCOL_PRIMARYMASK) == 0); 1.567 + 1.568 + // process the raw CEs 1.569 + while (! rceb.empty()) { 1.570 + const RCEI *rcei = rceb.get(); 1.571 + 1.572 + result = processCE(elems, rcei->ce); 1.573 + 1.574 + if (result != UCOL_IGNORABLE) { 1.575 + elems->pce->pceBuffer.put(result, rcei->low, rcei->high); 1.576 + } 1.577 + } 1.578 + } 1.579 + 1.580 +finish: 1.581 + if (elems->pce->pceBuffer.empty()) { 1.582 + // **** Is -1 the right value for ixLow, ixHigh? **** 1.583 + if (ixLow != NULL) { 1.584 + *ixLow = -1; 1.585 + } 1.586 + 1.587 + if (ixHigh != NULL) { 1.588 + *ixHigh = -1 1.589 + ; 1.590 + } 1.591 + return UCOL_PROCESSED_NULLORDER; 1.592 + } 1.593 + 1.594 + const PCEI *pcei = elems->pce->pceBuffer.get(); 1.595 + 1.596 + if (ixLow != NULL) { 1.597 + *ixLow = pcei->low; 1.598 + } 1.599 + 1.600 + if (ixHigh != NULL) { 1.601 + *ixHigh = pcei->high; 1.602 + } 1.603 + 1.604 + return pcei->ce; 1.605 +} 1.606 + 1.607 +U_CAPI int32_t U_EXPORT2 1.608 +ucol_getMaxExpansion(const UCollationElements *elems, 1.609 + int32_t order) 1.610 +{ 1.611 + uint8_t result; 1.612 + 1.613 +#if 0 1.614 + UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result); 1.615 +#else 1.616 + const UCollator *coll = elems->iteratordata_.coll; 1.617 + const uint32_t *start; 1.618 + const uint32_t *limit; 1.619 + const uint32_t *mid; 1.620 + uint32_t strengthMask = 0; 1.621 + uint32_t mOrder = (uint32_t) order; 1.622 + 1.623 + switch (coll->strength) 1.624 + { 1.625 + default: 1.626 + strengthMask |= UCOL_TERTIARYORDERMASK; 1.627 + /* fall through */ 1.628 + 1.629 + case UCOL_SECONDARY: 1.630 + strengthMask |= UCOL_SECONDARYORDERMASK; 1.631 + /* fall through */ 1.632 + 1.633 + case UCOL_PRIMARY: 1.634 + strengthMask |= UCOL_PRIMARYORDERMASK; 1.635 + } 1.636 + 1.637 + mOrder &= strengthMask; 1.638 + start = (coll)->endExpansionCE; 1.639 + limit = (coll)->lastEndExpansionCE; 1.640 + 1.641 + while (start < limit - 1) { 1.642 + mid = start + ((limit - start) >> 1); 1.643 + if (mOrder <= (*mid & strengthMask)) { 1.644 + limit = mid; 1.645 + } else { 1.646 + start = mid; 1.647 + } 1.648 + } 1.649 + 1.650 + // FIXME: with a masked search, there might be more than one hit, 1.651 + // so we need to look forward and backward from the match to find all 1.652 + // of the hits... 1.653 + if ((*start & strengthMask) == mOrder) { 1.654 + result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE)); 1.655 + } else if ((*limit & strengthMask) == mOrder) { 1.656 + result = *(coll->expansionCESize + (limit - coll->endExpansionCE)); 1.657 + } else if ((mOrder & 0xFFFF) == 0x00C0) { 1.658 + result = 2; 1.659 + } else { 1.660 + result = 1; 1.661 + } 1.662 +#endif 1.663 + 1.664 + return result; 1.665 +} 1.666 + 1.667 +U_CAPI void U_EXPORT2 1.668 +ucol_setText( UCollationElements *elems, 1.669 + const UChar *text, 1.670 + int32_t textLength, 1.671 + UErrorCode *status) 1.672 +{ 1.673 + if (U_FAILURE(*status)) { 1.674 + return; 1.675 + } 1.676 + 1.677 + if (elems->isWritable && elems->iteratordata_.string != NULL) 1.678 + { 1.679 + uprv_free((UChar *)elems->iteratordata_.string); 1.680 + } 1.681 + 1.682 + if (text == NULL) { 1.683 + textLength = 0; 1.684 + } 1.685 + 1.686 + elems->isWritable = FALSE; 1.687 + 1.688 + /* free offset buffer to avoid memory leak before initializing. */ 1.689 + ucol_freeOffsetBuffer(&(elems->iteratordata_)); 1.690 + /* Ensure that previously allocated extendCEs is freed before setting to NULL. */ 1.691 + if (elems->iteratordata_.extendCEs != NULL) { 1.692 + uprv_free(elems->iteratordata_.extendCEs); 1.693 + } 1.694 + uprv_init_collIterate(elems->iteratordata_.coll, text, textLength, 1.695 + &elems->iteratordata_, status); 1.696 + 1.697 + elems->reset_ = TRUE; 1.698 +} 1.699 + 1.700 +U_CAPI int32_t U_EXPORT2 1.701 +ucol_getOffset(const UCollationElements *elems) 1.702 +{ 1.703 + const collIterate *ci = &(elems->iteratordata_); 1.704 + 1.705 + if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) { 1.706 + return ci->offsetRepeatValue; 1.707 + } 1.708 + 1.709 + if (ci->offsetReturn != NULL) { 1.710 + return *ci->offsetReturn; 1.711 + } 1.712 + 1.713 + // while processing characters in normalization buffer getOffset will 1.714 + // return the next non-normalized character. 1.715 + // should be inline with the old implementation since the old codes uses 1.716 + // nextDecomp in normalizer which also decomposes the string till the 1.717 + // first base character is found. 1.718 + if (ci->flags & UCOL_ITER_INNORMBUF) { 1.719 + if (ci->fcdPosition == NULL) { 1.720 + return 0; 1.721 + } 1.722 + return (int32_t)(ci->fcdPosition - ci->string); 1.723 + } 1.724 + else { 1.725 + return (int32_t)(ci->pos - ci->string); 1.726 + } 1.727 +} 1.728 + 1.729 +U_CAPI void U_EXPORT2 1.730 +ucol_setOffset(UCollationElements *elems, 1.731 + int32_t offset, 1.732 + UErrorCode *status) 1.733 +{ 1.734 + if (U_FAILURE(*status)) { 1.735 + return; 1.736 + } 1.737 + 1.738 + // this methods will clean up any use of the writable buffer and points to 1.739 + // the original string 1.740 + collIterate *ci = &(elems->iteratordata_); 1.741 + ci->pos = ci->string + offset; 1.742 + ci->CEpos = ci->toReturn = ci->CEs; 1.743 + if (ci->flags & UCOL_ITER_INNORMBUF) { 1.744 + ci->flags = ci->origFlags; 1.745 + } 1.746 + if ((ci->flags & UCOL_ITER_HASLEN) == 0) { 1.747 + ci->endp = ci->string + u_strlen(ci->string); 1.748 + ci->flags |= UCOL_ITER_HASLEN; 1.749 + } 1.750 + ci->fcdPosition = NULL; 1.751 + elems->reset_ = FALSE; 1.752 + 1.753 + ci->offsetReturn = NULL; 1.754 + ci->offsetStore = ci->offsetBuffer; 1.755 + ci->offsetRepeatCount = ci->offsetRepeatValue = 0; 1.756 +} 1.757 + 1.758 +U_CAPI int32_t U_EXPORT2 1.759 +ucol_primaryOrder (int32_t order) 1.760 +{ 1.761 + order &= UCOL_PRIMARYMASK; 1.762 + return (order >> UCOL_PRIMARYORDERSHIFT); 1.763 +} 1.764 + 1.765 +U_CAPI int32_t U_EXPORT2 1.766 +ucol_secondaryOrder (int32_t order) 1.767 +{ 1.768 + order &= UCOL_SECONDARYMASK; 1.769 + return (order >> UCOL_SECONDARYORDERSHIFT); 1.770 +} 1.771 + 1.772 +U_CAPI int32_t U_EXPORT2 1.773 +ucol_tertiaryOrder (int32_t order) 1.774 +{ 1.775 + return (order & UCOL_TERTIARYMASK); 1.776 +} 1.777 + 1.778 + 1.779 +void ucol_freeOffsetBuffer(collIterate *s) { 1.780 + if (s != NULL && s->offsetBuffer != NULL) { 1.781 + uprv_free(s->offsetBuffer); 1.782 + s->offsetBuffer = NULL; 1.783 + s->offsetBufferSize = 0; 1.784 + } 1.785 +} 1.786 + 1.787 +#endif /* #if !UCONFIG_NO_COLLATION */