intl/icu/source/i18n/ucoleitr.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/ucoleitr.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,784 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*   Copyright (C) 2001-2011, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +******************************************************************************
     1.9 +*
    1.10 +* File ucoleitr.cpp
    1.11 +*
    1.12 +* Modification History:
    1.13 +*
    1.14 +* Date        Name        Description
    1.15 +* 02/15/2001  synwee      Modified all methods to process its own function 
    1.16 +*                         instead of calling the equivalent c++ api (coleitr.h)
    1.17 +******************************************************************************/
    1.18 +
    1.19 +#include "unicode/utypes.h"
    1.20 +
    1.21 +#if !UCONFIG_NO_COLLATION
    1.22 +
    1.23 +#include "unicode/ucoleitr.h"
    1.24 +#include "unicode/ustring.h"
    1.25 +#include "unicode/sortkey.h"
    1.26 +#include "unicode/uobject.h"
    1.27 +#include "ucol_imp.h"
    1.28 +#include "cmemory.h"
    1.29 +
    1.30 +U_NAMESPACE_USE
    1.31 +
    1.32 +#define BUFFER_LENGTH             100
    1.33 +
    1.34 +#define DEFAULT_BUFFER_SIZE 16
    1.35 +#define BUFFER_GROW 8
    1.36 +
    1.37 +#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
    1.38 +
    1.39 +#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
    1.40 +
    1.41 +#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
    1.42 +
    1.43 +#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0])
    1.44 +
    1.45 +#define DELETE_ARRAY(array) uprv_free((void *) (array))
    1.46 +
    1.47 +typedef struct icu::collIterate collIterator;
    1.48 +
    1.49 +struct RCEI
    1.50 +{
    1.51 +    uint32_t ce;
    1.52 +    int32_t  low;
    1.53 +    int32_t  high;
    1.54 +};
    1.55 +
    1.56 +U_NAMESPACE_BEGIN
    1.57 +
    1.58 +struct RCEBuffer
    1.59 +{
    1.60 +    RCEI    defaultBuffer[DEFAULT_BUFFER_SIZE];
    1.61 +    RCEI   *buffer;
    1.62 +    int32_t bufferIndex;
    1.63 +    int32_t bufferSize;
    1.64 +
    1.65 +    RCEBuffer();
    1.66 +    ~RCEBuffer();
    1.67 +
    1.68 +    UBool empty() const;
    1.69 +    void  put(uint32_t ce, int32_t ixLow, int32_t ixHigh);
    1.70 +    const RCEI *get();
    1.71 +};
    1.72 +
    1.73 +RCEBuffer::RCEBuffer()
    1.74 +{
    1.75 +    buffer = defaultBuffer;
    1.76 +    bufferIndex = 0;
    1.77 +    bufferSize = DEFAULT_BUFFER_SIZE;
    1.78 +}
    1.79 +
    1.80 +RCEBuffer::~RCEBuffer()
    1.81 +{
    1.82 +    if (buffer != defaultBuffer) {
    1.83 +        DELETE_ARRAY(buffer);
    1.84 +    }
    1.85 +}
    1.86 +
    1.87 +UBool RCEBuffer::empty() const
    1.88 +{
    1.89 +    return bufferIndex <= 0;
    1.90 +}
    1.91 +
    1.92 +void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh)
    1.93 +{
    1.94 +    if (bufferIndex >= bufferSize) {
    1.95 +        RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
    1.96 +
    1.97 +        ARRAY_COPY(newBuffer, buffer, bufferSize);
    1.98 +
    1.99 +        if (buffer != defaultBuffer) {
   1.100 +            DELETE_ARRAY(buffer);
   1.101 +        }
   1.102 +
   1.103 +        buffer = newBuffer;
   1.104 +        bufferSize += BUFFER_GROW;
   1.105 +    }
   1.106 +
   1.107 +    buffer[bufferIndex].ce   = ce;
   1.108 +    buffer[bufferIndex].low  = ixLow;
   1.109 +    buffer[bufferIndex].high = ixHigh;
   1.110 +
   1.111 +    bufferIndex += 1;
   1.112 +}
   1.113 +
   1.114 +const RCEI *RCEBuffer::get()
   1.115 +{
   1.116 +    if (bufferIndex > 0) {
   1.117 +     return &buffer[--bufferIndex];
   1.118 +    }
   1.119 +
   1.120 +    return NULL;
   1.121 +}
   1.122 +
   1.123 +struct PCEI
   1.124 +{
   1.125 +    uint64_t ce;
   1.126 +    int32_t  low;
   1.127 +    int32_t  high;
   1.128 +};
   1.129 +
   1.130 +struct PCEBuffer
   1.131 +{
   1.132 +    PCEI    defaultBuffer[DEFAULT_BUFFER_SIZE];
   1.133 +    PCEI   *buffer;
   1.134 +    int32_t bufferIndex;
   1.135 +    int32_t bufferSize;
   1.136 +
   1.137 +    PCEBuffer();
   1.138 +    ~PCEBuffer();
   1.139 +
   1.140 +    void  reset();
   1.141 +    UBool empty() const;
   1.142 +    void  put(uint64_t ce, int32_t ixLow, int32_t ixHigh);
   1.143 +    const PCEI *get();
   1.144 +};
   1.145 +
   1.146 +PCEBuffer::PCEBuffer()
   1.147 +{
   1.148 +    buffer = defaultBuffer;
   1.149 +    bufferIndex = 0;
   1.150 +    bufferSize = DEFAULT_BUFFER_SIZE;
   1.151 +}
   1.152 +
   1.153 +PCEBuffer::~PCEBuffer()
   1.154 +{
   1.155 +    if (buffer != defaultBuffer) {
   1.156 +        DELETE_ARRAY(buffer);
   1.157 +    }
   1.158 +}
   1.159 +
   1.160 +void PCEBuffer::reset()
   1.161 +{
   1.162 +    bufferIndex = 0;
   1.163 +}
   1.164 +
   1.165 +UBool PCEBuffer::empty() const
   1.166 +{
   1.167 +    return bufferIndex <= 0;
   1.168 +}
   1.169 +
   1.170 +void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh)
   1.171 +{
   1.172 +    if (bufferIndex >= bufferSize) {
   1.173 +        PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
   1.174 +
   1.175 +        ARRAY_COPY(newBuffer, buffer, bufferSize);
   1.176 +
   1.177 +        if (buffer != defaultBuffer) {
   1.178 +            DELETE_ARRAY(buffer);
   1.179 +        }
   1.180 +
   1.181 +        buffer = newBuffer;
   1.182 +        bufferSize += BUFFER_GROW;
   1.183 +    }
   1.184 +
   1.185 +    buffer[bufferIndex].ce   = ce;
   1.186 +    buffer[bufferIndex].low  = ixLow;
   1.187 +    buffer[bufferIndex].high = ixHigh;
   1.188 +
   1.189 +    bufferIndex += 1;
   1.190 +}
   1.191 +
   1.192 +const PCEI *PCEBuffer::get()
   1.193 +{
   1.194 +    if (bufferIndex > 0) {
   1.195 +     return &buffer[--bufferIndex];
   1.196 +    }
   1.197 +
   1.198 +    return NULL;
   1.199 +}
   1.200 +
   1.201 +/*
   1.202 + * This inherits from UObject so that
   1.203 + * it can be allocated by new and the
   1.204 + * constructor for PCEBuffer is called.
   1.205 + */
   1.206 +struct UCollationPCE : public UObject
   1.207 +{
   1.208 +    PCEBuffer          pceBuffer;
   1.209 +    UCollationStrength strength;
   1.210 +    UBool              toShift;
   1.211 +    UBool              isShifted;
   1.212 +    uint32_t           variableTop;
   1.213 +
   1.214 +    UCollationPCE(UCollationElements *elems);
   1.215 +    ~UCollationPCE();
   1.216 +
   1.217 +    void init(const UCollator *coll);
   1.218 +
   1.219 +    virtual UClassID getDynamicClassID() const;
   1.220 +    static UClassID getStaticClassID();
   1.221 +};
   1.222 +
   1.223 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE)
   1.224 +
   1.225 +UCollationPCE::UCollationPCE(UCollationElements *elems)
   1.226 +{
   1.227 +    init(elems->iteratordata_.coll);
   1.228 +}
   1.229 +
   1.230 +void UCollationPCE::init(const UCollator *coll)
   1.231 +{
   1.232 +    UErrorCode status = U_ZERO_ERROR;
   1.233 +
   1.234 +    strength    = ucol_getStrength(coll);
   1.235 +    toShift     = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
   1.236 +    isShifted   = FALSE;
   1.237 +    variableTop = coll->variableTopValue << 16;
   1.238 +}
   1.239 +
   1.240 +UCollationPCE::~UCollationPCE()
   1.241 +{
   1.242 +    // nothing to do
   1.243 +}
   1.244 +
   1.245 +
   1.246 +U_NAMESPACE_END
   1.247 +
   1.248 +
   1.249 +inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
   1.250 +{
   1.251 +    uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
   1.252 +
   1.253 +    // This is clean, but somewhat slow...
   1.254 +    // We could apply the mask to ce and then
   1.255 +    // just get all three orders...
   1.256 +    switch(elems->pce->strength) {
   1.257 +    default:
   1.258 +        tertiary = ucol_tertiaryOrder(ce);
   1.259 +        /* note fall-through */
   1.260 +
   1.261 +    case UCOL_SECONDARY:
   1.262 +        secondary = ucol_secondaryOrder(ce);
   1.263 +        /* note fall-through */
   1.264 +
   1.265 +    case UCOL_PRIMARY:
   1.266 +        primary = ucol_primaryOrder(ce);
   1.267 +    }
   1.268 +
   1.269 +    // **** This should probably handle continuations too.  ****
   1.270 +    // **** That means that we need 24 bits for the primary ****
   1.271 +    // **** instead of the 16 that we're currently using.   ****
   1.272 +    // **** So we can lay out the 64 bits as: 24.12.12.16.  ****
   1.273 +    // **** Another complication with continuations is that ****
   1.274 +    // **** the *second* CE is marked as a continuation, so ****
   1.275 +    // **** we always have to peek ahead to know how long   ****
   1.276 +    // **** the primary is...                               ****
   1.277 +    if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0)
   1.278 +                || (elems->pce->isShifted && primary == 0)) {
   1.279 +
   1.280 +        if (primary == 0) {
   1.281 +            return UCOL_IGNORABLE;
   1.282 +        }
   1.283 +
   1.284 +        if (elems->pce->strength >= UCOL_QUATERNARY) {
   1.285 +            quaternary = primary;
   1.286 +        }
   1.287 +
   1.288 +        primary = secondary = tertiary = 0;
   1.289 +        elems->pce->isShifted = TRUE;
   1.290 +    } else {
   1.291 +        if (elems->pce->strength >= UCOL_QUATERNARY) {
   1.292 +            quaternary = 0xFFFF;
   1.293 +        }
   1.294 +
   1.295 +        elems->pce->isShifted = FALSE;
   1.296 +    }
   1.297 +
   1.298 +    return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
   1.299 +}
   1.300 +
   1.301 +U_CAPI void U_EXPORT2
   1.302 +uprv_init_pce(const UCollationElements *elems)
   1.303 +{
   1.304 +    if (elems->pce != NULL) {
   1.305 +        elems->pce->init(elems->iteratordata_.coll);
   1.306 +    }
   1.307 +}
   1.308 +
   1.309 +
   1.310 +
   1.311 +/* public methods ---------------------------------------------------- */
   1.312 +
   1.313 +U_CAPI UCollationElements* U_EXPORT2
   1.314 +ucol_openElements(const UCollator  *coll,
   1.315 +                  const UChar      *text,
   1.316 +                        int32_t    textLength,
   1.317 +                        UErrorCode *status)
   1.318 +{
   1.319 +    if (U_FAILURE(*status)) {
   1.320 +        return NULL;
   1.321 +    }
   1.322 +
   1.323 +    UCollationElements *result = new UCollationElements;
   1.324 +    if (result == NULL) {
   1.325 +        *status = U_MEMORY_ALLOCATION_ERROR;
   1.326 +        return NULL;
   1.327 +    }
   1.328 +
   1.329 +    result->reset_ = TRUE;
   1.330 +    result->isWritable = FALSE;
   1.331 +    result->pce = NULL;
   1.332 +
   1.333 +    if (text == NULL) {
   1.334 +        textLength = 0;
   1.335 +    }
   1.336 +    uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status);
   1.337 +
   1.338 +    return result;
   1.339 +}
   1.340 +
   1.341 +
   1.342 +U_CAPI void U_EXPORT2
   1.343 +ucol_closeElements(UCollationElements *elems)
   1.344 +{
   1.345 +	if (elems != NULL) {
   1.346 +	  collIterate *ci = &elems->iteratordata_;
   1.347 +
   1.348 +	  if (ci->extendCEs) {
   1.349 +		  uprv_free(ci->extendCEs);
   1.350 +	  }
   1.351 +
   1.352 +	  if (ci->offsetBuffer) {
   1.353 +		  uprv_free(ci->offsetBuffer);
   1.354 +	  }
   1.355 +
   1.356 +	  if (elems->isWritable && elems->iteratordata_.string != NULL)
   1.357 +	  {
   1.358 +		uprv_free((UChar *)elems->iteratordata_.string);
   1.359 +	  }
   1.360 +
   1.361 +	  if (elems->pce != NULL) {
   1.362 +		  delete elems->pce;
   1.363 +	  }
   1.364 +
   1.365 +	  delete elems;
   1.366 +	}
   1.367 +}
   1.368 +
   1.369 +U_CAPI void U_EXPORT2
   1.370 +ucol_reset(UCollationElements *elems)
   1.371 +{
   1.372 +    collIterate *ci = &(elems->iteratordata_);
   1.373 +    elems->reset_   = TRUE;
   1.374 +    ci->pos         = ci->string;
   1.375 +    if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
   1.376 +        ci->endp      = ci->string + u_strlen(ci->string);
   1.377 +    }
   1.378 +    ci->CEpos       = ci->toReturn = ci->CEs;
   1.379 +    ci->flags       = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN;
   1.380 +    if (ci->coll->normalizationMode == UCOL_ON) {
   1.381 +        ci->flags |= UCOL_ITER_NORM;
   1.382 +    }
   1.383 +
   1.384 +    ci->writableBuffer.remove();
   1.385 +    ci->fcdPosition = NULL;
   1.386 +
   1.387 +  //ci->offsetReturn = ci->offsetStore = NULL;
   1.388 +	ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
   1.389 +}
   1.390 +
   1.391 +U_CAPI void U_EXPORT2
   1.392 +ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status)
   1.393 +{
   1.394 +    if (U_FAILURE(*status)) {
   1.395 +        return;
   1.396 +    }
   1.397 +
   1.398 +    if (elems == NULL) {
   1.399 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.400 +        return;
   1.401 +    }
   1.402 +
   1.403 +    elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT;
   1.404 +}
   1.405 +
   1.406 +U_CAPI int32_t U_EXPORT2
   1.407 +ucol_next(UCollationElements *elems, 
   1.408 +          UErrorCode         *status)
   1.409 +{
   1.410 +    int32_t result;
   1.411 +    if (U_FAILURE(*status)) {
   1.412 +        return UCOL_NULLORDER;
   1.413 +    }
   1.414 +
   1.415 +    elems->reset_ = FALSE;
   1.416 +
   1.417 +    result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll,
   1.418 +                                     &elems->iteratordata_, 
   1.419 +                                     status);
   1.420 +
   1.421 +    if (result == UCOL_NO_MORE_CES) {
   1.422 +        result = UCOL_NULLORDER;
   1.423 +    }
   1.424 +    return result;
   1.425 +}
   1.426 +
   1.427 +U_CAPI int64_t U_EXPORT2
   1.428 +ucol_nextProcessed(UCollationElements *elems,
   1.429 +                   int32_t            *ixLow,
   1.430 +                   int32_t            *ixHigh,
   1.431 +                   UErrorCode         *status)
   1.432 +{
   1.433 +    const UCollator *coll = elems->iteratordata_.coll;
   1.434 +    int64_t result = UCOL_IGNORABLE;
   1.435 +    uint32_t low = 0, high = 0;
   1.436 +
   1.437 +    if (U_FAILURE(*status)) {
   1.438 +        return UCOL_PROCESSED_NULLORDER;
   1.439 +    }
   1.440 +
   1.441 +    if (elems->pce == NULL) {
   1.442 +        elems->pce = new UCollationPCE(elems);
   1.443 +    } else {
   1.444 +        elems->pce->pceBuffer.reset();
   1.445 +    }
   1.446 +
   1.447 +    elems->reset_ = FALSE;
   1.448 +
   1.449 +    do {
   1.450 +        low = ucol_getOffset(elems);
   1.451 +        uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status);
   1.452 +        high = ucol_getOffset(elems);
   1.453 +
   1.454 +        if (ce == UCOL_NO_MORE_CES) {
   1.455 +             result = UCOL_PROCESSED_NULLORDER;
   1.456 +             break;
   1.457 +        }
   1.458 +
   1.459 +        result = processCE(elems, ce);
   1.460 +    } while (result == UCOL_IGNORABLE);
   1.461 +
   1.462 +    if (ixLow != NULL) {
   1.463 +        *ixLow = low;
   1.464 +    }
   1.465 +
   1.466 +    if (ixHigh != NULL) {
   1.467 +        *ixHigh = high;
   1.468 +    }
   1.469 +
   1.470 +    return result;
   1.471 +}
   1.472 +
   1.473 +U_CAPI int32_t U_EXPORT2
   1.474 +ucol_previous(UCollationElements *elems,
   1.475 +              UErrorCode         *status)
   1.476 +{
   1.477 +    if(U_FAILURE(*status)) {
   1.478 +        return UCOL_NULLORDER;
   1.479 +    }
   1.480 +    else
   1.481 +    {
   1.482 +        int32_t result;
   1.483 +
   1.484 +        if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) {
   1.485 +            if (elems->iteratordata_.endp == NULL) {
   1.486 +                elems->iteratordata_.endp = elems->iteratordata_.string + 
   1.487 +                                            u_strlen(elems->iteratordata_.string);
   1.488 +                elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
   1.489 +            }
   1.490 +            elems->iteratordata_.pos = elems->iteratordata_.endp;
   1.491 +            elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
   1.492 +        }
   1.493 +
   1.494 +        elems->reset_ = FALSE;
   1.495 +
   1.496 +        result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
   1.497 +                                         &(elems->iteratordata_), 
   1.498 +                                         status);
   1.499 +
   1.500 +        if (result == UCOL_NO_MORE_CES) {
   1.501 +            result = UCOL_NULLORDER;
   1.502 +        }
   1.503 +
   1.504 +        return result;
   1.505 +    }
   1.506 +}
   1.507 +
   1.508 +U_CAPI int64_t U_EXPORT2
   1.509 +ucol_previousProcessed(UCollationElements *elems,
   1.510 +                   int32_t            *ixLow,
   1.511 +                   int32_t            *ixHigh,
   1.512 +                   UErrorCode         *status)
   1.513 +{
   1.514 +    const UCollator *coll = elems->iteratordata_.coll;
   1.515 +    int64_t result = UCOL_IGNORABLE;
   1.516 + // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
   1.517 + // UCollationStrength strength = ucol_getStrength(coll);
   1.518 + //  UBool toShift   = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) ==  UCOL_SHIFTED;
   1.519 + // uint32_t variableTop = coll->variableTopValue;
   1.520 +    int32_t  low = 0, high = 0;
   1.521 +
   1.522 +    if (U_FAILURE(*status)) {
   1.523 +        return UCOL_PROCESSED_NULLORDER;
   1.524 +    }
   1.525 +
   1.526 +    if (elems->reset_ && 
   1.527 +        (elems->iteratordata_.pos == elems->iteratordata_.string)) {
   1.528 +        if (elems->iteratordata_.endp == NULL) {
   1.529 +            elems->iteratordata_.endp = elems->iteratordata_.string + 
   1.530 +                                        u_strlen(elems->iteratordata_.string);
   1.531 +            elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
   1.532 +        }
   1.533 +
   1.534 +        elems->iteratordata_.pos = elems->iteratordata_.endp;
   1.535 +        elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
   1.536 +    }
   1.537 +
   1.538 +    if (elems->pce == NULL) {
   1.539 +        elems->pce = new UCollationPCE(elems);
   1.540 +    } else {
   1.541 +      //elems->pce->pceBuffer.reset();
   1.542 +    }
   1.543 +
   1.544 +    elems->reset_ = FALSE;
   1.545 +
   1.546 +    while (elems->pce->pceBuffer.empty()) {
   1.547 +        // buffer raw CEs up to non-ignorable primary
   1.548 +        RCEBuffer rceb;
   1.549 +        uint32_t ce;
   1.550 +        
   1.551 +        // **** do we need to reset rceb, or will it always be empty at this point ****
   1.552 +        do {
   1.553 +            high = ucol_getOffset(elems);
   1.554 +            ce   = ucol_getPrevCE(coll, &elems->iteratordata_, status);
   1.555 +            low  = ucol_getOffset(elems);
   1.556 +
   1.557 +            if (ce == UCOL_NO_MORE_CES) {
   1.558 +                if (! rceb.empty()) {
   1.559 +                    break;
   1.560 +                }
   1.561 +
   1.562 +                goto finish;
   1.563 +            }
   1.564 +
   1.565 +            rceb.put(ce, low, high);
   1.566 +        } while ((ce & UCOL_PRIMARYMASK) == 0);
   1.567 +
   1.568 +        // process the raw CEs
   1.569 +        while (! rceb.empty()) {
   1.570 +            const RCEI *rcei = rceb.get();
   1.571 +
   1.572 +            result = processCE(elems, rcei->ce);
   1.573 +
   1.574 +            if (result != UCOL_IGNORABLE) {
   1.575 +                elems->pce->pceBuffer.put(result, rcei->low, rcei->high);
   1.576 +            }
   1.577 +        }
   1.578 +    }
   1.579 +
   1.580 +finish:
   1.581 +    if (elems->pce->pceBuffer.empty()) {
   1.582 +        // **** Is -1 the right value for ixLow, ixHigh? ****
   1.583 +    	if (ixLow != NULL) {
   1.584 +    		*ixLow = -1;
   1.585 +    	}
   1.586 +    	
   1.587 +    	if (ixHigh != NULL) {
   1.588 +    		*ixHigh = -1
   1.589 +    		;
   1.590 +    	}
   1.591 +        return UCOL_PROCESSED_NULLORDER;
   1.592 +    }
   1.593 +
   1.594 +    const PCEI *pcei = elems->pce->pceBuffer.get();
   1.595 +
   1.596 +    if (ixLow != NULL) {
   1.597 +        *ixLow = pcei->low;
   1.598 +    }
   1.599 +
   1.600 +    if (ixHigh != NULL) {
   1.601 +        *ixHigh = pcei->high;
   1.602 +    }
   1.603 +
   1.604 +    return pcei->ce;
   1.605 +}
   1.606 +
   1.607 +U_CAPI int32_t U_EXPORT2
   1.608 +ucol_getMaxExpansion(const UCollationElements *elems,
   1.609 +                           int32_t            order)
   1.610 +{
   1.611 +    uint8_t result;
   1.612 +
   1.613 +#if 0
   1.614 +    UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
   1.615 +#else
   1.616 +    const UCollator *coll = elems->iteratordata_.coll;
   1.617 +    const uint32_t *start;
   1.618 +    const uint32_t *limit;
   1.619 +    const uint32_t *mid;
   1.620 +          uint32_t strengthMask = 0;
   1.621 +          uint32_t mOrder = (uint32_t) order;
   1.622 +
   1.623 +    switch (coll->strength) 
   1.624 +    {
   1.625 +    default:
   1.626 +        strengthMask |= UCOL_TERTIARYORDERMASK;
   1.627 +        /* fall through */
   1.628 +
   1.629 +    case UCOL_SECONDARY:
   1.630 +        strengthMask |= UCOL_SECONDARYORDERMASK;
   1.631 +        /* fall through */
   1.632 +
   1.633 +    case UCOL_PRIMARY:
   1.634 +        strengthMask |= UCOL_PRIMARYORDERMASK;
   1.635 +    }
   1.636 +
   1.637 +    mOrder &= strengthMask;
   1.638 +    start = (coll)->endExpansionCE;
   1.639 +    limit = (coll)->lastEndExpansionCE;
   1.640 +
   1.641 +    while (start < limit - 1) {
   1.642 +        mid = start + ((limit - start) >> 1);
   1.643 +        if (mOrder <= (*mid & strengthMask)) {
   1.644 +          limit = mid;
   1.645 +        } else {
   1.646 +          start = mid;
   1.647 +        }
   1.648 +    }
   1.649 +
   1.650 +    // FIXME: with a masked search, there might be more than one hit,
   1.651 +    // so we need to look forward and backward from the match to find all
   1.652 +    // of the hits...
   1.653 +    if ((*start & strengthMask) == mOrder) {
   1.654 +        result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE));
   1.655 +    } else if ((*limit & strengthMask) == mOrder) {
   1.656 +         result = *(coll->expansionCESize + (limit - coll->endExpansionCE));
   1.657 +   } else if ((mOrder & 0xFFFF) == 0x00C0) {
   1.658 +        result = 2;
   1.659 +   } else {
   1.660 +       result = 1;
   1.661 +   }
   1.662 +#endif
   1.663 +
   1.664 +    return result;
   1.665 +}
   1.666 + 
   1.667 +U_CAPI void U_EXPORT2
   1.668 +ucol_setText(      UCollationElements *elems,
   1.669 +             const UChar              *text,
   1.670 +                   int32_t            textLength,
   1.671 +                   UErrorCode         *status)
   1.672 +{
   1.673 +    if (U_FAILURE(*status)) {
   1.674 +        return;
   1.675 +    }
   1.676 +
   1.677 +    if (elems->isWritable && elems->iteratordata_.string != NULL)
   1.678 +    {
   1.679 +        uprv_free((UChar *)elems->iteratordata_.string);
   1.680 +    }
   1.681 +
   1.682 +    if (text == NULL) {
   1.683 +        textLength = 0;
   1.684 +    }
   1.685 +
   1.686 +    elems->isWritable = FALSE;
   1.687 +    
   1.688 +    /* free offset buffer to avoid memory leak before initializing. */
   1.689 +    ucol_freeOffsetBuffer(&(elems->iteratordata_));
   1.690 +    /* Ensure that previously allocated extendCEs is freed before setting to NULL. */
   1.691 +    if (elems->iteratordata_.extendCEs != NULL) {
   1.692 +        uprv_free(elems->iteratordata_.extendCEs);
   1.693 +    }
   1.694 +    uprv_init_collIterate(elems->iteratordata_.coll, text, textLength, 
   1.695 +                          &elems->iteratordata_, status);
   1.696 +
   1.697 +    elems->reset_   = TRUE;
   1.698 +}
   1.699 +
   1.700 +U_CAPI int32_t U_EXPORT2
   1.701 +ucol_getOffset(const UCollationElements *elems)
   1.702 +{
   1.703 +  const collIterate *ci = &(elems->iteratordata_);
   1.704 +
   1.705 +  if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) {
   1.706 +      return ci->offsetRepeatValue;
   1.707 +  }
   1.708 +
   1.709 +  if (ci->offsetReturn != NULL) {
   1.710 +      return *ci->offsetReturn;
   1.711 +  }
   1.712 +
   1.713 +  // while processing characters in normalization buffer getOffset will 
   1.714 +  // return the next non-normalized character. 
   1.715 +  // should be inline with the old implementation since the old codes uses
   1.716 +  // nextDecomp in normalizer which also decomposes the string till the 
   1.717 +  // first base character is found.
   1.718 +  if (ci->flags & UCOL_ITER_INNORMBUF) {
   1.719 +      if (ci->fcdPosition == NULL) {
   1.720 +        return 0;
   1.721 +      }
   1.722 +      return (int32_t)(ci->fcdPosition - ci->string);
   1.723 +  }
   1.724 +  else {
   1.725 +      return (int32_t)(ci->pos - ci->string);
   1.726 +  }
   1.727 +}
   1.728 +
   1.729 +U_CAPI void U_EXPORT2
   1.730 +ucol_setOffset(UCollationElements    *elems,
   1.731 +               int32_t           offset,
   1.732 +               UErrorCode            *status)
   1.733 +{
   1.734 +    if (U_FAILURE(*status)) {
   1.735 +        return;
   1.736 +    }
   1.737 +
   1.738 +    // this methods will clean up any use of the writable buffer and points to 
   1.739 +    // the original string
   1.740 +    collIterate *ci = &(elems->iteratordata_);
   1.741 +    ci->pos         = ci->string + offset;
   1.742 +    ci->CEpos       = ci->toReturn = ci->CEs;
   1.743 +    if (ci->flags & UCOL_ITER_INNORMBUF) {
   1.744 +        ci->flags = ci->origFlags;
   1.745 +    }
   1.746 +    if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
   1.747 +        ci->endp  = ci->string + u_strlen(ci->string);
   1.748 +        ci->flags |= UCOL_ITER_HASLEN;
   1.749 +    }
   1.750 +    ci->fcdPosition = NULL;
   1.751 +    elems->reset_ = FALSE;
   1.752 +
   1.753 +	ci->offsetReturn = NULL;
   1.754 +    ci->offsetStore = ci->offsetBuffer;
   1.755 +	ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
   1.756 +}
   1.757 +
   1.758 +U_CAPI int32_t U_EXPORT2
   1.759 +ucol_primaryOrder (int32_t order) 
   1.760 +{
   1.761 +    order &= UCOL_PRIMARYMASK;
   1.762 +    return (order >> UCOL_PRIMARYORDERSHIFT);
   1.763 +}
   1.764 +
   1.765 +U_CAPI int32_t U_EXPORT2
   1.766 +ucol_secondaryOrder (int32_t order) 
   1.767 +{
   1.768 +    order &= UCOL_SECONDARYMASK;
   1.769 +    return (order >> UCOL_SECONDARYORDERSHIFT);
   1.770 +}
   1.771 +
   1.772 +U_CAPI int32_t U_EXPORT2
   1.773 +ucol_tertiaryOrder (int32_t order) 
   1.774 +{
   1.775 +    return (order & UCOL_TERTIARYMASK);
   1.776 +}
   1.777 +
   1.778 +
   1.779 +void ucol_freeOffsetBuffer(collIterate *s) {
   1.780 +    if (s != NULL && s->offsetBuffer != NULL) {
   1.781 +        uprv_free(s->offsetBuffer);
   1.782 +        s->offsetBuffer = NULL;
   1.783 +        s->offsetBufferSize = 0;
   1.784 +    }
   1.785 +}
   1.786 +
   1.787 +#endif /* #if !UCONFIG_NO_COLLATION */

mercurial