intl/icu/source/i18n/coleitr.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/coleitr.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,488 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +* Copyright (C) 1996-2011, International Business Machines Corporation and    *
     1.7 +* others. All Rights Reserved.                                                *
     1.8 +*******************************************************************************
     1.9 +*/
    1.10 +
    1.11 +/*
    1.12 +* File coleitr.cpp
    1.13 +*
    1.14 +* 
    1.15 +*
    1.16 +* Created by: Helena Shih
    1.17 +*
    1.18 +* Modification History:
    1.19 +*
    1.20 +*  Date      Name        Description
    1.21 +*
    1.22 +*  6/23/97   helena      Adding comments to make code more readable.
    1.23 +* 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
    1.24 +* 12/10/99   aliu        Ported Thai collation support from Java.
    1.25 +* 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
    1.26 +* 02/19/01   swquek      Removed CollationElementsIterator() since it is 
    1.27 +*                        private constructor and no calls are made to it
    1.28 +*/
    1.29 +
    1.30 +#include "unicode/utypes.h"
    1.31 +
    1.32 +#if !UCONFIG_NO_COLLATION
    1.33 +
    1.34 +#include "unicode/coleitr.h"
    1.35 +#include "unicode/ustring.h"
    1.36 +#include "ucol_imp.h"
    1.37 +#include "uassert.h"
    1.38 +#include "cmemory.h"
    1.39 +
    1.40 +
    1.41 +/* Constants --------------------------------------------------------------- */
    1.42 +
    1.43 +U_NAMESPACE_BEGIN
    1.44 +
    1.45 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)  // NOTE(review): presumably supplies the class-ID (ICU RTTI) boilerplate for this class -- confirm against the macro definition
    1.46 +
    1.47 +/* CollationElementIterator public constructor/destructor ------------------ */
    1.48 +
    1.49 +/** Copy constructor: opens a fresh C iterator on other's collator, then copies other's state. */
    1.50 +CollationElementIterator::CollationElementIterator(
    1.51 +                                         const CollationElementIterator& other)
    1.52 +                                         : UObject(other), isDataOwned_(TRUE)
    1.53 +{
    1.54 +    UErrorCode status = U_ZERO_ERROR;
    1.55 +    m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, &status);
    1.56 +    // No status can be reported from here; operator= dereferences m_data_, so copy only after a successful open.
    1.57 +    if (U_SUCCESS(status) && m_data_ != NULL) { *this = other; }
    1.58 +}
    1.59 +
    1.60 +// Destructor. The wrapped C iterator is closed only when this object owns
    1.61 +// it, as recorded in isDataOwned_.
    1.62 +CollationElementIterator::~CollationElementIterator() {
    1.63 +    if (!isDataOwned_) { return; }
    1.64 +    ucol_closeElements(m_data_);
    1.65 +}
    1.66 +
    1.67 +/* CollationElementIterator public methods --------------------------------- */
    1.68 +
    1.69 +int32_t CollationElementIterator::getOffset() const {  // delegates to the C API
    1.70 +    const int32_t offset = ucol_getOffset(m_data_);
    1.71 +    return offset;
    1.72 +}
    1.73 +
    1.74 +/**
    1.75 +* Fetches the ordering priority of the next character in the string.
    1.76 +* @return the next character's ordering, or NULLORDER if an error has
    1.77 +*         occurred or the end of the string has been reached.
    1.78 +*/
    1.79 +int32_t CollationElementIterator::next(UErrorCode& status) {
    1.80 +    UErrorCode *const errorCode = &status;
    1.81 +    return ucol_next(m_data_, errorCode);
    1.82 +}
    1.83 +
    1.84 +UBool CollationElementIterator::operator!=(
    1.85 +                                  const CollationElementIterator& other) const
    1.86 +{   // inequality is simply the negation of operator==
    1.87 +    const UBool same = (*this == other);
    1.88 +    return !same;
    1.89 +}
    1.89 +
    1.90 +/**
    1.91 +* Equality test. Two iterators compare equal when they share one collator,
    1.92 +* iterate over equal text, and stand at the same offset, the same
    1.93 +* normalization-buffer position and the same CE-buffer position.
    1.94 +* @param that the iterator to compare against.
    1.95 +* @return TRUE if both iterators are in the same state, FALSE otherwise.
    1.96 +*/
    1.97 +UBool CollationElementIterator::operator==(
    1.98 +                                    const CollationElementIterator& that) const
    1.99 +{
   1.100 +    // The same object, or two wrappers around one C iterator.
   1.101 +    if (this == &that || m_data_ == that.m_data_) {
   1.102 +        return TRUE;
   1.103 +    }
   1.104 +    const collIterate &lhs = m_data_->iteratordata_;
   1.105 +    const collIterate &rhs = that.m_data_->iteratordata_;
   1.106 +
   1.107 +    // Both must be driven by the same collator.
   1.108 +    if (lhs.coll != rhs.coll) {
   1.109 +        return FALSE;
   1.110 +    }
   1.111 +
   1.112 +    // The constructors and setText() always set a length. Only the source
   1.113 +    // text is compared, not the contents of the normalization buffer.
   1.114 +    const int lhsLength = (int)(lhs.endp - lhs.string);
   1.115 +    const int rhsLength = (int)(rhs.endp - rhs.string);
   1.116 +    if (lhsLength != rhsLength) {
   1.117 +        return FALSE;
   1.118 +    }
   1.119 +    if (uprv_memcmp(lhs.string, rhs.string, lhsLength * U_SIZEOF_UCHAR) != 0) {
   1.120 +        return FALSE;
   1.121 +    }
   1.122 +
   1.123 +    // Both must report the same offset.
   1.124 +    if (getOffset() != that.getOffset()) {
   1.125 +        return FALSE;
   1.126 +    }
   1.127 +
   1.128 +    // Both sides must agree on UCOL_ITER_HASLEN. When it is clear on both,
   1.129 +    // their positions within the normalization buffers must match too.
   1.130 +    const UBool lhsHasLen = (lhs.flags & UCOL_ITER_HASLEN) != 0;
   1.131 +    const UBool rhsHasLen = (rhs.flags & UCOL_ITER_HASLEN) != 0;
   1.132 +    if (lhsHasLen != rhsHasLen) {
   1.133 +        return FALSE;
   1.134 +    }
   1.135 +    if (!lhsHasLen &&
   1.136 +        (lhs.pos - lhs.writableBuffer.getBuffer())
   1.137 +            != (rhs.pos - rhs.writableBuffer.getBuffer())) {
   1.138 +        return FALSE;
   1.139 +    }
   1.140 +
   1.141 +    // Last, the positions within the CE buffers.
   1.142 +    return (lhs.CEpos - lhs.CEs) == (rhs.CEpos - rhs.CEs);
   1.143 +}
   1.144 +
   1.145 +/**
   1.146 +* Fetches the ordering priority of the previous collation element in the string.
   1.147 +* @param status the error code status.
   1.148 +* @return the previous element's ordering, or NULLORDER if an error has
   1.149 +*         occurred or the start of the string has been reached.
   1.150 +*/
   1.151 +int32_t CollationElementIterator::previous(UErrorCode& status) {
   1.152 +    UErrorCode *const errorCode = &status;
   1.153 +    return ucol_previous(m_data_, errorCode);
   1.154 +}
   1.155 +
   1.156 +/**
   1.157 +* Moves the cursor back to the beginning of the string.
   1.158 +*/
   1.159 +void CollationElementIterator::reset() {
   1.160 +    UCollationElements *const elems = m_data_;
   1.161 +    ucol_reset(elems);
   1.162 +}
   1.163 +
   1.164 +void CollationElementIterator::setOffset(int32_t newOffset, UErrorCode& status)
   1.165 +{   // hands the requested offset and the caller's error code to the C API
   1.166 +    UErrorCode *const errorCode = &status;
   1.167 +    ucol_setOffset(m_data_, newOffset, errorCode);
   1.168 +}
   1.169 +
   1.170 +/**
   1.171 +* Sets the source to the new source string, which is copied. The copy is
   1.172 +* allocated before the old text is released, so a failed allocation leaves
   1.173 +* the iterator untouched instead of pointing at freed memory.
   1.174 +* @param source the new text to iterate over.
   1.175 +* @param status the error code status; set to U_MEMORY_ALLOCATION_ERROR if
   1.176 +*               the copy cannot be allocated.
   1.177 +*/
   1.178 +void CollationElementIterator::setText(const UnicodeString& source,
   1.179 +                                       UErrorCode& status)
   1.180 +{
   1.181 +    if (U_FAILURE(status)) {
   1.182 +        return;
   1.183 +    }
   1.184 +
   1.185 +    int32_t length = source.length();
   1.186 +    // Empty text still gets a one-UChar buffer holding a single zero.
   1.187 +    UChar *string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * (length > 0 ? length : 1));
   1.188 +    if (string == NULL) {
   1.189 +        status = U_MEMORY_ALLOCATION_ERROR;
   1.190 +        return;
   1.191 +    }
   1.192 +    if (length > 0) {
   1.193 +        u_memcpy(string, source.getBuffer(), length);
   1.194 +    }
   1.195 +    else {
   1.196 +        *string = 0;
   1.197 +    }
   1.198 +
   1.199 +    // Only now release the previous text, if it is marked writable.
   1.200 +    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
   1.201 +        uprv_free((UChar *)m_data_->iteratordata_.string);
   1.202 +    }
   1.203 +    m_data_->isWritable = TRUE;
   1.204 +    /* Free offsetBuffer before initializing it. */
   1.205 +    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
   1.206 +    uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
   1.207 +        &m_data_->iteratordata_, &status);
   1.208 +
   1.209 +    m_data_->reset_   = TRUE;
   1.210 +}
   1.211 +
   1.212 +/**
   1.213 +* Sets the source to the text of the given character iterator. The text is
   1.214 +* copied into a newly allocated buffer that replaces the current one.
   1.215 +* @param source the character iterator supplying the new text.
   1.216 +* @param status the error code status; nothing is done if it already holds
   1.217 +*               a failure, and it becomes U_MEMORY_ALLOCATION_ERROR when
   1.218 +*               the buffer cannot be allocated.
   1.219 +*/
   1.220 +void CollationElementIterator::setText(CharacterIterator& source,
   1.221 +                                       UErrorCode& status)
   1.222 +{
   1.223 +    if (U_FAILURE(status)) {
   1.224 +        return;
   1.225 +    }
   1.226 +
   1.227 +    const int32_t textLength = source.getLength();
   1.228 +    const UBool   isEmpty    = (textLength == 0);
   1.229 +    // An empty source still gets a one-UChar buffer.
   1.230 +    UChar *copy = (UChar *)uprv_malloc(
   1.231 +        isEmpty ? U_SIZEOF_UCHAR : U_SIZEOF_UCHAR * textLength);
   1.232 +    if (copy == NULL) {
   1.233 +        status = U_MEMORY_ALLOCATION_ERROR;
   1.234 +        return;
   1.235 +    }
   1.236 +    if (isEmpty) {
   1.237 +        *copy = 0;
   1.238 +    }
   1.239 +    else {
   1.240 +        // Pull the text out of the iterator and duplicate it.
   1.241 +        UnicodeString text;
   1.242 +        source.getText(text);
   1.243 +        u_memcpy(copy, text.getBuffer(), textLength);
   1.244 +    }
   1.245 +
   1.246 +    collIterate *state = &m_data_->iteratordata_;
   1.247 +    // Release the previous text when it is marked writable.
   1.248 +    if (m_data_->isWritable && state->string != NULL) {
   1.249 +        uprv_free((UChar *)state->string);
   1.250 +    }
   1.251 +    m_data_->isWritable = TRUE;
   1.252 +    // The offset buffer is freed before the state is re-initialized.
   1.253 +    ucol_freeOffsetBuffer(state);
   1.254 +    uprv_init_collIterate(state->coll, copy, textLength, state, &status);
   1.255 +    m_data_->reset_ = TRUE;
   1.256 +}
   1.257 +
   1.258 +int32_t CollationElementIterator::strengthOrder(int32_t order) const
   1.259 +{
   1.260 +    // Mask off the differences that do not count at the collator's strength;
   1.261 +    // strengths other than primary/secondary leave the order untouched.
   1.262 +    switch (ucol_getStrength(m_data_->iteratordata_.coll)) {
   1.263 +    case UCOL_PRIMARY:
   1.264 +        return order & RuleBasedCollator::PRIMARYDIFFERENCEONLY;
   1.265 +    case UCOL_SECONDARY:
   1.266 +        return order & RuleBasedCollator::SECONDARYDIFFERENCEONLY;
   1.267 +    default:
   1.268 +        return order;
   1.269 +    }
   1.270 +}
   1.271 +
   1.272 +/* CollationElementIterator private constructors/destructors --------------- */
   1.273 +
   1.274 +/**
   1.275 +* This is the "real" constructor for this class; it constructs an iterator
   1.276 +* over a private copy of the source text using the specified collator.
   1.277 +* m_data_ starts out as NULL, so a failure before the C iterator is opened
   1.278 +* never leaves the destructor with an uninitialized pointer, and the copy
   1.279 +* of the text is freed again when no C iterator could be opened for it.
   1.280 +* @param sourceText the text to iterate over.
   1.281 +* @param order      the collator to iterate with.
   1.282 +* @param status     the error code status.
   1.283 +*/
   1.284 +CollationElementIterator::CollationElementIterator(
   1.285 +                                               const UnicodeString& sourceText,
   1.286 +                                               const RuleBasedCollator* order,
   1.287 +                                               UErrorCode& status)
   1.288 +                                               : isDataOwned_(TRUE)
   1.289 +{
   1.290 +    m_data_ = NULL;
   1.291 +    if (U_FAILURE(status)) {
   1.292 +        return;
   1.293 +    }
   1.294 +
   1.295 +    int32_t length = sourceText.length();
   1.296 +    // Empty text still gets a one-UChar buffer holding a single zero.
   1.297 +    UChar *string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * (length > 0 ? length : 1));
   1.298 +    /* test for NULL */
   1.299 +    if (string == NULL) {
   1.300 +        status = U_MEMORY_ALLOCATION_ERROR;
   1.301 +        return;
   1.302 +    }
   1.303 +    if (length > 0) {
   1.304 +        u_memcpy(string, sourceText.getBuffer(), length);
   1.305 +    }
   1.306 +    else {
   1.307 +        *string = 0;
   1.308 +    }
   1.309 +    m_data_ = ucol_openElements(order->ucollator, string, length, &status);
   1.310 +
   1.311 +    if (U_FAILURE(status)) {
   1.312 +        if (m_data_ == NULL) {
   1.313 +            // No iterator was created, so nothing else refers to the copy.
   1.314 +            uprv_free(string);
   1.315 +        }
   1.316 +        return;
   1.317 +    }
   1.318 +    // Mark the text writable; setText() frees writable text on replacement.
   1.319 +    m_data_->isWritable = TRUE;
   1.320 +}
   1.321 +
   1.322 +/**
   1.323 +* This is the "real" constructor for this class; it constructs an iterator over
   1.324 +* a private copy of the source text using the specified collator. m_data_ is
   1.325 +* NULL afterwards if no C iterator could be opened.
   1.326 +* @param sourceText the character iterator supplying the text.
   1.327 +* @param order      the collator to iterate with.
   1.328 +* @param status     the error code status.
   1.329 +*/
   1.330 +CollationElementIterator::CollationElementIterator(
   1.331 +                                           const CharacterIterator& sourceText,
   1.332 +                                           const RuleBasedCollator* order,
   1.333 +                                           UErrorCode& status)
   1.334 +                                           : isDataOwned_(TRUE)
   1.335 +{
   1.336 +    m_data_ = NULL;  // the destructor must never see an uninitialized pointer
   1.337 +    if (U_FAILURE(status))
   1.338 +        return;
   1.339 +    // **** should I just drop this test? ****
   1.340 +    /*
   1.341 +    if ( sourceText.endIndex() != 0 )
   1.342 +    {
   1.343 +        // A CollationElementIterator is really a two-layered beast.
   1.344 +        // Internally it uses a Normalizer to munge the source text into a form 
   1.345 +        // where all "composed" Unicode characters (such as \u00FC) are split into a 
   1.346 +        // normal character and a combining accent character.  
   1.347 +        // Afterward, CollationElementIterator does its own processing to handle
   1.348 +        // expanding and contracting collation sequences, ignorables, and so on.
   1.349 +        
   1.350 +        Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
   1.351 +                                ? Normalizer::NO_OP : order->getDecomposition();
   1.352 +          
   1.353 +        text = new Normalizer(sourceText, decomp);
   1.354 +        if (text == NULL)
   1.355 +        status = U_MEMORY_ALLOCATION_ERROR;    
   1.356 +    }
   1.357 +    */
   1.358 +    int32_t length = sourceText.getLength();
   1.359 +    UChar *buffer;
   1.360 +    if (length > 0) {
   1.361 +        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
   1.362 +        if (buffer == NULL) {
   1.363 +            status = U_MEMORY_ALLOCATION_ERROR;
   1.364 +            return;
   1.365 +        }
   1.366 +        // Fetch the text into a separate string and copy it, as setText()
   1.367 +        // does; aliasing buffer could end in copying buffer onto itself.
   1.368 +        UnicodeString string;
   1.369 +        ((CharacterIterator &)sourceText).getText(string);
   1.370 +        u_memcpy(buffer, string.getBuffer(), length);
   1.371 +    }
   1.372 +    else {
   1.373 +        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
   1.374 +        if (buffer == NULL) {
   1.375 +            status = U_MEMORY_ALLOCATION_ERROR;
   1.376 +            return;
   1.377 +        }
   1.378 +        *buffer = 0;
   1.379 +    }
   1.380 +    m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
   1.381 +
   1.382 +    if (U_FAILURE(status)) {
   1.383 +        // Without an iterator nothing else refers to buffer: release it.
   1.384 +        if (m_data_ == NULL) { uprv_free(buffer); }
   1.385 +        return;
   1.386 +    }
   1.387 +    m_data_->isWritable = TRUE;
   1.388 +}
   1.389 +
   1.390 +/* CollationElementIterator protected methods ----------------------------- */
   1.391 +
   1.392 +/**
   1.393 +* Assignment: makes this iterator an independent copy of other by duplicating
   1.394 +* the source text, the positions and the buffered CEs. There is no error
   1.395 +* channel: a failed text allocation leaves the iterator without text, a
   1.396 +* failed CE-buffer allocation leaves it with an empty CE buffer.
   1.397 +* @param other the iterator to copy from.
   1.398 +* @return a reference to this iterator.
   1.399 +*/
   1.400 +const CollationElementIterator& CollationElementIterator::operator=(
   1.401 +                                         const CollationElementIterator& other)
   1.402 +{
   1.403 +    if (this != &other)
   1.404 +    {
   1.405 +        UCollationElements *ucolelem      = this->m_data_;
   1.406 +        UCollationElements *otherucolelem = other.m_data_;
   1.407 +        collIterate        *coliter       = &(ucolelem->iteratordata_);
   1.408 +        collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
   1.409 +
   1.410 +        // checking only UCOL_ITER_HASLEN is not enough here as we may be in
   1.411 +        // the normalization buffer
   1.412 +        int length = (int)(othercoliter->endp - othercoliter->string);
   1.413 +        /* create a duplicate of string; like the constructors, empty text gets one zero UChar */
   1.414 +        UChar *newString = (UChar *)uprv_malloc((length > 0 ? length : 1) * U_SIZEOF_UCHAR);
   1.415 +        if (newString == NULL) { // Error: couldn't allocate memory. No copying should be done
   1.416 +            length = 0;
   1.417 +        } else if (length > 0) {
   1.418 +            uprv_memcpy(newString, othercoliter->string, length * U_SIZEOF_UCHAR);
   1.419 +        } else {
   1.420 +            length = 0;
   1.421 +            *newString = 0;
   1.422 +        }
   1.423 +        // Free the text being replaced if it is marked writable, so it does not leak.
   1.424 +        if (ucolelem->isWritable && coliter->string != NULL) {
   1.425 +            uprv_free((UChar *)coliter->string);
   1.426 +        }
   1.427 +        coliter->string      = newString;
   1.428 +        ucolelem->reset_     = otherucolelem->reset_;
   1.429 +        ucolelem->isWritable = TRUE;
   1.430 +
   1.431 +        /* start and end of string */
   1.432 +        coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;
   1.433 +        /* handle writable buffer here */
   1.434 +        if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
   1.435 +            coliter->writableBuffer = othercoliter->writableBuffer;
   1.436 +            coliter->writableBuffer.getTerminatedBuffer();
   1.437 +        }
   1.438 +
   1.439 +        /* current position */
   1.440 +        if (othercoliter->pos >= othercoliter->string && othercoliter->pos <= othercoliter->endp)
   1.441 +        {
   1.442 +            U_ASSERT(coliter->string != NULL);
   1.443 +            coliter->pos = coliter->string + (othercoliter->pos - othercoliter->string);
   1.444 +        }
   1.445 +        else {
   1.446 +            coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
   1.447 +        }
   1.448 +
   1.449 +        /* CE buffer: other keeps its CEs in extendCEs when that is set, else in CEs */
   1.450 +        uint32_t *otherCEs = othercoliter->extendCEs != NULL ? othercoliter->extendCEs : othercoliter->CEs;
   1.451 +        int32_t CEsize     = (int32_t)(othercoliter->CEpos - otherCEs);
   1.452 +        int32_t CEreturned = (int32_t)(othercoliter->toReturn - otherCEs);
   1.453 +        uint32_t *newExtendCEs = NULL;
   1.454 +        if (othercoliter->extendCEs != NULL) {
   1.455 +            // NOTE(review): extendCEsSize is taken to be the capacity of extendCEs, counted in CEs -- confirm in ucol_imp.h
   1.456 +            newExtendCEs = (uint32_t *)uprv_malloc(sizeof(uint32_t) * othercoliter->extendCEsSize);
   1.457 +            if (newExtendCEs == NULL) { // Error: fall back to an empty CE buffer
   1.458 +                CEsize = CEreturned = 0;
   1.459 +            }
   1.460 +        }
   1.461 +        uint32_t *CEs = newExtendCEs != NULL ? newExtendCEs : coliter->CEs;
   1.462 +        if (CEsize > 0) { // CEsize counts CEs, so it is scaled to bytes here
   1.463 +            uprv_memcpy(CEs, otherCEs, sizeof(uint32_t) * CEsize);
   1.464 +        }
   1.465 +        if (coliter->extendCEs != NULL) { uprv_free(coliter->extendCEs); }
   1.466 +        coliter->extendCEs     = newExtendCEs;
   1.467 +        coliter->extendCEsSize = newExtendCEs != NULL ? othercoliter->extendCEsSize : 0;
   1.468 +        coliter->toReturn      = CEs + CEreturned;
   1.469 +        coliter->CEpos         = CEs + CEsize;
   1.470 +
   1.471 +        if (othercoliter->fcdPosition != NULL) {
   1.472 +            U_ASSERT(coliter->string != NULL);
   1.473 +            coliter->fcdPosition = coliter->string + (othercoliter->fcdPosition - othercoliter->string);
   1.474 +        }
   1.475 +        else {
   1.476 +            coliter->fcdPosition = NULL;
   1.477 +        }
   1.478 +        coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
   1.479 +        coliter->origFlags   = othercoliter->origFlags;
   1.480 +        coliter->coll = othercoliter->coll;
   1.481 +        this->isDataOwned_ = TRUE;
   1.482 +    }
   1.483 +
   1.484 +    return *this;
   1.485 +}
   1.486 +
   1.487 +U_NAMESPACE_END
   1.488 +
   1.489 +#endif /* #if !UCONFIG_NO_COLLATION */
   1.490 +
   1.491 +/* eof */

mercurial