intl/icu/source/i18n/coleitr.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 *******************************************************************************
     3 * Copyright (C) 1996-2011, International Business Machines Corporation and    *
     4 * others. All Rights Reserved.                                                *
     5 *******************************************************************************
     6 */
     8 /*
     9 * File coleitr.cpp
    10 *
    11 * 
    12 *
    13 * Created by: Helena Shih
    14 *
    15 * Modification History:
    16 *
    17 *  Date      Name        Description
    18 *
    19 *  6/23/97   helena      Adding comments to make code more readable.
    20 * 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
    21 * 12/10/99   aliu        Ported Thai collation support from Java.
    22 * 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
    23 * 02/19/01   swquek      Removed CollationElementsIterator() since it is 
    24 *                        private constructor and no calls are made to it
    25 */
    27 #include "unicode/utypes.h"
    29 #if !UCONFIG_NO_COLLATION
    31 #include "unicode/coleitr.h"
    32 #include "unicode/ustring.h"
    33 #include "ucol_imp.h"
    34 #include "uassert.h"
    35 #include "cmemory.h"
    38 /* Constants --------------------------------------------------------------- */
    40 U_NAMESPACE_BEGIN
    42 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
    44 /* CollationElementIterator public constructor/destructor ------------------ */
    46 CollationElementIterator::CollationElementIterator(
    47                                          const CollationElementIterator& other) 
    48                                          : UObject(other), isDataOwned_(TRUE)
    49 {
    50     UErrorCode status = U_ZERO_ERROR;
    51     m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, 
    52                                 &status);
    54     *this = other;
    55 }
    57 CollationElementIterator::~CollationElementIterator()
    58 {
    59     if (isDataOwned_) {
    60         ucol_closeElements(m_data_);
    61     }
    62 }
    64 /* CollationElementIterator public methods --------------------------------- */
    66 int32_t CollationElementIterator::getOffset() const
    67 {
    68     return ucol_getOffset(m_data_);
    69 }
    71 /**
    72 * Get the ordering priority of the next character in the string.
    73 * @return the next character's ordering. Returns NULLORDER if an error has 
    74 *         occured or if the end of string has been reached
    75 */
    76 int32_t CollationElementIterator::next(UErrorCode& status)
    77 {
    78     return ucol_next(m_data_, &status);
    79 }
    81 UBool CollationElementIterator::operator!=(
    82                                   const CollationElementIterator& other) const
    83 {
    84     return !(*this == other);
    85 }
    87 UBool CollationElementIterator::operator==(
    88                                     const CollationElementIterator& that) const
    89 {
    90     if (this == &that || m_data_ == that.m_data_) {
    91         return TRUE;
    92     }
    94     // option comparison
    95     if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
    96     {
    97         return FALSE;
    98     }
   100     // the constructor and setText always sets a length
   101     // and we only compare the string not the contents of the normalization
   102     // buffer
   103     int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
   104     int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
   106     if (thislength != thatlength) {
   107         return FALSE;
   108     }
   110     if (uprv_memcmp(m_data_->iteratordata_.string, 
   111                     that.m_data_->iteratordata_.string, 
   112                     thislength * U_SIZEOF_UCHAR) != 0) {
   113         return FALSE;
   114     }
   115     if (getOffset() != that.getOffset()) {
   116         return FALSE;
   117     }
   119     // checking normalization buffer
   120     if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
   121         if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
   122             return FALSE;
   123         }
   124         // both are in the normalization buffer
   125         if (m_data_->iteratordata_.pos 
   126             - m_data_->iteratordata_.writableBuffer.getBuffer()
   127             != that.m_data_->iteratordata_.pos 
   128             - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
   129             // not in the same position in the normalization buffer
   130             return FALSE;
   131         }
   132     }
   133     else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
   134         return FALSE;
   135     }
   136     // checking ce position
   137     return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
   138             == (that.m_data_->iteratordata_.CEpos 
   139                                         - that.m_data_->iteratordata_.CEs);
   140 }
   142 /**
   143 * Get the ordering priority of the previous collation element in the string.
   144 * @param status the error code status.
   145 * @return the previous element's ordering. Returns NULLORDER if an error has 
   146 *         occured or if the start of string has been reached.
   147 */
   148 int32_t CollationElementIterator::previous(UErrorCode& status)
   149 {
   150     return ucol_previous(m_data_, &status);
   151 }
   153 /**
   154 * Resets the cursor to the beginning of the string.
   155 */
   156 void CollationElementIterator::reset()
   157 {
   158     ucol_reset(m_data_);
   159 }
   161 void CollationElementIterator::setOffset(int32_t newOffset, 
   162                                          UErrorCode& status)
   163 {
   164     ucol_setOffset(m_data_, newOffset, &status);
   165 }
   167 /**
   168 * Sets the source to the new source string.
   169 */
   170 void CollationElementIterator::setText(const UnicodeString& source,
   171                                        UErrorCode& status)
   172 {
   173     if (U_FAILURE(status)) {
   174         return;
   175     }
   177     int32_t length = source.length();
   178     UChar *string = NULL;
   179     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
   180         uprv_free((UChar *)m_data_->iteratordata_.string);
   181     }
   182     m_data_->isWritable = TRUE;
   183     if (length > 0) {
   184         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
   185         /* test for NULL */
   186         if (string == NULL) {
   187             status = U_MEMORY_ALLOCATION_ERROR;
   188             return;
   189         }
   190         u_memcpy(string, source.getBuffer(), length);
   191     }
   192     else {
   193         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
   194         /* test for NULL */
   195         if (string == NULL) {
   196             status = U_MEMORY_ALLOCATION_ERROR;
   197             return;
   198         }
   199         *string = 0;
   200     }
   201     /* Free offsetBuffer before initializing it. */
   202     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
   203     uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, 
   204         &m_data_->iteratordata_, &status);
   206     m_data_->reset_   = TRUE;
   207 }
   209 // Sets the source to the new character iterator.
   210 void CollationElementIterator::setText(CharacterIterator& source, 
   211                                        UErrorCode& status)
   212 {
   213     if (U_FAILURE(status)) 
   214         return;
   216     int32_t length = source.getLength();
   217     UChar *buffer = NULL;
   219     if (length == 0) {
   220         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
   221         /* test for NULL */
   222         if (buffer == NULL) {
   223             status = U_MEMORY_ALLOCATION_ERROR;
   224             return;
   225         }
   226         *buffer = 0;
   227     }
   228     else {
   229         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
   230         /* test for NULL */
   231         if (buffer == NULL) {
   232             status = U_MEMORY_ALLOCATION_ERROR;
   233             return;
   234         }
   235         /* 
   236         Using this constructor will prevent buffer from being removed when
   237         string gets removed
   238         */
   239         UnicodeString string;
   240         source.getText(string);
   241         u_memcpy(buffer, string.getBuffer(), length);
   242     }
   244     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
   245         uprv_free((UChar *)m_data_->iteratordata_.string);
   246     }
   247     m_data_->isWritable = TRUE;
   248     /* Free offsetBuffer before initializing it. */
   249     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
   250     uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, 
   251         &m_data_->iteratordata_, &status);
   252     m_data_->reset_   = TRUE;
   253 }
   255 int32_t CollationElementIterator::strengthOrder(int32_t order) const
   256 {
   257     UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
   258     // Mask off the unwanted differences.
   259     if (s == UCOL_PRIMARY) {
   260         order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
   261     }
   262     else if (s == UCOL_SECONDARY) {
   263         order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
   264     }
   266     return order;
   267 }
   269 /* CollationElementIterator private constructors/destructors --------------- */
   271 /** 
   272 * This is the "real" constructor for this class; it constructs an iterator
   273 * over the source text using the specified collator
   274 */
   275 CollationElementIterator::CollationElementIterator(
   276                                                const UnicodeString& sourceText,
   277                                                const RuleBasedCollator* order,
   278                                                UErrorCode& status)
   279                                                : isDataOwned_(TRUE)
   280 {
   281     if (U_FAILURE(status)) {
   282         return;
   283     }
   285     int32_t length = sourceText.length();
   286     UChar *string = NULL;
   288     if (length > 0) {
   289         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
   290         /* test for NULL */
   291         if (string == NULL) {
   292             status = U_MEMORY_ALLOCATION_ERROR;
   293             return;
   294         }
   295         /* 
   296         Using this constructor will prevent buffer from being removed when
   297         string gets removed
   298         */
   299         u_memcpy(string, sourceText.getBuffer(), length);
   300     }
   301     else {
   302         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
   303         /* test for NULL */
   304         if (string == NULL) {
   305             status = U_MEMORY_ALLOCATION_ERROR;
   306             return;
   307         }
   308         *string = 0;
   309     }
   310     m_data_ = ucol_openElements(order->ucollator, string, length, &status);
   312     /* Test for buffer overflows */
   313     if (U_FAILURE(status)) {
   314         return;
   315     }
   316     m_data_->isWritable = TRUE;
   317 }
   319 /** 
   320 * This is the "real" constructor for this class; it constructs an iterator over 
   321 * the source text using the specified collator
   322 */
   323 CollationElementIterator::CollationElementIterator(
   324                                            const CharacterIterator& sourceText,
   325                                            const RuleBasedCollator* order,
   326                                            UErrorCode& status)
   327                                            : isDataOwned_(TRUE)
   328 {
   329     if (U_FAILURE(status))
   330         return;
   332     // **** should I just drop this test? ****
   333     /*
   334     if ( sourceText.endIndex() != 0 )
   335     {
   336         // A CollationElementIterator is really a two-layered beast.
   337         // Internally it uses a Normalizer to munge the source text into a form 
   338         // where all "composed" Unicode characters (such as \u00FC) are split into a 
   339         // normal character and a combining accent character.  
   340         // Afterward, CollationElementIterator does its own processing to handle
   341         // expanding and contracting collation sequences, ignorables, and so on.
   343         Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
   344                                 ? Normalizer::NO_OP : order->getDecomposition();
   346         text = new Normalizer(sourceText, decomp);
   347         if (text == NULL)
   348         status = U_MEMORY_ALLOCATION_ERROR;    
   349     }
   350     */
   351     int32_t length = sourceText.getLength();
   352     UChar *buffer;
   353     if (length > 0) {
   354         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
   355         /* test for NULL */
   356         if (buffer == NULL) {
   357             status = U_MEMORY_ALLOCATION_ERROR;
   358             return;
   359         }
   360         /* 
   361         Using this constructor will prevent buffer from being removed when
   362         string gets removed
   363         */
   364         UnicodeString string(buffer, length, length);
   365         ((CharacterIterator &)sourceText).getText(string);
   366         const UChar *temp = string.getBuffer();
   367         u_memcpy(buffer, temp, length);
   368     }
   369     else {
   370         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
   371         /* test for NULL */
   372         if (buffer == NULL) {
   373             status = U_MEMORY_ALLOCATION_ERROR;
   374             return;
   375         }
   376         *buffer = 0;
   377     }
   378     m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
   380     /* Test for buffer overflows */
   381     if (U_FAILURE(status)) {
   382         return;
   383     }
   384     m_data_->isWritable = TRUE;
   385 }
   387 /* CollationElementIterator protected methods ----------------------------- */
   389 const CollationElementIterator& CollationElementIterator::operator=(
   390                                          const CollationElementIterator& other)
   391 {
   392     if (this != &other)
   393     {
   394         UCollationElements *ucolelem      = this->m_data_;
   395         UCollationElements *otherucolelem = other.m_data_;
   396         collIterate        *coliter       = &(ucolelem->iteratordata_);
   397         collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
   398         int                length         = 0;
   400         // checking only UCOL_ITER_HASLEN is not enough here as we may be in 
   401         // the normalization buffer
   402         length = (int)(othercoliter->endp - othercoliter->string);
   404         ucolelem->reset_         = otherucolelem->reset_;
   405         ucolelem->isWritable     = TRUE;
   407         /* create a duplicate of string */
   408         if (length > 0) {
   409             coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
   410             if(coliter->string != NULL) {
   411                 uprv_memcpy((UChar *)coliter->string, othercoliter->string,
   412                     length * U_SIZEOF_UCHAR);
   413             } else { // Error: couldn't allocate memory. No copying should be done
   414                 length = 0;
   415             }
   416         }
   417         else {
   418             coliter->string = NULL;
   419         }
   421         /* start and end of string */
   422         coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;
   424         /* handle writable buffer here */
   426         if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
   427             coliter->writableBuffer = othercoliter->writableBuffer;
   428             coliter->writableBuffer.getTerminatedBuffer();
   429         }
   431         /* current position */
   432         if (othercoliter->pos >= othercoliter->string && 
   433             othercoliter->pos <= othercoliter->endp)
   434         {
   435             U_ASSERT(coliter->string != NULL);
   436             coliter->pos = coliter->string + 
   437                 (othercoliter->pos - othercoliter->string);
   438         }
   439         else {
   440             coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + 
   441                 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
   442         }
   444         /* CE buffer */
   445         int32_t CEsize;
   446         if (coliter->extendCEs) {
   447             uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
   448             CEsize = sizeof(othercoliter->extendCEs);
   449             if (CEsize > 0) {
   450                 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
   451                 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
   452             }
   453             coliter->toReturn = coliter->extendCEs + 
   454                 (othercoliter->toReturn - othercoliter->extendCEs);
   455             coliter->CEpos    = coliter->extendCEs + CEsize;
   456         } else {
   457             CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
   458             if (CEsize > 0) {
   459                 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
   460             }
   461             coliter->toReturn = coliter->CEs + 
   462                 (othercoliter->toReturn - othercoliter->CEs);
   463             coliter->CEpos    = coliter->CEs + CEsize;
   464         }
   466         if (othercoliter->fcdPosition != NULL) {
   467             U_ASSERT(coliter->string != NULL);
   468             coliter->fcdPosition = coliter->string + 
   469                 (othercoliter->fcdPosition 
   470                 - othercoliter->string);
   471         }
   472         else {
   473             coliter->fcdPosition = NULL;
   474         }
   475         coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
   476         coliter->origFlags   = othercoliter->origFlags;
   477         coliter->coll = othercoliter->coll;
   478         this->isDataOwned_ = TRUE;
   479     }
   481     return *this;
   482 }
   484 U_NAMESPACE_END
   486 #endif /* #if !UCONFIG_NO_COLLATION */
   488 /* eof */

mercurial