michael@0: /*
michael@0: *******************************************************************************
michael@0: * Copyright (C) 1996-2011, International Business Machines Corporation and    *
michael@0: * others. All Rights Reserved.                                                *
michael@0: *******************************************************************************
michael@0: */
michael@0: 
michael@0: /*
michael@0: * File coleitr.cpp
michael@0: *
michael@0: * 
michael@0: *
michael@0: * Created by: Helena Shih
michael@0: *
michael@0: * Modification History:
michael@0: *
michael@0: *  Date      Name        Description
michael@0: *
michael@0: *  6/23/97   helena      Adding comments to make code more readable.
michael@0: * 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
michael@0: * 12/10/99   aliu        Ported Thai collation support from Java.
michael@0: * 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
michael@0: * 02/19/01   swquek      Removed CollationElementsIterator() since it is 
michael@0: *                        private constructor and no calls are made to it
michael@0: */
michael@0: 
michael@0: #include "unicode/utypes.h"
michael@0: 
michael@0: #if !UCONFIG_NO_COLLATION
michael@0: 
michael@0: #include "unicode/coleitr.h"
michael@0: #include "unicode/ustring.h"
michael@0: #include "ucol_imp.h"
michael@0: #include "uassert.h"
michael@0: #include "cmemory.h"
michael@0: 
michael@0: 
michael@0: /* Constants --------------------------------------------------------------- */
michael@0: 
michael@0: U_NAMESPACE_BEGIN
michael@0: 
michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
michael@0: 
michael@0: /* CollationElementIterator public constructor/destructor ------------------ */
michael@0: 
michael@0: CollationElementIterator::CollationElementIterator(
michael@0:                                          const CollationElementIterator& other) 
michael@0:                                          : UObject(other), isDataOwned_(TRUE)
michael@0: {
michael@0:     UErrorCode status = U_ZERO_ERROR;
michael@0:     m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, 
michael@0:                                 &status);
michael@0: 
michael@0:     *this = other;
michael@0: }
michael@0: 
michael@0: CollationElementIterator::~CollationElementIterator()
michael@0: {
michael@0:     if (isDataOwned_) {
michael@0:         ucol_closeElements(m_data_);
michael@0:     }
michael@0: }
michael@0: 
michael@0: /* CollationElementIterator public methods --------------------------------- */
michael@0: 
michael@0: int32_t CollationElementIterator::getOffset() const
michael@0: {
michael@0:     return ucol_getOffset(m_data_);
michael@0: }
michael@0: 
michael@0: /**
michael@0: * Get the ordering priority of the next character in the string.
michael@0: * @return the next character's ordering. Returns NULLORDER if an error has 
michael@0: *         occured or if the end of string has been reached
michael@0: */
michael@0: int32_t CollationElementIterator::next(UErrorCode& status)
michael@0: {
michael@0:     return ucol_next(m_data_, &status);
michael@0: }
michael@0: 
michael@0: UBool CollationElementIterator::operator!=(
michael@0:                                   const CollationElementIterator& other) const
michael@0: {
michael@0:     return !(*this == other);
michael@0: }
michael@0: 
michael@0: UBool CollationElementIterator::operator==(
michael@0:                                     const CollationElementIterator& that) const
michael@0: {
michael@0:     if (this == &that || m_data_ == that.m_data_) {
michael@0:         return TRUE;
michael@0:     }
michael@0: 
michael@0:     // option comparison
michael@0:     if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
michael@0:     {
michael@0:         return FALSE;
michael@0:     }
michael@0: 
michael@0:     // the constructor and setText always sets a length
michael@0:     // and we only compare the string not the contents of the normalization
michael@0:     // buffer
michael@0:     int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
michael@0:     int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
michael@0:     
michael@0:     if (thislength != thatlength) {
michael@0:         return FALSE;
michael@0:     }
michael@0: 
michael@0:     if (uprv_memcmp(m_data_->iteratordata_.string, 
michael@0:                     that.m_data_->iteratordata_.string, 
michael@0:                     thislength * U_SIZEOF_UCHAR) != 0) {
michael@0:         return FALSE;
michael@0:     }
michael@0:     if (getOffset() != that.getOffset()) {
michael@0:         return FALSE;
michael@0:     }
michael@0: 
michael@0:     // checking normalization buffer
michael@0:     if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
michael@0:         if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
michael@0:             return FALSE;
michael@0:         }
michael@0:         // both are in the normalization buffer
michael@0:         if (m_data_->iteratordata_.pos 
michael@0:             - m_data_->iteratordata_.writableBuffer.getBuffer()
michael@0:             != that.m_data_->iteratordata_.pos 
michael@0:             - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
michael@0:             // not in the same position in the normalization buffer
michael@0:             return FALSE;
michael@0:         }
michael@0:     }
michael@0:     else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
michael@0:         return FALSE;
michael@0:     }
michael@0:     // checking ce position
michael@0:     return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
michael@0:             == (that.m_data_->iteratordata_.CEpos 
michael@0:                                         - that.m_data_->iteratordata_.CEs);
michael@0: }
michael@0: 
michael@0: /**
michael@0: * Get the ordering priority of the previous collation element in the string.
michael@0: * @param status the error code status.
michael@0: * @return the previous element's ordering. Returns NULLORDER if an error has 
michael@0: *         occured or if the start of string has been reached.
michael@0: */
michael@0: int32_t CollationElementIterator::previous(UErrorCode& status)
michael@0: {
michael@0:     return ucol_previous(m_data_, &status);
michael@0: }
michael@0: 
michael@0: /**
michael@0: * Resets the cursor to the beginning of the string.
michael@0: */
michael@0: void CollationElementIterator::reset()
michael@0: {
michael@0:     ucol_reset(m_data_);
michael@0: }
michael@0: 
michael@0: void CollationElementIterator::setOffset(int32_t newOffset, 
michael@0:                                          UErrorCode& status)
michael@0: {
michael@0:     ucol_setOffset(m_data_, newOffset, &status);
michael@0: }
michael@0: 
michael@0: /**
michael@0: * Sets the source to the new source string.
michael@0: */
michael@0: void CollationElementIterator::setText(const UnicodeString& source,
michael@0:                                        UErrorCode& status)
michael@0: {
michael@0:     if (U_FAILURE(status)) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     int32_t length = source.length();
michael@0:     UChar *string = NULL;
michael@0:     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
michael@0:         uprv_free((UChar *)m_data_->iteratordata_.string);
michael@0:     }
michael@0:     m_data_->isWritable = TRUE;
michael@0:     if (length > 0) {
michael@0:         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
michael@0:         /* test for NULL */
michael@0:         if (string == NULL) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         u_memcpy(string, source.getBuffer(), length);
michael@0:     }
michael@0:     else {
michael@0:         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
michael@0:         /* test for NULL */
michael@0:         if (string == NULL) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         *string = 0;
michael@0:     }
michael@0:     /* Free offsetBuffer before initializing it. */
michael@0:     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
michael@0:     uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, 
michael@0:         &m_data_->iteratordata_, &status);
michael@0: 
michael@0:     m_data_->reset_   = TRUE;
michael@0: }
michael@0: 
michael@0: // Sets the source to the new character iterator.
michael@0: void CollationElementIterator::setText(CharacterIterator& source, 
michael@0:                                        UErrorCode& status)
michael@0: {
michael@0:     if (U_FAILURE(status)) 
michael@0:         return;
michael@0: 
michael@0:     int32_t length = source.getLength();
michael@0:     UChar *buffer = NULL;
michael@0: 
michael@0:     if (length == 0) {
michael@0:         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
michael@0:         /* test for NULL */
michael@0:         if (buffer == NULL) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         *buffer = 0;
michael@0:     }
michael@0:     else {
michael@0:         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
michael@0:         /* test for NULL */
michael@0:         if (buffer == NULL) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         /* 
michael@0:         Using this constructor will prevent buffer from being removed when
michael@0:         string gets removed
michael@0:         */
michael@0:         UnicodeString string;
michael@0:         source.getText(string);
michael@0:         u_memcpy(buffer, string.getBuffer(), length);
michael@0:     }
michael@0: 
michael@0:     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
michael@0:         uprv_free((UChar *)m_data_->iteratordata_.string);
michael@0:     }
michael@0:     m_data_->isWritable = TRUE;
michael@0:     /* Free offsetBuffer before initializing it. */
michael@0:     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
michael@0:     uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, 
michael@0:         &m_data_->iteratordata_, &status);
michael@0:     m_data_->reset_   = TRUE;
michael@0: }
michael@0: 
michael@0: int32_t CollationElementIterator::strengthOrder(int32_t order) const
michael@0: {
michael@0:     UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
michael@0:     // Mask off the unwanted differences.
michael@0:     if (s == UCOL_PRIMARY) {
michael@0:         order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
michael@0:     }
michael@0:     else if (s == UCOL_SECONDARY) {
michael@0:         order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
michael@0:     }
michael@0: 
michael@0:     return order;
michael@0: }
michael@0: 
michael@0: /* CollationElementIterator private constructors/destructors --------------- */
michael@0: 
michael@0: /** 
michael@0: * This is the "real" constructor for this class; it constructs an iterator
michael@0: * over the source text using the specified collator
michael@0: */
michael@0: CollationElementIterator::CollationElementIterator(
michael@0:                                                const UnicodeString& sourceText,
michael@0:                                                const RuleBasedCollator* order,
michael@0:                                                UErrorCode& status)
michael@0:                                                : isDataOwned_(TRUE)
michael@0: {
michael@0:     if (U_FAILURE(status)) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     int32_t length = sourceText.length();
michael@0:     UChar *string = NULL;
michael@0: 
michael@0:     if (length > 0) {
michael@0:         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
michael@0:         /* test for NULL */
michael@0:         if (string == NULL) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         /* 
michael@0:         Using this constructor will prevent buffer from being removed when
michael@0:         string gets removed
michael@0:         */
michael@0:         u_memcpy(string, sourceText.getBuffer(), length);
michael@0:     }
michael@0:     else {
michael@0:         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
michael@0:         /* test for NULL */
michael@0:         if (string == NULL) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         *string = 0;
michael@0:     }
michael@0:     m_data_ = ucol_openElements(order->ucollator, string, length, &status);
michael@0: 
michael@0:     /* Test for buffer overflows */
michael@0:     if (U_FAILURE(status)) {
michael@0:         return;
michael@0:     }
michael@0:     m_data_->isWritable = TRUE;
michael@0: }
michael@0: 
michael@0: /** 
michael@0: * This is the "real" constructor for this class; it constructs an iterator over 
michael@0: * the source text using the specified collator
michael@0: */
michael@0: CollationElementIterator::CollationElementIterator(
michael@0:                                            const CharacterIterator& sourceText,
michael@0:                                            const RuleBasedCollator* order,
michael@0:                                            UErrorCode& status)
michael@0:                                            : isDataOwned_(TRUE)
michael@0: {
michael@0:     if (U_FAILURE(status))
michael@0:         return;
michael@0: 
michael@0:     // **** should I just drop this test? ****
michael@0:     /*
michael@0:     if ( sourceText.endIndex() != 0 )
michael@0:     {
michael@0:         // A CollationElementIterator is really a two-layered beast.
michael@0:         // Internally it uses a Normalizer to munge the source text into a form 
michael@0:         // where all "composed" Unicode characters (such as \u00FC) are split into a 
michael@0:         // normal character and a combining accent character.  
michael@0:         // Afterward, CollationElementIterator does its own processing to handle
michael@0:         // expanding and contracting collation sequences, ignorables, and so on.
michael@0:         
michael@0:         Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
michael@0:                                 ? Normalizer::NO_OP : order->getDecomposition();
michael@0:           
michael@0:         text = new Normalizer(sourceText, decomp);
michael@0:         if (text == NULL)
michael@0:         status = U_MEMORY_ALLOCATION_ERROR;    
michael@0:     }
michael@0:     */
michael@0:     int32_t length = sourceText.getLength();
michael@0:     UChar *buffer;
michael@0:     if (length > 0) {
michael@0:         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
michael@0:         /* test for NULL */
michael@0:         if (buffer == NULL) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         /* 
michael@0:         Using this constructor will prevent buffer from being removed when
michael@0:         string gets removed
michael@0:         */
michael@0:         UnicodeString string(buffer, length, length);
michael@0:         ((CharacterIterator &)sourceText).getText(string);
michael@0:         const UChar *temp = string.getBuffer();
michael@0:         u_memcpy(buffer, temp, length);
michael@0:     }
michael@0:     else {
michael@0:         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
michael@0:         /* test for NULL */
michael@0:         if (buffer == NULL) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         *buffer = 0;
michael@0:     }
michael@0:     m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
michael@0: 
michael@0:     /* Test for buffer overflows */
michael@0:     if (U_FAILURE(status)) {
michael@0:         return;
michael@0:     }
michael@0:     m_data_->isWritable = TRUE;
michael@0: }
michael@0: 
michael@0: /* CollationElementIterator protected methods ----------------------------- */
michael@0: 
michael@0: const CollationElementIterator& CollationElementIterator::operator=(
michael@0:                                          const CollationElementIterator& other)
michael@0: {
michael@0:     if (this != &other)
michael@0:     {
michael@0:         UCollationElements *ucolelem      = this->m_data_;
michael@0:         UCollationElements *otherucolelem = other.m_data_;
michael@0:         collIterate        *coliter       = &(ucolelem->iteratordata_);
michael@0:         collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
michael@0:         int                length         = 0;
michael@0: 
michael@0:         // checking only UCOL_ITER_HASLEN is not enough here as we may be in 
michael@0:         // the normalization buffer
michael@0:         length = (int)(othercoliter->endp - othercoliter->string);
michael@0: 
michael@0:         ucolelem->reset_         = otherucolelem->reset_;
michael@0:         ucolelem->isWritable     = TRUE;
michael@0: 
michael@0:         /* create a duplicate of string */
michael@0:         if (length > 0) {
michael@0:             coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
michael@0:             if(coliter->string != NULL) {
michael@0:                 uprv_memcpy((UChar *)coliter->string, othercoliter->string,
michael@0:                     length * U_SIZEOF_UCHAR);
michael@0:             } else { // Error: couldn't allocate memory. No copying should be done
michael@0:                 length = 0;
michael@0:             }
michael@0:         }
michael@0:         else {
michael@0:             coliter->string = NULL;
michael@0:         }
michael@0: 
michael@0:         /* start and end of string */
michael@0:         coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;
michael@0: 
michael@0:         /* handle writable buffer here */
michael@0: 
michael@0:         if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
michael@0:             coliter->writableBuffer = othercoliter->writableBuffer;
michael@0:             coliter->writableBuffer.getTerminatedBuffer();
michael@0:         }
michael@0: 
michael@0:         /* current position */
michael@0:         if (othercoliter->pos >= othercoliter->string && 
michael@0:             othercoliter->pos <= othercoliter->endp)
michael@0:         {
michael@0:             U_ASSERT(coliter->string != NULL);
michael@0:             coliter->pos = coliter->string + 
michael@0:                 (othercoliter->pos - othercoliter->string);
michael@0:         }
michael@0:         else {
michael@0:             coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + 
michael@0:                 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
michael@0:         }
michael@0: 
michael@0:         /* CE buffer */
michael@0:         int32_t CEsize;
michael@0:         if (coliter->extendCEs) {
michael@0:             uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
michael@0:             CEsize = sizeof(othercoliter->extendCEs);
michael@0:             if (CEsize > 0) {
michael@0:                 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
michael@0:                 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
michael@0:             }
michael@0:             coliter->toReturn = coliter->extendCEs + 
michael@0:                 (othercoliter->toReturn - othercoliter->extendCEs);
michael@0:             coliter->CEpos    = coliter->extendCEs + CEsize;
michael@0:         } else {
michael@0:             CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
michael@0:             if (CEsize > 0) {
michael@0:                 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
michael@0:             }
michael@0:             coliter->toReturn = coliter->CEs + 
michael@0:                 (othercoliter->toReturn - othercoliter->CEs);
michael@0:             coliter->CEpos    = coliter->CEs + CEsize;
michael@0:         }
michael@0: 
michael@0:         if (othercoliter->fcdPosition != NULL) {
michael@0:             U_ASSERT(coliter->string != NULL);
michael@0:             coliter->fcdPosition = coliter->string + 
michael@0:                 (othercoliter->fcdPosition 
michael@0:                 - othercoliter->string);
michael@0:         }
michael@0:         else {
michael@0:             coliter->fcdPosition = NULL;
michael@0:         }
michael@0:         coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
michael@0:         coliter->origFlags   = othercoliter->origFlags;
michael@0:         coliter->coll = othercoliter->coll;
michael@0:         this->isDataOwned_ = TRUE;
michael@0:     }
michael@0: 
michael@0:     return *this;
michael@0: }
michael@0: 
michael@0: U_NAMESPACE_END
michael@0: 
michael@0: #endif /* #if !UCONFIG_NO_COLLATION */
michael@0: 
michael@0: /* eof */