michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 1996-2011, International Business Machines Corporation and * michael@0: * others. All Rights Reserved. * michael@0: ******************************************************************************* michael@0: */ michael@0: michael@0: /* michael@0: * File coleitr.cpp michael@0: * michael@0: * michael@0: * michael@0: * Created by: Helena Shih michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * michael@0: * 6/23/97 helena Adding comments to make code more readable. michael@0: * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java michael@0: * 12/10/99 aliu Ported Thai collation support from Java. michael@0: * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h) michael@0: * 02/19/01 swquek Removed CollationElementsIterator() since it is michael@0: * private constructor and no calls are made to it michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: #include "unicode/coleitr.h" michael@0: #include "unicode/ustring.h" michael@0: #include "ucol_imp.h" michael@0: #include "uassert.h" michael@0: #include "cmemory.h" michael@0: michael@0: michael@0: /* Constants --------------------------------------------------------------- */ michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) michael@0: michael@0: /* CollationElementIterator public constructor/destructor ------------------ */ michael@0: michael@0: CollationElementIterator::CollationElementIterator( michael@0: const CollationElementIterator& other) michael@0: : UObject(other), isDataOwned_(TRUE) michael@0: { michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, michael@0: &status); michael@0: michael@0: *this = other; michael@0: } michael@0: michael@0: CollationElementIterator::~CollationElementIterator() michael@0: { michael@0: if (isDataOwned_) { michael@0: ucol_closeElements(m_data_); michael@0: } michael@0: } michael@0: michael@0: /* CollationElementIterator public methods --------------------------------- */ michael@0: michael@0: int32_t CollationElementIterator::getOffset() const michael@0: { michael@0: return ucol_getOffset(m_data_); michael@0: } michael@0: michael@0: /** michael@0: * Get the ordering priority of the next character in the string. michael@0: * @return the next character's ordering. Returns NULLORDER if an error has michael@0: * occured or if the end of string has been reached michael@0: */ michael@0: int32_t CollationElementIterator::next(UErrorCode& status) michael@0: { michael@0: return ucol_next(m_data_, &status); michael@0: } michael@0: michael@0: UBool CollationElementIterator::operator!=( michael@0: const CollationElementIterator& other) const michael@0: { michael@0: return !(*this == other); michael@0: } michael@0: michael@0: UBool CollationElementIterator::operator==( michael@0: const CollationElementIterator& that) const michael@0: { michael@0: if (this == &that || m_data_ == that.m_data_) { michael@0: return TRUE; michael@0: } michael@0: michael@0: // option comparison michael@0: if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll) michael@0: { michael@0: return FALSE; michael@0: } michael@0: michael@0: // the constructor and setText always sets a length michael@0: // and we only compare the string not the contents of the normalization michael@0: // buffer michael@0: int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string); michael@0: int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string); michael@0: michael@0: if (thislength != thatlength) { michael@0: return FALSE; michael@0: } michael@0: michael@0: if (uprv_memcmp(m_data_->iteratordata_.string, michael@0: that.m_data_->iteratordata_.string, michael@0: thislength * U_SIZEOF_UCHAR) != 0) { michael@0: return FALSE; michael@0: } michael@0: if (getOffset() != that.getOffset()) { michael@0: return FALSE; michael@0: } michael@0: michael@0: // checking normalization buffer michael@0: if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { michael@0: if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) { michael@0: return FALSE; michael@0: } michael@0: // both are in the normalization buffer michael@0: if (m_data_->iteratordata_.pos michael@0: - m_data_->iteratordata_.writableBuffer.getBuffer() michael@0: != that.m_data_->iteratordata_.pos michael@0: - that.m_data_->iteratordata_.writableBuffer.getBuffer()) { michael@0: // not in the same position in the normalization buffer michael@0: return FALSE; michael@0: } michael@0: } michael@0: else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { michael@0: return FALSE; michael@0: } michael@0: // checking ce position michael@0: return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs) michael@0: == (that.m_data_->iteratordata_.CEpos michael@0: - that.m_data_->iteratordata_.CEs); michael@0: } michael@0: michael@0: /** michael@0: * Get the ordering priority of the previous collation element in the string. michael@0: * @param status the error code status. michael@0: * @return the previous element's ordering. Returns NULLORDER if an error has michael@0: * occured or if the start of string has been reached. michael@0: */ michael@0: int32_t CollationElementIterator::previous(UErrorCode& status) michael@0: { michael@0: return ucol_previous(m_data_, &status); michael@0: } michael@0: michael@0: /** michael@0: * Resets the cursor to the beginning of the string. michael@0: */ michael@0: void CollationElementIterator::reset() michael@0: { michael@0: ucol_reset(m_data_); michael@0: } michael@0: michael@0: void CollationElementIterator::setOffset(int32_t newOffset, michael@0: UErrorCode& status) michael@0: { michael@0: ucol_setOffset(m_data_, newOffset, &status); michael@0: } michael@0: michael@0: /** michael@0: * Sets the source to the new source string. michael@0: */ michael@0: void CollationElementIterator::setText(const UnicodeString& source, michael@0: UErrorCode& status) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: michael@0: int32_t length = source.length(); michael@0: UChar *string = NULL; michael@0: if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { michael@0: uprv_free((UChar *)m_data_->iteratordata_.string); michael@0: } michael@0: m_data_->isWritable = TRUE; michael@0: if (length > 0) { michael@0: string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); michael@0: /* test for NULL */ michael@0: if (string == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: u_memcpy(string, source.getBuffer(), length); michael@0: } michael@0: else { michael@0: string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); michael@0: /* test for NULL */ michael@0: if (string == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: *string = 0; michael@0: } michael@0: /* Free offsetBuffer before initializing it. */ michael@0: ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); michael@0: uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, michael@0: &m_data_->iteratordata_, &status); michael@0: michael@0: m_data_->reset_ = TRUE; michael@0: } michael@0: michael@0: // Sets the source to the new character iterator. michael@0: void CollationElementIterator::setText(CharacterIterator& source, michael@0: UErrorCode& status) michael@0: { michael@0: if (U_FAILURE(status)) michael@0: return; michael@0: michael@0: int32_t length = source.getLength(); michael@0: UChar *buffer = NULL; michael@0: michael@0: if (length == 0) { michael@0: buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); michael@0: /* test for NULL */ michael@0: if (buffer == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: *buffer = 0; michael@0: } michael@0: else { michael@0: buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); michael@0: /* test for NULL */ michael@0: if (buffer == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: /* michael@0: Using this constructor will prevent buffer from being removed when michael@0: string gets removed michael@0: */ michael@0: UnicodeString string; michael@0: source.getText(string); michael@0: u_memcpy(buffer, string.getBuffer(), length); michael@0: } michael@0: michael@0: if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { michael@0: uprv_free((UChar *)m_data_->iteratordata_.string); michael@0: } michael@0: m_data_->isWritable = TRUE; michael@0: /* Free offsetBuffer before initializing it. */ michael@0: ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); michael@0: uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, michael@0: &m_data_->iteratordata_, &status); michael@0: m_data_->reset_ = TRUE; michael@0: } michael@0: michael@0: int32_t CollationElementIterator::strengthOrder(int32_t order) const michael@0: { michael@0: UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll); michael@0: // Mask off the unwanted differences. michael@0: if (s == UCOL_PRIMARY) { michael@0: order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY; michael@0: } michael@0: else if (s == UCOL_SECONDARY) { michael@0: order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY; michael@0: } michael@0: michael@0: return order; michael@0: } michael@0: michael@0: /* CollationElementIterator private constructors/destructors --------------- */ michael@0: michael@0: /** michael@0: * This is the "real" constructor for this class; it constructs an iterator michael@0: * over the source text using the specified collator michael@0: */ michael@0: CollationElementIterator::CollationElementIterator( michael@0: const UnicodeString& sourceText, michael@0: const RuleBasedCollator* order, michael@0: UErrorCode& status) michael@0: : isDataOwned_(TRUE) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: michael@0: int32_t length = sourceText.length(); michael@0: UChar *string = NULL; michael@0: michael@0: if (length > 0) { michael@0: string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); michael@0: /* test for NULL */ michael@0: if (string == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: /* michael@0: Using this constructor will prevent buffer from being removed when michael@0: string gets removed michael@0: */ michael@0: u_memcpy(string, sourceText.getBuffer(), length); michael@0: } michael@0: else { michael@0: string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); michael@0: /* test for NULL */ michael@0: if (string == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: *string = 0; michael@0: } michael@0: m_data_ = ucol_openElements(order->ucollator, string, length, &status); michael@0: michael@0: /* Test for buffer overflows */ michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: m_data_->isWritable = TRUE; michael@0: } michael@0: michael@0: /** michael@0: * This is the "real" constructor for this class; it constructs an iterator over michael@0: * the source text using the specified collator michael@0: */ michael@0: CollationElementIterator::CollationElementIterator( michael@0: const CharacterIterator& sourceText, michael@0: const RuleBasedCollator* order, michael@0: UErrorCode& status) michael@0: : isDataOwned_(TRUE) michael@0: { michael@0: if (U_FAILURE(status)) michael@0: return; michael@0: michael@0: // **** should I just drop this test? **** michael@0: /* michael@0: if ( sourceText.endIndex() != 0 ) michael@0: { michael@0: // A CollationElementIterator is really a two-layered beast. michael@0: // Internally it uses a Normalizer to munge the source text into a form michael@0: // where all "composed" Unicode characters (such as \u00FC) are split into a michael@0: // normal character and a combining accent character. michael@0: // Afterward, CollationElementIterator does its own processing to handle michael@0: // expanding and contracting collation sequences, ignorables, and so on. michael@0: michael@0: Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL michael@0: ? Normalizer::NO_OP : order->getDecomposition(); michael@0: michael@0: text = new Normalizer(sourceText, decomp); michael@0: if (text == NULL) michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: */ michael@0: int32_t length = sourceText.getLength(); michael@0: UChar *buffer; michael@0: if (length > 0) { michael@0: buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); michael@0: /* test for NULL */ michael@0: if (buffer == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: /* michael@0: Using this constructor will prevent buffer from being removed when michael@0: string gets removed michael@0: */ michael@0: UnicodeString string(buffer, length, length); michael@0: ((CharacterIterator &)sourceText).getText(string); michael@0: const UChar *temp = string.getBuffer(); michael@0: u_memcpy(buffer, temp, length); michael@0: } michael@0: else { michael@0: buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); michael@0: /* test for NULL */ michael@0: if (buffer == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: *buffer = 0; michael@0: } michael@0: m_data_ = ucol_openElements(order->ucollator, buffer, length, &status); michael@0: michael@0: /* Test for buffer overflows */ michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: m_data_->isWritable = TRUE; michael@0: } michael@0: michael@0: /* CollationElementIterator protected methods ----------------------------- */ michael@0: michael@0: const CollationElementIterator& CollationElementIterator::operator=( michael@0: const CollationElementIterator& other) michael@0: { michael@0: if (this != &other) michael@0: { michael@0: UCollationElements *ucolelem = this->m_data_; michael@0: UCollationElements *otherucolelem = other.m_data_; michael@0: collIterate *coliter = &(ucolelem->iteratordata_); michael@0: collIterate *othercoliter = &(otherucolelem->iteratordata_); michael@0: int length = 0; michael@0: michael@0: // checking only UCOL_ITER_HASLEN is not enough here as we may be in michael@0: // the normalization buffer michael@0: length = (int)(othercoliter->endp - othercoliter->string); michael@0: michael@0: ucolelem->reset_ = otherucolelem->reset_; michael@0: ucolelem->isWritable = TRUE; michael@0: michael@0: /* create a duplicate of string */ michael@0: if (length > 0) { michael@0: coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR); michael@0: if(coliter->string != NULL) { michael@0: uprv_memcpy((UChar *)coliter->string, othercoliter->string, michael@0: length * U_SIZEOF_UCHAR); michael@0: } else { // Error: couldn't allocate memory. No copying should be done michael@0: length = 0; michael@0: } michael@0: } michael@0: else { michael@0: coliter->string = NULL; michael@0: } michael@0: michael@0: /* start and end of string */ michael@0: coliter->endp = coliter->string == NULL ? NULL : coliter->string + length; michael@0: michael@0: /* handle writable buffer here */ michael@0: michael@0: if (othercoliter->flags & UCOL_ITER_INNORMBUF) { michael@0: coliter->writableBuffer = othercoliter->writableBuffer; michael@0: coliter->writableBuffer.getTerminatedBuffer(); michael@0: } michael@0: michael@0: /* current position */ michael@0: if (othercoliter->pos >= othercoliter->string && michael@0: othercoliter->pos <= othercoliter->endp) michael@0: { michael@0: U_ASSERT(coliter->string != NULL); michael@0: coliter->pos = coliter->string + michael@0: (othercoliter->pos - othercoliter->string); michael@0: } michael@0: else { michael@0: coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + michael@0: (othercoliter->pos - othercoliter->writableBuffer.getBuffer()); michael@0: } michael@0: michael@0: /* CE buffer */ michael@0: int32_t CEsize; michael@0: if (coliter->extendCEs) { michael@0: uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE); michael@0: CEsize = sizeof(othercoliter->extendCEs); michael@0: if (CEsize > 0) { michael@0: othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize); michael@0: uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize); michael@0: } michael@0: coliter->toReturn = coliter->extendCEs + michael@0: (othercoliter->toReturn - othercoliter->extendCEs); michael@0: coliter->CEpos = coliter->extendCEs + CEsize; michael@0: } else { michael@0: CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs); michael@0: if (CEsize > 0) { michael@0: uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize); michael@0: } michael@0: coliter->toReturn = coliter->CEs + michael@0: (othercoliter->toReturn - othercoliter->CEs); michael@0: coliter->CEpos = coliter->CEs + CEsize; michael@0: } michael@0: michael@0: if (othercoliter->fcdPosition != NULL) { michael@0: U_ASSERT(coliter->string != NULL); michael@0: coliter->fcdPosition = coliter->string + michael@0: (othercoliter->fcdPosition michael@0: - othercoliter->string); michael@0: } michael@0: else { michael@0: coliter->fcdPosition = NULL; michael@0: } michael@0: coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/; michael@0: coliter->origFlags = othercoliter->origFlags; michael@0: coliter->coll = othercoliter->coll; michael@0: this->isDataOwned_ = TRUE; michael@0: } michael@0: michael@0: return *this; michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */ michael@0: michael@0: /* eof */