michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 2001-2008,2010 IBM and others. All rights reserved. michael@0: ********************************************************************** michael@0: * Date Name Description michael@0: * 03/22/2000 helena Creation. michael@0: ********************************************************************** michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION michael@0: michael@0: #include "unicode/brkiter.h" michael@0: #include "unicode/schriter.h" michael@0: #include "unicode/search.h" michael@0: #include "usrchimp.h" michael@0: #include "cmemory.h" michael@0: michael@0: // public constructors and destructors ----------------------------------- michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: SearchIterator::SearchIterator(const SearchIterator &other) michael@0: : UObject(other) michael@0: { michael@0: m_breakiterator_ = other.m_breakiterator_; michael@0: m_text_ = other.m_text_; michael@0: m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); michael@0: m_search_->breakIter = other.m_search_->breakIter; michael@0: m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch; michael@0: m_search_->isOverlap = other.m_search_->isOverlap; michael@0: m_search_->elementComparisonType = other.m_search_->elementComparisonType; michael@0: m_search_->matchedIndex = other.m_search_->matchedIndex; michael@0: m_search_->matchedLength = other.m_search_->matchedLength; michael@0: m_search_->text = other.m_search_->text; michael@0: m_search_->textLength = other.m_search_->textLength; michael@0: } michael@0: michael@0: SearchIterator::~SearchIterator() michael@0: { michael@0: if (m_search_ != NULL) { michael@0: uprv_free(m_search_); michael@0: } michael@0: } michael@0: michael@0: // public get and set methods ---------------------------------------- michael@0: michael@0: void SearchIterator::setAttribute(USearchAttribute attribute, michael@0: USearchAttributeValue value, michael@0: UErrorCode &status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: switch (attribute) michael@0: { michael@0: case USEARCH_OVERLAP : michael@0: m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE); michael@0: break; michael@0: case USEARCH_CANONICAL_MATCH : michael@0: m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE); michael@0: break; michael@0: case USEARCH_ELEMENT_COMPARISON : michael@0: if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { michael@0: m_search_->elementComparisonType = (int16_t)value; michael@0: } else { michael@0: m_search_->elementComparisonType = 0; michael@0: } michael@0: break; michael@0: default: michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } michael@0: if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) { michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } michael@0: michael@0: USearchAttributeValue SearchIterator::getAttribute( michael@0: USearchAttribute attribute) const michael@0: { michael@0: switch (attribute) { michael@0: case USEARCH_OVERLAP : michael@0: return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF); michael@0: case USEARCH_CANONICAL_MATCH : michael@0: return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : michael@0: USEARCH_OFF); michael@0: case USEARCH_ELEMENT_COMPARISON : michael@0: { michael@0: int16_t value = m_search_->elementComparisonType; michael@0: if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { michael@0: return (USearchAttributeValue)value; michael@0: } else { michael@0: return USEARCH_STANDARD_ELEMENT_COMPARISON; michael@0: } michael@0: } michael@0: default : michael@0: return USEARCH_DEFAULT; michael@0: } michael@0: } michael@0: michael@0: int32_t SearchIterator::getMatchedStart() const michael@0: { michael@0: return m_search_->matchedIndex; michael@0: } michael@0: michael@0: int32_t SearchIterator::getMatchedLength() const michael@0: { michael@0: return m_search_->matchedLength; michael@0: } michael@0: michael@0: void SearchIterator::getMatchedText(UnicodeString &result) const michael@0: { michael@0: int32_t matchedindex = m_search_->matchedIndex; michael@0: int32_t matchedlength = m_search_->matchedLength; michael@0: if (matchedindex != USEARCH_DONE && matchedlength != 0) { michael@0: result.setTo(m_search_->text + matchedindex, matchedlength); michael@0: } michael@0: else { michael@0: result.remove(); michael@0: } michael@0: } michael@0: michael@0: void SearchIterator::setBreakIterator(BreakIterator *breakiter, michael@0: UErrorCode &status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: #if 0 michael@0: m_search_->breakIter = NULL; michael@0: // the c++ breakiterator may not make use of ubreakiterator. michael@0: // so we'll have to keep track of it ourselves. michael@0: #else michael@0: // Well, gee... the Constructors that take a BreakIterator michael@0: // all cast the BreakIterator to a UBreakIterator and michael@0: // pass it to the corresponding usearch_openFromXXX michael@0: // routine, so there's no reason not to do this. michael@0: // michael@0: // Besides, a UBreakIterator is a BreakIterator, so michael@0: // any subclass of BreakIterator should work fine here... michael@0: m_search_->breakIter = (UBreakIterator *) breakiter; michael@0: #endif michael@0: michael@0: m_breakiterator_ = breakiter; michael@0: } michael@0: } michael@0: michael@0: const BreakIterator * SearchIterator::getBreakIterator(void) const michael@0: { michael@0: return m_breakiterator_; michael@0: } michael@0: michael@0: void SearchIterator::setText(const UnicodeString &text, UErrorCode &status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: if (text.length() == 0) { michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: else { michael@0: m_text_ = text; michael@0: m_search_->text = m_text_.getBuffer(); michael@0: m_search_->textLength = m_text_.length(); michael@0: } michael@0: } michael@0: } michael@0: michael@0: void SearchIterator::setText(CharacterIterator &text, UErrorCode &status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: text.getText(m_text_); michael@0: setText(m_text_, status); michael@0: } michael@0: } michael@0: michael@0: const UnicodeString & SearchIterator::getText(void) const michael@0: { michael@0: return m_text_; michael@0: } michael@0: michael@0: // operator overloading ---------------------------------------------- michael@0: michael@0: UBool SearchIterator::operator==(const SearchIterator &that) const michael@0: { michael@0: if (this == &that) { michael@0: return TRUE; michael@0: } michael@0: return (m_breakiterator_ == that.m_breakiterator_ && michael@0: m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch && michael@0: m_search_->isOverlap == that.m_search_->isOverlap && michael@0: m_search_->elementComparisonType == that.m_search_->elementComparisonType && michael@0: m_search_->matchedIndex == that.m_search_->matchedIndex && michael@0: m_search_->matchedLength == that.m_search_->matchedLength && michael@0: m_search_->textLength == that.m_search_->textLength && michael@0: getOffset() == that.getOffset() && michael@0: (uprv_memcmp(m_search_->text, that.m_search_->text, michael@0: m_search_->textLength * sizeof(UChar)) == 0)); michael@0: } michael@0: michael@0: // public methods ---------------------------------------------------- michael@0: michael@0: int32_t SearchIterator::first(UErrorCode &status) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: return USEARCH_DONE; michael@0: } michael@0: setOffset(0, status); michael@0: return handleNext(0, status); michael@0: } michael@0: michael@0: int32_t SearchIterator::following(int32_t position, michael@0: UErrorCode &status) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: return USEARCH_DONE; michael@0: } michael@0: setOffset(position, status); michael@0: return handleNext(position, status); michael@0: } michael@0: michael@0: int32_t SearchIterator::last(UErrorCode &status) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: return USEARCH_DONE; michael@0: } michael@0: setOffset(m_search_->textLength, status); michael@0: return handlePrev(m_search_->textLength, status); michael@0: } michael@0: michael@0: int32_t SearchIterator::preceding(int32_t position, michael@0: UErrorCode &status) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: return USEARCH_DONE; michael@0: } michael@0: setOffset(position, status); michael@0: return handlePrev(position, status); michael@0: } michael@0: michael@0: int32_t SearchIterator::next(UErrorCode &status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: int32_t offset = getOffset(); michael@0: int32_t matchindex = m_search_->matchedIndex; michael@0: int32_t matchlength = m_search_->matchedLength; michael@0: m_search_->reset = FALSE; michael@0: if (m_search_->isForwardSearching == TRUE) { michael@0: int32_t textlength = m_search_->textLength; michael@0: if (offset == textlength || matchindex == textlength || michael@0: (matchindex != USEARCH_DONE && michael@0: matchindex + matchlength >= textlength)) { michael@0: // not enough characters to match michael@0: setMatchNotFound(); michael@0: return USEARCH_DONE; michael@0: } michael@0: } michael@0: else { michael@0: // switching direction. michael@0: // if matchedIndex == USEARCH_DONE, it means that either a michael@0: // setOffset has been called or that previous ran off the text michael@0: // string. the iterator would have been set to offset 0 if a michael@0: // match is not found. michael@0: m_search_->isForwardSearching = TRUE; michael@0: if (m_search_->matchedIndex != USEARCH_DONE) { michael@0: // there's no need to set the collation element iterator michael@0: // the next call to next will set the offset. michael@0: return matchindex; michael@0: } michael@0: } michael@0: michael@0: if (matchlength > 0) { michael@0: // if matchlength is 0 we are at the start of the iteration michael@0: if (m_search_->isOverlap) { michael@0: offset ++; michael@0: } michael@0: else { michael@0: offset += matchlength; michael@0: } michael@0: } michael@0: return handleNext(offset, status); michael@0: } michael@0: return USEARCH_DONE; michael@0: } michael@0: michael@0: int32_t SearchIterator::previous(UErrorCode &status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: int32_t offset; michael@0: if (m_search_->reset) { michael@0: offset = m_search_->textLength; michael@0: m_search_->isForwardSearching = FALSE; michael@0: m_search_->reset = FALSE; michael@0: setOffset(offset, status); michael@0: } michael@0: else { michael@0: offset = getOffset(); michael@0: } michael@0: michael@0: int32_t matchindex = m_search_->matchedIndex; michael@0: if (m_search_->isForwardSearching == TRUE) { michael@0: // switching direction. michael@0: // if matchedIndex == USEARCH_DONE, it means that either a michael@0: // setOffset has been called or that next ran off the text michael@0: // string. the iterator would have been set to offset textLength if michael@0: // a match is not found. michael@0: m_search_->isForwardSearching = FALSE; michael@0: if (matchindex != USEARCH_DONE) { michael@0: return matchindex; michael@0: } michael@0: } michael@0: else { michael@0: if (offset == 0 || matchindex == 0) { michael@0: // not enough characters to match michael@0: setMatchNotFound(); michael@0: return USEARCH_DONE; michael@0: } michael@0: } michael@0: michael@0: if (matchindex != USEARCH_DONE) { michael@0: if (m_search_->isOverlap) { michael@0: matchindex += m_search_->matchedLength - 2; michael@0: } michael@0: michael@0: return handlePrev(matchindex, status); michael@0: } michael@0: michael@0: return handlePrev(offset, status); michael@0: } michael@0: michael@0: return USEARCH_DONE; michael@0: } michael@0: michael@0: void SearchIterator::reset() michael@0: { michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: setMatchNotFound(); michael@0: setOffset(0, status); michael@0: m_search_->isOverlap = FALSE; michael@0: m_search_->isCanonicalMatch = FALSE; michael@0: m_search_->elementComparisonType = 0; michael@0: m_search_->isForwardSearching = TRUE; michael@0: m_search_->reset = TRUE; michael@0: } michael@0: michael@0: // protected constructors and destructors ----------------------------- michael@0: michael@0: SearchIterator::SearchIterator() michael@0: { michael@0: m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); michael@0: m_search_->breakIter = NULL; michael@0: m_search_->isOverlap = FALSE; michael@0: m_search_->isCanonicalMatch = FALSE; michael@0: m_search_->elementComparisonType = 0; michael@0: m_search_->isForwardSearching = TRUE; michael@0: m_search_->reset = TRUE; michael@0: m_search_->matchedIndex = USEARCH_DONE; michael@0: m_search_->matchedLength = 0; michael@0: m_search_->text = NULL; michael@0: m_search_->textLength = 0; michael@0: m_breakiterator_ = NULL; michael@0: } michael@0: michael@0: SearchIterator::SearchIterator(const UnicodeString &text, michael@0: BreakIterator *breakiter) : michael@0: m_breakiterator_(breakiter), michael@0: m_text_(text) michael@0: { michael@0: m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); michael@0: m_search_->breakIter = NULL; michael@0: m_search_->isOverlap = FALSE; michael@0: m_search_->isCanonicalMatch = FALSE; michael@0: m_search_->elementComparisonType = 0; michael@0: m_search_->isForwardSearching = TRUE; michael@0: m_search_->reset = TRUE; michael@0: m_search_->matchedIndex = USEARCH_DONE; michael@0: m_search_->matchedLength = 0; michael@0: m_search_->text = m_text_.getBuffer(); michael@0: m_search_->textLength = text.length(); michael@0: } michael@0: michael@0: SearchIterator::SearchIterator(CharacterIterator &text, michael@0: BreakIterator *breakiter) : michael@0: m_breakiterator_(breakiter) michael@0: { michael@0: m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); michael@0: m_search_->breakIter = NULL; michael@0: m_search_->isOverlap = FALSE; michael@0: m_search_->isCanonicalMatch = FALSE; michael@0: m_search_->elementComparisonType = 0; michael@0: m_search_->isForwardSearching = TRUE; michael@0: m_search_->reset = TRUE; michael@0: m_search_->matchedIndex = USEARCH_DONE; michael@0: m_search_->matchedLength = 0; michael@0: text.getText(m_text_); michael@0: m_search_->text = m_text_.getBuffer(); michael@0: m_search_->textLength = m_text_.length(); michael@0: m_breakiterator_ = breakiter; michael@0: } michael@0: michael@0: // protected methods ------------------------------------------------------ michael@0: michael@0: SearchIterator & SearchIterator::operator=(const SearchIterator &that) michael@0: { michael@0: if (this != &that) { michael@0: m_breakiterator_ = that.m_breakiterator_; michael@0: m_text_ = that.m_text_; michael@0: m_search_->breakIter = that.m_search_->breakIter; michael@0: m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch; michael@0: m_search_->isOverlap = that.m_search_->isOverlap; michael@0: m_search_->elementComparisonType = that.m_search_->elementComparisonType; michael@0: m_search_->matchedIndex = that.m_search_->matchedIndex; michael@0: m_search_->matchedLength = that.m_search_->matchedLength; michael@0: m_search_->text = that.m_search_->text; michael@0: m_search_->textLength = that.m_search_->textLength; michael@0: } michael@0: return *this; michael@0: } michael@0: michael@0: void SearchIterator::setMatchLength(int32_t length) michael@0: { michael@0: m_search_->matchedLength = length; michael@0: } michael@0: michael@0: void SearchIterator::setMatchStart(int32_t position) michael@0: { michael@0: m_search_->matchedIndex = position; michael@0: } michael@0: michael@0: void SearchIterator::setMatchNotFound() michael@0: { michael@0: setMatchStart(USEARCH_DONE); michael@0: setMatchLength(0); michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: // by default no errors should be returned here since offsets are within michael@0: // range. michael@0: if (m_search_->isForwardSearching) { michael@0: setOffset(m_search_->textLength, status); michael@0: } michael@0: else { michael@0: setOffset(0, status); michael@0: } michael@0: } michael@0: michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */