michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 2001-2008 IBM and others. All rights reserved. michael@0: ********************************************************************** michael@0: * Date Name Description michael@0: * 03/22/2000 helena Creation. michael@0: ********************************************************************** michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION michael@0: michael@0: #include "unicode/stsearch.h" michael@0: #include "usrchimp.h" michael@0: #include "cmemory.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) michael@0: michael@0: // public constructors and destructors ----------------------------------- michael@0: michael@0: StringSearch::StringSearch(const UnicodeString &pattern, michael@0: const UnicodeString &text, michael@0: const Locale &locale, michael@0: BreakIterator *breakiter, michael@0: UErrorCode &status) : michael@0: SearchIterator(text, breakiter), michael@0: m_collator_(), michael@0: m_pattern_(pattern) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: m_strsrch_ = NULL; michael@0: return; michael@0: } michael@0: michael@0: m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), michael@0: m_text_.getBuffer(), m_text_.length(), michael@0: locale.getName(), (UBreakIterator *)breakiter, michael@0: &status); michael@0: uprv_free(m_search_); michael@0: m_search_ = NULL; michael@0: michael@0: // !!! dlf m_collator_ is an odd beast. basically it is an aliasing michael@0: // wrapper around the internal collator and rules, which (here) are michael@0: // owned by this stringsearch object. this means 1) it's destructor michael@0: // _should not_ delete the ucollator or rules, and 2) changes made michael@0: // to the exposed collator (setStrength etc) _should_ modify the michael@0: // ucollator. thus the collator is not a copy-on-write alias, and it michael@0: // needs to distinguish itself not merely from 'stand alone' colators michael@0: // but also from copy-on-write ones. it needs additional state, which michael@0: // setUCollator should set. michael@0: michael@0: if (U_SUCCESS(status)) { michael@0: // Alias the collator michael@0: m_collator_.setUCollator((UCollator *)m_strsrch_->collator); michael@0: // m_search_ has been created by the base SearchIterator class michael@0: m_search_ = m_strsrch_->search; michael@0: } michael@0: } michael@0: michael@0: StringSearch::StringSearch(const UnicodeString &pattern, michael@0: const UnicodeString &text, michael@0: RuleBasedCollator *coll, michael@0: BreakIterator *breakiter, michael@0: UErrorCode &status) : michael@0: SearchIterator(text, breakiter), michael@0: m_collator_(), michael@0: m_pattern_(pattern) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: m_strsrch_ = NULL; michael@0: return; michael@0: } michael@0: if (coll == NULL) { michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: m_strsrch_ = NULL; michael@0: return; michael@0: } michael@0: m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), michael@0: m_pattern_.length(), michael@0: m_text_.getBuffer(), michael@0: m_text_.length(), coll->ucollator, michael@0: (UBreakIterator *)breakiter, michael@0: &status); michael@0: uprv_free(m_search_); michael@0: m_search_ = NULL; michael@0: michael@0: if (U_SUCCESS(status)) { michael@0: // Alias the collator michael@0: m_collator_.setUCollator((UCollator *)m_strsrch_->collator); michael@0: // m_search_ has been created by the base SearchIterator class michael@0: m_search_ = m_strsrch_->search; michael@0: } michael@0: } michael@0: michael@0: StringSearch::StringSearch(const UnicodeString &pattern, michael@0: CharacterIterator &text, michael@0: const Locale &locale, michael@0: BreakIterator *breakiter, michael@0: UErrorCode &status) : michael@0: SearchIterator(text, breakiter), michael@0: m_collator_(), michael@0: m_pattern_(pattern) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: m_strsrch_ = NULL; michael@0: return; michael@0: } michael@0: m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), michael@0: m_text_.getBuffer(), m_text_.length(), michael@0: locale.getName(), (UBreakIterator *)breakiter, michael@0: &status); michael@0: uprv_free(m_search_); michael@0: m_search_ = NULL; michael@0: michael@0: if (U_SUCCESS(status)) { michael@0: // Alias the collator michael@0: m_collator_.setUCollator((UCollator *)m_strsrch_->collator); michael@0: // m_search_ has been created by the base SearchIterator class michael@0: m_search_ = m_strsrch_->search; michael@0: } michael@0: } michael@0: michael@0: StringSearch::StringSearch(const UnicodeString &pattern, michael@0: CharacterIterator &text, michael@0: RuleBasedCollator *coll, michael@0: BreakIterator *breakiter, michael@0: UErrorCode &status) : michael@0: SearchIterator(text, breakiter), michael@0: m_collator_(), michael@0: m_pattern_(pattern) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: m_strsrch_ = NULL; michael@0: return; michael@0: } michael@0: if (coll == NULL) { michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: m_strsrch_ = NULL; michael@0: return; michael@0: } michael@0: m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), michael@0: m_pattern_.length(), michael@0: m_text_.getBuffer(), michael@0: m_text_.length(), coll->ucollator, michael@0: (UBreakIterator *)breakiter, michael@0: &status); michael@0: uprv_free(m_search_); michael@0: m_search_ = NULL; michael@0: michael@0: if (U_SUCCESS(status)) { michael@0: // Alias the collator michael@0: m_collator_.setUCollator((UCollator *)m_strsrch_->collator); michael@0: // m_search_ has been created by the base SearchIterator class michael@0: m_search_ = m_strsrch_->search; michael@0: } michael@0: } michael@0: michael@0: StringSearch::StringSearch(const StringSearch &that) : michael@0: SearchIterator(that.m_text_, that.m_breakiterator_), michael@0: m_collator_(), michael@0: m_pattern_(that.m_pattern_) michael@0: { michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: michael@0: // Free m_search_ from the superclass michael@0: uprv_free(m_search_); michael@0: m_search_ = NULL; michael@0: michael@0: if (that.m_strsrch_ == NULL) { michael@0: // This was not a good copy michael@0: m_strsrch_ = NULL; michael@0: } michael@0: else { michael@0: // Make a deep copy michael@0: m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), michael@0: m_pattern_.length(), michael@0: m_text_.getBuffer(), michael@0: m_text_.length(), michael@0: that.m_strsrch_->collator, michael@0: (UBreakIterator *)that.m_breakiterator_, michael@0: &status); michael@0: if (U_SUCCESS(status)) { michael@0: // Alias the collator michael@0: m_collator_.setUCollator((UCollator *)m_strsrch_->collator); michael@0: // m_search_ has been created by the base SearchIterator class michael@0: m_search_ = m_strsrch_->search; michael@0: } michael@0: } michael@0: } michael@0: michael@0: StringSearch::~StringSearch() michael@0: { michael@0: if (m_strsrch_ != NULL) { michael@0: usearch_close(m_strsrch_); michael@0: m_search_ = NULL; michael@0: } michael@0: } michael@0: michael@0: StringSearch * michael@0: StringSearch::clone() const { michael@0: return new StringSearch(*this); michael@0: } michael@0: michael@0: // operator overloading --------------------------------------------- michael@0: StringSearch & StringSearch::operator=(const StringSearch &that) michael@0: { michael@0: if ((*this) != that) { michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: m_text_ = that.m_text_; michael@0: m_breakiterator_ = that.m_breakiterator_; michael@0: m_pattern_ = that.m_pattern_; michael@0: // all m_search_ in the parent class is linked up with m_strsrch_ michael@0: usearch_close(m_strsrch_); michael@0: m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), michael@0: m_pattern_.length(), michael@0: m_text_.getBuffer(), michael@0: m_text_.length(), michael@0: that.m_strsrch_->collator, michael@0: NULL, &status); michael@0: // Check null pointer michael@0: if (m_strsrch_ != NULL) { michael@0: // Alias the collator michael@0: m_collator_.setUCollator((UCollator *)m_strsrch_->collator); michael@0: m_search_ = m_strsrch_->search; michael@0: } michael@0: } michael@0: return *this; michael@0: } michael@0: michael@0: UBool StringSearch::operator==(const SearchIterator &that) const michael@0: { michael@0: if (this == &that) { michael@0: return TRUE; michael@0: } michael@0: if (SearchIterator::operator ==(that)) { michael@0: StringSearch &thatsrch = (StringSearch &)that; michael@0: return (this->m_pattern_ == thatsrch.m_pattern_ && michael@0: this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); michael@0: } michael@0: return FALSE; michael@0: } michael@0: michael@0: // public get and set methods ---------------------------------------- michael@0: michael@0: void StringSearch::setOffset(int32_t position, UErrorCode &status) michael@0: { michael@0: // status checked in usearch_setOffset michael@0: usearch_setOffset(m_strsrch_, position, &status); michael@0: } michael@0: michael@0: int32_t StringSearch::getOffset(void) const michael@0: { michael@0: return usearch_getOffset(m_strsrch_); michael@0: } michael@0: michael@0: void StringSearch::setText(const UnicodeString &text, UErrorCode &status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: m_text_ = text; michael@0: usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); michael@0: } michael@0: } michael@0: michael@0: void StringSearch::setText(CharacterIterator &text, UErrorCode &status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: text.getText(m_text_); michael@0: usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); michael@0: } michael@0: } michael@0: michael@0: RuleBasedCollator * StringSearch::getCollator() const michael@0: { michael@0: return (RuleBasedCollator *)&m_collator_; michael@0: } michael@0: michael@0: void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: usearch_setCollator(m_strsrch_, coll->getUCollator(), &status); michael@0: // Alias the collator michael@0: m_collator_.setUCollator((UCollator *)m_strsrch_->collator); michael@0: } michael@0: } michael@0: michael@0: void StringSearch::setPattern(const UnicodeString &pattern, michael@0: UErrorCode &status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: m_pattern_ = pattern; michael@0: usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), michael@0: &status); michael@0: } michael@0: } michael@0: michael@0: const UnicodeString & StringSearch::getPattern() const michael@0: { michael@0: return m_pattern_; michael@0: } michael@0: michael@0: // public methods ---------------------------------------------------- michael@0: michael@0: void StringSearch::reset() michael@0: { michael@0: usearch_reset(m_strsrch_); michael@0: } michael@0: michael@0: SearchIterator * StringSearch::safeClone(void) const michael@0: { michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: StringSearch *result = new StringSearch(m_pattern_, m_text_, michael@0: (RuleBasedCollator *)&m_collator_, michael@0: m_breakiterator_, michael@0: status); michael@0: /* test for NULL */ michael@0: if (result == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return 0; michael@0: } michael@0: result->setOffset(getOffset(), status); michael@0: result->setMatchStart(m_strsrch_->search->matchedIndex); michael@0: result->setMatchLength(m_strsrch_->search->matchedLength); michael@0: if (U_FAILURE(status)) { michael@0: return NULL; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: // protected method ------------------------------------------------- michael@0: michael@0: int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) michael@0: { michael@0: // values passed here are already in the pre-shift position michael@0: if (U_SUCCESS(status)) { michael@0: if (m_strsrch_->pattern.CELength == 0) { michael@0: m_search_->matchedIndex = michael@0: m_search_->matchedIndex == USEARCH_DONE ? michael@0: getOffset() : m_search_->matchedIndex + 1; michael@0: m_search_->matchedLength = 0; michael@0: ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, michael@0: &status); michael@0: if (m_search_->matchedIndex == m_search_->textLength) { michael@0: m_search_->matchedIndex = USEARCH_DONE; michael@0: } michael@0: } michael@0: else { michael@0: // looking at usearch.cpp, this part is shifted out to michael@0: // StringSearch instead of SearchIterator because m_strsrch_ is michael@0: // not accessible in SearchIterator michael@0: #if 0 michael@0: if (position + m_strsrch_->pattern.defaultShiftSize michael@0: > m_search_->textLength) { michael@0: setMatchNotFound(); michael@0: return USEARCH_DONE; michael@0: } michael@0: #endif michael@0: if (m_search_->matchedLength <= 0) { michael@0: // the flipping direction issue has already been handled michael@0: // in next() michael@0: // for boundary check purposes. this will ensure that the michael@0: // next match will not preceed the current offset michael@0: // note search->matchedIndex will always be set to something michael@0: // in the code michael@0: m_search_->matchedIndex = position - 1; michael@0: } michael@0: michael@0: ucol_setOffset(m_strsrch_->textIter, position, &status); michael@0: michael@0: #if 0 michael@0: for (;;) { michael@0: if (m_search_->isCanonicalMatch) { michael@0: // can't use exact here since extra accents are allowed. michael@0: usearch_handleNextCanonical(m_strsrch_, &status); michael@0: } michael@0: else { michael@0: usearch_handleNextExact(m_strsrch_, &status); michael@0: } michael@0: if (U_FAILURE(status)) { michael@0: return USEARCH_DONE; michael@0: } michael@0: if (m_breakiterator_ == NULL michael@0: #if !UCONFIG_NO_BREAK_ITERATION michael@0: || michael@0: m_search_->matchedIndex == USEARCH_DONE || michael@0: (m_breakiterator_->isBoundary(m_search_->matchedIndex) && michael@0: m_breakiterator_->isBoundary(m_search_->matchedIndex + michael@0: m_search_->matchedLength)) michael@0: #endif michael@0: ) { michael@0: if (m_search_->matchedIndex == USEARCH_DONE) { michael@0: ucol_setOffset(m_strsrch_->textIter, michael@0: m_search_->textLength, &status); michael@0: } michael@0: else { michael@0: ucol_setOffset(m_strsrch_->textIter, michael@0: m_search_->matchedIndex, &status); michael@0: } michael@0: return m_search_->matchedIndex; michael@0: } michael@0: } michael@0: #else michael@0: // if m_strsrch_->breakIter is always the same as m_breakiterator_ michael@0: // then we don't need to check the match boundaries here because michael@0: // usearch_handleNextXXX will already have done it. michael@0: if (m_search_->isCanonicalMatch) { michael@0: // *could* actually use exact here 'cause no extra accents allowed... michael@0: usearch_handleNextCanonical(m_strsrch_, &status); michael@0: } else { michael@0: usearch_handleNextExact(m_strsrch_, &status); michael@0: } michael@0: michael@0: if (U_FAILURE(status)) { michael@0: return USEARCH_DONE; michael@0: } michael@0: michael@0: if (m_search_->matchedIndex == USEARCH_DONE) { michael@0: ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); michael@0: } else { michael@0: ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); michael@0: } michael@0: michael@0: return m_search_->matchedIndex; michael@0: #endif michael@0: } michael@0: } michael@0: return USEARCH_DONE; michael@0: } michael@0: michael@0: int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) michael@0: { michael@0: // values passed here are already in the pre-shift position michael@0: if (U_SUCCESS(status)) { michael@0: if (m_strsrch_->pattern.CELength == 0) { michael@0: m_search_->matchedIndex = michael@0: (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : michael@0: m_search_->matchedIndex); michael@0: if (m_search_->matchedIndex == 0) { michael@0: setMatchNotFound(); michael@0: } michael@0: else { michael@0: m_search_->matchedIndex --; michael@0: ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, michael@0: &status); michael@0: m_search_->matchedLength = 0; michael@0: } michael@0: } michael@0: else { michael@0: // looking at usearch.cpp, this part is shifted out to michael@0: // StringSearch instead of SearchIterator because m_strsrch_ is michael@0: // not accessible in SearchIterator michael@0: #if 0 michael@0: if (!m_search_->isOverlap && michael@0: position - m_strsrch_->pattern.defaultShiftSize < 0) { michael@0: setMatchNotFound(); michael@0: return USEARCH_DONE; michael@0: } michael@0: michael@0: for (;;) { michael@0: if (m_search_->isCanonicalMatch) { michael@0: // can't use exact here since extra accents are allowed. michael@0: usearch_handlePreviousCanonical(m_strsrch_, &status); michael@0: } michael@0: else { michael@0: usearch_handlePreviousExact(m_strsrch_, &status); michael@0: } michael@0: if (U_FAILURE(status)) { michael@0: return USEARCH_DONE; michael@0: } michael@0: if (m_breakiterator_ == NULL michael@0: #if !UCONFIG_NO_BREAK_ITERATION michael@0: || michael@0: m_search_->matchedIndex == USEARCH_DONE || michael@0: (m_breakiterator_->isBoundary(m_search_->matchedIndex) && michael@0: m_breakiterator_->isBoundary(m_search_->matchedIndex + michael@0: m_search_->matchedLength)) michael@0: #endif michael@0: ) { michael@0: return m_search_->matchedIndex; michael@0: } michael@0: } michael@0: #else michael@0: ucol_setOffset(m_strsrch_->textIter, position, &status); michael@0: michael@0: if (m_search_->isCanonicalMatch) { michael@0: // *could* use exact match here since extra accents *not* allowed! michael@0: usearch_handlePreviousCanonical(m_strsrch_, &status); michael@0: } else { michael@0: usearch_handlePreviousExact(m_strsrch_, &status); michael@0: } michael@0: michael@0: if (U_FAILURE(status)) { michael@0: return USEARCH_DONE; michael@0: } michael@0: michael@0: return m_search_->matchedIndex; michael@0: #endif michael@0: } michael@0: michael@0: return m_search_->matchedIndex; michael@0: } michael@0: return USEARCH_DONE; michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */