michael@0: /*
michael@0: **********************************************************************
michael@0: *   Copyright (C) 2001-2008 IBM and others. All rights reserved.
michael@0: **********************************************************************
michael@0: *   Date        Name        Description
michael@0: *  03/22/2000   helena      Creation.
michael@0: **********************************************************************
michael@0: */
michael@0: 
michael@0: #include "unicode/utypes.h"
michael@0: 
michael@0: #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
michael@0: 
michael@0: #include "unicode/stsearch.h"
michael@0: #include "usrchimp.h"
michael@0: #include "cmemory.h"
michael@0: 
michael@0: U_NAMESPACE_BEGIN
michael@0: 
michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
michael@0: 
michael@0: // public constructors and destructors -----------------------------------
michael@0: 
michael@0: StringSearch::StringSearch(const UnicodeString &pattern,
michael@0:                            const UnicodeString &text,
michael@0:                            const Locale        &locale,
michael@0:                                  BreakIterator *breakiter,
michael@0:                                  UErrorCode    &status) :
michael@0:                            SearchIterator(text, breakiter),
michael@0:                            m_collator_(),
michael@0:                            m_pattern_(pattern)
michael@0: {
michael@0:     if (U_FAILURE(status)) {
michael@0:         m_strsrch_ = NULL;
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
michael@0:                               m_text_.getBuffer(), m_text_.length(),
michael@0:                               locale.getName(), (UBreakIterator *)breakiter,
michael@0:                               &status);
michael@0:     uprv_free(m_search_);
michael@0:     m_search_ = NULL;
michael@0: 
michael@0:     // !!! dlf m_collator_ is an odd beast.  basically it is an aliasing
michael@0:     // wrapper around the internal collator and rules, which (here) are
michael@0:     // owned by this stringsearch object.  this means 1) it's destructor
michael@0:     // _should not_ delete the ucollator or rules, and 2) changes made
michael@0:     // to the exposed collator (setStrength etc) _should_ modify the
michael@0:     // ucollator.  thus the collator is not a copy-on-write alias, and it
michael@0:     // needs to distinguish itself not merely from 'stand alone' colators
michael@0:     // but also from copy-on-write ones.  it needs additional state, which
michael@0:     // setUCollator should set.
michael@0: 
michael@0:     if (U_SUCCESS(status)) {
michael@0:         // Alias the collator
michael@0:         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
michael@0:         // m_search_ has been created by the base SearchIterator class
michael@0:         m_search_        = m_strsrch_->search;
michael@0:     }
michael@0: }
michael@0: 
michael@0: StringSearch::StringSearch(const UnicodeString     &pattern,
michael@0:                            const UnicodeString     &text,
michael@0:                                  RuleBasedCollator *coll,
michael@0:                                  BreakIterator     *breakiter,
michael@0:                                  UErrorCode        &status) :
michael@0:                            SearchIterator(text, breakiter),
michael@0:                            m_collator_(),
michael@0:                            m_pattern_(pattern)
michael@0: {
michael@0:     if (U_FAILURE(status)) {
michael@0:         m_strsrch_ = NULL;
michael@0:         return;
michael@0:     }
michael@0:     if (coll == NULL) {
michael@0:         status     = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:         m_strsrch_ = NULL;
michael@0:         return;
michael@0:     }
michael@0:     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
michael@0:                                           m_pattern_.length(),
michael@0:                                           m_text_.getBuffer(),
michael@0:                                           m_text_.length(), coll->ucollator,
michael@0:                                           (UBreakIterator *)breakiter,
michael@0:                                           &status);
michael@0:     uprv_free(m_search_);
michael@0:     m_search_ = NULL;
michael@0: 
michael@0:     if (U_SUCCESS(status)) {
michael@0:         // Alias the collator
michael@0:         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
michael@0:         // m_search_ has been created by the base SearchIterator class
michael@0:         m_search_ = m_strsrch_->search;
michael@0:     }
michael@0: }
michael@0: 
michael@0: StringSearch::StringSearch(const UnicodeString     &pattern,
michael@0:                                  CharacterIterator &text,
michael@0:                            const Locale            &locale,
michael@0:                                  BreakIterator     *breakiter,
michael@0:                                  UErrorCode        &status) :
michael@0:                            SearchIterator(text, breakiter),
michael@0:                            m_collator_(),
michael@0:                            m_pattern_(pattern)
michael@0: {
michael@0:     if (U_FAILURE(status)) {
michael@0:         m_strsrch_ = NULL;
michael@0:         return;
michael@0:     }
michael@0:     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
michael@0:                               m_text_.getBuffer(), m_text_.length(),
michael@0:                               locale.getName(), (UBreakIterator *)breakiter,
michael@0:                               &status);
michael@0:     uprv_free(m_search_);
michael@0:     m_search_ = NULL;
michael@0: 
michael@0:     if (U_SUCCESS(status)) {
michael@0:         // Alias the collator
michael@0:         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
michael@0:         // m_search_ has been created by the base SearchIterator class
michael@0:         m_search_ = m_strsrch_->search;
michael@0:     }
michael@0: }
michael@0: 
michael@0: StringSearch::StringSearch(const UnicodeString     &pattern,
michael@0:                                  CharacterIterator &text,
michael@0:                                  RuleBasedCollator *coll,
michael@0:                                  BreakIterator     *breakiter,
michael@0:                                  UErrorCode        &status) :
michael@0:                            SearchIterator(text, breakiter),
michael@0:                            m_collator_(),
michael@0:                            m_pattern_(pattern)
michael@0: {
michael@0:     if (U_FAILURE(status)) {
michael@0:         m_strsrch_ = NULL;
michael@0:         return;
michael@0:     }
michael@0:     if (coll == NULL) {
michael@0:         status     = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:         m_strsrch_ = NULL;
michael@0:         return;
michael@0:     }
michael@0:     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
michael@0:                                           m_pattern_.length(),
michael@0:                                           m_text_.getBuffer(),
michael@0:                                           m_text_.length(), coll->ucollator,
michael@0:                                           (UBreakIterator *)breakiter,
michael@0:                                           &status);
michael@0:     uprv_free(m_search_);
michael@0:     m_search_ = NULL;
michael@0: 
michael@0:     if (U_SUCCESS(status)) {
michael@0:         // Alias the collator
michael@0:         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
michael@0:         // m_search_ has been created by the base SearchIterator class
michael@0:         m_search_ = m_strsrch_->search;
michael@0:     }
michael@0: }
michael@0: 
michael@0: StringSearch::StringSearch(const StringSearch &that) :
michael@0:                        SearchIterator(that.m_text_, that.m_breakiterator_),
michael@0:                        m_collator_(),
michael@0:                        m_pattern_(that.m_pattern_)
michael@0: {
michael@0:     UErrorCode status = U_ZERO_ERROR;
michael@0: 
michael@0:     // Free m_search_ from the superclass
michael@0:     uprv_free(m_search_);
michael@0:     m_search_ = NULL;
michael@0: 
michael@0:     if (that.m_strsrch_ == NULL) {
michael@0:         // This was not a good copy
michael@0:         m_strsrch_ = NULL;
michael@0:     }
michael@0:     else {
michael@0:         // Make a deep copy
michael@0:         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
michael@0:                                               m_pattern_.length(),
michael@0:                                               m_text_.getBuffer(),
michael@0:                                               m_text_.length(),
michael@0:                                               that.m_strsrch_->collator,
michael@0:                                              (UBreakIterator *)that.m_breakiterator_,
michael@0:                                               &status);
michael@0:         if (U_SUCCESS(status)) {
michael@0:             // Alias the collator
michael@0:             m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
michael@0:             // m_search_ has been created by the base SearchIterator class
michael@0:             m_search_        = m_strsrch_->search;
michael@0:         }
michael@0:     }
michael@0: }
michael@0: 
michael@0: StringSearch::~StringSearch()
michael@0: {
michael@0:     if (m_strsrch_ != NULL) {
michael@0:         usearch_close(m_strsrch_);
michael@0:         m_search_ = NULL;
michael@0:     }
michael@0: }
michael@0: 
michael@0: StringSearch *
michael@0: StringSearch::clone() const {
michael@0:     return new StringSearch(*this);
michael@0: }
michael@0: 
michael@0: // operator overloading ---------------------------------------------
michael@0: StringSearch & StringSearch::operator=(const StringSearch &that)
michael@0: {
michael@0:     if ((*this) != that) {
michael@0:         UErrorCode status = U_ZERO_ERROR;
michael@0:         m_text_          = that.m_text_;
michael@0:         m_breakiterator_ = that.m_breakiterator_;
michael@0:         m_pattern_       = that.m_pattern_;
michael@0:         // all m_search_ in the parent class is linked up with m_strsrch_
michael@0:         usearch_close(m_strsrch_);
michael@0:         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
michael@0:                                               m_pattern_.length(),
michael@0:                                               m_text_.getBuffer(),
michael@0:                                               m_text_.length(),
michael@0:                                               that.m_strsrch_->collator,
michael@0:                                               NULL, &status);
michael@0:         // Check null pointer
michael@0:         if (m_strsrch_ != NULL) {
michael@0: 	        // Alias the collator
michael@0: 	        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
michael@0: 	        m_search_ = m_strsrch_->search;
michael@0:         }
michael@0:     }
michael@0:     return *this;
michael@0: }
michael@0: 
michael@0: UBool StringSearch::operator==(const SearchIterator &that) const
michael@0: {
michael@0:     if (this == &that) {
michael@0:         return TRUE;
michael@0:     }
michael@0:     if (SearchIterator::operator ==(that)) {
michael@0:         StringSearch &thatsrch = (StringSearch &)that;
michael@0:         return (this->m_pattern_ == thatsrch.m_pattern_ &&
michael@0:                 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
michael@0:     }
michael@0:     return FALSE;
michael@0: }
michael@0: 
michael@0: // public get and set methods ----------------------------------------
michael@0: 
michael@0: void StringSearch::setOffset(int32_t position, UErrorCode &status)
michael@0: {
michael@0:     // status checked in usearch_setOffset
michael@0:     usearch_setOffset(m_strsrch_, position, &status);
michael@0: }
michael@0: 
michael@0: int32_t StringSearch::getOffset(void) const
michael@0: {
michael@0:     return usearch_getOffset(m_strsrch_);
michael@0: }
michael@0: 
michael@0: void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
michael@0: {
michael@0:     if (U_SUCCESS(status)) {
michael@0:         m_text_ = text;
michael@0:         usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
michael@0:     }
michael@0: }
michael@0: 
michael@0: void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
michael@0: {
michael@0:     if (U_SUCCESS(status)) {
michael@0:         text.getText(m_text_);
michael@0:         usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
michael@0:     }
michael@0: }
michael@0: 
michael@0: RuleBasedCollator * StringSearch::getCollator() const
michael@0: {
michael@0:     return (RuleBasedCollator *)&m_collator_;
michael@0: }
michael@0: 
michael@0: void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
michael@0: {
michael@0:     if (U_SUCCESS(status)) {
michael@0:         usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
michael@0:         // Alias the collator
michael@0:         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
michael@0:     }
michael@0: }
michael@0: 
michael@0: void StringSearch::setPattern(const UnicodeString &pattern,
michael@0:                                     UErrorCode    &status)
michael@0: {
michael@0:     if (U_SUCCESS(status)) {
michael@0:         m_pattern_ = pattern;
michael@0:         usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
michael@0:                            &status);
michael@0:     }
michael@0: }
michael@0: 
michael@0: const UnicodeString & StringSearch::getPattern() const
michael@0: {
michael@0:     return m_pattern_;
michael@0: }
michael@0: 
michael@0: // public methods ----------------------------------------------------
michael@0: 
michael@0: void StringSearch::reset()
michael@0: {
michael@0:     usearch_reset(m_strsrch_);
michael@0: }
michael@0: 
michael@0: SearchIterator * StringSearch::safeClone(void) const
michael@0: {
michael@0:     UErrorCode status = U_ZERO_ERROR;
michael@0:     StringSearch *result = new StringSearch(m_pattern_, m_text_,
michael@0:                                             (RuleBasedCollator *)&m_collator_,
michael@0:                                             m_breakiterator_,
michael@0:                                             status);
michael@0:     /* test for NULL */
michael@0:     if (result == 0) {
michael@0:         status = U_MEMORY_ALLOCATION_ERROR;
michael@0:         return 0;
michael@0:     }
michael@0:     result->setOffset(getOffset(), status);
michael@0:     result->setMatchStart(m_strsrch_->search->matchedIndex);
michael@0:     result->setMatchLength(m_strsrch_->search->matchedLength);
michael@0:     if (U_FAILURE(status)) {
michael@0:         return NULL;
michael@0:     }
michael@0:     return result;
michael@0: }
michael@0: 
michael@0: // protected method -------------------------------------------------
michael@0: 
michael@0: int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
michael@0: {
michael@0:     // values passed here are already in the pre-shift position
michael@0:     if (U_SUCCESS(status)) {
michael@0:         if (m_strsrch_->pattern.CELength == 0) {
michael@0:             m_search_->matchedIndex =
michael@0:                                     m_search_->matchedIndex == USEARCH_DONE ?
michael@0:                                     getOffset() : m_search_->matchedIndex + 1;
michael@0:             m_search_->matchedLength = 0;
michael@0:             ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
michael@0:                            &status);
michael@0:             if (m_search_->matchedIndex == m_search_->textLength) {
michael@0:                 m_search_->matchedIndex = USEARCH_DONE;
michael@0:             }
michael@0:         }
michael@0:         else {
michael@0:             // looking at usearch.cpp, this part is shifted out to
michael@0:             // StringSearch instead of SearchIterator because m_strsrch_ is
michael@0:             // not accessible in SearchIterator
michael@0: #if 0
michael@0:             if (position + m_strsrch_->pattern.defaultShiftSize
michael@0:                 > m_search_->textLength) {
michael@0:                 setMatchNotFound();
michael@0:                 return USEARCH_DONE;
michael@0:             }
michael@0: #endif
michael@0:             if (m_search_->matchedLength <= 0) {
michael@0:                 // the flipping direction issue has already been handled
michael@0:                 // in next()
michael@0:                 // for boundary check purposes. this will ensure that the
michael@0:                 // next match will not preceed the current offset
michael@0:                 // note search->matchedIndex will always be set to something
michael@0:                 // in the code
michael@0:                 m_search_->matchedIndex = position - 1;
michael@0:             }
michael@0: 
michael@0:             ucol_setOffset(m_strsrch_->textIter, position, &status);
michael@0:             
michael@0: #if 0
michael@0:             for (;;) {
michael@0:                 if (m_search_->isCanonicalMatch) {
michael@0:                     // can't use exact here since extra accents are allowed.
michael@0:                     usearch_handleNextCanonical(m_strsrch_, &status);
michael@0:                 }
michael@0:                 else {
michael@0:                     usearch_handleNextExact(m_strsrch_, &status);
michael@0:                 }
michael@0:                 if (U_FAILURE(status)) {
michael@0:                     return USEARCH_DONE;
michael@0:                 }
michael@0:                 if (m_breakiterator_ == NULL
michael@0: #if !UCONFIG_NO_BREAK_ITERATION
michael@0:                     ||
michael@0:                     m_search_->matchedIndex == USEARCH_DONE ||
michael@0:                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
michael@0:                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
michael@0:                                                   m_search_->matchedLength))
michael@0: #endif
michael@0:                 ) {
michael@0:                     if (m_search_->matchedIndex == USEARCH_DONE) {
michael@0:                         ucol_setOffset(m_strsrch_->textIter,
michael@0:                                        m_search_->textLength, &status);
michael@0:                     }
michael@0:                     else {
michael@0:                         ucol_setOffset(m_strsrch_->textIter,
michael@0:                                        m_search_->matchedIndex, &status);
michael@0:                     }
michael@0:                     return m_search_->matchedIndex;
michael@0:                 }
michael@0:             }
michael@0: #else
michael@0:             // if m_strsrch_->breakIter is always the same as m_breakiterator_
michael@0:             // then we don't need to check the match boundaries here because
michael@0:             // usearch_handleNextXXX will already have done it.
michael@0:             if (m_search_->isCanonicalMatch) {
michael@0:             	// *could* actually use exact here 'cause no extra accents allowed...
michael@0:             	usearch_handleNextCanonical(m_strsrch_, &status);
michael@0:             } else {
michael@0:             	usearch_handleNextExact(m_strsrch_, &status);
michael@0:             }
michael@0:             
michael@0:             if (U_FAILURE(status)) {
michael@0:             	return USEARCH_DONE;
michael@0:             }
michael@0:             
michael@0:             if (m_search_->matchedIndex == USEARCH_DONE) {
michael@0:             	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
michael@0:             } else {
michael@0:             	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
michael@0:             }
michael@0:             
michael@0:             return m_search_->matchedIndex;
michael@0: #endif
michael@0:         }
michael@0:     }
michael@0:     return USEARCH_DONE;
michael@0: }
michael@0: 
michael@0: int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
michael@0: {
michael@0:     // values passed here are already in the pre-shift position
michael@0:     if (U_SUCCESS(status)) {
michael@0:         if (m_strsrch_->pattern.CELength == 0) {
michael@0:             m_search_->matchedIndex =
michael@0:                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
michael@0:                    m_search_->matchedIndex);
michael@0:             if (m_search_->matchedIndex == 0) {
michael@0:                 setMatchNotFound();
michael@0:             }
michael@0:             else {
michael@0:                 m_search_->matchedIndex --;
michael@0:                 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
michael@0:                                &status);
michael@0:                 m_search_->matchedLength = 0;
michael@0:             }
michael@0:         }
michael@0:         else {
michael@0:             // looking at usearch.cpp, this part is shifted out to
michael@0:             // StringSearch instead of SearchIterator because m_strsrch_ is
michael@0:             // not accessible in SearchIterator
michael@0: #if 0
michael@0:             if (!m_search_->isOverlap &&
michael@0:                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
michael@0:                 setMatchNotFound();
michael@0:                 return USEARCH_DONE;
michael@0:             }
michael@0:             
michael@0:             for (;;) {
michael@0:                 if (m_search_->isCanonicalMatch) {
michael@0:                     // can't use exact here since extra accents are allowed.
michael@0:                     usearch_handlePreviousCanonical(m_strsrch_, &status);
michael@0:                 }
michael@0:                 else {
michael@0:                     usearch_handlePreviousExact(m_strsrch_, &status);
michael@0:                 }
michael@0:                 if (U_FAILURE(status)) {
michael@0:                     return USEARCH_DONE;
michael@0:                 }
michael@0:                 if (m_breakiterator_ == NULL
michael@0: #if !UCONFIG_NO_BREAK_ITERATION
michael@0:                     ||
michael@0:                     m_search_->matchedIndex == USEARCH_DONE ||
michael@0:                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
michael@0:                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
michael@0:                                                   m_search_->matchedLength))
michael@0: #endif
michael@0:                 ) {
michael@0:                     return m_search_->matchedIndex;
michael@0:                 }
michael@0:             }
michael@0: #else
michael@0:             ucol_setOffset(m_strsrch_->textIter, position, &status);
michael@0:             
michael@0:             if (m_search_->isCanonicalMatch) {
michael@0:             	// *could* use exact match here since extra accents *not* allowed!
michael@0:             	usearch_handlePreviousCanonical(m_strsrch_, &status);
michael@0:             } else {
michael@0:             	usearch_handlePreviousExact(m_strsrch_, &status);
michael@0:             }
michael@0:             
michael@0:             if (U_FAILURE(status)) {
michael@0:             	return USEARCH_DONE;
michael@0:             }
michael@0:             
michael@0:             return m_search_->matchedIndex;
michael@0: #endif
michael@0:         }
michael@0: 
michael@0:         return m_search_->matchedIndex;
michael@0:     }
michael@0:     return USEARCH_DONE;
michael@0: }
michael@0: 
michael@0: U_NAMESPACE_END
michael@0: 
michael@0: #endif /* #if !UCONFIG_NO_COLLATION */