intl/icu/source/i18n/stsearch.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/stsearch.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,509 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
     1.7 +**********************************************************************
     1.8 +*   Date        Name        Description
     1.9 +*  03/22/2000   helena      Creation.
    1.10 +**********************************************************************
    1.11 +*/
    1.12 +
    1.13 +#include "unicode/utypes.h"
    1.14 +
    1.15 +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
    1.16 +
    1.17 +#include "unicode/stsearch.h"
    1.18 +#include "usrchimp.h"
    1.19 +#include "cmemory.h"
    1.20 +
    1.21 +U_NAMESPACE_BEGIN
    1.22 +
    1.23 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
    1.24 +
    1.25 +// public constructors and destructors -----------------------------------
    1.26 +
    1.27 +StringSearch::StringSearch(const UnicodeString &pattern,
    1.28 +                           const UnicodeString &text,
    1.29 +                           const Locale        &locale,
    1.30 +                                 BreakIterator *breakiter,
    1.31 +                                 UErrorCode    &status) :
    1.32 +                           SearchIterator(text, breakiter),
    1.33 +                           m_collator_(),
    1.34 +                           m_pattern_(pattern)
    1.35 +{
    1.36 +    if (U_FAILURE(status)) {
    1.37 +        m_strsrch_ = NULL;
    1.38 +        return;
    1.39 +    }
    1.40 +
    1.41 +    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
    1.42 +                              m_text_.getBuffer(), m_text_.length(),
    1.43 +                              locale.getName(), (UBreakIterator *)breakiter,
    1.44 +                              &status);
    1.45 +    uprv_free(m_search_);
    1.46 +    m_search_ = NULL;
    1.47 +
    1.48 +    // !!! dlf m_collator_ is an odd beast.  basically it is an aliasing
    1.49 +    // wrapper around the internal collator and rules, which (here) are
    1.50 +    // owned by this stringsearch object.  this means 1) it's destructor
    1.51 +    // _should not_ delete the ucollator or rules, and 2) changes made
    1.52 +    // to the exposed collator (setStrength etc) _should_ modify the
    1.53 +    // ucollator.  thus the collator is not a copy-on-write alias, and it
    1.54 +    // needs to distinguish itself not merely from 'stand alone' colators
    1.55 +    // but also from copy-on-write ones.  it needs additional state, which
    1.56 +    // setUCollator should set.
    1.57 +
    1.58 +    if (U_SUCCESS(status)) {
    1.59 +        // Alias the collator
    1.60 +        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    1.61 +        // m_search_ has been created by the base SearchIterator class
    1.62 +        m_search_        = m_strsrch_->search;
    1.63 +    }
    1.64 +}
    1.65 +
    1.66 +StringSearch::StringSearch(const UnicodeString     &pattern,
    1.67 +                           const UnicodeString     &text,
    1.68 +                                 RuleBasedCollator *coll,
    1.69 +                                 BreakIterator     *breakiter,
    1.70 +                                 UErrorCode        &status) :
    1.71 +                           SearchIterator(text, breakiter),
    1.72 +                           m_collator_(),
    1.73 +                           m_pattern_(pattern)
    1.74 +{
    1.75 +    if (U_FAILURE(status)) {
    1.76 +        m_strsrch_ = NULL;
    1.77 +        return;
    1.78 +    }
    1.79 +    if (coll == NULL) {
    1.80 +        status     = U_ILLEGAL_ARGUMENT_ERROR;
    1.81 +        m_strsrch_ = NULL;
    1.82 +        return;
    1.83 +    }
    1.84 +    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    1.85 +                                          m_pattern_.length(),
    1.86 +                                          m_text_.getBuffer(),
    1.87 +                                          m_text_.length(), coll->ucollator,
    1.88 +                                          (UBreakIterator *)breakiter,
    1.89 +                                          &status);
    1.90 +    uprv_free(m_search_);
    1.91 +    m_search_ = NULL;
    1.92 +
    1.93 +    if (U_SUCCESS(status)) {
    1.94 +        // Alias the collator
    1.95 +        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    1.96 +        // m_search_ has been created by the base SearchIterator class
    1.97 +        m_search_ = m_strsrch_->search;
    1.98 +    }
    1.99 +}
   1.100 +
   1.101 +StringSearch::StringSearch(const UnicodeString     &pattern,
   1.102 +                                 CharacterIterator &text,
   1.103 +                           const Locale            &locale,
   1.104 +                                 BreakIterator     *breakiter,
   1.105 +                                 UErrorCode        &status) :
   1.106 +                           SearchIterator(text, breakiter),
   1.107 +                           m_collator_(),
   1.108 +                           m_pattern_(pattern)
   1.109 +{
   1.110 +    if (U_FAILURE(status)) {
   1.111 +        m_strsrch_ = NULL;
   1.112 +        return;
   1.113 +    }
   1.114 +    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
   1.115 +                              m_text_.getBuffer(), m_text_.length(),
   1.116 +                              locale.getName(), (UBreakIterator *)breakiter,
   1.117 +                              &status);
   1.118 +    uprv_free(m_search_);
   1.119 +    m_search_ = NULL;
   1.120 +
   1.121 +    if (U_SUCCESS(status)) {
   1.122 +        // Alias the collator
   1.123 +        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
   1.124 +        // m_search_ has been created by the base SearchIterator class
   1.125 +        m_search_ = m_strsrch_->search;
   1.126 +    }
   1.127 +}
   1.128 +
   1.129 +StringSearch::StringSearch(const UnicodeString     &pattern,
   1.130 +                                 CharacterIterator &text,
   1.131 +                                 RuleBasedCollator *coll,
   1.132 +                                 BreakIterator     *breakiter,
   1.133 +                                 UErrorCode        &status) :
   1.134 +                           SearchIterator(text, breakiter),
   1.135 +                           m_collator_(),
   1.136 +                           m_pattern_(pattern)
   1.137 +{
   1.138 +    if (U_FAILURE(status)) {
   1.139 +        m_strsrch_ = NULL;
   1.140 +        return;
   1.141 +    }
   1.142 +    if (coll == NULL) {
   1.143 +        status     = U_ILLEGAL_ARGUMENT_ERROR;
   1.144 +        m_strsrch_ = NULL;
   1.145 +        return;
   1.146 +    }
   1.147 +    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
   1.148 +                                          m_pattern_.length(),
   1.149 +                                          m_text_.getBuffer(),
   1.150 +                                          m_text_.length(), coll->ucollator,
   1.151 +                                          (UBreakIterator *)breakiter,
   1.152 +                                          &status);
   1.153 +    uprv_free(m_search_);
   1.154 +    m_search_ = NULL;
   1.155 +
   1.156 +    if (U_SUCCESS(status)) {
   1.157 +        // Alias the collator
   1.158 +        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
   1.159 +        // m_search_ has been created by the base SearchIterator class
   1.160 +        m_search_ = m_strsrch_->search;
   1.161 +    }
   1.162 +}
   1.163 +
   1.164 +StringSearch::StringSearch(const StringSearch &that) :
   1.165 +                       SearchIterator(that.m_text_, that.m_breakiterator_),
   1.166 +                       m_collator_(),
   1.167 +                       m_pattern_(that.m_pattern_)
   1.168 +{
   1.169 +    UErrorCode status = U_ZERO_ERROR;
   1.170 +
   1.171 +    // Free m_search_ from the superclass
   1.172 +    uprv_free(m_search_);
   1.173 +    m_search_ = NULL;
   1.174 +
   1.175 +    if (that.m_strsrch_ == NULL) {
   1.176 +        // This was not a good copy
   1.177 +        m_strsrch_ = NULL;
   1.178 +    }
   1.179 +    else {
   1.180 +        // Make a deep copy
   1.181 +        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
   1.182 +                                              m_pattern_.length(),
   1.183 +                                              m_text_.getBuffer(),
   1.184 +                                              m_text_.length(),
   1.185 +                                              that.m_strsrch_->collator,
   1.186 +                                             (UBreakIterator *)that.m_breakiterator_,
   1.187 +                                              &status);
   1.188 +        if (U_SUCCESS(status)) {
   1.189 +            // Alias the collator
   1.190 +            m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
   1.191 +            // m_search_ has been created by the base SearchIterator class
   1.192 +            m_search_        = m_strsrch_->search;
   1.193 +        }
   1.194 +    }
   1.195 +}
   1.196 +
   1.197 +StringSearch::~StringSearch()
   1.198 +{
   1.199 +    if (m_strsrch_ != NULL) {
   1.200 +        usearch_close(m_strsrch_);
   1.201 +        m_search_ = NULL;
   1.202 +    }
   1.203 +}
   1.204 +
   1.205 +StringSearch *
   1.206 +StringSearch::clone() const {
   1.207 +    return new StringSearch(*this);
   1.208 +}
   1.209 +
   1.210 +// operator overloading ---------------------------------------------
   1.211 +StringSearch & StringSearch::operator=(const StringSearch &that)
   1.212 +{
   1.213 +    if ((*this) != that) {
   1.214 +        UErrorCode status = U_ZERO_ERROR;
   1.215 +        m_text_          = that.m_text_;
   1.216 +        m_breakiterator_ = that.m_breakiterator_;
   1.217 +        m_pattern_       = that.m_pattern_;
   1.218 +        // all m_search_ in the parent class is linked up with m_strsrch_
   1.219 +        usearch_close(m_strsrch_);
   1.220 +        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
   1.221 +                                              m_pattern_.length(),
   1.222 +                                              m_text_.getBuffer(),
   1.223 +                                              m_text_.length(),
   1.224 +                                              that.m_strsrch_->collator,
   1.225 +                                              NULL, &status);
   1.226 +        // Check null pointer
   1.227 +        if (m_strsrch_ != NULL) {
   1.228 +	        // Alias the collator
   1.229 +	        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
   1.230 +	        m_search_ = m_strsrch_->search;
   1.231 +        }
   1.232 +    }
   1.233 +    return *this;
   1.234 +}
   1.235 +
   1.236 +UBool StringSearch::operator==(const SearchIterator &that) const
   1.237 +{
   1.238 +    if (this == &that) {
   1.239 +        return TRUE;
   1.240 +    }
   1.241 +    if (SearchIterator::operator ==(that)) {
   1.242 +        StringSearch &thatsrch = (StringSearch &)that;
   1.243 +        return (this->m_pattern_ == thatsrch.m_pattern_ &&
   1.244 +                this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
   1.245 +    }
   1.246 +    return FALSE;
   1.247 +}
   1.248 +
   1.249 +// public get and set methods ----------------------------------------
   1.250 +
   1.251 +void StringSearch::setOffset(int32_t position, UErrorCode &status)
   1.252 +{
   1.253 +    // status checked in usearch_setOffset
   1.254 +    usearch_setOffset(m_strsrch_, position, &status);
   1.255 +}
   1.256 +
   1.257 +int32_t StringSearch::getOffset(void) const
   1.258 +{
   1.259 +    return usearch_getOffset(m_strsrch_);
   1.260 +}
   1.261 +
   1.262 +void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
   1.263 +{
   1.264 +    if (U_SUCCESS(status)) {
   1.265 +        m_text_ = text;
   1.266 +        usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
   1.267 +    }
   1.268 +}
   1.269 +
   1.270 +void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
   1.271 +{
   1.272 +    if (U_SUCCESS(status)) {
   1.273 +        text.getText(m_text_);
   1.274 +        usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
   1.275 +    }
   1.276 +}
   1.277 +
   1.278 +RuleBasedCollator * StringSearch::getCollator() const
   1.279 +{
   1.280 +    return (RuleBasedCollator *)&m_collator_;
   1.281 +}
   1.282 +
   1.283 +void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
   1.284 +{
   1.285 +    if (U_SUCCESS(status)) {
   1.286 +        usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
   1.287 +        // Alias the collator
   1.288 +        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
   1.289 +    }
   1.290 +}
   1.291 +
   1.292 +void StringSearch::setPattern(const UnicodeString &pattern,
   1.293 +                                    UErrorCode    &status)
   1.294 +{
   1.295 +    if (U_SUCCESS(status)) {
   1.296 +        m_pattern_ = pattern;
   1.297 +        usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
   1.298 +                           &status);
   1.299 +    }
   1.300 +}
   1.301 +
   1.302 +const UnicodeString & StringSearch::getPattern() const
   1.303 +{
   1.304 +    return m_pattern_;
   1.305 +}
   1.306 +
   1.307 +// public methods ----------------------------------------------------
   1.308 +
   1.309 +void StringSearch::reset()
   1.310 +{
   1.311 +    usearch_reset(m_strsrch_);
   1.312 +}
   1.313 +
   1.314 +SearchIterator * StringSearch::safeClone(void) const
   1.315 +{
   1.316 +    UErrorCode status = U_ZERO_ERROR;
   1.317 +    StringSearch *result = new StringSearch(m_pattern_, m_text_,
   1.318 +                                            (RuleBasedCollator *)&m_collator_,
   1.319 +                                            m_breakiterator_,
   1.320 +                                            status);
   1.321 +    /* test for NULL */
   1.322 +    if (result == 0) {
   1.323 +        status = U_MEMORY_ALLOCATION_ERROR;
   1.324 +        return 0;
   1.325 +    }
   1.326 +    result->setOffset(getOffset(), status);
   1.327 +    result->setMatchStart(m_strsrch_->search->matchedIndex);
   1.328 +    result->setMatchLength(m_strsrch_->search->matchedLength);
   1.329 +    if (U_FAILURE(status)) {
   1.330 +        return NULL;
   1.331 +    }
   1.332 +    return result;
   1.333 +}
   1.334 +
   1.335 +// protected method -------------------------------------------------
   1.336 +
   1.337 +int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
   1.338 +{
   1.339 +    // values passed here are already in the pre-shift position
   1.340 +    if (U_SUCCESS(status)) {
   1.341 +        if (m_strsrch_->pattern.CELength == 0) {
   1.342 +            m_search_->matchedIndex =
   1.343 +                                    m_search_->matchedIndex == USEARCH_DONE ?
   1.344 +                                    getOffset() : m_search_->matchedIndex + 1;
   1.345 +            m_search_->matchedLength = 0;
   1.346 +            ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
   1.347 +                           &status);
   1.348 +            if (m_search_->matchedIndex == m_search_->textLength) {
   1.349 +                m_search_->matchedIndex = USEARCH_DONE;
   1.350 +            }
   1.351 +        }
   1.352 +        else {
   1.353 +            // looking at usearch.cpp, this part is shifted out to
   1.354 +            // StringSearch instead of SearchIterator because m_strsrch_ is
   1.355 +            // not accessible in SearchIterator
   1.356 +#if 0
   1.357 +            if (position + m_strsrch_->pattern.defaultShiftSize
   1.358 +                > m_search_->textLength) {
   1.359 +                setMatchNotFound();
   1.360 +                return USEARCH_DONE;
   1.361 +            }
   1.362 +#endif
   1.363 +            if (m_search_->matchedLength <= 0) {
   1.364 +                // the flipping direction issue has already been handled
   1.365 +                // in next()
   1.366 +                // for boundary check purposes. this will ensure that the
   1.367 +                // next match will not preceed the current offset
   1.368 +                // note search->matchedIndex will always be set to something
   1.369 +                // in the code
   1.370 +                m_search_->matchedIndex = position - 1;
   1.371 +            }
   1.372 +
   1.373 +            ucol_setOffset(m_strsrch_->textIter, position, &status);
   1.374 +            
   1.375 +#if 0
   1.376 +            for (;;) {
   1.377 +                if (m_search_->isCanonicalMatch) {
   1.378 +                    // can't use exact here since extra accents are allowed.
   1.379 +                    usearch_handleNextCanonical(m_strsrch_, &status);
   1.380 +                }
   1.381 +                else {
   1.382 +                    usearch_handleNextExact(m_strsrch_, &status);
   1.383 +                }
   1.384 +                if (U_FAILURE(status)) {
   1.385 +                    return USEARCH_DONE;
   1.386 +                }
   1.387 +                if (m_breakiterator_ == NULL
   1.388 +#if !UCONFIG_NO_BREAK_ITERATION
   1.389 +                    ||
   1.390 +                    m_search_->matchedIndex == USEARCH_DONE ||
   1.391 +                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
   1.392 +                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
   1.393 +                                                  m_search_->matchedLength))
   1.394 +#endif
   1.395 +                ) {
   1.396 +                    if (m_search_->matchedIndex == USEARCH_DONE) {
   1.397 +                        ucol_setOffset(m_strsrch_->textIter,
   1.398 +                                       m_search_->textLength, &status);
   1.399 +                    }
   1.400 +                    else {
   1.401 +                        ucol_setOffset(m_strsrch_->textIter,
   1.402 +                                       m_search_->matchedIndex, &status);
   1.403 +                    }
   1.404 +                    return m_search_->matchedIndex;
   1.405 +                }
   1.406 +            }
   1.407 +#else
   1.408 +            // if m_strsrch_->breakIter is always the same as m_breakiterator_
   1.409 +            // then we don't need to check the match boundaries here because
   1.410 +            // usearch_handleNextXXX will already have done it.
   1.411 +            if (m_search_->isCanonicalMatch) {
   1.412 +            	// *could* actually use exact here 'cause no extra accents allowed...
   1.413 +            	usearch_handleNextCanonical(m_strsrch_, &status);
   1.414 +            } else {
   1.415 +            	usearch_handleNextExact(m_strsrch_, &status);
   1.416 +            }
   1.417 +            
   1.418 +            if (U_FAILURE(status)) {
   1.419 +            	return USEARCH_DONE;
   1.420 +            }
   1.421 +            
   1.422 +            if (m_search_->matchedIndex == USEARCH_DONE) {
   1.423 +            	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
   1.424 +            } else {
   1.425 +            	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
   1.426 +            }
   1.427 +            
   1.428 +            return m_search_->matchedIndex;
   1.429 +#endif
   1.430 +        }
   1.431 +    }
   1.432 +    return USEARCH_DONE;
   1.433 +}
   1.434 +
   1.435 +int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
   1.436 +{
   1.437 +    // values passed here are already in the pre-shift position
   1.438 +    if (U_SUCCESS(status)) {
   1.439 +        if (m_strsrch_->pattern.CELength == 0) {
   1.440 +            m_search_->matchedIndex =
   1.441 +                  (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
   1.442 +                   m_search_->matchedIndex);
   1.443 +            if (m_search_->matchedIndex == 0) {
   1.444 +                setMatchNotFound();
   1.445 +            }
   1.446 +            else {
   1.447 +                m_search_->matchedIndex --;
   1.448 +                ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
   1.449 +                               &status);
   1.450 +                m_search_->matchedLength = 0;
   1.451 +            }
   1.452 +        }
   1.453 +        else {
   1.454 +            // looking at usearch.cpp, this part is shifted out to
   1.455 +            // StringSearch instead of SearchIterator because m_strsrch_ is
   1.456 +            // not accessible in SearchIterator
   1.457 +#if 0
   1.458 +            if (!m_search_->isOverlap &&
   1.459 +                position - m_strsrch_->pattern.defaultShiftSize < 0) {
   1.460 +                setMatchNotFound();
   1.461 +                return USEARCH_DONE;
   1.462 +            }
   1.463 +            
   1.464 +            for (;;) {
   1.465 +                if (m_search_->isCanonicalMatch) {
   1.466 +                    // can't use exact here since extra accents are allowed.
   1.467 +                    usearch_handlePreviousCanonical(m_strsrch_, &status);
   1.468 +                }
   1.469 +                else {
   1.470 +                    usearch_handlePreviousExact(m_strsrch_, &status);
   1.471 +                }
   1.472 +                if (U_FAILURE(status)) {
   1.473 +                    return USEARCH_DONE;
   1.474 +                }
   1.475 +                if (m_breakiterator_ == NULL
   1.476 +#if !UCONFIG_NO_BREAK_ITERATION
   1.477 +                    ||
   1.478 +                    m_search_->matchedIndex == USEARCH_DONE ||
   1.479 +                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
   1.480 +                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
   1.481 +                                                  m_search_->matchedLength))
   1.482 +#endif
   1.483 +                ) {
   1.484 +                    return m_search_->matchedIndex;
   1.485 +                }
   1.486 +            }
   1.487 +#else
   1.488 +            ucol_setOffset(m_strsrch_->textIter, position, &status);
   1.489 +            
   1.490 +            if (m_search_->isCanonicalMatch) {
   1.491 +            	// *could* use exact match here since extra accents *not* allowed!
   1.492 +            	usearch_handlePreviousCanonical(m_strsrch_, &status);
   1.493 +            } else {
   1.494 +            	usearch_handlePreviousExact(m_strsrch_, &status);
   1.495 +            }
   1.496 +            
   1.497 +            if (U_FAILURE(status)) {
   1.498 +            	return USEARCH_DONE;
   1.499 +            }
   1.500 +            
   1.501 +            return m_search_->matchedIndex;
   1.502 +#endif
   1.503 +        }
   1.504 +
   1.505 +        return m_search_->matchedIndex;
   1.506 +    }
   1.507 +    return USEARCH_DONE;
   1.508 +}
   1.509 +
   1.510 +U_NAMESPACE_END
   1.511 +
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */

mercurial