intl/icu/source/i18n/search.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/search.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,443 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
     1.7 +**********************************************************************
     1.8 +*   Date        Name        Description
     1.9 +*  03/22/2000   helena      Creation.
    1.10 +**********************************************************************
    1.11 +*/
    1.12 +
    1.13 +#include "unicode/utypes.h"
    1.14 +
    1.15 +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
    1.16 +
    1.17 +#include "unicode/brkiter.h"
    1.18 +#include "unicode/schriter.h"
    1.19 +#include "unicode/search.h"
    1.20 +#include "usrchimp.h"
    1.21 +#include "cmemory.h"
    1.22 +
    1.23 +// public constructors and destructors -----------------------------------
    1.24 +U_NAMESPACE_BEGIN
    1.25 +
    1.26 +SearchIterator::SearchIterator(const SearchIterator &other)  // Copy constructor: allocates a new USearch struct and copies fields from other.
    1.27 +    : UObject(other)
    1.28 +{   
    1.29 +    m_breakiterator_            = other.m_breakiterator_;  // pointer copy only, the break iterator is not cloned
    1.30 +    m_text_                     = other.m_text_;  // own copy of the text
    1.31 +    m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));   // NOTE(review): the result is dereferenced below without a NULL check
    1.32 +    m_search_->breakIter        = other.m_search_->breakIter;
    1.33 +    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
    1.34 +    m_search_->isOverlap        = other.m_search_->isOverlap;
    1.35 +    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
    1.36 +    m_search_->matchedIndex     = other.m_search_->matchedIndex;
    1.37 +    m_search_->matchedLength    = other.m_search_->matchedLength;
    1.38 +    m_search_->text             = other.m_search_->text;  // NOTE(review): takes the pointer held by other, not m_text_.getBuffer() - confirm lifetime
    1.39 +    m_search_->textLength       = other.m_search_->textLength;  // NOTE(review): isForwardSearching and reset are never assigned in this constructor
    1.40 +}
    1.41 +
    1.42 +SearchIterator::~SearchIterator()  // Destructor: frees the USearch struct with uprv_free.
    1.43 +{
    1.44 +    if (m_search_ != NULL) {
    1.45 +        uprv_free(m_search_);  // m_breakiterator_ is not deleted here
    1.46 +    }
    1.47 +}
    1.48 +
    1.49 +// public get and set methods ----------------------------------------
    1.50 +
    1.51 +void SearchIterator::setAttribute(USearchAttribute       attribute,  // Sets one search attribute; an unknown attribute sets status to U_ILLEGAL_ARGUMENT_ERROR.
    1.52 +                                  USearchAttributeValue  value,
    1.53 +                                  UErrorCode            &status)
    1.54 +{
    1.55 +    if (U_SUCCESS(status)) {  // no attribute is changed when status already holds a failure
    1.56 +        switch (attribute)
    1.57 +        {
    1.58 +        case USEARCH_OVERLAP :
    1.59 +            m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);  // any value other than USEARCH_ON turns overlap off
    1.60 +            break;
    1.61 +        case USEARCH_CANONICAL_MATCH :
    1.62 +            m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);  // any value other than USEARCH_ON turns canonical match off
    1.63 +            break;
    1.64 +        case USEARCH_ELEMENT_COMPARISON :
    1.65 +            if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
    1.66 +                m_search_->elementComparisonType = (int16_t)value;  // the two wildcard modes are stored as their enum value
    1.67 +            } else {
    1.68 +                m_search_->elementComparisonType = 0;  // every other value is stored as 0
    1.69 +            }
    1.70 +            break;
    1.71 +        default:
    1.72 +            status = U_ILLEGAL_ARGUMENT_ERROR;
    1.73 +        }
    1.74 +    }
    1.75 +    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {  // evaluated after the switch and outside the success guard
    1.76 +        status = U_ILLEGAL_ARGUMENT_ERROR;  // NOTE(review): the attribute may already have been modified above when this error is reported
    1.77 +    }
    1.78 +}
    1.79 +
    1.80 +USearchAttributeValue SearchIterator::getAttribute(  // Returns the current value of the given attribute.
    1.81 +                                          USearchAttribute  attribute) const
    1.82 +{
    1.83 +    switch (attribute) {
    1.84 +    case USEARCH_OVERLAP :
    1.85 +        return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
    1.86 +    case USEARCH_CANONICAL_MATCH :
    1.87 +        return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : 
    1.88 +                                                                USEARCH_OFF);
    1.89 +    case USEARCH_ELEMENT_COMPARISON :
    1.90 +        {
    1.91 +            int16_t value = m_search_->elementComparisonType;
    1.92 +            if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
    1.93 +                return (USearchAttributeValue)value;  // stored value is one of the two wildcard modes
    1.94 +            } else {
    1.95 +                return USEARCH_STANDARD_ELEMENT_COMPARISON;  // any other stored value reads back as standard comparison
    1.96 +            }
    1.97 +        }
    1.98 +    default :
    1.99 +        return USEARCH_DEFAULT;  // attribute not handled by this switch
   1.100 +    }
   1.101 +}
   1.102 +    
   1.103 +int32_t SearchIterator::getMatchedStart() const  // Returns the stored matchedIndex; setMatchNotFound stores USEARCH_DONE there.
   1.104 +{
   1.105 +    return m_search_->matchedIndex;
   1.106 +}
   1.107 +
   1.108 +int32_t SearchIterator::getMatchedLength() const  // Returns the stored matchedLength; setMatchNotFound stores 0 there.
   1.109 +{
   1.110 +    return m_search_->matchedLength;
   1.111 +}
   1.112 +    
   1.113 +void SearchIterator::getMatchedText(UnicodeString &result) const  // Writes the currently matched text into result, or empties result when there is no match.
   1.114 +{
   1.115 +    int32_t matchedindex  = m_search_->matchedIndex;
   1.116 +    int32_t     matchedlength = m_search_->matchedLength;
   1.117 +    if (matchedindex != USEARCH_DONE && matchedlength != 0) {  // a match needs a valid index and a nonzero length
   1.118 +        result.setTo(m_search_->text + matchedindex, matchedlength); // result is set from matchedlength UChars starting at text + matchedindex
   1.119 +    }
   1.120 +    else {
   1.121 +        result.remove();  // no current match: result is emptied
   1.122 +    }
   1.123 +}
   1.124 +    
   1.125 +void SearchIterator::setBreakIterator(BreakIterator *breakiter, // Stores breakiter in m_search_->breakIter and m_breakiterator_; does nothing when status is a failure.
   1.126 +                                      UErrorCode &status)
   1.127 +{
   1.128 +    if (U_SUCCESS(status)) {
   1.129 +#if 0
   1.130 +        m_search_->breakIter = NULL;
   1.131 +        // the c++ breakiterator may not make use of ubreakiterator.
   1.132 +        // so we'll have to keep track of it ourselves.
   1.133 +#else
   1.134 +        // Well, gee... the Constructors that take a BreakIterator
   1.135 +        // all cast the BreakIterator to a UBreakIterator and
   1.136 +        // pass it to the corresponding usearch_openFromXXX
   1.137 +        // routine, so there's no reason not to do this.
   1.138 +        //
   1.139 +        // Besides, a UBreakIterator is a BreakIterator, so
   1.140 +        // any subclass of BreakIterator should work fine here...
   1.141 +        m_search_->breakIter = (UBreakIterator *) breakiter;  // C-style cast of the C++ object pointer to the C handle type
   1.142 +#endif
   1.143 +        
   1.144 +        m_breakiterator_ = breakiter;  // pointer is stored as is, no clone is made
   1.145 +    }
   1.146 +}
   1.147 +    
   1.148 +const BreakIterator * SearchIterator::getBreakIterator(void) const  // Returns the stored BreakIterator pointer; the default constructor leaves it NULL.
   1.149 +{
   1.150 +    return m_breakiterator_;
   1.151 +}
   1.152 +
   1.153 +void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)  // Replaces the text to search; an empty text is rejected with U_ILLEGAL_ARGUMENT_ERROR.
   1.154 +{
   1.155 +    if (U_SUCCESS(status)) {
   1.156 +        if (text.length() == 0) {
   1.157 +            status = U_ILLEGAL_ARGUMENT_ERROR;  // the previous text stays in place
   1.158 +        }
   1.159 +        else {
   1.160 +            m_text_        = text;  // keep an own copy of the text
   1.161 +            m_search_->text = m_text_.getBuffer();  // raw pointer into the copy, refreshed together with the length
   1.162 +            m_search_->textLength = m_text_.length();  // NOTE(review): matchedIndex and matchedLength are not reset in this function
   1.163 +        }
   1.164 +    }
   1.165 +}
   1.166 +
   1.167 +void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)  // Extracts the text of the CharacterIterator and installs it as the text to search.
   1.168 +{
   1.169 +    if (U_SUCCESS(status)) {
   1.170 +        text.getText(m_text_);  // m_text_ is overwritten before any validation happens
   1.171 +        setText(m_text_, status);  // UnicodeString overload; NOTE(review): if it rejects an empty text, m_search_->text and textLength keep their old values although m_text_ changed
   1.172 +    }
   1.173 +}
   1.174 +    
   1.175 +const UnicodeString & SearchIterator::getText(void) const  // Returns a reference to the internally stored copy of the text.
   1.176 +{
   1.177 +    return m_text_;
   1.178 +}
   1.179 +
   1.180 +// operator overloading ----------------------------------------------
   1.181 +
   1.182 +UBool SearchIterator::operator==(const SearchIterator &that) const  // Equality: compares break iterator pointer, attributes, match state, offset and text contents.
   1.183 +{
   1.184 +    if (this == &that) {  // same object
   1.185 +        return TRUE;
   1.186 +    }
   1.187 +    return (m_breakiterator_            == that.m_breakiterator_ &&  // pointer identity, not a deep comparison
   1.188 +            m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
   1.189 +            m_search_->isOverlap        == that.m_search_->isOverlap &&
   1.190 +            m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
   1.191 +            m_search_->matchedIndex     == that.m_search_->matchedIndex &&
   1.192 +            m_search_->matchedLength    == that.m_search_->matchedLength &&
   1.193 +            m_search_->textLength       == that.m_search_->textLength &&
   1.194 +            getOffset() == that.getOffset() &&  // current offsets must match as well
   1.195 +            (uprv_memcmp(m_search_->text, that.m_search_->text, // text lengths are already known to be equal here because && short-circuits
   1.196 +                              m_search_->textLength * sizeof(UChar)) == 0));  // NOTE(review): isForwardSearching and reset are not part of the comparison
   1.197 +}
   1.198 +
   1.199 +// public methods ----------------------------------------------------
   1.200 +
   1.201 +int32_t SearchIterator::first(UErrorCode &status)  // Sets the offset to 0 and returns the result of handleNext(0, status).
   1.202 +{
   1.203 +    if (U_FAILURE(status)) {
   1.204 +        return USEARCH_DONE;  // incoming failure: nothing is changed
   1.205 +    }
   1.206 +    setOffset(0, status);
   1.207 +    return handleNext(0, status);  // status is not rechecked between setOffset and handleNext
   1.208 +}
   1.209 +
   1.210 +int32_t SearchIterator::following(int32_t position, // Sets the offset to position and returns the result of handleNext(position, status).
   1.211 +                                      UErrorCode &status)
   1.212 +{
   1.213 +    if (U_FAILURE(status)) {
   1.214 +        return USEARCH_DONE;  // incoming failure: nothing is changed
   1.215 +    }
   1.216 +    setOffset(position, status);
   1.217 +    return handleNext(position, status);  // status is not rechecked between setOffset and handleNext
   1.218 +}
   1.219 +    
   1.220 +int32_t SearchIterator::last(UErrorCode &status)  // Sets the offset to the text length and returns the result of handlePrev from there.
   1.221 +{
   1.222 +    if (U_FAILURE(status)) {
   1.223 +        return USEARCH_DONE;  // incoming failure: nothing is changed
   1.224 +    }
   1.225 +    setOffset(m_search_->textLength, status);
   1.226 +    return handlePrev(m_search_->textLength, status);  // status is not rechecked between setOffset and handlePrev
   1.227 +}
   1.228 +
   1.229 +int32_t SearchIterator::preceding(int32_t position, // Sets the offset to position and returns the result of handlePrev(position, status).
   1.230 +                                      UErrorCode &status)
   1.231 +{
   1.232 +    if (U_FAILURE(status)) {
   1.233 +        return USEARCH_DONE;  // incoming failure: nothing is changed
   1.234 +    }
   1.235 +    setOffset(position, status);
   1.236 +    return handlePrev(position, status);  // status is not rechecked between setOffset and handlePrev
   1.237 +}
   1.238 +
   1.239 +int32_t SearchIterator::next(UErrorCode &status)  // Forward step: returns the result of handleNext, the existing match index when the direction flips, or USEARCH_DONE.
   1.240 +{
   1.241 +    if (U_SUCCESS(status)) {
   1.242 +        int32_t offset = getOffset();
   1.243 +        int32_t matchindex  = m_search_->matchedIndex;
   1.244 +        int32_t     matchlength = m_search_->matchedLength;
   1.245 +        m_search_->reset = FALSE;  // the iterator no longer counts as freshly reset
   1.246 +        if (m_search_->isForwardSearching == TRUE) {
   1.247 +            int32_t textlength = m_search_->textLength;
   1.248 +            if (offset == textlength || matchindex == textlength || 
   1.249 +                (matchindex != USEARCH_DONE && 
   1.250 +                matchindex + matchlength >= textlength)) {
   1.251 +                // not enough characters to match
   1.252 +                setMatchNotFound();
   1.253 +                return USEARCH_DONE; 
   1.254 +            }
   1.255 +        }
   1.256 +        else {
   1.257 +            // switching direction. 
   1.258 +            // if matchedIndex == USEARCH_DONE, it means that either a 
   1.259 +            // setOffset has been called or that previous ran off the text
   1.260 +            // string. the iterator would have been set to offset 0 if a 
   1.261 +            // match is not found.
   1.262 +            m_search_->isForwardSearching = TRUE;
   1.263 +            if (m_search_->matchedIndex != USEARCH_DONE) {
   1.264 +                // there's no need to set the collation element iterator
   1.265 +                // the next call to next will set the offset.
   1.266 +                return matchindex;  // the existing match is returned without a new search
   1.267 +            }
   1.268 +        }
   1.269 +
   1.270 +        if (matchlength > 0) {
   1.271 +            // if matchlength is 0 we are at the start of the iteration
   1.272 +            if (m_search_->isOverlap) {
   1.273 +                offset ++;  // overlap on: resume one position past the current offset
   1.274 +            }
   1.275 +            else {
   1.276 +                offset += matchlength;  // overlap off: advance the offset by the length of the previous match
   1.277 +            }
   1.278 +        }
   1.279 +        return handleNext(offset, status);
   1.280 +    }
   1.281 +    return USEARCH_DONE;  // status was already a failure on entry
   1.282 +}
   1.283 +
   1.284 +int32_t SearchIterator::previous(UErrorCode &status)  // Backward step: returns the result of handlePrev, the existing match index when the direction flips, or USEARCH_DONE.
   1.285 +{
   1.286 +    if (U_SUCCESS(status)) {
   1.287 +        int32_t offset;
   1.288 +        if (m_search_->reset) {  // reset flag still set: start from the end of the text
   1.289 +            offset                       = m_search_->textLength;
   1.290 +            m_search_->isForwardSearching = FALSE;
   1.291 +            m_search_->reset              = FALSE;
   1.292 +            setOffset(offset, status);
   1.293 +        }
   1.294 +        else {
   1.295 +            offset = getOffset();
   1.296 +        }
   1.297 +        
   1.298 +        int32_t matchindex = m_search_->matchedIndex;
   1.299 +        if (m_search_->isForwardSearching == TRUE) {
   1.300 +            // switching direction. 
   1.301 +            // if matchedIndex == USEARCH_DONE, it means that either a 
   1.302 +            // setOffset has been called or that next ran off the text
   1.303 +            // string. the iterator would have been set to offset textLength if 
   1.304 +            // a match is not found.
   1.305 +            m_search_->isForwardSearching = FALSE;
   1.306 +            if (matchindex != USEARCH_DONE) {
   1.307 +                return matchindex;  // the existing match is returned without a new search
   1.308 +            }
   1.309 +        }
   1.310 +        else {
   1.311 +            if (offset == 0 || matchindex == 0) {
   1.312 +                // not enough characters to match
   1.313 +                setMatchNotFound();
   1.314 +                return USEARCH_DONE; 
   1.315 +            }
   1.316 +        }
   1.317 +
   1.318 +        if (matchindex != USEARCH_DONE) {
   1.319 +            if (m_search_->isOverlap) {
   1.320 +                matchindex += m_search_->matchedLength - 2;  // NOTE(review): start position shifted by matchedLength - 2 when overlap is on; the reason for 2 is not visible here
   1.321 +            }
   1.322 +
   1.323 +            return handlePrev(matchindex, status); 
   1.324 +        }
   1.325 +
   1.326 +        return handlePrev(offset, status);  // no current match: search backward from the current offset
   1.327 +    }
   1.328 +
   1.329 +    return USEARCH_DONE;  // status was already a failure on entry
   1.330 +}
   1.331 +
   1.332 +void SearchIterator::reset()  // Clears the match, moves the offset to 0 and restores the default attribute and direction flags.
   1.333 +{
   1.334 +    UErrorCode status = U_ZERO_ERROR;  // local status: an error from setOffset is not reported to the caller
   1.335 +    setMatchNotFound();
   1.336 +    setOffset(0, status);  // overrides the offset chosen inside setMatchNotFound
   1.337 +    m_search_->isOverlap          = FALSE;
   1.338 +    m_search_->isCanonicalMatch   = FALSE;
   1.339 +    m_search_->elementComparisonType = 0;
   1.340 +    m_search_->isForwardSearching = TRUE;
   1.341 +    m_search_->reset              = TRUE;  // previous() checks this flag to start from the end of the text
   1.342 +}
   1.343 +
   1.344 +// protected constructors and destructors -----------------------------
   1.345 +
   1.346 +SearchIterator::SearchIterator()  // Default constructor: allocates the USearch struct and sets defaults with no text and no break iterator.
   1.347 +{
   1.348 +    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));  // NOTE(review): the result is dereferenced below without a NULL check
   1.349 +    m_search_->breakIter          = NULL;
   1.350 +    m_search_->isOverlap          = FALSE;
   1.351 +    m_search_->isCanonicalMatch   = FALSE;
   1.352 +    m_search_->elementComparisonType = 0;
   1.353 +    m_search_->isForwardSearching = TRUE;
   1.354 +    m_search_->reset              = TRUE;
   1.355 +    m_search_->matchedIndex       = USEARCH_DONE;  // no match yet
   1.356 +    m_search_->matchedLength      = 0;
   1.357 +    m_search_->text               = NULL;  // no text until setText is called
   1.358 +    m_search_->textLength         = 0;
   1.359 +    m_breakiterator_              = NULL;
   1.360 +}
   1.361 +
   1.362 +SearchIterator::SearchIterator(const UnicodeString &text, // Constructs an iterator over a copy of text with default attributes.
   1.363 +                                     BreakIterator *breakiter) :
   1.364 +                                     m_breakiterator_(breakiter),
   1.365 +                                     m_text_(text)
   1.366 +{
   1.367 +    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));  // NOTE(review): the result is dereferenced below without a NULL check
   1.368 +    m_search_->breakIter          = NULL;  // NOTE(review): stays NULL even when breakiter is non-NULL, whereas setBreakIterator sets both fields
   1.369 +    m_search_->isOverlap          = FALSE;
   1.370 +    m_search_->isCanonicalMatch   = FALSE;
   1.371 +    m_search_->elementComparisonType = 0;
   1.372 +    m_search_->isForwardSearching = TRUE;
   1.373 +    m_search_->reset              = TRUE;
   1.374 +    m_search_->matchedIndex       = USEARCH_DONE;  // no match yet
   1.375 +    m_search_->matchedLength      = 0;
   1.376 +    m_search_->text               = m_text_.getBuffer();  // raw pointer into the own copy of the text
   1.377 +    m_search_->textLength         = text.length();  // length of the argument that m_text_ was copied from
   1.378 +}
   1.379 +
   1.380 +SearchIterator::SearchIterator(CharacterIterator &text, // Constructs an iterator over the text extracted from a CharacterIterator, with default attributes.
   1.381 +                               BreakIterator     *breakiter) :
   1.382 +                               m_breakiterator_(breakiter)
   1.383 +{
   1.384 +    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));  // NOTE(review): the result is dereferenced below without a NULL check
   1.385 +    m_search_->breakIter          = NULL;  // NOTE(review): stays NULL even when breakiter is non-NULL, whereas setBreakIterator sets both fields
   1.386 +    m_search_->isOverlap          = FALSE;
   1.387 +    m_search_->isCanonicalMatch   = FALSE;
   1.388 +    m_search_->elementComparisonType = 0;
   1.389 +    m_search_->isForwardSearching = TRUE;
   1.390 +    m_search_->reset              = TRUE;
   1.391 +    m_search_->matchedIndex       = USEARCH_DONE;  // no match yet
   1.392 +    m_search_->matchedLength      = 0;
   1.393 +    text.getText(m_text_);  // fills m_text_ from the iterator
   1.394 +    m_search_->text               = m_text_.getBuffer();  // raw pointer into the own copy of the text
   1.395 +    m_search_->textLength         = m_text_.length();
   1.396 +    m_breakiterator_             = breakiter;  // repeats the value already set in the initializer list
   1.397 +}
   1.398 +
   1.399 +// protected methods ------------------------------------------------------
   1.400 +
   1.401 +SearchIterator & SearchIterator::operator=(const SearchIterator &that)  // Assignment: copies fields from that into the already allocated USearch struct.
   1.402 +{
   1.403 +    if (this != &that) {  // self-assignment leaves the object untouched
   1.404 +        m_breakiterator_            = that.m_breakiterator_;  // pointer copy only, the break iterator is not cloned
   1.405 +        m_text_                     = that.m_text_;  // own copy of the text
   1.406 +        m_search_->breakIter        = that.m_search_->breakIter;
   1.407 +        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
   1.408 +        m_search_->isOverlap        = that.m_search_->isOverlap;
   1.409 +        m_search_->elementComparisonType = that.m_search_->elementComparisonType;
   1.410 +        m_search_->matchedIndex     = that.m_search_->matchedIndex;
   1.411 +        m_search_->matchedLength    = that.m_search_->matchedLength;
   1.412 +        m_search_->text             = that.m_search_->text;  // NOTE(review): takes the pointer held by that, not m_text_.getBuffer() - confirm lifetime
   1.413 +        m_search_->textLength       = that.m_search_->textLength;  // NOTE(review): isForwardSearching and reset are not copied and keep their old values
   1.414 +    }
   1.415 +    return *this;
   1.416 +}
   1.417 +
   1.418 +void SearchIterator::setMatchLength(int32_t length)  // Stores length as the current match length; the value is not validated.
   1.419 +{
   1.420 +    m_search_->matchedLength = length;
   1.421 +}
   1.422 +
   1.423 +void SearchIterator::setMatchStart(int32_t position)  // Stores position as the current match start index; the value is not validated.
   1.424 +{
   1.425 +    m_search_->matchedIndex = position;
   1.426 +}
   1.427 +
   1.428 +void SearchIterator::setMatchNotFound() // Records that there is no match and moves the offset to the end of the text in the current search direction.
   1.429 +{
   1.430 +    setMatchStart(USEARCH_DONE);
   1.431 +    setMatchLength(0);
   1.432 +    UErrorCode status = U_ZERO_ERROR;  // local status: an error from setOffset is not reported to the caller
   1.433 +    // by default no errors should be returned here since offsets are within 
   1.434 +    // range.
   1.435 +    if (m_search_->isForwardSearching) {
   1.436 +        setOffset(m_search_->textLength, status);  // forward search: offset goes to the end of the text
   1.437 +    }
   1.438 +    else {
   1.439 +        setOffset(0, status);  // backward search: offset goes to the start of the text
   1.440 +    }
   1.441 +}
   1.442 +
   1.443 +
   1.444 +U_NAMESPACE_END
   1.445 +
   1.446 +#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */

mercurial