intl/icu/source/i18n/search.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
     4 **********************************************************************
     5 *   Date        Name        Description
     6 *  03/22/2000   helena      Creation.
     7 **********************************************************************
     8 */
    10 #include "unicode/utypes.h"
    12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
    14 #include "unicode/brkiter.h"
    15 #include "unicode/schriter.h"
    16 #include "unicode/search.h"
    17 #include "usrchimp.h"
    18 #include "cmemory.h"
    20 // public constructors and destructors -----------------------------------
    21 U_NAMESPACE_BEGIN
    23 SearchIterator::SearchIterator(const SearchIterator &other)
    24     : UObject(other)
    25 {   
    26     m_breakiterator_            = other.m_breakiterator_;
    27     m_text_                     = other.m_text_;
    28     m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));   
    29     m_search_->breakIter        = other.m_search_->breakIter;
    30     m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
    31     m_search_->isOverlap        = other.m_search_->isOverlap;
    32     m_search_->elementComparisonType = other.m_search_->elementComparisonType;
    33     m_search_->matchedIndex     = other.m_search_->matchedIndex;
    34     m_search_->matchedLength    = other.m_search_->matchedLength;
    35     m_search_->text             = other.m_search_->text;
    36     m_search_->textLength       = other.m_search_->textLength;
    37 }
    39 SearchIterator::~SearchIterator()
    40 {
    41     if (m_search_ != NULL) {
    42         uprv_free(m_search_);
    43     }
    44 }
    46 // public get and set methods ----------------------------------------
    48 void SearchIterator::setAttribute(USearchAttribute       attribute,
    49                                   USearchAttributeValue  value,
    50                                   UErrorCode            &status)
    51 {
    52     if (U_SUCCESS(status)) {
    53         switch (attribute)
    54         {
    55         case USEARCH_OVERLAP :
    56             m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
    57             break;
    58         case USEARCH_CANONICAL_MATCH :
    59             m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
    60             break;
    61         case USEARCH_ELEMENT_COMPARISON :
    62             if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
    63                 m_search_->elementComparisonType = (int16_t)value;
    64             } else {
    65                 m_search_->elementComparisonType = 0;
    66             }
    67             break;
    68         default:
    69             status = U_ILLEGAL_ARGUMENT_ERROR;
    70         }
    71     }
    72     if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
    73         status = U_ILLEGAL_ARGUMENT_ERROR;
    74     }
    75 }
    77 USearchAttributeValue SearchIterator::getAttribute(
    78                                           USearchAttribute  attribute) const
    79 {
    80     switch (attribute) {
    81     case USEARCH_OVERLAP :
    82         return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
    83     case USEARCH_CANONICAL_MATCH :
    84         return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : 
    85                                                                 USEARCH_OFF);
    86     case USEARCH_ELEMENT_COMPARISON :
    87         {
    88             int16_t value = m_search_->elementComparisonType;
    89             if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
    90                 return (USearchAttributeValue)value;
    91             } else {
    92                 return USEARCH_STANDARD_ELEMENT_COMPARISON;
    93             }
    94         }
    95     default :
    96         return USEARCH_DEFAULT;
    97     }
    98 }
   100 int32_t SearchIterator::getMatchedStart() const
   101 {
   102     return m_search_->matchedIndex;
   103 }
   105 int32_t SearchIterator::getMatchedLength() const
   106 {
   107     return m_search_->matchedLength;
   108 }
   110 void SearchIterator::getMatchedText(UnicodeString &result) const
   111 {
   112     int32_t matchedindex  = m_search_->matchedIndex;
   113     int32_t     matchedlength = m_search_->matchedLength;
   114     if (matchedindex != USEARCH_DONE && matchedlength != 0) {
   115         result.setTo(m_search_->text + matchedindex, matchedlength); 
   116     }
   117     else {
   118         result.remove();
   119     }
   120 }
   122 void SearchIterator::setBreakIterator(BreakIterator *breakiter, 
   123                                       UErrorCode &status)
   124 {
   125     if (U_SUCCESS(status)) {
   126 #if 0
   127         m_search_->breakIter = NULL;
   128         // the c++ breakiterator may not make use of ubreakiterator.
   129         // so we'll have to keep track of it ourselves.
   130 #else
   131         // Well, gee... the Constructors that take a BreakIterator
   132         // all cast the BreakIterator to a UBreakIterator and
   133         // pass it to the corresponding usearch_openFromXXX
   134         // routine, so there's no reason not to do this.
   135         //
   136         // Besides, a UBreakIterator is a BreakIterator, so
   137         // any subclass of BreakIterator should work fine here...
   138         m_search_->breakIter = (UBreakIterator *) breakiter;
   139 #endif
   141         m_breakiterator_ = breakiter;
   142     }
   143 }
   145 const BreakIterator * SearchIterator::getBreakIterator(void) const
   146 {
   147     return m_breakiterator_;
   148 }
   150 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
   151 {
   152     if (U_SUCCESS(status)) {
   153         if (text.length() == 0) {
   154             status = U_ILLEGAL_ARGUMENT_ERROR;
   155         }
   156         else {
   157             m_text_        = text;
   158             m_search_->text = m_text_.getBuffer();
   159             m_search_->textLength = m_text_.length();
   160         }
   161     }
   162 }
   164 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
   165 {
   166     if (U_SUCCESS(status)) {
   167         text.getText(m_text_);
   168         setText(m_text_, status);
   169     }
   170 }
   172 const UnicodeString & SearchIterator::getText(void) const
   173 {
   174     return m_text_;
   175 }
   177 // operator overloading ----------------------------------------------
   179 UBool SearchIterator::operator==(const SearchIterator &that) const
   180 {
   181     if (this == &that) {
   182         return TRUE;
   183     }
   184     return (m_breakiterator_            == that.m_breakiterator_ &&
   185             m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
   186             m_search_->isOverlap        == that.m_search_->isOverlap &&
   187             m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
   188             m_search_->matchedIndex     == that.m_search_->matchedIndex &&
   189             m_search_->matchedLength    == that.m_search_->matchedLength &&
   190             m_search_->textLength       == that.m_search_->textLength &&
   191             getOffset() == that.getOffset() &&
   192             (uprv_memcmp(m_search_->text, that.m_search_->text, 
   193                               m_search_->textLength * sizeof(UChar)) == 0));
   194 }
   196 // public methods ----------------------------------------------------
   198 int32_t SearchIterator::first(UErrorCode &status)
   199 {
   200     if (U_FAILURE(status)) {
   201         return USEARCH_DONE;
   202     }
   203     setOffset(0, status);
   204     return handleNext(0, status);
   205 }
   207 int32_t SearchIterator::following(int32_t position, 
   208                                       UErrorCode &status)
   209 {
   210     if (U_FAILURE(status)) {
   211         return USEARCH_DONE;
   212     }
   213     setOffset(position, status);
   214     return handleNext(position, status);
   215 }
   217 int32_t SearchIterator::last(UErrorCode &status)
   218 {
   219     if (U_FAILURE(status)) {
   220         return USEARCH_DONE;
   221     }
   222     setOffset(m_search_->textLength, status);
   223     return handlePrev(m_search_->textLength, status);
   224 }
   226 int32_t SearchIterator::preceding(int32_t position, 
   227                                       UErrorCode &status)
   228 {
   229     if (U_FAILURE(status)) {
   230         return USEARCH_DONE;
   231     }
   232     setOffset(position, status);
   233     return handlePrev(position, status);
   234 }
   236 int32_t SearchIterator::next(UErrorCode &status)
   237 {
   238     if (U_SUCCESS(status)) {
   239         int32_t offset = getOffset();
   240         int32_t matchindex  = m_search_->matchedIndex;
   241         int32_t     matchlength = m_search_->matchedLength;
   242         m_search_->reset = FALSE;
   243         if (m_search_->isForwardSearching == TRUE) {
   244             int32_t textlength = m_search_->textLength;
   245             if (offset == textlength || matchindex == textlength || 
   246                 (matchindex != USEARCH_DONE && 
   247                 matchindex + matchlength >= textlength)) {
   248                 // not enough characters to match
   249                 setMatchNotFound();
   250                 return USEARCH_DONE; 
   251             }
   252         }
   253         else {
   254             // switching direction. 
   255             // if matchedIndex == USEARCH_DONE, it means that either a 
   256             // setOffset has been called or that previous ran off the text
   257             // string. the iterator would have been set to offset 0 if a 
   258             // match is not found.
   259             m_search_->isForwardSearching = TRUE;
   260             if (m_search_->matchedIndex != USEARCH_DONE) {
   261                 // there's no need to set the collation element iterator
   262                 // the next call to next will set the offset.
   263                 return matchindex;
   264             }
   265         }
   267         if (matchlength > 0) {
   268             // if matchlength is 0 we are at the start of the iteration
   269             if (m_search_->isOverlap) {
   270                 offset ++;
   271             }
   272             else {
   273                 offset += matchlength;
   274             }
   275         }
   276         return handleNext(offset, status);
   277     }
   278     return USEARCH_DONE;
   279 }
   281 int32_t SearchIterator::previous(UErrorCode &status)
   282 {
   283     if (U_SUCCESS(status)) {
   284         int32_t offset;
   285         if (m_search_->reset) {
   286             offset                       = m_search_->textLength;
   287             m_search_->isForwardSearching = FALSE;
   288             m_search_->reset              = FALSE;
   289             setOffset(offset, status);
   290         }
   291         else {
   292             offset = getOffset();
   293         }
   295         int32_t matchindex = m_search_->matchedIndex;
   296         if (m_search_->isForwardSearching == TRUE) {
   297             // switching direction. 
   298             // if matchedIndex == USEARCH_DONE, it means that either a 
   299             // setOffset has been called or that next ran off the text
   300             // string. the iterator would have been set to offset textLength if 
   301             // a match is not found.
   302             m_search_->isForwardSearching = FALSE;
   303             if (matchindex != USEARCH_DONE) {
   304                 return matchindex;
   305             }
   306         }
   307         else {
   308             if (offset == 0 || matchindex == 0) {
   309                 // not enough characters to match
   310                 setMatchNotFound();
   311                 return USEARCH_DONE; 
   312             }
   313         }
   315         if (matchindex != USEARCH_DONE) {
   316             if (m_search_->isOverlap) {
   317                 matchindex += m_search_->matchedLength - 2;
   318             }
   320             return handlePrev(matchindex, status); 
   321         }
   323         return handlePrev(offset, status);
   324     }
   326     return USEARCH_DONE;
   327 }
   329 void SearchIterator::reset()
   330 {
   331     UErrorCode status = U_ZERO_ERROR;
   332     setMatchNotFound();
   333     setOffset(0, status);
   334     m_search_->isOverlap          = FALSE;
   335     m_search_->isCanonicalMatch   = FALSE;
   336     m_search_->elementComparisonType = 0;
   337     m_search_->isForwardSearching = TRUE;
   338     m_search_->reset              = TRUE;
   339 }
   341 // protected constructors and destructors -----------------------------
   343 SearchIterator::SearchIterator()
   344 {
   345     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
   346     m_search_->breakIter          = NULL;
   347     m_search_->isOverlap          = FALSE;
   348     m_search_->isCanonicalMatch   = FALSE;
   349     m_search_->elementComparisonType = 0;
   350     m_search_->isForwardSearching = TRUE;
   351     m_search_->reset              = TRUE;
   352     m_search_->matchedIndex       = USEARCH_DONE;
   353     m_search_->matchedLength      = 0;
   354     m_search_->text               = NULL;
   355     m_search_->textLength         = 0;
   356     m_breakiterator_              = NULL;
   357 }
   359 SearchIterator::SearchIterator(const UnicodeString &text, 
   360                                      BreakIterator *breakiter) :
   361                                      m_breakiterator_(breakiter),
   362                                      m_text_(text)
   363 {
   364     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
   365     m_search_->breakIter          = NULL;
   366     m_search_->isOverlap          = FALSE;
   367     m_search_->isCanonicalMatch   = FALSE;
   368     m_search_->elementComparisonType = 0;
   369     m_search_->isForwardSearching = TRUE;
   370     m_search_->reset              = TRUE;
   371     m_search_->matchedIndex       = USEARCH_DONE;
   372     m_search_->matchedLength      = 0;
   373     m_search_->text               = m_text_.getBuffer();
   374     m_search_->textLength         = text.length();
   375 }
   377 SearchIterator::SearchIterator(CharacterIterator &text, 
   378                                BreakIterator     *breakiter) :
   379                                m_breakiterator_(breakiter)
   380 {
   381     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
   382     m_search_->breakIter          = NULL;
   383     m_search_->isOverlap          = FALSE;
   384     m_search_->isCanonicalMatch   = FALSE;
   385     m_search_->elementComparisonType = 0;
   386     m_search_->isForwardSearching = TRUE;
   387     m_search_->reset              = TRUE;
   388     m_search_->matchedIndex       = USEARCH_DONE;
   389     m_search_->matchedLength      = 0;
   390     text.getText(m_text_);
   391     m_search_->text               = m_text_.getBuffer();
   392     m_search_->textLength         = m_text_.length();
   393     m_breakiterator_             = breakiter;
   394 }
   396 // protected methods ------------------------------------------------------
   398 SearchIterator & SearchIterator::operator=(const SearchIterator &that)
   399 {
   400     if (this != &that) {
   401         m_breakiterator_            = that.m_breakiterator_;
   402         m_text_                     = that.m_text_;
   403         m_search_->breakIter        = that.m_search_->breakIter;
   404         m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
   405         m_search_->isOverlap        = that.m_search_->isOverlap;
   406         m_search_->elementComparisonType = that.m_search_->elementComparisonType;
   407         m_search_->matchedIndex     = that.m_search_->matchedIndex;
   408         m_search_->matchedLength    = that.m_search_->matchedLength;
   409         m_search_->text             = that.m_search_->text;
   410         m_search_->textLength       = that.m_search_->textLength;
   411     }
   412     return *this;
   413 }
   415 void SearchIterator::setMatchLength(int32_t length)
   416 {
   417     m_search_->matchedLength = length;
   418 }
   420 void SearchIterator::setMatchStart(int32_t position)
   421 {
   422     m_search_->matchedIndex = position;
   423 }
   425 void SearchIterator::setMatchNotFound() 
   426 {
   427     setMatchStart(USEARCH_DONE);
   428     setMatchLength(0);
   429     UErrorCode status = U_ZERO_ERROR;
   430     // by default no errors should be returned here since offsets are within 
   431     // range.
   432     if (m_search_->isForwardSearching) {
   433         setOffset(m_search_->textLength, status);
   434     }
   435     else {
   436         setOffset(0, status);
   437     }
   438 }
   441 U_NAMESPACE_END
   443 #endif /* #if !UCONFIG_NO_COLLATION */

mercurial