intl/icu/source/i18n/stsearch.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 2001-2008 IBM and others. All rights reserved.
     4 **********************************************************************
     5 *   Date        Name        Description
     6 *  03/22/2000   helena      Creation.
     7 **********************************************************************
     8 */
    10 #include "unicode/utypes.h"
    12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
    14 #include "unicode/stsearch.h"
    15 #include "usrchimp.h"
    16 #include "cmemory.h"
    18 U_NAMESPACE_BEGIN
    20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
    22 // public constructors and destructors -----------------------------------
    24 StringSearch::StringSearch(const UnicodeString &pattern,
    25                            const UnicodeString &text,
    26                            const Locale        &locale,
    27                                  BreakIterator *breakiter,
    28                                  UErrorCode    &status) :
    29                            SearchIterator(text, breakiter),
    30                            m_collator_(),
    31                            m_pattern_(pattern)
    32 {
    33     if (U_FAILURE(status)) {
    34         m_strsrch_ = NULL;
    35         return;
    36     }
    38     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
    39                               m_text_.getBuffer(), m_text_.length(),
    40                               locale.getName(), (UBreakIterator *)breakiter,
    41                               &status);
    42     uprv_free(m_search_);
    43     m_search_ = NULL;
    45     // !!! dlf m_collator_ is an odd beast.  basically it is an aliasing
    46     // wrapper around the internal collator and rules, which (here) are
    47     // owned by this stringsearch object.  this means 1) it's destructor
    48     // _should not_ delete the ucollator or rules, and 2) changes made
    49     // to the exposed collator (setStrength etc) _should_ modify the
    50     // ucollator.  thus the collator is not a copy-on-write alias, and it
    51     // needs to distinguish itself not merely from 'stand alone' colators
    52     // but also from copy-on-write ones.  it needs additional state, which
    53     // setUCollator should set.
    55     if (U_SUCCESS(status)) {
    56         // Alias the collator
    57         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    58         // m_search_ has been created by the base SearchIterator class
    59         m_search_        = m_strsrch_->search;
    60     }
    61 }
    63 StringSearch::StringSearch(const UnicodeString     &pattern,
    64                            const UnicodeString     &text,
    65                                  RuleBasedCollator *coll,
    66                                  BreakIterator     *breakiter,
    67                                  UErrorCode        &status) :
    68                            SearchIterator(text, breakiter),
    69                            m_collator_(),
    70                            m_pattern_(pattern)
    71 {
    72     if (U_FAILURE(status)) {
    73         m_strsrch_ = NULL;
    74         return;
    75     }
    76     if (coll == NULL) {
    77         status     = U_ILLEGAL_ARGUMENT_ERROR;
    78         m_strsrch_ = NULL;
    79         return;
    80     }
    81     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    82                                           m_pattern_.length(),
    83                                           m_text_.getBuffer(),
    84                                           m_text_.length(), coll->ucollator,
    85                                           (UBreakIterator *)breakiter,
    86                                           &status);
    87     uprv_free(m_search_);
    88     m_search_ = NULL;
    90     if (U_SUCCESS(status)) {
    91         // Alias the collator
    92         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    93         // m_search_ has been created by the base SearchIterator class
    94         m_search_ = m_strsrch_->search;
    95     }
    96 }
    98 StringSearch::StringSearch(const UnicodeString     &pattern,
    99                                  CharacterIterator &text,
   100                            const Locale            &locale,
   101                                  BreakIterator     *breakiter,
   102                                  UErrorCode        &status) :
   103                            SearchIterator(text, breakiter),
   104                            m_collator_(),
   105                            m_pattern_(pattern)
   106 {
   107     if (U_FAILURE(status)) {
   108         m_strsrch_ = NULL;
   109         return;
   110     }
   111     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
   112                               m_text_.getBuffer(), m_text_.length(),
   113                               locale.getName(), (UBreakIterator *)breakiter,
   114                               &status);
   115     uprv_free(m_search_);
   116     m_search_ = NULL;
   118     if (U_SUCCESS(status)) {
   119         // Alias the collator
   120         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
   121         // m_search_ has been created by the base SearchIterator class
   122         m_search_ = m_strsrch_->search;
   123     }
   124 }
   126 StringSearch::StringSearch(const UnicodeString     &pattern,
   127                                  CharacterIterator &text,
   128                                  RuleBasedCollator *coll,
   129                                  BreakIterator     *breakiter,
   130                                  UErrorCode        &status) :
   131                            SearchIterator(text, breakiter),
   132                            m_collator_(),
   133                            m_pattern_(pattern)
   134 {
   135     if (U_FAILURE(status)) {
   136         m_strsrch_ = NULL;
   137         return;
   138     }
   139     if (coll == NULL) {
   140         status     = U_ILLEGAL_ARGUMENT_ERROR;
   141         m_strsrch_ = NULL;
   142         return;
   143     }
   144     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
   145                                           m_pattern_.length(),
   146                                           m_text_.getBuffer(),
   147                                           m_text_.length(), coll->ucollator,
   148                                           (UBreakIterator *)breakiter,
   149                                           &status);
   150     uprv_free(m_search_);
   151     m_search_ = NULL;
   153     if (U_SUCCESS(status)) {
   154         // Alias the collator
   155         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
   156         // m_search_ has been created by the base SearchIterator class
   157         m_search_ = m_strsrch_->search;
   158     }
   159 }
   161 StringSearch::StringSearch(const StringSearch &that) :
   162                        SearchIterator(that.m_text_, that.m_breakiterator_),
   163                        m_collator_(),
   164                        m_pattern_(that.m_pattern_)
   165 {
   166     UErrorCode status = U_ZERO_ERROR;
   168     // Free m_search_ from the superclass
   169     uprv_free(m_search_);
   170     m_search_ = NULL;
   172     if (that.m_strsrch_ == NULL) {
   173         // This was not a good copy
   174         m_strsrch_ = NULL;
   175     }
   176     else {
   177         // Make a deep copy
   178         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
   179                                               m_pattern_.length(),
   180                                               m_text_.getBuffer(),
   181                                               m_text_.length(),
   182                                               that.m_strsrch_->collator,
   183                                              (UBreakIterator *)that.m_breakiterator_,
   184                                               &status);
   185         if (U_SUCCESS(status)) {
   186             // Alias the collator
   187             m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
   188             // m_search_ has been created by the base SearchIterator class
   189             m_search_        = m_strsrch_->search;
   190         }
   191     }
   192 }
   194 StringSearch::~StringSearch()
   195 {
   196     if (m_strsrch_ != NULL) {
   197         usearch_close(m_strsrch_);
   198         m_search_ = NULL;
   199     }
   200 }
   202 StringSearch *
   203 StringSearch::clone() const {
   204     return new StringSearch(*this);
   205 }
   207 // operator overloading ---------------------------------------------
   208 StringSearch & StringSearch::operator=(const StringSearch &that)
   209 {
   210     if ((*this) != that) {
   211         UErrorCode status = U_ZERO_ERROR;
   212         m_text_          = that.m_text_;
   213         m_breakiterator_ = that.m_breakiterator_;
   214         m_pattern_       = that.m_pattern_;
   215         // all m_search_ in the parent class is linked up with m_strsrch_
   216         usearch_close(m_strsrch_);
   217         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
   218                                               m_pattern_.length(),
   219                                               m_text_.getBuffer(),
   220                                               m_text_.length(),
   221                                               that.m_strsrch_->collator,
   222                                               NULL, &status);
   223         // Check null pointer
   224         if (m_strsrch_ != NULL) {
   225 	        // Alias the collator
   226 	        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
   227 	        m_search_ = m_strsrch_->search;
   228         }
   229     }
   230     return *this;
   231 }
   233 UBool StringSearch::operator==(const SearchIterator &that) const
   234 {
   235     if (this == &that) {
   236         return TRUE;
   237     }
   238     if (SearchIterator::operator ==(that)) {
   239         StringSearch &thatsrch = (StringSearch &)that;
   240         return (this->m_pattern_ == thatsrch.m_pattern_ &&
   241                 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
   242     }
   243     return FALSE;
   244 }
   246 // public get and set methods ----------------------------------------
   248 void StringSearch::setOffset(int32_t position, UErrorCode &status)
   249 {
   250     // status checked in usearch_setOffset
   251     usearch_setOffset(m_strsrch_, position, &status);
   252 }
   254 int32_t StringSearch::getOffset(void) const
   255 {
   256     return usearch_getOffset(m_strsrch_);
   257 }
   259 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
   260 {
   261     if (U_SUCCESS(status)) {
   262         m_text_ = text;
   263         usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
   264     }
   265 }
   267 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
   268 {
   269     if (U_SUCCESS(status)) {
   270         text.getText(m_text_);
   271         usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
   272     }
   273 }
   275 RuleBasedCollator * StringSearch::getCollator() const
   276 {
   277     return (RuleBasedCollator *)&m_collator_;
   278 }
   280 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
   281 {
   282     if (U_SUCCESS(status)) {
   283         usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
   284         // Alias the collator
   285         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
   286     }
   287 }
   289 void StringSearch::setPattern(const UnicodeString &pattern,
   290                                     UErrorCode    &status)
   291 {
   292     if (U_SUCCESS(status)) {
   293         m_pattern_ = pattern;
   294         usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
   295                            &status);
   296     }
   297 }
   299 const UnicodeString & StringSearch::getPattern() const
   300 {
   301     return m_pattern_;
   302 }
   304 // public methods ----------------------------------------------------
   306 void StringSearch::reset()
   307 {
   308     usearch_reset(m_strsrch_);
   309 }
   311 SearchIterator * StringSearch::safeClone(void) const
   312 {
   313     UErrorCode status = U_ZERO_ERROR;
   314     StringSearch *result = new StringSearch(m_pattern_, m_text_,
   315                                             (RuleBasedCollator *)&m_collator_,
   316                                             m_breakiterator_,
   317                                             status);
   318     /* test for NULL */
   319     if (result == 0) {
   320         status = U_MEMORY_ALLOCATION_ERROR;
   321         return 0;
   322     }
   323     result->setOffset(getOffset(), status);
   324     result->setMatchStart(m_strsrch_->search->matchedIndex);
   325     result->setMatchLength(m_strsrch_->search->matchedLength);
   326     if (U_FAILURE(status)) {
   327         return NULL;
   328     }
   329     return result;
   330 }
   332 // protected method -------------------------------------------------
   334 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
   335 {
   336     // values passed here are already in the pre-shift position
   337     if (U_SUCCESS(status)) {
   338         if (m_strsrch_->pattern.CELength == 0) {
   339             m_search_->matchedIndex =
   340                                     m_search_->matchedIndex == USEARCH_DONE ?
   341                                     getOffset() : m_search_->matchedIndex + 1;
   342             m_search_->matchedLength = 0;
   343             ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
   344                            &status);
   345             if (m_search_->matchedIndex == m_search_->textLength) {
   346                 m_search_->matchedIndex = USEARCH_DONE;
   347             }
   348         }
   349         else {
   350             // looking at usearch.cpp, this part is shifted out to
   351             // StringSearch instead of SearchIterator because m_strsrch_ is
   352             // not accessible in SearchIterator
   353 #if 0
   354             if (position + m_strsrch_->pattern.defaultShiftSize
   355                 > m_search_->textLength) {
   356                 setMatchNotFound();
   357                 return USEARCH_DONE;
   358             }
   359 #endif
   360             if (m_search_->matchedLength <= 0) {
   361                 // the flipping direction issue has already been handled
   362                 // in next()
   363                 // for boundary check purposes. this will ensure that the
   364                 // next match will not preceed the current offset
   365                 // note search->matchedIndex will always be set to something
   366                 // in the code
   367                 m_search_->matchedIndex = position - 1;
   368             }
   370             ucol_setOffset(m_strsrch_->textIter, position, &status);
   372 #if 0
   373             for (;;) {
   374                 if (m_search_->isCanonicalMatch) {
   375                     // can't use exact here since extra accents are allowed.
   376                     usearch_handleNextCanonical(m_strsrch_, &status);
   377                 }
   378                 else {
   379                     usearch_handleNextExact(m_strsrch_, &status);
   380                 }
   381                 if (U_FAILURE(status)) {
   382                     return USEARCH_DONE;
   383                 }
   384                 if (m_breakiterator_ == NULL
   385 #if !UCONFIG_NO_BREAK_ITERATION
   386                     ||
   387                     m_search_->matchedIndex == USEARCH_DONE ||
   388                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
   389                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
   390                                                   m_search_->matchedLength))
   391 #endif
   392                 ) {
   393                     if (m_search_->matchedIndex == USEARCH_DONE) {
   394                         ucol_setOffset(m_strsrch_->textIter,
   395                                        m_search_->textLength, &status);
   396                     }
   397                     else {
   398                         ucol_setOffset(m_strsrch_->textIter,
   399                                        m_search_->matchedIndex, &status);
   400                     }
   401                     return m_search_->matchedIndex;
   402                 }
   403             }
   404 #else
   405             // if m_strsrch_->breakIter is always the same as m_breakiterator_
   406             // then we don't need to check the match boundaries here because
   407             // usearch_handleNextXXX will already have done it.
   408             if (m_search_->isCanonicalMatch) {
   409             	// *could* actually use exact here 'cause no extra accents allowed...
   410             	usearch_handleNextCanonical(m_strsrch_, &status);
   411             } else {
   412             	usearch_handleNextExact(m_strsrch_, &status);
   413             }
   415             if (U_FAILURE(status)) {
   416             	return USEARCH_DONE;
   417             }
   419             if (m_search_->matchedIndex == USEARCH_DONE) {
   420             	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
   421             } else {
   422             	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
   423             }
   425             return m_search_->matchedIndex;
   426 #endif
   427         }
   428     }
   429     return USEARCH_DONE;
   430 }
   432 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
   433 {
   434     // values passed here are already in the pre-shift position
   435     if (U_SUCCESS(status)) {
   436         if (m_strsrch_->pattern.CELength == 0) {
   437             m_search_->matchedIndex =
   438                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
   439                    m_search_->matchedIndex);
   440             if (m_search_->matchedIndex == 0) {
   441                 setMatchNotFound();
   442             }
   443             else {
   444                 m_search_->matchedIndex --;
   445                 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
   446                                &status);
   447                 m_search_->matchedLength = 0;
   448             }
   449         }
   450         else {
   451             // looking at usearch.cpp, this part is shifted out to
   452             // StringSearch instead of SearchIterator because m_strsrch_ is
   453             // not accessible in SearchIterator
   454 #if 0
   455             if (!m_search_->isOverlap &&
   456                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
   457                 setMatchNotFound();
   458                 return USEARCH_DONE;
   459             }
   461             for (;;) {
   462                 if (m_search_->isCanonicalMatch) {
   463                     // can't use exact here since extra accents are allowed.
   464                     usearch_handlePreviousCanonical(m_strsrch_, &status);
   465                 }
   466                 else {
   467                     usearch_handlePreviousExact(m_strsrch_, &status);
   468                 }
   469                 if (U_FAILURE(status)) {
   470                     return USEARCH_DONE;
   471                 }
   472                 if (m_breakiterator_ == NULL
   473 #if !UCONFIG_NO_BREAK_ITERATION
   474                     ||
   475                     m_search_->matchedIndex == USEARCH_DONE ||
   476                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
   477                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
   478                                                   m_search_->matchedLength))
   479 #endif
   480                 ) {
   481                     return m_search_->matchedIndex;
   482                 }
   483             }
   484 #else
   485             ucol_setOffset(m_strsrch_->textIter, position, &status);
   487             if (m_search_->isCanonicalMatch) {
   488             	// *could* use exact match here since extra accents *not* allowed!
   489             	usearch_handlePreviousCanonical(m_strsrch_, &status);
   490             } else {
   491             	usearch_handlePreviousExact(m_strsrch_, &status);
   492             }
   494             if (U_FAILURE(status)) {
   495             	return USEARCH_DONE;
   496             }
   498             return m_search_->matchedIndex;
   499 #endif
   500         }
   502         return m_search_->matchedIndex;
   503     }
   504     return USEARCH_DONE;
   505 }
   507 U_NAMESPACE_END
   509 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */

mercurial