1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/stsearch.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,509 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2001-2008 IBM and others. All rights reserved. 1.7 +********************************************************************** 1.8 +* Date Name Description 1.9 +* 03/22/2000 helena Creation. 1.10 +********************************************************************** 1.11 +*/ 1.12 + 1.13 +#include "unicode/utypes.h" 1.14 + 1.15 +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 1.16 + 1.17 +#include "unicode/stsearch.h" 1.18 +#include "usrchimp.h" 1.19 +#include "cmemory.h" 1.20 + 1.21 +U_NAMESPACE_BEGIN 1.22 + 1.23 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) 1.24 + 1.25 +// public constructors and destructors ----------------------------------- 1.26 + 1.27 +StringSearch::StringSearch(const UnicodeString &pattern, 1.28 + const UnicodeString &text, 1.29 + const Locale &locale, 1.30 + BreakIterator *breakiter, 1.31 + UErrorCode &status) : 1.32 + SearchIterator(text, breakiter), 1.33 + m_collator_(), 1.34 + m_pattern_(pattern) 1.35 +{ 1.36 + if (U_FAILURE(status)) { 1.37 + m_strsrch_ = NULL; 1.38 + return; 1.39 + } 1.40 + 1.41 + m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 1.42 + m_text_.getBuffer(), m_text_.length(), 1.43 + locale.getName(), (UBreakIterator *)breakiter, 1.44 + &status); 1.45 + uprv_free(m_search_); 1.46 + m_search_ = NULL; 1.47 + 1.48 + // !!! dlf m_collator_ is an odd beast. basically it is an aliasing 1.49 + // wrapper around the internal collator and rules, which (here) are 1.50 + // owned by this stringsearch object. this means 1) it's destructor 1.51 + // _should not_ delete the ucollator or rules, and 2) changes made 1.52 + // to the exposed collator (setStrength etc) _should_ modify the 1.53 + // ucollator. thus the collator is not a copy-on-write alias, and it 1.54 + // needs to distinguish itself not merely from 'stand alone' colators 1.55 + // but also from copy-on-write ones. it needs additional state, which 1.56 + // setUCollator should set. 1.57 + 1.58 + if (U_SUCCESS(status)) { 1.59 + // Alias the collator 1.60 + m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 1.61 + // m_search_ has been created by the base SearchIterator class 1.62 + m_search_ = m_strsrch_->search; 1.63 + } 1.64 +} 1.65 + 1.66 +StringSearch::StringSearch(const UnicodeString &pattern, 1.67 + const UnicodeString &text, 1.68 + RuleBasedCollator *coll, 1.69 + BreakIterator *breakiter, 1.70 + UErrorCode &status) : 1.71 + SearchIterator(text, breakiter), 1.72 + m_collator_(), 1.73 + m_pattern_(pattern) 1.74 +{ 1.75 + if (U_FAILURE(status)) { 1.76 + m_strsrch_ = NULL; 1.77 + return; 1.78 + } 1.79 + if (coll == NULL) { 1.80 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.81 + m_strsrch_ = NULL; 1.82 + return; 1.83 + } 1.84 + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 1.85 + m_pattern_.length(), 1.86 + m_text_.getBuffer(), 1.87 + m_text_.length(), coll->ucollator, 1.88 + (UBreakIterator *)breakiter, 1.89 + &status); 1.90 + uprv_free(m_search_); 1.91 + m_search_ = NULL; 1.92 + 1.93 + if (U_SUCCESS(status)) { 1.94 + // Alias the collator 1.95 + m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 1.96 + // m_search_ has been created by the base SearchIterator class 1.97 + m_search_ = m_strsrch_->search; 1.98 + } 1.99 +} 1.100 + 1.101 +StringSearch::StringSearch(const UnicodeString &pattern, 1.102 + CharacterIterator &text, 1.103 + const Locale &locale, 1.104 + BreakIterator *breakiter, 1.105 + UErrorCode &status) : 1.106 + SearchIterator(text, breakiter), 1.107 + m_collator_(), 1.108 + m_pattern_(pattern) 1.109 +{ 1.110 + if (U_FAILURE(status)) { 1.111 + m_strsrch_ = NULL; 1.112 + return; 1.113 + } 1.114 + m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 1.115 + m_text_.getBuffer(), m_text_.length(), 1.116 + locale.getName(), (UBreakIterator *)breakiter, 1.117 + &status); 1.118 + uprv_free(m_search_); 1.119 + m_search_ = NULL; 1.120 + 1.121 + if (U_SUCCESS(status)) { 1.122 + // Alias the collator 1.123 + m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 1.124 + // m_search_ has been created by the base SearchIterator class 1.125 + m_search_ = m_strsrch_->search; 1.126 + } 1.127 +} 1.128 + 1.129 +StringSearch::StringSearch(const UnicodeString &pattern, 1.130 + CharacterIterator &text, 1.131 + RuleBasedCollator *coll, 1.132 + BreakIterator *breakiter, 1.133 + UErrorCode &status) : 1.134 + SearchIterator(text, breakiter), 1.135 + m_collator_(), 1.136 + m_pattern_(pattern) 1.137 +{ 1.138 + if (U_FAILURE(status)) { 1.139 + m_strsrch_ = NULL; 1.140 + return; 1.141 + } 1.142 + if (coll == NULL) { 1.143 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.144 + m_strsrch_ = NULL; 1.145 + return; 1.146 + } 1.147 + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 1.148 + m_pattern_.length(), 1.149 + m_text_.getBuffer(), 1.150 + m_text_.length(), coll->ucollator, 1.151 + (UBreakIterator *)breakiter, 1.152 + &status); 1.153 + uprv_free(m_search_); 1.154 + m_search_ = NULL; 1.155 + 1.156 + if (U_SUCCESS(status)) { 1.157 + // Alias the collator 1.158 + m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 1.159 + // m_search_ has been created by the base SearchIterator class 1.160 + m_search_ = m_strsrch_->search; 1.161 + } 1.162 +} 1.163 + 1.164 +StringSearch::StringSearch(const StringSearch &that) : 1.165 + SearchIterator(that.m_text_, that.m_breakiterator_), 1.166 + m_collator_(), 1.167 + m_pattern_(that.m_pattern_) 1.168 +{ 1.169 + UErrorCode status = U_ZERO_ERROR; 1.170 + 1.171 + // Free m_search_ from the superclass 1.172 + uprv_free(m_search_); 1.173 + m_search_ = NULL; 1.174 + 1.175 + if (that.m_strsrch_ == NULL) { 1.176 + // This was not a good copy 1.177 + m_strsrch_ = NULL; 1.178 + } 1.179 + else { 1.180 + // Make a deep copy 1.181 + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 1.182 + m_pattern_.length(), 1.183 + m_text_.getBuffer(), 1.184 + m_text_.length(), 1.185 + that.m_strsrch_->collator, 1.186 + (UBreakIterator *)that.m_breakiterator_, 1.187 + &status); 1.188 + if (U_SUCCESS(status)) { 1.189 + // Alias the collator 1.190 + m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 1.191 + // m_search_ has been created by the base SearchIterator class 1.192 + m_search_ = m_strsrch_->search; 1.193 + } 1.194 + } 1.195 +} 1.196 + 1.197 +StringSearch::~StringSearch() 1.198 +{ 1.199 + if (m_strsrch_ != NULL) { 1.200 + usearch_close(m_strsrch_); 1.201 + m_search_ = NULL; 1.202 + } 1.203 +} 1.204 + 1.205 +StringSearch * 1.206 +StringSearch::clone() const { 1.207 + return new StringSearch(*this); 1.208 +} 1.209 + 1.210 +// operator overloading --------------------------------------------- 1.211 +StringSearch & StringSearch::operator=(const StringSearch &that) 1.212 +{ 1.213 + if ((*this) != that) { 1.214 + UErrorCode status = U_ZERO_ERROR; 1.215 + m_text_ = that.m_text_; 1.216 + m_breakiterator_ = that.m_breakiterator_; 1.217 + m_pattern_ = that.m_pattern_; 1.218 + // all m_search_ in the parent class is linked up with m_strsrch_ 1.219 + usearch_close(m_strsrch_); 1.220 + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 1.221 + m_pattern_.length(), 1.222 + m_text_.getBuffer(), 1.223 + m_text_.length(), 1.224 + that.m_strsrch_->collator, 1.225 + NULL, &status); 1.226 + // Check null pointer 1.227 + if (m_strsrch_ != NULL) { 1.228 + // Alias the collator 1.229 + m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 1.230 + m_search_ = m_strsrch_->search; 1.231 + } 1.232 + } 1.233 + return *this; 1.234 +} 1.235 + 1.236 +UBool StringSearch::operator==(const SearchIterator &that) const 1.237 +{ 1.238 + if (this == &that) { 1.239 + return TRUE; 1.240 + } 1.241 + if (SearchIterator::operator ==(that)) { 1.242 + StringSearch &thatsrch = (StringSearch &)that; 1.243 + return (this->m_pattern_ == thatsrch.m_pattern_ && 1.244 + this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); 1.245 + } 1.246 + return FALSE; 1.247 +} 1.248 + 1.249 +// public get and set methods ---------------------------------------- 1.250 + 1.251 +void StringSearch::setOffset(int32_t position, UErrorCode &status) 1.252 +{ 1.253 + // status checked in usearch_setOffset 1.254 + usearch_setOffset(m_strsrch_, position, &status); 1.255 +} 1.256 + 1.257 +int32_t StringSearch::getOffset(void) const 1.258 +{ 1.259 + return usearch_getOffset(m_strsrch_); 1.260 +} 1.261 + 1.262 +void StringSearch::setText(const UnicodeString &text, UErrorCode &status) 1.263 +{ 1.264 + if (U_SUCCESS(status)) { 1.265 + m_text_ = text; 1.266 + usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); 1.267 + } 1.268 +} 1.269 + 1.270 +void StringSearch::setText(CharacterIterator &text, UErrorCode &status) 1.271 +{ 1.272 + if (U_SUCCESS(status)) { 1.273 + text.getText(m_text_); 1.274 + usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); 1.275 + } 1.276 +} 1.277 + 1.278 +RuleBasedCollator * StringSearch::getCollator() const 1.279 +{ 1.280 + return (RuleBasedCollator *)&m_collator_; 1.281 +} 1.282 + 1.283 +void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) 1.284 +{ 1.285 + if (U_SUCCESS(status)) { 1.286 + usearch_setCollator(m_strsrch_, coll->getUCollator(), &status); 1.287 + // Alias the collator 1.288 + m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 1.289 + } 1.290 +} 1.291 + 1.292 +void StringSearch::setPattern(const UnicodeString &pattern, 1.293 + UErrorCode &status) 1.294 +{ 1.295 + if (U_SUCCESS(status)) { 1.296 + m_pattern_ = pattern; 1.297 + usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), 1.298 + &status); 1.299 + } 1.300 +} 1.301 + 1.302 +const UnicodeString & StringSearch::getPattern() const 1.303 +{ 1.304 + return m_pattern_; 1.305 +} 1.306 + 1.307 +// public methods ---------------------------------------------------- 1.308 + 1.309 +void StringSearch::reset() 1.310 +{ 1.311 + usearch_reset(m_strsrch_); 1.312 +} 1.313 + 1.314 +SearchIterator * StringSearch::safeClone(void) const 1.315 +{ 1.316 + UErrorCode status = U_ZERO_ERROR; 1.317 + StringSearch *result = new StringSearch(m_pattern_, m_text_, 1.318 + (RuleBasedCollator *)&m_collator_, 1.319 + m_breakiterator_, 1.320 + status); 1.321 + /* test for NULL */ 1.322 + if (result == 0) { 1.323 + status = U_MEMORY_ALLOCATION_ERROR; 1.324 + return 0; 1.325 + } 1.326 + result->setOffset(getOffset(), status); 1.327 + result->setMatchStart(m_strsrch_->search->matchedIndex); 1.328 + result->setMatchLength(m_strsrch_->search->matchedLength); 1.329 + if (U_FAILURE(status)) { 1.330 + return NULL; 1.331 + } 1.332 + return result; 1.333 +} 1.334 + 1.335 +// protected method ------------------------------------------------- 1.336 + 1.337 +int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) 1.338 +{ 1.339 + // values passed here are already in the pre-shift position 1.340 + if (U_SUCCESS(status)) { 1.341 + if (m_strsrch_->pattern.CELength == 0) { 1.342 + m_search_->matchedIndex = 1.343 + m_search_->matchedIndex == USEARCH_DONE ? 1.344 + getOffset() : m_search_->matchedIndex + 1; 1.345 + m_search_->matchedLength = 0; 1.346 + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 1.347 + &status); 1.348 + if (m_search_->matchedIndex == m_search_->textLength) { 1.349 + m_search_->matchedIndex = USEARCH_DONE; 1.350 + } 1.351 + } 1.352 + else { 1.353 + // looking at usearch.cpp, this part is shifted out to 1.354 + // StringSearch instead of SearchIterator because m_strsrch_ is 1.355 + // not accessible in SearchIterator 1.356 +#if 0 1.357 + if (position + m_strsrch_->pattern.defaultShiftSize 1.358 + > m_search_->textLength) { 1.359 + setMatchNotFound(); 1.360 + return USEARCH_DONE; 1.361 + } 1.362 +#endif 1.363 + if (m_search_->matchedLength <= 0) { 1.364 + // the flipping direction issue has already been handled 1.365 + // in next() 1.366 + // for boundary check purposes. this will ensure that the 1.367 + // next match will not preceed the current offset 1.368 + // note search->matchedIndex will always be set to something 1.369 + // in the code 1.370 + m_search_->matchedIndex = position - 1; 1.371 + } 1.372 + 1.373 + ucol_setOffset(m_strsrch_->textIter, position, &status); 1.374 + 1.375 +#if 0 1.376 + for (;;) { 1.377 + if (m_search_->isCanonicalMatch) { 1.378 + // can't use exact here since extra accents are allowed. 1.379 + usearch_handleNextCanonical(m_strsrch_, &status); 1.380 + } 1.381 + else { 1.382 + usearch_handleNextExact(m_strsrch_, &status); 1.383 + } 1.384 + if (U_FAILURE(status)) { 1.385 + return USEARCH_DONE; 1.386 + } 1.387 + if (m_breakiterator_ == NULL 1.388 +#if !UCONFIG_NO_BREAK_ITERATION 1.389 + || 1.390 + m_search_->matchedIndex == USEARCH_DONE || 1.391 + (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 1.392 + m_breakiterator_->isBoundary(m_search_->matchedIndex + 1.393 + m_search_->matchedLength)) 1.394 +#endif 1.395 + ) { 1.396 + if (m_search_->matchedIndex == USEARCH_DONE) { 1.397 + ucol_setOffset(m_strsrch_->textIter, 1.398 + m_search_->textLength, &status); 1.399 + } 1.400 + else { 1.401 + ucol_setOffset(m_strsrch_->textIter, 1.402 + m_search_->matchedIndex, &status); 1.403 + } 1.404 + return m_search_->matchedIndex; 1.405 + } 1.406 + } 1.407 +#else 1.408 + // if m_strsrch_->breakIter is always the same as m_breakiterator_ 1.409 + // then we don't need to check the match boundaries here because 1.410 + // usearch_handleNextXXX will already have done it. 1.411 + if (m_search_->isCanonicalMatch) { 1.412 + // *could* actually use exact here 'cause no extra accents allowed... 1.413 + usearch_handleNextCanonical(m_strsrch_, &status); 1.414 + } else { 1.415 + usearch_handleNextExact(m_strsrch_, &status); 1.416 + } 1.417 + 1.418 + if (U_FAILURE(status)) { 1.419 + return USEARCH_DONE; 1.420 + } 1.421 + 1.422 + if (m_search_->matchedIndex == USEARCH_DONE) { 1.423 + ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); 1.424 + } else { 1.425 + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); 1.426 + } 1.427 + 1.428 + return m_search_->matchedIndex; 1.429 +#endif 1.430 + } 1.431 + } 1.432 + return USEARCH_DONE; 1.433 +} 1.434 + 1.435 +int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) 1.436 +{ 1.437 + // values passed here are already in the pre-shift position 1.438 + if (U_SUCCESS(status)) { 1.439 + if (m_strsrch_->pattern.CELength == 0) { 1.440 + m_search_->matchedIndex = 1.441 + (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : 1.442 + m_search_->matchedIndex); 1.443 + if (m_search_->matchedIndex == 0) { 1.444 + setMatchNotFound(); 1.445 + } 1.446 + else { 1.447 + m_search_->matchedIndex --; 1.448 + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 1.449 + &status); 1.450 + m_search_->matchedLength = 0; 1.451 + } 1.452 + } 1.453 + else { 1.454 + // looking at usearch.cpp, this part is shifted out to 1.455 + // StringSearch instead of SearchIterator because m_strsrch_ is 1.456 + // not accessible in SearchIterator 1.457 +#if 0 1.458 + if (!m_search_->isOverlap && 1.459 + position - m_strsrch_->pattern.defaultShiftSize < 0) { 1.460 + setMatchNotFound(); 1.461 + return USEARCH_DONE; 1.462 + } 1.463 + 1.464 + for (;;) { 1.465 + if (m_search_->isCanonicalMatch) { 1.466 + // can't use exact here since extra accents are allowed. 1.467 + usearch_handlePreviousCanonical(m_strsrch_, &status); 1.468 + } 1.469 + else { 1.470 + usearch_handlePreviousExact(m_strsrch_, &status); 1.471 + } 1.472 + if (U_FAILURE(status)) { 1.473 + return USEARCH_DONE; 1.474 + } 1.475 + if (m_breakiterator_ == NULL 1.476 +#if !UCONFIG_NO_BREAK_ITERATION 1.477 + || 1.478 + m_search_->matchedIndex == USEARCH_DONE || 1.479 + (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 1.480 + m_breakiterator_->isBoundary(m_search_->matchedIndex + 1.481 + m_search_->matchedLength)) 1.482 +#endif 1.483 + ) { 1.484 + return m_search_->matchedIndex; 1.485 + } 1.486 + } 1.487 +#else 1.488 + ucol_setOffset(m_strsrch_->textIter, position, &status); 1.489 + 1.490 + if (m_search_->isCanonicalMatch) { 1.491 + // *could* use exact match here since extra accents *not* allowed! 1.492 + usearch_handlePreviousCanonical(m_strsrch_, &status); 1.493 + } else { 1.494 + usearch_handlePreviousExact(m_strsrch_, &status); 1.495 + } 1.496 + 1.497 + if (U_FAILURE(status)) { 1.498 + return USEARCH_DONE; 1.499 + } 1.500 + 1.501 + return m_search_->matchedIndex; 1.502 +#endif 1.503 + } 1.504 + 1.505 + return m_search_->matchedIndex; 1.506 + } 1.507 + return USEARCH_DONE; 1.508 +} 1.509 + 1.510 +U_NAMESPACE_END 1.511 + 1.512 +#endif /* #if !UCONFIG_NO_COLLATION */