1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/search.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,443 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2001-2008,2010 IBM and others. All rights reserved. 1.7 +********************************************************************** 1.8 +* Date Name Description 1.9 +* 03/22/2000 helena Creation. 1.10 +********************************************************************** 1.11 +*/ 1.12 + 1.13 +#include "unicode/utypes.h" 1.14 + 1.15 +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 1.16 + 1.17 +#include "unicode/brkiter.h" 1.18 +#include "unicode/schriter.h" 1.19 +#include "unicode/search.h" 1.20 +#include "usrchimp.h" 1.21 +#include "cmemory.h" 1.22 + 1.23 +// public constructors and destructors ----------------------------------- 1.24 +U_NAMESPACE_BEGIN 1.25 + 1.26 +SearchIterator::SearchIterator(const SearchIterator &other) 1.27 + : UObject(other) 1.28 +{ 1.29 + m_breakiterator_ = other.m_breakiterator_; 1.30 + m_text_ = other.m_text_; 1.31 + m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 1.32 + m_search_->breakIter = other.m_search_->breakIter; 1.33 + m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch; 1.34 + m_search_->isOverlap = other.m_search_->isOverlap; 1.35 + m_search_->elementComparisonType = other.m_search_->elementComparisonType; 1.36 + m_search_->matchedIndex = other.m_search_->matchedIndex; 1.37 + m_search_->matchedLength = other.m_search_->matchedLength; 1.38 + m_search_->text = other.m_search_->text; 1.39 + m_search_->textLength = other.m_search_->textLength; 1.40 +} 1.41 + 1.42 +SearchIterator::~SearchIterator() 1.43 +{ 1.44 + if (m_search_ != NULL) { 1.45 + uprv_free(m_search_); 1.46 + } 1.47 +} 1.48 + 1.49 +// public get and set methods ---------------------------------------- 1.50 + 1.51 +void SearchIterator::setAttribute(USearchAttribute attribute, 1.52 + USearchAttributeValue value, 1.53 + UErrorCode &status) 1.54 +{ 1.55 + if (U_SUCCESS(status)) { 1.56 + switch (attribute) 1.57 + { 1.58 + case USEARCH_OVERLAP : 1.59 + m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE); 1.60 + break; 1.61 + case USEARCH_CANONICAL_MATCH : 1.62 + m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE); 1.63 + break; 1.64 + case USEARCH_ELEMENT_COMPARISON : 1.65 + if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { 1.66 + m_search_->elementComparisonType = (int16_t)value; 1.67 + } else { 1.68 + m_search_->elementComparisonType = 0; 1.69 + } 1.70 + break; 1.71 + default: 1.72 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.73 + } 1.74 + } 1.75 + if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) { 1.76 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.77 + } 1.78 +} 1.79 + 1.80 +USearchAttributeValue SearchIterator::getAttribute( 1.81 + USearchAttribute attribute) const 1.82 +{ 1.83 + switch (attribute) { 1.84 + case USEARCH_OVERLAP : 1.85 + return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF); 1.86 + case USEARCH_CANONICAL_MATCH : 1.87 + return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : 1.88 + USEARCH_OFF); 1.89 + case USEARCH_ELEMENT_COMPARISON : 1.90 + { 1.91 + int16_t value = m_search_->elementComparisonType; 1.92 + if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { 1.93 + return (USearchAttributeValue)value; 1.94 + } else { 1.95 + return USEARCH_STANDARD_ELEMENT_COMPARISON; 1.96 + } 1.97 + } 1.98 + default : 1.99 + return USEARCH_DEFAULT; 1.100 + } 1.101 +} 1.102 + 1.103 +int32_t SearchIterator::getMatchedStart() const 1.104 +{ 1.105 + return m_search_->matchedIndex; 1.106 +} 1.107 + 1.108 +int32_t SearchIterator::getMatchedLength() const 1.109 +{ 1.110 + return m_search_->matchedLength; 1.111 +} 1.112 + 1.113 +void SearchIterator::getMatchedText(UnicodeString &result) const 1.114 +{ 1.115 + int32_t matchedindex = m_search_->matchedIndex; 1.116 + int32_t matchedlength = m_search_->matchedLength; 1.117 + if (matchedindex != USEARCH_DONE && matchedlength != 0) { 1.118 + result.setTo(m_search_->text + matchedindex, matchedlength); 1.119 + } 1.120 + else { 1.121 + result.remove(); 1.122 + } 1.123 +} 1.124 + 1.125 +void SearchIterator::setBreakIterator(BreakIterator *breakiter, 1.126 + UErrorCode &status) 1.127 +{ 1.128 + if (U_SUCCESS(status)) { 1.129 +#if 0 1.130 + m_search_->breakIter = NULL; 1.131 + // the c++ breakiterator may not make use of ubreakiterator. 1.132 + // so we'll have to keep track of it ourselves. 1.133 +#else 1.134 + // Well, gee... the Constructors that take a BreakIterator 1.135 + // all cast the BreakIterator to a UBreakIterator and 1.136 + // pass it to the corresponding usearch_openFromXXX 1.137 + // routine, so there's no reason not to do this. 1.138 + // 1.139 + // Besides, a UBreakIterator is a BreakIterator, so 1.140 + // any subclass of BreakIterator should work fine here... 1.141 + m_search_->breakIter = (UBreakIterator *) breakiter; 1.142 +#endif 1.143 + 1.144 + m_breakiterator_ = breakiter; 1.145 + } 1.146 +} 1.147 + 1.148 +const BreakIterator * SearchIterator::getBreakIterator(void) const 1.149 +{ 1.150 + return m_breakiterator_; 1.151 +} 1.152 + 1.153 +void SearchIterator::setText(const UnicodeString &text, UErrorCode &status) 1.154 +{ 1.155 + if (U_SUCCESS(status)) { 1.156 + if (text.length() == 0) { 1.157 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.158 + } 1.159 + else { 1.160 + m_text_ = text; 1.161 + m_search_->text = m_text_.getBuffer(); 1.162 + m_search_->textLength = m_text_.length(); 1.163 + } 1.164 + } 1.165 +} 1.166 + 1.167 +void SearchIterator::setText(CharacterIterator &text, UErrorCode &status) 1.168 +{ 1.169 + if (U_SUCCESS(status)) { 1.170 + text.getText(m_text_); 1.171 + setText(m_text_, status); 1.172 + } 1.173 +} 1.174 + 1.175 +const UnicodeString & SearchIterator::getText(void) const 1.176 +{ 1.177 + return m_text_; 1.178 +} 1.179 + 1.180 +// operator overloading ---------------------------------------------- 1.181 + 1.182 +UBool SearchIterator::operator==(const SearchIterator &that) const 1.183 +{ 1.184 + if (this == &that) { 1.185 + return TRUE; 1.186 + } 1.187 + return (m_breakiterator_ == that.m_breakiterator_ && 1.188 + m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch && 1.189 + m_search_->isOverlap == that.m_search_->isOverlap && 1.190 + m_search_->elementComparisonType == that.m_search_->elementComparisonType && 1.191 + m_search_->matchedIndex == that.m_search_->matchedIndex && 1.192 + m_search_->matchedLength == that.m_search_->matchedLength && 1.193 + m_search_->textLength == that.m_search_->textLength && 1.194 + getOffset() == that.getOffset() && 1.195 + (uprv_memcmp(m_search_->text, that.m_search_->text, 1.196 + m_search_->textLength * sizeof(UChar)) == 0)); 1.197 +} 1.198 + 1.199 +// public methods ---------------------------------------------------- 1.200 + 1.201 +int32_t SearchIterator::first(UErrorCode &status) 1.202 +{ 1.203 + if (U_FAILURE(status)) { 1.204 + return USEARCH_DONE; 1.205 + } 1.206 + setOffset(0, status); 1.207 + return handleNext(0, status); 1.208 +} 1.209 + 1.210 +int32_t SearchIterator::following(int32_t position, 1.211 + UErrorCode &status) 1.212 +{ 1.213 + if (U_FAILURE(status)) { 1.214 + return USEARCH_DONE; 1.215 + } 1.216 + setOffset(position, status); 1.217 + return handleNext(position, status); 1.218 +} 1.219 + 1.220 +int32_t SearchIterator::last(UErrorCode &status) 1.221 +{ 1.222 + if (U_FAILURE(status)) { 1.223 + return USEARCH_DONE; 1.224 + } 1.225 + setOffset(m_search_->textLength, status); 1.226 + return handlePrev(m_search_->textLength, status); 1.227 +} 1.228 + 1.229 +int32_t SearchIterator::preceding(int32_t position, 1.230 + UErrorCode &status) 1.231 +{ 1.232 + if (U_FAILURE(status)) { 1.233 + return USEARCH_DONE; 1.234 + } 1.235 + setOffset(position, status); 1.236 + return handlePrev(position, status); 1.237 +} 1.238 + 1.239 +int32_t SearchIterator::next(UErrorCode &status) 1.240 +{ 1.241 + if (U_SUCCESS(status)) { 1.242 + int32_t offset = getOffset(); 1.243 + int32_t matchindex = m_search_->matchedIndex; 1.244 + int32_t matchlength = m_search_->matchedLength; 1.245 + m_search_->reset = FALSE; 1.246 + if (m_search_->isForwardSearching == TRUE) { 1.247 + int32_t textlength = m_search_->textLength; 1.248 + if (offset == textlength || matchindex == textlength || 1.249 + (matchindex != USEARCH_DONE && 1.250 + matchindex + matchlength >= textlength)) { 1.251 + // not enough characters to match 1.252 + setMatchNotFound(); 1.253 + return USEARCH_DONE; 1.254 + } 1.255 + } 1.256 + else { 1.257 + // switching direction. 1.258 + // if matchedIndex == USEARCH_DONE, it means that either a 1.259 + // setOffset has been called or that previous ran off the text 1.260 + // string. the iterator would have been set to offset 0 if a 1.261 + // match is not found. 1.262 + m_search_->isForwardSearching = TRUE; 1.263 + if (m_search_->matchedIndex != USEARCH_DONE) { 1.264 + // there's no need to set the collation element iterator 1.265 + // the next call to next will set the offset. 1.266 + return matchindex; 1.267 + } 1.268 + } 1.269 + 1.270 + if (matchlength > 0) { 1.271 + // if matchlength is 0 we are at the start of the iteration 1.272 + if (m_search_->isOverlap) { 1.273 + offset ++; 1.274 + } 1.275 + else { 1.276 + offset += matchlength; 1.277 + } 1.278 + } 1.279 + return handleNext(offset, status); 1.280 + } 1.281 + return USEARCH_DONE; 1.282 +} 1.283 + 1.284 +int32_t SearchIterator::previous(UErrorCode &status) 1.285 +{ 1.286 + if (U_SUCCESS(status)) { 1.287 + int32_t offset; 1.288 + if (m_search_->reset) { 1.289 + offset = m_search_->textLength; 1.290 + m_search_->isForwardSearching = FALSE; 1.291 + m_search_->reset = FALSE; 1.292 + setOffset(offset, status); 1.293 + } 1.294 + else { 1.295 + offset = getOffset(); 1.296 + } 1.297 + 1.298 + int32_t matchindex = m_search_->matchedIndex; 1.299 + if (m_search_->isForwardSearching == TRUE) { 1.300 + // switching direction. 1.301 + // if matchedIndex == USEARCH_DONE, it means that either a 1.302 + // setOffset has been called or that next ran off the text 1.303 + // string. the iterator would have been set to offset textLength if 1.304 + // a match is not found. 1.305 + m_search_->isForwardSearching = FALSE; 1.306 + if (matchindex != USEARCH_DONE) { 1.307 + return matchindex; 1.308 + } 1.309 + } 1.310 + else { 1.311 + if (offset == 0 || matchindex == 0) { 1.312 + // not enough characters to match 1.313 + setMatchNotFound(); 1.314 + return USEARCH_DONE; 1.315 + } 1.316 + } 1.317 + 1.318 + if (matchindex != USEARCH_DONE) { 1.319 + if (m_search_->isOverlap) { 1.320 + matchindex += m_search_->matchedLength - 2; 1.321 + } 1.322 + 1.323 + return handlePrev(matchindex, status); 1.324 + } 1.325 + 1.326 + return handlePrev(offset, status); 1.327 + } 1.328 + 1.329 + return USEARCH_DONE; 1.330 +} 1.331 + 1.332 +void SearchIterator::reset() 1.333 +{ 1.334 + UErrorCode status = U_ZERO_ERROR; 1.335 + setMatchNotFound(); 1.336 + setOffset(0, status); 1.337 + m_search_->isOverlap = FALSE; 1.338 + m_search_->isCanonicalMatch = FALSE; 1.339 + m_search_->elementComparisonType = 0; 1.340 + m_search_->isForwardSearching = TRUE; 1.341 + m_search_->reset = TRUE; 1.342 +} 1.343 + 1.344 +// protected constructors and destructors ----------------------------- 1.345 + 1.346 +SearchIterator::SearchIterator() 1.347 +{ 1.348 + m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 1.349 + m_search_->breakIter = NULL; 1.350 + m_search_->isOverlap = FALSE; 1.351 + m_search_->isCanonicalMatch = FALSE; 1.352 + m_search_->elementComparisonType = 0; 1.353 + m_search_->isForwardSearching = TRUE; 1.354 + m_search_->reset = TRUE; 1.355 + m_search_->matchedIndex = USEARCH_DONE; 1.356 + m_search_->matchedLength = 0; 1.357 + m_search_->text = NULL; 1.358 + m_search_->textLength = 0; 1.359 + m_breakiterator_ = NULL; 1.360 +} 1.361 + 1.362 +SearchIterator::SearchIterator(const UnicodeString &text, 1.363 + BreakIterator *breakiter) : 1.364 + m_breakiterator_(breakiter), 1.365 + m_text_(text) 1.366 +{ 1.367 + m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 1.368 + m_search_->breakIter = NULL; 1.369 + m_search_->isOverlap = FALSE; 1.370 + m_search_->isCanonicalMatch = FALSE; 1.371 + m_search_->elementComparisonType = 0; 1.372 + m_search_->isForwardSearching = TRUE; 1.373 + m_search_->reset = TRUE; 1.374 + m_search_->matchedIndex = USEARCH_DONE; 1.375 + m_search_->matchedLength = 0; 1.376 + m_search_->text = m_text_.getBuffer(); 1.377 + m_search_->textLength = text.length(); 1.378 +} 1.379 + 1.380 +SearchIterator::SearchIterator(CharacterIterator &text, 1.381 + BreakIterator *breakiter) : 1.382 + m_breakiterator_(breakiter) 1.383 +{ 1.384 + m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 1.385 + m_search_->breakIter = NULL; 1.386 + m_search_->isOverlap = FALSE; 1.387 + m_search_->isCanonicalMatch = FALSE; 1.388 + m_search_->elementComparisonType = 0; 1.389 + m_search_->isForwardSearching = TRUE; 1.390 + m_search_->reset = TRUE; 1.391 + m_search_->matchedIndex = USEARCH_DONE; 1.392 + m_search_->matchedLength = 0; 1.393 + text.getText(m_text_); 1.394 + m_search_->text = m_text_.getBuffer(); 1.395 + m_search_->textLength = m_text_.length(); 1.396 + m_breakiterator_ = breakiter; 1.397 +} 1.398 + 1.399 +// protected methods ------------------------------------------------------ 1.400 + 1.401 +SearchIterator & SearchIterator::operator=(const SearchIterator &that) 1.402 +{ 1.403 + if (this != &that) { 1.404 + m_breakiterator_ = that.m_breakiterator_; 1.405 + m_text_ = that.m_text_; 1.406 + m_search_->breakIter = that.m_search_->breakIter; 1.407 + m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch; 1.408 + m_search_->isOverlap = that.m_search_->isOverlap; 1.409 + m_search_->elementComparisonType = that.m_search_->elementComparisonType; 1.410 + m_search_->matchedIndex = that.m_search_->matchedIndex; 1.411 + m_search_->matchedLength = that.m_search_->matchedLength; 1.412 + m_search_->text = that.m_search_->text; 1.413 + m_search_->textLength = that.m_search_->textLength; 1.414 + } 1.415 + return *this; 1.416 +} 1.417 + 1.418 +void SearchIterator::setMatchLength(int32_t length) 1.419 +{ 1.420 + m_search_->matchedLength = length; 1.421 +} 1.422 + 1.423 +void SearchIterator::setMatchStart(int32_t position) 1.424 +{ 1.425 + m_search_->matchedIndex = position; 1.426 +} 1.427 + 1.428 +void SearchIterator::setMatchNotFound() 1.429 +{ 1.430 + setMatchStart(USEARCH_DONE); 1.431 + setMatchLength(0); 1.432 + UErrorCode status = U_ZERO_ERROR; 1.433 + // by default no errors should be returned here since offsets are within 1.434 + // range. 1.435 + if (m_search_->isForwardSearching) { 1.436 + setOffset(m_search_->textLength, status); 1.437 + } 1.438 + else { 1.439 + setOffset(0, status); 1.440 + } 1.441 +} 1.442 + 1.443 + 1.444 +U_NAMESPACE_END 1.445 + 1.446 +#endif /* #if !UCONFIG_NO_COLLATION */