intl/icu/source/i18n/search.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
michael@0 4 **********************************************************************
michael@0 5 * Date Name Description
michael@0 6 * 03/22/2000 helena Creation.
michael@0 7 **********************************************************************
michael@0 8 */
michael@0 9
michael@0 10 #include "unicode/utypes.h"
michael@0 11
michael@0 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
michael@0 13
michael@0 14 #include "unicode/brkiter.h"
michael@0 15 #include "unicode/schriter.h"
michael@0 16 #include "unicode/search.h"
michael@0 17 #include "usrchimp.h"
michael@0 18 #include "cmemory.h"
michael@0 19
michael@0 20 // public constructors and destructors -----------------------------------
michael@0 21 U_NAMESPACE_BEGIN
michael@0 22
michael@0 23 SearchIterator::SearchIterator(const SearchIterator &other)
michael@0 24 : UObject(other)
michael@0 25 {
michael@0 26 m_breakiterator_ = other.m_breakiterator_;
michael@0 27 m_text_ = other.m_text_;
michael@0 28 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
michael@0 29 m_search_->breakIter = other.m_search_->breakIter;
michael@0 30 m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
michael@0 31 m_search_->isOverlap = other.m_search_->isOverlap;
michael@0 32 m_search_->elementComparisonType = other.m_search_->elementComparisonType;
michael@0 33 m_search_->matchedIndex = other.m_search_->matchedIndex;
michael@0 34 m_search_->matchedLength = other.m_search_->matchedLength;
michael@0 35 m_search_->text = other.m_search_->text;
michael@0 36 m_search_->textLength = other.m_search_->textLength;
michael@0 37 }
michael@0 38
michael@0 39 SearchIterator::~SearchIterator()
michael@0 40 {
michael@0 41 if (m_search_ != NULL) {
michael@0 42 uprv_free(m_search_);
michael@0 43 }
michael@0 44 }
michael@0 45
michael@0 46 // public get and set methods ----------------------------------------
michael@0 47
michael@0 48 void SearchIterator::setAttribute(USearchAttribute attribute,
michael@0 49 USearchAttributeValue value,
michael@0 50 UErrorCode &status)
michael@0 51 {
michael@0 52 if (U_SUCCESS(status)) {
michael@0 53 switch (attribute)
michael@0 54 {
michael@0 55 case USEARCH_OVERLAP :
michael@0 56 m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
michael@0 57 break;
michael@0 58 case USEARCH_CANONICAL_MATCH :
michael@0 59 m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
michael@0 60 break;
michael@0 61 case USEARCH_ELEMENT_COMPARISON :
michael@0 62 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
michael@0 63 m_search_->elementComparisonType = (int16_t)value;
michael@0 64 } else {
michael@0 65 m_search_->elementComparisonType = 0;
michael@0 66 }
michael@0 67 break;
michael@0 68 default:
michael@0 69 status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 70 }
michael@0 71 }
michael@0 72 if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
michael@0 73 status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 74 }
michael@0 75 }
michael@0 76
michael@0 77 USearchAttributeValue SearchIterator::getAttribute(
michael@0 78 USearchAttribute attribute) const
michael@0 79 {
michael@0 80 switch (attribute) {
michael@0 81 case USEARCH_OVERLAP :
michael@0 82 return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
michael@0 83 case USEARCH_CANONICAL_MATCH :
michael@0 84 return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
michael@0 85 USEARCH_OFF);
michael@0 86 case USEARCH_ELEMENT_COMPARISON :
michael@0 87 {
michael@0 88 int16_t value = m_search_->elementComparisonType;
michael@0 89 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
michael@0 90 return (USearchAttributeValue)value;
michael@0 91 } else {
michael@0 92 return USEARCH_STANDARD_ELEMENT_COMPARISON;
michael@0 93 }
michael@0 94 }
michael@0 95 default :
michael@0 96 return USEARCH_DEFAULT;
michael@0 97 }
michael@0 98 }
michael@0 99
michael@0 100 int32_t SearchIterator::getMatchedStart() const
michael@0 101 {
michael@0 102 return m_search_->matchedIndex;
michael@0 103 }
michael@0 104
michael@0 105 int32_t SearchIterator::getMatchedLength() const
michael@0 106 {
michael@0 107 return m_search_->matchedLength;
michael@0 108 }
michael@0 109
michael@0 110 void SearchIterator::getMatchedText(UnicodeString &result) const
michael@0 111 {
michael@0 112 int32_t matchedindex = m_search_->matchedIndex;
michael@0 113 int32_t matchedlength = m_search_->matchedLength;
michael@0 114 if (matchedindex != USEARCH_DONE && matchedlength != 0) {
michael@0 115 result.setTo(m_search_->text + matchedindex, matchedlength);
michael@0 116 }
michael@0 117 else {
michael@0 118 result.remove();
michael@0 119 }
michael@0 120 }
michael@0 121
michael@0 122 void SearchIterator::setBreakIterator(BreakIterator *breakiter,
michael@0 123 UErrorCode &status)
michael@0 124 {
michael@0 125 if (U_SUCCESS(status)) {
michael@0 126 #if 0
michael@0 127 m_search_->breakIter = NULL;
michael@0 128 // the c++ breakiterator may not make use of ubreakiterator.
michael@0 129 // so we'll have to keep track of it ourselves.
michael@0 130 #else
michael@0 131 // Well, gee... the Constructors that take a BreakIterator
michael@0 132 // all cast the BreakIterator to a UBreakIterator and
michael@0 133 // pass it to the corresponding usearch_openFromXXX
michael@0 134 // routine, so there's no reason not to do this.
michael@0 135 //
michael@0 136 // Besides, a UBreakIterator is a BreakIterator, so
michael@0 137 // any subclass of BreakIterator should work fine here...
michael@0 138 m_search_->breakIter = (UBreakIterator *) breakiter;
michael@0 139 #endif
michael@0 140
michael@0 141 m_breakiterator_ = breakiter;
michael@0 142 }
michael@0 143 }
michael@0 144
michael@0 145 const BreakIterator * SearchIterator::getBreakIterator(void) const
michael@0 146 {
michael@0 147 return m_breakiterator_;
michael@0 148 }
michael@0 149
michael@0 150 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
michael@0 151 {
michael@0 152 if (U_SUCCESS(status)) {
michael@0 153 if (text.length() == 0) {
michael@0 154 status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 155 }
michael@0 156 else {
michael@0 157 m_text_ = text;
michael@0 158 m_search_->text = m_text_.getBuffer();
michael@0 159 m_search_->textLength = m_text_.length();
michael@0 160 }
michael@0 161 }
michael@0 162 }
michael@0 163
michael@0 164 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
michael@0 165 {
michael@0 166 if (U_SUCCESS(status)) {
michael@0 167 text.getText(m_text_);
michael@0 168 setText(m_text_, status);
michael@0 169 }
michael@0 170 }
michael@0 171
michael@0 172 const UnicodeString & SearchIterator::getText(void) const
michael@0 173 {
michael@0 174 return m_text_;
michael@0 175 }
michael@0 176
michael@0 177 // operator overloading ----------------------------------------------
michael@0 178
michael@0 179 UBool SearchIterator::operator==(const SearchIterator &that) const
michael@0 180 {
michael@0 181 if (this == &that) {
michael@0 182 return TRUE;
michael@0 183 }
michael@0 184 return (m_breakiterator_ == that.m_breakiterator_ &&
michael@0 185 m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
michael@0 186 m_search_->isOverlap == that.m_search_->isOverlap &&
michael@0 187 m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
michael@0 188 m_search_->matchedIndex == that.m_search_->matchedIndex &&
michael@0 189 m_search_->matchedLength == that.m_search_->matchedLength &&
michael@0 190 m_search_->textLength == that.m_search_->textLength &&
michael@0 191 getOffset() == that.getOffset() &&
michael@0 192 (uprv_memcmp(m_search_->text, that.m_search_->text,
michael@0 193 m_search_->textLength * sizeof(UChar)) == 0));
michael@0 194 }
michael@0 195
michael@0 196 // public methods ----------------------------------------------------
michael@0 197
michael@0 198 int32_t SearchIterator::first(UErrorCode &status)
michael@0 199 {
michael@0 200 if (U_FAILURE(status)) {
michael@0 201 return USEARCH_DONE;
michael@0 202 }
michael@0 203 setOffset(0, status);
michael@0 204 return handleNext(0, status);
michael@0 205 }
michael@0 206
michael@0 207 int32_t SearchIterator::following(int32_t position,
michael@0 208 UErrorCode &status)
michael@0 209 {
michael@0 210 if (U_FAILURE(status)) {
michael@0 211 return USEARCH_DONE;
michael@0 212 }
michael@0 213 setOffset(position, status);
michael@0 214 return handleNext(position, status);
michael@0 215 }
michael@0 216
michael@0 217 int32_t SearchIterator::last(UErrorCode &status)
michael@0 218 {
michael@0 219 if (U_FAILURE(status)) {
michael@0 220 return USEARCH_DONE;
michael@0 221 }
michael@0 222 setOffset(m_search_->textLength, status);
michael@0 223 return handlePrev(m_search_->textLength, status);
michael@0 224 }
michael@0 225
michael@0 226 int32_t SearchIterator::preceding(int32_t position,
michael@0 227 UErrorCode &status)
michael@0 228 {
michael@0 229 if (U_FAILURE(status)) {
michael@0 230 return USEARCH_DONE;
michael@0 231 }
michael@0 232 setOffset(position, status);
michael@0 233 return handlePrev(position, status);
michael@0 234 }
michael@0 235
michael@0 236 int32_t SearchIterator::next(UErrorCode &status)
michael@0 237 {
michael@0 238 if (U_SUCCESS(status)) {
michael@0 239 int32_t offset = getOffset();
michael@0 240 int32_t matchindex = m_search_->matchedIndex;
michael@0 241 int32_t matchlength = m_search_->matchedLength;
michael@0 242 m_search_->reset = FALSE;
michael@0 243 if (m_search_->isForwardSearching == TRUE) {
michael@0 244 int32_t textlength = m_search_->textLength;
michael@0 245 if (offset == textlength || matchindex == textlength ||
michael@0 246 (matchindex != USEARCH_DONE &&
michael@0 247 matchindex + matchlength >= textlength)) {
michael@0 248 // not enough characters to match
michael@0 249 setMatchNotFound();
michael@0 250 return USEARCH_DONE;
michael@0 251 }
michael@0 252 }
michael@0 253 else {
michael@0 254 // switching direction.
michael@0 255 // if matchedIndex == USEARCH_DONE, it means that either a
michael@0 256 // setOffset has been called or that previous ran off the text
michael@0 257 // string. the iterator would have been set to offset 0 if a
michael@0 258 // match is not found.
michael@0 259 m_search_->isForwardSearching = TRUE;
michael@0 260 if (m_search_->matchedIndex != USEARCH_DONE) {
michael@0 261 // there's no need to set the collation element iterator
michael@0 262 // the next call to next will set the offset.
michael@0 263 return matchindex;
michael@0 264 }
michael@0 265 }
michael@0 266
michael@0 267 if (matchlength > 0) {
michael@0 268 // if matchlength is 0 we are at the start of the iteration
michael@0 269 if (m_search_->isOverlap) {
michael@0 270 offset ++;
michael@0 271 }
michael@0 272 else {
michael@0 273 offset += matchlength;
michael@0 274 }
michael@0 275 }
michael@0 276 return handleNext(offset, status);
michael@0 277 }
michael@0 278 return USEARCH_DONE;
michael@0 279 }
michael@0 280
michael@0 281 int32_t SearchIterator::previous(UErrorCode &status)
michael@0 282 {
michael@0 283 if (U_SUCCESS(status)) {
michael@0 284 int32_t offset;
michael@0 285 if (m_search_->reset) {
michael@0 286 offset = m_search_->textLength;
michael@0 287 m_search_->isForwardSearching = FALSE;
michael@0 288 m_search_->reset = FALSE;
michael@0 289 setOffset(offset, status);
michael@0 290 }
michael@0 291 else {
michael@0 292 offset = getOffset();
michael@0 293 }
michael@0 294
michael@0 295 int32_t matchindex = m_search_->matchedIndex;
michael@0 296 if (m_search_->isForwardSearching == TRUE) {
michael@0 297 // switching direction.
michael@0 298 // if matchedIndex == USEARCH_DONE, it means that either a
michael@0 299 // setOffset has been called or that next ran off the text
michael@0 300 // string. the iterator would have been set to offset textLength if
michael@0 301 // a match is not found.
michael@0 302 m_search_->isForwardSearching = FALSE;
michael@0 303 if (matchindex != USEARCH_DONE) {
michael@0 304 return matchindex;
michael@0 305 }
michael@0 306 }
michael@0 307 else {
michael@0 308 if (offset == 0 || matchindex == 0) {
michael@0 309 // not enough characters to match
michael@0 310 setMatchNotFound();
michael@0 311 return USEARCH_DONE;
michael@0 312 }
michael@0 313 }
michael@0 314
michael@0 315 if (matchindex != USEARCH_DONE) {
michael@0 316 if (m_search_->isOverlap) {
michael@0 317 matchindex += m_search_->matchedLength - 2;
michael@0 318 }
michael@0 319
michael@0 320 return handlePrev(matchindex, status);
michael@0 321 }
michael@0 322
michael@0 323 return handlePrev(offset, status);
michael@0 324 }
michael@0 325
michael@0 326 return USEARCH_DONE;
michael@0 327 }
michael@0 328
michael@0 329 void SearchIterator::reset()
michael@0 330 {
michael@0 331 UErrorCode status = U_ZERO_ERROR;
michael@0 332 setMatchNotFound();
michael@0 333 setOffset(0, status);
michael@0 334 m_search_->isOverlap = FALSE;
michael@0 335 m_search_->isCanonicalMatch = FALSE;
michael@0 336 m_search_->elementComparisonType = 0;
michael@0 337 m_search_->isForwardSearching = TRUE;
michael@0 338 m_search_->reset = TRUE;
michael@0 339 }
michael@0 340
michael@0 341 // protected constructors and destructors -----------------------------
michael@0 342
michael@0 343 SearchIterator::SearchIterator()
michael@0 344 {
michael@0 345 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
michael@0 346 m_search_->breakIter = NULL;
michael@0 347 m_search_->isOverlap = FALSE;
michael@0 348 m_search_->isCanonicalMatch = FALSE;
michael@0 349 m_search_->elementComparisonType = 0;
michael@0 350 m_search_->isForwardSearching = TRUE;
michael@0 351 m_search_->reset = TRUE;
michael@0 352 m_search_->matchedIndex = USEARCH_DONE;
michael@0 353 m_search_->matchedLength = 0;
michael@0 354 m_search_->text = NULL;
michael@0 355 m_search_->textLength = 0;
michael@0 356 m_breakiterator_ = NULL;
michael@0 357 }
michael@0 358
michael@0 359 SearchIterator::SearchIterator(const UnicodeString &text,
michael@0 360 BreakIterator *breakiter) :
michael@0 361 m_breakiterator_(breakiter),
michael@0 362 m_text_(text)
michael@0 363 {
michael@0 364 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
michael@0 365 m_search_->breakIter = NULL;
michael@0 366 m_search_->isOverlap = FALSE;
michael@0 367 m_search_->isCanonicalMatch = FALSE;
michael@0 368 m_search_->elementComparisonType = 0;
michael@0 369 m_search_->isForwardSearching = TRUE;
michael@0 370 m_search_->reset = TRUE;
michael@0 371 m_search_->matchedIndex = USEARCH_DONE;
michael@0 372 m_search_->matchedLength = 0;
michael@0 373 m_search_->text = m_text_.getBuffer();
michael@0 374 m_search_->textLength = text.length();
michael@0 375 }
michael@0 376
michael@0 377 SearchIterator::SearchIterator(CharacterIterator &text,
michael@0 378 BreakIterator *breakiter) :
michael@0 379 m_breakiterator_(breakiter)
michael@0 380 {
michael@0 381 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
michael@0 382 m_search_->breakIter = NULL;
michael@0 383 m_search_->isOverlap = FALSE;
michael@0 384 m_search_->isCanonicalMatch = FALSE;
michael@0 385 m_search_->elementComparisonType = 0;
michael@0 386 m_search_->isForwardSearching = TRUE;
michael@0 387 m_search_->reset = TRUE;
michael@0 388 m_search_->matchedIndex = USEARCH_DONE;
michael@0 389 m_search_->matchedLength = 0;
michael@0 390 text.getText(m_text_);
michael@0 391 m_search_->text = m_text_.getBuffer();
michael@0 392 m_search_->textLength = m_text_.length();
michael@0 393 m_breakiterator_ = breakiter;
michael@0 394 }
michael@0 395
michael@0 396 // protected methods ------------------------------------------------------
michael@0 397
michael@0 398 SearchIterator & SearchIterator::operator=(const SearchIterator &that)
michael@0 399 {
michael@0 400 if (this != &that) {
michael@0 401 m_breakiterator_ = that.m_breakiterator_;
michael@0 402 m_text_ = that.m_text_;
michael@0 403 m_search_->breakIter = that.m_search_->breakIter;
michael@0 404 m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
michael@0 405 m_search_->isOverlap = that.m_search_->isOverlap;
michael@0 406 m_search_->elementComparisonType = that.m_search_->elementComparisonType;
michael@0 407 m_search_->matchedIndex = that.m_search_->matchedIndex;
michael@0 408 m_search_->matchedLength = that.m_search_->matchedLength;
michael@0 409 m_search_->text = that.m_search_->text;
michael@0 410 m_search_->textLength = that.m_search_->textLength;
michael@0 411 }
michael@0 412 return *this;
michael@0 413 }
michael@0 414
michael@0 415 void SearchIterator::setMatchLength(int32_t length)
michael@0 416 {
michael@0 417 m_search_->matchedLength = length;
michael@0 418 }
michael@0 419
michael@0 420 void SearchIterator::setMatchStart(int32_t position)
michael@0 421 {
michael@0 422 m_search_->matchedIndex = position;
michael@0 423 }
michael@0 424
michael@0 425 void SearchIterator::setMatchNotFound()
michael@0 426 {
michael@0 427 setMatchStart(USEARCH_DONE);
michael@0 428 setMatchLength(0);
michael@0 429 UErrorCode status = U_ZERO_ERROR;
michael@0 430 // by default no errors should be returned here since offsets are within
michael@0 431 // range.
michael@0 432 if (m_search_->isForwardSearching) {
michael@0 433 setOffset(m_search_->textLength, status);
michael@0 434 }
michael@0 435 else {
michael@0 436 setOffset(0, status);
michael@0 437 }
michael@0 438 }
michael@0 439
michael@0 440
michael@0 441 U_NAMESPACE_END
michael@0 442
michael@0 443 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */

mercurial