|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (C) 2001-2008 IBM and others. All rights reserved. |
|
4 ********************************************************************** |
|
5 * Date Name Description |
|
6 * 03/22/2000 helena Creation. |
|
7 ********************************************************************** |
|
8 */ |
|
9 |
|
10 #include "unicode/utypes.h" |
|
11 |
|
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
|
13 |
|
14 #include "unicode/stsearch.h" |
|
15 #include "usrchimp.h" |
|
16 #include "cmemory.h" |
|
17 |
|
18 U_NAMESPACE_BEGIN |
|
19 |
|
20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) |
|
21 |
|
22 // public constructors and destructors ----------------------------------- |
|
23 |
|
24 StringSearch::StringSearch(const UnicodeString &pattern, |
|
25 const UnicodeString &text, |
|
26 const Locale &locale, |
|
27 BreakIterator *breakiter, |
|
28 UErrorCode &status) : |
|
29 SearchIterator(text, breakiter), |
|
30 m_collator_(), |
|
31 m_pattern_(pattern) |
|
32 { |
|
33 if (U_FAILURE(status)) { |
|
34 m_strsrch_ = NULL; |
|
35 return; |
|
36 } |
|
37 |
|
38 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), |
|
39 m_text_.getBuffer(), m_text_.length(), |
|
40 locale.getName(), (UBreakIterator *)breakiter, |
|
41 &status); |
|
42 uprv_free(m_search_); |
|
43 m_search_ = NULL; |
|
44 |
|
45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing |
|
46 // wrapper around the internal collator and rules, which (here) are |
|
47 // owned by this stringsearch object. this means 1) it's destructor |
|
48 // _should not_ delete the ucollator or rules, and 2) changes made |
|
49 // to the exposed collator (setStrength etc) _should_ modify the |
|
50 // ucollator. thus the collator is not a copy-on-write alias, and it |
|
51 // needs to distinguish itself not merely from 'stand alone' colators |
|
52 // but also from copy-on-write ones. it needs additional state, which |
|
53 // setUCollator should set. |
|
54 |
|
55 if (U_SUCCESS(status)) { |
|
56 // Alias the collator |
|
57 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
|
58 // m_search_ has been created by the base SearchIterator class |
|
59 m_search_ = m_strsrch_->search; |
|
60 } |
|
61 } |
|
62 |
|
63 StringSearch::StringSearch(const UnicodeString &pattern, |
|
64 const UnicodeString &text, |
|
65 RuleBasedCollator *coll, |
|
66 BreakIterator *breakiter, |
|
67 UErrorCode &status) : |
|
68 SearchIterator(text, breakiter), |
|
69 m_collator_(), |
|
70 m_pattern_(pattern) |
|
71 { |
|
72 if (U_FAILURE(status)) { |
|
73 m_strsrch_ = NULL; |
|
74 return; |
|
75 } |
|
76 if (coll == NULL) { |
|
77 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
78 m_strsrch_ = NULL; |
|
79 return; |
|
80 } |
|
81 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
|
82 m_pattern_.length(), |
|
83 m_text_.getBuffer(), |
|
84 m_text_.length(), coll->ucollator, |
|
85 (UBreakIterator *)breakiter, |
|
86 &status); |
|
87 uprv_free(m_search_); |
|
88 m_search_ = NULL; |
|
89 |
|
90 if (U_SUCCESS(status)) { |
|
91 // Alias the collator |
|
92 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
|
93 // m_search_ has been created by the base SearchIterator class |
|
94 m_search_ = m_strsrch_->search; |
|
95 } |
|
96 } |
|
97 |
|
98 StringSearch::StringSearch(const UnicodeString &pattern, |
|
99 CharacterIterator &text, |
|
100 const Locale &locale, |
|
101 BreakIterator *breakiter, |
|
102 UErrorCode &status) : |
|
103 SearchIterator(text, breakiter), |
|
104 m_collator_(), |
|
105 m_pattern_(pattern) |
|
106 { |
|
107 if (U_FAILURE(status)) { |
|
108 m_strsrch_ = NULL; |
|
109 return; |
|
110 } |
|
111 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), |
|
112 m_text_.getBuffer(), m_text_.length(), |
|
113 locale.getName(), (UBreakIterator *)breakiter, |
|
114 &status); |
|
115 uprv_free(m_search_); |
|
116 m_search_ = NULL; |
|
117 |
|
118 if (U_SUCCESS(status)) { |
|
119 // Alias the collator |
|
120 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
|
121 // m_search_ has been created by the base SearchIterator class |
|
122 m_search_ = m_strsrch_->search; |
|
123 } |
|
124 } |
|
125 |
|
126 StringSearch::StringSearch(const UnicodeString &pattern, |
|
127 CharacterIterator &text, |
|
128 RuleBasedCollator *coll, |
|
129 BreakIterator *breakiter, |
|
130 UErrorCode &status) : |
|
131 SearchIterator(text, breakiter), |
|
132 m_collator_(), |
|
133 m_pattern_(pattern) |
|
134 { |
|
135 if (U_FAILURE(status)) { |
|
136 m_strsrch_ = NULL; |
|
137 return; |
|
138 } |
|
139 if (coll == NULL) { |
|
140 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
141 m_strsrch_ = NULL; |
|
142 return; |
|
143 } |
|
144 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
|
145 m_pattern_.length(), |
|
146 m_text_.getBuffer(), |
|
147 m_text_.length(), coll->ucollator, |
|
148 (UBreakIterator *)breakiter, |
|
149 &status); |
|
150 uprv_free(m_search_); |
|
151 m_search_ = NULL; |
|
152 |
|
153 if (U_SUCCESS(status)) { |
|
154 // Alias the collator |
|
155 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
|
156 // m_search_ has been created by the base SearchIterator class |
|
157 m_search_ = m_strsrch_->search; |
|
158 } |
|
159 } |
|
160 |
|
161 StringSearch::StringSearch(const StringSearch &that) : |
|
162 SearchIterator(that.m_text_, that.m_breakiterator_), |
|
163 m_collator_(), |
|
164 m_pattern_(that.m_pattern_) |
|
165 { |
|
166 UErrorCode status = U_ZERO_ERROR; |
|
167 |
|
168 // Free m_search_ from the superclass |
|
169 uprv_free(m_search_); |
|
170 m_search_ = NULL; |
|
171 |
|
172 if (that.m_strsrch_ == NULL) { |
|
173 // This was not a good copy |
|
174 m_strsrch_ = NULL; |
|
175 } |
|
176 else { |
|
177 // Make a deep copy |
|
178 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
|
179 m_pattern_.length(), |
|
180 m_text_.getBuffer(), |
|
181 m_text_.length(), |
|
182 that.m_strsrch_->collator, |
|
183 (UBreakIterator *)that.m_breakiterator_, |
|
184 &status); |
|
185 if (U_SUCCESS(status)) { |
|
186 // Alias the collator |
|
187 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
|
188 // m_search_ has been created by the base SearchIterator class |
|
189 m_search_ = m_strsrch_->search; |
|
190 } |
|
191 } |
|
192 } |
|
193 |
|
194 StringSearch::~StringSearch() |
|
195 { |
|
196 if (m_strsrch_ != NULL) { |
|
197 usearch_close(m_strsrch_); |
|
198 m_search_ = NULL; |
|
199 } |
|
200 } |
|
201 |
|
202 StringSearch * |
|
203 StringSearch::clone() const { |
|
204 return new StringSearch(*this); |
|
205 } |
|
206 |
|
207 // operator overloading --------------------------------------------- |
|
208 StringSearch & StringSearch::operator=(const StringSearch &that) |
|
209 { |
|
210 if ((*this) != that) { |
|
211 UErrorCode status = U_ZERO_ERROR; |
|
212 m_text_ = that.m_text_; |
|
213 m_breakiterator_ = that.m_breakiterator_; |
|
214 m_pattern_ = that.m_pattern_; |
|
215 // all m_search_ in the parent class is linked up with m_strsrch_ |
|
216 usearch_close(m_strsrch_); |
|
217 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
|
218 m_pattern_.length(), |
|
219 m_text_.getBuffer(), |
|
220 m_text_.length(), |
|
221 that.m_strsrch_->collator, |
|
222 NULL, &status); |
|
223 // Check null pointer |
|
224 if (m_strsrch_ != NULL) { |
|
225 // Alias the collator |
|
226 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
|
227 m_search_ = m_strsrch_->search; |
|
228 } |
|
229 } |
|
230 return *this; |
|
231 } |
|
232 |
|
233 UBool StringSearch::operator==(const SearchIterator &that) const |
|
234 { |
|
235 if (this == &that) { |
|
236 return TRUE; |
|
237 } |
|
238 if (SearchIterator::operator ==(that)) { |
|
239 StringSearch &thatsrch = (StringSearch &)that; |
|
240 return (this->m_pattern_ == thatsrch.m_pattern_ && |
|
241 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); |
|
242 } |
|
243 return FALSE; |
|
244 } |
|
245 |
|
246 // public get and set methods ---------------------------------------- |
|
247 |
|
248 void StringSearch::setOffset(int32_t position, UErrorCode &status) |
|
249 { |
|
250 // status checked in usearch_setOffset |
|
251 usearch_setOffset(m_strsrch_, position, &status); |
|
252 } |
|
253 |
|
254 int32_t StringSearch::getOffset(void) const |
|
255 { |
|
256 return usearch_getOffset(m_strsrch_); |
|
257 } |
|
258 |
|
259 void StringSearch::setText(const UnicodeString &text, UErrorCode &status) |
|
260 { |
|
261 if (U_SUCCESS(status)) { |
|
262 m_text_ = text; |
|
263 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); |
|
264 } |
|
265 } |
|
266 |
|
267 void StringSearch::setText(CharacterIterator &text, UErrorCode &status) |
|
268 { |
|
269 if (U_SUCCESS(status)) { |
|
270 text.getText(m_text_); |
|
271 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); |
|
272 } |
|
273 } |
|
274 |
|
275 RuleBasedCollator * StringSearch::getCollator() const |
|
276 { |
|
277 return (RuleBasedCollator *)&m_collator_; |
|
278 } |
|
279 |
|
280 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) |
|
281 { |
|
282 if (U_SUCCESS(status)) { |
|
283 usearch_setCollator(m_strsrch_, coll->getUCollator(), &status); |
|
284 // Alias the collator |
|
285 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
|
286 } |
|
287 } |
|
288 |
|
289 void StringSearch::setPattern(const UnicodeString &pattern, |
|
290 UErrorCode &status) |
|
291 { |
|
292 if (U_SUCCESS(status)) { |
|
293 m_pattern_ = pattern; |
|
294 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), |
|
295 &status); |
|
296 } |
|
297 } |
|
298 |
|
299 const UnicodeString & StringSearch::getPattern() const |
|
300 { |
|
301 return m_pattern_; |
|
302 } |
|
303 |
|
304 // public methods ---------------------------------------------------- |
|
305 |
|
306 void StringSearch::reset() |
|
307 { |
|
308 usearch_reset(m_strsrch_); |
|
309 } |
|
310 |
|
311 SearchIterator * StringSearch::safeClone(void) const |
|
312 { |
|
313 UErrorCode status = U_ZERO_ERROR; |
|
314 StringSearch *result = new StringSearch(m_pattern_, m_text_, |
|
315 (RuleBasedCollator *)&m_collator_, |
|
316 m_breakiterator_, |
|
317 status); |
|
318 /* test for NULL */ |
|
319 if (result == 0) { |
|
320 status = U_MEMORY_ALLOCATION_ERROR; |
|
321 return 0; |
|
322 } |
|
323 result->setOffset(getOffset(), status); |
|
324 result->setMatchStart(m_strsrch_->search->matchedIndex); |
|
325 result->setMatchLength(m_strsrch_->search->matchedLength); |
|
326 if (U_FAILURE(status)) { |
|
327 return NULL; |
|
328 } |
|
329 return result; |
|
330 } |
|
331 |
|
332 // protected method ------------------------------------------------- |
|
333 |
|
334 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) |
|
335 { |
|
336 // values passed here are already in the pre-shift position |
|
337 if (U_SUCCESS(status)) { |
|
338 if (m_strsrch_->pattern.CELength == 0) { |
|
339 m_search_->matchedIndex = |
|
340 m_search_->matchedIndex == USEARCH_DONE ? |
|
341 getOffset() : m_search_->matchedIndex + 1; |
|
342 m_search_->matchedLength = 0; |
|
343 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, |
|
344 &status); |
|
345 if (m_search_->matchedIndex == m_search_->textLength) { |
|
346 m_search_->matchedIndex = USEARCH_DONE; |
|
347 } |
|
348 } |
|
349 else { |
|
350 // looking at usearch.cpp, this part is shifted out to |
|
351 // StringSearch instead of SearchIterator because m_strsrch_ is |
|
352 // not accessible in SearchIterator |
|
353 #if 0 |
|
354 if (position + m_strsrch_->pattern.defaultShiftSize |
|
355 > m_search_->textLength) { |
|
356 setMatchNotFound(); |
|
357 return USEARCH_DONE; |
|
358 } |
|
359 #endif |
|
360 if (m_search_->matchedLength <= 0) { |
|
361 // the flipping direction issue has already been handled |
|
362 // in next() |
|
363 // for boundary check purposes. this will ensure that the |
|
364 // next match will not preceed the current offset |
|
365 // note search->matchedIndex will always be set to something |
|
366 // in the code |
|
367 m_search_->matchedIndex = position - 1; |
|
368 } |
|
369 |
|
370 ucol_setOffset(m_strsrch_->textIter, position, &status); |
|
371 |
|
372 #if 0 |
|
373 for (;;) { |
|
374 if (m_search_->isCanonicalMatch) { |
|
375 // can't use exact here since extra accents are allowed. |
|
376 usearch_handleNextCanonical(m_strsrch_, &status); |
|
377 } |
|
378 else { |
|
379 usearch_handleNextExact(m_strsrch_, &status); |
|
380 } |
|
381 if (U_FAILURE(status)) { |
|
382 return USEARCH_DONE; |
|
383 } |
|
384 if (m_breakiterator_ == NULL |
|
385 #if !UCONFIG_NO_BREAK_ITERATION |
|
386 || |
|
387 m_search_->matchedIndex == USEARCH_DONE || |
|
388 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && |
|
389 m_breakiterator_->isBoundary(m_search_->matchedIndex + |
|
390 m_search_->matchedLength)) |
|
391 #endif |
|
392 ) { |
|
393 if (m_search_->matchedIndex == USEARCH_DONE) { |
|
394 ucol_setOffset(m_strsrch_->textIter, |
|
395 m_search_->textLength, &status); |
|
396 } |
|
397 else { |
|
398 ucol_setOffset(m_strsrch_->textIter, |
|
399 m_search_->matchedIndex, &status); |
|
400 } |
|
401 return m_search_->matchedIndex; |
|
402 } |
|
403 } |
|
404 #else |
|
405 // if m_strsrch_->breakIter is always the same as m_breakiterator_ |
|
406 // then we don't need to check the match boundaries here because |
|
407 // usearch_handleNextXXX will already have done it. |
|
408 if (m_search_->isCanonicalMatch) { |
|
409 // *could* actually use exact here 'cause no extra accents allowed... |
|
410 usearch_handleNextCanonical(m_strsrch_, &status); |
|
411 } else { |
|
412 usearch_handleNextExact(m_strsrch_, &status); |
|
413 } |
|
414 |
|
415 if (U_FAILURE(status)) { |
|
416 return USEARCH_DONE; |
|
417 } |
|
418 |
|
419 if (m_search_->matchedIndex == USEARCH_DONE) { |
|
420 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); |
|
421 } else { |
|
422 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); |
|
423 } |
|
424 |
|
425 return m_search_->matchedIndex; |
|
426 #endif |
|
427 } |
|
428 } |
|
429 return USEARCH_DONE; |
|
430 } |
|
431 |
|
432 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) |
|
433 { |
|
434 // values passed here are already in the pre-shift position |
|
435 if (U_SUCCESS(status)) { |
|
436 if (m_strsrch_->pattern.CELength == 0) { |
|
437 m_search_->matchedIndex = |
|
438 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : |
|
439 m_search_->matchedIndex); |
|
440 if (m_search_->matchedIndex == 0) { |
|
441 setMatchNotFound(); |
|
442 } |
|
443 else { |
|
444 m_search_->matchedIndex --; |
|
445 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, |
|
446 &status); |
|
447 m_search_->matchedLength = 0; |
|
448 } |
|
449 } |
|
450 else { |
|
451 // looking at usearch.cpp, this part is shifted out to |
|
452 // StringSearch instead of SearchIterator because m_strsrch_ is |
|
453 // not accessible in SearchIterator |
|
454 #if 0 |
|
455 if (!m_search_->isOverlap && |
|
456 position - m_strsrch_->pattern.defaultShiftSize < 0) { |
|
457 setMatchNotFound(); |
|
458 return USEARCH_DONE; |
|
459 } |
|
460 |
|
461 for (;;) { |
|
462 if (m_search_->isCanonicalMatch) { |
|
463 // can't use exact here since extra accents are allowed. |
|
464 usearch_handlePreviousCanonical(m_strsrch_, &status); |
|
465 } |
|
466 else { |
|
467 usearch_handlePreviousExact(m_strsrch_, &status); |
|
468 } |
|
469 if (U_FAILURE(status)) { |
|
470 return USEARCH_DONE; |
|
471 } |
|
472 if (m_breakiterator_ == NULL |
|
473 #if !UCONFIG_NO_BREAK_ITERATION |
|
474 || |
|
475 m_search_->matchedIndex == USEARCH_DONE || |
|
476 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && |
|
477 m_breakiterator_->isBoundary(m_search_->matchedIndex + |
|
478 m_search_->matchedLength)) |
|
479 #endif |
|
480 ) { |
|
481 return m_search_->matchedIndex; |
|
482 } |
|
483 } |
|
484 #else |
|
485 ucol_setOffset(m_strsrch_->textIter, position, &status); |
|
486 |
|
487 if (m_search_->isCanonicalMatch) { |
|
488 // *could* use exact match here since extra accents *not* allowed! |
|
489 usearch_handlePreviousCanonical(m_strsrch_, &status); |
|
490 } else { |
|
491 usearch_handlePreviousExact(m_strsrch_, &status); |
|
492 } |
|
493 |
|
494 if (U_FAILURE(status)) { |
|
495 return USEARCH_DONE; |
|
496 } |
|
497 |
|
498 return m_search_->matchedIndex; |
|
499 #endif |
|
500 } |
|
501 |
|
502 return m_search_->matchedIndex; |
|
503 } |
|
504 return USEARCH_DONE; |
|
505 } |
|
506 |
|
507 U_NAMESPACE_END |
|
508 |
|
509 #endif /* #if !UCONFIG_NO_COLLATION */ |