|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (C) 2001-2008,2010 IBM and others. All rights reserved. |
|
4 ********************************************************************** |
|
5 * Date Name Description |
|
6 * 03/22/2000 helena Creation. |
|
7 ********************************************************************** |
|
8 */ |
|
9 |
|
10 #include "unicode/utypes.h" |
|
11 |
|
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
|
13 |
|
14 #include "unicode/brkiter.h" |
|
15 #include "unicode/schriter.h" |
|
16 #include "unicode/search.h" |
|
17 #include "usrchimp.h" |
|
18 #include "cmemory.h" |
|
19 |
|
20 // public constructors and destructors ----------------------------------- |
|
21 U_NAMESPACE_BEGIN |
|
22 |
|
23 SearchIterator::SearchIterator(const SearchIterator &other) |
|
24 : UObject(other) |
|
25 { |
|
26 m_breakiterator_ = other.m_breakiterator_; |
|
27 m_text_ = other.m_text_; |
|
28 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); |
|
29 m_search_->breakIter = other.m_search_->breakIter; |
|
30 m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch; |
|
31 m_search_->isOverlap = other.m_search_->isOverlap; |
|
32 m_search_->elementComparisonType = other.m_search_->elementComparisonType; |
|
33 m_search_->matchedIndex = other.m_search_->matchedIndex; |
|
34 m_search_->matchedLength = other.m_search_->matchedLength; |
|
35 m_search_->text = other.m_search_->text; |
|
36 m_search_->textLength = other.m_search_->textLength; |
|
37 } |
|
38 |
|
39 SearchIterator::~SearchIterator() |
|
40 { |
|
41 if (m_search_ != NULL) { |
|
42 uprv_free(m_search_); |
|
43 } |
|
44 } |
|
45 |
|
46 // public get and set methods ---------------------------------------- |
|
47 |
|
48 void SearchIterator::setAttribute(USearchAttribute attribute, |
|
49 USearchAttributeValue value, |
|
50 UErrorCode &status) |
|
51 { |
|
52 if (U_SUCCESS(status)) { |
|
53 switch (attribute) |
|
54 { |
|
55 case USEARCH_OVERLAP : |
|
56 m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE); |
|
57 break; |
|
58 case USEARCH_CANONICAL_MATCH : |
|
59 m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE); |
|
60 break; |
|
61 case USEARCH_ELEMENT_COMPARISON : |
|
62 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { |
|
63 m_search_->elementComparisonType = (int16_t)value; |
|
64 } else { |
|
65 m_search_->elementComparisonType = 0; |
|
66 } |
|
67 break; |
|
68 default: |
|
69 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
70 } |
|
71 } |
|
72 if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) { |
|
73 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
74 } |
|
75 } |
|
76 |
|
77 USearchAttributeValue SearchIterator::getAttribute( |
|
78 USearchAttribute attribute) const |
|
79 { |
|
80 switch (attribute) { |
|
81 case USEARCH_OVERLAP : |
|
82 return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF); |
|
83 case USEARCH_CANONICAL_MATCH : |
|
84 return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : |
|
85 USEARCH_OFF); |
|
86 case USEARCH_ELEMENT_COMPARISON : |
|
87 { |
|
88 int16_t value = m_search_->elementComparisonType; |
|
89 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { |
|
90 return (USearchAttributeValue)value; |
|
91 } else { |
|
92 return USEARCH_STANDARD_ELEMENT_COMPARISON; |
|
93 } |
|
94 } |
|
95 default : |
|
96 return USEARCH_DEFAULT; |
|
97 } |
|
98 } |
|
99 |
|
100 int32_t SearchIterator::getMatchedStart() const |
|
101 { |
|
102 return m_search_->matchedIndex; |
|
103 } |
|
104 |
|
105 int32_t SearchIterator::getMatchedLength() const |
|
106 { |
|
107 return m_search_->matchedLength; |
|
108 } |
|
109 |
|
110 void SearchIterator::getMatchedText(UnicodeString &result) const |
|
111 { |
|
112 int32_t matchedindex = m_search_->matchedIndex; |
|
113 int32_t matchedlength = m_search_->matchedLength; |
|
114 if (matchedindex != USEARCH_DONE && matchedlength != 0) { |
|
115 result.setTo(m_search_->text + matchedindex, matchedlength); |
|
116 } |
|
117 else { |
|
118 result.remove(); |
|
119 } |
|
120 } |
|
121 |
|
122 void SearchIterator::setBreakIterator(BreakIterator *breakiter, |
|
123 UErrorCode &status) |
|
124 { |
|
125 if (U_SUCCESS(status)) { |
|
126 #if 0 |
|
127 m_search_->breakIter = NULL; |
|
128 // the c++ breakiterator may not make use of ubreakiterator. |
|
129 // so we'll have to keep track of it ourselves. |
|
130 #else |
|
131 // Well, gee... the Constructors that take a BreakIterator |
|
132 // all cast the BreakIterator to a UBreakIterator and |
|
133 // pass it to the corresponding usearch_openFromXXX |
|
134 // routine, so there's no reason not to do this. |
|
135 // |
|
136 // Besides, a UBreakIterator is a BreakIterator, so |
|
137 // any subclass of BreakIterator should work fine here... |
|
138 m_search_->breakIter = (UBreakIterator *) breakiter; |
|
139 #endif |
|
140 |
|
141 m_breakiterator_ = breakiter; |
|
142 } |
|
143 } |
|
144 |
|
145 const BreakIterator * SearchIterator::getBreakIterator(void) const |
|
146 { |
|
147 return m_breakiterator_; |
|
148 } |
|
149 |
|
150 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status) |
|
151 { |
|
152 if (U_SUCCESS(status)) { |
|
153 if (text.length() == 0) { |
|
154 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
155 } |
|
156 else { |
|
157 m_text_ = text; |
|
158 m_search_->text = m_text_.getBuffer(); |
|
159 m_search_->textLength = m_text_.length(); |
|
160 } |
|
161 } |
|
162 } |
|
163 |
|
164 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status) |
|
165 { |
|
166 if (U_SUCCESS(status)) { |
|
167 text.getText(m_text_); |
|
168 setText(m_text_, status); |
|
169 } |
|
170 } |
|
171 |
|
172 const UnicodeString & SearchIterator::getText(void) const |
|
173 { |
|
174 return m_text_; |
|
175 } |
|
176 |
|
177 // operator overloading ---------------------------------------------- |
|
178 |
|
179 UBool SearchIterator::operator==(const SearchIterator &that) const |
|
180 { |
|
181 if (this == &that) { |
|
182 return TRUE; |
|
183 } |
|
184 return (m_breakiterator_ == that.m_breakiterator_ && |
|
185 m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch && |
|
186 m_search_->isOverlap == that.m_search_->isOverlap && |
|
187 m_search_->elementComparisonType == that.m_search_->elementComparisonType && |
|
188 m_search_->matchedIndex == that.m_search_->matchedIndex && |
|
189 m_search_->matchedLength == that.m_search_->matchedLength && |
|
190 m_search_->textLength == that.m_search_->textLength && |
|
191 getOffset() == that.getOffset() && |
|
192 (uprv_memcmp(m_search_->text, that.m_search_->text, |
|
193 m_search_->textLength * sizeof(UChar)) == 0)); |
|
194 } |
|
195 |
|
196 // public methods ---------------------------------------------------- |
|
197 |
|
198 int32_t SearchIterator::first(UErrorCode &status) |
|
199 { |
|
200 if (U_FAILURE(status)) { |
|
201 return USEARCH_DONE; |
|
202 } |
|
203 setOffset(0, status); |
|
204 return handleNext(0, status); |
|
205 } |
|
206 |
|
207 int32_t SearchIterator::following(int32_t position, |
|
208 UErrorCode &status) |
|
209 { |
|
210 if (U_FAILURE(status)) { |
|
211 return USEARCH_DONE; |
|
212 } |
|
213 setOffset(position, status); |
|
214 return handleNext(position, status); |
|
215 } |
|
216 |
|
217 int32_t SearchIterator::last(UErrorCode &status) |
|
218 { |
|
219 if (U_FAILURE(status)) { |
|
220 return USEARCH_DONE; |
|
221 } |
|
222 setOffset(m_search_->textLength, status); |
|
223 return handlePrev(m_search_->textLength, status); |
|
224 } |
|
225 |
|
226 int32_t SearchIterator::preceding(int32_t position, |
|
227 UErrorCode &status) |
|
228 { |
|
229 if (U_FAILURE(status)) { |
|
230 return USEARCH_DONE; |
|
231 } |
|
232 setOffset(position, status); |
|
233 return handlePrev(position, status); |
|
234 } |
|
235 |
|
236 int32_t SearchIterator::next(UErrorCode &status) |
|
237 { |
|
238 if (U_SUCCESS(status)) { |
|
239 int32_t offset = getOffset(); |
|
240 int32_t matchindex = m_search_->matchedIndex; |
|
241 int32_t matchlength = m_search_->matchedLength; |
|
242 m_search_->reset = FALSE; |
|
243 if (m_search_->isForwardSearching == TRUE) { |
|
244 int32_t textlength = m_search_->textLength; |
|
245 if (offset == textlength || matchindex == textlength || |
|
246 (matchindex != USEARCH_DONE && |
|
247 matchindex + matchlength >= textlength)) { |
|
248 // not enough characters to match |
|
249 setMatchNotFound(); |
|
250 return USEARCH_DONE; |
|
251 } |
|
252 } |
|
253 else { |
|
254 // switching direction. |
|
255 // if matchedIndex == USEARCH_DONE, it means that either a |
|
256 // setOffset has been called or that previous ran off the text |
|
257 // string. the iterator would have been set to offset 0 if a |
|
258 // match is not found. |
|
259 m_search_->isForwardSearching = TRUE; |
|
260 if (m_search_->matchedIndex != USEARCH_DONE) { |
|
261 // there's no need to set the collation element iterator |
|
262 // the next call to next will set the offset. |
|
263 return matchindex; |
|
264 } |
|
265 } |
|
266 |
|
267 if (matchlength > 0) { |
|
268 // if matchlength is 0 we are at the start of the iteration |
|
269 if (m_search_->isOverlap) { |
|
270 offset ++; |
|
271 } |
|
272 else { |
|
273 offset += matchlength; |
|
274 } |
|
275 } |
|
276 return handleNext(offset, status); |
|
277 } |
|
278 return USEARCH_DONE; |
|
279 } |
|
280 |
|
281 int32_t SearchIterator::previous(UErrorCode &status) |
|
282 { |
|
283 if (U_SUCCESS(status)) { |
|
284 int32_t offset; |
|
285 if (m_search_->reset) { |
|
286 offset = m_search_->textLength; |
|
287 m_search_->isForwardSearching = FALSE; |
|
288 m_search_->reset = FALSE; |
|
289 setOffset(offset, status); |
|
290 } |
|
291 else { |
|
292 offset = getOffset(); |
|
293 } |
|
294 |
|
295 int32_t matchindex = m_search_->matchedIndex; |
|
296 if (m_search_->isForwardSearching == TRUE) { |
|
297 // switching direction. |
|
298 // if matchedIndex == USEARCH_DONE, it means that either a |
|
299 // setOffset has been called or that next ran off the text |
|
300 // string. the iterator would have been set to offset textLength if |
|
301 // a match is not found. |
|
302 m_search_->isForwardSearching = FALSE; |
|
303 if (matchindex != USEARCH_DONE) { |
|
304 return matchindex; |
|
305 } |
|
306 } |
|
307 else { |
|
308 if (offset == 0 || matchindex == 0) { |
|
309 // not enough characters to match |
|
310 setMatchNotFound(); |
|
311 return USEARCH_DONE; |
|
312 } |
|
313 } |
|
314 |
|
315 if (matchindex != USEARCH_DONE) { |
|
316 if (m_search_->isOverlap) { |
|
317 matchindex += m_search_->matchedLength - 2; |
|
318 } |
|
319 |
|
320 return handlePrev(matchindex, status); |
|
321 } |
|
322 |
|
323 return handlePrev(offset, status); |
|
324 } |
|
325 |
|
326 return USEARCH_DONE; |
|
327 } |
|
328 |
|
329 void SearchIterator::reset() |
|
330 { |
|
331 UErrorCode status = U_ZERO_ERROR; |
|
332 setMatchNotFound(); |
|
333 setOffset(0, status); |
|
334 m_search_->isOverlap = FALSE; |
|
335 m_search_->isCanonicalMatch = FALSE; |
|
336 m_search_->elementComparisonType = 0; |
|
337 m_search_->isForwardSearching = TRUE; |
|
338 m_search_->reset = TRUE; |
|
339 } |
|
340 |
|
341 // protected constructors and destructors ----------------------------- |
|
342 |
|
343 SearchIterator::SearchIterator() |
|
344 { |
|
345 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); |
|
346 m_search_->breakIter = NULL; |
|
347 m_search_->isOverlap = FALSE; |
|
348 m_search_->isCanonicalMatch = FALSE; |
|
349 m_search_->elementComparisonType = 0; |
|
350 m_search_->isForwardSearching = TRUE; |
|
351 m_search_->reset = TRUE; |
|
352 m_search_->matchedIndex = USEARCH_DONE; |
|
353 m_search_->matchedLength = 0; |
|
354 m_search_->text = NULL; |
|
355 m_search_->textLength = 0; |
|
356 m_breakiterator_ = NULL; |
|
357 } |
|
358 |
|
359 SearchIterator::SearchIterator(const UnicodeString &text, |
|
360 BreakIterator *breakiter) : |
|
361 m_breakiterator_(breakiter), |
|
362 m_text_(text) |
|
363 { |
|
364 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); |
|
365 m_search_->breakIter = NULL; |
|
366 m_search_->isOverlap = FALSE; |
|
367 m_search_->isCanonicalMatch = FALSE; |
|
368 m_search_->elementComparisonType = 0; |
|
369 m_search_->isForwardSearching = TRUE; |
|
370 m_search_->reset = TRUE; |
|
371 m_search_->matchedIndex = USEARCH_DONE; |
|
372 m_search_->matchedLength = 0; |
|
373 m_search_->text = m_text_.getBuffer(); |
|
374 m_search_->textLength = text.length(); |
|
375 } |
|
376 |
|
377 SearchIterator::SearchIterator(CharacterIterator &text, |
|
378 BreakIterator *breakiter) : |
|
379 m_breakiterator_(breakiter) |
|
380 { |
|
381 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); |
|
382 m_search_->breakIter = NULL; |
|
383 m_search_->isOverlap = FALSE; |
|
384 m_search_->isCanonicalMatch = FALSE; |
|
385 m_search_->elementComparisonType = 0; |
|
386 m_search_->isForwardSearching = TRUE; |
|
387 m_search_->reset = TRUE; |
|
388 m_search_->matchedIndex = USEARCH_DONE; |
|
389 m_search_->matchedLength = 0; |
|
390 text.getText(m_text_); |
|
391 m_search_->text = m_text_.getBuffer(); |
|
392 m_search_->textLength = m_text_.length(); |
|
393 m_breakiterator_ = breakiter; |
|
394 } |
|
395 |
|
396 // protected methods ------------------------------------------------------ |
|
397 |
|
398 SearchIterator & SearchIterator::operator=(const SearchIterator &that) |
|
399 { |
|
400 if (this != &that) { |
|
401 m_breakiterator_ = that.m_breakiterator_; |
|
402 m_text_ = that.m_text_; |
|
403 m_search_->breakIter = that.m_search_->breakIter; |
|
404 m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch; |
|
405 m_search_->isOverlap = that.m_search_->isOverlap; |
|
406 m_search_->elementComparisonType = that.m_search_->elementComparisonType; |
|
407 m_search_->matchedIndex = that.m_search_->matchedIndex; |
|
408 m_search_->matchedLength = that.m_search_->matchedLength; |
|
409 m_search_->text = that.m_search_->text; |
|
410 m_search_->textLength = that.m_search_->textLength; |
|
411 } |
|
412 return *this; |
|
413 } |
|
414 |
|
415 void SearchIterator::setMatchLength(int32_t length) |
|
416 { |
|
417 m_search_->matchedLength = length; |
|
418 } |
|
419 |
|
420 void SearchIterator::setMatchStart(int32_t position) |
|
421 { |
|
422 m_search_->matchedIndex = position; |
|
423 } |
|
424 |
|
425 void SearchIterator::setMatchNotFound() |
|
426 { |
|
427 setMatchStart(USEARCH_DONE); |
|
428 setMatchLength(0); |
|
429 UErrorCode status = U_ZERO_ERROR; |
|
430 // by default no errors should be returned here since offsets are within |
|
431 // range. |
|
432 if (m_search_->isForwardSearching) { |
|
433 setOffset(m_search_->textLength, status); |
|
434 } |
|
435 else { |
|
436 setOffset(0, status); |
|
437 } |
|
438 } |
|
439 |
|
440 |
|
441 U_NAMESPACE_END |
|
442 |
|
443 #endif /* #if !UCONFIG_NO_COLLATION */ |