Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* |
michael@0 | 2 | ********************************************************************** |
michael@0 | 3 | * Copyright (C) 2001-2011 IBM and others. All rights reserved. |
michael@0 | 4 | ********************************************************************** |
michael@0 | 5 | * Date Name Description |
michael@0 | 6 | * 03/22/2000 helena Creation. |
michael@0 | 7 | ********************************************************************** |
michael@0 | 8 | */ |
michael@0 | 9 | |
michael@0 | 10 | #ifndef SEARCH_H |
michael@0 | 11 | #define SEARCH_H |
michael@0 | 12 | |
michael@0 | 13 | #include "unicode/utypes.h" |
michael@0 | 14 | |
michael@0 | 15 | /** |
michael@0 | 16 | * \file |
michael@0 | 17 | * \brief C++ API: SearchIterator object. |
michael@0 | 18 | */ |
michael@0 | 19 | |
michael@0 | 20 | #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
michael@0 | 21 | |
michael@0 | 22 | #include "unicode/uobject.h" |
michael@0 | 23 | #include "unicode/unistr.h" |
michael@0 | 24 | #include "unicode/chariter.h" |
michael@0 | 25 | #include "unicode/brkiter.h" |
michael@0 | 26 | #include "unicode/usearch.h" |
michael@0 | 27 | |
michael@0 | 28 | /** Forward declaration of the C search data struct USearch. |
michael@0 | 29 | * @stable ICU 2.0 |
michael@0 | 30 | */ |
michael@0 | 31 | struct USearch; |
michael@0 | 32 | /** Typedef so that USearch can be named without the struct keyword. |
michael@0 | 33 | * @stable ICU 2.0 |
michael@0 | 34 | */ |
michael@0 | 35 | typedef struct USearch USearch; |
michael@0 | 36 | |
michael@0 | 37 | U_NAMESPACE_BEGIN |
michael@0 | 38 | |
michael@0 | 39 | /** |
michael@0 | 40 | * |
michael@0 | 41 | * <tt>SearchIterator</tt> is an abstract base class that provides |
michael@0 | 42 | * methods to search for a pattern within a text string. Instances of |
michael@0 | 43 | * <tt>SearchIterator</tt> maintain a current position and scan over the |
michael@0 | 44 | * target text, returning the indices where the pattern is matched and the length |
michael@0 | 45 | * of each match. |
michael@0 | 46 | * <p> |
michael@0 | 47 | * <tt>SearchIterator</tt> defines a protocol for text searching. |
michael@0 | 48 | * Subclasses provide concrete implementations of various search algorithms. |
michael@0 | 49 | * For example, <tt>StringSearch</tt> implements language-sensitive pattern |
michael@0 | 50 | * matching based on the comparison rules defined in a |
michael@0 | 51 | * <tt>RuleBasedCollator</tt> object. |
michael@0 | 52 | * <p> |
michael@0 | 53 | * Other options for searching include using a BreakIterator to restrict |
michael@0 | 54 | * the points at which matches are detected. |
michael@0 | 55 | * <p> |
michael@0 | 56 | * <tt>SearchIterator</tt> provides an API that is similar to that of |
michael@0 | 57 | * other text iteration classes such as <tt>BreakIterator</tt>. Using |
michael@0 | 58 | * this class, it is easy to scan through text looking for all occurrences of |
michael@0 | 59 | * a given pattern. The following example uses a <tt>StringSearch</tt> |
michael@0 | 60 | * object to find all instances of "fox" in the target string. Any other |
michael@0 | 61 | * subclass of <tt>SearchIterator</tt> can be used in an identical |
michael@0 | 62 | * manner. |
michael@0 | 63 | * <pre><code> |
michael@0 | 64 | * UnicodeString target("The quick brown fox jumped over the lazy fox"); |
michael@0 | 65 | * UnicodeString pattern("fox"); |
michael@0 | 66 | * |
michael@0 | 67 | * SearchIterator *iter = new StringSearch(pattern, target); |
michael@0 | 68 | * UErrorCode error = U_ZERO_ERROR; |
michael@0 | 69 | * for (int pos = iter->first(error); pos != USEARCH_DONE; |
michael@0 | 70 | * pos = iter->next(error)) { |
michael@0 | 71 | * printf("Found match at %d pos, length is %d\n", pos, |
michael@0 | 72 | * iter->getMatchedLength()); |
michael@0 | 73 | * } |
michael@0 | 74 | * </code></pre> |
michael@0 | 75 | * |
michael@0 | 76 | * @see StringSearch |
michael@0 | 77 | * @see RuleBasedCollator |
michael@0 | 78 | */ |
michael@0 | 79 | class U_I18N_API SearchIterator : public UObject { |
michael@0 | 80 | |
michael@0 | 81 | public: |
michael@0 | 82 | |
michael@0 | 83 | // public constructors and destructors ------------------------------- |
michael@0 | 84 | |
michael@0 | 85 | /** |
michael@0 | 86 | * Copy constructor that creates a SearchIterator instance with the same |
michael@0 | 87 | * behavior, and iterating over the same text. |
michael@0 | 88 | * @param other the SearchIterator instance to be copied. |
michael@0 | 89 | * @stable ICU 2.0 |
michael@0 | 90 | */ |
michael@0 | 91 | SearchIterator(const SearchIterator &other); |
michael@0 | 92 | |
michael@0 | 93 | /** |
michael@0 | 94 | * Destructor. Cleans up the search iterator data struct. |
michael@0 | 95 | * @stable ICU 2.0 |
michael@0 | 96 | */ |
michael@0 | 97 | virtual ~SearchIterator(); |
michael@0 | 98 | |
michael@0 | 99 | // public get and set methods ---------------------------------------- |
michael@0 | 100 | |
michael@0 | 101 | /** |
michael@0 | 102 | * Sets the index to point to the given position, and clears any state |
michael@0 | 103 | * that's affected. |
michael@0 | 104 | * <p> |
michael@0 | 105 | * This method takes the argument index and sets the position in the text |
michael@0 | 106 | * string accordingly without checking if the index is pointing to a |
michael@0 | 107 | * valid starting point to begin searching. |
michael@0 | 108 | * @param position within the text to be set. If position is less |
michael@0 | 109 | * than or greater than the text range for searching, |
michael@0 | 110 | * an U_INDEX_OUTOFBOUNDS_ERROR will be returned |
michael@0 | 111 | * @param status for errors if it occurs |
michael@0 | 112 | * @stable ICU 2.0 |
michael@0 | 113 | */ |
michael@0 | 114 | virtual void setOffset(int32_t position, UErrorCode &status) = 0; |
michael@0 | 115 | |
michael@0 | 116 | /** |
michael@0 | 117 | * Return the current index in the text being searched. |
michael@0 | 118 | * If the iteration has gone past the end of the text |
michael@0 | 119 | * (or past the beginning for a backwards search), USEARCH_DONE |
michael@0 | 120 | * is returned. |
michael@0 | 121 | * @return current index in the text being searched. |
michael@0 | 122 | * @stable ICU 2.0 |
michael@0 | 123 | */ |
michael@0 | 124 | virtual int32_t getOffset(void) const = 0; |
michael@0 | 125 | |
michael@0 | 126 | /** |
michael@0 | 127 | * Sets the text searching attributes located in the enum |
michael@0 | 128 | * USearchAttribute with values from the enum USearchAttributeValue. |
michael@0 | 129 | * USEARCH_DEFAULT can be used for all attributes for resetting. |
michael@0 | 130 | * @param attribute text attribute (enum USearchAttribute) to be set |
michael@0 | 131 | * @param value text attribute value |
michael@0 | 132 | * @param status for errors if it occurs |
michael@0 | 133 | * @stable ICU 2.0 |
michael@0 | 134 | */ |
michael@0 | 135 | void setAttribute(USearchAttribute attribute, |
michael@0 | 136 | USearchAttributeValue value, |
michael@0 | 137 | UErrorCode &status); |
michael@0 | 138 | |
michael@0 | 139 | /** |
michael@0 | 140 | * Gets the text searching attributes |
michael@0 | 141 | * @param attribute text attribute (enum USearchAttribute) to be retrieved |
michael@0 | 142 | * @return text attribute value |
michael@0 | 143 | * @stable ICU 2.0 |
michael@0 | 144 | */ |
michael@0 | 145 | USearchAttributeValue getAttribute(USearchAttribute attribute) const; |
michael@0 | 146 | |
michael@0 | 147 | /** |
michael@0 | 148 | * Returns the index to the match in the text string that was searched. |
michael@0 | 149 | * This call returns a valid result only after a successful call to |
michael@0 | 150 | * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>. |
michael@0 | 151 | * Just after construction, or after a searching method returns |
michael@0 | 152 | * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>. |
michael@0 | 153 | * <p> |
michael@0 | 154 | * Use getMatchedLength to get the matched string length. |
michael@0 | 155 | * @return index of a substring within the text string that is being |
michael@0 | 156 | * searched. |
michael@0 | 157 | * @see #first |
michael@0 | 158 | * @see #next |
michael@0 | 159 | * @see #previous |
michael@0 | 160 | * @see #last |
michael@0 | 161 | * @stable ICU 2.0 |
michael@0 | 162 | */ |
michael@0 | 163 | int32_t getMatchedStart(void) const; |
michael@0 | 164 | |
michael@0 | 165 | /** |
michael@0 | 166 | * Returns the length of text in the string which matches the search |
michael@0 | 167 | * pattern. This call returns a valid result only after a successful call |
michael@0 | 168 | * to <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>. |
michael@0 | 169 | * Just after construction, or after a searching method returns |
michael@0 | 170 | * <tt>USEARCH_DONE</tt>, this method will return 0. |
michael@0 | 171 | * @return The length of the match in the target text, or 0 if there |
michael@0 | 172 | * is no match currently. |
michael@0 | 173 | * @see #first |
michael@0 | 174 | * @see #next |
michael@0 | 175 | * @see #previous |
michael@0 | 176 | * @see #last |
michael@0 | 177 | * @stable ICU 2.0 |
michael@0 | 178 | */ |
michael@0 | 179 | int32_t getMatchedLength(void) const; |
michael@0 | 180 | |
michael@0 | 181 | /** |
michael@0 | 182 | * Returns the text that was matched by the most recent call to |
michael@0 | 183 | * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>. |
michael@0 | 184 | * If the iterator is not pointing at a valid match (e.g. just after |
michael@0 | 185 | * construction or after <tt>USEARCH_DONE</tt> has been returned), |
michael@0 | 186 | * returns an empty string. |
michael@0 | 187 | * @param result stores the matched string or an empty string if a match |
michael@0 | 188 | * is not found. |
michael@0 | 189 | * @see #first |
michael@0 | 190 | * @see #next |
michael@0 | 191 | * @see #previous |
michael@0 | 192 | * @see #last |
michael@0 | 193 | * @stable ICU 2.0 |
michael@0 | 194 | */ |
michael@0 | 195 | void getMatchedText(UnicodeString &result) const; |
michael@0 | 196 | |
michael@0 | 197 | /** |
michael@0 | 198 | * Set the BreakIterator that will be used to restrict the points |
michael@0 | 199 | * at which matches are detected. The user is responsible for deleting |
michael@0 | 200 | * the breakiterator. |
michael@0 | 201 | * @param breakiter A BreakIterator that will be used to restrict the |
michael@0 | 202 | * points at which matches are detected. If a match is |
michael@0 | 203 | * found, but the match's start or end index is not a |
michael@0 | 204 | * boundary as determined by the <tt>BreakIterator</tt>, |
michael@0 | 205 | * the match will be rejected and another will be searched |
michael@0 | 206 | * for. If this parameter is <tt>NULL</tt>, no break |
michael@0 | 207 | * detection is attempted. |
michael@0 | 208 | * @param status for errors if it occurs |
michael@0 | 209 | * @see BreakIterator |
michael@0 | 210 | * @stable ICU 2.0 |
michael@0 | 211 | */ |
michael@0 | 212 | void setBreakIterator(BreakIterator *breakiter, UErrorCode &status); |
michael@0 | 213 | |
michael@0 | 214 | /** |
michael@0 | 215 | * Returns the BreakIterator that is used to restrict the points at |
michael@0 | 216 | * which matches are detected. This will be the same object that was |
michael@0 | 217 | * passed to the constructor or to <tt>setBreakIterator</tt>. |
michael@0 | 218 | * Note that <tt>NULL</tt> is a legal value; it means that break |
michael@0 | 219 | * detection should not be attempted. |
michael@0 | 220 | * @return BreakIterator used to restrict matchings. |
michael@0 | 221 | * @see #setBreakIterator |
michael@0 | 222 | * @stable ICU 2.0 |
michael@0 | 223 | */ |
michael@0 | 224 | const BreakIterator * getBreakIterator(void) const; |
michael@0 | 225 | |
michael@0 | 226 | /** |
michael@0 | 227 | * Set the string text to be searched. Text iteration will hence begin at |
michael@0 | 228 | * the start of the text string. This method is useful if you want to |
michael@0 | 229 | * re-use an iterator to search for the same pattern within a different |
michael@0 | 230 | * body of text. The user is responsible for deleting the text. |
michael@0 | 231 | * @param text string to be searched. |
michael@0 | 232 | * @param status for errors. If the text length is 0, |
michael@0 | 233 | * an U_ILLEGAL_ARGUMENT_ERROR is returned. |
michael@0 | 234 | * @stable ICU 2.0 |
michael@0 | 235 | */ |
michael@0 | 236 | virtual void setText(const UnicodeString &text, UErrorCode &status); |
michael@0 | 237 | |
michael@0 | 238 | /** |
michael@0 | 239 | * Set the string text to be searched. Text iteration will hence begin at |
michael@0 | 240 | * the start of the text string. This method is useful if you want to |
michael@0 | 241 | * re-use an iterator to search for the same pattern within a different |
michael@0 | 242 | * body of text. |
michael@0 | 243 | * <p> |
michael@0 | 244 | * Note: No parsing of the text within the <tt>CharacterIterator</tt> |
michael@0 | 245 | * will be done during searching for this version. The block of text |
michael@0 | 246 | * in <tt>CharacterIterator</tt> will be used as it is. |
michael@0 | 247 | * The user is responsible for deleting the text. |
michael@0 | 248 | * @param text string iterator to be searched. |
michael@0 | 249 | * @param status for errors if any. If the text length is 0 then an |
michael@0 | 250 | * U_ILLEGAL_ARGUMENT_ERROR is returned. |
michael@0 | 251 | * @stable ICU 2.0 |
michael@0 | 252 | */ |
michael@0 | 253 | virtual void setText(CharacterIterator &text, UErrorCode &status); |
michael@0 | 254 | |
michael@0 | 255 | /** |
michael@0 | 256 | * Return the string text to be searched. |
michael@0 | 257 | * @return text string to be searched. |
michael@0 | 258 | * @stable ICU 2.0 |
michael@0 | 259 | */ |
michael@0 | 260 | const UnicodeString & getText(void) const; |
michael@0 | 261 | |
michael@0 | 262 | // operator overloading ---------------------------------------------- |
michael@0 | 263 | |
michael@0 | 264 | /** |
michael@0 | 265 | * Equality operator. |
michael@0 | 266 | * @param that SearchIterator instance to be compared. |
michael@0 | 267 | * @return TRUE if both SearchIterators are of the same class, have the |
michael@0 | 268 | * same behavior, iterate over the same text and have the same |
michael@0 | 269 | * attributes. FALSE otherwise. |
michael@0 | 270 | * @stable ICU 2.0 |
michael@0 | 271 | */ |
michael@0 | 272 | virtual UBool operator==(const SearchIterator &that) const; |
michael@0 | 273 | |
michael@0 | 274 | /** |
michael@0 | 275 | * Not-equal operator. |
michael@0 | 276 | * @param that SearchIterator instance to be compared. |
michael@0 | 277 | * @return FALSE if operator== returns TRUE, and vice versa. |
michael@0 | 278 | * @stable ICU 2.0 |
michael@0 | 279 | */ |
michael@0 | 280 | UBool operator!=(const SearchIterator &that) const; |
michael@0 | 281 | |
michael@0 | 282 | // public methods ---------------------------------------------------- |
michael@0 | 283 | |
michael@0 | 284 | /** |
michael@0 | 285 | * Returns a copy of SearchIterator with the same behavior, and |
michael@0 | 286 | * iterating over the same text, as this one. Note that all data will be |
michael@0 | 287 | * replicated, except for the text string to be searched. |
michael@0 | 288 | * @return cloned object |
michael@0 | 289 | * @stable ICU 2.0 |
michael@0 | 290 | */ |
michael@0 | 291 | virtual SearchIterator* safeClone(void) const = 0; |
michael@0 | 292 | |
michael@0 | 293 | /** |
michael@0 | 294 | * Returns the first index at which the string text matches the search |
michael@0 | 295 | * pattern. The iterator is adjusted so that its current index (as |
michael@0 | 296 | * returned by <tt>getOffset</tt>) is the match position if one |
michael@0 | 297 | * was found. |
michael@0 | 298 | * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and |
michael@0 | 299 | * the iterator will be adjusted to the index USEARCH_DONE. |
michael@0 | 300 | * @param status for errors if it occurs |
michael@0 | 301 | * @return The character index of the first match, or |
michael@0 | 302 | * <tt>USEARCH_DONE</tt> if there are no matches. |
michael@0 | 303 | * @see #getOffset |
michael@0 | 304 | * @stable ICU 2.0 |
michael@0 | 305 | */ |
michael@0 | 306 | int32_t first(UErrorCode &status); |
michael@0 | 307 | |
michael@0 | 308 | /** |
michael@0 | 309 | * Returns the first index equal or greater than <tt>position</tt> at which the |
michael@0 | 310 | * string text matches the search pattern. The iterator is adjusted so |
michael@0 | 311 | * that its current index (as returned by <tt>getOffset</tt>) is the |
michael@0 | 312 | * match position if one was found. |
michael@0 | 313 | * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and the |
michael@0 | 314 | * iterator will be adjusted to the index <tt>USEARCH_DONE</tt>. |
michael@0 | 315 | * @param position where search is to start from. If position is less |
michael@0 | 316 | * than or greater than the text range for searching, |
michael@0 | 317 | * an U_INDEX_OUTOFBOUNDS_ERROR will be returned |
michael@0 | 318 | * @param status for errors if it occurs |
michael@0 | 319 | * @return The character index of the first match following |
michael@0 | 320 | * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are no |
michael@0 | 321 | * matches. |
michael@0 | 322 | * @see #getOffset |
michael@0 | 323 | * @stable ICU 2.0 |
michael@0 | 324 | */ |
michael@0 | 325 | int32_t following(int32_t position, UErrorCode &status); |
michael@0 | 326 | |
michael@0 | 327 | /** |
michael@0 | 328 | * Returns the last index in the target text at which it matches the |
michael@0 | 329 | * search pattern. The iterator is adjusted so that its current index |
michael@0 | 330 | * (as returned by <tt>getOffset</tt>) is the match position if one was |
michael@0 | 331 | * found. |
michael@0 | 332 | * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and |
michael@0 | 333 | * the iterator will be adjusted to the index USEARCH_DONE. |
michael@0 | 334 | * @param status for errors if it occurs |
michael@0 | 335 | * @return The index of the last match, or <tt>USEARCH_DONE</tt> if |
michael@0 | 336 | * there are no matches. |
michael@0 | 337 | * @see #getOffset |
michael@0 | 338 | * @stable ICU 2.0 |
michael@0 | 339 | */ |
michael@0 | 340 | int32_t last(UErrorCode &status); |
michael@0 | 341 | |
michael@0 | 342 | /** |
michael@0 | 343 | * Returns the first index less than <tt>position</tt> at which the string |
michael@0 | 344 | * text matches the search pattern. The iterator is adjusted so that its |
michael@0 | 345 | * current index (as returned by <tt>getOffset</tt>) is the match |
michael@0 | 346 | * position if one was found. If a match is not found, |
michael@0 | 347 | * <tt>USEARCH_DONE</tt> will be returned and the iterator will be |
michael@0 | 348 | * adjusted to the index USEARCH_DONE. |
michael@0 | 349 | * <p> |
michael@0 | 350 | * When <tt>USEARCH_OVERLAP</tt> option is off, the last index of the |
michael@0 | 351 | * result match is always less than <tt>position</tt>. |
michael@0 | 352 | * When <tt>USEARCH_OVERLAP</tt> is on, the result match may span across |
michael@0 | 353 | * <tt>position</tt>. |
michael@0 | 354 | * |
michael@0 | 355 | * @param position where search is to start from. If position is less |
michael@0 | 356 | * than or greater than the text range for searching, |
michael@0 | 357 | * an U_INDEX_OUTOFBOUNDS_ERROR will be returned |
michael@0 | 358 | * @param status for errors if it occurs |
michael@0 | 359 | * @return The character index of the first match preceding |
michael@0 | 360 | * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are |
michael@0 | 361 | * no matches. |
michael@0 | 362 | * @see #getOffset |
michael@0 | 363 | * @stable ICU 2.0 |
michael@0 | 364 | */ |
michael@0 | 365 | int32_t preceding(int32_t position, UErrorCode &status); |
michael@0 | 366 | |
michael@0 | 367 | /** |
michael@0 | 368 | * Returns the index of the next point at which the text matches the |
michael@0 | 369 | * search pattern, starting from the current position. |
michael@0 | 370 | * The iterator is adjusted so that its current index (as returned by |
michael@0 | 371 | * <tt>getOffset</tt>) is the match position if one was found. |
michael@0 | 372 | * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and |
michael@0 | 373 | * the iterator will be adjusted to a position after the end of the text |
michael@0 | 374 | * string. |
michael@0 | 375 | * @param status for errors if it occurs |
michael@0 | 376 | * @return The index of the next match after the current position, |
michael@0 | 377 | * or <tt>USEARCH_DONE</tt> if there are no more matches. |
michael@0 | 378 | * @see #getOffset |
michael@0 | 379 | * @stable ICU 2.0 |
michael@0 | 380 | */ |
michael@0 | 381 | int32_t next(UErrorCode &status); |
michael@0 | 382 | |
michael@0 | 383 | /** |
michael@0 | 384 | * Returns the index of the previous point at which the string text |
michael@0 | 385 | * matches the search pattern, starting at the current position. |
michael@0 | 386 | * The iterator is adjusted so that its current index (as returned by |
michael@0 | 387 | * <tt>getOffset</tt>) is the match position if one was found. |
michael@0 | 388 | * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and |
michael@0 | 389 | * the iterator will be adjusted to the index USEARCH_DONE. |
michael@0 | 390 | * @param status for errors if it occurs |
michael@0 | 391 | * @return The index of the previous match before the current position, |
michael@0 | 392 | * or <tt>USEARCH_DONE</tt> if there are no more matches. |
michael@0 | 393 | * @see #getOffset |
michael@0 | 394 | * @stable ICU 2.0 |
michael@0 | 395 | */ |
michael@0 | 396 | int32_t previous(UErrorCode &status); |
michael@0 | 397 | |
michael@0 | 398 | /** |
michael@0 | 399 | * Resets the iteration. |
michael@0 | 400 | * Search will begin at the start of the text string if a forward |
michael@0 | 401 | * iteration is initiated before a backwards iteration. Otherwise if a |
michael@0 | 402 | * backwards iteration is initiated before a forwards iteration, the |
michael@0 | 403 | * search will begin at the end of the text string. |
michael@0 | 404 | * @stable ICU 2.0 |
michael@0 | 405 | */ |
michael@0 | 406 | virtual void reset(); |
michael@0 | 407 | |
michael@0 | 408 | protected: |
michael@0 | 409 | // protected data members --------------------------------------------- |
michael@0 | 410 | |
michael@0 | 411 | /** |
michael@0 | 412 | * C search data struct |
michael@0 | 413 | * @stable ICU 2.0 |
michael@0 | 414 | */ |
michael@0 | 415 | USearch *m_search_; |
michael@0 | 416 | |
michael@0 | 417 | /** |
michael@0 | 418 | * Break iterator. |
michael@0 | 419 | * Currently the C++ breakiterator does not have getRules etc to reproduce |
michael@0 | 420 | * another in C. Hence we keep the original around and do the verification |
michael@0 | 421 | * at the end of the match. The user is responsible for deleting this |
michael@0 | 422 | * break iterator. |
michael@0 | 423 | * @stable ICU 2.0 |
michael@0 | 424 | */ |
michael@0 | 425 | BreakIterator *m_breakiterator_; |
michael@0 | 426 | |
michael@0 | 427 | /** |
michael@0 | 428 | * Unicode string version of the search text |
michael@0 | 429 | * @stable ICU 2.0 |
michael@0 | 430 | */ |
michael@0 | 431 | UnicodeString m_text_; |
michael@0 | 432 | |
michael@0 | 433 | // protected constructors and destructors ----------------------------- |
michael@0 | 434 | |
michael@0 | 435 | /** |
michael@0 | 436 | * Default constructor. |
michael@0 | 437 | * Initializes data to the default values. |
michael@0 | 438 | * @stable ICU 2.0 |
michael@0 | 439 | */ |
michael@0 | 440 | SearchIterator(); |
michael@0 | 441 | |
michael@0 | 442 | /** |
michael@0 | 443 | * Constructor for use by subclasses. |
michael@0 | 444 | * @param text The target text to be searched. |
michael@0 | 445 | * @param breakiter A {@link BreakIterator} that is used to restrict the |
michael@0 | 446 | * points at which matches are detected. If |
michael@0 | 447 | * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a |
michael@0 | 448 | * match, but the match's start or end index is not a |
michael@0 | 449 | * boundary as determined by the <tt>BreakIterator</tt>, |
michael@0 | 450 | * the match is rejected and <tt>handleNext</tt> or |
michael@0 | 451 | * <tt>handlePrev</tt> is called again. If this parameter |
michael@0 | 452 | * is <tt>NULL</tt>, no break detection is attempted. |
michael@0 | 453 | * @see #handleNext |
michael@0 | 454 | * @see #handlePrev |
michael@0 | 455 | * @stable ICU 2.0 |
michael@0 | 456 | */ |
michael@0 | 457 | SearchIterator(const UnicodeString &text, |
michael@0 | 458 | BreakIterator *breakiter = NULL); |
michael@0 | 459 | |
michael@0 | 460 | /** |
michael@0 | 461 | * Constructor for use by subclasses. |
michael@0 | 462 | * <p> |
michael@0 | 463 | * Note: No parsing of the text within the <tt>CharacterIterator</tt> |
michael@0 | 464 | * will be done during searching for this version. The block of text |
michael@0 | 465 | * in <tt>CharacterIterator</tt> will be used as it is. |
michael@0 | 466 | * @param text The target text to be searched. |
michael@0 | 467 | * @param breakiter A {@link BreakIterator} that is used to restrict the |
michael@0 | 468 | * points at which matches are detected. If |
michael@0 | 469 | * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a |
michael@0 | 470 | * match, but the match's start or end index is not a |
michael@0 | 471 | * boundary as determined by the <tt>BreakIterator</tt>, |
michael@0 | 472 | * the match is rejected and <tt>handleNext</tt> or |
michael@0 | 473 | * <tt>handlePrev</tt> is called again. If this parameter |
michael@0 | 474 | * is <tt>NULL</tt>, no break detection is attempted. |
michael@0 | 475 | * @see #handleNext |
michael@0 | 476 | * @see #handlePrev |
michael@0 | 477 | * @stable ICU 2.0 |
michael@0 | 478 | */ |
michael@0 | 479 | SearchIterator(CharacterIterator &text, BreakIterator *breakiter = NULL); |
michael@0 | 480 | |
michael@0 | 481 | // protected methods -------------------------------------------------- |
michael@0 | 482 | |
michael@0 | 483 | /** |
michael@0 | 484 | * Assignment operator. Sets this iterator to have the same behavior, |
michael@0 | 485 | * and iterate over the same text, as the one passed in. |
michael@0 | 486 | * @param that instance to be copied. |
michael@0 | 487 | * @stable ICU 2.0 |
michael@0 | 488 | */ |
michael@0 | 489 | SearchIterator & operator=(const SearchIterator &that); |
michael@0 | 490 | |
michael@0 | 491 | /** |
michael@0 | 492 | * Abstract method which subclasses override to provide the mechanism |
michael@0 | 493 | * for finding the next match in the target text. This allows different |
michael@0 | 494 | * subclasses to provide different search algorithms. |
michael@0 | 495 | * <p> |
michael@0 | 496 | * If a match is found, the implementation should return the index at |
michael@0 | 497 | * which the match starts and should call |
michael@0 | 498 | * <tt>setMatchLength</tt> with the number of characters |
michael@0 | 499 | * in the target text that make up the match. If no match is found, the |
michael@0 | 500 | * method should return USEARCH_DONE. |
michael@0 | 501 | * <p> |
michael@0 | 502 | * @param position The index in the target text at which the search |
michael@0 | 503 | * should start. |
michael@0 | 504 | * @param status for error codes if it occurs. |
michael@0 | 505 | * @return index at which the match starts, else if match is not found |
michael@0 | 506 | * USEARCH_DONE is returned |
michael@0 | 507 | * @see #setMatchLength |
michael@0 | 508 | * @stable ICU 2.0 |
michael@0 | 509 | */ |
michael@0 | 510 | virtual int32_t handleNext(int32_t position, UErrorCode &status) |
michael@0 | 511 | = 0; |
michael@0 | 512 | |
michael@0 | 513 | /** |
michael@0 | 514 | * Abstract method which subclasses override to provide the mechanism for |
michael@0 | 515 | * finding the previous match in the target text. This allows different |
michael@0 | 516 | * subclasses to provide different search algorithms. |
michael@0 | 517 | * <p> |
michael@0 | 518 | * If a match is found, the implementation should return the index at |
michael@0 | 519 | * which the match starts and should call |
michael@0 | 520 | * <tt>setMatchLength</tt> with the number of characters |
michael@0 | 521 | * in the target text that make up the match. If no match is found, the |
michael@0 | 522 | * method should return USEARCH_DONE. |
michael@0 | 523 | * <p> |
michael@0 | 524 | * @param position The index in the target text at which the search |
michael@0 | 525 | * should start. |
michael@0 | 526 | * @param status for error codes if it occurs. |
michael@0 | 527 | * @return index at which the match starts, else if match is not found |
michael@0 | 528 | * USEARCH_DONE is returned |
michael@0 | 529 | * @see #setMatchLength |
michael@0 | 530 | * @stable ICU 2.0 |
michael@0 | 531 | */ |
michael@0 | 532 | virtual int32_t handlePrev(int32_t position, UErrorCode &status) |
michael@0 | 533 | = 0; |
michael@0 | 534 | |
michael@0 | 535 | /** |
michael@0 | 536 | * Sets the length of the currently matched string in the text string to |
michael@0 | 537 | * be searched. |
michael@0 | 538 | * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt> |
michael@0 | 539 | * methods should call this when they find a match in the target text. |
michael@0 | 540 | * @param length length of the matched text. |
michael@0 | 541 | * @see #handleNext |
michael@0 | 542 | * @see #handlePrev |
michael@0 | 543 | * @stable ICU 2.0 |
michael@0 | 544 | */ |
michael@0 | 545 | virtual void setMatchLength(int32_t length); |
michael@0 | 546 | |
michael@0 | 547 | /** |
michael@0 | 548 | * Sets the offset of the currently matched string in the text string to |
michael@0 | 549 | * be searched. |
michael@0 | 550 | * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt> |
michael@0 | 551 | * methods should call this when they find a match in the target text. |
michael@0 | 552 | * @param position start offset of the matched text. |
michael@0 | 553 | * @see #handleNext |
michael@0 | 554 | * @see #handlePrev |
michael@0 | 555 | * @stable ICU 2.0 |
michael@0 | 556 | */ |
michael@0 | 557 | virtual void setMatchStart(int32_t position); |
michael@0 | 558 | |
michael@0 | 559 | /** |
michael@0 | 560 | * sets match not found |
michael@0 | 561 | * @stable ICU 2.0 |
michael@0 | 562 | */ |
michael@0 | 563 | void setMatchNotFound(); |
michael@0 | 564 | }; |
michael@0 | 565 | |
michael@0 | 566 | inline UBool SearchIterator::operator!=(const SearchIterator &that) const |
michael@0 | 567 | { |
michael@0 | 568 | return !operator==(that); /* negates operator==, which is declared virtual, so subclass overrides are honored */ |
michael@0 | 569 | } |
michael@0 | 570 | U_NAMESPACE_END |
michael@0 | 571 | |
michael@0 | 572 | #endif /* #if !UCONFIG_NO_COLLATION */ |
michael@0 | 573 | |
michael@0 | 574 | #endif |
michael@0 | 575 |