intl/icu/source/common/unicode/chariter.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 ********************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1997-2011, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ********************************************************************
michael@0 8 */
michael@0 9
michael@0 10 #ifndef CHARITER_H
michael@0 11 #define CHARITER_H
michael@0 12
michael@0 13 #include "unicode/utypes.h"
michael@0 14 #include "unicode/uobject.h"
michael@0 15 #include "unicode/unistr.h"
michael@0 16 /**
michael@0 17 * \file
michael@0 18 * \brief C++ API: Character Iterator
michael@0 19 */
michael@0 20
michael@0 21 U_NAMESPACE_BEGIN
michael@0 22 /**
michael@0 23 * Abstract class that defines an API for forward-only iteration
michael@0 24 * on text objects.
michael@0 25 * This is a minimal interface for iteration without random access
michael@0 26 * or backwards iteration. It is especially useful for wrapping
michael@0 27 * streams with converters into an object for collation or
michael@0 28 * normalization.
michael@0 29 *
michael@0 30 * <p>Characters can be accessed in two ways: as code units or as
michael@0 31 * code points.
michael@0 32 * Unicode code points are 21-bit integers and are the scalar values
michael@0 33 * of Unicode characters. ICU uses the type UChar32 for them.
michael@0 34 * Unicode code units are the storage units of a given
michael@0 35 * Unicode/UCS Transformation Format (a character encoding scheme).
michael@0 36 * With UTF-16, all code points can be represented with either one
michael@0 37 * or two code units ("surrogates").
michael@0 38 * String storage is typically based on code units, while properties
michael@0 39 * of characters are typically determined using code point values.
michael@0 40 * Some processes may be designed to work with sequences of code units,
michael@0 41 * or it may be known that all characters that are important to an
michael@0 42 * algorithm can be represented with single code units.
michael@0 43 * Other processes will need to use the code point access functions.</p>
michael@0 44 *
michael@0 45 * <p>ForwardCharacterIterator provides nextPostInc() to access
michael@0 46 * a code unit and advance an internal position into the text object,
michael@0 47 * similar to a <code>return text[position++]</code>.<br>
michael@0 48 * It provides next32PostInc() to access a code point and advance an internal
michael@0 49 * position.</p>
michael@0 50 *
michael@0 51 * <p>next32PostInc() assumes that the current position is that of
michael@0 52 * the beginning of a code point, i.e., of its first code unit.
michael@0 53 * After next32PostInc(), this will be true again.
michael@0 54 * In general, access to code units and code points in the same
michael@0 55 * iteration loop should not be mixed. In UTF-16, if the current position
michael@0 56 * is on a second code unit (Low Surrogate), then only that code unit
michael@0 57 * is returned even by next32PostInc().</p>
michael@0 58 *
michael@0 59 * <p>For iteration with either function, there are two ways to
michael@0 60 * check for the end of the iteration. When there are no more
michael@0 61 * characters in the text object:
michael@0 62 * <ul>
michael@0 63 * <li>The hasNext() function returns FALSE.</li>
michael@0 64 * <li>nextPostInc() and next32PostInc() return DONE
michael@0 65 * when one attempts to read beyond the end of the text object.</li>
michael@0 66 * </ul>
michael@0 67 *
michael@0 68 * Example:
michael@0 69 * \code
michael@0 70 * void function1(ForwardCharacterIterator &it) {
michael@0 71 * UChar32 c;
michael@0 72 * while(it.hasNext()) {
michael@0 73 * c=it.next32PostInc();
michael@0 74 * // use c
michael@0 75 * }
michael@0 76 * }
michael@0 77 *
michael@0 78 * void function2(ForwardCharacterIterator &it) {
michael@0 79 * UChar c;
michael@0 80 * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
michael@0 81 * // use c
michael@0 82 * }
michael@0 83 * }
michael@0 84 * \endcode
michael@0 85 * </p>
michael@0 86 *
michael@0 87 * @stable ICU 2.0
michael@0 88 */
michael@0 89 class U_COMMON_API ForwardCharacterIterator : public UObject {
michael@0 90 public:
michael@0 91 /**
michael@0 92 * Value returned by most of ForwardCharacterIterator's functions
michael@0 93 * when the iterator has reached the limits of its iteration.
michael@0 94 * @stable ICU 2.0
michael@0 95 */
michael@0 96 enum { DONE = 0xffff };
michael@0 97
michael@0 98 /**
michael@0 99 * Destructor.
michael@0 100 * @stable ICU 2.0
michael@0 101 */
michael@0 102 virtual ~ForwardCharacterIterator();
michael@0 103
michael@0 104 /**
michael@0 105 * Returns true when both iterators refer to the same
michael@0 106 * character in the same character-storage object.
michael@0 107 * @param that The ForwardCharacterIterator to be compared for equality
michael@0 108 * @return true when both iterators refer to the same
michael@0 109 * character in the same character-storage object
michael@0 110 * @stable ICU 2.0
michael@0 111 */
michael@0 112 virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
michael@0 113
michael@0 114 /**
michael@0 115 * Returns true when the iterators refer to different
michael@0 116 * text-storage objects, or to different characters in the
michael@0 117 * same text-storage object.
michael@0 118 * @param that The ForwardCharacterIterator to be compared for inequality
michael@0 119 * @return true when the iterators refer to different
michael@0 120 * text-storage objects, or to different characters in the
michael@0 121 * same text-storage object
michael@0 122 * @stable ICU 2.0
michael@0 123 */
michael@0 124 inline UBool operator!=(const ForwardCharacterIterator& that) const;
michael@0 125
michael@0 126 /**
michael@0 127 * Generates a hash code for this iterator.
michael@0 128 * @return the hash code.
michael@0 129 * @stable ICU 2.0
michael@0 130 */
michael@0 131 virtual int32_t hashCode(void) const = 0;
michael@0 132
michael@0 133 /**
michael@0 134 * Returns a UClassID for this ForwardCharacterIterator ("poor man's
michael@0 135 * RTTI").<P> Despite the fact that this function is public,
michael@0 136 * DO NOT CONSIDER IT PART OF FORWARDCHARACTERITERATOR'S API!
michael@0 137 * @return a UClassID for this ForwardCharacterIterator
michael@0 138 * @stable ICU 2.0
michael@0 139 */
michael@0 140 virtual UClassID getDynamicClassID(void) const = 0;
michael@0 141
michael@0 142 /**
michael@0 143 * Gets the current code unit for returning and advances to the next code unit
michael@0 144 * in the iteration range
michael@0 145 * (toward the end of the text object). If there are
michael@0 146 * no more code units to return, returns DONE.
michael@0 147 * @return the current code unit.
michael@0 148 * @stable ICU 2.0
michael@0 149 */
michael@0 150 virtual UChar nextPostInc(void) = 0;
michael@0 151
michael@0 152 /**
michael@0 153 * Gets the current code point for returning and advances to the next code point
michael@0 154 * in the iteration range
michael@0 155 * (toward the end of the text object). If there are
michael@0 156 * no more code points to return, returns DONE.
michael@0 157 * @return the current code point.
michael@0 158 * @stable ICU 2.0
michael@0 159 */
michael@0 160 virtual UChar32 next32PostInc(void) = 0;
michael@0 161
michael@0 162 /**
michael@0 163 * Returns FALSE if there are no more code units or code points
michael@0 164 * at or after the current position in the iteration range.
michael@0 165 * This is used with nextPostInc() or next32PostInc() in forward
michael@0 166 * iteration.
michael@0 167 * @return FALSE if there are no more code units or code points
michael@0 168 * at or after the current position in the iteration range.
michael@0 169 * @stable ICU 2.0
michael@0 170 */
michael@0 171 virtual UBool hasNext() = 0;
michael@0 172
michael@0 173 protected:
michael@0 174 /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
michael@0 175 ForwardCharacterIterator();
michael@0 176
michael@0 177 /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
michael@0 178 ForwardCharacterIterator(const ForwardCharacterIterator &other);
michael@0 179
michael@0 180 /**
michael@0 181 * Assignment operator to be overridden in the implementing class. This base version copies nothing and returns *this.
michael@0 182 * @stable ICU 2.0
michael@0 183 */
michael@0 184 ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
michael@0 185 };
michael@0 186
michael@0 187 /**
michael@0 188 * Abstract class that defines an API for iteration
michael@0 189 * on text objects.
michael@0 190 * This is an interface for forward and backward iteration
michael@0 191 * and random access into a text object.
michael@0 192 *
michael@0 193 * <p>The API provides backward compatibility to the Java and older ICU
michael@0 194 * CharacterIterator classes but extends them significantly:
michael@0 195 * <ol>
michael@0 196 * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
michael@0 197 * <li>While the old API functions provided forward iteration with
michael@0 198 * "pre-increment" semantics, the new one also provides functions
michael@0 199 * with "post-increment" semantics. They are more efficient and should
michael@0 200 * be the preferred iterator functions for new implementations.
michael@0 201 * The backward iteration always had "pre-decrement" semantics, which
michael@0 202 * are efficient.</li>
michael@0 203 * <li>Just like ForwardCharacterIterator, it provides access to
michael@0 204 * both code units and code points. Code point access versions are available
michael@0 205 * for the old and the new iteration semantics.</li>
michael@0 206 * <li>There are new functions for setting and moving the current position
michael@0 207 * without returning a character, for efficiency.</li>
michael@0 208 * </ol>
michael@0 209 *
michael@0 210 * See ForwardCharacterIterator for examples for using the new forward iteration
michael@0 211 * functions. For backward iteration, there is also a hasPrevious() function
michael@0 212 * that can be used analogously to hasNext().
michael@0 213 * The old functions work as before and are shown below.</p>
michael@0 214 *
michael@0 215 * <p>Examples for some of the new functions:</p>
michael@0 216 *
michael@0 217 * Forward iteration with hasNext():
michael@0 218 * \code
michael@0 219 * void forward1(CharacterIterator &it) {
michael@0 220 * UChar32 c;
michael@0 221 * for(it.setToStart(); it.hasNext();) {
michael@0 222 * c=it.next32PostInc();
michael@0 223 * // use c
michael@0 224 * }
michael@0 225 * }
michael@0 226 * \endcode
michael@0 227 * Forward iteration more similar to loops with the old forward iteration,
michael@0 228 * showing a way to convert simple for() loops:
michael@0 229 * \code
michael@0 230 * void forward2(CharacterIterator &it) {
michael@0 231 * UChar c;
michael@0 232 * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
michael@0 233 * // use c
michael@0 234 * }
michael@0 235 * }
michael@0 236 * \endcode
michael@0 237 * Backward iteration with setToEnd() and hasPrevious():
michael@0 238 * \code
michael@0 239 * void backward1(CharacterIterator &it) {
michael@0 240 * UChar32 c;
michael@0 241 * for(it.setToEnd(); it.hasPrevious();) {
michael@0 242 * c=it.previous32();
michael@0 243 * // use c
michael@0 244 * }
michael@0 245 * }
michael@0 246 * \endcode
michael@0 247 * Backward iteration with a more traditional for() loop:
michael@0 248 * \code
michael@0 249 * void backward2(CharacterIterator &it) {
michael@0 250 * UChar c;
michael@0 251 * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
michael@0 252 * // use c
michael@0 253 * }
michael@0 254 * }
michael@0 255 * \endcode
michael@0 256 *
michael@0 257 * Example for random access:
michael@0 258 * \code
michael@0 259 * void random(CharacterIterator &it) {
michael@0 260 * // set to the third code point from the beginning
michael@0 261 * it.move32(3, CharacterIterator::kStart);
michael@0 262 * // get a code point from here without moving the position
michael@0 263 * UChar32 c=it.current32();
michael@0 264 * // get the position
michael@0 265 * int32_t pos=it.getIndex();
michael@0 266 * // get the previous code unit
michael@0 267 * UChar u=it.previous();
michael@0 268 * // move back one more code unit
michael@0 269 * it.move(-1, CharacterIterator::kCurrent);
michael@0 270 * // set the position back to where it was
michael@0 271 * // and read the same code point c and move beyond it
michael@0 272 * it.setIndex(pos);
michael@0 273 * if(c!=it.next32PostInc()) {
michael@0 274 * exit(1); // CharacterIterator inconsistent
michael@0 275 * }
michael@0 276 * }
michael@0 277 * \endcode
michael@0 278 *
michael@0 279 * <p>Examples, especially for the old API:</p>
michael@0 280 *
michael@0 281 * Function processing characters, in this example simple output
michael@0 282 * <pre>
michael@0 283 * \code
michael@0 284 * void processChar( UChar c )
michael@0 285 * {
michael@0 286 * cout << " " << c;
michael@0 287 * }
michael@0 288 * \endcode
michael@0 289 * </pre>
michael@0 290 * Traverse the text from start to finish
michael@0 291 * <pre>
michael@0 292 * \code
michael@0 293 * void traverseForward(CharacterIterator& iter)
michael@0 294 * {
michael@0 295 * for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
michael@0 296 * processChar(c);
michael@0 297 * }
michael@0 298 * }
michael@0 299 * \endcode
michael@0 300 * </pre>
michael@0 301 * Traverse the text backwards, from end to start
michael@0 302 * <pre>
michael@0 303 * \code
michael@0 304 * void traverseBackward(CharacterIterator& iter)
michael@0 305 * {
michael@0 306 * for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
michael@0 307 * processChar(c);
michael@0 308 * }
michael@0 309 * }
michael@0 310 * \endcode
michael@0 311 * </pre>
michael@0 312 * Traverse both forward and backward from a given position in the text.
michael@0 313 * The calls to Unicode::isLetter() and Unicode::isDigit() in this example represent some additional stopping criteria.
michael@0 314 * <pre>
michael@0 315 * \code
michael@0 316 * void traverseOut(CharacterIterator& iter, int32_t pos)
michael@0 317 * {
michael@0 318 * UChar c;
michael@0 319 * for (c = iter.setIndex(pos);
michael@0 320 * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
michael@0 321 * c = iter.next()) {}
michael@0 322 * int32_t end = iter.getIndex();
michael@0 323 * for (c = iter.setIndex(pos);
michael@0 324 * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
michael@0 325 * c = iter.previous()) {}
michael@0 326 * int32_t start = iter.getIndex() + 1;
michael@0 327 *
michael@0 328 * cout << "start: " << start << " end: " << end << endl;
michael@0 329 * for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
michael@0 330 * processChar(c);
michael@0 331 * }
michael@0 332 * }
michael@0 333 * \endcode
michael@0 334 * </pre>
michael@0 335 * Creating a StringCharacterIterator and calling the test functions
michael@0 336 * <pre>
michael@0 337 * \code
michael@0 338 * void CharacterIterator_Example( void )
michael@0 339 * {
michael@0 340 * cout << endl << "===== CharacterIterator_Example: =====" << endl;
michael@0 341 * UnicodeString text("Ein kleiner Satz.");
michael@0 342 * StringCharacterIterator iterator(text);
michael@0 343 * cout << "----- traverseForward: -----------" << endl;
michael@0 344 * traverseForward( iterator );
michael@0 345 * cout << endl << endl << "----- traverseBackward: ----------" << endl;
michael@0 346 * traverseBackward( iterator );
michael@0 347 * cout << endl << endl << "----- traverseOut: ---------------" << endl;
michael@0 348 * traverseOut( iterator, 7 );
michael@0 349 * cout << endl << endl << "-----" << endl;
michael@0 350 * }
michael@0 351 * \endcode
michael@0 352 * </pre>
michael@0 353 *
michael@0 354 * @stable ICU 2.0
michael@0 355 */
michael@0 356 class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
michael@0 357 public:
michael@0 358 /**
michael@0 359 * Origin enumeration for the move() and move32() functions.
michael@0 360 * @stable ICU 2.0
michael@0 361 */
michael@0 362 enum EOrigin { kStart, kCurrent, kEnd };
michael@0 363
michael@0 364 /**
michael@0 365 * Destructor.
michael@0 366 * @stable ICU 2.0
michael@0 367 */
michael@0 368 virtual ~CharacterIterator();
michael@0 369
michael@0 370 /**
michael@0 371 * Returns a pointer to a new CharacterIterator of the same
michael@0 372 * concrete class as this one, and referring to the same
michael@0 373 * character in the same text-storage object as this one. The
michael@0 374 * caller is responsible for deleting the new clone.
michael@0 375 * @return a pointer to a new CharacterIterator
michael@0 376 * @stable ICU 2.0
michael@0 377 */
michael@0 378 virtual CharacterIterator* clone(void) const = 0;
michael@0 379
michael@0 380 /**
michael@0 381 * Sets the iterator to refer to the first code unit in its
michael@0 382 * iteration range, and returns that code unit.
michael@0 383 * This can be used to begin an iteration with next().
michael@0 384 * @return the first code unit in its iteration range.
michael@0 385 * @stable ICU 2.0
michael@0 386 */
michael@0 387 virtual UChar first(void) = 0;
michael@0 388
michael@0 389 /**
michael@0 390 * Sets the iterator to refer to the first code unit in its
michael@0 391 * iteration range, returns that code unit, and moves the position
michael@0 392 * to the second code unit. This is an alternative to setToStart()
michael@0 393 * for forward iteration with nextPostInc().
michael@0 394 * @return the first code unit in its iteration range.
michael@0 395 * @stable ICU 2.0
michael@0 396 */
michael@0 397 virtual UChar firstPostInc(void);
michael@0 398
michael@0 399 /**
michael@0 400 * Sets the iterator to refer to the first code point in its
michael@0 401 * iteration range, and returns that code point.
michael@0 402 * This can be used to begin an iteration with next32().
michael@0 403 * Note that an iteration with next32PostInc(), beginning with,
michael@0 404 * e.g., setToStart() or firstPostInc(), is more efficient.
michael@0 405 * @return the first code point in its iteration range.
michael@0 406 * @stable ICU 2.0
michael@0 407 */
michael@0 408 virtual UChar32 first32(void) = 0;
michael@0 409
michael@0 410 /**
michael@0 411 * Sets the iterator to refer to the first code point in its
michael@0 412 * iteration range, returns that code point, and moves the position
michael@0 413 * to the second code point. This is an alternative to setToStart()
michael@0 414 * for forward iteration with next32PostInc().
michael@0 415 * @return the first code point in its iteration range.
michael@0 416 * @stable ICU 2.0
michael@0 417 */
michael@0 418 virtual UChar32 first32PostInc(void);
michael@0 419
michael@0 420 /**
michael@0 421 * Sets the iterator to refer to the first code unit or code point in its
michael@0 422 * iteration range. This can be used to begin a forward
michael@0 423 * iteration with nextPostInc() or next32PostInc().
michael@0 424 * @return the start position of the iteration range
michael@0 425 * @stable ICU 2.0
michael@0 426 */
michael@0 427 inline int32_t setToStart();
michael@0 428
michael@0 429 /**
michael@0 430 * Sets the iterator to refer to the last code unit in its
michael@0 431 * iteration range, and returns that code unit.
michael@0 432 * This can be used to begin an iteration with previous().
michael@0 433 * @return the last code unit.
michael@0 434 * @stable ICU 2.0
michael@0 435 */
michael@0 436 virtual UChar last(void) = 0;
michael@0 437
michael@0 438 /**
michael@0 439 * Sets the iterator to refer to the last code point in its
michael@0 440 * iteration range, and returns that code point.
michael@0 441 * This can be used to begin an iteration with previous32().
michael@0 442 * @return the last code point.
michael@0 443 * @stable ICU 2.0
michael@0 444 */
michael@0 445 virtual UChar32 last32(void) = 0;
michael@0 446
michael@0 447 /**
michael@0 448 * Sets the iterator to the end of its iteration range, just behind
michael@0 449 * the last code unit or code point. This can be used to begin a backward
michael@0 450 * iteration with previous() or previous32().
michael@0 451 * @return the end position of the iteration range
michael@0 452 * @stable ICU 2.0
michael@0 453 */
michael@0 454 inline int32_t setToEnd();
michael@0 455
michael@0 456 /**
michael@0 457 * Sets the iterator to refer to the "position"-th code unit
michael@0 458 * in the text-storage object the iterator refers to, and
michael@0 459 * returns that code unit.
michael@0 460 * @param position the "position"-th code unit in the text-storage object
michael@0 461 * @return the "position"-th code unit.
michael@0 462 * @stable ICU 2.0
michael@0 463 */
michael@0 464 virtual UChar setIndex(int32_t position) = 0;
michael@0 465
michael@0 466 /**
michael@0 467 * Sets the iterator to refer to the beginning of the code point
michael@0 468 * that contains the "position"-th code unit
michael@0 469 * in the text-storage object the iterator refers to, and
michael@0 470 * returns that code point.
michael@0 471 * The current position is adjusted to the beginning of the code point
michael@0 472 * (its first code unit).
michael@0 473 * @param position the "position"-th code unit in the text-storage object
michael@0 474 * @return the code point that contains the "position"-th code unit.
michael@0 475 * @stable ICU 2.0
michael@0 476 */
michael@0 477 virtual UChar32 setIndex32(int32_t position) = 0;
michael@0 478
michael@0 479 /**
michael@0 480 * Returns the code unit the iterator currently refers to.
michael@0 481 * @return the current code unit.
michael@0 482 * @stable ICU 2.0
michael@0 483 */
michael@0 484 virtual UChar current(void) const = 0;
michael@0 485
michael@0 486 /**
michael@0 487 * Returns the code point the iterator currently refers to.
michael@0 488 * @return the current code point.
michael@0 489 * @stable ICU 2.0
michael@0 490 */
michael@0 491 virtual UChar32 current32(void) const = 0;
michael@0 492
michael@0 493 /**
michael@0 494 * Advances to the next code unit in the iteration range
michael@0 495 * (toward endIndex()), and returns that code unit. If there are
michael@0 496 * no more code units to return, returns DONE.
michael@0 497 * @return the next code unit.
michael@0 498 * @stable ICU 2.0
michael@0 499 */
michael@0 500 virtual UChar next(void) = 0;
michael@0 501
michael@0 502 /**
michael@0 503 * Advances to the next code point in the iteration range
michael@0 504 * (toward endIndex()), and returns that code point. If there are
michael@0 505 * no more code points to return, returns DONE.
michael@0 506 * Note that iteration with "pre-increment" semantics is less
michael@0 507 * efficient than iteration with "post-increment" semantics
michael@0 508 * that is provided by next32PostInc().
michael@0 509 * @return the next code point.
michael@0 510 * @stable ICU 2.0
michael@0 511 */
michael@0 512 virtual UChar32 next32(void) = 0;
michael@0 513
michael@0 514 /**
michael@0 515 * Advances to the previous code unit in the iteration range
michael@0 516 * (toward startIndex()), and returns that code unit. If there are
michael@0 517 * no more code units to return, returns DONE.
michael@0 518 * @return the previous code unit.
michael@0 519 * @stable ICU 2.0
michael@0 520 */
michael@0 521 virtual UChar previous(void) = 0;
michael@0 522
michael@0 523 /**
michael@0 524 * Advances to the previous code point in the iteration range
michael@0 525 * (toward startIndex()), and returns that code point. If there are
michael@0 526 * no more code points to return, returns DONE.
michael@0 527 * @return the previous code point.
michael@0 528 * @stable ICU 2.0
michael@0 529 */
michael@0 530 virtual UChar32 previous32(void) = 0;
michael@0 531
michael@0 532 /**
michael@0 533 * Returns FALSE if there are no more code units or code points
michael@0 534 * before the current position in the iteration range.
michael@0 535 * This is used with previous() or previous32() in backward
michael@0 536 * iteration.
michael@0 537 * @return FALSE if there are no more code units or code points
michael@0 538 * before the current position in the iteration range, return TRUE otherwise.
michael@0 539 * @stable ICU 2.0
michael@0 540 */
michael@0 541 virtual UBool hasPrevious() = 0;
michael@0 542
michael@0 543 /**
michael@0 544 * Returns the numeric index in the underlying text-storage
michael@0 545 * object of the character returned by first(). Since it's
michael@0 546 * possible to create an iterator that iterates across only
michael@0 547 * part of a text-storage object, this number isn't
michael@0 548 * necessarily 0.
michael@0 549 * @return the numeric index in the underlying text-storage
michael@0 550 * object of the character returned by first().
michael@0 551 * @stable ICU 2.0
michael@0 552 */
michael@0 553 inline int32_t startIndex(void) const;
michael@0 554
michael@0 555 /**
michael@0 556 * Returns the numeric index in the underlying text-storage
michael@0 557 * object of the position immediately BEYOND the character
michael@0 558 * returned by last().
michael@0 559 * @return the numeric index in the underlying text-storage
michael@0 560 * object of the position immediately BEYOND the character
michael@0 561 * returned by last().
michael@0 562 * @stable ICU 2.0
michael@0 563 */
michael@0 564 inline int32_t endIndex(void) const;
michael@0 565
michael@0 566 /**
michael@0 567 * Returns the numeric index in the underlying text-storage
michael@0 568 * object of the character the iterator currently refers to
michael@0 569 * (i.e., the character returned by current()).
michael@0 570 * @return the numeric index in the text-storage object of
michael@0 571 * the character the iterator currently refers to
michael@0 572 * @stable ICU 2.0
michael@0 573 */
michael@0 574 inline int32_t getIndex(void) const;
michael@0 575
michael@0 576 /**
michael@0 577 * Returns the length of the entire text in the underlying
michael@0 578 * text-storage object.
michael@0 579 * @return the length of the entire text in the text-storage object
michael@0 580 * @stable ICU 2.0
michael@0 581 */
michael@0 582 inline int32_t getLength() const;
michael@0 583
michael@0 584 /**
michael@0 585 * Moves the current position relative to the start or end of the
michael@0 586 * iteration range, or relative to the current position itself.
michael@0 587 * The movement is expressed in numbers of code units forward
michael@0 588 * or backward by specifying a positive or negative delta.
michael@0 589 * @param delta the position relative to origin. A positive delta means forward;
michael@0 590 * a negative delta means backward.
michael@0 591 * @param origin Origin enumeration {kStart, kCurrent, kEnd}
michael@0 592 * @return the new position
michael@0 593 * @stable ICU 2.0
michael@0 594 */
michael@0 595 virtual int32_t move(int32_t delta, EOrigin origin) = 0;
michael@0 596
michael@0 597 /**
michael@0 598 * Moves the current position relative to the start or end of the
michael@0 599 * iteration range, or relative to the current position itself.
michael@0 600 * The movement is expressed in numbers of code points forward
michael@0 601 * or backward by specifying a positive or negative delta.
michael@0 602 * @param delta the position relative to origin. A positive delta means forward;
michael@0 603 * a negative delta means backward.
michael@0 604 * @param origin Origin enumeration {kStart, kCurrent, kEnd}
michael@0 605 * @return the new position
michael@0 606 * @stable ICU 2.0
michael@0 607 */
michael@0 608 virtual int32_t move32(int32_t delta, EOrigin origin) = 0;
michael@0 609
michael@0 610 /**
michael@0 611 * Copies the text under iteration into the UnicodeString
michael@0 612 * referred to by "result".
michael@0 613 * @param result Receives a copy of the text under iteration.
michael@0 614 * @stable ICU 2.0
michael@0 615 */
michael@0 616 virtual void getText(UnicodeString& result) = 0;
michael@0 617
michael@0 618 protected:
michael@0 619 /**
michael@0 620 * Empty constructor.
michael@0 621 * @stable ICU 2.0
michael@0 622 */
michael@0 623 CharacterIterator();
michael@0 624
michael@0 625 /**
michael@0 626 * Constructor, just setting the length field in this base class.
michael@0 627 * @stable ICU 2.0
michael@0 628 */
michael@0 629 CharacterIterator(int32_t length);
michael@0 630
michael@0 631 /**
michael@0 632 * Constructor, just setting the length and position fields in this base class.
michael@0 633 * @stable ICU 2.0
michael@0 634 */
michael@0 635 CharacterIterator(int32_t length, int32_t position);
michael@0 636
michael@0 637 /**
michael@0 638 * Constructor, just setting the length, start, end, and position fields in this base class.
michael@0 639 * @stable ICU 2.0
michael@0 640 */
michael@0 641 CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
michael@0 642
michael@0 643 /**
michael@0 644 * Copy constructor.
michael@0 645 *
michael@0 646 * @param that The CharacterIterator to be copied
michael@0 647 * @stable ICU 2.0
michael@0 648 */
michael@0 649 CharacterIterator(const CharacterIterator &that);
michael@0 650
michael@0 651 /**
michael@0 652 * Assignment operator. Sets this CharacterIterator to have the same behavior
michael@0 653 * as the one passed in.
michael@0 654 * @param that The CharacterIterator passed in.
michael@0 655 * @return the newly set CharacterIterator.
michael@0 656 * @stable ICU 2.0
michael@0 657 */
michael@0 658 CharacterIterator &operator=(const CharacterIterator &that);
michael@0 659
michael@0 660 /**
michael@0 661 * Base class text length field.
michael@0 662 * Necessary for correct getText() and hashCode().
michael@0 663 * @stable ICU 2.0
michael@0 664 */
michael@0 665 int32_t textLength;
michael@0 666
michael@0 667 /**
michael@0 668 * Base class field for the current position.
michael@0 669 * @stable ICU 2.0
michael@0 670 */
michael@0 671 int32_t pos;
michael@0 672
michael@0 673 /**
michael@0 674 * Base class field for the start of the iteration range.
michael@0 675 * @stable ICU 2.0
michael@0 676 */
michael@0 677 int32_t begin;
michael@0 678
michael@0 679 /**
michael@0 680 * Base class field for the end of the iteration range.
michael@0 681 * @stable ICU 2.0
michael@0 682 */
michael@0 683 int32_t end;
michael@0 684 };
michael@0 685
michael@0 686 inline UBool
michael@0 687 ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
michael@0 688 return !operator==(that); // inequality is the negation of the pure-virtual operator== declared in this class
michael@0 689 }
michael@0 690
michael@0 691 inline int32_t
michael@0 692 CharacterIterator::setToStart() {
michael@0 693 return move(0, kStart); // zero offset from the kStart origin; passes through the position that move() returns
michael@0 694 }
michael@0 695
michael@0 696 inline int32_t
michael@0 697 CharacterIterator::setToEnd() {
michael@0 698 return move(0, kEnd); // zero offset from the kEnd origin; passes through the position that move() returns
michael@0 699 }
michael@0 700
michael@0 701 inline int32_t
michael@0 702 CharacterIterator::startIndex(void) const {
michael@0 703 return begin; // base-class field holding the start of the iteration range
michael@0 704 }
michael@0 705
michael@0 706 inline int32_t
michael@0 707 CharacterIterator::endIndex(void) const {
michael@0 708 return end; // base-class field holding the end of the iteration range
michael@0 709 }
michael@0 710
michael@0 711 inline int32_t
michael@0 712 CharacterIterator::getIndex(void) const {
michael@0 713 return pos; // base-class field holding the current position
michael@0 714 }
michael@0 715
michael@0 716 inline int32_t
michael@0 717 CharacterIterator::getLength(void) const {
michael@0 718 return textLength; // base-class field holding the text length
michael@0 719 }
michael@0 720
michael@0 721 U_NAMESPACE_END
michael@0 722 #endif

mercurial