intl/icu/source/common/unicode/uiter.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2002-2011 International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: uiter.h
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2002jan18
michael@0 14 * created by: Markus W. Scherer
michael@0 15 */
michael@0 16
michael@0 17 #ifndef __UITER_H__
michael@0 18 #define __UITER_H__
michael@0 19
michael@0 20 /**
michael@0 21 * \file
michael@0 22 * \brief C API: Unicode Character Iteration
michael@0 23 *
michael@0 24 * @see UCharIterator
michael@0 25 */
michael@0 26
michael@0 27 #include "unicode/utypes.h"
michael@0 28
michael@0 29 #if U_SHOW_CPLUSPLUS_API
michael@0 30 U_NAMESPACE_BEGIN
michael@0 31
michael@0 32 class CharacterIterator;
michael@0 33 class Replaceable;
michael@0 34
michael@0 35 U_NAMESPACE_END
michael@0 36 #endif
michael@0 37
michael@0 38 U_CDECL_BEGIN
michael@0 39
michael@0 40 struct UCharIterator;
michael@0 41 typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
michael@0 42
michael@0 43 /**
michael@0 44 * Origin constants for UCharIterator.getIndex() and UCharIterator.move(): each constant selects the start, current, limit, zero (0), or length index, either as the index to get or as the origin to move relative to.
michael@0 45 * @see UCharIteratorMove
michael@0 46 * @see UCharIterator
michael@0 47 * @stable ICU 2.1
michael@0 48 */
michael@0 49 typedef enum UCharIteratorOrigin {
michael@0 50 UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
michael@0 51 } UCharIteratorOrigin;
michael@0 52
michael@0 53 /** Constants for UCharIterator. @stable ICU 2.6 */
michael@0 54 enum {
michael@0 55 /**
michael@0 56 * Constant value that may be returned by UCharIteratorMove
michael@0 57 * indicating that the final UTF-16 index is not known, but that the move succeeded.
michael@0 58 * This can occur when moving relative to limit or length, or
michael@0 59 * when moving relative to the current index after a setState()
michael@0 60 * when the current UTF-16 index is not known.
michael@0 61 *
michael@0 62 * It would be very inefficient to have to count from the beginning of the text
michael@0 63 * just to get the current/limit/length index after moving relative to it.
michael@0 64 * The actual index can be determined with getIndex(UITER_CURRENT)
michael@0 65 * which will count the UChars if necessary.
michael@0 66 *
michael@0 67 * @stable ICU 2.6
michael@0 68 */
michael@0 69 UITER_UNKNOWN_INDEX=-2
michael@0 70 };
michael@0 71
michael@0 72
michael@0 73 /**
michael@0 74 * Constant for UCharIterator getState() indicating an error or
michael@0 75 * an unknown state.
michael@0 76 * Returned by uiter_getState()/UCharIteratorGetState
michael@0 77 * when an error occurs.
michael@0 78 * Also, some UCharIterator implementations may not be able to return
michael@0 79 * a valid state for each position. This will be clearly documented
michael@0 80 * for each such iterator (none of the public ones here).
michael@0 81 *
michael@0 82 * @stable ICU 2.6
michael@0 83 */
michael@0 84 #define UITER_NO_STATE ((uint32_t)0xffffffff)
michael@0 85
michael@0 86 /**
michael@0 87 * Function type declaration for UCharIterator.getIndex().
michael@0 88 *
michael@0 89 * Gets the current position, or the start or limit of the
michael@0 90 * iteration range.
michael@0 91 *
michael@0 92 * This function may perform slowly for UITER_CURRENT after setState() was called,
michael@0 93 * or for UITER_LENGTH, because an iterator implementation may have to count
michael@0 94 * UChars if the underlying storage is not UTF-16.
michael@0 95 *
michael@0 96 * @param iter the UCharIterator structure ("this pointer")
michael@0 97 * @param origin get the 0, start, limit, length, or current index
michael@0 98 * @return the requested index, or U_SENTINEL in an error condition
michael@0 99 *
michael@0 100 * @see UCharIteratorOrigin
michael@0 101 * @see UCharIterator
michael@0 102 * @stable ICU 2.1
michael@0 103 */
michael@0 104 typedef int32_t U_CALLCONV
michael@0 105 UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
michael@0 106
michael@0 107 /**
michael@0 108 * Function type declaration for UCharIterator.move().
michael@0 109 *
michael@0 110 * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
michael@0 111 *
michael@0 112 * Moves the current position relative to the start or limit of the
michael@0 113 * iteration range, or relative to the current position itself.
michael@0 114 * The movement is expressed in numbers of code units forward
michael@0 115 * or backward by specifying a positive or negative delta.
michael@0 116 * Out of bounds movement will be pinned to the start or limit.
michael@0 117 *
michael@0 118 * This function may perform slowly for moving relative to UITER_LENGTH
michael@0 119 * because an iterator implementation may have to count the rest of the
michael@0 120 * UChars if the native storage is not UTF-16.
michael@0 121 *
michael@0 122 * When moving relative to the limit or length, or
michael@0 123 * relative to the current position after setState() was called,
michael@0 124 * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
michael@0 125 * determination of the actual UTF-16 index.
michael@0 126 * The actual index can be determined with getIndex(UITER_CURRENT)
michael@0 127 * which will count the UChars if necessary.
michael@0 128 * See UITER_UNKNOWN_INDEX for details.
michael@0 129 *
michael@0 130 * @param iter the UCharIterator structure ("this pointer")
michael@0 131 * @param delta can be positive, zero, or negative
michael@0 132 * @param origin move relative to the 0, start, limit, length, or current index
michael@0 133 * @return the new index, or U_SENTINEL on an error condition,
michael@0 134 * or UITER_UNKNOWN_INDEX when the index is not known.
michael@0 135 *
michael@0 136 * @see UCharIteratorOrigin
michael@0 137 * @see UCharIterator
michael@0 138 * @see UITER_UNKNOWN_INDEX
michael@0 139 * @stable ICU 2.1
michael@0 140 */
michael@0 141 typedef int32_t U_CALLCONV
michael@0 142 UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
michael@0 143
michael@0 144 /**
michael@0 145 * Function type declaration for UCharIterator.hasNext().
michael@0 146 *
michael@0 147 * Check if current() and next() can still
michael@0 148 * return another code unit.
michael@0 149 *
michael@0 150 * @param iter the UCharIterator structure ("this pointer")
michael@0 151 * @return boolean value for whether current() and next() can still return another code unit
michael@0 152 *
michael@0 153 * @see UCharIterator
michael@0 154 * @stable ICU 2.1
michael@0 155 */
michael@0 156 typedef UBool U_CALLCONV
michael@0 157 UCharIteratorHasNext(UCharIterator *iter);
michael@0 158
michael@0 159 /**
michael@0 160 * Function type declaration for UCharIterator.hasPrevious().
michael@0 161 *
michael@0 162 * Check if previous() can still return another code unit.
michael@0 163 *
michael@0 164 * @param iter the UCharIterator structure ("this pointer")
michael@0 165 * @return boolean value for whether previous() can still return another code unit
michael@0 166 *
michael@0 167 * @see UCharIterator
michael@0 168 * @stable ICU 2.1
michael@0 169 */
michael@0 170 typedef UBool U_CALLCONV
michael@0 171 UCharIteratorHasPrevious(UCharIterator *iter);
michael@0 172
michael@0 173 /**
michael@0 174 * Function type declaration for UCharIterator.current().
michael@0 175 *
michael@0 176 * Return the code unit at the current position,
michael@0 177 * or U_SENTINEL if there is none (index is at the limit).
michael@0 178 *
michael@0 179 * @param iter the UCharIterator structure ("this pointer")
michael@0 180 * @return the current code unit, or U_SENTINEL if the index is at the limit
michael@0 181 *
michael@0 182 * @see UCharIterator
michael@0 183 * @stable ICU 2.1
michael@0 184 */
michael@0 185 typedef UChar32 U_CALLCONV
michael@0 186 UCharIteratorCurrent(UCharIterator *iter);
michael@0 187
michael@0 188 /**
michael@0 189 * Function type declaration for UCharIterator.next().
michael@0 190 *
michael@0 191 * Return the code unit at the current index and increment
michael@0 192 * the index (post-increment, like s[i++]),
michael@0 193 * or return U_SENTINEL if there is none (index is at the limit).
michael@0 194 *
michael@0 195 * @param iter the UCharIterator structure ("this pointer")
michael@0 196 * @return the current code unit (and post-increment the current index), or U_SENTINEL if the index is at the limit
michael@0 197 *
michael@0 198 * @see UCharIterator
michael@0 199 * @stable ICU 2.1
michael@0 200 */
michael@0 201 typedef UChar32 U_CALLCONV
michael@0 202 UCharIteratorNext(UCharIterator *iter);
michael@0 203
michael@0 204 /**
michael@0 205 * Function type declaration for UCharIterator.previous().
michael@0 206 *
michael@0 207 * Decrement the index and return the code unit from there
michael@0 208 * (pre-decrement, like s[--i]),
michael@0 209 * or return U_SENTINEL if there is none (index is at the start).
michael@0 210 *
michael@0 211 * @param iter the UCharIterator structure ("this pointer")
michael@0 212 * @return the previous code unit (after pre-decrementing the current index), or U_SENTINEL if the index is at the start
michael@0 213 *
michael@0 214 * @see UCharIterator
michael@0 215 * @stable ICU 2.1
michael@0 216 */
michael@0 217 typedef UChar32 U_CALLCONV
michael@0 218 UCharIteratorPrevious(UCharIterator *iter);
michael@0 219
michael@0 220 /**
michael@0 221 * Function type declaration for UCharIterator.reservedFn().
michael@0 222 * Reserved for future use.
michael@0 223 *
michael@0 224 * @param iter the UCharIterator structure ("this pointer")
michael@0 225 * @param something some integer argument
michael@0 226 * @return some integer
michael@0 227 *
michael@0 228 * @see UCharIterator
michael@0 229 * @stable ICU 2.1
michael@0 230 */
michael@0 231 typedef int32_t U_CALLCONV
michael@0 232 UCharIteratorReserved(UCharIterator *iter, int32_t something);
michael@0 233
michael@0 234 /**
michael@0 235 * Function type declaration for UCharIterator.getState().
michael@0 236 *
michael@0 237 * Get the "state" of the iterator in the form of a single 32-bit word.
michael@0 238 * It is recommended that the state value be calculated to be as small as
michael@0 239 * is feasible. For strings with limited lengths, fewer than 32 bits may
michael@0 240 * be sufficient.
michael@0 241 *
michael@0 242 * This is used together with setState()/UCharIteratorSetState
michael@0 243 * to save and restore the iterator position more efficiently than with
michael@0 244 * getIndex()/move().
michael@0 245 *
michael@0 246 * The iterator state is defined as a uint32_t value because it is designed
michael@0 247 * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
michael@0 248 * of the character iterator.
michael@0 249 *
michael@0 250 * With some UCharIterator implementations (e.g., UTF-8),
michael@0 251 * getting and setting the UTF-16 index with existing functions
michael@0 252 * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
michael@0 253 * relatively slow because the iterator has to "walk" from a known index
michael@0 254 * to the requested one.
michael@0 255 * This takes more time the farther it needs to go.
michael@0 256 *
michael@0 257 * An opaque state value allows an iterator implementation to provide
michael@0 258 * an internal index (UTF-8: the source byte array index) for
michael@0 259 * fast, constant-time restoration.
michael@0 260 *
michael@0 261 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
michael@0 262 * the UTF-16 index may not be restored as well, but the iterator can deliver
michael@0 263 * the correct text contents and move relative to the current position
michael@0 264 * without performance degradation.
michael@0 265 *
michael@0 266 * Some UCharIterator implementations may not be able to return
michael@0 267 * a valid state for each position, in which case they return UITER_NO_STATE instead.
michael@0 268 * This will be clearly documented for each such iterator (none of the public ones here).
michael@0 269 *
michael@0 270 * @param iter the UCharIterator structure ("this pointer")
michael@0 271 * @return the state word, or UITER_NO_STATE if the implementation cannot return a valid state for the current position
michael@0 272 *
michael@0 273 * @see UCharIterator
michael@0 274 * @see UCharIteratorSetState
michael@0 275 * @see UITER_NO_STATE
michael@0 276 * @stable ICU 2.6
michael@0 277 */
michael@0 278 typedef uint32_t U_CALLCONV
michael@0 279 UCharIteratorGetState(const UCharIterator *iter);
michael@0 280
michael@0 281 /**
michael@0 282 * Function type declaration for UCharIterator.setState().
michael@0 283 *
michael@0 284 * Restore the "state" of the iterator using a state word from a getState() call.
michael@0 285 * The iterator object need not be the same one as for which getState() was called,
michael@0 286 * but it must be of the same type (set up using the same uiter_setXYZ function)
michael@0 287 * and it must iterate over the same string
michael@0 288 * (binary identical regardless of memory address).
michael@0 289 * For more about the state word see UCharIteratorGetState.
michael@0 290 *
michael@0 291 * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
michael@0 292 * the UTF-16 index may not be restored as well, but the iterator can deliver
michael@0 293 * the correct text contents and move relative to the current position
michael@0 294 * without performance degradation.
michael@0 295 *
michael@0 296 * @param iter the UCharIterator structure ("this pointer")
michael@0 297 * @param state the state word from a getState() call
michael@0 298 * on a same-type, same-string iterator
michael@0 299 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 300 * which must not indicate a failure before the function call.
michael@0 301 *
michael@0 302 * @see UCharIterator
michael@0 303 * @see UCharIteratorGetState
michael@0 304 * @stable ICU 2.6
michael@0 305 */
michael@0 306 typedef void U_CALLCONV
michael@0 307 UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
michael@0 308
michael@0 309
michael@0 310 /**
michael@0 311 * C API for code unit iteration.
michael@0 312 * This can be used as a C wrapper around
michael@0 313 * CharacterIterator, Replaceable, or implemented using simple strings, etc.
michael@0 314 *
michael@0 315 * There are two roles for using UCharIterator:
michael@0 316 *
michael@0 317 * A "provider" sets the necessary function pointers and controls the "protected"
michael@0 318 * fields of the UCharIterator structure. A "provider" passes a UCharIterator
michael@0 319 * into C APIs that need a UCharIterator as an abstract, flexible string interface.
michael@0 320 *
michael@0 321 * Implementations of such C APIs are "callers" of UCharIterator functions;
michael@0 322 * they only use the "public" function pointers and never access the "protected"
michael@0 323 * fields directly.
michael@0 324 *
michael@0 325 * The current() and next() functions only check the current index against the
michael@0 326 * limit, and previous() only checks the current index against the start,
michael@0 327 * to see if the iterator already reached the end of the iteration range.
michael@0 328 *
michael@0 329 * The assumption - in all iterators - is that the index is moved via the API,
michael@0 330 * which means it won't go out of bounds, or the index is modified by
michael@0 331 * user code that knows enough about the iterator implementation to set valid
michael@0 332 * index values.
michael@0 333 *
michael@0 334 * UCharIterator functions return code unit values 0..0xffff,
michael@0 335 * or U_SENTINEL if the iteration bounds are reached.
michael@0 336 *
michael@0 337 * @stable ICU 2.1
michael@0 338 */
michael@0 339 struct UCharIterator {
michael@0 340 /**
michael@0 341 * (protected) Pointer to string or wrapped object or similar.
michael@0 342 * Not used by caller.
michael@0 343 * @stable ICU 2.1
michael@0 344 */
michael@0 345 const void *context;
michael@0 346
michael@0 347 /**
michael@0 348 * (protected) Length of string or similar.
michael@0 349 * Not used by caller.
michael@0 350 * @stable ICU 2.1
michael@0 351 */
michael@0 352 int32_t length;
michael@0 353
michael@0 354 /**
michael@0 355 * (protected) Start index or similar.
michael@0 356 * Not used by caller.
michael@0 357 * @stable ICU 2.1
michael@0 358 */
michael@0 359 int32_t start;
michael@0 360
michael@0 361 /**
michael@0 362 * (protected) Current index or similar.
michael@0 363 * Not used by caller.
michael@0 364 * @stable ICU 2.1
michael@0 365 */
michael@0 366 int32_t index;
michael@0 367
michael@0 368 /**
michael@0 369 * (protected) Limit index or similar.
michael@0 370 * Not used by caller.
michael@0 371 * @stable ICU 2.1
michael@0 372 */
michael@0 373 int32_t limit;
michael@0 374
michael@0 375 /**
michael@0 376 * (protected) Used by UTF-8 iterators and possibly others.
michael@0 377 * @stable ICU 2.1
michael@0 378 */
michael@0 379 int32_t reservedField;
michael@0 380
michael@0 381 /**
michael@0 382 * (public) Returns the current position or the
michael@0 383 * start or limit index of the iteration range.
michael@0 384 *
michael@0 385 * @see UCharIteratorGetIndex
michael@0 386 * @stable ICU 2.1
michael@0 387 */
michael@0 388 UCharIteratorGetIndex *getIndex;
michael@0 389
michael@0 390 /**
michael@0 391 * (public) Moves the current position relative to the start or limit of the
michael@0 392 * iteration range, or relative to the current position itself.
michael@0 393 * The movement is expressed in numbers of code units forward
michael@0 394 * or backward by specifying a positive or negative delta.
michael@0 395 *
michael@0 396 * @see UCharIteratorMove
michael@0 397 * @stable ICU 2.1
michael@0 398 */
michael@0 399 UCharIteratorMove *move;
michael@0 400
michael@0 401 /**
michael@0 402 * (public) Check if current() and next() can still
michael@0 403 * return another code unit.
michael@0 404 *
michael@0 405 * @see UCharIteratorHasNext
michael@0 406 * @stable ICU 2.1
michael@0 407 */
michael@0 408 UCharIteratorHasNext *hasNext;
michael@0 409
michael@0 410 /**
michael@0 411 * (public) Check if previous() can still return another code unit.
michael@0 412 *
michael@0 413 * @see UCharIteratorHasPrevious
michael@0 414 * @stable ICU 2.1
michael@0 415 */
michael@0 416 UCharIteratorHasPrevious *hasPrevious;
michael@0 417
michael@0 418 /**
michael@0 419 * (public) Return the code unit at the current position,
michael@0 420 * or U_SENTINEL if there is none (index is at the limit).
michael@0 421 *
michael@0 422 * @see UCharIteratorCurrent
michael@0 423 * @stable ICU 2.1
michael@0 424 */
michael@0 425 UCharIteratorCurrent *current;
michael@0 426
michael@0 427 /**
michael@0 428 * (public) Return the code unit at the current index and increment
michael@0 429 * the index (post-increment, like s[i++]),
michael@0 430 * or return U_SENTINEL if there is none (index is at the limit).
michael@0 431 *
michael@0 432 * @see UCharIteratorNext
michael@0 433 * @stable ICU 2.1
michael@0 434 */
michael@0 435 UCharIteratorNext *next;
michael@0 436
michael@0 437 /**
michael@0 438 * (public) Decrement the index and return the code unit from there
michael@0 439 * (pre-decrement, like s[--i]),
michael@0 440 * or return U_SENTINEL if there is none (index is at the start).
michael@0 441 *
michael@0 442 * @see UCharIteratorPrevious
michael@0 443 * @stable ICU 2.1
michael@0 444 */
michael@0 445 UCharIteratorPrevious *previous;
michael@0 446
michael@0 447 /**
michael@0 448 * (public) Reserved for future use. Currently NULL.
michael@0 449 *
michael@0 450 * @see UCharIteratorReserved
michael@0 451 * @stable ICU 2.1
michael@0 452 */
michael@0 453 UCharIteratorReserved *reservedFn;
michael@0 454
michael@0 455 /**
michael@0 456 * (public) Return the state of the iterator, to be restored later with setState().
michael@0 457 * This function pointer is NULL if the iterator does not implement it.
michael@0 458 *
michael@0 459 * @see UCharIteratorGetState
michael@0 460 * @stable ICU 2.6
michael@0 461 */
michael@0 462 UCharIteratorGetState *getState;
michael@0 463
michael@0 464 /**
michael@0 465 * (public) Restore the iterator state from the state word from a call
michael@0 466 * to getState().
michael@0 467 * This function pointer is NULL if the iterator does not implement it.
michael@0 468 *
michael@0 469 * @see UCharIteratorSetState
michael@0 470 * @stable ICU 2.6
michael@0 471 */
michael@0 472 UCharIteratorSetState *setState;
michael@0 473 };
michael@0 474
michael@0 475 /**
michael@0 476 * Helper function for UCharIterator to get the code point
michael@0 477 * at the current index.
michael@0 478 *
michael@0 479 * Return the code point that includes the code unit at the current position,
michael@0 480 * or U_SENTINEL if there is none (index is at the limit).
michael@0 481 * If the current code unit is a lead or trail surrogate,
michael@0 482 * then the following or preceding surrogate is used to form
michael@0 483 * the code point value.
michael@0 484 *
michael@0 485 * @param iter the UCharIterator structure ("this pointer")
michael@0 486 * @return the current code point, or U_SENTINEL if the index is at the limit
michael@0 487 *
michael@0 488 * @see UCharIterator
michael@0 489 * @see U16_GET
michael@0 490 * @see UnicodeString::char32At()
michael@0 491 * @stable ICU 2.1
michael@0 492 */
michael@0 493 U_STABLE UChar32 U_EXPORT2
michael@0 494 uiter_current32(UCharIterator *iter);
michael@0 495
michael@0 496 /**
michael@0 497 * Helper function for UCharIterator to get the next code point.
michael@0 498 *
michael@0 499 * Return the code point at the current index and increment
michael@0 500 * the index (post-increment, like s[i++]),
michael@0 501 * or return U_SENTINEL if there is none (index is at the limit).
michael@0 502 *
michael@0 503 * @param iter the UCharIterator structure ("this pointer")
michael@0 504 * @return the current code point (and post-increment the current index), or U_SENTINEL if the index is at the limit
michael@0 505 *
michael@0 506 * @see UCharIterator
michael@0 507 * @see U16_NEXT
michael@0 508 * @stable ICU 2.1
michael@0 509 */
michael@0 510 U_STABLE UChar32 U_EXPORT2
michael@0 511 uiter_next32(UCharIterator *iter);
michael@0 512
michael@0 513 /**
michael@0 514 * Helper function for UCharIterator to get the previous code point.
michael@0 515 *
michael@0 516 * Decrement the index and return the code point from there
michael@0 517 * (pre-decrement, like s[--i]),
michael@0 518 * or return U_SENTINEL if there is none (index is at the start).
michael@0 519 *
michael@0 520 * @param iter the UCharIterator structure ("this pointer")
michael@0 521 * @return the previous code point (after pre-decrementing the current index), or U_SENTINEL if the index is at the start
michael@0 522 *
michael@0 523 * @see UCharIterator
michael@0 524 * @see U16_PREV
michael@0 525 * @stable ICU 2.1
michael@0 526 */
michael@0 527 U_STABLE UChar32 U_EXPORT2
michael@0 528 uiter_previous32(UCharIterator *iter);
michael@0 529
michael@0 530 /**
michael@0 531 * Get the "state" of the iterator in the form of a single 32-bit word.
michael@0 532 * This is a convenience function that calls iter->getState(iter)
michael@0 533 * if iter->getState is not NULL;
michael@0 534 * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
michael@0 535 *
michael@0 536 * Some UCharIterator implementations may not be able to return
michael@0 537 * a valid state for each position, in which case they return UITER_NO_STATE instead.
michael@0 538 * This will be clearly documented for each such iterator (none of the public ones here).
michael@0 539 *
michael@0 540 * @param iter the UCharIterator structure ("this pointer")
michael@0 541 * @return the state word, or UITER_NO_STATE if iter->getState is NULL, an error occurs, or the iterator cannot return a valid state
michael@0 542 *
michael@0 543 * @see UCharIterator
michael@0 544 * @see UCharIteratorGetState
michael@0 545 * @see UITER_NO_STATE
michael@0 546 * @stable ICU 2.6
michael@0 547 */
michael@0 548 U_STABLE uint32_t U_EXPORT2
michael@0 549 uiter_getState(const UCharIterator *iter);
michael@0 550
michael@0 551 /**
michael@0 552 * Restore the "state" of the iterator using a state word from a getState() call.
michael@0 553 * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
michael@0 554 * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
michael@0 555 *
michael@0 556 * @param iter the UCharIterator structure ("this pointer")
michael@0 557 * @param state the state word from a getState() call
michael@0 558 * on a same-type, same-string iterator
michael@0 559 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 560 * which must not indicate a failure before the function call; set to U_UNSUPPORTED_ERROR if iter->setState is NULL.
michael@0 561 *
michael@0 562 * @see UCharIterator
michael@0 563 * @see UCharIteratorSetState
michael@0 564 * @stable ICU 2.6
michael@0 565 */
michael@0 566 U_STABLE void U_EXPORT2
michael@0 567 uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
michael@0 568
michael@0 569 /**
michael@0 570 * Set up a UCharIterator to iterate over a string.
michael@0 571 *
michael@0 572 * Sets the UCharIterator function pointers for iteration over the string s
michael@0 573 * with iteration boundaries start=index=0 and length=limit=string length.
michael@0 574 * The "provider" may set the start, index, and limit values at any time
michael@0 575 * within the range 0..length.
michael@0 576 * The length field will be ignored.
michael@0 577 *
michael@0 578 * The string pointer s is set into UCharIterator.context without copying
michael@0 579 * or reallocating the string contents.
michael@0 580 *
michael@0 581 * getState() simply returns the current index.
michael@0 582 * move() will always return the final index.
michael@0 583 *
michael@0 584 * @param iter UCharIterator structure to be set for iteration
michael@0 585 * @param s String to iterate over
michael@0 586 * @param length Length of s, or -1 if NUL-terminated
michael@0 587 *
michael@0 588 * @see UCharIterator
michael@0 589 * @stable ICU 2.1
michael@0 590 */
michael@0 591 U_STABLE void U_EXPORT2
michael@0 592 uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
michael@0 593
michael@0 594 /**
michael@0 595 * Set up a UCharIterator to iterate over a UTF-16BE string
michael@0 596 * (byte vector with a big-endian pair of bytes per UChar).
michael@0 597 *
michael@0 598 * Everything works just like with a normal UChar iterator (uiter_setString),
michael@0 599 * except that UChars are assembled from byte pairs,
michael@0 600 * and that the length argument here indicates an even number of bytes.
michael@0 601 *
michael@0 602 * getState() simply returns the current index.
michael@0 603 * move() will always return the final index.
michael@0 604 *
michael@0 605 * @param iter UCharIterator structure to be set for iteration
michael@0 606 * @param s UTF-16BE string to iterate over
michael@0 607 * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
michael@0 608 * (NUL means pair of 0 bytes at even index from s)
michael@0 609 *
michael@0 610 * @see UCharIterator
michael@0 611 * @see uiter_setString
michael@0 612 * @stable ICU 2.6
michael@0 613 */
michael@0 614 U_STABLE void U_EXPORT2
michael@0 615 uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
michael@0 616
michael@0 617 /**
michael@0 618 * Set up a UCharIterator to iterate over a UTF-8 string.
michael@0 619 *
michael@0 620 * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
michael@0 621 * with UTF-8 iteration boundaries 0 and length.
michael@0 622 * The implementation counts the UTF-16 index on the fly and
michael@0 623 * lazily evaluates the UTF-16 length of the text.
michael@0 624 *
michael@0 625 * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
michael@0 626 * When the reservedField is not 0, then it contains a supplementary code point
michael@0 627 * and the UTF-16 index is between the two corresponding surrogates.
michael@0 628 * At that point, the UTF-8 index is behind that code point.
michael@0 629 *
michael@0 630 * The UTF-8 string pointer s is set into UCharIterator.context without copying
michael@0 631 * or reallocating the string contents.
michael@0 632 *
michael@0 633 * getState() returns a state value consisting of
michael@0 634 * - the current UTF-8 source byte index (bits 31..1)
michael@0 635 * - a flag (bit 0) that indicates whether the UChar position is in the middle
michael@0 636 * of a surrogate pair
michael@0 637 * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
michael@0 638 *
michael@0 639 * getState() cannot also encode the UTF-16 index in the state value.
michael@0 640 * move(relative to limit or length), or
michael@0 641 * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
michael@0 642 *
michael@0 643 * @param iter UCharIterator structure to be set for iteration
michael@0 644 * @param s UTF-8 string to iterate over
michael@0 645 * @param length Length of s in bytes, or -1 if NUL-terminated
michael@0 646 *
michael@0 647 * @see UCharIterator
michael@0 648 * @stable ICU 2.6
michael@0 649 */
michael@0 650 U_STABLE void U_EXPORT2
michael@0 651 uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
michael@0 652
michael@0 653 #if U_SHOW_CPLUSPLUS_API
michael@0 654
michael@0 655 /**
michael@0 656 * Set up a UCharIterator to wrap around a C++ CharacterIterator.
michael@0 657 *
michael@0 658 * Sets the UCharIterator function pointers for iteration using the
michael@0 659 * CharacterIterator charIter.
michael@0 660 *
michael@0 661 * The CharacterIterator pointer charIter is set into UCharIterator.context
michael@0 662 * without copying or cloning the CharacterIterator object.
michael@0 663 * The other "protected" UCharIterator fields are set to 0 and will be ignored.
michael@0 664 * The iteration index and boundaries are controlled by the CharacterIterator.
michael@0 665 *
michael@0 666 * getState() simply returns the current index.
michael@0 667 * move() will always return the final index.
michael@0 668 *
michael@0 669 * @param iter UCharIterator structure to be set for iteration
michael@0 670 * @param charIter CharacterIterator to wrap
michael@0 671 *
michael@0 672 * @see UCharIterator
michael@0 673 * @stable ICU 2.1
michael@0 674 */
michael@0 675 U_STABLE void U_EXPORT2
michael@0 676 uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
michael@0 677
michael@0 678 /**
michael@0 679 * Set up a UCharIterator to iterate over a C++ Replaceable.
michael@0 680 *
michael@0 681 * Sets the UCharIterator function pointers for iteration over the
michael@0 682 * Replaceable rep with iteration boundaries start=index=0 and
michael@0 683 * length=limit=rep->length().
michael@0 684 * The "provider" may set the start, index, and limit values at any time
michael@0 685 * within the range 0..length=rep->length().
michael@0 686 * The length field will be ignored.
michael@0 687 *
michael@0 688 * The Replaceable pointer rep is set into UCharIterator.context without copying
michael@0 689 * or cloning/reallocating the Replaceable object.
michael@0 690 *
michael@0 691 * getState() simply returns the current index.
michael@0 692 * move() will always return the final index.
michael@0 693 *
michael@0 694 * @param iter UCharIterator structure to be set for iteration
michael@0 695 * @param rep Replaceable to iterate over
michael@0 696 *
michael@0 697 * @see UCharIterator
michael@0 698 * @stable ICU 2.1
michael@0 699 */
michael@0 700 U_STABLE void U_EXPORT2
michael@0 701 uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
michael@0 702
michael@0 703 #endif
michael@0 704
michael@0 705 U_CDECL_END
michael@0 706
michael@0 707 #endif

mercurial