Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2002-2011 International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: uiter.h |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2002jan18 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | */ |
michael@0 | 16 | |
michael@0 | 17 | #ifndef __UITER_H__ |
michael@0 | 18 | #define __UITER_H__ |
michael@0 | 19 | |
michael@0 | 20 | /** |
michael@0 | 21 | * \file |
michael@0 | 22 | * \brief C API: Unicode Character Iteration |
michael@0 | 23 | * |
michael@0 | 24 | * @see UCharIterator |
michael@0 | 25 | */ |
michael@0 | 26 | |
michael@0 | 27 | #include "unicode/utypes.h" |
michael@0 | 28 | |
michael@0 | 29 | #if U_SHOW_CPLUSPLUS_API |
michael@0 | 30 | U_NAMESPACE_BEGIN |
michael@0 | 31 | |
michael@0 | 32 | class CharacterIterator; |
michael@0 | 33 | class Replaceable; |
michael@0 | 34 | |
michael@0 | 35 | U_NAMESPACE_END |
michael@0 | 36 | #endif |
michael@0 | 37 | |
michael@0 | 38 | U_CDECL_BEGIN |
michael@0 | 39 | |
michael@0 | 40 | struct UCharIterator; |
michael@0 | 41 | typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ |
michael@0 | 42 | |
michael@0 | 43 | /** |
michael@0 | 44 | * Origin constants for UCharIterator.getIndex() and UCharIterator.move(): UITER_START, UITER_CURRENT and UITER_LIMIT select the start, current and limit index of the iteration range; UITER_ZERO and UITER_LENGTH select index 0 and the length. |
michael@0 | 45 | * @see UCharIteratorMove |
michael@0 | 46 | * @see UCharIterator |
michael@0 | 47 | * @stable ICU 2.1 |
michael@0 | 48 | */ |
michael@0 | 49 | typedef enum UCharIteratorOrigin { |
michael@0 | 50 | UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH |
michael@0 | 51 | } UCharIteratorOrigin; |
michael@0 | 52 | |
michael@0 | 53 | /** Constants for UCharIterator. @stable ICU 2.6 */ |
michael@0 | 54 | enum { |
michael@0 | 55 | /** |
michael@0 | 56 | * Constant value that may be returned by UCharIteratorMove |
michael@0 | 57 | * indicating that the final UTF-16 index is not known, but that the move succeeded. |
michael@0 | 58 | * This can occur when moving relative to limit or length, or |
michael@0 | 59 | * when moving relative to the current index after a setState() |
michael@0 | 60 | * when the current UTF-16 index is not known. |
michael@0 | 61 | * |
michael@0 | 62 | * It would be very inefficient to have to count from the beginning of the text |
michael@0 | 63 | * just to get the current/limit/length index after moving relative to it. |
michael@0 | 64 | * The actual index can be determined with getIndex(UITER_CURRENT) |
michael@0 | 65 | * which will count the UChars if necessary. |
michael@0 | 66 | * @see UCharIteratorMove |
michael@0 | 67 | * @stable ICU 2.6 |
michael@0 | 68 | */ |
michael@0 | 69 | UITER_UNKNOWN_INDEX=-2 |
michael@0 | 70 | }; |
michael@0 | 71 | |
michael@0 | 72 | |
michael@0 | 73 | /** |
michael@0 | 74 | * Constant for UCharIterator getState() indicating an error or |
michael@0 | 75 | * an unknown state. |
michael@0 | 76 | * Returned by uiter_getState()/UCharIteratorGetState |
michael@0 | 77 | * when an error occurs. |
michael@0 | 78 | * Also, some UCharIterator implementations may not be able to return |
michael@0 | 79 | * a valid state for each position. This will be clearly documented |
michael@0 | 80 | * for each such iterator (none of the public ones here). |
michael@0 | 81 | * @see UCharIteratorGetState |
michael@0 | 82 | * @stable ICU 2.6 |
michael@0 | 83 | */ |
michael@0 | 84 | #define UITER_NO_STATE ((uint32_t)0xffffffff) |
michael@0 | 85 | |
michael@0 | 86 | /** |
michael@0 | 87 | * Function type declaration for UCharIterator.getIndex(). |
michael@0 | 88 | * |
michael@0 | 89 | * Gets the current position, or the start or limit of the |
michael@0 | 90 | * iteration range. |
michael@0 | 91 | * |
michael@0 | 92 | * This function may perform slowly for UITER_CURRENT after setState() was called, |
michael@0 | 93 | * or for UITER_LENGTH, because an iterator implementation may have to count |
michael@0 | 94 | * UChars if the underlying storage is not UTF-16. |
michael@0 | 95 | * |
michael@0 | 96 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 97 | * @param origin get the 0, start, limit, length, or current index |
michael@0 | 98 | * @return the requested index, or U_SENTINEL in an error condition |
michael@0 | 99 | * |
michael@0 | 100 | * @see UCharIteratorOrigin |
michael@0 | 101 | * @see UCharIterator |
michael@0 | 102 | * @stable ICU 2.1 |
michael@0 | 103 | */ |
michael@0 | 104 | typedef int32_t U_CALLCONV |
michael@0 | 105 | UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); |
michael@0 | 106 | |
michael@0 | 107 | /** |
michael@0 | 108 | * Function type declaration for UCharIterator.move(). |
michael@0 | 109 | * |
michael@0 | 110 | * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). |
michael@0 | 111 | * |
michael@0 | 112 | * Moves the current position relative to the start or limit of the |
michael@0 | 113 | * iteration range, or relative to the current position itself. |
michael@0 | 114 | * The movement is expressed in numbers of code units forward |
michael@0 | 115 | * or backward by specifying a positive or negative delta. |
michael@0 | 116 | * Out of bounds movement will be pinned to the start or limit. |
michael@0 | 117 | * |
michael@0 | 118 | * This function may perform slowly for moving relative to UITER_LENGTH |
michael@0 | 119 | * because an iterator implementation may have to count the rest of the |
michael@0 | 120 | * UChars if the native storage is not UTF-16. |
michael@0 | 121 | * |
michael@0 | 122 | * When moving relative to the limit or length, or |
michael@0 | 123 | * relative to the current position after setState() was called, |
michael@0 | 124 | * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient |
michael@0 | 125 | * determination of the actual UTF-16 index. |
michael@0 | 126 | * The actual index can be determined with getIndex(UITER_CURRENT) |
michael@0 | 127 | * which will count the UChars if necessary. |
michael@0 | 128 | * See UITER_UNKNOWN_INDEX for details. |
michael@0 | 129 | * |
michael@0 | 130 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 131 | * @param delta can be positive, zero, or negative |
michael@0 | 132 | * @param origin move relative to the 0, start, limit, length, or current index |
michael@0 | 133 | * @return the new index, or U_SENTINEL on an error condition, |
michael@0 | 134 | * or UITER_UNKNOWN_INDEX when the index is not known. |
michael@0 | 135 | * |
michael@0 | 136 | * @see UCharIteratorOrigin |
michael@0 | 137 | * @see UCharIterator |
michael@0 | 138 | * @see UITER_UNKNOWN_INDEX |
michael@0 | 139 | * @stable ICU 2.1 |
michael@0 | 140 | */ |
michael@0 | 141 | typedef int32_t U_CALLCONV |
michael@0 | 142 | UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); |
michael@0 | 143 | |
michael@0 | 144 | /** |
michael@0 | 145 | * Function type declaration for UCharIterator.hasNext(). |
michael@0 | 146 | * |
michael@0 | 147 | * Check if current() and next() can still |
michael@0 | 148 | * return another code unit. |
michael@0 | 149 | * |
michael@0 | 150 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 151 | * @return boolean value for whether current() and next() can still return another code unit |
michael@0 | 152 | * |
michael@0 | 153 | * @see UCharIterator |
michael@0 | 154 | * @stable ICU 2.1 |
michael@0 | 155 | */ |
michael@0 | 156 | typedef UBool U_CALLCONV |
michael@0 | 157 | UCharIteratorHasNext(UCharIterator *iter); |
michael@0 | 158 | |
michael@0 | 159 | /** |
michael@0 | 160 | * Function type declaration for UCharIterator.hasPrevious(). |
michael@0 | 161 | * |
michael@0 | 162 | * Check if previous() can still return another code unit. |
michael@0 | 163 | * |
michael@0 | 164 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 165 | * @return boolean value for whether previous() can still return another code unit |
michael@0 | 166 | * |
michael@0 | 167 | * @see UCharIterator |
michael@0 | 168 | * @stable ICU 2.1 |
michael@0 | 169 | */ |
michael@0 | 170 | typedef UBool U_CALLCONV |
michael@0 | 171 | UCharIteratorHasPrevious(UCharIterator *iter); |
michael@0 | 172 | |
michael@0 | 173 | /** |
michael@0 | 174 | * Function type declaration for UCharIterator.current(). |
michael@0 | 175 | * |
michael@0 | 176 | * Return the code unit at the current position, |
michael@0 | 177 | * or U_SENTINEL if there is none (index is at the limit). |
michael@0 | 178 | * |
michael@0 | 179 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 180 | * @return the current code unit |
michael@0 | 181 | * |
michael@0 | 182 | * @see UCharIterator |
michael@0 | 183 | * @stable ICU 2.1 |
michael@0 | 184 | */ |
michael@0 | 185 | typedef UChar32 U_CALLCONV |
michael@0 | 186 | UCharIteratorCurrent(UCharIterator *iter); |
michael@0 | 187 | |
michael@0 | 188 | /** |
michael@0 | 189 | * Function type declaration for UCharIterator.next(). |
michael@0 | 190 | * |
michael@0 | 191 | * Return the code unit at the current index and increment |
michael@0 | 192 | * the index (post-increment, like s[i++]), |
michael@0 | 193 | * or return U_SENTINEL if there is none (index is at the limit). |
michael@0 | 194 | * |
michael@0 | 195 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 196 | * @return the current code unit (and post-increment the current index) |
michael@0 | 197 | * |
michael@0 | 198 | * @see UCharIterator |
michael@0 | 199 | * @stable ICU 2.1 |
michael@0 | 200 | */ |
michael@0 | 201 | typedef UChar32 U_CALLCONV |
michael@0 | 202 | UCharIteratorNext(UCharIterator *iter); |
michael@0 | 203 | |
michael@0 | 204 | /** |
michael@0 | 205 | * Function type declaration for UCharIterator.previous(). |
michael@0 | 206 | * |
michael@0 | 207 | * Decrement the index and return the code unit from there |
michael@0 | 208 | * (pre-decrement, like s[--i]), |
michael@0 | 209 | * or return U_SENTINEL if there is none (index is at the start). |
michael@0 | 210 | * |
michael@0 | 211 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 212 | * @return the previous code unit (after pre-decrementing the current index) |
michael@0 | 213 | * |
michael@0 | 214 | * @see UCharIterator |
michael@0 | 215 | * @stable ICU 2.1 |
michael@0 | 216 | */ |
michael@0 | 217 | typedef UChar32 U_CALLCONV |
michael@0 | 218 | UCharIteratorPrevious(UCharIterator *iter); |
michael@0 | 219 | |
michael@0 | 220 | /** |
michael@0 | 221 | * Function type declaration for UCharIterator.reservedFn(). |
michael@0 | 222 | * Reserved for future use. |
michael@0 | 223 | * |
michael@0 | 224 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 225 | * @param something some integer argument |
michael@0 | 226 | * @return some integer |
michael@0 | 227 | * |
michael@0 | 228 | * @see UCharIterator |
michael@0 | 229 | * @stable ICU 2.1 |
michael@0 | 230 | */ |
michael@0 | 231 | typedef int32_t U_CALLCONV |
michael@0 | 232 | UCharIteratorReserved(UCharIterator *iter, int32_t something); |
michael@0 | 233 | |
michael@0 | 234 | /** |
michael@0 | 235 | * Function type declaration for UCharIterator.getState(). |
michael@0 | 236 | * |
michael@0 | 237 | * Get the "state" of the iterator in the form of a single 32-bit word. |
michael@0 | 238 | * It is recommended that the state value be calculated to be as small as |
michael@0 | 239 | * is feasible. For strings with limited lengths, fewer than 32 bits may |
michael@0 | 240 | * be sufficient. |
michael@0 | 241 | * |
michael@0 | 242 | * This is used together with setState()/UCharIteratorSetState |
michael@0 | 243 | * to save and restore the iterator position more efficiently than with |
michael@0 | 244 | * getIndex()/move(). |
michael@0 | 245 | * |
michael@0 | 246 | * The iterator state is defined as a uint32_t value because it is designed |
michael@0 | 247 | * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state |
michael@0 | 248 | * of the character iterator. |
michael@0 | 249 | * |
michael@0 | 250 | * With some UCharIterator implementations (e.g., UTF-8), |
michael@0 | 251 | * getting and setting the UTF-16 index with existing functions |
michael@0 | 252 | * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but |
michael@0 | 253 | * relatively slow because the iterator has to "walk" from a known index |
michael@0 | 254 | * to the requested one. |
michael@0 | 255 | * This takes more time the farther it needs to go. |
michael@0 | 256 | * |
michael@0 | 257 | * An opaque state value allows an iterator implementation to provide |
michael@0 | 258 | * an internal index (UTF-8: the source byte array index) for |
michael@0 | 259 | * fast, constant-time restoration. |
michael@0 | 260 | * |
michael@0 | 261 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because |
michael@0 | 262 | * the UTF-16 index may not be restored as well, but the iterator can deliver |
michael@0 | 263 | * the correct text contents and move relative to the current position |
michael@0 | 264 | * without performance degradation. |
michael@0 | 265 | * |
michael@0 | 266 | * Some UCharIterator implementations may not be able to return |
michael@0 | 267 | * a valid state for each position, in which case they return UITER_NO_STATE instead. |
michael@0 | 268 | * This will be clearly documented for each such iterator (none of the public ones here). |
michael@0 | 269 | * |
michael@0 | 270 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 271 | * @return the state word |
michael@0 | 272 | * |
michael@0 | 273 | * @see UCharIterator |
michael@0 | 274 | * @see UCharIteratorSetState |
michael@0 | 275 | * @see UITER_NO_STATE |
michael@0 | 276 | * @stable ICU 2.6 |
michael@0 | 277 | */ |
michael@0 | 278 | typedef uint32_t U_CALLCONV |
michael@0 | 279 | UCharIteratorGetState(const UCharIterator *iter); |
michael@0 | 280 | |
michael@0 | 281 | /** |
michael@0 | 282 | * Function type declaration for UCharIterator.setState(). |
michael@0 | 283 | * |
michael@0 | 284 | * Restore the "state" of the iterator using a state word from a getState() call. |
michael@0 | 285 | * The iterator object need not be the same one as for which getState() was called, |
michael@0 | 286 | * but it must be of the same type (set up using the same uiter_setXYZ function) |
michael@0 | 287 | * and it must iterate over the same string |
michael@0 | 288 | * (binary identical regardless of memory address). |
michael@0 | 289 | * For more about the state word see UCharIteratorGetState. |
michael@0 | 290 | * |
michael@0 | 291 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because |
michael@0 | 292 | * the UTF-16 index may not be restored as well, but the iterator can deliver |
michael@0 | 293 | * the correct text contents and move relative to the current position |
michael@0 | 294 | * without performance degradation. |
michael@0 | 295 | * |
michael@0 | 296 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 297 | * @param state the state word from a getState() call |
michael@0 | 298 | * on a same-type, same-string iterator |
michael@0 | 299 | * @param pErrorCode Must be a valid pointer to an error code value, |
michael@0 | 300 | * which must not indicate a failure before the function call. |
michael@0 | 301 | * |
michael@0 | 302 | * @see UCharIterator |
michael@0 | 303 | * @see UCharIteratorGetState |
michael@0 | 304 | * @stable ICU 2.6 |
michael@0 | 305 | */ |
michael@0 | 306 | typedef void U_CALLCONV |
michael@0 | 307 | UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); |
michael@0 | 308 | |
michael@0 | 309 | |
michael@0 | 310 | /** |
michael@0 | 311 | * C API for code unit iteration. |
michael@0 | 312 | * This can be used as a C wrapper around |
michael@0 | 313 | * CharacterIterator, Replaceable, or implemented using simple strings, etc. |
michael@0 | 314 | * |
michael@0 | 315 | * There are two roles for using UCharIterator: |
michael@0 | 316 | * |
michael@0 | 317 | * A "provider" sets the necessary function pointers and controls the "protected" |
michael@0 | 318 | * fields of the UCharIterator structure. A "provider" passes a UCharIterator |
michael@0 | 319 | * into C APIs that need a UCharIterator as an abstract, flexible string interface. |
michael@0 | 320 | * |
michael@0 | 321 | * Implementations of such C APIs are "callers" of UCharIterator functions; |
michael@0 | 322 | * they only use the "public" function pointers and never access the "protected" |
michael@0 | 323 | * fields directly. |
michael@0 | 324 | * |
michael@0 | 325 | * The current() and next() functions only check the current index against the |
michael@0 | 326 | * limit, and previous() only checks the current index against the start, |
michael@0 | 327 | * to see if the iterator already reached the end of the iteration range. |
michael@0 | 328 | * |
michael@0 | 329 | * The assumption - in all iterators - is that the index is moved via the API, |
michael@0 | 330 | * which means it won't go out of bounds, or the index is modified by |
michael@0 | 331 | * user code that knows enough about the iterator implementation to set valid |
michael@0 | 332 | * index values. |
michael@0 | 333 | * |
michael@0 | 334 | * UCharIterator functions return code unit values 0..0xffff, |
michael@0 | 335 | * or U_SENTINEL if the iteration bounds are reached. |
michael@0 | 336 | * |
michael@0 | 337 | * @stable ICU 2.1 |
michael@0 | 338 | */ |
michael@0 | 339 | struct UCharIterator { |
michael@0 | 340 | /** |
michael@0 | 341 | * (protected) Pointer to string or wrapped object or similar. |
michael@0 | 342 | * Not used by caller. |
michael@0 | 343 | * @stable ICU 2.1 |
michael@0 | 344 | */ |
michael@0 | 345 | const void *context; |
michael@0 | 346 | |
michael@0 | 347 | /** |
michael@0 | 348 | * (protected) Length of string or similar. |
michael@0 | 349 | * Not used by caller. |
michael@0 | 350 | * @stable ICU 2.1 |
michael@0 | 351 | */ |
michael@0 | 352 | int32_t length; |
michael@0 | 353 | |
michael@0 | 354 | /** |
michael@0 | 355 | * (protected) Start index or similar. |
michael@0 | 356 | * Not used by caller. |
michael@0 | 357 | * @stable ICU 2.1 |
michael@0 | 358 | */ |
michael@0 | 359 | int32_t start; |
michael@0 | 360 | |
michael@0 | 361 | /** |
michael@0 | 362 | * (protected) Current index or similar. |
michael@0 | 363 | * Not used by caller. |
michael@0 | 364 | * @stable ICU 2.1 |
michael@0 | 365 | */ |
michael@0 | 366 | int32_t index; |
michael@0 | 367 | |
michael@0 | 368 | /** |
michael@0 | 369 | * (protected) Limit index or similar. |
michael@0 | 370 | * Not used by caller. |
michael@0 | 371 | * @stable ICU 2.1 |
michael@0 | 372 | */ |
michael@0 | 373 | int32_t limit; |
michael@0 | 374 | |
michael@0 | 375 | /** |
michael@0 | 376 | * (protected) Used by UTF-8 iterators and possibly others. |
michael@0 | 377 | * @stable ICU 2.1 |
michael@0 | 378 | */ |
michael@0 | 379 | int32_t reservedField; |
michael@0 | 380 | |
michael@0 | 381 | /** |
michael@0 | 382 | * (public) Returns the current position or the |
michael@0 | 383 | * start or limit index of the iteration range. |
michael@0 | 384 | * |
michael@0 | 385 | * @see UCharIteratorGetIndex |
michael@0 | 386 | * @stable ICU 2.1 |
michael@0 | 387 | */ |
michael@0 | 388 | UCharIteratorGetIndex *getIndex; |
michael@0 | 389 | |
michael@0 | 390 | /** |
michael@0 | 391 | * (public) Moves the current position relative to the start or limit of the |
michael@0 | 392 | * iteration range, or relative to the current position itself. |
michael@0 | 393 | * The movement is expressed in numbers of code units forward |
michael@0 | 394 | * or backward by specifying a positive or negative delta. |
michael@0 | 395 | * |
michael@0 | 396 | * @see UCharIteratorMove |
michael@0 | 397 | * @stable ICU 2.1 |
michael@0 | 398 | */ |
michael@0 | 399 | UCharIteratorMove *move; |
michael@0 | 400 | |
michael@0 | 401 | /** |
michael@0 | 402 | * (public) Check if current() and next() can still |
michael@0 | 403 | * return another code unit. |
michael@0 | 404 | * |
michael@0 | 405 | * @see UCharIteratorHasNext |
michael@0 | 406 | * @stable ICU 2.1 |
michael@0 | 407 | */ |
michael@0 | 408 | UCharIteratorHasNext *hasNext; |
michael@0 | 409 | |
michael@0 | 410 | /** |
michael@0 | 411 | * (public) Check if previous() can still return another code unit. |
michael@0 | 412 | * |
michael@0 | 413 | * @see UCharIteratorHasPrevious |
michael@0 | 414 | * @stable ICU 2.1 |
michael@0 | 415 | */ |
michael@0 | 416 | UCharIteratorHasPrevious *hasPrevious; |
michael@0 | 417 | |
michael@0 | 418 | /** |
michael@0 | 419 | * (public) Return the code unit at the current position, |
michael@0 | 420 | * or U_SENTINEL if there is none (index is at the limit). |
michael@0 | 421 | * |
michael@0 | 422 | * @see UCharIteratorCurrent |
michael@0 | 423 | * @stable ICU 2.1 |
michael@0 | 424 | */ |
michael@0 | 425 | UCharIteratorCurrent *current; |
michael@0 | 426 | |
michael@0 | 427 | /** |
michael@0 | 428 | * (public) Return the code unit at the current index and increment |
michael@0 | 429 | * the index (post-increment, like s[i++]), |
michael@0 | 430 | * or return U_SENTINEL if there is none (index is at the limit). |
michael@0 | 431 | * |
michael@0 | 432 | * @see UCharIteratorNext |
michael@0 | 433 | * @stable ICU 2.1 |
michael@0 | 434 | */ |
michael@0 | 435 | UCharIteratorNext *next; |
michael@0 | 436 | |
michael@0 | 437 | /** |
michael@0 | 438 | * (public) Decrement the index and return the code unit from there |
michael@0 | 439 | * (pre-decrement, like s[--i]), |
michael@0 | 440 | * or return U_SENTINEL if there is none (index is at the start). |
michael@0 | 441 | * |
michael@0 | 442 | * @see UCharIteratorPrevious |
michael@0 | 443 | * @stable ICU 2.1 |
michael@0 | 444 | */ |
michael@0 | 445 | UCharIteratorPrevious *previous; |
michael@0 | 446 | |
michael@0 | 447 | /** |
michael@0 | 448 | * (public) Reserved for future use. Currently NULL. |
michael@0 | 449 | * |
michael@0 | 450 | * @see UCharIteratorReserved |
michael@0 | 451 | * @stable ICU 2.1 |
michael@0 | 452 | */ |
michael@0 | 453 | UCharIteratorReserved *reservedFn; |
michael@0 | 454 | |
michael@0 | 455 | /** |
michael@0 | 456 | * (public) Return the state of the iterator, to be restored later with setState(). |
michael@0 | 457 | * This function pointer is NULL if the iterator does not implement it. |
michael@0 | 458 | * |
michael@0 | 459 | * @see UCharIteratorGetState |
michael@0 | 460 | * @stable ICU 2.6 |
michael@0 | 461 | */ |
michael@0 | 462 | UCharIteratorGetState *getState; |
michael@0 | 463 | |
michael@0 | 464 | /** |
michael@0 | 465 | * (public) Restore the iterator state from the state word from a call |
michael@0 | 466 | * to getState(). |
michael@0 | 467 | * This function pointer is NULL if the iterator does not implement it. |
michael@0 | 468 | * |
michael@0 | 469 | * @see UCharIteratorSetState |
michael@0 | 470 | * @stable ICU 2.6 |
michael@0 | 471 | */ |
michael@0 | 472 | UCharIteratorSetState *setState; |
michael@0 | 473 | }; |
michael@0 | 474 | |
michael@0 | 475 | /** |
michael@0 | 476 | * Helper function for UCharIterator to get the code point |
michael@0 | 477 | * at the current index. |
michael@0 | 478 | * |
michael@0 | 479 | * Return the code point that includes the code unit at the current position, |
michael@0 | 480 | * or U_SENTINEL if there is none (index is at the limit). |
michael@0 | 481 | * If the current code unit is a lead or trail surrogate, |
michael@0 | 482 | * then the following or preceding surrogate is used to form |
michael@0 | 483 | * the code point value. |
michael@0 | 484 | * |
michael@0 | 485 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 486 | * @return the current code point |
michael@0 | 487 | * |
michael@0 | 488 | * @see UCharIterator |
michael@0 | 489 | * @see U16_GET |
michael@0 | 490 | * @see UnicodeString::char32At() |
michael@0 | 491 | * @stable ICU 2.1 |
michael@0 | 492 | */ |
michael@0 | 493 | U_STABLE UChar32 U_EXPORT2 |
michael@0 | 494 | uiter_current32(UCharIterator *iter); |
michael@0 | 495 | |
michael@0 | 496 | /** |
michael@0 | 497 | * Helper function for UCharIterator to get the next code point. |
michael@0 | 498 | * |
michael@0 | 499 | * Return the code point at the current index and increment |
michael@0 | 500 | * the index (post-increment, like s[i++]), |
michael@0 | 501 | * or return U_SENTINEL if there is none (index is at the limit). |
michael@0 | 502 | * |
michael@0 | 503 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 504 | * @return the current code point (and post-increment the current index) |
michael@0 | 505 | * |
michael@0 | 506 | * @see UCharIterator |
michael@0 | 507 | * @see U16_NEXT |
michael@0 | 508 | * @stable ICU 2.1 |
michael@0 | 509 | */ |
michael@0 | 510 | U_STABLE UChar32 U_EXPORT2 |
michael@0 | 511 | uiter_next32(UCharIterator *iter); |
michael@0 | 512 | |
michael@0 | 513 | /** |
michael@0 | 514 | * Helper function for UCharIterator to get the previous code point. |
michael@0 | 515 | * |
michael@0 | 516 | * Decrement the index and return the code point from there |
michael@0 | 517 | * (pre-decrement, like s[--i]), |
michael@0 | 518 | * or return U_SENTINEL if there is none (index is at the start). |
michael@0 | 519 | * |
michael@0 | 520 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 521 | * @return the previous code point (after pre-decrementing the current index) |
michael@0 | 522 | * |
michael@0 | 523 | * @see UCharIterator |
michael@0 | 524 | * @see U16_PREV |
michael@0 | 525 | * @stable ICU 2.1 |
michael@0 | 526 | */ |
michael@0 | 527 | U_STABLE UChar32 U_EXPORT2 |
michael@0 | 528 | uiter_previous32(UCharIterator *iter); |
michael@0 | 529 | |
michael@0 | 530 | /** |
michael@0 | 531 | * Get the "state" of the iterator in the form of a single 32-bit word. |
michael@0 | 532 | * This is a convenience function that calls iter->getState(iter) |
michael@0 | 533 | * if iter->getState is not NULL; |
michael@0 | 534 | * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. |
michael@0 | 535 | * |
michael@0 | 536 | * Some UCharIterator implementations may not be able to return |
michael@0 | 537 | * a valid state for each position, in which case they return UITER_NO_STATE instead. |
michael@0 | 538 | * This will be clearly documented for each such iterator (none of the public ones here). |
michael@0 | 539 | * |
michael@0 | 540 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 541 | * @return the state word |
michael@0 | 542 | * |
michael@0 | 543 | * @see UCharIterator |
michael@0 | 544 | * @see UCharIteratorGetState |
michael@0 | 545 | * @see UITER_NO_STATE |
michael@0 | 546 | * @stable ICU 2.6 |
michael@0 | 547 | */ |
michael@0 | 548 | U_STABLE uint32_t U_EXPORT2 |
michael@0 | 549 | uiter_getState(const UCharIterator *iter); |
michael@0 | 550 | |
michael@0 | 551 | /** |
michael@0 | 552 | * Restore the "state" of the iterator using a state word from a getState() call. |
michael@0 | 553 | * This is a convenience function that calls iter->setState(iter, state, pErrorCode) |
michael@0 | 554 | * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. |
michael@0 | 555 | * |
michael@0 | 556 | * @param iter the UCharIterator structure ("this pointer") |
michael@0 | 557 | * @param state the state word from a getState() call |
michael@0 | 558 | * on a same-type, same-string iterator |
michael@0 | 559 | * @param pErrorCode Must be a valid pointer to an error code value, |
michael@0 | 560 | * which must not indicate a failure before the function call. |
michael@0 | 561 | * |
michael@0 | 562 | * @see UCharIterator |
michael@0 | 563 | * @see UCharIteratorSetState |
michael@0 | 564 | * @stable ICU 2.6 |
michael@0 | 565 | */ |
michael@0 | 566 | U_STABLE void U_EXPORT2 |
michael@0 | 567 | uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); |
michael@0 | 568 | |
michael@0 | 569 | /** |
michael@0 | 570 | * Set up a UCharIterator to iterate over a string. |
michael@0 | 571 | * |
michael@0 | 572 | * Sets the UCharIterator function pointers for iteration over the string s |
michael@0 | 573 | * with iteration boundaries start=index=0 and length=limit=string length. |
michael@0 | 574 | * The "provider" may set the start, index, and limit values at any time |
michael@0 | 575 | * within the range 0..length. |
michael@0 | 576 | * The length field will be ignored. |
michael@0 | 577 | * |
michael@0 | 578 | * The string pointer s is set into UCharIterator.context without copying |
michael@0 | 579 | * or reallocating the string contents. |
michael@0 | 580 | * |
michael@0 | 581 | * getState() simply returns the current index. |
michael@0 | 582 | * move() will always return the final index. |
michael@0 | 583 | * |
michael@0 | 584 | * @param iter UCharIterator structure to be set for iteration |
michael@0 | 585 | * @param s String to iterate over |
michael@0 | 586 | * @param length Length of s, or -1 if NUL-terminated |
michael@0 | 587 | * |
michael@0 | 588 | * @see UCharIterator |
michael@0 | 589 | * @stable ICU 2.1 |
michael@0 | 590 | */ |
michael@0 | 591 | U_STABLE void U_EXPORT2 |
michael@0 | 592 | uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); |
michael@0 | 593 | |
michael@0 | 594 | /** |
michael@0 | 595 | * Set up a UCharIterator to iterate over a UTF-16BE string |
michael@0 | 596 | * (byte vector with a big-endian pair of bytes per UChar). |
michael@0 | 597 | * |
michael@0 | 598 | * Everything works just like with a normal UChar iterator (uiter_setString), |
michael@0 | 599 | * except that UChars are assembled from byte pairs, |
michael@0 | 600 | * and that the length argument here indicates an even number of bytes. |
michael@0 | 601 | * |
michael@0 | 602 | * getState() simply returns the current index. |
michael@0 | 603 | * move() will always return the final index. |
michael@0 | 604 | * |
michael@0 | 605 | * @param iter UCharIterator structure to be set for iteration |
michael@0 | 606 | * @param s UTF-16BE string to iterate over |
michael@0 | 607 | * @param length Length of s as an even number of bytes, or -1 if NUL-terminated |
michael@0 | 608 | * (NUL means pair of 0 bytes at even index from s) |
michael@0 | 609 | * |
michael@0 | 610 | * @see UCharIterator |
michael@0 | 611 | * @see uiter_setString |
michael@0 | 612 | * @stable ICU 2.6 |
michael@0 | 613 | */ |
michael@0 | 614 | U_STABLE void U_EXPORT2 |
michael@0 | 615 | uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); |
michael@0 | 616 | |
michael@0 | 617 | /** |
michael@0 | 618 | * Set up a UCharIterator to iterate over a UTF-8 string. |
michael@0 | 619 | * |
michael@0 | 620 | * Sets the UCharIterator function pointers for iteration over the UTF-8 string s |
michael@0 | 621 | * with UTF-8 iteration boundaries 0 and length. |
michael@0 | 622 | * The implementation counts the UTF-16 index on the fly and |
michael@0 | 623 | * lazily evaluates the UTF-16 length of the text. |
michael@0 | 624 | * |
michael@0 | 625 | * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. |
michael@0 | 626 | * When the reservedField is not 0, then it contains a supplementary code point |
michael@0 | 627 | * and the UTF-16 index is between the two corresponding surrogates. |
michael@0 | 628 | * At that point, the UTF-8 index is behind that code point. |
michael@0 | 629 | * |
michael@0 | 630 | * The UTF-8 string pointer s is set into UCharIterator.context without copying |
michael@0 | 631 | * or reallocating the string contents, so s must remain valid while iter is in use. |
michael@0 | 632 | * |
michael@0 | 633 | * getState() returns a state value consisting of |
michael@0 | 634 | * - the current UTF-8 source byte index (bits 31..1) |
michael@0 | 635 | * - a flag (bit 0) that indicates whether the UChar position is in the middle |
michael@0 | 636 | * of a surrogate pair |
michael@0 | 637 | * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) |
michael@0 | 638 | * |
michael@0 | 639 | * getState() cannot also encode the UTF-16 index in the state value (the byte index and the flag already occupy bits 31..0). |
michael@0 | 640 | * move(relative to limit or length), or |
michael@0 | 641 | * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. |
michael@0 | 642 | * |
michael@0 | 643 | * @param iter UCharIterator structure to be set for iteration |
michael@0 | 644 | * @param s UTF-8 string to iterate over |
michael@0 | 645 | * @param length Length of s in bytes, or -1 if NUL-terminated |
michael@0 | 646 | * |
michael@0 | 647 | * @see UCharIterator |
michael@0 | 648 | * @stable ICU 2.6 |
michael@0 | 649 | */ |
michael@0 | 650 | U_STABLE void U_EXPORT2 |
michael@0 | 651 | uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); |
michael@0 | 652 | |
michael@0 | 653 | #if U_SHOW_CPLUSPLUS_API |
michael@0 | 654 | |
michael@0 | 655 | /** |
michael@0 | 656 | * Set up a UCharIterator to wrap around a C++ CharacterIterator. |
michael@0 | 657 | * |
michael@0 | 658 | * Sets the UCharIterator function pointers for iteration using the |
michael@0 | 659 | * CharacterIterator charIter. |
michael@0 | 660 | * |
michael@0 | 661 | * The CharacterIterator pointer charIter is set into UCharIterator.context |
michael@0 | 662 | * without copying or cloning the CharacterIterator object, so charIter must remain valid while iter is in use. |
michael@0 | 663 | * The other "protected" UCharIterator fields are set to 0 and will be ignored. |
michael@0 | 664 | * The iteration index and boundaries are controlled by the CharacterIterator. |
michael@0 | 665 | * |
michael@0 | 666 | * getState() simply returns the current index. |
michael@0 | 667 | * move() will always return the final index. |
michael@0 | 668 | * |
michael@0 | 669 | * @param iter UCharIterator structure to be set for iteration |
michael@0 | 670 | * @param charIter CharacterIterator to wrap |
michael@0 | 671 | * |
michael@0 | 672 | * @see UCharIterator |
michael@0 | 673 | * @stable ICU 2.1 |
michael@0 | 674 | */ |
michael@0 | 675 | U_STABLE void U_EXPORT2 |
michael@0 | 676 | uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); |
michael@0 | 677 | |
michael@0 | 678 | /** |
michael@0 | 679 | * Set up a UCharIterator to iterate over a C++ Replaceable. |
michael@0 | 680 | * |
michael@0 | 681 | * Sets the UCharIterator function pointers for iteration over the |
michael@0 | 682 | * Replaceable rep with iteration boundaries start=index=0 and |
michael@0 | 683 | * length=limit=rep->length(). |
michael@0 | 684 | * The "provider" may set the start, index, and limit values at any time |
michael@0 | 685 | * within the range 0..length=rep->length(). |
michael@0 | 686 | * The length field will be ignored. |
michael@0 | 687 | * |
michael@0 | 688 | * The Replaceable pointer rep is set into UCharIterator.context without copying |
michael@0 | 689 | * or cloning/reallocating the Replaceable object, so rep must remain valid while iter is in use. |
michael@0 | 690 | * |
michael@0 | 691 | * getState() simply returns the current index. |
michael@0 | 692 | * move() will always return the final index. |
michael@0 | 693 | * |
michael@0 | 694 | * @param iter UCharIterator structure to be set for iteration |
michael@0 | 695 | * @param rep Replaceable to iterate over |
michael@0 | 696 | * |
michael@0 | 697 | * @see UCharIterator |
michael@0 | 698 | * @stable ICU 2.1 |
michael@0 | 699 | */ |
michael@0 | 700 | U_STABLE void U_EXPORT2 |
michael@0 | 701 | uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); |
michael@0 | 702 | |
michael@0 | 703 | #endif |
michael@0 | 704 | |
michael@0 | 705 | U_CDECL_END |
michael@0 | 706 | |
michael@0 | 707 | #endif |