1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/uiter.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,707 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2002-2011 International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: uiter.h 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2002jan18 1.17 +* created by: Markus W. Scherer 1.18 +*/ 1.19 + 1.20 +#ifndef __UITER_H__ 1.21 +#define __UITER_H__ 1.22 + 1.23 +/** 1.24 + * \file 1.25 + * \brief C API: Unicode Character Iteration 1.26 + * 1.27 + * @see UCharIterator 1.28 + */ 1.29 + 1.30 +#include "unicode/utypes.h" 1.31 + 1.32 +#if U_SHOW_CPLUSPLUS_API 1.33 + U_NAMESPACE_BEGIN 1.34 + 1.35 + class CharacterIterator; 1.36 + class Replaceable; 1.37 + 1.38 + U_NAMESPACE_END 1.39 +#endif 1.40 + 1.41 +U_CDECL_BEGIN 1.42 + 1.43 +struct UCharIterator; 1.44 +typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ 1.45 + 1.46 +/** 1.47 + * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). 1.48 + * @see UCharIteratorMove 1.49 + * @see UCharIterator 1.50 + * @stable ICU 2.1 1.51 + */ 1.52 +typedef enum UCharIteratorOrigin { 1.53 + UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH 1.54 +} UCharIteratorOrigin; 1.55 + 1.56 +/** Constants for UCharIterator. @stable ICU 2.6 */ 1.57 +enum { 1.58 + /** 1.59 + * Constant value that may be returned by UCharIteratorMove 1.60 + * indicating that the final UTF-16 index is not known, but that the move succeeded. 1.61 + * This can occur when moving relative to limit or length, or 1.62 + * when moving relative to the current index after a setState() 1.63 + * when the current UTF-16 index is not known. 
1.64 + * 1.65 + * It would be very inefficient to have to count from the beginning of the text 1.66 + * just to get the current/limit/length index after moving relative to it. 1.67 + * The actual index can be determined with getIndex(UITER_CURRENT) 1.68 + * which will count the UChars if necessary. 1.69 + * 1.70 + * @stable ICU 2.6 1.71 + */ 1.72 + UITER_UNKNOWN_INDEX=-2 1.73 +}; 1.74 + 1.75 + 1.76 +/** 1.77 + * Constant for UCharIterator getState() indicating an error or 1.78 + * an unknown state. 1.79 + * Returned by uiter_getState()/UCharIteratorGetState 1.80 + * when an error occurs. 1.81 + * Also, some UCharIterator implementations may not be able to return 1.82 + * a valid state for each position. This will be clearly documented 1.83 + * for each such iterator (none of the public ones here). 1.84 + * 1.85 + * @stable ICU 2.6 1.86 + */ 1.87 +#define UITER_NO_STATE ((uint32_t)0xffffffff) 1.88 + 1.89 +/** 1.90 + * Function type declaration for UCharIterator.getIndex(). 1.91 + * 1.92 + * Gets the current position, or the start or limit of the 1.93 + * iteration range. 1.94 + * 1.95 + * This function may perform slowly for UITER_CURRENT after setState() was called, 1.96 + * or for UITER_LENGTH, because an iterator implementation may have to count 1.97 + * UChars if the underlying storage is not UTF-16. 1.98 + * 1.99 + * @param iter the UCharIterator structure ("this pointer") 1.100 + * @param origin get the 0, start, limit, length, or current index 1.101 + * @return the requested index, or U_SENTINEL in an error condition 1.102 + * 1.103 + * @see UCharIteratorOrigin 1.104 + * @see UCharIterator 1.105 + * @stable ICU 2.1 1.106 + */ 1.107 +typedef int32_t U_CALLCONV 1.108 +UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); 1.109 + 1.110 +/** 1.111 + * Function type declaration for UCharIterator.move(). 1.112 + * 1.113 + * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). 
1.114 + * 1.115 + * Moves the current position relative to the start or limit of the 1.116 + * iteration range, or relative to the current position itself. 1.117 + * The movement is expressed in numbers of code units forward 1.118 + * or backward by specifying a positive or negative delta. 1.119 + * Out of bounds movement will be pinned to the start or limit. 1.120 + * 1.121 + * This function may perform slowly for moving relative to UITER_LENGTH 1.122 + * because an iterator implementation may have to count the rest of the 1.123 + * UChars if the native storage is not UTF-16. 1.124 + * 1.125 + * When moving relative to the limit or length, or 1.126 + * relative to the current position after setState() was called, 1.127 + * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient 1.128 + * determination of the actual UTF-16 index. 1.129 + * The actual index can be determined with getIndex(UITER_CURRENT) 1.130 + * which will count the UChars if necessary. 1.131 + * See UITER_UNKNOWN_INDEX for details. 1.132 + * 1.133 + * @param iter the UCharIterator structure ("this pointer") 1.134 + * @param delta can be positive, zero, or negative 1.135 + * @param origin move relative to the 0, start, limit, length, or current index 1.136 + * @return the new index, or U_SENTINEL on an error condition, 1.137 + * or UITER_UNKNOWN_INDEX when the index is not known. 1.138 + * 1.139 + * @see UCharIteratorOrigin 1.140 + * @see UCharIterator 1.141 + * @see UITER_UNKNOWN_INDEX 1.142 + * @stable ICU 2.1 1.143 + */ 1.144 +typedef int32_t U_CALLCONV 1.145 +UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); 1.146 + 1.147 +/** 1.148 + * Function type declaration for UCharIterator.hasNext(). 1.149 + * 1.150 + * Check if current() and next() can still 1.151 + * return another code unit. 
1.152 + * 1.153 + * @param iter the UCharIterator structure ("this pointer") 1.154 + * @return boolean value for whether current() and next() can still return another code unit 1.155 + * 1.156 + * @see UCharIterator 1.157 + * @stable ICU 2.1 1.158 + */ 1.159 +typedef UBool U_CALLCONV 1.160 +UCharIteratorHasNext(UCharIterator *iter); 1.161 + 1.162 +/** 1.163 + * Function type declaration for UCharIterator.hasPrevious(). 1.164 + * 1.165 + * Check if previous() can still return another code unit. 1.166 + * 1.167 + * @param iter the UCharIterator structure ("this pointer") 1.168 + * @return boolean value for whether previous() can still return another code unit 1.169 + * 1.170 + * @see UCharIterator 1.171 + * @stable ICU 2.1 1.172 + */ 1.173 +typedef UBool U_CALLCONV 1.174 +UCharIteratorHasPrevious(UCharIterator *iter); 1.175 + 1.176 +/** 1.177 + * Function type declaration for UCharIterator.current(). 1.178 + * 1.179 + * Return the code unit at the current position, 1.180 + * or U_SENTINEL if there is none (index is at the limit). 1.181 + * 1.182 + * @param iter the UCharIterator structure ("this pointer") 1.183 + * @return the current code unit 1.184 + * 1.185 + * @see UCharIterator 1.186 + * @stable ICU 2.1 1.187 + */ 1.188 +typedef UChar32 U_CALLCONV 1.189 +UCharIteratorCurrent(UCharIterator *iter); 1.190 + 1.191 +/** 1.192 + * Function type declaration for UCharIterator.next(). 1.193 + * 1.194 + * Return the code unit at the current index and increment 1.195 + * the index (post-increment, like s[i++]), 1.196 + * or return U_SENTINEL if there is none (index is at the limit). 1.197 + * 1.198 + * @param iter the UCharIterator structure ("this pointer") 1.199 + * @return the current code unit (and post-increment the current index) 1.200 + * 1.201 + * @see UCharIterator 1.202 + * @stable ICU 2.1 1.203 + */ 1.204 +typedef UChar32 U_CALLCONV 1.205 +UCharIteratorNext(UCharIterator *iter); 1.206 + 1.207 +/** 1.208 + * Function type declaration for UCharIterator.previous(). 
1.209 + * 1.210 + * Decrement the index and return the code unit from there 1.211 + * (pre-decrement, like s[--i]), 1.212 + * or return U_SENTINEL if there is none (index is at the start). 1.213 + * 1.214 + * @param iter the UCharIterator structure ("this pointer") 1.215 + * @return the previous code unit (after pre-decrementing the current index) 1.216 + * 1.217 + * @see UCharIterator 1.218 + * @stable ICU 2.1 1.219 + */ 1.220 +typedef UChar32 U_CALLCONV 1.221 +UCharIteratorPrevious(UCharIterator *iter); 1.222 + 1.223 +/** 1.224 + * Function type declaration for UCharIterator.reservedFn(). 1.225 + * Reserved for future use. 1.226 + * 1.227 + * @param iter the UCharIterator structure ("this pointer") 1.228 + * @param something some integer argument 1.229 + * @return some integer 1.230 + * 1.231 + * @see UCharIterator 1.232 + * @stable ICU 2.1 1.233 + */ 1.234 +typedef int32_t U_CALLCONV 1.235 +UCharIteratorReserved(UCharIterator *iter, int32_t something); 1.236 + 1.237 +/** 1.238 + * Function type declaration for UCharIterator.getState(). 1.239 + * 1.240 + * Get the "state" of the iterator in the form of a single 32-bit word. 1.241 + * It is recommended that the state value be calculated to be as small as 1.242 + * is feasible. For strings with limited lengths, fewer than 32 bits may 1.243 + * be sufficient. 1.244 + * 1.245 + * This is used together with setState()/UCharIteratorSetState 1.246 + * to save and restore the iterator position more efficiently than with 1.247 + * getIndex()/move(). 1.248 + * 1.249 + * The iterator state is defined as a uint32_t value because it is designed 1.250 + * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state 1.251 + * of the character iterator. 
1.252 + * 1.253 + * With some UCharIterator implementations (e.g., UTF-8), 1.254 + * getting and setting the UTF-16 index with existing functions 1.255 + * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but 1.256 + * relatively slow because the iterator has to "walk" from a known index 1.257 + * to the requested one. 1.258 + * This takes more time the farther it needs to go. 1.259 + * 1.260 + * An opaque state value allows an iterator implementation to provide 1.261 + * an internal index (UTF-8: the source byte array index) for 1.262 + * fast, constant-time restoration. 1.263 + * 1.264 + * After calling setState(), a getIndex(UITER_CURRENT) may be slow because 1.265 + * the UTF-16 index may not be restored as well, but the iterator can deliver 1.266 + * the correct text contents and move relative to the current position 1.267 + * without performance degradation. 1.268 + * 1.269 + * Some UCharIterator implementations may not be able to return 1.270 + * a valid state for each position, in which case they return UITER_NO_STATE instead. 1.271 + * This will be clearly documented for each such iterator (none of the public ones here). 1.272 + * 1.273 + * @param iter the UCharIterator structure ("this pointer") 1.274 + * @return the state word 1.275 + * 1.276 + * @see UCharIterator 1.277 + * @see UCharIteratorSetState 1.278 + * @see UITER_NO_STATE 1.279 + * @stable ICU 2.6 1.280 + */ 1.281 +typedef uint32_t U_CALLCONV 1.282 +UCharIteratorGetState(const UCharIterator *iter); 1.283 + 1.284 +/** 1.285 + * Function type declaration for UCharIterator.setState(). 1.286 + * 1.287 + * Restore the "state" of the iterator using a state word from a getState() call. 1.288 + * The iterator object need not be the same one as for which getState() was called, 1.289 + * but it must be of the same type (set up using the same uiter_setXYZ function) 1.290 + * and it must iterate over the same string 1.291 + * (binary identical regardless of memory address). 
1.292 + * For more about the state word see UCharIteratorGetState. 1.293 + * 1.294 + * After calling setState(), a getIndex(UITER_CURRENT) may be slow because 1.295 + * the UTF-16 index may not be restored as well, but the iterator can deliver 1.296 + * the correct text contents and move relative to the current position 1.297 + * without performance degradation. 1.298 + * 1.299 + * @param iter the UCharIterator structure ("this pointer") 1.300 + * @param state the state word from a getState() call 1.301 + * on a same-type, same-string iterator 1.302 + * @param pErrorCode Must be a valid pointer to an error code value, 1.303 + * which must not indicate a failure before the function call. 1.304 + * 1.305 + * @see UCharIterator 1.306 + * @see UCharIteratorGetState 1.307 + * @stable ICU 2.6 1.308 + */ 1.309 +typedef void U_CALLCONV 1.310 +UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); 1.311 + 1.312 + 1.313 +/** 1.314 + * C API for code unit iteration. 1.315 + * This can be used as a C wrapper around 1.316 + * CharacterIterator, Replaceable, or implemented using simple strings, etc. 1.317 + * 1.318 + * There are two roles for using UCharIterator: 1.319 + * 1.320 + * A "provider" sets the necessary function pointers and controls the "protected" 1.321 + * fields of the UCharIterator structure. A "provider" passes a UCharIterator 1.322 + * into C APIs that need a UCharIterator as an abstract, flexible string interface. 1.323 + * 1.324 + * Implementations of such C APIs are "callers" of UCharIterator functions; 1.325 + * they only use the "public" function pointers and never access the "protected" 1.326 + * fields directly. 1.327 + * 1.328 + * The current() and next() functions only check the current index against the 1.329 + * limit, and previous() only checks the current index against the start, 1.330 + * to see if the iterator already reached the end of the iteration range. 
1.331 + * 1.332 + * The assumption - in all iterators - is that the index is moved via the API, 1.333 + * which means it won't go out of bounds, or the index is modified by 1.334 + * user code that knows enough about the iterator implementation to set valid 1.335 + * index values. 1.336 + * 1.337 + * UCharIterator functions return code unit values 0..0xffff, 1.338 + * or U_SENTINEL if the iteration bounds are reached. 1.339 + * 1.340 + * @stable ICU 2.1 1.341 + */ 1.342 +struct UCharIterator { 1.343 + /** 1.344 + * (protected) Pointer to string or wrapped object or similar. 1.345 + * Not used by caller. 1.346 + * @stable ICU 2.1 1.347 + */ 1.348 + const void *context; 1.349 + 1.350 + /** 1.351 + * (protected) Length of string or similar. 1.352 + * Not used by caller. 1.353 + * @stable ICU 2.1 1.354 + */ 1.355 + int32_t length; 1.356 + 1.357 + /** 1.358 + * (protected) Start index or similar. 1.359 + * Not used by caller. 1.360 + * @stable ICU 2.1 1.361 + */ 1.362 + int32_t start; 1.363 + 1.364 + /** 1.365 + * (protected) Current index or similar. 1.366 + * Not used by caller. 1.367 + * @stable ICU 2.1 1.368 + */ 1.369 + int32_t index; 1.370 + 1.371 + /** 1.372 + * (protected) Limit index or similar. 1.373 + * Not used by caller. 1.374 + * @stable ICU 2.1 1.375 + */ 1.376 + int32_t limit; 1.377 + 1.378 + /** 1.379 + * (protected) Used by UTF-8 iterators and possibly others. 1.380 + * @stable ICU 2.1 1.381 + */ 1.382 + int32_t reservedField; 1.383 + 1.384 + /** 1.385 + * (public) Returns the current position or the 1.386 + * start or limit index of the iteration range. 1.387 + * 1.388 + * @see UCharIteratorGetIndex 1.389 + * @stable ICU 2.1 1.390 + */ 1.391 + UCharIteratorGetIndex *getIndex; 1.392 + 1.393 + /** 1.394 + * (public) Moves the current position relative to the start or limit of the 1.395 + * iteration range, or relative to the current position itself. 
1.396 + * The movement is expressed in numbers of code units forward 1.397 + * or backward by specifying a positive or negative delta. 1.398 + * 1.399 + * @see UCharIteratorMove 1.400 + * @stable ICU 2.1 1.401 + */ 1.402 + UCharIteratorMove *move; 1.403 + 1.404 + /** 1.405 + * (public) Check if current() and next() can still 1.406 + * return another code unit. 1.407 + * 1.408 + * @see UCharIteratorHasNext 1.409 + * @stable ICU 2.1 1.410 + */ 1.411 + UCharIteratorHasNext *hasNext; 1.412 + 1.413 + /** 1.414 + * (public) Check if previous() can still return another code unit. 1.415 + * 1.416 + * @see UCharIteratorHasPrevious 1.417 + * @stable ICU 2.1 1.418 + */ 1.419 + UCharIteratorHasPrevious *hasPrevious; 1.420 + 1.421 + /** 1.422 + * (public) Return the code unit at the current position, 1.423 + * or U_SENTINEL if there is none (index is at the limit). 1.424 + * 1.425 + * @see UCharIteratorCurrent 1.426 + * @stable ICU 2.1 1.427 + */ 1.428 + UCharIteratorCurrent *current; 1.429 + 1.430 + /** 1.431 + * (public) Return the code unit at the current index and increment 1.432 + * the index (post-increment, like s[i++]), 1.433 + * or return U_SENTINEL if there is none (index is at the limit). 1.434 + * 1.435 + * @see UCharIteratorNext 1.436 + * @stable ICU 2.1 1.437 + */ 1.438 + UCharIteratorNext *next; 1.439 + 1.440 + /** 1.441 + * (public) Decrement the index and return the code unit from there 1.442 + * (pre-decrement, like s[--i]), 1.443 + * or return U_SENTINEL if there is none (index is at the start). 1.444 + * 1.445 + * @see UCharIteratorPrevious 1.446 + * @stable ICU 2.1 1.447 + */ 1.448 + UCharIteratorPrevious *previous; 1.449 + 1.450 + /** 1.451 + * (public) Reserved for future use. Currently NULL. 1.452 + * 1.453 + * @see UCharIteratorReserved 1.454 + * @stable ICU 2.1 1.455 + */ 1.456 + UCharIteratorReserved *reservedFn; 1.457 + 1.458 + /** 1.459 + * (public) Return the state of the iterator, to be restored later with setState(). 
1.460 + * This function pointer is NULL if the iterator does not implement it. 1.461 + * 1.462 + * @see UCharIteratorGetState 1.463 + * @stable ICU 2.6 1.464 + */ 1.465 + UCharIteratorGetState *getState; 1.466 + 1.467 + /** 1.468 + * (public) Restore the iterator state from the state word from a call 1.469 + * to getState(). 1.470 + * This function pointer is NULL if the iterator does not implement it. 1.471 + * 1.472 + * @see UCharIteratorSetState 1.473 + * @stable ICU 2.6 1.474 + */ 1.475 + UCharIteratorSetState *setState; 1.476 +}; 1.477 + 1.478 +/** 1.479 + * Helper function for UCharIterator to get the code point 1.480 + * at the current index. 1.481 + * 1.482 + * Return the code point that includes the code unit at the current position, 1.483 + * or U_SENTINEL if there is none (index is at the limit). 1.484 + * If the current code unit is a lead or trail surrogate, 1.485 + * then the following or preceding surrogate is used to form 1.486 + * the code point value. 1.487 + * 1.488 + * @param iter the UCharIterator structure ("this pointer") 1.489 + * @return the current code point 1.490 + * 1.491 + * @see UCharIterator 1.492 + * @see U16_GET 1.493 + * @see UnicodeString::char32At() 1.494 + * @stable ICU 2.1 1.495 + */ 1.496 +U_STABLE UChar32 U_EXPORT2 1.497 +uiter_current32(UCharIterator *iter); 1.498 + 1.499 +/** 1.500 + * Helper function for UCharIterator to get the next code point. 1.501 + * 1.502 + * Return the code point at the current index and increment 1.503 + * the index (post-increment, like s[i++]), 1.504 + * or return U_SENTINEL if there is none (index is at the limit). 
1.505 + * 1.506 + * @param iter the UCharIterator structure ("this pointer") 1.507 + * @return the current code point (and post-increment the current index) 1.508 + * 1.509 + * @see UCharIterator 1.510 + * @see U16_NEXT 1.511 + * @stable ICU 2.1 1.512 + */ 1.513 +U_STABLE UChar32 U_EXPORT2 1.514 +uiter_next32(UCharIterator *iter); 1.515 + 1.516 +/** 1.517 + * Helper function for UCharIterator to get the previous code point. 1.518 + * 1.519 + * Decrement the index and return the code point from there 1.520 + * (pre-decrement, like s[--i]), 1.521 + * or return U_SENTINEL if there is none (index is at the start). 1.522 + * 1.523 + * @param iter the UCharIterator structure ("this pointer") 1.524 + * @return the previous code point (after pre-decrementing the current index) 1.525 + * 1.526 + * @see UCharIterator 1.527 + * @see U16_PREV 1.528 + * @stable ICU 2.1 1.529 + */ 1.530 +U_STABLE UChar32 U_EXPORT2 1.531 +uiter_previous32(UCharIterator *iter); 1.532 + 1.533 +/** 1.534 + * Get the "state" of the iterator in the form of a single 32-bit word. 1.535 + * This is a convenience function that calls iter->getState(iter) 1.536 + * if iter->getState is not NULL; 1.537 + * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. 1.538 + * 1.539 + * Some UCharIterator implementations may not be able to return 1.540 + * a valid state for each position, in which case they return UITER_NO_STATE instead. 1.541 + * This will be clearly documented for each such iterator (none of the public ones here). 1.542 + * 1.543 + * @param iter the UCharIterator structure ("this pointer") 1.544 + * @return the state word 1.545 + * 1.546 + * @see UCharIterator 1.547 + * @see UCharIteratorGetState 1.548 + * @see UITER_NO_STATE 1.549 + * @stable ICU 2.6 1.550 + */ 1.551 +U_STABLE uint32_t U_EXPORT2 1.552 +uiter_getState(const UCharIterator *iter); 1.553 + 1.554 +/** 1.555 + * Restore the "state" of the iterator using a state word from a getState() call. 
1.556 + * This is a convenience function that calls iter->setState(iter, state, pErrorCode) 1.557 + * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. 1.558 + * 1.559 + * @param iter the UCharIterator structure ("this pointer") 1.560 + * @param state the state word from a getState() call 1.561 + * on a same-type, same-string iterator 1.562 + * @param pErrorCode Must be a valid pointer to an error code value, 1.563 + * which must not indicate a failure before the function call. 1.564 + * 1.565 + * @see UCharIterator 1.566 + * @see UCharIteratorSetState 1.567 + * @stable ICU 2.6 1.568 + */ 1.569 +U_STABLE void U_EXPORT2 1.570 +uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); 1.571 + 1.572 +/** 1.573 + * Set up a UCharIterator to iterate over a string. 1.574 + * 1.575 + * Sets the UCharIterator function pointers for iteration over the string s 1.576 + * with iteration boundaries start=index=0 and length=limit=string length. 1.577 + * The "provider" may set the start, index, and limit values at any time 1.578 + * within the range 0..length. 1.579 + * The length field will be ignored. 1.580 + * 1.581 + * The string pointer s is set into UCharIterator.context without copying 1.582 + * or reallocating the string contents. 1.583 + * 1.584 + * getState() simply returns the current index. 1.585 + * move() will always return the final index. 1.586 + * 1.587 + * @param iter UCharIterator structure to be set for iteration 1.588 + * @param s String to iterate over 1.589 + * @param length Length of s, or -1 if NUL-terminated 1.590 + * 1.591 + * @see UCharIterator 1.592 + * @stable ICU 2.1 1.593 + */ 1.594 +U_STABLE void U_EXPORT2 1.595 +uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); 1.596 + 1.597 +/** 1.598 + * Set up a UCharIterator to iterate over a UTF-16BE string 1.599 + * (byte vector with a big-endian pair of bytes per UChar). 
1.600 + * 1.601 + * Everything works just like with a normal UChar iterator (uiter_setString), 1.602 + * except that UChars are assembled from byte pairs, 1.603 + * and that the length argument here indicates an even number of bytes. 1.604 + * 1.605 + * getState() simply returns the current index. 1.606 + * move() will always return the final index. 1.607 + * 1.608 + * @param iter UCharIterator structure to be set for iteration 1.609 + * @param s UTF-16BE string to iterate over 1.610 + * @param length Length of s as an even number of bytes, or -1 if NUL-terminated 1.611 + * (NUL means pair of 0 bytes at even index from s) 1.612 + * 1.613 + * @see UCharIterator 1.614 + * @see uiter_setString 1.615 + * @stable ICU 2.6 1.616 + */ 1.617 +U_STABLE void U_EXPORT2 1.618 +uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); 1.619 + 1.620 +/** 1.621 + * Set up a UCharIterator to iterate over a UTF-8 string. 1.622 + * 1.623 + * Sets the UCharIterator function pointers for iteration over the UTF-8 string s 1.624 + * with UTF-8 iteration boundaries 0 and length. 1.625 + * The implementation counts the UTF-16 index on the fly and 1.626 + * lazily evaluates the UTF-16 length of the text. 1.627 + * 1.628 + * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. 1.629 + * When the reservedField is not 0, then it contains a supplementary code point 1.630 + * and the UTF-16 index is between the two corresponding surrogates. 1.631 + * At that point, the UTF-8 index is behind that code point. 1.632 + * 1.633 + * The UTF-8 string pointer s is set into UCharIterator.context without copying 1.634 + * or reallocating the string contents. 
1.635 + * 1.636 + * getState() returns a state value consisting of 1.637 + * - the current UTF-8 source byte index (bits 31..1) 1.638 + * - a flag (bit 0) that indicates whether the UChar position is in the middle 1.639 + * of a surrogate pair 1.640 + * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) 1.641 + * 1.642 + * getState() cannot also encode the UTF-16 index in the state value. 1.643 + * move(relative to limit or length), or 1.644 + * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. 1.645 + * 1.646 + * @param iter UCharIterator structure to be set for iteration 1.647 + * @param s UTF-8 string to iterate over 1.648 + * @param length Length of s in bytes, or -1 if NUL-terminated 1.649 + * 1.650 + * @see UCharIterator 1.651 + * @stable ICU 2.6 1.652 + */ 1.653 +U_STABLE void U_EXPORT2 1.654 +uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); 1.655 + 1.656 +#if U_SHOW_CPLUSPLUS_API 1.657 + 1.658 +/** 1.659 + * Set up a UCharIterator to wrap around a C++ CharacterIterator. 1.660 + * 1.661 + * Sets the UCharIterator function pointers for iteration using the 1.662 + * CharacterIterator charIter. 1.663 + * 1.664 + * The CharacterIterator pointer charIter is set into UCharIterator.context 1.665 + * without copying or cloning the CharacterIterator object. 1.666 + * The other "protected" UCharIterator fields are set to 0 and will be ignored. 1.667 + * The iteration index and boundaries are controlled by the CharacterIterator. 1.668 + * 1.669 + * getState() simply returns the current index. 1.670 + * move() will always return the final index. 
1.671 + * 1.672 + * @param iter UCharIterator structure to be set for iteration 1.673 + * @param charIter CharacterIterator to wrap 1.674 + * 1.675 + * @see UCharIterator 1.676 + * @stable ICU 2.1 1.677 + */ 1.678 +U_STABLE void U_EXPORT2 1.679 +uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); 1.680 + 1.681 +/** 1.682 + * Set up a UCharIterator to iterate over a C++ Replaceable. 1.683 + * 1.684 + * Sets the UCharIterator function pointers for iteration over the 1.685 + * Replaceable rep with iteration boundaries start=index=0 and 1.686 + * length=limit=rep->length(). 1.687 + * The "provider" may set the start, index, and limit values at any time 1.688 + * within the range 0..length=rep->length(). 1.689 + * The length field will be ignored. 1.690 + * 1.691 + * The Replaceable pointer rep is set into UCharIterator.context without copying 1.692 + * or cloning/reallocating the Replaceable object. 1.693 + * 1.694 + * getState() simply returns the current index. 1.695 + * move() will always return the final index. 1.696 + * 1.697 + * @param iter UCharIterator structure to be set for iteration 1.698 + * @param rep Replaceable to iterate over 1.699 + * 1.700 + * @see UCharIterator 1.701 + * @stable ICU 2.1 1.702 + */ 1.703 +U_STABLE void U_EXPORT2 1.704 +uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); 1.705 + 1.706 +#endif 1.707 + 1.708 +U_CDECL_END 1.709 + 1.710 +#endif