michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2002-2011 International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: uiter.h michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2002jan18 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #ifndef __UITER_H__ michael@0: #define __UITER_H__ michael@0: michael@0: /** michael@0: * \file michael@0: * \brief C API: Unicode Character Iteration michael@0: * michael@0: * @see UCharIterator michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if U_SHOW_CPLUSPLUS_API michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class CharacterIterator; michael@0: class Replaceable; michael@0: michael@0: U_NAMESPACE_END michael@0: #endif michael@0: michael@0: U_CDECL_BEGIN michael@0: michael@0: struct UCharIterator; michael@0: typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ michael@0: michael@0: /** michael@0: * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). michael@0: * @see UCharIteratorMove michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: typedef enum UCharIteratorOrigin { michael@0: UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH michael@0: } UCharIteratorOrigin; michael@0: michael@0: /** Constants for UCharIterator. @stable ICU 2.6 */ michael@0: enum { michael@0: /** michael@0: * Constant value that may be returned by UCharIteratorMove michael@0: * indicating that the final UTF-16 index is not known, but that the move succeeded. michael@0: * This can occur when moving relative to limit or length, or michael@0: * when moving relative to the current index after a setState() michael@0: * when the current UTF-16 index is not known. michael@0: * michael@0: * It would be very inefficient to have to count from the beginning of the text michael@0: * just to get the current/limit/length index after moving relative to it. michael@0: * The actual index can be determined with getIndex(UITER_CURRENT) michael@0: * which will count the UChars if necessary. michael@0: * michael@0: * @stable ICU 2.6 michael@0: */ michael@0: UITER_UNKNOWN_INDEX=-2 michael@0: }; michael@0: michael@0: michael@0: /** michael@0: * Constant for UCharIterator getState() indicating an error or michael@0: * an unknown state. michael@0: * Returned by uiter_getState()/UCharIteratorGetState michael@0: * when an error occurs. michael@0: * Also, some UCharIterator implementations may not be able to return michael@0: * a valid state for each position. This will be clearly documented michael@0: * for each such iterator (none of the public ones here). michael@0: * michael@0: * @stable ICU 2.6 michael@0: */ michael@0: #define UITER_NO_STATE ((uint32_t)0xffffffff) michael@0: michael@0: /** michael@0: * Function type declaration for UCharIterator.getIndex(). michael@0: * michael@0: * Gets the current position, or the start or limit of the michael@0: * iteration range. michael@0: * michael@0: * This function may perform slowly for UITER_CURRENT after setState() was called, michael@0: * or for UITER_LENGTH, because an iterator implementation may have to count michael@0: * UChars if the underlying storage is not UTF-16. michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @param origin get the 0, start, limit, length, or current index michael@0: * @return the requested index, or U_SENTINEL in an error condition michael@0: * michael@0: * @see UCharIteratorOrigin michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: typedef int32_t U_CALLCONV michael@0: UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); michael@0: michael@0: /** michael@0: * Function type declaration for UCharIterator.move(). michael@0: * michael@0: * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). michael@0: * michael@0: * Moves the current position relative to the start or limit of the michael@0: * iteration range, or relative to the current position itself. michael@0: * The movement is expressed in numbers of code units forward michael@0: * or backward by specifying a positive or negative delta. michael@0: * Out of bounds movement will be pinned to the start or limit. michael@0: * michael@0: * This function may perform slowly for moving relative to UITER_LENGTH michael@0: * because an iterator implementation may have to count the rest of the michael@0: * UChars if the native storage is not UTF-16. michael@0: * michael@0: * When moving relative to the limit or length, or michael@0: * relative to the current position after setState() was called, michael@0: * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient michael@0: * determination of the actual UTF-16 index. michael@0: * The actual index can be determined with getIndex(UITER_CURRENT) michael@0: * which will count the UChars if necessary. michael@0: * See UITER_UNKNOWN_INDEX for details. michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @param delta can be positive, zero, or negative michael@0: * @param origin move relative to the 0, start, limit, length, or current index michael@0: * @return the new index, or U_SENTINEL on an error condition, michael@0: * or UITER_UNKNOWN_INDEX when the index is not known. michael@0: * michael@0: * @see UCharIteratorOrigin michael@0: * @see UCharIterator michael@0: * @see UITER_UNKNOWN_INDEX michael@0: * @stable ICU 2.1 michael@0: */ michael@0: typedef int32_t U_CALLCONV michael@0: UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); michael@0: michael@0: /** michael@0: * Function type declaration for UCharIterator.hasNext(). michael@0: * michael@0: * Check if current() and next() can still michael@0: * return another code unit. michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @return boolean value for whether current() and next() can still return another code unit michael@0: * michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: typedef UBool U_CALLCONV michael@0: UCharIteratorHasNext(UCharIterator *iter); michael@0: michael@0: /** michael@0: * Function type declaration for UCharIterator.hasPrevious(). michael@0: * michael@0: * Check if previous() can still return another code unit. michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @return boolean value for whether previous() can still return another code unit michael@0: * michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: typedef UBool U_CALLCONV michael@0: UCharIteratorHasPrevious(UCharIterator *iter); michael@0: michael@0: /** michael@0: * Function type declaration for UCharIterator.current(). michael@0: * michael@0: * Return the code unit at the current position, michael@0: * or U_SENTINEL if there is none (index is at the limit). michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @return the current code unit michael@0: * michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: typedef UChar32 U_CALLCONV michael@0: UCharIteratorCurrent(UCharIterator *iter); michael@0: michael@0: /** michael@0: * Function type declaration for UCharIterator.next(). michael@0: * michael@0: * Return the code unit at the current index and increment michael@0: * the index (post-increment, like s[i++]), michael@0: * or return U_SENTINEL if there is none (index is at the limit). michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @return the current code unit (and post-increment the current index) michael@0: * michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: typedef UChar32 U_CALLCONV michael@0: UCharIteratorNext(UCharIterator *iter); michael@0: michael@0: /** michael@0: * Function type declaration for UCharIterator.previous(). michael@0: * michael@0: * Decrement the index and return the code unit from there michael@0: * (pre-decrement, like s[--i]), michael@0: * or return U_SENTINEL if there is none (index is at the start). michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @return the previous code unit (after pre-decrementing the current index) michael@0: * michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: typedef UChar32 U_CALLCONV michael@0: UCharIteratorPrevious(UCharIterator *iter); michael@0: michael@0: /** michael@0: * Function type declaration for UCharIterator.reservedFn(). michael@0: * Reserved for future use. michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @param something some integer argument michael@0: * @return some integer michael@0: * michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: typedef int32_t U_CALLCONV michael@0: UCharIteratorReserved(UCharIterator *iter, int32_t something); michael@0: michael@0: /** michael@0: * Function type declaration for UCharIterator.getState(). michael@0: * michael@0: * Get the "state" of the iterator in the form of a single 32-bit word. michael@0: * It is recommended that the state value be calculated to be as small as michael@0: * is feasible. For strings with limited lengths, fewer than 32 bits may michael@0: * be sufficient. michael@0: * michael@0: * This is used together with setState()/UCharIteratorSetState michael@0: * to save and restore the iterator position more efficiently than with michael@0: * getIndex()/move(). michael@0: * michael@0: * The iterator state is defined as a uint32_t value because it is designed michael@0: * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state michael@0: * of the character iterator. michael@0: * michael@0: * With some UCharIterator implementations (e.g., UTF-8), michael@0: * getting and setting the UTF-16 index with existing functions michael@0: * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but michael@0: * relatively slow because the iterator has to "walk" from a known index michael@0: * to the requested one. michael@0: * This takes more time the farther it needs to go. michael@0: * michael@0: * An opaque state value allows an iterator implementation to provide michael@0: * an internal index (UTF-8: the source byte array index) for michael@0: * fast, constant-time restoration. michael@0: * michael@0: * After calling setState(), a getIndex(UITER_CURRENT) may be slow because michael@0: * the UTF-16 index may not be restored as well, but the iterator can deliver michael@0: * the correct text contents and move relative to the current position michael@0: * without performance degradation. michael@0: * michael@0: * Some UCharIterator implementations may not be able to return michael@0: * a valid state for each position, in which case they return UITER_NO_STATE instead. michael@0: * This will be clearly documented for each such iterator (none of the public ones here). michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @return the state word michael@0: * michael@0: * @see UCharIterator michael@0: * @see UCharIteratorSetState michael@0: * @see UITER_NO_STATE michael@0: * @stable ICU 2.6 michael@0: */ michael@0: typedef uint32_t U_CALLCONV michael@0: UCharIteratorGetState(const UCharIterator *iter); michael@0: michael@0: /** michael@0: * Function type declaration for UCharIterator.setState(). michael@0: * michael@0: * Restore the "state" of the iterator using a state word from a getState() call. michael@0: * The iterator object need not be the same one as for which getState() was called, michael@0: * but it must be of the same type (set up using the same uiter_setXYZ function) michael@0: * and it must iterate over the same string michael@0: * (binary identical regardless of memory address). michael@0: * For more about the state word see UCharIteratorGetState. michael@0: * michael@0: * After calling setState(), a getIndex(UITER_CURRENT) may be slow because michael@0: * the UTF-16 index may not be restored as well, but the iterator can deliver michael@0: * the correct text contents and move relative to the current position michael@0: * without performance degradation. michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @param state the state word from a getState() call michael@0: * on a same-type, same-string iterator michael@0: * @param pErrorCode Must be a valid pointer to an error code value, michael@0: * which must not indicate a failure before the function call. michael@0: * michael@0: * @see UCharIterator michael@0: * @see UCharIteratorGetState michael@0: * @stable ICU 2.6 michael@0: */ michael@0: typedef void U_CALLCONV michael@0: UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); michael@0: michael@0: michael@0: /** michael@0: * C API for code unit iteration. michael@0: * This can be used as a C wrapper around michael@0: * CharacterIterator, Replaceable, or implemented using simple strings, etc. michael@0: * michael@0: * There are two roles for using UCharIterator: michael@0: * michael@0: * A "provider" sets the necessary function pointers and controls the "protected" michael@0: * fields of the UCharIterator structure. A "provider" passes a UCharIterator michael@0: * into C APIs that need a UCharIterator as an abstract, flexible string interface. michael@0: * michael@0: * Implementations of such C APIs are "callers" of UCharIterator functions; michael@0: * they only use the "public" function pointers and never access the "protected" michael@0: * fields directly. michael@0: * michael@0: * The current() and next() functions only check the current index against the michael@0: * limit, and previous() only checks the current index against the start, michael@0: * to see if the iterator already reached the end of the iteration range. michael@0: * michael@0: * The assumption - in all iterators - is that the index is moved via the API, michael@0: * which means it won't go out of bounds, or the index is modified by michael@0: * user code that knows enough about the iterator implementation to set valid michael@0: * index values. michael@0: * michael@0: * UCharIterator functions return code unit values 0..0xffff, michael@0: * or U_SENTINEL if the iteration bounds are reached. michael@0: * michael@0: * @stable ICU 2.1 michael@0: */ michael@0: struct UCharIterator { michael@0: /** michael@0: * (protected) Pointer to string or wrapped object or similar. michael@0: * Not used by caller. michael@0: * @stable ICU 2.1 michael@0: */ michael@0: const void *context; michael@0: michael@0: /** michael@0: * (protected) Length of string or similar. michael@0: * Not used by caller. michael@0: * @stable ICU 2.1 michael@0: */ michael@0: int32_t length; michael@0: michael@0: /** michael@0: * (protected) Start index or similar. michael@0: * Not used by caller. michael@0: * @stable ICU 2.1 michael@0: */ michael@0: int32_t start; michael@0: michael@0: /** michael@0: * (protected) Current index or similar. michael@0: * Not used by caller. michael@0: * @stable ICU 2.1 michael@0: */ michael@0: int32_t index; michael@0: michael@0: /** michael@0: * (protected) Limit index or similar. michael@0: * Not used by caller. michael@0: * @stable ICU 2.1 michael@0: */ michael@0: int32_t limit; michael@0: michael@0: /** michael@0: * (protected) Used by UTF-8 iterators and possibly others. michael@0: * @stable ICU 2.1 michael@0: */ michael@0: int32_t reservedField; michael@0: michael@0: /** michael@0: * (public) Returns the current position or the michael@0: * start or limit index of the iteration range. michael@0: * michael@0: * @see UCharIteratorGetIndex michael@0: * @stable ICU 2.1 michael@0: */ michael@0: UCharIteratorGetIndex *getIndex; michael@0: michael@0: /** michael@0: * (public) Moves the current position relative to the start or limit of the michael@0: * iteration range, or relative to the current position itself. michael@0: * The movement is expressed in numbers of code units forward michael@0: * or backward by specifying a positive or negative delta. michael@0: * michael@0: * @see UCharIteratorMove michael@0: * @stable ICU 2.1 michael@0: */ michael@0: UCharIteratorMove *move; michael@0: michael@0: /** michael@0: * (public) Check if current() and next() can still michael@0: * return another code unit. michael@0: * michael@0: * @see UCharIteratorHasNext michael@0: * @stable ICU 2.1 michael@0: */ michael@0: UCharIteratorHasNext *hasNext; michael@0: michael@0: /** michael@0: * (public) Check if previous() can still return another code unit. michael@0: * michael@0: * @see UCharIteratorHasPrevious michael@0: * @stable ICU 2.1 michael@0: */ michael@0: UCharIteratorHasPrevious *hasPrevious; michael@0: michael@0: /** michael@0: * (public) Return the code unit at the current position, michael@0: * or U_SENTINEL if there is none (index is at the limit). michael@0: * michael@0: * @see UCharIteratorCurrent michael@0: * @stable ICU 2.1 michael@0: */ michael@0: UCharIteratorCurrent *current; michael@0: michael@0: /** michael@0: * (public) Return the code unit at the current index and increment michael@0: * the index (post-increment, like s[i++]), michael@0: * or return U_SENTINEL if there is none (index is at the limit). michael@0: * michael@0: * @see UCharIteratorNext michael@0: * @stable ICU 2.1 michael@0: */ michael@0: UCharIteratorNext *next; michael@0: michael@0: /** michael@0: * (public) Decrement the index and return the code unit from there michael@0: * (pre-decrement, like s[--i]), michael@0: * or return U_SENTINEL if there is none (index is at the start). michael@0: * michael@0: * @see UCharIteratorPrevious michael@0: * @stable ICU 2.1 michael@0: */ michael@0: UCharIteratorPrevious *previous; michael@0: michael@0: /** michael@0: * (public) Reserved for future use. Currently NULL. michael@0: * michael@0: * @see UCharIteratorReserved michael@0: * @stable ICU 2.1 michael@0: */ michael@0: UCharIteratorReserved *reservedFn; michael@0: michael@0: /** michael@0: * (public) Return the state of the iterator, to be restored later with setState(). michael@0: * This function pointer is NULL if the iterator does not implement it. michael@0: * michael@0: * @see UCharIteratorGet michael@0: * @stable ICU 2.6 michael@0: */ michael@0: UCharIteratorGetState *getState; michael@0: michael@0: /** michael@0: * (public) Restore the iterator state from the state word from a call michael@0: * to getState(). michael@0: * This function pointer is NULL if the iterator does not implement it. michael@0: * michael@0: * @see UCharIteratorSet michael@0: * @stable ICU 2.6 michael@0: */ michael@0: UCharIteratorSetState *setState; michael@0: }; michael@0: michael@0: /** michael@0: * Helper function for UCharIterator to get the code point michael@0: * at the current index. michael@0: * michael@0: * Return the code point that includes the code unit at the current position, michael@0: * or U_SENTINEL if there is none (index is at the limit). michael@0: * If the current code unit is a lead or trail surrogate, michael@0: * then the following or preceding surrogate is used to form michael@0: * the code point value. michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @return the current code point michael@0: * michael@0: * @see UCharIterator michael@0: * @see U16_GET michael@0: * @see UnicodeString::char32At() michael@0: * @stable ICU 2.1 michael@0: */ michael@0: U_STABLE UChar32 U_EXPORT2 michael@0: uiter_current32(UCharIterator *iter); michael@0: michael@0: /** michael@0: * Helper function for UCharIterator to get the next code point. michael@0: * michael@0: * Return the code point at the current index and increment michael@0: * the index (post-increment, like s[i++]), michael@0: * or return U_SENTINEL if there is none (index is at the limit). michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @return the current code point (and post-increment the current index) michael@0: * michael@0: * @see UCharIterator michael@0: * @see U16_NEXT michael@0: * @stable ICU 2.1 michael@0: */ michael@0: U_STABLE UChar32 U_EXPORT2 michael@0: uiter_next32(UCharIterator *iter); michael@0: michael@0: /** michael@0: * Helper function for UCharIterator to get the previous code point. michael@0: * michael@0: * Decrement the index and return the code point from there michael@0: * (pre-decrement, like s[--i]), michael@0: * or return U_SENTINEL if there is none (index is at the start). michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @return the previous code point (after pre-decrementing the current index) michael@0: * michael@0: * @see UCharIterator michael@0: * @see U16_PREV michael@0: * @stable ICU 2.1 michael@0: */ michael@0: U_STABLE UChar32 U_EXPORT2 michael@0: uiter_previous32(UCharIterator *iter); michael@0: michael@0: /** michael@0: * Get the "state" of the iterator in the form of a single 32-bit word. michael@0: * This is a convenience function that calls iter->getState(iter) michael@0: * if iter->getState is not NULL; michael@0: * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. michael@0: * michael@0: * Some UCharIterator implementations may not be able to return michael@0: * a valid state for each position, in which case they return UITER_NO_STATE instead. michael@0: * This will be clearly documented for each such iterator (none of the public ones here). michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @return the state word michael@0: * michael@0: * @see UCharIterator michael@0: * @see UCharIteratorGetState michael@0: * @see UITER_NO_STATE michael@0: * @stable ICU 2.6 michael@0: */ michael@0: U_STABLE uint32_t U_EXPORT2 michael@0: uiter_getState(const UCharIterator *iter); michael@0: michael@0: /** michael@0: * Restore the "state" of the iterator using a state word from a getState() call. michael@0: * This is a convenience function that calls iter->setState(iter, state, pErrorCode) michael@0: * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. michael@0: * michael@0: * @param iter the UCharIterator structure ("this pointer") michael@0: * @param state the state word from a getState() call michael@0: * on a same-type, same-string iterator michael@0: * @param pErrorCode Must be a valid pointer to an error code value, michael@0: * which must not indicate a failure before the function call. michael@0: * michael@0: * @see UCharIterator michael@0: * @see UCharIteratorSetState michael@0: * @stable ICU 2.6 michael@0: */ michael@0: U_STABLE void U_EXPORT2 michael@0: uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); michael@0: michael@0: /** michael@0: * Set up a UCharIterator to iterate over a string. michael@0: * michael@0: * Sets the UCharIterator function pointers for iteration over the string s michael@0: * with iteration boundaries start=index=0 and length=limit=string length. michael@0: * The "provider" may set the start, index, and limit values at any time michael@0: * within the range 0..length. michael@0: * The length field will be ignored. michael@0: * michael@0: * The string pointer s is set into UCharIterator.context without copying michael@0: * or reallocating the string contents. michael@0: * michael@0: * getState() simply returns the current index. michael@0: * move() will always return the final index. michael@0: * michael@0: * @param iter UCharIterator structure to be set for iteration michael@0: * @param s String to iterate over michael@0: * @param length Length of s, or -1 if NUL-terminated michael@0: * michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: U_STABLE void U_EXPORT2 michael@0: uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); michael@0: michael@0: /** michael@0: * Set up a UCharIterator to iterate over a UTF-16BE string michael@0: * (byte vector with a big-endian pair of bytes per UChar). michael@0: * michael@0: * Everything works just like with a normal UChar iterator (uiter_setString), michael@0: * except that UChars are assembled from byte pairs, michael@0: * and that the length argument here indicates an even number of bytes. michael@0: * michael@0: * getState() simply returns the current index. michael@0: * move() will always return the final index. michael@0: * michael@0: * @param iter UCharIterator structure to be set for iteration michael@0: * @param s UTF-16BE string to iterate over michael@0: * @param length Length of s as an even number of bytes, or -1 if NUL-terminated michael@0: * (NUL means pair of 0 bytes at even index from s) michael@0: * michael@0: * @see UCharIterator michael@0: * @see uiter_setString michael@0: * @stable ICU 2.6 michael@0: */ michael@0: U_STABLE void U_EXPORT2 michael@0: uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); michael@0: michael@0: /** michael@0: * Set up a UCharIterator to iterate over a UTF-8 string. michael@0: * michael@0: * Sets the UCharIterator function pointers for iteration over the UTF-8 string s michael@0: * with UTF-8 iteration boundaries 0 and length. michael@0: * The implementation counts the UTF-16 index on the fly and michael@0: * lazily evaluates the UTF-16 length of the text. michael@0: * michael@0: * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. michael@0: * When the reservedField is not 0, then it contains a supplementary code point michael@0: * and the UTF-16 index is between the two corresponding surrogates. michael@0: * At that point, the UTF-8 index is behind that code point. michael@0: * michael@0: * The UTF-8 string pointer s is set into UCharIterator.context without copying michael@0: * or reallocating the string contents. michael@0: * michael@0: * getState() returns a state value consisting of michael@0: * - the current UTF-8 source byte index (bits 31..1) michael@0: * - a flag (bit 0) that indicates whether the UChar position is in the middle michael@0: * of a surrogate pair michael@0: * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) michael@0: * michael@0: * getState() cannot also encode the UTF-16 index in the state value. michael@0: * move(relative to limit or length), or michael@0: * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. michael@0: * michael@0: * @param iter UCharIterator structure to be set for iteration michael@0: * @param s UTF-8 string to iterate over michael@0: * @param length Length of s in bytes, or -1 if NUL-terminated michael@0: * michael@0: * @see UCharIterator michael@0: * @stable ICU 2.6 michael@0: */ michael@0: U_STABLE void U_EXPORT2 michael@0: uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); michael@0: michael@0: #if U_SHOW_CPLUSPLUS_API michael@0: michael@0: /** michael@0: * Set up a UCharIterator to wrap around a C++ CharacterIterator. michael@0: * michael@0: * Sets the UCharIterator function pointers for iteration using the michael@0: * CharacterIterator charIter. michael@0: * michael@0: * The CharacterIterator pointer charIter is set into UCharIterator.context michael@0: * without copying or cloning the CharacterIterator object. michael@0: * The other "protected" UCharIterator fields are set to 0 and will be ignored. michael@0: * The iteration index and boundaries are controlled by the CharacterIterator. michael@0: * michael@0: * getState() simply returns the current index. michael@0: * move() will always return the final index. michael@0: * michael@0: * @param iter UCharIterator structure to be set for iteration michael@0: * @param charIter CharacterIterator to wrap michael@0: * michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: U_STABLE void U_EXPORT2 michael@0: uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); michael@0: michael@0: /** michael@0: * Set up a UCharIterator to iterate over a C++ Replaceable. michael@0: * michael@0: * Sets the UCharIterator function pointers for iteration over the michael@0: * Replaceable rep with iteration boundaries start=index=0 and michael@0: * length=limit=rep->length(). michael@0: * The "provider" may set the start, index, and limit values at any time michael@0: * within the range 0..length=rep->length(). michael@0: * The length field will be ignored. michael@0: * michael@0: * The Replaceable pointer rep is set into UCharIterator.context without copying michael@0: * or cloning/reallocating the Replaceable object. michael@0: * michael@0: * getState() simply returns the current index. michael@0: * move() will always return the final index. michael@0: * michael@0: * @param iter UCharIterator structure to be set for iteration michael@0: * @param rep Replaceable to iterate over michael@0: * michael@0: * @see UCharIterator michael@0: * @stable ICU 2.1 michael@0: */ michael@0: U_STABLE void U_EXPORT2 michael@0: uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); michael@0: michael@0: #endif michael@0: michael@0: U_CDECL_END michael@0: michael@0: #endif