1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/utext.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1600 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2004-2012, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: utext.h 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2004oct06 1.17 +* created by: Markus W. Scherer 1.18 +*/ 1.19 + 1.20 +#ifndef __UTEXT_H__ 1.21 +#define __UTEXT_H__ 1.22 + 1.23 +/** 1.24 + * \file 1.25 + * \brief C API: Abstract Unicode Text API 1.26 + * 1.27 + * The Text Access API provides a means to allow text that is stored in alternative 1.28 + * formats to work with ICU services. ICU normally operates on text that is 1.29 + * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type 1.30 + * UnicodeString for C++ APIs. 1.31 + * 1.32 + * ICU Text Access allows other formats, such as UTF-8 or non-contiguous 1.33 + * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. 1.34 + * 1.35 + * There are three general classes of usage for UText: 1.36 + * 1.37 + * Application Level Use. This is the simplest usage - applications would 1.38 + * use one of the utext_open() functions on their input text, and pass 1.39 + * the resulting UText to the desired ICU service. 1.40 + * 1.41 + * Second is usage in ICU Services, such as break iteration, that will need to 1.42 + * operate on input presented to them as a UText. These implementations 1.43 + * will need to use the iteration and related UText functions to gain 1.44 + * access to the actual text. 1.45 + * 1.46 + * The third class of UText users are "text providers." These are the 1.47 + * UText implementations for the various text storage formats. 
An application 1.48 + * or system with a unique text storage format can implement a set of 1.49 + * UText provider functions for that format, which will then allow 1.50 + * ICU services to operate on that format. 1.51 + * 1.52 + * 1.53 + * <em>Iterating over text</em> 1.54 + * 1.55 + * Here is sample code for a forward iteration over the contents of a UText 1.56 + * 1.57 + * \code 1.58 + * UChar32 c; 1.59 + * UText *ut = whatever(); 1.60 + * 1.61 + * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { 1.62 + * // do whatever with the codepoint c here. 1.63 + * } 1.64 + * \endcode 1.65 + * 1.66 + * And here is similar code to iterate in the reverse direction, from the end 1.67 + * of the text towards the beginning. 1.68 + * 1.69 + * \code 1.70 + * UChar32 c; 1.71 + * UText *ut = whatever(); 1.72 + * int64_t textLength = utext_nativeLength(ut); 1.73 + * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { 1.74 + * // do whatever with the codepoint c here. 1.75 + * } 1.76 + * \endcode 1.77 + * 1.78 + * <em>Characters and Indexing</em> 1.79 + * 1.80 + * Indexing into text by UText functions is nearly always in terms of the native 1.81 + * indexing of the underlying text storage. The storage format could be UTF-8 1.82 + * or UTF-32, for example. When coding to the UText access API, no assumptions 1.83 + * can be made regarding the size of characters, or how far an index 1.84 + * may move when iterating between characters. 1.85 + * 1.86 + * All indices supplied to UText functions are pinned to the length of the 1.87 + * text. An out-of-bounds index is not considered to be an error, but is 1.88 + * adjusted to be in the range 0 <= index <= length of input text. 1.89 + * 1.90 + * 1.91 + * When an index position is returned from a UText function, it will be 1.92 + * a native index to the underlying text. In the case of multi-unit characters, 1.93 + * it will always refer to the first position of the character, 1.94 + * never to the interior.
This is essentially the same thing as saying that 1.95 + * a returned index will always point to a boundary between characters. 1.96 + * 1.97 + * When a native index is supplied to a UText function, all indices that 1.98 + * refer to any part of a multi-unit character representation are considered 1.99 + * to be equivalent. In the case of multi-unit characters, an incoming index 1.100 + * will be logically normalized to refer to the start of the character. 1.101 + * 1.102 + * It is possible to test whether a native index is on a code point boundary 1.103 + * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). 1.104 + * If the index is returned unchanged, it was on a code point boundary. If 1.105 + * an adjusted index is returned, the original index referred to the 1.106 + * interior of a character. 1.107 + * 1.108 + * <em>Conventions for calling UText functions</em> 1.109 + * 1.110 + * Most UText access functions have as their first parameter a (UText *) pointer, 1.111 + * which specifies the UText to be used. Unless otherwise noted, the 1.112 + * pointer must refer to a valid, open UText. Attempting to 1.113 + * use a closed UText or passing a NULL pointer is a programming error and 1.114 + * will produce undefined results or NULL pointer exceptions. 1.115 + * 1.116 + * The UText_Open family of functions can either open an existing (closed) 1.117 + * UText, or heap allocate a new UText. Here is sample code for creating 1.118 + * a stack-allocated UText. 
1.119 + * 1.120 + * \code 1.121 + * char *s = whatever(); // A utf-8 string 1.122 + * UErrorCode status = U_ZERO_ERROR; 1.123 + * UText ut = UTEXT_INITIALIZER; 1.124 + * utext_openUTF8(&ut, s, -1, &status); 1.125 + * if (U_FAILURE(status)) { 1.126 + * // error handling 1.127 + * } else { 1.128 + * // work with the UText 1.129 + * } 1.130 + * \endcode 1.131 + * 1.132 + * Any existing UText passed to an open function _must_ have been initialized, 1.133 + * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated 1.134 + * by an open function. Passing NULL will cause the open function to 1.135 + * heap-allocate and fully initialize a new UText. 1.136 + * 1.137 + */ 1.138 + 1.139 + 1.140 + 1.141 +#include "unicode/utypes.h" 1.142 +#include "unicode/uchar.h" 1.143 +#if U_SHOW_CPLUSPLUS_API 1.144 +#include "unicode/localpointer.h" 1.145 +#include "unicode/rep.h" 1.146 +#include "unicode/unistr.h" 1.147 +#include "unicode/chariter.h" 1.148 +#endif 1.149 + 1.150 + 1.151 +U_CDECL_BEGIN 1.152 + 1.153 +struct UText; 1.154 +typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ 1.155 + 1.156 + 1.157 +/*************************************************************************************** 1.158 + * 1.159 + * C Functions for creating UText wrappers around various kinds of text strings. 1.160 + * 1.161 + ****************************************************************************************/ 1.162 + 1.163 + 1.164 +/** 1.165 + * Close function for UText instances. 1.166 + * Cleans up, releases any resources being held by an open UText. 1.167 + * <p> 1.168 + * If the UText was originally allocated by one of the utext_open functions, 1.169 + * the storage associated with the utext will also be freed. 1.170 + * If the UText storage originated with the application, as it would with 1.171 + * a local or static instance, the storage will not be deleted.
1.172 + * 1.173 + * An open UText can be reset to refer to new string by using one of the utext_open() 1.174 + * functions without first closing the UText. 1.175 + * 1.176 + * @param ut The UText to be closed. 1.177 + * @return NULL if the UText struct was deleted by the close. If the UText struct 1.178 + * was originally provided by the caller to the open function, it is 1.179 + * returned by this function, and may be safely used again in 1.180 + * a subsequent utext_open. 1.181 + * 1.182 + * @stable ICU 3.4 1.183 + */ 1.184 +U_STABLE UText * U_EXPORT2 1.185 +utext_close(UText *ut); 1.186 + 1.187 +#if U_SHOW_CPLUSPLUS_API 1.188 + 1.189 +U_NAMESPACE_BEGIN 1.190 + 1.191 +/** 1.192 + * \class LocalUTextPointer 1.193 + * "Smart pointer" class, closes a UText via utext_close(). 1.194 + * For most methods see the LocalPointerBase base class. 1.195 + * 1.196 + * @see LocalPointerBase 1.197 + * @see LocalPointer 1.198 + * @stable ICU 4.4 1.199 + */ 1.200 +U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); 1.201 + 1.202 +U_NAMESPACE_END 1.203 + 1.204 +#endif 1.205 + 1.206 +/** 1.207 + * Open a read-only UText implementation for UTF-8 strings. 1.208 + * 1.209 + * \htmlonly 1.210 + * Any invalid UTF-8 in the input will be handled in this way: 1.211 + * a sequence of bytes that has the form of a truncated, but otherwise valid, 1.212 + * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. 1.213 + * Any other illegal bytes will each be replaced by a \uFFFD. 1.214 + * \endhtmlonly 1.215 + * 1.216 + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 1.217 + * If non-NULL, must refer to an initialized UText struct, which will then 1.218 + * be reset to reference the specified UTF-8 string. 1.219 + * @param s A UTF-8 string. Must not be NULL. 1.220 + * @param length The length of the UTF-8 string in bytes, or -1 if the string is 1.221 + * zero terminated. 1.222 + * @param status Errors are returned here. 
1.223 + * @return A pointer to the UText. If a pre-allocated UText was provided, it 1.224 + * will always be used and returned. 1.225 + * @stable ICU 3.4 1.226 + */ 1.227 +U_STABLE UText * U_EXPORT2 1.228 +utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); 1.229 + 1.230 + 1.231 +/** 1.232 + * Open a read-only UText for UChar * string. 1.233 + * 1.234 + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 1.235 + * If non-NULL, must refer to an initialized UText struct, which will then 1.236 + * be reset to reference the specified UChar string. 1.237 + * @param s A UChar (UTF-16) string 1.238 + * @param length The number of UChars in the input string, or -1 if the string is 1.239 + * zero terminated. 1.240 + * @param status Errors are returned here. 1.241 + * @return A pointer to the UText. If a pre-allocated UText was provided, it 1.242 + * will always be used and returned. 1.243 + * @stable ICU 3.4 1.244 + */ 1.245 +U_STABLE UText * U_EXPORT2 1.246 +utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); 1.247 + 1.248 + 1.249 +#if U_SHOW_CPLUSPLUS_API 1.250 +/** 1.251 + * Open a writable UText for a non-const UnicodeString. 1.252 + * 1.253 + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 1.254 + * If non-NULL, must refer to an initialized UText struct, which will then 1.255 + * be reset to reference the specified input string. 1.256 + * @param s A UnicodeString. 1.257 + * @param status Errors are returned here. 1.258 + * @return Pointer to the UText. If a UText was supplied as input, this 1.259 + * will always be used and returned. 1.260 + * @stable ICU 3.4 1.261 + */ 1.262 +U_STABLE UText * U_EXPORT2 1.263 +utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status); 1.264 + 1.265 + 1.266 +/** 1.267 + * Open a UText for a const UnicodeString. The resulting UText will not be writable. 1.268 + * 1.269 + * @param ut Pointer to a UText struct. 
If NULL, a new UText will be created. 1.270 + * If non-NULL, must refer to an initialized UText struct, which will then 1.271 + * be reset to reference the specified input string. 1.272 + * @param s A const UnicodeString to be wrapped. 1.273 + * @param status Errors are returned here. 1.274 + * @return Pointer to the UText. If a UText was supplied as input, this 1.275 + * will always be used and returned. 1.276 + * @stable ICU 3.4 1.277 + */ 1.278 +U_STABLE UText * U_EXPORT2 1.279 +utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status); 1.280 + 1.281 + 1.282 +/** 1.283 + * Open a writable UText implementation for an ICU Replaceable object. 1.284 + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 1.285 + * If non-NULL, must refer to an already existing UText, which will then 1.286 + * be reset to reference the specified replaceable text. 1.287 + * @param rep A Replaceable text object. 1.288 + * @param status Errors are returned here. 1.289 + * @return Pointer to the UText. If a UText was supplied as input, this 1.290 + * will always be used and returned. 1.291 + * @see Replaceable 1.292 + * @stable ICU 3.4 1.293 + */ 1.294 +U_STABLE UText * U_EXPORT2 1.295 +utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status); 1.296 + 1.297 +/** 1.298 + * Open a UText implementation over an ICU CharacterIterator. 1.299 + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. 1.300 + * If non-NULL, must refer to an already existing UText, which will then 1.301 + * be reset to reference the specified character iterator. 1.302 + * @param ci A Character Iterator. 1.303 + * @param status Errors are returned here. 1.304 + * @return Pointer to the UText. If a UText was supplied as input, this 1.305 + * will always be used and returned.
1.306 + * @see CharacterIterator 1.307 + * @stable ICU 3.4 1.308 + */ 1.309 +U_STABLE UText * U_EXPORT2 1.310 +utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status); 1.311 + 1.312 +#endif 1.313 + 1.314 + 1.315 +/** 1.316 + * Clone a UText. This is much like opening a UText where the source text is itself 1.317 + * another UText. 1.318 + * 1.319 + * A deep clone will copy both the UText data structures and the underlying text. 1.320 + * The original and cloned UText will operate completely independently; modifications 1.321 + * made to the text in one will not affect the other. Text providers are not 1.322 + * required to support deep clones. The user of clone() must check the status return 1.323 + * and be prepared to handle failures. 1.324 + * 1.325 + * The standard UText implementations for UTF8, UChar *, UnicodeString and 1.326 + * Replaceable all support deep cloning. 1.327 + * 1.328 + * The UText returned from a deep clone will be writable, assuming that the text 1.329 + * provider is able to support writing, even if the source UText had been made 1.330 + * non-writable by means of utext_freeze(). 1.331 + * 1.332 + * A shallow clone replicates only the UText data structures; it does not make 1.333 + * a copy of the underlying text. Shallow clones can be used as an efficient way to 1.334 + * have multiple iterators active in a single text string that is not being 1.335 + * modified. 1.336 + * 1.337 + * A shallow clone operation will not fail, barring truly exceptional conditions such 1.338 + * as memory allocation failures. 1.339 + * 1.340 + * Shallow UText clones should be avoided if the UText functions that modify the 1.341 + * text are expected to be used, either on the original or the cloned UText. 1.342 + * Any such modifications can cause unpredictable behavior. Read Only 1.343 + * shallow clones provide some protection against errors of this type by 1.344 + * disabling text modification via the cloned UText.
1.345 + * 1.346 + * A shallow clone made with the readOnly parameter == FALSE will preserve the 1.347 + * utext_isWritable() state of the source object. Note, however, that 1.348 + * write operations must be avoided while more than one UText exists that refer 1.349 + * to the same underlying text. 1.350 + * 1.351 + * A UText and its clone may be safely concurrently accessed by separate threads. 1.352 + * This is true for read access only with shallow clones, and for both read and 1.353 + * write access with deep clones. 1.354 + * It is the responsibility of the Text Provider to ensure that this thread safety 1.355 + * constraint is met. 1.356 + * 1.357 + * @param dest A UText struct to be filled in with the result of the clone operation, 1.358 + * or NULL if the clone function should heap-allocate a new UText struct. 1.359 + * If non-NULL, must refer to an already existing UText, which will then 1.360 + * be reset to become the clone. 1.361 + * @param src The UText to be cloned. 1.362 + * @param deep TRUE to request a deep clone, FALSE for a shallow clone. 1.363 + * @param readOnly TRUE to request that the cloned UText have read only access to the 1.364 + * underlying text. 1.365 + 1.366 + * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR 1.367 + * will be returned if the text provider is unable to clone the 1.368 + * original text. 1.369 + * @return The newly created clone, or NULL if the clone operation failed. 1.370 + * @stable ICU 3.4 1.371 + */ 1.372 +U_STABLE UText * U_EXPORT2 1.373 +utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); 1.374 + 1.375 + 1.376 +/** 1.377 + * Compare two UText objects for equality. 1.378 + * UTexts are equal if they are iterating over the same text, and 1.379 + * have the same iteration position within the text. 1.380 + * If either or both of the parameters are NULL, the comparison is FALSE. 1.381 + * 1.382 + * @param a The first of the two UTexts to compare. 
1.383 + * @param b The other UText to be compared. 1.384 + * @return TRUE if the two UTexts are equal. 1.385 + * @stable ICU 3.6 1.386 + */ 1.387 +U_STABLE UBool U_EXPORT2 1.388 +utext_equals(const UText *a, const UText *b); 1.389 + 1.390 + 1.391 +/***************************************************************************** 1.392 + * 1.393 + * Functions to work with the text represented by a UText wrapper 1.394 + * 1.395 + *****************************************************************************/ 1.396 + 1.397 +/** 1.398 + * Get the length of the text. Depending on the characteristics 1.399 + * of the underlying text representation, this may be expensive. 1.400 + * @see utext_isLengthExpensive() 1.401 + * 1.402 + * 1.403 + * @param ut the text to be accessed. 1.404 + * @return the length of the text, expressed in native units. 1.405 + * 1.406 + * @stable ICU 3.4 1.407 + */ 1.408 +U_STABLE int64_t U_EXPORT2 1.409 +utext_nativeLength(UText *ut); 1.410 + 1.411 +/** 1.412 + * Return TRUE if calculating the length of the text could be expensive. 1.413 + * Finding the length of NUL terminated strings is considered to be expensive. 1.414 + * 1.415 + * Note that the value of this function may change 1.416 + * as the result of other operations on a UText. 1.417 + * Once the length of a string has been discovered, it will no longer 1.418 + * be expensive to report it. 1.419 + * 1.420 + * @param ut the text to be accessed. 1.421 + * @return TRUE if determining the length of the text could be time consuming. 1.422 + * @stable ICU 3.4 1.423 + */ 1.424 +U_STABLE UBool U_EXPORT2 1.425 +utext_isLengthExpensive(const UText *ut); 1.426 + 1.427 +/** 1.428 + * Returns the code point at the requested index, 1.429 + * or U_SENTINEL (-1) if it is out of bounds. 1.430 + * 1.431 + * If the specified index points to the interior of a multi-unit 1.432 + * character - one of the trail bytes of a UTF-8 sequence, for example - 1.433 + * the complete code point will be returned.
1.434 + * 1.435 + * The iteration position will be set to the start of the returned code point. 1.436 + * 1.437 + * This function is roughly equivalent to the sequence 1.438 + * utext_setNativeIndex(index); 1.439 + * utext_current32(); 1.440 + * (There is a subtle difference if the index is out of bounds by being less than zero - 1.441 + * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() 1.442 + * will return the char at zero. utext_char32At(negative index), on the other hand, will 1.443 + * return the U_SENTINEL value of -1.) 1.444 + * 1.445 + * @param ut the text to be accessed 1.446 + * @param nativeIndex the native index of the character to be accessed. If the index points 1.447 + * to other than the first unit of a multi-unit character, it will be adjusted 1.448 + * to the start of the character. 1.449 + * @return the code point at the specified index. 1.450 + * @stable ICU 3.4 1.451 + */ 1.452 +U_STABLE UChar32 U_EXPORT2 1.453 +utext_char32At(UText *ut, int64_t nativeIndex); 1.454 + 1.455 + 1.456 +/** 1.457 + * 1.458 + * Get the code point at the current iteration position, 1.459 + * or U_SENTINEL (-1) if the iteration has reached the end of 1.460 + * the input text. 1.461 + * 1.462 + * @param ut the text to be accessed. 1.463 + * @return the Unicode code point at the current iterator position. 1.464 + * @stable ICU 3.4 1.465 + */ 1.466 +U_STABLE UChar32 U_EXPORT2 1.467 +utext_current32(UText *ut); 1.468 + 1.469 + 1.470 +/** 1.471 + * Get the code point at the current iteration position of the UText, and 1.472 + * advance the position to the first index following the character. 1.473 + * 1.474 + * If the position is at the end of the text (the index following 1.475 + * the last character, which is also the length of the text), 1.476 + * return U_SENTINEL (-1) and do not advance the index. 1.477 + * 1.478 + * This is a post-increment operation.
1.479 + * 1.480 + * An inline macro version of this function, UTEXT_NEXT32(), 1.481 + * is available for performance critical use. 1.482 + * 1.483 + * @param ut the text to be accessed. 1.484 + * @return the Unicode code point at the iteration position. 1.485 + * @see UTEXT_NEXT32 1.486 + * @stable ICU 3.4 1.487 + */ 1.488 +U_STABLE UChar32 U_EXPORT2 1.489 +utext_next32(UText *ut); 1.490 + 1.491 + 1.492 +/** 1.493 + * Move the iterator position to the character (code point) whose 1.494 + * index precedes the current position, and return that character. 1.495 + * This is a pre-decrement operation. 1.496 + * 1.497 + * If the initial position is at the start of the text (index of 0) 1.498 + * return U_SENTINEL (-1), and leave the position unchanged. 1.499 + * 1.500 + * An inline macro version of this function, UTEXT_PREVIOUS32(), 1.501 + * is available for performance critical use. 1.502 + * 1.503 + * @param ut the text to be accessed. 1.504 + * @return the previous UChar32 code point, or U_SENTINEL (-1) 1.505 + * if the iteration has reached the start of the text. 1.506 + * @see UTEXT_PREVIOUS32 1.507 + * @stable ICU 3.4 1.508 + */ 1.509 +U_STABLE UChar32 U_EXPORT2 1.510 +utext_previous32(UText *ut); 1.511 + 1.512 + 1.513 +/** 1.514 + * Set the iteration index and return the code point at that index. 1.515 + * Leave the iteration index at the start of the following code point. 1.516 + * 1.517 + * This function is the most efficient and convenient way to 1.518 + * begin a forward iteration. The results are identical to those 1.519 + * from the sequence 1.520 + * \code 1.521 + * utext_setNativeIndex(); 1.522 + * utext_next32(); 1.523 + * \endcode 1.524 + * 1.525 + * @param ut the text to be accessed. 1.526 + * @param nativeIndex Iteration index, in the native units of the text provider. 1.527 + * @return Code point which starts at or before index, 1.528 + * or U_SENTINEL (-1) if it is out of bounds.
1.529 + * @stable ICU 3.4 1.530 + */ 1.531 +U_STABLE UChar32 U_EXPORT2 1.532 +utext_next32From(UText *ut, int64_t nativeIndex); 1.533 + 1.534 + 1.535 + 1.536 +/** 1.537 + * Set the iteration index, and return the code point preceding the 1.538 + * one specified by the initial index. Leave the iteration position 1.539 + * at the start of the returned code point. 1.540 + * 1.541 + * This function is the most efficient and convenient way to 1.542 + * begin a backwards iteration. 1.543 + * 1.544 + * @param ut the text to be accessed. 1.545 + * @param nativeIndex Iteration index in the native units of the text provider. 1.546 + * @return Code point preceding the one at the initial index, 1.547 + * or U_SENTINEL (-1) if it is out of bounds. 1.548 + * 1.549 + * @stable ICU 3.4 1.550 + */ 1.551 +U_STABLE UChar32 U_EXPORT2 1.552 +utext_previous32From(UText *ut, int64_t nativeIndex); 1.553 + 1.554 +/** 1.555 + * Get the current iterator position, which can range from 0 to 1.556 + * the length of the text. 1.557 + * The position is a native index into the input text, in whatever format it 1.558 + * may have (possibly UTF-8 for example), and may not always be the same as 1.559 + * the corresponding UChar (UTF-16) index. 1.560 + * The returned position will always be aligned to a code point boundary. 1.561 + * 1.562 + * @param ut the text to be accessed. 1.563 + * @return the current index position, in the native units of the text provider. 1.564 + * @stable ICU 3.4 1.565 + */ 1.566 +U_STABLE int64_t U_EXPORT2 1.567 +utext_getNativeIndex(const UText *ut); 1.568 + 1.569 +/** 1.570 + * Set the current iteration position to the nearest code point 1.571 + * boundary at or preceding the specified index. 1.572 + * The index is in the native units of the original input text. 1.573 + * If the index is out of range, it will be pinned to be within 1.574 + * the range of the input text. 
1.575 + * <p> 1.576 + * It will usually be more efficient to begin an iteration 1.577 + * using the functions utext_next32From() or utext_previous32From() 1.578 + * rather than utext_setNativeIndex(). 1.579 + * <p> 1.580 + * Moving the index position to an adjacent character is best done 1.581 + * with utext_next32(), utext_previous32() or utext_moveIndex32(). 1.582 + * Attempting to do direct arithmetic on the index position is 1.583 + * complicated by the fact that the size (in native units) of a 1.584 + * character depends on the underlying representation of the character 1.585 + * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not 1.586 + * easily knowable. 1.587 + * 1.588 + * @param ut the text to be accessed. 1.589 + * @param nativeIndex the native unit index of the new iteration position. 1.590 + * @stable ICU 3.4 1.591 + */ 1.592 +U_STABLE void U_EXPORT2 1.593 +utext_setNativeIndex(UText *ut, int64_t nativeIndex); 1.594 + 1.595 +/** 1.596 + * Move the iterator position by delta code points. The number of code points 1.597 + * is a signed number; a negative delta will move the iterator backwards, 1.598 + * towards the start of the text. 1.599 + * <p> 1.600 + * The index is moved by <code>delta</code> code points 1.601 + * forward or backward, but no further backward than to 0 and 1.602 + * no further forward than to utext_nativeLength(). 1.603 + * The resulting index value will be in between 0 and length, inclusive. 1.604 + * 1.605 + * @param ut the text to be accessed. 1.606 + * @param delta the signed number of code points to move the iteration position. 1.607 + * @return TRUE if the position could be moved the requested number of positions while 1.608 + * staying within the range [0 - text length]. 1.609 + * @stable ICU 3.4 1.610 + */ 1.611 +U_STABLE UBool U_EXPORT2 1.612 +utext_moveIndex32(UText *ut, int32_t delta); 1.613 + 1.614 +/** 1.615 + * Get the native index of the character preceding the current position.
1.616 + * If the iteration position is already at the start of the text, zero 1.617 + * is returned. 1.618 + * The value returned is the same as that obtained from the following sequence, 1.619 + * but without the side effect of changing the iteration position. 1.620 + * 1.621 + * \code 1.622 + * UText *ut = whatever; 1.623 + * ... 1.624 + * utext_previous32(ut); 1.625 + * utext_getNativeIndex(ut); 1.626 + * \endcode 1.627 + * 1.628 + * This function is most useful during forwards iteration, where it will get the 1.629 + * native index of the character most recently returned from utext_next32(). 1.630 + * 1.631 + * @param ut the text to be accessed 1.632 + * @return the native index of the character preceding the current index position, 1.633 + * or zero if the current position is at the start of the text. 1.634 + * @stable ICU 3.6 1.635 + */ 1.636 +U_STABLE int64_t U_EXPORT2 1.637 +utext_getPreviousNativeIndex(UText *ut); 1.638 + 1.639 + 1.640 +/** 1.641 + * 1.642 + * Extract text from a UText into a UChar buffer. The range of text to be extracted 1.643 + * is specified in the native indices of the UText provider. These may not necessarily 1.644 + * be UTF-16 indices. 1.645 + * <p> 1.646 + * The size (number of 16 bit UChars) of the data to be extracted is returned. The 1.647 + * full number of UChars is returned, even when the extracted text is truncated 1.648 + * because the specified buffer size is too small. 1.649 + * <p> 1.650 + * The extracted string will (if you are a user) / must (if you are a text provider) 1.651 + * be NUL-terminated if there is sufficient space in the destination buffer. This 1.652 + * terminating NUL is not included in the returned length. 1.653 + * <p> 1.654 + * The iteration index is left at the position following the last extracted character. 1.655 + * 1.656 + * @param ut the UText from which to extract data.
1.657 + * @param nativeStart the native index of the first character to extract. 1.658 + * If the specified index is out of range, 1.659 + * it will be pinned to be within 0 <= index <= textLength. 1.660 + * @param nativeLimit the native string index of the position following the last 1.661 + * character to extract. If the specified index is out of range, 1.662 + * it will be pinned to be within 0 <= index <= textLength. 1.663 + * nativeLimit must be >= nativeStart. 1.664 + * @param dest the UChar (UTF-16) buffer into which the extracted text is placed 1.665 + * @param destCapacity The size, in UChars, of the destination buffer. May be zero 1.666 + * for precomputing the required size. 1.667 + * @param status receives any error status. 1.668 + * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the 1.669 + * buffer was too small. Returns number of UChars for preflighting. 1.670 + * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. 1.671 + * 1.672 + * @stable ICU 3.4 1.673 + */ 1.674 +U_STABLE int32_t U_EXPORT2 1.675 +utext_extract(UText *ut, 1.676 + int64_t nativeStart, int64_t nativeLimit, 1.677 + UChar *dest, int32_t destCapacity, 1.678 + UErrorCode *status); 1.679 + 1.680 + 1.681 + 1.682 +/************************************************************************************ 1.683 + * 1.684 + * #define inline versions of selected performance-critical text access functions 1.685 + * Caution: do not use auto increment++ or decrement-- expressions 1.686 + * as parameters to these macros. 1.687 + * 1.688 + * For most use, where there is no extreme performance constraint, the 1.689 + * normal, non-inline functions are a better choice. The resulting code 1.690 + * will be smaller, and, if the need ever arises, easier to debug. 1.691 + * 1.692 + * These are implemented as #defines rather than real functions 1.693 + * because there is no fully portable way to do inline functions in plain C.
1.694 + * 1.695 + ************************************************************************************/ 1.696 + 1.697 +#ifndef U_HIDE_INTERNAL_API 1.698 +/** 1.699 + * inline version of utext_current32(), for performance-critical situations. 1.700 + * 1.701 + * Get the code point at the current iteration position of the UText. 1.702 + * Returns U_SENTINEL (-1) if the position is at the end of the 1.703 + * text. The value is read directly from chunkContents when chunkOffset is less than chunkLength and the unit at chunkOffset is below 0xd800; in every other case utext_current32() is called. The macro argument is evaluated more than once. 1.704 + * 1.705 + * @internal ICU 4.4 technology preview 1.706 + */ 1.707 +#define UTEXT_CURRENT32(ut) \ 1.708 + ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ 1.709 + ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) 1.710 +#endif /* U_HIDE_INTERNAL_API */ 1.711 + 1.712 +/** 1.713 + * inline version of utext_next32(), for performance-critical situations. 1.714 + * 1.715 + * Get the code point at the current iteration position of the UText, and 1.716 + * advance the position to the first index following the character. 1.717 + * This is a post-increment operation. 1.718 + * Returns U_SENTINEL (-1) if the position is at the end of the 1.719 + * text. The value is read directly from chunkContents, and chunkOffset is incremented, when chunkOffset is less than chunkLength and the unit at chunkOffset is below 0xd800; in every other case utext_next32() is called. The macro argument is evaluated more than once. 1.720 + * 1.721 + * @stable ICU 3.4 1.722 + */ 1.723 +#define UTEXT_NEXT32(ut) \ 1.724 + ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ 1.725 + ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) 1.726 + 1.727 +/** 1.728 + * inline version of utext_previous32(), for performance-critical situations. 1.729 + * 1.730 + * Move the iterator position to the character (code point) whose 1.731 + * index precedes the current position, and return that character. 1.732 + * This is a pre-decrement operation. 1.733 + * Returns U_SENTINEL (-1) if the position is at the start of the text. The value is read directly from chunkContents, after decrementing chunkOffset, when chunkOffset is greater than 0 and the unit just before it is below 0xd800; in every other case utext_previous32() is called. The macro argument is evaluated more than once. 1.734 + * 1.735 + * @stable ICU 3.4 1.736 + */ 1.737 +#define UTEXT_PREVIOUS32(ut) \ 1.738 + ((ut)->chunkOffset > 0 && \ 1.739 + (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ?
\ 1.740 + (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) 1.741 + 1.742 +/** 1.743 + * inline version of utext_getNativeIndex(), for performance-critical situations. 1.744 + * 1.745 + * Get the current iterator position, which can range from 0 to 1.746 + * the length of the text. 1.747 + * The position is a native index into the input text, in whatever format it 1.748 + * may have (possibly UTF-8 for example), and may not always be the same as 1.749 + * the corresponding UChar (UTF-16) index. 1.750 + * The returned position will always be aligned to a code point boundary. The result is chunkNativeStart+chunkOffset when chunkOffset is less than or equal to nativeIndexingLimit; otherwise it is obtained from the pFuncs->mapOffsetToNative function. The macro argument is evaluated more than once. 1.751 + * 1.752 + * @stable ICU 3.6 1.753 + */ 1.754 +#define UTEXT_GETNATIVEINDEX(ut) \ 1.755 + ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ 1.756 + (ut)->chunkNativeStart+(ut)->chunkOffset : \ 1.757 + (ut)->pFuncs->mapOffsetToNative(ut)) 1.758 + 1.759 +/** 1.760 + * inline version of utext_setNativeIndex(), for performance-critical situations. 1.761 + * 1.762 + * Set the current iteration position to the nearest code point 1.763 + * boundary at or preceding the specified index. 1.764 + * The index is in the native units of the original input text. 1.765 + * If the index is out of range, it will be pinned to be within 1.766 + * the range of the input text. This macro expands to a brace-enclosed block, not an expression, so it cannot be used where a value is required. When (ix - chunkNativeStart) lies between 0 and nativeIndexingLimit inclusive it is stored directly in chunkOffset; otherwise utext_setNativeIndex() is called, and ix is then evaluated a second time. NOTE(review): the direct path does not inspect chunkContents - confirm that it cannot leave the position between the two units of a surrogate pair. 1.767 + * 1.768 + * @stable ICU 3.8 1.769 + */ 1.770 +#define UTEXT_SETNATIVEINDEX(ut, ix) \ 1.771 + { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ 1.772 + if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \ 1.773 + (ut)->chunkOffset=(int32_t)__offset; \ 1.774 + } else { \ 1.775 + utext_setNativeIndex((ut), (ix)); } } 1.776 + 1.777 + 1.778 + 1.779 +/************************************************************************************ 1.780 + * 1.781 + * Functions related to writing or modifying the text. 1.782 + * These will work only with modifiable UTexts. Attempting to 1.783 + * modify a read-only UText will return an error status.
1.784 + * 1.785 + ************************************************************************************/ 1.786 + 1.787 + 1.788 +/** 1.789 + * Return TRUE if the text can be written (modified) with utext_replace() or 1.790 + * utext_copy(). For the text to be writable, the text provider must 1.791 + * be of a type that supports writing and the UText must not be frozen. 1.792 + * 1.793 + * Attempting to modify text when utext_isWritable() is FALSE will fail - 1.794 + * the text will not be modified, and an error will be returned from the function 1.795 + * that attempted the modification. 1.796 + * 1.797 + * @param ut the UText to be tested. 1.798 + * @return TRUE if the text is modifiable. 1.799 + * 1.800 + * @see utext_freeze() 1.801 + * @see utext_replace() 1.802 + * @see utext_copy() 1.803 + * @stable ICU 3.4 1.804 + * 1.805 + */ 1.806 +U_STABLE UBool U_EXPORT2 1.807 +utext_isWritable(const UText *ut); 1.808 + 1.809 + 1.810 +/** 1.811 + * Test whether there is meta data associated with the text. 1.812 + * @see Replaceable::hasMetaData() 1.813 + * 1.814 + * @param ut The UText to be tested 1.815 + * @return TRUE if the underlying text includes meta data. 1.816 + * @stable ICU 3.4 1.817 + */ 1.818 +U_STABLE UBool U_EXPORT2 1.819 +utext_hasMetaData(const UText *ut); 1.820 + 1.821 + 1.822 +/** 1.823 + * Replace a range of the original text with a replacement text. 1.824 + * 1.825 + * Leaves the current iteration position at the position following the 1.826 + * newly inserted replacement text. 1.827 + * 1.828 + * This function is only available on UText types that support writing, 1.829 + * that is, ones where utext_isWritable() returns TRUE. 1.830 + * 1.831 + * When using this function, there should be only a single UText opened onto the 1.832 + * underlying native text string. Behavior after a replace operation 1.833 + * on a UText is undefined for any other additional UTexts that refer to the 1.834 + * modified string. 
1.835 + * 1.836 + * @param ut the UText representing the text to be operated on. 1.837 + * @param nativeStart the native index of the start of the region to be replaced 1.838 + * @param nativeLimit the native index of the character following the region to be replaced. 1.839 + * @param replacementText pointer to the replacement text 1.840 + * @param replacementLength length of the replacement text in UChars, or -1 if the text is NUL terminated. 1.841 + * @param status receives any error status. Possible errors include 1.842 + * U_NO_WRITE_PERMISSION 1.843 + * 1.844 + * @return The signed number of (native) storage units by which 1.845 + * the length of the text expanded or contracted. 1.846 + * 1.847 + * @stable ICU 3.4 1.848 + */ 1.849 +U_STABLE int32_t U_EXPORT2 1.850 +utext_replace(UText *ut, 1.851 + int64_t nativeStart, int64_t nativeLimit, 1.852 + const UChar *replacementText, int32_t replacementLength, 1.853 + UErrorCode *status); 1.854 + 1.855 + 1.856 + 1.857 +/** 1.858 + * 1.859 + * Copy or move a substring from one position to another within the text, 1.860 + * while retaining any metadata associated with the text. 1.861 + * This function is used to duplicate or reorder substrings. 1.862 + * The destination index must not overlap the source range. 1.863 + * 1.864 + * The text to be copied or moved is inserted at destIndex; 1.865 + * it does not replace or overwrite any existing text. 1.866 + * 1.867 + * The iteration position is left following the newly inserted text 1.868 + * at the destination position. 1.869 + * 1.870 + * This function is only available on UText types that support writing, 1.871 + * that is, ones where utext_isWritable() returns TRUE. 1.872 + * 1.873 + * When using this function, there should be only a single UText opened onto the 1.874 + * underlying native text string. Behavior after a copy operation 1.875 + * on a UText is undefined for any other additional UTexts that refer to the 1.876 + * modified string. 
1.877 + * 1.878 + * @param ut The UText representing the text to be operated on. 1.879 + * @param nativeStart The native index of the start of the region to be copied or moved 1.880 + * @param nativeLimit The native index of the character position following the region 1.881 + * to be copied. 1.882 + * @param destIndex The native destination index to which the source substring is 1.883 + * copied or moved. 1.884 + * @param move If TRUE, then the substring is moved, not copied/duplicated. 1.885 + * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION 1.886 + * 1.887 + * @stable ICU 3.4 1.888 + */ 1.889 +U_STABLE void U_EXPORT2 1.890 +utext_copy(UText *ut, 1.891 + int64_t nativeStart, int64_t nativeLimit, 1.892 + int64_t destIndex, 1.893 + UBool move, 1.894 + UErrorCode *status); 1.895 + 1.896 + 1.897 +/** 1.898 + * <p> 1.899 + * Freeze a UText. This prevents any modification to the underlying text itself 1.900 + * by means of functions operating on this UText. 1.901 + * </p> 1.902 + * <p> 1.903 + * Once frozen, a UText can not be unfrozen. The intent is to ensure 1.904 + * that the text underlying a frozen UText wrapper cannot be modified via that UText. 1.905 + * </p> 1.906 + * <p> 1.907 + * Caution: freezing a UText will disable changes made via the specific 1.908 + * frozen UText wrapper only; it will not have any effect on the ability to 1.909 + * directly modify the text by bypassing the UText. Any such backdoor modifications 1.910 + * are always an error while UText access is occurring because the underlying 1.911 + * text can get out of sync with UText's buffering. 1.912 + * </p> 1.913 + * 1.914 + * @param ut The UText to be frozen. 1.915 + * @see utext_isWritable() 1.916 + * @stable ICU 3.6 1.917 + */ 1.918 +U_STABLE void U_EXPORT2 1.919 +utext_freeze(UText *ut); 1.920 + 1.921 + 1.922 +/** 1.923 + * UText provider properties (bit field indexes). 
1.924 + * 1.925 + * @see UText 1.926 + * @stable ICU 3.4 1.927 + */ 1.928 +enum { 1.929 + /** 1.930 + * It is potentially time consuming for the provider to determine the length of the text. 1.931 + * @stable ICU 3.4 1.932 + */ 1.933 + UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, 1.934 + /** 1.935 + * Text chunks remain valid and usable until the text object is modified or 1.936 + * deleted, not just until the next time the access() function is called 1.937 + * (which is the default). 1.938 + * @stable ICU 3.4 1.939 + */ 1.940 + UTEXT_PROVIDER_STABLE_CHUNKS = 2, 1.941 + /** 1.942 + * The provider supports modifying the text via the replace() and copy() 1.943 + * functions. 1.944 + * @see Replaceable 1.945 + * @stable ICU 3.4 1.946 + */ 1.947 + UTEXT_PROVIDER_WRITABLE = 3, 1.948 + /** 1.949 + * There is meta data associated with the text. 1.950 + * @see Replaceable::hasMetaData() 1.951 + * @stable ICU 3.4 1.952 + */ 1.953 + UTEXT_PROVIDER_HAS_META_DATA = 4, 1.954 + /** 1.955 + * Text provider owns the text storage. 1.956 + * Generally occurs as the result of a deep clone of the UText. 1.957 + * When closing the UText, the associated text must 1.958 + * also be closed/deleted/freed/whatever is appropriate. 1.959 + * @stable ICU 3.6 1.960 + */ 1.961 + UTEXT_PROVIDER_OWNS_TEXT = 5 1.962 +}; 1.963 + 1.964 +/** 1.965 + * Function type declaration for UText.clone(). 1.966 + * 1.967 + * Clone a UText. Much like opening a UText where the source text is itself 1.968 + * another UText. 1.969 + * 1.970 + * A deep clone will copy both the UText data structures and the underlying text. 1.971 + * The original and cloned UText will operate completely independently; modifications 1.972 + * made to the text in one will not affect the other. Text providers are not 1.973 + * required to support deep clones. The user of clone() must check the status return 1.974 + * and be prepared to handle failures. 
1.975 + * 1.976 + * A shallow clone replicates only the UText data structures; it does not make 1.977 + * a copy of the underlying text. Shallow clones can be used as an efficient way to 1.978 + * have multiple iterators active in a single text string that is not being 1.979 + * modified. 1.980 + * 1.981 + * A shallow clone operation must not fail except for truly exceptional conditions such 1.982 + * as memory allocation failures. 1.983 + * 1.984 + * A UText and its clone may be safely concurrently accessed by separate threads. 1.985 + * This is true for both shallow and deep clones. 1.986 + * It is the responsibility of the Text Provider to ensure that this thread safety 1.987 + * constraint is met. 1.988 + * 1.989 + * 1.990 + * @param dest A UText struct to be filled in with the result of the clone operation, 1.991 + * or NULL if the clone function should heap-allocate a new UText struct. 1.992 + * @param src The UText to be cloned. 1.993 + * @param deep TRUE to request a deep clone, FALSE for a shallow clone. 1.994 + * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR 1.995 + * should be returned if the text provider is unable to clone the 1.996 + * original text. 1.997 + * @return The newly created clone, or NULL if the clone operation failed. 1.998 + * 1.999 + * @stable ICU 3.4 1.1000 + */ 1.1001 +typedef UText * U_CALLCONV 1.1002 +UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); 1.1003 + 1.1004 + 1.1005 +/** 1.1006 + * Function type declaration for UText.nativeLength(). 1.1007 + * 1.1008 + * @param ut the UText to get the length of. 1.1009 + * @return the length, in the native units of the original text string. 1.1010 + * @see UText 1.1011 + * @stable ICU 3.4 1.1012 + */ 1.1013 +typedef int64_t U_CALLCONV 1.1014 +UTextNativeLength(UText *ut); 1.1015 + 1.1016 +/** 1.1017 + * Function type declaration for UText.access(). 
Get the description of the text chunk 1.1018 + * containing the text at a requested native index. The UText's iteration 1.1019 + * position will be left at the requested index. If the index is out 1.1020 + * of bounds, the iteration position will be left at the start or end 1.1021 + * of the string, as appropriate. 1.1022 + * 1.1023 + * Chunks must begin and end on code point boundaries. A single code point 1.1024 + * comprised of multiple storage units must never span a chunk boundary. 1.1025 + * 1.1026 + * 1.1027 + * @param ut the UText being accessed. 1.1028 + * @param nativeIndex Requested index of the text to be accessed. 1.1029 + * @param forward If TRUE, then the returned chunk must contain text 1.1030 + * starting from the index, so that start<=index<limit. 1.1031 + * If FALSE, then the returned chunk must contain text 1.1032 + * before the index, so that start<index<=limit. 1.1033 + * @return True if the requested index could be accessed. The chunk 1.1034 + * will contain the requested text. 1.1035 + * False value if a chunk cannot be accessed 1.1036 + * (the requested index is out of bounds). 1.1037 + * 1.1038 + * @see UText 1.1039 + * @stable ICU 3.4 1.1040 + */ 1.1041 +typedef UBool U_CALLCONV 1.1042 +UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); 1.1043 + 1.1044 +/** 1.1045 + * Function type declaration for UText.extract(). 1.1046 + * 1.1047 + * Extract text from a UText into a UChar buffer. The range of text to be extracted 1.1048 + * is specified in the native indices of the UText provider. These may not necessarily 1.1049 + * be UTF-16 indices. 1.1050 + * <p> 1.1051 + * The size (number of 16 bit UChars) of the data to be extracted is returned. The 1.1052 + * full amount is returned, even when the specified buffer size is smaller. 1.1053 + * <p> 1.1054 + * The extracted string will (if you are a user) / must (if you are a text provider) 1.1055 + * be NUL-terminated if there is sufficient space in the destination buffer. 
1.1056 + * 1.1057 + * @param ut the UText from which to extract data. 1.1058 + * @param nativeStart the native index of the first character to extract. 1.1059 + * @param nativeLimit the native string index of the position following the last 1.1060 + * character to extract. 1.1061 + * @param dest the UChar (UTF-16) buffer into which the extracted text is placed 1.1062 + * @param destCapacity The size, in UChars, of the destination buffer. May be zero 1.1063 + * for precomputing the required size. 1.1064 + * @param status receives any error status. 1.1065 + * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for 1.1066 + * preflighting. 1.1067 + * @return Number of UChars in the data. Does not include a trailing NUL. 1.1068 + * 1.1069 + * @stable ICU 3.4 1.1070 + */ 1.1071 +typedef int32_t U_CALLCONV 1.1072 +UTextExtract(UText *ut, 1.1073 + int64_t nativeStart, int64_t nativeLimit, 1.1074 + UChar *dest, int32_t destCapacity, 1.1075 + UErrorCode *status); 1.1076 + 1.1077 +/** 1.1078 + * Function type declaration for UText.replace(). 1.1079 + * 1.1080 + * Replace a range of the original text with a replacement text. 1.1081 + * 1.1082 + * Leaves the current iteration position at the position following the 1.1083 + * newly inserted replacement text. 1.1084 + * 1.1085 + * This function need only be implemented on UText types that support writing. 1.1086 + * 1.1087 + * When using this function, there should be only a single UText opened onto the 1.1088 + * underlying native text string. The function is responsible for updating the 1.1089 + * text chunk within the UText to reflect the updated iteration position, 1.1090 + * taking into account any changes to the underlying string's structure caused 1.1091 + * by the replace operation. 1.1092 + * 1.1093 + * @param ut the UText representing the text to be operated on. 
1.1094 + * @param nativeStart the index of the start of the region to be replaced 1.1095 + * @param nativeLimit the index of the character following the region to be replaced. 1.1096 + * @param replacementText pointer to the replacement text 1.1097 + * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. 1.1098 + * @param status receives any error status. Possible errors include 1.1099 + * U_NO_WRITE_PERMISSION 1.1100 + * 1.1101 + * @return The signed number of (native) storage units by which 1.1102 + * the length of the text expanded or contracted. 1.1103 + * 1.1104 + * @stable ICU 3.4 1.1105 + */ 1.1106 +typedef int32_t U_CALLCONV 1.1107 +UTextReplace(UText *ut, 1.1108 + int64_t nativeStart, int64_t nativeLimit, 1.1109 + const UChar *replacementText, int32_t replacmentLength, 1.1110 + UErrorCode *status); 1.1111 + 1.1112 +/** 1.1113 + * Function type declaration for UText.copy(). 1.1114 + * 1.1115 + * Copy or move a substring from one position to another within the text, 1.1116 + * while retaining any metadata associated with the text. 1.1117 + * This function is used to duplicate or reorder substrings. 1.1118 + * The destination index must not overlap the source range. 1.1119 + * 1.1120 + * The text to be copied or moved is inserted at nativeDest; 1.1121 + * it does not replace or overwrite any existing text. 1.1122 + * 1.1123 + * This function need only be implemented for UText types that support writing. 1.1124 + * 1.1125 + * When using this function, there should be only a single UText opened onto the 1.1126 + * underlying native text string. The function is responsible for updating the 1.1127 + * text chunk within the UText to reflect the updated iteration position, 1.1128 + * taking into account any changes to the underlying string's structure caused 1.1129 + * by the copy or move operation. 1.1130 + * 1.1131 + * @param ut The UText representing the text to be operated on. 
1.1132 + * @param nativeStart The index of the start of the region to be copied or moved 1.1133 + * @param nativeLimit The index of the character following the region to be copied or moved. 1.1134 + * @param nativeDest The destination index to which the source substring is copied or moved. 1.1135 + * @param move If TRUE, then the substring is moved, not copied/duplicated. 1.1136 + * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION 1.1137 + * 1.1138 + * @stable ICU 3.4 1.1139 + */ 1.1140 +typedef void U_CALLCONV 1.1141 +UTextCopy(UText *ut, 1.1142 + int64_t nativeStart, int64_t nativeLimit, 1.1143 + int64_t nativeDest, 1.1144 + UBool move, 1.1145 + UErrorCode *status); 1.1146 + 1.1147 +/** 1.1148 + * Function type declaration for UText.mapOffsetToNative(). 1.1149 + * Map from the current UChar offset within the current text chunk to 1.1150 + * the corresponding native index in the original source text. 1.1151 + * 1.1152 + * This is required only for text providers that do not use native UTF-16 indexes. 1.1153 + * 1.1154 + * @param ut the UText. 1.1155 + * @return Absolute (native) index corresponding to chunkOffset in the current chunk. 1.1156 + * The returned native index should always be to a code point boundary. 1.1157 + * 1.1158 + * @stable ICU 3.4 1.1159 + */ 1.1160 +typedef int64_t U_CALLCONV 1.1161 +UTextMapOffsetToNative(const UText *ut); 1.1162 + 1.1163 +/** 1.1164 + * Function type declaration for UText.mapNativeIndexToUTF16(). 1.1165 + * Map from a native index to a UChar offset within a text chunk. 1.1166 + * Behavior is undefined if the native index does not fall within the 1.1167 + * current chunk. 1.1168 + * 1.1169 + * This function is required only for text providers that do not use native UTF-16 indexes. 1.1170 + * 1.1171 + * @param ut The UText containing the text chunk. 1.1172 + * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit. 
1.1173 + * @return Chunk-relative UTF-16 offset corresponding to the specified native 1.1174 + * index. 1.1175 + * 1.1176 + * @stable ICU 3.4 1.1177 + */ 1.1178 +typedef int32_t U_CALLCONV 1.1179 +UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); 1.1180 + 1.1181 + 1.1182 +/** 1.1183 + * Function type declaration for UText.close(). 1.1184 + * 1.1185 + * A Text Provider close function is only required for provider types that make 1.1186 + * allocations in their open function (or other functions) that must be 1.1187 + * cleaned up when the UText is closed. 1.1188 + * 1.1189 + * The allocation of the UText struct itself and any "extra" storage 1.1190 + * associated with the UText is handled by the common UText implementation 1.1191 + * and does not require provider specific cleanup in a close function. 1.1192 + * 1.1193 + * Most UText provider implementations do not need to implement this function. 1.1194 + * 1.1195 + * @param ut A UText object to be closed. 1.1196 + * 1.1197 + * @stable ICU 3.4 1.1198 + */ 1.1199 +typedef void U_CALLCONV 1.1200 +UTextClose(UText *ut); 1.1201 + 1.1202 + 1.1203 +/** 1.1204 + * (public) Function dispatch table for UText. 1.1205 + * Conceptually very much like a C++ Virtual Function Table. 1.1206 + * This struct defines the organization of the table. 1.1207 + * Each text provider implementation must provide an 1.1208 + * actual table that is initialized with the appropriate functions 1.1209 + * for the type of text being handled. 1.1210 + * @stable ICU 3.6 1.1211 + */ 1.1212 +struct UTextFuncs { 1.1213 + /** 1.1214 + * (public) Function table size, sizeof(UTextFuncs) 1.1215 + * Intended for use should the table grow to accommodate added 1.1216 + * functions in the future, to allow tests for older format 1.1217 + * function tables that do not contain the extensions. 
1.1218 + * 1.1219 + * Fields are placed for optimal alignment on 1.1220 + * 32/64/128-bit-pointer machines, by normally grouping together 1.1221 + * 4 32-bit fields, 1.1222 + * 4 pointers, 1.1223 + * 2 64-bit fields 1.1224 + * in sequence. 1.1225 + * @stable ICU 3.6 1.1226 + */ 1.1227 + int32_t tableSize; 1.1228 + 1.1229 + /** 1.1230 + * (private) Alignment padding. 1.1231 + * Do not use, reserved for use by the UText framework only. 1.1232 + * @internal 1.1233 + */ 1.1234 + int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; 1.1235 + 1.1236 + 1.1237 + /** 1.1238 + * (public) Function pointer for UTextClone 1.1239 + * 1.1240 + * @see UTextClone 1.1241 + * @stable ICU 3.6 1.1242 + */ 1.1243 + UTextClone *clone; 1.1244 + 1.1245 + /** 1.1246 + * (public) Function pointer for UTextNativeLength. 1.1247 + * May be expensive to compute! 1.1248 + * 1.1249 + * @see UTextNativeLength 1.1250 + * @stable ICU 3.6 1.1251 + */ 1.1252 + UTextNativeLength *nativeLength; 1.1253 + 1.1254 + /** 1.1255 + * (public) Function pointer for UTextAccess. 1.1256 + * 1.1257 + * @see UTextAccess 1.1258 + * @stable ICU 3.6 1.1259 + */ 1.1260 + UTextAccess *access; 1.1261 + 1.1262 + /** 1.1263 + * (public) Function pointer for UTextExtract. 1.1264 + * 1.1265 + * @see UTextExtract 1.1266 + * @stable ICU 3.6 1.1267 + */ 1.1268 + UTextExtract *extract; 1.1269 + 1.1270 + /** 1.1271 + * (public) Function pointer for UTextReplace. 1.1272 + * 1.1273 + * @see UTextReplace 1.1274 + * @stable ICU 3.6 1.1275 + */ 1.1276 + UTextReplace *replace; 1.1277 + 1.1278 + /** 1.1279 + * (public) Function pointer for UTextCopy. 1.1280 + * 1.1281 + * @see UTextCopy 1.1282 + * @stable ICU 3.6 1.1283 + */ 1.1284 + UTextCopy *copy; 1.1285 + 1.1286 + /** 1.1287 + * (public) Function pointer for UTextMapOffsetToNative. 
1.1288 + * 1.1289 + * @see UTextMapOffsetToNative 1.1290 + * @stable ICU 3.6 1.1291 + */ 1.1292 + UTextMapOffsetToNative *mapOffsetToNative; 1.1293 + 1.1294 + /** 1.1295 + * (public) Function pointer for UTextMapNativeIndexToUTF16. 1.1296 + * 1.1297 + * @see UTextMapNativeIndexToUTF16 1.1298 + * @stable ICU 3.6 1.1299 + */ 1.1300 + UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; 1.1301 + 1.1302 + /** 1.1303 + * (public) Function pointer for UTextClose. 1.1304 + * 1.1305 + * @see UTextClose 1.1306 + * @stable ICU 3.6 1.1307 + */ 1.1308 + UTextClose *close; 1.1309 + 1.1310 + /** 1.1311 + * (private) Spare function pointer 1.1312 + * @internal 1.1313 + */ 1.1314 + UTextClose *spare1; 1.1315 + 1.1316 + /** 1.1317 + * (private) Spare function pointer 1.1318 + * @internal 1.1319 + */ 1.1320 + UTextClose *spare2; 1.1321 + 1.1322 + /** 1.1323 + * (private) Spare function pointer 1.1324 + * @internal 1.1325 + */ 1.1326 + UTextClose *spare3; 1.1327 + 1.1328 +}; 1.1329 +/** 1.1330 + * Function dispatch table for UText 1.1331 + * @see UTextFuncs 1.1332 + */ 1.1333 +typedef struct UTextFuncs UTextFuncs; 1.1334 + 1.1335 + /** 1.1336 + * UText struct. Provides the interface between the generic UText access code 1.1337 + * and the UText provider code that works on specific kinds of 1.1338 + * text (UTF-8, noncontiguous UTF-16, whatever.) 1.1339 + * 1.1340 + * Applications that are using predefined types of text providers 1.1341 + * to pass text data to ICU services will have no need to view the 1.1342 + * internals of the UText structs that they open. 1.1343 + * 1.1344 + * @stable ICU 3.6 1.1345 + */ 1.1346 +struct UText { 1.1347 + /** 1.1348 + * (private) Magic. Used to help detect when UText functions are handed 1.1349 + * invalid or uninitialized UText structs. 1.1350 + * utext_openXYZ() functions take an initialized, 1.1351 + * but not necessarily open, UText struct as an 1.1352 + * optional fill-in parameter. 
This magic field 1.1353 + * is used to check for that initialization. 1.1354 + * Text provider close functions must NOT clear 1.1355 + * the magic field because that would prevent 1.1356 + * reuse of the UText struct. 1.1357 + * @internal 1.1358 + */ 1.1359 + uint32_t magic; 1.1360 + 1.1361 + 1.1362 + /** 1.1363 + * (private) Flags for managing the allocation and freeing of 1.1364 + * memory associated with this UText. 1.1365 + * @internal 1.1366 + */ 1.1367 + int32_t flags; 1.1368 + 1.1369 + 1.1370 + /** 1.1371 + * Text provider properties. This set of flags is maintained by the 1.1372 + * text provider implementation. 1.1373 + * @stable ICU 3.4 1.1374 + */ 1.1375 + int32_t providerProperties; 1.1376 + 1.1377 + /** 1.1378 + * (public) sizeOfStruct=sizeof(UText) 1.1379 + * Allows possible backward compatible extension. 1.1380 + * 1.1381 + * @stable ICU 3.4 1.1382 + */ 1.1383 + int32_t sizeOfStruct; 1.1384 + 1.1385 + /* ------ 16 byte alignment boundary ----------- */ 1.1386 + 1.1387 + 1.1388 + /** 1.1389 + * (protected) Native index of the first character position following 1.1390 + * the current chunk. 1.1391 + * @stable ICU 3.6 1.1392 + */ 1.1393 + int64_t chunkNativeLimit; 1.1394 + 1.1395 + /** 1.1396 + * (protected) Size in bytes of the extra space (pExtra). 1.1397 + * @stable ICU 3.4 1.1398 + */ 1.1399 + int32_t extraSize; 1.1400 + 1.1401 + /** 1.1402 + * (protected) The highest chunk offset where native indexing and 1.1403 + * chunk (UTF-16) indexing correspond. For UTF-16 sources, value 1.1404 + * will be equal to chunkLength. 1.1405 + * 1.1406 + * @stable ICU 3.6 1.1407 + */ 1.1408 + int32_t nativeIndexingLimit; 1.1409 + 1.1410 + /* ---- 16 byte alignment boundary------ */ 1.1411 + 1.1412 + /** 1.1413 + * (protected) Native index of the first character in the text chunk. 1.1414 + * @stable ICU 3.6 1.1415 + */ 1.1416 + int64_t chunkNativeStart; 1.1417 + 1.1418 + /** 1.1419 + * (protected) Current iteration position within the text chunk (UTF-16 buffer). 
1.1420 + * This is the index to the character that will be returned by utext_next32(). 1.1421 + * @stable ICU 3.6 1.1422 + */ 1.1423 + int32_t chunkOffset; 1.1424 + 1.1425 + /** 1.1426 + * (protected) Length of the text chunk (UTF-16 buffer), in UChars. 1.1427 + * @stable ICU 3.6 1.1428 + */ 1.1429 + int32_t chunkLength; 1.1430 + 1.1431 + /* ---- 16 byte alignment boundary-- */ 1.1432 + 1.1433 + 1.1434 + /** 1.1435 + * (protected) pointer to a chunk of text in UTF-16 format. 1.1436 + * May refer either to original storage of the source of the text, or 1.1437 + * if conversion was required, to a buffer owned by the UText. 1.1438 + * @stable ICU 3.6 1.1439 + */ 1.1440 + const UChar *chunkContents; 1.1441 + 1.1442 + /** 1.1443 + * (public) Pointer to Dispatch table for accessing functions for this UText. 1.1444 + * @stable ICU 3.6 1.1445 + */ 1.1446 + const UTextFuncs *pFuncs; 1.1447 + 1.1448 + /** 1.1449 + * (protected) Pointer to additional space requested by the 1.1450 + * text provider during the utext_open operation. 1.1451 + * @stable ICU 3.4 1.1452 + */ 1.1453 + void *pExtra; 1.1454 + 1.1455 + /** 1.1456 + * (protected) Pointer to string or text-containing object or similar. 1.1457 + * This is the source of the text that this UText is wrapping, in a format 1.1458 + * that is known to the text provider functions. 1.1459 + * @stable ICU 3.4 1.1460 + */ 1.1461 + const void *context; 1.1462 + 1.1463 + /* --- 16 byte alignment boundary--- */ 1.1464 + 1.1465 + /** 1.1466 + * (protected) Pointer fields available for use by the text provider. 1.1467 + * Not used by UText common code. 1.1468 + * @stable ICU 3.6 1.1469 + */ 1.1470 + const void *p; 1.1471 + /** 1.1472 + * (protected) Pointer fields available for use by the text provider. 1.1473 + * Not used by UText common code. 1.1474 + * @stable ICU 3.6 1.1475 + */ 1.1476 + const void *q; 1.1477 + /** 1.1478 + * (protected) Pointer fields available for use by the text provider. 1.1479 + * Not used by UText common code. 
1.1480 + * @stable ICU 3.6 1.1481 + */ 1.1482 + const void *r; 1.1483 + 1.1484 + /** 1.1485 + * Private field reserved for future use by the UText framework 1.1486 + * itself. This is not to be touched by the text providers. 1.1487 + * @internal ICU 3.4 1.1488 + */ 1.1489 + void *privP; 1.1490 + 1.1491 + 1.1492 + /* --- 16 byte alignment boundary--- */ 1.1493 + 1.1494 + 1.1495 + /** 1.1496 + * (protected) Integer field reserved for use by the text provider. 1.1497 + * Not used by the UText framework, or by the client (user) of the UText. 1.1498 + * @stable ICU 3.4 1.1499 + */ 1.1500 + int64_t a; 1.1501 + 1.1502 + /** 1.1503 + * (protected) Integer field reserved for use by the text provider. 1.1504 + * Not used by the UText framework, or by the client (user) of the UText. 1.1505 + * @stable ICU 3.4 1.1506 + */ 1.1507 + int32_t b; 1.1508 + 1.1509 + /** 1.1510 + * (protected) Integer field reserved for use by the text provider. 1.1511 + * Not used by the UText framework, or by the client (user) of the UText. 1.1512 + * @stable ICU 3.4 1.1513 + */ 1.1514 + int32_t c; 1.1515 + 1.1516 + /* ---- 16 byte alignment boundary---- */ 1.1517 + 1.1518 + 1.1519 + /** 1.1520 + * Private field reserved for future use by the UText framework 1.1521 + * itself. This is not to be touched by the text providers. 1.1522 + * @internal ICU 3.4 1.1523 + */ 1.1524 + int64_t privA; 1.1525 + /** 1.1526 + * Private field reserved for future use by the UText framework 1.1527 + * itself. This is not to be touched by the text providers. 1.1528 + * @internal ICU 3.4 1.1529 + */ 1.1530 + int32_t privB; 1.1531 + /** 1.1532 + * Private field reserved for future use by the UText framework 1.1533 + * itself. This is not to be touched by the text providers. 1.1534 + * @internal ICU 3.4 1.1535 + */ 1.1536 + int32_t privC; 1.1537 +}; 1.1538 + 1.1539 + 1.1540 +/** 1.1541 + * Common function for use by Text Provider implementations to allocate and/or initialize 1.1542 + * a new UText struct. 
To be called in the implementation of utext_open() functions. 1.1543 + * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. 1.1544 + * If the supplied UText is already open, the provider's close function will be called 1.1545 + * so that the struct can be reused by the open that is in progress. 1.1546 + * 1.1547 + * @param ut pointer to a UText struct to be re-used, or null if a new UText 1.1548 + * should be allocated. 1.1549 + * @param extraSpace The amount of additional space to be allocated as part 1.1550 + * of this UText, for use by types of providers that require 1.1551 + * additional storage. 1.1552 + * @param status Errors are returned here. 1.1553 + * @return pointer to the UText, allocated if necessary, with extra space set up if requested. 1.1554 + * @stable ICU 3.4 1.1555 + */ 1.1556 +U_STABLE UText * U_EXPORT2 1.1557 +utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); 1.1558 + 1.1559 +/* Do not wrap the following enum in #ifndef U_HIDE_INTERNAL_API: the public UTEXT_INITIALIZER macro expands to UTEXT_MAGIC. */ 1.1560 +/** 1.1561 + * @internal 1.1562 + * Value used to help identify correctly initialized UText structs. 1.1563 + * Note: must be publicly visible so that UTEXT_INITIALIZER can access it. 1.1564 + */ 1.1565 +enum { 1.1566 + UTEXT_MAGIC = 0x345ad82c 1.1567 +}; 1.1568 + 1.1569 + 1.1570 +/** 1.1571 + * initializer to be used with local (stack) instances of a UText 1.1572 + * struct. UText structs must be initialized before passing 1.1573 + * them to one of the utext_open functions. 
1.1574 + * 1.1575 + * @stable ICU 3.6 1.1576 + */ 1.1577 +#define UTEXT_INITIALIZER { \ 1.1578 + UTEXT_MAGIC, /* magic */ \ 1.1579 + 0, /* flags */ \ 1.1580 + 0, /* providerProperties */ \ 1.1581 + sizeof(UText), /* sizeOfStruct */ \ 1.1582 + 0, /* chunkNativeLimit */ \ 1.1583 + 0, /* extraSize */ \ 1.1584 + 0, /* nativeIndexingLimit */ \ 1.1585 + 0, /* chunkNativeStart */ \ 1.1586 + 0, /* chunkOffset */ \ 1.1587 + 0, /* chunkLength */ \ 1.1588 + NULL, /* chunkContents */ \ 1.1589 + NULL, /* pFuncs */ \ 1.1590 + NULL, /* pExtra */ \ 1.1591 + NULL, /* context */ \ 1.1592 + NULL, NULL, NULL, /* p, q, r */ \ 1.1593 + NULL, /* privP */ \ 1.1594 + 0, 0, 0, /* a, b, c */ \ 1.1595 + 0, 0, 0 /* privA, privB, privC */ \ 1.1596 + } 1.1597 + 1.1598 + 1.1599 +U_CDECL_END 1.1600 + 1.1601 + 1.1602 + 1.1603 +#endif