Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2004-2012, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: utext.h |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2004oct06 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | */ |
michael@0 | 16 | |
michael@0 | 17 | #ifndef __UTEXT_H__ |
michael@0 | 18 | #define __UTEXT_H__ |
michael@0 | 19 | |
michael@0 | 20 | /** |
michael@0 | 21 | * \file |
michael@0 | 22 | * \brief C API: Abstract Unicode Text API |
michael@0 | 23 | * |
michael@0 | 24 | * The Text Access API provides a means to allow text that is stored in alternative |
michael@0 | 25 | * formats to work with ICU services. ICU normally operates on text that is |
michael@0 | 26 | * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type |
michael@0 | 27 | * UnicodeString for C++ APIs. |
michael@0 | 28 | * |
michael@0 | 29 | * ICU Text Access allows other formats, such as UTF-8 or non-contiguous |
michael@0 | 30 | * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. |
michael@0 | 31 | * |
michael@0 | 32 | * There are three general classes of usage for UText: |
michael@0 | 33 | * |
michael@0 | 34 | * Application Level Use. This is the simplest usage - applications would |
michael@0 | 35 | * use one of the utext_open() functions on their input text, and pass |
michael@0 | 36 | * the resulting UText to the desired ICU service. |
michael@0 | 37 | * |
michael@0 | 38 | * Second is usage in ICU Services, such as break iteration, that will need to |
michael@0 | 39 | * operate on input presented to them as a UText. These implementations |
michael@0 | 40 | * will need to use the iteration and related UText functions to gain |
michael@0 | 41 | * access to the actual text. |
michael@0 | 42 | * |
michael@0 | 43 | * The third class of UText users are "text providers." These are the |
michael@0 | 44 | * UText implementations for the various text storage formats. An application |
michael@0 | 45 | * or system with a unique text storage format can implement a set of |
michael@0 | 46 | * UText provider functions for that format, which will then allow |
michael@0 | 47 | * ICU services to operate on that format. |
michael@0 | 48 | * |
michael@0 | 49 | * |
michael@0 | 50 | * <em>Iterating over text</em> |
michael@0 | 51 | * |
michael@0 | 52 | * Here is sample code for a forward iteration over the contents of a UText |
michael@0 | 53 | * |
michael@0 | 54 | * \code |
michael@0 | 55 | * UChar32 c; |
michael@0 | 56 | * UText *ut = whatever(); |
michael@0 | 57 | * |
michael@0 | 58 | * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { |
michael@0 | 59 | * // do whatever with the codepoint c here. |
michael@0 | 60 | * } |
michael@0 | 61 | * \endcode |
michael@0 | 62 | * |
michael@0 | 63 | * And here is similar code to iterate in the reverse direction, from the end |
michael@0 | 64 | * of the text towards the beginning. |
michael@0 | 65 | * |
michael@0 | 66 | * \code |
michael@0 | 67 | * UChar32 c; |
michael@0 | 68 | * UText *ut = whatever(); |
michael@0 | 69 | * int64_t textLength = utext_nativeLength(ut); |
michael@0 | 70 | * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { |
michael@0 | 71 | * // do whatever with the codepoint c here. |
michael@0 | 72 | * } |
michael@0 | 73 | * \endcode |
michael@0 | 74 | * |
michael@0 | 75 | * <em>Characters and Indexing</em> |
michael@0 | 76 | * |
michael@0 | 77 | * Indexing into text by UText functions is nearly always in terms of the native |
michael@0 | 78 | * indexing of the underlying text storage. The storage format could be UTF-8 |
michael@0 | 79 | * or UTF-32, for example. When coding to the UText access API, no assumptions |
michael@0 | 80 | * can be made regarding the size of characters, or how far an index |
michael@0 | 81 | * may move when iterating between characters. |
michael@0 | 82 | * |
michael@0 | 83 | * All indices supplied to UText functions are pinned to the length of the |
michael@0 | 84 | * text. An out-of-bounds index is not considered to be an error, but is |
michael@0 | 85 | * adjusted to be in the range 0 <= index <= length of input text. |
michael@0 | 86 | * |
michael@0 | 87 | * |
michael@0 | 88 | * When an index position is returned from a UText function, it will be |
michael@0 | 89 | * a native index to the underlying text. In the case of multi-unit characters, |
michael@0 | 90 | * it will always refer to the first position of the character, |
michael@0 | 91 | * never to the interior. This is essentially the same thing as saying that |
michael@0 | 92 | * a returned index will always point to a boundary between characters. |
michael@0 | 93 | * |
michael@0 | 94 | * When a native index is supplied to a UText function, all indices that |
michael@0 | 95 | * refer to any part of a multi-unit character representation are considered |
michael@0 | 96 | * to be equivalent. In the case of multi-unit characters, an incoming index |
michael@0 | 97 | * will be logically normalized to refer to the start of the character. |
michael@0 | 98 | * |
michael@0 | 99 | * It is possible to test whether a native index is on a code point boundary |
michael@0 | 100 | * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). |
michael@0 | 101 | * If the index is returned unchanged, it was on a code point boundary. If |
michael@0 | 102 | * an adjusted index is returned, the original index referred to the |
michael@0 | 103 | * interior of a character. |
michael@0 | 104 | * |
michael@0 | 105 | * <em>Conventions for calling UText functions</em> |
michael@0 | 106 | * |
michael@0 | 107 | * Most UText access functions have as their first parameter a (UText *) pointer, |
michael@0 | 108 | * which specifies the UText to be used. Unless otherwise noted, the |
michael@0 | 109 | * pointer must refer to a valid, open UText. Attempting to |
michael@0 | 110 | * use a closed UText or passing a NULL pointer is a programming error and |
michael@0 | 111 | * will produce undefined results or NULL pointer exceptions. |
michael@0 | 112 | * |
michael@0 | 113 | * The utext_open family of functions can either open an existing (closed) |
michael@0 | 114 | * UText, or heap allocate a new UText. Here is sample code for creating |
michael@0 | 115 | * a stack-allocated UText. |
michael@0 | 116 | * |
michael@0 | 117 | * \code |
michael@0 | 118 | * char *s = whatever(); // A utf-8 string |
michael@0 | 119 | * UErrorCode status = U_ZERO_ERROR; |
michael@0 | 120 | * UText ut = UTEXT_INITIALIZER; |
michael@0 | 121 | * utext_openUTF8(&ut, s, -1, &status); |
michael@0 | 122 | * if (U_FAILURE(status)) { |
michael@0 | 123 | * // error handling |
michael@0 | 124 | * } else { |
michael@0 | 125 | * // work with the UText |
michael@0 | 126 | * } |
michael@0 | 127 | * \endcode |
michael@0 | 128 | * |
michael@0 | 129 | * Any existing UText passed to an open function _must_ have been initialized, |
michael@0 | 130 | * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated |
michael@0 | 131 | * by an open function. Passing NULL will cause the open function to |
michael@0 | 132 | * heap-allocate and fully initialize a new UText. |
michael@0 | 133 | * |
michael@0 | 134 | */ |
michael@0 | 135 | |
michael@0 | 136 | |
michael@0 | 137 | |
michael@0 | 138 | #include "unicode/utypes.h" |
michael@0 | 139 | #include "unicode/uchar.h" |
michael@0 | 140 | #if U_SHOW_CPLUSPLUS_API |
michael@0 | 141 | #include "unicode/localpointer.h" |
michael@0 | 142 | #include "unicode/rep.h" |
michael@0 | 143 | #include "unicode/unistr.h" |
michael@0 | 144 | #include "unicode/chariter.h" |
michael@0 | 145 | #endif |
michael@0 | 146 | |
michael@0 | 147 | |
michael@0 | 148 | U_CDECL_BEGIN |
michael@0 | 149 | |
michael@0 | 150 | struct UText; |
michael@0 | 151 | typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ |
michael@0 | 152 | |
michael@0 | 153 | |
michael@0 | 154 | /*************************************************************************************** |
michael@0 | 155 | * |
michael@0 | 156 | * C Functions for creating UText wrappers around various kinds of text strings. |
michael@0 | 157 | * |
michael@0 | 158 | ****************************************************************************************/ |
michael@0 | 159 | |
michael@0 | 160 | |
michael@0 | 161 | /** |
michael@0 | 162 | * Close function for UText instances. |
michael@0 | 163 | * Cleans up, releases any resources being held by an open UText. |
michael@0 | 164 | * <p> |
michael@0 | 165 | * If the UText was originally allocated by one of the utext_open functions, |
michael@0 | 166 | * the storage associated with the utext will also be freed. |
michael@0 | 167 | * If the UText storage originated with the application, as it would with |
michael@0 | 168 | * a local or static instance, the storage will not be deleted. |
michael@0 | 169 | * |
michael@0 | 170 | * An open UText can be reset to refer to a new string by using one of the utext_open() |
michael@0 | 171 | * functions without first closing the UText. |
michael@0 | 172 | * |
michael@0 | 173 | * @param ut The UText to be closed. |
michael@0 | 174 | * @return NULL if the UText struct was deleted by the close. If the UText struct |
michael@0 | 175 | * was originally provided by the caller to the open function, it is |
michael@0 | 176 | * returned by this function, and may be safely used again in |
michael@0 | 177 | * a subsequent utext_open. |
michael@0 | 178 | * |
michael@0 | 179 | * @stable ICU 3.4 |
michael@0 | 180 | */ |
michael@0 | 181 | U_STABLE UText * U_EXPORT2 |
michael@0 | 182 | utext_close(UText *ut); |
michael@0 | 183 | |
michael@0 | 184 | #if U_SHOW_CPLUSPLUS_API |
michael@0 | 185 | |
michael@0 | 186 | U_NAMESPACE_BEGIN |
michael@0 | 187 | |
michael@0 | 188 | /** |
michael@0 | 189 | * \class LocalUTextPointer |
michael@0 | 190 | * "Smart pointer" class, closes a UText via utext_close(). |
michael@0 | 191 | * For most methods see the LocalPointerBase base class. |
michael@0 | 192 | * |
michael@0 | 193 | * @see LocalPointerBase |
michael@0 | 194 | * @see LocalPointer |
michael@0 | 195 | * @stable ICU 4.4 |
michael@0 | 196 | */ |
michael@0 | 197 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); |
michael@0 | 198 | |
michael@0 | 199 | U_NAMESPACE_END |
michael@0 | 200 | |
michael@0 | 201 | #endif |
michael@0 | 202 | |
michael@0 | 203 | /** |
michael@0 | 204 | * Open a read-only UText implementation for UTF-8 strings. |
michael@0 | 205 | * |
michael@0 | 206 | * \htmlonly |
michael@0 | 207 | * Any invalid UTF-8 in the input will be handled in this way: |
michael@0 | 208 | * a sequence of bytes that has the form of a truncated, but otherwise valid, |
michael@0 | 209 | * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. |
michael@0 | 210 | * Any other illegal bytes will each be replaced by a \uFFFD. |
michael@0 | 211 | * \endhtmlonly |
michael@0 | 212 | * |
michael@0 | 213 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
michael@0 | 214 | * If non-NULL, must refer to an initialized UText struct, which will then |
michael@0 | 215 | * be reset to reference the specified UTF-8 string. |
michael@0 | 216 | * @param s A UTF-8 string. Must not be NULL. |
michael@0 | 217 | * @param length The length of the UTF-8 string in bytes, or -1 if the string is |
michael@0 | 218 | * zero terminated. |
michael@0 | 219 | * @param status Errors are returned here. |
michael@0 | 220 | * @return A pointer to the UText. If a pre-allocated UText was provided, it |
michael@0 | 221 | * will always be used and returned. |
michael@0 | 222 | * @stable ICU 3.4 |
michael@0 | 223 | */ |
michael@0 | 224 | U_STABLE UText * U_EXPORT2 |
michael@0 | 225 | utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); |
michael@0 | 226 | |
michael@0 | 227 | |
michael@0 | 228 | /** |
michael@0 | 229 | * Open a read-only UText for UChar * string. |
michael@0 | 230 | * |
michael@0 | 231 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
michael@0 | 232 | * If non-NULL, must refer to an initialized UText struct, which will then |
michael@0 | 233 | * be reset to reference the specified UChar string. |
michael@0 | 234 | * @param s A UChar (UTF-16) string |
michael@0 | 235 | * @param length The number of UChars in the input string, or -1 if the string is |
michael@0 | 236 | * zero terminated. |
michael@0 | 237 | * @param status Errors are returned here. |
michael@0 | 238 | * @return A pointer to the UText. If a pre-allocated UText was provided, it |
michael@0 | 239 | * will always be used and returned. |
michael@0 | 240 | * @stable ICU 3.4 |
michael@0 | 241 | */ |
michael@0 | 242 | U_STABLE UText * U_EXPORT2 |
michael@0 | 243 | utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); |
michael@0 | 244 | |
michael@0 | 245 | |
michael@0 | 246 | #if U_SHOW_CPLUSPLUS_API |
michael@0 | 247 | /** |
michael@0 | 248 | * Open a writable UText for a non-const UnicodeString. |
michael@0 | 249 | * |
michael@0 | 250 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
michael@0 | 251 | * If non-NULL, must refer to an initialized UText struct, which will then |
michael@0 | 252 | * be reset to reference the specified input string. |
michael@0 | 253 | * @param s A UnicodeString. |
michael@0 | 254 | * @param status Errors are returned here. |
michael@0 | 255 | * @return Pointer to the UText. If a UText was supplied as input, this |
michael@0 | 256 | * will always be used and returned. |
michael@0 | 257 | * @stable ICU 3.4 |
michael@0 | 258 | */ |
michael@0 | 259 | U_STABLE UText * U_EXPORT2 |
michael@0 | 260 | utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status); |
michael@0 | 261 | |
michael@0 | 262 | |
michael@0 | 263 | /** |
michael@0 | 264 | * Open a UText for a const UnicodeString. The resulting UText will not be writable. |
michael@0 | 265 | * |
michael@0 | 266 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
michael@0 | 267 | * If non-NULL, must refer to an initialized UText struct, which will then |
michael@0 | 268 | * be reset to reference the specified input string. |
michael@0 | 269 | * @param s A const UnicodeString to be wrapped. |
michael@0 | 270 | * @param status Errors are returned here. |
michael@0 | 271 | * @return Pointer to the UText. If a UText was supplied as input, this |
michael@0 | 272 | * will always be used and returned. |
michael@0 | 273 | * @stable ICU 3.4 |
michael@0 | 274 | */ |
michael@0 | 275 | U_STABLE UText * U_EXPORT2 |
michael@0 | 276 | utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status); |
michael@0 | 277 | |
michael@0 | 278 | |
michael@0 | 279 | /** |
michael@0 | 280 | * Open a writable UText implementation for an ICU Replaceable object. |
michael@0 | 281 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
michael@0 | 282 | * If non-NULL, must refer to an already existing UText, which will then |
michael@0 | 283 | * be reset to reference the specified replaceable text. |
michael@0 | 284 | * @param rep A Replaceable text object. |
michael@0 | 285 | * @param status Errors are returned here. |
michael@0 | 286 | * @return Pointer to the UText. If a UText was supplied as input, this |
michael@0 | 287 | * will always be used and returned. |
michael@0 | 288 | * @see Replaceable |
michael@0 | 289 | * @stable ICU 3.4 |
michael@0 | 290 | */ |
michael@0 | 291 | U_STABLE UText * U_EXPORT2 |
michael@0 | 292 | utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status); |
michael@0 | 293 | |
michael@0 | 294 | /** |
michael@0 | 295 | * Open a UText implementation over an ICU CharacterIterator. |
michael@0 | 296 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
michael@0 | 297 | * If non-NULL, must refer to an already existing UText, which will then |
michael@0 | 298 | * be reset to reference the specified character iterator. |
michael@0 | 299 | * @param ci A Character Iterator. |
michael@0 | 300 | * @param status Errors are returned here. |
michael@0 | 301 | * @return Pointer to the UText. If a UText was supplied as input, this |
michael@0 | 302 | * will always be used and returned. |
michael@0 | 303 | * @see CharacterIterator |
michael@0 | 304 | * @stable ICU 3.4 |
michael@0 | 305 | */ |
michael@0 | 306 | U_STABLE UText * U_EXPORT2 |
michael@0 | 307 | utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status); |
michael@0 | 308 | |
michael@0 | 309 | #endif |
michael@0 | 310 | |
michael@0 | 311 | |
michael@0 | 312 | /** |
michael@0 | 313 | * Clone a UText. This is much like opening a UText where the source text is itself |
michael@0 | 314 | * another UText. |
michael@0 | 315 | * |
michael@0 | 316 | * A deep clone will copy both the UText data structures and the underlying text. |
michael@0 | 317 | * The original and cloned UText will operate completely independently; modifications |
michael@0 | 318 | * made to the text in one will not affect the other. Text providers are not |
michael@0 | 319 | * required to support deep clones. The user of clone() must check the status return |
michael@0 | 320 | * and be prepared to handle failures. |
michael@0 | 321 | * |
michael@0 | 322 | * The standard UText implementations for UTF8, UChar *, UnicodeString and |
michael@0 | 323 | * Replaceable all support deep cloning. |
michael@0 | 324 | * |
michael@0 | 325 | * The UText returned from a deep clone will be writable, assuming that the text |
michael@0 | 326 | * provider is able to support writing, even if the source UText had been made |
michael@0 | 327 | * non-writable by means of utext_freeze(). |
michael@0 | 328 | * |
michael@0 | 329 | * A shallow clone replicates only the UText data structures; it does not make |
michael@0 | 330 | * a copy of the underlying text. Shallow clones can be used as an efficient way to |
michael@0 | 331 | * have multiple iterators active in a single text string that is not being |
michael@0 | 332 | * modified. |
michael@0 | 333 | * |
michael@0 | 334 | * A shallow clone operation will not fail, barring truly exceptional conditions such |
michael@0 | 335 | * as memory allocation failures. |
michael@0 | 336 | * |
michael@0 | 337 | * Shallow UText clones should be avoided if the UText functions that modify the |
michael@0 | 338 | * text are expected to be used, either on the original or the cloned UText. |
michael@0 | 339 | * Any such modifications can cause unpredictable behavior. Read Only |
michael@0 | 340 | * shallow clones provide some protection against errors of this type by |
michael@0 | 341 | * disabling text modification via the cloned UText. |
michael@0 | 342 | * |
michael@0 | 343 | * A shallow clone made with the readOnly parameter == FALSE will preserve the |
michael@0 | 344 | * utext_isWritable() state of the source object. Note, however, that |
michael@0 | 345 | * write operations must be avoided while more than one UText exists that refer |
michael@0 | 346 | * to the same underlying text. |
michael@0 | 347 | * |
michael@0 | 348 | * A UText and its clone may be safely concurrently accessed by separate threads. |
michael@0 | 349 | * This is true for read access only with shallow clones, and for both read and |
michael@0 | 350 | * write access with deep clones. |
michael@0 | 351 | * It is the responsibility of the Text Provider to ensure that this thread safety |
michael@0 | 352 | * constraint is met. |
michael@0 | 353 | * |
michael@0 | 354 | * @param dest A UText struct to be filled in with the result of the clone operation, |
michael@0 | 355 | * or NULL if the clone function should heap-allocate a new UText struct. |
michael@0 | 356 | * If non-NULL, must refer to an already existing UText, which will then |
michael@0 | 357 | * be reset to become the clone. |
michael@0 | 358 | * @param src The UText to be cloned. |
michael@0 | 359 | * @param deep TRUE to request a deep clone, FALSE for a shallow clone. |
michael@0 | 360 | * @param readOnly TRUE to request that the cloned UText have read only access to the |
michael@0 | 361 | * underlying text. |
michael@0 | 362 | |
michael@0 | 363 | * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR |
michael@0 | 364 | * will be returned if the text provider is unable to clone the |
michael@0 | 365 | * original text. |
michael@0 | 366 | * @return The newly created clone, or NULL if the clone operation failed. |
michael@0 | 367 | * @stable ICU 3.4 |
michael@0 | 368 | */ |
michael@0 | 369 | U_STABLE UText * U_EXPORT2 |
michael@0 | 370 | utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); |
michael@0 | 371 | |
michael@0 | 372 | |
michael@0 | 373 | /** |
michael@0 | 374 | * Compare two UText objects for equality. |
michael@0 | 375 | * UTexts are equal if they are iterating over the same text, and |
michael@0 | 376 | * have the same iteration position within the text. |
michael@0 | 377 | * If either or both of the parameters are NULL, the comparison is FALSE. |
michael@0 | 378 | * |
michael@0 | 379 | * @param a The first of the two UTexts to compare. |
michael@0 | 380 | * @param b The other UText to be compared. |
michael@0 | 381 | * @return TRUE if the two UTexts are equal. |
michael@0 | 382 | * @stable ICU 3.6 |
michael@0 | 383 | */ |
michael@0 | 384 | U_STABLE UBool U_EXPORT2 |
michael@0 | 385 | utext_equals(const UText *a, const UText *b); |
michael@0 | 386 | |
michael@0 | 387 | |
michael@0 | 388 | /***************************************************************************** |
michael@0 | 389 | * |
michael@0 | 390 | * Functions to work with the text represented by a UText wrapper |
michael@0 | 391 | * |
michael@0 | 392 | *****************************************************************************/ |
michael@0 | 393 | |
michael@0 | 394 | /** |
michael@0 | 395 | * Get the length of the text. Depending on the characteristics |
michael@0 | 396 | * of the underlying text representation, this may be expensive. |
michael@0 | 397 | * @see utext_isLengthExpensive() |
michael@0 | 398 | * |
michael@0 | 399 | * |
michael@0 | 400 | * @param ut the text to be accessed. |
michael@0 | 401 | * @return the length of the text, expressed in native units. |
michael@0 | 402 | * |
michael@0 | 403 | * @stable ICU 3.4 |
michael@0 | 404 | */ |
michael@0 | 405 | U_STABLE int64_t U_EXPORT2 |
michael@0 | 406 | utext_nativeLength(UText *ut); |
michael@0 | 407 | |
michael@0 | 408 | /** |
michael@0 | 409 | * Return TRUE if calculating the length of the text could be expensive. |
michael@0 | 410 | * Finding the length of NUL terminated strings is considered to be expensive. |
michael@0 | 411 | * |
michael@0 | 412 | * Note that the value of this function may change |
michael@0 | 413 | * as the result of other operations on a UText. |
michael@0 | 414 | * Once the length of a string has been discovered, it will no longer |
michael@0 | 415 | * be expensive to report it. |
michael@0 | 416 | * |
michael@0 | 417 | * @param ut the text to be accessed. |
michael@0 | 418 | * @return TRUE if determining the length of the text could be time consuming. |
michael@0 | 419 | * @stable ICU 3.4 |
michael@0 | 420 | */ |
michael@0 | 421 | U_STABLE UBool U_EXPORT2 |
michael@0 | 422 | utext_isLengthExpensive(const UText *ut); |
michael@0 | 423 | |
michael@0 | 424 | /** |
michael@0 | 425 | * Returns the code point at the requested index, |
michael@0 | 426 | * or U_SENTINEL (-1) if it is out of bounds. |
michael@0 | 427 | * |
michael@0 | 428 | * If the specified index points to the interior of a multi-unit |
michael@0 | 429 | * character - one of the trail bytes of a UTF-8 sequence, for example - |
michael@0 | 430 | * the complete code point will be returned. |
michael@0 | 431 | * |
michael@0 | 432 | * The iteration position will be set to the start of the returned code point. |
michael@0 | 433 | * |
michael@0 | 434 | * This function is roughly equivalent to the sequence |
michael@0 | 435 | * utext_setNativeIndex(index); |
michael@0 | 436 | * utext_current32(); |
michael@0 | 437 | * (There is a subtle difference if the index is out of bounds by being less than zero - |
michael@0 | 438 | * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() |
michael@0 | 439 | * will return the char at zero. utext_char32At(negative index), on the other hand, will |
michael@0 | 440 | * return the U_SENTINEL value of -1.) |
michael@0 | 441 | * |
michael@0 | 442 | * @param ut the text to be accessed |
michael@0 | 443 | * @param nativeIndex the native index of the character to be accessed. If the index points |
michael@0 | 444 | * to other than the first unit of a multi-unit character, it will be adjusted |
michael@0 | 445 | * to the start of the character. |
michael@0 | 446 | * @return the code point at the specified index. |
michael@0 | 447 | * @stable ICU 3.4 |
michael@0 | 448 | */ |
michael@0 | 449 | U_STABLE UChar32 U_EXPORT2 |
michael@0 | 450 | utext_char32At(UText *ut, int64_t nativeIndex); |
michael@0 | 451 | |
michael@0 | 452 | |
michael@0 | 453 | /** |
michael@0 | 454 | * |
michael@0 | 455 | * Get the code point at the current iteration position, |
michael@0 | 456 | * or U_SENTINEL (-1) if the iteration has reached the end of |
michael@0 | 457 | * the input text. |
michael@0 | 458 | * |
michael@0 | 459 | * @param ut the text to be accessed. |
michael@0 | 460 | * @return the Unicode code point at the current iterator position. |
michael@0 | 461 | * @stable ICU 3.4 |
michael@0 | 462 | */ |
michael@0 | 463 | U_STABLE UChar32 U_EXPORT2 |
michael@0 | 464 | utext_current32(UText *ut); |
michael@0 | 465 | |
michael@0 | 466 | |
michael@0 | 467 | /** |
michael@0 | 468 | * Get the code point at the current iteration position of the UText, and |
michael@0 | 469 | * advance the position to the first index following the character. |
michael@0 | 470 | * |
michael@0 | 471 | * If the position is at the end of the text (the index following |
michael@0 | 472 | * the last character, which is also the length of the text), |
michael@0 | 473 | * return U_SENTINEL (-1) and do not advance the index. |
michael@0 | 474 | * |
michael@0 | 475 | * This is a post-increment operation. |
michael@0 | 476 | * |
michael@0 | 477 | * An inline macro version of this function, UTEXT_NEXT32(), |
michael@0 | 478 | * is available for performance critical use. |
michael@0 | 479 | * |
michael@0 | 480 | * @param ut the text to be accessed. |
michael@0 | 481 | * @return the Unicode code point at the iteration position. |
michael@0 | 482 | * @see UTEXT_NEXT32 |
michael@0 | 483 | * @stable ICU 3.4 |
michael@0 | 484 | */ |
michael@0 | 485 | U_STABLE UChar32 U_EXPORT2 |
michael@0 | 486 | utext_next32(UText *ut); |
michael@0 | 487 | |
michael@0 | 488 | |
michael@0 | 489 | /** |
michael@0 | 490 | * Move the iterator position to the character (code point) whose |
michael@0 | 491 | * index precedes the current position, and return that character. |
michael@0 | 492 | * This is a pre-decrement operation. |
michael@0 | 493 | * |
michael@0 | 494 | * If the initial position is at the start of the text (index of 0) |
michael@0 | 495 | * return U_SENTINEL (-1), and leave the position unchanged. |
michael@0 | 496 | * |
michael@0 | 497 | * An inline macro version of this function, UTEXT_PREVIOUS32(), |
michael@0 | 498 | * is available for performance critical use. |
michael@0 | 499 | * |
michael@0 | 500 | * @param ut the text to be accessed. |
michael@0 | 501 | * @return the previous UChar32 code point, or U_SENTINEL (-1) |
michael@0 | 502 | * if the iteration has reached the start of the text. |
michael@0 | 503 | * @see UTEXT_PREVIOUS32 |
michael@0 | 504 | * @stable ICU 3.4 |
michael@0 | 505 | */ |
michael@0 | 506 | U_STABLE UChar32 U_EXPORT2 |
michael@0 | 507 | utext_previous32(UText *ut); |
michael@0 | 508 | |
michael@0 | 509 | |
michael@0 | 510 | /** |
michael@0 | 511 | * Set the iteration index and return the code point at that index. |
michael@0 | 512 | * Leave the iteration index at the start of the following code point. |
michael@0 | 513 | * |
michael@0 | 514 | * This function is the most efficient and convenient way to |
michael@0 | 515 | * begin a forward iteration. The results are identical to those |
michael@0 | 516 | * from the sequence |
michael@0 | 517 | * \code |
michael@0 | 518 | * utext_setNativeIndex(); |
michael@0 | 519 | * utext_next32(); |
michael@0 | 520 | * \endcode |
michael@0 | 521 | * |
michael@0 | 522 | * @param ut the text to be accessed. |
michael@0 | 523 | * @param nativeIndex Iteration index, in the native units of the text provider. |
michael@0 | 524 | * @return Code point which starts at or before index, |
michael@0 | 525 | * or U_SENTINEL (-1) if it is out of bounds. |
michael@0 | 526 | * @stable ICU 3.4 |
michael@0 | 527 | */ |
michael@0 | 528 | U_STABLE UChar32 U_EXPORT2 |
michael@0 | 529 | utext_next32From(UText *ut, int64_t nativeIndex); |
michael@0 | 530 | |
michael@0 | 531 | |
michael@0 | 532 | |
michael@0 | 533 | /** |
michael@0 | 534 | * Set the iteration index, and return the code point preceding the |
michael@0 | 535 | * one specified by the initial index. Leave the iteration position |
michael@0 | 536 | * at the start of the returned code point. |
michael@0 | 537 | * |
michael@0 | 538 | * This function is the most efficient and convenient way to |
michael@0 | 539 | * begin a backwards iteration. |
michael@0 | 540 | * |
michael@0 | 541 | * @param ut the text to be accessed. |
michael@0 | 542 | * @param nativeIndex Iteration index in the native units of the text provider. |
michael@0 | 543 | * @return Code point preceding the one at the initial index, |
michael@0 | 544 | * or U_SENTINEL (-1) if it is out of bounds. |
michael@0 | 545 | * |
michael@0 | 546 | * @stable ICU 3.4 |
michael@0 | 547 | */ |
michael@0 | 548 | U_STABLE UChar32 U_EXPORT2 |
michael@0 | 549 | utext_previous32From(UText *ut, int64_t nativeIndex); |
michael@0 | 550 | |
michael@0 | 551 | /** |
michael@0 | 552 | * Get the current iterator position, which can range from 0 to |
michael@0 | 553 | * the length of the text. |
michael@0 | 554 | * The position is a native index into the input text, in whatever format it |
michael@0 | 555 | * may have (possibly UTF-8 for example), and may not always be the same as |
michael@0 | 556 | * the corresponding UChar (UTF-16) index. |
michael@0 | 557 | * The returned position will always be aligned to a code point boundary. |
michael@0 | 558 | * |
michael@0 | 559 | * @param ut the text to be accessed. |
michael@0 | 560 | * @return the current index position, in the native units of the text provider. |
michael@0 | 561 | * @stable ICU 3.4 |
michael@0 | 562 | */ |
michael@0 | 563 | U_STABLE int64_t U_EXPORT2 |
michael@0 | 564 | utext_getNativeIndex(const UText *ut); |
michael@0 | 565 | |
michael@0 | 566 | /** |
michael@0 | 567 | * Set the current iteration position to the nearest code point |
michael@0 | 568 | * boundary at or preceding the specified index. |
michael@0 | 569 | * The index is in the native units of the original input text. |
michael@0 | 570 | * If the index is out of range, it will be pinned to be within |
michael@0 | 571 | * the range of the input text. |
michael@0 | 572 | * <p> |
michael@0 | 573 | * It will usually be more efficient to begin an iteration |
michael@0 | 574 | * using the functions utext_next32From() or utext_previous32From() |
michael@0 | 575 | * rather than utext_setNativeIndex(). |
michael@0 | 576 | * <p> |
michael@0 | 577 | * Moving the index position to an adjacent character is best done |
michael@0 | 578 | * with utext_next32(), utext_previous32() or utext_moveIndex32(). |
michael@0 | 579 | * Attempting to do direct arithmetic on the index position is |
michael@0 | 580 | * complicated by the fact that the size (in native units) of a |
michael@0 | 581 | * character depends on the underlying representation of the character |
michael@0 | 582 | * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not |
michael@0 | 583 | * easily knowable. |
michael@0 | 584 | * |
michael@0 | 585 | * @param ut the text to be accessed. |
michael@0 | 586 | * @param nativeIndex the native unit index of the new iteration position. |
michael@0 | 587 | * @stable ICU 3.4 |
michael@0 | 588 | */ |
michael@0 | 589 | U_STABLE void U_EXPORT2 |
michael@0 | 590 | utext_setNativeIndex(UText *ut, int64_t nativeIndex); |
michael@0 | 591 | |
michael@0 | 592 | /** |
michael@0 | 593 | * Move the iterator position by delta code points. The number of code points |
michael@0 | 594 | * is a signed number; a negative delta will move the iterator backwards, |
michael@0 | 595 | * towards the start of the text. |
michael@0 | 596 | * <p> |
michael@0 | 597 | * The index is moved by <code>delta</code> code points |
michael@0 | 598 | * forward or backward, but no further backward than to 0 and |
michael@0 | 599 | * no further forward than to utext_nativeLength(). |
michael@0 | 600 | * The resulting index value will be in between 0 and length, inclusive. |
michael@0 | 601 | * |
michael@0 | 602 | * @param ut the text to be accessed. |
michael@0 | 603 | * @param delta the signed number of code points to move the iteration position. |
michael@0 | 604 | * @return TRUE if the position could be moved the requested number of positions while |
michael@0 | 605 | * staying within the range [0 - text length]. |
michael@0 | 606 | * @stable ICU 3.4 |
michael@0 | 607 | */ |
michael@0 | 608 | U_STABLE UBool U_EXPORT2 |
michael@0 | 609 | utext_moveIndex32(UText *ut, int32_t delta); |
michael@0 | 610 | |
michael@0 | 611 | /** |
michael@0 | 612 | * Get the native index of the character preceding the current position. |
michael@0 | 613 | * If the iteration position is already at the start of the text, zero |
michael@0 | 614 | * is returned. |
michael@0 | 615 | * The value returned is the same as that obtained from the following sequence, |
michael@0 | 616 | * but without the side effect of changing the iteration position. |
michael@0 | 617 | * |
michael@0 | 618 | * \code |
michael@0 | 619 | * UText *ut = whatever; |
michael@0 | 620 | * ... |
michael@0 | 621 | * utext_previous32(ut); |
michael@0 | 622 | * utext_getNativeIndex(ut); |
michael@0 | 623 | * \endcode |
michael@0 | 624 | * |
michael@0 | 625 | * This function is most useful during forwards iteration, where it will get the |
michael@0 | 626 | * native index of the character most recently returned from utext_next32(). |
michael@0 | 627 | * |
michael@0 | 628 | * @param ut the text to be accessed |
michael@0 | 629 | * @return the native index of the character preceding the current index position, |
michael@0 | 630 | * or zero if the current position is at the start of the text. |
michael@0 | 631 | * @stable ICU 3.6 |
michael@0 | 632 | */ |
michael@0 | 633 | U_STABLE int64_t U_EXPORT2 |
michael@0 | 634 | utext_getPreviousNativeIndex(UText *ut); |
michael@0 | 635 | |
michael@0 | 636 | |
michael@0 | 637 | /** |
michael@0 | 638 | * |
michael@0 | 639 | * Extract text from a UText into a UChar buffer. The range of text to be extracted |
michael@0 | 640 | * is specified in the native indices of the UText provider. These may not necessarily |
michael@0 | 641 | * be UTF-16 indices. |
michael@0 | 642 | * <p> |
michael@0 | 643 | * The size (number of 16 bit UChars) of the data to be extracted is returned. The |
michael@0 | 644 | * full number of UChars is returned, even when the extracted text is truncated |
michael@0 | 645 | * because the specified buffer size is too small. |
michael@0 | 646 | * <p> |
michael@0 | 647 | * The extracted string will (if you are a user) / must (if you are a text provider) |
michael@0 | 648 | * be NUL-terminated if there is sufficient space in the destination buffer. This |
michael@0 | 649 | * terminating NUL is not included in the returned length. |
michael@0 | 650 | * <p> |
michael@0 | 651 | * The iteration index is left at the position following the last extracted character. |
michael@0 | 652 | * |
michael@0 | 653 | * @param ut the UText from which to extract data. |
michael@0 | 654 | * @param nativeStart the native index of the first character to extract. |
michael@0 | 655 | * If the specified index is out of range, |
michael@0 | 656 | * it will be pinned to be within 0 <= index <= textLength. |
michael@0 | 657 | * @param nativeLimit the native string index of the position following the last |
michael@0 | 658 | * character to extract. If the specified index is out of range, |
michael@0 | 659 | * it will be pinned to be within 0 <= index <= textLength. |
michael@0 | 660 | * nativeLimit must be >= nativeStart. |
michael@0 | 661 | * @param dest the UChar (UTF-16) buffer into which the extracted text is placed |
michael@0 | 662 | * @param destCapacity The size, in UChars, of the destination buffer. May be zero |
michael@0 | 663 | * for precomputing the required size. |
michael@0 | 664 | * @param status receives any error status. |
michael@0 | 665 | * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the |
michael@0 | 666 | * buffer was too small. Returns number of UChars for preflighting. |
michael@0 | 667 | * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. |
michael@0 | 668 | * |
michael@0 | 669 | * @stable ICU 3.4 |
michael@0 | 670 | */ |
michael@0 | 671 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 672 | utext_extract(UText *ut, |
michael@0 | 673 | int64_t nativeStart, int64_t nativeLimit, |
michael@0 | 674 | UChar *dest, int32_t destCapacity, |
michael@0 | 675 | UErrorCode *status); |
michael@0 | 676 | |
michael@0 | 677 | |
michael@0 | 678 | |
michael@0 | 679 | /************************************************************************************ |
michael@0 | 680 | * |
michael@0 | 681 | * #define inline versions of selected performance-critical text access functions |
michael@0 | 682 | * Caution: do not use auto increment++ or decrement-- expressions |
michael@0 | 683 | * as parameters to these macros. |
michael@0 | 684 | * |
michael@0 | 685 | * For most use, where there is no extreme performance constraint, the |
michael@0 | 686 | * normal, non-inline functions are a better choice. The resulting code |
michael@0 | 687 | * will be smaller, and, if the need ever arises, easier to debug. |
michael@0 | 688 | * |
michael@0 | 689 | * These are implemented as #defines rather than real functions |
michael@0 | 690 | * because there is no fully portable way to do inline functions in plain C. |
michael@0 | 691 | * |
michael@0 | 692 | ************************************************************************************/ |
michael@0 | 693 | |
michael@0 | 694 | #ifndef U_HIDE_INTERNAL_API |
michael@0 | 695 | /** |
michael@0 | 696 | * inline version of utext_current32(), for performance-critical situations. |
michael@0 | 697 | * |
michael@0 | 698 | * Get the code point at the current iteration position of the UText. |
michael@0 | 699 | * Returns U_SENTINEL (-1) if the position is at the end of the |
michael@0 | 700 | * text. |
michael@0 | 701 | * |
michael@0 | 702 | * @internal ICU 4.4 technology preview |
michael@0 | 703 | */ |
michael@0 | 704 | #define UTEXT_CURRENT32(ut) \ |
michael@0 | 705 | ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ |
michael@0 | 706 | ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) |
michael@0 | 707 | #endif /* U_HIDE_INTERNAL_API */ |
michael@0 | 708 | |
michael@0 | 709 | /** |
michael@0 | 710 | * inline version of utext_next32(), for performance-critical situations. |
michael@0 | 711 | * |
michael@0 | 712 | * Get the code point at the current iteration position of the UText, and |
michael@0 | 713 | * advance the position to the first index following the character. |
michael@0 | 714 | * This is a post-increment operation. |
michael@0 | 715 | * Returns U_SENTINEL (-1) if the position is at the end of the |
michael@0 | 716 | * text. |
michael@0 | 717 | * |
michael@0 | 718 | * @stable ICU 3.4 |
michael@0 | 719 | */ |
michael@0 | 720 | #define UTEXT_NEXT32(ut) \ |
michael@0 | 721 | ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ |
michael@0 | 722 | ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) |
michael@0 | 723 | |
michael@0 | 724 | /** |
michael@0 | 725 | * inline version of utext_previous32(), for performance-critical situations. |
michael@0 | 726 | * |
michael@0 | 727 | * Move the iterator position to the character (code point) whose |
michael@0 | 728 | * index precedes the current position, and return that character. |
michael@0 | 729 | * This is a pre-decrement operation. |
michael@0 | 730 | * Returns U_SENTINEL (-1) if the position is at the start of the text. |
michael@0 | 731 | * |
michael@0 | 732 | * @stable ICU 3.4 |
michael@0 | 733 | */ |
michael@0 | 734 | #define UTEXT_PREVIOUS32(ut) \ |
michael@0 | 735 | ((ut)->chunkOffset > 0 && \ |
michael@0 | 736 | (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ |
michael@0 | 737 | (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) |
michael@0 | 738 | |
michael@0 | 739 | /** |
michael@0 | 740 | * inline version of utext_getNativeIndex(), for performance-critical situations. |
michael@0 | 741 | * |
michael@0 | 742 | * Get the current iterator position, which can range from 0 to |
michael@0 | 743 | * the length of the text. |
michael@0 | 744 | * The position is a native index into the input text, in whatever format it |
michael@0 | 745 | * may have (possibly UTF-8 for example), and may not always be the same as |
michael@0 | 746 | * the corresponding UChar (UTF-16) index. |
michael@0 | 747 | * The returned position will always be aligned to a code point boundary. |
michael@0 | 748 | * |
michael@0 | 749 | * @stable ICU 3.6 |
michael@0 | 750 | */ |
michael@0 | 751 | #define UTEXT_GETNATIVEINDEX(ut) \ |
michael@0 | 752 | ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ |
michael@0 | 753 | (ut)->chunkNativeStart+(ut)->chunkOffset : \ |
michael@0 | 754 | (ut)->pFuncs->mapOffsetToNative(ut)) |
michael@0 | 755 | |
michael@0 | 756 | /** |
michael@0 | 757 | * inline version of utext_setNativeIndex(), for performance-critical situations. |
michael@0 | 758 | * |
michael@0 | 759 | * Set the current iteration position to the nearest code point |
michael@0 | 760 | * boundary at or preceding the specified index. |
michael@0 | 761 | * The index is in the native units of the original input text. |
michael@0 | 762 | * If the index is out of range, it will be pinned to be within |
michael@0 | 763 | * the range of the input text. |
michael@0 | 764 | * |
michael@0 | 765 | * @stable ICU 3.8 |
michael@0 | 766 | */ |
michael@0 | 767 | #define UTEXT_SETNATIVEINDEX(ut, ix) \ |
michael@0 | 768 | { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ |
michael@0 | 769 | if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \ |
michael@0 | 770 | (ut)->chunkOffset=(int32_t)__offset; \ |
michael@0 | 771 | } else { \ |
michael@0 | 772 | utext_setNativeIndex((ut), (ix)); } } |
michael@0 | 773 | |
michael@0 | 774 | |
michael@0 | 775 | |
michael@0 | 776 | /************************************************************************************ |
michael@0 | 777 | * |
michael@0 | 778 | * Functions related to writing or modifying the text. |
michael@0 | 779 | * These will work only with modifiable UTexts. Attempting to |
michael@0 | 780 | * modify a read-only UText will return an error status. |
michael@0 | 781 | * |
michael@0 | 782 | ************************************************************************************/ |
michael@0 | 783 | |
michael@0 | 784 | |
michael@0 | 785 | /** |
michael@0 | 786 | * Return TRUE if the text can be written (modified) with utext_replace() or |
michael@0 | 787 | * utext_copy(). For the text to be writable, the text provider must |
michael@0 | 788 | * be of a type that supports writing and the UText must not be frozen. |
michael@0 | 789 | * |
michael@0 | 790 | * Attempting to modify text when utext_isWritable() is FALSE will fail - |
michael@0 | 791 | * the text will not be modified, and an error will be returned from the function |
michael@0 | 792 | * that attempted the modification. |
michael@0 | 793 | * |
michael@0 | 794 | * @param ut the UText to be tested. |
michael@0 | 795 | * @return TRUE if the text is modifiable. |
michael@0 | 796 | * |
michael@0 | 797 | * @see utext_freeze() |
michael@0 | 798 | * @see utext_replace() |
michael@0 | 799 | * @see utext_copy() |
michael@0 | 800 | * @stable ICU 3.4 |
michael@0 | 801 | * |
michael@0 | 802 | */ |
michael@0 | 803 | U_STABLE UBool U_EXPORT2 |
michael@0 | 804 | utext_isWritable(const UText *ut); |
michael@0 | 805 | |
michael@0 | 806 | |
michael@0 | 807 | /** |
michael@0 | 808 | * Test whether there is meta data associated with the text. |
michael@0 | 809 | * @see Replaceable::hasMetaData() |
michael@0 | 810 | * |
michael@0 | 811 | * @param ut The UText to be tested |
michael@0 | 812 | * @return TRUE if the underlying text includes meta data. |
michael@0 | 813 | * @stable ICU 3.4 |
michael@0 | 814 | */ |
michael@0 | 815 | U_STABLE UBool U_EXPORT2 |
michael@0 | 816 | utext_hasMetaData(const UText *ut); |
michael@0 | 817 | |
michael@0 | 818 | |
michael@0 | 819 | /** |
michael@0 | 820 | * Replace a range of the original text with a replacement text. |
michael@0 | 821 | * |
michael@0 | 822 | * Leaves the current iteration position at the position following the |
michael@0 | 823 | * newly inserted replacement text. |
michael@0 | 824 | * |
michael@0 | 825 | * This function is only available on UText types that support writing, |
michael@0 | 826 | * that is, ones where utext_isWritable() returns TRUE. |
michael@0 | 827 | * |
michael@0 | 828 | * When using this function, there should be only a single UText opened onto the |
michael@0 | 829 | * underlying native text string. Behavior after a replace operation |
michael@0 | 830 | * on a UText is undefined for any other additional UTexts that refer to the |
michael@0 | 831 | * modified string. |
michael@0 | 832 | * |
michael@0 | 833 | * @param ut the UText representing the text to be operated on. |
michael@0 | 834 | * @param nativeStart the native index of the start of the region to be replaced |
michael@0 | 835 | * @param nativeLimit the native index of the character following the region to be replaced. |
michael@0 | 836 | * @param replacementText pointer to the replacement text |
michael@0 | 837 | * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated. |
michael@0 | 838 | * @param status receives any error status. Possible errors include |
michael@0 | 839 | * U_NO_WRITE_PERMISSION |
michael@0 | 840 | * |
michael@0 | 841 | * @return The signed number of (native) storage units by which |
michael@0 | 842 | * the length of the text expanded or contracted. |
michael@0 | 843 | * |
michael@0 | 844 | * @stable ICU 3.4 |
michael@0 | 845 | */ |
michael@0 | 846 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 847 | utext_replace(UText *ut, |
michael@0 | 848 | int64_t nativeStart, int64_t nativeLimit, |
michael@0 | 849 | const UChar *replacementText, int32_t replacementLength, |
michael@0 | 850 | UErrorCode *status); |
michael@0 | 851 | |
michael@0 | 852 | |
michael@0 | 853 | |
michael@0 | 854 | /** |
michael@0 | 855 | * |
michael@0 | 856 | * Copy or move a substring from one position to another within the text, |
michael@0 | 857 | * while retaining any metadata associated with the text. |
michael@0 | 858 | * This function is used to duplicate or reorder substrings. |
michael@0 | 859 | * The destination index must not overlap the source range. |
michael@0 | 860 | * |
michael@0 | 861 | * The text to be copied or moved is inserted at destIndex; |
michael@0 | 862 | * it does not replace or overwrite any existing text. |
michael@0 | 863 | * |
michael@0 | 864 | * The iteration position is left following the newly inserted text |
michael@0 | 865 | * at the destination position. |
michael@0 | 866 | * |
michael@0 | 867 | * This function is only available on UText types that support writing, |
michael@0 | 868 | * that is, ones where utext_isWritable() returns TRUE. |
michael@0 | 869 | * |
michael@0 | 870 | * When using this function, there should be only a single UText opened onto the |
michael@0 | 871 | * underlying native text string. Behavior after a copy operation |
michael@0 | 872 | * on a UText is undefined in any other additional UTexts that refer to the |
michael@0 | 873 | * modified string. |
michael@0 | 874 | * |
michael@0 | 875 | * @param ut The UText representing the text to be operated on. |
michael@0 | 876 | * @param nativeStart The native index of the start of the region to be copied or moved |
michael@0 | 877 | * @param nativeLimit The native index of the character position following the region |
michael@0 | 878 | * to be copied. |
michael@0 | 879 | * @param destIndex The native destination index to which the source substring is |
michael@0 | 880 | * copied or moved. |
michael@0 | 881 | * @param move If TRUE, then the substring is moved, not copied/duplicated. |
michael@0 | 882 | * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION |
michael@0 | 883 | * |
michael@0 | 884 | * @stable ICU 3.4 |
michael@0 | 885 | */ |
michael@0 | 886 | U_STABLE void U_EXPORT2 |
michael@0 | 887 | utext_copy(UText *ut, |
michael@0 | 888 | int64_t nativeStart, int64_t nativeLimit, |
michael@0 | 889 | int64_t destIndex, |
michael@0 | 890 | UBool move, |
michael@0 | 891 | UErrorCode *status); |
michael@0 | 892 | |
michael@0 | 893 | |
michael@0 | 894 | /** |
michael@0 | 895 | * <p> |
michael@0 | 896 | * Freeze a UText. This prevents any modification to the underlying text itself |
michael@0 | 897 | * by means of functions operating on this UText. |
michael@0 | 898 | * </p> |
michael@0 | 899 | * <p> |
michael@0 | 900 | * Once frozen, a UText can not be unfrozen. The intent is to ensure |
michael@0 | 901 | * that the text underlying a frozen UText wrapper cannot be modified via that UText. |
michael@0 | 902 | * </p> |
michael@0 | 903 | * <p> |
michael@0 | 904 | * Caution: freezing a UText will disable changes made via the specific |
michael@0 | 905 | * frozen UText wrapper only; it will not have any effect on the ability to |
michael@0 | 906 | * directly modify the text by bypassing the UText. Any such backdoor modifications |
michael@0 | 907 | * are always an error while UText access is occurring because the underlying |
michael@0 | 908 | * text can get out of sync with UText's buffering. |
michael@0 | 909 | * </p> |
michael@0 | 910 | * |
michael@0 | 911 | * @param ut The UText to be frozen. |
michael@0 | 912 | * @see utext_isWritable() |
michael@0 | 913 | * @stable ICU 3.6 |
michael@0 | 914 | */ |
michael@0 | 915 | U_STABLE void U_EXPORT2 |
michael@0 | 916 | utext_freeze(UText *ut); |
michael@0 | 917 | |
michael@0 | 918 | |
michael@0 | 919 | /** |
michael@0 | 920 | * UText provider properties (bit field indexes). |
michael@0 | 921 | * |
michael@0 | 922 | * @see UText |
michael@0 | 923 | * @stable ICU 3.4 |
michael@0 | 924 | */ |
michael@0 | 925 | enum { |
michael@0 | 926 | /** |
michael@0 | 927 | * It is potentially time consuming for the provider to determine the length of the text. |
michael@0 | 928 | * @stable ICU 3.4 |
michael@0 | 929 | */ |
michael@0 | 930 | UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, |
michael@0 | 931 | /** |
michael@0 | 932 | * Text chunks remain valid and usable until the text object is modified or |
michael@0 | 933 | * deleted, not just until the next time the access() function is called |
michael@0 | 934 | * (which is the default). |
michael@0 | 935 | * @stable ICU 3.4 |
michael@0 | 936 | */ |
michael@0 | 937 | UTEXT_PROVIDER_STABLE_CHUNKS = 2, |
michael@0 | 938 | /** |
michael@0 | 939 | * The provider supports modifying the text via the replace() and copy() |
michael@0 | 940 | * functions. |
michael@0 | 941 | * @see Replaceable |
michael@0 | 942 | * @stable ICU 3.4 |
michael@0 | 943 | */ |
michael@0 | 944 | UTEXT_PROVIDER_WRITABLE = 3, |
michael@0 | 945 | /** |
michael@0 | 946 | * There is meta data associated with the text. |
michael@0 | 947 | * @see Replaceable::hasMetaData() |
michael@0 | 948 | * @stable ICU 3.4 |
michael@0 | 949 | */ |
michael@0 | 950 | UTEXT_PROVIDER_HAS_META_DATA = 4, |
michael@0 | 951 | /** |
michael@0 | 952 | * Text provider owns the text storage. |
michael@0 | 953 | * Generally occurs as the result of a deep clone of the UText. |
michael@0 | 954 | * When closing the UText, the associated text must |
michael@0 | 955 | * also be closed/deleted/freed/ whatever is appropriate. |
michael@0 | 956 | * @stable ICU 3.6 |
michael@0 | 957 | */ |
michael@0 | 958 | UTEXT_PROVIDER_OWNS_TEXT = 5 |
michael@0 | 959 | }; |
michael@0 | 960 | |
michael@0 | 961 | /** |
michael@0 | 962 | * Function type declaration for UText.clone(). |
michael@0 | 963 | * |
michael@0 | 964 | * clone a UText. Much like opening a UText where the source text is itself |
michael@0 | 965 | * another UText. |
michael@0 | 966 | * |
michael@0 | 967 | * A deep clone will copy both the UText data structures and the underlying text. |
michael@0 | 968 | * The original and cloned UText will operate completely independently; modifications |
michael@0 | 969 | * made to the text in one will not affect the other. Text providers are not |
michael@0 | 970 | * required to support deep clones. The user of clone() must check the status return |
michael@0 | 971 | * and be prepared to handle failures. |
michael@0 | 972 | * |
michael@0 | 973 | * A shallow clone replicates only the UText data structures; it does not make |
michael@0 | 974 | * a copy of the underlying text. Shallow clones can be used as an efficient way to |
michael@0 | 975 | * have multiple iterators active in a single text string that is not being |
michael@0 | 976 | * modified. |
michael@0 | 977 | * |
michael@0 | 978 | * A shallow clone operation must not fail except for truly exceptional conditions such |
michael@0 | 979 | * as memory allocation failures. |
michael@0 | 980 | * |
michael@0 | 981 | * A UText and its clone may be safely concurrently accessed by separate threads. |
michael@0 | 982 | * This is true for both shallow and deep clones. |
michael@0 | 983 | * It is the responsibility of the Text Provider to ensure that this thread safety |
michael@0 | 984 | * constraint is met. |
michael@0 | 985 | |
michael@0 | 986 | * |
michael@0 | 987 | * @param dest A UText struct to be filled in with the result of the clone operation, |
michael@0 | 988 | * or NULL if the clone function should heap-allocate a new UText struct. |
michael@0 | 989 | * @param src The UText to be cloned. |
michael@0 | 990 | * @param deep TRUE to request a deep clone, FALSE for a shallow clone. |
michael@0 | 991 | * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR |
michael@0 | 992 | * should be returned if the text provider is unable to clone the |
michael@0 | 993 | * original text. |
michael@0 | 994 | * @return The newly created clone, or NULL if the clone operation failed. |
michael@0 | 995 | * |
michael@0 | 996 | * @stable ICU 3.4 |
michael@0 | 997 | */ |
michael@0 | 998 | typedef UText * U_CALLCONV |
michael@0 | 999 | UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); |
michael@0 | 1000 | |
michael@0 | 1001 | |
michael@0 | 1002 | /** |
michael@0 | 1003 | * Function type declaration for UText.nativeLength(). |
michael@0 | 1004 | * |
michael@0 | 1005 | * @param ut the UText to get the length of. |
michael@0 | 1006 | * @return the length, in the native units of the original text string. |
michael@0 | 1007 | * @see UText |
michael@0 | 1008 | * @stable ICU 3.4 |
michael@0 | 1009 | */ |
michael@0 | 1010 | typedef int64_t U_CALLCONV |
michael@0 | 1011 | UTextNativeLength(UText *ut); |
michael@0 | 1012 | |
michael@0 | 1013 | /** |
michael@0 | 1014 | * Function type declaration for UText.access(). Get the description of the text chunk |
michael@0 | 1015 | * containing the text at a requested native index. The UText's iteration |
michael@0 | 1016 | * position will be left at the requested index. If the index is out |
michael@0 | 1017 | * of bounds, the iteration position will be left at the start or end |
michael@0 | 1018 | * of the string, as appropriate. |
michael@0 | 1019 | * |
michael@0 | 1020 | * Chunks must begin and end on code point boundaries. A single code point |
michael@0 | 1021 | * comprised of multiple storage units must never span a chunk boundary. |
michael@0 | 1022 | * |
michael@0 | 1023 | * |
michael@0 | 1024 | * @param ut the UText being accessed. |
michael@0 | 1025 | * @param nativeIndex Requested index of the text to be accessed. |
michael@0 | 1026 | * @param forward If TRUE, then the returned chunk must contain text |
michael@0 | 1027 | * starting from the index, so that start<=index<limit. |
michael@0 | 1028 | * If FALSE, then the returned chunk must contain text |
michael@0 | 1029 | * before the index, so that start<index<=limit. |
michael@0 | 1030 | * @return True if the requested index could be accessed. The chunk |
michael@0 | 1031 | * will contain the requested text. |
michael@0 | 1032 | * False value if a chunk cannot be accessed |
michael@0 | 1033 | * (the requested index is out of bounds). |
michael@0 | 1034 | * |
michael@0 | 1035 | * @see UText |
michael@0 | 1036 | * @stable ICU 3.4 |
michael@0 | 1037 | */ |
michael@0 | 1038 | typedef UBool U_CALLCONV |
michael@0 | 1039 | UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); |
michael@0 | 1040 | |
michael@0 | 1041 | /** |
michael@0 | 1042 | * Function type declaration for UText.extract(). |
michael@0 | 1043 | * |
michael@0 | 1044 | * Extract text from a UText into a UChar buffer. The range of text to be extracted |
michael@0 | 1045 | * is specified in the native indices of the UText provider. These may not necessarily |
michael@0 | 1046 | * be UTF-16 indices. |
michael@0 | 1047 | * <p> |
michael@0 | 1048 | * The size (number of 16 bit UChars) of the data to be extracted is returned. The |
michael@0 | 1049 | * full amount is returned, even when the specified buffer size is smaller. |
michael@0 | 1050 | * <p> |
michael@0 | 1051 | * The extracted string will (if you are a user) / must (if you are a text provider) |
michael@0 | 1052 | * be NUL-terminated if there is sufficient space in the destination buffer. |
michael@0 | 1053 | * |
michael@0 | 1054 | * @param ut the UText from which to extract data. |
michael@0 | 1055 | * @param nativeStart the native index of the first character to extract. |
michael@0 | 1056 | * @param nativeLimit the native string index of the position following the last |
michael@0 | 1057 | * character to extract. |
michael@0 | 1058 | * @param dest the UChar (UTF-16) buffer into which the extracted text is placed |
michael@0 | 1059 | * @param destCapacity The size, in UChars, of the destination buffer. May be zero |
michael@0 | 1060 | * for precomputing the required size. |
michael@0 | 1061 | * @param status receives any error status. |
michael@0 | 1062 | * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for |
michael@0 | 1063 | * preflighting. |
michael@0 | 1064 | * @return Number of UChars in the data. Does not include a trailing NUL. |
michael@0 | 1065 | * |
michael@0 | 1066 | * @stable ICU 3.4 |
michael@0 | 1067 | */ |
michael@0 | 1068 | typedef int32_t U_CALLCONV |
michael@0 | 1069 | UTextExtract(UText *ut, |
michael@0 | 1070 | int64_t nativeStart, int64_t nativeLimit, |
michael@0 | 1071 | UChar *dest, int32_t destCapacity, |
michael@0 | 1072 | UErrorCode *status); |
michael@0 | 1073 | |
michael@0 | 1074 | /** |
michael@0 | 1075 | * Function type declaration for UText.replace(). |
michael@0 | 1076 | * |
michael@0 | 1077 | * Replace a range of the original text with a replacement text. |
michael@0 | 1078 | * |
michael@0 | 1079 | * Leaves the current iteration position at the position following the |
michael@0 | 1080 | * newly inserted replacement text. |
michael@0 | 1081 | * |
michael@0 | 1082 | * This function need only be implemented on UText types that support writing. |
michael@0 | 1083 | * |
michael@0 | 1084 | * When using this function, there should be only a single UText opened onto the |
michael@0 | 1085 | * underlying native text string. The function is responsible for updating the |
michael@0 | 1086 | * text chunk within the UText to reflect the updated iteration position, |
michael@0 | 1087 | * taking into account any changes to the underlying string's structure caused |
michael@0 | 1088 | * by the replace operation. |
michael@0 | 1089 | * |
michael@0 | 1090 | * @param ut the UText representing the text to be operated on. |
michael@0 | 1091 | * @param nativeStart the index of the start of the region to be replaced |
michael@0 | 1092 | * @param nativeLimit the index of the character following the region to be replaced. |
michael@0 | 1093 | * @param replacementText pointer to the replacement text |
michael@0 | 1094 | * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. |
michael@0 | 1095 | * @param status receives any error status. Possible errors include |
michael@0 | 1096 | * U_NO_WRITE_PERMISSION |
michael@0 | 1097 | * |
michael@0 | 1098 | * @return The signed number of (native) storage units by which |
michael@0 | 1099 | * the length of the text expanded or contracted. |
michael@0 | 1100 | * |
michael@0 | 1101 | * @stable ICU 3.4 |
michael@0 | 1102 | */ |
michael@0 | 1103 | typedef int32_t U_CALLCONV |
michael@0 | 1104 | UTextReplace(UText *ut, |
michael@0 | 1105 | int64_t nativeStart, int64_t nativeLimit, |
michael@0 | 1106 | const UChar *replacementText, int32_t replacmentLength, |
michael@0 | 1107 | UErrorCode *status); |
michael@0 | 1108 | |
michael@0 | 1109 | /** |
michael@0 | 1110 | * Function type declaration for UText.copy(). |
michael@0 | 1111 | * |
michael@0 | 1112 | * Copy or move a substring from one position to another within the text, |
michael@0 | 1113 | * while retaining any metadata associated with the text. |
michael@0 | 1114 | * This function is used to duplicate or reorder substrings. |
michael@0 | 1115 | * The destination index must not overlap the source range. |
michael@0 | 1116 | * |
michael@0 | 1117 | * The text to be copied or moved is inserted at destIndex; |
michael@0 | 1118 | * it does not replace or overwrite any existing text. |
michael@0 | 1119 | * |
michael@0 | 1120 | * This function need only be implemented for UText types that support writing. |
michael@0 | 1121 | * |
michael@0 | 1122 | * When using this function, there should be only a single UText opened onto the |
michael@0 | 1123 | * underlying native text string. The function is responsible for updating the |
michael@0 | 1124 | * text chunk within the UText to reflect the updated iteration position, |
michael@0 | 1125 | * taking into account any changes to the underlying string's structure caused |
michael@0 | 1126 | * by the copy operation. |
michael@0 | 1127 | * |
michael@0 | 1128 | * @param ut The UText representing the text to be operated on. |
michael@0 | 1129 | * @param nativeStart The index of the start of the region to be copied or moved |
michael@0 | 1130 | * @param nativeLimit The index of the character following the region to be copied or moved. |
michael@0 | 1131 | * @param nativeDest The destination index to which the source substring is copied or moved. |
michael@0 | 1132 | * @param move If TRUE, then the substring is moved, not copied/duplicated. |
michael@0 | 1133 | * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION |
michael@0 | 1134 | * |
michael@0 | 1135 | * @stable ICU 3.4 |
michael@0 | 1136 | */ |
michael@0 | 1137 | typedef void U_CALLCONV |
michael@0 | 1138 | UTextCopy(UText *ut, |
michael@0 | 1139 | int64_t nativeStart, int64_t nativeLimit, |
michael@0 | 1140 | int64_t nativeDest, |
michael@0 | 1141 | UBool move, |
michael@0 | 1142 | UErrorCode *status); |
michael@0 | 1143 | |
michael@0 | 1144 | /** |
michael@0 | 1145 | * Function type declaration for UText.mapOffsetToNative(). |
michael@0 | 1146 | * Map from the current UChar offset (chunkOffset) within the current text chunk to |
michael@0 | 1147 | * the corresponding native index in the original source text. |
michael@0 | 1148 | * |
michael@0 | 1149 | * This function is required only for text providers that do not use native UTF-16 indexes. |
michael@0 | 1150 | * |
michael@0 | 1151 | * @param ut The UText whose current chunk offset is to be mapped. |
michael@0 | 1152 | * @return Absolute (native) index corresponding to chunkOffset in the current chunk. |
michael@0 | 1153 | * The returned native index should always be to a code point boundary. |
michael@0 | 1154 | * |
michael@0 | 1155 | * @stable ICU 3.4 |
michael@0 | 1156 | */ |
michael@0 | 1157 | typedef int64_t U_CALLCONV |
michael@0 | 1158 | UTextMapOffsetToNative(const UText *ut); |
michael@0 | 1159 | |
michael@0 | 1160 | /** |
michael@0 | 1161 | * Function type declaration for UText.mapNativeIndexToUTF16(). |
michael@0 | 1162 | * Map from a native index to a UChar offset within a text chunk. |
michael@0 | 1163 | * Behavior is undefined if the native index does not fall within the |
michael@0 | 1164 | * current chunk. |
michael@0 | 1165 | * |
michael@0 | 1166 | * This function is required only for text providers that do not use native UTF-16 indexes. |
michael@0 | 1167 | * |
michael@0 | 1168 | * @param ut The UText containing the text chunk. |
michael@0 | 1169 | * @param nativeIndex Absolute (native) text index, chunkNativeStart<=nativeIndex<=chunkNativeLimit. |
michael@0 | 1170 | * @return Chunk-relative UTF-16 offset corresponding to the specified native |
michael@0 | 1171 | * index. |
michael@0 | 1172 | * |
michael@0 | 1173 | * @stable ICU 3.4 |
michael@0 | 1174 | */ |
michael@0 | 1175 | typedef int32_t U_CALLCONV |
michael@0 | 1176 | UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); |
michael@0 | 1177 | |
michael@0 | 1178 | |
michael@0 | 1179 | /** |
michael@0 | 1180 | * Function type declaration for UText.close(). |
michael@0 | 1181 | * |
michael@0 | 1182 | * A Text Provider close function is only required for provider types that make |
michael@0 | 1183 | * allocations in their open function (or other functions) that must be |
michael@0 | 1184 | * cleaned up when the UText is closed. |
michael@0 | 1185 | * |
michael@0 | 1186 | * The allocation of the UText struct itself and any "extra" storage |
michael@0 | 1187 | * associated with the UText is handled by the common UText implementation |
michael@0 | 1188 | * and does not require provider-specific cleanup in a close function. |
michael@0 | 1189 | * |
michael@0 | 1190 | * Most UText provider implementations do not need to implement this function. |
michael@0 | 1191 | * |
michael@0 | 1192 | * @param ut A UText object to be closed. |
michael@0 | 1193 | * |
michael@0 | 1194 | * @stable ICU 3.4 |
michael@0 | 1195 | */ |
michael@0 | 1196 | typedef void U_CALLCONV |
michael@0 | 1197 | UTextClose(UText *ut); |
michael@0 | 1198 | |
michael@0 | 1199 | |
michael@0 | 1200 | /** |
michael@0 | 1201 | * (public) Function dispatch table for UText. |
michael@0 | 1202 | * Conceptually very much like a C++ Virtual Function Table. |
michael@0 | 1203 | * This struct defines the organization of the table. |
michael@0 | 1204 | * Each text provider implementation must provide an |
michael@0 | 1205 | * actual table that is initialized with the appropriate functions |
michael@0 | 1206 | * for the type of text being handled. |
michael@0 | 1207 | * @stable ICU 3.6 |
michael@0 | 1208 | */ |
michael@0 | 1209 | struct UTextFuncs { |
michael@0 | 1210 | /** |
michael@0 | 1211 | * (public) Function table size, sizeof(UTextFuncs). |
michael@0 | 1212 | * Intended for use should the table grow to accommodate added |
michael@0 | 1213 | * functions in the future, to allow tests for older format |
michael@0 | 1214 | * function tables that do not contain the extensions. |
michael@0 | 1215 | * |
michael@0 | 1216 | * Fields are placed for optimal alignment on |
michael@0 | 1217 | * 32/64/128-bit-pointer machines, by normally grouping together |
michael@0 | 1218 | * 4 32-bit fields, |
michael@0 | 1219 | * 4 pointers, |
michael@0 | 1220 | * 2 64-bit fields |
michael@0 | 1221 | * in sequence. |
michael@0 | 1222 | * @stable ICU 3.6 |
michael@0 | 1223 | */ |
michael@0 | 1224 | int32_t tableSize; |
michael@0 | 1225 | |
michael@0 | 1226 | /** |
michael@0 | 1227 | * (private) Alignment padding. |
michael@0 | 1228 | * Do not use, reserved for use by the UText framework only. |
michael@0 | 1229 | * @internal |
michael@0 | 1230 | */ |
michael@0 | 1231 | int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; |
michael@0 | 1232 | |
michael@0 | 1233 | |
michael@0 | 1234 | /** |
michael@0 | 1235 | * (public) Function pointer for UTextClone. |
michael@0 | 1236 | * |
michael@0 | 1237 | * @see UTextClone |
michael@0 | 1238 | * @stable ICU 3.6 |
michael@0 | 1239 | */ |
michael@0 | 1240 | UTextClone *clone; |
michael@0 | 1241 | |
michael@0 | 1242 | /** |
michael@0 | 1243 | * (public) Function pointer for UTextNativeLength. |
michael@0 | 1244 | * May be expensive to compute! |
michael@0 | 1245 | * |
michael@0 | 1246 | * @see UTextNativeLength |
michael@0 | 1247 | * @stable ICU 3.6 |
michael@0 | 1248 | */ |
michael@0 | 1249 | UTextNativeLength *nativeLength; |
michael@0 | 1250 | |
michael@0 | 1251 | /** |
michael@0 | 1252 | * (public) Function pointer for UTextAccess. |
michael@0 | 1253 | * |
michael@0 | 1254 | * @see UTextAccess |
michael@0 | 1255 | * @stable ICU 3.6 |
michael@0 | 1256 | */ |
michael@0 | 1257 | UTextAccess *access; |
michael@0 | 1258 | |
michael@0 | 1259 | /** |
michael@0 | 1260 | * (public) Function pointer for UTextExtract. |
michael@0 | 1261 | * |
michael@0 | 1262 | * @see UTextExtract |
michael@0 | 1263 | * @stable ICU 3.6 |
michael@0 | 1264 | */ |
michael@0 | 1265 | UTextExtract *extract; |
michael@0 | 1266 | |
michael@0 | 1267 | /** |
michael@0 | 1268 | * (public) Function pointer for UTextReplace. |
michael@0 | 1269 | * |
michael@0 | 1270 | * @see UTextReplace |
michael@0 | 1271 | * @stable ICU 3.6 |
michael@0 | 1272 | */ |
michael@0 | 1273 | UTextReplace *replace; |
michael@0 | 1274 | |
michael@0 | 1275 | /** |
michael@0 | 1276 | * (public) Function pointer for UTextCopy. |
michael@0 | 1277 | * |
michael@0 | 1278 | * @see UTextCopy |
michael@0 | 1279 | * @stable ICU 3.6 |
michael@0 | 1280 | */ |
michael@0 | 1281 | UTextCopy *copy; |
michael@0 | 1282 | |
michael@0 | 1283 | /** |
michael@0 | 1284 | * (public) Function pointer for UTextMapOffsetToNative. |
michael@0 | 1285 | * |
michael@0 | 1286 | * @see UTextMapOffsetToNative |
michael@0 | 1287 | * @stable ICU 3.6 |
michael@0 | 1288 | */ |
michael@0 | 1289 | UTextMapOffsetToNative *mapOffsetToNative; |
michael@0 | 1290 | |
michael@0 | 1291 | /** |
michael@0 | 1292 | * (public) Function pointer for UTextMapNativeIndexToUTF16. |
michael@0 | 1293 | * |
michael@0 | 1294 | * @see UTextMapNativeIndexToUTF16 |
michael@0 | 1295 | * @stable ICU 3.6 |
michael@0 | 1296 | */ |
michael@0 | 1297 | UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; |
michael@0 | 1298 | |
michael@0 | 1299 | /** |
michael@0 | 1300 | * (public) Function pointer for UTextClose. |
michael@0 | 1301 | * |
michael@0 | 1302 | * @see UTextClose |
michael@0 | 1303 | * @stable ICU 3.6 |
michael@0 | 1304 | */ |
michael@0 | 1305 | UTextClose *close; |
michael@0 | 1306 | |
michael@0 | 1307 | /** |
michael@0 | 1308 | * (private) Spare function pointer, not currently assigned a role. |
michael@0 | 1309 | * @internal |
michael@0 | 1310 | */ |
michael@0 | 1311 | UTextClose *spare1; |
michael@0 | 1312 | |
michael@0 | 1313 | /** |
michael@0 | 1314 | * (private) Spare function pointer, not currently assigned a role. |
michael@0 | 1315 | * @internal |
michael@0 | 1316 | */ |
michael@0 | 1317 | UTextClose *spare2; |
michael@0 | 1318 | |
michael@0 | 1319 | /** |
michael@0 | 1320 | * (private) Spare function pointer, not currently assigned a role. |
michael@0 | 1321 | * @internal |
michael@0 | 1322 | */ |
michael@0 | 1323 | UTextClose *spare3; |
michael@0 | 1324 | |
michael@0 | 1325 | }; |
michael@0 | 1326 | /** |
michael@0 | 1327 | * Type name for struct UTextFuncs, the function dispatch table for UText. |
michael@0 | 1328 | * @see UTextFuncs |
michael@0 | 1329 | */ |
michael@0 | 1330 | typedef struct UTextFuncs UTextFuncs; |
michael@0 | 1331 | |
michael@0 | 1332 | /** |
michael@0 | 1333 | * UText struct. Provides the interface between the generic UText access code |
michael@0 | 1334 | * and the UText provider code that works on specific kinds of |
michael@0 | 1335 | * text (UTF-8, noncontiguous UTF-16, whatever). |
michael@0 | 1336 | * |
michael@0 | 1337 | * Applications that are using predefined types of text providers |
michael@0 | 1338 | * to pass text data to ICU services will have no need to view the |
michael@0 | 1339 | * internals of the UText structs that they open. |
michael@0 | 1340 | * |
michael@0 | 1341 | * @stable ICU 3.6 |
michael@0 | 1342 | */ |
michael@0 | 1343 | struct UText { |
michael@0 | 1344 | /** |
michael@0 | 1345 | * (private) Magic. Used to help detect when UText functions are handed |
michael@0 | 1346 | * invalid or uninitialized UText structs. |
michael@0 | 1347 | * utext_openXYZ() functions take an initialized, |
michael@0 | 1348 | * but not necessarily open, UText struct as an |
michael@0 | 1349 | * optional fill-in parameter. This magic field |
michael@0 | 1350 | * is used to check for that initialization. |
michael@0 | 1351 | * Text provider close functions must NOT clear |
michael@0 | 1352 | * the magic field because that would prevent |
michael@0 | 1353 | * reuse of the UText struct. |
michael@0 | 1354 | * @internal |
michael@0 | 1355 | */ |
michael@0 | 1356 | uint32_t magic; |
michael@0 | 1357 | |
michael@0 | 1358 | |
michael@0 | 1359 | /** |
michael@0 | 1360 | * (private) Flags for managing the allocation and freeing of |
michael@0 | 1361 | * memory associated with this UText. |
michael@0 | 1362 | * @internal |
michael@0 | 1363 | */ |
michael@0 | 1364 | int32_t flags; |
michael@0 | 1365 | |
michael@0 | 1366 | |
michael@0 | 1367 | /** |
michael@0 | 1368 | * Text provider properties. This set of flags is maintained by the |
michael@0 | 1369 | * text provider implementation. |
michael@0 | 1370 | * @stable ICU 3.4 |
michael@0 | 1371 | */ |
michael@0 | 1372 | int32_t providerProperties; |
michael@0 | 1373 | |
michael@0 | 1374 | /** |
michael@0 | 1375 | * (public) sizeOfStruct=sizeof(UText) |
michael@0 | 1376 | * Allows possible backward-compatible extension. |
michael@0 | 1377 | * |
michael@0 | 1378 | * @stable ICU 3.4 |
michael@0 | 1379 | */ |
michael@0 | 1380 | int32_t sizeOfStruct; |
michael@0 | 1381 | |
michael@0 | 1382 | /* ------ 16 byte alignment boundary ----------- */ |
michael@0 | 1383 | |
michael@0 | 1384 | |
michael@0 | 1385 | /** |
michael@0 | 1386 | * (protected) Native index of the first character position following |
michael@0 | 1387 | * the current chunk. |
michael@0 | 1388 | * @stable ICU 3.6 |
michael@0 | 1389 | */ |
michael@0 | 1390 | int64_t chunkNativeLimit; |
michael@0 | 1391 | |
michael@0 | 1392 | /** |
michael@0 | 1393 | * (protected) Size in bytes of the extra space (pExtra). |
michael@0 | 1394 | * @stable ICU 3.4 |
michael@0 | 1395 | */ |
michael@0 | 1396 | int32_t extraSize; |
michael@0 | 1397 | |
michael@0 | 1398 | /** |
michael@0 | 1399 | * (protected) The highest chunk offset where native indexing and |
michael@0 | 1400 | * chunk (UTF-16) indexing correspond. For UTF-16 sources, value |
michael@0 | 1401 | * will be equal to chunkLength. |
michael@0 | 1402 | * |
michael@0 | 1403 | * @stable ICU 3.6 |
michael@0 | 1404 | */ |
michael@0 | 1405 | int32_t nativeIndexingLimit; |
michael@0 | 1406 | |
michael@0 | 1407 | /* ---- 16 byte alignment boundary------ */ |
michael@0 | 1408 | |
michael@0 | 1409 | /** |
michael@0 | 1410 | * (protected) Native index of the first character in the text chunk. |
michael@0 | 1411 | * @stable ICU 3.6 |
michael@0 | 1412 | */ |
michael@0 | 1413 | int64_t chunkNativeStart; |
michael@0 | 1414 | |
michael@0 | 1415 | /** |
michael@0 | 1416 | * (protected) Current iteration position within the text chunk (UTF-16 buffer). |
michael@0 | 1417 | * This is the index to the character that will be returned by utext_next32(). |
michael@0 | 1418 | * @stable ICU 3.6 |
michael@0 | 1419 | */ |
michael@0 | 1420 | int32_t chunkOffset; |
michael@0 | 1421 | |
michael@0 | 1422 | /** |
michael@0 | 1423 | * (protected) Length of the text chunk (UTF-16 buffer), in UChars. |
michael@0 | 1424 | * @stable ICU 3.6 |
michael@0 | 1425 | */ |
michael@0 | 1426 | int32_t chunkLength; |
michael@0 | 1427 | |
michael@0 | 1428 | /* ---- 16 byte alignment boundary-- */ |
michael@0 | 1429 | |
michael@0 | 1430 | |
michael@0 | 1431 | /** |
michael@0 | 1432 | * (protected) Pointer to a chunk of text in UTF-16 format. |
michael@0 | 1433 | * May refer either to original storage of the source of the text, or |
michael@0 | 1434 | * if conversion was required, to a buffer owned by the UText. |
michael@0 | 1435 | * @stable ICU 3.6 |
michael@0 | 1436 | */ |
michael@0 | 1437 | const UChar *chunkContents; |
michael@0 | 1438 | |
michael@0 | 1439 | /** |
michael@0 | 1440 | * (public) Pointer to the dispatch table for accessing functions for this UText. |
michael@0 | 1441 | * @stable ICU 3.6 |
michael@0 | 1442 | */ |
michael@0 | 1443 | const UTextFuncs *pFuncs; |
michael@0 | 1444 | |
michael@0 | 1445 | /** |
michael@0 | 1446 | * (protected) Pointer to additional space requested by the |
michael@0 | 1447 | * text provider during the utext_open operation. |
michael@0 | 1448 | * @stable ICU 3.4 |
michael@0 | 1449 | */ |
michael@0 | 1450 | void *pExtra; |
michael@0 | 1451 | |
michael@0 | 1452 | /** |
michael@0 | 1453 | * (protected) Pointer to string or text-containing object or similar. |
michael@0 | 1454 | * This is the source of the text that this UText is wrapping, in a format |
michael@0 | 1455 | * that is known to the text provider functions. |
michael@0 | 1456 | * @stable ICU 3.4 |
michael@0 | 1457 | */ |
michael@0 | 1458 | const void *context; |
michael@0 | 1459 | |
michael@0 | 1460 | /* --- 16 byte alignment boundary--- */ |
michael@0 | 1461 | |
michael@0 | 1462 | /** |
michael@0 | 1463 | * (protected) Pointer field available for use by the text provider. |
michael@0 | 1464 | * Not used by UText common code. |
michael@0 | 1465 | * @stable ICU 3.6 |
michael@0 | 1466 | */ |
michael@0 | 1467 | const void *p; |
michael@0 | 1468 | /** |
michael@0 | 1469 | * (protected) Pointer field available for use by the text provider. |
michael@0 | 1470 | * Not used by UText common code. |
michael@0 | 1471 | * @stable ICU 3.6 |
michael@0 | 1472 | */ |
michael@0 | 1473 | const void *q; |
michael@0 | 1474 | /** |
michael@0 | 1475 | * (protected) Pointer field available for use by the text provider. |
michael@0 | 1476 | * Not used by UText common code. |
michael@0 | 1477 | * @stable ICU 3.6 |
michael@0 | 1478 | */ |
michael@0 | 1479 | const void *r; |
michael@0 | 1480 | |
michael@0 | 1481 | /** |
michael@0 | 1482 | * Private field reserved for future use by the UText framework |
michael@0 | 1483 | * itself. This is not to be touched by the text providers. |
michael@0 | 1484 | * @internal ICU 3.4 |
michael@0 | 1485 | */ |
michael@0 | 1486 | void *privP; |
michael@0 | 1487 | |
michael@0 | 1488 | |
michael@0 | 1489 | /* --- 16 byte alignment boundary--- */ |
michael@0 | 1490 | |
michael@0 | 1491 | |
michael@0 | 1492 | /** |
michael@0 | 1493 | * (protected) Integer field reserved for use by the text provider. |
michael@0 | 1494 | * Not used by the UText framework, or by the client (user) of the UText. |
michael@0 | 1495 | * @stable ICU 3.4 |
michael@0 | 1496 | */ |
michael@0 | 1497 | int64_t a; |
michael@0 | 1498 | |
michael@0 | 1499 | /** |
michael@0 | 1500 | * (protected) Integer field reserved for use by the text provider. |
michael@0 | 1501 | * Not used by the UText framework, or by the client (user) of the UText. |
michael@0 | 1502 | * @stable ICU 3.4 |
michael@0 | 1503 | */ |
michael@0 | 1504 | int32_t b; |
michael@0 | 1505 | |
michael@0 | 1506 | /** |
michael@0 | 1507 | * (protected) Integer field reserved for use by the text provider. |
michael@0 | 1508 | * Not used by the UText framework, or by the client (user) of the UText. |
michael@0 | 1509 | * @stable ICU 3.4 |
michael@0 | 1510 | */ |
michael@0 | 1511 | int32_t c; |
michael@0 | 1512 | |
michael@0 | 1513 | /* ---- 16 byte alignment boundary---- */ |
michael@0 | 1514 | |
michael@0 | 1515 | |
michael@0 | 1516 | /** |
michael@0 | 1517 | * Private field reserved for future use by the UText framework |
michael@0 | 1518 | * itself. This is not to be touched by the text providers. |
michael@0 | 1519 | * @internal ICU 3.4 |
michael@0 | 1520 | */ |
michael@0 | 1521 | int64_t privA; |
michael@0 | 1522 | /** |
michael@0 | 1523 | * Private field reserved for future use by the UText framework |
michael@0 | 1524 | * itself. This is not to be touched by the text providers. |
michael@0 | 1525 | * @internal ICU 3.4 |
michael@0 | 1526 | */ |
michael@0 | 1527 | int32_t privB; |
michael@0 | 1528 | /** |
michael@0 | 1529 | * Private field reserved for future use by the UText framework |
michael@0 | 1530 | * itself. This is not to be touched by the text providers. |
michael@0 | 1531 | * @internal ICU 3.4 |
michael@0 | 1532 | */ |
michael@0 | 1533 | int32_t privC; |
michael@0 | 1534 | }; |
michael@0 | 1535 | |
michael@0 | 1536 | |
michael@0 | 1537 | /** |
michael@0 | 1538 | * Common function for use by Text Provider implementations to allocate and/or initialize |
michael@0 | 1539 | * a new UText struct. To be called in the implementation of utext_open() functions. |
michael@0 | 1540 | * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. |
michael@0 | 1541 | * If the supplied UText is already open, the provider's close function will be called |
michael@0 | 1542 | * so that the struct can be reused by the open that is in progress. |
michael@0 | 1543 | * |
michael@0 | 1544 | * @param ut Pointer to a UText struct to be re-used, or null if a new UText |
michael@0 | 1545 | * should be allocated. |
michael@0 | 1546 | * @param extraSpace The amount of additional space to be allocated as part |
michael@0 | 1547 | * of this UText, for use by types of providers that require |
michael@0 | 1548 | * additional storage. |
michael@0 | 1549 | * @param status Errors are returned here. |
michael@0 | 1550 | * @return Pointer to the UText, allocated if necessary, with extra space set up if requested. |
michael@0 | 1551 | * @stable ICU 3.4 |
michael@0 | 1552 | */ |
michael@0 | 1553 | U_STABLE UText * U_EXPORT2 |
michael@0 | 1554 | utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); |
michael@0 | 1555 | |
michael@0 | 1556 | /* Deliberately NOT wrapped in #ifndef U_HIDE_INTERNAL_API; see the note below. */ |
michael@0 | 1557 | /** |
michael@0 | 1558 | * @internal |
michael@0 | 1559 | * Value used to help identify correctly initialized UText structs. |
michael@0 | 1560 | * Note: must stay visible even when U_HIDE_INTERNAL_API is defined, because the public UTEXT_INITIALIZER macro expands to it. |
michael@0 | 1561 | */ |
michael@0 | 1562 | enum { |
michael@0 | 1563 | UTEXT_MAGIC = 0x345ad82c |
michael@0 | 1564 | }; |
michael@0 | 1565 | |
michael@0 | 1566 | |
michael@0 | 1567 | /** |
michael@0 | 1568 | * Initializer to be used with local (stack) instances of a UText |
michael@0 | 1569 | * struct. UText structs must be initialized before passing |
michael@0 | 1570 | * them to one of the utext_open functions. |
michael@0 | 1571 | * The values are listed in the declaration order of the fields of struct UText. |
michael@0 | 1572 | * @stable ICU 3.6 |
michael@0 | 1573 | */ |
michael@0 | 1574 | #define UTEXT_INITIALIZER { \ |
michael@0 | 1575 | UTEXT_MAGIC, /* magic */ \ |
michael@0 | 1576 | 0, /* flags */ \ |
michael@0 | 1577 | 0, /* providerProperties */ \ |
michael@0 | 1578 | sizeof(UText), /* sizeOfStruct */ \ |
michael@0 | 1579 | 0, /* chunkNativeLimit */ \ |
michael@0 | 1580 | 0, /* extraSize */ \ |
michael@0 | 1581 | 0, /* nativeIndexingLimit */ \ |
michael@0 | 1582 | 0, /* chunkNativeStart */ \ |
michael@0 | 1583 | 0, /* chunkOffset */ \ |
michael@0 | 1584 | 0, /* chunkLength */ \ |
michael@0 | 1585 | NULL, /* chunkContents */ \ |
michael@0 | 1586 | NULL, /* pFuncs */ \ |
michael@0 | 1587 | NULL, /* pExtra */ \ |
michael@0 | 1588 | NULL, /* context */ \ |
michael@0 | 1589 | NULL, NULL, NULL, /* p, q, r */ \ |
michael@0 | 1590 | NULL, /* privP */ \ |
michael@0 | 1591 | 0, 0, 0, /* a, b, c */ \ |
michael@0 | 1592 | 0, 0, 0 /* privA, privB, privC */ \ |
michael@0 | 1593 | } |
michael@0 | 1594 | |
michael@0 | 1595 | |
michael@0 | 1596 | U_CDECL_END |
michael@0 | 1597 | |
michael@0 | 1598 | |
michael@0 | 1599 | |
michael@0 | 1600 | #endif |