|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 2004-2012, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * file name: utext.h |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2004oct06 |
|
14 * created by: Markus W. Scherer |
|
15 */ |
|
16 |
|
17 #ifndef __UTEXT_H__ |
|
18 #define __UTEXT_H__ |
|
19 |
|
20 /** |
|
21 * \file |
|
22 * \brief C API: Abstract Unicode Text API |
|
23 * |
|
24 * The Text Access API provides a means to allow text that is stored in alternative |
|
25 * formats to work with ICU services. ICU normally operates on text that is |
|
26 * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type |
|
27 * UnicodeString for C++ APIs. |
|
28 * |
|
29 * ICU Text Access allows other formats, such as UTF-8 or non-contiguous |
|
30 * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. |
|
31 * |
|
32 * There are three general classes of usage for UText: |
|
33 * |
|
34 * Application Level Use. This is the simplest usage - applications would |
|
35 * use one of the utext_open() functions on their input text, and pass |
|
36 * the resulting UText to the desired ICU service. |
|
37 * |
|
38 * Second is usage in ICU Services, such as break iteration, that will need to |
|
39 * operate on input presented to them as a UText. These implementations |
|
40 * will need to use the iteration and related UText functions to gain |
|
41 * access to the actual text. |
|
42 * |
|
43 * The third class of UText users are "text providers." These are the |
|
44 * UText implementations for the various text storage formats. An application |
|
45 * or system with a unique text storage format can implement a set of |
|
46 * UText provider functions for that format, which will then allow |
|
47 * ICU services to operate on that format. |
|
48 * |
|
49 * |
|
50 * <em>Iterating over text</em> |
|
51 * |
|
52 * Here is sample code for a forward iteration over the contents of a UText |
|
53 * |
|
54 * \code |
|
55 * UChar32 c; |
|
56 * UText *ut = whatever(); |
|
57 * |
|
58 * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { |
|
59 * // do whatever with the codepoint c here. |
|
60 * } |
|
61 * \endcode |
|
62 * |
|
63 * And here is similar code to iterate in the reverse direction, from the end |
|
64 * of the text towards the beginning. |
|
65 * |
|
66 * \code |
|
67 * UChar32 c; |
|
68 * UText *ut = whatever(); |
|
69 * int64_t textLength = utext_nativeLength(ut); |
|
70 * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { |
|
71 * // do whatever with the codepoint c here. |
|
72 * } |
|
73 * \endcode |
|
74 * |
|
75 * <em>Characters and Indexing</em> |
|
76 * |
|
77 * Indexing into text by UText functions is nearly always in terms of the native |
|
78 * indexing of the underlying text storage. The storage format could be UTF-8 |
|
79 * or UTF-32, for example. When coding to the UText access API, no assumptions |
|
80 * can be made regarding the size of characters, or how far an index |
|
81 * may move when iterating between characters. |
|
82 * |
|
83 * All indices supplied to UText functions are pinned to the length of the |
|
84 * text. An out-of-bounds index is not considered to be an error, but is |
|
85 * adjusted to be in the range 0 <= index <= length of input text. |
|
86 * |
|
87 * |
|
88 * When an index position is returned from a UText function, it will be |
|
89 * a native index to the underlying text. In the case of multi-unit characters, |
|
90 * it will always refer to the first position of the character, |
|
91 * never to the interior. This is essentially the same thing as saying that |
|
92 * a returned index will always point to a boundary between characters. |
|
93 * |
|
94 * When a native index is supplied to a UText function, all indices that |
|
95 * refer to any part of a multi-unit character representation are considered |
|
96 * to be equivalent. In the case of multi-unit characters, an incoming index |
|
97 * will be logically normalized to refer to the start of the character. |
|
98 * |
|
99 * It is possible to test whether a native index is on a code point boundary |
|
100 * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). |
|
101 * If the index is returned unchanged, it was on a code point boundary. If |
|
102 * an adjusted index is returned, the original index referred to the |
|
103 * interior of a character. |
|
104 * |
|
105 * <em>Conventions for calling UText functions</em> |
|
106 * |
|
107 * Most UText access functions have as their first parameter a (UText *) pointer, |
|
108 * which specifies the UText to be used. Unless otherwise noted, the |
|
109 * pointer must refer to a valid, open UText. Attempting to |
|
110 * use a closed UText or passing a NULL pointer is a programming error and |
|
111 * will produce undefined results or NULL pointer exceptions. |
|
112 * |
|
113 * The utext_open family of functions can either open an existing (closed) |
|
114 * UText, or heap allocate a new UText. Here is sample code for creating |
|
115 * a stack-allocated UText. |
|
116 * |
|
117 * \code |
|
118 * char *s = whatever(); // A utf-8 string |
|
119 * UErrorCode status = U_ZERO_ERROR; |
|
120 * UText ut = UTEXT_INITIALIZER; |
|
121 * utext_openUTF8(&ut, s, -1, &status); |
|
122 * if (U_FAILURE(status)) { |
|
123 * // error handling |
|
124 * } else { |
|
125 * // work with the UText |
|
126 * } |
|
127 * \endcode |
|
128 * |
|
129 * Any existing UText passed to an open function _must_ have been initialized, |
|
130 * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated |
|
131 * by an open function. Passing NULL will cause the open function to |
|
132 * heap-allocate and fully initialize a new UText. |
|
133 * |
|
134 */ |
|
135 |
|
136 |
|
137 |
|
138 #include "unicode/utypes.h" |
|
139 #include "unicode/uchar.h" |
|
140 #if U_SHOW_CPLUSPLUS_API |
|
141 #include "unicode/localpointer.h" |
|
142 #include "unicode/rep.h" |
|
143 #include "unicode/unistr.h" |
|
144 #include "unicode/chariter.h" |
|
145 #endif |
|
146 |
|
147 |
|
148 U_CDECL_BEGIN |
|
149 |
|
150 struct UText; |
|
151 typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ |
|
152 |
|
153 |
|
154 /*************************************************************************************** |
|
155 * |
|
156 * C Functions for creating UText wrappers around various kinds of text strings. |
|
157 * |
|
158 ****************************************************************************************/ |
|
159 |
|
160 |
|
161 /** |
|
162 * Close function for UText instances. |
|
163 * Cleans up, releases any resources being held by an open UText. |
|
164 * <p> |
|
165 * If the UText was originally allocated by one of the utext_open functions, |
|
166 * the storage associated with the utext will also be freed. |
|
167 * If the UText storage originated with the application, as it would with |
|
168 * a local or static instance, the storage will not be deleted. |
|
169 * |
|
170 * An open UText can be reset to refer to a new string by using one of the utext_open() |
|
171 * functions without first closing the UText. |
|
172 * |
|
173 * @param ut The UText to be closed. |
|
174 * @return NULL if the UText struct was deleted by the close. If the UText struct |
|
175 * was originally provided by the caller to the open function, it is |
|
176 * returned by this function, and may be safely used again in |
|
177 * a subsequent utext_open. |
|
178 * |
|
179 * @stable ICU 3.4 |
|
180 */ |
|
181 U_STABLE UText * U_EXPORT2 |
|
182 utext_close(UText *ut); |
|
183 |
|
184 #if U_SHOW_CPLUSPLUS_API |
|
185 |
|
186 U_NAMESPACE_BEGIN |
|
187 |
|
188 /** |
|
189 * \class LocalUTextPointer |
|
190 * "Smart pointer" class, closes a UText via utext_close(). |
|
191 * For most methods see the LocalPointerBase base class. |
|
192 * |
|
193 * @see LocalPointerBase |
|
194 * @see LocalPointer |
|
195 * @stable ICU 4.4 |
|
196 */ |
|
197 U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); |
|
198 |
|
199 U_NAMESPACE_END |
|
200 |
|
201 #endif |
|
202 |
|
203 /** |
|
204 * Open a read-only UText implementation for UTF-8 strings. |
|
205 * |
|
206 * \htmlonly |
|
207 * Any invalid UTF-8 in the input will be handled in this way: |
|
208 * a sequence of bytes that has the form of a truncated, but otherwise valid, |
|
209 * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. |
|
210 * Any other illegal bytes will each be replaced by a \uFFFD. |
|
211 * \endhtmlonly |
|
212 * |
|
213 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
|
214 * If non-NULL, must refer to an initialized UText struct, which will then |
|
215 * be reset to reference the specified UTF-8 string. |
|
216 * @param s A UTF-8 string. Must not be NULL. |
|
217 * @param length The length of the UTF-8 string in bytes, or -1 if the string is |
|
218 * zero terminated. |
|
219 * @param status Errors are returned here. |
|
220 * @return A pointer to the UText. If a pre-allocated UText was provided, it |
|
221 * will always be used and returned. |
|
222 * @stable ICU 3.4 |
|
223 */ |
|
224 U_STABLE UText * U_EXPORT2 |
|
225 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); |
|
226 |
|
227 |
|
228 /** |
|
229 * Open a read-only UText for UChar * string. |
|
230 * |
|
231 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
|
232 * If non-NULL, must refer to an initialized UText struct, which will then |
|
233 * be reset to reference the specified UChar string. |
|
234 * @param s A UChar (UTF-16) string |
|
235 * @param length The number of UChars in the input string, or -1 if the string is |
|
236 * zero terminated. |
|
237 * @param status Errors are returned here. |
|
238 * @return A pointer to the UText. If a pre-allocated UText was provided, it |
|
239 * will always be used and returned. |
|
240 * @stable ICU 3.4 |
|
241 */ |
|
242 U_STABLE UText * U_EXPORT2 |
|
243 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); |
|
244 |
|
245 |
|
246 #if U_SHOW_CPLUSPLUS_API |
|
247 /** |
|
248 * Open a writable UText for a non-const UnicodeString. |
|
249 * |
|
250 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
|
251 * If non-NULL, must refer to an initialized UText struct, which will then |
|
252 * be reset to reference the specified input string. |
|
253 * @param s A UnicodeString. |
|
254 * @param status Errors are returned here. |
|
255 * @return Pointer to the UText. If a UText was supplied as input, this |
|
256 * will always be used and returned. |
|
257 * @stable ICU 3.4 |
|
258 */ |
|
259 U_STABLE UText * U_EXPORT2 |
|
260 utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status); |
|
261 |
|
262 |
|
263 /** |
|
264 * Open a UText for a const UnicodeString. The resulting UText will not be writable. |
|
265 * |
|
266 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
|
267 * If non-NULL, must refer to an initialized UText struct, which will then |
|
268 * be reset to reference the specified input string. |
|
269 * @param s A const UnicodeString to be wrapped. |
|
270 * @param status Errors are returned here. |
|
271 * @return Pointer to the UText. If a UText was supplied as input, this |
|
272 * will always be used and returned. |
|
273 * @stable ICU 3.4 |
|
274 */ |
|
275 U_STABLE UText * U_EXPORT2 |
|
276 utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status); |
|
277 |
|
278 |
|
279 /** |
|
280 * Open a writable UText implementation for an ICU Replaceable object. |
|
281 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
|
282 * If non-NULL, must refer to an already existing UText, which will then |
|
283 * be reset to reference the specified replaceable text. |
|
284 * @param rep A Replaceable text object. |
|
285 * @param status Errors are returned here. |
|
286 * @return Pointer to the UText. If a UText was supplied as input, this |
|
287 * will always be used and returned. |
|
288 * @see Replaceable |
|
289 * @stable ICU 3.4 |
|
290 */ |
|
291 U_STABLE UText * U_EXPORT2 |
|
292 utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status); |
|
293 |
|
294 /** |
|
295 * Open a UText implementation over an ICU CharacterIterator. |
|
296 * @param ut Pointer to a UText struct. If NULL, a new UText will be created. |
|
297 * If non-NULL, must refer to an already existing UText, which will then |
|
298 * be reset to reference the specified character iterator. |
|
299 * @param ci A Character Iterator. |
|
300 * @param status Errors are returned here. |
|
301 * @return Pointer to the UText. If a UText was supplied as input, this |
|
302 * will always be used and returned. |
|
303 * @see CharacterIterator |
|
304 * @stable ICU 3.4 |
|
305 */ |
|
306 U_STABLE UText * U_EXPORT2 |
|
307 utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status); |
|
308 |
|
309 #endif |
|
310 |
|
311 |
|
312 /** |
|
313 * Clone a UText. This is much like opening a UText where the source text is itself |
|
314 * another UText. |
|
315 * |
|
316 * A deep clone will copy both the UText data structures and the underlying text. |
|
317 * The original and cloned UText will operate completely independently; modifications |
|
318 * made to the text in one will not affect the other. Text providers are not |
|
319 * required to support deep clones. The user of clone() must check the status return |
|
320 * and be prepared to handle failures. |
|
321 * |
|
322 * The standard UText implementations for UTF8, UChar *, UnicodeString and |
|
323 * Replaceable all support deep cloning. |
|
324 * |
|
325 * The UText returned from a deep clone will be writable, assuming that the text |
|
326 * provider is able to support writing, even if the source UText had been made |
|
327 * non-writable by means of utext_freeze(). |
|
328 * |
|
329 * A shallow clone replicates only the UText data structures; it does not make |
|
330 * a copy of the underlying text. Shallow clones can be used as an efficient way to |
|
331 * have multiple iterators active in a single text string that is not being |
|
332 * modified. |
|
333 * |
|
334 * A shallow clone operation will not fail, barring truly exceptional conditions such |
|
335 * as memory allocation failures. |
|
336 * |
|
337 * Shallow UText clones should be avoided if the UText functions that modify the |
|
338 * text are expected to be used, either on the original or the cloned UText. |
|
339 * Any such modifications can cause unpredictable behavior. Read Only |
|
340 * shallow clones provide some protection against errors of this type by |
|
341 * disabling text modification via the cloned UText. |
|
342 * |
|
343 * A shallow clone made with the readOnly parameter == FALSE will preserve the |
|
344 * utext_isWritable() state of the source object. Note, however, that |
|
345 * write operations must be avoided while more than one UText exists that refer |
|
346 * to the same underlying text. |
|
347 * |
|
348 * A UText and its clone may be safely concurrently accessed by separate threads. |
|
349 * This is true for read access only with shallow clones, and for both read and |
|
350 * write access with deep clones. |
|
351 * It is the responsibility of the Text Provider to ensure that this thread safety |
|
352 * constraint is met. |
|
353 * |
|
354 * @param dest A UText struct to be filled in with the result of the clone operation, |
|
355 * or NULL if the clone function should heap-allocate a new UText struct. |
|
356 * If non-NULL, must refer to an already existing UText, which will then |
|
357 * be reset to become the clone. |
|
358 * @param src The UText to be cloned. |
|
359 * @param deep TRUE to request a deep clone, FALSE for a shallow clone. |
|
360 * @param readOnly TRUE to request that the cloned UText have read only access to the |
|
361 * underlying text. |
|
362 |
|
363 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR |
|
364 * will be returned if the text provider is unable to clone the |
|
365 * original text. |
|
366 * @return The newly created clone, or NULL if the clone operation failed. |
|
367 * @stable ICU 3.4 |
|
368 */ |
|
369 U_STABLE UText * U_EXPORT2 |
|
370 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); |
|
371 |
|
372 |
|
373 /** |
|
374 * Compare two UText objects for equality. |
|
375 * UTexts are equal if they are iterating over the same text, and |
|
376 * have the same iteration position within the text. |
|
377 * If either or both of the parameters are NULL, the comparison is FALSE. |
|
378 * |
|
379 * @param a The first of the two UTexts to compare. |
|
380 * @param b The other UText to be compared. |
|
381 * @return TRUE if the two UTexts are equal. |
|
382 * @stable ICU 3.6 |
|
383 */ |
|
384 U_STABLE UBool U_EXPORT2 |
|
385 utext_equals(const UText *a, const UText *b); |
|
386 |
|
387 |
|
388 /***************************************************************************** |
|
389 * |
|
390 * Functions to work with the text represented by a UText wrapper |
|
391 * |
|
392 *****************************************************************************/ |
|
393 |
|
394 /** |
|
395 * Get the length of the text. Depending on the characteristics |
|
396 * of the underlying text representation, this may be expensive. |
|
397 * @see utext_isLengthExpensive() |
|
398 * |
|
399 * |
|
400 * @param ut the text to be accessed. |
|
401 * @return the length of the text, expressed in native units. |
|
402 * |
|
403 * @stable ICU 3.4 |
|
404 */ |
|
405 U_STABLE int64_t U_EXPORT2 |
|
406 utext_nativeLength(UText *ut); |
|
407 |
|
408 /** |
|
409 * Return TRUE if calculating the length of the text could be expensive. |
|
410 * Finding the length of NUL terminated strings is considered to be expensive. |
|
411 * |
|
412 * Note that the value of this function may change |
|
413 * as the result of other operations on a UText. |
|
414 * Once the length of a string has been discovered, it will no longer |
|
415 * be expensive to report it. |
|
416 * |
|
417 * @param ut the text to be accessed. |
|
418 * @return TRUE if determining the length of the text could be time consuming. |
|
419 * @stable ICU 3.4 |
|
420 */ |
|
421 U_STABLE UBool U_EXPORT2 |
|
422 utext_isLengthExpensive(const UText *ut); |
|
423 |
|
424 /** |
|
425 * Returns the code point at the requested index, |
|
426 * or U_SENTINEL (-1) if it is out of bounds. |
|
427 * |
|
428 * If the specified index points to the interior of a multi-unit |
|
429 * character - one of the trail bytes of a UTF-8 sequence, for example - |
|
430 * the complete code point will be returned. |
|
431 * |
|
432 * The iteration position will be set to the start of the returned code point. |
|
433 * |
|
434 * This function is roughly equivalent to the sequence |
|
435 * utext_setNativeIndex(index); |
|
436 * utext_current32(); |
|
437 * (There is a subtle difference if the index is out of bounds by being less than zero - |
|
438 * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() |
|
439 * will return the char at zero. utext_char32At(negative index), on the other hand, will |
|
440 * return the U_SENTINEL value of -1.) |
|
441 * |
|
442 * @param ut the text to be accessed |
|
443 * @param nativeIndex the native index of the character to be accessed. If the index points |
|
444 * to other than the first unit of a multi-unit character, it will be adjusted |
|
445 * to the start of the character. |
|
446 * @return the code point at the specified index. |
|
447 * @stable ICU 3.4 |
|
448 */ |
|
449 U_STABLE UChar32 U_EXPORT2 |
|
450 utext_char32At(UText *ut, int64_t nativeIndex); |
|
451 |
|
452 |
|
453 /** |
|
454 * |
|
455 * Get the code point at the current iteration position, |
|
456 * or U_SENTINEL (-1) if the iteration has reached the end of |
|
457 * the input text. |
|
458 * |
|
459 * @param ut the text to be accessed. |
|
460 * @return the Unicode code point at the current iterator position. |
|
461 * @stable ICU 3.4 |
|
462 */ |
|
463 U_STABLE UChar32 U_EXPORT2 |
|
464 utext_current32(UText *ut); |
|
465 |
|
466 |
|
467 /** |
|
468 * Get the code point at the current iteration position of the UText, and |
|
469 * advance the position to the first index following the character. |
|
470 * |
|
471 * If the position is at the end of the text (the index following |
|
472 * the last character, which is also the length of the text), |
|
473 * return U_SENTINEL (-1) and do not advance the index. |
|
474 * |
|
475 * This is a post-increment operation. |
|
476 * |
|
477 * An inline macro version of this function, UTEXT_NEXT32(), |
|
478 * is available for performance critical use. |
|
479 * |
|
480 * @param ut the text to be accessed. |
|
481 * @return the Unicode code point at the iteration position. |
|
482 * @see UTEXT_NEXT32 |
|
483 * @stable ICU 3.4 |
|
484 */ |
|
485 U_STABLE UChar32 U_EXPORT2 |
|
486 utext_next32(UText *ut); |
|
487 |
|
488 |
|
489 /** |
|
490 * Move the iterator position to the character (code point) whose |
|
491 * index precedes the current position, and return that character. |
|
492 * This is a pre-decrement operation. |
|
493 * |
|
494 * If the initial position is at the start of the text (index of 0) |
|
495 * return U_SENTINEL (-1), and leave the position unchanged. |
|
496 * |
|
497 * An inline macro version of this function, UTEXT_PREVIOUS32(), |
|
498 * is available for performance critical use. |
|
499 * |
|
500 * @param ut the text to be accessed. |
|
501 * @return the previous UChar32 code point, or U_SENTINEL (-1) |
|
502 * if the iteration has reached the start of the text. |
|
503 * @see UTEXT_PREVIOUS32 |
|
504 * @stable ICU 3.4 |
|
505 */ |
|
506 U_STABLE UChar32 U_EXPORT2 |
|
507 utext_previous32(UText *ut); |
|
508 |
|
509 |
|
510 /** |
|
511 * Set the iteration index and return the code point at that index. |
|
512 * Leave the iteration index at the start of the following code point. |
|
513 * |
|
514 * This function is the most efficient and convenient way to |
|
515 * begin a forward iteration. The results are identical to those |
|
516 * from the sequence |
|
517 * \code |
|
518 * utext_setNativeIndex(); |
|
519 * utext_next32(); |
|
520 * \endcode |
|
521 * |
|
522 * @param ut the text to be accessed. |
|
523 * @param nativeIndex Iteration index, in the native units of the text provider. |
|
524 * @return Code point which starts at or before index, |
|
525 * or U_SENTINEL (-1) if it is out of bounds. |
|
526 * @stable ICU 3.4 |
|
527 */ |
|
528 U_STABLE UChar32 U_EXPORT2 |
|
529 utext_next32From(UText *ut, int64_t nativeIndex); |
|
530 |
|
531 |
|
532 |
|
533 /** |
|
534 * Set the iteration index, and return the code point preceding the |
|
535 * one specified by the initial index. Leave the iteration position |
|
536 * at the start of the returned code point. |
|
537 * |
|
538 * This function is the most efficient and convenient way to |
|
539 * begin a backwards iteration. |
|
540 * |
|
541 * @param ut the text to be accessed. |
|
542 * @param nativeIndex Iteration index in the native units of the text provider. |
|
543 * @return Code point preceding the one at the initial index, |
|
544 * or U_SENTINEL (-1) if it is out of bounds. |
|
545 * |
|
546 * @stable ICU 3.4 |
|
547 */ |
|
548 U_STABLE UChar32 U_EXPORT2 |
|
549 utext_previous32From(UText *ut, int64_t nativeIndex); |
|
550 |
|
551 /** |
|
552 * Get the current iterator position, which can range from 0 to |
|
553 * the length of the text. |
|
554 * The position is a native index into the input text, in whatever format it |
|
555 * may have (possibly UTF-8 for example), and may not always be the same as |
|
556 * the corresponding UChar (UTF-16) index. |
|
557 * The returned position will always be aligned to a code point boundary. |
|
558 * |
|
559 * @param ut the text to be accessed. |
|
560 * @return the current index position, in the native units of the text provider. |
|
561 * @stable ICU 3.4 |
|
562 */ |
|
563 U_STABLE int64_t U_EXPORT2 |
|
564 utext_getNativeIndex(const UText *ut); |
|
565 |
|
566 /** |
|
567 * Set the current iteration position to the nearest code point |
|
568 * boundary at or preceding the specified index. |
|
569 * The index is in the native units of the original input text. |
|
570 * If the index is out of range, it will be pinned to be within |
|
571 * the range of the input text. |
|
572 * <p> |
|
573 * It will usually be more efficient to begin an iteration |
|
574 * using the functions utext_next32From() or utext_previous32From() |
|
575 * rather than utext_setNativeIndex(). |
|
576 * <p> |
|
577 * Moving the index position to an adjacent character is best done |
|
578 * with utext_next32(), utext_previous32() or utext_moveIndex32(). |
|
579 * Attempting to do direct arithmetic on the index position is |
|
580 * complicated by the fact that the size (in native units) of a |
|
581 * character depends on the underlying representation of the character |
|
582 * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not |
|
583 * easily knowable. |
|
584 * |
|
585 * @param ut the text to be accessed. |
|
586 * @param nativeIndex the native unit index of the new iteration position. |
|
587 * @stable ICU 3.4 |
|
588 */ |
|
589 U_STABLE void U_EXPORT2 |
|
590 utext_setNativeIndex(UText *ut, int64_t nativeIndex); |
|
591 |
|
592 /** |
|
593 * Move the iterator position by delta code points. The number of code points |
|
594 * is a signed number; a negative delta will move the iterator backwards, |
|
595 * towards the start of the text. |
|
596 * <p> |
|
597 * The index is moved by <code>delta</code> code points |
|
598 * forward or backward, but no further backward than to 0 and |
|
599 * no further forward than to utext_nativeLength(). |
|
600 * The resulting index value will be in between 0 and length, inclusive. |
|
601 * |
|
602 * @param ut the text to be accessed. |
|
603 * @param delta the signed number of code points to move the iteration position. |
|
604 * @return TRUE if the position could be moved the requested number of positions while |
|
605 * staying within the range [0 - text length]. |
|
606 * @stable ICU 3.4 |
|
607 */ |
|
608 U_STABLE UBool U_EXPORT2 |
|
609 utext_moveIndex32(UText *ut, int32_t delta); |
|
610 |
|
611 /** |
|
612 * Get the native index of the character preceding the current position. |
|
613 * If the iteration position is already at the start of the text, zero |
|
614 * is returned. |
|
615 * The value returned is the same as that obtained from the following sequence, |
|
616 * but without the side effect of changing the iteration position. |
|
617 * |
|
618 * \code |
|
619 * UText *ut = whatever; |
|
620 * ... |
|
621 * utext_previous32(ut); |
|
622 * utext_getNativeIndex(ut); |
|
623 * \endcode |
|
624 * |
|
625 * This function is most useful during forwards iteration, where it will get the |
|
626 * native index of the character most recently returned from utext_next32(). |
|
627 * |
|
628 * @param ut the text to be accessed |
|
629 * @return the native index of the character preceding the current index position, |
|
630 * or zero if the current position is at the start of the text. |
|
631 * @stable ICU 3.6 |
|
632 */ |
|
633 U_STABLE int64_t U_EXPORT2 |
|
634 utext_getPreviousNativeIndex(UText *ut); |
|
635 |
|
636 |
|
637 /** |
|
638 * |
|
639 * Extract text from a UText into a UChar buffer. The range of text to be extracted |
|
640 * is specified in the native indices of the UText provider. These may not necessarily |
|
641 * be UTF-16 indices. |
|
642 * <p> |
|
643 * The size (number of 16 bit UChars) of the data to be extracted is returned. The |
|
644 * full number of UChars is returned, even when the extracted text is truncated |
|
645 * because the specified buffer size is too small. |
|
646 * <p> |
|
647 * The extracted string will (if you are a user) / must (if you are a text provider) |
|
648 * be NUL-terminated if there is sufficient space in the destination buffer. This |
|
649 * terminating NUL is not included in the returned length. |
|
650 * <p> |
|
651 * The iteration index is left at the position following the last extracted character. |
|
652 * |
|
653 * @param ut the UText from which to extract data. |
|
654 * @param nativeStart the native index of the first character to extract. |
|
655 * If the specified index is out of range, |
|
656 * it will be pinned to be within 0 <= index <= textLength. |
|
657 * @param nativeLimit the native string index of the position following the last |
|
658 * character to extract. If the specified index is out of range, |
|
659 * it will be pinned to be within 0 <= index <= textLength. |
|
660 * nativeLimit must be >= nativeStart. |
|
661 * @param dest the UChar (UTF-16) buffer into which the extracted text is placed |
|
662 * @param destCapacity The size, in UChars, of the destination buffer. May be zero |
|
663 * for precomputing the required size. |
|
664 * @param status receives any error status. |
|
665 * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the |
|
666 * buffer was too small. Returns number of UChars for preflighting. |
|
667 * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. |
|
668 * |
|
669 * @stable ICU 3.4 |
|
670 */ |
|
671 U_STABLE int32_t U_EXPORT2 |
|
672 utext_extract(UText *ut, |
|
673 int64_t nativeStart, int64_t nativeLimit, |
|
674 UChar *dest, int32_t destCapacity, |
|
675 UErrorCode *status); |
|
676 |
|
677 |
|
678 |
|
679 /************************************************************************************ |
|
680 * |
|
681 * #define inline versions of selected performance-critical text access functions |
|
682 * Caution: do not use auto increment++ or decrement-- expressions |
|
683 * as parameters to these macros. |
|
684 * |
|
685 * For most use, where there is no extreme performance constraint, the |
|
686 * normal, non-inline functions are a better choice. The resulting code |
|
687 * will be smaller, and, if the need ever arises, easier to debug. |
|
688 * |
|
689 * These are implemented as #defines rather than real functions |
|
690 * because there is no fully portable way to do inline functions in plain C. |
|
691 * |
|
692 ************************************************************************************/ |
|
693 |
|
694 #ifndef U_HIDE_INTERNAL_API |
|
/**
  * Inline (macro) version of utext_current32(), for performance-critical situations.
  *
  * Get the code point at the current iteration position of the UText.
  * Returns U_SENTINEL (-1) if the position is at the end of the
  * text.
  *
  * Fast path: when chunkOffset is below chunkLength and the value stored at
  * chunkContents[chunkOffset] is below 0xd800, that value is returned directly.
  * Every other case is delegated to the utext_current32() function.
  * The iteration position is not changed by the fast path.
  *
  * Caution: the argument is evaluated more than once. Do not pass an
  * expression with side effects (such as an auto increment or decrement).
  *
  * @param ut the text to be accessed (a UText *).
  * @return the code point at the current iteration position.
  *
  * @internal ICU 4.4 technology preview
  */
#define UTEXT_CURRENT32(ut)  \
    ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
    ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
|
707 #endif /* U_HIDE_INTERNAL_API */ |
|
708 |
|
/**
  * Inline (macro) version of utext_next32(), for performance-critical situations.
  *
  * Get the code point at the current iteration position of the UText, and
  * advance the position to the first index following the character.
  * This is a post-increment operation.
  * Returns U_SENTINEL (-1) if the position is at the end of the
  * text.
  *
  * Fast path: when chunkOffset is below chunkLength and the value stored at
  * chunkContents[chunkOffset] is below 0xd800, that value is returned and
  * chunkOffset is incremented by one. Every other case is delegated to the
  * utext_next32() function.
  *
  * Caution: the argument is evaluated more than once. Do not pass an
  * expression with side effects (such as an auto increment or decrement).
  *
  * @param ut the text to be accessed (a UText *).
  * @return the code point at the iteration position.
  *
  * @stable ICU 3.4
  */
#define UTEXT_NEXT32(ut)  \
    ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
    ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
|
723 |
|
/**
 * Macro form of utext_previous32(), for performance-critical situations.
 *
 * Moves the iterator position back to the character (code point) whose
 * index precedes the current position, and yields that character
 * (a pre-decrement operation).
 * Yields U_SENTINEL (-1) if the position is at the start of the text.
 *
 * The in-chunk shortcut is taken only when the position is past the start
 * of the current chunk and the preceding code unit is below 0xd800; every
 * other case is handed to utext_previous32().  The argument is evaluated
 * several times, so it must be free of side effects.
 *
 * @stable ICU 3.4
 */
#define UTEXT_PREVIOUS32(ut)  \
    (((ut)->chunkOffset <= 0 || \
      (ut)->chunkContents[(ut)->chunkOffset-1] >= 0xd800) ? \
        utext_previous32(ut) : (ut)->chunkContents[--((ut)->chunkOffset)])
|
738 |
|
/**
 * Macro form of utext_getNativeIndex(), for performance-critical situations.
 *
 * Yields the current iterator position, which can range from 0 to the
 * length of the text.
 * The position is a native index into the input text, in whatever format
 * it may have (possibly UTF-8, for example), and may not always be the same
 * as the corresponding UChar (UTF-16) index.
 * The returned position will always be aligned to a code point boundary.
 *
 * While the chunk offset does not exceed nativeIndexingLimit the result is
 * simply chunkNativeStart + chunkOffset; beyond that limit the provider's
 * mapOffsetToNative() function is called.  The argument is evaluated
 * several times, so it must be free of side effects.
 *
 * @stable ICU 3.6
 */
#define UTEXT_GETNATIVEINDEX(ut)  \
    ((ut)->chunkOffset > (ut)->nativeIndexingLimit ? \
        (ut)->pFuncs->mapOffsetToNative(ut) : \
        (ut)->chunkNativeStart+(ut)->chunkOffset)
|
755 |
|
/**
 * inline version of utext_setNativeIndex(), for performance-critical situations.
 *
 * Set the current iteration position to the nearest code point
 * boundary at or preceding the specified index.
 * The index is in the native units of the original input text.
 * If the index is out of range, it will be pinned to be within
 * the range of the input text.
 *
 * The in-chunk shortcut is taken only when the requested index maps to an
 * offset strictly below nativeIndexingLimit AND the code unit at that offset
 * is below 0xdc00.  An offset that lands on a code unit of 0xdc00 or above
 * (which includes every trail surrogate) is handed to utext_setNativeIndex(),
 * so the position is never left in the middle of a surrogate pair.  An index
 * equal to the limit is also handed to the function, because the unit at
 * that offset is not inspected here.
 *
 * Usage notes:
 *  - This expands to a brace-enclosed block, not an expression.  Do not use
 *    it as the unbraced body of an `if` that is followed by `else`.
 *  - `ut` and `ix` are evaluated more than once; they must be free of side
 *    effects.
 *
 * @stable ICU 3.8
 */
#define UTEXT_SETNATIVEINDEX(ut, ix)  \
    { int64_t utext_ix_offset_ = (ix) - (ut)->chunkNativeStart; \
      if (utext_ix_offset_>=0 && \
          utext_ix_offset_<(int64_t)(ut)->nativeIndexingLimit && \
          (ut)->chunkContents[utext_ix_offset_]<0xdc00) { \
          (ut)->chunkOffset=(int32_t)utext_ix_offset_; \
      } else { \
          utext_setNativeIndex((ut), (ix)); } }
|
773 |
|
774 |
|
775 |
|
776 /************************************************************************************ |
|
777 * |
|
778 * Functions related to writing or modifying the text. |
|
779 * These will work only with modifiable UTexts. Attempting to |
|
780 * modify a read-only UText will return an error status. |
|
781 * |
|
782 ************************************************************************************/ |
|
783 |
|
784 |
|
785 /** |
|
786 * Return TRUE if the text can be written (modified) with utext_replace() or |
|
787 * utext_copy(). For the text to be writable, the text provider must |
|
788 * be of a type that supports writing and the UText must not be frozen. |
|
789 * |
|
790 * Attempting to modify text when utext_isWriteable() is FALSE will fail - |
|
791 * the text will not be modified, and an error will be returned from the function |
|
792 * that attempted the modification. |
|
793 * |
|
794 * @param ut the UText to be tested. |
|
795 * @return TRUE if the text is modifiable. |
|
796 * |
|
797 * @see utext_freeze() |
|
798 * @see utext_replace() |
|
799 * @see utext_copy() |
|
800 * @stable ICU 3.4 |
|
801 * |
|
802 */ |
|
803 U_STABLE UBool U_EXPORT2 |
|
804 utext_isWritable(const UText *ut); |
|
805 |
|
806 |
|
807 /** |
|
808 * Test whether there is meta data associated with the text. |
|
809 * @see Replaceable::hasMetaData() |
|
810 * |
|
811 * @param ut The UText to be tested |
|
812 * @return TRUE if the underlying text includes meta data. |
|
813 * @stable ICU 3.4 |
|
814 */ |
|
815 U_STABLE UBool U_EXPORT2 |
|
816 utext_hasMetaData(const UText *ut); |
|
817 |
|
818 |
|
819 /** |
|
820 * Replace a range of the original text with a replacement text. |
|
821 * |
|
822 * Leaves the current iteration position at the position following the |
|
823 * newly inserted replacement text. |
|
824 * |
|
825 * This function is only available on UText types that support writing, |
|
826 * that is, ones where utext_isWritable() returns TRUE. |
|
827 * |
|
828 * When using this function, there should be only a single UText opened onto the |
|
829 * underlying native text string. Behavior after a replace operation |
|
830 * on a UText is undefined for any other additional UTexts that refer to the |
|
831 * modified string. |
|
832 * |
|
833 * @param ut the UText representing the text to be operated on. |
|
834 * @param nativeStart the native index of the start of the region to be replaced |
|
835 * @param nativeLimit the native index of the character following the region to be replaced. |
|
836 * @param replacementText pointer to the replacement text |
|
837 * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated. |
|
838 * @param status receives any error status. Possible errors include |
|
839 * U_NO_WRITE_PERMISSION |
|
840 * |
|
841 * @return The signed number of (native) storage units by which |
|
842 * the length of the text expanded or contracted. |
|
843 * |
|
844 * @stable ICU 3.4 |
|
845 */ |
|
846 U_STABLE int32_t U_EXPORT2 |
|
847 utext_replace(UText *ut, |
|
848 int64_t nativeStart, int64_t nativeLimit, |
|
849 const UChar *replacementText, int32_t replacementLength, |
|
850 UErrorCode *status); |
|
851 |
|
852 |
|
853 |
|
854 /** |
|
855 * |
|
856 * Copy or move a substring from one position to another within the text, |
|
857 * while retaining any metadata associated with the text. |
|
858 * This function is used to duplicate or reorder substrings. |
|
859 * The destination index must not overlap the source range. |
|
860 * |
|
861 * The text to be copied or moved is inserted at destIndex; |
|
862 * it does not replace or overwrite any existing text. |
|
863 * |
|
864 * The iteration position is left following the newly inserted text |
|
865 * at the destination position. |
|
866 * |
|
867 * This function is only available on UText types that support writing, |
|
868 * that is, ones where utext_isWritable() returns TRUE. |
|
869 * |
|
870 * When using this function, there should be only a single UText opened onto the |
|
871 * underlying native text string. Behavior after a copy operation |
|
872 * on a UText is undefined in any other additional UTexts that refer to the |
|
873 * modified string. |
|
874 * |
|
875 * @param ut The UText representing the text to be operated on. |
|
876 * @param nativeStart The native index of the start of the region to be copied or moved |
|
877 * @param nativeLimit The native index of the character position following the region |
|
878 * to be copied. |
|
879 * @param destIndex The native destination index to which the source substring is |
|
880 * copied or moved. |
|
881 * @param move If TRUE, then the substring is moved, not copied/duplicated. |
|
882 * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION |
|
883 * |
|
884 * @stable ICU 3.4 |
|
885 */ |
|
886 U_STABLE void U_EXPORT2 |
|
887 utext_copy(UText *ut, |
|
888 int64_t nativeStart, int64_t nativeLimit, |
|
889 int64_t destIndex, |
|
890 UBool move, |
|
891 UErrorCode *status); |
|
892 |
|
893 |
|
894 /** |
|
895 * <p> |
|
896 * Freeze a UText. This prevents any modification to the underlying text itself |
|
897 * by means of functions operating on this UText. |
|
898 * </p> |
|
899 * <p> |
|
900 * Once frozen, a UText can not be unfrozen. The intent is to ensure |
|
901 * that a the text underlying a frozen UText wrapper cannot be modified via that UText. |
|
902 * </p> |
|
903 * <p> |
|
904 * Caution: freezing a UText will disable changes made via the specific |
|
905 * frozen UText wrapper only; it will not have any effect on the ability to |
|
906 * directly modify the text by bypassing the UText. Any such backdoor modifications |
|
907 * are always an error while UText access is occuring because the underlying |
|
908 * text can get out of sync with UText's buffering. |
|
909 * </p> |
|
910 * |
|
911 * @param ut The UText to be frozen. |
|
912 * @see utext_isWritable() |
|
913 * @stable ICU 3.6 |
|
914 */ |
|
915 U_STABLE void U_EXPORT2 |
|
916 utext_freeze(UText *ut); |
|
917 |
|
918 |
|
/**
 * UText provider properties (bit field indexes).
 *
 * @see UText
 * @stable ICU 3.4
 */
enum {
    /**
     * Determining the length of the text is potentially time consuming
     * for the provider.
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,

    /**
     * Text chunks stay valid and usable until the text object is modified
     * or deleted, rather than only until the next call to the access()
     * function (which is the default).
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_STABLE_CHUNKS = 2,

    /**
     * The provider supports modifying the text via the replace() and
     * copy() functions.
     * @see Replaceable
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_WRITABLE = 3,

    /**
     * Meta data is associated with the text.
     * @see Replaceable::hasMetaData()
     * @stable ICU 3.4
     */
    UTEXT_PROVIDER_HAS_META_DATA = 4,

    /**
     * The text provider owns the text storage.
     * Generally occurs as the result of a deep clone of the UText.
     * When the UText is closed, the associated text must also be
     * closed/deleted/freed/ whatever is appropriate.
     * @stable ICU 3.6
     */
    UTEXT_PROVIDER_OWNS_TEXT = 5
};
|
960 |
|
961 /** |
|
962 * Function type declaration for UText.clone(). |
|
963 * |
|
964 * clone a UText. Much like opening a UText where the source text is itself |
|
965 * another UText. |
|
966 * |
|
967 * A deep clone will copy both the UText data structures and the underlying text. |
|
968 * The original and cloned UText will operate completely independently; modifications |
|
969 * made to the text in one will not effect the other. Text providers are not |
|
970 * required to support deep clones. The user of clone() must check the status return |
|
971 * and be prepared to handle failures. |
|
972 * |
|
973 * A shallow clone replicates only the UText data structures; it does not make |
|
974 * a copy of the underlying text. Shallow clones can be used as an efficient way to |
|
975 * have multiple iterators active in a single text string that is not being |
|
976 * modified. |
|
977 * |
|
978 * A shallow clone operation must not fail except for truly exceptional conditions such |
|
979 * as memory allocation failures. |
|
980 * |
|
981 * A UText and its clone may be safely concurrently accessed by separate threads. |
|
982 * This is true for both shallow and deep clones. |
|
983 * It is the responsibility of the Text Provider to ensure that this thread safety |
|
984 * constraint is met. |
|
985 |
|
986 * |
|
987 * @param dest A UText struct to be filled in with the result of the clone operation, |
|
988 * or NULL if the clone function should heap-allocate a new UText struct. |
|
989 * @param src The UText to be cloned. |
|
990 * @param deep TRUE to request a deep clone, FALSE for a shallow clone. |
|
991 * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR |
|
992 * should be returned if the text provider is unable to clone the |
|
993 * original text. |
|
994 * @return The newly created clone, or NULL if the clone operation failed. |
|
995 * |
|
996 * @stable ICU 3.4 |
|
997 */ |
|
998 typedef UText * U_CALLCONV |
|
999 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); |
|
1000 |
|
1001 |
|
1002 /** |
|
1003 * Function type declaration for UText.nativeLength(). |
|
1004 * |
|
1005 * @param ut the UText to get the length of. |
|
1006 * @return the length, in the native units of the original text string. |
|
1007 * @see UText |
|
1008 * @stable ICU 3.4 |
|
1009 */ |
|
1010 typedef int64_t U_CALLCONV |
|
1011 UTextNativeLength(UText *ut); |
|
1012 |
|
1013 /** |
|
1014 * Function type declaration for UText.access(). Get the description of the text chunk |
|
1015 * containing the text at a requested native index. The UText's iteration |
|
1016 * position will be left at the requested index. If the index is out |
|
1017 * of bounds, the iteration position will be left at the start or end |
|
1018 * of the string, as appropriate. |
|
1019 * |
|
1020 * Chunks must begin and end on code point boundaries. A single code point |
|
1021 * comprised of multiple storage units must never span a chunk boundary. |
|
1022 * |
|
1023 * |
|
1024 * @param ut the UText being accessed. |
|
1025 * @param nativeIndex Requested index of the text to be accessed. |
|
1026 * @param forward If TRUE, then the returned chunk must contain text |
|
1027 * starting from the index, so that start<=index<limit. |
|
1028 * If FALSE, then the returned chunk must contain text |
|
1029 * before the index, so that start<index<=limit. |
|
1030 * @return True if the requested index could be accessed. The chunk |
|
1031 * will contain the requested text. |
|
1032 * False value if a chunk cannot be accessed |
|
1033 * (the requested index is out of bounds). |
|
1034 * |
|
1035 * @see UText |
|
1036 * @stable ICU 3.4 |
|
1037 */ |
|
1038 typedef UBool U_CALLCONV |
|
1039 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); |
|
1040 |
|
1041 /** |
|
1042 * Function type declaration for UText.extract(). |
|
1043 * |
|
1044 * Extract text from a UText into a UChar buffer. The range of text to be extracted |
|
1045 * is specified in the native indices of the UText provider. These may not necessarily |
|
1046 * be UTF-16 indices. |
|
1047 * <p> |
|
1048 * The size (number of 16 bit UChars) in the data to be extracted is returned. The |
|
1049 * full amount is returned, even when the specified buffer size is smaller. |
|
1050 * <p> |
|
1051 * The extracted string will (if you are a user) / must (if you are a text provider) |
|
1052 * be NUL-terminated if there is sufficient space in the destination buffer. |
|
1053 * |
|
1054 * @param ut the UText from which to extract data. |
|
1055 * @param nativeStart the native index of the first characer to extract. |
|
1056 * @param nativeLimit the native string index of the position following the last |
|
1057 * character to extract. |
|
1058 * @param dest the UChar (UTF-16) buffer into which the extracted text is placed |
|
1059 * @param destCapacity The size, in UChars, of the destination buffer. May be zero |
|
1060 * for precomputing the required size. |
|
1061 * @param status receives any error status. |
|
1062 * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for |
|
1063 * preflighting. |
|
1064 * @return Number of UChars in the data. Does not include a trailing NUL. |
|
1065 * |
|
1066 * @stable ICU 3.4 |
|
1067 */ |
|
1068 typedef int32_t U_CALLCONV |
|
1069 UTextExtract(UText *ut, |
|
1070 int64_t nativeStart, int64_t nativeLimit, |
|
1071 UChar *dest, int32_t destCapacity, |
|
1072 UErrorCode *status); |
|
1073 |
|
1074 /** |
|
1075 * Function type declaration for UText.replace(). |
|
1076 * |
|
1077 * Replace a range of the original text with a replacement text. |
|
1078 * |
|
1079 * Leaves the current iteration position at the position following the |
|
1080 * newly inserted replacement text. |
|
1081 * |
|
1082 * This function need only be implemented on UText types that support writing. |
|
1083 * |
|
1084 * When using this function, there should be only a single UText opened onto the |
|
1085 * underlying native text string. The function is responsible for updating the |
|
1086 * text chunk within the UText to reflect the updated iteration position, |
|
1087 * taking into account any changes to the underlying string's structure caused |
|
1088 * by the replace operation. |
|
1089 * |
|
1090 * @param ut the UText representing the text to be operated on. |
|
1091 * @param nativeStart the index of the start of the region to be replaced |
|
1092 * @param nativeLimit the index of the character following the region to be replaced. |
|
1093 * @param replacementText pointer to the replacement text |
|
1094 * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. |
|
1095 * @param status receives any error status. Possible errors include |
|
1096 * U_NO_WRITE_PERMISSION |
|
1097 * |
|
1098 * @return The signed number of (native) storage units by which |
|
1099 * the length of the text expanded or contracted. |
|
1100 * |
|
1101 * @stable ICU 3.4 |
|
1102 */ |
|
1103 typedef int32_t U_CALLCONV |
|
1104 UTextReplace(UText *ut, |
|
1105 int64_t nativeStart, int64_t nativeLimit, |
|
1106 const UChar *replacementText, int32_t replacmentLength, |
|
1107 UErrorCode *status); |
|
1108 |
|
1109 /** |
|
1110 * Function type declaration for UText.copy(). |
|
1111 * |
|
1112 * Copy or move a substring from one position to another within the text, |
|
1113 * while retaining any metadata associated with the text. |
|
1114 * This function is used to duplicate or reorder substrings. |
|
1115 * The destination index must not overlap the source range. |
|
1116 * |
|
1117 * The text to be copied or moved is inserted at destIndex; |
|
1118 * it does not replace or overwrite any existing text. |
|
1119 * |
|
1120 * This function need only be implemented for UText types that support writing. |
|
1121 * |
|
1122 * When using this function, there should be only a single UText opened onto the |
|
1123 * underlying native text string. The function is responsible for updating the |
|
1124 * text chunk within the UText to reflect the updated iteration position, |
|
1125 * taking into account any changes to the underlying string's structure caused |
|
1126 * by the replace operation. |
|
1127 * |
|
1128 * @param ut The UText representing the text to be operated on. |
|
1129 * @param nativeStart The index of the start of the region to be copied or moved |
|
1130 * @param nativeLimit The index of the character following the region to be replaced. |
|
1131 * @param nativeDest The destination index to which the source substring is copied or moved. |
|
1132 * @param move If TRUE, then the substring is moved, not copied/duplicated. |
|
1133 * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION |
|
1134 * |
|
1135 * @stable ICU 3.4 |
|
1136 */ |
|
1137 typedef void U_CALLCONV |
|
1138 UTextCopy(UText *ut, |
|
1139 int64_t nativeStart, int64_t nativeLimit, |
|
1140 int64_t nativeDest, |
|
1141 UBool move, |
|
1142 UErrorCode *status); |
|
1143 |
|
1144 /** |
|
1145 * Function type declaration for UText.mapOffsetToNative(). |
|
1146 * Map from the current UChar offset within the current text chunk to |
|
1147 * the corresponding native index in the original source text. |
|
1148 * |
|
1149 * This is required only for text providers that do not use native UTF-16 indexes. |
|
1150 * |
|
1151 * @param ut the UText. |
|
1152 * @return Absolute (native) index corresponding to chunkOffset in the current chunk. |
|
1153 * The returned native index should always be to a code point boundary. |
|
1154 * |
|
1155 * @stable ICU 3.4 |
|
1156 */ |
|
1157 typedef int64_t U_CALLCONV |
|
1158 UTextMapOffsetToNative(const UText *ut); |
|
1159 |
|
1160 /** |
|
1161 * Function type declaration for UText.mapIndexToUTF16(). |
|
1162 * Map from a native index to a UChar offset within a text chunk. |
|
1163 * Behavior is undefined if the native index does not fall within the |
|
1164 * current chunk. |
|
1165 * |
|
1166 * This function is required only for text providers that do not use native UTF-16 indexes. |
|
1167 * |
|
1168 * @param ut The UText containing the text chunk. |
|
1169 * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit. |
|
1170 * @return Chunk-relative UTF-16 offset corresponding to the specified native |
|
1171 * index. |
|
1172 * |
|
1173 * @stable ICU 3.4 |
|
1174 */ |
|
1175 typedef int32_t U_CALLCONV |
|
1176 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); |
|
1177 |
|
1178 |
|
1179 /** |
|
1180 * Function type declaration for UText.utextClose(). |
|
1181 * |
|
1182 * A Text Provider close function is only required for provider types that make |
|
1183 * allocations in their open function (or other functions) that must be |
|
1184 * cleaned when the UText is closed. |
|
1185 * |
|
1186 * The allocation of the UText struct itself and any "extra" storage |
|
1187 * associated with the UText is handled by the common UText implementation |
|
1188 * and does not require provider specific cleanup in a close function. |
|
1189 * |
|
1190 * Most UText provider implementations do not need to implement this function. |
|
1191 * |
|
1192 * @param ut A UText object to be closed. |
|
1193 * |
|
1194 * @stable ICU 3.4 |
|
1195 */ |
|
1196 typedef void U_CALLCONV |
|
1197 UTextClose(UText *ut); |
|
1198 |
|
1199 |
|
1200 /** |
|
1201 * (public) Function dispatch table for UText. |
|
1202 * Conceptually very much like a C++ Virtual Function Table. |
|
1203 * This struct defines the organization of the table. |
|
1204 * Each text provider implementation must provide an |
|
1205 * actual table that is initialized with the appropriate functions |
|
1206 * for the type of text being handled. |
|
1207 * @stable ICU 3.6 |
|
1208 */ |
|
1209 struct UTextFuncs { |
|
1210 /** |
|
1211 * (public) Function table size, sizeof(UTextFuncs) |
|
1212 * Intended for use should the table grow to accomodate added |
|
1213 * functions in the future, to allow tests for older format |
|
1214 * function tables that do not contain the extensions. |
|
1215 * |
|
1216 * Fields are placed for optimal alignment on |
|
1217 * 32/64/128-bit-pointer machines, by normally grouping together |
|
1218 * 4 32-bit fields, |
|
1219 * 4 pointers, |
|
1220 * 2 64-bit fields |
|
1221 * in sequence. |
|
1222 * @stable ICU 3.6 |
|
1223 */ |
|
1224 int32_t tableSize; |
|
1225 |
|
1226 /** |
|
1227 * (private) Alignment padding. |
|
1228 * Do not use, reserved for use by the UText framework only. |
|
1229 * @internal |
|
1230 */ |
|
1231 int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; |
|
1232 |
|
1233 |
|
1234 /** |
|
1235 * (public) Function pointer for UTextClone |
|
1236 * |
|
1237 * @see UTextClone |
|
1238 * @stable ICU 3.6 |
|
1239 */ |
|
1240 UTextClone *clone; |
|
1241 |
|
1242 /** |
|
1243 * (public) function pointer for UTextLength |
|
1244 * May be expensive to compute! |
|
1245 * |
|
1246 * @see UTextLength |
|
1247 * @stable ICU 3.6 |
|
1248 */ |
|
1249 UTextNativeLength *nativeLength; |
|
1250 |
|
1251 /** |
|
1252 * (public) Function pointer for UTextAccess. |
|
1253 * |
|
1254 * @see UTextAccess |
|
1255 * @stable ICU 3.6 |
|
1256 */ |
|
1257 UTextAccess *access; |
|
1258 |
|
1259 /** |
|
1260 * (public) Function pointer for UTextExtract. |
|
1261 * |
|
1262 * @see UTextExtract |
|
1263 * @stable ICU 3.6 |
|
1264 */ |
|
1265 UTextExtract *extract; |
|
1266 |
|
1267 /** |
|
1268 * (public) Function pointer for UTextReplace. |
|
1269 * |
|
1270 * @see UTextReplace |
|
1271 * @stable ICU 3.6 |
|
1272 */ |
|
1273 UTextReplace *replace; |
|
1274 |
|
1275 /** |
|
1276 * (public) Function pointer for UTextCopy. |
|
1277 * |
|
1278 * @see UTextCopy |
|
1279 * @stable ICU 3.6 |
|
1280 */ |
|
1281 UTextCopy *copy; |
|
1282 |
|
1283 /** |
|
1284 * (public) Function pointer for UTextMapOffsetToNative. |
|
1285 * |
|
1286 * @see UTextMapOffsetToNative |
|
1287 * @stable ICU 3.6 |
|
1288 */ |
|
1289 UTextMapOffsetToNative *mapOffsetToNative; |
|
1290 |
|
1291 /** |
|
1292 * (public) Function pointer for UTextMapNativeIndexToUTF16. |
|
1293 * |
|
1294 * @see UTextMapNativeIndexToUTF16 |
|
1295 * @stable ICU 3.6 |
|
1296 */ |
|
1297 UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; |
|
1298 |
|
1299 /** |
|
1300 * (public) Function pointer for UTextClose. |
|
1301 * |
|
1302 * @see UTextClose |
|
1303 * @stable ICU 3.6 |
|
1304 */ |
|
1305 UTextClose *close; |
|
1306 |
|
1307 /** |
|
1308 * (private) Spare function pointer |
|
1309 * @internal |
|
1310 */ |
|
1311 UTextClose *spare1; |
|
1312 |
|
1313 /** |
|
1314 * (private) Spare function pointer |
|
1315 * @internal |
|
1316 */ |
|
1317 UTextClose *spare2; |
|
1318 |
|
1319 /** |
|
1320 * (private) Spare function pointer |
|
1321 * @internal |
|
1322 */ |
|
1323 UTextClose *spare3; |
|
1324 |
|
1325 }; |
|
/**
 * Convenience name for struct UTextFuncs, the function dispatch table
 * for UText.
 * @see UTextFuncs
 */
typedef struct UTextFuncs UTextFuncs;
|
1331 |
|
1332 /** |
|
1333 * UText struct. Provides the interface between the generic UText access code |
|
1334 * and the UText provider code that works on specific kinds of |
|
1335 * text (UTF-8, noncontiguous UTF-16, whatever.) |
|
1336 * |
|
1337 * Applications that are using predefined types of text providers |
|
1338 * to pass text data to ICU services will have no need to view the |
|
1339 * internals of the UText structs that they open. |
|
1340 * |
|
1341 * @stable ICU 3.6 |
|
1342 */ |
|
1343 struct UText { |
|
1344 /** |
|
1345 * (private) Magic. Used to help detect when UText functions are handed |
|
1346 * invalid or unitialized UText structs. |
|
1347 * utext_openXYZ() functions take an initialized, |
|
1348 * but not necessarily open, UText struct as an |
|
1349 * optional fill-in parameter. This magic field |
|
1350 * is used to check for that initialization. |
|
1351 * Text provider close functions must NOT clear |
|
1352 * the magic field because that would prevent |
|
1353 * reuse of the UText struct. |
|
1354 * @internal |
|
1355 */ |
|
1356 uint32_t magic; |
|
1357 |
|
1358 |
|
1359 /** |
|
1360 * (private) Flags for managing the allocation and freeing of |
|
1361 * memory associated with this UText. |
|
1362 * @internal |
|
1363 */ |
|
1364 int32_t flags; |
|
1365 |
|
1366 |
|
1367 /** |
|
1368 * Text provider properties. This set of flags is maintainted by the |
|
1369 * text provider implementation. |
|
1370 * @stable ICU 3.4 |
|
1371 */ |
|
1372 int32_t providerProperties; |
|
1373 |
|
1374 /** |
|
1375 * (public) sizeOfStruct=sizeof(UText) |
|
1376 * Allows possible backward compatible extension. |
|
1377 * |
|
1378 * @stable ICU 3.4 |
|
1379 */ |
|
1380 int32_t sizeOfStruct; |
|
1381 |
|
1382 /* ------ 16 byte alignment boundary ----------- */ |
|
1383 |
|
1384 |
|
1385 /** |
|
1386 * (protected) Native index of the first character position following |
|
1387 * the current chunk. |
|
1388 * @stable ICU 3.6 |
|
1389 */ |
|
1390 int64_t chunkNativeLimit; |
|
1391 |
|
1392 /** |
|
1393 * (protected) Size in bytes of the extra space (pExtra). |
|
1394 * @stable ICU 3.4 |
|
1395 */ |
|
1396 int32_t extraSize; |
|
1397 |
|
1398 /** |
|
1399 * (protected) The highest chunk offset where native indexing and |
|
1400 * chunk (UTF-16) indexing correspond. For UTF-16 sources, value |
|
1401 * will be equal to chunkLength. |
|
1402 * |
|
1403 * @stable ICU 3.6 |
|
1404 */ |
|
1405 int32_t nativeIndexingLimit; |
|
1406 |
|
1407 /* ---- 16 byte alignment boundary------ */ |
|
1408 |
|
1409 /** |
|
1410 * (protected) Native index of the first character in the text chunk. |
|
1411 * @stable ICU 3.6 |
|
1412 */ |
|
1413 int64_t chunkNativeStart; |
|
1414 |
|
1415 /** |
|
1416 * (protected) Current iteration position within the text chunk (UTF-16 buffer). |
|
1417 * This is the index to the character that will be returned by utext_next32(). |
|
1418 * @stable ICU 3.6 |
|
1419 */ |
|
1420 int32_t chunkOffset; |
|
1421 |
|
1422 /** |
|
1423 * (protected) Length the text chunk (UTF-16 buffer), in UChars. |
|
1424 * @stable ICU 3.6 |
|
1425 */ |
|
1426 int32_t chunkLength; |
|
1427 |
|
1428 /* ---- 16 byte alignment boundary-- */ |
|
1429 |
|
1430 |
|
1431 /** |
|
1432 * (protected) pointer to a chunk of text in UTF-16 format. |
|
1433 * May refer either to original storage of the source of the text, or |
|
1434 * if conversion was required, to a buffer owned by the UText. |
|
1435 * @stable ICU 3.6 |
|
1436 */ |
|
1437 const UChar *chunkContents; |
|
1438 |
|
1439 /** |
|
1440 * (public) Pointer to Dispatch table for accessing functions for this UText. |
|
1441 * @stable ICU 3.6 |
|
1442 */ |
|
1443 const UTextFuncs *pFuncs; |
|
1444 |
|
1445 /** |
|
1446 * (protected) Pointer to additional space requested by the |
|
1447 * text provider during the utext_open operation. |
|
1448 * @stable ICU 3.4 |
|
1449 */ |
|
1450 void *pExtra; |
|
1451 |
|
1452 /** |
|
1453 * (protected) Pointer to string or text-containin object or similar. |
|
1454 * This is the source of the text that this UText is wrapping, in a format |
|
1455 * that is known to the text provider functions. |
|
1456 * @stable ICU 3.4 |
|
1457 */ |
|
1458 const void *context; |
|
1459 |
|
1460 /* --- 16 byte alignment boundary--- */ |
|
1461 |
|
1462 /** |
|
1463 * (protected) Pointer fields available for use by the text provider. |
|
1464 * Not used by UText common code. |
|
1465 * @stable ICU 3.6 |
|
1466 */ |
|
1467 const void *p; |
|
1468 /** |
|
1469 * (protected) Pointer fields available for use by the text provider. |
|
1470 * Not used by UText common code. |
|
1471 * @stable ICU 3.6 |
|
1472 */ |
|
1473 const void *q; |
|
1474 /** |
|
1475 * (protected) Pointer fields available for use by the text provider. |
|
1476 * Not used by UText common code. |
|
1477 * @stable ICU 3.6 |
|
1478 */ |
|
1479 const void *r; |
|
1480 |
|
1481 /** |
|
1482 * Private field reserved for future use by the UText framework |
|
1483 * itself. This is not to be touched by the text providers. |
|
1484 * @internal ICU 3.4 |
|
1485 */ |
|
1486 void *privP; |
|
1487 |
|
1488 |
|
1489 /* --- 16 byte alignment boundary--- */ |
|
1490 |
|
1491 |
|
1492 /** |
|
1493 * (protected) Integer field reserved for use by the text provider. |
|
1494 * Not used by the UText framework, or by the client (user) of the UText. |
|
1495 * @stable ICU 3.4 |
|
1496 */ |
|
1497 int64_t a; |
|
1498 |
|
1499 /** |
|
1500 * (protected) Integer field reserved for use by the text provider. |
|
1501 * Not used by the UText framework, or by the client (user) of the UText. |
|
1502 * @stable ICU 3.4 |
|
1503 */ |
|
1504 int32_t b; |
|
1505 |
|
1506 /** |
|
1507 * (protected) Integer field reserved for use by the text provider. |
|
1508 * Not used by the UText framework, or by the client (user) of the UText. |
|
1509 * @stable ICU 3.4 |
|
1510 */ |
|
1511 int32_t c; |
|
1512 |
|
1513 /* ---- 16 byte alignment boundary---- */ |
|
1514 |
|
1515 |
|
1516 /** |
|
1517 * Private field reserved for future use by the UText framework |
|
1518 * itself. This is not to be touched by the text providers. |
|
1519 * @internal ICU 3.4 |
|
1520 */ |
|
1521 int64_t privA; |
|
1522 /** |
|
1523 * Private field reserved for future use by the UText framework |
|
1524 * itself. This is not to be touched by the text providers. |
|
1525 * @internal ICU 3.4 |
|
1526 */ |
|
1527 int32_t privB; |
|
1528 /** |
|
1529 * Private field reserved for future use by the UText framework |
|
1530 * itself. This is not to be touched by the text providers. |
|
1531 * @internal ICU 3.4 |
|
1532 */ |
|
1533 int32_t privC; |
|
1534 }; |
|
1535 |
|
1536 |
|
1537 /** |
|
1538 * Common function for use by Text Provider implementations to allocate and/or initialize |
|
1539 * a new UText struct. To be called in the implementation of utext_open() functions. |
|
1540 * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. |
|
1541 * If the supplied UText is already open, the provider's close function will be called |
|
1542 * so that the struct can be reused by the open that is in progress. |
|
1543 * |
|
1544 * @param ut pointer to a UText struct to be re-used, or null if a new UText |
|
1545 * should be allocated. |
|
1546 * @param extraSpace The amount of additional space to be allocated as part |
|
1547 * of this UText, for use by types of providers that require |
|
1548 * additional storage. |
|
1549 * @param status Errors are returned here. |
|
1550 * @return pointer to the UText, allocated if necessary, with extra space set up if requested. |
|
1551 * @stable ICU 3.4 |
|
1552 */ |
|
1553 U_STABLE UText * U_EXPORT2 |
|
1554 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); |
|
1555 |
|
/*
 * Deliberately NOT wrapped in #ifndef U_HIDE_INTERNAL_API: the @stable
 * UTEXT_INITIALIZER macro expands to UTEXT_MAGIC, so hiding this constant
 * would make the stable initializer fail to compile for clients that build
 * with U_HIDE_INTERNAL_API defined.
 */
/**
 * @internal
 * Value used to help identify correctly initialized UText structs.
 * Note: must be publicly visible so that UTEXT_INITIALIZER can access it.
 */
enum {
    UTEXT_MAGIC = 0x345ad82c
};
|
1566 |
|
/**
 * Initializer for local (stack) instances of a UText struct.
 * A UText struct must be initialized before it is passed to one of the
 * utext_open functions.
 *
 * The values are positional and follow the field order of struct UText:
 * the magic field is set to UTEXT_MAGIC, sizeOfStruct to sizeof(UText),
 * every other integer field to 0 and every pointer field to NULL.
 *
 * @stable ICU 3.6
 */
#define UTEXT_INITIALIZER {                                             \
    UTEXT_MAGIC,        /* magic                                  */    \
    0,                  /* flags                                  */    \
    0,                  /* providerProperties                     */    \
    sizeof(UText),      /* sizeOfStruct                           */    \
    0, 0, 0,            /* chunkNativeLimit, extraSize,                 \
                           nativeIndexingLimit                    */    \
    0, 0, 0,            /* chunkNativeStart, chunkOffset,               \
                           chunkLength                            */    \
    NULL,               /* chunkContents                          */    \
    NULL,               /* pFuncs                                 */    \
    NULL,               /* pExtra                                 */    \
    NULL,               /* context                                */    \
    NULL, NULL, NULL,   /* p, q, r                                */    \
    NULL,               /* privP                                  */    \
    0, 0, 0,            /* a, b, c                                */    \
    0, 0, 0             /* privA, privB, privC                    */    \
}
|
1594 |
|
1595 |
|
1596 U_CDECL_END |
|
1597 |
|
1598 |
|
1599 |
|
1600 #endif |