intl/icu/source/common/unicode/utext.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2004-2012, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  utext.h
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2004oct06
    14 *   created by: Markus W. Scherer
    15 */
    17 #ifndef __UTEXT_H__
    18 #define __UTEXT_H__
    20 /**
    21  * \file
    22  * \brief C API: Abstract Unicode Text API
    23  *
    24  * The Text Access API provides a means to allow text that is stored in alternative
    25  * formats to work with ICU services.  ICU normally operates on text that is
    26  * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
    27  * UnicodeString for C++ APIs.
    28  *
    29  * ICU Text Access allows other formats, such as UTF-8 or non-contiguous
    30  * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
    31  *
    32  * There are three general classes of usage for UText:
    33  *
    34  *     Application Level Use.  This is the simplest usage - applications would
    35  *     use one of the utext_open() functions on their input text, and pass
    36  *     the resulting UText to the desired ICU service.
    37  *
    38  *     Second is usage in ICU Services, such as break iteration, that will need to
    39  *     operate on input presented to them as a UText.  These implementations
    40  *     will need to use the iteration and related UText functions to gain
    41  *     access to the actual text.
    42  *
    43  *     The third class of UText users are "text providers."  These are the
    44  *     UText implementations for the various text storage formats.  An application
    45  *     or system with a unique text storage format can implement a set of
    46  *     UText provider functions for that format, which will then allow
    47  *     ICU services to operate on that format.
    48  *
    49  *
    50  * <em>Iterating over text</em>
    51  *
    52  * Here is sample code for a forward iteration over the contents of a UText
    53  *
    54  * \code
    55  *    UChar32  c;
    56  *    UText    *ut = whatever();
    57  *
    58  *    for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
    59  *       // do whatever with the codepoint c here.
    60  *    }
    61  * \endcode
    62  *
    63  * And here is similar code to iterate in the reverse direction, from the end
    64  * of the text towards the beginning.
    65  *
    66  * \code
    67  *    UChar32  c;
    68  *    UText    *ut = whatever();
    69  *    int64_t  textLength = utext_nativeLength(ut);
    70  *    for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
    71  *       // do whatever with the codepoint c here.
    72  *    }
    73  * \endcode
    74  *
    75  * <em>Characters and Indexing</em>
    76  *
    77  * Indexing into text by UText functions is nearly always in terms of the native
    78  * indexing of the underlying text storage.  The storage format could be UTF-8
    79  * or UTF-32, for example.  When coding to the UText access API, no assumptions
    80  * can be made regarding the size of characters, or how far an index
    81  * may move when iterating between characters.
    82  *
    83  * All indices supplied to UText functions are pinned to the length of the
    84  * text.  An out-of-bounds index is not considered to be an error, but is
    85  * adjusted to be in the range  0 <= index <= length of input text.
    86  *
    87  *
    88  * When an index position is returned from a UText function, it will be
    89  * a native index to the underlying text.  In the case of multi-unit characters,
    90  * it will  always refer to the first position of the character,
    91  * never to the interior.  This is essentially the same thing as saying that
    92  * a returned index will always point to a boundary between characters.
    93  *
    94  * When a native index is supplied to a UText function, all indices that
    95  * refer to any part of a multi-unit character representation are considered
    96  * to be equivalent.  In the case of multi-unit characters, an incoming index
    97  * will be logically normalized to refer to the start of the character.
    98  * 
    99  * It is possible to test whether a native index is on a code point boundary
   100  * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
   101  * If the index is returned unchanged, it was on a code point boundary.  If
   102  * an adjusted index is returned, the original index referred to the
   103  * interior of a character.
   104  *
   105  * <em>Conventions for calling UText functions</em>
   106  *
   107  * Most UText access functions have as their first parameter a (UText *) pointer,
   108  * which specifies the UText to be used.  Unless otherwise noted, the
   109  * pointer must refer to a valid, open UText.  Attempting to
   110  * use a closed UText or passing a NULL pointer is a programming error and
   111  * will produce undefined results or NULL pointer exceptions.
   112  * 
   113  * The utext_open family of functions can either open an existing (closed)
   114  * UText, or heap allocate a new UText.  Here is sample code for creating
   115  * a stack-allocated UText.
   116  *
   117  * \code
   118  *    char     *s = whatever();  // A utf-8 string 
   119  *    UErrorCode status = U_ZERO_ERROR;
   120  *    UText    ut = UTEXT_INITIALIZER;
   121  *    utext_openUTF8(&ut, s, -1, &status);
   122  *    if (U_FAILURE(status)) {
   123  *        // error handling
   124  *    } else {
   125  *        // work with the UText
   126  *    }
   127  * \endcode
   128  *
   129  * Any existing UText passed to an open function _must_ have been initialized, 
   130  * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
   131  * by an open function.  Passing NULL will cause the open function to
   132  * heap-allocate and fully initialize a new UText.
   133  *
   134  */
   138 #include "unicode/utypes.h"
   139 #include "unicode/uchar.h"
   140 #if U_SHOW_CPLUSPLUS_API
   141 #include "unicode/localpointer.h"
   142 #include "unicode/rep.h"
   143 #include "unicode/unistr.h"
   144 #include "unicode/chariter.h"
   145 #endif
   148 U_CDECL_BEGIN
   150 struct UText;
   151 typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
   154 /***************************************************************************************
   155  *
   156  *   C Functions for creating UText wrappers around various kinds of text strings.
   157  *
   158  ****************************************************************************************/
   161 /**
   162   * Close function for UText instances.
   163   * Cleans up, releases any resources being held by an open UText.
   164   * <p>
   165   *   If the UText was originally allocated by one of the utext_open functions,
   166   *   the storage associated with the utext will also be freed.
   167   *   If the UText storage originated with the application, as it would with
   168   *   a local or static instance, the storage will not be deleted.
   169   *
   170   *   An open UText can be reset to refer to a new string by using one of the utext_open()
   171   *   functions without first closing the UText.  
   172   *
   173   * @param ut  The UText to be closed.
   174   * @return    NULL if the UText struct was deleted by the close.  If the UText struct
   175   *            was originally provided by the caller to the open function, it is
   176   *            returned by this function, and may be safely used again in
   177   *            a subsequent utext_open.
   178   *
   179   * @stable ICU 3.4
   180   */
   181 U_STABLE UText * U_EXPORT2
   182 utext_close(UText *ut);
   184 #if U_SHOW_CPLUSPLUS_API
   186 U_NAMESPACE_BEGIN
   188 /**
   189  * \class LocalUTextPointer
   190  * "Smart pointer" class, closes a UText via utext_close().
   191  * For most methods see the LocalPointerBase base class.
   192  *
   193  * @see LocalPointerBase
   194  * @see LocalPointer
   195  * @stable ICU 4.4
   196  */
   197 U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);
   199 U_NAMESPACE_END
   201 #endif
   203 /**
   204  * Open a read-only UText implementation for UTF-8 strings.
   205  * 
   206  * \htmlonly
   207  * Any invalid UTF-8 in the input will be handled in this way:
   208  * a sequence of bytes that has the form of a truncated, but otherwise valid,
   209  * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. 
   210  * Any other illegal bytes will each be replaced by a \uFFFD.
   211  * \endhtmlonly
   212  * 
   213  * @param ut     Pointer to a UText struct.  If NULL, a new UText will be created.
   214  *               If non-NULL, must refer to an initialized UText struct, which will then
   215  *               be reset to reference the specified UTF-8 string.
   216  * @param s      A UTF-8 string.  Must not be NULL.
   217  * @param length The length of the UTF-8 string in bytes, or -1 if the string is
   218  *               zero terminated.
   219  * @param status Errors are returned here.
   220  * @return       A pointer to the UText.  If a pre-allocated UText was provided, it
   221  *               will always be used and returned.
   222  * @stable ICU 3.4
   223  */
   224 U_STABLE UText * U_EXPORT2
   225 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
   228 /**
   229  * Open a read-only UText for UChar * string.
   230  * 
   231  * @param ut     Pointer to a UText struct.  If NULL, a new UText will be created.
   232  *               If non-NULL, must refer to an initialized UText struct, which will then
   233  *               be reset to reference the specified UChar string.
   234  * @param s      A UChar (UTF-16) string
   235  * @param length The number of UChars in the input string, or -1 if the string is
   236  *               zero terminated.
   237  * @param status Errors are returned here.
   238  * @return       A pointer to the UText.  If a pre-allocated UText was provided, it
   239  *               will always be used and returned.
   240  * @stable ICU 3.4
   241  */
   242 U_STABLE UText * U_EXPORT2
   243 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
   246 #if U_SHOW_CPLUSPLUS_API
   247 /**
   248  * Open a writable UText for a non-const UnicodeString. 
   249  * 
   250  * @param ut      Pointer to a UText struct.  If NULL, a new UText will be created.
   251  *                 If non-NULL, must refer to an initialized UText struct, which will then
   252  *                 be reset to reference the specified input string.
   253  * @param s       A UnicodeString.
   254  * @param status Errors are returned here.
   255  * @return        Pointer to the UText.  If a UText was supplied as input, this
   256  *                 will always be used and returned.
   257  * @stable ICU 3.4
   258  */
   259 U_STABLE UText * U_EXPORT2
   260 utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
   263 /**
   264  * Open a UText for a const UnicodeString.   The resulting UText will not be writable.
   265  * 
   266  * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
   267  *               If non-NULL, must refer to an initialized UText struct, which will then
   268  *               be reset to reference the specified input string.
   269  * @param s      A const UnicodeString to be wrapped.
   270  * @param status Errors are returned here.
   271  * @return       Pointer to the UText.  If a UText was supplied as input, this
   272  *               will always be used and returned.
   273  * @stable ICU 3.4
   274  */
   275 U_STABLE UText * U_EXPORT2
   276 utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
   279 /**
   280  * Open a writable UText implementation for an ICU Replaceable object.
   281  * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
   282  *               If non-NULL, must refer to an already existing UText, which will then
   283  *               be reset to reference the specified replaceable text.
   284  * @param rep    A Replaceable text object.
   285  * @param status Errors are returned here.
   286  * @return       Pointer to the UText.  If a UText was supplied as input, this
   287  *               will always be used and returned.
   288  * @see Replaceable
   289  * @stable ICU 3.4
   290  */
   291 U_STABLE UText * U_EXPORT2
   292 utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
   294 /**
   295  * Open a UText implementation over an ICU CharacterIterator.
   296  * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
   297  *               If non-NULL, must refer to an already existing UText, which will then
   298  *               be reset to reference the specified character iterator.
   299  * @param ci     A Character Iterator.
   300  * @param status Errors are returned here.
   301  * @return       Pointer to the UText.  If a UText was supplied as input, this
   302  *               will always be used and returned.
   303  * @see CharacterIterator
   304  * @stable ICU 3.4
   305  */
   306 U_STABLE UText * U_EXPORT2
   307 utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
   309 #endif
   312 /**
   313   *  Clone a UText.  This is much like opening a UText where the source text is itself
   314   *  another UText.
   315   *
   316   *  A deep clone will copy both the UText data structures and the underlying text.
   317   *  The original and cloned UText will operate completely independently; modifications
   318   *  made to the text in one will not affect the other.  Text providers are not
   319   *  required to support deep clones.  The user of clone() must check the status return
   320   *  and be prepared to handle failures.
   321   *
   322   *  The standard UText implementations for UTF8, UChar *, UnicodeString and
   323   *  Replaceable all support deep cloning.
   324   *
   325   *  The UText returned from a deep clone will be writable, assuming that the text
   326   *  provider is able to support writing, even if the source UText had been made
   327   *  non-writable by means of utext_freeze().
   328   *
   329   *  A shallow clone replicates only the UText data structures; it does not make
   330   *  a copy of the underlying text.  Shallow clones can be used as an efficient way to 
   331   *  have multiple iterators active in a single text string that is not being
   332   *  modified.
   333   *
   334   *  A shallow clone operation will not fail, barring truly exceptional conditions such
   335   *  as memory allocation failures.
   336   *
   337   *  Shallow UText clones should be avoided if the UText functions that modify the
   338   *  text are expected to be used, either on the original or the cloned UText.
   339   *  Any such modifications  can cause unpredictable behavior.  Read Only
   340   *  shallow clones provide some protection against errors of this type by
   341   *  disabling text modification via the cloned UText.
   342   *
   343   *  A shallow clone made with the readOnly parameter == FALSE will preserve the 
   344   *  utext_isWritable() state of the source object.  Note, however, that
   345   *  write operations must be avoided while more than one UText exists that refer
   346   *  to the same underlying text.
   347   *
   348   *  A UText and its clone may be safely concurrently accessed by separate threads.
   349   *  This is true for read access only with shallow clones, and for both read and
   350   *  write access with deep clones.
   351   *  It is the responsibility of the Text Provider to ensure that this thread safety
   352   *  constraint is met.
   353   *
   354   *  @param dest   A UText struct to be filled in with the result of the clone operation,
   355   *                or NULL if the clone function should heap-allocate a new UText struct.
   356   *                If non-NULL, must refer to an already existing UText, which will then
   357   *                be reset to become the clone.
   358   *  @param src    The UText to be cloned.
   359   *  @param deep   TRUE to request a deep clone, FALSE for a shallow clone.
   360   *  @param readOnly TRUE to request that the cloned UText have read only access to the 
   361   *                underlying text.  
   363   *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
   364   *                will be returned if the text provider is unable to clone the
   365   *                original text.
   366   *  @return       The newly created clone, or NULL if the clone operation failed.
   367   *  @stable ICU 3.4
   368   */
   369 U_STABLE UText * U_EXPORT2
   370 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
   373 /**
   374   *  Compare two UText objects for equality.
   375   *  UTexts are equal if they are iterating over the same text, and
   376   *    have the same iteration position within the text.
   377   *    If either or both of the parameters are NULL, the comparison is FALSE.
   378   *
   379   *  @param a   The first of the two UTexts to compare.
   380   *  @param b   The other UText to be compared.
   381   *  @return    TRUE if the two UTexts are equal.
   382   *  @stable ICU 3.6
   383   */
   384 U_STABLE UBool U_EXPORT2
   385 utext_equals(const UText *a, const UText *b);
   388 /*****************************************************************************
   389  *
   390  *   Functions to work with the text represented by a UText wrapper
   391  *
   392  *****************************************************************************/
   394 /**
   395   * Get the length of the text.  Depending on the characteristics
   396   * of the underlying text representation, this may be expensive.  
   397   * @see  utext_isLengthExpensive()
   398   *
   399   *
   400   * @param ut  the text to be accessed.
   401   * @return the length of the text, expressed in native units.
   402   *
   403   * @stable ICU 3.4
   404   */
   405 U_STABLE int64_t U_EXPORT2
   406 utext_nativeLength(UText *ut);
   408 /**
   409  *  Return TRUE if calculating the length of the text could be expensive.
   410  *  Finding the length of NUL terminated strings is considered to be expensive.
   411  *
   412  *  Note that the value of this function may change
   413  *  as the result of other operations on a UText.
   414  *  Once the length of a string has been discovered, it will no longer
   415  *  be expensive to report it.
   416  *
   417  * @param ut the text to be accessed.
   418  * @return TRUE if determining the length of the text could be time consuming.
   419  * @stable ICU 3.4
   420  */
   421 U_STABLE UBool U_EXPORT2
   422 utext_isLengthExpensive(const UText *ut);
   424 /**
   425  * Returns the code point at the requested index,
   426  * or U_SENTINEL (-1) if it is out of bounds.
   427  *
   428  * If the specified index points to the interior of a multi-unit
   429  * character - one of the trail bytes of a UTF-8 sequence, for example -
   430  * the complete code point will be returned.
   431  *
   432  * The iteration position will be set to the start of the returned code point.
   433  *
   434  * This function is roughly equivalent to the sequence
   435  *    utext_setNativeIndex(index);
   436  *    utext_current32();
   437  * (There is a subtle difference if the index is out of bounds by being less than zero - 
   438  * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32()
   439  * will return the char at zero.  utext_char32At(negative index), on the other hand, will
   440  * return the U_SENTINEL value of -1.)
   441  * 
   442  * @param ut the text to be accessed
   443  * @param nativeIndex the native index of the character to be accessed.  If the index points
   444  *        to other than the first unit of a multi-unit character, it will be adjusted
   445  *        to the start of the character.
   446  * @return the code point at the specified index.
   447  * @stable ICU 3.4
   448  */
   449 U_STABLE UChar32 U_EXPORT2
   450 utext_char32At(UText *ut, int64_t nativeIndex);
   453 /**
   454  *
   455  * Get the code point at the current iteration position,
   456  * or U_SENTINEL (-1) if the iteration has reached the end of
   457  * the input text.
   458  *
   459  * @param ut the text to be accessed.
   460  * @return the Unicode code point at the current iterator position.
   461  * @stable ICU 3.4
   462  */
   463 U_STABLE UChar32 U_EXPORT2
   464 utext_current32(UText *ut);
   467 /**
   468  * Get the code point at the current iteration position of the UText, and
   469  * advance the position to the first index following the character.
   470  *
   471  * If the position is at the end of the text (the index following
   472  * the last character, which is also the length of the text), 
   473  * return U_SENTINEL (-1) and do not advance the index. 
   474  *
   475  * This is a post-increment operation.
   476  *
   477  * An inline macro version of this function, UTEXT_NEXT32(), 
   478  * is available for performance critical use.
   479  *
   480  * @param ut the text to be accessed.
   481  * @return the Unicode code point at the iteration position.
   482  * @see UTEXT_NEXT32
   483  * @stable ICU 3.4
   484  */
   485 U_STABLE UChar32 U_EXPORT2
   486 utext_next32(UText *ut);
   489 /**
   490  *  Move the iterator position to the character (code point) whose
   491  *  index precedes the current position, and return that character.
   492  *  This is a pre-decrement operation.
   493  *
   494  *  If the initial position is at the start of the text (index of 0) 
   495  *  return U_SENTINEL (-1), and leave the position unchanged.
   496  *
   497  *  An inline macro version of this function, UTEXT_PREVIOUS32(), 
   498  *  is available for performance critical use.
   499  *
   500  *  @param ut the text to be accessed.
   501  *  @return the previous UChar32 code point, or U_SENTINEL (-1) 
   502  *          if the iteration has reached the start of the text.
   503  *  @see UTEXT_PREVIOUS32
   504  *  @stable ICU 3.4
   505  */
   506 U_STABLE UChar32 U_EXPORT2
   507 utext_previous32(UText *ut);
   510 /**
   511   * Set the iteration index and return the code point at that index. 
   512   * Leave the iteration index at the start of the following code point.
   513   *
   514   * This function is the most efficient and convenient way to
   515   * begin a forward iteration.  The results are identical to those
   516   * from the sequence
   517   * \code
   518   *    utext_setNativeIndex();
   519   *    utext_next32();
   520   * \endcode
   521   *
   522   *  @param ut the text to be accessed.
   523   *  @param nativeIndex Iteration index, in the native units of the text provider.
   524   *  @return Code point which starts at or before index,
   525   *         or U_SENTINEL (-1) if it is out of bounds.
   526   * @stable ICU 3.4
   527   */
   528 U_STABLE UChar32 U_EXPORT2
   529 utext_next32From(UText *ut, int64_t nativeIndex);
   533 /**
   534   * Set the iteration index, and return the code point preceding the
   535   * one specified by the initial index.  Leave the iteration position
   536   * at the start of the returned code point.
   537   *
   538   * This function is the most efficient and convenient way to
   539   * begin a backwards iteration.
   540   *
   541   * @param ut the text to be accessed.
   542   * @param nativeIndex Iteration index in the native units of the text provider.
   543   * @return Code point preceding the one at the initial index,
   544   *         or U_SENTINEL (-1) if it is out of bounds.
   545   *
   546   * @stable ICU 3.4
   547   */
   548 U_STABLE UChar32 U_EXPORT2
   549 utext_previous32From(UText *ut, int64_t nativeIndex);
   551 /**
   552   * Get the current iterator position, which can range from 0 to 
   553   * the length of the text.
   554   * The position is a native index into the input text, in whatever format it
   555   * may have (possibly UTF-8 for example), and may not always be the same as
   556   * the corresponding UChar (UTF-16) index.
   557   * The returned position will always be aligned to a code point boundary. 
   558   *
   559   * @param ut the text to be accessed.
   560   * @return the current index position, in the native units of the text provider.
   561   * @stable ICU 3.4
   562   */
   563 U_STABLE int64_t U_EXPORT2
   564 utext_getNativeIndex(const UText *ut);
   566 /**
   567  * Set the current iteration position to the nearest code point
   568  * boundary at or preceding the specified index.
   569  * The index is in the native units of the original input text.
   570  * If the index is out of range, it will be pinned to be within
   571  * the range of the input text.
   572  * <p>
   573  * It will usually be more efficient to begin an iteration
   574  * using the functions utext_next32From() or utext_previous32From()
   575  * rather than utext_setNativeIndex().
   576  * <p>
   577  * Moving the index position to an adjacent character is best done
   578  * with utext_next32(), utext_previous32() or utext_moveIndex32().
   579  * Attempting to do direct arithmetic on the index position is
   580  * complicated by the fact that the size (in native units) of a
   581  * character depends on the underlying representation of the character
   582  * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
   583  * easily knowable.
   584  *
   585  * @param ut the text to be accessed.
   586  * @param nativeIndex the native unit index of the new iteration position.
   587  * @stable ICU 3.4
   588  */
   589 U_STABLE void U_EXPORT2
   590 utext_setNativeIndex(UText *ut, int64_t nativeIndex);
   592 /**
   593  * Move the iterator position by delta code points.  The number of code points
   594  * is a signed number; a negative delta will move the iterator backwards,
   595  * towards the start of the text.
   596  * <p>
   597  * The index is moved by <code>delta</code> code points
   598  * forward or backward, but no further backward than to 0 and
   599  * no further forward than to utext_nativeLength().
   600  * The resulting index value will be in between 0 and length, inclusive.
   601  *
   602  * @param ut the text to be accessed.
   603  * @param delta the signed number of code points to move the iteration position.
   604  * @return TRUE if the position could be moved the requested number of positions while
   605  *              staying within the range [0 - text length].
   606  * @stable ICU 3.4
   607  */
   608 U_STABLE UBool U_EXPORT2
   609 utext_moveIndex32(UText *ut, int32_t delta);
   611 /**
   612  * Get the native index of the character preceding the current position.
   613  * If the iteration position is already at the start of the text, zero
   614  * is returned.
   615  * The value returned is the same as that obtained from the following sequence,
   616  * but without the side effect of changing the iteration position.
   617  *   
   618  * \code
   619  *    UText  *ut = whatever;
   620  *      ...
   621  *    utext_previous32(ut);
   622  *    utext_getNativeIndex(ut);
   623  * \endcode
   624  *
   625  * This function is most useful during forwards iteration, where it will get the
   626  *   native index of the character most recently returned from utext_next32().
   627  *
   628  * @param ut the text to be accessed
   629  * @return the native index of the character preceding the current index position,
   630  *         or zero if the current position is at the start of the text.
   631  * @stable ICU 3.6
   632  */
   633 U_STABLE int64_t U_EXPORT2
   634 utext_getPreviousNativeIndex(UText *ut); 
   637 /**
   638  *
   639  * Extract text from a UText into a UChar buffer.  The range of text to be extracted
   640  * is specified in the native indices of the UText provider.  These may not necessarily
   641  * be UTF-16 indices.
   642  * <p>
   643  * The size (number of 16 bit UChars) of the data to be extracted is returned.  The
   644  * full number of UChars is returned, even when the extracted text is truncated
   645  * because the specified buffer size is too small.
   646  * <p>
   647  * The extracted string will (if you are a user) / must (if you are a text provider)
   648  * be NUL-terminated if there is sufficient space in the destination buffer.  This
   649  * terminating NUL is not included in the returned length.
   650  * <p>
   651  * The iteration index is left at the position following the last extracted character.
   652  *
   653  * @param  ut    the UText from which to extract data.
   654  * @param  nativeStart the native index of the first character to extract.
   655  *               If the specified index is out of range,
   656  *               it will be pinned to be within 0 <= index <= textLength.
   657  * @param  nativeLimit the native string index of the position following the last
   658  *               character to extract.  If the specified index is out of range,
   659  *               it will be pinned to be within 0 <= index <= textLength.
   660  *               nativeLimit must be >= nativeStart.
   661  * @param  dest  the UChar (UTF-16) buffer into which the extracted text is placed
   662  * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
   663  *               for precomputing the required size.
   664  * @param  status receives any error status.
   665  *         U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the 
   666  *         buffer was too small.  Returns number of UChars for preflighting.
   667  * @return Number of UChars in the data to be extracted.  Does not include a trailing NUL.
   668  *
   669  * @stable ICU 3.4
   670  */
   671 U_STABLE int32_t U_EXPORT2
   672 utext_extract(UText *ut,
   673              int64_t nativeStart, int64_t nativeLimit,
   674              UChar *dest, int32_t destCapacity,
   675              UErrorCode *status);
   679 /************************************************************************************
   680  *
   681  *  #define inline versions of selected performance-critical text access functions
   682  *          Caution:  do not use auto increment++ or decrement-- expressions
   683  *                    as parameters to these macros.
   684  *
   685  *          For most use, where there is no extreme performance constraint, the
   686  *          normal, non-inline functions are a better choice.  The resulting code
   687  *          will be smaller, and, if the need ever arises, easier to debug.
   688  *
   689  *          These are implemented as #defines rather than real functions
   690  *          because there is no fully portable way to do inline functions in plain C.
   691  *
   692  ************************************************************************************/
   694 #ifndef U_HIDE_INTERNAL_API
   695 /**
   696  * Macro form of utext_current32(), for performance-critical situations.
   697  *
   698  * Yields the code point at the current iteration position of the UText,
   699  * or U_SENTINEL (-1) if the position is at the end of the text.  A UChar below
   700  * 0xd800 inside the current chunk is read inline; otherwise utext_current32() is called.
   701  *
   702  * @internal ICU 4.4 technology preview
   703  */
   704 #define UTEXT_CURRENT32(ut)  \
   705     (((ut)->chunkOffset < (ut)->chunkLength && (ut)->chunkContents[(ut)->chunkOffset] < 0xd800) ? \
   706         (ut)->chunkContents[(ut)->chunkOffset] : utext_current32(ut))
   707 #endif  /* U_HIDE_INTERNAL_API */
   709 /**
   710  * Macro form of utext_next32(), for performance-critical situations.
   711  *
   712  * Fetches the code point at the current iteration position of the UText and
   713  * then moves the position to the first index following that character
   714  * (a post-increment operation).  A UChar below 0xd800 inside the current
   715  * chunk is handled inline; every other case is passed to utext_next32().
   716  * Returns U_SENTINEL (-1) if the position is at the end of the text.
   717  *
   718  * @stable ICU 3.4
   719  */
   720 #define UTEXT_NEXT32(ut)  \
   721     (((ut)->chunkOffset < (ut)->chunkLength && (ut)->chunkContents[(ut)->chunkOffset] < 0xd800) ? \
   722         (ut)->chunkContents[(ut)->chunkOffset++] : utext_next32(ut))
   724 /**
   725  * Macro form of utext_previous32(), for performance-critical situations.
   726  *
   727  *  Steps the iterator back to the character (code point) whose index
   728  *  precedes the current position and yields that character (a pre-decrement
   729  *  operation).  A preceding UChar below 0xd800 is handled inline; otherwise utext_previous32() is called.
   730  *  Returns U_SENTINEL (-1) if the position is at the start of the  text.
   731  *
   732  * @stable ICU 3.4
   733  */
   734 #define UTEXT_PREVIOUS32(ut)  \
   735     (((ut)->chunkOffset > 0 && (ut)->chunkContents[(ut)->chunkOffset - 1] < 0xd800) ? \
   736         (ut)->chunkContents[--(ut)->chunkOffset] : \
   737         utext_previous32(ut))
   739 /**
   740   *  Macro form of utext_getNativeIndex(), for performance-critical situations.
   741   *
   742   * Yields the current iterator position, which can range from 0 to
   743   * the length of the text.
   744   * The position is a native index into the input text, in whatever format it
   745   * may have (possibly UTF-8 for example), and may not always be the same as
   746   * the corresponding UChar (UTF-16) index.  Past nativeIndexingLimit the
   747   * provider's mapOffsetToNative() is used.  The result is always aligned to a code point boundary.
   748   *
   749   * @stable ICU 3.6
   750   */
   751 #define UTEXT_GETNATIVEINDEX(ut) \
   752     ((ut)->chunkOffset > (ut)->nativeIndexingLimit ? \
   753         (ut)->pFuncs->mapOffsetToNative(ut) : \
   754         (ut)->chunkNativeStart + (ut)->chunkOffset)
   756 /**
   757   *  inline version of utext_setNativeIndex(), for performance-critical situations.
   758   *
   759   * Set the current iteration position to the nearest code point
   760   * boundary at or preceding the specified index.
   761   * The index is in the native units of the original input text.
   762   * If the index is out of range, it will be pinned to the range of the input text.
   763   * Both macro arguments are evaluated more than once.
   764   *
   765   * @stable ICU 3.8
   766   */
   767 #define UTEXT_SETNATIVEINDEX(ut, ix)                       \
   768     { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
   769       /* Inline only when the offset is inside the directly indexable part of the   */ \
   770       /* chunk and the UChar there is below the trail surrogate range; otherwise    */ \
   771       /* the function must locate the code point boundary (or pin the index).       */ \
   772       if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && \
   773           (ut)->chunkContents[__offset]<0xdc00) { \
   774           (ut)->chunkOffset=(int32_t)__offset; \
   775       } else { utext_setNativeIndex((ut), (ix)); } }
   776 /************************************************************************************
   777  *
   778  *   Functions related to writing or modifying the text.
   779  *   These will work only with modifiable UTexts.  Attempting to
   780  *   modify a read-only UText will return an error status.
   781  *
   782  ************************************************************************************/
   785 /**
   786  *  Return TRUE if the text can be written (modified) with utext_replace() or
   787  *  utext_copy().  For the text to be writable, the text provider must
   788  *  be of a type that supports writing and the UText must not be frozen.
   789  *
   790  *  Attempting to modify text when utext_isWritable() is FALSE will fail -
   791  *  the text will not be modified, and an error will be returned from the function
   792  *  that attempted the modification.
   793  *
   794  * @param  ut   the UText to be tested.
   795  * @return TRUE if the text is modifiable.
   796  *
   797  * @see    utext_freeze()
   798  * @see    utext_replace()
   799  * @see    utext_copy()
   800  * @stable ICU 3.4
   801  *
   802  */
   803 U_STABLE UBool U_EXPORT2
   804 utext_isWritable(const UText *ut);
   807 /**
   808   * Test whether there is meta data associated with the text.
   809   * @see Replaceable::hasMetaData()
   810   * @see UTEXT_PROVIDER_HAS_META_DATA
   811   * @param ut The UText to be tested
   812   * @return TRUE if the underlying text includes meta data.
   813   * @stable ICU 3.4
   814   */
   815 U_STABLE UBool U_EXPORT2
   816 utext_hasMetaData(const UText *ut);
   819 /**
   820  * Replace a range of the original text with a replacement text.
   821  *
   822  * Leaves the current iteration position at the position following the
   823  *  newly inserted replacement text.
   824  *
   825  * This function is only available on UText types that support writing,
   826  * that is, ones where utext_isWritable() returns TRUE.
   827  *
   828  * When using this function, there should be only a single UText opened onto the
   829  * underlying native text string.  Behavior after a replace operation
   830  * on a UText is undefined for any other additional UTexts that refer to the
   831  * modified string.
   832  *
   833  * @param ut               the UText representing the text to be operated on.
   834  * @param nativeStart      the native index of the start of the region to be replaced
   835  * @param nativeLimit      the native index of the character following the region to be replaced.
   836  * @param replacementText  pointer to the replacement text
   837  * @param replacementLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
   838  * @param status           receives any error status.  Possible errors include
   839  *                         U_NO_WRITE_PERMISSION
   840  *
   841  * @return The signed number of (native) storage units by which
   842  *         the length of the text expanded or contracted.
   843  *
   844  * @stable ICU 3.4
   845  */
   846 U_STABLE int32_t U_EXPORT2
   847 utext_replace(UText *ut,
   848              int64_t nativeStart, int64_t nativeLimit,
   849              const UChar *replacementText, int32_t replacementLength,
   850              UErrorCode *status);
   854 /**
   855  *
   856  * Copy or move a substring from one position to another within the text,
   857  * while retaining any metadata associated with the text.
   858  * This function is used to duplicate or reorder substrings.
   859  * The destination index must not overlap the source range.
   860  *
   861  * The text to be copied or moved is inserted at destIndex;
   862  * it does not replace or overwrite any existing text.
   863  *
   864  * The iteration position is left following the newly inserted text
   865  * at the destination position.
   866  *
   867  * This function is only available on UText types that support writing,
   868  * that is, ones where utext_isWritable() returns TRUE.
   869  *
   870  * When using this function, there should be only a single UText opened onto the
   871  * underlying native text string.  Behavior after a copy operation
   872  * on a UText is undefined for any other additional UTexts that refer to the
   873  * modified string.
   874  *
   875  * @param ut           The UText representing the text to be operated on.
   876  * @param nativeStart  The native index of the start of the region to be copied or moved
   877  * @param nativeLimit  The native index of the character position following the region
   878  *                     to be copied or moved.
   879  * @param destIndex    The native destination index to which the source substring is
   880  *                     copied or moved.
   881  * @param move         If TRUE, then the substring is moved, not copied/duplicated.
   882  * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
   883  *
   884  * @stable ICU 3.4
   885  */
   886 U_STABLE void U_EXPORT2
   887 utext_copy(UText *ut,
   888           int64_t nativeStart, int64_t nativeLimit,
   889           int64_t destIndex,
   890           UBool move,
   891           UErrorCode *status);
   894 /**
   895   *  <p>
   896   *  Freeze a UText.  This prevents any modification to the underlying text itself
   897   *  by means of functions operating on this UText.
   898   *  </p>
   899   *  <p>
   900   *  Once frozen, a UText can not be unfrozen.  The intent is to ensure
   901   *  that the text underlying a frozen UText wrapper cannot be modified via that UText.
   902   *  </p>
   903   *  <p>
   904   *  Caution:  freezing a UText will disable changes made via the specific
   905   *   frozen UText wrapper only; it will not have any effect on the ability to
   906   *   directly modify the text by bypassing the UText.  Any such backdoor modifications
   907   *   are always an error while UText access is occurring because the underlying
   908   *   text can get out of sync with UText's buffering.
   909   *  </p>
   910   *
   911   *  @param ut  The UText to be frozen.
   912   *  @see   utext_isWritable()
   913   *  @stable ICU 3.6
   914   */
   915 U_STABLE void U_EXPORT2
   916 utext_freeze(UText *ut);
   919 /**
   920  * UText provider properties (bit field indexes).
   921  * Each value is a bit index (a bit position), not a bit mask.
   922  * @see UText
   923  * @stable ICU 3.4
   924  */
   925 enum {
   926     /**
   927      * It is potentially time consuming for the provider to determine the length of the text.
   928      * @stable ICU 3.4
   929      */
   930     UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
   931     /**
   932      * Text chunks remain valid and usable until the text object is modified or
   933      * deleted, not just until the next time the access() function is called
   934      * (which is the default).
   935      * @stable ICU 3.4
   936      */
   937     UTEXT_PROVIDER_STABLE_CHUNKS = 2,
   938     /**
   939      * The provider supports modifying the text via the replace() and copy()
   940      * functions.
   941      * @see Replaceable
   942      * @stable ICU 3.4
   943      */
   944     UTEXT_PROVIDER_WRITABLE = 3,
   945     /**
   946      * There is meta data associated with the text.
   947      * @see Replaceable::hasMetaData()
   948      * @stable ICU 3.4
   949      */
   950     UTEXT_PROVIDER_HAS_META_DATA = 4,
   951     /**
   952      * Text provider owns the text storage.
   953      *  Generally occurs as the result of a deep clone of the UText.
   954      *  When closing the UText, the associated text must
   955      *  also be closed, deleted or freed, whichever is appropriate.
   956      * @stable ICU 3.6
   957      */
   958      UTEXT_PROVIDER_OWNS_TEXT = 5
   959 };
   961 /**
   962   * Function type declaration for UText.clone().
   963   *
   964   *  clone a UText.  Much like opening a UText where the source text is itself
   965   *  another UText.
   966   *
   967   *  A deep clone will copy both the UText data structures and the underlying text.
   968   *  The original and cloned UText will operate completely independently; modifications
   969   *  made to the text in one will not affect the other.  Text providers are not
   970   *  required to support deep clones.  The user of clone() must check the status return
   971   *  and be prepared to handle failures.
   972   *
   973   *  A shallow clone replicates only the UText data structures; it does not make
   974   *  a copy of the underlying text.  Shallow clones can be used as an efficient way to
   975   *  have multiple iterators active in a single text string that is not being
   976   *  modified.
   977   *
   978   *  A shallow clone operation must not fail except for truly exceptional conditions such
   979   *  as memory allocation failures.
   980   *
   981   *  A UText and its clone may be safely concurrently accessed by separate threads.
   982   *  This is true for both shallow and deep clones.
   983   *  It is the responsibility of the Text Provider to ensure that this thread safety
   984   *  constraint is met.
   986   *
   987   *  @param dest   A UText struct to be filled in with the result of the clone operation,
   988   *                or NULL if the clone function should heap-allocate a new UText struct.
   989   *  @param src    The UText to be cloned.
   990   *  @param deep   TRUE to request a deep clone, FALSE for a shallow clone.
   991   *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
   992   *                should be returned if the text provider is unable to clone the
   993   *                original text.
   994   *  @return       The newly created clone, or NULL if the clone operation failed.
   995   *
   996   * @stable ICU 3.4
   997   */
   998 typedef UText * U_CALLCONV
   999 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
  1002 /**
  1003  * Function type declaration for UText.nativeLength().
  1004  * The length may be expensive to compute (see UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE).
  1005  * @param ut the UText to get the length of.
  1006  * @return the length, in the native units of the original text string.
  1007  * @see UText
  1008  * @stable ICU 3.4
  1009  */
  1010 typedef int64_t U_CALLCONV
  1011 UTextNativeLength(UText *ut);
  1013 /**
  1014  * Function type declaration for UText.access().  Get the description of the text chunk
  1015  *  containing the text at a requested native index.  The UText's iteration
  1016  *  position will be left at the requested index.  If the index is out
  1017  *  of bounds, the iteration position will be left at the start or end
  1018  *  of the string, as appropriate.
  1019  *
  1020  *  Chunks must begin and end on code point boundaries.  A single code point
  1021  *  comprised of multiple storage units must never span a chunk boundary.
  1022  *
  1024  * @param ut          the UText being accessed.
  1025  * @param nativeIndex Requested index of the text to be accessed.
  1026  * @param forward     If TRUE, then the returned chunk must contain text
  1027  *                    starting from the index, so that chunkNativeStart<=index<chunkNativeLimit.
  1028  *                    If FALSE, then the returned chunk must contain text
  1029  *                    before the index, so that chunkNativeStart<index<=chunkNativeLimit.
  1030  * @return            True if the requested index could be accessed.  The chunk
  1031  *                    will contain the requested text.
  1032  *                    False value if a chunk cannot be accessed
  1033  *                    (the requested index is out of bounds).
  1034  *
  1035  * @see UText
  1036  * @stable ICU 3.4
  1037  */
  1038 typedef UBool U_CALLCONV
  1039 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
  1041 /**
  1042  * Function type declaration for UText.extract().
  1043  *
  1044  * Extract text from a UText into a UChar buffer.  The range of text to be extracted
  1045  * is specified in the native indices of the UText provider.  These may not necessarily
  1046  * be UTF-16 indices.
  1047  * <p>
  1048  * The size (number of 16 bit UChars) in the data to be extracted is returned.  The
  1049  * full amount is returned, even when the specified buffer size is smaller.
  1050  * <p>
  1051  * The extracted string will (if you are a user) / must (if you are a text provider)
  1052  * be NUL-terminated if there is sufficient space in the destination buffer.
  1053  *
  1054  * @param  ut            the UText from which to extract data.
  1055  * @param  nativeStart   the native index of the first character to extract.
  1056  * @param  nativeLimit   the native string index of the position following the last
  1057  *                       character to extract.
  1058  * @param  dest          the UChar (UTF-16) buffer into which the extracted text is placed
  1059  * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
  1060  *                       for precomputing the required size.
  1061  * @param  status        receives any error status.
  1062  *                       If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
  1063  *                       preflighting.
  1064  * @return Number of UChars in the data.  Does not include a trailing NUL.
  1065  *
  1066  * @stable ICU 3.4
  1067  */
  1068 typedef int32_t U_CALLCONV
  1069 UTextExtract(UText *ut,
  1070              int64_t nativeStart, int64_t nativeLimit,
  1071              UChar *dest, int32_t destCapacity,
  1072              UErrorCode *status);
  1074 /**
  1075  * Function type declaration for UText.replace().
  1076  *
  1077  * Replace a range of the original text with a replacement text.
  1078  *
  1079  * Leaves the current iteration position at the position following the
  1080  *  newly inserted replacement text.
  1081  *
  1082  * This function need only be implemented on UText types that support writing.
  1083  *
  1084  * When using this function, there should be only a single UText opened onto the
  1085  * underlying native text string.  The function is responsible for updating the
  1086  * text chunk within the UText to reflect the updated iteration position,
  1087  * taking into account any changes to the underlying string's structure caused
  1088  * by the replace operation.
  1089  *
  1090  * @param ut               the UText representing the text to be operated on.
  1091  * @param nativeStart      the index of the start of the region to be replaced
  1092  * @param nativeLimit      the index of the character following the region to be replaced.
  1093  * @param replacementText  pointer to the replacement text
  1094  * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
  1095  * @param status           receives any error status.  Possible errors include
  1096  *                         U_NO_WRITE_PERMISSION
  1097  *
  1098  * @return The signed number of (native) storage units by which
  1099  *         the length of the text expanded or contracted.
  1100  *
  1101  * @stable ICU 3.4
  1102  */
  1103 typedef int32_t U_CALLCONV
  1104 UTextReplace(UText *ut,
  1105              int64_t nativeStart, int64_t nativeLimit,
  1106              const UChar *replacementText, int32_t replacmentLength,
  1107              UErrorCode *status);
  1109 /**
  1110  * Function type declaration for UText.copy().
  1111  *
  1112  * Copy or move a substring from one position to another within the text,
  1113  * while retaining any metadata associated with the text.
  1114  * This function is used to duplicate or reorder substrings.
  1115  * The destination index must not overlap the source range.
  1116  *
  1117  * The text to be copied or moved is inserted at destIndex;
  1118  * it does not replace or overwrite any existing text.
  1119  *
  1120  * This function need only be implemented for UText types that support writing.
  1121  *
  1122  * When using this function, there should be only a single UText opened onto the
  1123  * underlying native text string.  The function is responsible for updating the
  1124  * text chunk within the UText to reflect the updated iteration position,
  1125  * taking into account any changes to the underlying string's structure caused
  1126  * by the copy or move operation.
  1127  *
  1128  * @param ut           The UText representing the text to be operated on.
  1129  * @param nativeStart  The index of the start of the region to be copied or moved
  1130  * @param nativeLimit  The index of the character following the region to be copied or moved.
  1131  * @param nativeDest   The destination index to which the source substring is copied or moved.
  1132  * @param move         If TRUE, then the substring is moved, not copied/duplicated.
  1133  * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
  1134  *
  1135  * @stable ICU 3.4
  1136  */
  1137 typedef void U_CALLCONV
  1138 UTextCopy(UText *ut,
  1139           int64_t nativeStart, int64_t nativeLimit,
  1140           int64_t nativeDest,
  1141           UBool move,
  1142           UErrorCode *status);
  1144 /**
  1145  * Function type declaration for UText.mapOffsetToNative().
  1146  * Map from the current UChar offset within the current text chunk to
  1147  *  the corresponding native index in the original source text.
  1148  *
  1149  * This is required only for text providers that do not use native UTF-16 indexes.
  1150  * UTEXT_GETNATIVEINDEX() calls it when chunkOffset is greater than nativeIndexingLimit.
  1151  * @param ut     the UText.
  1152  * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
  1153  *         The returned native index should always be to a code point boundary.
  1154  *
  1155  * @stable ICU 3.4
  1156  */
  1157 typedef int64_t U_CALLCONV
  1158 UTextMapOffsetToNative(const UText *ut);
  1160 /**
  1161  * Function type declaration for UText.mapIndexToUTF16().
  1162  * Map from a native index to a UChar offset within a text chunk.
  1163  * Behavior is undefined if the native index does not fall within the
  1164  *   current chunk.
  1165  *
  1166  * This function is required only for text providers that do not use native UTF-16 indexes.
  1167  *
  1168  * @param ut          The UText containing the text chunk.
  1169  * @param nativeIndex Absolute (native) text index, chunkNativeStart<=index<=chunkNativeLimit.
  1170  * @return            Chunk-relative UTF-16 offset corresponding to the specified native
  1171  *                    index.
  1172  *
  1173  * @stable ICU 3.4
  1174  */
  1175 typedef int32_t U_CALLCONV
  1176 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
  1179 /**
  1180  * Function type declaration for UTextFuncs.close.
  1181  *
  1182  * A Text Provider close function is only required for provider types that make
  1183  *  allocations in their open function (or other functions) that must be
  1184  *  cleaned when the UText is closed.
  1185  *
  1186  * The allocation of the UText struct itself and any "extra" storage
  1187  * associated with the UText is handled by the common UText implementation
  1188  * and does not require provider specific cleanup in a close function.
  1189  *
  1190  * Most UText provider implementations do not need to implement this function.
  1191  *
  1192  * @param ut A UText object to be closed.
  1193  *
  1194  * @stable ICU 3.4
  1195  */
  1196 typedef void U_CALLCONV
  1197 UTextClose(UText *ut);
  1200 /**
  1201   *   (public)  Function dispatch table for UText.
  1202   *             Conceptually very much like a C++ Virtual Function Table.
  1203   *             This struct defines the organization of the table.
  1204   *             Each text provider implementation must provide an
  1205   *              actual table that is initialized with the appropriate functions
  1206   *              for the type of text being handled.
  1207   *   @stable ICU 3.6
  1208   */
  1209 struct UTextFuncs {
  1210     /**
  1211      *   (public)  Function table size, sizeof(UTextFuncs)
  1212      *             Intended for use should the table grow to accommodate added
  1213      *             functions in the future, to allow tests for older format
  1214      *             function tables that do not contain the extensions.
  1216      *             Fields are placed for optimal alignment on
  1217      *             32/64/128-bit-pointer machines, by normally grouping together
  1218      *             4 32-bit fields,
  1219      *             4 pointers,
  1220      *             2 64-bit fields
  1221      *             in sequence.
  1222      *   @stable ICU 3.6
  1223      */
  1224     int32_t       tableSize;
  1226     /**
  1227       *   (private)  Alignment padding.
  1228       *              Do not use, reserved for use by the UText framework only.
  1229       *   @internal
  1230       */
  1231     int32_t       reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
  1234     /**
  1235      * (public) Function pointer for UTextClone
  1237      * @see UTextClone
  1238      * @stable ICU 3.6
  1239      */
  1240     UTextClone *clone;
  1242     /**
  1243      * (public) Function pointer for UTextNativeLength
  1244      * May be expensive to compute!
  1246      * @see UTextNativeLength
  1247      * @stable ICU 3.6
  1248      */
  1249     UTextNativeLength *nativeLength;
  1251     /**
  1252      * (public) Function pointer for UTextAccess.
  1254      * @see UTextAccess
  1255      * @stable ICU 3.6
  1256      */
  1257     UTextAccess *access;
  1259     /**
  1260      * (public) Function pointer for UTextExtract.
  1262      * @see UTextExtract
  1263      * @stable ICU 3.6
  1264      */
  1265     UTextExtract *extract;
  1267     /**
  1268      * (public) Function pointer for UTextReplace.
  1270      * @see UTextReplace
  1271      * @stable ICU 3.6
  1272      */
  1273     UTextReplace *replace;
  1275     /**
  1276      * (public) Function pointer for UTextCopy.
  1278      * @see UTextCopy
  1279      * @stable ICU 3.6
  1280      */
  1281     UTextCopy *copy;
  1283     /**
  1284      * (public) Function pointer for UTextMapOffsetToNative.
  1286      * @see UTextMapOffsetToNative
  1287      * @stable ICU 3.6
  1288      */
  1289     UTextMapOffsetToNative *mapOffsetToNative;
  1291     /**
  1292      * (public) Function pointer for UTextMapNativeIndexToUTF16.
  1294      * @see UTextMapNativeIndexToUTF16
  1295      * @stable ICU 3.6
  1296      */
  1297     UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
  1299     /**
  1300      * (public) Function pointer for UTextClose.
  1302       * @see UTextClose
  1303       * @stable ICU 3.6
  1304       */
  1305     UTextClose  *close;
  1307     /**
  1308       * (private)  Spare function pointer
  1309       * @internal
  1310       */
  1311     UTextClose  *spare1;
  1313     /**
  1314       * (private)  Spare function pointer
  1315       * @internal
  1316       */
  1317     UTextClose  *spare2;
  1319     /**
  1320       * (private)  Spare function pointer
  1321       * @internal
  1322       */
  1323     UTextClose  *spare3;
  1325 };
  1326 /**
  1327  * Function dispatch table for UText
  1328  * @see UTextFuncs
  1329  */
  1330 typedef struct UTextFuncs UTextFuncs;
  1332  /**
  1333   *   UText struct.  Provides the interface between the generic UText access code
  1334   *                  and the UText provider code that works on specific kinds of
  1335   *                  text  (UTF-8, noncontiguous UTF-16, whatever.)
  1337   *                  Applications that are using predefined types of text providers
  1338   *                  to pass text data to ICU services will have no need to view the
  1339   *                  internals of the UText structs that they open.
  1341   * @stable ICU 3.6
  1342   */
  1343 struct UText {
  1344     /**
  1345      *     (private)  Magic.  Used to help detect when UText functions are handed
  1346      *                        invalid or uninitialized UText structs.
  1347      *                        utext_openXYZ() functions take an initialized,
  1348      *                        but not necessarily open, UText struct as an
  1349      *                        optional fill-in parameter.  This magic field
  1350      *                        is used to check for that initialization.
  1351      *                        Text provider close functions must NOT clear
  1352      *                        the magic field because that would prevent
  1353      *                        reuse of the UText struct.
  1354      * @internal
  1355      */
  1356     uint32_t       magic;
  1359     /**
  1360      *     (private)  Flags for managing the allocation and freeing of
  1361      *                memory associated with this UText.
  1362      * @internal
  1363      */
  1364     int32_t        flags;
  1367     /**
  1368       *  Text provider properties.  This set of flags is maintained by the
  1369       *                             text provider implementation.
  1370       *  @stable ICU 3.4
  1371       */
  1372     int32_t         providerProperties;
  1374     /**
  1375      * (public) sizeOfStruct=sizeof(UText)
  1376      * Allows possible backward compatible extension.
  1378      * @stable ICU 3.4
  1379      */
  1380     int32_t         sizeOfStruct;
  1382     /* ------ 16 byte alignment boundary -----------  */
  1385     /**
  1386       *  (protected) Native index of the first character position following
  1387       *              the current chunk.
  1388       *  @stable ICU 3.6
  1389       */
  1390     int64_t         chunkNativeLimit;
  1392     /**
  1393      *   (protected)  Size in bytes of the extra space (pExtra).
  1394      *  @stable ICU 3.4
  1395      */
  1396     int32_t        extraSize;
  1398     /**
  1399       *    (protected) The highest chunk offset where native indexing and
  1400       *    chunk (UTF-16) indexing correspond.  For UTF-16 sources, value
  1401       *    will be equal to chunkLength.
  1403       *    @stable ICU 3.6
  1404       */
  1405     int32_t         nativeIndexingLimit;
  1407     /* ---- 16 byte alignment boundary------ */
  1409     /**
  1410      *  (protected) Native index of the first character in the text chunk.
  1411      *  @stable ICU 3.6
  1412      */
  1413     int64_t         chunkNativeStart;
  1415     /**
  1416      *  (protected) Current iteration position within the text chunk (UTF-16 buffer).
  1417      *  This is the index to the character that will be returned by utext_next32().
  1418      *  @stable ICU 3.6
  1419      */
  1420     int32_t         chunkOffset;
  1422     /**
  1423      *  (protected) Length of the text chunk (UTF-16 buffer), in UChars.
  1424      *  @stable ICU 3.6
  1425      */
  1426     int32_t         chunkLength;
  1428     /* ---- 16  byte alignment boundary-- */
  1431     /**
  1432      *  (protected)  pointer to a chunk of text in UTF-16 format.
  1433      *  May refer either to original storage of the source of the text, or
  1434      *  if conversion was required, to a buffer owned by the UText.
  1435      *  @stable ICU 3.6
  1436      */
  1437     const UChar    *chunkContents;
  1439      /**
  1440       * (public)     Pointer to Dispatch table for accessing functions for this UText.
  1441       * @stable ICU 3.6
  1442       */
  1443     const UTextFuncs     *pFuncs;
  1445     /**
  1446      *  (protected)  Pointer to additional space requested by the
  1447      *               text provider during the utext_open operation.
  1448      * @stable ICU 3.4
  1449      */
  1450     void          *pExtra;
  1452     /**
  1453      * (protected) Pointer to string or text-containing object or similar.
  1454      * This is the source of the text that this UText is wrapping, in a format
  1455      *  that is known to the text provider functions.
  1456      * @stable ICU 3.4
  1457      */
  1458     const void   *context;
  1460     /* --- 16 byte alignment boundary--- */
  1462     /**
  1463      * (protected) Pointer fields available for use by the text provider.
  1464      * Not used by UText common code.
  1465      * @stable ICU 3.6
  1466      */
  1467     const void     *p; 
  1468     /**
  1469      * (protected) Pointer fields available for use by the text provider.
  1470      * Not used by UText common code.
  1471      * @stable ICU 3.6
  1472      */
  1473     const void     *q;
  1474      /**
  1475      * (protected) Pointer fields available for use by the text provider.
  1476      * Not used by UText common code.
  1477      * @stable ICU 3.6
  1478       */
  1479     const void     *r;
  1481     /**
  1482       *  Private field reserved for future use by the UText framework
  1483       *     itself.  This is not to be touched by the text providers.
  1484       * @internal ICU 3.4
  1485       */
  1486     void           *privP;
  1489     /* --- 16 byte alignment boundary--- */
  1492     /**
  1493       * (protected) Integer field reserved for use by the text provider.
  1494       * Not used by the UText framework, or by the client (user) of the UText.
  1495       * @stable ICU 3.4
  1496       */
  1497     int64_t         a;
  1499     /**
  1500       * (protected) Integer field reserved for use by the text provider.
  1501       * Not used by the UText framework, or by the client (user) of the UText.
  1502       * @stable ICU 3.4
  1503       */
  1504     int32_t         b;
  1506     /**
  1507       * (protected) Integer field reserved for use by the text provider.
  1508       * Not used by the UText framework, or by the client (user) of the UText.
  1509       * @stable ICU 3.4
  1510       */
  1511     int32_t         c;
  1513     /*  ---- 16 byte alignment boundary---- */
  1516     /**
  1517       *  Private field reserved for future use by the UText framework
  1518       *     itself.  This is not to be touched by the text providers.
  1519       * @internal ICU 3.4
  1520       */
  1521     int64_t         privA;
  1522     /**
  1523       *  Private field reserved for future use by the UText framework
  1524       *     itself.  This is not to be touched by the text providers.
  1525       * @internal ICU 3.4
  1526       */
  1527     int32_t         privB;
  1528     /**
  1529       *  Private field reserved for future use by the UText framework
  1530       *     itself.  This is not to be touched by the text providers.
  1531       * @internal ICU 3.4
  1532       */
  1533     int32_t         privC;
  1534 };
  1537 /**
  1538  *  Common function for use by Text Provider implementations to allocate and/or initialize
  1539  *  a new UText struct.  To be called in the implementation of utext_open() functions.
  1540  *  If the supplied UText parameter is NULL, a new UText struct will be allocated on the heap.
  1541  *  If the supplied UText is already open, the provider's close function will be called
  1542  *  so that the struct can be reused by the open that is in progress.
  1543  *
  1544  * @param ut   pointer to a UText struct to be re-used, or NULL if a new UText
  1545  *             should be allocated.
  1546  * @param extraSpace The amount of additional space to be allocated as part
  1547  *             of this UText, for use by types of providers that require
  1548  *             additional storage (see the pExtra field of UText).
  1549  * @param status Errors are returned here.
  1550  * @return pointer to the UText, allocated if necessary, with extra space set up if requested.
  1551  * @stable ICU 3.4
  1552  */
  1553 U_STABLE UText * U_EXPORT2
  1554 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
  /*
   * Deliberately not wrapped in #ifndef U_HIDE_INTERNAL_API: the @stable
   * UTEXT_INITIALIZER macro expands to UTEXT_MAGIC, so this constant has to
   * remain declared even when the rest of the internal API is hidden.
   */
  /**
    * @internal
    *  Value used to help identify correctly initialized UText structs.
    *  Note:  must be publicly visible so that UTEXT_INITIALIZER can access it.
    */
  enum {
      UTEXT_MAGIC = 0x345ad82c
  };
  /**
   * Initializer to be used with local (stack) instances of a UText
   *  struct.  UText structs must be initialized before passing
   *  them to one of the utext_open functions.
   *
   *  The values are positional, one per UText field in the struct's field
   *  order (the trailing comment on each line names the field being set):
   *  magic is set to UTEXT_MAGIC, sizeOfStruct to sizeof(UText), and every
   *  other field to 0 or NULL.
   *
   * @stable ICU 3.6
   */
  #define UTEXT_INITIALIZER {                                        \
                    UTEXT_MAGIC,          /* magic                */ \
                    0,                    /* flags                */ \
                    0,                    /* providerProps        */ \
                    sizeof(UText),        /* sizeOfStruct         */ \
                    0,                    /* chunkNativeLimit     */ \
                    0,                    /* extraSize            */ \
                    0,                    /* nativeIndexingLimit  */ \
                    0,                    /* chunkNativeStart     */ \
                    0,                    /* chunkOffset          */ \
                    0,                    /* chunkLength          */ \
                    NULL,                 /* chunkContents        */ \
                    NULL,                 /* pFuncs               */ \
                    NULL,                 /* pExtra               */ \
                    NULL,                 /* context              */ \
                    NULL, NULL, NULL,     /* p, q, r              */ \
                    NULL,                 /* privP                */ \
                    0, 0, 0,              /* a, b, c              */ \
                    0, 0, 0               /* privA, privB, privC  */ \
                    }
  1596 U_CDECL_END
  1600 #endif

mercurial