intl/icu/source/common/utrie2.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/utrie2.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,988 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2001-2013, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +******************************************************************************
    1.11 +*   file name:  utrie2.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2008aug16 (starting from a copy of utrie.h)
    1.17 +*   created by: Markus W. Scherer
    1.18 +*/
    1.19 +
    1.20 +#ifndef __UTRIE2_H__
    1.21 +#define __UTRIE2_H__
    1.22 +
    1.23 +#include "unicode/utypes.h"
    1.24 +#include "putilimp.h"
    1.25 +#include "udataswp.h"
    1.26 +
    1.27 +U_CDECL_BEGIN
    1.28 +
    1.29 +struct UTrie;  /* forward declaration */
    1.30 +#ifndef __UTRIE_H__
    1.31 +typedef struct UTrie UTrie;
    1.32 +#endif
    1.33 +
    1.34 +/**
    1.35 + * \file
    1.36 + *
    1.37 + * This is a common implementation of a Unicode trie.
    1.38 + * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
    1.39 + * Unicode code points (0..0x10ffff). (A map from code points to integers.)
    1.40 + *
    1.41 + * This is the second common version of a Unicode trie (hence the name UTrie2).
    1.42 + * Compared with UTrie version 1:
    1.43 + * - Still splitting BMP code points 11:5 bits for index and data table lookups.
    1.44 + * - Still separate data for lead surrogate code _units_ vs. code _points_,
    1.45 + *   but the lead surrogate code unit values are not required any more
    1.46 + *   for data lookup for supplementary code points.
    1.47 + * - The "folding" mechanism is removed. In UTrie version 1, this somewhat
    1.48 + *   hard-to-explain mechanism was meant to be used for optimized UTF-16
    1.49 + *   processing, with application-specific encoding of indexing bits
    1.50 + *   in the lead surrogate data for the associated supplementary code points.
    1.51 + * - For the last single-value code point range (ending with U+10ffff),
    1.52 + *   the starting code point ("highStart") and the value are stored.
    1.53 + * - For supplementary code points U+10000..highStart-1 a three-table lookup
    1.54 + *   (two index tables and one data table) is used. The first index
    1.55 + *   is truncated, omitting both the BMP portion and the high range.
    1.56 + * - There is a special small index for 2-byte UTF-8, and the initial data
    1.57 + *   entries are designed for fast 1/2-byte UTF-8 lookup.
    1.58 + */
    1.59 +
    1.60 +/**
    1.61 + * Trie structure.
    1.62 + * Use only with public API macros and functions.
    1.63 + */
    1.64 +struct UTrie2;
    1.65 +typedef struct UTrie2 UTrie2;
    1.66 +
    1.67 +/* Public UTrie2 API functions: read-only access ---------------------------- */
    1.68 +
    1.69 +/**
    1.70 + * Selectors for the width of a UTrie2 data value.
    1.71 + */
    1.72 +enum UTrie2ValueBits {
    1.73 +    /** 16 bits per UTrie2 data value. */
    1.74 +    UTRIE2_16_VALUE_BITS,
    1.75 +    /** 32 bits per UTrie2 data value. */
    1.76 +    UTRIE2_32_VALUE_BITS,
    1.77 +    /** Number of selectors for the width of UTrie2 data values. */
    1.78 +    UTRIE2_COUNT_VALUE_BITS
    1.79 +};
    1.80 +typedef enum UTrie2ValueBits UTrie2ValueBits;
    1.81 +
    1.82 +/**
    1.83 + * Open a frozen trie from its serialized form, stored in 32-bit-aligned memory.
    1.84 + * Inverse of utrie2_serialize().
    1.85 + * The memory must remain valid and unchanged as long as the trie is used.
    1.86 + * You must utrie2_close() the trie once you are done using it.
    1.87 + *
    1.88 + * @param valueBits selects the data entry size; results in an
    1.89 + *                  U_INVALID_FORMAT_ERROR if it does not match the serialized form
    1.90 + * @param data a pointer to 32-bit-aligned memory containing the serialized form of a UTrie2
    1.91 + * @param length the number of bytes available at data;
    1.92 + *               can be more than necessary
    1.93 + * @param pActualLength receives the actual number of bytes at data taken up by the trie data;
    1.94 + *                      can be NULL
    1.95 + * @param pErrorCode an in/out ICU UErrorCode
    1.96 + * @return the unserialized trie
    1.97 + *
    1.98 + * @see utrie2_open
    1.99 + * @see utrie2_serialize
   1.100 + */
   1.101 +U_CAPI UTrie2 * U_EXPORT2
   1.102 +utrie2_openFromSerialized(UTrie2ValueBits valueBits,
   1.103 +                          const void *data, int32_t length, int32_t *pActualLength,
   1.104 +                          UErrorCode *pErrorCode);
   1.105 +
   1.106 +/**
   1.107 + * Open a frozen, empty "dummy" trie.
   1.108 + * A dummy trie is an empty trie, used when a real data trie cannot
   1.109 + * be loaded. Equivalent to calling utrie2_open() and utrie2_freeze(),
   1.110 + * but without internally creating and compacting/serializing the
   1.111 + * builder data structure.
   1.112 + *
   1.113 + * The trie always returns the initialValue,
   1.114 + * or the errorValue for out-of-range code points and illegal UTF-8.
   1.115 + *
   1.116 + * You must utrie2_close() the trie once you are done using it.
   1.117 + *
   1.118 + * @param valueBits selects the data entry size
   1.119 + * @param initialValue the initial value that is set for all code points
   1.120 + * @param errorValue the value for out-of-range code points and illegal UTF-8
   1.121 + * @param pErrorCode an in/out ICU UErrorCode
   1.122 + * @return the dummy trie
   1.123 + *
   1.124 + * @see utrie2_openFromSerialized
   1.125 + * @see utrie2_open
   1.126 + */
   1.127 +U_CAPI UTrie2 * U_EXPORT2
   1.128 +utrie2_openDummy(UTrie2ValueBits valueBits,
   1.129 +                 uint32_t initialValue, uint32_t errorValue,
   1.130 +                 UErrorCode *pErrorCode);
   1.131 +
   1.132 +/**
   1.133 + * Get a value from a code point as stored in the trie.
   1.134 + * Easier to use than UTRIE2_GET16() and UTRIE2_GET32() but slower.
   1.135 + * Easier to use because, unlike the macros, this function works on all UTrie2
   1.136 + * objects, frozen or not, holding 16-bit or 32-bit data values.
   1.137 + *
   1.138 + * @param trie the trie
   1.139 + * @param c the code point
   1.140 + * @return the value
   1.141 + */
   1.142 +U_CAPI uint32_t U_EXPORT2
   1.143 +utrie2_get32(const UTrie2 *trie, UChar32 c);
   1.144 +
   1.145 +/* enumeration callback types */
   1.146 +
   1.147 +/**
   1.148 + * Callback from utrie2_enum(), extracts a uint32_t value from a
   1.149 + * trie value. This value will be passed on to the UTrie2EnumRange function.
   1.150 + *
   1.151 + * @param context an opaque pointer, as passed into utrie2_enum()
   1.152 + * @param value a value from the trie
   1.153 + * @return the value that is to be passed on to the UTrie2EnumRange function
   1.154 + */
   1.155 +typedef uint32_t U_CALLCONV
   1.156 +UTrie2EnumValue(const void *context, uint32_t value);
   1.157 +
   1.158 +/**
   1.159 + * Callback from utrie2_enum(), is called for each contiguous range
   1.160 + * of code points with the same value as retrieved from the trie and
   1.161 + * transformed by the UTrie2EnumValue function.
   1.162 + *
   1.163 + * The callback function can stop the enumeration by returning FALSE.
   1.164 + *
   1.165 + * @param context an opaque pointer, as passed into utrie2_enum()
   1.166 + * @param start the first code point in a contiguous range with value
   1.167 + * @param end the last code point in a contiguous range with value (inclusive)
   1.168 + * @param value the value that is set for all code points in [start..end]
   1.169 + * @return FALSE to stop the enumeration
   1.170 + */
   1.171 +typedef UBool U_CALLCONV
   1.172 +UTrie2EnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value);
   1.173 +
   1.174 +/**
   1.175 + * Enumerate efficiently all values in a trie.
   1.176 + * Do not modify the trie during the enumeration.
   1.177 + *
   1.178 + * For each entry in the trie, the value to be delivered is passed through
   1.179 + * the UTrie2EnumValue function.
   1.180 + * The value is unchanged if that function pointer is NULL.
   1.181 + *
   1.182 + * For each contiguous range of code points with a given (transformed) value,
   1.183 + * the UTrie2EnumRange function is called.
   1.184 + *
   1.185 + * @param trie a pointer to the trie
   1.186 + * @param enumValue a pointer to a function that may transform the trie entry value,
   1.187 + *                  or NULL if the values from the trie are to be used directly
   1.188 + * @param enumRange a pointer to a function that is called for each contiguous range
   1.189 + *                  of code points with the same (transformed) value
   1.190 + * @param context an opaque pointer that is passed on to the callback functions
   1.191 + */
   1.192 +U_CAPI void U_EXPORT2
   1.193 +utrie2_enum(const UTrie2 *trie,
   1.194 +            UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context);
   1.195 +
   1.196 +/* Building a trie ---------------------------------------------------------- */
   1.197 +
   1.198 +/**
   1.199 + * Open an empty, writable trie. At build time, 32-bit data values are used.
   1.200 + * utrie2_freeze() takes a valueBits parameter
   1.201 + * which determines the data value width in the serialized and frozen forms.
   1.202 + * You must utrie2_close() the trie once you are done using it.
   1.203 + *
   1.204 + * @param initialValue the initial value that is set for all code points
   1.205 + * @param errorValue the value for out-of-range code points and illegal UTF-8
   1.206 + * @param pErrorCode an in/out ICU UErrorCode
   1.207 + * @return a pointer to the allocated and initialized new trie
   1.208 + */
   1.209 +U_CAPI UTrie2 * U_EXPORT2
   1.210 +utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode);
   1.211 +
   1.212 +/**
   1.213 + * Clone a trie.
   1.214 + * You must utrie2_close() the clone once you are done using it.
   1.215 + *
   1.216 + * @param other the trie to clone
   1.217 + * @param pErrorCode an in/out ICU UErrorCode
   1.218 + * @return a pointer to the new trie clone
   1.219 + */
   1.220 +U_CAPI UTrie2 * U_EXPORT2
   1.221 +utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode);
   1.222 +
   1.223 +/**
   1.224 + * Clone a trie. The clone will be mutable/writable even if the other trie
   1.225 + * is frozen. (See utrie2_freeze().)
   1.226 + * You must utrie2_close() the clone once you are done using it.
   1.227 + *
   1.228 + * @param other the trie to clone
   1.229 + * @param pErrorCode an in/out ICU UErrorCode
   1.230 + * @return a pointer to the new trie clone
   1.231 + */
   1.232 +U_CAPI UTrie2 * U_EXPORT2
   1.233 +utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode);
   1.234 +
   1.235 +/**
   1.236 + * Close a trie and release associated memory.
   1.237 + *
   1.238 + * @param trie the trie
   1.239 + */
   1.240 +U_CAPI void U_EXPORT2
   1.241 +utrie2_close(UTrie2 *trie);
   1.242 +
   1.243 +/**
   1.244 + * Set a value for a code point.
   1.245 + *
   1.246 + * @param trie the unfrozen trie
   1.247 + * @param c the code point
   1.248 + * @param value the value
   1.249 + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
   1.250 + * - U_NO_WRITE_PERMISSION if the trie is frozen
   1.251 + */
   1.252 +U_CAPI void U_EXPORT2
   1.253 +utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode);
   1.254 +
   1.255 +/**
   1.256 + * Set a value in a range of code points [start..end].
   1.257 + * All code points c with start<=c<=end will get the value if
   1.258 + * overwrite is TRUE or if the old value is the initial value.
   1.259 + *
   1.260 + * @param trie the unfrozen trie
   1.261 + * @param start the first code point to get the value
   1.262 + * @param end the last code point to get the value (inclusive)
   1.263 + * @param value the value
   1.264 + * @param overwrite flag for whether old non-initial values are to be overwritten
   1.265 + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
   1.266 + * - U_NO_WRITE_PERMISSION if the trie is frozen
   1.267 + */
   1.268 +U_CAPI void U_EXPORT2
   1.269 +utrie2_setRange32(UTrie2 *trie,
   1.270 +                  UChar32 start, UChar32 end,
   1.271 +                  uint32_t value, UBool overwrite,
   1.272 +                  UErrorCode *pErrorCode);
   1.273 +
   1.274 +/**
   1.275 + * Freeze a trie. Make it immutable (read-only) and compact it,
   1.276 + * ready for serialization and for use with fast macros.
   1.277 + * Functions to set values will fail after freezing.
   1.278 + *
   1.279 + * A trie can be frozen only once. If this function is called again with different
   1.280 + * valueBits then it will set a U_ILLEGAL_ARGUMENT_ERROR.
   1.281 + *
   1.282 + * @param trie the trie
   1.283 + * @param valueBits selects the data entry size; if smaller than 32 bits, then
   1.284 + *                  the values stored in the trie will be truncated
   1.285 + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
   1.286 + * - U_INDEX_OUTOFBOUNDS_ERROR if the compacted index or data arrays are too long
   1.287 + *                             for serialization
   1.288 + *                             (the trie will be immutable and usable,
   1.289 + *                             but not frozen and not usable with the fast macros)
   1.290 + *
   1.291 + * @see utrie2_cloneAsThawed
   1.292 + */
   1.293 +U_CAPI void U_EXPORT2
   1.294 +utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode);
   1.295 +
   1.296 +/**
   1.297 + * Test if the trie is frozen. (See utrie2_freeze().)
   1.298 + *
   1.299 + * @param trie the trie
   1.300 + * @return TRUE if the trie is frozen, that is, immutable, ready for serialization
   1.301 + *         and for use with fast macros
   1.302 + */
   1.303 +U_CAPI UBool U_EXPORT2
   1.304 +utrie2_isFrozen(const UTrie2 *trie);
   1.305 +
   1.306 +/**
   1.307 + * Serialize a frozen trie into 32-bit aligned memory.
   1.308 + * If the trie is not frozen, then the function returns with a U_ILLEGAL_ARGUMENT_ERROR.
   1.309 + * A trie can be serialized multiple times.
   1.310 + *
   1.311 + * @param trie the frozen trie
   1.312 + * @param data a pointer to 32-bit-aligned memory to be filled with the trie data,
   1.313 + *             can be NULL if capacity==0
   1.314 + * @param capacity the number of bytes available at data,
   1.315 + *                 or 0 for preflighting
   1.316 + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
   1.317 + * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
   1.318 + * - U_ILLEGAL_ARGUMENT_ERROR if the trie is not frozen or the data and capacity
   1.319 + *                            parameters are bad
   1.320 + * @return the number of bytes written or needed for the trie
   1.321 + *
   1.322 + * @see utrie2_openFromSerialized()
   1.323 + */
   1.324 +U_CAPI int32_t U_EXPORT2
   1.325 +utrie2_serialize(UTrie2 *trie,
   1.326 +                 void *data, int32_t capacity,
   1.327 +                 UErrorCode *pErrorCode);
   1.328 +
   1.329 +/* Public UTrie2 API: miscellaneous functions ------------------------------- */
   1.330 +
   1.331 +/**
   1.332 + * Get the UTrie version from 32-bit-aligned memory containing the serialized form
   1.333 + * of either a UTrie (version 1) or a UTrie2 (version 2).
   1.334 + *
   1.335 + * @param data a pointer to 32-bit-aligned memory containing the serialized form
   1.336 + *             of a UTrie, version 1 or 2
   1.337 + * @param length the number of bytes available at data;
   1.338 + *               can be more than necessary (see return value)
   1.339 + * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized.
   1.340 + *                    If TRUE, opposite-endian serialized forms are recognized as well.
   1.341 + * @return the UTrie version of the serialized form, or 0 if it is not
   1.342 + *         recognized as a serialized UTrie
   1.343 + */
   1.344 +U_CAPI int32_t U_EXPORT2
   1.345 +utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk);
   1.346 +
   1.347 +/**
   1.348 + * Swap a serialized UTrie2.
   1.349 + * @internal
   1.350 + */
   1.351 +U_CAPI int32_t U_EXPORT2
   1.352 +utrie2_swap(const UDataSwapper *ds,
   1.353 +            const void *inData, int32_t length, void *outData,
   1.354 +            UErrorCode *pErrorCode);
   1.355 +
   1.356 +/**
   1.357 + * Swap a serialized UTrie or UTrie2.
   1.358 + * @internal
   1.359 + */
   1.360 +U_CAPI int32_t U_EXPORT2
   1.361 +utrie2_swapAnyVersion(const UDataSwapper *ds,
   1.362 +                      const void *inData, int32_t length, void *outData,
   1.363 +                      UErrorCode *pErrorCode);
   1.364 +
   1.365 +/**
   1.366 + * Build a UTrie2 (version 2) from a UTrie (version 1).
   1.367 + * Enumerates all values in the UTrie and builds a UTrie2 with the same values.
   1.368 + * The resulting UTrie2 will be frozen.
   1.369 + *
   1.370 + * @param trie1 the runtime UTrie structure to be enumerated
   1.371 + * @param errorValue the value for out-of-range code points and illegal UTF-8
   1.372 + * @param pErrorCode an in/out ICU UErrorCode
   1.373 + * @return The frozen UTrie2 with the same values as the UTrie.
   1.374 + */
   1.375 +U_CAPI UTrie2 * U_EXPORT2
   1.376 +utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode);
   1.377 +
   1.378 +/* Public UTrie2 API macros ------------------------------------------------- */
   1.379 +
   1.380 +/*
   1.381 + * These macros provide fast data lookup from a frozen trie.
   1.382 + * They will crash when used on an unfrozen trie.
   1.383 + */
   1.384 +
   1.385 +/**
   1.386 + * Return a 16-bit trie value from a code point, with range checking.
   1.387 + * Returns trie->errorValue if c is not in the range 0..U+10ffff.
   1.388 + *
   1.389 + * @param trie (const UTrie2 *, in) a frozen trie
   1.390 + * @param c (UChar32, in) the input code point
   1.391 + * @return (uint16_t) The code point's trie value.
   1.392 + */
   1.393 +#define UTRIE2_GET16(trie, c) _UTRIE2_GET((trie), index, (trie)->indexLength, (c))
   1.394 +
   1.395 +/**
   1.396 + * Return a 32-bit trie value from a code point, with range checking.
   1.397 + * Returns trie->errorValue if c is not in the range 0..U+10ffff.
   1.398 + *
   1.399 + * @param trie (const UTrie2 *, in) a frozen trie
   1.400 + * @param c (UChar32, in) the input code point
   1.401 + * @return (uint32_t) The code point's trie value.
   1.402 + */
   1.403 +#define UTRIE2_GET32(trie, c) _UTRIE2_GET((trie), data32, 0, (c))
   1.404 +
   1.405 +/**
   1.406 + * UTF-16: Get the next code point (UChar32 c, out), post-increment src,
   1.407 + * and get a 16-bit value from the trie.
   1.408 + *
   1.409 + * @param trie (const UTrie2 *, in) a frozen trie
   1.410 + * @param src (const UChar *, in/out) the source text pointer
   1.411 + * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
   1.412 + * @param c (UChar32, out) variable for the code point
   1.413 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result
   1.414 + */
   1.415 +#define UTRIE2_U16_NEXT16(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, index, src, limit, c, result)
   1.416 +
   1.417 +/**
   1.418 + * UTF-16: Get the next code point (UChar32 c, out), post-increment src,
   1.419 + * and get a 32-bit value from the trie.
   1.420 + *
   1.421 + * @param trie (const UTrie2 *, in) a frozen trie
   1.422 + * @param src (const UChar *, in/out) the source text pointer
   1.423 + * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
   1.424 + * @param c (UChar32, out) variable for the code point
   1.425 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result
   1.426 + */
   1.427 +#define UTRIE2_U16_NEXT32(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, data32, src, limit, c, result)
   1.428 +
   1.429 +/**
   1.430 + * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src,
   1.431 + * and get a 16-bit value from the trie.
   1.432 + *
   1.433 + * @param trie (const UTrie2 *, in) a frozen trie
   1.434 + * @param start (const UChar *, in) the start pointer for the text
   1.435 + * @param src (const UChar *, in/out) the source text pointer
   1.436 + * @param c (UChar32, out) variable for the code point
   1.437 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result
   1.438 + */
   1.439 +#define UTRIE2_U16_PREV16(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, index, start, src, c, result)
   1.440 +
   1.441 +/**
   1.442 + * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src,
   1.443 + * and get a 32-bit value from the trie.
   1.444 + *
   1.445 + * @param trie (const UTrie2 *, in) a frozen trie
   1.446 + * @param start (const UChar *, in) the start pointer for the text
   1.447 + * @param src (const UChar *, in/out) the source text pointer
   1.448 + * @param c (UChar32, out) variable for the code point
   1.449 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result
   1.450 + */
   1.451 +#define UTRIE2_U16_PREV32(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, data32, start, src, c, result)
   1.452 +
   1.453 +/**
   1.454 + * UTF-8: Post-increment src and get a 16-bit value from the trie.
   1.455 + *
   1.456 + * @param trie (const UTrie2 *, in) a frozen trie
   1.457 + * @param src (const char *, in/out) the source text pointer
   1.458 + * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
   1.459 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result
   1.460 + */
   1.461 +#define UTRIE2_U8_NEXT16(trie, src, limit, result)\
   1.462 +    _UTRIE2_U8_NEXT(trie, data16, index, src, limit, result)
   1.463 +
   1.464 +/**
   1.465 + * UTF-8: Post-increment src and get a 32-bit value from the trie.
   1.466 + *
   1.467 + * @param trie (const UTrie2 *, in) a frozen trie
   1.468 + * @param src (const char *, in/out) the source text pointer
   1.469 + * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
   1.470 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result
   1.471 + */
   1.472 +#define UTRIE2_U8_NEXT32(trie, src, limit, result) \
   1.473 +    _UTRIE2_U8_NEXT(trie, data32, data32, src, limit, result)
   1.474 +
   1.475 +/**
   1.476 + * UTF-8: Pre-decrement src and get a 16-bit value from the trie.
   1.477 + *
   1.478 + * @param trie (const UTrie2 *, in) a frozen trie
   1.479 + * @param start (const char *, in) the start pointer for the text
   1.480 + * @param src (const char *, in/out) the source text pointer
   1.481 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result
   1.482 + */
   1.483 +#define UTRIE2_U8_PREV16(trie, start, src, result) \
   1.484 +    _UTRIE2_U8_PREV(trie, data16, index, start, src, result)
   1.485 +
   1.486 +/**
   1.487 + * UTF-8: Pre-decrement src and get a 32-bit value from the trie.
   1.488 + *
   1.489 + * @param trie (const UTrie2 *, in) a frozen trie
   1.490 + * @param start (const char *, in) the start pointer for the text
   1.491 + * @param src (const char *, in/out) the source text pointer
   1.492 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result
   1.493 + */
   1.494 +#define UTRIE2_U8_PREV32(trie, start, src, result) \
   1.495 +    _UTRIE2_U8_PREV(trie, data32, data32, start, src, result)
   1.496 +
   1.497 +/* Public UTrie2 API: optimized UTF-16 access ------------------------------- */
   1.498 +
   1.499 +/*
   1.500 + * The following functions and macros are used for highly optimized UTF-16
   1.501 + * text processing. The UTRIE2_U16_NEXTxy() macros do not depend on these.
   1.502 + *
   1.503 + * A UTrie2 stores separate values for lead surrogate code _units_ vs. code _points_.
   1.504 + * UTF-16 text processing can be optimized by detecting surrogate pairs and
   1.505 + * assembling supplementary code points only when there is non-trivial data
   1.506 + * available.
   1.507 + *
   1.508 + * At build-time, use utrie2_enumForLeadSurrogate() to see if there
   1.509 + * is non-trivial (non-initialValue) data for any of the supplementary
   1.510 + * code points associated with a lead surrogate.
   1.511 + * If so, then set a special (application-specific) value for the
   1.512 + * lead surrogate code _unit_, with utrie2_set32ForLeadSurrogateCodeUnit().
   1.513 + *
   1.514 + * At runtime, use UTRIE2_GET16_FROM_U16_SINGLE_LEAD() or
   1.515 + * UTRIE2_GET32_FROM_U16_SINGLE_LEAD() per code unit. If there is non-trivial
   1.516 + * data and the code unit is a lead surrogate, then check if a trail surrogate
   1.517 + * follows. If so, assemble the supplementary code point with
   1.518 + * U16_GET_SUPPLEMENTARY() and look up its value with UTRIE2_GET16_FROM_SUPP()
   1.519 + * or UTRIE2_GET32_FROM_SUPP(); otherwise reset the lead
   1.520 + * surrogate's value or do a code point lookup for it.
   1.521 + *
   1.522 + * If there is only trivial data for lead and trail surrogates, then processing
   1.523 + * can often skip them. For example, in normalization or case mapping
   1.524 + * all characters that do not have any mappings are simply copied as is.
   1.525 + */
   1.526 +
   1.527 +/**
   1.528 + * Get a value from a lead surrogate code unit as stored in the trie.
   1.529 + *
   1.530 + * @param trie the trie
   1.531 + * @param c the code unit (U+D800..U+DBFF)
   1.532 + * @return the value
   1.533 + */
   1.534 +U_CAPI uint32_t U_EXPORT2
   1.535 +utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c);
   1.536 +
   1.537 +/**
   1.538 + * Enumerate the trie values for the 1024=0x400 code points
   1.539 + * corresponding to a given lead surrogate (the lead parameter, U+D800..U+DBFF).
   1.540 + * For example, for the lead surrogate U+D87E it will enumerate the values
   1.541 + * for [U+2F800..U+2FC00[.
   1.542 + * Used by data builder code that sets special lead surrogate code unit values
   1.543 + * for optimized UTF-16 string processing.
   1.544 + *
   1.545 + * Do not modify the trie during the enumeration.
   1.546 + *
   1.547 + * Except for the limited code point range, this functions just like utrie2_enum():
   1.548 + * For each entry in the trie, the value to be delivered is passed through
   1.549 + * the UTrie2EnumValue function.
   1.550 + * The value is unchanged if that function pointer is NULL.
   1.551 + *
   1.552 + * For each contiguous range of code points with a given (transformed) value,
   1.553 + * the UTrie2EnumRange function is called.
   1.554 + *
   1.555 + * @param trie a pointer to the trie
   1.556 + * @param enumValue a pointer to a function that may transform the trie entry value,
   1.557 + *                  or NULL if the values from the trie are to be used directly
   1.558 + * @param enumRange a pointer to a function that is called for each contiguous range
   1.559 + *                  of code points with the same (transformed) value
   1.560 + * @param context an opaque pointer that is passed on to the callback functions
   1.561 + */
   1.562 +U_CAPI void U_EXPORT2
   1.563 +utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
   1.564 +                            UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
   1.565 +                            const void *context);
   1.566 +
   1.567 +/**
   1.568 + * Set a value for a lead surrogate code unit.
   1.569 + *
   1.570 + * @param trie the unfrozen trie
   1.571 + * @param lead the lead surrogate code unit (U+D800..U+DBFF)
   1.572 + * @param value the value
   1.573 + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes:
   1.574 + * - U_NO_WRITE_PERMISSION if the trie is frozen
   1.575 + */
   1.576 +U_CAPI void U_EXPORT2
   1.577 +utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
   1.578 +                                     UChar32 lead, uint32_t value,
   1.579 +                                     UErrorCode *pErrorCode);
   1.580 +
   1.581 +/**
   1.582 + * Return a 16-bit trie value from a UTF-16 single/lead code unit (<=U+ffff).
   1.583 + * Same as UTRIE2_GET16() if c is a BMP code point except for lead surrogates,
   1.584 + * but smaller and faster.
   1.585 + *
   1.586 + * @param trie (const UTrie2 *, in) a frozen trie
   1.587 + * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff
   1.588 + * @return (uint16_t) The code unit's trie value.
   1.589 + */
   1.590 +#define UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), index, c)
   1.591 +
   1.592 +/**
   1.593 + * Return a 32-bit trie value from a UTF-16 single/lead code unit (<=U+ffff).
   1.594 + * Same as UTRIE2_GET32() if c is a BMP code point except for lead surrogates,
   1.595 + * but smaller and faster.
   1.596 + *
   1.597 + * @param trie (const UTrie2 *, in) a frozen trie
   1.598 + * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff
   1.599 + * @return (uint32_t) The code unit's trie value.
   1.600 + */
   1.601 +#define UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), data32, c)
   1.602 +
   1.603 +/**
   1.604 + * Return a 16-bit trie value from a supplementary code point (U+10000..U+10ffff).
   1.605 + *
   1.606 + * @param trie (const UTrie2 *, in) a frozen trie
   1.607 + * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff
   1.608 + * @return (uint16_t) The code point's trie value.
   1.609 + */
   1.610 +#define UTRIE2_GET16_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), index, c)
   1.611 +
   1.612 +/**
   1.613 + * Return a 32-bit trie value from a supplementary code point (U+10000..U+10ffff).
   1.614 + *
   1.615 + * @param trie (const UTrie2 *, in) a frozen trie
   1.616 + * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff
   1.617 + * @return (uint32_t) The code point's trie value.
   1.618 + */
   1.619 +#define UTRIE2_GET32_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), data32, c)
   1.620 +
   1.621 +U_CDECL_END
   1.622 +
   1.623 +/* C++ convenience wrappers ------------------------------------------------- */
   1.624 +
   1.625 +#ifdef __cplusplus
   1.626 +
   1.627 +#include "unicode/utf.h"
   1.628 +#include "mutex.h"
   1.629 +
   1.630 +U_NAMESPACE_BEGIN
   1.631 +
   1.632 +// Shared state for the UTF-16 string iterators; use the Forward/Backward subclasses below.
   1.633 +class UTrie2StringIterator : public UMemory {
   1.634 +public:
   1.635 +    UTrie2StringIterator(const UTrie2 *t, const UChar *p) :
   1.636 +        trie(t), codePointStart(p), codePointLimit(p), codePoint(U_SENTINEL) {}
   1.637 +
   1.638 +    const UTrie2 *trie;  // the trie passed to the constructor
   1.639 +    const UChar *codePointStart, *codePointLimit;  // both initialized to the text pointer p
   1.640 +    UChar32 codePoint;  // initialized to U_SENTINEL
   1.641 +};
   1.642 +
   1.643 +class BackwardUTrie2StringIterator : public UTrie2StringIterator {
   1.644 +public:
   1.645 +    BackwardUTrie2StringIterator(const UTrie2 *t, const UChar *s, const UChar *p) :
   1.646 +        UTrie2StringIterator(t, p), start(s) {}
   1.647 +
   1.648 +    uint16_t previous16();  // defined elsewhere; presumably moves back one code point and returns its 16-bit trie value -- TODO confirm
   1.649 +
   1.650 +    const UChar *start;  // s from the constructor; presumably the start of the text, where backward iteration must stop
   1.651 +};
   1.652 +
   1.653 +class ForwardUTrie2StringIterator : public UTrie2StringIterator {
   1.654 +public:
   1.655 +    // Iteration limit l can be NULL.
   1.656 +    // In that case, the caller must detect c==0 and stop.
   1.657 +    ForwardUTrie2StringIterator(const UTrie2 *t, const UChar *p, const UChar *l) :
   1.658 +        UTrie2StringIterator(t, p), limit(l) {}  // p: initial position, l: stored as limit
   1.659 +
   1.660 +    uint16_t next16();  // declared only here; NOTE(review): presumably advances one code point and returns its 16-bit value -- confirm in the implementation
   1.661 +
   1.662 +    const UChar *limit;  // the l constructor argument; may be NULL
   1.663 +};
   1.664 +
   1.665 +U_NAMESPACE_END
   1.666 +
   1.667 +#endif
   1.668 +
   1.669 +/* Internal definitions ----------------------------------------------------- */
   1.670 +
   1.671 +U_CDECL_BEGIN
   1.672 +
   1.673 +/** Build-time trie structure. */
   1.674 +struct UNewTrie2;
   1.675 +typedef struct UNewTrie2 UNewTrie2;
   1.676 +
   1.677 +/*
   1.678 + * Trie structure definition.
   1.679 + *
   1.680 + * Either the data table is 16 bits wide and accessed via the index
   1.681 + * pointer, with each index item increased by indexLength;
   1.682 + * in this case, data32==NULL, and data16 is used for direct ASCII access.
   1.683 + *
   1.684 + * Or the data table is 32 bits wide and accessed via the data32 pointer.
   1.685 + */
   1.686 +struct UTrie2 {
   1.687 +    /* protected: used by macros and functions for reading values */
   1.688 +    const uint16_t *index;
   1.689 +    const uint16_t *data16;     /* for fast UTF-8 ASCII access, if 16-bit data */
   1.690 +    const uint32_t *data32;     /* NULL if 16-bit data is used via index */
   1.691 +
   1.692 +    int32_t indexLength, dataLength;
   1.693 +    uint16_t index2NullOffset;  /* 0xffff if there is no dedicated index-2 null block */
   1.694 +    uint16_t dataNullOffset;
   1.695 +    uint32_t initialValue;
   1.696 +    /** Value returned for out-of-range code points and illegal UTF-8. */
   1.697 +    uint32_t errorValue;
   1.698 +
   1.699 +    /* Start of the last range which ends at U+10ffff, and the data index of its value. */
   1.700 +    UChar32 highStart;
   1.701 +    int32_t highValueIndex;
   1.702 +
   1.703 +    /* private: used by builder and unserialization functions */
   1.704 +    void *memory;           /* serialized bytes; NULL if not frozen yet */
   1.705 +    int32_t length;         /* number of serialized bytes at memory; 0 if not frozen yet */
   1.706 +    UBool isMemoryOwned;    /* TRUE if the trie owns the memory */
   1.707 +    UBool padding1;
   1.708 +    int16_t padding2;
   1.709 +    UNewTrie2 *newTrie;     /* builder object; NULL when frozen */
   1.710 +};
   1.711 +
   1.712 +/**
   1.713 + * Trie constants, defining shift widths, index array lengths, etc.
   1.714 + *
   1.715 + * These are needed for the runtime macros but users can treat these as
   1.716 + * implementation details and skip to the actual public API further below.
   1.717 + */
   1.718 +enum {
   1.719 +    /** Shift size for getting the index-1 table offset. 11=6+5 */
   1.720 +    UTRIE2_SHIFT_1=6+5,
   1.721 +
   1.722 +    /** Shift size for getting the index-2 table offset. */
   1.723 +    UTRIE2_SHIFT_2=5,
   1.724 +
   1.725 +    /**
   1.726 +     * Difference between the two shift sizes,
   1.727 +     * for getting an index-1 offset from an index-2 offset. 6=11-5
   1.728 +     */
   1.729 +    UTRIE2_SHIFT_1_2=UTRIE2_SHIFT_1-UTRIE2_SHIFT_2,
   1.730 +
   1.731 +    /**
   1.732 +     * Number of index-1 entries for the BMP. 32=0x20
   1.733 +     * This part of the index-1 table is omitted from the serialized form.
   1.734 +     */
   1.735 +    UTRIE2_OMITTED_BMP_INDEX_1_LENGTH=0x10000>>UTRIE2_SHIFT_1,
   1.736 +
   1.737 +    /** Number of code points per index-1 table entry. 2048=0x800 */
   1.738 +    UTRIE2_CP_PER_INDEX_1_ENTRY=1<<UTRIE2_SHIFT_1,
   1.739 +
   1.740 +    /** Number of entries in an index-2 block. 64=0x40 */
   1.741 +    UTRIE2_INDEX_2_BLOCK_LENGTH=1<<UTRIE2_SHIFT_1_2,
   1.742 +
   1.743 +    /** Mask for getting the lower bits for the in-index-2-block offset. 63=0x3f */
   1.744 +    UTRIE2_INDEX_2_MASK=UTRIE2_INDEX_2_BLOCK_LENGTH-1,
   1.745 +
   1.746 +    /** Number of entries in a data block. 32=0x20 */
   1.747 +    UTRIE2_DATA_BLOCK_LENGTH=1<<UTRIE2_SHIFT_2,
   1.748 +
   1.749 +    /** Mask for getting the lower bits for the in-data-block offset. 31=0x1f */
   1.750 +    UTRIE2_DATA_MASK=UTRIE2_DATA_BLOCK_LENGTH-1,
   1.751 +
   1.752 +    /**
   1.753 +     * Shift size for shifting left the index array values.
   1.754 +     * Increases possible data size with 16-bit index values at the cost
   1.755 +     * of compactability.
   1.756 +     * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY.
   1.757 +     */
   1.758 +    UTRIE2_INDEX_SHIFT=2,
   1.759 +
   1.760 +    /** The alignment size of a data block. Also the granularity for compaction. 4=1<<2 */
   1.761 +    UTRIE2_DATA_GRANULARITY=1<<UTRIE2_INDEX_SHIFT,
   1.762 +
   1.763 +    /* Fixed layout of the first part of the index array. ------------------- */
   1.764 +
   1.765 +    /**
   1.766 +     * The BMP part of the index-2 table is fixed and linear and starts at offset 0.
   1.767 +     * Length=2048=0x800=0x10000>>UTRIE2_SHIFT_2.
   1.768 +     */
   1.769 +    UTRIE2_INDEX_2_OFFSET=0,
   1.770 +
   1.771 +    /**
   1.772 +     * The part of the index-2 table for U+D800..U+DBFF stores values for
   1.773 +     * lead surrogate code _units_ not code _points_.
   1.774 +     * Values for lead surrogate code _points_ are indexed with this separate portion of the table, at offset 2048=0x800.
   1.775 +     * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.)
   1.776 +     */
   1.777 +    UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2,
   1.778 +    UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2,
   1.779 +
   1.780 +    /** Count the lengths of both BMP pieces. 2080=0x820 */
   1.781 +    UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH,
   1.782 +
   1.783 +    /**
   1.784 +     * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820.
   1.785 +     * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
   1.786 +     */
   1.787 +    UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH,
   1.788 +    UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6,  /* U+0800 is the first code point after 2-byte UTF-8 */
   1.789 +
   1.790 +    /**
   1.791 +     * The index-1 table, only used for supplementary code points, at offset 2112=0x840.
   1.792 +     * Variable length, for code points up to highStart, where the last single-value range starts.
   1.793 +     * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1.
   1.794 +     * (For 0x100000 supplementary code points U+10000..U+10ffff.)
   1.795 +     *
   1.796 +     * The part of the index-2 table for supplementary code points starts
   1.797 +     * after this index-1 table.
   1.798 +     *
   1.799 +     * Both the index-1 table and the following part of the index-2 table
   1.800 +     * are omitted completely if there is only BMP data.
   1.801 +     */
   1.802 +    UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH,
   1.803 +    UTRIE2_MAX_INDEX_1_LENGTH=0x100000>>UTRIE2_SHIFT_1,
   1.804 +
   1.805 +    /*
   1.806 +     * Fixed layout of the first part of the data array. -----------------------
   1.807 +     * Starts with 4 blocks (128=0x80 entries) for ASCII.
   1.808 +     */
   1.809 +
   1.810 +    /**
   1.811 +     * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80.
   1.812 +     * Used with linear access for single bytes 0..0xbf for simple error handling.
   1.813 +     * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH.
   1.814 +     */
   1.815 +    UTRIE2_BAD_UTF8_DATA_OFFSET=0x80,
   1.816 +
   1.817 +    /** The start of non-linear-ASCII data blocks, at offset 192=0xc0=0x80+0x40. */
   1.818 +    UTRIE2_DATA_START_OFFSET=0xc0
   1.819 +};
   1.820 +
   1.821 +/* Internal functions and macros -------------------------------------------- */
   1.822 +
   1.823 +/**
   1.824 + * Internal function for part of the UTRIE2_U8_NEXTxx() macro implementations.
   1.825 + * Do not call directly. _UTRIE2_U8_NEXT() advances src by (result&7) and reads the data at index (result>>3).
   1.826 + * @internal
   1.827 + */
   1.828 +U_INTERNAL int32_t U_EXPORT2
   1.829 +utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
   1.830 +                           const uint8_t *src, const uint8_t *limit);
   1.831 +
   1.832 +/**
   1.833 + * Internal function for part of the UTRIE2_U8_PREVxx() macro implementations.
   1.834 + * Do not call directly. _UTRIE2_U8_PREV() moves src back by (result&7) and reads the data at index (result>>3).
   1.835 + * @internal
   1.836 + */
   1.837 +U_INTERNAL int32_t U_EXPORT2
   1.838 +utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
   1.839 +                           const uint8_t *start, const uint8_t *src);
   1.840 +
   1.841 +
   1.842 +/** Internal low-level trie getter. Returns a data index. The argument c is expanded twice, so it must have no side effects. */
   1.843 +#define _UTRIE2_INDEX_RAW(offset, trieIndex, c) \
   1.844 +    (((int32_t)((trieIndex)[(offset)+((c)>>UTRIE2_SHIFT_2)]) \
   1.845 +    <<UTRIE2_INDEX_SHIFT)+ \
   1.846 +    ((c)&UTRIE2_DATA_MASK))
   1.847 +
   1.848 +/** Internal trie getter from a UTF-16 single/lead code unit, using index-2 offset 0. Returns the data index. */
   1.849 +#define _UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(trieIndex, c) _UTRIE2_INDEX_RAW(0, trieIndex, c)
   1.850 +
   1.851 +/** Internal trie getter from a lead surrogate code point (D800..DBFF), using the LSCP part of the index-2 table. Returns the data index. */
   1.852 +#define _UTRIE2_INDEX_FROM_LSCP(trieIndex, c) \
   1.853 +    _UTRIE2_INDEX_RAW(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2), trieIndex, c)
   1.854 +
   1.855 +/** Internal trie getter from a BMP code point; lead surrogates use the LSCP part of the index-2 table, all others offset 0. Returns the data index. */
   1.856 +#define _UTRIE2_INDEX_FROM_BMP(trieIndex, c) \
   1.857 +    _UTRIE2_INDEX_RAW(U_IS_LEAD(c) ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
   1.858 +                      trieIndex, c)
   1.859 +
   1.860 +/** Internal trie getter from a supplementary code point below highStart, via the index-1 and then the index-2 table. Returns the data index. */
   1.861 +#define _UTRIE2_INDEX_FROM_SUPP(trieIndex, c) \
   1.862 +    (((int32_t)((trieIndex)[ \
   1.863 +        (trieIndex)[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ \
   1.864 +                      ((c)>>UTRIE2_SHIFT_1)]+ \
   1.865 +        (((c)>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK)]) \
   1.866 +    <<UTRIE2_INDEX_SHIFT)+ \
   1.867 +    ((c)&UTRIE2_DATA_MASK))
   1.868 +
   1.869 +/**
   1.870 + * Internal trie getter from a code point, with checking that c is in 0..10FFFF.
   1.871 + * Returns the data index; for c outside 0..10FFFF that is (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET.
   1.872 + */
   1.873 +#define _UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c) \
   1.874 +    ((uint32_t)(c)<0xd800 ? \
   1.875 +        _UTRIE2_INDEX_RAW(0, (trie)->index, c) : \
   1.876 +        (uint32_t)(c)<=0xffff ? \
   1.877 +            _UTRIE2_INDEX_RAW( \
   1.878 +                (c)<=0xdbff ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
   1.879 +                (trie)->index, c) : \
   1.880 +            (uint32_t)(c)>0x10ffff ? \
   1.881 +                (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET : \
   1.882 +                (c)>=(trie)->highStart ? \
   1.883 +                    (trie)->highValueIndex : \
   1.884 +                    _UTRIE2_INDEX_FROM_SUPP((trie)->index, c))
   1.885 +
   1.886 +/** Internal trie getter from a UTF-16 single/lead code unit. Returns the data, read from the trie member named by the data argument. */
   1.887 +#define _UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c) \
   1.888 +    (trie)->data[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD((trie)->index, c)]
   1.889 +
   1.890 +/** Internal trie getter from a supplementary code point. Returns the data; for c>=highStart it is read at highValueIndex. */
   1.891 +#define _UTRIE2_GET_FROM_SUPP(trie, data, c) \
   1.892 +    (trie)->data[(c)>=(trie)->highStart ? (trie)->highValueIndex : \
   1.893 +                 _UTRIE2_INDEX_FROM_SUPP((trie)->index, c)]
   1.894 +
   1.895 +/**
   1.896 + * Internal trie getter from a code point, with checking that c is in 0..10FFFF.
   1.897 + * Returns the data, read from the trie member named by the data argument.
   1.898 + */
   1.899 +#define _UTRIE2_GET(trie, data, asciiOffset, c) \
   1.900 +    (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)]
   1.901 +
   1.902 +/** Internal next-post-increment: get the next code point (c) and its data. A lead surrogate not followed by a trail surrogate before limit is looked up as a lead surrogate code point. */
   1.903 +#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) { \
   1.904 +    { \
   1.905 +        uint16_t __c2; \
   1.906 +        (c)=*(src)++; \
   1.907 +        if(!U16_IS_LEAD(c)) { \
   1.908 +            (result)=_UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c); \
   1.909 +        } else if((src)==(limit) || !U16_IS_TRAIL(__c2=*(src))) { \
   1.910 +            (result)=(trie)->data[_UTRIE2_INDEX_FROM_LSCP((trie)->index, c)]; \
   1.911 +        } else { \
   1.912 +            ++(src); \
   1.913 +            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
   1.914 +            (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
   1.915 +        } \
   1.916 +    } \
   1.917 +}
   1.918 +
   1.919 +/** Internal pre-decrement-previous: get the previous code point (c) and its data. A trail surrogate at start or not preceded by a lead surrogate is looked up as a BMP code point. */
   1.920 +#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) { \
   1.921 +    { \
   1.922 +        uint16_t __c2; \
   1.923 +        (c)=*--(src); \
   1.924 +        if(!U16_IS_TRAIL(c) || (src)==(start) || !U16_IS_LEAD(__c2=*((src)-1))) { \
   1.925 +            (result)=(trie)->data[_UTRIE2_INDEX_FROM_BMP((trie)->index, c)]; \
   1.926 +        } else { \
   1.927 +            --(src); \
   1.928 +            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
   1.929 +            (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
   1.930 +        } \
   1.931 +    } \
   1.932 +}
   1.933 +
   1.934 +/** Internal UTF-8 next-post-increment: get the next code point's data. Bytes below 0xc0 are looked up directly in the ascii array. */
   1.935 +#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) { \
   1.936 +    uint8_t __lead=(uint8_t)*(src)++; \
   1.937 +    if(__lead<0xc0) { \
   1.938 +        (result)=(trie)->ascii[__lead]; \
   1.939 +    } else { \
   1.940 +        uint8_t __t1, __t2; \
   1.941 +        if( /* handle U+0000..U+07FF inline */ \
   1.942 +            __lead<0xe0 && (src)<(limit) && \
   1.943 +            (__t1=(uint8_t)(*(src)-0x80))<=0x3f \
   1.944 +        ) { \
   1.945 +            ++(src); \
   1.946 +            (result)=(trie)->data[ \
   1.947 +                (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \
   1.948 +                __t1]; \
   1.949 +        } else if( /* handle U+0800..U+CFFF inline */ \
   1.950 +            __lead<0xed && ((src)+1)<(limit) && \
   1.951 +            (__t1=(uint8_t)(*(src)-0x80))<=0x3f && (__lead>0xe0 || __t1>=0x20) && \
   1.952 +            (__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \
   1.953 +        ) { \
   1.954 +            (src)+=2; \
   1.955 +            (result)=(trie)->data[ \
   1.956 +                ((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \
   1.957 +                                         (__t1<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \
   1.958 +                <<UTRIE2_INDEX_SHIFT)+ \
   1.959 +                (__t2&UTRIE2_DATA_MASK)]; \
   1.960 +        } else { \
   1.961 +            int32_t __index=utrie2_internalU8NextIndex((trie), __lead, (const uint8_t *)(src), \
   1.962 +                                                                       (const uint8_t *)(limit)); \
   1.963 +            (src)+=__index&7; \
   1.964 +            (result)=(trie)->data[__index>>3]; \
   1.965 +        } \
   1.966 +    } \
   1.967 +}
   1.968 +
   1.969 +/** Internal UTF-8 pre-decrement-previous: get the previous code point's data. Bytes below 0x80 are looked up directly in the ascii array. */
   1.970 +#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) { \
   1.971 +    uint8_t __b=(uint8_t)*--(src); \
   1.972 +    if(__b<0x80) { \
   1.973 +        (result)=(trie)->ascii[__b]; \
   1.974 +    } else { \
   1.975 +        int32_t __index=utrie2_internalU8PrevIndex((trie), __b, (const uint8_t *)(start), \
   1.976 +                                                                (const uint8_t *)(src)); \
   1.977 +        (src)-=__index&7; \
   1.978 +        (result)=(trie)->data[__index>>3]; \
   1.979 +    } \
   1.980 +}
   1.981 +
   1.982 +U_CDECL_END
   1.983 +
   1.984 +/**
   1.985 + * Work around MSVC 2003 optimization bugs.
   1.986 + */
   1.987 +#if defined (U_HAVE_MSVC_2003_OR_EARLIER)
   1.988 +#pragma optimize("", off)
   1.989 +#endif
   1.990 +
   1.991 +#endif

mercurial