1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/utrie2.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,988 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* 1.7 +* Copyright (C) 2001-2013, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +****************************************************************************** 1.11 +* file name: utrie2.h 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2008aug16 (starting from a copy of utrie.h) 1.17 +* created by: Markus W. Scherer 1.18 +*/ 1.19 + 1.20 +#ifndef __UTRIE2_H__ 1.21 +#define __UTRIE2_H__ 1.22 + 1.23 +#include "unicode/utypes.h" 1.24 +#include "putilimp.h" 1.25 +#include "udataswp.h" 1.26 + 1.27 +U_CDECL_BEGIN 1.28 + 1.29 +struct UTrie; /* forward declaration */ 1.30 +#ifndef __UTRIE_H__ 1.31 +typedef struct UTrie UTrie; 1.32 +#endif 1.33 + 1.34 +/** 1.35 + * \file 1.36 + * 1.37 + * This is a common implementation of a Unicode trie. 1.38 + * It is a kind of compressed, serializable table of 16- or 32-bit values associated with 1.39 + * Unicode code points (0..0x10ffff). (A map from code points to integers.) 1.40 + * 1.41 + * This is the second common version of a Unicode trie (hence the name UTrie2). 1.42 + * Compared with UTrie version 1: 1.43 + * - Still splitting BMP code points 11:5 bits for index and data table lookups. 1.44 + * - Still separate data for lead surrogate code _units_ vs. code _points_, 1.45 + * but the lead surrogate code unit values are not required any more 1.46 + * for data lookup for supplementary code points. 1.47 + * - The "folding" mechanism is removed. 
In UTrie version 1, this somewhat 1.48 + * hard-to-explain mechanism was meant to be used for optimized UTF-16 1.49 + * processing, with application-specific encoding of indexing bits 1.50 + * in the lead surrogate data for the associated supplementary code points. 1.51 + * - For the last single-value code point range (ending with U+10ffff), 1.52 + * the starting code point ("highStart") and the value are stored. 1.53 + * - For supplementary code points U+10000..highStart-1 a three-table lookup 1.54 + * (two index tables and one data table) is used. The first index 1.55 + * is truncated, omitting both the BMP portion and the high range. 1.56 + * - There is a special small index for 2-byte UTF-8, and the initial data 1.57 + * entries are designed for fast 1/2-byte UTF-8 lookup. 1.58 + */ 1.59 + 1.60 +/** 1.61 + * Trie structure. 1.62 + * Use only with public API macros and functions. 1.63 + */ 1.64 +struct UTrie2; 1.65 +typedef struct UTrie2 UTrie2; 1.66 + 1.67 +/* Public UTrie2 API functions: read-only access ---------------------------- */ 1.68 + 1.69 +/** 1.70 + * Selectors for the width of a UTrie2 data value. 1.71 + */ 1.72 +enum UTrie2ValueBits { 1.73 + /** 16 bits per UTrie2 data value. */ 1.74 + UTRIE2_16_VALUE_BITS, 1.75 + /** 32 bits per UTrie2 data value. */ 1.76 + UTRIE2_32_VALUE_BITS, 1.77 + /** Number of selectors for the width of UTrie2 data values. */ 1.78 + UTRIE2_COUNT_VALUE_BITS 1.79 +}; 1.80 +typedef enum UTrie2ValueBits UTrie2ValueBits; 1.81 + 1.82 +/** 1.83 + * Open a frozen trie from its serialized form, stored in 32-bit-aligned memory. 1.84 + * Inverse of utrie2_serialize(). 1.85 + * The memory must remain valid and unchanged as long as the trie is used. 1.86 + * You must utrie2_close() the trie once you are done using it. 
1.87 + * 1.88 + * @param valueBits selects the data entry size; results in an 1.89 + * U_INVALID_FORMAT_ERROR if it does not match the serialized form 1.90 + * @param data a pointer to 32-bit-aligned memory containing the serialized form of a UTrie2 1.91 + * @param length the number of bytes available at data; 1.92 + * can be more than necessary 1.93 + * @param pActualLength receives the actual number of bytes at data taken up by the trie data; 1.94 + * can be NULL 1.95 + * @param pErrorCode an in/out ICU UErrorCode 1.96 + * @return the unserialized trie 1.97 + * 1.98 + * @see utrie2_open 1.99 + * @see utrie2_serialize 1.100 + */ 1.101 +U_CAPI UTrie2 * U_EXPORT2 1.102 +utrie2_openFromSerialized(UTrie2ValueBits valueBits, 1.103 + const void *data, int32_t length, int32_t *pActualLength, 1.104 + UErrorCode *pErrorCode); 1.105 + 1.106 +/** 1.107 + * Open a frozen, empty "dummy" trie. 1.108 + * A dummy trie is an empty trie, used when a real data trie cannot 1.109 + * be loaded. Equivalent to calling utrie2_open() and utrie2_freeze(), 1.110 + * but without internally creating and compacting/serializing the 1.111 + * builder data structure. 1.112 + * 1.113 + * The trie always returns the initialValue, 1.114 + * or the errorValue for out-of-range code points and illegal UTF-8. 1.115 + * 1.116 + * You must utrie2_close() the trie once you are done using it. 
1.117 + * 1.118 + * @param valueBits selects the data entry size 1.119 + * @param initialValue the initial value that is set for all code points 1.120 + * @param errorValue the value for out-of-range code points and illegal UTF-8 1.121 + * @param pErrorCode an in/out ICU UErrorCode 1.122 + * @return the dummy trie 1.123 + * 1.124 + * @see utrie2_openFromSerialized 1.125 + * @see utrie2_open 1.126 + */ 1.127 +U_CAPI UTrie2 * U_EXPORT2 1.128 +utrie2_openDummy(UTrie2ValueBits valueBits, 1.129 + uint32_t initialValue, uint32_t errorValue, 1.130 + UErrorCode *pErrorCode); 1.131 + 1.132 +/** 1.133 + * Get a value from a code point as stored in the trie. 1.134 + * Easier to use than UTRIE2_GET16() and UTRIE2_GET32() but slower. 1.135 + * Easier to use because, unlike the macros, this function works on all UTrie2 1.136 + * objects, frozen or not, holding 16-bit or 32-bit data values. 1.137 + * 1.138 + * @param trie the trie 1.139 + * @param c the code point 1.140 + * @return the value 1.141 + */ 1.142 +U_CAPI uint32_t U_EXPORT2 1.143 +utrie2_get32(const UTrie2 *trie, UChar32 c); 1.144 + 1.145 +/* enumeration callback types */ 1.146 + 1.147 +/** 1.148 + * Callback from utrie2_enum(), extracts a uint32_t value from a 1.149 + * trie value. This value will be passed on to the UTrie2EnumRange function. 1.150 + * 1.151 + * @param context an opaque pointer, as passed into utrie2_enum() 1.152 + * @param value a value from the trie 1.153 + * @return the value that is to be passed on to the UTrie2EnumRange function 1.154 + */ 1.155 +typedef uint32_t U_CALLCONV 1.156 +UTrie2EnumValue(const void *context, uint32_t value); 1.157 + 1.158 +/** 1.159 + * Callback from utrie2_enum(), is called for each contiguous range 1.160 + * of code points with the same value as retrieved from the trie and 1.161 + * transformed by the UTrie2EnumValue function. 1.162 + * 1.163 + * The callback function can stop the enumeration by returning FALSE. 
1.164 + * 1.165 + * @param context an opaque pointer, as passed into utrie2_enum() 1.166 + * @param start the first code point in a contiguous range with value 1.167 + * @param end the last code point in a contiguous range with value (inclusive) 1.168 + * @param value the value that is set for all code points in [start..end] 1.169 + * @return FALSE to stop the enumeration 1.170 + */ 1.171 +typedef UBool U_CALLCONV 1.172 +UTrie2EnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value); 1.173 + 1.174 +/** 1.175 + * Enumerate efficiently all values in a trie. 1.176 + * Do not modify the trie during the enumeration. 1.177 + * 1.178 + * For each entry in the trie, the value to be delivered is passed through 1.179 + * the UTrie2EnumValue function. 1.180 + * The value is unchanged if that function pointer is NULL. 1.181 + * 1.182 + * For each contiguous range of code points with a given (transformed) value, 1.183 + * the UTrie2EnumRange function is called. 1.184 + * 1.185 + * @param trie a pointer to the trie 1.186 + * @param enumValue a pointer to a function that may transform the trie entry value, 1.187 + * or NULL if the values from the trie are to be used directly 1.188 + * @param enumRange a pointer to a function that is called for each contiguous range 1.189 + * of code points with the same (transformed) value 1.190 + * @param context an opaque pointer that is passed on to the callback functions 1.191 + */ 1.192 +U_CAPI void U_EXPORT2 1.193 +utrie2_enum(const UTrie2 *trie, 1.194 + UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context); 1.195 + 1.196 +/* Building a trie ---------------------------------------------------------- */ 1.197 + 1.198 +/** 1.199 + * Open an empty, writable trie. At build time, 32-bit data values are used. 1.200 + * utrie2_freeze() takes a valueBits parameter 1.201 + * which determines the data value width in the serialized and frozen forms. 
1.202 + * You must utrie2_close() the trie once you are done using it. 1.203 + * 1.204 + * @param initialValue the initial value that is set for all code points 1.205 + * @param errorValue the value for out-of-range code points and illegal UTF-8 1.206 + * @param pErrorCode an in/out ICU UErrorCode 1.207 + * @return a pointer to the allocated and initialized new trie 1.208 + */ 1.209 +U_CAPI UTrie2 * U_EXPORT2 1.210 +utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode); 1.211 + 1.212 +/** 1.213 + * Clone a trie. 1.214 + * You must utrie2_close() the clone once you are done using it. 1.215 + * 1.216 + * @param other the trie to clone 1.217 + * @param pErrorCode an in/out ICU UErrorCode 1.218 + * @return a pointer to the new trie clone 1.219 + */ 1.220 +U_CAPI UTrie2 * U_EXPORT2 1.221 +utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode); 1.222 + 1.223 +/** 1.224 + * Clone a trie. The clone will be mutable/writable even if the other trie 1.225 + * is frozen. (See utrie2_freeze().) 1.226 + * You must utrie2_close() the clone once you are done using it. 1.227 + * 1.228 + * @param other the trie to clone 1.229 + * @param pErrorCode an in/out ICU UErrorCode 1.230 + * @return a pointer to the new trie clone 1.231 + */ 1.232 +U_CAPI UTrie2 * U_EXPORT2 1.233 +utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode); 1.234 + 1.235 +/** 1.236 + * Close a trie and release associated memory. 1.237 + * 1.238 + * @param trie the trie 1.239 + */ 1.240 +U_CAPI void U_EXPORT2 1.241 +utrie2_close(UTrie2 *trie); 1.242 + 1.243 +/** 1.244 + * Set a value for a code point. 
1.245 + * 1.246 + * @param trie the unfrozen trie 1.247 + * @param c the code point 1.248 + * @param value the value 1.249 + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: 1.250 + * - U_NO_WRITE_PERMISSION if the trie is frozen 1.251 + */ 1.252 +U_CAPI void U_EXPORT2 1.253 +utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode); 1.254 + 1.255 +/** 1.256 + * Set a value in a range of code points [start..end]. 1.257 + * All code points c with start<=c<=end will get the value if 1.258 + * overwrite is TRUE or if the old value is the initial value. 1.259 + * 1.260 + * @param trie the unfrozen trie 1.261 + * @param start the first code point to get the value 1.262 + * @param end the last code point to get the value (inclusive) 1.263 + * @param value the value 1.264 + * @param overwrite flag for whether old non-initial values are to be overwritten 1.265 + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: 1.266 + * - U_NO_WRITE_PERMISSION if the trie is frozen 1.267 + */ 1.268 +U_CAPI void U_EXPORT2 1.269 +utrie2_setRange32(UTrie2 *trie, 1.270 + UChar32 start, UChar32 end, 1.271 + uint32_t value, UBool overwrite, 1.272 + UErrorCode *pErrorCode); 1.273 + 1.274 +/** 1.275 + * Freeze a trie. Make it immutable (read-only) and compact it, 1.276 + * ready for serialization and for use with fast macros. 1.277 + * Functions to set values will fail after freezing. 1.278 + * 1.279 + * A trie can be frozen only once. If this function is called again with different 1.280 + * valueBits then it will set a U_ILLEGAL_ARGUMENT_ERROR. 
1.281 + * 1.282 + * @param trie the trie 1.283 + * @param valueBits selects the data entry size; if smaller than 32 bits, then 1.284 + * the values stored in the trie will be truncated 1.285 + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: 1.286 + * - U_INDEX_OUTOFBOUNDS_ERROR if the compacted index or data arrays are too long 1.287 + * for serialization 1.288 + * (the trie will be immutable and usable, 1.289 + * but not frozen and not usable with the fast macros) 1.290 + * 1.291 + * @see utrie2_cloneAsThawed 1.292 + */ 1.293 +U_CAPI void U_EXPORT2 1.294 +utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode); 1.295 + 1.296 +/** 1.297 + * Test if the trie is frozen. (See utrie2_freeze().) 1.298 + * 1.299 + * @param trie the trie 1.300 + * @return TRUE if the trie is frozen, that is, immutable, ready for serialization 1.301 + * and for use with fast macros 1.302 + */ 1.303 +U_CAPI UBool U_EXPORT2 1.304 +utrie2_isFrozen(const UTrie2 *trie); 1.305 + 1.306 +/** 1.307 + * Serialize a frozen trie into 32-bit aligned memory. 1.308 + * If the trie is not frozen, then the function returns with a U_ILLEGAL_ARGUMENT_ERROR. 1.309 + * A trie can be serialized multiple times. 
1.310 + * 1.311 + * @param trie the frozen trie 1.312 + * @param data a pointer to 32-bit-aligned memory to be filled with the trie data, 1.313 + * can be NULL if capacity==0 1.314 + * @param capacity the number of bytes available at data, 1.315 + * or 0 for preflighting 1.316 + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: 1.317 + * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization 1.318 + * - U_ILLEGAL_ARGUMENT_ERROR if the trie is not frozen or the data and capacity 1.319 + * parameters are bad 1.320 + * @return the number of bytes written or needed for the trie 1.321 + * 1.322 + * @see utrie2_openFromSerialized() 1.323 + */ 1.324 +U_CAPI int32_t U_EXPORT2 1.325 +utrie2_serialize(UTrie2 *trie, 1.326 + void *data, int32_t capacity, 1.327 + UErrorCode *pErrorCode); 1.328 + 1.329 +/* Public UTrie2 API: miscellaneous functions ------------------------------- */ 1.330 + 1.331 +/** 1.332 + * Get the UTrie version from 32-bit-aligned memory containing the serialized form 1.333 + * of either a UTrie (version 1) or a UTrie2 (version 2). 1.334 + * 1.335 + * @param data a pointer to 32-bit-aligned memory containing the serialized form 1.336 + * of a UTrie, version 1 or 2 1.337 + * @param length the number of bytes available at data; 1.338 + * can be more than necessary (see return value) 1.339 + * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. 1.340 + * If TRUE, opposite-endian serialized forms are recognized as well. 1.341 + * @return the UTrie version of the serialized form, or 0 if it is not 1.342 + * recognized as a serialized UTrie 1.343 + */ 1.344 +U_CAPI int32_t U_EXPORT2 1.345 +utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk); 1.346 + 1.347 +/** 1.348 + * Swap a serialized UTrie2. 
1.349 + * @internal 1.350 + */ 1.351 +U_CAPI int32_t U_EXPORT2 1.352 +utrie2_swap(const UDataSwapper *ds, 1.353 + const void *inData, int32_t length, void *outData, 1.354 + UErrorCode *pErrorCode); 1.355 + 1.356 +/** 1.357 + * Swap a serialized UTrie or UTrie2. 1.358 + * @internal 1.359 + */ 1.360 +U_CAPI int32_t U_EXPORT2 1.361 +utrie2_swapAnyVersion(const UDataSwapper *ds, 1.362 + const void *inData, int32_t length, void *outData, 1.363 + UErrorCode *pErrorCode); 1.364 + 1.365 +/** 1.366 + * Build a UTrie2 (version 2) from a UTrie (version 1). 1.367 + * Enumerates all values in the UTrie and builds a UTrie2 with the same values. 1.368 + * The resulting UTrie2 will be frozen. 1.369 + * 1.370 + * @param trie1 the runtime UTrie structure to be enumerated 1.371 + * @param errorValue the value for out-of-range code points and illegal UTF-8 1.372 + * @param pErrorCode an in/out ICU UErrorCode 1.373 + * @return The frozen UTrie2 with the same values as the UTrie. 1.374 + */ 1.375 +U_CAPI UTrie2 * U_EXPORT2 1.376 +utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode); 1.377 + 1.378 +/* Public UTrie2 API macros ------------------------------------------------- */ 1.379 + 1.380 +/* 1.381 + * These macros provide fast data lookup from a frozen trie. 1.382 + * They will crash when used on an unfrozen trie. 1.383 + */ 1.384 + 1.385 +/** 1.386 + * Return a 16-bit trie value from a code point, with range checking. 1.387 + * Returns trie->errorValue if c is not in the range 0..U+10ffff. 1.388 + * 1.389 + * @param trie (const UTrie2 *, in) a frozen trie 1.390 + * @param c (UChar32, in) the input code point 1.391 + * @return (uint16_t) The code point's trie value. 1.392 + */ 1.393 +#define UTRIE2_GET16(trie, c) _UTRIE2_GET((trie), index, (trie)->indexLength, (c)) 1.394 + 1.395 +/** 1.396 + * Return a 32-bit trie value from a code point, with range checking. 1.397 + * Returns trie->errorValue if c is not in the range 0..U+10ffff. 
1.398 + * 1.399 + * @param trie (const UTrie2 *, in) a frozen trie 1.400 + * @param c (UChar32, in) the input code point 1.401 + * @return (uint32_t) The code point's trie value. 1.402 + */ 1.403 +#define UTRIE2_GET32(trie, c) _UTRIE2_GET((trie), data32, 0, (c)) 1.404 + 1.405 +/** 1.406 + * UTF-16: Get the next code point (UChar32 c, out), post-increment src, 1.407 + * and get a 16-bit value from the trie. 1.408 + * 1.409 + * @param trie (const UTrie2 *, in) a frozen trie 1.410 + * @param src (const UChar *, in/out) the source text pointer 1.411 + * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated 1.412 + * @param c (UChar32, out) variable for the code point 1.413 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result 1.414 + */ 1.415 +#define UTRIE2_U16_NEXT16(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, index, src, limit, c, result) 1.416 + 1.417 +/** 1.418 + * UTF-16: Get the next code point (UChar32 c, out), post-increment src, 1.419 + * and get a 32-bit value from the trie. 1.420 + * 1.421 + * @param trie (const UTrie2 *, in) a frozen trie 1.422 + * @param src (const UChar *, in/out) the source text pointer 1.423 + * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated 1.424 + * @param c (UChar32, out) variable for the code point 1.425 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result 1.426 + */ 1.427 +#define UTRIE2_U16_NEXT32(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, data32, src, limit, c, result) 1.428 + 1.429 +/** 1.430 + * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src, 1.431 + * and get a 16-bit value from the trie. 
1.432 + * 1.433 + * @param trie (const UTrie2 *, in) a frozen trie 1.434 + * @param start (const UChar *, in) the start pointer for the text 1.435 + * @param src (const UChar *, in/out) the source text pointer 1.436 + * @param c (UChar32, out) variable for the code point 1.437 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result 1.438 + */ 1.439 +#define UTRIE2_U16_PREV16(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, index, start, src, c, result) 1.440 + 1.441 +/** 1.442 + * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src, 1.443 + * and get a 32-bit value from the trie. 1.444 + * 1.445 + * @param trie (const UTrie2 *, in) a frozen trie 1.446 + * @param start (const UChar *, in) the start pointer for the text 1.447 + * @param src (const UChar *, in/out) the source text pointer 1.448 + * @param c (UChar32, out) variable for the code point 1.449 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result 1.450 + */ 1.451 +#define UTRIE2_U16_PREV32(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, data32, start, src, c, result) 1.452 + 1.453 +/** 1.454 + * UTF-8: Post-increment src and get a 16-bit value from the trie. 1.455 + * 1.456 + * @param trie (const UTrie2 *, in) a frozen trie 1.457 + * @param src (const char *, in/out) the source text pointer 1.458 + * @param limit (const char *, in) the limit pointer for the text (must not be NULL) 1.459 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result 1.460 + */ 1.461 +#define UTRIE2_U8_NEXT16(trie, src, limit, result)\ 1.462 + _UTRIE2_U8_NEXT(trie, data16, index, src, limit, result) 1.463 + 1.464 +/** 1.465 + * UTF-8: Post-increment src and get a 32-bit value from the trie. 
1.466 + * 1.467 + * @param trie (const UTrie2 *, in) a frozen trie 1.468 + * @param src (const char *, in/out) the source text pointer 1.469 + * @param limit (const char *, in) the limit pointer for the text (must not be NULL) 1.470 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result 1.471 + */ 1.472 +#define UTRIE2_U8_NEXT32(trie, src, limit, result) \ 1.473 + _UTRIE2_U8_NEXT(trie, data32, data32, src, limit, result) 1.474 + 1.475 +/** 1.476 + * UTF-8: Pre-decrement src and get a 16-bit value from the trie. 1.477 + * 1.478 + * @param trie (const UTrie2 *, in) a frozen trie 1.479 + * @param start (const char *, in) the start pointer for the text 1.480 + * @param src (const char *, in/out) the source text pointer 1.481 + * @param result (uint16_t, out) uint16_t variable for the trie lookup result 1.482 + */ 1.483 +#define UTRIE2_U8_PREV16(trie, start, src, result) \ 1.484 + _UTRIE2_U8_PREV(trie, data16, index, start, src, result) 1.485 + 1.486 +/** 1.487 + * UTF-8: Pre-decrement src and get a 32-bit value from the trie. 1.488 + * 1.489 + * @param trie (const UTrie2 *, in) a frozen trie 1.490 + * @param start (const char *, in) the start pointer for the text 1.491 + * @param src (const char *, in/out) the source text pointer 1.492 + * @param result (uint32_t, out) uint32_t variable for the trie lookup result 1.493 + */ 1.494 +#define UTRIE2_U8_PREV32(trie, start, src, result) \ 1.495 + _UTRIE2_U8_PREV(trie, data32, data32, start, src, result) 1.496 + 1.497 +/* Public UTrie2 API: optimized UTF-16 access ------------------------------- */ 1.498 + 1.499 +/* 1.500 + * The following functions and macros are used for highly optimized UTF-16 1.501 + * text processing. The UTRIE2_U16_NEXTxy() macros do not depend on these. 1.502 + * 1.503 + * A UTrie2 stores separate values for lead surrogate code _units_ vs. code _points_. 
1.504 + * UTF-16 text processing can be optimized by detecting surrogate pairs and 1.505 + * assembling supplementary code points only when there is non-trivial data 1.506 + * available. 1.507 + * 1.508 + * At build-time, use utrie2_enumForLeadSurrogate() to see if there 1.509 + * is non-trivial (non-initialValue) data for any of the supplementary 1.510 + * code points associated with a lead surrogate. 1.511 + * If so, then set a special (application-specific) value for the 1.512 + * lead surrogate code _unit_, with utrie2_set32ForLeadSurrogateCodeUnit(). 1.513 + * 1.514 + * At runtime, use UTRIE2_GET16_FROM_U16_SINGLE_LEAD() or 1.515 + * UTRIE2_GET32_FROM_U16_SINGLE_LEAD() per code unit. If there is non-trivial 1.516 + * data and the code unit is a lead surrogate, then check if a trail surrogate 1.517 + * follows. If so, assemble the supplementary code point with 1.518 + * U16_GET_SUPPLEMENTARY() and look up its value with UTRIE2_GET16_FROM_SUPP() 1.519 + * or UTRIE2_GET32_FROM_SUPP(); otherwise reset the lead 1.520 + * surrogate's value or do a code point lookup for it. 1.521 + * 1.522 + * If there is only trivial data for lead and trail surrogates, then processing 1.523 + * can often skip them. For example, in normalization or case mapping 1.524 + * all characters that do not have any mappings are simply copied as is. 1.525 + */ 1.526 + 1.527 +/** 1.528 + * Get a value from a lead surrogate code unit as stored in the trie. 1.529 + * 1.530 + * @param trie the trie 1.531 + * @param c the code unit (U+D800..U+DBFF) 1.532 + * @return the value 1.533 + */ 1.534 +U_CAPI uint32_t U_EXPORT2 1.535 +utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c); 1.536 + 1.537 +/** 1.538 + * Enumerate the trie values for the 1024=0x400 code points 1.539 + * corresponding to a given lead surrogate. 1.540 + * For example, for the lead surrogate U+D87E it will enumerate the values 1.541 + * for [U+2F800..U+2FC00[. 
1.542 + * Used by data builder code that sets special lead surrogate code unit values 1.543 + * for optimized UTF-16 string processing. 1.544 + * 1.545 + * Do not modify the trie during the enumeration. 1.546 + * 1.547 + * Except for the limited code point range, this functions just like utrie2_enum(): 1.548 + * For each entry in the trie, the value to be delivered is passed through 1.549 + * the UTrie2EnumValue function. 1.550 + * The value is unchanged if that function pointer is NULL. 1.551 + * 1.552 + * For each contiguous range of code points with a given (transformed) value, 1.553 + * the UTrie2EnumRange function is called. 1.554 + * 1.555 + * @param trie a pointer to the trie 1.556 + * @param enumValue a pointer to a function that may transform the trie entry value, 1.557 + * or NULL if the values from the trie are to be used directly 1.558 + * @param enumRange a pointer to a function that is called for each contiguous range 1.559 + * of code points with the same (transformed) value 1.560 + * @param context an opaque pointer that is passed on to the callback functions 1.561 + */ 1.562 +U_CAPI void U_EXPORT2 1.563 +utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, 1.564 + UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, 1.565 + const void *context); 1.566 + 1.567 +/** 1.568 + * Set a value for a lead surrogate code unit. 1.569 + * 1.570 + * @param trie the unfrozen trie 1.571 + * @param lead the lead surrogate code unit (U+D800..U+DBFF) 1.572 + * @param value the value 1.573 + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: 1.574 + * - U_NO_WRITE_PERMISSION if the trie is frozen 1.575 + */ 1.576 +U_CAPI void U_EXPORT2 1.577 +utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie, 1.578 + UChar32 lead, uint32_t value, 1.579 + UErrorCode *pErrorCode); 1.580 + 1.581 +/** 1.582 + * Return a 16-bit trie value from a UTF-16 single/lead code unit (<=U+ffff). 
1.583 + * Same as UTRIE2_GET16() if c is a BMP code point except for lead surrogates, 1.584 + * but smaller and faster. 1.585 + * 1.586 + * @param trie (const UTrie2 *, in) a frozen trie 1.587 + * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff 1.588 + * @return (uint16_t) The code unit's trie value. 1.589 + */ 1.590 +#define UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), index, c) 1.591 + 1.592 +/** 1.593 + * Return a 32-bit trie value from a UTF-16 single/lead code unit (<=U+ffff). 1.594 + * Same as UTRIE2_GET32() if c is a BMP code point except for lead surrogates, 1.595 + * but smaller and faster. 1.596 + * 1.597 + * @param trie (const UTrie2 *, in) a frozen trie 1.598 + * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff 1.599 + * @return (uint32_t) The code unit's trie value. 1.600 + */ 1.601 +#define UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), data32, c) 1.602 + 1.603 +/** 1.604 + * Return a 16-bit trie value from a supplementary code point (U+10000..U+10ffff). 1.605 + * 1.606 + * @param trie (const UTrie2 *, in) a frozen trie 1.607 + * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff 1.608 + * @return (uint16_t) The code point's trie value. 1.609 + */ 1.610 +#define UTRIE2_GET16_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), index, c) 1.611 + 1.612 +/** 1.613 + * Return a 32-bit trie value from a supplementary code point (U+10000..U+10ffff). 1.614 + * 1.615 + * @param trie (const UTrie2 *, in) a frozen trie 1.616 + * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff 1.617 + * @return (uint32_t) The code point's trie value. 
1.618 + */ 1.619 +#define UTRIE2_GET32_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), data32, c) 1.620 + 1.621 +U_CDECL_END 1.622 + 1.623 +/* C++ convenience wrappers ------------------------------------------------- */ 1.624 + 1.625 +#ifdef __cplusplus 1.626 + 1.627 +#include "unicode/utf.h" 1.628 +#include "mutex.h" 1.629 + 1.630 +U_NAMESPACE_BEGIN 1.631 + 1.632 +// Use the Forward/Backward subclasses below. 1.633 +class UTrie2StringIterator : public UMemory { 1.634 +public: 1.635 + UTrie2StringIterator(const UTrie2 *t, const UChar *p) : 1.636 + trie(t), codePointStart(p), codePointLimit(p), codePoint(U_SENTINEL) {} 1.637 + 1.638 + const UTrie2 *trie; 1.639 + const UChar *codePointStart, *codePointLimit; 1.640 + UChar32 codePoint; 1.641 +}; 1.642 + 1.643 +class BackwardUTrie2StringIterator : public UTrie2StringIterator { 1.644 +public: 1.645 + BackwardUTrie2StringIterator(const UTrie2 *t, const UChar *s, const UChar *p) : 1.646 + UTrie2StringIterator(t, p), start(s) {} 1.647 + 1.648 + uint16_t previous16(); 1.649 + 1.650 + const UChar *start; 1.651 +}; 1.652 + 1.653 +class ForwardUTrie2StringIterator : public UTrie2StringIterator { 1.654 +public: 1.655 + // Iteration limit l can be NULL. 1.656 + // In that case, the caller must detect c==0 and stop. 1.657 + ForwardUTrie2StringIterator(const UTrie2 *t, const UChar *p, const UChar *l) : 1.658 + UTrie2StringIterator(t, p), limit(l) {} 1.659 + 1.660 + uint16_t next16(); 1.661 + 1.662 + const UChar *limit; 1.663 +}; 1.664 + 1.665 +U_NAMESPACE_END 1.666 + 1.667 +#endif 1.668 + 1.669 +/* Internal definitions ----------------------------------------------------- */ 1.670 + 1.671 +U_CDECL_BEGIN 1.672 + 1.673 +/** Build-time trie structure. */ 1.674 +struct UNewTrie2; 1.675 +typedef struct UNewTrie2 UNewTrie2; 1.676 + 1.677 +/* 1.678 + * Trie structure definition. 
1.679 + * 1.680 + * Either the data table is 16 bits wide and accessed via the index 1.681 + * pointer, with each index item increased by indexLength; 1.682 + * in this case, data32==NULL, and data16 is used for direct ASCII access. 1.683 + * 1.684 + * Or the data table is 32 bits wide and accessed via the data32 pointer. 1.685 + */ 1.686 +struct UTrie2 { 1.687 + /* protected: used by macros and functions for reading values */ 1.688 + const uint16_t *index; 1.689 + const uint16_t *data16; /* for fast UTF-8 ASCII access, if 16b data */ 1.690 + const uint32_t *data32; /* NULL if 16b data is used via index */ 1.691 + 1.692 + int32_t indexLength, dataLength; 1.693 + uint16_t index2NullOffset; /* 0xffff if there is no dedicated index-2 null block */ 1.694 + uint16_t dataNullOffset; 1.695 + uint32_t initialValue; 1.696 + /** Value returned for out-of-range code points and illegal UTF-8. */ 1.697 + uint32_t errorValue; 1.698 + 1.699 + /* Start of the last range which ends at U+10ffff, and its value. */ 1.700 + UChar32 highStart; 1.701 + int32_t highValueIndex; 1.702 + 1.703 + /* private: used by builder and unserialization functions */ 1.704 + void *memory; /* serialized bytes; NULL if not frozen yet */ 1.705 + int32_t length; /* number of serialized bytes at memory; 0 if not frozen yet */ 1.706 + UBool isMemoryOwned; /* TRUE if the trie owns the memory */ 1.707 + UBool padding1; 1.708 + int16_t padding2; 1.709 + UNewTrie2 *newTrie; /* builder object; NULL when frozen */ 1.710 +}; 1.711 + 1.712 +/** 1.713 + * Trie constants, defining shift widths, index array lengths, etc. 1.714 + * 1.715 + * These are needed for the runtime macros but users can treat these as 1.716 + * implementation details and refer to the actual public API further above. 1.717 + */ 1.718 +enum { 1.719 + /** Shift size for getting the index-1 table offset. */ 1.720 + UTRIE2_SHIFT_1=6+5, 1.721 + 1.722 + /** Shift size for getting the index-2 table offset. 
*/ 1.723 + UTRIE2_SHIFT_2=5, 1.724 + 1.725 + /** 1.726 + * Difference between the two shift sizes, 1.727 + * for getting an index-1 offset from an index-2 offset. 6=11-5 1.728 + */ 1.729 + UTRIE2_SHIFT_1_2=UTRIE2_SHIFT_1-UTRIE2_SHIFT_2, 1.730 + 1.731 + /** 1.732 + * Number of index-1 entries for the BMP. 32=0x20 1.733 + * This part of the index-1 table is omitted from the serialized form. 1.734 + */ 1.735 + UTRIE2_OMITTED_BMP_INDEX_1_LENGTH=0x10000>>UTRIE2_SHIFT_1, 1.736 + 1.737 + /** Number of code points per index-1 table entry. 2048=0x800 */ 1.738 + UTRIE2_CP_PER_INDEX_1_ENTRY=1<<UTRIE2_SHIFT_1, 1.739 + 1.740 + /** Number of entries in an index-2 block. 64=0x40 */ 1.741 + UTRIE2_INDEX_2_BLOCK_LENGTH=1<<UTRIE2_SHIFT_1_2, 1.742 + 1.743 + /** Mask for getting the lower bits for the in-index-2-block offset. */ 1.744 + UTRIE2_INDEX_2_MASK=UTRIE2_INDEX_2_BLOCK_LENGTH-1, 1.745 + 1.746 + /** Number of entries in a data block. 32=0x20 */ 1.747 + UTRIE2_DATA_BLOCK_LENGTH=1<<UTRIE2_SHIFT_2, 1.748 + 1.749 + /** Mask for getting the lower bits for the in-data-block offset. */ 1.750 + UTRIE2_DATA_MASK=UTRIE2_DATA_BLOCK_LENGTH-1, 1.751 + 1.752 + /** 1.753 + * Shift size for shifting left the index array values. 1.754 + * Increases possible data size with 16-bit index values at the cost 1.755 + * of compactability. 1.756 + * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY. 1.757 + */ 1.758 + UTRIE2_INDEX_SHIFT=2, 1.759 + 1.760 + /** The alignment size of a data block. Also the granularity for compaction. */ 1.761 + UTRIE2_DATA_GRANULARITY=1<<UTRIE2_INDEX_SHIFT, 1.762 + 1.763 + /* Fixed layout of the first part of the index array. ------------------- */ 1.764 + 1.765 + /** 1.766 + * The BMP part of the index-2 table is fixed and linear and starts at offset 0. 1.767 + * Length=2048=0x800=0x10000>>UTRIE2_SHIFT_2. 
1.768 + */ 1.769 + UTRIE2_INDEX_2_OFFSET=0, 1.770 + 1.771 + /** 1.772 + * The part of the index-2 table for U+D800..U+DBFF stores values for 1.773 + * lead surrogate code _units_ not code _points_. 1.774 + * Values for lead surrogate code _points_ are indexed with this portion of the table. 1.775 + * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.) 1.776 + */ 1.777 + UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2, 1.778 + UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2, 1.779 + 1.780 + /** Count the lengths of both BMP pieces. 2080=0x820 */ 1.781 + UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH, 1.782 + 1.783 + /** 1.784 + * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820. 1.785 + * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2. 1.786 + */ 1.787 + UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH, 1.788 + UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6, /* U+0800 is the first code point after 2-byte UTF-8 */ 1.789 + 1.790 + /** 1.791 + * The index-1 table, only used for supplementary code points, at offset 2112=0x840. 1.792 + * Variable length, for code points up to highStart, where the last single-value range starts. 1.793 + * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1. 1.794 + * (For 0x100000 supplementary code points U+10000..U+10ffff.) 1.795 + * 1.796 + * The part of the index-2 table for supplementary code points starts 1.797 + * after this index-1 table. 1.798 + * 1.799 + * Both the index-1 table and the following part of the index-2 table 1.800 + * are omitted completely if there is only BMP data. 1.801 + */ 1.802 + UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH, 1.803 + UTRIE2_MAX_INDEX_1_LENGTH=0x100000>>UTRIE2_SHIFT_1, 1.804 + 1.805 + /* 1.806 + * Fixed layout of the first part of the data array. ----------------------- 1.807 + * Starts with 4 blocks (128=0x80 entries) for ASCII. 
1.808 + */ 1.809 + 1.810 + /** 1.811 + * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80. 1.812 + * Used with linear access for single bytes 0..0xbf for simple error handling. 1.813 + * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH. 1.814 + */ 1.815 + UTRIE2_BAD_UTF8_DATA_OFFSET=0x80, 1.816 + 1.817 + /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */ 1.818 + UTRIE2_DATA_START_OFFSET=0xc0 1.819 +}; 1.820 + 1.821 +/* Internal functions and macros -------------------------------------------- */ 1.822 + 1.823 +/** 1.824 + * Internal function for part of the UTRIE2_U8_NEXTxx() macro implementations. 1.825 + * Do not call directly. 1.826 + * @internal 1.827 + */ 1.828 +U_INTERNAL int32_t U_EXPORT2 1.829 +utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, 1.830 + const uint8_t *src, const uint8_t *limit); 1.831 + 1.832 +/** 1.833 + * Internal function for part of the UTRIE2_U8_PREVxx() macro implementations. 1.834 + * Do not call directly. 1.835 + * @internal 1.836 + */ 1.837 +U_INTERNAL int32_t U_EXPORT2 1.838 +utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, 1.839 + const uint8_t *start, const uint8_t *src); 1.840 + 1.841 + 1.842 +/** Internal low-level trie getter. Returns a data index. */ 1.843 +#define _UTRIE2_INDEX_RAW(offset, trieIndex, c) \ 1.844 + (((int32_t)((trieIndex)[(offset)+((c)>>UTRIE2_SHIFT_2)]) \ 1.845 + <<UTRIE2_INDEX_SHIFT)+ \ 1.846 + ((c)&UTRIE2_DATA_MASK)) 1.847 + 1.848 +/** Internal trie getter from a UTF-16 single/lead code unit. Returns the data index. */ 1.849 +#define _UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(trieIndex, c) _UTRIE2_INDEX_RAW(0, trieIndex, c) 1.850 + 1.851 +/** Internal trie getter from a lead surrogate code point (D800..DBFF). Returns the data index. */ 1.852 +#define _UTRIE2_INDEX_FROM_LSCP(trieIndex, c) \ 1.853 + _UTRIE2_INDEX_RAW(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2), trieIndex, c) 1.854 + 1.855 +/** Internal trie getter from a BMP code point. 
Returns the data index.
 * A lead surrogate (U_IS_LEAD(c)) is looked up through the index-2 portion at
 * UTRIE2_LSCP_INDEX_2_OFFSET; every other BMP code point uses offset 0.
 * c is expanded more than once, so it must be free of side effects.
 */
#define _UTRIE2_INDEX_FROM_BMP(trieIndex, c) \
    _UTRIE2_INDEX_RAW(U_IS_LEAD(c) ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
                      trieIndex, c)

/**
 * Internal trie getter from a supplementary code point below highStart. Returns the data index.
 *
 * Three-step lookup: the index-1 entry for c>>UTRIE2_SHIFT_1 gives the start of
 * an index-2 block, the entry inside that block is selected by the next
 * UTRIE2_SHIFT_1_2 bits of c, and the low UTRIE2_SHIFT_2 bits of c select the
 * entry inside the data block.
 * The index-1 offset is reduced by UTRIE2_OMITTED_BMP_INDEX_1_LENGTH, matching
 * the BMP entries that the enum comments describe as omitted.
 * There is no range check here; the macros in this file test c against
 * highStart before expanding this one.
 */
#define _UTRIE2_INDEX_FROM_SUPP(trieIndex, c) \
    (((int32_t)((trieIndex)[ \
        (trieIndex)[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ \
                      ((c)>>UTRIE2_SHIFT_1)]+ \
        (((c)>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK)]) \
    <<UTRIE2_INDEX_SHIFT)+ \
    ((c)&UTRIE2_DATA_MASK))

/**
 * Internal trie getter from a code point, with checking that c is in 0..10FFFF.
 * Returns the data index.
 *
 * Cases, tested in this order:
 * - c<0xd800: linear BMP lookup.
 * - c<=0xffff: BMP lookup; D800..DBFF use the lead surrogate code point index.
 * - c>0x10ffff (also negative c, because of the unsigned comparison):
 *   asciiOffset+UTRIE2_BAD_UTF8_DATA_OFFSET.
 * - c>=highStart: highValueIndex.
 * - otherwise: supplementary lookup via _UTRIE2_INDEX_FROM_SUPP().
 * trie and c are expanded several times, so they must be free of side effects.
 */
#define _UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c) \
    ((uint32_t)(c)<0xd800 ? \
        _UTRIE2_INDEX_RAW(0, (trie)->index, c) : \
        (uint32_t)(c)<=0xffff ? \
            _UTRIE2_INDEX_RAW( \
                (c)<=0xdbff ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \
                (trie)->index, c) : \
            (uint32_t)(c)>0x10ffff ? \
                (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET : \
                (c)>=(trie)->highStart ? \
                    (trie)->highValueIndex : \
                    _UTRIE2_INDEX_FROM_SUPP((trie)->index, c))

/**
 * Internal trie getter from a UTF-16 single/lead code unit. Returns the data.
 * data is substituted as the name of the UTrie2 array member that is read.
 */
#define _UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c) \
    (trie)->data[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD((trie)->index, c)]

/**
 * Internal trie getter from a supplementary code point. Returns the data.
 * Uses highValueIndex for c>=highStart, otherwise the supplementary lookup.
 * data is substituted as the name of the UTrie2 array member that is read.
 */
#define _UTRIE2_GET_FROM_SUPP(trie, data, c) \
    (trie)->data[(c)>=(trie)->highStart ? (trie)->highValueIndex : \
                 _UTRIE2_INDEX_FROM_SUPP((trie)->index, c)]

/**
 * Internal trie getter from a code point, with checking that c is in 0..10FFFF.
 * Returns the data.
* data is substituted as the name of the UTrie2 array member that is read;
 * asciiOffset is only used for the out-of-range case of _UTRIE2_INDEX_FROM_CP().
 */
#define _UTRIE2_GET(trie, data, asciiOffset, c) \
    (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)]

/**
 * Internal next-post-increment: get the next code point (c) and its data.
 *
 * Reads one code unit from src and advances src past it.
 * - Not a lead surrogate: c is that unit, looked up as a single/lead code unit.
 * - Lead surrogate at limit, or not followed by a trail surrogate: c stays the
 *   lead surrogate and is looked up as a lead surrogate code point.
 * - Otherwise: src advances past the trail surrogate, c becomes the combined
 *   supplementary code point and is looked up as such.
 * Expands to a brace block, not an expression; src, c and result are assigned.
 */
#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) { \
    { \
        uint16_t __c2; \
        (c)=*(src)++; \
        if(!U16_IS_LEAD(c)) { \
            (result)=_UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c); \
        } else if((src)==(limit) || !U16_IS_TRAIL(__c2=*(src))) { \
            /* unpaired lead surrogate: __c2 is not used on this path */ \
            (result)=(trie)->data[_UTRIE2_INDEX_FROM_LSCP((trie)->index, c)]; \
        } else { \
            ++(src); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
        } \
    } \
}

/**
 * Internal pre-decrement-previous: get the previous code point (c) and its data
 *
 * Moves src back by one code unit and reads it.
 * - Not a trail surrogate, or at start, or not preceded by a lead surrogate:
 *   c is that unit, looked up as a BMP code point (an unpaired lead surrogate
 *   goes through the lead surrogate code point index).
 * - Otherwise: src moves back past the lead surrogate, c becomes the combined
 *   supplementary code point and is looked up as such.
 * Expands to a brace block, not an expression; src, c and result are assigned.
 */
#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) { \
    { \
        uint16_t __c2; \
        (c)=*--(src); \
        if(!U16_IS_TRAIL(c) || (src)==(start) || !U16_IS_LEAD(__c2=*((src)-1))) { \
            (result)=(trie)->data[_UTRIE2_INDEX_FROM_BMP((trie)->index, c)]; \
        } else { \
            --(src); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
        } \
    } \
}

/** Internal UTF-8 next-post-increment: get the next code point's data.
*
 * Reads the lead byte from src and advances src past it, then:
 * - lead<0xc0 (ASCII and lone trail bytes): direct lookup in (trie)->ascii[lead].
 * - lead C0..DF with one trail byte available: inline 2-byte lookup via the
 *   UTF-8 2-byte index at UTRIE2_UTF8_2B_INDEX_2_OFFSET. That index entry is
 *   added to the trail-byte offset as is, without the UTRIE2_INDEX_SHIFT shift.
 * - lead E0..EC with two trail bytes available: inline 3-byte lookup through
 *   the linear BMP index. For lead E0 the first trail byte must be A0..BF,
 *   which rejects encodings of values below U+0800.
 * - everything else: utrie2_internalU8NextIndex(); the low 3 bits of its result
 *   are the number of further bytes to skip, the rest is the data index.
 * ascii and data are substituted as names of UTrie2 array members.
 * Expands to a brace block, not an expression; src and result are assigned.
 */
#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) { \
    uint8_t __lead=(uint8_t)*(src)++; \
    if(__lead<0xc0) { \
        (result)=(trie)->ascii[__lead]; \
    } else { \
        uint8_t __t1, __t2; \
        if( /* 2-byte sequences inline: with the branch above this covers up to U+07FF */ \
            __lead<0xe0 && (src)<(limit) && \
            (__t1=(uint8_t)(*(src)-0x80))<=0x3f \
        ) { \
            ++(src); \
            (result)=(trie)->data[ \
                (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \
                __t1]; \
        } else if( /* 3-byte sequences with lead E0..EC inline: U+0800..U+CFFF */ \
            __lead<0xed && ((src)+1)<(limit) && \
            (__t1=(uint8_t)(*(src)-0x80))<=0x3f && (__lead>0xe0 || __t1>=0x20) && \
            (__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \
        ) { \
            (src)+=2; \
            /* the three index terms add up to c>>UTRIE2_SHIFT_2 for the decoded c */ \
            (result)=(trie)->data[ \
                ((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \
                                         (__t1<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \
                <<UTRIE2_INDEX_SHIFT)+ \
                (__t2&UTRIE2_DATA_MASK)]; \
        } else { \
            int32_t __index=utrie2_internalU8NextIndex((trie), __lead, (const uint8_t *)(src), \
                                                                (const uint8_t *)(limit)); \
            (src)+=__index&7; \
            (result)=(trie)->data[__index>>3]; \
        } \
    } \
}

/**
 * Internal UTF-8 pre-decrement-previous: get the previous code point's data.
 *
 * Moves src back by one byte and reads it.
 * - byte<0x80: direct lookup in (trie)->ascii[byte].
 * - otherwise: utrie2_internalU8PrevIndex(); the low 3 bits of its result are
 *   the number of further bytes to move src back, the rest is the data index.
 * ascii and data are substituted as names of UTrie2 array members.
 * Expands to a brace block, not an expression; src and result are assigned.
 */
#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) { \
    uint8_t __b=(uint8_t)*--(src); \
    if(__b<0x80) { \
        (result)=(trie)->ascii[__b]; \
    } else { \
        int32_t __index=utrie2_internalU8PrevIndex((trie), __b, (const uint8_t *)(start), \
                                                                (const uint8_t *)(src)); \
        (src)-=__index&7; \
        (result)=(trie)->data[__index>>3]; \
    } \
}

U_CDECL_END

/**
 * Work around MSVC 2003 optimization bugs.
* The pragma is only emitted when U_HAVE_MSVC_2003_OR_EARLIER is defined.
 * NOTE(review): it is never switched back on in this header, so it presumably
 * stays in effect for the rest of the including translation unit - confirm.
 */
#if defined (U_HAVE_MSVC_2003_OR_EARLIER)
#pragma optimize("", off)
#endif

#endif /* __UTRIE2_H__ */