1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/utf16.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,623 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 1999-2012, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: utf16.h 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 1999sep09 1.17 +* created by: Markus W. Scherer 1.18 +*/ 1.19 + 1.20 +/** 1.21 + * \file 1.22 + * \brief C API: 16-bit Unicode handling macros 1.23 + * 1.24 + * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. 1.25 + * 1.26 + * For more information see utf.h and the ICU User Guide Strings chapter 1.27 + * (http://userguide.icu-project.org/strings). 1.28 + * 1.29 + * <em>Usage:</em> 1.30 + * ICU coding guidelines for if() statements should be followed when using these macros. 1.31 + * Compound statements (curly braces {}) must be used for if-else-while... 1.32 + * bodies and all macro statements should be terminated with semicolon. 1.33 + */ 1.34 + 1.35 +#ifndef __UTF16_H__ 1.36 +#define __UTF16_H__ 1.37 + 1.38 +#include "unicode/umachine.h" 1.39 +#ifndef __UTF_H__ 1.40 +# include "unicode/utf.h" 1.41 +#endif 1.42 + 1.43 +/* single-code point definitions -------------------------------------------- */ 1.44 + 1.45 +/** 1.46 + * Does this code unit alone encode a code point (BMP, not a surrogate)? 1.47 + * @param c 16-bit code unit 1.48 + * @return TRUE or FALSE 1.49 + * @stable ICU 2.4 1.50 + */ 1.51 +#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) 1.52 + 1.53 +/** 1.54 + * Is this code unit a lead surrogate (U+d800..U+dbff)? 1.55 + * @param c 16-bit code unit 1.56 + * @return TRUE or FALSE 1.57 + * @stable ICU 2.4 1.58 + */ 1.59 +#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) 1.60 + 1.61 +/** 1.62 + * Is this code unit a trail surrogate (U+dc00..U+dfff)? 1.63 + * @param c 16-bit code unit 1.64 + * @return TRUE or FALSE 1.65 + * @stable ICU 2.4 1.66 + */ 1.67 +#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) 1.68 + 1.69 +/** 1.70 + * Is this code unit a surrogate (U+d800..U+dfff)? 1.71 + * @param c 16-bit code unit 1.72 + * @return TRUE or FALSE 1.73 + * @stable ICU 2.4 1.74 + */ 1.75 +#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) 1.76 + 1.77 +/** 1.78 + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), 1.79 + * is it a lead surrogate? 1.80 + * @param c 16-bit code unit 1.81 + * @return TRUE or FALSE 1.82 + * @stable ICU 2.4 1.83 + */ 1.84 +#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) 1.85 + 1.86 +/** 1.87 + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), 1.88 + * is it a trail surrogate? 1.89 + * @param c 16-bit code unit 1.90 + * @return TRUE or FALSE 1.91 + * @stable ICU 4.2 1.92 + */ 1.93 +#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) 1.94 + 1.95 +/** 1.96 + * Helper constant for U16_GET_SUPPLEMENTARY. 1.97 + * @internal 1.98 + */ 1.99 +#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 1.100 + 1.101 +/** 1.102 + * Get a supplementary code point value (U+10000..U+10ffff) 1.103 + * from its lead and trail surrogates. 1.104 + * The result is undefined if the input values are not 1.105 + * lead and trail surrogates. 1.106 + * 1.107 + * @param lead lead surrogate (U+d800..U+dbff) 1.108 + * @param trail trail surrogate (U+dc00..U+dfff) 1.109 + * @return supplementary code point (U+10000..U+10ffff) 1.110 + * @stable ICU 2.4 1.111 + */ 1.112 +#define U16_GET_SUPPLEMENTARY(lead, trail) \ 1.113 + (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) 1.114 + 1.115 + 1.116 +/** 1.117 + * Get the lead surrogate (0xd800..0xdbff) for a 1.118 + * supplementary code point (0x10000..0x10ffff). 1.119 + * @param supplementary 32-bit code point (U+10000..U+10ffff) 1.120 + * @return lead surrogate (U+d800..U+dbff) for supplementary 1.121 + * @stable ICU 2.4 1.122 + */ 1.123 +#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) 1.124 + 1.125 +/** 1.126 + * Get the trail surrogate (0xdc00..0xdfff) for a 1.127 + * supplementary code point (0x10000..0x10ffff). 1.128 + * @param supplementary 32-bit code point (U+10000..U+10ffff) 1.129 + * @return trail surrogate (U+dc00..U+dfff) for supplementary 1.130 + * @stable ICU 2.4 1.131 + */ 1.132 +#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) 1.133 + 1.134 +/** 1.135 + * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) 1.136 + * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). 1.137 + * @param c 32-bit code point 1.138 + * @return 1 or 2 1.139 + * @stable ICU 2.4 1.140 + */ 1.141 +#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) 1.142 + 1.143 +/** 1.144 + * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). 1.145 + * @return 2 1.146 + * @stable ICU 2.4 1.147 + */ 1.148 +#define U16_MAX_LENGTH 2 1.149 + 1.150 +/** 1.151 + * Get a code point from a string at a random-access offset, 1.152 + * without changing the offset. 1.153 + * "Unsafe" macro, assumes well-formed UTF-16. 1.154 + * 1.155 + * The offset may point to either the lead or trail surrogate unit 1.156 + * for a supplementary code point, in which case the macro will read 1.157 + * the adjacent matching surrogate as well. 1.158 + * The result is undefined if the offset points to a single, unpaired surrogate. 1.159 + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. 1.160 + * 1.161 + * @param s const UChar * string 1.162 + * @param i string offset 1.163 + * @param c output UChar32 variable 1.164 + * @see U16_GET 1.165 + * @stable ICU 2.4 1.166 + */ 1.167 +#define U16_GET_UNSAFE(s, i, c) { \ 1.168 + (c)=(s)[i]; \ 1.169 + if(U16_IS_SURROGATE(c)) { \ 1.170 + if(U16_IS_SURROGATE_LEAD(c)) { \ 1.171 + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ 1.172 + } else { \ 1.173 + (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ 1.174 + } \ 1.175 + } \ 1.176 +} 1.177 + 1.178 +/** 1.179 + * Get a code point from a string at a random-access offset, 1.180 + * without changing the offset. 1.181 + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 1.182 + * 1.183 + * The offset may point to either the lead or trail surrogate unit 1.184 + * for a supplementary code point, in which case the macro will read 1.185 + * the adjacent matching surrogate as well. 1.186 + * 1.187 + * The length can be negative for a NUL-terminated string. 1.188 + * 1.189 + * If the offset points to a single, unpaired surrogate, then that itself 1.190 + * will be returned as the code point. 1.191 + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. 1.192 + * 1.193 + * @param s const UChar * string 1.194 + * @param start starting string offset (usually 0) 1.195 + * @param i string offset, must be start<=i<length 1.196 + * @param length string length 1.197 + * @param c output UChar32 variable 1.198 + * @see U16_GET_UNSAFE 1.199 + * @stable ICU 2.4 1.200 + */ 1.201 +#define U16_GET(s, start, i, length, c) { \ 1.202 + (c)=(s)[i]; \ 1.203 + if(U16_IS_SURROGATE(c)) { \ 1.204 + uint16_t __c2; \ 1.205 + if(U16_IS_SURROGATE_LEAD(c)) { \ 1.206 + if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ 1.207 + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 1.208 + } \ 1.209 + } else { \ 1.210 + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 1.211 + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 1.212 + } \ 1.213 + } \ 1.214 + } \ 1.215 +} 1.216 + 1.217 +/* definitions with forward iteration --------------------------------------- */ 1.218 + 1.219 +/** 1.220 + * Get a code point from a string at a code point boundary offset, 1.221 + * and advance the offset to the next code point boundary. 1.222 + * (Post-incrementing forward iteration.) 1.223 + * "Unsafe" macro, assumes well-formed UTF-16. 1.224 + * 1.225 + * The offset may point to the lead surrogate unit 1.226 + * for a supplementary code point, in which case the macro will read 1.227 + * the following trail surrogate as well. 1.228 + * If the offset points to a trail surrogate, then that itself 1.229 + * will be returned as the code point. 1.230 + * The result is undefined if the offset points to a single, unpaired lead surrogate. 1.231 + * 1.232 + * @param s const UChar * string 1.233 + * @param i string offset 1.234 + * @param c output UChar32 variable 1.235 + * @see U16_NEXT 1.236 + * @stable ICU 2.4 1.237 + */ 1.238 +#define U16_NEXT_UNSAFE(s, i, c) { \ 1.239 + (c)=(s)[(i)++]; \ 1.240 + if(U16_IS_LEAD(c)) { \ 1.241 + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ 1.242 + } \ 1.243 +} 1.244 + 1.245 +/** 1.246 + * Get a code point from a string at a code point boundary offset, 1.247 + * and advance the offset to the next code point boundary. 1.248 + * (Post-incrementing forward iteration.) 1.249 + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 1.250 + * 1.251 + * The length can be negative for a NUL-terminated string. 1.252 + * 1.253 + * The offset may point to the lead surrogate unit 1.254 + * for a supplementary code point, in which case the macro will read 1.255 + * the following trail surrogate as well. 1.256 + * If the offset points to a trail surrogate or 1.257 + * to a single, unpaired lead surrogate, then that itself 1.258 + * will be returned as the code point. 1.259 + * 1.260 + * @param s const UChar * string 1.261 + * @param i string offset, must be i<length 1.262 + * @param length string length 1.263 + * @param c output UChar32 variable 1.264 + * @see U16_NEXT_UNSAFE 1.265 + * @stable ICU 2.4 1.266 + */ 1.267 +#define U16_NEXT(s, i, length, c) { \ 1.268 + (c)=(s)[(i)++]; \ 1.269 + if(U16_IS_LEAD(c)) { \ 1.270 + uint16_t __c2; \ 1.271 + if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ 1.272 + ++(i); \ 1.273 + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 1.274 + } \ 1.275 + } \ 1.276 +} 1.277 + 1.278 +/** 1.279 + * Append a code point to a string, overwriting 1 or 2 code units. 1.280 + * The offset points to the current end of the string contents 1.281 + * and is advanced (post-increment). 1.282 + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. 1.283 + * Otherwise, the result is undefined. 1.284 + * 1.285 + * @param s const UChar * string buffer 1.286 + * @param i string offset 1.287 + * @param c code point to append 1.288 + * @see U16_APPEND 1.289 + * @stable ICU 2.4 1.290 + */ 1.291 +#define U16_APPEND_UNSAFE(s, i, c) { \ 1.292 + if((uint32_t)(c)<=0xffff) { \ 1.293 + (s)[(i)++]=(uint16_t)(c); \ 1.294 + } else { \ 1.295 + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 1.296 + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 1.297 + } \ 1.298 +} 1.299 + 1.300 +/** 1.301 + * Append a code point to a string, overwriting 1 or 2 code units. 1.302 + * The offset points to the current end of the string contents 1.303 + * and is advanced (post-increment). 1.304 + * "Safe" macro, checks for a valid code point. 1.305 + * If a surrogate pair is written, checks for sufficient space in the string. 1.306 + * If the code point is not valid or a trail surrogate does not fit, 1.307 + * then isError is set to TRUE. 1.308 + * 1.309 + * @param s const UChar * string buffer 1.310 + * @param i string offset, must be i<capacity 1.311 + * @param capacity size of the string buffer 1.312 + * @param c code point to append 1.313 + * @param isError output UBool set to TRUE if an error occurs, otherwise not modified 1.314 + * @see U16_APPEND_UNSAFE 1.315 + * @stable ICU 2.4 1.316 + */ 1.317 +#define U16_APPEND(s, i, capacity, c, isError) { \ 1.318 + if((uint32_t)(c)<=0xffff) { \ 1.319 + (s)[(i)++]=(uint16_t)(c); \ 1.320 + } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ 1.321 + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 1.322 + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 1.323 + } else /* c>0x10ffff or not enough space */ { \ 1.324 + (isError)=TRUE; \ 1.325 + } \ 1.326 +} 1.327 + 1.328 +/** 1.329 + * Advance the string offset from one code point boundary to the next. 1.330 + * (Post-incrementing iteration.) 1.331 + * "Unsafe" macro, assumes well-formed UTF-16. 1.332 + * 1.333 + * @param s const UChar * string 1.334 + * @param i string offset 1.335 + * @see U16_FWD_1 1.336 + * @stable ICU 2.4 1.337 + */ 1.338 +#define U16_FWD_1_UNSAFE(s, i) { \ 1.339 + if(U16_IS_LEAD((s)[(i)++])) { \ 1.340 + ++(i); \ 1.341 + } \ 1.342 +} 1.343 + 1.344 +/** 1.345 + * Advance the string offset from one code point boundary to the next. 1.346 + * (Post-incrementing iteration.) 1.347 + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 1.348 + * 1.349 + * The length can be negative for a NUL-terminated string. 1.350 + * 1.351 + * @param s const UChar * string 1.352 + * @param i string offset, must be i<length 1.353 + * @param length string length 1.354 + * @see U16_FWD_1_UNSAFE 1.355 + * @stable ICU 2.4 1.356 + */ 1.357 +#define U16_FWD_1(s, i, length) { \ 1.358 + if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \ 1.359 + ++(i); \ 1.360 + } \ 1.361 +} 1.362 + 1.363 +/** 1.364 + * Advance the string offset from one code point boundary to the n-th next one, 1.365 + * i.e., move forward by n code points. 1.366 + * (Post-incrementing iteration.) 1.367 + * "Unsafe" macro, assumes well-formed UTF-16. 1.368 + * 1.369 + * @param s const UChar * string 1.370 + * @param i string offset 1.371 + * @param n number of code points to skip 1.372 + * @see U16_FWD_N 1.373 + * @stable ICU 2.4 1.374 + */ 1.375 +#define U16_FWD_N_UNSAFE(s, i, n) { \ 1.376 + int32_t __N=(n); \ 1.377 + while(__N>0) { \ 1.378 + U16_FWD_1_UNSAFE(s, i); \ 1.379 + --__N; \ 1.380 + } \ 1.381 +} 1.382 + 1.383 +/** 1.384 + * Advance the string offset from one code point boundary to the n-th next one, 1.385 + * i.e., move forward by n code points. 1.386 + * (Post-incrementing iteration.) 1.387 + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 1.388 + * 1.389 + * The length can be negative for a NUL-terminated string. 1.390 + * 1.391 + * @param s const UChar * string 1.392 + * @param i int32_t string offset, must be i<length 1.393 + * @param length int32_t string length 1.394 + * @param n number of code points to skip 1.395 + * @see U16_FWD_N_UNSAFE 1.396 + * @stable ICU 2.4 1.397 + */ 1.398 +#define U16_FWD_N(s, i, length, n) { \ 1.399 + int32_t __N=(n); \ 1.400 + while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ 1.401 + U16_FWD_1(s, i, length); \ 1.402 + --__N; \ 1.403 + } \ 1.404 +} 1.405 + 1.406 +/** 1.407 + * Adjust a random-access offset to a code point boundary 1.408 + * at the start of a code point. 1.409 + * If the offset points to the trail surrogate of a surrogate pair, 1.410 + * then the offset is decremented. 1.411 + * Otherwise, it is not modified. 1.412 + * "Unsafe" macro, assumes well-formed UTF-16. 1.413 + * 1.414 + * @param s const UChar * string 1.415 + * @param i string offset 1.416 + * @see U16_SET_CP_START 1.417 + * @stable ICU 2.4 1.418 + */ 1.419 +#define U16_SET_CP_START_UNSAFE(s, i) { \ 1.420 + if(U16_IS_TRAIL((s)[i])) { \ 1.421 + --(i); \ 1.422 + } \ 1.423 +} 1.424 + 1.425 +/** 1.426 + * Adjust a random-access offset to a code point boundary 1.427 + * at the start of a code point. 1.428 + * If the offset points to the trail surrogate of a surrogate pair, 1.429 + * then the offset is decremented. 1.430 + * Otherwise, it is not modified. 1.431 + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 1.432 + * 1.433 + * @param s const UChar * string 1.434 + * @param start starting string offset (usually 0) 1.435 + * @param i string offset, must be start<=i 1.436 + * @see U16_SET_CP_START_UNSAFE 1.437 + * @stable ICU 2.4 1.438 + */ 1.439 +#define U16_SET_CP_START(s, start, i) { \ 1.440 + if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ 1.441 + --(i); \ 1.442 + } \ 1.443 +} 1.444 + 1.445 +/* definitions with backward iteration -------------------------------------- */ 1.446 + 1.447 +/** 1.448 + * Move the string offset from one code point boundary to the previous one 1.449 + * and get the code point between them. 1.450 + * (Pre-decrementing backward iteration.) 1.451 + * "Unsafe" macro, assumes well-formed UTF-16. 1.452 + * 1.453 + * The input offset may be the same as the string length. 1.454 + * If the offset is behind a trail surrogate unit 1.455 + * for a supplementary code point, then the macro will read 1.456 + * the preceding lead surrogate as well. 1.457 + * If the offset is behind a lead surrogate, then that itself 1.458 + * will be returned as the code point. 1.459 + * The result is undefined if the offset is behind a single, unpaired trail surrogate. 1.460 + * 1.461 + * @param s const UChar * string 1.462 + * @param i string offset 1.463 + * @param c output UChar32 variable 1.464 + * @see U16_PREV 1.465 + * @stable ICU 2.4 1.466 + */ 1.467 +#define U16_PREV_UNSAFE(s, i, c) { \ 1.468 + (c)=(s)[--(i)]; \ 1.469 + if(U16_IS_TRAIL(c)) { \ 1.470 + (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ 1.471 + } \ 1.472 +} 1.473 + 1.474 +/** 1.475 + * Move the string offset from one code point boundary to the previous one 1.476 + * and get the code point between them. 1.477 + * (Pre-decrementing backward iteration.) 1.478 + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 1.479 + * 1.480 + * The input offset may be the same as the string length. 1.481 + * If the offset is behind a trail surrogate unit 1.482 + * for a supplementary code point, then the macro will read 1.483 + * the preceding lead surrogate as well. 1.484 + * If the offset is behind a lead surrogate or behind a single, unpaired 1.485 + * trail surrogate, then that itself 1.486 + * will be returned as the code point. 1.487 + * 1.488 + * @param s const UChar * string 1.489 + * @param start starting string offset (usually 0) 1.490 + * @param i string offset, must be start<i 1.491 + * @param c output UChar32 variable 1.492 + * @see U16_PREV_UNSAFE 1.493 + * @stable ICU 2.4 1.494 + */ 1.495 +#define U16_PREV(s, start, i, c) { \ 1.496 + (c)=(s)[--(i)]; \ 1.497 + if(U16_IS_TRAIL(c)) { \ 1.498 + uint16_t __c2; \ 1.499 + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 1.500 + --(i); \ 1.501 + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 1.502 + } \ 1.503 + } \ 1.504 +} 1.505 + 1.506 +/** 1.507 + * Move the string offset from one code point boundary to the previous one. 1.508 + * (Pre-decrementing backward iteration.) 1.509 + * The input offset may be the same as the string length. 1.510 + * "Unsafe" macro, assumes well-formed UTF-16. 1.511 + * 1.512 + * @param s const UChar * string 1.513 + * @param i string offset 1.514 + * @see U16_BACK_1 1.515 + * @stable ICU 2.4 1.516 + */ 1.517 +#define U16_BACK_1_UNSAFE(s, i) { \ 1.518 + if(U16_IS_TRAIL((s)[--(i)])) { \ 1.519 + --(i); \ 1.520 + } \ 1.521 +} 1.522 + 1.523 +/** 1.524 + * Move the string offset from one code point boundary to the previous one. 1.525 + * (Pre-decrementing backward iteration.) 1.526 + * The input offset may be the same as the string length. 1.527 + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 1.528 + * 1.529 + * @param s const UChar * string 1.530 + * @param start starting string offset (usually 0) 1.531 + * @param i string offset, must be start<i 1.532 + * @see U16_BACK_1_UNSAFE 1.533 + * @stable ICU 2.4 1.534 + */ 1.535 +#define U16_BACK_1(s, start, i) { \ 1.536 + if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ 1.537 + --(i); \ 1.538 + } \ 1.539 +} 1.540 + 1.541 +/** 1.542 + * Move the string offset from one code point boundary to the n-th one before it, 1.543 + * i.e., move backward by n code points. 1.544 + * (Pre-decrementing backward iteration.) 1.545 + * The input offset may be the same as the string length. 1.546 + * "Unsafe" macro, assumes well-formed UTF-16. 1.547 + * 1.548 + * @param s const UChar * string 1.549 + * @param i string offset 1.550 + * @param n number of code points to skip 1.551 + * @see U16_BACK_N 1.552 + * @stable ICU 2.4 1.553 + */ 1.554 +#define U16_BACK_N_UNSAFE(s, i, n) { \ 1.555 + int32_t __N=(n); \ 1.556 + while(__N>0) { \ 1.557 + U16_BACK_1_UNSAFE(s, i); \ 1.558 + --__N; \ 1.559 + } \ 1.560 +} 1.561 + 1.562 +/** 1.563 + * Move the string offset from one code point boundary to the n-th one before it, 1.564 + * i.e., move backward by n code points. 1.565 + * (Pre-decrementing backward iteration.) 1.566 + * The input offset may be the same as the string length. 1.567 + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 1.568 + * 1.569 + * @param s const UChar * string 1.570 + * @param start start of string 1.571 + * @param i string offset, must be start<i 1.572 + * @param n number of code points to skip 1.573 + * @see U16_BACK_N_UNSAFE 1.574 + * @stable ICU 2.4 1.575 + */ 1.576 +#define U16_BACK_N(s, start, i, n) { \ 1.577 + int32_t __N=(n); \ 1.578 + while(__N>0 && (i)>(start)) { \ 1.579 + U16_BACK_1(s, start, i); \ 1.580 + --__N; \ 1.581 + } \ 1.582 +} 1.583 + 1.584 +/** 1.585 + * Adjust a random-access offset to a code point boundary after a code point. 1.586 + * If the offset is behind the lead surrogate of a surrogate pair, 1.587 + * then the offset is incremented. 1.588 + * Otherwise, it is not modified. 1.589 + * The input offset may be the same as the string length. 1.590 + * "Unsafe" macro, assumes well-formed UTF-16. 1.591 + * 1.592 + * @param s const UChar * string 1.593 + * @param i string offset 1.594 + * @see U16_SET_CP_LIMIT 1.595 + * @stable ICU 2.4 1.596 + */ 1.597 +#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \ 1.598 + if(U16_IS_LEAD((s)[(i)-1])) { \ 1.599 + ++(i); \ 1.600 + } \ 1.601 +} 1.602 + 1.603 +/** 1.604 + * Adjust a random-access offset to a code point boundary after a code point. 1.605 + * If the offset is behind the lead surrogate of a surrogate pair, 1.606 + * then the offset is incremented. 1.607 + * Otherwise, it is not modified. 1.608 + * The input offset may be the same as the string length. 1.609 + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 1.610 + * 1.611 + * The length can be negative for a NUL-terminated string. 1.612 + * 1.613 + * @param s const UChar * string 1.614 + * @param start int32_t starting string offset (usually 0) 1.615 + * @param i int32_t string offset, start<=i<=length 1.616 + * @param length int32_t string length 1.617 + * @see U16_SET_CP_LIMIT_UNSAFE 1.618 + * @stable ICU 2.4 1.619 + */ 1.620 +#define U16_SET_CP_LIMIT(s, start, i, length) { \ 1.621 + if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ 1.622 + ++(i); \ 1.623 + } \ 1.624 +} 1.625 + 1.626 +#endif