1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/uchar.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,3331 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 1997-2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* 1.10 +* File UCHAR.H 1.11 +* 1.12 +* Modification History: 1.13 +* 1.14 +* Date Name Description 1.15 +* 04/02/97 aliu Creation. 1.16 +* 03/29/99 helena Updated for C APIs. 1.17 +* 4/15/99 Madhu Updated for C Implementation and Javadoc 1.18 +* 5/20/99 Madhu Added the function u_getVersion() 1.19 +* 8/19/1999 srl Upgraded scripts to Unicode 3.0 1.20 +* 8/27/1999 schererm UCharDirection constants: U_... 1.21 +* 11/11/1999 weiv added u_isalnum(), cleaned comments 1.22 +* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). 1.23 +****************************************************************************** 1.24 +*/ 1.25 + 1.26 +#ifndef UCHAR_H 1.27 +#define UCHAR_H 1.28 + 1.29 +#include "unicode/utypes.h" 1.30 + 1.31 +U_CDECL_BEGIN 1.32 + 1.33 +/*==========================================================================*/ 1.34 +/* Unicode version number */ 1.35 +/*==========================================================================*/ 1.36 +/** 1.37 + * Unicode version number, default for the current ICU version. 1.38 + * The actual Unicode Character Database (UCD) data is stored in uprops.dat 1.39 + * and may be generated from UCD files from a different Unicode version. 1.40 + * Call u_getUnicodeVersion to get the actual Unicode version of the data. 1.41 + * * Note: this macro expands to a string literal, so it cannot be evaluated in * preprocessor #if conditionals; it only names the default version at build time. * 1.42 + * @see u_getUnicodeVersion 1.43 + * @stable ICU 2.0 1.44 + */ 1.45 +#define U_UNICODE_VERSION "6.3" 1.46 + 1.47 +/** 1.48 + * \file 1.49 + * \brief C API: Unicode Properties 1.50 + * 1.51 + * This C API provides low-level access to the Unicode Character Database.
1.52 + * In addition to raw property values, some convenience functions calculate 1.53 + * derived properties, for example for Java-style programming. 1.54 + * 1.55 + * Unicode assigns each code point (not just assigned character) values for 1.56 + * many properties. 1.57 + * Most of them are simple boolean flags, or constants from a small enumerated list. 1.58 + * For some properties, values are strings or other relatively more complex types. 1.59 + * 1.60 + * For more information see 1.61 + * "About the Unicode Character Database" (http://www.unicode.org/ucd/) 1.62 + * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html). 1.63 + * 1.64 + * Many functions are designed to match java.lang.Character functions. 1.65 + * See the individual function documentation, 1.66 + * and see the JDK 1.4 java.lang.Character documentation 1.67 + * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html 1.68 + * 1.69 + * There are also functions that provide easy migration from C/POSIX functions 1.70 + * like isblank(). Their use is generally discouraged because the C/POSIX 1.71 + * standards do not define their semantics beyond the ASCII range, which means 1.72 + * that different implementations exhibit very different behavior. 1.73 + * Instead, Unicode properties should be used directly. 1.74 + * 1.75 + * There are also only a few, broad C/POSIX character classes, and they tend 1.76 + * to be used for conflicting purposes. For example, the "isalpha()" class 1.77 + * is sometimes used to determine word boundaries, while a more sophisticated 1.78 + * approach would at least distinguish initial letters from continuation 1.79 + * characters (the latter including combining marks). 1.80 + * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 1.81 + * Another example: There is no "istitle()" class for titlecase characters. 
1.82 + * 1.83 + * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 1.84 + * ICU implements them according to the Standard Recommendations in 1.85 + * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 1.86 + * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 1.87 + * 1.88 + * API access for C/POSIX character classes is as follows: 1.89 + * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC) 1.90 + * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE) 1.91 + * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE) 1.92 + * - punct: u_ispunct(c) 1.93 + * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER 1.94 + * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT) 1.95 + * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM) 1.96 + * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE) 1.97 + * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK) 1.98 + * - cntrl: u_charType(c)==U_CONTROL_CHAR 1.99 + * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH) 1.100 + * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT) 1.101 + * 1.102 + * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match, 1.103 + * the Standard Recommendations in UTS #18. Instead, they match Java 1.104 + * functions according to their API documentation. 1.105 + * 1.106 + * \htmlonly 1.107 + * The C/POSIX character classes are also available in UnicodeSet patterns, 1.108 + * using patterns like [:graph:] or \p{graph}. 1.109 + * \endhtmlonly 1.110 + * 1.111 + * Note: There are several ICU whitespace functions. 
1.112 + * Comparison: 1.113 + * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 1.114 + * most of general categories "Z" (separators) + most whitespace ISO controls 1.115 + * (including no-break spaces, but excluding IS1..IS4 and ZWSP) 1.116 + * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 1.117 + * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) 1.118 + * - u_isspace: Z + whitespace ISO controls (including no-break spaces) 1.119 + * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP 1.120 + */ 1.121 + 1.122 +/** 1.123 + * Constants. 1.124 + */ 1.125 + 1.126 +/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */ 1.127 +#define UCHAR_MIN_VALUE 0 1.128 + 1.129 +/** 1.130 + * The highest Unicode code point value (scalar value) according to 1.131 + * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up). 1.132 + * For a single character, UChar32 is a simple type that can hold any code point value. 1.133 + * 1.134 + * @see UChar32 1.135 + * @stable ICU 2.0 1.136 + */ 1.137 +#define UCHAR_MAX_VALUE 0x10ffff 1.138 + 1.139 +/** 1.140 + * Get a single-bit bit set (a flag) from a bit number 0..31. * The macro shifts (uint32_t)1 left by x. The argument must stay within 0..31: * a negative shift count, or one that is at least the 32-bit operand width, * is undefined behavior in C. The argument is parenthesized in the expansion, * so an expression may be passed. 1.141 + * @stable ICU 2.1 1.142 + */ 1.143 +#define U_MASK(x) ((uint32_t)1<<(x)) 1.144 + 1.145 +/** 1.146 + * Selection constants for Unicode properties. 1.147 + * These constants are used in functions like u_hasBinaryProperty to select 1.148 + * one of the Unicode properties. 1.149 + * 1.150 + * The properties APIs are intended to reflect Unicode properties as defined 1.151 + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). 1.152 + * For details about the properties see http://www.unicode.org/ucd/ . 1.153 + * For names of Unicode properties see the UCD file PropertyAliases.txt.
1.154 + * 1.155 + * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, 1.156 + * then properties marked with "new in Unicode 3.2" are not or not fully available. 1.157 + * Check u_getUnicodeVersion to be sure. 1.158 + * * The constants are grouped by value type, and each group has its own numeric range: * binary properties start at 0, enumerated/integer properties at 0x1000, * bit-mask properties at 0x2000, double properties at 0x3000, * string properties at 0x4000, and properties with unusual value types at 0x7000. * Each group is delimited by a ..._START constant (equal to the first property of the group) * and a ..._LIMIT constant (one more than the last property of the group). * UCHAR_INVALID_CODE is -1 and lies outside every group. * 1.159 + * @see u_hasBinaryProperty 1.160 + * @see u_getIntPropertyValue 1.161 + * @see u_getUnicodeVersion 1.162 + * @stable ICU 2.1 1.163 + */ 1.164 +typedef enum UProperty { 1.165 + /* 1.166 + * Note: UProperty constants are parsed by preparseucd.py. 1.167 + * It matches lines like 1.168 + * UCHAR_<Unicode property name>=<integer>, 1.169 + */ 1.170 + 1.171 + /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that 1.172 + debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, 1.173 + rather than UCHAR_BINARY_START. Likewise for other *_START 1.174 + identifiers. */ 1.175 + 1.176 + /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. 1.177 + Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */ 1.178 + UCHAR_ALPHABETIC=0, 1.179 + /** First constant for binary Unicode properties. @stable ICU 2.1 */ 1.180 + UCHAR_BINARY_START=UCHAR_ALPHABETIC, 1.181 + /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */ 1.182 + UCHAR_ASCII_HEX_DIGIT=1, 1.183 + /** Binary property Bidi_Control. 1.184 + Format controls which have specific functions 1.185 + in the Bidi Algorithm. @stable ICU 2.1 */ 1.186 + UCHAR_BIDI_CONTROL=2, 1.187 + /** Binary property Bidi_Mirrored. 1.188 + Characters that may change display in RTL text. 1.189 + Same as u_isMirrored. 1.190 + See UAX #9: Unicode Bidirectional Algorithm (http://www.unicode.org/reports/tr9/). @stable ICU 2.1 */ 1.191 + UCHAR_BIDI_MIRRORED=3, 1.192 + /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */ 1.193 + UCHAR_DASH=4, 1.194 + /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). 1.195 + Ignorable in most processing.
1.196 + <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */ 1.197 + UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5, 1.198 + /** Binary property Deprecated (new in Unicode 3.2). 1.199 + The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */ 1.200 + UCHAR_DEPRECATED=6, 1.201 + /** Binary property Diacritic. Characters that linguistically modify 1.202 + the meaning of another character to which they apply. @stable ICU 2.1 */ 1.203 + UCHAR_DIACRITIC=7, 1.204 + /** Binary property Extender. 1.205 + Extend the value or shape of a preceding alphabetic character, 1.206 + e.g., length and iteration marks. @stable ICU 2.1 */ 1.207 + UCHAR_EXTENDER=8, 1.208 + /** Binary property Full_Composition_Exclusion. 1.209 + CompositionExclusions.txt+Singleton Decompositions+ 1.210 + Non-Starter Decompositions. @stable ICU 2.1 */ 1.211 + UCHAR_FULL_COMPOSITION_EXCLUSION=9, 1.212 + /** Binary property Grapheme_Base (new in Unicode 3.2). 1.213 + For programmatic determination of grapheme cluster boundaries. 1.214 + [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */ 1.215 + UCHAR_GRAPHEME_BASE=10, 1.216 + /** Binary property Grapheme_Extend (new in Unicode 3.2). 1.217 + For programmatic determination of grapheme cluster boundaries. 1.218 + Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */ 1.219 + UCHAR_GRAPHEME_EXTEND=11, 1.220 + /** Binary property Grapheme_Link (new in Unicode 3.2). 1.221 + For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */ 1.222 + UCHAR_GRAPHEME_LINK=12, 1.223 + /** Binary property Hex_Digit. 1.224 + Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */ 1.225 + UCHAR_HEX_DIGIT=13, 1.226 + /** Binary property Hyphen. Dashes used to mark connections 1.227 + between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */ 1.228 + UCHAR_HYPHEN=14, 1.229 + /** Binary property ID_Continue.
1.230 + Characters that can continue an identifier. 1.231 + DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." 1.232 + ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */ 1.233 + UCHAR_ID_CONTINUE=15, 1.234 + /** Binary property ID_Start. 1.235 + Characters that can start an identifier. 1.236 + Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */ 1.237 + UCHAR_ID_START=16, 1.238 + /** Binary property Ideographic. 1.239 + CJKV ideographs. @stable ICU 2.1 */ 1.240 + UCHAR_IDEOGRAPHIC=17, 1.241 + /** Binary property IDS_Binary_Operator (new in Unicode 3.2). 1.242 + For programmatic determination of 1.243 + Ideographic Description Sequences. @stable ICU 2.1 */ 1.244 + UCHAR_IDS_BINARY_OPERATOR=18, 1.245 + /** Binary property IDS_Trinary_Operator (new in Unicode 3.2). 1.246 + For programmatic determination of 1.247 + Ideographic Description Sequences. @stable ICU 2.1 */ 1.248 + UCHAR_IDS_TRINARY_OPERATOR=19, 1.249 + /** Binary property Join_Control. 1.250 + Format controls for cursive joining and ligation. @stable ICU 2.1 */ 1.251 + UCHAR_JOIN_CONTROL=20, 1.252 + /** Binary property Logical_Order_Exception (new in Unicode 3.2). 1.253 + Characters that do not use logical order and 1.254 + require special handling in most processing. @stable ICU 2.1 */ 1.255 + UCHAR_LOGICAL_ORDER_EXCEPTION=21, 1.256 + /** Binary property Lowercase. Same as u_isULowercase, different from u_islower. 1.257 + Ll+Other_Lowercase @stable ICU 2.1 */ 1.258 + UCHAR_LOWERCASE=22, 1.259 + /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */ 1.260 + UCHAR_MATH=23, 1.261 + /** Binary property Noncharacter_Code_Point. 1.262 + Code points that are explicitly defined as illegal 1.263 + for the encoding of characters. @stable ICU 2.1 */ 1.264 + UCHAR_NONCHARACTER_CODE_POINT=24, 1.265 + /** Binary property Quotation_Mark. @stable ICU 2.1 */ 1.266 + UCHAR_QUOTATION_MARK=25, 1.267 + /** Binary property Radical (new in Unicode 3.2).
1.268 + For programmatic determination of 1.269 + Ideographic Description Sequences. @stable ICU 2.1 */ 1.270 + UCHAR_RADICAL=26, 1.271 + /** Binary property Soft_Dotted (new in Unicode 3.2). 1.272 + Characters with a "soft dot", like i or j. 1.273 + An accent placed on these characters causes 1.274 + the dot to disappear. @stable ICU 2.1 */ 1.275 + UCHAR_SOFT_DOTTED=27, 1.276 + /** Binary property Terminal_Punctuation. 1.277 + Punctuation characters that generally mark 1.278 + the end of textual units. @stable ICU 2.1 */ 1.279 + UCHAR_TERMINAL_PUNCTUATION=28, 1.280 + /** Binary property Unified_Ideograph (new in Unicode 3.2). 1.281 + For programmatic determination of 1.282 + Ideographic Description Sequences. @stable ICU 2.1 */ 1.283 + UCHAR_UNIFIED_IDEOGRAPH=29, 1.284 + /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. 1.285 + Lu+Other_Uppercase @stable ICU 2.1 */ 1.286 + UCHAR_UPPERCASE=30, 1.287 + /** Binary property White_Space. 1.288 + Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. 1.289 + Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */ 1.290 + UCHAR_WHITE_SPACE=31, 1.291 + /** Binary property XID_Continue. 1.292 + ID_Continue modified to allow closure under 1.293 + normalization forms NFKC and NFKD. @stable ICU 2.1 */ 1.294 + UCHAR_XID_CONTINUE=32, 1.295 + /** Binary property XID_Start. ID_Start modified to allow 1.296 + closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */ 1.297 + UCHAR_XID_START=33, 1.298 + /** Binary property Case_Sensitive. Either the source of a case 1.299 + mapping or _in_ the target of a case mapping. Not the same as 1.300 + the general category Cased_Letter. @stable ICU 2.6 */ 1.301 + UCHAR_CASE_SENSITIVE=34, 1.302 + /** Binary property STerm (new in Unicode 4.0.1). 1.303 + Sentence Terminal.
Used in UAX #29: Text Boundaries 1.304 + (http://www.unicode.org/reports/tr29/) 1.305 + @stable ICU 3.0 */ 1.306 + UCHAR_S_TERM=35, 1.307 + /** Binary property Variation_Selector (new in Unicode 4.0.1). 1.308 + Indicates all those characters that qualify as Variation Selectors. 1.309 + For details on the behavior of these characters, 1.310 + see StandardizedVariants.html and 15.6 Variation Selectors. 1.311 + @stable ICU 3.0 */ 1.312 + UCHAR_VARIATION_SELECTOR=36, 1.313 + /** Binary property NFD_Inert. 1.314 + ICU-specific property for characters that are inert under NFD, 1.315 + i.e., they do not interact with adjacent characters. 1.316 + See the documentation for the Normalizer2 class and the 1.317 + Normalizer2::isInert() method. 1.318 + @stable ICU 3.0 */ 1.319 + UCHAR_NFD_INERT=37, 1.320 + /** Binary property NFKD_Inert. 1.321 + ICU-specific property for characters that are inert under NFKD, 1.322 + i.e., they do not interact with adjacent characters. 1.323 + See the documentation for the Normalizer2 class and the 1.324 + Normalizer2::isInert() method. 1.325 + @stable ICU 3.0 */ 1.326 + UCHAR_NFKD_INERT=38, 1.327 + /** Binary property NFC_Inert. 1.328 + ICU-specific property for characters that are inert under NFC, 1.329 + i.e., they do not interact with adjacent characters. 1.330 + See the documentation for the Normalizer2 class and the 1.331 + Normalizer2::isInert() method. 1.332 + @stable ICU 3.0 */ 1.333 + UCHAR_NFC_INERT=39, 1.334 + /** Binary property NFKC_Inert. 1.335 + ICU-specific property for characters that are inert under NFKC, 1.336 + i.e., they do not interact with adjacent characters. 1.337 + See the documentation for the Normalizer2 class and the 1.338 + Normalizer2::isInert() method. 1.339 + @stable ICU 3.0 */ 1.340 + UCHAR_NFKC_INERT=40, 1.341 + /** Binary property Segment_Starter. 1.342 + ICU-specific property for characters that are starters in terms of 1.343 + Unicode normalization and combining character sequences.
1.344 + They have ccc=0 and do not occur in non-initial position of the 1.345 + canonical decomposition of any character 1.346 + (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). 1.347 + ICU uses this property for segmenting a string for generating a set of 1.348 + canonically equivalent strings, e.g. for canonical closure while 1.349 + processing collation tailoring rules. 1.350 + @stable ICU 3.0 */ 1.351 + UCHAR_SEGMENT_STARTER=41, 1.352 + /** Binary property Pattern_Syntax (new in Unicode 4.1). 1.353 + See UAX #31 Identifier and Pattern Syntax 1.354 + (http://www.unicode.org/reports/tr31/) 1.355 + @stable ICU 3.4 */ 1.356 + UCHAR_PATTERN_SYNTAX=42, 1.357 + /** Binary property Pattern_White_Space (new in Unicode 4.1). 1.358 + See UAX #31 Identifier and Pattern Syntax 1.359 + (http://www.unicode.org/reports/tr31/) 1.360 + @stable ICU 3.4 */ 1.361 + UCHAR_PATTERN_WHITE_SPACE=43, 1.362 + /** Binary property alnum (a C/POSIX character class). 1.363 + Implemented according to the UTS #18 Annex C Standard Recommendation. 1.364 + See the uchar.h file documentation. 1.365 + @stable ICU 3.4 */ 1.366 + UCHAR_POSIX_ALNUM=44, 1.367 + /** Binary property blank (a C/POSIX character class). 1.368 + Implemented according to the UTS #18 Annex C Standard Recommendation. 1.369 + See the uchar.h file documentation. 1.370 + @stable ICU 3.4 */ 1.371 + UCHAR_POSIX_BLANK=45, 1.372 + /** Binary property graph (a C/POSIX character class). 1.373 + Implemented according to the UTS #18 Annex C Standard Recommendation. 1.374 + See the uchar.h file documentation. 1.375 + @stable ICU 3.4 */ 1.376 + UCHAR_POSIX_GRAPH=46, 1.377 + /** Binary property print (a C/POSIX character class). 1.378 + Implemented according to the UTS #18 Annex C Standard Recommendation. 1.379 + See the uchar.h file documentation. 1.380 + @stable ICU 3.4 */ 1.381 + UCHAR_POSIX_PRINT=47, 1.382 + /** Binary property xdigit (a C/POSIX character class).
1.383 + Implemented according to the UTS #18 Annex C Standard Recommendation. 1.384 + See the uchar.h file documentation. 1.385 + @stable ICU 3.4 */ 1.386 + UCHAR_POSIX_XDIGIT=48, 1.387 + /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */ 1.388 + UCHAR_CASED=49, 1.389 + /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */ 1.390 + UCHAR_CASE_IGNORABLE=50, 1.391 + /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */ 1.392 + UCHAR_CHANGES_WHEN_LOWERCASED=51, 1.393 + /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */ 1.394 + UCHAR_CHANGES_WHEN_UPPERCASED=52, 1.395 + /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */ 1.396 + UCHAR_CHANGES_WHEN_TITLECASED=53, 1.397 + /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */ 1.398 + UCHAR_CHANGES_WHEN_CASEFOLDED=54, 1.399 + /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */ 1.400 + UCHAR_CHANGES_WHEN_CASEMAPPED=55, 1.401 + /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */ 1.402 + UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, 1.403 + /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */ 1.404 + UCHAR_BINARY_LIMIT=57, 1.405 + 1.406 + /** Enumerated property Bidi_Class. 1.407 + Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */ 1.408 + UCHAR_BIDI_CLASS=0x1000, 1.409 + /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ 1.410 + UCHAR_INT_START=UCHAR_BIDI_CLASS, 1.411 + /** Enumerated property Block. 1.412 + Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */ 1.413 + UCHAR_BLOCK=0x1001, 1.414 + /** Enumerated property Canonical_Combining_Class. 1.415 + Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */ 1.416 + UCHAR_CANONICAL_COMBINING_CLASS=0x1002, 1.417 + /** Enumerated property Decomposition_Type. 1.418 + Returns UDecompositionType values.
@stable ICU 2.2 */ 1.419 + UCHAR_DECOMPOSITION_TYPE=0x1003, 1.420 + /** Enumerated property East_Asian_Width. 1.421 + See http://www.unicode.org/reports/tr11/ 1.422 + Returns UEastAsianWidth values. @stable ICU 2.2 */ 1.423 + UCHAR_EAST_ASIAN_WIDTH=0x1004, 1.424 + /** Enumerated property General_Category. 1.425 + Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */ 1.426 + UCHAR_GENERAL_CATEGORY=0x1005, 1.427 + /** Enumerated property Joining_Group. 1.428 + Returns UJoiningGroup values. @stable ICU 2.2 */ 1.429 + UCHAR_JOINING_GROUP=0x1006, 1.430 + /** Enumerated property Joining_Type. 1.431 + Returns UJoiningType values. @stable ICU 2.2 */ 1.432 + UCHAR_JOINING_TYPE=0x1007, 1.433 + /** Enumerated property Line_Break. 1.434 + Returns ULineBreak values. @stable ICU 2.2 */ 1.435 + UCHAR_LINE_BREAK=0x1008, 1.436 + /** Enumerated property Numeric_Type. 1.437 + Returns UNumericType values. @stable ICU 2.2 */ 1.438 + UCHAR_NUMERIC_TYPE=0x1009, 1.439 + /** Enumerated property Script. 1.440 + Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */ 1.441 + UCHAR_SCRIPT=0x100A, 1.442 + /** Enumerated property Hangul_Syllable_Type, new in Unicode 4. 1.443 + Returns UHangulSyllableType values. @stable ICU 2.6 */ 1.444 + UCHAR_HANGUL_SYLLABLE_TYPE=0x100B, 1.445 + /** Enumerated property NFD_Quick_Check. 1.446 + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ 1.447 + UCHAR_NFD_QUICK_CHECK=0x100C, 1.448 + /** Enumerated property NFKD_Quick_Check. 1.449 + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ 1.450 + UCHAR_NFKD_QUICK_CHECK=0x100D, 1.451 + /** Enumerated property NFC_Quick_Check. 1.452 + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ 1.453 + UCHAR_NFC_QUICK_CHECK=0x100E, 1.454 + /** Enumerated property NFKC_Quick_Check. 1.455 + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ 1.456 + UCHAR_NFKC_QUICK_CHECK=0x100F, 1.457 + /** Enumerated property Lead_Canonical_Combining_Class.
1.458 + ICU-specific property for the ccc of the first code point 1.459 + of the decomposition, or lccc(c)=ccc(NFD(c)[0]). 1.460 + Useful for checking for canonically ordered text; 1.461 + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . 1.462 + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ 1.463 + UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010, 1.464 + /** Enumerated property Trail_Canonical_Combining_Class. 1.465 + ICU-specific property for the ccc of the last code point 1.466 + of the decomposition, or tccc(c)=ccc(NFD(c)[last]). 1.467 + Useful for checking for canonically ordered text; 1.468 + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . 1.469 + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ 1.470 + UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011, 1.471 + /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). 1.472 + Used in UAX #29: Text Boundaries 1.473 + (http://www.unicode.org/reports/tr29/) 1.474 + Returns UGraphemeClusterBreak values. @stable ICU 3.4 */ 1.475 + UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012, 1.476 + /** Enumerated property Sentence_Break (new in Unicode 4.1). 1.477 + Used in UAX #29: Text Boundaries 1.478 + (http://www.unicode.org/reports/tr29/) 1.479 + Returns USentenceBreak values. @stable ICU 3.4 */ 1.480 + UCHAR_SENTENCE_BREAK=0x1013, 1.481 + /** Enumerated property Word_Break (new in Unicode 4.1). 1.482 + Used in UAX #29: Text Boundaries 1.483 + (http://www.unicode.org/reports/tr29/) 1.484 + Returns UWordBreakValues values. @stable ICU 3.4 */ 1.485 + UCHAR_WORD_BREAK=0x1014, 1.486 + /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). 1.487 + Used in UAX #9: Unicode Bidirectional Algorithm 1.488 + (http://www.unicode.org/reports/tr9/) 1.489 + Returns UBidiPairedBracketType values.
@stable ICU 52 */ 1.490 + UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015, 1.491 + /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ 1.492 + UCHAR_INT_LIMIT=0x1016, 1.493 + 1.494 + /** Bitmask property General_Category_Mask. 1.495 + This is the General_Category property returned as a bit mask. 1.496 + When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), 1.497 + returns bit masks for UCharCategory values where exactly one bit is set. 1.498 + When used with u_getPropertyValueName() and u_getPropertyValueEnum(), 1.499 + a multi-bit mask is used for sets of categories like "Letters". 1.500 + Mask values should be cast to uint32_t. 1.501 + @stable ICU 2.4 */ 1.502 + UCHAR_GENERAL_CATEGORY_MASK=0x2000, 1.503 + /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */ 1.504 + UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK, 1.505 + /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */ 1.506 + UCHAR_MASK_LIMIT=0x2001, 1.507 + 1.508 + /** Double property Numeric_Value. 1.509 + Corresponds to u_getNumericValue. @stable ICU 2.4 */ 1.510 + UCHAR_NUMERIC_VALUE=0x3000, 1.511 + /** First constant for double Unicode properties. @stable ICU 2.4 */ 1.512 + UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE, 1.513 + /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */ 1.514 + UCHAR_DOUBLE_LIMIT=0x3001, 1.515 + 1.516 + /** String property Age. 1.517 + Corresponds to u_charAge. @stable ICU 2.4 */ 1.518 + UCHAR_AGE=0x4000, 1.519 + /** First constant for string Unicode properties. @stable ICU 2.4 */ 1.520 + UCHAR_STRING_START=UCHAR_AGE, 1.521 + /** String property Bidi_Mirroring_Glyph. 1.522 + Corresponds to u_charMirror. @stable ICU 2.4 */ 1.523 + UCHAR_BIDI_MIRRORING_GLYPH=0x4001, 1.524 + /** String property Case_Folding. 1.525 + Corresponds to u_strFoldCase in ustring.h.
@stable ICU 2.4 */ 1.526 + UCHAR_CASE_FOLDING=0x4002, 1.527 +#ifndef U_HIDE_DEPRECATED_API 1.528 + /** Deprecated string property ISO_Comment. 1.529 + Corresponds to u_getISOComment. The value 0x4003 is not reused by another constant when U_HIDE_DEPRECATED_API hides this one. @deprecated ICU 49 */ 1.530 + UCHAR_ISO_COMMENT=0x4003, 1.531 +#endif /* U_HIDE_DEPRECATED_API */ 1.532 + /** String property Lowercase_Mapping. 1.533 + Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ 1.534 + UCHAR_LOWERCASE_MAPPING=0x4004, 1.535 + /** String property Name. 1.536 + Corresponds to u_charName. @stable ICU 2.4 */ 1.537 + UCHAR_NAME=0x4005, 1.538 + /** String property Simple_Case_Folding. 1.539 + Corresponds to u_foldCase. @stable ICU 2.4 */ 1.540 + UCHAR_SIMPLE_CASE_FOLDING=0x4006, 1.541 + /** String property Simple_Lowercase_Mapping. 1.542 + Corresponds to u_tolower. @stable ICU 2.4 */ 1.543 + UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007, 1.544 + /** String property Simple_Titlecase_Mapping. 1.545 + Corresponds to u_totitle. @stable ICU 2.4 */ 1.546 + UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008, 1.547 + /** String property Simple_Uppercase_Mapping. 1.548 + Corresponds to u_toupper. @stable ICU 2.4 */ 1.549 + UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009, 1.550 + /** String property Titlecase_Mapping. 1.551 + Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ 1.552 + UCHAR_TITLECASE_MAPPING=0x400A, 1.553 +#ifndef U_HIDE_DEPRECATED_API 1.554 + /** String property Unicode_1_Name. 1.555 + This property is of little practical value. 1.556 + Beginning with ICU 49, ICU APIs return an empty string for this property. 1.557 + Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). The value 0x400B is not reused by another constant when U_HIDE_DEPRECATED_API hides this one. @deprecated ICU 49 */ 1.558 + UCHAR_UNICODE_1_NAME=0x400B, 1.559 +#endif /* U_HIDE_DEPRECATED_API */ 1.560 + /** String property Uppercase_Mapping. 1.561 + Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ 1.562 + UCHAR_UPPERCASE_MAPPING=0x400C, 1.563 + /** String property Bidi_Paired_Bracket (new in Unicode 6.3). 1.564 + Corresponds to u_getBidiPairedBracket.
@stable ICU 52 */ 1.565 + UCHAR_BIDI_PAIRED_BRACKET=0x400D, 1.566 + /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */ 1.567 + UCHAR_STRING_LIMIT=0x400E, 1.568 + 1.569 + /** Miscellaneous property Script_Extensions (new in Unicode 6.0). 1.570 + Some characters are commonly used in multiple scripts. 1.571 + For more information, see UAX #24: http://www.unicode.org/reports/tr24/. 1.572 + Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. 1.573 + @stable ICU 4.6 */ 1.574 + UCHAR_SCRIPT_EXTENSIONS=0x7000, 1.575 + /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */ 1.576 + UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS, 1.577 + /** One more than the last constant for Unicode properties with unusual value types. 1.578 + * @stable ICU 4.6 */ 1.579 + UCHAR_OTHER_PROPERTY_LIMIT=0x7001, 1.580 + /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */ 1.581 + UCHAR_INVALID_CODE = -1 1.582 +} UProperty; 1.583 + 1.584 +/** 1.585 + * Data for enumerated Unicode general category types. 1.586 + * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html . 1.587 + * @stable ICU 2.0 1.588 + */ 1.589 +typedef enum UCharCategory 1.590 +{ 1.591 + /* 1.592 + * Note: UCharCategory constants and their API comments are parsed by preparseucd.py. 1.593 + * It matches pairs of lines like 1.594 + * / ** <Unicode 2-letter General_Category value> comment... * / 1.595 + * U_<[A-Z_]+> = <integer>, 1.596 + */ 1.597 + 1.598 + /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */ 1.599 + U_UNASSIGNED = 0, 1.600 + /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!)
@stable ICU 2.0 */ 1.601 + U_GENERAL_OTHER_TYPES = 0, 1.602 + /** Lu @stable ICU 2.0 */ 1.603 + U_UPPERCASE_LETTER = 1, 1.604 + /** Ll @stable ICU 2.0 */ 1.605 + U_LOWERCASE_LETTER = 2, 1.606 + /** Lt @stable ICU 2.0 */ 1.607 + U_TITLECASE_LETTER = 3, 1.608 + /** Lm @stable ICU 2.0 */ 1.609 + U_MODIFIER_LETTER = 4, 1.610 + /** Lo @stable ICU 2.0 */ 1.611 + U_OTHER_LETTER = 5, 1.612 + /** Mn @stable ICU 2.0 */ 1.613 + U_NON_SPACING_MARK = 6, 1.614 + /** Me @stable ICU 2.0 */ 1.615 + U_ENCLOSING_MARK = 7, 1.616 + /** Mc @stable ICU 2.0 */ 1.617 + U_COMBINING_SPACING_MARK = 8, 1.618 + /** Nd @stable ICU 2.0 */ 1.619 + U_DECIMAL_DIGIT_NUMBER = 9, 1.620 + /** Nl @stable ICU 2.0 */ 1.621 + U_LETTER_NUMBER = 10, 1.622 + /** No @stable ICU 2.0 */ 1.623 + U_OTHER_NUMBER = 11, 1.624 + /** Zs @stable ICU 2.0 */ 1.625 + U_SPACE_SEPARATOR = 12, 1.626 + /** Zl @stable ICU 2.0 */ 1.627 + U_LINE_SEPARATOR = 13, 1.628 + /** Zp @stable ICU 2.0 */ 1.629 + U_PARAGRAPH_SEPARATOR = 14, 1.630 + /** Cc @stable ICU 2.0 */ 1.631 + U_CONTROL_CHAR = 15, 1.632 + /** Cf @stable ICU 2.0 */ 1.633 + U_FORMAT_CHAR = 16, 1.634 + /** Co @stable ICU 2.0 */ 1.635 + U_PRIVATE_USE_CHAR = 17, 1.636 + /** Cs @stable ICU 2.0 */ 1.637 + U_SURROGATE = 18, 1.638 + /** Pd @stable ICU 2.0 */ 1.639 + U_DASH_PUNCTUATION = 19, 1.640 + /** Ps @stable ICU 2.0 */ 1.641 + U_START_PUNCTUATION = 20, 1.642 + /** Pe @stable ICU 2.0 */ 1.643 + U_END_PUNCTUATION = 21, 1.644 + /** Pc @stable ICU 2.0 */ 1.645 + U_CONNECTOR_PUNCTUATION = 22, 1.646 + /** Po @stable ICU 2.0 */ 1.647 + U_OTHER_PUNCTUATION = 23, 1.648 + /** Sm @stable ICU 2.0 */ 1.649 + U_MATH_SYMBOL = 24, 1.650 + /** Sc @stable ICU 2.0 */ 1.651 + U_CURRENCY_SYMBOL = 25, 1.652 + /** Sk @stable ICU 2.0 */ 1.653 + U_MODIFIER_SYMBOL = 26, 1.654 + /** So @stable ICU 2.0 */ 1.655 + U_OTHER_SYMBOL = 27, 1.656 + /** Pi @stable ICU 2.0 */ 1.657 + U_INITIAL_PUNCTUATION = 28, 1.658 + /** Pf @stable ICU 2.0 */ 1.659 + U_FINAL_PUNCTUATION = 29, 1.660 + /** One higher than the 
last enum UCharCategory constant. @stable ICU 2.0 */ 1.661 + U_CHAR_CATEGORY_COUNT 1.662 +} UCharCategory; 1.663 + 1.664 +/** 1.665 + * U_GC_XX_MASK constants are bit flags corresponding to Unicode 1.666 + * general category values. 1.667 + * For each category, the nth bit is set if the numeric value of the 1.668 + * corresponding UCharCategory constant is n. 1.669 + * 1.670 + * There are also some U_GC_Y_MASK constants for groups of general categories 1.671 + * like L for all letter categories. 1.672 + * 1.673 + * @see u_charType 1.674 + * @see U_GET_GC_MASK 1.675 + * @see UCharCategory 1.676 + * @stable ICU 2.1 1.677 + */ 1.678 +#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES) 1.679 + 1.680 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.681 +#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER) 1.682 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.683 +#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER) 1.684 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.685 +#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) 1.686 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.687 +#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) 1.688 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.689 +#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) 1.690 + 1.691 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.692 +#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) 1.693 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.694 +#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) 1.695 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.696 +#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) 1.697 + 1.698 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.699 +#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) 1.700 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.701 +#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) 1.702 +/** Mask constant for a UCharCategory. 
@stable ICU 2.1 */ 1.703 +#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) 1.704 + 1.705 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.706 +#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) 1.707 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.708 +#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) 1.709 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.710 +#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) 1.711 + 1.712 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.713 +#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) 1.714 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.715 +#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) 1.716 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.717 +#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) 1.718 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.719 +#define U_GC_CS_MASK U_MASK(U_SURROGATE) 1.720 + 1.721 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.722 +#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) 1.723 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.724 +#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION) 1.725 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.726 +#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) 1.727 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.728 +#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) 1.729 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.730 +#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) 1.731 + 1.732 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.733 +#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) 1.734 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.735 +#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) 1.736 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.737 +#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) 1.738 +/** Mask constant for a UCharCategory. 
@stable ICU 2.1 */ 1.739 +#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) 1.740 + 1.741 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.742 +#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) 1.743 +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ 1.744 +#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) 1.745 + 1.746 + 1.747 +/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */ 1.748 +#define U_GC_L_MASK \ 1.749 + (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) 1.750 + 1.751 +/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */ 1.752 +#define U_GC_LC_MASK \ 1.753 + (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) 1.754 + 1.755 +/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */ 1.756 +#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) 1.757 + 1.758 +/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */ 1.759 +#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) 1.760 + 1.761 +/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */ 1.762 +#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) 1.763 + 1.764 +/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */ 1.765 +#define U_GC_C_MASK \ 1.766 + (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) 1.767 + 1.768 +/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */ 1.769 +#define U_GC_P_MASK \ 1.770 + (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ 1.771 + U_GC_PI_MASK|U_GC_PF_MASK) 1.772 + 1.773 +/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */ 1.774 +#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) 1.775 + 1.776 +/** 1.777 + * This specifies the language directional property of a character set. 
1.778 + * @stable ICU 2.0 1.779 + */ 1.780 +typedef enum UCharDirection { 1.781 + /* 1.782 + * Note: UCharDirection constants and their API comments are parsed by preparseucd.py. 1.783 + * It matches pairs of lines like 1.784 + * / ** <Unicode 1..3-letter Bidi_Class value> comment... * / 1.785 + * U_<[A-Z_]+> = <integer>, 1.786 + */ 1.787 + 1.788 + /** L @stable ICU 2.0 */ 1.789 + U_LEFT_TO_RIGHT = 0, 1.790 + /** R @stable ICU 2.0 */ 1.791 + U_RIGHT_TO_LEFT = 1, 1.792 + /** EN @stable ICU 2.0 */ 1.793 + U_EUROPEAN_NUMBER = 2, 1.794 + /** ES @stable ICU 2.0 */ 1.795 + U_EUROPEAN_NUMBER_SEPARATOR = 3, 1.796 + /** ET @stable ICU 2.0 */ 1.797 + U_EUROPEAN_NUMBER_TERMINATOR = 4, 1.798 + /** AN @stable ICU 2.0 */ 1.799 + U_ARABIC_NUMBER = 5, 1.800 + /** CS @stable ICU 2.0 */ 1.801 + U_COMMON_NUMBER_SEPARATOR = 6, 1.802 + /** B @stable ICU 2.0 */ 1.803 + U_BLOCK_SEPARATOR = 7, 1.804 + /** S @stable ICU 2.0 */ 1.805 + U_SEGMENT_SEPARATOR = 8, 1.806 + /** WS @stable ICU 2.0 */ 1.807 + U_WHITE_SPACE_NEUTRAL = 9, 1.808 + /** ON @stable ICU 2.0 */ 1.809 + U_OTHER_NEUTRAL = 10, 1.810 + /** LRE @stable ICU 2.0 */ 1.811 + U_LEFT_TO_RIGHT_EMBEDDING = 11, 1.812 + /** LRO @stable ICU 2.0 */ 1.813 + U_LEFT_TO_RIGHT_OVERRIDE = 12, 1.814 + /** AL @stable ICU 2.0 */ 1.815 + U_RIGHT_TO_LEFT_ARABIC = 13, 1.816 + /** RLE @stable ICU 2.0 */ 1.817 + U_RIGHT_TO_LEFT_EMBEDDING = 14, 1.818 + /** RLO @stable ICU 2.0 */ 1.819 + U_RIGHT_TO_LEFT_OVERRIDE = 15, 1.820 + /** PDF @stable ICU 2.0 */ 1.821 + U_POP_DIRECTIONAL_FORMAT = 16, 1.822 + /** NSM @stable ICU 2.0 */ 1.823 + U_DIR_NON_SPACING_MARK = 17, 1.824 + /** BN @stable ICU 2.0 */ 1.825 + U_BOUNDARY_NEUTRAL = 18, 1.826 + /** FSI @stable ICU 52 */ 1.827 + U_FIRST_STRONG_ISOLATE = 19, 1.828 + /** LRI @stable ICU 52 */ 1.829 + U_LEFT_TO_RIGHT_ISOLATE = 20, 1.830 + /** RLI @stable ICU 52 */ 1.831 + U_RIGHT_TO_LEFT_ISOLATE = 21, 1.832 + /** PDI @stable ICU 52 */ 1.833 + U_POP_DIRECTIONAL_ISOLATE = 22, 1.834 + /** @stable ICU 2.0 */ 1.835 + 
U_CHAR_DIRECTION_COUNT 1.836 +} UCharDirection; 1.837 + 1.838 +/** 1.839 + * Bidi Paired Bracket Type constants. 1.840 + * 1.841 + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE 1.842 + * @stable ICU 52 1.843 + */ 1.844 +typedef enum UBidiPairedBracketType { 1.845 + /* 1.846 + * Note: UBidiPairedBracketType constants are parsed by preparseucd.py. 1.847 + * It matches lines like 1.848 + * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name> 1.849 + */ 1.850 + 1.851 + /** Not a paired bracket. @stable ICU 52 */ 1.852 + U_BPT_NONE, 1.853 + /** Open paired bracket. @stable ICU 52 */ 1.854 + U_BPT_OPEN, 1.855 + /** Close paired bracket. @stable ICU 52 */ 1.856 + U_BPT_CLOSE, 1.857 + /** @stable ICU 52 */ 1.858 + U_BPT_COUNT /* 3 */ 1.859 +} UBidiPairedBracketType; 1.860 + 1.861 +/** 1.862 + * Constants for Unicode blocks, see the Unicode Data file Blocks.txt 1.863 + * @stable ICU 2.0 1.864 + */ 1.865 +enum UBlockCode { 1.866 + /* 1.867 + * Note: UBlockCode constants are parsed by preparseucd.py. 1.868 + * It matches lines like 1.869 + * UBLOCK_<Unicode Block value name> = <integer>, 1.870 + */ 1.871 + 1.872 + /** New No_Block value in Unicode 4. @stable ICU 2.6 */ 1.873 + UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */ 1.874 + 1.875 + /** @stable ICU 2.0 */ 1.876 + UBLOCK_BASIC_LATIN = 1, /*[0000]*/ 1.877 + 1.878 + /** @stable ICU 2.0 */ 1.879 + UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/ 1.880 + 1.881 + /** @stable ICU 2.0 */ 1.882 + UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/ 1.883 + 1.884 + /** @stable ICU 2.0 */ 1.885 + UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/ 1.886 + 1.887 + /** @stable ICU 2.0 */ 1.888 + UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/ 1.889 + 1.890 + /** @stable ICU 2.0 */ 1.891 + UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/ 1.892 + 1.893 + /** @stable ICU 2.0 */ 1.894 + UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/ 1.895 + 1.896 + /** 1.897 + * Unicode 3.2 renames this block to "Greek and Coptic". 
1.898 + * @stable ICU 2.0 1.899 + */ 1.900 + UBLOCK_GREEK =8, /*[0370]*/ 1.901 + 1.902 + /** @stable ICU 2.0 */ 1.903 + UBLOCK_CYRILLIC =9, /*[0400]*/ 1.904 + 1.905 + /** @stable ICU 2.0 */ 1.906 + UBLOCK_ARMENIAN =10, /*[0530]*/ 1.907 + 1.908 + /** @stable ICU 2.0 */ 1.909 + UBLOCK_HEBREW =11, /*[0590]*/ 1.910 + 1.911 + /** @stable ICU 2.0 */ 1.912 + UBLOCK_ARABIC =12, /*[0600]*/ 1.913 + 1.914 + /** @stable ICU 2.0 */ 1.915 + UBLOCK_SYRIAC =13, /*[0700]*/ 1.916 + 1.917 + /** @stable ICU 2.0 */ 1.918 + UBLOCK_THAANA =14, /*[0780]*/ 1.919 + 1.920 + /** @stable ICU 2.0 */ 1.921 + UBLOCK_DEVANAGARI =15, /*[0900]*/ 1.922 + 1.923 + /** @stable ICU 2.0 */ 1.924 + UBLOCK_BENGALI =16, /*[0980]*/ 1.925 + 1.926 + /** @stable ICU 2.0 */ 1.927 + UBLOCK_GURMUKHI =17, /*[0A00]*/ 1.928 + 1.929 + /** @stable ICU 2.0 */ 1.930 + UBLOCK_GUJARATI =18, /*[0A80]*/ 1.931 + 1.932 + /** @stable ICU 2.0 */ 1.933 + UBLOCK_ORIYA =19, /*[0B00]*/ 1.934 + 1.935 + /** @stable ICU 2.0 */ 1.936 + UBLOCK_TAMIL =20, /*[0B80]*/ 1.937 + 1.938 + /** @stable ICU 2.0 */ 1.939 + UBLOCK_TELUGU =21, /*[0C00]*/ 1.940 + 1.941 + /** @stable ICU 2.0 */ 1.942 + UBLOCK_KANNADA =22, /*[0C80]*/ 1.943 + 1.944 + /** @stable ICU 2.0 */ 1.945 + UBLOCK_MALAYALAM =23, /*[0D00]*/ 1.946 + 1.947 + /** @stable ICU 2.0 */ 1.948 + UBLOCK_SINHALA =24, /*[0D80]*/ 1.949 + 1.950 + /** @stable ICU 2.0 */ 1.951 + UBLOCK_THAI =25, /*[0E00]*/ 1.952 + 1.953 + /** @stable ICU 2.0 */ 1.954 + UBLOCK_LAO =26, /*[0E80]*/ 1.955 + 1.956 + /** @stable ICU 2.0 */ 1.957 + UBLOCK_TIBETAN =27, /*[0F00]*/ 1.958 + 1.959 + /** @stable ICU 2.0 */ 1.960 + UBLOCK_MYANMAR =28, /*[1000]*/ 1.961 + 1.962 + /** @stable ICU 2.0 */ 1.963 + UBLOCK_GEORGIAN =29, /*[10A0]*/ 1.964 + 1.965 + /** @stable ICU 2.0 */ 1.966 + UBLOCK_HANGUL_JAMO =30, /*[1100]*/ 1.967 + 1.968 + /** @stable ICU 2.0 */ 1.969 + UBLOCK_ETHIOPIC =31, /*[1200]*/ 1.970 + 1.971 + /** @stable ICU 2.0 */ 1.972 + UBLOCK_CHEROKEE =32, /*[13A0]*/ 1.973 + 1.974 + /** @stable ICU 2.0 */ 1.975 + 
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/ 1.976 + 1.977 + /** @stable ICU 2.0 */ 1.978 + UBLOCK_OGHAM =34, /*[1680]*/ 1.979 + 1.980 + /** @stable ICU 2.0 */ 1.981 + UBLOCK_RUNIC =35, /*[16A0]*/ 1.982 + 1.983 + /** @stable ICU 2.0 */ 1.984 + UBLOCK_KHMER =36, /*[1780]*/ 1.985 + 1.986 + /** @stable ICU 2.0 */ 1.987 + UBLOCK_MONGOLIAN =37, /*[1800]*/ 1.988 + 1.989 + /** @stable ICU 2.0 */ 1.990 + UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/ 1.991 + 1.992 + /** @stable ICU 2.0 */ 1.993 + UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/ 1.994 + 1.995 + /** @stable ICU 2.0 */ 1.996 + UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/ 1.997 + 1.998 + /** @stable ICU 2.0 */ 1.999 + UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/ 1.1000 + 1.1001 + /** @stable ICU 2.0 */ 1.1002 + UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/ 1.1003 + 1.1004 + /** 1.1005 + * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". 1.1006 + * @stable ICU 2.0 1.1007 + */ 1.1008 + UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/ 1.1009 + 1.1010 + /** @stable ICU 2.0 */ 1.1011 + UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/ 1.1012 + 1.1013 + /** @stable ICU 2.0 */ 1.1014 + UBLOCK_NUMBER_FORMS =45, /*[2150]*/ 1.1015 + 1.1016 + /** @stable ICU 2.0 */ 1.1017 + UBLOCK_ARROWS =46, /*[2190]*/ 1.1018 + 1.1019 + /** @stable ICU 2.0 */ 1.1020 + UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/ 1.1021 + 1.1022 + /** @stable ICU 2.0 */ 1.1023 + UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/ 1.1024 + 1.1025 + /** @stable ICU 2.0 */ 1.1026 + UBLOCK_CONTROL_PICTURES =49, /*[2400]*/ 1.1027 + 1.1028 + /** @stable ICU 2.0 */ 1.1029 + UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/ 1.1030 + 1.1031 + /** @stable ICU 2.0 */ 1.1032 + UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/ 1.1033 + 1.1034 + /** @stable ICU 2.0 */ 1.1035 + UBLOCK_BOX_DRAWING =52, /*[2500]*/ 1.1036 + 1.1037 + /** @stable ICU 2.0 */ 1.1038 + UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/ 1.1039 + 1.1040 + /** @stable ICU 2.0 */ 1.1041 + 
UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/ 1.1042 + 1.1043 + /** @stable ICU 2.0 */ 1.1044 + UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/ 1.1045 + 1.1046 + /** @stable ICU 2.0 */ 1.1047 + UBLOCK_DINGBATS =56, /*[2700]*/ 1.1048 + 1.1049 + /** @stable ICU 2.0 */ 1.1050 + UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/ 1.1051 + 1.1052 + /** @stable ICU 2.0 */ 1.1053 + UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/ 1.1054 + 1.1055 + /** @stable ICU 2.0 */ 1.1056 + UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/ 1.1057 + 1.1058 + /** @stable ICU 2.0 */ 1.1059 + UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/ 1.1060 + 1.1061 + /** @stable ICU 2.0 */ 1.1062 + UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/ 1.1063 + 1.1064 + /** @stable ICU 2.0 */ 1.1065 + UBLOCK_HIRAGANA =62, /*[3040]*/ 1.1066 + 1.1067 + /** @stable ICU 2.0 */ 1.1068 + UBLOCK_KATAKANA =63, /*[30A0]*/ 1.1069 + 1.1070 + /** @stable ICU 2.0 */ 1.1071 + UBLOCK_BOPOMOFO =64, /*[3100]*/ 1.1072 + 1.1073 + /** @stable ICU 2.0 */ 1.1074 + UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/ 1.1075 + 1.1076 + /** @stable ICU 2.0 */ 1.1077 + UBLOCK_KANBUN =66, /*[3190]*/ 1.1078 + 1.1079 + /** @stable ICU 2.0 */ 1.1080 + UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/ 1.1081 + 1.1082 + /** @stable ICU 2.0 */ 1.1083 + UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/ 1.1084 + 1.1085 + /** @stable ICU 2.0 */ 1.1086 + UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/ 1.1087 + 1.1088 + /** @stable ICU 2.0 */ 1.1089 + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/ 1.1090 + 1.1091 + /** @stable ICU 2.0 */ 1.1092 + UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/ 1.1093 + 1.1094 + /** @stable ICU 2.0 */ 1.1095 + UBLOCK_YI_SYLLABLES =72, /*[A000]*/ 1.1096 + 1.1097 + /** @stable ICU 2.0 */ 1.1098 + UBLOCK_YI_RADICALS =73, /*[A490]*/ 1.1099 + 1.1100 + /** @stable ICU 2.0 */ 1.1101 + UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/ 1.1102 + 1.1103 + /** @stable ICU 2.0 */ 1.1104 + UBLOCK_HIGH_SURROGATES =75, /*[D800]*/ 1.1105 + 1.1106 + /** @stable 
ICU 2.0 */ 1.1107 + UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/ 1.1108 + 1.1109 + /** @stable ICU 2.0 */ 1.1110 + UBLOCK_LOW_SURROGATES =77, /*[DC00]*/ 1.1111 + 1.1112 + /** 1.1113 + * Same as UBLOCK_PRIVATE_USE. 1.1114 + * Until Unicode 3.1.1, the corresponding block name was "Private Use", 1.1115 + * and multiple code point ranges had this block. 1.1116 + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and 1.1117 + * adds separate blocks for the supplementary PUAs. 1.1118 + * 1.1119 + * @stable ICU 2.0 1.1120 + */ 1.1121 + UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/ 1.1122 + /** 1.1123 + * Same as UBLOCK_PRIVATE_USE_AREA. 1.1124 + * Until Unicode 3.1.1, the corresponding block name was "Private Use", 1.1125 + * and multiple code point ranges had this block. 1.1126 + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and 1.1127 + * adds separate blocks for the supplementary PUAs. 1.1128 + * 1.1129 + * @stable ICU 2.0 1.1130 + */ 1.1131 + UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA, 1.1132 + 1.1133 + /** @stable ICU 2.0 */ 1.1134 + UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/ 1.1135 + 1.1136 + /** @stable ICU 2.0 */ 1.1137 + UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/ 1.1138 + 1.1139 + /** @stable ICU 2.0 */ 1.1140 + UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/ 1.1141 + 1.1142 + /** @stable ICU 2.0 */ 1.1143 + UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/ 1.1144 + 1.1145 + /** @stable ICU 2.0 */ 1.1146 + UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/ 1.1147 + 1.1148 + /** @stable ICU 2.0 */ 1.1149 + UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/ 1.1150 + 1.1151 + /** @stable ICU 2.0 */ 1.1152 + UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/ 1.1153 + 1.1154 + /** @stable ICU 2.0 */ 1.1155 + UBLOCK_SPECIALS =86, /*[FFF0]*/ 1.1156 + 1.1157 + /** @stable ICU 2.0 */ 1.1158 + UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/ 1.1159 + 1.1160 + /* New blocks in Unicode 3.1 */ 1.1161 + 1.1162 + /** @stable 
ICU 2.0 */ 1.1163 + UBLOCK_OLD_ITALIC = 88, /*[10300]*/ 1.1164 + /** @stable ICU 2.0 */ 1.1165 + UBLOCK_GOTHIC = 89, /*[10330]*/ 1.1166 + /** @stable ICU 2.0 */ 1.1167 + UBLOCK_DESERET = 90, /*[10400]*/ 1.1168 + /** @stable ICU 2.0 */ 1.1169 + UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/ 1.1170 + /** @stable ICU 2.0 */ 1.1171 + UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/ 1.1172 + /** @stable ICU 2.0 */ 1.1173 + UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/ 1.1174 + /** @stable ICU 2.0 */ 1.1175 + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/ 1.1176 + /** @stable ICU 2.0 */ 1.1177 + UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/ 1.1178 + /** @stable ICU 2.0 */ 1.1179 + UBLOCK_TAGS = 96, /*[E0000]*/ 1.1180 + 1.1181 + /* New blocks in Unicode 3.2 */ 1.1182 + 1.1183 + /** @stable ICU 3.0 */ 1.1184 + UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/ 1.1185 + /** 1.1186 + * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
1.1187 + * @stable ICU 2.2 1.1188 + */ 1.1189 + UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, 1.1190 + /** @stable ICU 2.2 */ 1.1191 + UBLOCK_TAGALOG = 98, /*[1700]*/ 1.1192 + /** @stable ICU 2.2 */ 1.1193 + UBLOCK_HANUNOO = 99, /*[1720]*/ 1.1194 + /** @stable ICU 2.2 */ 1.1195 + UBLOCK_BUHID = 100, /*[1740]*/ 1.1196 + /** @stable ICU 2.2 */ 1.1197 + UBLOCK_TAGBANWA = 101, /*[1760]*/ 1.1198 + /** @stable ICU 2.2 */ 1.1199 + UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ 1.1200 + /** @stable ICU 2.2 */ 1.1201 + UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ 1.1202 + /** @stable ICU 2.2 */ 1.1203 + UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ 1.1204 + /** @stable ICU 2.2 */ 1.1205 + UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ 1.1206 + /** @stable ICU 2.2 */ 1.1207 + UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ 1.1208 + /** @stable ICU 2.2 */ 1.1209 + UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ 1.1210 + /** @stable ICU 2.2 */ 1.1211 + UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ 1.1212 + /** @stable ICU 2.2 */ 1.1213 + UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ 1.1214 + /** @stable ICU 2.2 */ 1.1215 + UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ 1.1216 + 1.1217 + /* New blocks in Unicode 4 */ 1.1218 + 1.1219 + /** @stable ICU 2.6 */ 1.1220 + UBLOCK_LIMBU = 111, /*[1900]*/ 1.1221 + /** @stable ICU 2.6 */ 1.1222 + UBLOCK_TAI_LE = 112, /*[1950]*/ 1.1223 + /** @stable ICU 2.6 */ 1.1224 + UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ 1.1225 + /** @stable ICU 2.6 */ 1.1226 + UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ 1.1227 + /** @stable ICU 2.6 */ 1.1228 + UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ 1.1229 + /** @stable ICU 2.6 */ 1.1230 + UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ 1.1231 + /** @stable ICU 2.6 */ 1.1232 + UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ 1.1233 + /** @stable ICU 2.6 */ 1.1234 + UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ 
1.1235 + /** @stable ICU 2.6 */ 1.1236 + UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ 1.1237 + /** @stable ICU 2.6 */ 1.1238 + UBLOCK_UGARITIC = 120, /*[10380]*/ 1.1239 + /** @stable ICU 2.6 */ 1.1240 + UBLOCK_SHAVIAN = 121, /*[10450]*/ 1.1241 + /** @stable ICU 2.6 */ 1.1242 + UBLOCK_OSMANYA = 122, /*[10480]*/ 1.1243 + /** @stable ICU 2.6 */ 1.1244 + UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ 1.1245 + /** @stable ICU 2.6 */ 1.1246 + UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ 1.1247 + /** @stable ICU 2.6 */ 1.1248 + UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ 1.1249 + 1.1250 + /* New blocks in Unicode 4.1 */ 1.1251 + 1.1252 + /** @stable ICU 3.4 */ 1.1253 + UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/ 1.1254 + /** @stable ICU 3.4 */ 1.1255 + UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/ 1.1256 + /** @stable ICU 3.4 */ 1.1257 + UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/ 1.1258 + /** @stable ICU 3.4 */ 1.1259 + UBLOCK_BUGINESE = 129, /*[1A00]*/ 1.1260 + /** @stable ICU 3.4 */ 1.1261 + UBLOCK_CJK_STROKES = 130, /*[31C0]*/ 1.1262 + /** @stable ICU 3.4 */ 1.1263 + UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/ 1.1264 + /** @stable ICU 3.4 */ 1.1265 + UBLOCK_COPTIC = 132, /*[2C80]*/ 1.1266 + /** @stable ICU 3.4 */ 1.1267 + UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/ 1.1268 + /** @stable ICU 3.4 */ 1.1269 + UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/ 1.1270 + /** @stable ICU 3.4 */ 1.1271 + UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/ 1.1272 + /** @stable ICU 3.4 */ 1.1273 + UBLOCK_GLAGOLITIC = 136, /*[2C00]*/ 1.1274 + /** @stable ICU 3.4 */ 1.1275 + UBLOCK_KHAROSHTHI = 137, /*[10A00]*/ 1.1276 + /** @stable ICU 3.4 */ 1.1277 + UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/ 1.1278 + /** @stable ICU 3.4 */ 1.1279 + UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/ 1.1280 + /** @stable ICU 3.4 */ 1.1281 + UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/ 1.1282 + /** @stable ICU 3.4 */ 1.1283 + UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/ 
1.1284 + /** @stable ICU 3.4 */ 1.1285 + UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/ 1.1286 + /** @stable ICU 3.4 */ 1.1287 + UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/ 1.1288 + /** @stable ICU 3.4 */ 1.1289 + UBLOCK_TIFINAGH = 144, /*[2D30]*/ 1.1290 + /** @stable ICU 3.4 */ 1.1291 + UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/ 1.1292 + 1.1293 + /* New blocks in Unicode 5.0 */ 1.1294 + 1.1295 + /** @stable ICU 3.6 */ 1.1296 + UBLOCK_NKO = 146, /*[07C0]*/ 1.1297 + /** @stable ICU 3.6 */ 1.1298 + UBLOCK_BALINESE = 147, /*[1B00]*/ 1.1299 + /** @stable ICU 3.6 */ 1.1300 + UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/ 1.1301 + /** @stable ICU 3.6 */ 1.1302 + UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/ 1.1303 + /** @stable ICU 3.6 */ 1.1304 + UBLOCK_PHAGS_PA = 150, /*[A840]*/ 1.1305 + /** @stable ICU 3.6 */ 1.1306 + UBLOCK_PHOENICIAN = 151, /*[10900]*/ 1.1307 + /** @stable ICU 3.6 */ 1.1308 + UBLOCK_CUNEIFORM = 152, /*[12000]*/ 1.1309 + /** @stable ICU 3.6 */ 1.1310 + UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/ 1.1311 + /** @stable ICU 3.6 */ 1.1312 + UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/ 1.1313 + 1.1314 + /* New blocks in Unicode 5.1 */ 1.1315 + 1.1316 + /** @stable ICU 4.0 */ 1.1317 + UBLOCK_SUNDANESE = 155, /*[1B80]*/ 1.1318 + /** @stable ICU 4.0 */ 1.1319 + UBLOCK_LEPCHA = 156, /*[1C00]*/ 1.1320 + /** @stable ICU 4.0 */ 1.1321 + UBLOCK_OL_CHIKI = 157, /*[1C50]*/ 1.1322 + /** @stable ICU 4.0 */ 1.1323 + UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/ 1.1324 + /** @stable ICU 4.0 */ 1.1325 + UBLOCK_VAI = 159, /*[A500]*/ 1.1326 + /** @stable ICU 4.0 */ 1.1327 + UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/ 1.1328 + /** @stable ICU 4.0 */ 1.1329 + UBLOCK_SAURASHTRA = 161, /*[A880]*/ 1.1330 + /** @stable ICU 4.0 */ 1.1331 + UBLOCK_KAYAH_LI = 162, /*[A900]*/ 1.1332 + /** @stable ICU 4.0 */ 1.1333 + UBLOCK_REJANG = 163, /*[A930]*/ 1.1334 + /** @stable ICU 4.0 */ 1.1335 + UBLOCK_CHAM = 164, /*[AA00]*/ 1.1336 + /** @stable ICU 4.0 */ 1.1337 + 
UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/ 1.1338 + /** @stable ICU 4.0 */ 1.1339 + UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/ 1.1340 + /** @stable ICU 4.0 */ 1.1341 + UBLOCK_LYCIAN = 167, /*[10280]*/ 1.1342 + /** @stable ICU 4.0 */ 1.1343 + UBLOCK_CARIAN = 168, /*[102A0]*/ 1.1344 + /** @stable ICU 4.0 */ 1.1345 + UBLOCK_LYDIAN = 169, /*[10920]*/ 1.1346 + /** @stable ICU 4.0 */ 1.1347 + UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/ 1.1348 + /** @stable ICU 4.0 */ 1.1349 + UBLOCK_DOMINO_TILES = 171, /*[1F030]*/ 1.1350 + 1.1351 + /* New blocks in Unicode 5.2 */ 1.1352 + 1.1353 + /** @stable ICU 4.4 */ 1.1354 + UBLOCK_SAMARITAN = 172, /*[0800]*/ 1.1355 + /** @stable ICU 4.4 */ 1.1356 + UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/ 1.1357 + /** @stable ICU 4.4 */ 1.1358 + UBLOCK_TAI_THAM = 174, /*[1A20]*/ 1.1359 + /** @stable ICU 4.4 */ 1.1360 + UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/ 1.1361 + /** @stable ICU 4.4 */ 1.1362 + UBLOCK_LISU = 176, /*[A4D0]*/ 1.1363 + /** @stable ICU 4.4 */ 1.1364 + UBLOCK_BAMUM = 177, /*[A6A0]*/ 1.1365 + /** @stable ICU 4.4 */ 1.1366 + UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/ 1.1367 + /** @stable ICU 4.4 */ 1.1368 + UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/ 1.1369 + /** @stable ICU 4.4 */ 1.1370 + UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/ 1.1371 + /** @stable ICU 4.4 */ 1.1372 + UBLOCK_JAVANESE = 181, /*[A980]*/ 1.1373 + /** @stable ICU 4.4 */ 1.1374 + UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/ 1.1375 + /** @stable ICU 4.4 */ 1.1376 + UBLOCK_TAI_VIET = 183, /*[AA80]*/ 1.1377 + /** @stable ICU 4.4 */ 1.1378 + UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/ 1.1379 + /** @stable ICU 4.4 */ 1.1380 + UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/ 1.1381 + /** @stable ICU 4.4 */ 1.1382 + UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/ 1.1383 + /** @stable ICU 4.4 */ 1.1384 + UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/ 1.1385 + /** @stable ICU 4.4 */ 1.1386 + UBLOCK_AVESTAN = 188, /*[10B00]*/ 1.1387 + /** @stable ICU 4.4 
*/ 1.1388 + UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/ 1.1389 + /** @stable ICU 4.4 */ 1.1390 + UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/ 1.1391 + /** @stable ICU 4.4 */ 1.1392 + UBLOCK_OLD_TURKIC = 191, /*[10C00]*/ 1.1393 + /** @stable ICU 4.4 */ 1.1394 + UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/ 1.1395 + /** @stable ICU 4.4 */ 1.1396 + UBLOCK_KAITHI = 193, /*[11080]*/ 1.1397 + /** @stable ICU 4.4 */ 1.1398 + UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/ 1.1399 + /** @stable ICU 4.4 */ 1.1400 + UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/ 1.1401 + /** @stable ICU 4.4 */ 1.1402 + UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/ 1.1403 + /** @stable ICU 4.4 */ 1.1404 + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/ 1.1405 + 1.1406 + /* New blocks in Unicode 6.0 */ 1.1407 + 1.1408 + /** @stable ICU 4.6 */ 1.1409 + UBLOCK_MANDAIC = 198, /*[0840]*/ 1.1410 + /** @stable ICU 4.6 */ 1.1411 + UBLOCK_BATAK = 199, /*[1BC0]*/ 1.1412 + /** @stable ICU 4.6 */ 1.1413 + UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/ 1.1414 + /** @stable ICU 4.6 */ 1.1415 + UBLOCK_BRAHMI = 201, /*[11000]*/ 1.1416 + /** @stable ICU 4.6 */ 1.1417 + UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/ 1.1418 + /** @stable ICU 4.6 */ 1.1419 + UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/ 1.1420 + /** @stable ICU 4.6 */ 1.1421 + UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/ 1.1422 + /** @stable ICU 4.6 */ 1.1423 + UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/ 1.1424 + /** @stable ICU 4.6 */ 1.1425 + UBLOCK_EMOTICONS = 206, /*[1F600]*/ 1.1426 + /** @stable ICU 4.6 */ 1.1427 + UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/ 1.1428 + /** @stable ICU 4.6 */ 1.1429 + UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/ 1.1430 + /** @stable ICU 4.6 */ 1.1431 + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/ 1.1432 + 1.1433 + /* New blocks in Unicode 6.1 */ 1.1434 + 1.1435 + /** @stable ICU 49 */ 1.1436 + UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/ 
1.1437 + /** @stable ICU 49 */ 1.1438 + UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/ 1.1439 + /** @stable ICU 49 */ 1.1440 + UBLOCK_CHAKMA = 212, /*[11100]*/ 1.1441 + /** @stable ICU 49 */ 1.1442 + UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/ 1.1443 + /** @stable ICU 49 */ 1.1444 + UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/ 1.1445 + /** @stable ICU 49 */ 1.1446 + UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/ 1.1447 + /** @stable ICU 49 */ 1.1448 + UBLOCK_MIAO = 216, /*[16F00]*/ 1.1449 + /** @stable ICU 49 */ 1.1450 + UBLOCK_SHARADA = 217, /*[11180]*/ 1.1451 + /** @stable ICU 49 */ 1.1452 + UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/ 1.1453 + /** @stable ICU 49 */ 1.1454 + UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/ 1.1455 + /** @stable ICU 49 */ 1.1456 + UBLOCK_TAKRI = 220, /*[11680]*/ 1.1457 + 1.1458 + /** One more than the highest block constant (UBLOCK_TAKRI = 220). @stable ICU 2.0 */ 1.1459 + UBLOCK_COUNT = 221, 1.1460 + 1.1461 + /** Negative sentinel that lies outside the range of the block constants. @stable ICU 2.0 */ 1.1462 + UBLOCK_INVALID_CODE=-1 1.1463 +}; 1.1464 + 1.1465 +/** Lets the enum be written as UBlockCode without the enum keyword. @stable ICU 2.0 */ 1.1466 +typedef enum UBlockCode UBlockCode; 1.1467 + 1.1468 +/** 1.1469 + * East Asian Width constants. No explicit values are assigned, so the constants take 0..5 in declaration order. 1.1470 + * 1.1471 + * @see UCHAR_EAST_ASIAN_WIDTH 1.1472 + * @see u_getIntPropertyValue 1.1473 + * @stable ICU 2.2 1.1474 + */ 1.1475 +typedef enum UEastAsianWidth { 1.1476 + /* 1.1477 + * Note: UEastAsianWidth constants are parsed by preparseucd.py. 1.1478 + * It matches lines like 1.1479 + * U_EA_<Unicode East_Asian_Width value name> 1.1480 + */ 1.1481 + 1.1482 + U_EA_NEUTRAL, /*[N]*/ 1.1483 + U_EA_AMBIGUOUS, /*[A]*/ 1.1484 + U_EA_HALFWIDTH, /*[H]*/ 1.1485 + U_EA_FULLWIDTH, /*[F]*/ 1.1486 + U_EA_NARROW, /*[Na]*/ 1.1487 + U_EA_WIDE, /*[W]*/ 1.1488 + U_EA_COUNT /* 6: one more than the last UEastAsianWidth constant */ 1.1489 +} UEastAsianWidth; 1.1490 + 1.1491 +/** 1.1492 + * Selector constants for u_charName(). 
1.1493 + * u_charName() returns the "modern" name of a 1.1494 + * Unicode character; or the name that was defined in 1.1495 + * Unicode version 1.0, before the Unicode standard merged 1.1496 + * with ISO-10646; or an "extended" name that gives each 1.1497 + * Unicode code point a unique name. 1.1498 + * 1.1499 + * @see u_charName 1.1500 + * @stable ICU 2.0 1.1501 + */ 1.1502 +typedef enum UCharNameChoice { 1.1503 + /** Unicode character name (Name property). @stable ICU 2.0 */ 1.1504 + U_UNICODE_CHAR_NAME, 1.1505 +#ifndef U_HIDE_DEPRECATED_API 1.1506 + /** 1.1507 + * The Unicode_1_Name property value which is of little practical value. 1.1508 + * Beginning with ICU 49, ICU APIs return an empty string for this name choice. 1.1509 + * @deprecated ICU 49 1.1510 + */ 1.1511 + U_UNICODE_10_CHAR_NAME, 1.1512 +#endif /* U_HIDE_DEPRECATED_API */ 1.1513 + /** Standard or synthetic character name. The explicit value keeps this constant at 2 even when U_UNICODE_10_CHAR_NAME is compiled out by U_HIDE_DEPRECATED_API. @stable ICU 2.0 */ 1.1514 + U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2, 1.1515 + /** Corrected name from NameAliases.txt. @stable ICU 4.4 */ 1.1516 + U_CHAR_NAME_ALIAS, 1.1517 + /** One more than the highest selector value above; not a name choice itself. @stable ICU 2.0 */ 1.1518 + U_CHAR_NAME_CHOICE_COUNT 1.1519 +} UCharNameChoice; 1.1520 + 1.1521 +/** 1.1522 + * Selector constants for u_getPropertyName() and 1.1523 + * u_getPropertyValueName(). These selectors are used to choose which 1.1524 + * name is returned for a given property or value. All properties and 1.1525 + * values have a long name. Most have a short name, but some do not. 1.1526 + * Unicode allows for additional names, beyond the long and short 1.1527 + * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where 1.1528 + * i=1, 2,... 
1.1529 + * 1.1530 + * @see u_getPropertyName() 1.1531 + * @see u_getPropertyValueName() 1.1532 + * @stable ICU 2.4 1.1533 + */ 1.1534 +typedef enum UPropertyNameChoice { 1.1535 + U_SHORT_PROPERTY_NAME, /**< Selects the short name; some properties and values have none. */ 1.1536 + U_LONG_PROPERTY_NAME, /**< Selects the long name, which every property and value has. */ 1.1537 + U_PROPERTY_NAME_CHOICE_COUNT /**< Number of selectors named here; further names use U_LONG_PROPERTY_NAME + i. */ 1.1538 +} UPropertyNameChoice; 1.1539 + 1.1540 +/** 1.1541 + * Decomposition Type constants. 1.1542 + * 1.1543 + * @see UCHAR_DECOMPOSITION_TYPE 1.1544 + * @stable ICU 2.2 1.1545 + */ 1.1546 +typedef enum UDecompositionType { 1.1547 + /* 1.1548 + * Note: UDecompositionType constants are parsed by preparseucd.py. 1.1549 + * It matches lines like 1.1550 + * U_DT_<Unicode Decomposition_Type value name> 1.1551 + */ 1.1552 + 1.1553 + U_DT_NONE, /*[none]*/ 1.1554 + U_DT_CANONICAL, /*[can]*/ 1.1555 + U_DT_COMPAT, /*[com]*/ 1.1556 + U_DT_CIRCLE, /*[enc]*/ 1.1557 + U_DT_FINAL, /*[fin]*/ 1.1558 + U_DT_FONT, /*[font]*/ 1.1559 + U_DT_FRACTION, /*[fra]*/ 1.1560 + U_DT_INITIAL, /*[init]*/ 1.1561 + U_DT_ISOLATED, /*[iso]*/ 1.1562 + U_DT_MEDIAL, /*[med]*/ 1.1563 + U_DT_NARROW, /*[nar]*/ 1.1564 + U_DT_NOBREAK, /*[nb]*/ 1.1565 + U_DT_SMALL, /*[sml]*/ 1.1566 + U_DT_SQUARE, /*[sqr]*/ 1.1567 + U_DT_SUB, /*[sub]*/ 1.1568 + U_DT_SUPER, /*[sup]*/ 1.1569 + U_DT_VERTICAL, /*[vert]*/ 1.1570 + U_DT_WIDE, /*[wide]*/ 1.1571 + U_DT_COUNT /* 18 */ 1.1572 +} UDecompositionType; 1.1573 + 1.1574 +/** 1.1575 + * Joining Type constants. 1.1576 + * 1.1577 + * @see UCHAR_JOINING_TYPE 1.1578 + * @stable ICU 2.2 1.1579 + */ 1.1580 +typedef enum UJoiningType { 1.1581 + /* 1.1582 + * Note: UJoiningType constants are parsed by preparseucd.py. 
1.1583 + * It matches lines like 1.1584 + * U_JT_<Unicode Joining_Type value name> 1.1585 + */ 1.1586 + 1.1587 + U_JT_NON_JOINING, /*[U]*/ 1.1588 + U_JT_JOIN_CAUSING, /*[C]*/ 1.1589 + U_JT_DUAL_JOINING, /*[D]*/ 1.1590 + U_JT_LEFT_JOINING, /*[L]*/ 1.1591 + U_JT_RIGHT_JOINING, /*[R]*/ 1.1592 + U_JT_TRANSPARENT, /*[T]*/ 1.1593 + U_JT_COUNT /* 6 */ 1.1594 +} UJoiningType; 1.1595 + 1.1596 +/** 1.1597 + * Joining Group constants. 1.1598 + * 1.1599 + * @see UCHAR_JOINING_GROUP 1.1600 + * @stable ICU 2.2 1.1601 + */ 1.1602 +typedef enum UJoiningGroup { 1.1603 + /* 1.1604 + * Note: UJoiningGroup constants are parsed by preparseucd.py. 1.1605 + * It matches lines like 1.1606 + * U_JG_<Unicode Joining_Group value name> 1.1607 + */ 1.1608 + 1.1609 + U_JG_NO_JOINING_GROUP, 1.1610 + U_JG_AIN, 1.1611 + U_JG_ALAPH, 1.1612 + U_JG_ALEF, 1.1613 + U_JG_BEH, 1.1614 + U_JG_BETH, 1.1615 + U_JG_DAL, 1.1616 + U_JG_DALATH_RISH, 1.1617 + U_JG_E, 1.1618 + U_JG_FEH, 1.1619 + U_JG_FINAL_SEMKATH, 1.1620 + U_JG_GAF, 1.1621 + U_JG_GAMAL, 1.1622 + U_JG_HAH, 1.1623 + U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */ 1.1624 + U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL, 1.1625 + U_JG_HE, 1.1626 + U_JG_HEH, 1.1627 + U_JG_HEH_GOAL, 1.1628 + U_JG_HETH, 1.1629 + U_JG_KAF, 1.1630 + U_JG_KAPH, 1.1631 + U_JG_KNOTTED_HEH, 1.1632 + U_JG_LAM, 1.1633 + U_JG_LAMADH, 1.1634 + U_JG_MEEM, 1.1635 + U_JG_MIM, 1.1636 + U_JG_NOON, 1.1637 + U_JG_NUN, 1.1638 + U_JG_PE, 1.1639 + U_JG_QAF, 1.1640 + U_JG_QAPH, 1.1641 + U_JG_REH, 1.1642 + U_JG_REVERSED_PE, 1.1643 + U_JG_SAD, 1.1644 + U_JG_SADHE, 1.1645 + U_JG_SEEN, 1.1646 + U_JG_SEMKATH, 1.1647 + U_JG_SHIN, 1.1648 + U_JG_SWASH_KAF, 1.1649 + U_JG_SYRIAC_WAW, 1.1650 + U_JG_TAH, 1.1651 + U_JG_TAW, 1.1652 + U_JG_TEH_MARBUTA, 1.1653 + U_JG_TETH, 1.1654 + U_JG_WAW, 1.1655 + U_JG_YEH, 1.1656 + U_JG_YEH_BARREE, 1.1657 + U_JG_YEH_WITH_TAIL, 1.1658 + U_JG_YUDH, 1.1659 + U_JG_YUDH_HE, 1.1660 + U_JG_ZAIN, 1.1661 + U_JG_FE, /**< @stable ICU 2.6 */ 1.1662 + U_JG_KHAPH, /**< @stable ICU 2.6 
*/ 1.1663 + U_JG_ZHAIN, /**< @stable ICU 2.6 */ 1.1664 + U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */ 1.1665 + U_JG_FARSI_YEH, /**< @stable ICU 4.4 */ 1.1666 + U_JG_NYA, /**< @stable ICU 4.4 */ 1.1667 + U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */ 1.1668 + U_JG_COUNT 1.1669 +} UJoiningGroup; 1.1670 + 1.1671 +/** 1.1672 + * Grapheme Cluster Break constants. 1.1673 + * 1.1674 + * @see UCHAR_GRAPHEME_CLUSTER_BREAK 1.1675 + * @stable ICU 3.4 1.1676 + */ 1.1677 +typedef enum UGraphemeClusterBreak { 1.1678 + /* 1.1679 + * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py. 1.1680 + * It matches lines like 1.1681 + * U_GCB_<Unicode Grapheme_Cluster_Break value name> 1.1682 + */ 1.1683 + 1.1684 + U_GCB_OTHER = 0, /*[XX]*/ 1.1685 + U_GCB_CONTROL = 1, /*[CN]*/ 1.1686 + U_GCB_CR = 2, /*[CR]*/ 1.1687 + U_GCB_EXTEND = 3, /*[EX]*/ 1.1688 + U_GCB_L = 4, /*[L]*/ 1.1689 + U_GCB_LF = 5, /*[LF]*/ 1.1690 + U_GCB_LV = 6, /*[LV]*/ 1.1691 + U_GCB_LVT = 7, /*[LVT]*/ 1.1692 + U_GCB_T = 8, /*[T]*/ 1.1693 + U_GCB_V = 9, /*[V]*/ 1.1694 + U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ 1.1695 + U_GCB_PREPEND = 11, /*[PP]*/ 1.1696 + U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 1.1697 + U_GCB_COUNT = 13 1.1698 +} UGraphemeClusterBreak; 1.1699 + 1.1700 +/** 1.1701 + * Word Break constants. 1.1702 + * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.) 1.1703 + * 1.1704 + * @see UCHAR_WORD_BREAK 1.1705 + * @stable ICU 3.4 1.1706 + */ 1.1707 +typedef enum UWordBreakValues { 1.1708 + /* 1.1709 + * Note: UWordBreakValues constants are parsed by preparseucd.py. 
1.1710 + * It matches lines like 1.1711 + * U_WB_<Unicode Word_Break value name> 1.1712 + */ 1.1713 + 1.1714 + U_WB_OTHER = 0, /*[XX]*/ 1.1715 + U_WB_ALETTER = 1, /*[LE]*/ 1.1716 + U_WB_FORMAT = 2, /*[FO]*/ 1.1717 + U_WB_KATAKANA = 3, /*[KA]*/ 1.1718 + U_WB_MIDLETTER = 4, /*[ML]*/ 1.1719 + U_WB_MIDNUM = 5, /*[MN]*/ 1.1720 + U_WB_NUMERIC = 6, /*[NU]*/ 1.1721 + U_WB_EXTENDNUMLET = 7, /*[EX]*/ 1.1722 + U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ 1.1723 + U_WB_EXTEND = 9, /*[Extend]*/ 1.1724 + U_WB_LF = 10, /*[LF]*/ 1.1725 + U_WB_MIDNUMLET =11, /*[MB]*/ 1.1726 + U_WB_NEWLINE =12, /*[NL]*/ 1.1727 + U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 1.1728 + U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 1.1729 + U_WB_SINGLE_QUOTE = 15, /*[SQ]*/ 1.1730 + U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/ 1.1731 + U_WB_COUNT = 17 1.1732 +} UWordBreakValues; 1.1733 + 1.1734 +/** 1.1735 + * Sentence Break constants. 1.1736 + * 1.1737 + * @see UCHAR_SENTENCE_BREAK 1.1738 + * @stable ICU 3.4 1.1739 + */ 1.1740 +typedef enum USentenceBreak { 1.1741 + /* 1.1742 + * Note: USentenceBreak constants are parsed by preparseucd.py. 1.1743 + * It matches lines like 1.1744 + * U_SB_<Unicode Sentence_Break value name> 1.1745 + */ 1.1746 + 1.1747 + U_SB_OTHER = 0, /*[XX]*/ 1.1748 + U_SB_ATERM = 1, /*[AT]*/ 1.1749 + U_SB_CLOSE = 2, /*[CL]*/ 1.1750 + U_SB_FORMAT = 3, /*[FO]*/ 1.1751 + U_SB_LOWER = 4, /*[LO]*/ 1.1752 + U_SB_NUMERIC = 5, /*[NU]*/ 1.1753 + U_SB_OLETTER = 6, /*[LE]*/ 1.1754 + U_SB_SEP = 7, /*[SE]*/ 1.1755 + U_SB_SP = 8, /*[SP]*/ 1.1756 + U_SB_STERM = 9, /*[ST]*/ 1.1757 + U_SB_UPPER = 10, /*[UP]*/ 1.1758 + U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ 1.1759 + U_SB_EXTEND = 12, /*[EX]*/ 1.1760 + U_SB_LF = 13, /*[LF]*/ 1.1761 + U_SB_SCONTINUE = 14, /*[SC]*/ 1.1762 + U_SB_COUNT = 15 1.1763 +} USentenceBreak; 1.1764 + 1.1765 +/** 1.1766 + * Line Break constants. 
1.1767 + * 1.1768 + * @see UCHAR_LINE_BREAK 1.1769 + * @stable ICU 2.2 1.1770 + */ 1.1771 +typedef enum ULineBreak { 1.1772 + /* 1.1773 + * Note: ULineBreak constants are parsed by preparseucd.py. 1.1774 + * It matches lines like 1.1775 + * U_LB_<Unicode Line_Break value name> 1.1776 + */ 1.1777 + 1.1778 + U_LB_UNKNOWN = 0, /*[XX]*/ 1.1779 + U_LB_AMBIGUOUS = 1, /*[AI]*/ 1.1780 + U_LB_ALPHABETIC = 2, /*[AL]*/ 1.1781 + U_LB_BREAK_BOTH = 3, /*[B2]*/ 1.1782 + U_LB_BREAK_AFTER = 4, /*[BA]*/ 1.1783 + U_LB_BREAK_BEFORE = 5, /*[BB]*/ 1.1784 + U_LB_MANDATORY_BREAK = 6, /*[BK]*/ 1.1785 + U_LB_CONTINGENT_BREAK = 7, /*[CB]*/ 1.1786 + U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/ 1.1787 + U_LB_COMBINING_MARK = 9, /*[CM]*/ 1.1788 + U_LB_CARRIAGE_RETURN = 10, /*[CR]*/ 1.1789 + U_LB_EXCLAMATION = 11, /*[EX]*/ 1.1790 + U_LB_GLUE = 12, /*[GL]*/ 1.1791 + U_LB_HYPHEN = 13, /*[HY]*/ 1.1792 + U_LB_IDEOGRAPHIC = 14, /*[ID]*/ 1.1793 + /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */ 1.1794 + U_LB_INSEPARABLE = 15, /*[IN]*/ 1.1795 + U_LB_INSEPERABLE = U_LB_INSEPARABLE, 1.1796 + U_LB_INFIX_NUMERIC = 16, /*[IS]*/ 1.1797 + U_LB_LINE_FEED = 17, /*[LF]*/ 1.1798 + U_LB_NONSTARTER = 18, /*[NS]*/ 1.1799 + U_LB_NUMERIC = 19, /*[NU]*/ 1.1800 + U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/ 1.1801 + U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/ 1.1802 + U_LB_PREFIX_NUMERIC = 22, /*[PR]*/ 1.1803 + U_LB_QUOTATION = 23, /*[QU]*/ 1.1804 + U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/ 1.1805 + U_LB_SURROGATE = 25, /*[SG]*/ 1.1806 + U_LB_SPACE = 26, /*[SP]*/ 1.1807 + U_LB_BREAK_SYMBOLS = 27, /*[SY]*/ 1.1808 + U_LB_ZWSPACE = 28, /*[ZW]*/ 1.1809 + U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 1.1810 + U_LB_WORD_JOINER = 30, /*[WJ]*/ 1.1811 + U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */ 1.1812 + U_LB_H3 = 32, /*[H3]*/ 1.1813 + U_LB_JL = 33, /*[JL]*/ 1.1814 + U_LB_JT = 34, /*[JT]*/ 1.1815 + U_LB_JV = 35, /*[JV]*/ 1.1816 + U_LB_CLOSE_PARENTHESIS = 36, 
/*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ 1.1817 + U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 1.1818 + U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 1.1819 + U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 1.1820 + U_LB_COUNT = 40 1.1821 +} ULineBreak; 1.1822 + 1.1823 +/** 1.1824 + * Numeric Type constants. 1.1825 + * 1.1826 + * @see UCHAR_NUMERIC_TYPE 1.1827 + * @stable ICU 2.2 1.1828 + */ 1.1829 +typedef enum UNumericType { 1.1830 + /* 1.1831 + * Note: UNumericType constants are parsed by preparseucd.py. 1.1832 + * It matches lines like 1.1833 + * U_NT_<Unicode Numeric_Type value name> 1.1834 + */ 1.1835 + 1.1836 + U_NT_NONE, /*[None]*/ 1.1837 + U_NT_DECIMAL, /*[de]*/ 1.1838 + U_NT_DIGIT, /*[di]*/ 1.1839 + U_NT_NUMERIC, /*[nu]*/ 1.1840 + U_NT_COUNT 1.1841 +} UNumericType; 1.1842 + 1.1843 +/** 1.1844 + * Hangul Syllable Type constants. 1.1845 + * 1.1846 + * @see UCHAR_HANGUL_SYLLABLE_TYPE 1.1847 + * @stable ICU 2.6 1.1848 + */ 1.1849 +typedef enum UHangulSyllableType { 1.1850 + /* 1.1851 + * Note: UHangulSyllableType constants are parsed by preparseucd.py. 1.1852 + * It matches lines like 1.1853 + * U_HST_<Unicode Hangul_Syllable_Type value name> 1.1854 + */ 1.1855 + 1.1856 + U_HST_NOT_APPLICABLE, /*[NA]*/ 1.1857 + U_HST_LEADING_JAMO, /*[L]*/ 1.1858 + U_HST_VOWEL_JAMO, /*[V]*/ 1.1859 + U_HST_TRAILING_JAMO, /*[T]*/ 1.1860 + U_HST_LV_SYLLABLE, /*[LV]*/ 1.1861 + U_HST_LVT_SYLLABLE, /*[LVT]*/ 1.1862 + U_HST_COUNT 1.1863 +} UHangulSyllableType; 1.1864 + 1.1865 +/** 1.1866 + * Check a binary Unicode property for a code point. 1.1867 + * 1.1868 + * Unicode, especially in version 3.2, defines many more properties than the 1.1869 + * original set in UnicodeData.txt. 1.1870 + * 1.1871 + * The properties APIs are intended to reflect Unicode properties as defined 1.1872 + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). 
1.1873 + * For details about the properties see http://www.unicode.org/ucd/ . 1.1874 + * For names of Unicode properties see the UCD file PropertyAliases.txt. 1.1875 + * 1.1876 + * Important: If ICU is built with UCD files from Unicode versions below 3.2, 1.1877 + * then properties marked with "new in Unicode 3.2" are not or not fully available. 1.1878 + * 1.1879 + * @param c Code point to test. 1.1880 + * @param which UProperty selector constant, identifies which binary property to check. 1.1881 + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT. 1.1882 + * @return TRUE or FALSE according to the binary Unicode property value for c. 1.1883 + * Also FALSE if 'which' is out of bounds or if the Unicode version 1.1884 + * does not have data for the property at all, or not for this code point. 1.1885 + * 1.1886 + * @see UProperty 1.1887 + * @see u_getIntPropertyValue 1.1888 + * @see u_getUnicodeVersion 1.1889 + * @stable ICU 2.1 1.1890 + */ 1.1891 +U_STABLE UBool U_EXPORT2 1.1892 +u_hasBinaryProperty(UChar32 c, UProperty which); 1.1893 + 1.1894 +/** 1.1895 + * Check if a code point has the Alphabetic Unicode property. 1.1896 + * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). 1.1897 + * This is different from u_isalpha! 1.1898 + * @param c Code point to test 1.1899 + * @return true if the code point has the Alphabetic Unicode property, false otherwise 1.1900 + * 1.1901 + * @see UCHAR_ALPHABETIC 1.1902 + * @see u_isalpha 1.1903 + * @see u_hasBinaryProperty 1.1904 + * @stable ICU 2.1 1.1905 + */ 1.1906 +U_STABLE UBool U_EXPORT2 1.1907 +u_isUAlphabetic(UChar32 c); 1.1908 + 1.1909 +/** 1.1910 + * Check if a code point has the Lowercase Unicode property. 1.1911 + * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE). 1.1912 + * This is different from u_islower! 
1.1913 + * @param c Code point to test 1.1914 + * @return true if the code point has the Lowercase Unicode property, false otherwise 1.1915 + * 1.1916 + * @see UCHAR_LOWERCASE 1.1917 + * @see u_islower 1.1918 + * @see u_hasBinaryProperty 1.1919 + * @stable ICU 2.1 1.1920 + */ 1.1921 +U_STABLE UBool U_EXPORT2 1.1922 +u_isULowercase(UChar32 c); 1.1923 + 1.1924 +/** 1.1925 + * Check if a code point has the Uppercase Unicode property. 1.1926 + * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE). 1.1927 + * This is different from u_isupper! 1.1928 + * @param c Code point to test 1.1929 + * @return true if the code point has the Uppercase Unicode property, false otherwise 1.1930 + * 1.1931 + * @see UCHAR_UPPERCASE 1.1932 + * @see u_isupper 1.1933 + * @see u_hasBinaryProperty 1.1934 + * @stable ICU 2.1 1.1935 + */ 1.1936 +U_STABLE UBool U_EXPORT2 1.1937 +u_isUUppercase(UChar32 c); 1.1938 + 1.1939 +/** 1.1940 + * Check if a code point has the White_Space Unicode property. 1.1941 + * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). 1.1942 + * This is different from both u_isspace and u_isWhitespace! 1.1943 + * 1.1944 + * Note: There are several ICU whitespace functions; please see the uchar.h 1.1945 + * file documentation for a detailed comparison. 1.1946 + * 1.1947 + * @param c Code point to test 1.1948 + * @return true if the code point has the White_Space Unicode property, false otherwise. 1.1949 + * 1.1950 + * @see UCHAR_WHITE_SPACE 1.1951 + * @see u_isWhitespace 1.1952 + * @see u_isspace 1.1953 + * @see u_isJavaSpaceChar 1.1954 + * @see u_hasBinaryProperty 1.1955 + * @stable ICU 2.1 1.1956 + */ 1.1957 +U_STABLE UBool U_EXPORT2 1.1958 +u_isUWhiteSpace(UChar32 c); 1.1959 + 1.1960 +/** 1.1961 + * Get the property value for an enumerated or integer Unicode property for a code point. 1.1962 + * Also returns binary and mask property values. 
1.1963 + * 1.1964 + * Unicode, especially in version 3.2, defines many more properties than the 1.1965 + * original set in UnicodeData.txt. 1.1966 + * 1.1967 + * The properties APIs are intended to reflect Unicode properties as defined 1.1968 + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). 1.1969 + * For details about the properties see http://www.unicode.org/ . 1.1970 + * For names of Unicode properties see the UCD file PropertyAliases.txt. 1.1971 + * 1.1972 + * Sample usage: 1.1973 + * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); 1.1974 + * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC); 1.1975 + * 1.1976 + * @param c Code point to test. 1.1977 + * @param which UProperty selector constant, identifies which property to check. 1.1978 + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT 1.1979 + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT 1.1980 + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. 1.1981 + * @return Numeric value that is directly the property value or, 1.1982 + * for enumerated properties, corresponds to the numeric value of the enumerated 1.1983 + * constant of the respective property value enumeration type 1.1984 + * (cast to enum type if necessary). 1.1985 + * Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties. 1.1986 + * Returns a bit-mask for mask properties. 1.1987 + * Returns 0 if 'which' is out of bounds or if the Unicode version 1.1988 + * does not have data for the property at all, or not for this code point. 1.1989 + * 1.1990 + * @see UProperty 1.1991 + * @see u_hasBinaryProperty 1.1992 + * @see u_getIntPropertyMinValue 1.1993 + * @see u_getIntPropertyMaxValue 1.1994 + * @see u_getUnicodeVersion 1.1995 + * @stable ICU 2.2 1.1996 + */ 1.1997 +U_STABLE int32_t U_EXPORT2 1.1998 +u_getIntPropertyValue(UChar32 c, UProperty which); 1.1999 + 1.2000 +/** 1.2001 + * Get the minimum value for an enumerated/integer/binary Unicode property. 
1.2002 + * Can be used together with u_getIntPropertyMaxValue 1.2003 + * to allocate arrays of UnicodeSet or similar. 1.2004 + * 1.2005 + * @param which UProperty selector constant, identifies which property to check. 1.2006 + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT 1.2007 + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. 1.2008 + * @return Minimum value returned by u_getIntPropertyValue for a Unicode property. 1.2009 + * 0 if the property selector is out of range. 1.2010 + * 1.2011 + * @see UProperty 1.2012 + * @see u_hasBinaryProperty 1.2013 + * @see u_getUnicodeVersion 1.2014 + * @see u_getIntPropertyMaxValue 1.2015 + * @see u_getIntPropertyValue 1.2016 + * @stable ICU 2.2 1.2017 + */ 1.2018 +U_STABLE int32_t U_EXPORT2 1.2019 +u_getIntPropertyMinValue(UProperty which); 1.2020 + 1.2021 +/** 1.2022 + * Get the maximum value for an enumerated/integer/binary Unicode property. 1.2023 + * Can be used together with u_getIntPropertyMinValue 1.2024 + * to allocate arrays of UnicodeSet or similar. 1.2025 + * 1.2026 + * Examples for min/max values (for Unicode 3.2): 1.2027 + * 1.2028 + * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL) 1.2029 + * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA) 1.2030 + * - UCHAR_IDEOGRAPHIC: 0/1 (FALSE/TRUE) 1.2031 + * 1.2032 + * For undefined UProperty constant values, min/max values will be 0/-1. 1.2033 + * 1.2034 + * @param which UProperty selector constant, identifies which property to check. 1.2035 + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT 1.2036 + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. 1.2037 + * @return Maximum value returned by u_getIntPropertyValue for a Unicode property. 1.2038 + * <=0 if the property selector is out of range. 
1.2039 + * 1.2040 + * @see UProperty 1.2041 + * @see u_hasBinaryProperty 1.2042 + * @see u_getUnicodeVersion 1.2043 + * @see u_getIntPropertyMinValue 1.2044 + * @see u_getIntPropertyValue 1.2045 + * @stable ICU 2.2 1.2046 + */ 1.2047 +U_STABLE int32_t U_EXPORT2 1.2048 +u_getIntPropertyMaxValue(UProperty which); 1.2049 + 1.2050 +/** 1.2051 + * Get the numeric value for a Unicode code point as defined in the 1.2052 + * Unicode Character Database. 1.2053 + * 1.2054 + * A "double" return type is necessary because 1.2055 + * some numeric values are fractions, negative, or too large for int32_t. 1.2056 + * 1.2057 + * For characters without any numeric values in the Unicode Character Database, 1.2058 + * this function will return U_NO_NUMERIC_VALUE. 1.2059 + * Note: This is different from the Unicode Standard which specifies NaN as the default value. 1.2060 + * (NaN is not available on all platforms.) 1.2061 + * 1.2062 + * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() 1.2063 + * also supports negative values, large values, and fractions, 1.2064 + * while Java's getNumericValue() returns values 10..35 for ASCII letters. 1.2065 + * 1.2066 + * @param c Code point to get the numeric value for. 1.2067 + * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined. 1.2068 + * 1.2069 + * @see U_NO_NUMERIC_VALUE 1.2070 + * @stable ICU 2.2 1.2071 + */ 1.2072 +U_STABLE double U_EXPORT2 1.2073 +u_getNumericValue(UChar32 c); 1.2074 + 1.2075 +/** 1.2076 + * Special value that is returned by u_getNumericValue when 1.2077 + * no numeric value is defined for a code point. 1.2078 + * 1.2079 + * @see u_getNumericValue 1.2080 + * @stable ICU 2.2 1.2081 + */ 1.2082 +#define U_NO_NUMERIC_VALUE ((double)-123456789.) 1.2083 + 1.2084 +/** 1.2085 + * Determines whether the specified code point has the general category "Ll" 1.2086 + * (lowercase letter). 1.2087 + * 1.2088 + * Same as java.lang.Character.isLowerCase(). 
1.2089 + * 1.2090 + * This misses some characters that are also lowercase but 1.2091 + * have a different general category value. 1.2092 + * In order to include those, use UCHAR_LOWERCASE. 1.2093 + * 1.2094 + * In addition to being equivalent to a Java function, this also serves 1.2095 + * as a C/POSIX migration function. 1.2096 + * See the comments about C/POSIX character classification functions in the 1.2097 + * documentation at the top of this header file. 1.2098 + * 1.2099 + * @param c the code point to be tested 1.2100 + * @return TRUE if the code point is an Ll lowercase letter 1.2101 + * 1.2102 + * @see UCHAR_LOWERCASE 1.2103 + * @see u_isupper 1.2104 + * @see u_istitle 1.2105 + * @stable ICU 2.0 1.2106 + */ 1.2107 +U_STABLE UBool U_EXPORT2 1.2108 +u_islower(UChar32 c); 1.2109 + 1.2110 +/** 1.2111 + * Determines whether the specified code point has the general category "Lu" 1.2112 + * (uppercase letter). 1.2113 + * 1.2114 + * Same as java.lang.Character.isUpperCase(). 1.2115 + * 1.2116 + * This misses some characters that are also uppercase but 1.2117 + * have a different general category value. 1.2118 + * In order to include those, use UCHAR_UPPERCASE. 1.2119 + * 1.2120 + * In addition to being equivalent to a Java function, this also serves 1.2121 + * as a C/POSIX migration function. 1.2122 + * See the comments about C/POSIX character classification functions in the 1.2123 + * documentation at the top of this header file. 1.2124 + * 1.2125 + * @param c the code point to be tested 1.2126 + * @return TRUE if the code point is an Lu uppercase letter 1.2127 + * 1.2128 + * @see UCHAR_UPPERCASE 1.2129 + * @see u_islower 1.2130 + * @see u_istitle 1.2131 + * @see u_tolower 1.2132 + * @stable ICU 2.0 1.2133 + */ 1.2134 +U_STABLE UBool U_EXPORT2 1.2135 +u_isupper(UChar32 c); 1.2136 + 1.2137 +/** 1.2138 + * Determines whether the specified code point is a titlecase letter. 1.2139 + * True for general category "Lt" (titlecase letter). 
1.2140 + * 1.2141 + * Same as java.lang.Character.isTitleCase(). 1.2142 + * 1.2143 + * @param c the code point to be tested 1.2144 + * @return TRUE if the code point is an Lt titlecase letter 1.2145 + * 1.2146 + * @see u_isupper 1.2147 + * @see u_islower 1.2148 + * @see u_totitle 1.2149 + * @stable ICU 2.0 1.2150 + */ 1.2151 +U_STABLE UBool U_EXPORT2 1.2152 +u_istitle(UChar32 c); 1.2153 + 1.2154 +/** 1.2155 + * Determines whether the specified code point is a digit character according to Java. 1.2156 + * True for characters with general category "Nd" (decimal digit numbers). 1.2157 + * Beginning with Unicode 4, this is the same as 1.2158 + * testing for the Numeric_Type of Decimal. 1.2159 + * 1.2160 + * Same as java.lang.Character.isDigit(). 1.2161 + * 1.2162 + * In addition to being equivalent to a Java function, this also serves 1.2163 + * as a C/POSIX migration function. 1.2164 + * See the comments about C/POSIX character classification functions in the 1.2165 + * documentation at the top of this header file. 1.2166 + * 1.2167 + * @param c the code point to be tested 1.2168 + * @return TRUE if the code point is a digit character according to Character.isDigit() 1.2169 + * 1.2170 + * @stable ICU 2.0 1.2171 + */ 1.2172 +U_STABLE UBool U_EXPORT2 1.2173 +u_isdigit(UChar32 c); 1.2174 + 1.2175 +/** 1.2176 + * Determines whether the specified code point is a letter character. 1.2177 + * True for general categories "L" (letters). 1.2178 + * 1.2179 + * Same as java.lang.Character.isLetter(). 1.2180 + * 1.2181 + * In addition to being equivalent to a Java function, this also serves 1.2182 + * as a C/POSIX migration function. 1.2183 + * See the comments about C/POSIX character classification functions in the 1.2184 + * documentation at the top of this header file. 
1.2185 + * 1.2186 + * @param c the code point to be tested 1.2187 + * @return TRUE if the code point is a letter character 1.2188 + * 1.2189 + * @see u_isdigit 1.2190 + * @see u_isalnum 1.2191 + * @stable ICU 2.0 1.2192 + */ 1.2193 +U_STABLE UBool U_EXPORT2 1.2194 +u_isalpha(UChar32 c); 1.2195 + 1.2196 +/** 1.2197 + * Determines whether the specified code point is an alphanumeric character 1.2198 + * (letter or digit) according to Java. 1.2199 + * True for characters with general categories 1.2200 + * "L" (letters) and "Nd" (decimal digit numbers). 1.2201 + * 1.2202 + * Same as java.lang.Character.isLetterOrDigit(). 1.2203 + * 1.2204 + * In addition to being equivalent to a Java function, this also serves 1.2205 + * as a C/POSIX migration function. 1.2206 + * See the comments about C/POSIX character classification functions in the 1.2207 + * documentation at the top of this header file. 1.2208 + * 1.2209 + * @param c the code point to be tested 1.2210 + * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit() 1.2211 + * 1.2212 + * @stable ICU 2.0 1.2213 + */ 1.2214 +U_STABLE UBool U_EXPORT2 1.2215 +u_isalnum(UChar32 c); 1.2216 + 1.2217 +/** 1.2218 + * Determines whether the specified code point is a hexadecimal digit. 1.2219 + * This is equivalent to u_digit(c, 16)>=0. 1.2220 + * True for characters with general category "Nd" (decimal digit numbers) 1.2221 + * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. 1.2222 + * (That is, for letters with code points 1.2223 + * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.) 1.2224 + * 1.2225 + * In order to narrow the definition of hexadecimal digits to only ASCII 1.2226 + * characters, use (c<=0x7f && u_isxdigit(c)). 1.2227 + * 1.2228 + * This is a C/POSIX migration function. 1.2229 + * See the comments about C/POSIX character classification functions in the 1.2230 + * documentation at the top of this header file. 
1.2231 + * 1.2232 + * @param c the code point to be tested 1.2233 + * @return TRUE if the code point is a hexadecimal digit 1.2234 + * 1.2235 + * @stable ICU 2.6 1.2236 + */ 1.2237 +U_STABLE UBool U_EXPORT2 1.2238 +u_isxdigit(UChar32 c); 1.2239 + 1.2240 +/** 1.2241 + * Determines whether the specified code point is a punctuation character. 1.2242 + * True for characters with general categories "P" (punctuation). 1.2243 + * 1.2244 + * This is a C/POSIX migration function. 1.2245 + * See the comments about C/POSIX character classification functions in the 1.2246 + * documentation at the top of this header file. 1.2247 + * 1.2248 + * @param c the code point to be tested 1.2249 + * @return TRUE if the code point is a punctuation character 1.2250 + * 1.2251 + * @stable ICU 2.6 1.2252 + */ 1.2253 +U_STABLE UBool U_EXPORT2 1.2254 +u_ispunct(UChar32 c); 1.2255 + 1.2256 +/** 1.2257 + * Determines whether the specified code point is a "graphic" character 1.2258 + * (printable, excluding spaces). 1.2259 + * TRUE for all characters except those with general categories 1.2260 + * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), 1.2261 + * "Cn" (unassigned), and "Z" (separators). 1.2262 + * 1.2263 + * This is a C/POSIX migration function. 1.2264 + * See the comments about C/POSIX character classification functions in the 1.2265 + * documentation at the top of this header file. 1.2266 + * 1.2267 + * @param c the code point to be tested 1.2268 + * @return TRUE if the code point is a "graphic" character 1.2269 + * 1.2270 + * @stable ICU 2.6 1.2271 + */ 1.2272 +U_STABLE UBool U_EXPORT2 1.2273 +u_isgraph(UChar32 c); 1.2274 + 1.2275 +/** 1.2276 + * Determines whether the specified code point is a "blank" or "horizontal space", 1.2277 + * a character that visibly separates words on a line. 
1.2278 + * The following are equivalent definitions: 1.2279 + * 1.2280 + * TRUE for Unicode White_Space characters except for "vertical space controls" 1.2281 + * where "vertical space controls" are the following characters: 1.2282 + * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS) 1.2283 + * 1.2284 + * same as 1.2285 + * 1.2286 + * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators) 1.2287 + * except Zero Width Space (ZWSP, U+200B). 1.2288 + * 1.2289 + * Note: There are several ICU whitespace functions; please see the uchar.h 1.2290 + * file documentation for a detailed comparison. 1.2291 + * 1.2292 + * This is a C/POSIX migration function. 1.2293 + * See the comments about C/POSIX character classification functions in the 1.2294 + * documentation at the top of this header file. 1.2295 + * 1.2296 + * @param c the code point to be tested 1.2297 + * @return TRUE if the code point is a "blank" 1.2298 + * 1.2299 + * @stable ICU 2.6 1.2300 + */ 1.2301 +U_STABLE UBool U_EXPORT2 1.2302 +u_isblank(UChar32 c); 1.2303 + 1.2304 +/** 1.2305 + * Determines whether the specified code point is "defined", 1.2306 + * which usually means that it is assigned a character. 1.2307 + * True for general categories other than "Cn" (other, not assigned), 1.2308 + * i.e., true for all code points mentioned in UnicodeData.txt. 1.2309 + * 1.2310 + * Note that non-character code points (e.g., U+FDD0) are not "defined" 1.2311 + * (they are Cn), but surrogate code points are "defined" (Cs). 1.2312 + * 1.2313 + * Same as java.lang.Character.isDefined(). 
1.2314 + * 1.2315 + * @param c the code point to be tested 1.2316 + * @return TRUE if the code point is assigned a character 1.2317 + * 1.2318 + * @see u_isdigit 1.2319 + * @see u_isalpha 1.2320 + * @see u_isalnum 1.2321 + * @see u_isupper 1.2322 + * @see u_islower 1.2323 + * @see u_istitle 1.2324 + * @stable ICU 2.0 1.2325 + */ 1.2326 +U_STABLE UBool U_EXPORT2 1.2327 +u_isdefined(UChar32 c); 1.2328 + 1.2329 +/** 1.2330 + * Determines if the specified character is a space character or not. 1.2331 + * 1.2332 + * Note: There are several ICU whitespace functions; please see the uchar.h 1.2333 + * file documentation for a detailed comparison. 1.2334 + * 1.2335 + * This is a C/POSIX migration function. 1.2336 + * See the comments about C/POSIX character classification functions in the 1.2337 + * documentation at the top of this header file. 1.2338 + * 1.2339 + * @param c the character to be tested 1.2340 + * @return true if the character is a space character; false otherwise. 1.2341 + * 1.2342 + * @see u_isJavaSpaceChar 1.2343 + * @see u_isWhitespace 1.2344 + * @see u_isUWhiteSpace 1.2345 + * @stable ICU 2.0 1.2346 + */ 1.2347 +U_STABLE UBool U_EXPORT2 1.2348 +u_isspace(UChar32 c); 1.2349 + 1.2350 +/** 1.2351 + * Determine if the specified code point is a space character according to Java. 1.2352 + * True for characters with general categories "Z" (separators), 1.2353 + * which does not include control codes (e.g., TAB or Line Feed). 1.2354 + * 1.2355 + * Same as java.lang.Character.isSpaceChar(). 1.2356 + * 1.2357 + * Note: There are several ICU whitespace functions; please see the uchar.h 1.2358 + * file documentation for a detailed comparison. 
1.2359 + * 1.2360 + * @param c the code point to be tested 1.2361 + * @return TRUE if the code point is a space character according to Character.isSpaceChar() 1.2362 + * 1.2363 + * @see u_isspace 1.2364 + * @see u_isWhitespace 1.2365 + * @see u_isUWhiteSpace 1.2366 + * @stable ICU 2.6 1.2367 + */ 1.2368 +U_STABLE UBool U_EXPORT2 1.2369 +u_isJavaSpaceChar(UChar32 c); 1.2370 + 1.2371 +/** 1.2372 + * Determines if the specified code point is a whitespace character according to Java/ICU. 1.2373 + * A character is considered to be a Java whitespace character if and only 1.2374 + * if it satisfies one of the following criteria: 1.2375 + * 1.2376 + * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 1.2377 + * also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP). 1.2378 + * - It is U+0009 HORIZONTAL TABULATION. 1.2379 + * - It is U+000A LINE FEED. 1.2380 + * - It is U+000B VERTICAL TABULATION. 1.2381 + * - It is U+000C FORM FEED. 1.2382 + * - It is U+000D CARRIAGE RETURN. 1.2383 + * - It is U+001C FILE SEPARATOR. 1.2384 + * - It is U+001D GROUP SEPARATOR. 1.2385 + * - It is U+001E RECORD SEPARATOR. 1.2386 + * - It is U+001F UNIT SEPARATOR. 1.2387 + * 1.2388 + * This API tries to sync with the semantics of Java's 1.2389 + * java.lang.Character.isWhitespace(), but it may not return 1.2390 + * the exact same results because of the Unicode version 1.2391 + * difference. 1.2392 + * 1.2393 + * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 1.2394 + * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 1.2395 + * See http://www.unicode.org/versions/Unicode4.0.1/ 1.2396 + * 1.2397 + * Note: There are several ICU whitespace functions; please see the uchar.h 1.2398 + * file documentation for a detailed comparison. 
1.2399 + * 1.2400 + * @param c the code point to be tested 1.2401 + * @return TRUE if the code point is a whitespace character according to Java/ICU 1.2402 + * 1.2403 + * @see u_isspace 1.2404 + * @see u_isJavaSpaceChar 1.2405 + * @see u_isUWhiteSpace 1.2406 + * @stable ICU 2.0 1.2407 + */ 1.2408 +U_STABLE UBool U_EXPORT2 1.2409 +u_isWhitespace(UChar32 c); 1.2410 + 1.2411 +/** 1.2412 + * Determines whether the specified code point is a control character 1.2413 + * (as defined by this function). 1.2414 + * A control character is one of the following: 1.2415 + * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f) 1.2416 + * - U_CONTROL_CHAR (Cc) 1.2417 + * - U_FORMAT_CHAR (Cf) 1.2418 + * - U_LINE_SEPARATOR (Zl) 1.2419 + * - U_PARAGRAPH_SEPARATOR (Zp) 1.2420 + * 1.2421 + * This is a C/POSIX migration function. 1.2422 + * See the comments about C/POSIX character classification functions in the 1.2423 + * documentation at the top of this header file. 1.2424 + * 1.2425 + * @param c the code point to be tested 1.2426 + * @return TRUE if the code point is a control character 1.2427 + * 1.2428 + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT 1.2429 + * @see u_isprint 1.2430 + * @stable ICU 2.0 1.2431 + */ 1.2432 +U_STABLE UBool U_EXPORT2 1.2433 +u_iscntrl(UChar32 c); 1.2434 + 1.2435 +/** 1.2436 + * Determines whether the specified code point is an ISO control code. 1.2437 + * True for U+0000..U+001f and U+007f..U+009f (general category "Cc"). 1.2438 + * 1.2439 + * Same as java.lang.Character.isISOControl(). 1.2440 + * 1.2441 + * @param c the code point to be tested 1.2442 + * @return TRUE if the code point is an ISO control code 1.2443 + * 1.2444 + * @see u_iscntrl 1.2445 + * @stable ICU 2.6 1.2446 + */ 1.2447 +U_STABLE UBool U_EXPORT2 1.2448 +u_isISOControl(UChar32 c); 1.2449 + 1.2450 +/** 1.2451 + * Determines whether the specified code point is a printable character. 1.2452 + * True for general categories <em>other</em> than "C" (controls). 
1.2453 + * 1.2454 + * This is a C/POSIX migration function. 1.2455 + * See the comments about C/POSIX character classification functions in the 1.2456 + * documentation at the top of this header file. 1.2457 + * 1.2458 + * @param c the code point to be tested 1.2459 + * @return TRUE if the code point is a printable character 1.2460 + * 1.2461 + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT 1.2462 + * @see u_iscntrl 1.2463 + * @stable ICU 2.0 1.2464 + */ 1.2465 +U_STABLE UBool U_EXPORT2 1.2466 +u_isprint(UChar32 c); 1.2467 + 1.2468 +/** 1.2469 + * Determines whether the specified code point is a base character. 1.2470 + * True for general categories "L" (letters), "N" (numbers), 1.2471 + * "Mc" (spacing combining marks), and "Me" (enclosing marks). 1.2472 + * 1.2473 + * Note that this is different from the Unicode definition in 1.2474 + * chapter 3.5, conformance clause D13, 1.2475 + * which defines base characters to be all characters (not Cn) 1.2476 + * that do not graphically combine with preceding characters (M) 1.2477 + * and that are neither control (Cc) nor format (Cf) characters. 1.2478 + * 1.2479 + * @param c the code point to be tested 1.2480 + * @return TRUE if the code point is a base character according to this function 1.2481 + * 1.2482 + * @see u_isalpha 1.2483 + * @see u_isdigit 1.2484 + * @stable ICU 2.0 1.2485 + */ 1.2486 +U_STABLE UBool U_EXPORT2 1.2487 +u_isbase(UChar32 c); 1.2488 + 1.2489 +/** 1.2490 + * Returns the bidirectional category value for the code point, 1.2491 + * which is used in the Unicode bidirectional algorithm 1.2492 + * (UAX #9 http://www.unicode.org/reports/tr9/). 1.2493 + * Note that some <em>unassigned</em> code points have bidi values 1.2494 + * of R or AL because they are in blocks that are reserved 1.2495 + * for Right-To-Left scripts.
1.2496 + * 1.2497 + * Same as java.lang.Character.getDirectionality() 1.2498 + * 1.2499 + * @param c the code point to be tested 1.2500 + * @return the bidirectional category (UCharDirection) value 1.2501 + * 1.2502 + * @see UCharDirection 1.2503 + * @stable ICU 2.0 1.2504 + */ 1.2505 +U_STABLE UCharDirection U_EXPORT2 1.2506 +u_charDirection(UChar32 c); 1.2507 + 1.2508 +/** 1.2509 + * Determines whether the code point has the Bidi_Mirrored property. 1.2510 + * This property is set for characters that are commonly used in 1.2511 + * Right-To-Left contexts and need to be displayed with a "mirrored" 1.2512 + * glyph. 1.2513 + * 1.2514 + * Same as java.lang.Character.isMirrored(). 1.2515 + * Same as UCHAR_BIDI_MIRRORED 1.2516 + * 1.2517 + * @param c the code point to be tested 1.2518 + * @return TRUE if the character has the Bidi_Mirrored property 1.2519 + * 1.2520 + * @see UCHAR_BIDI_MIRRORED 1.2521 + * @stable ICU 2.0 1.2522 + */ 1.2523 +U_STABLE UBool U_EXPORT2 1.2524 +u_isMirrored(UChar32 c); 1.2525 + 1.2526 +/** 1.2527 + * Maps the specified character to a "mirror-image" character. 1.2528 + * For characters with the Bidi_Mirrored property, implementations 1.2529 + * sometimes need a "poor man's" mapping to another Unicode 1.2530 + * character (code point) such that the default glyph may serve 1.2531 + * as the mirror-image of the default glyph of the specified 1.2532 + * character. This is useful for text conversion to and from 1.2533 + * codepages with visual order, and for displays without glyph 1.2534 + * selection capabilities. 
1.2535 + * 1.2536 + * @param c the code point to be mapped 1.2537 + * @return another Unicode code point that may serve as a mirror-image 1.2538 + * substitute, or c itself if there is no such mapping or c 1.2539 + * does not have the Bidi_Mirrored property 1.2540 + * 1.2541 + * @see UCHAR_BIDI_MIRRORED 1.2542 + * @see u_isMirrored 1.2543 + * @stable ICU 2.0 1.2544 + */ 1.2545 +U_STABLE UChar32 U_EXPORT2 1.2546 +u_charMirror(UChar32 c); 1.2547 + 1.2548 +/** 1.2549 + * Maps the specified character to its paired bracket character. 1.2550 + * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). 1.2551 + * Otherwise c itself is returned. 1.2552 + * See http://www.unicode.org/reports/tr9/ 1.2553 + * 1.2554 + * @param c the code point to be mapped 1.2555 + * @return the paired bracket code point, 1.2556 + * or c itself if there is no such mapping 1.2557 + * (Bidi_Paired_Bracket_Type=None) 1.2558 + * 1.2559 + * @see UCHAR_BIDI_PAIRED_BRACKET 1.2560 + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE 1.2561 + * @see u_charMirror 1.2562 + * @stable ICU 52 1.2563 + */ 1.2564 +U_STABLE UChar32 U_EXPORT2 1.2565 +u_getBidiPairedBracket(UChar32 c); 1.2566 + 1.2567 +/** 1.2568 + * Returns the general category value for the code point. 1.2569 + * 1.2570 + * Same as java.lang.Character.getType(). 1.2571 + * 1.2572 + * @param c the code point to be tested 1.2573 + * @return the general category (UCharCategory) value 1.2574 + * 1.2575 + * @see UCharCategory 1.2576 + * @stable ICU 2.0 1.2577 + */ 1.2578 +U_STABLE int8_t U_EXPORT2 1.2579 +u_charType(UChar32 c); 1.2580 + 1.2581 +/** 1.2582 + * Get a single-bit bit set for the general category of a character. 1.2583 + * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc. 1.2584 + * Same as U_MASK(u_charType(c)). 
1.2585 + * 1.2586 + * @param c the code point to be tested 1.2587 + * @return a single-bit mask corresponding to the general category (UCharCategory) value 1.2588 + * 1.2589 + * @see u_charType 1.2590 + * @see UCharCategory 1.2591 + * @see U_GC_CN_MASK 1.2592 + * @stable ICU 2.1 1.2593 + */ 1.2594 +#define U_GET_GC_MASK(c) U_MASK(u_charType(c)) 1.2595 + 1.2596 +/** 1.2597 + * Callback from u_enumCharTypes(), is called for each contiguous range 1.2598 + * of code points c (where start<=c<limit) 1.2599 + * with the same Unicode general category ("character type"). 1.2600 + * 1.2601 + * The callback function can stop the enumeration by returning FALSE. 1.2602 + * 1.2603 + * @param context an opaque pointer, as passed into u_enumCharTypes() 1.2604 + * @param start the first code point in a contiguous range with value 1.2605 + * @param limit one past the last code point in a contiguous range with value 1.2606 + * @param type the general category for all code points in [start..limit[ 1.2607 + * @return FALSE to stop the enumeration 1.2608 + * 1.2609 + * @stable ICU 2.1 1.2610 + * @see UCharCategory 1.2611 + * @see u_enumCharTypes 1.2612 + */ 1.2613 +typedef UBool U_CALLCONV 1.2614 +UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type); 1.2615 + 1.2616 +/** 1.2617 + * Enumerate efficiently all code points with their Unicode general categories. 1.2618 + * 1.2619 + * This is useful for building data structures (e.g., UnicodeSet's), 1.2620 + * for enumerating all assigned code points (type!=U_UNASSIGNED), etc. 1.2621 + * 1.2622 + * For each contiguous range of code points with a given general category ("character type"), 1.2623 + * the UCharEnumTypeRange function is called. 1.2624 + * Adjacent ranges have different types. 1.2625 + * The Unicode Standard guarantees that the numeric value of the type is 0..31.
1.2626 + * 1.2627 + * @param enumRange a pointer to a function that is called for each contiguous range 1.2628 + * of code points with the same general category 1.2629 + * @param context an opaque pointer that is passed on to the callback function 1.2630 + * 1.2631 + * @stable ICU 2.1 1.2632 + * @see UCharCategory 1.2633 + * @see UCharEnumTypeRange 1.2634 + */ 1.2635 +U_STABLE void U_EXPORT2 1.2636 +u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context); 1.2637 + 1.2638 +#if !UCONFIG_NO_NORMALIZATION 1.2639 + 1.2640 +/** 1.2641 + * Returns the combining class of the code point as specified in UnicodeData.txt. 1.2642 + * 1.2643 + * @param c the code point of the character 1.2644 + * @return the combining class of the character 1.2645 + * @stable ICU 2.0 1.2646 + */ 1.2647 +U_STABLE uint8_t U_EXPORT2 1.2648 +u_getCombiningClass(UChar32 c); 1.2649 + 1.2650 +#endif 1.2651 + 1.2652 +/** 1.2653 + * Returns the decimal digit value of a decimal digit character. 1.2654 + * Such characters have the general category "Nd" (decimal digit numbers) 1.2655 + * and a Numeric_Type of Decimal. 1.2656 + * 1.2657 + * Unlike ICU releases before 2.6, no digit values are returned for any 1.2658 + * Han characters because Han number characters are often used with a special 1.2659 + * Chinese-style number format (with characters for powers of 10 in between) 1.2660 + * instead of in decimal-positional notation. 1.2661 + * Unicode 4 explicitly assigns Han number characters the Numeric_Type 1.2662 + * Numeric instead of Decimal. 1.2663 + * See Jitterbug 1483 for more details. 1.2664 + * 1.2665 + * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() 1.2666 + * for complete numeric Unicode properties. 
1.2667 + * 1.2668 + * @param c the code point for which to get the decimal digit value 1.2669 + * @return the decimal digit value of c, 1.2670 + * or -1 if c is not a decimal digit character 1.2671 + * 1.2672 + * @see u_getNumericValue 1.2673 + * @stable ICU 2.0 1.2674 + */ 1.2675 +U_STABLE int32_t U_EXPORT2 1.2676 +u_charDigitValue(UChar32 c); 1.2677 + 1.2678 +/** 1.2679 + * Returns the Unicode allocation block that contains the character. 1.2680 + * 1.2681 + * @param c the code point to be tested 1.2682 + * @return the block value (UBlockCode) for c 1.2683 + * 1.2684 + * @see UBlockCode 1.2685 + * @stable ICU 2.0 1.2686 + */ 1.2687 +U_STABLE UBlockCode U_EXPORT2 1.2688 +ublock_getCode(UChar32 c); 1.2689 + 1.2690 +/** 1.2691 + * Retrieve the name of a Unicode character. 1.2692 + * Depending on <code>nameChoice</code>, the character name written 1.2693 + * into the buffer is the "modern" name or the name that was defined 1.2694 + * in Unicode version 1.0. 1.2695 + * The name contains only "invariant" characters 1.2696 + * like A-Z, 0-9, space, and '-'. 1.2697 + * Unicode 1.0 names are only retrieved if they are different from the modern 1.2698 + * names and if the data file contains the data for them. gennames may or may 1.2699 + * not be called with a command line option to include 1.0 names in unames.dat. 1.2700 + * 1.2701 + * @param code The character (code point) for which to get the name. 1.2702 + * It must be <code>0<=code<=0x10ffff</code>. 1.2703 + * @param nameChoice Selector for which name to get. 1.2704 + * @param buffer Destination address for copying the name. 1.2705 + * The name will always be zero-terminated. 1.2706 + * If there is no name, then the buffer will be set to the empty string. 1.2707 + * @param bufferLength <code>==sizeof(buffer)</code> 1.2708 + * @param pErrorCode Pointer to a UErrorCode variable; 1.2709 + * check for <code>U_SUCCESS()</code> after <code>u_charName()</code> 1.2710 + * returns. 
1.2711 + * @return The length of the name, or 0 if there is no name for this character. 1.2712 + * If the bufferLength is less than or equal to the length, then the buffer 1.2713 + * contains the truncated name and the returned length indicates the full 1.2714 + * length of the name. 1.2715 + * The length does not include the zero-termination. 1.2716 + * 1.2717 + * @see UCharNameChoice 1.2718 + * @see u_charFromName 1.2719 + * @see u_enumCharNames 1.2720 + * @stable ICU 2.0 1.2721 + */ 1.2722 +U_STABLE int32_t U_EXPORT2 1.2723 +u_charName(UChar32 code, UCharNameChoice nameChoice, 1.2724 + char *buffer, int32_t bufferLength, 1.2725 + UErrorCode *pErrorCode); 1.2726 + 1.2727 +#ifndef U_HIDE_DEPRECATED_API 1.2728 +/** 1.2729 + * Returns an empty string. 1.2730 + * Used to return the ISO 10646 comment for a character. 1.2731 + * The Unicode ISO_Comment property is deprecated and has no values. 1.2732 + * 1.2733 + * @param c The character (code point) for which to get the ISO comment. 1.2734 + * It must be <code>0<=c<=0x10ffff</code>. 1.2735 + * @param dest Destination address for copying the comment. 1.2736 + * The comment will be zero-terminated if possible. 1.2737 + * If there is no comment, then the buffer will be set to the empty string. 1.2738 + * @param destCapacity <code>==sizeof(dest)</code> 1.2739 + * @param pErrorCode Pointer to a UErrorCode variable; 1.2740 + * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code> 1.2741 + * returns. 1.2742 + * @return 0 1.2743 + * 1.2744 + * @deprecated ICU 49 1.2745 + */ 1.2746 +U_STABLE int32_t U_EXPORT2 1.2747 +u_getISOComment(UChar32 c, 1.2748 + char *dest, int32_t destCapacity, 1.2749 + UErrorCode *pErrorCode); 1.2750 +#endif /* U_HIDE_DEPRECATED_API */ 1.2751 + 1.2752 +/** 1.2753 + * Find a Unicode character by its name and return its code point value. 1.2754 + * The name is matched exactly and completely. 
1.2755 + * If the name does not correspond to a code point, <i>pErrorCode</i> 1.2756 + * is set to <code>U_INVALID_CHAR_FOUND</code>. 1.2757 + * A Unicode 1.0 name is matched only if it differs from the modern name. 1.2758 + * Unicode names are all uppercase. Extended names are lowercase followed 1.2759 + * by an uppercase hexadecimal number, and within angle brackets. 1.2760 + * 1.2761 + * @param nameChoice Selector for which name to match. 1.2762 + * @param name The name to match. 1.2763 + * @param pErrorCode Pointer to a UErrorCode variable 1.2764 + * @return The Unicode value of the code point with the given name, 1.2765 + * or an undefined value if there is no such code point. 1.2766 + * 1.2767 + * @see UCharNameChoice 1.2768 + * @see u_charName 1.2769 + * @see u_enumCharNames 1.2770 + * @stable ICU 1.7 1.2771 + */ 1.2772 +U_STABLE UChar32 U_EXPORT2 1.2773 +u_charFromName(UCharNameChoice nameChoice, 1.2774 + const char *name, 1.2775 + UErrorCode *pErrorCode); 1.2776 + 1.2777 +/** 1.2778 + * Type of a callback function for u_enumCharNames() that gets called 1.2779 + * for each Unicode character with the code point value and 1.2780 + * the character name. 1.2781 + * If such a function returns FALSE, then the enumeration is stopped. 1.2782 + * 1.2783 + * @param context The context pointer that was passed to u_enumCharNames(). 1.2784 + * @param code The Unicode code point for the character with this name. 1.2785 + * @param nameChoice Selector for which kind of names is enumerated. 1.2786 + * @param name The character's name, zero-terminated. 1.2787 + * @param length The length of the name. 1.2788 + * @return TRUE if the enumeration should continue, FALSE to stop it. 
1.2789 + * 1.2790 + * @see UCharNameChoice 1.2791 + * @see u_enumCharNames 1.2792 + * @stable ICU 1.7 1.2793 + */ 1.2794 +typedef UBool U_CALLCONV UEnumCharNamesFn(void *context, 1.2795 + UChar32 code, 1.2796 + UCharNameChoice nameChoice, 1.2797 + const char *name, 1.2798 + int32_t length); 1.2799 + 1.2800 +/** 1.2801 + * Enumerate all assigned Unicode characters between the start and limit 1.2802 + * code points (start inclusive, limit exclusive) and call a function 1.2803 + * for each, passing the code point value and the character name. 1.2804 + * For Unicode 1.0 names, only those are enumerated that differ from the 1.2805 + * modern names. 1.2806 + * 1.2807 + * @param start The first code point in the enumeration range. 1.2808 + * @param limit One more than the last code point in the enumeration range 1.2809 + * (the first one after the range). 1.2810 + * @param fn The function that is to be called for each character name. 1.2811 + * @param context An arbitrary pointer that is passed to the function. 1.2812 + * @param nameChoice Selector for which kind of names to enumerate. 1.2813 + * @param pErrorCode Pointer to a UErrorCode variable 1.2814 + * 1.2815 + * @see UCharNameChoice 1.2816 + * @see UEnumCharNamesFn 1.2817 + * @see u_charName 1.2818 + * @see u_charFromName 1.2819 + * @stable ICU 1.7 1.2820 + */ 1.2821 +U_STABLE void U_EXPORT2 1.2822 +u_enumCharNames(UChar32 start, UChar32 limit, 1.2823 + UEnumCharNamesFn *fn, 1.2824 + void *context, 1.2825 + UCharNameChoice nameChoice, 1.2826 + UErrorCode *pErrorCode); 1.2827 + 1.2828 +/** 1.2829 + * Return the Unicode name for a given property, as given in the 1.2830 + * Unicode database file PropertyAliases.txt. 1.2831 + * 1.2832 + * In addition, this function maps the property 1.2833 + * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 1.2834 + * "General_Category_Mask". These names are not in 1.2835 + * PropertyAliases.txt. 
1.2836 + * 1.2837 + * @param property UProperty selector other than UCHAR_INVALID_CODE. 1.2838 + * If out of range, NULL is returned. 1.2839 + * 1.2840 + * @param nameChoice selector for which name to get. If out of range, 1.2841 + * NULL is returned. All properties have a long name. Most 1.2842 + * have a short name, but some do not. Unicode allows for 1.2843 + * additional names; if present these will be returned by 1.2844 + * U_LONG_PROPERTY_NAME + i, where i=1, 2,... 1.2845 + * 1.2846 + * @return a pointer to the name, or NULL if either the 1.2847 + * property or the nameChoice is out of range. If a given 1.2848 + * nameChoice returns NULL, then all larger values of 1.2849 + * nameChoice will return NULL, with one exception: if NULL is 1.2850 + * returned for U_SHORT_PROPERTY_NAME, then 1.2851 + * U_LONG_PROPERTY_NAME (and higher) may still return a 1.2852 + * non-NULL value. The returned pointer is valid until 1.2853 + * u_cleanup() is called. 1.2854 + * 1.2855 + * @see UProperty 1.2856 + * @see UPropertyNameChoice 1.2857 + * @stable ICU 2.4 1.2858 + */ 1.2859 +U_STABLE const char* U_EXPORT2 1.2860 +u_getPropertyName(UProperty property, 1.2861 + UPropertyNameChoice nameChoice); 1.2862 + 1.2863 +/** 1.2864 + * Return the UProperty enum for a given property name, as specified 1.2865 + * in the Unicode database file PropertyAliases.txt. Short, long, and 1.2866 + * any other variants are recognized. 1.2867 + * 1.2868 + * In addition, this function maps the synthetic names "gcm" / 1.2869 + * "General_Category_Mask" to the property 1.2870 + * UCHAR_GENERAL_CATEGORY_MASK. These names are not in 1.2871 + * PropertyAliases.txt. 1.2872 + * 1.2873 + * @param alias the property name to be matched. The name is compared 1.2874 + * using "loose matching" as described in PropertyAliases.txt. 1.2875 + * 1.2876 + * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name 1.2877 + * does not match any property. 
1.2878 + * 1.2879 + * @see UProperty 1.2880 + * @stable ICU 2.4 1.2881 + */ 1.2882 +U_STABLE UProperty U_EXPORT2 1.2883 +u_getPropertyEnum(const char* alias); 1.2884 + 1.2885 +/** 1.2886 + * Return the Unicode name for a given property value, as given in the 1.2887 + * Unicode database file PropertyValueAliases.txt. 1.2888 + * 1.2889 + * Note: Some of the names in PropertyValueAliases.txt can only be 1.2890 + * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not 1.2891 + * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / 1.2892 + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 1.2893 + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 1.2894 + * 1.2895 + * @param property UProperty selector constant. 1.2896 + * Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT 1.2897 + * or UCHAR_INT_START<=property<UCHAR_INT_LIMIT 1.2898 + * or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT. 1.2899 + * If out of range, NULL is returned. 1.2900 + * 1.2901 + * @param value selector for a value for the given property. If out 1.2902 + * of range, NULL is returned. In general, valid values range 1.2903 + * from 0 up to some maximum. There are a few exceptions: 1.2904 + * (1.) UCHAR_BLOCK values begin at the non-zero value 1.2905 + * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS 1.2906 + * values are not contiguous and range from 0..240. (3.) 1.2907 + * UCHAR_GENERAL_CATEGORY_MASK values are not values of 1.2908 + * UCharCategory, but rather mask values produced by 1.2909 + * U_GET_GC_MASK(). This allows grouped categories such as 1.2910 + * [:L:] to be represented. Mask values range 1.2911 + * non-contiguously from 1..U_GC_P_MASK. 1.2912 + * 1.2913 + * @param nameChoice selector for which name to get. If out of range, 1.2914 + * NULL is returned. All values have a long name. Most have 1.2915 + * a short name, but some do not.
Unicode allows for 1.2916 + * additional names; if present these will be returned by 1.2917 + * U_LONG_PROPERTY_NAME + i, where i=1, 2,... 1.2918 + * 1.2919 + * @return a pointer to the name, or NULL if the property, 1.2920 + * the value, or the nameChoice is out of range. If a given 1.2921 + * nameChoice returns NULL, then all larger values of 1.2922 + * nameChoice will return NULL, with one exception: if NULL is 1.2923 + * returned for U_SHORT_PROPERTY_NAME, then 1.2924 + * U_LONG_PROPERTY_NAME (and higher) may still return a 1.2925 + * non-NULL value. The returned pointer is valid until 1.2926 + * u_cleanup() is called. 1.2927 + * 1.2928 + * @see UProperty 1.2929 + * @see UPropertyNameChoice 1.2930 + * @stable ICU 2.4 1.2931 + */ 1.2932 +U_STABLE const char* U_EXPORT2 1.2933 +u_getPropertyValueName(UProperty property, 1.2934 + int32_t value, 1.2935 + UPropertyNameChoice nameChoice); 1.2936 + 1.2937 +/** 1.2938 + * Return the property value integer for a given value name, as 1.2939 + * specified in the Unicode database file PropertyValueAliases.txt. 1.2940 + * Short, long, and any other variants are recognized. 1.2941 + * 1.2942 + * Note: Some of the names in PropertyValueAliases.txt will only be 1.2943 + * recognized with UCHAR_GENERAL_CATEGORY_MASK, not 1.2944 + * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / 1.2945 + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 1.2946 + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 1.2947 + * 1.2948 + * @param property UProperty selector constant. 1.2949 + * Must be UCHAR_BINARY_START<=property<UCHAR_BINARY_LIMIT 1.2950 + * or UCHAR_INT_START<=property<UCHAR_INT_LIMIT 1.2951 + * or UCHAR_MASK_START<=property<UCHAR_MASK_LIMIT. 1.2952 + * If out of range, UCHAR_INVALID_CODE is returned. 1.2953 + * 1.2954 + * @param alias the value name to be matched. The name is compared 1.2955 + * using "loose matching" as described in 1.2956 + * PropertyValueAliases.txt.
1.2957 + * 1.2958 + * @return a value integer or UCHAR_INVALID_CODE if the given name 1.2959 + * does not match any value of the given property, or if the 1.2960 + * property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values 1.2961 + * are not values of UCharCategory, but rather mask values 1.2962 + * produced by U_GET_GC_MASK(). This allows grouped 1.2963 + * categories such as [:L:] to be represented. 1.2964 + * 1.2965 + * @see UProperty 1.2966 + * @stable ICU 2.4 1.2967 + */ 1.2968 +U_STABLE int32_t U_EXPORT2 1.2969 +u_getPropertyValueEnum(UProperty property, 1.2970 + const char* alias); 1.2971 + 1.2972 +/** 1.2973 + * Determines if the specified character is permissible as the 1.2974 + * first character in an identifier according to Unicode 1.2975 + * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers). 1.2976 + * True for characters with general categories "L" (letters) and "Nl" (letter numbers). 1.2977 + * 1.2978 + * Same as java.lang.Character.isUnicodeIdentifierStart(). 1.2979 + * Same as UCHAR_ID_START 1.2980 + * 1.2981 + * @param c the code point to be tested 1.2982 + * @return TRUE if the code point may start an identifier 1.2983 + * 1.2984 + * @see UCHAR_ID_START 1.2985 + * @see u_isalpha 1.2986 + * @see u_isIDPart 1.2987 + * @stable ICU 2.0 1.2988 + */ 1.2989 +U_STABLE UBool U_EXPORT2 1.2990 +u_isIDStart(UChar32 c); 1.2991 + 1.2992 +/** 1.2993 + * Determines if the specified character is permissible 1.2994 + * in an identifier according to Java. 1.2995 + * True for characters with general categories "L" (letters), 1.2996 + * "Nl" (letter numbers), "Nd" (decimal digits), 1.2997 + * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and 1.2998 + * u_isIDIgnorable(c). 1.2999 + * 1.3000 + * Same as java.lang.Character.isUnicodeIdentifierPart(). 1.3001 + * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE) 1.3002 + * except that Unicode recommends ignoring only Cf, which is a smaller set than 1.3003 + * u_isIDIgnorable(c).
1.3004 + * 1.3005 + * @param c the code point to be tested 1.3006 + * @return TRUE if the code point may occur in an identifier according to Java 1.3007 + * 1.3008 + * @see UCHAR_ID_CONTINUE 1.3009 + * @see u_isIDStart 1.3010 + * @see u_isIDIgnorable 1.3011 + * @stable ICU 2.0 1.3012 + */ 1.3013 +U_STABLE UBool U_EXPORT2 1.3014 +u_isIDPart(UChar32 c); 1.3015 + 1.3016 +/** 1.3017 + * Determines if the specified character should be regarded 1.3018 + * as an ignorable character in an identifier, 1.3019 + * according to Java. 1.3020 + * True for characters with general category "Cf" (format controls) as well as 1.3021 + * non-whitespace ISO controls 1.3022 + * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F). 1.3023 + * 1.3024 + * Same as java.lang.Character.isIdentifierIgnorable(). 1.3025 + * 1.3026 + * Note that Unicode just recommends to ignore Cf (format controls). 1.3027 + * 1.3028 + * @param c the code point to be tested 1.3029 + * @return TRUE if the code point is ignorable in identifiers according to Java 1.3030 + * 1.3031 + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT 1.3032 + * @see u_isIDStart 1.3033 + * @see u_isIDPart 1.3034 + * @stable ICU 2.0 1.3035 + */ 1.3036 +U_STABLE UBool U_EXPORT2 1.3037 +u_isIDIgnorable(UChar32 c); 1.3038 + 1.3039 +/** 1.3040 + * Determines if the specified character is permissible as the 1.3041 + * first character in a Java identifier. 1.3042 + * In addition to u_isIDStart(c), true for characters with 1.3043 + * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation). 1.3044 + * 1.3045 + * Same as java.lang.Character.isJavaIdentifierStart(). 
1.3046 + * 1.3047 + * @param c the code point to be tested 1.3048 + * @return TRUE if the code point may start a Java identifier 1.3049 + * 1.3050 + * @see u_isJavaIDPart 1.3051 + * @see u_isalpha 1.3052 + * @see u_isIDStart 1.3053 + * @stable ICU 2.0 1.3054 + */ 1.3055 +U_STABLE UBool U_EXPORT2 1.3056 +u_isJavaIDStart(UChar32 c); 1.3057 + 1.3058 +/** 1.3059 + * Determines if the specified character is permissible 1.3060 + * in a Java identifier. 1.3061 + * In addition to u_isIDPart(c), true for characters with 1.3062 + * general category "Sc" (currency symbols). 1.3063 + * 1.3064 + * Same as java.lang.Character.isJavaIdentifierPart(). 1.3065 + * 1.3066 + * @param c the code point to be tested 1.3067 + * @return TRUE if the code point may occur in a Java identifier 1.3068 + * 1.3069 + * @see u_isIDIgnorable 1.3070 + * @see u_isJavaIDStart 1.3071 + * @see u_isalpha 1.3072 + * @see u_isdigit 1.3073 + * @see u_isIDPart 1.3074 + * @stable ICU 2.0 1.3075 + */ 1.3076 +U_STABLE UBool U_EXPORT2 1.3077 +u_isJavaIDPart(UChar32 c); 1.3078 + 1.3079 +/** 1.3080 + * The given character is mapped to its lowercase equivalent according to 1.3081 + * UnicodeData.txt; if the character has no lowercase equivalent, the character 1.3082 + * itself is returned. 1.3083 + * 1.3084 + * Same as java.lang.Character.toLowerCase(). 1.3085 + * 1.3086 + * This function only returns the simple, single-code point case mapping. 1.3087 + * Full case mappings should be used whenever possible because they produce 1.3088 + * better results by working on whole strings. 1.3089 + * They take into account the string context and the language and can map 1.3090 + * to a result string with a different length as appropriate. 1.3091 + * Full case mappings are applied by the string case mapping functions, 1.3092 + * see ustring.h and the UnicodeString class. 
1.3093 + * See also the User Guide chapter on C/POSIX migration: 1.3094 + * http://icu-project.org/userguide/posix.html#case_mappings 1.3095 + * 1.3096 + * @param c the code point to be mapped 1.3097 + * @return the Simple_Lowercase_Mapping of the code point, if any; 1.3098 + * otherwise the code point itself. 1.3099 + * @stable ICU 2.0 1.3100 + */ 1.3101 +U_STABLE UChar32 U_EXPORT2 1.3102 +u_tolower(UChar32 c); 1.3103 + 1.3104 +/** 1.3105 + * The given character is mapped to its uppercase equivalent according to UnicodeData.txt; 1.3106 + * if the character has no uppercase equivalent, the character itself is 1.3107 + * returned. 1.3108 + * 1.3109 + * Same as java.lang.Character.toUpperCase(). 1.3110 + * 1.3111 + * This function only returns the simple, single-code point case mapping. 1.3112 + * Full case mappings should be used whenever possible because they produce 1.3113 + * better results by working on whole strings. 1.3114 + * They take into account the string context and the language and can map 1.3115 + * to a result string with a different length as appropriate. 1.3116 + * Full case mappings are applied by the string case mapping functions, 1.3117 + * see ustring.h and the UnicodeString class. 1.3118 + * See also the User Guide chapter on C/POSIX migration: 1.3119 + * http://icu-project.org/userguide/posix.html#case_mappings 1.3120 + * 1.3121 + * @param c the code point to be mapped 1.3122 + * @return the Simple_Uppercase_Mapping of the code point, if any; 1.3123 + * otherwise the code point itself. 1.3124 + * @stable ICU 2.0 1.3125 + */ 1.3126 +U_STABLE UChar32 U_EXPORT2 1.3127 +u_toupper(UChar32 c); 1.3128 + 1.3129 +/** 1.3130 + * The given character is mapped to its titlecase equivalent 1.3131 + * according to UnicodeData.txt; 1.3132 + * if none is defined, the character itself is returned. 1.3133 + * 1.3134 + * Same as java.lang.Character.toTitleCase(). 1.3135 + * 1.3136 + * This function only returns the simple, single-code point case mapping. 
1.3137 + * Full case mappings should be used whenever possible because they produce 1.3138 + * better results by working on whole strings. 1.3139 + * They take into account the string context and the language and can map 1.3140 + * to a result string with a different length as appropriate. 1.3141 + * Full case mappings are applied by the string case mapping functions, 1.3142 + * see ustring.h and the UnicodeString class. 1.3143 + * See also the User Guide chapter on C/POSIX migration: 1.3144 + * http://icu-project.org/userguide/posix.html#case_mappings 1.3145 + * 1.3146 + * @param c the code point to be mapped 1.3147 + * @return the Simple_Titlecase_Mapping of the code point, if any; 1.3148 + * otherwise the code point itself. 1.3149 + * @stable ICU 2.0 1.3150 + */ 1.3151 +U_STABLE UChar32 U_EXPORT2 1.3152 +u_totitle(UChar32 c); 1.3153 + 1.3154 +/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */ 1.3155 +#define U_FOLD_CASE_DEFAULT 0 1.3156 + 1.3157 +/** 1.3158 + * Option value for case folding: 1.3159 + * 1.3160 + * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 1.3161 + * and dotless i appropriately for Turkic languages (tr, az). 1.3162 + * 1.3163 + * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 1.3164 + * are to be included for default mappings and 1.3165 + * excluded for the Turkic-specific mappings. 1.3166 + * 1.3167 + * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 1.3168 + * are to be excluded for default mappings and 1.3169 + * included for the Turkic-specific mappings. 
1.3170 + * 1.3171 + * @stable ICU 2.0 1.3172 + */ 1.3173 +#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 1.3174 + 1.3175 +/** 1.3176 + * The given character is mapped to its case folding equivalent according to 1.3177 + * UnicodeData.txt and CaseFolding.txt; 1.3178 + * if the character has no case folding equivalent, the character 1.3179 + * itself is returned. 1.3180 + * 1.3181 + * This function only returns the simple, single-code point case mapping. 1.3182 + * Full case mappings should be used whenever possible because they produce 1.3183 + * better results by working on whole strings. 1.3184 + * They take into account the string context and the language and can map 1.3185 + * to a result string with a different length as appropriate. 1.3186 + * Full case mappings are applied by the string case mapping functions, 1.3187 + * see ustring.h and the UnicodeString class. 1.3188 + * See also the User Guide chapter on C/POSIX migration: 1.3189 + * http://icu-project.org/userguide/posix.html#case_mappings 1.3190 + * 1.3191 + * @param c the code point to be mapped 1.3192 + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.3193 + * @return the Simple_Case_Folding of the code point, if any; 1.3194 + * otherwise the code point itself. 1.3195 + * @stable ICU 2.0 1.3196 + */ 1.3197 +U_STABLE UChar32 U_EXPORT2 1.3198 +u_foldCase(UChar32 c, uint32_t options); 1.3199 + 1.3200 +/** 1.3201 + * Returns the decimal digit value of the code point in the 1.3202 + * specified radix. 1.3203 + * 1.3204 + * If the radix is not in the range <code>2<=radix<=36</code> or if the 1.3205 + * value of <code>ch</code> is not a valid digit in the specified 1.3206 + * radix, <code>-1</code> is returned. A character is a valid digit 1.3207 + * if at least one of the following is true: 1.3208 + * <ul> 1.3209 + * <li>The character has a decimal digit value. 1.3210 + * Such characters have the general category "Nd" (decimal digit numbers) 1.3211 + * and a Numeric_Type of Decimal.
1.3212 + * In this case the value is the character's decimal digit value.</li> 1.3213 + * <li>The character is one of the uppercase Latin letters 1.3214 + * <code>'A'</code> through <code>'Z'</code>. 1.3215 + * In this case the value is <code>ch-'A'+10</code>.</li> 1.3216 + * <li>The character is one of the lowercase Latin letters 1.3217 + * <code>'a'</code> through <code>'z'</code>. 1.3218 + * In this case the value is <code>ch-'a'+10</code>.</li> 1.3219 + * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A) 1.3220 + * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) 1.3221 + * are recognized.</li> 1.3222 + * </ul> 1.3223 + * 1.3224 + * Same as java.lang.Character.digit(). 1.3225 + * 1.3226 + * @param ch the code point to be tested. 1.3227 + * @param radix the radix. 1.3228 + * @return the numeric value represented by the character in the 1.3229 + * specified radix, 1.3230 + * or -1 if there is no value or if the value is not less than the radix. 1.3231 + * 1.3232 + * @see UCHAR_NUMERIC_TYPE 1.3233 + * @see u_forDigit 1.3234 + * @see u_charDigitValue 1.3235 + * @see u_isdigit 1.3236 + * @stable ICU 2.0 1.3237 + */ 1.3238 +U_STABLE int32_t U_EXPORT2 1.3239 +u_digit(UChar32 ch, int8_t radix); 1.3240 + 1.3241 +/** 1.3242 + * Determines the character representation for a specific digit in 1.3243 + * the specified radix. If the value of <code>radix</code> is not a 1.3244 + * valid radix, or the value of <code>digit</code> is not a valid 1.3245 + * digit in the specified radix, the null character 1.3246 + * (<code>U+0000</code>) is returned. 1.3247 + * <p> 1.3248 + * The <code>radix</code> argument is valid if it is greater than or 1.3249 + * equal to 2 and less than or equal to 36. 1.3250 + * The <code>digit</code> argument is valid if 1.3251 + * <code>0 <= digit < radix</code>. 1.3252 + * <p> 1.3253 + * If the digit is less than 10, then 1.3254 + * <code>'0' + digit</code> is returned.
Otherwise, the value 1.3255 + * <code>'a' + digit - 10</code> is returned. 1.3256 + * 1.3257 + * Same as java.lang.Character.forDigit(). 1.3258 + * 1.3259 + * @param digit the number to convert to a character. 1.3260 + * @param radix the radix. 1.3261 + * @return the <code>char</code> representation of the specified digit 1.3262 + * in the specified radix. 1.3263 + * 1.3264 + * @see u_digit 1.3265 + * @see u_charDigitValue 1.3266 + * @see u_isdigit 1.3267 + * @stable ICU 2.0 1.3268 + */ 1.3269 +U_STABLE UChar32 U_EXPORT2 1.3270 +u_forDigit(int32_t digit, int8_t radix); 1.3271 + 1.3272 +/** 1.3273 + * Get the "age" of the code point. 1.3274 + * The "age" is the Unicode version when the code point was first 1.3275 + * designated (as a non-character or for Private Use) 1.3276 + * or assigned a character. 1.3277 + * This can be useful to avoid emitting code points to receiving 1.3278 + * processes that do not accept newer characters. 1.3279 + * The data is from the UCD file DerivedAge.txt. 1.3280 + * 1.3281 + * @param c The code point. 1.3282 + * @param versionArray The Unicode version number array, to be filled in. 1.3283 + * 1.3284 + * @stable ICU 2.1 1.3285 + */ 1.3286 +U_STABLE void U_EXPORT2 1.3287 +u_charAge(UChar32 c, UVersionInfo versionArray); 1.3288 + 1.3289 +/** 1.3290 + * Gets the Unicode version information. 1.3291 + * The version array is filled in with the version information 1.3292 + * for the Unicode standard that is currently used by ICU. 1.3293 + * For example, Unicode version 3.1.1 is represented as an array with 1.3294 + * the values { 3, 1, 1, 0 }. 1.3295 + * 1.3296 + * @param versionArray an output array that will be filled in with 1.3297 + * the Unicode version number 1.3298 + * @stable ICU 2.0 1.3299 + */ 1.3300 +U_STABLE void U_EXPORT2 1.3301 +u_getUnicodeVersion(UVersionInfo versionArray); 1.3302 + 1.3303 +#if !UCONFIG_NO_NORMALIZATION 1.3304 +/** 1.3305 + * Get the FC_NFKC_Closure property string for a character.
1.3306 + * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure" 1.3307 + * or for "FNC": http://www.unicode.org/reports/tr15/ 1.3308 + * 1.3309 + * @param c The character (code point) for which to get the FC_NFKC_Closure string. 1.3310 + * It must be <code>0<=c<=0x10ffff</code>. 1.3311 + * @param dest Destination address for copying the string. 1.3312 + * The string will be zero-terminated if possible. 1.3313 + * If there is no FC_NFKC_Closure string, 1.3314 + * then the buffer will be set to the empty string. 1.3315 + * @param destCapacity The capacity of the dest buffer, counted in UChars (not the byte size given by <code>sizeof</code>). 1.3316 + * @param pErrorCode Pointer to a UErrorCode variable. 1.3317 + * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character. 1.3318 + * If the destCapacity is less than or equal to the length, then the buffer 1.3319 + * contains the truncated string and the returned length indicates the full 1.3320 + * length of the string. 1.3321 + * The length does not include the zero-termination. 1.3322 + * 1.3323 + * @stable ICU 2.2 1.3324 + */ 1.3325 +U_STABLE int32_t U_EXPORT2 1.3326 +u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode); 1.3327 + 1.3328 +#endif 1.3329 + 1.3330 + 1.3331 +U_CDECL_END 1.3332 + 1.3333 +#endif /* UCHAR_H */ 1.3334 +/*eof*/