1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/uchar.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,727 @@ 1.4 +/* 1.5 +******************************************************************************** 1.6 +* Copyright (C) 1996-2012, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************** 1.9 +* 1.10 +* File UCHAR.C 1.11 +* 1.12 +* Modification History: 1.13 +* 1.14 +* Date Name Description 1.15 +* 04/02/97 aliu Creation. 1.16 +* 4/15/99 Madhu Updated all the function definitions for C Implementation 1.17 +* 5/20/99 Madhu Added the function u_getVersion() 1.18 +* 8/19/1999 srl Upgraded scripts to Unicode3.0 1.19 +* 11/11/1999 weiv added u_isalnum(), cleaned comments 1.20 +* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion. 1.21 +* 06/20/2000 helena OS/400 port changes; mostly typecast. 1.22 +****************************************************************************** 1.23 +*/ 1.24 + 1.25 +#include "unicode/utypes.h" 1.26 +#include "unicode/uchar.h" 1.27 +#include "unicode/uscript.h" 1.28 +#include "unicode/udata.h" 1.29 +#include "uassert.h" 1.30 +#include "cmemory.h" 1.31 +#include "ucln_cmn.h" 1.32 +#include "utrie2.h" 1.33 +#include "udataswp.h" 1.34 +#include "uprops.h" 1.35 +#include "ustr_imp.h" 1.36 + 1.37 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 1.38 + 1.39 +/* uchar_props_data.h is machine-generated by genprops --csource */ 1.40 +#define INCLUDED_FROM_UCHAR_C 1.41 +#include "uchar_props_data.h" 1.42 + 1.43 +/* constants and macros for access to the data ------------------------------ */ 1.44 + 1.45 +/* getting a uint32_t properties word from the data */ 1.46 +#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); 1.47 + 1.48 +U_CFUNC UBool 1.49 +uprv_haveProperties(UErrorCode *pErrorCode) { 1.50 + if(U_FAILURE(*pErrorCode)) { 1.51 + return FALSE; 1.52 + } 1.53 + return TRUE; 1.54 +} 1.55 + 1.56 +/* API functions ------------------------------------------------------------ */ 1.57 + 1.58 +/* Gets the Unicode character's general category.*/ 1.59 +U_CAPI int8_t U_EXPORT2 1.60 +u_charType(UChar32 c) { 1.61 + uint32_t props; 1.62 + GET_PROPS(c, props); 1.63 + return (int8_t)GET_CATEGORY(props); 1.64 +} 1.65 + 1.66 +/* Enumerate all code points with their general categories. */ 1.67 +struct _EnumTypeCallback { 1.68 + UCharEnumTypeRange *enumRange; 1.69 + const void *context; 1.70 +}; 1.71 + 1.72 +static uint32_t U_CALLCONV 1.73 +_enumTypeValue(const void *context, uint32_t value) { 1.74 + return GET_CATEGORY(value); 1.75 +} 1.76 + 1.77 +static UBool U_CALLCONV 1.78 +_enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 1.79 + /* just cast the value to UCharCategory */ 1.80 + return ((struct _EnumTypeCallback *)context)-> 1.81 + enumRange(((struct _EnumTypeCallback *)context)->context, 1.82 + start, end+1, (UCharCategory)value); 1.83 +} 1.84 + 1.85 +U_CAPI void U_EXPORT2 1.86 +u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) { 1.87 + struct _EnumTypeCallback callback; 1.88 + 1.89 + if(enumRange==NULL) { 1.90 + return; 1.91 + } 1.92 + 1.93 + callback.enumRange=enumRange; 1.94 + callback.context=context; 1.95 + utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback); 1.96 +} 1.97 + 1.98 +/* Checks if ch is a lower case letter.*/ 1.99 +U_CAPI UBool U_EXPORT2 1.100 +u_islower(UChar32 c) { 1.101 + uint32_t props; 1.102 + GET_PROPS(c, props); 1.103 + return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER); 1.104 +} 1.105 + 1.106 +/* Checks if ch is an upper case letter.*/ 1.107 +U_CAPI UBool U_EXPORT2 1.108 +u_isupper(UChar32 c) { 1.109 + uint32_t props; 1.110 + GET_PROPS(c, props); 1.111 + return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER); 1.112 +} 1.113 + 1.114 +/* Checks if ch is a title case letter; usually upper case letters.*/ 1.115 +U_CAPI UBool U_EXPORT2 1.116 +u_istitle(UChar32 c) { 1.117 + uint32_t props; 1.118 + GET_PROPS(c, props); 1.119 + return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER); 1.120 +} 1.121 + 1.122 +/* Checks if ch is a decimal digit. */ 1.123 +U_CAPI UBool U_EXPORT2 1.124 +u_isdigit(UChar32 c) { 1.125 + uint32_t props; 1.126 + GET_PROPS(c, props); 1.127 + return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 1.128 +} 1.129 + 1.130 +U_CAPI UBool U_EXPORT2 1.131 +u_isxdigit(UChar32 c) { 1.132 + uint32_t props; 1.133 + 1.134 + /* check ASCII and Fullwidth ASCII a-fA-F */ 1.135 + if( 1.136 + (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || 1.137 + (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) 1.138 + ) { 1.139 + return TRUE; 1.140 + } 1.141 + 1.142 + GET_PROPS(c, props); 1.143 + return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 1.144 +} 1.145 + 1.146 +/* Checks if the Unicode character is a letter.*/ 1.147 +U_CAPI UBool U_EXPORT2 1.148 +u_isalpha(UChar32 c) { 1.149 + uint32_t props; 1.150 + GET_PROPS(c, props); 1.151 + return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0); 1.152 +} 1.153 + 1.154 +U_CAPI UBool U_EXPORT2 1.155 +u_isUAlphabetic(UChar32 c) { 1.156 + return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0; 1.157 +} 1.158 + 1.159 +/* Checks if c is a letter or a decimal digit */ 1.160 +U_CAPI UBool U_EXPORT2 1.161 +u_isalnum(UChar32 c) { 1.162 + uint32_t props; 1.163 + GET_PROPS(c, props); 1.164 + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0); 1.165 +} 1.166 + 1.167 +/** 1.168 + * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. 1.169 + * @internal 1.170 + */ 1.171 +U_CFUNC UBool 1.172 +u_isalnumPOSIX(UChar32 c) { 1.173 + return (UBool)(u_isUAlphabetic(c) || u_isdigit(c)); 1.174 +} 1.175 + 1.176 +/* Checks if ch is a unicode character with assigned character type.*/ 1.177 +U_CAPI UBool U_EXPORT2 1.178 +u_isdefined(UChar32 c) { 1.179 + uint32_t props; 1.180 + GET_PROPS(c, props); 1.181 + return (UBool)(GET_CATEGORY(props)!=0); 1.182 +} 1.183 + 1.184 +/* Checks if the Unicode character is a base form character that can take a diacritic.*/ 1.185 +U_CAPI UBool U_EXPORT2 1.186 +u_isbase(UChar32 c) { 1.187 + uint32_t props; 1.188 + GET_PROPS(c, props); 1.189 + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0); 1.190 +} 1.191 + 1.192 +/* Checks if the Unicode character is a control character.*/ 1.193 +U_CAPI UBool U_EXPORT2 1.194 +u_iscntrl(UChar32 c) { 1.195 + uint32_t props; 1.196 + GET_PROPS(c, props); 1.197 + return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0); 1.198 +} 1.199 + 1.200 +U_CAPI UBool U_EXPORT2 1.201 +u_isISOControl(UChar32 c) { 1.202 + return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f); 1.203 +} 1.204 + 1.205 +/* Some control characters that are used as space. */ 1.206 +#define IS_THAT_CONTROL_SPACE(c) \ 1.207 + (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) 1.208 + 1.209 +/* Java has decided that U+0085 New Line is not whitespace any more. */ 1.210 +#define IS_THAT_ASCII_CONTROL_SPACE(c) \ 1.211 + (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c)) 1.212 + 1.213 +/* Checks if the Unicode character is a space character.*/ 1.214 +U_CAPI UBool U_EXPORT2 1.215 +u_isspace(UChar32 c) { 1.216 + uint32_t props; 1.217 + GET_PROPS(c, props); 1.218 + return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c)); 1.219 +} 1.220 + 1.221 +U_CAPI UBool U_EXPORT2 1.222 +u_isJavaSpaceChar(UChar32 c) { 1.223 + uint32_t props; 1.224 + GET_PROPS(c, props); 1.225 + return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0); 1.226 +} 1.227 + 1.228 +/* Checks if the Unicode character is a whitespace character.*/ 1.229 +U_CAPI UBool U_EXPORT2 1.230 +u_isWhitespace(UChar32 c) { 1.231 + uint32_t props; 1.232 + GET_PROPS(c, props); 1.233 + return (UBool)( 1.234 + ((CAT_MASK(props)&U_GC_Z_MASK)!=0 && 1.235 + c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */ 1.236 + IS_THAT_ASCII_CONTROL_SPACE(c) 1.237 + ); 1.238 +} 1.239 + 1.240 +U_CAPI UBool U_EXPORT2 1.241 +u_isblank(UChar32 c) { 1.242 + if((uint32_t)c<=0x9f) { 1.243 + return c==9 || c==0x20; /* TAB or SPACE */ 1.244 + } else { 1.245 + /* Zs */ 1.246 + uint32_t props; 1.247 + GET_PROPS(c, props); 1.248 + return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR); 1.249 + } 1.250 +} 1.251 + 1.252 +U_CAPI UBool U_EXPORT2 1.253 +u_isUWhiteSpace(UChar32 c) { 1.254 + return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0; 1.255 +} 1.256 + 1.257 +/* Checks if the Unicode character is printable.*/ 1.258 +U_CAPI UBool U_EXPORT2 1.259 +u_isprint(UChar32 c) { 1.260 + uint32_t props; 1.261 + GET_PROPS(c, props); 1.262 + /* comparing ==0 returns FALSE for the categories mentioned */ 1.263 + return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0); 1.264 +} 1.265 + 1.266 +/** 1.267 + * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. 1.268 + * Implements UCHAR_POSIX_PRINT. 1.269 + * @internal 1.270 + */ 1.271 +U_CFUNC UBool 1.272 +u_isprintPOSIX(UChar32 c) { 1.273 + uint32_t props; 1.274 + GET_PROPS(c, props); 1.275 + /* 1.276 + * The only cntrl character in graph+blank is TAB (in blank). 1.277 + * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). 1.278 + */ 1.279 + return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c)); 1.280 +} 1.281 + 1.282 +U_CAPI UBool U_EXPORT2 1.283 +u_isgraph(UChar32 c) { 1.284 + uint32_t props; 1.285 + GET_PROPS(c, props); 1.286 + /* comparing ==0 returns FALSE for the categories mentioned */ 1.287 + return (UBool)((CAT_MASK(props)& 1.288 + (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 1.289 + ==0); 1.290 +} 1.291 + 1.292 +/** 1.293 + * Checks if c is in 1.294 + * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] 1.295 + * with space=\p{Whitespace} and Control=Cc. 1.296 + * Implements UCHAR_POSIX_GRAPH. 1.297 + * @internal 1.298 + */ 1.299 +U_CFUNC UBool 1.300 +u_isgraphPOSIX(UChar32 c) { 1.301 + uint32_t props; 1.302 + GET_PROPS(c, props); 1.303 + /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ 1.304 + /* comparing ==0 returns FALSE for the categories mentioned */ 1.305 + return (UBool)((CAT_MASK(props)& 1.306 + (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 1.307 + ==0); 1.308 +} 1.309 + 1.310 +U_CAPI UBool U_EXPORT2 1.311 +u_ispunct(UChar32 c) { 1.312 + uint32_t props; 1.313 + GET_PROPS(c, props); 1.314 + return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0); 1.315 +} 1.316 + 1.317 +/* Checks if the Unicode character can start a Unicode identifier.*/ 1.318 +U_CAPI UBool U_EXPORT2 1.319 +u_isIDStart(UChar32 c) { 1.320 + /* same as u_isalpha() */ 1.321 + uint32_t props; 1.322 + GET_PROPS(c, props); 1.323 + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0); 1.324 +} 1.325 + 1.326 +/* Checks if the Unicode character can be a Unicode identifier part other than starting the 1.327 + identifier.*/ 1.328 +U_CAPI UBool U_EXPORT2 1.329 +u_isIDPart(UChar32 c) { 1.330 + uint32_t props; 1.331 + GET_PROPS(c, props); 1.332 + return (UBool)( 1.333 + (CAT_MASK(props)& 1.334 + (U_GC_ND_MASK|U_GC_NL_MASK| 1.335 + U_GC_L_MASK| 1.336 + U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK) 1.337 + )!=0 || 1.338 + u_isIDIgnorable(c)); 1.339 +} 1.340 + 1.341 +/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/ 1.342 +U_CAPI UBool U_EXPORT2 1.343 +u_isIDIgnorable(UChar32 c) { 1.344 + if(c<=0x9f) { 1.345 + return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c); 1.346 + } else { 1.347 + uint32_t props; 1.348 + GET_PROPS(c, props); 1.349 + return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR); 1.350 + } 1.351 +} 1.352 + 1.353 +/*Checks if the Unicode character can start a Java identifier.*/ 1.354 +U_CAPI UBool U_EXPORT2 1.355 +u_isJavaIDStart(UChar32 c) { 1.356 + uint32_t props; 1.357 + GET_PROPS(c, props); 1.358 + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0); 1.359 +} 1.360 + 1.361 +/*Checks if the Unicode character can be a Java identifier part other than starting the 1.362 + * identifier. 1.363 + */ 1.364 +U_CAPI UBool U_EXPORT2 1.365 +u_isJavaIDPart(UChar32 c) { 1.366 + uint32_t props; 1.367 + GET_PROPS(c, props); 1.368 + return (UBool)( 1.369 + (CAT_MASK(props)& 1.370 + (U_GC_ND_MASK|U_GC_NL_MASK| 1.371 + U_GC_L_MASK| 1.372 + U_GC_SC_MASK|U_GC_PC_MASK| 1.373 + U_GC_MC_MASK|U_GC_MN_MASK) 1.374 + )!=0 || 1.375 + u_isIDIgnorable(c)); 1.376 +} 1.377 + 1.378 +U_CAPI int32_t U_EXPORT2 1.379 +u_charDigitValue(UChar32 c) { 1.380 + uint32_t props; 1.381 + int32_t value; 1.382 + GET_PROPS(c, props); 1.383 + value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START; 1.384 + if(value<=9) { 1.385 + return value; 1.386 + } else { 1.387 + return -1; 1.388 + } 1.389 +} 1.390 + 1.391 +U_CAPI double U_EXPORT2 1.392 +u_getNumericValue(UChar32 c) { 1.393 + uint32_t props; 1.394 + int32_t ntv; 1.395 + GET_PROPS(c, props); 1.396 + ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); 1.397 + 1.398 + if(ntv==UPROPS_NTV_NONE) { 1.399 + return U_NO_NUMERIC_VALUE; 1.400 + } else if(ntv<UPROPS_NTV_DIGIT_START) { 1.401 + /* decimal digit */ 1.402 + return ntv-UPROPS_NTV_DECIMAL_START; 1.403 + } else if(ntv<UPROPS_NTV_NUMERIC_START) { 1.404 + /* other digit */ 1.405 + return ntv-UPROPS_NTV_DIGIT_START; 1.406 + } else if(ntv<UPROPS_NTV_FRACTION_START) { 1.407 + /* small integer */ 1.408 + return ntv-UPROPS_NTV_NUMERIC_START; 1.409 + } else if(ntv<UPROPS_NTV_LARGE_START) { 1.410 + /* fraction */ 1.411 + int32_t numerator=(ntv>>4)-12; 1.412 + int32_t denominator=(ntv&0xf)+1; 1.413 + return (double)numerator/denominator; 1.414 + } else if(ntv<UPROPS_NTV_BASE60_START) { 1.415 + /* large, single-significant-digit integer */ 1.416 + double numValue; 1.417 + int32_t mant=(ntv>>5)-14; 1.418 + int32_t exp=(ntv&0x1f)+2; 1.419 + numValue=mant; 1.420 + 1.421 + /* multiply by 10^exp without math.h */ 1.422 + while(exp>=4) { 1.423 + numValue*=10000.; 1.424 + exp-=4; 1.425 + } 1.426 + switch(exp) { 1.427 + case 3: 1.428 + numValue*=1000.; 1.429 + break; 1.430 + case 2: 1.431 + numValue*=100.; 1.432 + break; 1.433 + case 1: 1.434 + numValue*=10.; 1.435 + break; 1.436 + case 0: 1.437 + default: 1.438 + break; 1.439 + } 1.440 + 1.441 + return numValue; 1.442 + } else if(ntv<UPROPS_NTV_RESERVED_START) { 1.443 + /* sexagesimal (base 60) integer */ 1.444 + int32_t numValue=(ntv>>2)-0xbf; 1.445 + int32_t exp=(ntv&3)+1; 1.446 + 1.447 + switch(exp) { 1.448 + case 4: 1.449 + numValue*=60*60*60*60; 1.450 + break; 1.451 + case 3: 1.452 + numValue*=60*60*60; 1.453 + break; 1.454 + case 2: 1.455 + numValue*=60*60; 1.456 + break; 1.457 + case 1: 1.458 + numValue*=60; 1.459 + break; 1.460 + case 0: 1.461 + default: 1.462 + break; 1.463 + } 1.464 + 1.465 + return numValue; 1.466 + } else { 1.467 + /* reserved */ 1.468 + return U_NO_NUMERIC_VALUE; 1.469 + } 1.470 +} 1.471 + 1.472 +U_CAPI int32_t U_EXPORT2 1.473 +u_digit(UChar32 ch, int8_t radix) { 1.474 + int8_t value; 1.475 + if((uint8_t)(radix-2)<=(36-2)) { 1.476 + value=(int8_t)u_charDigitValue(ch); 1.477 + if(value<0) { 1.478 + /* ch is not a decimal digit, try latin letters */ 1.479 + if(ch>=0x61 && ch<=0x7A) { 1.480 + value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */ 1.481 + } else if(ch>=0x41 && ch<=0x5A) { 1.482 + value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */ 1.483 + } else if(ch>=0xFF41 && ch<=0xFF5A) { 1.484 + value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */ 1.485 + } else if(ch>=0xFF21 && ch<=0xFF3A) { 1.486 + value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */ 1.487 + } 1.488 + } 1.489 + } else { 1.490 + value=-1; /* invalid radix */ 1.491 + } 1.492 + return (int8_t)((value<radix) ? value : -1); 1.493 +} 1.494 + 1.495 +U_CAPI UChar32 U_EXPORT2 1.496 +u_forDigit(int32_t digit, int8_t radix) { 1.497 + if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) { 1.498 + return 0; 1.499 + } else if(digit<10) { 1.500 + return (UChar32)(0x30+digit); 1.501 + } else { 1.502 + return (UChar32)((0x61-10)+digit); 1.503 + } 1.504 +} 1.505 + 1.506 +/* miscellaneous, and support for uprops.cpp -------------------------------- */ 1.507 + 1.508 +U_CAPI void U_EXPORT2 1.509 +u_getUnicodeVersion(UVersionInfo versionArray) { 1.510 + if(versionArray!=NULL) { 1.511 + uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH); 1.512 + } 1.513 +} 1.514 + 1.515 +U_CFUNC uint32_t 1.516 +u_getMainProperties(UChar32 c) { 1.517 + uint32_t props; 1.518 + GET_PROPS(c, props); 1.519 + return props; 1.520 +} 1.521 + 1.522 +U_CFUNC uint32_t 1.523 +u_getUnicodeProperties(UChar32 c, int32_t column) { 1.524 + U_ASSERT(column>=0); 1.525 + if(column>=propsVectorsColumns) { 1.526 + return 0; 1.527 + } else { 1.528 + uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c); 1.529 + return propsVectors[vecIndex+column]; 1.530 + } 1.531 +} 1.532 + 1.533 +U_CFUNC int32_t 1.534 +uprv_getMaxValues(int32_t column) { 1.535 + switch(column) { 1.536 + case 0: 1.537 + return indexes[UPROPS_MAX_VALUES_INDEX]; 1.538 + case 2: 1.539 + return indexes[UPROPS_MAX_VALUES_2_INDEX]; 1.540 + default: 1.541 + return 0; 1.542 + } 1.543 +} 1.544 + 1.545 +U_CAPI void U_EXPORT2 1.546 +u_charAge(UChar32 c, UVersionInfo versionArray) { 1.547 + if(versionArray!=NULL) { 1.548 + uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT; 1.549 + versionArray[0]=(uint8_t)(version>>4); 1.550 + versionArray[1]=(uint8_t)(version&0xf); 1.551 + versionArray[2]=versionArray[3]=0; 1.552 + } 1.553 +} 1.554 + 1.555 +U_CAPI UScriptCode U_EXPORT2 1.556 +uscript_getScript(UChar32 c, UErrorCode *pErrorCode) { 1.557 + uint32_t scriptX; 1.558 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.559 + return USCRIPT_INVALID_CODE; 1.560 + } 1.561 + if((uint32_t)c>0x10ffff) { 1.562 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.563 + return USCRIPT_INVALID_CODE; 1.564 + } 1.565 + scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 1.566 + if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 1.567 + return (UScriptCode)scriptX; 1.568 + } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) { 1.569 + return USCRIPT_COMMON; 1.570 + } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) { 1.571 + return USCRIPT_INHERITED; 1.572 + } else { 1.573 + return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK]; 1.574 + } 1.575 +} 1.576 + 1.577 +U_CAPI UBool U_EXPORT2 1.578 +uscript_hasScript(UChar32 c, UScriptCode sc) { 1.579 + const uint16_t *scx; 1.580 + uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 1.581 + if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 1.582 + return sc==(UScriptCode)scriptX; 1.583 + } 1.584 + 1.585 + scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 1.586 + if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { 1.587 + scx=scriptExtensions+scx[1]; 1.588 + } 1.589 + if(sc>=USCRIPT_CODE_LIMIT) { 1.590 + /* Guard against bogus input that would make us go past the Script_Extensions terminator. */ 1.591 + return FALSE; 1.592 + } 1.593 + while(sc>*scx) { 1.594 + ++scx; 1.595 + } 1.596 + return sc==(*scx&0x7fff); 1.597 +} 1.598 + 1.599 +U_CAPI int32_t U_EXPORT2 1.600 +uscript_getScriptExtensions(UChar32 c, 1.601 + UScriptCode *scripts, int32_t capacity, 1.602 + UErrorCode *pErrorCode) { 1.603 + uint32_t scriptX; 1.604 + int32_t length; 1.605 + const uint16_t *scx; 1.606 + uint16_t sx; 1.607 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.608 + return 0; 1.609 + } 1.610 + if(capacity<0 || (capacity>0 && scripts==NULL)) { 1.611 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.612 + return 0; 1.613 + } 1.614 + scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 1.615 + if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 1.616 + if(capacity==0) { 1.617 + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1.618 + } else { 1.619 + scripts[0]=(UScriptCode)scriptX; 1.620 + } 1.621 + return 1; 1.622 + } 1.623 + 1.624 + scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 1.625 + if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { 1.626 + scx=scriptExtensions+scx[1]; 1.627 + } 1.628 + length=0; 1.629 + do { 1.630 + sx=*scx++; 1.631 + if(length<capacity) { 1.632 + scripts[length]=(UScriptCode)(sx&0x7fff); 1.633 + } 1.634 + ++length; 1.635 + } while(sx<0x8000); 1.636 + if(length>capacity) { 1.637 + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1.638 + } 1.639 + return length; 1.640 +} 1.641 + 1.642 +U_CAPI UBlockCode U_EXPORT2 1.643 +ublock_getCode(UChar32 c) { 1.644 + return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT); 1.645 +} 1.646 + 1.647 +/* property starts for UnicodeSet ------------------------------------------- */ 1.648 + 1.649 +static UBool U_CALLCONV 1.650 +_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 1.651 + /* add the start code point to the USet */ 1.652 + const USetAdder *sa=(const USetAdder *)context; 1.653 + sa->add(sa->set, start); 1.654 + return TRUE; 1.655 +} 1.656 + 1.657 +#define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) 1.658 + 1.659 +U_CFUNC void U_EXPORT2 1.660 +uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 1.661 + if(U_FAILURE(*pErrorCode)) { 1.662 + return; 1.663 + } 1.664 + 1.665 + /* add the start code point of each same-value range of the main trie */ 1.666 + utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); 1.667 + 1.668 + /* add code points with hardcoded properties, plus the ones following them */ 1.669 + 1.670 + /* add for u_isblank() */ 1.671 + USET_ADD_CP_AND_NEXT(sa, TAB); 1.672 + 1.673 + /* add for IS_THAT_CONTROL_SPACE() */ 1.674 + sa->add(sa->set, CR+1); /* range TAB..CR */ 1.675 + sa->add(sa->set, 0x1c); 1.676 + sa->add(sa->set, 0x1f+1); 1.677 + USET_ADD_CP_AND_NEXT(sa, NL); 1.678 + 1.679 + /* add for u_isIDIgnorable() what was not added above */ 1.680 + sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ 1.681 + sa->add(sa->set, HAIRSP); 1.682 + sa->add(sa->set, RLM+1); 1.683 + sa->add(sa->set, INHSWAP); 1.684 + sa->add(sa->set, NOMDIG+1); 1.685 + USET_ADD_CP_AND_NEXT(sa, ZWNBSP); 1.686 + 1.687 + /* add no-break spaces for u_isWhitespace() what was not added above */ 1.688 + USET_ADD_CP_AND_NEXT(sa, NBSP); 1.689 + USET_ADD_CP_AND_NEXT(sa, FIGURESP); 1.690 + USET_ADD_CP_AND_NEXT(sa, NNBSP); 1.691 + 1.692 + /* add for u_digit() */ 1.693 + sa->add(sa->set, U_a); 1.694 + sa->add(sa->set, U_z+1); 1.695 + sa->add(sa->set, U_A); 1.696 + sa->add(sa->set, U_Z+1); 1.697 + sa->add(sa->set, U_FW_a); 1.698 + sa->add(sa->set, U_FW_z+1); 1.699 + sa->add(sa->set, U_FW_A); 1.700 + sa->add(sa->set, U_FW_Z+1); 1.701 + 1.702 + /* add for u_isxdigit() */ 1.703 + sa->add(sa->set, U_f+1); 1.704 + sa->add(sa->set, U_F+1); 1.705 + sa->add(sa->set, U_FW_f+1); 1.706 + sa->add(sa->set, U_FW_F+1); 1.707 + 1.708 + /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ 1.709 + sa->add(sa->set, WJ); /* range WJ..NOMDIG */ 1.710 + sa->add(sa->set, 0xfff0); 1.711 + sa->add(sa->set, 0xfffb+1); 1.712 + sa->add(sa->set, 0xe0000); 1.713 + sa->add(sa->set, 0xe0fff+1); 1.714 + 1.715 + /* add for UCHAR_GRAPHEME_BASE and others */ 1.716 + USET_ADD_CP_AND_NEXT(sa, CGJ); 1.717 +} 1.718 + 1.719 +U_CFUNC void U_EXPORT2 1.720 +upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 1.721 + if(U_FAILURE(*pErrorCode)) { 1.722 + return; 1.723 + } 1.724 + 1.725 + /* add the start code point of each same-value range of the properties vectors trie */ 1.726 + if(propsVectorsColumns>0) { 1.727 + /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */ 1.728 + utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); 1.729 + } 1.730 +}