/*
*******************************************************************************
*
*   Copyright (C) 2002-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uprops.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002feb24
*   created by: Markus W. Scherer
*
*   Implementations for mostly non-core Unicode character properties
*   stored in uprops.icu.
*
*   With the APIs implemented here, almost all properties files and
*   their associated implementation files are used from this file,
*   including those for normalization and case mappings.
*/

#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/unorm2.h"
#include "unicode/uscript.h"
#include "unicode/ustring.h"
#include "cstring.h"
#include "normalizer2impl.h"
#include "ucln_cmn.h"
#include "umutex.h"
#include "ubidi_props.h"
#include "uprops.h"
#include "ucase.h"
#include "ustr_imp.h"

/* Number of elements in a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

U_NAMESPACE_USE

/* Shorthand for fetching the bidi properties singleton used by the ubidi_*() helpers below. */
#define GET_BIDI_PROPS() ubidi_getSingleton()

/* general properties API functions ----------------------------------------- */

struct BinaryProperty;

/*
 * Signature of a per-property membership test:
 * returns whether code point c has the binary property `which`.
 * `prop` is the table row that the callback was taken from.
 */
typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which);

/* One row of the binProps[] dispatch table. */
struct BinaryProperty {
    int32_t column;  // SRC_PROPSVEC column, or "source" if mask==0
    uint32_t mask;   // bit mask applied to the properties word; 0 means "handled in code"
    BinaryPropertyContains *contains;  // callback that evaluates the property
};

/*
 * Table-driven test: the property is set if the properties word for c
 * in column prop.column has any bit of prop.mask set.
 */
static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) {
    /* systematic, directly stored properties */
    return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0;
}

/* Forwards to ucase_hasBinaryProperty(); `which` selects the property there. */
static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
    return ucase_hasBinaryProperty(c, which);
}

/* Forwards to ubidi_isBidiControl() with the bidi properties singleton. */
static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return ubidi_isBidiControl(GET_BIDI_PROPS(), c);
}

/* Forwards to ubidi_isMirrored() with the bidi properties singleton. */
static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return ubidi_isMirrored(GET_BIDI_PROPS(), c);
}

/* Forwards to ubidi_isJoinControl() with the bidi properties singleton. */
static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return ubidi_isJoinControl(GET_BIDI_PROPS(), c);
}

#if UCONFIG_NO_NORMALIZATION
/* Normalization is compiled out: always reports FALSE. */
static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) {
    return FALSE;
}
#else
/*
 * Returns TRUE if the NFC implementation could be obtained and
 * reports isCompNo() for the norm16 value of c; FALSE otherwise
 * (including when obtaining the NFC implementation fails).
 */
static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
    UErrorCode errorCode=U_ZERO_ERROR;
    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
    return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c));
}
#endif

// UCHAR_NF*_INERT properties
#if UCONFIG_NO_NORMALIZATION
/* Normalization is compiled out: always reports FALSE. */
static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) {
    return FALSE;
}
#else
/*
 * Picks the normalizer for `which` and asks it whether c is inert.
 * The normalization mode is computed as an offset: (which-UCHAR_NFD_INERT)+UNORM_NFD.
 * NOTE(review): this relies on the UCHAR_NF*_INERT constants being declared
 * in the same order as the UNORM_* modes starting at UNORM_NFD -- confirm
 * against uchar.h/unorm.h before reordering either enum.
 * Returns FALSE if the normalizer instance cannot be obtained.
 */
static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
    UErrorCode errorCode=U_ZERO_ERROR;
    const Normalizer2 *norm2=Normalizer2Factory::getInstance(
        (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
    return U_SUCCESS(errorCode) && norm2->isInert(c);
}
#endif

#if UCONFIG_NO_NORMALIZATION
/* Normalization is compiled out: always reports FALSE. */
static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) {
    return FALSE;
}
#else
/*
 * Tests whether case folding changes the canonical decomposition of c.
 *
 * Steps, as implemented below:
 *  - Ask the NFC normalizer for the decomposition of c (into nfd).
 *  - If the decomposition is exactly one code point (one BMP unit, or one
 *    supplementary code point occupying the whole string), continue with
 *    that code point; otherwise mark c as U_SENTINEL so that the string
 *    path is taken.
 *  - Single code point: the result is whether ucase_toFullFolding()
 *    returns a non-negative value.
 *  - String: fold nfd into a stack buffer and report whether the folded
 *    text differs from nfd.
 *
 * Returns FALSE if the normalizer cannot be obtained, if c is negative and
 * has no decomposition, or if u_strFoldCase() sets a failure code.
 */
static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    UnicodeString nfd;
    UErrorCode errorCode=U_ZERO_ERROR;
    const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode);
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    if(nfcNorm2->getDecomposition(c, nfd)) {
        /* c has a decomposition */
        if(nfd.length()==1) {
            c=nfd[0];  /* single BMP code point */
        } else if(nfd.length()<=U16_MAX_LENGTH &&
                  nfd.length()==U16_LENGTH(c=nfd.char32At(0))
        ) {
            /* single supplementary code point */
            /* (c was already assigned inside the condition above) */
        } else {
            /* more than one code point: use the string path below */
            c=U_SENTINEL;
        }
    } else if(c<0) {
        return FALSE;  /* protect against bad input */
    }
    if(c>=0) {
        /* single code point */
        const UCaseProps *csp=ucase_getSingleton();
        const UChar *resultString;  /* required output argument; its value is not used here */
        return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
    } else {
        /* guess some large but stack-friendly capacity */
        UChar dest[2*UCASE_MAX_STRING_LENGTH];
        int32_t destLength;
        destLength=u_strFoldCase(dest, LENGTHOF(dest),
                                  nfd.getBuffer(), nfd.length(),
                                  U_FOLD_CASE_DEFAULT, &errorCode);
        /* changed if folding succeeded and the folded text is not equal to nfd */
        return (UBool)(U_SUCCESS(errorCode) &&
                       0!=u_strCompare(nfd.getBuffer(), nfd.length(),
                                       dest, destLength, FALSE));
    }
}
#endif

#if UCONFIG_NO_NORMALIZATION
/* Normalization is compiled out: always reports FALSE. */
static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) {
    return FALSE;
}
#else
/*
 * Runs the NFKC_CF implementation's compose() over the one-code-point
 * string for c and reports whether the output differs from the input.
 * Returns FALSE if the NFKC_CF implementation cannot be obtained or if
 * an error code is set while composing.
 */
static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    UErrorCode errorCode=U_ZERO_ERROR;
    const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode);
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    UnicodeString src(c);
    UnicodeString dest;
    {
        // The ReorderingBuffer must be in a block because its destructor
        // needs to release dest's buffer before we look at its contents.
        ReorderingBuffer buffer(*kcf, dest);
        // Small destCapacity for NFKC_CF(c).
        if(buffer.init(5, errorCode)) {
            const UChar *srcArray=src.getBuffer();
            kcf->compose(srcArray, srcArray+src.length(), FALSE,
                          TRUE, buffer, errorCode);
        }
    }
    return U_SUCCESS(errorCode) && dest!=src;
}
#endif

#if UCONFIG_NO_NORMALIZATION
/* Normalization is compiled out: always reports FALSE. */
static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) {
    return FALSE;
}
#else
/*
 * Returns the NFC implementation's isCanonSegmentStarter(c).
 * The && chain short-circuits: the query is only made if the implementation
 * was obtained and ensureCanonIterData() returned true; otherwise FALSE.
 */
static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    UErrorCode errorCode=U_ZERO_ERROR;
    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
    return
        U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) &&
        impl->isCanonSegmentStarter(c);
}
#endif

/* Forwards to u_isalnumPOSIX(). */
static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return u_isalnumPOSIX(c);
}

/* Forwards to u_isblank(). */
static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return u_isblank(c);
}

/* Forwards to u_isgraphPOSIX(). */
static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return u_isgraphPOSIX(c);
}

/* Forwards to u_isprintPOSIX(). */
static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return u_isprintPOSIX(c);
}

/* Forwards to u_isxdigit(). */
static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return u_isxdigit(c);
}

static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
    /*
     * column and mask values for binary properties from u_getUnicodeProperties().
     * Must be in order of corresponding UProperty,
     * and there must be exactly one entry per binary UProperty.
     *
     * Properties with mask==0 are handled in code.
     * For them, column is the UPropertySource value.
     *
     * Do not reorder, insert or remove rows without making the same change
     * to the UProperty constants: the table is indexed directly by UProperty.
     */
    { 1,                U_MASK(UPROPS_ALPHABETIC), defaultContains },
    { 1,                U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains },
    { UPROPS_SRC_BIDI,  0, isBidiControl },
    { UPROPS_SRC_BIDI,  0, isMirrored },
    { 1,                U_MASK(UPROPS_DASH), defaultContains },
    { 1,                U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains },
    { 1,                U_MASK(UPROPS_DEPRECATED), defaultContains },
    { 1,                U_MASK(UPROPS_DIACRITIC), defaultContains },
    { 1,                U_MASK(UPROPS_EXTENDER), defaultContains },
    { UPROPS_SRC_NFC,   0, hasFullCompositionExclusion },
    { 1,                U_MASK(UPROPS_GRAPHEME_BASE), defaultContains },
    { 1,                U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains },
    { 1,                U_MASK(UPROPS_GRAPHEME_LINK), defaultContains },
    { 1,                U_MASK(UPROPS_HEX_DIGIT), defaultContains },
    { 1,                U_MASK(UPROPS_HYPHEN), defaultContains },
    { 1,                U_MASK(UPROPS_ID_CONTINUE), defaultContains },
    { 1,                U_MASK(UPROPS_ID_START), defaultContains },
    { 1,                U_MASK(UPROPS_IDEOGRAPHIC), defaultContains },
    { 1,                U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains },
    { 1,                U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains },
    { UPROPS_SRC_BIDI,  0, isJoinControl },
    { 1,                U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains },
    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_LOWERCASE
    { 1,                U_MASK(UPROPS_MATH), defaultContains },
    { 1,                U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains },
    { 1,                U_MASK(UPROPS_QUOTATION_MARK), defaultContains },
    { 1,                U_MASK(UPROPS_RADICAL), defaultContains },
    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_SOFT_DOTTED
    { 1,                U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains },
    { 1,                U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains },
    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_UPPERCASE
    { 1,                U_MASK(UPROPS_WHITE_SPACE), defaultContains },
    { 1,                U_MASK(UPROPS_XID_CONTINUE), defaultContains },
    { 1,                U_MASK(UPROPS_XID_START), defaultContains },
    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CASE_SENSITIVE
    { 1,                U_MASK(UPROPS_S_TERM), defaultContains },
    { 1,                U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains },
    { UPROPS_SRC_NFC,   0, isNormInert },  // UCHAR_NFD_INERT
    { UPROPS_SRC_NFKC,  0, isNormInert },  // UCHAR_NFKD_INERT
    { UPROPS_SRC_NFC,   0, isNormInert },  // UCHAR_NFC_INERT
    { UPROPS_SRC_NFKC,  0, isNormInert },  // UCHAR_NFKC_INERT
    { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter },
    { 1,                U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains },
    { 1,                U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains },
    { UPROPS_SRC_CHAR_AND_PROPSVEC,  0, isPOSIX_alnum },
    { UPROPS_SRC_CHAR,  0, isPOSIX_blank },
    { UPROPS_SRC_CHAR,  0, isPOSIX_graph },
    { UPROPS_SRC_CHAR,  0, isPOSIX_print },
    { UPROPS_SRC_CHAR,  0, isPOSIX_xdigit },
    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CASED
    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CASE_IGNORABLE
    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_LOWERCASED
    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_UPPERCASED
    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_TITLECASED
    { UPROPS_SRC_CASE_AND_NORM,  0, changesWhenCasefolded },
    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_CASEMAPPED
    { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded }
};

/*
 * Public API: returns whether code point c has the binary property `which`.
 * Returns FALSE for any `which` outside [UCHAR_BINARY_START, UCHAR_BINARY_LIMIT).
 * Otherwise dispatches through the binProps[] row for `which`.
 * NOTE(review): binProps is indexed by `which` without subtracting
 * UCHAR_BINARY_START, which presumes that constant is 0 -- confirm in uchar.h.
 */
U_CAPI UBool U_EXPORT2
u_hasBinaryProperty(UChar32 c, UProperty which) {
    /* c is range-checked in the functions that are called from here */
    if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
        /* not a known binary property */
        return FALSE;
    } else {
        const BinaryProperty &prop=binProps[which];
        return prop.contains(prop, c, which);
    }
}

struct IntProperty;

/* Signature of a per-property value getter for code point c. */
typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which);
/* Signature of a per-property maximum-value getter. */
typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which);

/* One row of the intProps[] dispatch table. */
struct IntProperty {
    int32_t column;  // SRC_PROPSVEC column, or "source" if mask==0
    uint32_t mask;   // bit mask applied to the properties word; 0 means "handled in code"
    int32_t shift;  // =maxValue if getMaxValueFromShift() is used
    IntPropertyGetValue *getValue;        // callback returning the value for a code point
    IntPropertyGetMaxValue *getMaxValue;  // callback returning the maximum value
};

/*
 * Table-driven getter: masks the properties word for c in column
 * prop.column with prop.mask and shifts the result right by prop.shift.
 * The masked word is cast to int32_t before the shift.
 */
static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) {
    /* systematic, directly stored properties */
    return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift;
}

/* Same mask-and-shift as defaultGetValue(), applied to uprv_getMaxValues(prop.column). */
static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) {
    return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift;
}

/* For rows that use this getter, the `shift` field holds the maximum value itself. */
static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) {
    return prop.shift;
}

/* Returns u_charDirection(c) as an int32_t. */
static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return (int32_t)u_charDirection(c);
}

/* Returns ubidi_getPairedBracketType() for c as an int32_t. */
static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c);
}

/* Maximum value for a bidi-sourced property; ubidi_getMaxValue() selects by `which`. */
static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
    return ubidi_getMaxValue(GET_BIDI_PROPS(), which);
}

#if UCONFIG_NO_NORMALIZATION
/* Normalization is compiled out: always reports 0. */
static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) {
    return 0;
}
#else
/* Forwards to u_getCombiningClass(). */
static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return u_getCombiningClass(c);
}
#endif

/* Returns u_charType(c) as an int32_t. */
static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return (int32_t)u_charType(c);
}

/* Forwards to ubidi_getJoiningGroup() with the bidi properties singleton. */
static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
}

/* Forwards to ubidi_getJoiningType() with the bidi properties singleton. */
static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
}

/*
 * Extracts the numeric-type-value field from the main properties word of c
 * and converts it to a numeric type with UPROPS_NTV_GET_TYPE().
 */
static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c));
    return UPROPS_NTV_GET_TYPE(ntv);
}

/*
 * Returns uscript_getScript(c) as an int32_t.
 * The error code is local and is not inspected after the call.
 */
static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    UErrorCode errorCode=U_ZERO_ERROR;
    return (int32_t)uscript_getScript(c, &errorCode);
}

/*
 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
 * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
 */
static const UHangulSyllableType gcbToHst[]={
    U_HST_NOT_APPLICABLE,   /* U_GCB_OTHER */
    U_HST_NOT_APPLICABLE,   /* U_GCB_CONTROL */
    U_HST_NOT_APPLICABLE,   /* U_GCB_CR */
    U_HST_NOT_APPLICABLE,   /* U_GCB_EXTEND */
    U_HST_LEADING_JAMO,     /* U_GCB_L */
    U_HST_NOT_APPLICABLE,   /* U_GCB_LF */
    U_HST_LV_SYLLABLE,      /* U_GCB_LV */
    U_HST_LVT_SYLLABLE,     /* U_GCB_LVT */
    U_HST_TRAILING_JAMO,    /* U_GCB_T */
    U_HST_VOWEL_JAMO        /* U_GCB_V */
    /*
     * Omit GCB values beyond what we need for hst.
     * The code below checks for the array length.
     */
};

/*
 * Derives the Hangul syllable type of c from its Grapheme_Cluster_Break
 * value (read from properties column 2) via gcbToHst[].
 * GCB values at or beyond the end of the table map to U_HST_NOT_APPLICABLE.
 */
static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    /* see comments on gcbToHst[] above */
    int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
    if(gcb<LENGTHOF(gcbToHst)) {
        return gcbToHst[gcb];
    } else {
        return U_HST_NOT_APPLICABLE;
    }
}

#if UCONFIG_NO_NORMALIZATION
/* Normalization is compiled out: always reports 0. */
static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) {
    return 0;
}
#else
/*
 * Returns unorm_getQuickCheck() for c in the mode that corresponds to `which`.
 * The mode is computed as an offset: (which-UCHAR_NFD_QUICK_CHECK)+UNORM_NFD.
 * NOTE(review): relies on the UCHAR_NF*_QUICK_CHECK constants being declared
 * in the same order as the UNORM_* modes starting at UNORM_NFD -- confirm
 * against uchar.h/unorm.h.
 */
static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) {
    return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
}
#endif

#if UCONFIG_NO_NORMALIZATION
/* Normalization is compiled out: always reports 0. */
static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) {
    return 0;
}
#else
/* Returns the bits above the low 8 of unorm_getFCD16(c), i.e. the value shifted right by 8. */
static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return unorm_getFCD16(c)>>8;
}
#endif

#if UCONFIG_NO_NORMALIZATION
/* Normalization is compiled out: always reports 0. */
static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) {
    return 0;
}
#else
/* Returns the low 8 bits of unorm_getFCD16(c). */
static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    return unorm_getFCD16(c)&0xff;
}
#endif

static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
    /*
     * column, mask and shift values for int-value properties from u_getUnicodeProperties().
     * Must be in order of corresponding UProperty,
     * and there must be exactly one entry per int UProperty.
     *
     * Properties with mask==0 are handled in code.
     * For them, column is the UPropertySource value.
     *
     * Rows that use getMaxValueFromShift store the maximum value in the
     * third (shift) field rather than a bit shift.
     *
     * Do not reorder, insert or remove rows without making the same change
     * to the UProperty constants: the table is indexed by which-UCHAR_INT_START.
     */
    { UPROPS_SRC_BIDI,  0, 0,                               getBiDiClass, biDiGetMaxValue },
    { 0,                UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue },
    { UPROPS_SRC_NFC,   0, 0xff,                            getCombiningClass, getMaxValueFromShift },
    { 2,                UPROPS_DT_MASK, 0,                  defaultGetValue, defaultGetMaxValue },
    { 0,                UPROPS_EA_MASK, UPROPS_EA_SHIFT,    defaultGetValue, defaultGetMaxValue },
    { UPROPS_SRC_CHAR,  0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift },
    { UPROPS_SRC_BIDI,  0, 0,                               getJoiningGroup, biDiGetMaxValue },
    { UPROPS_SRC_BIDI,  0, 0,                               getJoiningType, biDiGetMaxValue },
    { 2,                UPROPS_LB_MASK, UPROPS_LB_SHIFT,    defaultGetValue, defaultGetMaxValue },
    { UPROPS_SRC_CHAR,  0, (int32_t)U_NT_COUNT-1,           getNumericType, getMaxValueFromShift },
    { 0,                UPROPS_SCRIPT_MASK, 0,              getScript, defaultGetMaxValue },
    { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1,       getHangulSyllableType, getMaxValueFromShift },
    // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
    { UPROPS_SRC_NFC,   0, (int32_t)UNORM_YES,              getNormQuickCheck, getMaxValueFromShift },
    // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
    { UPROPS_SRC_NFKC,  0, (int32_t)UNORM_YES,              getNormQuickCheck, getMaxValueFromShift },
    // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE
    { UPROPS_SRC_NFC,   0, (int32_t)UNORM_MAYBE,            getNormQuickCheck, getMaxValueFromShift },
    // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE
    { UPROPS_SRC_NFKC,  0, (int32_t)UNORM_MAYBE,            getNormQuickCheck, getMaxValueFromShift },
    { UPROPS_SRC_NFC,   0, 0xff,                            getLeadCombiningClass, getMaxValueFromShift },
    { UPROPS_SRC_NFC,   0, 0xff,                            getTrailCombiningClass, getMaxValueFromShift },
    { 2,                UPROPS_GCB_MASK, UPROPS_GCB_SHIFT,  defaultGetValue, defaultGetMaxValue },
    { 2,                UPROPS_SB_MASK, UPROPS_SB_SHIFT,    defaultGetValue, defaultGetMaxValue },
    { 2,                UPROPS_WB_MASK, UPROPS_WB_SHIFT,    defaultGetValue, defaultGetMaxValue },
    { UPROPS_SRC_BIDI,  0, 0,                               getBiDiPairedBracketType, biDiGetMaxValue },
};

/*
 * Public API: returns the value of property `which` for code point c.
 *  - binary properties: the result of the binProps[] callback (as an int32_t);
 *  - int properties: the result of the intProps[] callback;
 *  - UCHAR_GENERAL_CATEGORY_MASK: U_MASK(u_charType(c));
 *  - anything else: 0.
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which) {
    if(which<UCHAR_INT_START) {
        if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
            const BinaryProperty &prop=binProps[which];
            return prop.contains(prop, c, which);
        }
    } else if(which<UCHAR_INT_LIMIT) {
        const IntProperty &prop=intProps[which-UCHAR_INT_START];
        return prop.getValue(prop, c, which);
    } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
        return U_MASK(u_charType(c));
    }
    return 0;  // undefined
}

/* Public API: minimum value of a property; always 0, regardless of `which`. */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMinValue(UProperty /*which*/) {
    return 0; /* all binary/enum/int properties have a minimum value of 0 */
}

/*
 * Public API: maximum value of property `which`.
 * Returns 1 for binary properties, the intProps[] row's getMaxValue()
 * result for int properties, and -1 for anything else.
 */
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMaxValue(UProperty which) {
    if(which<UCHAR_INT_START) {
        if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
            return 1;  // maximum TRUE for all binary properties
        }
    } else if(which<UCHAR_INT_LIMIT) {
        const IntProperty &prop=intProps[which-UCHAR_INT_START];
        return prop.getMaxValue(prop, which);
    }
    return -1;  // undefined
}

/*
 * Returns the UPropertySource for property `which`.
 *
 * For binary and int properties the answer comes from the dispatch tables:
 * a row with a non-zero mask yields UPROPS_SRC_PROPSVEC, and a row with
 * mask==0 stores the source in its `column` field.
 * The remaining ranges are mapped by the explicit switch statements below;
 * any property not listed yields UPROPS_SRC_NONE.
 */
U_CFUNC UPropertySource U_EXPORT2
uprops_getSource(UProperty which) {
    if(which<UCHAR_BINARY_START) {
        return UPROPS_SRC_NONE; /* undefined */
    } else if(which<UCHAR_BINARY_LIMIT) {
        const BinaryProperty &prop=binProps[which];
        if(prop.mask!=0) {
            return UPROPS_SRC_PROPSVEC;
        } else {
            return (UPropertySource)prop.column;
        }
    } else if(which<UCHAR_INT_START) {
        return UPROPS_SRC_NONE; /* undefined */
    } else if(which<UCHAR_INT_LIMIT) {
        const IntProperty &prop=intProps[which-UCHAR_INT_START];
        if(prop.mask!=0) {
            return UPROPS_SRC_PROPSVEC;
        } else {
            return (UPropertySource)prop.column;
        }
    } else if(which<UCHAR_STRING_START) {
        /* properties between the int range and the string range */
        switch(which) {
        case UCHAR_GENERAL_CATEGORY_MASK:
        case UCHAR_NUMERIC_VALUE:
            return UPROPS_SRC_CHAR;

        default:
            return UPROPS_SRC_NONE;
        }
    } else if(which<UCHAR_STRING_LIMIT) {
        /* string-valued properties */
        switch(which) {
        case UCHAR_AGE:
            return UPROPS_SRC_PROPSVEC;

        case UCHAR_BIDI_MIRRORING_GLYPH:
            return UPROPS_SRC_BIDI;

        case UCHAR_CASE_FOLDING:
        case UCHAR_LOWERCASE_MAPPING:
        case UCHAR_SIMPLE_CASE_FOLDING:
        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
        case UCHAR_SIMPLE_TITLECASE_MAPPING:
        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
        case UCHAR_TITLECASE_MAPPING:
        case UCHAR_UPPERCASE_MAPPING:
            return UPROPS_SRC_CASE;

        case UCHAR_ISO_COMMENT:
        case UCHAR_NAME:
        case UCHAR_UNICODE_1_NAME:
            return UPROPS_SRC_NAMES;

        default:
            return UPROPS_SRC_NONE;
        }
    } else {
        /* properties at or beyond UCHAR_STRING_LIMIT */
        switch(which) {
        case UCHAR_SCRIPT_EXTENSIONS:
            return UPROPS_SRC_PROPSVEC;
        default:
            return UPROPS_SRC_NONE; /* undefined */
        }
    }
}

#if !UCONFIG_NO_NORMALIZATION

/*
 * Public API: computes the FC_NFKC_Closure mapping of c on the fly.
 *
 * Arguments:
 *   c             the code point to look up
 *   dest          output buffer; may be NULL only if destCapacity is 0
 *   destCapacity  capacity of dest in UChars; must not be negative
 *   pErrorCode    in/out error code; the function does nothing and
 *                 returns 0 if it is NULL or already indicates a failure
 *
 * Returns the length written or needed, as reported by
 * UnicodeString::extract() or u_terminateUChars(). The result is the empty
 * string (terminated via u_terminateUChars with length 0) when no mapping
 * needs to be added or when an error occurred during normalization.
 * Sets U_ILLEGAL_ARGUMENT_ERROR for an invalid dest/destCapacity pair.
 */
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    // Compute the FC_NFKC_Closure on the fly:
    // We have the API for complete coverage of Unicode properties, although
    // this value by itself is not useful via API.
    // (What could be useful is a custom normalization table that combines
    // case folding and NFKC.)
    // For the derivation, see Unicode's DerivedNormalizationProps.txt.
    const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode);
    const UCaseProps *csp=ucase_getSingleton();
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    // first: b = NFKC(Fold(a))
    UnicodeString folded1String;
    const UChar *folded1;
    int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFAULT);
    if(folded1Length<0) {
        // Negative result: treated here as "case folding leaves c unchanged".
        const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc);
        if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) {
            return u_terminateUChars(dest, destCapacity, 0, pErrorCode);  // c does not change at all under CaseFolding+NFKC
        }
        folded1String.setTo(c);
    } else {
        if(folded1Length>UCASE_MAX_STRING_LENGTH) {
            // The returned value is used as the folded code point itself.
            folded1String.setTo(folded1Length);
        } else {
            // The result is the string folded1[0..folded1Length); FALSE = not NUL-terminated.
            folded1String.setTo(FALSE, folded1, folded1Length);
        }
    }
    UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode);
    // second: c = NFKC(Fold(b))
    UnicodeString folded2String(kc1);
    UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode);
    // if (c != b) add the mapping from a to c
    if(U_FAILURE(*pErrorCode) || kc1==kc2) {
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    } else {
        return kc2.extract(dest, destCapacity, *pErrorCode);
    }
}

#endif