intl/icu/source/common/uprops.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2002-2013, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: uprops.cpp
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2002feb24
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * Implementations for mostly non-core Unicode character properties
michael@0 17 * stored in uprops.icu.
michael@0 18 *
michael@0 19 * With the APIs implemented here, almost all properties files and
michael@0 20 * their associated implementation files are used from this file,
michael@0 21 * including those for normalization and case mappings.
michael@0 22 */
michael@0 23
michael@0 24 #include "unicode/utypes.h"
michael@0 25 #include "unicode/uchar.h"
michael@0 26 #include "unicode/unorm2.h"
michael@0 27 #include "unicode/uscript.h"
michael@0 28 #include "unicode/ustring.h"
michael@0 29 #include "cstring.h"
michael@0 30 #include "normalizer2impl.h"
michael@0 31 #include "ucln_cmn.h"
michael@0 32 #include "umutex.h"
michael@0 33 #include "ubidi_props.h"
michael@0 34 #include "uprops.h"
michael@0 35 #include "ucase.h"
michael@0 36 #include "ustr_imp.h"
michael@0 37
/*
 * Number of elements in a statically sized array, as an int32_t.
 * The whole replacement list is parenthesized so that the cast cannot be
 * split apart by surrounding operators (for example sizeof or postfix
 * operators applied to the macro result).
 * Only valid for real arrays, not for pointers.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
michael@0 39
michael@0 40 U_NAMESPACE_USE
michael@0 41
/* Shorthand for fetching the BiDi properties singleton passed to the ubidi_*() helpers below. */
#define GET_BIDI_PROPS() (ubidi_getSingleton())
michael@0 43
michael@0 44 /* general properties API functions ----------------------------------------- */
michael@0 45
michael@0 46 struct BinaryProperty;
michael@0 47
michael@0 48 typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which);
michael@0 49
michael@0 50 struct BinaryProperty {
michael@0 51 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0
michael@0 52 uint32_t mask;
michael@0 53 BinaryPropertyContains *contains;
michael@0 54 };
michael@0 55
michael@0 56 static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) {
michael@0 57 /* systematic, directly stored properties */
michael@0 58 return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0;
michael@0 59 }
michael@0 60
michael@0 61 static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
michael@0 62 return ucase_hasBinaryProperty(c, which);
michael@0 63 }
michael@0 64
michael@0 65 static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 66 return ubidi_isBidiControl(GET_BIDI_PROPS(), c);
michael@0 67 }
michael@0 68
michael@0 69 static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 70 return ubidi_isMirrored(GET_BIDI_PROPS(), c);
michael@0 71 }
michael@0 72
michael@0 73 static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 74 return ubidi_isJoinControl(GET_BIDI_PROPS(), c);
michael@0 75 }
michael@0 76
michael@0 77 #if UCONFIG_NO_NORMALIZATION
michael@0 78 static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) {
michael@0 79 return FALSE;
michael@0 80 }
michael@0 81 #else
michael@0 82 static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 83 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
michael@0 84 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 85 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
michael@0 86 return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c));
michael@0 87 }
michael@0 88 #endif
michael@0 89
michael@0 90 // UCHAR_NF*_INERT properties
michael@0 91 #if UCONFIG_NO_NORMALIZATION
michael@0 92 static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) {
michael@0 93 return FALSE;
michael@0 94 }
michael@0 95 #else
michael@0 96 static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
michael@0 97 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 98 const Normalizer2 *norm2=Normalizer2Factory::getInstance(
michael@0 99 (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
michael@0 100 return U_SUCCESS(errorCode) && norm2->isInert(c);
michael@0 101 }
michael@0 102 #endif
michael@0 103
michael@0 104 #if UCONFIG_NO_NORMALIZATION
michael@0 105 static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) {
michael@0 106 return FALSE;
michael@0 107 }
michael@0 108 #else
michael@0 109 static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 110 UnicodeString nfd;
michael@0 111 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 112 const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode);
michael@0 113 if(U_FAILURE(errorCode)) {
michael@0 114 return FALSE;
michael@0 115 }
michael@0 116 if(nfcNorm2->getDecomposition(c, nfd)) {
michael@0 117 /* c has a decomposition */
michael@0 118 if(nfd.length()==1) {
michael@0 119 c=nfd[0]; /* single BMP code point */
michael@0 120 } else if(nfd.length()<=U16_MAX_LENGTH &&
michael@0 121 nfd.length()==U16_LENGTH(c=nfd.char32At(0))
michael@0 122 ) {
michael@0 123 /* single supplementary code point */
michael@0 124 } else {
michael@0 125 c=U_SENTINEL;
michael@0 126 }
michael@0 127 } else if(c<0) {
michael@0 128 return FALSE; /* protect against bad input */
michael@0 129 }
michael@0 130 if(c>=0) {
michael@0 131 /* single code point */
michael@0 132 const UCaseProps *csp=ucase_getSingleton();
michael@0 133 const UChar *resultString;
michael@0 134 return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
michael@0 135 } else {
michael@0 136 /* guess some large but stack-friendly capacity */
michael@0 137 UChar dest[2*UCASE_MAX_STRING_LENGTH];
michael@0 138 int32_t destLength;
michael@0 139 destLength=u_strFoldCase(dest, LENGTHOF(dest),
michael@0 140 nfd.getBuffer(), nfd.length(),
michael@0 141 U_FOLD_CASE_DEFAULT, &errorCode);
michael@0 142 return (UBool)(U_SUCCESS(errorCode) &&
michael@0 143 0!=u_strCompare(nfd.getBuffer(), nfd.length(),
michael@0 144 dest, destLength, FALSE));
michael@0 145 }
michael@0 146 }
michael@0 147 #endif
michael@0 148
michael@0 149 #if UCONFIG_NO_NORMALIZATION
michael@0 150 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) {
michael@0 151 return FALSE;
michael@0 152 }
michael@0 153 #else
michael@0 154 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 155 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 156 const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode);
michael@0 157 if(U_FAILURE(errorCode)) {
michael@0 158 return FALSE;
michael@0 159 }
michael@0 160 UnicodeString src(c);
michael@0 161 UnicodeString dest;
michael@0 162 {
michael@0 163 // The ReorderingBuffer must be in a block because its destructor
michael@0 164 // needs to release dest's buffer before we look at its contents.
michael@0 165 ReorderingBuffer buffer(*kcf, dest);
michael@0 166 // Small destCapacity for NFKC_CF(c).
michael@0 167 if(buffer.init(5, errorCode)) {
michael@0 168 const UChar *srcArray=src.getBuffer();
michael@0 169 kcf->compose(srcArray, srcArray+src.length(), FALSE,
michael@0 170 TRUE, buffer, errorCode);
michael@0 171 }
michael@0 172 }
michael@0 173 return U_SUCCESS(errorCode) && dest!=src;
michael@0 174 }
michael@0 175 #endif
michael@0 176
michael@0 177 #if UCONFIG_NO_NORMALIZATION
michael@0 178 static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) {
michael@0 179 return FALSE;
michael@0 180 }
michael@0 181 #else
michael@0 182 static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 183 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 184 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
michael@0 185 return
michael@0 186 U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) &&
michael@0 187 impl->isCanonSegmentStarter(c);
michael@0 188 }
michael@0 189 #endif
michael@0 190
michael@0 191 static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 192 return u_isalnumPOSIX(c);
michael@0 193 }
michael@0 194
michael@0 195 static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 196 return u_isblank(c);
michael@0 197 }
michael@0 198
michael@0 199 static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 200 return u_isgraphPOSIX(c);
michael@0 201 }
michael@0 202
michael@0 203 static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 204 return u_isprintPOSIX(c);
michael@0 205 }
michael@0 206
michael@0 207 static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 208 return u_isxdigit(c);
michael@0 209 }
michael@0 210
michael@0 211 static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
michael@0 212 /*
michael@0 213 * column and mask values for binary properties from u_getUnicodeProperties().
michael@0 214 * Must be in order of corresponding UProperty,
michael@0 215 * and there must be exactly one entry per binary UProperty.
michael@0 216 *
michael@0 217 * Properties with mask==0 are handled in code.
michael@0 218 * For them, column is the UPropertySource value.
michael@0 219 */
michael@0 220 { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains },
michael@0 221 { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains },
michael@0 222 { UPROPS_SRC_BIDI, 0, isBidiControl },
michael@0 223 { UPROPS_SRC_BIDI, 0, isMirrored },
michael@0 224 { 1, U_MASK(UPROPS_DASH), defaultContains },
michael@0 225 { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains },
michael@0 226 { 1, U_MASK(UPROPS_DEPRECATED), defaultContains },
michael@0 227 { 1, U_MASK(UPROPS_DIACRITIC), defaultContains },
michael@0 228 { 1, U_MASK(UPROPS_EXTENDER), defaultContains },
michael@0 229 { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion },
michael@0 230 { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains },
michael@0 231 { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains },
michael@0 232 { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains },
michael@0 233 { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains },
michael@0 234 { 1, U_MASK(UPROPS_HYPHEN), defaultContains },
michael@0 235 { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains },
michael@0 236 { 1, U_MASK(UPROPS_ID_START), defaultContains },
michael@0 237 { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains },
michael@0 238 { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains },
michael@0 239 { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains },
michael@0 240 { UPROPS_SRC_BIDI, 0, isJoinControl },
michael@0 241 { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains },
michael@0 242 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE
michael@0 243 { 1, U_MASK(UPROPS_MATH), defaultContains },
michael@0 244 { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains },
michael@0 245 { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains },
michael@0 246 { 1, U_MASK(UPROPS_RADICAL), defaultContains },
michael@0 247 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED
michael@0 248 { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains },
michael@0 249 { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains },
michael@0 250 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE
michael@0 251 { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains },
michael@0 252 { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains },
michael@0 253 { 1, U_MASK(UPROPS_XID_START), defaultContains },
michael@0 254 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE
michael@0 255 { 1, U_MASK(UPROPS_S_TERM), defaultContains },
michael@0 256 { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains },
michael@0 257 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT
michael@0 258 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT
michael@0 259 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT
michael@0 260 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT
michael@0 261 { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter },
michael@0 262 { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains },
michael@0 263 { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains },
michael@0 264 { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum },
michael@0 265 { UPROPS_SRC_CHAR, 0, isPOSIX_blank },
michael@0 266 { UPROPS_SRC_CHAR, 0, isPOSIX_graph },
michael@0 267 { UPROPS_SRC_CHAR, 0, isPOSIX_print },
michael@0 268 { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit },
michael@0 269 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED
michael@0 270 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE
michael@0 271 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED
michael@0 272 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED
michael@0 273 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED
michael@0 274 { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded },
michael@0 275 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED
michael@0 276 { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded }
michael@0 277 };
michael@0 278
michael@0 279 U_CAPI UBool U_EXPORT2
michael@0 280 u_hasBinaryProperty(UChar32 c, UProperty which) {
michael@0 281 /* c is range-checked in the functions that are called from here */
michael@0 282 if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
michael@0 283 /* not a known binary property */
michael@0 284 return FALSE;
michael@0 285 } else {
michael@0 286 const BinaryProperty &prop=binProps[which];
michael@0 287 return prop.contains(prop, c, which);
michael@0 288 }
michael@0 289 }
michael@0 290
michael@0 291 struct IntProperty;
michael@0 292
michael@0 293 typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which);
michael@0 294 typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which);
michael@0 295
michael@0 296 struct IntProperty {
michael@0 297 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0
michael@0 298 uint32_t mask;
michael@0 299 int32_t shift; // =maxValue if getMaxValueFromShift() is used
michael@0 300 IntPropertyGetValue *getValue;
michael@0 301 IntPropertyGetMaxValue *getMaxValue;
michael@0 302 };
michael@0 303
michael@0 304 static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) {
michael@0 305 /* systematic, directly stored properties */
michael@0 306 return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift;
michael@0 307 }
michael@0 308
michael@0 309 static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) {
michael@0 310 return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift;
michael@0 311 }
michael@0 312
michael@0 313 static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) {
michael@0 314 return prop.shift;
michael@0 315 }
michael@0 316
michael@0 317 static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 318 return (int32_t)u_charDirection(c);
michael@0 319 }
michael@0 320
michael@0 321 static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 322 return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c);
michael@0 323 }
michael@0 324
michael@0 325 static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
michael@0 326 return ubidi_getMaxValue(GET_BIDI_PROPS(), which);
michael@0 327 }
michael@0 328
michael@0 329 #if UCONFIG_NO_NORMALIZATION
michael@0 330 static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) {
michael@0 331 return 0;
michael@0 332 }
michael@0 333 #else
michael@0 334 static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 335 return u_getCombiningClass(c);
michael@0 336 }
michael@0 337 #endif
michael@0 338
michael@0 339 static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 340 return (int32_t)u_charType(c);
michael@0 341 }
michael@0 342
michael@0 343 static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 344 return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
michael@0 345 }
michael@0 346
michael@0 347 static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 348 return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
michael@0 349 }
michael@0 350
michael@0 351 static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 352 int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c));
michael@0 353 return UPROPS_NTV_GET_TYPE(ntv);
michael@0 354 }
michael@0 355
michael@0 356 static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 357 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 358 return (int32_t)uscript_getScript(c, &errorCode);
michael@0 359 }
michael@0 360
michael@0 361 /*
michael@0 362 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
michael@0 363 * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
michael@0 364 */
michael@0 365 static const UHangulSyllableType gcbToHst[]={
michael@0 366 U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */
michael@0 367 U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */
michael@0 368 U_HST_NOT_APPLICABLE, /* U_GCB_CR */
michael@0 369 U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */
michael@0 370 U_HST_LEADING_JAMO, /* U_GCB_L */
michael@0 371 U_HST_NOT_APPLICABLE, /* U_GCB_LF */
michael@0 372 U_HST_LV_SYLLABLE, /* U_GCB_LV */
michael@0 373 U_HST_LVT_SYLLABLE, /* U_GCB_LVT */
michael@0 374 U_HST_TRAILING_JAMO, /* U_GCB_T */
michael@0 375 U_HST_VOWEL_JAMO /* U_GCB_V */
michael@0 376 /*
michael@0 377 * Omit GCB values beyond what we need for hst.
michael@0 378 * The code below checks for the array length.
michael@0 379 */
michael@0 380 };
michael@0 381
michael@0 382 static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 383 /* see comments on gcbToHst[] above */
michael@0 384 int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
michael@0 385 if(gcb<LENGTHOF(gcbToHst)) {
michael@0 386 return gcbToHst[gcb];
michael@0 387 } else {
michael@0 388 return U_HST_NOT_APPLICABLE;
michael@0 389 }
michael@0 390 }
michael@0 391
michael@0 392 #if UCONFIG_NO_NORMALIZATION
michael@0 393 static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) {
michael@0 394 return 0;
michael@0 395 }
michael@0 396 #else
michael@0 397 static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) {
michael@0 398 return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
michael@0 399 }
michael@0 400 #endif
michael@0 401
michael@0 402 #if UCONFIG_NO_NORMALIZATION
michael@0 403 static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) {
michael@0 404 return 0;
michael@0 405 }
michael@0 406 #else
michael@0 407 static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 408 return unorm_getFCD16(c)>>8;
michael@0 409 }
michael@0 410 #endif
michael@0 411
michael@0 412 #if UCONFIG_NO_NORMALIZATION
michael@0 413 static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) {
michael@0 414 return 0;
michael@0 415 }
michael@0 416 #else
michael@0 417 static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
michael@0 418 return unorm_getFCD16(c)&0xff;
michael@0 419 }
michael@0 420 #endif
michael@0 421
michael@0 422 static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
michael@0 423 /*
michael@0 424 * column, mask and shift values for int-value properties from u_getUnicodeProperties().
michael@0 425 * Must be in order of corresponding UProperty,
michael@0 426 * and there must be exactly one entry per int UProperty.
michael@0 427 *
michael@0 428 * Properties with mask==0 are handled in code.
michael@0 429 * For them, column is the UPropertySource value.
michael@0 430 */
michael@0 431 { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue },
michael@0 432 { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue },
michael@0 433 { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift },
michael@0 434 { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue },
michael@0 435 { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue },
michael@0 436 { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift },
michael@0 437 { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue },
michael@0 438 { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue },
michael@0 439 { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue },
michael@0 440 { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift },
michael@0 441 { 0, UPROPS_SCRIPT_MASK, 0, getScript, defaultGetMaxValue },
michael@0 442 { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift },
michael@0 443 // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
michael@0 444 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift },
michael@0 445 // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
michael@0 446 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift },
michael@0 447 // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE
michael@0 448 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift },
michael@0 449 // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE
michael@0 450 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift },
michael@0 451 { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift },
michael@0 452 { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift },
michael@0 453 { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue },
michael@0 454 { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue },
michael@0 455 { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue },
michael@0 456 { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue },
michael@0 457 };
michael@0 458
michael@0 459 U_CAPI int32_t U_EXPORT2
michael@0 460 u_getIntPropertyValue(UChar32 c, UProperty which) {
michael@0 461 if(which<UCHAR_INT_START) {
michael@0 462 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
michael@0 463 const BinaryProperty &prop=binProps[which];
michael@0 464 return prop.contains(prop, c, which);
michael@0 465 }
michael@0 466 } else if(which<UCHAR_INT_LIMIT) {
michael@0 467 const IntProperty &prop=intProps[which-UCHAR_INT_START];
michael@0 468 return prop.getValue(prop, c, which);
michael@0 469 } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
michael@0 470 return U_MASK(u_charType(c));
michael@0 471 }
michael@0 472 return 0; // undefined
michael@0 473 }
michael@0 474
michael@0 475 U_CAPI int32_t U_EXPORT2
michael@0 476 u_getIntPropertyMinValue(UProperty /*which*/) {
michael@0 477 return 0; /* all binary/enum/int properties have a minimum value of 0 */
michael@0 478 }
michael@0 479
michael@0 480 U_CAPI int32_t U_EXPORT2
michael@0 481 u_getIntPropertyMaxValue(UProperty which) {
michael@0 482 if(which<UCHAR_INT_START) {
michael@0 483 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
michael@0 484 return 1; // maximum TRUE for all binary properties
michael@0 485 }
michael@0 486 } else if(which<UCHAR_INT_LIMIT) {
michael@0 487 const IntProperty &prop=intProps[which-UCHAR_INT_START];
michael@0 488 return prop.getMaxValue(prop, which);
michael@0 489 }
michael@0 490 return -1; // undefined
michael@0 491 }
michael@0 492
michael@0 493 U_CFUNC UPropertySource U_EXPORT2
michael@0 494 uprops_getSource(UProperty which) {
michael@0 495 if(which<UCHAR_BINARY_START) {
michael@0 496 return UPROPS_SRC_NONE; /* undefined */
michael@0 497 } else if(which<UCHAR_BINARY_LIMIT) {
michael@0 498 const BinaryProperty &prop=binProps[which];
michael@0 499 if(prop.mask!=0) {
michael@0 500 return UPROPS_SRC_PROPSVEC;
michael@0 501 } else {
michael@0 502 return (UPropertySource)prop.column;
michael@0 503 }
michael@0 504 } else if(which<UCHAR_INT_START) {
michael@0 505 return UPROPS_SRC_NONE; /* undefined */
michael@0 506 } else if(which<UCHAR_INT_LIMIT) {
michael@0 507 const IntProperty &prop=intProps[which-UCHAR_INT_START];
michael@0 508 if(prop.mask!=0) {
michael@0 509 return UPROPS_SRC_PROPSVEC;
michael@0 510 } else {
michael@0 511 return (UPropertySource)prop.column;
michael@0 512 }
michael@0 513 } else if(which<UCHAR_STRING_START) {
michael@0 514 switch(which) {
michael@0 515 case UCHAR_GENERAL_CATEGORY_MASK:
michael@0 516 case UCHAR_NUMERIC_VALUE:
michael@0 517 return UPROPS_SRC_CHAR;
michael@0 518
michael@0 519 default:
michael@0 520 return UPROPS_SRC_NONE;
michael@0 521 }
michael@0 522 } else if(which<UCHAR_STRING_LIMIT) {
michael@0 523 switch(which) {
michael@0 524 case UCHAR_AGE:
michael@0 525 return UPROPS_SRC_PROPSVEC;
michael@0 526
michael@0 527 case UCHAR_BIDI_MIRRORING_GLYPH:
michael@0 528 return UPROPS_SRC_BIDI;
michael@0 529
michael@0 530 case UCHAR_CASE_FOLDING:
michael@0 531 case UCHAR_LOWERCASE_MAPPING:
michael@0 532 case UCHAR_SIMPLE_CASE_FOLDING:
michael@0 533 case UCHAR_SIMPLE_LOWERCASE_MAPPING:
michael@0 534 case UCHAR_SIMPLE_TITLECASE_MAPPING:
michael@0 535 case UCHAR_SIMPLE_UPPERCASE_MAPPING:
michael@0 536 case UCHAR_TITLECASE_MAPPING:
michael@0 537 case UCHAR_UPPERCASE_MAPPING:
michael@0 538 return UPROPS_SRC_CASE;
michael@0 539
michael@0 540 case UCHAR_ISO_COMMENT:
michael@0 541 case UCHAR_NAME:
michael@0 542 case UCHAR_UNICODE_1_NAME:
michael@0 543 return UPROPS_SRC_NAMES;
michael@0 544
michael@0 545 default:
michael@0 546 return UPROPS_SRC_NONE;
michael@0 547 }
michael@0 548 } else {
michael@0 549 switch(which) {
michael@0 550 case UCHAR_SCRIPT_EXTENSIONS:
michael@0 551 return UPROPS_SRC_PROPSVEC;
michael@0 552 default:
michael@0 553 return UPROPS_SRC_NONE; /* undefined */
michael@0 554 }
michael@0 555 }
michael@0 556 }
michael@0 557
michael@0 558 #if !UCONFIG_NO_NORMALIZATION
michael@0 559
michael@0 560 U_CAPI int32_t U_EXPORT2
michael@0 561 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
michael@0 562 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 563 return 0;
michael@0 564 }
michael@0 565 if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
michael@0 566 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 567 return 0;
michael@0 568 }
michael@0 569 // Compute the FC_NFKC_Closure on the fly:
michael@0 570 // We have the API for complete coverage of Unicode properties, although
michael@0 571 // this value by itself is not useful via API.
michael@0 572 // (What could be useful is a custom normalization table that combines
michael@0 573 // case folding and NFKC.)
michael@0 574 // For the derivation, see Unicode's DerivedNormalizationProps.txt.
michael@0 575 const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode);
michael@0 576 const UCaseProps *csp=ucase_getSingleton();
michael@0 577 if(U_FAILURE(*pErrorCode)) {
michael@0 578 return 0;
michael@0 579 }
michael@0 580 // first: b = NFKC(Fold(a))
michael@0 581 UnicodeString folded1String;
michael@0 582 const UChar *folded1;
michael@0 583 int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFAULT);
michael@0 584 if(folded1Length<0) {
michael@0 585 const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc);
michael@0 586 if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) {
michael@0 587 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC
michael@0 588 }
michael@0 589 folded1String.setTo(c);
michael@0 590 } else {
michael@0 591 if(folded1Length>UCASE_MAX_STRING_LENGTH) {
michael@0 592 folded1String.setTo(folded1Length);
michael@0 593 } else {
michael@0 594 folded1String.setTo(FALSE, folded1, folded1Length);
michael@0 595 }
michael@0 596 }
michael@0 597 UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode);
michael@0 598 // second: c = NFKC(Fold(b))
michael@0 599 UnicodeString folded2String(kc1);
michael@0 600 UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode);
michael@0 601 // if (c != b) add the mapping from a to c
michael@0 602 if(U_FAILURE(*pErrorCode) || kc1==kc2) {
michael@0 603 return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
michael@0 604 } else {
michael@0 605 return kc2.extract(dest, destCapacity, *pErrorCode);
michael@0 606 }
michael@0 607 }
michael@0 608
michael@0 609 #endif

mercurial