intl/icu/source/common/uprops.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2002-2013, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  uprops.cpp
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2002feb24
    14 *   created by: Markus W. Scherer
    15 *
    16 *   Implementations for mostly non-core Unicode character properties
    17 *   stored in uprops.icu.
    18 *
    19 *   With the APIs implemented here, almost all properties files and
    20 *   their associated implementation files are used from this file,
    21 *   including those for normalization and case mappings.
    22 */
    24 #include "unicode/utypes.h"
    25 #include "unicode/uchar.h"
    26 #include "unicode/unorm2.h"
    27 #include "unicode/uscript.h"
    28 #include "unicode/ustring.h"
    29 #include "cstring.h"
    30 #include "normalizer2impl.h"
    31 #include "ucln_cmn.h"
    32 #include "umutex.h"
    33 #include "ubidi_props.h"
    34 #include "uprops.h"
    35 #include "ucase.h"
    36 #include "ustr_imp.h"
    38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    40 U_NAMESPACE_USE
    42 #define GET_BIDI_PROPS() ubidi_getSingleton()  /* bidi properties object handed to the ubidi_*() calls in this file */
    44 /* general properties API functions ----------------------------------------- */
    46 struct BinaryProperty;
    48 typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which);  // handler signature: does c have binary property `which`?
    50 struct BinaryProperty {
    51     int32_t column;  // SRC_PROPSVEC column, or "source" if mask==0
    52     uint32_t mask;  // bit(s) tested by defaultContains(); 0 when the property is computed by the handler
    53     BinaryPropertyContains *contains;  // handler called by u_hasBinaryProperty() and u_getIntPropertyValue()
    54 };
    56 static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) {
    57     /* systematic, directly stored properties */
    58     return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0;
    59 }
    61 static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
    62     return ucase_hasBinaryProperty(c, which);  // shared handler: `which` tells ucase which property to test; the table entry is unused
    63 }
    65 static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    66     return ubidi_isBidiControl(GET_BIDI_PROPS(), c);  // answered from the bidi properties object; prop and which are unused
    67 }
    69 static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    70     return ubidi_isMirrored(GET_BIDI_PROPS(), c);  // answered from the bidi properties object; prop and which are unused
    71 }
    73 static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    74     return ubidi_isJoinControl(GET_BIDI_PROPS(), c);  // answered from the bidi properties object; prop and which are unused
    75 }
    77 #if UCONFIG_NO_NORMALIZATION
    78 static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) {
    79     return FALSE;
    80 }
    81 #else
    82 static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    83     // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
    84     UErrorCode errorCode=U_ZERO_ERROR;
    85     const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
    86     return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c));
    87 }
    88 #endif
    90 // UCHAR_NF*_INERT properties
    91 #if UCONFIG_NO_NORMALIZATION
    92 static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) {
    93     return FALSE;
    94 }
    95 #else
    96 static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
    97     UErrorCode errorCode=U_ZERO_ERROR;
    98     const Normalizer2 *norm2=Normalizer2Factory::getInstance(
    99         (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode);
   100     return U_SUCCESS(errorCode) && norm2->isInert(c);
   101 }
   102 #endif
   104 #if UCONFIG_NO_NORMALIZATION
   105 static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) {
   106     return FALSE;
   107 }
   108 #else
   109 static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   110     UnicodeString nfd;
   111     UErrorCode errorCode=U_ZERO_ERROR;
   112     const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode);
   113     if(U_FAILURE(errorCode)) {
   114         return FALSE;
   115     }
   116     if(nfcNorm2->getDecomposition(c, nfd)) {
   117         /* c has a decomposition */
   118         if(nfd.length()==1) {
   119             c=nfd[0];  /* single BMP code point */
   120         } else if(nfd.length()<=U16_MAX_LENGTH &&
   121                   nfd.length()==U16_LENGTH(c=nfd.char32At(0))
   122         ) {
   123             /* single supplementary code point */
   124         } else {
   125             c=U_SENTINEL;
   126         }
   127     } else if(c<0) {
   128         return FALSE;  /* protect against bad input */
   129     }
   130     if(c>=0) {
   131         /* single code point */
   132         const UCaseProps *csp=ucase_getSingleton();
   133         const UChar *resultString;
   134         return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0);
   135     } else {
   136         /* guess some large but stack-friendly capacity */
   137         UChar dest[2*UCASE_MAX_STRING_LENGTH];
   138         int32_t destLength;
   139         destLength=u_strFoldCase(dest, LENGTHOF(dest),
   140                                   nfd.getBuffer(), nfd.length(),
   141                                   U_FOLD_CASE_DEFAULT, &errorCode);
   142         return (UBool)(U_SUCCESS(errorCode) &&
   143                        0!=u_strCompare(nfd.getBuffer(), nfd.length(),
   144                                        dest, destLength, FALSE));
   145     }
   146 }
   147 #endif
   149 #if UCONFIG_NO_NORMALIZATION
   150 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) {
   151     return FALSE;
   152 }
   153 #else
   154 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   155     UErrorCode errorCode=U_ZERO_ERROR;
   156     const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode);
   157     if(U_FAILURE(errorCode)) {
   158         return FALSE;
   159     }
   160     UnicodeString src(c);
   161     UnicodeString dest;
   162     {
   163         // The ReorderingBuffer must be in a block because its destructor
   164         // needs to release dest's buffer before we look at its contents.
   165         ReorderingBuffer buffer(*kcf, dest);
   166         // Small destCapacity for NFKC_CF(c).
   167         if(buffer.init(5, errorCode)) {
   168             const UChar *srcArray=src.getBuffer();
   169             kcf->compose(srcArray, srcArray+src.length(), FALSE,
   170                           TRUE, buffer, errorCode);
   171         }
   172     }
   173     return U_SUCCESS(errorCode) && dest!=src;
   174 }
   175 #endif
   177 #if UCONFIG_NO_NORMALIZATION
   178 static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) {
   179     return FALSE;
   180 }
   181 #else
   182 static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   183     UErrorCode errorCode=U_ZERO_ERROR;
   184     const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
   185     return
   186         U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) &&
   187         impl->isCanonSegmentStarter(c);
   188 }
   189 #endif
   191 static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   192     return u_isalnumPOSIX(c);  // binProps handler; prop and which are unused
   193 }
   195 static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   196     return u_isblank(c);  // binProps handler; prop and which are unused
   197 }
   199 static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   200     return u_isgraphPOSIX(c);  // binProps handler; prop and which are unused
   201 }
   203 static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   204     return u_isprintPOSIX(c);  // binProps handler; prop and which are unused
   205 }
   207 static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   208     return u_isxdigit(c);  // binProps handler; prop and which are unused
   209 }
   211 static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
   212     /*
   213      * column and mask values for binary properties from u_getUnicodeProperties().
   214      * Must be in order of corresponding UProperty,
   215      * and there must be exactly one entry per binary UProperty.
   216      *
   217      * Properties with mask==0 are handled in code.
   218      * For them, column is the UPropertySource value.
            *
            * The array is indexed directly with the UProperty value.
            * Row labels give the UProperty constant each row serves
            * (enum order as declared in unicode/uchar.h).
   219      */
   220     { 1,                U_MASK(UPROPS_ALPHABETIC), defaultContains },  // UCHAR_ALPHABETIC
   221     { 1,                U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains },  // UCHAR_ASCII_HEX_DIGIT
   222     { UPROPS_SRC_BIDI,  0, isBidiControl },  // UCHAR_BIDI_CONTROL
   223     { UPROPS_SRC_BIDI,  0, isMirrored },  // UCHAR_BIDI_MIRRORED
   224     { 1,                U_MASK(UPROPS_DASH), defaultContains },  // UCHAR_DASH
   225     { 1,                U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains },  // UCHAR_DEFAULT_IGNORABLE_CODE_POINT
   226     { 1,                U_MASK(UPROPS_DEPRECATED), defaultContains },  // UCHAR_DEPRECATED
   227     { 1,                U_MASK(UPROPS_DIACRITIC), defaultContains },  // UCHAR_DIACRITIC
   228     { 1,                U_MASK(UPROPS_EXTENDER), defaultContains },  // UCHAR_EXTENDER
   229     { UPROPS_SRC_NFC,   0, hasFullCompositionExclusion },  // UCHAR_FULL_COMPOSITION_EXCLUSION
   230     { 1,                U_MASK(UPROPS_GRAPHEME_BASE), defaultContains },  // UCHAR_GRAPHEME_BASE
   231     { 1,                U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains },  // UCHAR_GRAPHEME_EXTEND
   232     { 1,                U_MASK(UPROPS_GRAPHEME_LINK), defaultContains },  // UCHAR_GRAPHEME_LINK
   233     { 1,                U_MASK(UPROPS_HEX_DIGIT), defaultContains },  // UCHAR_HEX_DIGIT
   234     { 1,                U_MASK(UPROPS_HYPHEN), defaultContains },  // UCHAR_HYPHEN
   235     { 1,                U_MASK(UPROPS_ID_CONTINUE), defaultContains },  // UCHAR_ID_CONTINUE
   236     { 1,                U_MASK(UPROPS_ID_START), defaultContains },  // UCHAR_ID_START
   237     { 1,                U_MASK(UPROPS_IDEOGRAPHIC), defaultContains },  // UCHAR_IDEOGRAPHIC
   238     { 1,                U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains },  // UCHAR_IDS_BINARY_OPERATOR
   239     { 1,                U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains },  // UCHAR_IDS_TRINARY_OPERATOR
   240     { UPROPS_SRC_BIDI,  0, isJoinControl },  // UCHAR_JOIN_CONTROL
   241     { 1,                U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains },  // UCHAR_LOGICAL_ORDER_EXCEPTION
   242     { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_LOWERCASE
   243     { 1,                U_MASK(UPROPS_MATH), defaultContains },  // UCHAR_MATH
   244     { 1,                U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains },  // UCHAR_NONCHARACTER_CODE_POINT
   245     { 1,                U_MASK(UPROPS_QUOTATION_MARK), defaultContains },  // UCHAR_QUOTATION_MARK
   246     { 1,                U_MASK(UPROPS_RADICAL), defaultContains },  // UCHAR_RADICAL
   247     { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_SOFT_DOTTED
   248     { 1,                U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains },  // UCHAR_TERMINAL_PUNCTUATION
   249     { 1,                U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains },  // UCHAR_UNIFIED_IDEOGRAPH
   250     { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_UPPERCASE
   251     { 1,                U_MASK(UPROPS_WHITE_SPACE), defaultContains },  // UCHAR_WHITE_SPACE
   252     { 1,                U_MASK(UPROPS_XID_CONTINUE), defaultContains },  // UCHAR_XID_CONTINUE
   253     { 1,                U_MASK(UPROPS_XID_START), defaultContains },  // UCHAR_XID_START
   254     { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CASE_SENSITIVE
   255     { 1,                U_MASK(UPROPS_S_TERM), defaultContains },  // UCHAR_S_TERM
   256     { 1,                U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains },  // UCHAR_VARIATION_SELECTOR
   257     { UPROPS_SRC_NFC,   0, isNormInert },  // UCHAR_NFD_INERT
   258     { UPROPS_SRC_NFKC,  0, isNormInert },  // UCHAR_NFKD_INERT
   259     { UPROPS_SRC_NFC,   0, isNormInert },  // UCHAR_NFC_INERT
   260     { UPROPS_SRC_NFKC,  0, isNormInert },  // UCHAR_NFKC_INERT
   261     { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter },  // UCHAR_SEGMENT_STARTER
   262     { 1,                U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains },  // UCHAR_PATTERN_SYNTAX
   263     { 1,                U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains },  // UCHAR_PATTERN_WHITE_SPACE
   264     { UPROPS_SRC_CHAR_AND_PROPSVEC,  0, isPOSIX_alnum },  // UCHAR_POSIX_ALNUM
   265     { UPROPS_SRC_CHAR,  0, isPOSIX_blank },  // UCHAR_POSIX_BLANK
   266     { UPROPS_SRC_CHAR,  0, isPOSIX_graph },  // UCHAR_POSIX_GRAPH
   267     { UPROPS_SRC_CHAR,  0, isPOSIX_print },  // UCHAR_POSIX_PRINT
   268     { UPROPS_SRC_CHAR,  0, isPOSIX_xdigit },  // UCHAR_POSIX_XDIGIT
   269     { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CASED
   270     { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CASE_IGNORABLE
   271     { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_LOWERCASED
   272     { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_UPPERCASED
   273     { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_TITLECASED
   274     { UPROPS_SRC_CASE_AND_NORM,  0, changesWhenCasefolded },  // UCHAR_CHANGES_WHEN_CASEFOLDED
   275     { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_CASEMAPPED
   276     { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded }  // UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
   277 };
   279 U_CAPI UBool U_EXPORT2
   280 u_hasBinaryProperty(UChar32 c, UProperty which) {
   281     /* c is range-checked in the functions that are called from here */
   282     if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
   283         /* not a known binary property */
   284         return FALSE;
   285     } else {
   286         const BinaryProperty &prop=binProps[which];
   287         return prop.contains(prop, c, which);
   288     }
   289 }
   291 struct IntProperty;
   293 typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which);  // handler signature: value of property `which` for c
   294 typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which);  // handler signature: maximum value of property `which`
   296 struct IntProperty {
   297     int32_t column;  // SRC_PROPSVEC column, or "source" if mask==0
   298     uint32_t mask;  // bits extracted by defaultGetValue()/defaultGetMaxValue(); also selects the source in uprops_getSource()
   299     int32_t shift;  // =maxValue if getMaxValueFromShift() is used
   300     IntPropertyGetValue *getValue;  // called by u_getIntPropertyValue()
   301     IntPropertyGetMaxValue *getMaxValue;  // called by u_getIntPropertyMaxValue()
   302 };
   304 static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) {
   305     /* systematic, directly stored properties */
   306     return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift;
   307 }
   309 static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) {
   310     return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift;
   311 }
   313 static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) {
   314     return prop.shift;  // for rows using this handler the shift field stores the maximum value itself
   315 }
   317 static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   318     return (int32_t)u_charDirection(c);  // intProps value handler; prop and which are unused
   319 }
   321 static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   322     return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c);  // intProps value handler; prop and which are unused
   323 }
   325 static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
   326     return ubidi_getMaxValue(GET_BIDI_PROPS(), which);  // shared max-value handler for the bidi-backed rows; `which` picks the property
   327 }
   329 #if UCONFIG_NO_NORMALIZATION
   330 static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) {
   331     return 0;
   332 }
   333 #else
   334 static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   335     return u_getCombiningClass(c);
   336 }
   337 #endif
   339 static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   340     return (int32_t)u_charType(c);  // intProps value handler; prop and which are unused
   341 }
   343 static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   344     return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);  // intProps value handler; prop and which are unused
   345 }
   347 static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   348     return ubidi_getJoiningType(GET_BIDI_PROPS(), c);  // intProps value handler; prop and which are unused
   349 }
   351 static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   352     int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c));
   353     return UPROPS_NTV_GET_TYPE(ntv);
   354 }
   356 static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   357     UErrorCode errorCode=U_ZERO_ERROR;
   358     return (int32_t)uscript_getScript(c, &errorCode);
   359 }
   361 /*
   362  * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
   363  * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
        * The array is indexed by the Grapheme_Cluster_Break value.
   364  */
   365 static const UHangulSyllableType gcbToHst[]={
   366     U_HST_NOT_APPLICABLE,   /* U_GCB_OTHER */
   367     U_HST_NOT_APPLICABLE,   /* U_GCB_CONTROL */
   368     U_HST_NOT_APPLICABLE,   /* U_GCB_CR */
   369     U_HST_NOT_APPLICABLE,   /* U_GCB_EXTEND */
   370     U_HST_LEADING_JAMO,     /* U_GCB_L */
   371     U_HST_NOT_APPLICABLE,   /* U_GCB_LF */
   372     U_HST_LV_SYLLABLE,      /* U_GCB_LV */
   373     U_HST_LVT_SYLLABLE,     /* U_GCB_LVT */
   374     U_HST_TRAILING_JAMO,    /* U_GCB_T */
   375     U_HST_VOWEL_JAMO        /* U_GCB_V */
   376     /*
   377      * GCB values beyond what we need for hst are omitted on purpose.
   378      * getHangulSyllableType() compares the index with the array length
   379      * and returns U_HST_NOT_APPLICABLE for any value past the end.
            */
   380 };
   382 static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   383     /* see comments on gcbToHst[] above */
   384     int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
   385     if(gcb<LENGTHOF(gcbToHst)) {
   386         return gcbToHst[gcb];
   387     } else {
   388         return U_HST_NOT_APPLICABLE;
   389     }
   390 }
   392 #if UCONFIG_NO_NORMALIZATION
   393 static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) {
   394     return 0;
   395 }
   396 #else
   397 static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) {
   398     return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
   399 }
   400 #endif
   402 #if UCONFIG_NO_NORMALIZATION
   403 static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) {
   404     return 0;
   405 }
   406 #else
   407 static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   408     return unorm_getFCD16(c)>>8;
   409 }
   410 #endif
   412 #if UCONFIG_NO_NORMALIZATION
   413 static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) {
   414     return 0;
   415 }
   416 #else
   417 static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
   418     return unorm_getFCD16(c)&0xff;
   419 }
   420 #endif
   422 static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
   423     /*
   424      * column, mask and shift values for int-value properties from u_getUnicodeProperties().
   425      * Must be in order of corresponding UProperty,
   426      * and there must be exactly one entry per int UProperty.
   427      *
   428      * Properties with mask==0 are handled in code.
   429      * For them, column is the UPropertySource value.
            *
            * The array is indexed with (which-UCHAR_INT_START).
            * In rows that use getMaxValueFromShift, the third field is the
            * maximum value rather than a shift count.
            * Row labels give the UProperty constant each row serves
            * (enum order as declared in unicode/uchar.h).
   430      */
   431     { UPROPS_SRC_BIDI,  0, 0,                               getBiDiClass, biDiGetMaxValue },  // UCHAR_BIDI_CLASS
   432     { 0,                UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue },  // UCHAR_BLOCK
   433     { UPROPS_SRC_NFC,   0, 0xff,                            getCombiningClass, getMaxValueFromShift },  // UCHAR_CANONICAL_COMBINING_CLASS
   434     { 2,                UPROPS_DT_MASK, 0,                  defaultGetValue, defaultGetMaxValue },  // UCHAR_DECOMPOSITION_TYPE
   435     { 0,                UPROPS_EA_MASK, UPROPS_EA_SHIFT,    defaultGetValue, defaultGetMaxValue },  // UCHAR_EAST_ASIAN_WIDTH
   436     { UPROPS_SRC_CHAR,  0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift },  // UCHAR_GENERAL_CATEGORY
   437     { UPROPS_SRC_BIDI,  0, 0,                               getJoiningGroup, biDiGetMaxValue },  // UCHAR_JOINING_GROUP
   438     { UPROPS_SRC_BIDI,  0, 0,                               getJoiningType, biDiGetMaxValue },  // UCHAR_JOINING_TYPE
   439     { 2,                UPROPS_LB_MASK, UPROPS_LB_SHIFT,    defaultGetValue, defaultGetMaxValue },  // UCHAR_LINE_BREAK
   440     { UPROPS_SRC_CHAR,  0, (int32_t)U_NT_COUNT-1,           getNumericType, getMaxValueFromShift },  // UCHAR_NUMERIC_TYPE
   441     { 0,                UPROPS_SCRIPT_MASK, 0,              getScript, defaultGetMaxValue },  // UCHAR_SCRIPT
   442     { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1,       getHangulSyllableType, getMaxValueFromShift },  // UCHAR_HANGUL_SYLLABLE_TYPE
   443     // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
   444     { UPROPS_SRC_NFC,   0, (int32_t)UNORM_YES,              getNormQuickCheck, getMaxValueFromShift },
   445     // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
   446     { UPROPS_SRC_NFKC,  0, (int32_t)UNORM_YES,              getNormQuickCheck, getMaxValueFromShift },
   447     // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE
   448     { UPROPS_SRC_NFC,   0, (int32_t)UNORM_MAYBE,            getNormQuickCheck, getMaxValueFromShift },
   449     // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE
   450     { UPROPS_SRC_NFKC,  0, (int32_t)UNORM_MAYBE,            getNormQuickCheck, getMaxValueFromShift },
   451     { UPROPS_SRC_NFC,   0, 0xff,                            getLeadCombiningClass, getMaxValueFromShift },  // UCHAR_LEAD_CANONICAL_COMBINING_CLASS
   452     { UPROPS_SRC_NFC,   0, 0xff,                            getTrailCombiningClass, getMaxValueFromShift },  // UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
   453     { 2,                UPROPS_GCB_MASK, UPROPS_GCB_SHIFT,  defaultGetValue, defaultGetMaxValue },  // UCHAR_GRAPHEME_CLUSTER_BREAK
   454     { 2,                UPROPS_SB_MASK, UPROPS_SB_SHIFT,    defaultGetValue, defaultGetMaxValue },  // UCHAR_SENTENCE_BREAK
   455     { 2,                UPROPS_WB_MASK, UPROPS_WB_SHIFT,    defaultGetValue, defaultGetMaxValue },  // UCHAR_WORD_BREAK
   456     { UPROPS_SRC_BIDI,  0, 0,                               getBiDiPairedBracketType, biDiGetMaxValue },  // UCHAR_BIDI_PAIRED_BRACKET_TYPE
   457 };
   459 U_CAPI int32_t U_EXPORT2
   460 u_getIntPropertyValue(UChar32 c, UProperty which) {
   461     if(which<UCHAR_INT_START) {
   462         if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
   463             const BinaryProperty &prop=binProps[which];
   464             return prop.contains(prop, c, which);
   465         }
   466     } else if(which<UCHAR_INT_LIMIT) {
   467         const IntProperty &prop=intProps[which-UCHAR_INT_START];
   468         return prop.getValue(prop, c, which);
   469     } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
   470         return U_MASK(u_charType(c));
   471     }
   472     return 0;  // undefined
   473 }
   475 U_CAPI int32_t U_EXPORT2
   476 u_getIntPropertyMinValue(UProperty /*which*/) {  // the selector is ignored: the answer is the same for every property
   477     return 0; /* all binary/enum/int properties have a minimum value of 0 */
   478 }
   480 U_CAPI int32_t U_EXPORT2
   481 u_getIntPropertyMaxValue(UProperty which) {
   482     if(which<UCHAR_INT_START) {
   483         if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
   484             return 1;  // maximum TRUE for all binary properties
   485         }
   486     } else if(which<UCHAR_INT_LIMIT) {
   487         const IntProperty &prop=intProps[which-UCHAR_INT_START];
   488         return prop.getMaxValue(prop, which);
   489     }
   490     return -1;  // undefined
   491 }
   493 U_CFUNC UPropertySource U_EXPORT2
   494 uprops_getSource(UProperty which) {
   495     if(which<UCHAR_BINARY_START) {
   496         return UPROPS_SRC_NONE; /* undefined */
   497     } else if(which<UCHAR_BINARY_LIMIT) {
   498         const BinaryProperty &prop=binProps[which];
   499         if(prop.mask!=0) {
   500             return UPROPS_SRC_PROPSVEC;
   501         } else {
   502             return (UPropertySource)prop.column;
   503         }
   504     } else if(which<UCHAR_INT_START) {
   505         return UPROPS_SRC_NONE; /* undefined */
   506     } else if(which<UCHAR_INT_LIMIT) {
   507         const IntProperty &prop=intProps[which-UCHAR_INT_START];
   508         if(prop.mask!=0) {
   509             return UPROPS_SRC_PROPSVEC;
   510         } else {
   511             return (UPropertySource)prop.column;
   512         }
   513     } else if(which<UCHAR_STRING_START) {
   514         switch(which) {
   515         case UCHAR_GENERAL_CATEGORY_MASK:
   516         case UCHAR_NUMERIC_VALUE:
   517             return UPROPS_SRC_CHAR;
   519         default:
   520             return UPROPS_SRC_NONE;
   521         }
   522     } else if(which<UCHAR_STRING_LIMIT) {
   523         switch(which) {
   524         case UCHAR_AGE:
   525             return UPROPS_SRC_PROPSVEC;
   527         case UCHAR_BIDI_MIRRORING_GLYPH:
   528             return UPROPS_SRC_BIDI;
   530         case UCHAR_CASE_FOLDING:
   531         case UCHAR_LOWERCASE_MAPPING:
   532         case UCHAR_SIMPLE_CASE_FOLDING:
   533         case UCHAR_SIMPLE_LOWERCASE_MAPPING:
   534         case UCHAR_SIMPLE_TITLECASE_MAPPING:
   535         case UCHAR_SIMPLE_UPPERCASE_MAPPING:
   536         case UCHAR_TITLECASE_MAPPING:
   537         case UCHAR_UPPERCASE_MAPPING:
   538             return UPROPS_SRC_CASE;
   540         case UCHAR_ISO_COMMENT:
   541         case UCHAR_NAME:
   542         case UCHAR_UNICODE_1_NAME:
   543             return UPROPS_SRC_NAMES;
   545         default:
   546             return UPROPS_SRC_NONE;
   547         }
   548     } else {
   549         switch(which) {
   550         case UCHAR_SCRIPT_EXTENSIONS:
   551             return UPROPS_SRC_PROPSVEC;
   552         default:
   553             return UPROPS_SRC_NONE; /* undefined */
   554         }
   555     }
   556 }
   558 #if !UCONFIG_NO_NORMALIZATION
   560 U_CAPI int32_t U_EXPORT2
   561 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
   562     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   563         return 0;
   564     }
   565     if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
   566         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   567         return 0;
   568     }
   569     // Compute the FC_NFKC_Closure on the fly:
   570     // We have the API for complete coverage of Unicode properties, although
   571     // this value by itself is not useful via API.
   572     // (What could be useful is a custom normalization table that combines
   573     // case folding and NFKC.)
   574     // For the derivation, see Unicode's DerivedNormalizationProps.txt.
   575     const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode);
   576     const UCaseProps *csp=ucase_getSingleton();
   577     if(U_FAILURE(*pErrorCode)) {
   578         return 0;
   579     }
   580     // first: b = NFKC(Fold(a))
   581     UnicodeString folded1String;
   582     const UChar *folded1;
   583     int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFAULT);
   584     if(folded1Length<0) {
   585         const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc);
   586         if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) {
   587             return u_terminateUChars(dest, destCapacity, 0, pErrorCode);  // c does not change at all under CaseFolding+NFKC
   588         }
   589         folded1String.setTo(c);
   590     } else {
   591         if(folded1Length>UCASE_MAX_STRING_LENGTH) {
   592             folded1String.setTo(folded1Length);
   593         } else {
   594             folded1String.setTo(FALSE, folded1, folded1Length);
   595         }
   596     }
   597     UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode);
   598     // second: c = NFKC(Fold(b))
   599     UnicodeString folded2String(kc1);
   600     UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode);
   601     // if (c != b) add the mapping from a to c
   602     if(U_FAILURE(*pErrorCode) || kc1==kc2) {
   603         return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
   604     } else {
   605         return kc2.extract(dest, destCapacity, *pErrorCode);
   606     }
   607 }
   609 #endif

mercurial