intl/icu/source/common/uchar.c

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 ********************************************************************************
     3 *   Copyright (C) 1996-2012, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 ********************************************************************************
     6 *
     7 * File UCHAR.C
     8 *
     9 * Modification History:
    10 *
    11 *   Date        Name        Description
    12 *   04/02/97    aliu        Creation.
    13 *   4/15/99     Madhu       Updated all the function definitions for C Implementation
    14 *   5/20/99     Madhu       Added the function u_getVersion()
    15 *   8/19/1999   srl         Upgraded scripts to Unicode3.0 
    16 *   11/11/1999  weiv        added u_isalnum(), cleaned comments
    17 *   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion.
    18 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
    19 ******************************************************************************
    20 */
    22 #include "unicode/utypes.h"
    23 #include "unicode/uchar.h"
    24 #include "unicode/uscript.h"
    25 #include "unicode/udata.h"
    26 #include "uassert.h"
    27 #include "cmemory.h"
    28 #include "ucln_cmn.h"
    29 #include "utrie2.h"
    30 #include "udataswp.h"
    31 #include "uprops.h"
    32 #include "ustr_imp.h"
    34 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    36 /* uchar_props_data.h is machine-generated by genprops --csource */
    37 #define INCLUDED_FROM_UCHAR_C
    38 #include "uchar_props_data.h"
    40 /* constants and macros for access to the data ------------------------------ */
    42 /* getting a uint32_t properties word from the data */
    43 #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c));
    45 U_CFUNC UBool
    46 uprv_haveProperties(UErrorCode *pErrorCode) {
    47     if(U_FAILURE(*pErrorCode)) {
    48         return FALSE;
    49     }
    50     return TRUE;
    51 }
    53 /* API functions ------------------------------------------------------------ */
    55 /* Gets the Unicode character's general category.*/
    56 U_CAPI int8_t U_EXPORT2
    57 u_charType(UChar32 c) {
    58     uint32_t props;
    59     GET_PROPS(c, props);
    60     return (int8_t)GET_CATEGORY(props);
    61 }
    63 /* Enumerate all code points with their general categories. */
    64 struct _EnumTypeCallback {
    65     UCharEnumTypeRange *enumRange;
    66     const void *context;
    67 };
    69 static uint32_t U_CALLCONV
    70 _enumTypeValue(const void *context, uint32_t value) {
    71     return GET_CATEGORY(value);
    72 }
    74 static UBool U_CALLCONV
    75 _enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    76     /* just cast the value to UCharCategory */
    77     return ((struct _EnumTypeCallback *)context)->
    78         enumRange(((struct _EnumTypeCallback *)context)->context,
    79                   start, end+1, (UCharCategory)value);
    80 }
    82 U_CAPI void U_EXPORT2
    83 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
    84     struct _EnumTypeCallback callback;
    86     if(enumRange==NULL) {
    87         return;
    88     }
    90     callback.enumRange=enumRange;
    91     callback.context=context;
    92     utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback);
    93 }
    95 /* Checks if ch is a lower case letter.*/
    96 U_CAPI UBool U_EXPORT2
    97 u_islower(UChar32 c) {
    98     uint32_t props;
    99     GET_PROPS(c, props);
   100     return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER);
   101 }
   103 /* Checks if ch is an upper case letter.*/
   104 U_CAPI UBool U_EXPORT2
   105 u_isupper(UChar32 c) {
   106     uint32_t props;
   107     GET_PROPS(c, props);
   108     return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER);
   109 }
   111 /* Checks if ch is a title case letter; usually upper case letters.*/
   112 U_CAPI UBool U_EXPORT2
   113 u_istitle(UChar32 c) {
   114     uint32_t props;
   115     GET_PROPS(c, props);
   116     return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER);
   117 }
   119 /* Checks if ch is a decimal digit. */
   120 U_CAPI UBool U_EXPORT2
   121 u_isdigit(UChar32 c) {
   122     uint32_t props;
   123     GET_PROPS(c, props);
   124     return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
   125 }
   127 U_CAPI UBool U_EXPORT2
   128 u_isxdigit(UChar32 c) {
   129     uint32_t props;
   131     /* check ASCII and Fullwidth ASCII a-fA-F */
   132     if(
   133         (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
   134         (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
   135     ) {
   136         return TRUE;
   137     }
   139     GET_PROPS(c, props);
   140     return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
   141 }
   143 /* Checks if the Unicode character is a letter.*/
   144 U_CAPI UBool U_EXPORT2
   145 u_isalpha(UChar32 c) {
   146     uint32_t props;
   147     GET_PROPS(c, props);
   148     return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0);
   149 }
   151 U_CAPI UBool U_EXPORT2
   152 u_isUAlphabetic(UChar32 c) {
   153     return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0;
   154 }
   156 /* Checks if c is a letter or a decimal digit */
   157 U_CAPI UBool U_EXPORT2
   158 u_isalnum(UChar32 c) {
   159     uint32_t props;
   160     GET_PROPS(c, props);
   161     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0);
   162 }
   164 /**
   165  * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
   166  * @internal
   167  */
   168 U_CFUNC UBool
   169 u_isalnumPOSIX(UChar32 c) {
   170     return (UBool)(u_isUAlphabetic(c) || u_isdigit(c));
   171 }
   173 /* Checks if ch is a unicode character with assigned character type.*/
   174 U_CAPI UBool U_EXPORT2
   175 u_isdefined(UChar32 c) {
   176     uint32_t props;
   177     GET_PROPS(c, props);
   178     return (UBool)(GET_CATEGORY(props)!=0);
   179 }
   181 /* Checks if the Unicode character is a base form character that can take a diacritic.*/
   182 U_CAPI UBool U_EXPORT2
   183 u_isbase(UChar32 c) {
   184     uint32_t props;
   185     GET_PROPS(c, props);
   186     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0);
   187 }
   189 /* Checks if the Unicode character is a control character.*/
   190 U_CAPI UBool U_EXPORT2
   191 u_iscntrl(UChar32 c) {
   192     uint32_t props;
   193     GET_PROPS(c, props);
   194     return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0);
   195 }
   197 U_CAPI UBool U_EXPORT2
   198 u_isISOControl(UChar32 c) {
   199     return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f);
   200 }
   202 /* Some control characters that are used as space. */
   203 #define IS_THAT_CONTROL_SPACE(c) \
   204     (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL))
   206 /* Java has decided that U+0085 New Line is not whitespace any more. */
   207 #define IS_THAT_ASCII_CONTROL_SPACE(c) \
   208     (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c))
   210 /* Checks if the Unicode character is a space character.*/
   211 U_CAPI UBool U_EXPORT2
   212 u_isspace(UChar32 c) {
   213     uint32_t props;
   214     GET_PROPS(c, props);
   215     return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c));
   216 }
   218 U_CAPI UBool U_EXPORT2
   219 u_isJavaSpaceChar(UChar32 c) {
   220     uint32_t props;
   221     GET_PROPS(c, props);
   222     return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0);
   223 }
   225 /* Checks if the Unicode character is a whitespace character.*/
   226 U_CAPI UBool U_EXPORT2
   227 u_isWhitespace(UChar32 c) {
   228     uint32_t props;
   229     GET_PROPS(c, props);
   230     return (UBool)(
   231                 ((CAT_MASK(props)&U_GC_Z_MASK)!=0 &&
   232                     c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */
   233                 IS_THAT_ASCII_CONTROL_SPACE(c)
   234            );
   235 }
   237 U_CAPI UBool U_EXPORT2
   238 u_isblank(UChar32 c) {
   239     if((uint32_t)c<=0x9f) {
   240         return c==9 || c==0x20; /* TAB or SPACE */
   241     } else {
   242         /* Zs */
   243         uint32_t props;
   244         GET_PROPS(c, props);
   245         return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);
   246     }
   247 }
   249 U_CAPI UBool U_EXPORT2
   250 u_isUWhiteSpace(UChar32 c) {
   251     return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0;
   252 }
   254 /* Checks if the Unicode character is printable.*/
   255 U_CAPI UBool U_EXPORT2
   256 u_isprint(UChar32 c) {
   257     uint32_t props;
   258     GET_PROPS(c, props);
   259     /* comparing ==0 returns FALSE for the categories mentioned */
   260     return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0);
   261 }
   263 /**
   264  * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
   265  * Implements UCHAR_POSIX_PRINT.
   266  * @internal
   267  */
   268 U_CFUNC UBool
   269 u_isprintPOSIX(UChar32 c) {
   270     uint32_t props;
   271     GET_PROPS(c, props);
   272     /*
   273      * The only cntrl character in graph+blank is TAB (in blank).
   274      * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
   275      */
   276     return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c));
   277 }
   279 U_CAPI UBool U_EXPORT2
   280 u_isgraph(UChar32 c) {
   281     uint32_t props;
   282     GET_PROPS(c, props);
   283     /* comparing ==0 returns FALSE for the categories mentioned */
   284     return (UBool)((CAT_MASK(props)&
   285                     (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
   286                    ==0);
   287 }
   289 /**
   290  * Checks if c is in
   291  * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
   292  * with space=\p{Whitespace} and Control=Cc.
   293  * Implements UCHAR_POSIX_GRAPH.
   294  * @internal
   295  */
   296 U_CFUNC UBool
   297 u_isgraphPOSIX(UChar32 c) {
   298     uint32_t props;
   299     GET_PROPS(c, props);
   300     /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
   301     /* comparing ==0 returns FALSE for the categories mentioned */
   302     return (UBool)((CAT_MASK(props)&
   303                     (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
   304                    ==0);
   305 }
   307 U_CAPI UBool U_EXPORT2
   308 u_ispunct(UChar32 c) {
   309     uint32_t props;
   310     GET_PROPS(c, props);
   311     return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0);
   312 }
   314 /* Checks if the Unicode character can start a Unicode identifier.*/
   315 U_CAPI UBool U_EXPORT2
   316 u_isIDStart(UChar32 c) {
   317     /* same as u_isalpha() */
   318     uint32_t props;
   319     GET_PROPS(c, props);
   320     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0);
   321 }
   323 /* Checks if the Unicode character can be a Unicode identifier part other than starting the
   324  identifier.*/
   325 U_CAPI UBool U_EXPORT2
   326 u_isIDPart(UChar32 c) {
   327     uint32_t props;
   328     GET_PROPS(c, props);
   329     return (UBool)(
   330            (CAT_MASK(props)&
   331             (U_GC_ND_MASK|U_GC_NL_MASK|
   332              U_GC_L_MASK|
   333              U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK)
   334            )!=0 ||
   335            u_isIDIgnorable(c));
   336 }
   338 /*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
   339 U_CAPI UBool U_EXPORT2
   340 u_isIDIgnorable(UChar32 c) {
   341     if(c<=0x9f) {
   342         return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c);
   343     } else {
   344         uint32_t props;
   345         GET_PROPS(c, props);
   346         return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR);
   347     }
   348 }
   350 /*Checks if the Unicode character can start a Java identifier.*/
   351 U_CAPI UBool U_EXPORT2
   352 u_isJavaIDStart(UChar32 c) {
   353     uint32_t props;
   354     GET_PROPS(c, props);
   355     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0);
   356 }
   358 /*Checks if the Unicode character can be a Java identifier part other than starting the
   359  * identifier.
   360  */
   361 U_CAPI UBool U_EXPORT2
   362 u_isJavaIDPart(UChar32 c) {
   363     uint32_t props;
   364     GET_PROPS(c, props);
   365     return (UBool)(
   366            (CAT_MASK(props)&
   367             (U_GC_ND_MASK|U_GC_NL_MASK|
   368              U_GC_L_MASK|
   369              U_GC_SC_MASK|U_GC_PC_MASK|
   370              U_GC_MC_MASK|U_GC_MN_MASK)
   371            )!=0 ||
   372            u_isIDIgnorable(c));
   373 }
   375 U_CAPI int32_t U_EXPORT2
   376 u_charDigitValue(UChar32 c) {
   377     uint32_t props;
   378     int32_t value;
   379     GET_PROPS(c, props);
   380     value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START;
   381     if(value<=9) {
   382         return value;
   383     } else {
   384         return -1;
   385     }
   386 }
   388 U_CAPI double U_EXPORT2
   389 u_getNumericValue(UChar32 c) {
   390     uint32_t props;
   391     int32_t ntv;
   392     GET_PROPS(c, props);
   393     ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props);
   395     if(ntv==UPROPS_NTV_NONE) {
   396         return U_NO_NUMERIC_VALUE;
   397     } else if(ntv<UPROPS_NTV_DIGIT_START) {
   398         /* decimal digit */
   399         return ntv-UPROPS_NTV_DECIMAL_START;
   400     } else if(ntv<UPROPS_NTV_NUMERIC_START) {
   401         /* other digit */
   402         return ntv-UPROPS_NTV_DIGIT_START;
   403     } else if(ntv<UPROPS_NTV_FRACTION_START) {
   404         /* small integer */
   405         return ntv-UPROPS_NTV_NUMERIC_START;
   406     } else if(ntv<UPROPS_NTV_LARGE_START) {
   407         /* fraction */
   408         int32_t numerator=(ntv>>4)-12;
   409         int32_t denominator=(ntv&0xf)+1;
   410         return (double)numerator/denominator;
   411     } else if(ntv<UPROPS_NTV_BASE60_START) {
   412         /* large, single-significant-digit integer */
   413         double numValue;
   414         int32_t mant=(ntv>>5)-14;
   415         int32_t exp=(ntv&0x1f)+2;
   416         numValue=mant;
   418         /* multiply by 10^exp without math.h */
   419         while(exp>=4) {
   420             numValue*=10000.;
   421             exp-=4;
   422         }
   423         switch(exp) {
   424         case 3:
   425             numValue*=1000.;
   426             break;
   427         case 2:
   428             numValue*=100.;
   429             break;
   430         case 1:
   431             numValue*=10.;
   432             break;
   433         case 0:
   434         default:
   435             break;
   436         }
   438         return numValue;
   439     } else if(ntv<UPROPS_NTV_RESERVED_START) {
   440         /* sexagesimal (base 60) integer */
   441         int32_t numValue=(ntv>>2)-0xbf;
   442         int32_t exp=(ntv&3)+1;
   444         switch(exp) {
   445         case 4:
   446             numValue*=60*60*60*60;
   447             break;
   448         case 3:
   449             numValue*=60*60*60;
   450             break;
   451         case 2:
   452             numValue*=60*60;
   453             break;
   454         case 1:
   455             numValue*=60;
   456             break;
   457         case 0:
   458         default:
   459             break;
   460         }
   462         return numValue;
   463     } else {
   464         /* reserved */
   465         return U_NO_NUMERIC_VALUE;
   466     }
   467 }
   469 U_CAPI int32_t U_EXPORT2
   470 u_digit(UChar32 ch, int8_t radix) {
   471     int8_t value;
   472     if((uint8_t)(radix-2)<=(36-2)) {
   473         value=(int8_t)u_charDigitValue(ch);
   474         if(value<0) {
   475             /* ch is not a decimal digit, try latin letters */
   476             if(ch>=0x61 && ch<=0x7A) {
   477                 value=(int8_t)(ch-0x57);  /* ch - 'a' + 10 */
   478             } else if(ch>=0x41 && ch<=0x5A) {
   479                 value=(int8_t)(ch-0x37);  /* ch - 'A' + 10 */
   480             } else if(ch>=0xFF41 && ch<=0xFF5A) {
   481                 value=(int8_t)(ch-0xFF37);  /* fullwidth ASCII a-z */
   482             } else if(ch>=0xFF21 && ch<=0xFF3A) {
   483                 value=(int8_t)(ch-0xFF17);  /* fullwidth ASCII A-Z */
   484             }
   485         }
   486     } else {
   487         value=-1;   /* invalid radix */
   488     }
   489     return (int8_t)((value<radix) ? value : -1);
   490 }
   492 U_CAPI UChar32 U_EXPORT2
   493 u_forDigit(int32_t digit, int8_t radix) {
   494     if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) {
   495         return 0;
   496     } else if(digit<10) {
   497         return (UChar32)(0x30+digit);
   498     } else {
   499         return (UChar32)((0x61-10)+digit);
   500     }
   501 }
   503 /* miscellaneous, and support for uprops.cpp -------------------------------- */
   505 U_CAPI void U_EXPORT2
   506 u_getUnicodeVersion(UVersionInfo versionArray) {
   507     if(versionArray!=NULL) {
   508         uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH);
   509     }
   510 }
   512 U_CFUNC uint32_t
   513 u_getMainProperties(UChar32 c) {
   514     uint32_t props;
   515     GET_PROPS(c, props);
   516     return props;
   517 }
   519 U_CFUNC uint32_t
   520 u_getUnicodeProperties(UChar32 c, int32_t column) {
   521     U_ASSERT(column>=0);
   522     if(column>=propsVectorsColumns) {
   523         return 0;
   524     } else {
   525         uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c);
   526         return propsVectors[vecIndex+column];
   527     }
   528 }
   530 U_CFUNC int32_t
   531 uprv_getMaxValues(int32_t column) {
   532     switch(column) {
   533     case 0:
   534         return indexes[UPROPS_MAX_VALUES_INDEX];
   535     case 2:
   536         return indexes[UPROPS_MAX_VALUES_2_INDEX];
   537     default:
   538         return 0;
   539     }
   540 }
   542 U_CAPI void U_EXPORT2
   543 u_charAge(UChar32 c, UVersionInfo versionArray) {
   544     if(versionArray!=NULL) {
   545         uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT;
   546         versionArray[0]=(uint8_t)(version>>4);
   547         versionArray[1]=(uint8_t)(version&0xf);
   548         versionArray[2]=versionArray[3]=0;
   549     }
   550 }
   552 U_CAPI UScriptCode U_EXPORT2
   553 uscript_getScript(UChar32 c, UErrorCode *pErrorCode) {
   554     uint32_t scriptX;
   555     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   556         return USCRIPT_INVALID_CODE;
   557     }
   558     if((uint32_t)c>0x10ffff) {
   559         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   560         return USCRIPT_INVALID_CODE;
   561     }
   562     scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
   563     if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
   564         return (UScriptCode)scriptX;
   565     } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) {
   566         return USCRIPT_COMMON;
   567     } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) {
   568         return USCRIPT_INHERITED;
   569     } else {
   570         return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK];
   571     }
   572 }
   574 U_CAPI UBool U_EXPORT2
   575 uscript_hasScript(UChar32 c, UScriptCode sc) {
   576     const uint16_t *scx;
   577     uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
   578     if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
   579         return sc==(UScriptCode)scriptX;
   580     }
   582     scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
   583     if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
   584         scx=scriptExtensions+scx[1];
   585     }
   586     if(sc>=USCRIPT_CODE_LIMIT) {
   587         /* Guard against bogus input that would make us go past the Script_Extensions terminator. */
   588         return FALSE;
   589     }
   590     while(sc>*scx) {
   591         ++scx;
   592     }
   593     return sc==(*scx&0x7fff);
   594 }
   596 U_CAPI int32_t U_EXPORT2
   597 uscript_getScriptExtensions(UChar32 c,
   598                             UScriptCode *scripts, int32_t capacity,
   599                             UErrorCode *pErrorCode) {
   600     uint32_t scriptX;
   601     int32_t length;
   602     const uint16_t *scx;
   603     uint16_t sx;
   604     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   605         return 0;
   606     }
   607     if(capacity<0 || (capacity>0 && scripts==NULL)) {
   608         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   609         return 0;
   610     }
   611     scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
   612     if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
   613         if(capacity==0) {
   614             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   615         } else {
   616             scripts[0]=(UScriptCode)scriptX;
   617         }
   618         return 1;
   619     }
   621     scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
   622     if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
   623         scx=scriptExtensions+scx[1];
   624     }
   625     length=0;
   626     do {
   627         sx=*scx++;
   628         if(length<capacity) {
   629             scripts[length]=(UScriptCode)(sx&0x7fff);
   630         }
   631         ++length;
   632     } while(sx<0x8000);
   633     if(length>capacity) {
   634         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   635     }
   636     return length;
   637 }
   639 U_CAPI UBlockCode U_EXPORT2
   640 ublock_getCode(UChar32 c) {
   641     return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT);
   642 }
   644 /* property starts for UnicodeSet ------------------------------------------- */
   646 static UBool U_CALLCONV
   647 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
   648     /* add the start code point to the USet */
   649     const USetAdder *sa=(const USetAdder *)context;
   650     sa->add(sa->set, start);
   651     return TRUE;
   652 }
   654 #define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1)
   656 U_CFUNC void U_EXPORT2
   657 uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
   658     if(U_FAILURE(*pErrorCode)) {
   659         return;
   660     }
   662     /* add the start code point of each same-value range of the main trie */
   663     utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
   665     /* add code points with hardcoded properties, plus the ones following them */
   667     /* add for u_isblank() */
   668     USET_ADD_CP_AND_NEXT(sa, TAB);
   670     /* add for IS_THAT_CONTROL_SPACE() */
   671     sa->add(sa->set, CR+1); /* range TAB..CR */
   672     sa->add(sa->set, 0x1c);
   673     sa->add(sa->set, 0x1f+1);
   674     USET_ADD_CP_AND_NEXT(sa, NL);
   676     /* add for u_isIDIgnorable() what was not added above */
   677     sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
   678     sa->add(sa->set, HAIRSP);
   679     sa->add(sa->set, RLM+1);
   680     sa->add(sa->set, INHSWAP);
   681     sa->add(sa->set, NOMDIG+1);
   682     USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
   684     /* add no-break spaces for u_isWhitespace() what was not added above */
   685     USET_ADD_CP_AND_NEXT(sa, NBSP);
   686     USET_ADD_CP_AND_NEXT(sa, FIGURESP);
   687     USET_ADD_CP_AND_NEXT(sa, NNBSP);
   689     /* add for u_digit() */
   690     sa->add(sa->set, U_a);
   691     sa->add(sa->set, U_z+1);
   692     sa->add(sa->set, U_A);
   693     sa->add(sa->set, U_Z+1);
   694     sa->add(sa->set, U_FW_a);
   695     sa->add(sa->set, U_FW_z+1);
   696     sa->add(sa->set, U_FW_A);
   697     sa->add(sa->set, U_FW_Z+1);
   699     /* add for u_isxdigit() */
   700     sa->add(sa->set, U_f+1);
   701     sa->add(sa->set, U_F+1);
   702     sa->add(sa->set, U_FW_f+1);
   703     sa->add(sa->set, U_FW_F+1);
   705     /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
   706     sa->add(sa->set, WJ); /* range WJ..NOMDIG */
   707     sa->add(sa->set, 0xfff0);
   708     sa->add(sa->set, 0xfffb+1);
   709     sa->add(sa->set, 0xe0000);
   710     sa->add(sa->set, 0xe0fff+1);
   712     /* add for UCHAR_GRAPHEME_BASE and others */
   713     USET_ADD_CP_AND_NEXT(sa, CGJ);
   714 }
   716 U_CFUNC void U_EXPORT2
   717 upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
   718     if(U_FAILURE(*pErrorCode)) {
   719         return;
   720     }
   722     /* add the start code point of each same-value range of the properties vectors trie */
   723     if(propsVectorsColumns>0) {
   724         /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */
   725         utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
   726     }
   727 }

mercurial