intl/icu/source/common/propname.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 **********************************************************************
     3 * Copyright (c) 2002-2011, International Business Machines
     4 * Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 * Author: Alan Liu
     7 * Created: October 30 2002
     8 * Since: ICU 2.4
     9 * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
    10 **********************************************************************
    11 */
    12 #include "propname.h"
    13 #include "unicode/uchar.h"
    14 #include "unicode/udata.h"
    15 #include "unicode/uscript.h"
    16 #include "umutex.h"
    17 #include "cmemory.h"
    18 #include "cstring.h"
    19 #include "ucln_cmn.h"
    20 #include "uarrsort.h"
    21 #include "uinvchar.h"
    23 #define INCLUDED_FROM_PROPNAME_CPP
    24 #include "propname_data.h"
    26 U_CDECL_BEGIN
    28 /**
    29  * Get the next non-ignorable ASCII character from a property name
    30  * and lowercases it.
    31  * @return ((advance count for the name)<<8)|character
    32  */
    33 static inline int32_t
    34 getASCIIPropertyNameChar(const char *name) {
    35     int32_t i;
    36     char c;
    38     /* Ignore delimiters '-', '_', and ASCII White_Space */
    39     for(i=0;
    40         (c=name[i++])==0x2d || c==0x5f ||
    41         c==0x20 || (0x09<=c && c<=0x0d);
    42     ) {}
    44     if(c!=0) {
    45         return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
    46     } else {
    47         return i<<8;
    48     }
    49 }
    51 /**
    52  * Get the next non-ignorable EBCDIC character from a property name
    53  * and lowercases it.
    54  * @return ((advance count for the name)<<8)|character
    55  */
    56 static inline int32_t
    57 getEBCDICPropertyNameChar(const char *name) {
    58     int32_t i;
    59     char c;
    61     /* Ignore delimiters '-', '_', and EBCDIC White_Space */
    62     for(i=0;
    63         (c=name[i++])==0x60 || c==0x6d ||
    64         c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
    65     ) {}
    67     if(c!=0) {
    68         return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
    69     } else {
    70         return i<<8;
    71     }
    72 }
    74 /**
    75  * Unicode property names and property value names are compared "loosely".
    76  *
    77  * UCD.html 4.0.1 says:
    78  *   For all property names, property value names, and for property values for
    79  *   Enumerated, Binary, or Catalog properties, use the following
    80  *   loose matching rule:
    81  *
    82  *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
    83  *
    84  * This function does just that, for (char *) name strings.
    85  * It is almost identical to ucnv_compareNames() but also ignores
    86  * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
    87  *
    88  * @internal
    89  */
    91 U_CAPI int32_t U_EXPORT2
    92 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
    93     int32_t rc, r1, r2;
    95     for(;;) {
    96         r1=getASCIIPropertyNameChar(name1);
    97         r2=getASCIIPropertyNameChar(name2);
    99         /* If we reach the ends of both strings then they match */
   100         if(((r1|r2)&0xff)==0) {
   101             return 0;
   102         }
   104         /* Compare the lowercased characters */
   105         if(r1!=r2) {
   106             rc=(r1&0xff)-(r2&0xff);
   107             if(rc!=0) {
   108                 return rc;
   109             }
   110         }
   112         name1+=r1>>8;
   113         name2+=r2>>8;
   114     }
   115 }
   117 U_CAPI int32_t U_EXPORT2
   118 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
   119     int32_t rc, r1, r2;
   121     for(;;) {
   122         r1=getEBCDICPropertyNameChar(name1);
   123         r2=getEBCDICPropertyNameChar(name2);
   125         /* If we reach the ends of both strings then they match */
   126         if(((r1|r2)&0xff)==0) {
   127             return 0;
   128         }
   130         /* Compare the lowercased characters */
   131         if(r1!=r2) {
   132             rc=(r1&0xff)-(r2&0xff);
   133             if(rc!=0) {
   134                 return rc;
   135             }
   136         }
   138         name1+=r1>>8;
   139         name2+=r2>>8;
   140     }
   141 }
   143 U_CDECL_END
   145 U_NAMESPACE_BEGIN
   147 int32_t PropNameData::findProperty(int32_t property) {
   148     int32_t i=1;  // valueMaps index, initially after numRanges
   149     for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
   150         // Read and skip the start and limit of this range.
   151         int32_t start=valueMaps[i];
   152         int32_t limit=valueMaps[i+1];
   153         i+=2;
   154         if(property<start) {
   155             break;
   156         }
   157         if(property<limit) {
   158             return i+(property-start)*2;
   159         }
   160         i+=(limit-start)*2;  // Skip all entries for this range.
   161     }
   162     return 0;
   163 }
   165 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
   166     if(valueMapIndex==0) {
   167         return 0;  // The property does not have named values.
   168     }
   169     ++valueMapIndex;  // Skip the BytesTrie offset.
   170     int32_t numRanges=valueMaps[valueMapIndex++];
   171     if(numRanges<0x10) {
   172         // Ranges of values.
   173         for(; numRanges>0; --numRanges) {
   174             // Read and skip the start and limit of this range.
   175             int32_t start=valueMaps[valueMapIndex];
   176             int32_t limit=valueMaps[valueMapIndex+1];
   177             valueMapIndex+=2;
   178             if(value<start) {
   179                 break;
   180             }
   181             if(value<limit) {
   182                 return valueMaps[valueMapIndex+value-start];
   183             }
   184             valueMapIndex+=limit-start;  // Skip all entries for this range.
   185         }
   186     } else {
   187         // List of values.
   188         int32_t valuesStart=valueMapIndex;
   189         int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
   190         do {
   191             int32_t v=valueMaps[valueMapIndex];
   192             if(value<v) {
   193                 break;
   194             }
   195             if(value==v) {
   196                 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
   197             }
   198         } while(++valueMapIndex<nameGroupOffsetsStart);
   199     }
   200     return 0;
   201 }
   203 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
   204     int32_t numNames=*nameGroup++;
   205     if(nameIndex<0 || numNames<=nameIndex) {
   206         return NULL;
   207     }
   208     // Skip nameIndex names.
   209     for(; nameIndex>0; --nameIndex) {
   210         nameGroup=uprv_strchr(nameGroup, 0)+1;
   211     }
   212     if(*nameGroup==0) {
   213         return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
   214     }
   215     return nameGroup;
   216 }
   218 UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
   219     if(name==NULL) {
   220         return FALSE;
   221     }
   222     UStringTrieResult result=USTRINGTRIE_NO_VALUE;
   223     char c;
   224     while((c=*name++)!=0) {
   225         c=uprv_invCharToLowercaseAscii(c);
   226         // Ignore delimiters '-', '_', and ASCII White_Space.
   227         if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
   228             continue;
   229         }
   230         if(!USTRINGTRIE_HAS_NEXT(result)) {
   231             return FALSE;
   232         }
   233         result=trie.next((uint8_t)c);
   234     }
   235     return USTRINGTRIE_HAS_VALUE(result);
   236 }
   238 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
   239     int32_t valueMapIndex=findProperty(property);
   240     if(valueMapIndex==0) {
   241         return NULL;  // Not a known property.
   242     }
   243     return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
   244 }
   246 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
   247     int32_t valueMapIndex=findProperty(property);
   248     if(valueMapIndex==0) {
   249         return NULL;  // Not a known property.
   250     }
   251     int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
   252     if(nameGroupOffset==0) {
   253         return NULL;
   254     }
   255     return getName(nameGroups+nameGroupOffset, nameChoice);
   256 }
   258 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
   259     BytesTrie trie(bytesTries+bytesTrieOffset);
   260     if(containsName(trie, alias)) {
   261         return trie.getValue();
   262     } else {
   263         return UCHAR_INVALID_CODE;
   264     }
   265 }
   267 int32_t PropNameData::getPropertyEnum(const char *alias) {
   268     return getPropertyOrValueEnum(0, alias);
   269 }
   271 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
   272     int32_t valueMapIndex=findProperty(property);
   273     if(valueMapIndex==0) {
   274         return UCHAR_INVALID_CODE;  // Not a known property.
   275     }
   276     valueMapIndex=valueMaps[valueMapIndex+1];
   277     if(valueMapIndex==0) {
   278         return UCHAR_INVALID_CODE;  // The property does not have named values.
   279     }
   280     // valueMapIndex is the start of the property's valueMap,
   281     // where the first word is the BytesTrie offset.
   282     return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
   283 }
   284 U_NAMESPACE_END
   286 //----------------------------------------------------------------------
   287 // Public API implementation
   289 U_CAPI const char* U_EXPORT2
   290 u_getPropertyName(UProperty property,
   291                   UPropertyNameChoice nameChoice) {
   292     U_NAMESPACE_USE
   293     return PropNameData::getPropertyName(property, nameChoice);
   294 }
   296 U_CAPI UProperty U_EXPORT2
   297 u_getPropertyEnum(const char* alias) {
   298     U_NAMESPACE_USE
   299     return (UProperty)PropNameData::getPropertyEnum(alias);
   300 }
   302 U_CAPI const char* U_EXPORT2
   303 u_getPropertyValueName(UProperty property,
   304                        int32_t value,
   305                        UPropertyNameChoice nameChoice) {
   306     U_NAMESPACE_USE
   307     return PropNameData::getPropertyValueName(property, value, nameChoice);
   308 }
   310 U_CAPI int32_t U_EXPORT2
   311 u_getPropertyValueEnum(UProperty property,
   312                        const char* alias) {
   313     U_NAMESPACE_USE
   314     return PropNameData::getPropertyValueEnum(property, alias);
   315 }
   317 U_CAPI const char*  U_EXPORT2
   318 uscript_getName(UScriptCode scriptCode){
   319     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
   320                                   U_LONG_PROPERTY_NAME);
   321 }
   323 U_CAPI const char*  U_EXPORT2
   324 uscript_getShortName(UScriptCode scriptCode){
   325     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
   326                                   U_SHORT_PROPERTY_NAME);
   327 }

mercurial