intl/icu/source/common/propname.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/propname.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,327 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +* Copyright (c) 2002-2011, International Business Machines
     1.7 +* Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +* Author: Alan Liu
    1.10 +* Created: October 30 2002
    1.11 +* Since: ICU 2.4
    1.12 +* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
    1.13 +**********************************************************************
    1.14 +*/
    1.15 +#include "propname.h"
    1.16 +#include "unicode/uchar.h"
    1.17 +#include "unicode/udata.h"
    1.18 +#include "unicode/uscript.h"
    1.19 +#include "umutex.h"
    1.20 +#include "cmemory.h"
    1.21 +#include "cstring.h"
    1.22 +#include "ucln_cmn.h"
    1.23 +#include "uarrsort.h"
    1.24 +#include "uinvchar.h"
    1.25 +
    1.26 +#define INCLUDED_FROM_PROPNAME_CPP
    1.27 +#include "propname_data.h"
    1.28 +
    1.29 +U_CDECL_BEGIN
    1.30 +
    1.31 +/**
    1.32 + * Get the next non-ignorable ASCII character from a property name
    1.33 + * and lowercases it.
    1.34 + * @return ((advance count for the name)<<8)|character
    1.35 + */
    1.36 +static inline int32_t
    1.37 +getASCIIPropertyNameChar(const char *name) {
    1.38 +    int32_t i;
    1.39 +    char c;
    1.40 +
    1.41 +    /* Ignore delimiters '-', '_', and ASCII White_Space */
    1.42 +    for(i=0;
    1.43 +        (c=name[i++])==0x2d || c==0x5f ||
    1.44 +        c==0x20 || (0x09<=c && c<=0x0d);
    1.45 +    ) {}
    1.46 +
    1.47 +    if(c!=0) {
    1.48 +        return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
    1.49 +    } else {
    1.50 +        return i<<8;
    1.51 +    }
    1.52 +}
    1.53 +
    1.54 +/**
    1.55 + * Get the next non-ignorable EBCDIC character from a property name
    1.56 + * and lowercases it.
    1.57 + * @return ((advance count for the name)<<8)|character
    1.58 + */
    1.59 +static inline int32_t
    1.60 +getEBCDICPropertyNameChar(const char *name) {
    1.61 +    int32_t i;
    1.62 +    char c;
    1.63 +
    1.64 +    /* Ignore delimiters '-', '_', and EBCDIC White_Space */
    1.65 +    for(i=0;
    1.66 +        (c=name[i++])==0x60 || c==0x6d ||
    1.67 +        c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
    1.68 +    ) {}
    1.69 +
    1.70 +    if(c!=0) {
    1.71 +        return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
    1.72 +    } else {
    1.73 +        return i<<8;
    1.74 +    }
    1.75 +}
    1.76 +
    1.77 +/**
    1.78 + * Unicode property names and property value names are compared "loosely".
    1.79 + *
    1.80 + * UCD.html 4.0.1 says:
    1.81 + *   For all property names, property value names, and for property values for
    1.82 + *   Enumerated, Binary, or Catalog properties, use the following
    1.83 + *   loose matching rule:
    1.84 + *
    1.85 + *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
    1.86 + *
    1.87 + * This function does just that, for (char *) name strings.
    1.88 + * It is almost identical to ucnv_compareNames() but also ignores
    1.89 + * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
    1.90 + *
    1.91 + * @internal
    1.92 + */
    1.93 +
    1.94 +U_CAPI int32_t U_EXPORT2
    1.95 +uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
    1.96 +    int32_t rc, r1, r2;
    1.97 +
    1.98 +    for(;;) {
    1.99 +        r1=getASCIIPropertyNameChar(name1);
   1.100 +        r2=getASCIIPropertyNameChar(name2);
   1.101 +
   1.102 +        /* If we reach the ends of both strings then they match */
   1.103 +        if(((r1|r2)&0xff)==0) {
   1.104 +            return 0;
   1.105 +        }
   1.106 +
   1.107 +        /* Compare the lowercased characters */
   1.108 +        if(r1!=r2) {
   1.109 +            rc=(r1&0xff)-(r2&0xff);
   1.110 +            if(rc!=0) {
   1.111 +                return rc;
   1.112 +            }
   1.113 +        }
   1.114 +
   1.115 +        name1+=r1>>8;
   1.116 +        name2+=r2>>8;
   1.117 +    }
   1.118 +}
   1.119 +
   1.120 +U_CAPI int32_t U_EXPORT2
   1.121 +uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
   1.122 +    int32_t rc, r1, r2;
   1.123 +
   1.124 +    for(;;) {
   1.125 +        r1=getEBCDICPropertyNameChar(name1);
   1.126 +        r2=getEBCDICPropertyNameChar(name2);
   1.127 +
   1.128 +        /* If we reach the ends of both strings then they match */
   1.129 +        if(((r1|r2)&0xff)==0) {
   1.130 +            return 0;
   1.131 +        }
   1.132 +
   1.133 +        /* Compare the lowercased characters */
   1.134 +        if(r1!=r2) {
   1.135 +            rc=(r1&0xff)-(r2&0xff);
   1.136 +            if(rc!=0) {
   1.137 +                return rc;
   1.138 +            }
   1.139 +        }
   1.140 +
   1.141 +        name1+=r1>>8;
   1.142 +        name2+=r2>>8;
   1.143 +    }
   1.144 +}
   1.145 +
   1.146 +U_CDECL_END
   1.147 +
   1.148 +U_NAMESPACE_BEGIN
   1.149 +
   1.150 +int32_t PropNameData::findProperty(int32_t property) {
   1.151 +    int32_t i=1;  // valueMaps index, initially after numRanges
   1.152 +    for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
   1.153 +        // Read and skip the start and limit of this range.
   1.154 +        int32_t start=valueMaps[i];
   1.155 +        int32_t limit=valueMaps[i+1];
   1.156 +        i+=2;
   1.157 +        if(property<start) {
   1.158 +            break;
   1.159 +        }
   1.160 +        if(property<limit) {
   1.161 +            return i+(property-start)*2;
   1.162 +        }
   1.163 +        i+=(limit-start)*2;  // Skip all entries for this range.
   1.164 +    }
   1.165 +    return 0;
   1.166 +}
   1.167 +
   1.168 +int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
   1.169 +    if(valueMapIndex==0) {
   1.170 +        return 0;  // The property does not have named values.
   1.171 +    }
   1.172 +    ++valueMapIndex;  // Skip the BytesTrie offset.
   1.173 +    int32_t numRanges=valueMaps[valueMapIndex++];
   1.174 +    if(numRanges<0x10) {
   1.175 +        // Ranges of values.
   1.176 +        for(; numRanges>0; --numRanges) {
   1.177 +            // Read and skip the start and limit of this range.
   1.178 +            int32_t start=valueMaps[valueMapIndex];
   1.179 +            int32_t limit=valueMaps[valueMapIndex+1];
   1.180 +            valueMapIndex+=2;
   1.181 +            if(value<start) {
   1.182 +                break;
   1.183 +            }
   1.184 +            if(value<limit) {
   1.185 +                return valueMaps[valueMapIndex+value-start];
   1.186 +            }
   1.187 +            valueMapIndex+=limit-start;  // Skip all entries for this range.
   1.188 +        }
   1.189 +    } else {
   1.190 +        // List of values.
   1.191 +        int32_t valuesStart=valueMapIndex;
   1.192 +        int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
   1.193 +        do {
   1.194 +            int32_t v=valueMaps[valueMapIndex];
   1.195 +            if(value<v) {
   1.196 +                break;
   1.197 +            }
   1.198 +            if(value==v) {
   1.199 +                return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
   1.200 +            }
   1.201 +        } while(++valueMapIndex<nameGroupOffsetsStart);
   1.202 +    }
   1.203 +    return 0;
   1.204 +}
   1.205 +
   1.206 +const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
   1.207 +    int32_t numNames=*nameGroup++;
   1.208 +    if(nameIndex<0 || numNames<=nameIndex) {
   1.209 +        return NULL;
   1.210 +    }
   1.211 +    // Skip nameIndex names.
   1.212 +    for(; nameIndex>0; --nameIndex) {
   1.213 +        nameGroup=uprv_strchr(nameGroup, 0)+1;
   1.214 +    }
   1.215 +    if(*nameGroup==0) {
   1.216 +        return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
   1.217 +    }
   1.218 +    return nameGroup;
   1.219 +}
   1.220 +
   1.221 +UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
   1.222 +    if(name==NULL) {
   1.223 +        return FALSE;
   1.224 +    }
   1.225 +    UStringTrieResult result=USTRINGTRIE_NO_VALUE;
   1.226 +    char c;
   1.227 +    while((c=*name++)!=0) {
   1.228 +        c=uprv_invCharToLowercaseAscii(c);
   1.229 +        // Ignore delimiters '-', '_', and ASCII White_Space.
   1.230 +        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
   1.231 +            continue;
   1.232 +        }
   1.233 +        if(!USTRINGTRIE_HAS_NEXT(result)) {
   1.234 +            return FALSE;
   1.235 +        }
   1.236 +        result=trie.next((uint8_t)c);
   1.237 +    }
   1.238 +    return USTRINGTRIE_HAS_VALUE(result);
   1.239 +}
   1.240 +
   1.241 +const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
   1.242 +    int32_t valueMapIndex=findProperty(property);
   1.243 +    if(valueMapIndex==0) {
   1.244 +        return NULL;  // Not a known property.
   1.245 +    }
   1.246 +    return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
   1.247 +}
   1.248 +
   1.249 +const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
   1.250 +    int32_t valueMapIndex=findProperty(property);
   1.251 +    if(valueMapIndex==0) {
   1.252 +        return NULL;  // Not a known property.
   1.253 +    }
   1.254 +    int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
   1.255 +    if(nameGroupOffset==0) {
   1.256 +        return NULL;
   1.257 +    }
   1.258 +    return getName(nameGroups+nameGroupOffset, nameChoice);
   1.259 +}
   1.260 +
   1.261 +int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
   1.262 +    BytesTrie trie(bytesTries+bytesTrieOffset);
   1.263 +    if(containsName(trie, alias)) {
   1.264 +        return trie.getValue();
   1.265 +    } else {
   1.266 +        return UCHAR_INVALID_CODE;
   1.267 +    }
   1.268 +}
   1.269 +
   1.270 +int32_t PropNameData::getPropertyEnum(const char *alias) {
   1.271 +    return getPropertyOrValueEnum(0, alias);
   1.272 +}
   1.273 +
   1.274 +int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
   1.275 +    int32_t valueMapIndex=findProperty(property);
   1.276 +    if(valueMapIndex==0) {
   1.277 +        return UCHAR_INVALID_CODE;  // Not a known property.
   1.278 +    }
   1.279 +    valueMapIndex=valueMaps[valueMapIndex+1];
   1.280 +    if(valueMapIndex==0) {
   1.281 +        return UCHAR_INVALID_CODE;  // The property does not have named values.
   1.282 +    }
   1.283 +    // valueMapIndex is the start of the property's valueMap,
   1.284 +    // where the first word is the BytesTrie offset.
   1.285 +    return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
   1.286 +}
   1.287 +U_NAMESPACE_END
   1.288 +
   1.289 +//----------------------------------------------------------------------
   1.290 +// Public API implementation
   1.291 +
   1.292 +U_CAPI const char* U_EXPORT2
   1.293 +u_getPropertyName(UProperty property,
   1.294 +                  UPropertyNameChoice nameChoice) {
   1.295 +    U_NAMESPACE_USE
   1.296 +    return PropNameData::getPropertyName(property, nameChoice);
   1.297 +}
   1.298 +
   1.299 +U_CAPI UProperty U_EXPORT2
   1.300 +u_getPropertyEnum(const char* alias) {
   1.301 +    U_NAMESPACE_USE
   1.302 +    return (UProperty)PropNameData::getPropertyEnum(alias);
   1.303 +}
   1.304 +
   1.305 +U_CAPI const char* U_EXPORT2
   1.306 +u_getPropertyValueName(UProperty property,
   1.307 +                       int32_t value,
   1.308 +                       UPropertyNameChoice nameChoice) {
   1.309 +    U_NAMESPACE_USE
   1.310 +    return PropNameData::getPropertyValueName(property, value, nameChoice);
   1.311 +}
   1.312 +
   1.313 +U_CAPI int32_t U_EXPORT2
   1.314 +u_getPropertyValueEnum(UProperty property,
   1.315 +                       const char* alias) {
   1.316 +    U_NAMESPACE_USE
   1.317 +    return PropNameData::getPropertyValueEnum(property, alias);
   1.318 +}
   1.319 +
   1.320 +U_CAPI const char*  U_EXPORT2
   1.321 +uscript_getName(UScriptCode scriptCode){
   1.322 +    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
   1.323 +                                  U_LONG_PROPERTY_NAME);
   1.324 +}
   1.325 +
   1.326 +U_CAPI const char*  U_EXPORT2
   1.327 +uscript_getShortName(UScriptCode scriptCode){
   1.328 +    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
   1.329 +                                  U_SHORT_PROPERTY_NAME);
   1.330 +}

mercurial