1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/propname.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,327 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (c) 2002-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* Author: Alan Liu 1.10 +* Created: October 30 2002 1.11 +* Since: ICU 2.4 1.12 +* 2010nov19 Markus Scherer Rewrite for formatVersion 2. 1.13 +********************************************************************** 1.14 +*/ 1.15 +#include "propname.h" 1.16 +#include "unicode/uchar.h" 1.17 +#include "unicode/udata.h" 1.18 +#include "unicode/uscript.h" 1.19 +#include "umutex.h" 1.20 +#include "cmemory.h" 1.21 +#include "cstring.h" 1.22 +#include "ucln_cmn.h" 1.23 +#include "uarrsort.h" 1.24 +#include "uinvchar.h" 1.25 + 1.26 +#define INCLUDED_FROM_PROPNAME_CPP 1.27 +#include "propname_data.h" 1.28 + 1.29 +U_CDECL_BEGIN 1.30 + 1.31 +/** 1.32 + * Get the next non-ignorable ASCII character from a property name 1.33 + * and lowercases it. 1.34 + * @return ((advance count for the name)<<8)|character 1.35 + */ 1.36 +static inline int32_t 1.37 +getASCIIPropertyNameChar(const char *name) { 1.38 + int32_t i; 1.39 + char c; 1.40 + 1.41 + /* Ignore delimiters '-', '_', and ASCII White_Space */ 1.42 + for(i=0; 1.43 + (c=name[i++])==0x2d || c==0x5f || 1.44 + c==0x20 || (0x09<=c && c<=0x0d); 1.45 + ) {} 1.46 + 1.47 + if(c!=0) { 1.48 + return (i<<8)|(uint8_t)uprv_asciitolower((char)c); 1.49 + } else { 1.50 + return i<<8; 1.51 + } 1.52 +} 1.53 + 1.54 +/** 1.55 + * Get the next non-ignorable EBCDIC character from a property name 1.56 + * and lowercases it. 1.57 + * @return ((advance count for the name)<<8)|character 1.58 + */ 1.59 +static inline int32_t 1.60 +getEBCDICPropertyNameChar(const char *name) { 1.61 + int32_t i; 1.62 + char c; 1.63 + 1.64 + /* Ignore delimiters '-', '_', and EBCDIC White_Space */ 1.65 + for(i=0; 1.66 + (c=name[i++])==0x60 || c==0x6d || 1.67 + c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; 1.68 + ) {} 1.69 + 1.70 + if(c!=0) { 1.71 + return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); 1.72 + } else { 1.73 + return i<<8; 1.74 + } 1.75 +} 1.76 + 1.77 +/** 1.78 + * Unicode property names and property value names are compared "loosely". 1.79 + * 1.80 + * UCD.html 4.0.1 says: 1.81 + * For all property names, property value names, and for property values for 1.82 + * Enumerated, Binary, or Catalog properties, use the following 1.83 + * loose matching rule: 1.84 + * 1.85 + * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. 1.86 + * 1.87 + * This function does just that, for (char *) name strings. 1.88 + * It is almost identical to ucnv_compareNames() but also ignores 1.89 + * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). 1.90 + * 1.91 + * @internal 1.92 + */ 1.93 + 1.94 +U_CAPI int32_t U_EXPORT2 1.95 +uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { 1.96 + int32_t rc, r1, r2; 1.97 + 1.98 + for(;;) { 1.99 + r1=getASCIIPropertyNameChar(name1); 1.100 + r2=getASCIIPropertyNameChar(name2); 1.101 + 1.102 + /* If we reach the ends of both strings then they match */ 1.103 + if(((r1|r2)&0xff)==0) { 1.104 + return 0; 1.105 + } 1.106 + 1.107 + /* Compare the lowercased characters */ 1.108 + if(r1!=r2) { 1.109 + rc=(r1&0xff)-(r2&0xff); 1.110 + if(rc!=0) { 1.111 + return rc; 1.112 + } 1.113 + } 1.114 + 1.115 + name1+=r1>>8; 1.116 + name2+=r2>>8; 1.117 + } 1.118 +} 1.119 + 1.120 +U_CAPI int32_t U_EXPORT2 1.121 +uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { 1.122 + int32_t rc, r1, r2; 1.123 + 1.124 + for(;;) { 1.125 + r1=getEBCDICPropertyNameChar(name1); 1.126 + r2=getEBCDICPropertyNameChar(name2); 1.127 + 1.128 + /* If we reach the ends of both strings then they match */ 1.129 + if(((r1|r2)&0xff)==0) { 1.130 + return 0; 1.131 + } 1.132 + 1.133 + /* Compare the lowercased characters */ 1.134 + if(r1!=r2) { 1.135 + rc=(r1&0xff)-(r2&0xff); 1.136 + if(rc!=0) { 1.137 + return rc; 1.138 + } 1.139 + } 1.140 + 1.141 + name1+=r1>>8; 1.142 + name2+=r2>>8; 1.143 + } 1.144 +} 1.145 + 1.146 +U_CDECL_END 1.147 + 1.148 +U_NAMESPACE_BEGIN 1.149 + 1.150 +int32_t PropNameData::findProperty(int32_t property) { 1.151 + int32_t i=1; // valueMaps index, initially after numRanges 1.152 + for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) { 1.153 + // Read and skip the start and limit of this range. 1.154 + int32_t start=valueMaps[i]; 1.155 + int32_t limit=valueMaps[i+1]; 1.156 + i+=2; 1.157 + if(property<start) { 1.158 + break; 1.159 + } 1.160 + if(property<limit) { 1.161 + return i+(property-start)*2; 1.162 + } 1.163 + i+=(limit-start)*2; // Skip all entries for this range. 1.164 + } 1.165 + return 0; 1.166 +} 1.167 + 1.168 +int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) { 1.169 + if(valueMapIndex==0) { 1.170 + return 0; // The property does not have named values. 1.171 + } 1.172 + ++valueMapIndex; // Skip the BytesTrie offset. 1.173 + int32_t numRanges=valueMaps[valueMapIndex++]; 1.174 + if(numRanges<0x10) { 1.175 + // Ranges of values. 1.176 + for(; numRanges>0; --numRanges) { 1.177 + // Read and skip the start and limit of this range. 1.178 + int32_t start=valueMaps[valueMapIndex]; 1.179 + int32_t limit=valueMaps[valueMapIndex+1]; 1.180 + valueMapIndex+=2; 1.181 + if(value<start) { 1.182 + break; 1.183 + } 1.184 + if(value<limit) { 1.185 + return valueMaps[valueMapIndex+value-start]; 1.186 + } 1.187 + valueMapIndex+=limit-start; // Skip all entries for this range. 1.188 + } 1.189 + } else { 1.190 + // List of values. 1.191 + int32_t valuesStart=valueMapIndex; 1.192 + int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10; 1.193 + do { 1.194 + int32_t v=valueMaps[valueMapIndex]; 1.195 + if(value<v) { 1.196 + break; 1.197 + } 1.198 + if(value==v) { 1.199 + return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart]; 1.200 + } 1.201 + } while(++valueMapIndex<nameGroupOffsetsStart); 1.202 + } 1.203 + return 0; 1.204 +} 1.205 + 1.206 +const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) { 1.207 + int32_t numNames=*nameGroup++; 1.208 + if(nameIndex<0 || numNames<=nameIndex) { 1.209 + return NULL; 1.210 + } 1.211 + // Skip nameIndex names. 1.212 + for(; nameIndex>0; --nameIndex) { 1.213 + nameGroup=uprv_strchr(nameGroup, 0)+1; 1.214 + } 1.215 + if(*nameGroup==0) { 1.216 + return NULL; // no name (Property[Value]Aliases.txt has "n/a") 1.217 + } 1.218 + return nameGroup; 1.219 +} 1.220 + 1.221 +UBool PropNameData::containsName(BytesTrie &trie, const char *name) { 1.222 + if(name==NULL) { 1.223 + return FALSE; 1.224 + } 1.225 + UStringTrieResult result=USTRINGTRIE_NO_VALUE; 1.226 + char c; 1.227 + while((c=*name++)!=0) { 1.228 + c=uprv_invCharToLowercaseAscii(c); 1.229 + // Ignore delimiters '-', '_', and ASCII White_Space. 1.230 + if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) { 1.231 + continue; 1.232 + } 1.233 + if(!USTRINGTRIE_HAS_NEXT(result)) { 1.234 + return FALSE; 1.235 + } 1.236 + result=trie.next((uint8_t)c); 1.237 + } 1.238 + return USTRINGTRIE_HAS_VALUE(result); 1.239 +} 1.240 + 1.241 +const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) { 1.242 + int32_t valueMapIndex=findProperty(property); 1.243 + if(valueMapIndex==0) { 1.244 + return NULL; // Not a known property. 1.245 + } 1.246 + return getName(nameGroups+valueMaps[valueMapIndex], nameChoice); 1.247 +} 1.248 + 1.249 +const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) { 1.250 + int32_t valueMapIndex=findProperty(property); 1.251 + if(valueMapIndex==0) { 1.252 + return NULL; // Not a known property. 1.253 + } 1.254 + int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value); 1.255 + if(nameGroupOffset==0) { 1.256 + return NULL; 1.257 + } 1.258 + return getName(nameGroups+nameGroupOffset, nameChoice); 1.259 +} 1.260 + 1.261 +int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) { 1.262 + BytesTrie trie(bytesTries+bytesTrieOffset); 1.263 + if(containsName(trie, alias)) { 1.264 + return trie.getValue(); 1.265 + } else { 1.266 + return UCHAR_INVALID_CODE; 1.267 + } 1.268 +} 1.269 + 1.270 +int32_t PropNameData::getPropertyEnum(const char *alias) { 1.271 + return getPropertyOrValueEnum(0, alias); 1.272 +} 1.273 + 1.274 +int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) { 1.275 + int32_t valueMapIndex=findProperty(property); 1.276 + if(valueMapIndex==0) { 1.277 + return UCHAR_INVALID_CODE; // Not a known property. 1.278 + } 1.279 + valueMapIndex=valueMaps[valueMapIndex+1]; 1.280 + if(valueMapIndex==0) { 1.281 + return UCHAR_INVALID_CODE; // The property does not have named values. 1.282 + } 1.283 + // valueMapIndex is the start of the property's valueMap, 1.284 + // where the first word is the BytesTrie offset. 1.285 + return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); 1.286 +} 1.287 +U_NAMESPACE_END 1.288 + 1.289 +//---------------------------------------------------------------------- 1.290 +// Public API implementation 1.291 + 1.292 +U_CAPI const char* U_EXPORT2 1.293 +u_getPropertyName(UProperty property, 1.294 + UPropertyNameChoice nameChoice) { 1.295 + U_NAMESPACE_USE 1.296 + return PropNameData::getPropertyName(property, nameChoice); 1.297 +} 1.298 + 1.299 +U_CAPI UProperty U_EXPORT2 1.300 +u_getPropertyEnum(const char* alias) { 1.301 + U_NAMESPACE_USE 1.302 + return (UProperty)PropNameData::getPropertyEnum(alias); 1.303 +} 1.304 + 1.305 +U_CAPI const char* U_EXPORT2 1.306 +u_getPropertyValueName(UProperty property, 1.307 + int32_t value, 1.308 + UPropertyNameChoice nameChoice) { 1.309 + U_NAMESPACE_USE 1.310 + return PropNameData::getPropertyValueName(property, value, nameChoice); 1.311 +} 1.312 + 1.313 +U_CAPI int32_t U_EXPORT2 1.314 +u_getPropertyValueEnum(UProperty property, 1.315 + const char* alias) { 1.316 + U_NAMESPACE_USE 1.317 + return PropNameData::getPropertyValueEnum(property, alias); 1.318 +} 1.319 + 1.320 +U_CAPI const char* U_EXPORT2 1.321 +uscript_getName(UScriptCode scriptCode){ 1.322 + return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, 1.323 + U_LONG_PROPERTY_NAME); 1.324 +} 1.325 + 1.326 +U_CAPI const char* U_EXPORT2 1.327 +uscript_getShortName(UScriptCode scriptCode){ 1.328 + return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, 1.329 + U_SHORT_PROPERTY_NAME); 1.330 +}