intl/icu/source/common/propname.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (c) 2002-2011, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * Author: Alan Liu
michael@0 7 * Created: October 30 2002
michael@0 8 * Since: ICU 2.4
michael@0 9 * 2010nov19 Markus Scherer Rewrite for formatVersion 2.
michael@0 10 **********************************************************************
michael@0 11 */
michael@0 12 #include "propname.h"
michael@0 13 #include "unicode/uchar.h"
michael@0 14 #include "unicode/udata.h"
michael@0 15 #include "unicode/uscript.h"
michael@0 16 #include "umutex.h"
michael@0 17 #include "cmemory.h"
michael@0 18 #include "cstring.h"
michael@0 19 #include "ucln_cmn.h"
michael@0 20 #include "uarrsort.h"
michael@0 21 #include "uinvchar.h"
michael@0 22
michael@0 23 #define INCLUDED_FROM_PROPNAME_CPP
michael@0 24 #include "propname_data.h"
michael@0 25
michael@0 26 U_CDECL_BEGIN
michael@0 27
michael@0 28 /**
michael@0 29 * Get the next non-ignorable ASCII character from a property name
michael@0 30 * and lowercases it.
michael@0 31 * @return ((advance count for the name)<<8)|character
michael@0 32 */
michael@0 33 static inline int32_t
michael@0 34 getASCIIPropertyNameChar(const char *name) {
michael@0 35 int32_t i;
michael@0 36 char c;
michael@0 37
michael@0 38 /* Ignore delimiters '-', '_', and ASCII White_Space */
michael@0 39 for(i=0;
michael@0 40 (c=name[i++])==0x2d || c==0x5f ||
michael@0 41 c==0x20 || (0x09<=c && c<=0x0d);
michael@0 42 ) {}
michael@0 43
michael@0 44 if(c!=0) {
michael@0 45 return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
michael@0 46 } else {
michael@0 47 return i<<8;
michael@0 48 }
michael@0 49 }
michael@0 50
michael@0 51 /**
michael@0 52 * Get the next non-ignorable EBCDIC character from a property name
michael@0 53 * and lowercases it.
michael@0 54 * @return ((advance count for the name)<<8)|character
michael@0 55 */
michael@0 56 static inline int32_t
michael@0 57 getEBCDICPropertyNameChar(const char *name) {
michael@0 58 int32_t i;
michael@0 59 char c;
michael@0 60
michael@0 61 /* Ignore delimiters '-', '_', and EBCDIC White_Space */
michael@0 62 for(i=0;
michael@0 63 (c=name[i++])==0x60 || c==0x6d ||
michael@0 64 c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
michael@0 65 ) {}
michael@0 66
michael@0 67 if(c!=0) {
michael@0 68 return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
michael@0 69 } else {
michael@0 70 return i<<8;
michael@0 71 }
michael@0 72 }
michael@0 73
michael@0 74 /**
michael@0 75 * Unicode property names and property value names are compared "loosely".
michael@0 76 *
michael@0 77 * UCD.html 4.0.1 says:
michael@0 78 * For all property names, property value names, and for property values for
michael@0 79 * Enumerated, Binary, or Catalog properties, use the following
michael@0 80 * loose matching rule:
michael@0 81 *
michael@0 82 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
michael@0 83 *
michael@0 84 * This function does just that, for (char *) name strings.
michael@0 85 * It is almost identical to ucnv_compareNames() but also ignores
michael@0 86 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
michael@0 87 *
michael@0 88 * @internal
michael@0 89 */
michael@0 90
michael@0 91 U_CAPI int32_t U_EXPORT2
michael@0 92 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
michael@0 93 int32_t rc, r1, r2;
michael@0 94
michael@0 95 for(;;) {
michael@0 96 r1=getASCIIPropertyNameChar(name1);
michael@0 97 r2=getASCIIPropertyNameChar(name2);
michael@0 98
michael@0 99 /* If we reach the ends of both strings then they match */
michael@0 100 if(((r1|r2)&0xff)==0) {
michael@0 101 return 0;
michael@0 102 }
michael@0 103
michael@0 104 /* Compare the lowercased characters */
michael@0 105 if(r1!=r2) {
michael@0 106 rc=(r1&0xff)-(r2&0xff);
michael@0 107 if(rc!=0) {
michael@0 108 return rc;
michael@0 109 }
michael@0 110 }
michael@0 111
michael@0 112 name1+=r1>>8;
michael@0 113 name2+=r2>>8;
michael@0 114 }
michael@0 115 }
michael@0 116
michael@0 117 U_CAPI int32_t U_EXPORT2
michael@0 118 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
michael@0 119 int32_t rc, r1, r2;
michael@0 120
michael@0 121 for(;;) {
michael@0 122 r1=getEBCDICPropertyNameChar(name1);
michael@0 123 r2=getEBCDICPropertyNameChar(name2);
michael@0 124
michael@0 125 /* If we reach the ends of both strings then they match */
michael@0 126 if(((r1|r2)&0xff)==0) {
michael@0 127 return 0;
michael@0 128 }
michael@0 129
michael@0 130 /* Compare the lowercased characters */
michael@0 131 if(r1!=r2) {
michael@0 132 rc=(r1&0xff)-(r2&0xff);
michael@0 133 if(rc!=0) {
michael@0 134 return rc;
michael@0 135 }
michael@0 136 }
michael@0 137
michael@0 138 name1+=r1>>8;
michael@0 139 name2+=r2>>8;
michael@0 140 }
michael@0 141 }
michael@0 142
michael@0 143 U_CDECL_END
michael@0 144
michael@0 145 U_NAMESPACE_BEGIN
michael@0 146
michael@0 147 int32_t PropNameData::findProperty(int32_t property) {
michael@0 148 int32_t i=1; // valueMaps index, initially after numRanges
michael@0 149 for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
michael@0 150 // Read and skip the start and limit of this range.
michael@0 151 int32_t start=valueMaps[i];
michael@0 152 int32_t limit=valueMaps[i+1];
michael@0 153 i+=2;
michael@0 154 if(property<start) {
michael@0 155 break;
michael@0 156 }
michael@0 157 if(property<limit) {
michael@0 158 return i+(property-start)*2;
michael@0 159 }
michael@0 160 i+=(limit-start)*2; // Skip all entries for this range.
michael@0 161 }
michael@0 162 return 0;
michael@0 163 }
michael@0 164
michael@0 165 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
michael@0 166 if(valueMapIndex==0) {
michael@0 167 return 0; // The property does not have named values.
michael@0 168 }
michael@0 169 ++valueMapIndex; // Skip the BytesTrie offset.
michael@0 170 int32_t numRanges=valueMaps[valueMapIndex++];
michael@0 171 if(numRanges<0x10) {
michael@0 172 // Ranges of values.
michael@0 173 for(; numRanges>0; --numRanges) {
michael@0 174 // Read and skip the start and limit of this range.
michael@0 175 int32_t start=valueMaps[valueMapIndex];
michael@0 176 int32_t limit=valueMaps[valueMapIndex+1];
michael@0 177 valueMapIndex+=2;
michael@0 178 if(value<start) {
michael@0 179 break;
michael@0 180 }
michael@0 181 if(value<limit) {
michael@0 182 return valueMaps[valueMapIndex+value-start];
michael@0 183 }
michael@0 184 valueMapIndex+=limit-start; // Skip all entries for this range.
michael@0 185 }
michael@0 186 } else {
michael@0 187 // List of values.
michael@0 188 int32_t valuesStart=valueMapIndex;
michael@0 189 int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
michael@0 190 do {
michael@0 191 int32_t v=valueMaps[valueMapIndex];
michael@0 192 if(value<v) {
michael@0 193 break;
michael@0 194 }
michael@0 195 if(value==v) {
michael@0 196 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
michael@0 197 }
michael@0 198 } while(++valueMapIndex<nameGroupOffsetsStart);
michael@0 199 }
michael@0 200 return 0;
michael@0 201 }
michael@0 202
michael@0 203 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
michael@0 204 int32_t numNames=*nameGroup++;
michael@0 205 if(nameIndex<0 || numNames<=nameIndex) {
michael@0 206 return NULL;
michael@0 207 }
michael@0 208 // Skip nameIndex names.
michael@0 209 for(; nameIndex>0; --nameIndex) {
michael@0 210 nameGroup=uprv_strchr(nameGroup, 0)+1;
michael@0 211 }
michael@0 212 if(*nameGroup==0) {
michael@0 213 return NULL; // no name (Property[Value]Aliases.txt has "n/a")
michael@0 214 }
michael@0 215 return nameGroup;
michael@0 216 }
michael@0 217
michael@0 218 UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
michael@0 219 if(name==NULL) {
michael@0 220 return FALSE;
michael@0 221 }
michael@0 222 UStringTrieResult result=USTRINGTRIE_NO_VALUE;
michael@0 223 char c;
michael@0 224 while((c=*name++)!=0) {
michael@0 225 c=uprv_invCharToLowercaseAscii(c);
michael@0 226 // Ignore delimiters '-', '_', and ASCII White_Space.
michael@0 227 if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
michael@0 228 continue;
michael@0 229 }
michael@0 230 if(!USTRINGTRIE_HAS_NEXT(result)) {
michael@0 231 return FALSE;
michael@0 232 }
michael@0 233 result=trie.next((uint8_t)c);
michael@0 234 }
michael@0 235 return USTRINGTRIE_HAS_VALUE(result);
michael@0 236 }
michael@0 237
michael@0 238 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
michael@0 239 int32_t valueMapIndex=findProperty(property);
michael@0 240 if(valueMapIndex==0) {
michael@0 241 return NULL; // Not a known property.
michael@0 242 }
michael@0 243 return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
michael@0 244 }
michael@0 245
michael@0 246 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
michael@0 247 int32_t valueMapIndex=findProperty(property);
michael@0 248 if(valueMapIndex==0) {
michael@0 249 return NULL; // Not a known property.
michael@0 250 }
michael@0 251 int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
michael@0 252 if(nameGroupOffset==0) {
michael@0 253 return NULL;
michael@0 254 }
michael@0 255 return getName(nameGroups+nameGroupOffset, nameChoice);
michael@0 256 }
michael@0 257
michael@0 258 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
michael@0 259 BytesTrie trie(bytesTries+bytesTrieOffset);
michael@0 260 if(containsName(trie, alias)) {
michael@0 261 return trie.getValue();
michael@0 262 } else {
michael@0 263 return UCHAR_INVALID_CODE;
michael@0 264 }
michael@0 265 }
michael@0 266
michael@0 267 int32_t PropNameData::getPropertyEnum(const char *alias) {
michael@0 268 return getPropertyOrValueEnum(0, alias);
michael@0 269 }
michael@0 270
michael@0 271 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
michael@0 272 int32_t valueMapIndex=findProperty(property);
michael@0 273 if(valueMapIndex==0) {
michael@0 274 return UCHAR_INVALID_CODE; // Not a known property.
michael@0 275 }
michael@0 276 valueMapIndex=valueMaps[valueMapIndex+1];
michael@0 277 if(valueMapIndex==0) {
michael@0 278 return UCHAR_INVALID_CODE; // The property does not have named values.
michael@0 279 }
michael@0 280 // valueMapIndex is the start of the property's valueMap,
michael@0 281 // where the first word is the BytesTrie offset.
michael@0 282 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
michael@0 283 }
michael@0 284 U_NAMESPACE_END
michael@0 285
michael@0 286 //----------------------------------------------------------------------
michael@0 287 // Public API implementation
michael@0 288
michael@0 289 U_CAPI const char* U_EXPORT2
michael@0 290 u_getPropertyName(UProperty property,
michael@0 291 UPropertyNameChoice nameChoice) {
michael@0 292 U_NAMESPACE_USE
michael@0 293 return PropNameData::getPropertyName(property, nameChoice);
michael@0 294 }
michael@0 295
michael@0 296 U_CAPI UProperty U_EXPORT2
michael@0 297 u_getPropertyEnum(const char* alias) {
michael@0 298 U_NAMESPACE_USE
michael@0 299 return (UProperty)PropNameData::getPropertyEnum(alias);
michael@0 300 }
michael@0 301
michael@0 302 U_CAPI const char* U_EXPORT2
michael@0 303 u_getPropertyValueName(UProperty property,
michael@0 304 int32_t value,
michael@0 305 UPropertyNameChoice nameChoice) {
michael@0 306 U_NAMESPACE_USE
michael@0 307 return PropNameData::getPropertyValueName(property, value, nameChoice);
michael@0 308 }
michael@0 309
michael@0 310 U_CAPI int32_t U_EXPORT2
michael@0 311 u_getPropertyValueEnum(UProperty property,
michael@0 312 const char* alias) {
michael@0 313 U_NAMESPACE_USE
michael@0 314 return PropNameData::getPropertyValueEnum(property, alias);
michael@0 315 }
michael@0 316
michael@0 317 U_CAPI const char* U_EXPORT2
michael@0 318 uscript_getName(UScriptCode scriptCode){
michael@0 319 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
michael@0 320 U_LONG_PROPERTY_NAME);
michael@0 321 }
michael@0 322
michael@0 323 U_CAPI const char* U_EXPORT2
michael@0 324 uscript_getShortName(UScriptCode scriptCode){
michael@0 325 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
michael@0 326 U_SHORT_PROPERTY_NAME);
michael@0 327 }

mercurial