michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (c) 2002-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * Author: Alan Liu michael@0: * Created: October 30 2002 michael@0: * Since: ICU 2.4 michael@0: * 2010nov19 Markus Scherer Rewrite for formatVersion 2. michael@0: ********************************************************************** michael@0: */ michael@0: #include "propname.h" michael@0: #include "unicode/uchar.h" michael@0: #include "unicode/udata.h" michael@0: #include "unicode/uscript.h" michael@0: #include "umutex.h" michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "ucln_cmn.h" michael@0: #include "uarrsort.h" michael@0: #include "uinvchar.h" michael@0: michael@0: #define INCLUDED_FROM_PROPNAME_CPP michael@0: #include "propname_data.h" michael@0: michael@0: U_CDECL_BEGIN michael@0: michael@0: /** michael@0: * Get the next non-ignorable ASCII character from a property name michael@0: * and lowercases it. michael@0: * @return ((advance count for the name)<<8)|character michael@0: */ michael@0: static inline int32_t michael@0: getASCIIPropertyNameChar(const char *name) { michael@0: int32_t i; michael@0: char c; michael@0: michael@0: /* Ignore delimiters '-', '_', and ASCII White_Space */ michael@0: for(i=0; michael@0: (c=name[i++])==0x2d || c==0x5f || michael@0: c==0x20 || (0x09<=c && c<=0x0d); michael@0: ) {} michael@0: michael@0: if(c!=0) { michael@0: return (i<<8)|(uint8_t)uprv_asciitolower((char)c); michael@0: } else { michael@0: return i<<8; michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Get the next non-ignorable EBCDIC character from a property name michael@0: * and lowercases it. michael@0: * @return ((advance count for the name)<<8)|character michael@0: */ michael@0: static inline int32_t michael@0: getEBCDICPropertyNameChar(const char *name) { michael@0: int32_t i; michael@0: char c; michael@0: michael@0: /* Ignore delimiters '-', '_', and EBCDIC White_Space */ michael@0: for(i=0; michael@0: (c=name[i++])==0x60 || c==0x6d || michael@0: c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; michael@0: ) {} michael@0: michael@0: if(c!=0) { michael@0: return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); michael@0: } else { michael@0: return i<<8; michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Unicode property names and property value names are compared "loosely". michael@0: * michael@0: * UCD.html 4.0.1 says: michael@0: * For all property names, property value names, and for property values for michael@0: * Enumerated, Binary, or Catalog properties, use the following michael@0: * loose matching rule: michael@0: * michael@0: * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. michael@0: * michael@0: * This function does just that, for (char *) name strings. michael@0: * It is almost identical to ucnv_compareNames() but also ignores michael@0: * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). michael@0: * michael@0: * @internal michael@0: */ michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { michael@0: int32_t rc, r1, r2; michael@0: michael@0: for(;;) { michael@0: r1=getASCIIPropertyNameChar(name1); michael@0: r2=getASCIIPropertyNameChar(name2); michael@0: michael@0: /* If we reach the ends of both strings then they match */ michael@0: if(((r1|r2)&0xff)==0) { michael@0: return 0; michael@0: } michael@0: michael@0: /* Compare the lowercased characters */ michael@0: if(r1!=r2) { michael@0: rc=(r1&0xff)-(r2&0xff); michael@0: if(rc!=0) { michael@0: return rc; michael@0: } michael@0: } michael@0: michael@0: name1+=r1>>8; michael@0: name2+=r2>>8; michael@0: } michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { michael@0: int32_t rc, r1, r2; michael@0: michael@0: for(;;) { michael@0: r1=getEBCDICPropertyNameChar(name1); michael@0: r2=getEBCDICPropertyNameChar(name2); michael@0: michael@0: /* If we reach the ends of both strings then they match */ michael@0: if(((r1|r2)&0xff)==0) { michael@0: return 0; michael@0: } michael@0: michael@0: /* Compare the lowercased characters */ michael@0: if(r1!=r2) { michael@0: rc=(r1&0xff)-(r2&0xff); michael@0: if(rc!=0) { michael@0: return rc; michael@0: } michael@0: } michael@0: michael@0: name1+=r1>>8; michael@0: name2+=r2>>8; michael@0: } michael@0: } michael@0: michael@0: U_CDECL_END michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: int32_t PropNameData::findProperty(int32_t property) { michael@0: int32_t i=1; // valueMaps index, initially after numRanges michael@0: for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) { michael@0: // Read and skip the start and limit of this range. michael@0: int32_t start=valueMaps[i]; michael@0: int32_t limit=valueMaps[i+1]; michael@0: i+=2; michael@0: if(property0; --numRanges) { michael@0: // Read and skip the start and limit of this range. michael@0: int32_t start=valueMaps[valueMapIndex]; michael@0: int32_t limit=valueMaps[valueMapIndex+1]; michael@0: valueMapIndex+=2; michael@0: if(value0; --nameIndex) { michael@0: nameGroup=uprv_strchr(nameGroup, 0)+1; michael@0: } michael@0: if(*nameGroup==0) { michael@0: return NULL; // no name (Property[Value]Aliases.txt has "n/a") michael@0: } michael@0: return nameGroup; michael@0: } michael@0: michael@0: UBool PropNameData::containsName(BytesTrie &trie, const char *name) { michael@0: if(name==NULL) { michael@0: return FALSE; michael@0: } michael@0: UStringTrieResult result=USTRINGTRIE_NO_VALUE; michael@0: char c; michael@0: while((c=*name++)!=0) { michael@0: c=uprv_invCharToLowercaseAscii(c); michael@0: // Ignore delimiters '-', '_', and ASCII White_Space. michael@0: if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) { michael@0: continue; michael@0: } michael@0: if(!USTRINGTRIE_HAS_NEXT(result)) { michael@0: return FALSE; michael@0: } michael@0: result=trie.next((uint8_t)c); michael@0: } michael@0: return USTRINGTRIE_HAS_VALUE(result); michael@0: } michael@0: michael@0: const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) { michael@0: int32_t valueMapIndex=findProperty(property); michael@0: if(valueMapIndex==0) { michael@0: return NULL; // Not a known property. michael@0: } michael@0: return getName(nameGroups+valueMaps[valueMapIndex], nameChoice); michael@0: } michael@0: michael@0: const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) { michael@0: int32_t valueMapIndex=findProperty(property); michael@0: if(valueMapIndex==0) { michael@0: return NULL; // Not a known property. michael@0: } michael@0: int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value); michael@0: if(nameGroupOffset==0) { michael@0: return NULL; michael@0: } michael@0: return getName(nameGroups+nameGroupOffset, nameChoice); michael@0: } michael@0: michael@0: int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) { michael@0: BytesTrie trie(bytesTries+bytesTrieOffset); michael@0: if(containsName(trie, alias)) { michael@0: return trie.getValue(); michael@0: } else { michael@0: return UCHAR_INVALID_CODE; michael@0: } michael@0: } michael@0: michael@0: int32_t PropNameData::getPropertyEnum(const char *alias) { michael@0: return getPropertyOrValueEnum(0, alias); michael@0: } michael@0: michael@0: int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) { michael@0: int32_t valueMapIndex=findProperty(property); michael@0: if(valueMapIndex==0) { michael@0: return UCHAR_INVALID_CODE; // Not a known property. michael@0: } michael@0: valueMapIndex=valueMaps[valueMapIndex+1]; michael@0: if(valueMapIndex==0) { michael@0: return UCHAR_INVALID_CODE; // The property does not have named values. michael@0: } michael@0: // valueMapIndex is the start of the property's valueMap, michael@0: // where the first word is the BytesTrie offset. michael@0: return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); michael@0: } michael@0: U_NAMESPACE_END michael@0: michael@0: //---------------------------------------------------------------------- michael@0: // Public API implementation michael@0: michael@0: U_CAPI const char* U_EXPORT2 michael@0: u_getPropertyName(UProperty property, michael@0: UPropertyNameChoice nameChoice) { michael@0: U_NAMESPACE_USE michael@0: return PropNameData::getPropertyName(property, nameChoice); michael@0: } michael@0: michael@0: U_CAPI UProperty U_EXPORT2 michael@0: u_getPropertyEnum(const char* alias) { michael@0: U_NAMESPACE_USE michael@0: return (UProperty)PropNameData::getPropertyEnum(alias); michael@0: } michael@0: michael@0: U_CAPI const char* U_EXPORT2 michael@0: u_getPropertyValueName(UProperty property, michael@0: int32_t value, michael@0: UPropertyNameChoice nameChoice) { michael@0: U_NAMESPACE_USE michael@0: return PropNameData::getPropertyValueName(property, value, nameChoice); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: u_getPropertyValueEnum(UProperty property, michael@0: const char* alias) { michael@0: U_NAMESPACE_USE michael@0: return PropNameData::getPropertyValueEnum(property, alias); michael@0: } michael@0: michael@0: U_CAPI const char* U_EXPORT2 michael@0: uscript_getName(UScriptCode scriptCode){ michael@0: return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, michael@0: U_LONG_PROPERTY_NAME); michael@0: } michael@0: michael@0: U_CAPI const char* U_EXPORT2 michael@0: uscript_getShortName(UScriptCode scriptCode){ michael@0: return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, michael@0: U_SHORT_PROPERTY_NAME); michael@0: }