michael@0: /*
michael@0: **********************************************************************
michael@0: * Copyright (c) 2002-2011, International Business Machines
michael@0: * Corporation and others.  All Rights Reserved.
michael@0: **********************************************************************
michael@0: * Author: Alan Liu
michael@0: * Created: October 30 2002
michael@0: * Since: ICU 2.4
michael@0: * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
michael@0: **********************************************************************
michael@0: */
michael@0: #include "propname.h"
michael@0: #include "unicode/uchar.h"
michael@0: #include "unicode/udata.h"
michael@0: #include "unicode/uscript.h"
michael@0: #include "umutex.h"
michael@0: #include "cmemory.h"
michael@0: #include "cstring.h"
michael@0: #include "ucln_cmn.h"
michael@0: #include "uarrsort.h"
michael@0: #include "uinvchar.h"
michael@0: 
michael@0: #define INCLUDED_FROM_PROPNAME_CPP
michael@0: #include "propname_data.h"
michael@0: 
michael@0: U_CDECL_BEGIN
michael@0: 
michael@0: /**
michael@0:  * Skips the ignorable characters at the start of an ASCII property name
michael@0:  * and fetches the first character after them, lowercased.
michael@0:  * Ignorable are '-' (0x2d), '_' (0x5f), space (0x20) and the
michael@0:  * ASCII White_Space controls 0x09..0x0d.
michael@0:  * @param name NUL-terminated name; dereferenced without a NULL check
michael@0:  * @return ((number of chars consumed, including the returned one)<<8)|character;
michael@0:  *         the low byte is 0 when the terminating NUL was reached
michael@0:  */
michael@0: static inline int32_t
michael@0: getASCIIPropertyNameChar(const char *name) {
michael@0:     int32_t consumed=0;
michael@0:     char ch;
michael@0: 
michael@0:     /* Read at least one char; keep reading while it is a delimiter or white space. */
michael@0:     do {
michael@0:         ch=name[consumed++];
michael@0:     } while(ch==0x2d || ch==0x5f || ch==0x20 || (0x09<=ch && ch<=0x0d));
michael@0: 
michael@0:     /* The advance count goes above the low byte; the low byte stays 0 at the NUL. */
michael@0:     int32_t packed=consumed<<8;
michael@0:     if(ch!=0) {
michael@0:         packed|=(uint8_t)uprv_asciitolower((char)ch);
michael@0:     }
michael@0:     return packed;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Skips the ignorable characters at the start of an EBCDIC property name
michael@0:  * and fetches the first character after them, lowercased.
michael@0:  * Ignorable are the EBCDIC codes for '-' (0x60), '_' (0x6d), space (0x40)
michael@0:  * and the EBCDIC White_Space codes 0x05, 0x15, 0x25, 0x0b, 0x0c, 0x0d.
michael@0:  * @param name NUL-terminated name; dereferenced without a NULL check
michael@0:  * @return ((number of chars consumed, including the returned one)<<8)|character;
michael@0:  *         the low byte is 0 when the terminating NUL was reached
michael@0:  */
michael@0: static inline int32_t
michael@0: getEBCDICPropertyNameChar(const char *name) {
michael@0:     int32_t consumed=0;
michael@0:     char ch;
michael@0: 
michael@0:     /* Read at least one char; keep reading while it is a delimiter or white space. */
michael@0:     do {
michael@0:         ch=name[consumed++];
michael@0:     } while(ch==0x60 || ch==0x6d ||
michael@0:             ch==0x40 || ch==0x05 || ch==0x15 || ch==0x25 ||
michael@0:             ch==0x0b || ch==0x0c || ch==0x0d);
michael@0: 
michael@0:     /* The advance count goes above the low byte; the low byte stays 0 at the NUL. */
michael@0:     int32_t packed=consumed<<8;
michael@0:     if(ch!=0) {
michael@0:         packed|=(uint8_t)uprv_ebcdictolower((char)ch);
michael@0:     }
michael@0:     return packed;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Loose comparison of two ASCII (char *) property names or property value names.
michael@0:  *
michael@0:  * UCD.html 4.0.1 specifies loose matching for all property names,
michael@0:  * property value names, and for property values of Enumerated, Binary,
michael@0:  * or Catalog properties:
michael@0:  *
michael@0:  *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
michael@0:  *
michael@0:  * The comparison walks both names in parallel, each time taking the next
michael@0:  * non-ignorable, lowercased character from getASCIIPropertyNameChar().
michael@0:  * It is almost identical to ucnv_compareNames() but also ignores
michael@0:  * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
michael@0:  *
michael@0:  * @param name1 first NUL-terminated name
michael@0:  * @param name2 second NUL-terminated name
michael@0:  * @return 0 if the names match loosely, otherwise the difference
michael@0:  *         (name1 minus name2) of the first differing lowercased characters
michael@0:  * @internal
michael@0:  */
michael@0: 
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
michael@0:     for(;;) {
michael@0:         const int32_t packed1=getASCIIPropertyNameChar(name1);
michael@0:         const int32_t packed2=getASCIIPropertyNameChar(name2);
michael@0: 
michael@0:         /* The low byte is the lowercased character, 0 at the end of the string. */
michael@0:         const int32_t ch1=packed1&0xff;
michael@0:         const int32_t ch2=packed2&0xff;
michael@0: 
michael@0:         /* Both strings ended at the same time: they match. */
michael@0:         if(ch1==0 && ch2==0) {
michael@0:             return 0;
michael@0:         }
michael@0: 
michael@0:         /* First difference in the lowercased characters decides the order. */
michael@0:         if(ch1!=ch2) {
michael@0:             return ch1-ch2;
michael@0:         }
michael@0: 
michael@0:         /* Same character: advance each name by its own consumed count. */
michael@0:         name1+=packed1>>8;
michael@0:         name2+=packed2>>8;
michael@0:     }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Loose comparison of two EBCDIC (char *) property names: the characters
michael@0:  * skipped by getEBCDICPropertyNameChar() are ignored and the remaining
michael@0:  * characters are compared in lowercased form.
michael@0:  *
michael@0:  * @param name1 first NUL-terminated name
michael@0:  * @param name2 second NUL-terminated name
michael@0:  * @return 0 if the names match loosely, otherwise the difference
michael@0:  *         (name1 minus name2) of the first differing lowercased characters
michael@0:  * @internal
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
michael@0:     for(;;) {
michael@0:         const int32_t packed1=getEBCDICPropertyNameChar(name1);
michael@0:         const int32_t packed2=getEBCDICPropertyNameChar(name2);
michael@0: 
michael@0:         /* The low byte is the lowercased character, 0 at the end of the string. */
michael@0:         const int32_t ch1=packed1&0xff;
michael@0:         const int32_t ch2=packed2&0xff;
michael@0: 
michael@0:         /* Both strings ended at the same time: they match. */
michael@0:         if(ch1==0 && ch2==0) {
michael@0:             return 0;
michael@0:         }
michael@0: 
michael@0:         /* First difference in the lowercased characters decides the order. */
michael@0:         if(ch1!=ch2) {
michael@0:             return ch1-ch2;
michael@0:         }
michael@0: 
michael@0:         /* Same character: advance each name by its own consumed count. */
michael@0:         name1+=packed1>>8;
michael@0:         name2+=packed2>>8;
michael@0:     }
michael@0: }
michael@0: 
michael@0: U_CDECL_END
michael@0: 
michael@0: U_NAMESPACE_BEGIN
michael@0: 
michael@0: /**
michael@0:  * Looks up a property in the range table at the start of valueMaps.
michael@0:  * valueMaps[0] holds the number of ranges; each range is stored as
michael@0:  * start, limit, followed by two words per property in [start, limit).
michael@0:  * @param property property enum value to find
michael@0:  * @return the valueMaps index of the property's two-word entry,
michael@0:  *         or 0 if the property is not covered by any range
michael@0:  */
michael@0: int32_t PropNameData::findProperty(int32_t property) {
michael@0:     int32_t rangesLeft=valueMaps[0];
michael@0:     int32_t pos=1;  // first word after the range count
michael@0:     while(rangesLeft>0) {
michael@0:         // Consume the range header.
michael@0:         const int32_t rangeStart=valueMaps[pos++];
michael@0:         const int32_t rangeLimit=valueMaps[pos++];
michael@0:         if(property<rangeStart) {
michael@0:             return 0;  // Stop searching once a range starts above the property.
michael@0:         }
michael@0:         if(property<rangeLimit) {
michael@0:             return pos+2*(property-rangeStart);
michael@0:         }
michael@0:         // Jump over the two-word entries of this range.
michael@0:         pos+=2*(rangeLimit-rangeStart);
michael@0:         --rangesLeft;
michael@0:     }
michael@0:     return 0;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Finds the name group offset for one value of a property.
michael@0:  * The value map starts with the BytesTrie offset, followed by a count word:
michael@0:  * below 0x10 it is a number of (start, limit, offsets...) ranges,
michael@0:  * otherwise (count-0x10) sorted values are listed, followed by the same
michael@0:  * number of name group offsets.
michael@0:  * @param valueMapIndex valueMaps index of the property's value map, or 0
michael@0:  * @param value the property value to look up
michael@0:  * @return the word stored for the value (used by callers as an offset into
michael@0:  *         nameGroups), or 0 if there is no value map or the value is not found
michael@0:  */
michael@0: int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
michael@0:     if(valueMapIndex==0) {
michael@0:         return 0;  // The property does not have named values.
michael@0:     }
michael@0:     int32_t pos=valueMapIndex+1;  // Step over the BytesTrie offset.
michael@0:     const int32_t numRanges=valueMaps[pos++];
michael@0:     if(numRanges>=0x10) {
michael@0:         // List of values, followed by a parallel list of name group offsets.
michael@0:         const int32_t valuesStart=pos;
michael@0:         const int32_t nameGroupOffsetsStart=pos+numRanges-0x10;
michael@0:         do {
michael@0:             const int32_t listed=valueMaps[pos];
michael@0:             if(value<listed) {
michael@0:                 return 0;  // The list is ascending; the value cannot follow.
michael@0:             }
michael@0:             if(value==listed) {
michael@0:                 return valueMaps[nameGroupOffsetsStart+pos-valuesStart];
michael@0:             }
michael@0:         } while(++pos<nameGroupOffsetsStart);
michael@0:         return 0;
michael@0:     }
michael@0:     // Ranges of values, each followed by one offset per value in the range.
michael@0:     for(int32_t rangesLeft=numRanges; rangesLeft>0; --rangesLeft) {
michael@0:         const int32_t rangeStart=valueMaps[pos];
michael@0:         const int32_t rangeLimit=valueMaps[pos+1];
michael@0:         pos+=2;
michael@0:         if(value<rangeStart) {
michael@0:             return 0;
michael@0:         }
michael@0:         if(value<rangeLimit) {
michael@0:             return valueMaps[pos+value-rangeStart];
michael@0:         }
michael@0:         pos+=rangeLimit-rangeStart;  // Jump over this range's offsets.
michael@0:     }
michael@0:     return 0;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Picks one name out of a name group.
michael@0:  * A name group is a count byte followed by that many NUL-terminated names;
michael@0:  * an empty string stands for a missing name.
michael@0:  * @param nameGroup pointer to the count byte of the group
michael@0:  * @param nameIndex zero-based index of the wanted name
michael@0:  * @return the selected name, or NULL if nameIndex is out of range
michael@0:  *         or the selected name is empty
michael@0:  */
michael@0: const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
michael@0:     const int32_t numNames=nameGroup[0];
michael@0:     if(nameIndex<0 || nameIndex>=numNames) {
michael@0:         return NULL;
michael@0:     }
michael@0:     // Start at the first name and hop over nameIndex terminators.
michael@0:     const char *current=nameGroup+1;
michael@0:     for(int32_t toSkip=nameIndex; toSkip>0; --toSkip) {
michael@0:         current=uprv_strchr(current, 0)+1;
michael@0:     }
michael@0:     if(*current!=0) {
michael@0:         return current;
michael@0:     }
michael@0:     return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Feeds a loosely-normalized name into a BytesTrie.
michael@0:  * Each character is lowercased via uprv_invCharToLowercaseAscii(); '-', '_',
michael@0:  * space and 0x09..0x0d are dropped; everything else is passed to trie.next().
michael@0:  * @param trie trie to traverse; it is advanced by this call
michael@0:  * @param name NUL-terminated name, may be NULL
michael@0:  * @return USTRINGTRIE_HAS_VALUE() of the last trie result; FALSE if name is
michael@0:  *         NULL or the trie cannot continue before the name is exhausted
michael@0:  */
michael@0: UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
michael@0:     if(name==NULL) {
michael@0:         return FALSE;
michael@0:     }
michael@0:     UStringTrieResult state=USTRINGTRIE_NO_VALUE;
michael@0:     for(const char *p=name; *p!=0; ++p) {
michael@0:         char lower=*p;
michael@0:         lower=uprv_invCharToLowercaseAscii(lower);
michael@0:         // Delimiters '-', '_', and ASCII White_Space do not take part in matching.
michael@0:         const UBool ignorable=
michael@0:             lower==0x2d || lower==0x5f || lower==0x20 || (0x09<=lower && lower<=0x0d);
michael@0:         if(!ignorable) {
michael@0:             if(!USTRINGTRIE_HAS_NEXT(state)) {
michael@0:                 return FALSE;
michael@0:             }
michael@0:             state=trie.next((uint8_t)lower);
michael@0:         }
michael@0:     }
michael@0:     return USTRINGTRIE_HAS_VALUE(state);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Returns one of the names of a property.
michael@0:  * @param property property enum value
michael@0:  * @param nameChoice index of the wanted name within the property's name group
michael@0:  * @return the name, or NULL if the property is unknown or getName() finds no name
michael@0:  */
michael@0: const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
michael@0:     const int32_t propertyIndex=findProperty(property);
michael@0:     if(propertyIndex!=0) {
michael@0:         // The first word of the property's entry is its name group offset.
michael@0:         const char *group=nameGroups+valueMaps[propertyIndex];
michael@0:         return getName(group, nameChoice);
michael@0:     }
michael@0:     return NULL;  // Not a known property.
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Returns one of the names of a property value.
michael@0:  * @param property property enum value
michael@0:  * @param value value of that property
michael@0:  * @param nameChoice index of the wanted name within the value's name group
michael@0:  * @return the name, or NULL if the property is unknown, the value has no
michael@0:  *         name group, or getName() finds no name
michael@0:  */
michael@0: const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
michael@0:     const int32_t propertyIndex=findProperty(property);
michael@0:     if(propertyIndex!=0) {
michael@0:         // The second word of the property's entry is its value map index.
michael@0:         const int32_t groupOffset=
michael@0:             findPropertyValueNameGroup(valueMaps[propertyIndex+1], value);
michael@0:         if(groupOffset!=0) {
michael@0:             return getName(nameGroups+groupOffset, nameChoice);
michael@0:         }
michael@0:         return NULL;
michael@0:     }
michael@0:     return NULL;  // Not a known property.
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Looks up an alias in the BytesTrie located at bytesTries+bytesTrieOffset.
michael@0:  * @param bytesTrieOffset offset of the trie within bytesTries
michael@0:  * @param alias name to look up, matched via containsName()
michael@0:  * @return the trie value for the alias, or UCHAR_INVALID_CODE if not found
michael@0:  */
michael@0: int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
michael@0:     BytesTrie trie(bytesTries+bytesTrieOffset);
michael@0:     if(!containsName(trie, alias)) {
michael@0:         return UCHAR_INVALID_CODE;
michael@0:     }
michael@0:     // containsName() left the trie positioned on the matched alias.
michael@0:     return trie.getValue();
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Maps a property alias to its enum value, using the trie at offset 0
michael@0:  * of bytesTries.
michael@0:  * @param alias property name to look up
michael@0:  * @return the value from getPropertyOrValueEnum(), UCHAR_INVALID_CODE if not found
michael@0:  */
michael@0: int32_t PropNameData::getPropertyEnum(const char *alias) {
michael@0:     const int32_t propertyTrieOffset=0;
michael@0:     return getPropertyOrValueEnum(propertyTrieOffset, alias);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Maps a property value alias to its enum value.
michael@0:  * @param property property enum value
michael@0:  * @param alias value name to look up
michael@0:  * @return the value from getPropertyOrValueEnum(), or UCHAR_INVALID_CODE if
michael@0:  *         the property is unknown, has no named values, or the alias is not found
michael@0:  */
michael@0: int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
michael@0:     const int32_t propertyIndex=findProperty(property);
michael@0:     // 0 means either an unknown property or a property without named values.
michael@0:     const int32_t valueMapStart= propertyIndex!=0 ? valueMaps[propertyIndex+1] : 0;
michael@0:     if(valueMapStart==0) {
michael@0:         return UCHAR_INVALID_CODE;
michael@0:     }
michael@0:     // The first word of the property's valueMap is the BytesTrie offset.
michael@0:     const int32_t bytesTrieOffset=valueMaps[valueMapStart];
michael@0:     return getPropertyOrValueEnum(bytesTrieOffset, alias);
michael@0: }
michael@0: U_NAMESPACE_END
michael@0: 
michael@0: //----------------------------------------------------------------------
michael@0: // Public API implementation
michael@0: 
michael@0: /**
michael@0:  * C API entry point: forwards to PropNameData::getPropertyName().
michael@0:  * @param property property enum value
michael@0:  * @param nameChoice which of the property's names to return
michael@0:  * @return the result of PropNameData::getPropertyName(), possibly NULL
michael@0:  */
michael@0: U_CAPI const char* U_EXPORT2
michael@0: u_getPropertyName(UProperty property,
michael@0:                   UPropertyNameChoice nameChoice) {
michael@0:     U_NAMESPACE_USE
michael@0:     const char *propertyName=PropNameData::getPropertyName(property, nameChoice);
michael@0:     return propertyName;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * C API entry point: forwards to PropNameData::getPropertyEnum().
michael@0:  * @param alias property name to look up
michael@0:  * @return the result of PropNameData::getPropertyEnum(), cast to UProperty
michael@0:  */
michael@0: U_CAPI UProperty U_EXPORT2
michael@0: u_getPropertyEnum(const char* alias) {
michael@0:     U_NAMESPACE_USE
michael@0:     const int32_t propertyCode=PropNameData::getPropertyEnum(alias);
michael@0:     return (UProperty)propertyCode;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * C API entry point: forwards to PropNameData::getPropertyValueName().
michael@0:  * @param property property enum value
michael@0:  * @param value value of that property
michael@0:  * @param nameChoice which of the value's names to return
michael@0:  * @return the result of PropNameData::getPropertyValueName(), possibly NULL
michael@0:  */
michael@0: U_CAPI const char* U_EXPORT2
michael@0: u_getPropertyValueName(UProperty property,
michael@0:                        int32_t value,
michael@0:                        UPropertyNameChoice nameChoice) {
michael@0:     U_NAMESPACE_USE
michael@0:     const char *valueName=PropNameData::getPropertyValueName(property, value, nameChoice);
michael@0:     return valueName;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * C API entry point: forwards to PropNameData::getPropertyValueEnum().
michael@0:  * @param property property enum value
michael@0:  * @param alias value name to look up
michael@0:  * @return the result of PropNameData::getPropertyValueEnum()
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: u_getPropertyValueEnum(UProperty property,
michael@0:                        const char* alias) {
michael@0:     U_NAMESPACE_USE
michael@0:     const int32_t valueCode=PropNameData::getPropertyValueEnum(property, alias);
michael@0:     return valueCode;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Returns the long name of a script, i.e. the U_LONG_PROPERTY_NAME
michael@0:  * of the given UCHAR_SCRIPT property value.
michael@0:  * @param scriptCode script code to name
michael@0:  * @return the result of u_getPropertyValueName(), possibly NULL
michael@0:  */
michael@0: U_CAPI const char*  U_EXPORT2
michael@0: uscript_getName(UScriptCode scriptCode){
michael@0:     const char *longName=
michael@0:         u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_LONG_PROPERTY_NAME);
michael@0:     return longName;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Returns the short name of a script, i.e. the U_SHORT_PROPERTY_NAME
michael@0:  * of the given UCHAR_SCRIPT property value.
michael@0:  * @param scriptCode script code to name
michael@0:  * @return the result of u_getPropertyValueName(), possibly NULL
michael@0:  */
michael@0: U_CAPI const char*  U_EXPORT2
michael@0: uscript_getShortName(UScriptCode scriptCode){
michael@0:     const char *shortName=
michael@0:         u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_SHORT_PROPERTY_NAME);
michael@0:     return shortName;
michael@0: }