1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unistr_case.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,180 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 1999-2011, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: unistr_case.cpp 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:2 1.15 +* 1.16 +* created on: 2004aug19 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* Case-mapping functions moved here from unistr.cpp 1.20 +*/ 1.21 + 1.22 +#include "unicode/utypes.h" 1.23 +#include "unicode/putil.h" 1.24 +#include "cstring.h" 1.25 +#include "cmemory.h" 1.26 +#include "unicode/ustring.h" 1.27 +#include "unicode/unistr.h" 1.28 +#include "unicode/uchar.h" 1.29 +#include "uelement.h" 1.30 +#include "ustr_imp.h" 1.31 + 1.32 +U_NAMESPACE_BEGIN 1.33 + 1.34 +//======================================== 1.35 +// Read-only implementation 1.36 +//======================================== 1.37 + 1.38 +int8_t 1.39 +UnicodeString::doCaseCompare(int32_t start, 1.40 + int32_t length, 1.41 + const UChar *srcChars, 1.42 + int32_t srcStart, 1.43 + int32_t srcLength, 1.44 + uint32_t options) const 1.45 +{ 1.46 + // compare illegal string values 1.47 + // treat const UChar *srcChars==NULL as an empty string 1.48 + if(isBogus()) { 1.49 + return -1; 1.50 + } 1.51 + 1.52 + // pin indices to legal values 1.53 + pinIndices(start, length); 1.54 + 1.55 + if(srcChars == NULL) { 1.56 + srcStart = srcLength = 0; 1.57 + } 1.58 + 1.59 + // get the correct pointer 1.60 + const UChar *chars = getArrayStart(); 1.61 + 1.62 + chars += start; 1.63 + if(srcStart!=0) { 1.64 + srcChars += srcStart; 1.65 + } 1.66 + 1.67 + if(chars != srcChars) { 1.68 + UErrorCode errorCode=U_ZERO_ERROR; 1.69 + int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, 1.70 + options|U_COMPARE_IGNORE_CASE, &errorCode); 1.71 + if(result!=0) { 1.72 + return (int8_t)(result >> 24 | 1); 1.73 + } 1.74 + } else { 1.75 + // get the srcLength if necessary 1.76 + if(srcLength < 0) { 1.77 + srcLength = u_strlen(srcChars + srcStart); 1.78 + } 1.79 + if(length != srcLength) { 1.80 + return (int8_t)((length - srcLength) >> 24 | 1); 1.81 + } 1.82 + } 1.83 + return 0; 1.84 +} 1.85 + 1.86 +//======================================== 1.87 +// Write implementation 1.88 +//======================================== 1.89 + 1.90 +UnicodeString & 1.91 +UnicodeString::caseMap(const UCaseMap *csm, 1.92 + UStringCaseMapper *stringCaseMapper) { 1.93 + if(isEmpty() || !isWritable()) { 1.94 + // nothing to do 1.95 + return *this; 1.96 + } 1.97 + 1.98 + // We need to allocate a new buffer for the internal string case mapping function. 1.99 + // This is very similar to how doReplace() keeps the old array pointer 1.100 + // and deletes the old array itself after it is done. 1.101 + // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. 1.102 + UChar oldStackBuffer[US_STACKBUF_SIZE]; 1.103 + UChar *oldArray; 1.104 + int32_t oldLength; 1.105 + 1.106 + if(fFlags&kUsingStackBuffer) { 1.107 + // copy the stack buffer contents because it will be overwritten 1.108 + u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength); 1.109 + oldArray = oldStackBuffer; 1.110 + oldLength = fShortLength; 1.111 + } else { 1.112 + oldArray = getArrayStart(); 1.113 + oldLength = length(); 1.114 + } 1.115 + 1.116 + int32_t capacity; 1.117 + if(oldLength <= US_STACKBUF_SIZE) { 1.118 + capacity = US_STACKBUF_SIZE; 1.119 + } else { 1.120 + capacity = oldLength + 20; 1.121 + } 1.122 + int32_t *bufferToDelete = 0; 1.123 + if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { 1.124 + return *this; 1.125 + } 1.126 + 1.127 + // Case-map, and if the result is too long, then reallocate and repeat. 1.128 + UErrorCode errorCode; 1.129 + int32_t newLength; 1.130 + do { 1.131 + errorCode = U_ZERO_ERROR; 1.132 + newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(), 1.133 + oldArray, oldLength, &errorCode); 1.134 + setLength(newLength); 1.135 + } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE)); 1.136 + 1.137 + if (bufferToDelete) { 1.138 + uprv_free(bufferToDelete); 1.139 + } 1.140 + if(U_FAILURE(errorCode)) { 1.141 + setToBogus(); 1.142 + } 1.143 + return *this; 1.144 +} 1.145 + 1.146 +UnicodeString & 1.147 +UnicodeString::foldCase(uint32_t options) { 1.148 + UCaseMap csm=UCASEMAP_INITIALIZER; 1.149 + csm.csp=ucase_getSingleton(); 1.150 + csm.options=options; 1.151 + return caseMap(&csm, ustrcase_internalFold); 1.152 +} 1.153 + 1.154 +U_NAMESPACE_END 1.155 + 1.156 +// Defined here to reduce dependencies on break iterator 1.157 +U_CAPI int32_t U_EXPORT2 1.158 +uhash_hashCaselessUnicodeString(const UElement key) { 1.159 + U_NAMESPACE_USE 1.160 + const UnicodeString *str = (const UnicodeString*) key.pointer; 1.161 + if (str == NULL) { 1.162 + return 0; 1.163 + } 1.164 + // Inefficient; a better way would be to have a hash function in 1.165 + // UnicodeString that does case folding on the fly. 1.166 + UnicodeString copy(*str); 1.167 + return copy.foldCase().hashCode(); 1.168 +} 1.169 + 1.170 +// Defined here to reduce dependencies on break iterator 1.171 +U_CAPI UBool U_EXPORT2 1.172 +uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) { 1.173 + U_NAMESPACE_USE 1.174 + const UnicodeString *str1 = (const UnicodeString*) key1.pointer; 1.175 + const UnicodeString *str2 = (const UnicodeString*) key2.pointer; 1.176 + if (str1 == str2) { 1.177 + return TRUE; 1.178 + } 1.179 + if (str1 == NULL || str2 == NULL) { 1.180 + return FALSE; 1.181 + } 1.182 + return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; 1.183 +}