intl/icu/source/common/unistr_case.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1999-2011, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: unistr_case.cpp
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:2
michael@0 12 *
michael@0 13 * created on: 2004aug19
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * Case-mapping functions moved here from unistr.cpp
michael@0 17 */
michael@0 18
michael@0 19 #include "unicode/utypes.h"
michael@0 20 #include "unicode/putil.h"
michael@0 21 #include "cstring.h"
michael@0 22 #include "cmemory.h"
michael@0 23 #include "unicode/ustring.h"
michael@0 24 #include "unicode/unistr.h"
michael@0 25 #include "unicode/uchar.h"
michael@0 26 #include "uelement.h"
michael@0 27 #include "ustr_imp.h"
michael@0 28
michael@0 29 U_NAMESPACE_BEGIN
michael@0 30
michael@0 31 //========================================
michael@0 32 // Read-only implementation
michael@0 33 //========================================
michael@0 34
michael@0 35 int8_t
michael@0 36 UnicodeString::doCaseCompare(int32_t start,
michael@0 37 int32_t length,
michael@0 38 const UChar *srcChars,
michael@0 39 int32_t srcStart,
michael@0 40 int32_t srcLength,
michael@0 41 uint32_t options) const
michael@0 42 {
michael@0 43 // compare illegal string values
michael@0 44 // treat const UChar *srcChars==NULL as an empty string
michael@0 45 if(isBogus()) {
michael@0 46 return -1;
michael@0 47 }
michael@0 48
michael@0 49 // pin indices to legal values
michael@0 50 pinIndices(start, length);
michael@0 51
michael@0 52 if(srcChars == NULL) {
michael@0 53 srcStart = srcLength = 0;
michael@0 54 }
michael@0 55
michael@0 56 // get the correct pointer
michael@0 57 const UChar *chars = getArrayStart();
michael@0 58
michael@0 59 chars += start;
michael@0 60 if(srcStart!=0) {
michael@0 61 srcChars += srcStart;
michael@0 62 }
michael@0 63
michael@0 64 if(chars != srcChars) {
michael@0 65 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 66 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
michael@0 67 options|U_COMPARE_IGNORE_CASE, &errorCode);
michael@0 68 if(result!=0) {
michael@0 69 return (int8_t)(result >> 24 | 1);
michael@0 70 }
michael@0 71 } else {
michael@0 72 // get the srcLength if necessary
michael@0 73 if(srcLength < 0) {
michael@0 74 srcLength = u_strlen(srcChars + srcStart);
michael@0 75 }
michael@0 76 if(length != srcLength) {
michael@0 77 return (int8_t)((length - srcLength) >> 24 | 1);
michael@0 78 }
michael@0 79 }
michael@0 80 return 0;
michael@0 81 }
michael@0 82
michael@0 83 //========================================
michael@0 84 // Write implementation
michael@0 85 //========================================
michael@0 86
michael@0 87 UnicodeString &
michael@0 88 UnicodeString::caseMap(const UCaseMap *csm,
michael@0 89 UStringCaseMapper *stringCaseMapper) {
michael@0 90 if(isEmpty() || !isWritable()) {
michael@0 91 // nothing to do
michael@0 92 return *this;
michael@0 93 }
michael@0 94
michael@0 95 // We need to allocate a new buffer for the internal string case mapping function.
michael@0 96 // This is very similar to how doReplace() keeps the old array pointer
michael@0 97 // and deletes the old array itself after it is done.
michael@0 98 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
michael@0 99 UChar oldStackBuffer[US_STACKBUF_SIZE];
michael@0 100 UChar *oldArray;
michael@0 101 int32_t oldLength;
michael@0 102
michael@0 103 if(fFlags&kUsingStackBuffer) {
michael@0 104 // copy the stack buffer contents because it will be overwritten
michael@0 105 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
michael@0 106 oldArray = oldStackBuffer;
michael@0 107 oldLength = fShortLength;
michael@0 108 } else {
michael@0 109 oldArray = getArrayStart();
michael@0 110 oldLength = length();
michael@0 111 }
michael@0 112
michael@0 113 int32_t capacity;
michael@0 114 if(oldLength <= US_STACKBUF_SIZE) {
michael@0 115 capacity = US_STACKBUF_SIZE;
michael@0 116 } else {
michael@0 117 capacity = oldLength + 20;
michael@0 118 }
michael@0 119 int32_t *bufferToDelete = 0;
michael@0 120 if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
michael@0 121 return *this;
michael@0 122 }
michael@0 123
michael@0 124 // Case-map, and if the result is too long, then reallocate and repeat.
michael@0 125 UErrorCode errorCode;
michael@0 126 int32_t newLength;
michael@0 127 do {
michael@0 128 errorCode = U_ZERO_ERROR;
michael@0 129 newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
michael@0 130 oldArray, oldLength, &errorCode);
michael@0 131 setLength(newLength);
michael@0 132 } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
michael@0 133
michael@0 134 if (bufferToDelete) {
michael@0 135 uprv_free(bufferToDelete);
michael@0 136 }
michael@0 137 if(U_FAILURE(errorCode)) {
michael@0 138 setToBogus();
michael@0 139 }
michael@0 140 return *this;
michael@0 141 }
michael@0 142
michael@0 143 UnicodeString &
michael@0 144 UnicodeString::foldCase(uint32_t options) {
michael@0 145 UCaseMap csm=UCASEMAP_INITIALIZER;
michael@0 146 csm.csp=ucase_getSingleton();
michael@0 147 csm.options=options;
michael@0 148 return caseMap(&csm, ustrcase_internalFold);
michael@0 149 }
michael@0 150
michael@0 151 U_NAMESPACE_END
michael@0 152
michael@0 153 // Defined here to reduce dependencies on break iterator
michael@0 154 U_CAPI int32_t U_EXPORT2
michael@0 155 uhash_hashCaselessUnicodeString(const UElement key) {
michael@0 156 U_NAMESPACE_USE
michael@0 157 const UnicodeString *str = (const UnicodeString*) key.pointer;
michael@0 158 if (str == NULL) {
michael@0 159 return 0;
michael@0 160 }
michael@0 161 // Inefficient; a better way would be to have a hash function in
michael@0 162 // UnicodeString that does case folding on the fly.
michael@0 163 UnicodeString copy(*str);
michael@0 164 return copy.foldCase().hashCode();
michael@0 165 }
michael@0 166
michael@0 167 // Defined here to reduce dependencies on break iterator
michael@0 168 U_CAPI UBool U_EXPORT2
michael@0 169 uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
michael@0 170 U_NAMESPACE_USE
michael@0 171 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
michael@0 172 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
michael@0 173 if (str1 == str2) {
michael@0 174 return TRUE;
michael@0 175 }
michael@0 176 if (str1 == NULL || str2 == NULL) {
michael@0 177 return FALSE;
michael@0 178 }
michael@0 179 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
michael@0 180 }

mercurial