intl/icu/source/common/unistr_case.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unistr_case.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,180 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 1999-2011, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  unistr_case.cpp
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:2
    1.15 +*
    1.16 +*   created on: 2004aug19
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   Case-mapping functions moved here from unistr.cpp
    1.20 +*/
    1.21 +
    1.22 +#include "unicode/utypes.h"
    1.23 +#include "unicode/putil.h"
    1.24 +#include "cstring.h"
    1.25 +#include "cmemory.h"
    1.26 +#include "unicode/ustring.h"
    1.27 +#include "unicode/unistr.h"
    1.28 +#include "unicode/uchar.h"
    1.29 +#include "uelement.h"
    1.30 +#include "ustr_imp.h"
    1.31 +
    1.32 +U_NAMESPACE_BEGIN
    1.33 +
    1.34 +//========================================
    1.35 +// Read-only implementation
    1.36 +//========================================
    1.37 +
    1.38 +int8_t
    1.39 +UnicodeString::doCaseCompare(int32_t start,
    1.40 +                             int32_t length,
    1.41 +                             const UChar *srcChars,
    1.42 +                             int32_t srcStart,
    1.43 +                             int32_t srcLength,
    1.44 +                             uint32_t options) const
    1.45 +{
    1.46 +  // compare illegal string values
    1.47 +  // treat const UChar *srcChars==NULL as an empty string
    1.48 +  if(isBogus()) {
    1.49 +    return -1;
    1.50 +  }
    1.51 +
    1.52 +  // pin indices to legal values
    1.53 +  pinIndices(start, length);
    1.54 +
    1.55 +  if(srcChars == NULL) {
    1.56 +    srcStart = srcLength = 0;
    1.57 +  }
    1.58 +
    1.59 +  // get the correct pointer
    1.60 +  const UChar *chars = getArrayStart();
    1.61 +
    1.62 +  chars += start;
    1.63 +  if(srcStart!=0) {
    1.64 +    srcChars += srcStart;
    1.65 +  }
    1.66 +
    1.67 +  if(chars != srcChars) {
    1.68 +    UErrorCode errorCode=U_ZERO_ERROR;
    1.69 +    int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
    1.70 +                                options|U_COMPARE_IGNORE_CASE, &errorCode);
    1.71 +    if(result!=0) {
    1.72 +      return (int8_t)(result >> 24 | 1);
    1.73 +    }
    1.74 +  } else {
    1.75 +    // get the srcLength if necessary
    1.76 +    if(srcLength < 0) {
    1.77 +      srcLength = u_strlen(srcChars + srcStart);
    1.78 +    }
    1.79 +    if(length != srcLength) {
    1.80 +      return (int8_t)((length - srcLength) >> 24 | 1);
    1.81 +    }
    1.82 +  }
    1.83 +  return 0;
    1.84 +}
    1.85 +
    1.86 +//========================================
    1.87 +// Write implementation
    1.88 +//========================================
    1.89 +
    1.90 +UnicodeString &
    1.91 +UnicodeString::caseMap(const UCaseMap *csm,
    1.92 +                       UStringCaseMapper *stringCaseMapper) {
    1.93 +  if(isEmpty() || !isWritable()) {
    1.94 +    // nothing to do
    1.95 +    return *this;
    1.96 +  }
    1.97 +
    1.98 +  // We need to allocate a new buffer for the internal string case mapping function.
    1.99 +  // This is very similar to how doReplace() keeps the old array pointer
   1.100 +  // and deletes the old array itself after it is done.
   1.101 +  // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
   1.102 +  UChar oldStackBuffer[US_STACKBUF_SIZE];
   1.103 +  UChar *oldArray;
   1.104 +  int32_t oldLength;
   1.105 +
   1.106 +  if(fFlags&kUsingStackBuffer) {
   1.107 +    // copy the stack buffer contents because it will be overwritten
   1.108 +    u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
   1.109 +    oldArray = oldStackBuffer;
   1.110 +    oldLength = fShortLength;
   1.111 +  } else {
   1.112 +    oldArray = getArrayStart();
   1.113 +    oldLength = length();
   1.114 +  }
   1.115 +
   1.116 +  int32_t capacity;
   1.117 +  if(oldLength <= US_STACKBUF_SIZE) {
   1.118 +    capacity = US_STACKBUF_SIZE;
   1.119 +  } else {
   1.120 +    capacity = oldLength + 20;
   1.121 +  }
   1.122 +  int32_t *bufferToDelete = 0;
   1.123 +  if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
   1.124 +    return *this;
   1.125 +  }
   1.126 +
   1.127 +  // Case-map, and if the result is too long, then reallocate and repeat.
   1.128 +  UErrorCode errorCode;
   1.129 +  int32_t newLength;
   1.130 +  do {
   1.131 +    errorCode = U_ZERO_ERROR;
   1.132 +    newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
   1.133 +                                 oldArray, oldLength, &errorCode);
   1.134 +    setLength(newLength);
   1.135 +  } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
   1.136 +
   1.137 +  if (bufferToDelete) {
   1.138 +    uprv_free(bufferToDelete);
   1.139 +  }
   1.140 +  if(U_FAILURE(errorCode)) {
   1.141 +    setToBogus();
   1.142 +  }
   1.143 +  return *this;
   1.144 +}
   1.145 +
   1.146 +UnicodeString &
   1.147 +UnicodeString::foldCase(uint32_t options) {
   1.148 +  UCaseMap csm=UCASEMAP_INITIALIZER;
   1.149 +  csm.csp=ucase_getSingleton();
   1.150 +  csm.options=options;
   1.151 +  return caseMap(&csm, ustrcase_internalFold);
   1.152 +}
   1.153 +
   1.154 +U_NAMESPACE_END
   1.155 +
   1.156 +// Defined here to reduce dependencies on break iterator
   1.157 +U_CAPI int32_t U_EXPORT2
   1.158 +uhash_hashCaselessUnicodeString(const UElement key) {
   1.159 +    U_NAMESPACE_USE
   1.160 +    const UnicodeString *str = (const UnicodeString*) key.pointer;
   1.161 +    if (str == NULL) {
   1.162 +        return 0;
   1.163 +    }
   1.164 +    // Inefficient; a better way would be to have a hash function in
   1.165 +    // UnicodeString that does case folding on the fly.
   1.166 +    UnicodeString copy(*str);
   1.167 +    return copy.foldCase().hashCode();
   1.168 +}
   1.169 +
   1.170 +// Defined here to reduce dependencies on break iterator
   1.171 +U_CAPI UBool U_EXPORT2
   1.172 +uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
   1.173 +    U_NAMESPACE_USE
   1.174 +    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
   1.175 +    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
   1.176 +    if (str1 == str2) {
   1.177 +        return TRUE;
   1.178 +    }
   1.179 +    if (str1 == NULL || str2 == NULL) {
   1.180 +        return FALSE;
   1.181 +    }
   1.182 +    return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
   1.183 +}

mercurial