michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 1996-2012, International Business Machines Corporation and michael@0: * others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: */ michael@0: //=============================================================================== michael@0: // michael@0: // File sortkey.cpp michael@0: // michael@0: // michael@0: // michael@0: // Created by: Helena Shih michael@0: // michael@0: // Modification History: michael@0: // michael@0: // Date Name Description michael@0: // michael@0: // 6/20/97 helena Java class name change. michael@0: // 6/23/97 helena Added comments to make code more readable. michael@0: // 6/26/98 erm Canged to use byte arrays instead of UnicodeString michael@0: // 7/31/98 erm hashCode: minimum inc should be 2 not 1, michael@0: // Cleaned up operator= michael@0: // 07/12/99 helena HPUX 11 CC port. michael@0: // 03/06/01 synwee Modified compareTo, to handle the result of michael@0: // 2 string similar in contents, but one is longer michael@0: // than the other michael@0: //=============================================================================== michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: #include "unicode/sortkey.h" michael@0: #include "cmemory.h" michael@0: #include "uelement.h" michael@0: #include "ustr_imp.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: // A hash code of kInvalidHashCode indicates that the hash code needs michael@0: // to be computed. A hash code of kEmptyHashCode is used for empty keys michael@0: // and for any key whose computed hash code is kInvalidHashCode. michael@0: static const int32_t kInvalidHashCode = 0; michael@0: static const int32_t kEmptyHashCode = 1; michael@0: // The "bogus hash code" replaces a separate fBogus flag. michael@0: static const int32_t kBogusHashCode = 2; michael@0: michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) michael@0: michael@0: CollationKey::CollationKey() michael@0: : UObject(), fFlagAndLength(0), michael@0: fHashCode(kEmptyHashCode) michael@0: { michael@0: } michael@0: michael@0: // Create a collation key from a bit array. michael@0: CollationKey::CollationKey(const uint8_t* newValues, int32_t count) michael@0: : UObject(), fFlagAndLength(count), michael@0: fHashCode(kInvalidHashCode) michael@0: { michael@0: if (count < 0 || (newValues == NULL && count != 0) || michael@0: (count > getCapacity() && reallocate(count, 0) == NULL)) { michael@0: setToBogus(); michael@0: return; michael@0: } michael@0: michael@0: if (count > 0) { michael@0: uprv_memcpy(getBytes(), newValues, count); michael@0: } michael@0: } michael@0: michael@0: CollationKey::CollationKey(const CollationKey& other) michael@0: : UObject(other), fFlagAndLength(other.getLength()), michael@0: fHashCode(other.fHashCode) michael@0: { michael@0: if (other.isBogus()) michael@0: { michael@0: setToBogus(); michael@0: return; michael@0: } michael@0: michael@0: int32_t length = fFlagAndLength; michael@0: if (length > getCapacity() && reallocate(length, 0) == NULL) { michael@0: setToBogus(); michael@0: return; michael@0: } michael@0: michael@0: if (length > 0) { michael@0: uprv_memcpy(getBytes(), other.getBytes(), length); michael@0: } michael@0: } michael@0: michael@0: CollationKey::~CollationKey() michael@0: { michael@0: if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } michael@0: } michael@0: michael@0: uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { michael@0: uint8_t *newBytes = static_cast(uprv_malloc(newCapacity)); michael@0: if(newBytes == NULL) { return NULL; } michael@0: if(length > 0) { michael@0: uprv_memcpy(newBytes, getBytes(), length); michael@0: } michael@0: if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } michael@0: fUnion.fFields.fBytes = newBytes; michael@0: fUnion.fFields.fCapacity = newCapacity; michael@0: fFlagAndLength |= 0x80000000; michael@0: return newBytes; michael@0: } michael@0: michael@0: void CollationKey::setLength(int32_t newLength) { michael@0: // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); michael@0: fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; michael@0: fHashCode = kInvalidHashCode; michael@0: } michael@0: michael@0: // set the key to an empty state michael@0: CollationKey& michael@0: CollationKey::reset() michael@0: { michael@0: fFlagAndLength &= 0x80000000; michael@0: fHashCode = kEmptyHashCode; michael@0: michael@0: return *this; michael@0: } michael@0: michael@0: // set the key to a "bogus" or invalid state michael@0: CollationKey& michael@0: CollationKey::setToBogus() michael@0: { michael@0: fFlagAndLength &= 0x80000000; michael@0: fHashCode = kBogusHashCode; michael@0: michael@0: return *this; michael@0: } michael@0: michael@0: UBool michael@0: CollationKey::operator==(const CollationKey& source) const michael@0: { michael@0: return getLength() == source.getLength() && michael@0: (this == &source || michael@0: uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); michael@0: } michael@0: michael@0: const CollationKey& michael@0: CollationKey::operator=(const CollationKey& other) michael@0: { michael@0: if (this != &other) michael@0: { michael@0: if (other.isBogus()) michael@0: { michael@0: return setToBogus(); michael@0: } michael@0: michael@0: int32_t length = other.getLength(); michael@0: if (length > getCapacity() && reallocate(length, 0) == NULL) { michael@0: return setToBogus(); michael@0: } michael@0: if (length > 0) { michael@0: uprv_memcpy(getBytes(), other.getBytes(), length); michael@0: } michael@0: fFlagAndLength = (fFlagAndLength & 0x80000000) | length; michael@0: fHashCode = other.fHashCode; michael@0: } michael@0: michael@0: return *this; michael@0: } michael@0: michael@0: // Bitwise comparison for the collation keys. michael@0: Collator::EComparisonResult michael@0: CollationKey::compareTo(const CollationKey& target) const michael@0: { michael@0: UErrorCode errorCode = U_ZERO_ERROR; michael@0: return static_cast(compareTo(target, errorCode)); michael@0: } michael@0: michael@0: // Bitwise comparison for the collation keys. michael@0: UCollationResult michael@0: CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const michael@0: { michael@0: if(U_SUCCESS(status)) { michael@0: const uint8_t *src = getBytes(); michael@0: const uint8_t *tgt = target.getBytes(); michael@0: michael@0: // are we comparing the same string michael@0: if (src == tgt) michael@0: return UCOL_EQUAL; michael@0: michael@0: UCollationResult result; michael@0: michael@0: // are we comparing different lengths? michael@0: int32_t minLength = getLength(); michael@0: int32_t targetLength = target.getLength(); michael@0: if (minLength < targetLength) { michael@0: result = UCOL_LESS; michael@0: } else if (minLength == targetLength) { michael@0: result = UCOL_EQUAL; michael@0: } else { michael@0: minLength = targetLength; michael@0: result = UCOL_GREATER; michael@0: } michael@0: michael@0: if (minLength > 0) { michael@0: int diff = uprv_memcmp(src, tgt, minLength); michael@0: if (diff > 0) { michael@0: return UCOL_GREATER; michael@0: } michael@0: else michael@0: if (diff < 0) { michael@0: return UCOL_LESS; michael@0: } michael@0: } michael@0: michael@0: return result; michael@0: } else { michael@0: return UCOL_EQUAL; michael@0: } michael@0: } michael@0: michael@0: #ifdef U_USE_COLLATION_KEY_DEPRECATES michael@0: // Create a copy of the byte array. michael@0: uint8_t* michael@0: CollationKey::toByteArray(int32_t& count) const michael@0: { michael@0: uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); michael@0: michael@0: if (result == NULL) michael@0: { michael@0: count = 0; michael@0: } michael@0: else michael@0: { michael@0: count = fCount; michael@0: if (count > 0) { michael@0: uprv_memcpy(result, fBytes, fCount); michael@0: } michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: #endif michael@0: michael@0: static int32_t michael@0: computeHashCode(const uint8_t *key, int32_t length) { michael@0: const char *s = reinterpret_cast(key); michael@0: int32_t hash; michael@0: if (s == NULL || length == 0) { michael@0: hash = kEmptyHashCode; michael@0: } else { michael@0: hash = ustr_hashCharsN(s, length); michael@0: if (hash == kInvalidHashCode || hash == kBogusHashCode) { michael@0: hash = kEmptyHashCode; michael@0: } michael@0: } michael@0: return hash; michael@0: } michael@0: michael@0: int32_t michael@0: CollationKey::hashCode() const michael@0: { michael@0: // (Cribbed from UnicodeString) michael@0: // We cache the hashCode; when it becomes invalid, due to any change to the michael@0: // string, we note this by setting it to kInvalidHashCode. [LIU] michael@0: michael@0: // Note: This method is semantically const, but physically non-const. michael@0: michael@0: if (fHashCode == kInvalidHashCode) michael@0: { michael@0: fHashCode = computeHashCode(getBytes(), getLength()); michael@0: } michael@0: michael@0: return fHashCode; michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucol_keyHashCode(const uint8_t *key, michael@0: int32_t length) michael@0: { michael@0: return icu::computeHashCode(key, length); michael@0: } michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */