michael@0: /*
michael@0: *******************************************************************************
michael@0: * Copyright (C) 1996-2012, International Business Machines Corporation and
michael@0: * others. All Rights Reserved.
michael@0: *******************************************************************************
michael@0: */
michael@0: //===============================================================================
michael@0: //
michael@0: // File sortkey.cpp
michael@0: //
michael@0: //
michael@0: //
michael@0: // Created by: Helena Shih
michael@0: //
michael@0: // Modification History:
michael@0: //
michael@0: //  Date         Name          Description
michael@0: //
michael@0: //  6/20/97      helena        Java class name change.
michael@0: //  6/23/97      helena        Added comments to make code more readable.
michael@0: //  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
michael@0: //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
michael@0: //                             Cleaned up operator=
michael@0: // 07/12/99      helena        HPUX 11 CC port.
michael@0: // 03/06/01      synwee        Modified compareTo, to handle the result of
michael@0: //                             2 string similar in contents, but one is longer
michael@0: //                             than the other
michael@0: //===============================================================================
michael@0: 
michael@0: #include "unicode/utypes.h"
michael@0: 
michael@0: #if !UCONFIG_NO_COLLATION
michael@0: 
michael@0: #include "unicode/sortkey.h"
michael@0: #include "cmemory.h"
michael@0: #include "uelement.h"
michael@0: #include "ustr_imp.h"
michael@0: 
michael@0: U_NAMESPACE_BEGIN
michael@0: 
michael@0: // A hash code of kInvalidHashCode indicates that the hash code needs
michael@0: // to be computed. A hash code of kEmptyHashCode is used for empty keys
michael@0: // and for any key whose computed hash code is kInvalidHashCode.
michael@0: static const int32_t kInvalidHashCode = 0;
michael@0: static const int32_t kEmptyHashCode = 1;
michael@0: // The "bogus hash code" replaces a separate fBogus flag.
michael@0: static const int32_t kBogusHashCode = 2;
michael@0: 
michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
michael@0: 
michael@0: CollationKey::CollationKey()
michael@0:     : UObject(), fFlagAndLength(0),
michael@0:       fHashCode(kEmptyHashCode)
michael@0: {
michael@0: }
michael@0: 
michael@0: // Create a collation key from a bit array.
michael@0: CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
michael@0:     : UObject(), fFlagAndLength(count),
michael@0:       fHashCode(kInvalidHashCode)
michael@0: {
michael@0:     if (count < 0 || (newValues == NULL && count != 0) ||
michael@0:             (count > getCapacity() && reallocate(count, 0) == NULL)) {
michael@0:         setToBogus();
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     if (count > 0) {
michael@0:         uprv_memcpy(getBytes(), newValues, count);
michael@0:     }
michael@0: }
michael@0: 
michael@0: CollationKey::CollationKey(const CollationKey& other)
michael@0:     : UObject(other), fFlagAndLength(other.getLength()),
michael@0:       fHashCode(other.fHashCode)
michael@0: {
michael@0:     if (other.isBogus())
michael@0:     {
michael@0:         setToBogus();
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     int32_t length = fFlagAndLength;
michael@0:     if (length > getCapacity() && reallocate(length, 0) == NULL) {
michael@0:         setToBogus();
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     if (length > 0) {
michael@0:         uprv_memcpy(getBytes(), other.getBytes(), length);
michael@0:     }
michael@0: }
michael@0: 
michael@0: CollationKey::~CollationKey()
michael@0: {
michael@0:     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
michael@0: }
michael@0: 
michael@0: uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
michael@0:     uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
michael@0:     if(newBytes == NULL) { return NULL; }
michael@0:     if(length > 0) {
michael@0:         uprv_memcpy(newBytes, getBytes(), length);
michael@0:     }
michael@0:     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
michael@0:     fUnion.fFields.fBytes = newBytes;
michael@0:     fUnion.fFields.fCapacity = newCapacity;
michael@0:     fFlagAndLength |= 0x80000000;
michael@0:     return newBytes;
michael@0: }
michael@0: 
michael@0: void CollationKey::setLength(int32_t newLength) {
michael@0:     // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
michael@0:     fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
michael@0:     fHashCode = kInvalidHashCode;
michael@0: }
michael@0: 
michael@0: // set the key to an empty state
michael@0: CollationKey&
michael@0: CollationKey::reset()
michael@0: {
michael@0:     fFlagAndLength &= 0x80000000;
michael@0:     fHashCode = kEmptyHashCode;
michael@0: 
michael@0:     return *this;
michael@0: }
michael@0: 
michael@0: // set the key to a "bogus" or invalid state
michael@0: CollationKey&
michael@0: CollationKey::setToBogus()
michael@0: {
michael@0:     fFlagAndLength &= 0x80000000;
michael@0:     fHashCode = kBogusHashCode;
michael@0: 
michael@0:     return *this;
michael@0: }
michael@0: 
michael@0: UBool
michael@0: CollationKey::operator==(const CollationKey& source) const
michael@0: {
michael@0:     return getLength() == source.getLength() &&
michael@0:             (this == &source ||
michael@0:              uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
michael@0: }
michael@0: 
michael@0: const CollationKey&
michael@0: CollationKey::operator=(const CollationKey& other)
michael@0: {
michael@0:     if (this != &other)
michael@0:     {
michael@0:         if (other.isBogus())
michael@0:         {
michael@0:             return setToBogus();
michael@0:         }
michael@0: 
michael@0:         int32_t length = other.getLength();
michael@0:         if (length > getCapacity() && reallocate(length, 0) == NULL) {
michael@0:             return setToBogus();
michael@0:         }
michael@0:         if (length > 0) {
michael@0:             uprv_memcpy(getBytes(), other.getBytes(), length);
michael@0:         }
michael@0:         fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
michael@0:         fHashCode = other.fHashCode;
michael@0:     }
michael@0: 
michael@0:     return *this;
michael@0: }
michael@0: 
michael@0: // Bitwise comparison for the collation keys.
michael@0: Collator::EComparisonResult
michael@0: CollationKey::compareTo(const CollationKey& target) const
michael@0: {
michael@0:     UErrorCode errorCode = U_ZERO_ERROR;
michael@0:     return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
michael@0: }
michael@0: 
michael@0: // Bitwise comparison for the collation keys.
michael@0: UCollationResult
michael@0: CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
michael@0: {
michael@0:   if(U_SUCCESS(status)) {
michael@0:     const uint8_t *src = getBytes();
michael@0:     const uint8_t *tgt = target.getBytes();
michael@0: 
michael@0:     // are we comparing the same string
michael@0:     if (src == tgt)
michael@0:         return  UCOL_EQUAL;
michael@0: 
michael@0:     UCollationResult result;
michael@0: 
michael@0:     // are we comparing different lengths?
michael@0:     int32_t minLength = getLength();
michael@0:     int32_t targetLength = target.getLength();
michael@0:     if (minLength < targetLength) {
michael@0:         result = UCOL_LESS;
michael@0:     } else if (minLength == targetLength) {
michael@0:         result = UCOL_EQUAL;
michael@0:     } else {
michael@0:         minLength = targetLength;
michael@0:         result = UCOL_GREATER;
michael@0:     }
michael@0: 
michael@0:     if (minLength > 0) {
michael@0:         int diff = uprv_memcmp(src, tgt, minLength);
michael@0:         if (diff > 0) {
michael@0:             return UCOL_GREATER;
michael@0:         }
michael@0:         else
michael@0:             if (diff < 0) {
michael@0:                 return UCOL_LESS;
michael@0:             }
michael@0:     }
michael@0: 
michael@0:     return result;
michael@0:   } else {
michael@0:     return UCOL_EQUAL;
michael@0:   }
michael@0: }
michael@0: 
michael@0: #ifdef U_USE_COLLATION_KEY_DEPRECATES
michael@0: // Create a copy of the byte array.
michael@0: uint8_t*
michael@0: CollationKey::toByteArray(int32_t& count) const
michael@0: {
michael@0:     uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
michael@0: 
michael@0:     if (result == NULL)
michael@0:     {
michael@0:         count = 0;
michael@0:     }
michael@0:     else
michael@0:     {
michael@0:         count = fCount;
michael@0:         if (count > 0) {
michael@0:             uprv_memcpy(result, fBytes, fCount);
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     return result;
michael@0: }
michael@0: #endif
michael@0: 
michael@0: static int32_t
michael@0: computeHashCode(const uint8_t *key, int32_t  length) {
michael@0:     const char *s = reinterpret_cast<const char *>(key);
michael@0:     int32_t hash;
michael@0:     if (s == NULL || length == 0) {
michael@0:         hash = kEmptyHashCode;
michael@0:     } else {
michael@0:         hash = ustr_hashCharsN(s, length);
michael@0:         if (hash == kInvalidHashCode || hash == kBogusHashCode) {
michael@0:             hash = kEmptyHashCode;
michael@0:         }
michael@0:     }
michael@0:     return hash;
michael@0: }
michael@0: 
michael@0: int32_t
michael@0: CollationKey::hashCode() const
michael@0: {
michael@0:     // (Cribbed from UnicodeString)
michael@0:     // We cache the hashCode; when it becomes invalid, due to any change to the
michael@0:     // string, we note this by setting it to kInvalidHashCode. [LIU]
michael@0: 
michael@0:     // Note: This method is semantically const, but physically non-const.
michael@0: 
michael@0:     if (fHashCode == kInvalidHashCode)
michael@0:     {
michael@0:         fHashCode = computeHashCode(getBytes(), getLength());
michael@0:     }
michael@0: 
michael@0:     return fHashCode;
michael@0: }
michael@0: 
michael@0: U_NAMESPACE_END
michael@0: 
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: ucol_keyHashCode(const uint8_t *key, 
michael@0:                        int32_t  length)
michael@0: {
michael@0:     return icu::computeHashCode(key, length);
michael@0: }
michael@0: 
michael@0: #endif /* #if !UCONFIG_NO_COLLATION */