1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/scriptset.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,276 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* 1.10 +* scriptset.cpp 1.11 +* 1.12 +* created on: 2013 Jan 7 1.13 +* created by: Andy Heninger 1.14 +*/ 1.15 + 1.16 +#include "unicode/utypes.h" 1.17 + 1.18 +#include "unicode/uchar.h" 1.19 +#include "unicode/unistr.h" 1.20 + 1.21 +#include "scriptset.h" 1.22 +#include "uassert.h" 1.23 + 1.24 +U_NAMESPACE_BEGIN 1.25 + 1.26 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 1.27 + 1.28 +//---------------------------------------------------------------------------- 1.29 +// 1.30 +// ScriptSet implementation 1.31 +// 1.32 +//---------------------------------------------------------------------------- 1.33 +ScriptSet::ScriptSet() { 1.34 + for (uint32_t i=0; i<LENGTHOF(bits); i++) { 1.35 + bits[i] = 0; 1.36 + } 1.37 +} 1.38 + 1.39 +ScriptSet::~ScriptSet() { 1.40 +} 1.41 + 1.42 +ScriptSet::ScriptSet(const ScriptSet &other) { 1.43 + *this = other; 1.44 +} 1.45 + 1.46 + 1.47 +ScriptSet & ScriptSet::operator =(const ScriptSet &other) { 1.48 + for (uint32_t i=0; i<LENGTHOF(bits); i++) { 1.49 + bits[i] = other.bits[i]; 1.50 + } 1.51 + return *this; 1.52 +} 1.53 + 1.54 + 1.55 +UBool ScriptSet::operator == (const ScriptSet &other) const { 1.56 + for (uint32_t i=0; i<LENGTHOF(bits); i++) { 1.57 + if (bits[i] != other.bits[i]) { 1.58 + return FALSE; 1.59 + } 1.60 + } 1.61 + return TRUE; 1.62 +} 1.63 + 1.64 +UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { 1.65 + if (U_FAILURE(status)) { 1.66 + return FALSE; 1.67 + } 1.68 + if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 1.69 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.70 + return FALSE; 1.71 + } 1.72 + uint32_t index = script / 32; 1.73 + uint32_t bit = 1 << (script & 31); 1.74 + return ((bits[index] & bit) != 0); 1.75 +} 1.76 + 1.77 + 1.78 +ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) { 1.79 + if (U_FAILURE(status)) { 1.80 + return *this; 1.81 + } 1.82 + if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 1.83 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.84 + return *this; 1.85 + } 1.86 + uint32_t index = script / 32; 1.87 + uint32_t bit = 1 << (script & 31); 1.88 + bits[index] |= bit; 1.89 + return *this; 1.90 +} 1.91 + 1.92 +ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) { 1.93 + if (U_FAILURE(status)) { 1.94 + return *this; 1.95 + } 1.96 + if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 1.97 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.98 + return *this; 1.99 + } 1.100 + uint32_t index = script / 32; 1.101 + uint32_t bit = 1 << (script & 31); 1.102 + bits[index] &= ~bit; 1.103 + return *this; 1.104 +} 1.105 + 1.106 + 1.107 + 1.108 +ScriptSet &ScriptSet::Union(const ScriptSet &other) { 1.109 + for (uint32_t i=0; i<LENGTHOF(bits); i++) { 1.110 + bits[i] |= other.bits[i]; 1.111 + } 1.112 + return *this; 1.113 +} 1.114 + 1.115 +ScriptSet &ScriptSet::intersect(const ScriptSet &other) { 1.116 + for (uint32_t i=0; i<LENGTHOF(bits); i++) { 1.117 + bits[i] &= other.bits[i]; 1.118 + } 1.119 + return *this; 1.120 +} 1.121 + 1.122 +ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) { 1.123 + ScriptSet t; 1.124 + t.set(script, status); 1.125 + if (U_SUCCESS(status)) { 1.126 + this->intersect(t); 1.127 + } 1.128 + return *this; 1.129 +} 1.130 + 1.131 +UBool ScriptSet::intersects(const ScriptSet &other) const { 1.132 + for (uint32_t i=0; i<LENGTHOF(bits); i++) { 1.133 + if ((bits[i] & other.bits[i]) != 0) { 1.134 + return true; 1.135 + } 1.136 + } 1.137 + return false; 1.138 +} 1.139 + 1.140 +UBool ScriptSet::contains(const ScriptSet &other) const { 1.141 + ScriptSet t(*this); 1.142 + t.intersect(other); 1.143 + return (t == other); 1.144 +} 1.145 + 1.146 + 1.147 +ScriptSet &ScriptSet::setAll() { 1.148 + for (uint32_t i=0; i<LENGTHOF(bits); i++) { 1.149 + bits[i] = 0xffffffffu; 1.150 + } 1.151 + return *this; 1.152 +} 1.153 + 1.154 + 1.155 +ScriptSet &ScriptSet::resetAll() { 1.156 + for (uint32_t i=0; i<LENGTHOF(bits); i++) { 1.157 + bits[i] = 0; 1.158 + } 1.159 + return *this; 1.160 +} 1.161 + 1.162 +int32_t ScriptSet::countMembers() const { 1.163 + // This bit counter is good for sparse numbers of '1's, which is 1.164 + // very much the case that we will usually have. 1.165 + int32_t count = 0; 1.166 + for (uint32_t i=0; i<LENGTHOF(bits); i++) { 1.167 + uint32_t x = bits[i]; 1.168 + while (x > 0) { 1.169 + count++; 1.170 + x &= (x - 1); // and off the least significant one bit. 1.171 + } 1.172 + } 1.173 + return count; 1.174 +} 1.175 + 1.176 +int32_t ScriptSet::hashCode() const { 1.177 + int32_t hash = 0; 1.178 + for (int32_t i=0; i<LENGTHOF(bits); i++) { 1.179 + hash ^= bits[i]; 1.180 + } 1.181 + return hash; 1.182 +} 1.183 + 1.184 +int32_t ScriptSet::nextSetBit(int32_t fromIndex) const { 1.185 + // TODO: Wants a better implementation. 1.186 + if (fromIndex < 0) { 1.187 + return -1; 1.188 + } 1.189 + UErrorCode status = U_ZERO_ERROR; 1.190 + for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) { 1.191 + if (test((UScriptCode)scriptIndex, status)) { 1.192 + return scriptIndex; 1.193 + } 1.194 + } 1.195 + return -1; 1.196 +} 1.197 + 1.198 +UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const { 1.199 + UBool firstTime = TRUE; 1.200 + for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) { 1.201 + if (!firstTime) { 1.202 + dest.append((UChar)0x20); 1.203 + } 1.204 + firstTime = FALSE; 1.205 + const char *scriptName = uscript_getShortName((UScriptCode(i))); 1.206 + dest.append(UnicodeString(scriptName, -1, US_INV)); 1.207 + } 1.208 + return dest; 1.209 +} 1.210 + 1.211 +ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) { 1.212 + resetAll(); 1.213 + if (U_FAILURE(status)) { 1.214 + return *this; 1.215 + } 1.216 + UnicodeString oneScriptName; 1.217 + for (int32_t i=0; i<scriptString.length();) { 1.218 + UChar32 c = scriptString.char32At(i); 1.219 + i = scriptString.moveIndex32(i, 1); 1.220 + if (!u_isUWhiteSpace(c)) { 1.221 + oneScriptName.append(c); 1.222 + if (i < scriptString.length()) { 1.223 + continue; 1.224 + } 1.225 + } 1.226 + if (oneScriptName.length() > 0) { 1.227 + char buf[40]; 1.228 + oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV); 1.229 + buf[sizeof(buf)-1] = 0; 1.230 + int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf); 1.231 + if (sc == UCHAR_INVALID_CODE) { 1.232 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.233 + } else { 1.234 + this->set((UScriptCode)sc, status); 1.235 + } 1.236 + if (U_FAILURE(status)) { 1.237 + return *this; 1.238 + } 1.239 + oneScriptName.remove(); 1.240 + } 1.241 + } 1.242 + return *this; 1.243 +} 1.244 + 1.245 +U_NAMESPACE_END 1.246 + 1.247 +U_CAPI UBool U_EXPORT2 1.248 +uhash_equalsScriptSet(const UElement key1, const UElement key2) { 1.249 + icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 1.250 + icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer); 1.251 + return (*s1 == *s2); 1.252 +} 1.253 + 1.254 +U_CAPI int8_t U_EXPORT2 1.255 +uhash_compareScriptSet(UElement key0, UElement key1) { 1.256 + icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); 1.257 + icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 1.258 + int32_t diff = s0->countMembers() - s1->countMembers(); 1.259 + if (diff != 0) return diff; 1.260 + int32_t i0 = s0->nextSetBit(0); 1.261 + int32_t i1 = s1->nextSetBit(0); 1.262 + while ((diff = i0-i1) == 0 && i0 > 0) { 1.263 + i0 = s0->nextSetBit(i0+1); 1.264 + i1 = s1->nextSetBit(i1+1); 1.265 + } 1.266 + return (int8_t)diff; 1.267 +} 1.268 + 1.269 +U_CAPI int32_t U_EXPORT2 1.270 +uhash_hashScriptSet(const UElement key) { 1.271 + icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer); 1.272 + return s->hashCode(); 1.273 +} 1.274 + 1.275 +U_CAPI void U_EXPORT2 1.276 +uhash_deleteScriptSet(void *obj) { 1.277 + icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj); 1.278 + delete s; 1.279 +}