intl/icu/source/i18n/scriptset.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 2013, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 *
michael@0 7 * scriptset.cpp
michael@0 8 *
michael@0 9 * created on: 2013 Jan 7
michael@0 10 * created by: Andy Heninger
michael@0 11 */
michael@0 12
michael@0 13 #include "unicode/utypes.h"
michael@0 14
michael@0 15 #include "unicode/uchar.h"
michael@0 16 #include "unicode/unistr.h"
michael@0 17
michael@0 18 #include "scriptset.h"
michael@0 19 #include "uassert.h"
michael@0 20
michael@0 21 U_NAMESPACE_BEGIN
michael@0 22
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so that it behaves as a single
// primary expression wherever it is used (e.g. after a unary operator).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
michael@0 24
michael@0 25 //----------------------------------------------------------------------------
michael@0 26 //
michael@0 27 // ScriptSet implementation
michael@0 28 //
michael@0 29 //----------------------------------------------------------------------------
michael@0 30 ScriptSet::ScriptSet() {
michael@0 31 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
michael@0 32 bits[i] = 0;
michael@0 33 }
michael@0 34 }
michael@0 35
michael@0 36 ScriptSet::~ScriptSet() {
michael@0 37 }
michael@0 38
michael@0 39 ScriptSet::ScriptSet(const ScriptSet &other) {
michael@0 40 *this = other;
michael@0 41 }
michael@0 42
michael@0 43
michael@0 44 ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
michael@0 45 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
michael@0 46 bits[i] = other.bits[i];
michael@0 47 }
michael@0 48 return *this;
michael@0 49 }
michael@0 50
michael@0 51
michael@0 52 UBool ScriptSet::operator == (const ScriptSet &other) const {
michael@0 53 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
michael@0 54 if (bits[i] != other.bits[i]) {
michael@0 55 return FALSE;
michael@0 56 }
michael@0 57 }
michael@0 58 return TRUE;
michael@0 59 }
michael@0 60
michael@0 61 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
michael@0 62 if (U_FAILURE(status)) {
michael@0 63 return FALSE;
michael@0 64 }
michael@0 65 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
michael@0 66 status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 67 return FALSE;
michael@0 68 }
michael@0 69 uint32_t index = script / 32;
michael@0 70 uint32_t bit = 1 << (script & 31);
michael@0 71 return ((bits[index] & bit) != 0);
michael@0 72 }
michael@0 73
michael@0 74
michael@0 75 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
michael@0 76 if (U_FAILURE(status)) {
michael@0 77 return *this;
michael@0 78 }
michael@0 79 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
michael@0 80 status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 81 return *this;
michael@0 82 }
michael@0 83 uint32_t index = script / 32;
michael@0 84 uint32_t bit = 1 << (script & 31);
michael@0 85 bits[index] |= bit;
michael@0 86 return *this;
michael@0 87 }
michael@0 88
michael@0 89 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
michael@0 90 if (U_FAILURE(status)) {
michael@0 91 return *this;
michael@0 92 }
michael@0 93 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
michael@0 94 status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 95 return *this;
michael@0 96 }
michael@0 97 uint32_t index = script / 32;
michael@0 98 uint32_t bit = 1 << (script & 31);
michael@0 99 bits[index] &= ~bit;
michael@0 100 return *this;
michael@0 101 }
michael@0 102
michael@0 103
michael@0 104
michael@0 105 ScriptSet &ScriptSet::Union(const ScriptSet &other) {
michael@0 106 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
michael@0 107 bits[i] |= other.bits[i];
michael@0 108 }
michael@0 109 return *this;
michael@0 110 }
michael@0 111
michael@0 112 ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
michael@0 113 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
michael@0 114 bits[i] &= other.bits[i];
michael@0 115 }
michael@0 116 return *this;
michael@0 117 }
michael@0 118
michael@0 119 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
michael@0 120 ScriptSet t;
michael@0 121 t.set(script, status);
michael@0 122 if (U_SUCCESS(status)) {
michael@0 123 this->intersect(t);
michael@0 124 }
michael@0 125 return *this;
michael@0 126 }
michael@0 127
michael@0 128 UBool ScriptSet::intersects(const ScriptSet &other) const {
michael@0 129 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
michael@0 130 if ((bits[i] & other.bits[i]) != 0) {
michael@0 131 return true;
michael@0 132 }
michael@0 133 }
michael@0 134 return false;
michael@0 135 }
michael@0 136
michael@0 137 UBool ScriptSet::contains(const ScriptSet &other) const {
michael@0 138 ScriptSet t(*this);
michael@0 139 t.intersect(other);
michael@0 140 return (t == other);
michael@0 141 }
michael@0 142
michael@0 143
michael@0 144 ScriptSet &ScriptSet::setAll() {
michael@0 145 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
michael@0 146 bits[i] = 0xffffffffu;
michael@0 147 }
michael@0 148 return *this;
michael@0 149 }
michael@0 150
michael@0 151
michael@0 152 ScriptSet &ScriptSet::resetAll() {
michael@0 153 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
michael@0 154 bits[i] = 0;
michael@0 155 }
michael@0 156 return *this;
michael@0 157 }
michael@0 158
michael@0 159 int32_t ScriptSet::countMembers() const {
michael@0 160 // This bit counter is good for sparse numbers of '1's, which is
michael@0 161 // very much the case that we will usually have.
michael@0 162 int32_t count = 0;
michael@0 163 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
michael@0 164 uint32_t x = bits[i];
michael@0 165 while (x > 0) {
michael@0 166 count++;
michael@0 167 x &= (x - 1); // and off the least significant one bit.
michael@0 168 }
michael@0 169 }
michael@0 170 return count;
michael@0 171 }
michael@0 172
michael@0 173 int32_t ScriptSet::hashCode() const {
michael@0 174 int32_t hash = 0;
michael@0 175 for (int32_t i=0; i<LENGTHOF(bits); i++) {
michael@0 176 hash ^= bits[i];
michael@0 177 }
michael@0 178 return hash;
michael@0 179 }
michael@0 180
michael@0 181 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
michael@0 182 // TODO: Wants a better implementation.
michael@0 183 if (fromIndex < 0) {
michael@0 184 return -1;
michael@0 185 }
michael@0 186 UErrorCode status = U_ZERO_ERROR;
michael@0 187 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
michael@0 188 if (test((UScriptCode)scriptIndex, status)) {
michael@0 189 return scriptIndex;
michael@0 190 }
michael@0 191 }
michael@0 192 return -1;
michael@0 193 }
michael@0 194
michael@0 195 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
michael@0 196 UBool firstTime = TRUE;
michael@0 197 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
michael@0 198 if (!firstTime) {
michael@0 199 dest.append((UChar)0x20);
michael@0 200 }
michael@0 201 firstTime = FALSE;
michael@0 202 const char *scriptName = uscript_getShortName((UScriptCode(i)));
michael@0 203 dest.append(UnicodeString(scriptName, -1, US_INV));
michael@0 204 }
michael@0 205 return dest;
michael@0 206 }
michael@0 207
michael@0 208 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
michael@0 209 resetAll();
michael@0 210 if (U_FAILURE(status)) {
michael@0 211 return *this;
michael@0 212 }
michael@0 213 UnicodeString oneScriptName;
michael@0 214 for (int32_t i=0; i<scriptString.length();) {
michael@0 215 UChar32 c = scriptString.char32At(i);
michael@0 216 i = scriptString.moveIndex32(i, 1);
michael@0 217 if (!u_isUWhiteSpace(c)) {
michael@0 218 oneScriptName.append(c);
michael@0 219 if (i < scriptString.length()) {
michael@0 220 continue;
michael@0 221 }
michael@0 222 }
michael@0 223 if (oneScriptName.length() > 0) {
michael@0 224 char buf[40];
michael@0 225 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
michael@0 226 buf[sizeof(buf)-1] = 0;
michael@0 227 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
michael@0 228 if (sc == UCHAR_INVALID_CODE) {
michael@0 229 status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 230 } else {
michael@0 231 this->set((UScriptCode)sc, status);
michael@0 232 }
michael@0 233 if (U_FAILURE(status)) {
michael@0 234 return *this;
michael@0 235 }
michael@0 236 oneScriptName.remove();
michael@0 237 }
michael@0 238 }
michael@0 239 return *this;
michael@0 240 }
michael@0 241
michael@0 242 U_NAMESPACE_END
michael@0 243
michael@0 244 U_CAPI UBool U_EXPORT2
michael@0 245 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
michael@0 246 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
michael@0 247 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
michael@0 248 return (*s1 == *s2);
michael@0 249 }
michael@0 250
michael@0 251 U_CAPI int8_t U_EXPORT2
michael@0 252 uhash_compareScriptSet(UElement key0, UElement key1) {
michael@0 253 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
michael@0 254 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
michael@0 255 int32_t diff = s0->countMembers() - s1->countMembers();
michael@0 256 if (diff != 0) return diff;
michael@0 257 int32_t i0 = s0->nextSetBit(0);
michael@0 258 int32_t i1 = s1->nextSetBit(0);
michael@0 259 while ((diff = i0-i1) == 0 && i0 > 0) {
michael@0 260 i0 = s0->nextSetBit(i0+1);
michael@0 261 i1 = s1->nextSetBit(i1+1);
michael@0 262 }
michael@0 263 return (int8_t)diff;
michael@0 264 }
michael@0 265
michael@0 266 U_CAPI int32_t U_EXPORT2
michael@0 267 uhash_hashScriptSet(const UElement key) {
michael@0 268 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
michael@0 269 return s->hashCode();
michael@0 270 }
michael@0 271
michael@0 272 U_CAPI void U_EXPORT2
michael@0 273 uhash_deleteScriptSet(void *obj) {
michael@0 274 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
michael@0 275 delete s;
michael@0 276 }

mercurial