intl/icu/source/i18n/scriptset.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/scriptset.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,276 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (C) 2013, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*
    1.10 +* scriptset.cpp
    1.11 +*
    1.12 +* created on: 2013 Jan 7
    1.13 +* created by: Andy Heninger
    1.14 +*/
    1.15 +
    1.16 +#include "unicode/utypes.h"
    1.17 +
    1.18 +#include "unicode/uchar.h"
    1.19 +#include "unicode/unistr.h"
    1.20 +
    1.21 +#include "scriptset.h"
    1.22 +#include "uassert.h"
    1.23 +
    1.24 +U_NAMESPACE_BEGIN
    1.25 +
    1.26 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    1.27 +
    1.28 +//----------------------------------------------------------------------------
    1.29 +//
    1.30 +//  ScriptSet implementation
    1.31 +//
    1.32 +//----------------------------------------------------------------------------
    1.33 +ScriptSet::ScriptSet() {
    1.34 +    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    1.35 +        bits[i] = 0;
    1.36 +    }
    1.37 +}
    1.38 +
    1.39 +ScriptSet::~ScriptSet() {
    1.40 +}
    1.41 +
    1.42 +ScriptSet::ScriptSet(const ScriptSet &other) {
    1.43 +    *this = other;
    1.44 +}
    1.45 +    
    1.46 +
    1.47 +ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
    1.48 +    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    1.49 +        bits[i] = other.bits[i];
    1.50 +    }
    1.51 +    return *this;
    1.52 +}
    1.53 +
    1.54 +
    1.55 +UBool ScriptSet::operator == (const ScriptSet &other) const {
    1.56 +    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    1.57 +        if (bits[i] != other.bits[i]) {
    1.58 +            return FALSE;
    1.59 +        }
    1.60 +    }
    1.61 +    return TRUE;
    1.62 +}
    1.63 +
    1.64 +UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
    1.65 +    if (U_FAILURE(status)) {
    1.66 +        return FALSE;
    1.67 +    }
    1.68 +    if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
    1.69 +        status = U_ILLEGAL_ARGUMENT_ERROR;
    1.70 +        return FALSE;
    1.71 +    }
    1.72 +    uint32_t index = script / 32;
    1.73 +    uint32_t bit   = 1 << (script & 31);
    1.74 +    return ((bits[index] & bit) != 0);
    1.75 +}
    1.76 +
    1.77 +
    1.78 +ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
    1.79 +    if (U_FAILURE(status)) {
    1.80 +        return *this;
    1.81 +    }
    1.82 +    if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
    1.83 +        status = U_ILLEGAL_ARGUMENT_ERROR;
    1.84 +        return *this;
    1.85 +    }
    1.86 +    uint32_t index = script / 32;
    1.87 +    uint32_t bit   = 1 << (script & 31);
    1.88 +    bits[index] |= bit;
    1.89 +    return *this;
    1.90 +}
    1.91 +
    1.92 +ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
    1.93 +    if (U_FAILURE(status)) {
    1.94 +        return *this;
    1.95 +    }
    1.96 +    if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
    1.97 +        status = U_ILLEGAL_ARGUMENT_ERROR;
    1.98 +        return *this;
    1.99 +    }
   1.100 +    uint32_t index = script / 32;
   1.101 +    uint32_t bit   = 1 << (script & 31);
   1.102 +    bits[index] &= ~bit;
   1.103 +    return *this;
   1.104 +}
   1.105 +
   1.106 +
   1.107 +
   1.108 +ScriptSet &ScriptSet::Union(const ScriptSet &other) {
   1.109 +    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   1.110 +        bits[i] |= other.bits[i];
   1.111 +    }
   1.112 +    return *this;
   1.113 +}
   1.114 +
   1.115 +ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
   1.116 +    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   1.117 +        bits[i] &= other.bits[i];
   1.118 +    }
   1.119 +    return *this;
   1.120 +}
   1.121 +
   1.122 +ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
   1.123 +    ScriptSet t;
   1.124 +    t.set(script, status);
   1.125 +    if (U_SUCCESS(status)) {
   1.126 +        this->intersect(t);
   1.127 +    }
   1.128 +    return *this;
   1.129 +}
   1.130 +    
   1.131 +UBool ScriptSet::intersects(const ScriptSet &other) const {
   1.132 +    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   1.133 +        if ((bits[i] & other.bits[i]) != 0) {
   1.134 +            return true;
   1.135 +        }
   1.136 +    }
   1.137 +    return false;
   1.138 +}
   1.139 +
   1.140 +UBool ScriptSet::contains(const ScriptSet &other) const {
   1.141 +    ScriptSet t(*this);
   1.142 +    t.intersect(other);
   1.143 +    return (t == other);
   1.144 +}
   1.145 +
   1.146 +
   1.147 +ScriptSet &ScriptSet::setAll() {
   1.148 +    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   1.149 +        bits[i] = 0xffffffffu;
   1.150 +    }
   1.151 +    return *this;
   1.152 +}
   1.153 +
   1.154 +
   1.155 +ScriptSet &ScriptSet::resetAll() {
   1.156 +    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   1.157 +        bits[i] = 0;
   1.158 +    }
   1.159 +    return *this;
   1.160 +}
   1.161 +
   1.162 +int32_t ScriptSet::countMembers() const {
   1.163 +    // This bit counter is good for sparse numbers of '1's, which is
   1.164 +    //  very much the case that we will usually have.
   1.165 +    int32_t count = 0;
   1.166 +    for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   1.167 +        uint32_t x = bits[i];
   1.168 +        while (x > 0) {
   1.169 +            count++;
   1.170 +            x &= (x - 1);    // and off the least significant one bit.
   1.171 +        }
   1.172 +    }
   1.173 +    return count;
   1.174 +}
   1.175 +
   1.176 +int32_t ScriptSet::hashCode() const {
   1.177 +    int32_t hash = 0;
   1.178 +    for (int32_t i=0; i<LENGTHOF(bits); i++) {
   1.179 +        hash ^= bits[i];
   1.180 +    }
   1.181 +    return hash;
   1.182 +}
   1.183 +
   1.184 +int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
   1.185 +    // TODO: Wants a better implementation.
   1.186 +    if (fromIndex < 0) {
   1.187 +        return -1;
   1.188 +    }
   1.189 +    UErrorCode status = U_ZERO_ERROR;
   1.190 +    for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
   1.191 +        if (test((UScriptCode)scriptIndex, status)) {
   1.192 +            return scriptIndex;
   1.193 +        }
   1.194 +    }
   1.195 +    return -1;
   1.196 +}
   1.197 +
   1.198 +UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
   1.199 +    UBool firstTime = TRUE;
   1.200 +    for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
   1.201 +        if (!firstTime) {
   1.202 +            dest.append((UChar)0x20);
   1.203 +        }
   1.204 +        firstTime = FALSE;
   1.205 +        const char *scriptName = uscript_getShortName((UScriptCode(i)));
   1.206 +        dest.append(UnicodeString(scriptName, -1, US_INV));
   1.207 +    }
   1.208 +    return dest;
   1.209 +}
   1.210 +
   1.211 +ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
   1.212 +    resetAll();
   1.213 +    if (U_FAILURE(status)) {
   1.214 +        return *this;
   1.215 +    }
   1.216 +    UnicodeString oneScriptName;
   1.217 +    for (int32_t i=0; i<scriptString.length();) {
   1.218 +        UChar32 c = scriptString.char32At(i);
   1.219 +        i = scriptString.moveIndex32(i, 1);
   1.220 +        if (!u_isUWhiteSpace(c)) {
   1.221 +            oneScriptName.append(c);
   1.222 +            if (i < scriptString.length()) {
   1.223 +                continue;
   1.224 +            }
   1.225 +        }
   1.226 +        if (oneScriptName.length() > 0) {
   1.227 +            char buf[40];
   1.228 +            oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
   1.229 +            buf[sizeof(buf)-1] = 0;
   1.230 +            int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
   1.231 +            if (sc == UCHAR_INVALID_CODE) {
   1.232 +                status = U_ILLEGAL_ARGUMENT_ERROR;
   1.233 +            } else {
   1.234 +                this->set((UScriptCode)sc, status);
   1.235 +            }
   1.236 +            if (U_FAILURE(status)) {
   1.237 +                return *this;
   1.238 +            }
   1.239 +            oneScriptName.remove();
   1.240 +        }
   1.241 +    }
   1.242 +    return *this;
   1.243 +}
   1.244 +
   1.245 +U_NAMESPACE_END
   1.246 +
   1.247 +U_CAPI UBool U_EXPORT2
   1.248 +uhash_equalsScriptSet(const UElement key1, const UElement key2) {
   1.249 +    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
   1.250 +    icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
   1.251 +    return (*s1 == *s2);
   1.252 +}
   1.253 +
   1.254 +U_CAPI int8_t U_EXPORT2
   1.255 +uhash_compareScriptSet(UElement key0, UElement key1) {
   1.256 +    icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
   1.257 +    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
   1.258 +    int32_t diff = s0->countMembers() - s1->countMembers();
   1.259 +    if (diff != 0) return diff;
   1.260 +    int32_t i0 = s0->nextSetBit(0);
   1.261 +    int32_t i1 = s1->nextSetBit(0);
   1.262 +    while ((diff = i0-i1) == 0 && i0 > 0) {
   1.263 +        i0 = s0->nextSetBit(i0+1);
   1.264 +        i1 = s1->nextSetBit(i1+1);
   1.265 +    }
   1.266 +    return (int8_t)diff;
   1.267 +}
   1.268 +
   1.269 +U_CAPI int32_t U_EXPORT2
   1.270 +uhash_hashScriptSet(const UElement key) {
   1.271 +    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
   1.272 +    return s->hashCode();
   1.273 +}
   1.274 +
   1.275 +U_CAPI void U_EXPORT2
   1.276 +uhash_deleteScriptSet(void *obj) {
   1.277 +    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
   1.278 +    delete s;
   1.279 +}

mercurial