intl/icu/source/i18n/scriptset.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 2013, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 *
     7 * scriptset.cpp
     8 *
     9 * created on: 2013 Jan 7
    10 * created by: Andy Heninger
    11 */
    13 #include "unicode/utypes.h"
    15 #include "unicode/uchar.h"
    16 #include "unicode/unistr.h"
    18 #include "scriptset.h"
    19 #include "uassert.h"
    21 U_NAMESPACE_BEGIN
    23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    25 //----------------------------------------------------------------------------
    26 //
    27 //  ScriptSet implementation
    28 //
    29 //----------------------------------------------------------------------------
    30 ScriptSet::ScriptSet() {
    31     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    32         bits[i] = 0;
    33     }
    34 }
    36 ScriptSet::~ScriptSet() {
    37 }
    39 ScriptSet::ScriptSet(const ScriptSet &other) {
    40     *this = other;
    41 }
    44 ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
    45     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    46         bits[i] = other.bits[i];
    47     }
    48     return *this;
    49 }
    52 UBool ScriptSet::operator == (const ScriptSet &other) const {
    53     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    54         if (bits[i] != other.bits[i]) {
    55             return FALSE;
    56         }
    57     }
    58     return TRUE;
    59 }
    61 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
    62     if (U_FAILURE(status)) {
    63         return FALSE;
    64     }
    65     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
    66         status = U_ILLEGAL_ARGUMENT_ERROR;
    67         return FALSE;
    68     }
    69     uint32_t index = script / 32;
    70     uint32_t bit   = 1 << (script & 31);
    71     return ((bits[index] & bit) != 0);
    72 }
    75 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
    76     if (U_FAILURE(status)) {
    77         return *this;
    78     }
    79     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
    80         status = U_ILLEGAL_ARGUMENT_ERROR;
    81         return *this;
    82     }
    83     uint32_t index = script / 32;
    84     uint32_t bit   = 1 << (script & 31);
    85     bits[index] |= bit;
    86     return *this;
    87 }
    89 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
    90     if (U_FAILURE(status)) {
    91         return *this;
    92     }
    93     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
    94         status = U_ILLEGAL_ARGUMENT_ERROR;
    95         return *this;
    96     }
    97     uint32_t index = script / 32;
    98     uint32_t bit   = 1 << (script & 31);
    99     bits[index] &= ~bit;
   100     return *this;
   101 }
   105 ScriptSet &ScriptSet::Union(const ScriptSet &other) {
   106     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   107         bits[i] |= other.bits[i];
   108     }
   109     return *this;
   110 }
   112 ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
   113     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   114         bits[i] &= other.bits[i];
   115     }
   116     return *this;
   117 }
   119 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
   120     ScriptSet t;
   121     t.set(script, status);
   122     if (U_SUCCESS(status)) {
   123         this->intersect(t);
   124     }
   125     return *this;
   126 }
   128 UBool ScriptSet::intersects(const ScriptSet &other) const {
   129     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   130         if ((bits[i] & other.bits[i]) != 0) {
   131             return true;
   132         }
   133     }
   134     return false;
   135 }
   137 UBool ScriptSet::contains(const ScriptSet &other) const {
   138     ScriptSet t(*this);
   139     t.intersect(other);
   140     return (t == other);
   141 }
   144 ScriptSet &ScriptSet::setAll() {
   145     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   146         bits[i] = 0xffffffffu;
   147     }
   148     return *this;
   149 }
   152 ScriptSet &ScriptSet::resetAll() {
   153     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   154         bits[i] = 0;
   155     }
   156     return *this;
   157 }
   159 int32_t ScriptSet::countMembers() const {
   160     // This bit counter is good for sparse numbers of '1's, which is
   161     //  very much the case that we will usually have.
   162     int32_t count = 0;
   163     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
   164         uint32_t x = bits[i];
   165         while (x > 0) {
   166             count++;
   167             x &= (x - 1);    // and off the least significant one bit.
   168         }
   169     }
   170     return count;
   171 }
   173 int32_t ScriptSet::hashCode() const {
   174     int32_t hash = 0;
   175     for (int32_t i=0; i<LENGTHOF(bits); i++) {
   176         hash ^= bits[i];
   177     }
   178     return hash;
   179 }
   181 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
   182     // TODO: Wants a better implementation.
   183     if (fromIndex < 0) {
   184         return -1;
   185     }
   186     UErrorCode status = U_ZERO_ERROR;
   187     for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
   188         if (test((UScriptCode)scriptIndex, status)) {
   189             return scriptIndex;
   190         }
   191     }
   192     return -1;
   193 }
   195 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
   196     UBool firstTime = TRUE;
   197     for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
   198         if (!firstTime) {
   199             dest.append((UChar)0x20);
   200         }
   201         firstTime = FALSE;
   202         const char *scriptName = uscript_getShortName((UScriptCode(i)));
   203         dest.append(UnicodeString(scriptName, -1, US_INV));
   204     }
   205     return dest;
   206 }
   208 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
   209     resetAll();
   210     if (U_FAILURE(status)) {
   211         return *this;
   212     }
   213     UnicodeString oneScriptName;
   214     for (int32_t i=0; i<scriptString.length();) {
   215         UChar32 c = scriptString.char32At(i);
   216         i = scriptString.moveIndex32(i, 1);
   217         if (!u_isUWhiteSpace(c)) {
   218             oneScriptName.append(c);
   219             if (i < scriptString.length()) {
   220                 continue;
   221             }
   222         }
   223         if (oneScriptName.length() > 0) {
   224             char buf[40];
   225             oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
   226             buf[sizeof(buf)-1] = 0;
   227             int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
   228             if (sc == UCHAR_INVALID_CODE) {
   229                 status = U_ILLEGAL_ARGUMENT_ERROR;
   230             } else {
   231                 this->set((UScriptCode)sc, status);
   232             }
   233             if (U_FAILURE(status)) {
   234                 return *this;
   235             }
   236             oneScriptName.remove();
   237         }
   238     }
   239     return *this;
   240 }
   242 U_NAMESPACE_END
   244 U_CAPI UBool U_EXPORT2
   245 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
   246     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
   247     icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
   248     return (*s1 == *s2);
   249 }
   251 U_CAPI int8_t U_EXPORT2
   252 uhash_compareScriptSet(UElement key0, UElement key1) {
   253     icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
   254     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
   255     int32_t diff = s0->countMembers() - s1->countMembers();
   256     if (diff != 0) return diff;
   257     int32_t i0 = s0->nextSetBit(0);
   258     int32_t i1 = s1->nextSetBit(0);
   259     while ((diff = i0-i1) == 0 && i0 > 0) {
   260         i0 = s0->nextSetBit(i0+1);
   261         i1 = s1->nextSetBit(i1+1);
   262     }
   263     return (int8_t)diff;
   264 }
   266 U_CAPI int32_t U_EXPORT2
   267 uhash_hashScriptSet(const UElement key) {
   268     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
   269     return s->hashCode();
   270 }
   272 U_CAPI void U_EXPORT2
   273 uhash_deleteScriptSet(void *obj) {
   274     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
   275     delete s;
   276 }

mercurial