|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (C) 2013, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * |
|
7 * scriptset.cpp |
|
8 * |
|
9 * created on: 2013 Jan 7 |
|
10 * created by: Andy Heninger |
|
11 */ |
|
12 |
|
13 #include "unicode/utypes.h" |
|
14 |
|
15 #include "unicode/uchar.h" |
|
16 #include "unicode/unistr.h" |
|
17 |
|
18 #include "scriptset.h" |
|
19 #include "uassert.h" |
|
20 |
|
21 U_NAMESPACE_BEGIN |
|
22 |
|
// Element count of a fixed-size array, expressed as an int32_t.
// The argument must be a real array (not a pointer), since the count is
// derived from sizeof.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
|
24 |
|
25 //---------------------------------------------------------------------------- |
|
26 // |
|
27 // ScriptSet implementation |
|
28 // |
|
29 //---------------------------------------------------------------------------- |
|
30 ScriptSet::ScriptSet() { |
|
31 for (uint32_t i=0; i<LENGTHOF(bits); i++) { |
|
32 bits[i] = 0; |
|
33 } |
|
34 } |
|
35 |
|
36 ScriptSet::~ScriptSet() { |
|
37 } |
|
38 |
|
39 ScriptSet::ScriptSet(const ScriptSet &other) { |
|
40 *this = other; |
|
41 } |
|
42 |
|
43 |
|
44 ScriptSet & ScriptSet::operator =(const ScriptSet &other) { |
|
45 for (uint32_t i=0; i<LENGTHOF(bits); i++) { |
|
46 bits[i] = other.bits[i]; |
|
47 } |
|
48 return *this; |
|
49 } |
|
50 |
|
51 |
|
52 UBool ScriptSet::operator == (const ScriptSet &other) const { |
|
53 for (uint32_t i=0; i<LENGTHOF(bits); i++) { |
|
54 if (bits[i] != other.bits[i]) { |
|
55 return FALSE; |
|
56 } |
|
57 } |
|
58 return TRUE; |
|
59 } |
|
60 |
|
61 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { |
|
62 if (U_FAILURE(status)) { |
|
63 return FALSE; |
|
64 } |
|
65 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { |
|
66 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
67 return FALSE; |
|
68 } |
|
69 uint32_t index = script / 32; |
|
70 uint32_t bit = 1 << (script & 31); |
|
71 return ((bits[index] & bit) != 0); |
|
72 } |
|
73 |
|
74 |
|
75 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) { |
|
76 if (U_FAILURE(status)) { |
|
77 return *this; |
|
78 } |
|
79 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { |
|
80 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
81 return *this; |
|
82 } |
|
83 uint32_t index = script / 32; |
|
84 uint32_t bit = 1 << (script & 31); |
|
85 bits[index] |= bit; |
|
86 return *this; |
|
87 } |
|
88 |
|
89 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) { |
|
90 if (U_FAILURE(status)) { |
|
91 return *this; |
|
92 } |
|
93 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { |
|
94 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
95 return *this; |
|
96 } |
|
97 uint32_t index = script / 32; |
|
98 uint32_t bit = 1 << (script & 31); |
|
99 bits[index] &= ~bit; |
|
100 return *this; |
|
101 } |
|
102 |
|
103 |
|
104 |
|
105 ScriptSet &ScriptSet::Union(const ScriptSet &other) { |
|
106 for (uint32_t i=0; i<LENGTHOF(bits); i++) { |
|
107 bits[i] |= other.bits[i]; |
|
108 } |
|
109 return *this; |
|
110 } |
|
111 |
|
112 ScriptSet &ScriptSet::intersect(const ScriptSet &other) { |
|
113 for (uint32_t i=0; i<LENGTHOF(bits); i++) { |
|
114 bits[i] &= other.bits[i]; |
|
115 } |
|
116 return *this; |
|
117 } |
|
118 |
|
119 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) { |
|
120 ScriptSet t; |
|
121 t.set(script, status); |
|
122 if (U_SUCCESS(status)) { |
|
123 this->intersect(t); |
|
124 } |
|
125 return *this; |
|
126 } |
|
127 |
|
128 UBool ScriptSet::intersects(const ScriptSet &other) const { |
|
129 for (uint32_t i=0; i<LENGTHOF(bits); i++) { |
|
130 if ((bits[i] & other.bits[i]) != 0) { |
|
131 return true; |
|
132 } |
|
133 } |
|
134 return false; |
|
135 } |
|
136 |
|
137 UBool ScriptSet::contains(const ScriptSet &other) const { |
|
138 ScriptSet t(*this); |
|
139 t.intersect(other); |
|
140 return (t == other); |
|
141 } |
|
142 |
|
143 |
|
144 ScriptSet &ScriptSet::setAll() { |
|
145 for (uint32_t i=0; i<LENGTHOF(bits); i++) { |
|
146 bits[i] = 0xffffffffu; |
|
147 } |
|
148 return *this; |
|
149 } |
|
150 |
|
151 |
|
152 ScriptSet &ScriptSet::resetAll() { |
|
153 for (uint32_t i=0; i<LENGTHOF(bits); i++) { |
|
154 bits[i] = 0; |
|
155 } |
|
156 return *this; |
|
157 } |
|
158 |
|
159 int32_t ScriptSet::countMembers() const { |
|
160 // This bit counter is good for sparse numbers of '1's, which is |
|
161 // very much the case that we will usually have. |
|
162 int32_t count = 0; |
|
163 for (uint32_t i=0; i<LENGTHOF(bits); i++) { |
|
164 uint32_t x = bits[i]; |
|
165 while (x > 0) { |
|
166 count++; |
|
167 x &= (x - 1); // and off the least significant one bit. |
|
168 } |
|
169 } |
|
170 return count; |
|
171 } |
|
172 |
|
173 int32_t ScriptSet::hashCode() const { |
|
174 int32_t hash = 0; |
|
175 for (int32_t i=0; i<LENGTHOF(bits); i++) { |
|
176 hash ^= bits[i]; |
|
177 } |
|
178 return hash; |
|
179 } |
|
180 |
|
181 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const { |
|
182 // TODO: Wants a better implementation. |
|
183 if (fromIndex < 0) { |
|
184 return -1; |
|
185 } |
|
186 UErrorCode status = U_ZERO_ERROR; |
|
187 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) { |
|
188 if (test((UScriptCode)scriptIndex, status)) { |
|
189 return scriptIndex; |
|
190 } |
|
191 } |
|
192 return -1; |
|
193 } |
|
194 |
|
195 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const { |
|
196 UBool firstTime = TRUE; |
|
197 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) { |
|
198 if (!firstTime) { |
|
199 dest.append((UChar)0x20); |
|
200 } |
|
201 firstTime = FALSE; |
|
202 const char *scriptName = uscript_getShortName((UScriptCode(i))); |
|
203 dest.append(UnicodeString(scriptName, -1, US_INV)); |
|
204 } |
|
205 return dest; |
|
206 } |
|
207 |
|
208 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) { |
|
209 resetAll(); |
|
210 if (U_FAILURE(status)) { |
|
211 return *this; |
|
212 } |
|
213 UnicodeString oneScriptName; |
|
214 for (int32_t i=0; i<scriptString.length();) { |
|
215 UChar32 c = scriptString.char32At(i); |
|
216 i = scriptString.moveIndex32(i, 1); |
|
217 if (!u_isUWhiteSpace(c)) { |
|
218 oneScriptName.append(c); |
|
219 if (i < scriptString.length()) { |
|
220 continue; |
|
221 } |
|
222 } |
|
223 if (oneScriptName.length() > 0) { |
|
224 char buf[40]; |
|
225 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV); |
|
226 buf[sizeof(buf)-1] = 0; |
|
227 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf); |
|
228 if (sc == UCHAR_INVALID_CODE) { |
|
229 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
230 } else { |
|
231 this->set((UScriptCode)sc, status); |
|
232 } |
|
233 if (U_FAILURE(status)) { |
|
234 return *this; |
|
235 } |
|
236 oneScriptName.remove(); |
|
237 } |
|
238 } |
|
239 return *this; |
|
240 } |
|
241 |
|
242 U_NAMESPACE_END |
|
243 |
|
244 U_CAPI UBool U_EXPORT2 |
|
245 uhash_equalsScriptSet(const UElement key1, const UElement key2) { |
|
246 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); |
|
247 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer); |
|
248 return (*s1 == *s2); |
|
249 } |
|
250 |
|
251 U_CAPI int8_t U_EXPORT2 |
|
252 uhash_compareScriptSet(UElement key0, UElement key1) { |
|
253 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); |
|
254 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); |
|
255 int32_t diff = s0->countMembers() - s1->countMembers(); |
|
256 if (diff != 0) return diff; |
|
257 int32_t i0 = s0->nextSetBit(0); |
|
258 int32_t i1 = s1->nextSetBit(0); |
|
259 while ((diff = i0-i1) == 0 && i0 > 0) { |
|
260 i0 = s0->nextSetBit(i0+1); |
|
261 i1 = s1->nextSetBit(i1+1); |
|
262 } |
|
263 return (int8_t)diff; |
|
264 } |
|
265 |
|
266 U_CAPI int32_t U_EXPORT2 |
|
267 uhash_hashScriptSet(const UElement key) { |
|
268 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer); |
|
269 return s->hashCode(); |
|
270 } |
|
271 |
|
272 U_CAPI void U_EXPORT2 |
|
273 uhash_deleteScriptSet(void *obj) { |
|
274 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj); |
|
275 delete s; |
|
276 } |