|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 2013, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ******************************************************************************* |
|
6 * file name: uscript_props.cpp |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2013feb16 |
|
12 * created by: Markus W. Scherer |
|
13 */ |
|
14 |
|
15 #include "unicode/utypes.h" |
|
16 #include "unicode/unistr.h" |
|
17 #include "unicode/uscript.h" |
|
18 #include "unicode/utf16.h" |
|
19 #include "ustr_imp.h" |
|
20 |
|
// Number of elements of a true array (not a pointer), as an int32_t.
// The whole expansion is wrapped in parentheses so that it always acts as a
// single operand, e.g. after sizeof or next to a postfix operator.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
|
22 |
|
23 namespace { |
|
24 |
|
25 // Script metadata (script properties). |
|
26 // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt |
|
27 |
|
28 // 0 = NOT_ENCODED, no sample character, default false script properties. |
|
29 // Bits 20.. 0: sample character |
|
30 |
|
// Usage level: a 3-bit number stored in bits 23..21 of a properties word.
const int32_t UNKNOWN      = 0x00200000;  // usage value 1
const int32_t EXCLUSION    = 0x00400000;  // usage value 2
const int32_t LIMITED_USE  = 0x00600000;  // usage value 3
const int32_t ASPIRATIONAL = 0x00800000;  // usage value 4
const int32_t RECOMMENDED  = 0x00A00000;  // usage value 5

// Boolean properties: one bit each, allocated in bits 31..24.
const int32_t RTL        = 0x01000000;  // bit 24
const int32_t LB_LETTERS = 0x02000000;  // bit 25
const int32_t CASED      = 0x04000000;  // bit 26
|
42 |
|
43 const int32_t SCRIPT_PROPS[] = { |
|
44 // Begin copy-paste output from |
|
45 // tools/trunk/unicode/py/parsescriptmetadata.py |
|
46 0x0040 | UNKNOWN, // Zyyy |
|
47 0x0308 | UNKNOWN, // Zinh |
|
48 0x0628 | RECOMMENDED | RTL, // Arab |
|
49 0x0531 | RECOMMENDED | CASED, // Armn |
|
50 0x0995 | RECOMMENDED, // Beng |
|
51 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo |
|
52 0x13C4 | LIMITED_USE, // Cher |
|
53 0x03E2 | EXCLUSION | CASED, // Copt |
|
54 0x042F | RECOMMENDED | CASED, // Cyrl |
|
55 0x10414 | EXCLUSION | CASED, // Dsrt |
|
56 0x0905 | RECOMMENDED, // Deva |
|
57 0x12A0 | RECOMMENDED, // Ethi |
|
58 0x10D3 | RECOMMENDED, // Geor |
|
59 0x10330 | EXCLUSION, // Goth |
|
60 0x03A9 | RECOMMENDED | CASED, // Grek |
|
61 0x0A95 | RECOMMENDED, // Gujr |
|
62 0x0A15 | RECOMMENDED, // Guru |
|
63 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani |
|
64 0xAC00 | RECOMMENDED, // Hang |
|
65 0x05D0 | RECOMMENDED | RTL, // Hebr |
|
66 0x304B | RECOMMENDED | LB_LETTERS, // Hira |
|
67 0x0C95 | RECOMMENDED, // Knda |
|
68 0x30AB | RECOMMENDED | LB_LETTERS, // Kana |
|
69 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr |
|
70 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo |
|
71 0x004C | RECOMMENDED | CASED, // Latn |
|
72 0x0D15 | RECOMMENDED, // Mlym |
|
73 0x1826 | ASPIRATIONAL, // Mong |
|
74 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr |
|
75 0x168F | EXCLUSION, // Ogam |
|
76 0x10300 | EXCLUSION, // Ital |
|
77 0x0B15 | RECOMMENDED, // Orya |
|
78 0x16A0 | EXCLUSION, // Runr |
|
79 0x0D85 | RECOMMENDED, // Sinh |
|
80 0x0710 | LIMITED_USE | RTL, // Syrc |
|
81 0x0B95 | RECOMMENDED, // Taml |
|
82 0x0C15 | RECOMMENDED, // Telu |
|
83 0x078C | RECOMMENDED | RTL, // Thaa |
|
84 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai |
|
85 0x0F40 | RECOMMENDED, // Tibt |
|
86 0x14C0 | ASPIRATIONAL, // Cans |
|
87 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii |
|
88 0x1703 | EXCLUSION, // Tglg |
|
89 0x1723 | EXCLUSION, // Hano |
|
90 0x1743 | EXCLUSION, // Buhd |
|
91 0x1763 | EXCLUSION, // Tagb |
|
92 0x2800 | UNKNOWN, // Brai |
|
93 0x10800 | EXCLUSION | RTL, // Cprt |
|
94 0x1900 | LIMITED_USE, // Limb |
|
95 0x10000 | EXCLUSION, // Linb |
|
96 0x10480 | EXCLUSION, // Osma |
|
97 0x10450 | EXCLUSION, // Shaw |
|
98 0x1950 | LIMITED_USE | LB_LETTERS, // Tale |
|
99 0x10380 | EXCLUSION, // Ugar |
|
100 0, |
|
101 0x1A00 | EXCLUSION, // Bugi |
|
102 0x2C00 | EXCLUSION | CASED, // Glag |
|
103 0x10A00 | EXCLUSION | RTL, // Khar |
|
104 0xA800 | LIMITED_USE, // Sylo |
|
105 0x1980 | LIMITED_USE | LB_LETTERS, // Talu |
|
106 0x2D30 | ASPIRATIONAL, // Tfng |
|
107 0x103A0 | EXCLUSION, // Xpeo |
|
108 0x1B05 | LIMITED_USE | LB_LETTERS, // Bali |
|
109 0x1BC0 | LIMITED_USE, // Batk |
|
110 0, |
|
111 0x11005 | EXCLUSION, // Brah |
|
112 0xAA00 | LIMITED_USE, // Cham |
|
113 0, |
|
114 0, |
|
115 0, |
|
116 0, |
|
117 0x13153 | EXCLUSION, // Egyp |
|
118 0, |
|
119 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans |
|
120 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant |
|
121 0, |
|
122 0, |
|
123 0, |
|
124 0xA984 | LIMITED_USE | LB_LETTERS, // Java |
|
125 0xA90A | LIMITED_USE, // Kali |
|
126 0, |
|
127 0, |
|
128 0x1C00 | LIMITED_USE, // Lepc |
|
129 0, |
|
130 0x0840 | LIMITED_USE | RTL, // Mand |
|
131 0, |
|
132 0x10980 | EXCLUSION | RTL, // Mero |
|
133 0x07CA | LIMITED_USE | RTL, // Nkoo |
|
134 0x10C00 | EXCLUSION | RTL, // Orkh |
|
135 0, |
|
136 0xA840 | EXCLUSION, // Phag |
|
137 0x10900 | EXCLUSION | RTL, // Phnx |
|
138 0x16F00 | ASPIRATIONAL, // Plrd |
|
139 0, |
|
140 0, |
|
141 0, |
|
142 0, |
|
143 0, |
|
144 0, |
|
145 0xA549 | LIMITED_USE, // Vaii |
|
146 0, |
|
147 0x12000 | EXCLUSION, // Xsux |
|
148 0, |
|
149 0xFDD0 | UNKNOWN, // Zzzz |
|
150 0x102A0 | EXCLUSION, // Cari |
|
151 0x304B | RECOMMENDED | LB_LETTERS, // Jpan |
|
152 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana |
|
153 0x10280 | EXCLUSION, // Lyci |
|
154 0x10920 | EXCLUSION | RTL, // Lydi |
|
155 0x1C5A | LIMITED_USE, // Olck |
|
156 0xA930 | EXCLUSION, // Rjng |
|
157 0xA882 | LIMITED_USE, // Saur |
|
158 0, |
|
159 0x1B83 | LIMITED_USE, // Sund |
|
160 0, |
|
161 0xABC0 | LIMITED_USE, // Mtei |
|
162 0x10840 | EXCLUSION | RTL, // Armi |
|
163 0x10B00 | EXCLUSION | RTL, // Avst |
|
164 0x11103 | LIMITED_USE, // Cakm |
|
165 0xAC00 | RECOMMENDED, // Kore |
|
166 0x11083 | EXCLUSION, // Kthi |
|
167 0, |
|
168 0x10B60 | EXCLUSION | RTL, // Phli |
|
169 0, |
|
170 0, |
|
171 0x10B40 | EXCLUSION | RTL, // Prti |
|
172 0x0800 | EXCLUSION | RTL, // Samr |
|
173 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt |
|
174 0, |
|
175 0, |
|
176 0xA6A0 | LIMITED_USE, // Bamu |
|
177 0xA4D0 | LIMITED_USE, // Lisu |
|
178 0, |
|
179 0x10A60 | EXCLUSION | RTL, // Sarb |
|
180 0, |
|
181 0, |
|
182 0, |
|
183 0, |
|
184 0, |
|
185 0, |
|
186 0, |
|
187 0x109A0 | EXCLUSION | RTL, // Merc |
|
188 0, |
|
189 0, |
|
190 0, |
|
191 0, |
|
192 0, |
|
193 0, |
|
194 0, |
|
195 0, |
|
196 0, |
|
197 0x11183 | EXCLUSION, // Shrd |
|
198 0x110D0 | EXCLUSION, // Sora |
|
199 0x11680 | EXCLUSION, // Takr |
|
200 0, |
|
201 0, |
|
202 0, |
|
203 0, |
|
204 0, |
|
205 0, |
|
206 0, |
|
207 // End copy-paste from parsescriptmetadata.py |
|
208 }; |
|
209 |
|
210 int32_t getScriptProps(UScriptCode script) { |
|
211 if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) { |
|
212 return SCRIPT_PROPS[script]; |
|
213 } else { |
|
214 return 0; |
|
215 } |
|
216 } |
|
217 |
|
218 } // namespace |
|
219 |
|
220 U_CAPI int32_t U_EXPORT2 |
|
221 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { |
|
222 if(U_FAILURE(*pErrorCode)) { return 0; } |
|
223 if(capacity < 0 || (capacity > 0 && dest == NULL)) { |
|
224 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
|
225 return 0; |
|
226 } |
|
227 int32_t sampleChar = getScriptProps(script) & 0x1fffff; |
|
228 int32_t length; |
|
229 if(sampleChar == 0) { |
|
230 length = 0; |
|
231 } else { |
|
232 length = U16_LENGTH(sampleChar); |
|
233 if(length <= capacity) { |
|
234 int32_t i = 0; |
|
235 U16_APPEND_UNSAFE(dest, i, sampleChar); |
|
236 } |
|
237 } |
|
238 return u_terminateUChars(dest, capacity, length, pErrorCode); |
|
239 } |
|
240 |
|
241 U_COMMON_API icu::UnicodeString U_EXPORT2 |
|
242 uscript_getSampleUnicodeString(UScriptCode script) { |
|
243 icu::UnicodeString sample; |
|
244 int32_t sampleChar = getScriptProps(script) & 0x1fffff; |
|
245 if(sampleChar != 0) { |
|
246 sample.append(sampleChar); |
|
247 } |
|
248 return sample; |
|
249 } |
|
250 |
|
251 U_CAPI UScriptUsage U_EXPORT2 |
|
252 uscript_getUsage(UScriptCode script) { |
|
253 return (UScriptUsage)((getScriptProps(script) >> 21) & 7); |
|
254 } |
|
255 |
|
256 U_CAPI UBool U_EXPORT2 |
|
257 uscript_isRightToLeft(UScriptCode script) { |
|
258 return (getScriptProps(script) & RTL) != 0; |
|
259 } |
|
260 |
|
261 U_CAPI UBool U_EXPORT2 |
|
262 uscript_breaksBetweenLetters(UScriptCode script) { |
|
263 return (getScriptProps(script) & LB_LETTERS) != 0; |
|
264 } |
|
265 |
|
266 U_CAPI UBool U_EXPORT2 |
|
267 uscript_isCased(UScriptCode script) { |
|
268 return (getScriptProps(script) & CASED) != 0; |
|
269 } |