|
1 /* |
|
2 * Copyright © 2009 Red Hat, Inc. |
|
3 * Copyright © 2011 Google, Inc. |
|
4 * |
|
5 * This is part of HarfBuzz, a text shaping library. |
|
6 * |
|
7 * Permission is hereby granted, without written agreement and without |
|
8 * license or royalty fees, to use, copy, modify, and distribute this |
|
9 * software and its documentation for any purpose, provided that the |
|
10 * above copyright notice and the following two paragraphs appear in |
|
11 * all copies of this software. |
|
12 * |
|
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
|
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
|
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
|
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
|
17 * DAMAGE. |
|
18 * |
|
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
|
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
|
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
|
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
|
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
|
24 * |
|
25 * Red Hat Author(s): Behdad Esfahbod |
|
26 * Google Author(s): Behdad Esfahbod |
|
27 */ |
|
28 |
|
29 #include "hb-private.hh" |
|
30 |
|
31 #include "hb-glib.h" |
|
32 |
|
33 #include "hb-unicode-private.hh" |
|
34 |
|
35 |
|
36 #if !GLIB_CHECK_VERSION(2,29,14) |
|
37 static const hb_script_t |
|
38 glib_script_to_script[] = |
|
39 { |
|
40 HB_SCRIPT_COMMON, |
|
41 HB_SCRIPT_INHERITED, |
|
42 HB_SCRIPT_ARABIC, |
|
43 HB_SCRIPT_ARMENIAN, |
|
44 HB_SCRIPT_BENGALI, |
|
45 HB_SCRIPT_BOPOMOFO, |
|
46 HB_SCRIPT_CHEROKEE, |
|
47 HB_SCRIPT_COPTIC, |
|
48 HB_SCRIPT_CYRILLIC, |
|
49 HB_SCRIPT_DESERET, |
|
50 HB_SCRIPT_DEVANAGARI, |
|
51 HB_SCRIPT_ETHIOPIC, |
|
52 HB_SCRIPT_GEORGIAN, |
|
53 HB_SCRIPT_GOTHIC, |
|
54 HB_SCRIPT_GREEK, |
|
55 HB_SCRIPT_GUJARATI, |
|
56 HB_SCRIPT_GURMUKHI, |
|
57 HB_SCRIPT_HAN, |
|
58 HB_SCRIPT_HANGUL, |
|
59 HB_SCRIPT_HEBREW, |
|
60 HB_SCRIPT_HIRAGANA, |
|
61 HB_SCRIPT_KANNADA, |
|
62 HB_SCRIPT_KATAKANA, |
|
63 HB_SCRIPT_KHMER, |
|
64 HB_SCRIPT_LAO, |
|
65 HB_SCRIPT_LATIN, |
|
66 HB_SCRIPT_MALAYALAM, |
|
67 HB_SCRIPT_MONGOLIAN, |
|
68 HB_SCRIPT_MYANMAR, |
|
69 HB_SCRIPT_OGHAM, |
|
70 HB_SCRIPT_OLD_ITALIC, |
|
71 HB_SCRIPT_ORIYA, |
|
72 HB_SCRIPT_RUNIC, |
|
73 HB_SCRIPT_SINHALA, |
|
74 HB_SCRIPT_SYRIAC, |
|
75 HB_SCRIPT_TAMIL, |
|
76 HB_SCRIPT_TELUGU, |
|
77 HB_SCRIPT_THAANA, |
|
78 HB_SCRIPT_THAI, |
|
79 HB_SCRIPT_TIBETAN, |
|
80 HB_SCRIPT_CANADIAN_SYLLABICS, |
|
81 HB_SCRIPT_YI, |
|
82 HB_SCRIPT_TAGALOG, |
|
83 HB_SCRIPT_HANUNOO, |
|
84 HB_SCRIPT_BUHID, |
|
85 HB_SCRIPT_TAGBANWA, |
|
86 |
|
87 /* Unicode-4.0 additions */ |
|
88 HB_SCRIPT_BRAILLE, |
|
89 HB_SCRIPT_CYPRIOT, |
|
90 HB_SCRIPT_LIMBU, |
|
91 HB_SCRIPT_OSMANYA, |
|
92 HB_SCRIPT_SHAVIAN, |
|
93 HB_SCRIPT_LINEAR_B, |
|
94 HB_SCRIPT_TAI_LE, |
|
95 HB_SCRIPT_UGARITIC, |
|
96 |
|
97 /* Unicode-4.1 additions */ |
|
98 HB_SCRIPT_NEW_TAI_LUE, |
|
99 HB_SCRIPT_BUGINESE, |
|
100 HB_SCRIPT_GLAGOLITIC, |
|
101 HB_SCRIPT_TIFINAGH, |
|
102 HB_SCRIPT_SYLOTI_NAGRI, |
|
103 HB_SCRIPT_OLD_PERSIAN, |
|
104 HB_SCRIPT_KHAROSHTHI, |
|
105 |
|
106 /* Unicode-5.0 additions */ |
|
107 HB_SCRIPT_UNKNOWN, |
|
108 HB_SCRIPT_BALINESE, |
|
109 HB_SCRIPT_CUNEIFORM, |
|
110 HB_SCRIPT_PHOENICIAN, |
|
111 HB_SCRIPT_PHAGS_PA, |
|
112 HB_SCRIPT_NKO, |
|
113 |
|
114 /* Unicode-5.1 additions */ |
|
115 HB_SCRIPT_KAYAH_LI, |
|
116 HB_SCRIPT_LEPCHA, |
|
117 HB_SCRIPT_REJANG, |
|
118 HB_SCRIPT_SUNDANESE, |
|
119 HB_SCRIPT_SAURASHTRA, |
|
120 HB_SCRIPT_CHAM, |
|
121 HB_SCRIPT_OL_CHIKI, |
|
122 HB_SCRIPT_VAI, |
|
123 HB_SCRIPT_CARIAN, |
|
124 HB_SCRIPT_LYCIAN, |
|
125 HB_SCRIPT_LYDIAN, |
|
126 |
|
127 /* Unicode-5.2 additions */ |
|
128 HB_SCRIPT_AVESTAN, |
|
129 HB_SCRIPT_BAMUM, |
|
130 HB_SCRIPT_EGYPTIAN_HIEROGLYPHS, |
|
131 HB_SCRIPT_IMPERIAL_ARAMAIC, |
|
132 HB_SCRIPT_INSCRIPTIONAL_PAHLAVI, |
|
133 HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, |
|
134 HB_SCRIPT_JAVANESE, |
|
135 HB_SCRIPT_KAITHI, |
|
136 HB_SCRIPT_TAI_THAM, |
|
137 HB_SCRIPT_LISU, |
|
138 HB_SCRIPT_MEETEI_MAYEK, |
|
139 HB_SCRIPT_OLD_SOUTH_ARABIAN, |
|
140 HB_SCRIPT_OLD_TURKIC, |
|
141 HB_SCRIPT_SAMARITAN, |
|
142 HB_SCRIPT_TAI_VIET, |
|
143 |
|
144 /* Unicode-6.0 additions */ |
|
145 HB_SCRIPT_BATAK, |
|
146 HB_SCRIPT_BRAHMI, |
|
147 HB_SCRIPT_MANDAIC, |
|
148 |
|
149 /* Unicode-6.1 additions */ |
|
150 HB_SCRIPT_CHAKMA, |
|
151 HB_SCRIPT_MEROITIC_CURSIVE, |
|
152 HB_SCRIPT_MEROITIC_HIEROGLYPHS, |
|
153 HB_SCRIPT_MIAO, |
|
154 HB_SCRIPT_SHARADA, |
|
155 HB_SCRIPT_SORA_SOMPENG, |
|
156 HB_SCRIPT_TAKRI |
|
157 }; |
|
158 #endif |
|
159 |
|
160 hb_script_t |
|
161 hb_glib_script_to_script (GUnicodeScript script) |
|
162 { |
|
163 #if GLIB_CHECK_VERSION(2,29,14) |
|
164 return (hb_script_t) g_unicode_script_to_iso15924 (script); |
|
165 #else |
|
166 if (likely ((unsigned int) script < ARRAY_LENGTH (glib_script_to_script))) |
|
167 return glib_script_to_script[script]; |
|
168 |
|
169 if (unlikely (script == G_UNICODE_SCRIPT_INVALID_CODE)) |
|
170 return HB_SCRIPT_INVALID; |
|
171 |
|
172 return HB_SCRIPT_UNKNOWN; |
|
173 #endif |
|
174 } |
|
175 |
|
176 GUnicodeScript |
|
177 hb_glib_script_from_script (hb_script_t script) |
|
178 { |
|
179 #if GLIB_CHECK_VERSION(2,29,14) |
|
180 return g_unicode_script_from_iso15924 (script); |
|
181 #else |
|
182 unsigned int count = ARRAY_LENGTH (glib_script_to_script); |
|
183 for (unsigned int i = 0; i < count; i++) |
|
184 if (glib_script_to_script[i] == script) |
|
185 return (GUnicodeScript) i; |
|
186 |
|
187 if (unlikely (script == HB_SCRIPT_INVALID)) |
|
188 return G_UNICODE_SCRIPT_INVALID_CODE; |
|
189 |
|
190 return G_UNICODE_SCRIPT_UNKNOWN; |
|
191 #endif |
|
192 } |
|
193 |
|
194 |
|
195 static hb_unicode_combining_class_t |
|
196 hb_glib_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
|
197 hb_codepoint_t unicode, |
|
198 void *user_data HB_UNUSED) |
|
199 |
|
200 { |
|
201 return (hb_unicode_combining_class_t) g_unichar_combining_class (unicode); |
|
202 } |
|
203 |
|
204 static unsigned int |
|
205 hb_glib_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
|
206 hb_codepoint_t unicode, |
|
207 void *user_data HB_UNUSED) |
|
208 { |
|
209 return g_unichar_iswide (unicode) ? 2 : 1; |
|
210 } |
|
211 |
|
212 static hb_unicode_general_category_t |
|
213 hb_glib_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
|
214 hb_codepoint_t unicode, |
|
215 void *user_data HB_UNUSED) |
|
216 |
|
217 { |
|
218 /* hb_unicode_general_category_t and GUnicodeType are identical */ |
|
219 return (hb_unicode_general_category_t) g_unichar_type (unicode); |
|
220 } |
|
221 |
|
222 static hb_codepoint_t |
|
223 hb_glib_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
|
224 hb_codepoint_t unicode, |
|
225 void *user_data HB_UNUSED) |
|
226 { |
|
227 g_unichar_get_mirror_char (unicode, &unicode); |
|
228 return unicode; |
|
229 } |
|
230 |
|
231 static hb_script_t |
|
232 hb_glib_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
|
233 hb_codepoint_t unicode, |
|
234 void *user_data HB_UNUSED) |
|
235 { |
|
236 return hb_glib_script_to_script (g_unichar_get_script (unicode)); |
|
237 } |
|
238 |
|
239 static hb_bool_t |
|
240 hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
|
241 hb_codepoint_t a, |
|
242 hb_codepoint_t b, |
|
243 hb_codepoint_t *ab, |
|
244 void *user_data HB_UNUSED) |
|
245 { |
|
246 #if GLIB_CHECK_VERSION(2,29,12) |
|
247 return g_unichar_compose (a, b, ab); |
|
248 #endif |
|
249 |
|
250 /* We don't ifdef-out the fallback code such that compiler always |
|
251 * sees it and makes sure it's compilable. */ |
|
252 |
|
253 gchar utf8[12]; |
|
254 gchar *normalized; |
|
255 int len; |
|
256 hb_bool_t ret; |
|
257 |
|
258 len = g_unichar_to_utf8 (a, utf8); |
|
259 len += g_unichar_to_utf8 (b, utf8 + len); |
|
260 normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFC); |
|
261 len = g_utf8_strlen (normalized, -1); |
|
262 if (unlikely (!len)) |
|
263 return false; |
|
264 |
|
265 if (len == 1) { |
|
266 *ab = g_utf8_get_char (normalized); |
|
267 ret = true; |
|
268 } else { |
|
269 ret = false; |
|
270 } |
|
271 |
|
272 g_free (normalized); |
|
273 return ret; |
|
274 } |
|
275 |
|
276 static hb_bool_t |
|
277 hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
|
278 hb_codepoint_t ab, |
|
279 hb_codepoint_t *a, |
|
280 hb_codepoint_t *b, |
|
281 void *user_data HB_UNUSED) |
|
282 { |
|
283 #if GLIB_CHECK_VERSION(2,29,12) |
|
284 return g_unichar_decompose (ab, a, b); |
|
285 #endif |
|
286 |
|
287 /* We don't ifdef-out the fallback code such that compiler always |
|
288 * sees it and makes sure it's compilable. */ |
|
289 |
|
290 gchar utf8[6]; |
|
291 gchar *normalized; |
|
292 int len; |
|
293 hb_bool_t ret; |
|
294 |
|
295 len = g_unichar_to_utf8 (ab, utf8); |
|
296 normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFD); |
|
297 len = g_utf8_strlen (normalized, -1); |
|
298 if (unlikely (!len)) |
|
299 return false; |
|
300 |
|
301 if (len == 1) { |
|
302 *a = g_utf8_get_char (normalized); |
|
303 *b = 0; |
|
304 ret = *a != ab; |
|
305 } else if (len == 2) { |
|
306 *a = g_utf8_get_char (normalized); |
|
307 *b = g_utf8_get_char (g_utf8_next_char (normalized)); |
|
308 /* Here's the ugly part: if ab decomposes to a single character and |
|
309 * that character decomposes again, we have to detect that and undo |
|
310 * the second part :-(. */ |
|
311 gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC); |
|
312 hb_codepoint_t c = g_utf8_get_char (recomposed); |
|
313 if (c != ab && c != *a) { |
|
314 *a = c; |
|
315 *b = 0; |
|
316 } |
|
317 g_free (recomposed); |
|
318 ret = true; |
|
319 } else { |
|
320 /* If decomposed to more than two characters, take the last one, |
|
321 * and recompose the rest to get the first component. */ |
|
322 gchar *end = g_utf8_offset_to_pointer (normalized, len - 1); |
|
323 gchar *recomposed; |
|
324 *b = g_utf8_get_char (end); |
|
325 recomposed = g_utf8_normalize (normalized, end - normalized, G_NORMALIZE_NFC); |
|
326 /* We expect that recomposed has exactly one character now. */ |
|
327 *a = g_utf8_get_char (recomposed); |
|
328 g_free (recomposed); |
|
329 ret = true; |
|
330 } |
|
331 |
|
332 g_free (normalized); |
|
333 return ret; |
|
334 } |
|
335 |
|
336 static unsigned int |
|
337 hb_glib_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
|
338 hb_codepoint_t u, |
|
339 hb_codepoint_t *decomposed, |
|
340 void *user_data HB_UNUSED) |
|
341 { |
|
342 #if GLIB_CHECK_VERSION(2,29,12) |
|
343 return g_unichar_fully_decompose (u, TRUE, decomposed, HB_UNICODE_MAX_DECOMPOSITION_LEN); |
|
344 #endif |
|
345 |
|
346 /* If the user doesn't have GLib >= 2.29.12 we have to perform |
|
347 * a round trip to UTF-8 and the associated memory management dance. */ |
|
348 gchar utf8[6]; |
|
349 gchar *utf8_decomposed, *c; |
|
350 gsize utf8_len, utf8_decomposed_len, i; |
|
351 |
|
352 /* Convert @u to UTF-8 and normalise it in NFKD mode. This performs the compatibility decomposition. */ |
|
353 utf8_len = g_unichar_to_utf8 (u, utf8); |
|
354 utf8_decomposed = g_utf8_normalize (utf8, utf8_len, G_NORMALIZE_NFKD); |
|
355 utf8_decomposed_len = g_utf8_strlen (utf8_decomposed, -1); |
|
356 |
|
357 assert (utf8_decomposed_len <= HB_UNICODE_MAX_DECOMPOSITION_LEN); |
|
358 |
|
359 for (i = 0, c = utf8_decomposed; i < utf8_decomposed_len; i++, c = g_utf8_next_char (c)) |
|
360 *decomposed++ = g_utf8_get_char (c); |
|
361 |
|
362 g_free (utf8_decomposed); |
|
363 |
|
364 return utf8_decomposed_len; |
|
365 } |
|
366 |
|
367 hb_unicode_funcs_t * |
|
368 hb_glib_get_unicode_funcs (void) |
|
369 { |
|
370 static const hb_unicode_funcs_t _hb_glib_unicode_funcs = { |
|
371 HB_OBJECT_HEADER_STATIC, |
|
372 |
|
373 NULL, /* parent */ |
|
374 true, /* immutable */ |
|
375 { |
|
376 #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_glib_unicode_##name, |
|
377 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS |
|
378 #undef HB_UNICODE_FUNC_IMPLEMENT |
|
379 } |
|
380 }; |
|
381 |
|
382 return const_cast<hb_unicode_funcs_t *> (&_hb_glib_unicode_funcs); |
|
383 } |
|
384 |