1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/harfbuzz/src/hb-icu.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,372 @@ 1.4 +/* 1.5 + * Copyright © 2009 Red Hat, Inc. 1.6 + * Copyright © 2009 Keith Stribley 1.7 + * Copyright © 2011 Google, Inc. 1.8 + * 1.9 + * This is part of HarfBuzz, a text shaping library. 1.10 + * 1.11 + * Permission is hereby granted, without written agreement and without 1.12 + * license or royalty fees, to use, copy, modify, and distribute this 1.13 + * software and its documentation for any purpose, provided that the 1.14 + * above copyright notice and the following two paragraphs appear in 1.15 + * all copies of this software. 1.16 + * 1.17 + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 1.18 + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 1.19 + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 1.20 + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 1.21 + * DAMAGE. 1.22 + * 1.23 + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 1.24 + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 1.25 + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 1.26 + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 1.27 + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 1.28 + * 1.29 + * Red Hat Author(s): Behdad Esfahbod 1.30 + * Google Author(s): Behdad Esfahbod 1.31 + */ 1.32 + 1.33 +#include "hb-private.hh" 1.34 + 1.35 +#include "hb-icu.h" 1.36 + 1.37 +#include "hb-unicode-private.hh" 1.38 + 1.39 +#include <unicode/uchar.h> 1.40 +#include <unicode/unorm.h> 1.41 +#include <unicode/ustring.h> 1.42 +#include <unicode/uversion.h> 1.43 + 1.44 + 1.45 +hb_script_t 1.46 +hb_icu_script_to_script (UScriptCode script) 1.47 +{ 1.48 + if (unlikely (script == USCRIPT_INVALID_CODE)) 1.49 + return HB_SCRIPT_INVALID; 1.50 + 1.51 + return hb_script_from_string (uscript_getShortName (script), -1); 1.52 +} 1.53 + 1.54 +UScriptCode 1.55 +hb_icu_script_from_script (hb_script_t script) 1.56 +{ 1.57 + if (unlikely (script == HB_SCRIPT_INVALID)) 1.58 + return USCRIPT_INVALID_CODE; 1.59 + 1.60 + for (unsigned int i = 0; i < USCRIPT_CODE_LIMIT; i++) 1.61 + if (unlikely (hb_icu_script_to_script ((UScriptCode) i) == script)) 1.62 + return (UScriptCode) i; 1.63 + 1.64 + return USCRIPT_UNKNOWN; 1.65 +} 1.66 + 1.67 + 1.68 +static hb_unicode_combining_class_t 1.69 +hb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, 1.70 + hb_codepoint_t unicode, 1.71 + void *user_data HB_UNUSED) 1.72 + 1.73 +{ 1.74 + return (hb_unicode_combining_class_t) u_getCombiningClass (unicode); 1.75 +} 1.76 + 1.77 +static unsigned int 1.78 +hb_icu_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED, 1.79 + hb_codepoint_t unicode, 1.80 + void *user_data HB_UNUSED) 1.81 +{ 1.82 + switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH)) 1.83 + { 1.84 + case U_EA_WIDE: 1.85 + case U_EA_FULLWIDTH: 1.86 + return 2; 1.87 + case U_EA_NEUTRAL: 1.88 + case U_EA_AMBIGUOUS: 1.89 + case U_EA_HALFWIDTH: 1.90 + case U_EA_NARROW: 1.91 + return 1; 1.92 + } 1.93 + return 1; 1.94 +} 1.95 + 1.96 +static hb_unicode_general_category_t 1.97 +hb_icu_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, 1.98 + hb_codepoint_t unicode, 1.99 + void *user_data HB_UNUSED) 1.100 +{ 1.101 + switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY)) 1.102 + { 1.103 + case U_UNASSIGNED: return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; 1.104 + 1.105 + case U_UPPERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER; 1.106 + case U_LOWERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER; 1.107 + case U_TITLECASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER; 1.108 + case U_MODIFIER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER; 1.109 + case U_OTHER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER; 1.110 + 1.111 + case U_NON_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK; 1.112 + case U_ENCLOSING_MARK: return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK; 1.113 + case U_COMBINING_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK; 1.114 + 1.115 + case U_DECIMAL_DIGIT_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER; 1.116 + case U_LETTER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER; 1.117 + case U_OTHER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER; 1.118 + 1.119 + case U_SPACE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR; 1.120 + case U_LINE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR; 1.121 + case U_PARAGRAPH_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR; 1.122 + 1.123 + case U_CONTROL_CHAR: return HB_UNICODE_GENERAL_CATEGORY_CONTROL; 1.124 + case U_FORMAT_CHAR: return HB_UNICODE_GENERAL_CATEGORY_FORMAT; 1.125 + case U_PRIVATE_USE_CHAR: return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE; 1.126 + case U_SURROGATE: return HB_UNICODE_GENERAL_CATEGORY_SURROGATE; 1.127 + 1.128 + 1.129 + case U_DASH_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION; 1.130 + case U_START_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION; 1.131 + case U_END_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION; 1.132 + case U_CONNECTOR_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION; 1.133 + case U_OTHER_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION; 1.134 + 1.135 + case U_MATH_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL; 1.136 + case U_CURRENCY_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL; 1.137 + case U_MODIFIER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL; 1.138 + case U_OTHER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL; 1.139 + 1.140 + case U_INITIAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION; 1.141 + case U_FINAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION; 1.142 + } 1.143 + 1.144 + return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; 1.145 +} 1.146 + 1.147 +static hb_codepoint_t 1.148 +hb_icu_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, 1.149 + hb_codepoint_t unicode, 1.150 + void *user_data HB_UNUSED) 1.151 +{ 1.152 + return u_charMirror(unicode); 1.153 +} 1.154 + 1.155 +static hb_script_t 1.156 +hb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, 1.157 + hb_codepoint_t unicode, 1.158 + void *user_data HB_UNUSED) 1.159 +{ 1.160 + UErrorCode status = U_ZERO_ERROR; 1.161 + UScriptCode scriptCode = uscript_getScript(unicode, &status); 1.162 + 1.163 + if (unlikely (U_FAILURE (status))) 1.164 + return HB_SCRIPT_UNKNOWN; 1.165 + 1.166 + return hb_icu_script_to_script (scriptCode); 1.167 +} 1.168 + 1.169 +#if U_ICU_VERSION_MAJOR_NUM >= 49 1.170 +static const UNormalizer2 *normalizer; 1.171 +#endif 1.172 + 1.173 +static hb_bool_t 1.174 +hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, 1.175 + hb_codepoint_t a, 1.176 + hb_codepoint_t b, 1.177 + hb_codepoint_t *ab, 1.178 + void *user_data HB_UNUSED) 1.179 +{ 1.180 +#if U_ICU_VERSION_MAJOR_NUM >= 49 1.181 + { 1.182 + UChar32 ret = unorm2_composePair (normalizer, a, b); 1.183 + if (ret < 0) return false; 1.184 + *ab = ret; 1.185 + return true; 1.186 + } 1.187 +#endif 1.188 + 1.189 + /* We don't ifdef-out the fallback code such that compiler always 1.190 + * sees it and makes sure it's compilable. */ 1.191 + 1.192 + UChar utf16[4], normalized[5]; 1.193 + unsigned int len; 1.194 + hb_bool_t ret, err; 1.195 + UErrorCode icu_err; 1.196 + 1.197 + len = 0; 1.198 + err = false; 1.199 + U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), a, err); 1.200 + if (err) return false; 1.201 + U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), b, err); 1.202 + if (err) return false; 1.203 + 1.204 + icu_err = U_ZERO_ERROR; 1.205 + len = unorm_normalize (utf16, len, UNORM_NFC, 0, normalized, ARRAY_LENGTH (normalized), &icu_err); 1.206 + if (U_FAILURE (icu_err)) 1.207 + return false; 1.208 + if (u_countChar32 (normalized, len) == 1) { 1.209 + U16_GET_UNSAFE (normalized, 0, *ab); 1.210 + ret = true; 1.211 + } else { 1.212 + ret = false; 1.213 + } 1.214 + 1.215 + return ret; 1.216 +} 1.217 + 1.218 +static hb_bool_t 1.219 +hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, 1.220 + hb_codepoint_t ab, 1.221 + hb_codepoint_t *a, 1.222 + hb_codepoint_t *b, 1.223 + void *user_data HB_UNUSED) 1.224 +{ 1.225 +#if U_ICU_VERSION_MAJOR_NUM >= 49 1.226 + { 1.227 + UChar decomposed[4]; 1.228 + int len; 1.229 + UErrorCode icu_err = U_ZERO_ERROR; 1.230 + len = unorm2_getRawDecomposition (normalizer, ab, decomposed, 1.231 + ARRAY_LENGTH (decomposed), &icu_err); 1.232 + if (U_FAILURE (icu_err) || len < 0) return false; 1.233 + 1.234 + len = u_countChar32 (decomposed, len); 1.235 + if (len == 1) { 1.236 + U16_GET_UNSAFE (decomposed, 0, *a); 1.237 + *b = 0; 1.238 + return *a != ab; 1.239 + } else if (len == 2) { 1.240 + len =0; 1.241 + U16_NEXT_UNSAFE (decomposed, len, *a); 1.242 + U16_NEXT_UNSAFE (decomposed, len, *b); 1.243 + } 1.244 + return true; 1.245 + } 1.246 +#endif 1.247 + 1.248 + /* We don't ifdef-out the fallback code such that compiler always 1.249 + * sees it and makes sure it's compilable. */ 1.250 + 1.251 + UChar utf16[2], normalized[2 * HB_UNICODE_MAX_DECOMPOSITION_LEN + 1]; 1.252 + unsigned int len; 1.253 + hb_bool_t ret, err; 1.254 + UErrorCode icu_err; 1.255 + 1.256 + /* This function is a monster! Maybe it wasn't a good idea adding a 1.257 + * pairwise decompose API... */ 1.258 + /* Watchout for the dragons. Err, watchout for macros changing len. */ 1.259 + 1.260 + len = 0; 1.261 + err = false; 1.262 + U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err); 1.263 + if (err) return false; 1.264 + 1.265 + icu_err = U_ZERO_ERROR; 1.266 + len = unorm_normalize (utf16, len, UNORM_NFD, 0, normalized, ARRAY_LENGTH (normalized), &icu_err); 1.267 + if (U_FAILURE (icu_err)) 1.268 + return false; 1.269 + 1.270 + len = u_countChar32 (normalized, len); 1.271 + 1.272 + if (len == 1) { 1.273 + U16_GET_UNSAFE (normalized, 0, *a); 1.274 + *b = 0; 1.275 + ret = *a != ab; 1.276 + } else if (len == 2) { 1.277 + len =0; 1.278 + U16_NEXT_UNSAFE (normalized, len, *a); 1.279 + U16_NEXT_UNSAFE (normalized, len, *b); 1.280 + 1.281 + /* Here's the ugly part: if ab decomposes to a single character and 1.282 + * that character decomposes again, we have to detect that and undo 1.283 + * the second part :-(. */ 1.284 + UChar recomposed[20]; 1.285 + icu_err = U_ZERO_ERROR; 1.286 + unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err); 1.287 + if (U_FAILURE (icu_err)) 1.288 + return false; 1.289 + hb_codepoint_t c; 1.290 + U16_GET_UNSAFE (recomposed, 0, c); 1.291 + if (c != *a && c != ab) { 1.292 + *a = c; 1.293 + *b = 0; 1.294 + } 1.295 + ret = true; 1.296 + } else { 1.297 + /* If decomposed to more than two characters, take the last one, 1.298 + * and recompose the rest to get the first component. */ 1.299 + U16_PREV_UNSAFE (normalized, len, *b); /* Changes len in-place. */ 1.300 + UChar recomposed[18 * 2]; 1.301 + icu_err = U_ZERO_ERROR; 1.302 + len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err); 1.303 + if (U_FAILURE (icu_err)) 1.304 + return false; 1.305 + /* We expect that recomposed has exactly one character now. */ 1.306 + if (unlikely (u_countChar32 (recomposed, len) != 1)) 1.307 + return false; 1.308 + U16_GET_UNSAFE (recomposed, 0, *a); 1.309 + ret = true; 1.310 + } 1.311 + 1.312 + return ret; 1.313 +} 1.314 + 1.315 +static unsigned int 1.316 +hb_icu_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs HB_UNUSED, 1.317 + hb_codepoint_t u, 1.318 + hb_codepoint_t *decomposed, 1.319 + void *user_data HB_UNUSED) 1.320 +{ 1.321 + UChar utf16[2], normalized[2 * HB_UNICODE_MAX_DECOMPOSITION_LEN + 1]; 1.322 + unsigned int len; 1.323 + int32_t utf32_len; 1.324 + hb_bool_t err; 1.325 + UErrorCode icu_err; 1.326 + 1.327 + /* Copy @u into a UTF-16 array to be passed to ICU. */ 1.328 + len = 0; 1.329 + err = FALSE; 1.330 + U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), u, err); 1.331 + if (err) 1.332 + return 0; 1.333 + 1.334 + /* Normalise the codepoint using NFKD mode. */ 1.335 + icu_err = U_ZERO_ERROR; 1.336 + len = unorm_normalize (utf16, len, UNORM_NFKD, 0, normalized, ARRAY_LENGTH (normalized), &icu_err); 1.337 + if (icu_err) 1.338 + return 0; 1.339 + 1.340 + /* Convert the decomposed form from UTF-16 to UTF-32. */ 1.341 + icu_err = U_ZERO_ERROR; 1.342 + u_strToUTF32 ((UChar32*) decomposed, HB_UNICODE_MAX_DECOMPOSITION_LEN, &utf32_len, normalized, len, &icu_err); 1.343 + if (icu_err) 1.344 + return 0; 1.345 + 1.346 + return utf32_len; 1.347 +} 1.348 + 1.349 + 1.350 +hb_unicode_funcs_t * 1.351 +hb_icu_get_unicode_funcs (void) 1.352 +{ 1.353 + static const hb_unicode_funcs_t _hb_icu_unicode_funcs = { 1.354 + HB_OBJECT_HEADER_STATIC, 1.355 + 1.356 + NULL, /* parent */ 1.357 + true, /* immutable */ 1.358 + { 1.359 +#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name, 1.360 + HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS 1.361 +#undef HB_UNICODE_FUNC_IMPLEMENT 1.362 + } 1.363 + }; 1.364 + 1.365 +#if U_ICU_VERSION_MAJOR_NUM >= 49 1.366 + if (!hb_atomic_ptr_get (&normalizer)) { 1.367 + UErrorCode icu_err = U_ZERO_ERROR; 1.368 + /* We ignore failure in getNFCInstace(). */ 1.369 + hb_atomic_ptr_cmpexch (&normalizer, NULL, unorm2_getNFCInstance (&icu_err)); 1.370 + } 1.371 +#endif 1.372 + return const_cast<hb_unicode_funcs_t *> (&_hb_icu_unicode_funcs); 1.373 +} 1.374 + 1.375 +