gfx/harfbuzz/src/hb-icu.cc

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/harfbuzz/src/hb-icu.cc	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,372 @@
     1.4 +/*
     1.5 + * Copyright © 2009  Red Hat, Inc.
     1.6 + * Copyright © 2009  Keith Stribley
     1.7 + * Copyright © 2011  Google, Inc.
     1.8 + *
     1.9 + *  This is part of HarfBuzz, a text shaping library.
    1.10 + *
    1.11 + * Permission is hereby granted, without written agreement and without
    1.12 + * license or royalty fees, to use, copy, modify, and distribute this
    1.13 + * software and its documentation for any purpose, provided that the
    1.14 + * above copyright notice and the following two paragraphs appear in
    1.15 + * all copies of this software.
    1.16 + *
    1.17 + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
    1.18 + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
    1.19 + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
    1.20 + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
    1.21 + * DAMAGE.
    1.22 + *
    1.23 + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
    1.24 + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    1.25 + * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    1.26 + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
    1.27 + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    1.28 + *
    1.29 + * Red Hat Author(s): Behdad Esfahbod
    1.30 + * Google Author(s): Behdad Esfahbod
    1.31 + */
    1.32 +
    1.33 +#include "hb-private.hh"
    1.34 +
    1.35 +#include "hb-icu.h"
    1.36 +
    1.37 +#include "hb-unicode-private.hh"
    1.38 +
    1.39 +#include <unicode/uchar.h>
    1.40 +#include <unicode/unorm.h>
    1.41 +#include <unicode/ustring.h>
    1.42 +#include <unicode/uversion.h>
    1.43 +
    1.44 +
    1.45 +hb_script_t
    1.46 +hb_icu_script_to_script (UScriptCode script)
    1.47 +{
    1.48 +  if (unlikely (script == USCRIPT_INVALID_CODE))
    1.49 +    return HB_SCRIPT_INVALID;
    1.50 +
    1.51 +  return hb_script_from_string (uscript_getShortName (script), -1);
    1.52 +}
    1.53 +
    1.54 +UScriptCode
    1.55 +hb_icu_script_from_script (hb_script_t script)
    1.56 +{
    1.57 +  if (unlikely (script == HB_SCRIPT_INVALID))
    1.58 +    return USCRIPT_INVALID_CODE;
    1.59 +
    1.60 +  for (unsigned int i = 0; i < USCRIPT_CODE_LIMIT; i++)
    1.61 +    if (unlikely (hb_icu_script_to_script ((UScriptCode) i) == script))
    1.62 +      return (UScriptCode) i;
    1.63 +
    1.64 +  return USCRIPT_UNKNOWN;
    1.65 +}
    1.66 +
    1.67 +
    1.68 +static hb_unicode_combining_class_t
    1.69 +hb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    1.70 +				hb_codepoint_t      unicode,
    1.71 +				void               *user_data HB_UNUSED)
    1.72 +
    1.73 +{
    1.74 +  return (hb_unicode_combining_class_t) u_getCombiningClass (unicode);
    1.75 +}
    1.76 +
    1.77 +static unsigned int
    1.78 +hb_icu_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    1.79 +				hb_codepoint_t      unicode,
    1.80 +				void               *user_data HB_UNUSED)
    1.81 +{
    1.82 +  switch (u_getIntPropertyValue(unicode, UCHAR_EAST_ASIAN_WIDTH))
    1.83 +  {
    1.84 +  case U_EA_WIDE:
    1.85 +  case U_EA_FULLWIDTH:
    1.86 +    return 2;
    1.87 +  case U_EA_NEUTRAL:
    1.88 +  case U_EA_AMBIGUOUS:
    1.89 +  case U_EA_HALFWIDTH:
    1.90 +  case U_EA_NARROW:
    1.91 +    return 1;
    1.92 +  }
    1.93 +  return 1;
    1.94 +}
    1.95 +
    1.96 +static hb_unicode_general_category_t
    1.97 +hb_icu_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
    1.98 +				 hb_codepoint_t      unicode,
    1.99 +				 void               *user_data HB_UNUSED)
   1.100 +{
   1.101 +  switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY))
   1.102 +  {
   1.103 +  case U_UNASSIGNED:			return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
   1.104 +
   1.105 +  case U_UPPERCASE_LETTER:		return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER;
   1.106 +  case U_LOWERCASE_LETTER:		return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER;
   1.107 +  case U_TITLECASE_LETTER:		return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER;
   1.108 +  case U_MODIFIER_LETTER:		return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER;
   1.109 +  case U_OTHER_LETTER:			return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
   1.110 +
   1.111 +  case U_NON_SPACING_MARK:		return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK;
   1.112 +  case U_ENCLOSING_MARK:		return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK;
   1.113 +  case U_COMBINING_SPACING_MARK:	return HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK;
   1.114 +
   1.115 +  case U_DECIMAL_DIGIT_NUMBER:		return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER;
   1.116 +  case U_LETTER_NUMBER:			return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER;
   1.117 +  case U_OTHER_NUMBER:			return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER;
   1.118 +
   1.119 +  case U_SPACE_SEPARATOR:		return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
   1.120 +  case U_LINE_SEPARATOR:		return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR;
   1.121 +  case U_PARAGRAPH_SEPARATOR:		return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR;
   1.122 +
   1.123 +  case U_CONTROL_CHAR:			return HB_UNICODE_GENERAL_CATEGORY_CONTROL;
   1.124 +  case U_FORMAT_CHAR:			return HB_UNICODE_GENERAL_CATEGORY_FORMAT;
   1.125 +  case U_PRIVATE_USE_CHAR:		return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE;
   1.126 +  case U_SURROGATE:			return HB_UNICODE_GENERAL_CATEGORY_SURROGATE;
   1.127 +
   1.128 +
   1.129 +  case U_DASH_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION;
   1.130 +  case U_START_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION;
   1.131 +  case U_END_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION;
   1.132 +  case U_CONNECTOR_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION;
   1.133 +  case U_OTHER_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION;
   1.134 +
   1.135 +  case U_MATH_SYMBOL:			return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL;
   1.136 +  case U_CURRENCY_SYMBOL:		return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL;
   1.137 +  case U_MODIFIER_SYMBOL:		return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL;
   1.138 +  case U_OTHER_SYMBOL:			return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL;
   1.139 +
   1.140 +  case U_INITIAL_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION;
   1.141 +  case U_FINAL_PUNCTUATION:		return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION;
   1.142 +  }
   1.143 +
   1.144 +  return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
   1.145 +}
   1.146 +
   1.147 +static hb_codepoint_t
   1.148 +hb_icu_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.149 +			  hb_codepoint_t      unicode,
   1.150 +			  void               *user_data HB_UNUSED)
   1.151 +{
   1.152 +  return u_charMirror(unicode);
   1.153 +}
   1.154 +
   1.155 +static hb_script_t
   1.156 +hb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.157 +		       hb_codepoint_t      unicode,
   1.158 +		       void               *user_data HB_UNUSED)
   1.159 +{
   1.160 +  UErrorCode status = U_ZERO_ERROR;
   1.161 +  UScriptCode scriptCode = uscript_getScript(unicode, &status);
   1.162 +
   1.163 +  if (unlikely (U_FAILURE (status)))
   1.164 +    return HB_SCRIPT_UNKNOWN;
   1.165 +
   1.166 +  return hb_icu_script_to_script (scriptCode);
   1.167 +}
   1.168 +
/* Normalizer instance used by hb_icu_unicode_compose() and
 * hb_icu_unicode_decompose(); it is only declared when building against
 * ICU 49 or later.  It is installed by hb_icu_get_unicode_funcs(), which
 * stores the result of unorm2_getNFCInstance() here. */
#if U_ICU_VERSION_MAJOR_NUM >= 49
static const UNormalizer2 *normalizer;
#endif
   1.172 +
   1.173 +static hb_bool_t
   1.174 +hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.175 +			hb_codepoint_t      a,
   1.176 +			hb_codepoint_t      b,
   1.177 +			hb_codepoint_t     *ab,
   1.178 +			void               *user_data HB_UNUSED)
   1.179 +{
   1.180 +#if U_ICU_VERSION_MAJOR_NUM >= 49
   1.181 +  {
   1.182 +    UChar32 ret = unorm2_composePair (normalizer, a, b);
   1.183 +    if (ret < 0) return false;
   1.184 +    *ab = ret;
   1.185 +    return true;
   1.186 +  }
   1.187 +#endif
   1.188 +
   1.189 +  /* We don't ifdef-out the fallback code such that compiler always
   1.190 +   * sees it and makes sure it's compilable. */
   1.191 +
   1.192 +  UChar utf16[4], normalized[5];
   1.193 +  unsigned int len;
   1.194 +  hb_bool_t ret, err;
   1.195 +  UErrorCode icu_err;
   1.196 +
   1.197 +  len = 0;
   1.198 +  err = false;
   1.199 +  U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), a, err);
   1.200 +  if (err) return false;
   1.201 +  U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), b, err);
   1.202 +  if (err) return false;
   1.203 +
   1.204 +  icu_err = U_ZERO_ERROR;
   1.205 +  len = unorm_normalize (utf16, len, UNORM_NFC, 0, normalized, ARRAY_LENGTH (normalized), &icu_err);
   1.206 +  if (U_FAILURE (icu_err))
   1.207 +    return false;
   1.208 +  if (u_countChar32 (normalized, len) == 1) {
   1.209 +    U16_GET_UNSAFE (normalized, 0, *ab);
   1.210 +    ret = true;
   1.211 +  } else {
   1.212 +    ret = false;
   1.213 +  }
   1.214 +
   1.215 +  return ret;
   1.216 +}
   1.217 +
   1.218 +static hb_bool_t
   1.219 +hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.220 +			  hb_codepoint_t      ab,
   1.221 +			  hb_codepoint_t     *a,
   1.222 +			  hb_codepoint_t     *b,
   1.223 +			  void               *user_data HB_UNUSED)
   1.224 +{
   1.225 +#if U_ICU_VERSION_MAJOR_NUM >= 49
   1.226 +  {
   1.227 +    UChar decomposed[4];
   1.228 +    int len;
   1.229 +    UErrorCode icu_err = U_ZERO_ERROR;
   1.230 +    len = unorm2_getRawDecomposition (normalizer, ab, decomposed,
   1.231 +				      ARRAY_LENGTH (decomposed), &icu_err);
   1.232 +    if (U_FAILURE (icu_err) || len < 0) return false;
   1.233 +
   1.234 +    len = u_countChar32 (decomposed, len);
   1.235 +    if (len == 1) {
   1.236 +      U16_GET_UNSAFE (decomposed, 0, *a);
   1.237 +      *b = 0;
   1.238 +      return *a != ab;
   1.239 +    } else if (len == 2) {
   1.240 +      len =0;
   1.241 +      U16_NEXT_UNSAFE (decomposed, len, *a);
   1.242 +      U16_NEXT_UNSAFE (decomposed, len, *b);
   1.243 +    }
   1.244 +    return true;
   1.245 +  }
   1.246 +#endif
   1.247 +
   1.248 +  /* We don't ifdef-out the fallback code such that compiler always
   1.249 +   * sees it and makes sure it's compilable. */
   1.250 +
   1.251 +  UChar utf16[2], normalized[2 * HB_UNICODE_MAX_DECOMPOSITION_LEN + 1];
   1.252 +  unsigned int len;
   1.253 +  hb_bool_t ret, err;
   1.254 +  UErrorCode icu_err;
   1.255 +
   1.256 +  /* This function is a monster! Maybe it wasn't a good idea adding a
   1.257 +   * pairwise decompose API... */
   1.258 +  /* Watchout for the dragons.  Err, watchout for macros changing len. */
   1.259 +
   1.260 +  len = 0;
   1.261 +  err = false;
   1.262 +  U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), ab, err);
   1.263 +  if (err) return false;
   1.264 +
   1.265 +  icu_err = U_ZERO_ERROR;
   1.266 +  len = unorm_normalize (utf16, len, UNORM_NFD, 0, normalized, ARRAY_LENGTH (normalized), &icu_err);
   1.267 +  if (U_FAILURE (icu_err))
   1.268 +    return false;
   1.269 +
   1.270 +  len = u_countChar32 (normalized, len);
   1.271 +
   1.272 +  if (len == 1) {
   1.273 +    U16_GET_UNSAFE (normalized, 0, *a);
   1.274 +    *b = 0;
   1.275 +    ret = *a != ab;
   1.276 +  } else if (len == 2) {
   1.277 +    len =0;
   1.278 +    U16_NEXT_UNSAFE (normalized, len, *a);
   1.279 +    U16_NEXT_UNSAFE (normalized, len, *b);
   1.280 +
   1.281 +    /* Here's the ugly part: if ab decomposes to a single character and
   1.282 +     * that character decomposes again, we have to detect that and undo
   1.283 +     * the second part :-(. */
   1.284 +    UChar recomposed[20];
   1.285 +    icu_err = U_ZERO_ERROR;
   1.286 +    unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
   1.287 +    if (U_FAILURE (icu_err))
   1.288 +      return false;
   1.289 +    hb_codepoint_t c;
   1.290 +    U16_GET_UNSAFE (recomposed, 0, c);
   1.291 +    if (c != *a && c != ab) {
   1.292 +      *a = c;
   1.293 +      *b = 0;
   1.294 +    }
   1.295 +    ret = true;
   1.296 +  } else {
   1.297 +    /* If decomposed to more than two characters, take the last one,
   1.298 +     * and recompose the rest to get the first component. */
   1.299 +    U16_PREV_UNSAFE (normalized, len, *b); /* Changes len in-place. */
   1.300 +    UChar recomposed[18 * 2];
   1.301 +    icu_err = U_ZERO_ERROR;
   1.302 +    len = unorm_normalize (normalized, len, UNORM_NFC, 0, recomposed, ARRAY_LENGTH (recomposed), &icu_err);
   1.303 +    if (U_FAILURE (icu_err))
   1.304 +      return false;
   1.305 +    /* We expect that recomposed has exactly one character now. */
   1.306 +    if (unlikely (u_countChar32 (recomposed, len) != 1))
   1.307 +      return false;
   1.308 +    U16_GET_UNSAFE (recomposed, 0, *a);
   1.309 +    ret = true;
   1.310 +  }
   1.311 +
   1.312 +  return ret;
   1.313 +}
   1.314 +
   1.315 +static unsigned int
   1.316 +hb_icu_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.317 +					hb_codepoint_t      u,
   1.318 +					hb_codepoint_t     *decomposed,
   1.319 +					void               *user_data HB_UNUSED)
   1.320 +{
   1.321 +  UChar utf16[2], normalized[2 * HB_UNICODE_MAX_DECOMPOSITION_LEN + 1];
   1.322 +  unsigned int len;
   1.323 +  int32_t utf32_len;
   1.324 +  hb_bool_t err;
   1.325 +  UErrorCode icu_err;
   1.326 +
   1.327 +  /* Copy @u into a UTF-16 array to be passed to ICU. */
   1.328 +  len = 0;
   1.329 +  err = FALSE;
   1.330 +  U16_APPEND (utf16, len, ARRAY_LENGTH (utf16), u, err);
   1.331 +  if (err)
   1.332 +    return 0;
   1.333 +
   1.334 +  /* Normalise the codepoint using NFKD mode. */
   1.335 +  icu_err = U_ZERO_ERROR;
   1.336 +  len = unorm_normalize (utf16, len, UNORM_NFKD, 0, normalized, ARRAY_LENGTH (normalized), &icu_err);
   1.337 +  if (icu_err)
   1.338 +    return 0;
   1.339 +
   1.340 +  /* Convert the decomposed form from UTF-16 to UTF-32. */
   1.341 +  icu_err = U_ZERO_ERROR;
   1.342 +  u_strToUTF32 ((UChar32*) decomposed, HB_UNICODE_MAX_DECOMPOSITION_LEN, &utf32_len, normalized, len, &icu_err);
   1.343 +  if (icu_err)
   1.344 +    return 0;
   1.345 +
   1.346 +  return utf32_len;
   1.347 +}
   1.348 +
   1.349 +
   1.350 +hb_unicode_funcs_t *
   1.351 +hb_icu_get_unicode_funcs (void)
   1.352 +{
   1.353 +  static const hb_unicode_funcs_t _hb_icu_unicode_funcs = {
   1.354 +    HB_OBJECT_HEADER_STATIC,
   1.355 +
   1.356 +    NULL, /* parent */
   1.357 +    true, /* immutable */
   1.358 +    {
   1.359 +#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_icu_unicode_##name,
   1.360 +      HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
   1.361 +#undef HB_UNICODE_FUNC_IMPLEMENT
   1.362 +    }
   1.363 +  };
   1.364 +
   1.365 +#if U_ICU_VERSION_MAJOR_NUM >= 49
   1.366 +  if (!hb_atomic_ptr_get (&normalizer)) {
   1.367 +    UErrorCode icu_err = U_ZERO_ERROR;
   1.368 +    /* We ignore failure in getNFCInstace(). */
   1.369 +    hb_atomic_ptr_cmpexch (&normalizer, NULL, unorm2_getNFCInstance (&icu_err));
   1.370 +  }
   1.371 +#endif
   1.372 +  return const_cast<hb_unicode_funcs_t *> (&_hb_icu_unicode_funcs);
   1.373 +}
   1.374 +
   1.375 +

mercurial