gfx/harfbuzz/src/hb-glib.cc

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/harfbuzz/src/hb-glib.cc	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,384 @@
     1.4 +/*
     1.5 + * Copyright © 2009  Red Hat, Inc.
     1.6 + * Copyright © 2011  Google, Inc.
     1.7 + *
     1.8 + *  This is part of HarfBuzz, a text shaping library.
     1.9 + *
    1.10 + * Permission is hereby granted, without written agreement and without
    1.11 + * license or royalty fees, to use, copy, modify, and distribute this
    1.12 + * software and its documentation for any purpose, provided that the
    1.13 + * above copyright notice and the following two paragraphs appear in
    1.14 + * all copies of this software.
    1.15 + *
    1.16 + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
    1.17 + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
    1.18 + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
    1.19 + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
    1.20 + * DAMAGE.
    1.21 + *
    1.22 + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
    1.23 + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    1.24 + * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    1.25 + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
    1.26 + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    1.27 + *
    1.28 + * Red Hat Author(s): Behdad Esfahbod
    1.29 + * Google Author(s): Behdad Esfahbod
    1.30 + */
    1.31 +
    1.32 +#include "hb-private.hh"
    1.33 +
    1.34 +#include "hb-glib.h"
    1.35 +
    1.36 +#include "hb-unicode-private.hh"
    1.37 +
    1.38 +
    1.39 +#if !GLIB_CHECK_VERSION(2,29,14)
    1.40 +static const hb_script_t
    1.41 +glib_script_to_script[] =
    1.42 +{
    1.43 +  HB_SCRIPT_COMMON,
    1.44 +  HB_SCRIPT_INHERITED,
    1.45 +  HB_SCRIPT_ARABIC,
    1.46 +  HB_SCRIPT_ARMENIAN,
    1.47 +  HB_SCRIPT_BENGALI,
    1.48 +  HB_SCRIPT_BOPOMOFO,
    1.49 +  HB_SCRIPT_CHEROKEE,
    1.50 +  HB_SCRIPT_COPTIC,
    1.51 +  HB_SCRIPT_CYRILLIC,
    1.52 +  HB_SCRIPT_DESERET,
    1.53 +  HB_SCRIPT_DEVANAGARI,
    1.54 +  HB_SCRIPT_ETHIOPIC,
    1.55 +  HB_SCRIPT_GEORGIAN,
    1.56 +  HB_SCRIPT_GOTHIC,
    1.57 +  HB_SCRIPT_GREEK,
    1.58 +  HB_SCRIPT_GUJARATI,
    1.59 +  HB_SCRIPT_GURMUKHI,
    1.60 +  HB_SCRIPT_HAN,
    1.61 +  HB_SCRIPT_HANGUL,
    1.62 +  HB_SCRIPT_HEBREW,
    1.63 +  HB_SCRIPT_HIRAGANA,
    1.64 +  HB_SCRIPT_KANNADA,
    1.65 +  HB_SCRIPT_KATAKANA,
    1.66 +  HB_SCRIPT_KHMER,
    1.67 +  HB_SCRIPT_LAO,
    1.68 +  HB_SCRIPT_LATIN,
    1.69 +  HB_SCRIPT_MALAYALAM,
    1.70 +  HB_SCRIPT_MONGOLIAN,
    1.71 +  HB_SCRIPT_MYANMAR,
    1.72 +  HB_SCRIPT_OGHAM,
    1.73 +  HB_SCRIPT_OLD_ITALIC,
    1.74 +  HB_SCRIPT_ORIYA,
    1.75 +  HB_SCRIPT_RUNIC,
    1.76 +  HB_SCRIPT_SINHALA,
    1.77 +  HB_SCRIPT_SYRIAC,
    1.78 +  HB_SCRIPT_TAMIL,
    1.79 +  HB_SCRIPT_TELUGU,
    1.80 +  HB_SCRIPT_THAANA,
    1.81 +  HB_SCRIPT_THAI,
    1.82 +  HB_SCRIPT_TIBETAN,
    1.83 +  HB_SCRIPT_CANADIAN_SYLLABICS,
    1.84 +  HB_SCRIPT_YI,
    1.85 +  HB_SCRIPT_TAGALOG,
    1.86 +  HB_SCRIPT_HANUNOO,
    1.87 +  HB_SCRIPT_BUHID,
    1.88 +  HB_SCRIPT_TAGBANWA,
    1.89 +
    1.90 +  /* Unicode-4.0 additions */
    1.91 +  HB_SCRIPT_BRAILLE,
    1.92 +  HB_SCRIPT_CYPRIOT,
    1.93 +  HB_SCRIPT_LIMBU,
    1.94 +  HB_SCRIPT_OSMANYA,
    1.95 +  HB_SCRIPT_SHAVIAN,
    1.96 +  HB_SCRIPT_LINEAR_B,
    1.97 +  HB_SCRIPT_TAI_LE,
    1.98 +  HB_SCRIPT_UGARITIC,
    1.99 +
   1.100 +  /* Unicode-4.1 additions */
   1.101 +  HB_SCRIPT_NEW_TAI_LUE,
   1.102 +  HB_SCRIPT_BUGINESE,
   1.103 +  HB_SCRIPT_GLAGOLITIC,
   1.104 +  HB_SCRIPT_TIFINAGH,
   1.105 +  HB_SCRIPT_SYLOTI_NAGRI,
   1.106 +  HB_SCRIPT_OLD_PERSIAN,
   1.107 +  HB_SCRIPT_KHAROSHTHI,
   1.108 +
   1.109 +  /* Unicode-5.0 additions */
   1.110 +  HB_SCRIPT_UNKNOWN,
   1.111 +  HB_SCRIPT_BALINESE,
   1.112 +  HB_SCRIPT_CUNEIFORM,
   1.113 +  HB_SCRIPT_PHOENICIAN,
   1.114 +  HB_SCRIPT_PHAGS_PA,
   1.115 +  HB_SCRIPT_NKO,
   1.116 +
   1.117 +  /* Unicode-5.1 additions */
   1.118 +  HB_SCRIPT_KAYAH_LI,
   1.119 +  HB_SCRIPT_LEPCHA,
   1.120 +  HB_SCRIPT_REJANG,
   1.121 +  HB_SCRIPT_SUNDANESE,
   1.122 +  HB_SCRIPT_SAURASHTRA,
   1.123 +  HB_SCRIPT_CHAM,
   1.124 +  HB_SCRIPT_OL_CHIKI,
   1.125 +  HB_SCRIPT_VAI,
   1.126 +  HB_SCRIPT_CARIAN,
   1.127 +  HB_SCRIPT_LYCIAN,
   1.128 +  HB_SCRIPT_LYDIAN,
   1.129 +
   1.130 +  /* Unicode-5.2 additions */
   1.131 +  HB_SCRIPT_AVESTAN,
   1.132 +  HB_SCRIPT_BAMUM,
   1.133 +  HB_SCRIPT_EGYPTIAN_HIEROGLYPHS,
   1.134 +  HB_SCRIPT_IMPERIAL_ARAMAIC,
   1.135 +  HB_SCRIPT_INSCRIPTIONAL_PAHLAVI,
   1.136 +  HB_SCRIPT_INSCRIPTIONAL_PARTHIAN,
   1.137 +  HB_SCRIPT_JAVANESE,
   1.138 +  HB_SCRIPT_KAITHI,
   1.139 +  HB_SCRIPT_TAI_THAM,
   1.140 +  HB_SCRIPT_LISU,
   1.141 +  HB_SCRIPT_MEETEI_MAYEK,
   1.142 +  HB_SCRIPT_OLD_SOUTH_ARABIAN,
   1.143 +  HB_SCRIPT_OLD_TURKIC,
   1.144 +  HB_SCRIPT_SAMARITAN,
   1.145 +  HB_SCRIPT_TAI_VIET,
   1.146 +
   1.147 +  /* Unicode-6.0 additions */
   1.148 +  HB_SCRIPT_BATAK,
   1.149 +  HB_SCRIPT_BRAHMI,
   1.150 +  HB_SCRIPT_MANDAIC,
   1.151 +
   1.152 +  /* Unicode-6.1 additions */
   1.153 +  HB_SCRIPT_CHAKMA,
   1.154 +  HB_SCRIPT_MEROITIC_CURSIVE,
   1.155 +  HB_SCRIPT_MEROITIC_HIEROGLYPHS,
   1.156 +  HB_SCRIPT_MIAO,
   1.157 +  HB_SCRIPT_SHARADA,
   1.158 +  HB_SCRIPT_SORA_SOMPENG,
   1.159 +  HB_SCRIPT_TAKRI
   1.160 +};
   1.161 +#endif
   1.162 +
   1.163 +hb_script_t
   1.164 +hb_glib_script_to_script (GUnicodeScript script)
   1.165 +{
   1.166 +#if GLIB_CHECK_VERSION(2,29,14)
   1.167 +  return (hb_script_t) g_unicode_script_to_iso15924 (script);
   1.168 +#else
   1.169 +  if (likely ((unsigned int) script < ARRAY_LENGTH (glib_script_to_script)))
   1.170 +    return glib_script_to_script[script];
   1.171 +
   1.172 +  if (unlikely (script == G_UNICODE_SCRIPT_INVALID_CODE))
   1.173 +    return HB_SCRIPT_INVALID;
   1.174 +
   1.175 +  return HB_SCRIPT_UNKNOWN;
   1.176 +#endif
   1.177 +}
   1.178 +
   1.179 +GUnicodeScript
   1.180 +hb_glib_script_from_script (hb_script_t script)
   1.181 +{
   1.182 +#if GLIB_CHECK_VERSION(2,29,14)
   1.183 +  return g_unicode_script_from_iso15924 (script);
   1.184 +#else
   1.185 +  unsigned int count = ARRAY_LENGTH (glib_script_to_script);
   1.186 +  for (unsigned int i = 0; i < count; i++)
   1.187 +    if (glib_script_to_script[i] == script)
   1.188 +      return (GUnicodeScript) i;
   1.189 +
   1.190 +  if (unlikely (script == HB_SCRIPT_INVALID))
   1.191 +    return G_UNICODE_SCRIPT_INVALID_CODE;
   1.192 +
   1.193 +  return G_UNICODE_SCRIPT_UNKNOWN;
   1.194 +#endif
   1.195 +}
   1.196 +
   1.197 +
   1.198 +static hb_unicode_combining_class_t
   1.199 +hb_glib_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.200 +				 hb_codepoint_t      unicode,
   1.201 +				 void               *user_data HB_UNUSED)
   1.202 +
   1.203 +{
   1.204 +  return (hb_unicode_combining_class_t) g_unichar_combining_class (unicode);
   1.205 +}
   1.206 +
   1.207 +static unsigned int
   1.208 +hb_glib_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.209 +				 hb_codepoint_t      unicode,
   1.210 +				 void               *user_data HB_UNUSED)
   1.211 +{
   1.212 +  return g_unichar_iswide (unicode) ? 2 : 1;
   1.213 +}
   1.214 +
   1.215 +static hb_unicode_general_category_t
   1.216 +hb_glib_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.217 +				  hb_codepoint_t      unicode,
   1.218 +				  void               *user_data HB_UNUSED)
   1.219 +
   1.220 +{
   1.221 +  /* hb_unicode_general_category_t and GUnicodeType are identical */
   1.222 +  return (hb_unicode_general_category_t) g_unichar_type (unicode);
   1.223 +}
   1.224 +
   1.225 +static hb_codepoint_t
   1.226 +hb_glib_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.227 +			   hb_codepoint_t      unicode,
   1.228 +			   void               *user_data HB_UNUSED)
   1.229 +{
   1.230 +  g_unichar_get_mirror_char (unicode, &unicode);
   1.231 +  return unicode;
   1.232 +}
   1.233 +
   1.234 +static hb_script_t
   1.235 +hb_glib_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.236 +			hb_codepoint_t      unicode,
   1.237 +			void               *user_data HB_UNUSED)
   1.238 +{
   1.239 +  return hb_glib_script_to_script (g_unichar_get_script (unicode));
   1.240 +}
   1.241 +
   1.242 +static hb_bool_t
   1.243 +hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.244 +			 hb_codepoint_t      a,
   1.245 +			 hb_codepoint_t      b,
   1.246 +			 hb_codepoint_t     *ab,
   1.247 +			 void               *user_data HB_UNUSED)
   1.248 +{
   1.249 +#if GLIB_CHECK_VERSION(2,29,12)
   1.250 +  return g_unichar_compose (a, b, ab);
   1.251 +#endif
   1.252 +
   1.253 +  /* We don't ifdef-out the fallback code such that compiler always
   1.254 +   * sees it and makes sure it's compilable. */
   1.255 +
   1.256 +  gchar utf8[12];
   1.257 +  gchar *normalized;
   1.258 +  int len;
   1.259 +  hb_bool_t ret;
   1.260 +
   1.261 +  len = g_unichar_to_utf8 (a, utf8);
   1.262 +  len += g_unichar_to_utf8 (b, utf8 + len);
   1.263 +  normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFC);
   1.264 +  len = g_utf8_strlen (normalized, -1);
   1.265 +  if (unlikely (!len))
   1.266 +    return false;
   1.267 +
   1.268 +  if (len == 1) {
   1.269 +    *ab = g_utf8_get_char (normalized);
   1.270 +    ret = true;
   1.271 +  } else {
   1.272 +    ret = false;
   1.273 +  }
   1.274 +
   1.275 +  g_free (normalized);
   1.276 +  return ret;
   1.277 +}
   1.278 +
   1.279 +static hb_bool_t
   1.280 +hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.281 +			   hb_codepoint_t      ab,
   1.282 +			   hb_codepoint_t     *a,
   1.283 +			   hb_codepoint_t     *b,
   1.284 +			   void               *user_data HB_UNUSED)
   1.285 +{
   1.286 +#if GLIB_CHECK_VERSION(2,29,12)
   1.287 +  return g_unichar_decompose (ab, a, b);
   1.288 +#endif
   1.289 +
   1.290 +  /* We don't ifdef-out the fallback code such that compiler always
   1.291 +   * sees it and makes sure it's compilable. */
   1.292 +
   1.293 +  gchar utf8[6];
   1.294 +  gchar *normalized;
   1.295 +  int len;
   1.296 +  hb_bool_t ret;
   1.297 +
   1.298 +  len = g_unichar_to_utf8 (ab, utf8);
   1.299 +  normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFD);
   1.300 +  len = g_utf8_strlen (normalized, -1);
   1.301 +  if (unlikely (!len))
   1.302 +    return false;
   1.303 +
   1.304 +  if (len == 1) {
   1.305 +    *a = g_utf8_get_char (normalized);
   1.306 +    *b = 0;
   1.307 +    ret = *a != ab;
   1.308 +  } else if (len == 2) {
   1.309 +    *a = g_utf8_get_char (normalized);
   1.310 +    *b = g_utf8_get_char (g_utf8_next_char (normalized));
   1.311 +    /* Here's the ugly part: if ab decomposes to a single character and
   1.312 +     * that character decomposes again, we have to detect that and undo
   1.313 +     * the second part :-(. */
   1.314 +    gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC);
   1.315 +    hb_codepoint_t c = g_utf8_get_char (recomposed);
   1.316 +    if (c != ab && c != *a) {
   1.317 +      *a = c;
   1.318 +      *b = 0;
   1.319 +    }
   1.320 +    g_free (recomposed);
   1.321 +    ret = true;
   1.322 +  } else {
   1.323 +    /* If decomposed to more than two characters, take the last one,
   1.324 +     * and recompose the rest to get the first component. */
   1.325 +    gchar *end = g_utf8_offset_to_pointer (normalized, len - 1);
   1.326 +    gchar *recomposed;
   1.327 +    *b = g_utf8_get_char (end);
   1.328 +    recomposed = g_utf8_normalize (normalized, end - normalized, G_NORMALIZE_NFC);
   1.329 +    /* We expect that recomposed has exactly one character now. */
   1.330 +    *a = g_utf8_get_char (recomposed);
   1.331 +    g_free (recomposed);
   1.332 +    ret = true;
   1.333 +  }
   1.334 +
   1.335 +  g_free (normalized);
   1.336 +  return ret;
   1.337 +}
   1.338 +
   1.339 +static unsigned int
   1.340 +hb_glib_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs HB_UNUSED,
   1.341 +					 hb_codepoint_t      u,
   1.342 +					 hb_codepoint_t     *decomposed,
   1.343 +					 void               *user_data HB_UNUSED)
   1.344 +{
   1.345 +#if GLIB_CHECK_VERSION(2,29,12)
   1.346 +  return g_unichar_fully_decompose (u, TRUE, decomposed, HB_UNICODE_MAX_DECOMPOSITION_LEN);
   1.347 +#endif
   1.348 +
   1.349 +  /* If the user doesn't have GLib >= 2.29.12 we have to perform
   1.350 +   * a round trip to UTF-8 and the associated memory management dance. */
   1.351 +  gchar utf8[6];
   1.352 +  gchar *utf8_decomposed, *c;
   1.353 +  gsize utf8_len, utf8_decomposed_len, i;
   1.354 +
   1.355 +  /* Convert @u to UTF-8 and normalise it in NFKD mode. This performs the compatibility decomposition. */
   1.356 +  utf8_len = g_unichar_to_utf8 (u, utf8);
   1.357 +  utf8_decomposed = g_utf8_normalize (utf8, utf8_len, G_NORMALIZE_NFKD);
   1.358 +  utf8_decomposed_len = g_utf8_strlen (utf8_decomposed, -1);
   1.359 +
   1.360 +  assert (utf8_decomposed_len <= HB_UNICODE_MAX_DECOMPOSITION_LEN);
   1.361 +
   1.362 +  for (i = 0, c = utf8_decomposed; i < utf8_decomposed_len; i++, c = g_utf8_next_char (c))
   1.363 +    *decomposed++ = g_utf8_get_char (c);
   1.364 +
   1.365 +  g_free (utf8_decomposed);
   1.366 +
   1.367 +  return utf8_decomposed_len;
   1.368 +}
   1.369 +
   1.370 +hb_unicode_funcs_t *
   1.371 +hb_glib_get_unicode_funcs (void)
   1.372 +{
   1.373 +  static const hb_unicode_funcs_t _hb_glib_unicode_funcs = {
   1.374 +    HB_OBJECT_HEADER_STATIC,
   1.375 +
   1.376 +    NULL, /* parent */
   1.377 +    true, /* immutable */
   1.378 +    {
   1.379 +#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_glib_unicode_##name,
   1.380 +      HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
   1.381 +#undef HB_UNICODE_FUNC_IMPLEMENT
   1.382 +    }
   1.383 +  };
   1.384 +
   1.385 +  return const_cast<hb_unicode_funcs_t *> (&_hb_glib_unicode_funcs);
   1.386 +}
   1.387 +

mercurial