gfx/harfbuzz/src/hb-ot-shape-complex-arabic.cc

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/harfbuzz/src/hb-ot-shape-complex-arabic.cc	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,374 @@
     1.4 +/*
     1.5 + * Copyright © 2010,2012  Google, Inc.
     1.6 + *
     1.7 + *  This is part of HarfBuzz, a text shaping library.
     1.8 + *
     1.9 + * Permission is hereby granted, without written agreement and without
    1.10 + * license or royalty fees, to use, copy, modify, and distribute this
    1.11 + * software and its documentation for any purpose, provided that the
    1.12 + * above copyright notice and the following two paragraphs appear in
    1.13 + * all copies of this software.
    1.14 + *
    1.15 + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
    1.16 + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
    1.17 + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
    1.18 + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
    1.19 + * DAMAGE.
    1.20 + *
    1.21 + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
    1.22 + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    1.23 + * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
    1.24 + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
    1.25 + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
    1.26 + *
    1.27 + * Google Author(s): Behdad Esfahbod
    1.28 + */
    1.29 +
    1.30 +#include "hb-ot-shape-complex-private.hh"
    1.31 +#include "hb-ot-shape-private.hh"
    1.32 +
    1.33 +
    1.34 +/* buffer var allocations */
    1.35 +#define arabic_shaping_action() complex_var_u8_0() /* arabic shaping action */
    1.36 +
    1.37 +
    1.38 +/*
    1.39 + * Bits used in the joining tables
    1.40 + */
    1.41 +enum {
    1.42 +  JOINING_TYPE_U		= 0,
    1.43 +  JOINING_TYPE_L		= 1,
    1.44 +  JOINING_TYPE_R		= 2,
    1.45 +  JOINING_TYPE_D		= 3,
    1.46 +  JOINING_TYPE_C		= JOINING_TYPE_D,
    1.47 +  JOINING_GROUP_ALAPH		= 4,
    1.48 +  JOINING_GROUP_DALATH_RISH	= 5,
    1.49 +  NUM_STATE_MACHINE_COLS	= 6,
    1.50 +
    1.51 +  JOINING_TYPE_T = 7,
    1.52 +  JOINING_TYPE_X = 8  /* means: use general-category to choose between U or T. */
    1.53 +};
    1.54 +
    1.55 +/*
    1.56 + * Joining types:
    1.57 + */
    1.58 +
    1.59 +#include "hb-ot-shape-complex-arabic-table.hh"
    1.60 +
    1.61 +static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat)
    1.62 +{
    1.63 +  if (likely (hb_in_range<hb_codepoint_t> (u, JOINING_TABLE_FIRST, JOINING_TABLE_LAST))) {
    1.64 +    unsigned int j_type = joining_table[u - JOINING_TABLE_FIRST];
    1.65 +    if (likely (j_type != JOINING_TYPE_X))
    1.66 +      return j_type;
    1.67 +  }
    1.68 +
    1.69 +  /* Mongolian joining data is not in ArabicJoining.txt yet. */
    1.70 +  if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1800, 0x18AF)))
    1.71 +  {
    1.72 +    if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1880, 0x1886)))
    1.73 +      return JOINING_TYPE_U;
    1.74 +
    1.75 +    /* All letters, SIBE SYLLABLE BOUNDARY MARKER, and NIRUGU are D */
    1.76 +    if ((FLAG(gen_cat) & (FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) |
    1.77 +			  FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER)))
    1.78 +	|| u == 0x1807 || u == 0x180A)
    1.79 +      return JOINING_TYPE_D;
    1.80 +  }
    1.81 +
    1.82 +  /* 'Phags-pa joining data is not in ArabicJoining.txt yet. */
    1.83 +  if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xA840, 0xA872)))
    1.84 +  {
    1.85 +      if (unlikely (u == 0xA872))
    1.86 +	return JOINING_TYPE_L;
    1.87 +
    1.88 +      return JOINING_TYPE_D;
    1.89 +  }
    1.90 +
    1.91 +  if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x200C, 0x200D)))
    1.92 +  {
    1.93 +    return u == 0x200C ? JOINING_TYPE_U : JOINING_TYPE_C;
    1.94 +  }
    1.95 +
    1.96 +  return (FLAG(gen_cat) & (FLAG(HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | FLAG(HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | FLAG(HB_UNICODE_GENERAL_CATEGORY_FORMAT))) ?
    1.97 +	 JOINING_TYPE_T : JOINING_TYPE_U;
    1.98 +}
    1.99 +
   1.100 +static const hb_tag_t arabic_features[] =
   1.101 +{
   1.102 +  HB_TAG('i','n','i','t'),
   1.103 +  HB_TAG('m','e','d','i'),
   1.104 +  HB_TAG('f','i','n','a'),
   1.105 +  HB_TAG('i','s','o','l'),
   1.106 +  /* Syriac */
   1.107 +  HB_TAG('m','e','d','2'),
   1.108 +  HB_TAG('f','i','n','2'),
   1.109 +  HB_TAG('f','i','n','3'),
   1.110 +  HB_TAG_NONE
   1.111 +};
   1.112 +
   1.113 +
   1.114 +/* Same order as the feature array */
   1.115 +enum {
   1.116 +  INIT,
   1.117 +  MEDI,
   1.118 +  FINA,
   1.119 +  ISOL,
   1.120 +
   1.121 +  /* Syriac */
   1.122 +  MED2,
   1.123 +  FIN2,
   1.124 +  FIN3,
   1.125 +
   1.126 +  NONE,
   1.127 +
   1.128 +  ARABIC_NUM_FEATURES = NONE
   1.129 +};
   1.130 +
   1.131 +static const struct arabic_state_table_entry {
   1.132 +	uint8_t prev_action;
   1.133 +	uint8_t curr_action;
   1.134 +	uint16_t next_state;
   1.135 +} arabic_state_table[][NUM_STATE_MACHINE_COLS] =
   1.136 +{
   1.137 +  /*   jt_U,          jt_L,          jt_R,          jt_D,          jg_ALAPH,      jg_DALATH_RISH */
   1.138 +
   1.139 +  /* State 0: prev was U, not willing to join. */
   1.140 +  { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,6}, },
   1.141 +
   1.142 +  /* State 1: prev was R or ISOL/ALAPH, not willing to join. */
   1.143 +  { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,FIN2,5}, {NONE,ISOL,6}, },
   1.144 +
   1.145 +  /* State 2: prev was D/L in ISOL form, willing to join. */
   1.146 +  { {NONE,NONE,0}, {NONE,ISOL,2}, {INIT,FINA,1}, {INIT,FINA,3}, {INIT,FINA,4}, {INIT,FINA,6}, },
   1.147 +
   1.148 +  /* State 3: prev was D in FINA form, willing to join. */
   1.149 +  { {NONE,NONE,0}, {NONE,ISOL,2}, {MEDI,FINA,1}, {MEDI,FINA,3}, {MEDI,FINA,4}, {MEDI,FINA,6}, },
   1.150 +
   1.151 +  /* State 4: prev was FINA ALAPH, not willing to join. */
   1.152 +  { {NONE,NONE,0}, {NONE,ISOL,2}, {MED2,ISOL,1}, {MED2,ISOL,2}, {MED2,FIN2,5}, {MED2,ISOL,6}, },
   1.153 +
   1.154 +  /* State 5: prev was FIN2/FIN3 ALAPH, not willing to join. */
   1.155 +  { {NONE,NONE,0}, {NONE,ISOL,2}, {ISOL,ISOL,1}, {ISOL,ISOL,2}, {ISOL,FIN2,5}, {ISOL,ISOL,6}, },
   1.156 +
   1.157 +  /* State 6: prev was DALATH/RISH, not willing to join. */
   1.158 +  { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,FIN3,5}, {NONE,ISOL,6}, }
   1.159 +};
   1.160 +
   1.161 +
   1.162 +static void
   1.163 +nuke_joiners (const hb_ot_shape_plan_t *plan,
   1.164 +	      hb_font_t *font,
   1.165 +	      hb_buffer_t *buffer);
   1.166 +
   1.167 +static void
   1.168 +arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
   1.169 +		       hb_font_t *font,
   1.170 +		       hb_buffer_t *buffer);
   1.171 +
   1.172 +static void
   1.173 +collect_features_arabic (hb_ot_shape_planner_t *plan)
   1.174 +{
   1.175 +  hb_ot_map_builder_t *map = &plan->map;
   1.176 +
   1.177 +  /* For Language forms (in ArabicOT speak), we do the iso/fina/medi/init together,
   1.178 +   * then rlig and calt each in their own stage.  This makes IranNastaliq's ALLAH
   1.179 +   * ligature work correctly. It's unfortunate though...
   1.180 +   *
   1.181 +   * This also makes Arial Bold in Windows7 work.  See:
   1.182 +   * https://bugzilla.mozilla.org/show_bug.cgi?id=644184
   1.183 +   *
   1.184 +   * TODO: Add test cases for these two.
   1.185 +   */
   1.186 +
   1.187 +  map->add_gsub_pause (nuke_joiners);
   1.188 +
   1.189 +  map->add_global_bool_feature (HB_TAG('c','c','m','p'));
   1.190 +  map->add_global_bool_feature (HB_TAG('l','o','c','l'));
   1.191 +
   1.192 +  map->add_gsub_pause (NULL);
   1.193 +
   1.194 +  for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++)
   1.195 +    map->add_feature (arabic_features[i], 1, i < 4 ? F_HAS_FALLBACK : F_NONE); /* The first four features have fallback. */
   1.196 +
   1.197 +  map->add_gsub_pause (NULL);
   1.198 +
   1.199 +  map->add_feature (HB_TAG('r','l','i','g'), 1, F_GLOBAL|F_HAS_FALLBACK);
   1.200 +  map->add_gsub_pause (arabic_fallback_shape);
   1.201 +
   1.202 +  map->add_global_bool_feature (HB_TAG('c','a','l','t'));
   1.203 +  map->add_gsub_pause (NULL);
   1.204 +
   1.205 +  map->add_global_bool_feature (HB_TAG('m','s','e','t'));
   1.206 +}
   1.207 +
   1.208 +#include "hb-ot-shape-complex-arabic-fallback.hh"
   1.209 +
   1.210 +struct arabic_shape_plan_t
   1.211 +{
   1.212 +  ASSERT_POD ();
   1.213 +
   1.214 +  /* The "+ 1" in the next array is to accommodate for the "NONE" command,
   1.215 +   * which is not an OpenType feature, but this simplifies the code by not
   1.216 +   * having to do a "if (... < NONE) ..." and just rely on the fact that
   1.217 +   * mask_array[NONE] == 0. */
   1.218 +  hb_mask_t mask_array[ARABIC_NUM_FEATURES + 1];
   1.219 +
   1.220 +  bool do_fallback;
   1.221 +  arabic_fallback_plan_t *fallback_plan;
   1.222 +};
   1.223 +
   1.224 +static void *
   1.225 +data_create_arabic (const hb_ot_shape_plan_t *plan)
   1.226 +{
   1.227 +  arabic_shape_plan_t *arabic_plan = (arabic_shape_plan_t *) calloc (1, sizeof (arabic_shape_plan_t));
   1.228 +  if (unlikely (!arabic_plan))
   1.229 +    return NULL;
   1.230 +
   1.231 +  arabic_plan->do_fallback = plan->props.script == HB_SCRIPT_ARABIC;
   1.232 +  for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++) {
   1.233 +    arabic_plan->mask_array[i] = plan->map.get_1_mask (arabic_features[i]);
   1.234 +    if (i < 4)
   1.235 +      arabic_plan->do_fallback = arabic_plan->do_fallback && plan->map.needs_fallback (arabic_features[i]);
   1.236 +  }
   1.237 +
   1.238 +  return arabic_plan;
   1.239 +}
   1.240 +
   1.241 +static void
   1.242 +data_destroy_arabic (void *data)
   1.243 +{
   1.244 +  arabic_shape_plan_t *arabic_plan = (arabic_shape_plan_t *) data;
   1.245 +
   1.246 +  arabic_fallback_plan_destroy (arabic_plan->fallback_plan);
   1.247 +
   1.248 +  free (data);
   1.249 +}
   1.250 +
   1.251 +static void
   1.252 +arabic_joining (hb_buffer_t *buffer)
   1.253 +{
   1.254 +  unsigned int count = buffer->len;
   1.255 +  unsigned int prev = (unsigned int) -1, state = 0;
   1.256 +
   1.257 +  HB_BUFFER_ALLOCATE_VAR (buffer, arabic_shaping_action);
   1.258 +
   1.259 +  /* Check pre-context */
   1.260 +  if (!(buffer->flags & HB_BUFFER_FLAG_BOT))
   1.261 +    for (unsigned int i = 0; i < buffer->context_len[0]; i++)
   1.262 +    {
   1.263 +      unsigned int this_type = get_joining_type (buffer->context[0][i], buffer->unicode->general_category (buffer->context[0][i]));
   1.264 +
   1.265 +      if (unlikely (this_type == JOINING_TYPE_T))
   1.266 +	continue;
   1.267 +
   1.268 +      const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
   1.269 +      state = entry->next_state;
   1.270 +      break;
   1.271 +    }
   1.272 +
   1.273 +  for (unsigned int i = 0; i < count; i++)
   1.274 +  {
   1.275 +    unsigned int this_type = get_joining_type (buffer->info[i].codepoint, _hb_glyph_info_get_general_category (&buffer->info[i]));
   1.276 +
   1.277 +    if (unlikely (this_type == JOINING_TYPE_T)) {
   1.278 +      buffer->info[i].arabic_shaping_action() = NONE;
   1.279 +      continue;
   1.280 +    }
   1.281 +
   1.282 +    const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
   1.283 +
   1.284 +    if (entry->prev_action != NONE && prev != (unsigned int) -1)
   1.285 +      for (; prev < i; prev++)
   1.286 +	buffer->info[prev].arabic_shaping_action() = entry->prev_action;
   1.287 +
   1.288 +    buffer->info[i].arabic_shaping_action() = entry->curr_action;
   1.289 +
   1.290 +    prev = i;
   1.291 +    state = entry->next_state;
   1.292 +  }
   1.293 +
   1.294 +  if (!(buffer->flags & HB_BUFFER_FLAG_EOT))
   1.295 +    for (unsigned int i = 0; i < buffer->context_len[1]; i++)
   1.296 +    {
   1.297 +      unsigned int this_type = get_joining_type (buffer->context[1][i], buffer->unicode->general_category (buffer->context[1][i]));
   1.298 +
   1.299 +      if (unlikely (this_type == JOINING_TYPE_T))
   1.300 +	continue;
   1.301 +
   1.302 +      const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
   1.303 +      if (entry->prev_action != NONE && prev != (unsigned int) -1)
   1.304 +	buffer->info[prev].arabic_shaping_action() = entry->prev_action;
   1.305 +      break;
   1.306 +    }
   1.307 +
   1.308 +
   1.309 +  HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
   1.310 +}
   1.311 +
   1.312 +static void
   1.313 +setup_masks_arabic (const hb_ot_shape_plan_t *plan,
   1.314 +		    hb_buffer_t              *buffer,
   1.315 +		    hb_font_t                *font HB_UNUSED)
   1.316 +{
   1.317 +  const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
   1.318 +
   1.319 +  arabic_joining (buffer);
   1.320 +  unsigned int count = buffer->len;
   1.321 +  for (unsigned int i = 0; i < count; i++)
   1.322 +    buffer->info[i].mask |= arabic_plan->mask_array[buffer->info[i].arabic_shaping_action()];
   1.323 +}
   1.324 +
   1.325 +
   1.326 +static void
   1.327 +nuke_joiners (const hb_ot_shape_plan_t *plan HB_UNUSED,
   1.328 +	      hb_font_t *font HB_UNUSED,
   1.329 +	      hb_buffer_t *buffer)
   1.330 +{
   1.331 +  unsigned int count = buffer->len;
   1.332 +  for (unsigned int i = 0; i < count; i++)
   1.333 +    if (_hb_glyph_info_is_zwj (&buffer->info[i]))
   1.334 +      _hb_glyph_info_flip_joiners (&buffer->info[i]);
   1.335 +}
   1.336 +
   1.337 +static void
   1.338 +arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
   1.339 +		       hb_font_t *font,
   1.340 +		       hb_buffer_t *buffer)
   1.341 +{
   1.342 +  const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
   1.343 +
   1.344 +  if (!arabic_plan->do_fallback)
   1.345 +    return;
   1.346 +
   1.347 +retry:
   1.348 +  arabic_fallback_plan_t *fallback_plan = (arabic_fallback_plan_t *) hb_atomic_ptr_get (&arabic_plan->fallback_plan);
   1.349 +  if (unlikely (!fallback_plan))
   1.350 +  {
   1.351 +    /* This sucks.  We need a font to build the fallback plan... */
   1.352 +    fallback_plan = arabic_fallback_plan_create (plan, font);
   1.353 +    if (unlikely (!hb_atomic_ptr_cmpexch (&(const_cast<arabic_shape_plan_t *> (arabic_plan))->fallback_plan, NULL, fallback_plan))) {
   1.354 +      arabic_fallback_plan_destroy (fallback_plan);
   1.355 +      goto retry;
   1.356 +    }
   1.357 +  }
   1.358 +
   1.359 +  arabic_fallback_plan_shape (fallback_plan, font, buffer);
   1.360 +}
   1.361 +
   1.362 +
   1.363 +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
   1.364 +{
   1.365 +  "arabic",
   1.366 +  collect_features_arabic,
   1.367 +  NULL, /* override_features */
   1.368 +  data_create_arabic,
   1.369 +  data_destroy_arabic,
   1.370 +  NULL, /* preprocess_text_arabic */
   1.371 +  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
   1.372 +  NULL, /* decompose */
   1.373 +  NULL, /* compose */
   1.374 +  setup_masks_arabic,
   1.375 +  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   1.376 +  true, /* fallback_position */
   1.377 +};

mercurial