1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/harfbuzz/src/hb-ot-shape-complex-hangul.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,417 @@ 1.4 +/* 1.5 + * Copyright © 2013 Google, Inc. 1.6 + * 1.7 + * This is part of HarfBuzz, a text shaping library. 1.8 + * 1.9 + * Permission is hereby granted, without written agreement and without 1.10 + * license or royalty fees, to use, copy, modify, and distribute this 1.11 + * software and its documentation for any purpose, provided that the 1.12 + * above copyright notice and the following two paragraphs appear in 1.13 + * all copies of this software. 1.14 + * 1.15 + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 1.16 + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 1.17 + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 1.18 + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 1.19 + * DAMAGE. 1.20 + * 1.21 + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 1.22 + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 1.23 + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 1.24 + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 1.25 + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 1.26 + * 1.27 + * Google Author(s): Behdad Esfahbod 1.28 + */ 1.29 + 1.30 +#include "hb-ot-shape-complex-private.hh" 1.31 + 1.32 + 1.33 +/* Hangul shaper */ 1.34 + 1.35 + 1.36 +/* Same order as the feature array below */ 1.37 +enum { 1.38 + NONE, 1.39 + 1.40 + LJMO, 1.41 + VJMO, 1.42 + TJMO, 1.43 + 1.44 + FIRST_HANGUL_FEATURE = LJMO, 1.45 + HANGUL_FEATURE_COUNT = TJMO + 1 1.46 +}; 1.47 + 1.48 +static const hb_tag_t hangul_features[HANGUL_FEATURE_COUNT] = 1.49 +{ 1.50 + HB_TAG_NONE, 1.51 + HB_TAG('l','j','m','o'), 1.52 + HB_TAG('v','j','m','o'), 1.53 + HB_TAG('t','j','m','o') 1.54 +}; 1.55 + 1.56 +static void 1.57 +collect_features_hangul (hb_ot_shape_planner_t *plan) 1.58 +{ 1.59 + hb_ot_map_builder_t *map = &plan->map; 1.60 + 1.61 + for (unsigned int i = FIRST_HANGUL_FEATURE; i < HANGUL_FEATURE_COUNT; i++) 1.62 + map->add_feature (hangul_features[i], 1, F_NONE); 1.63 +} 1.64 + 1.65 +struct hangul_shape_plan_t 1.66 +{ 1.67 + ASSERT_POD (); 1.68 + 1.69 + hb_mask_t mask_array[HANGUL_FEATURE_COUNT]; 1.70 +}; 1.71 + 1.72 +static void * 1.73 +data_create_hangul (const hb_ot_shape_plan_t *plan) 1.74 +{ 1.75 + hangul_shape_plan_t *hangul_plan = (hangul_shape_plan_t *) calloc (1, sizeof (hangul_shape_plan_t)); 1.76 + if (unlikely (!hangul_plan)) 1.77 + return NULL; 1.78 + 1.79 + for (unsigned int i = 0; i < HANGUL_FEATURE_COUNT; i++) 1.80 + hangul_plan->mask_array[i] = plan->map.get_1_mask (hangul_features[i]); 1.81 + 1.82 + return hangul_plan; 1.83 +} 1.84 + 1.85 +static void 1.86 +data_destroy_hangul (void *data) 1.87 +{ 1.88 + free (data); 1.89 +} 1.90 + 1.91 +/* Constants for algorithmic hangul syllable [de]composition. */ 1.92 +#define LBase 0x1100 1.93 +#define VBase 0x1161 1.94 +#define TBase 0x11A7 1.95 +#define LCount 19 1.96 +#define VCount 21 1.97 +#define TCount 28 1.98 +#define SBase 0xAC00 1.99 +#define NCount (VCount * TCount) 1.100 +#define SCount (LCount * NCount) 1.101 + 1.102 +#define isCombiningL(u) (hb_in_range<hb_codepoint_t> ((u), LBase, LBase+LCount-1)) 1.103 +#define isCombiningV(u) (hb_in_range<hb_codepoint_t> ((u), VBase, VBase+VCount-1)) 1.104 +#define isCombiningT(u) (hb_in_range<hb_codepoint_t> ((u), TBase+1, TBase+TCount-1)) 1.105 +#define isCombinedS(u) (hb_in_range<hb_codepoint_t> ((u), SBase, SBase+SCount-1)) 1.106 + 1.107 +#define isL(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x1100, 0x115F, 0xA960, 0xA97C)) 1.108 +#define isV(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x1160, 0x11A7, 0xD7B0, 0xD7C6)) 1.109 +#define isT(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x11A8, 0x11FF, 0xD7CB, 0xD7FB)) 1.110 + 1.111 +#define isHangulTone(u) (hb_in_range<hb_codepoint_t> ((u), 0x302e, 0x302f)) 1.112 + 1.113 +/* buffer var allocations */ 1.114 +#define hangul_shaping_feature() complex_var_u8_0() /* hangul jamo shaping feature */ 1.115 + 1.116 +static bool 1.117 +is_zero_width_char (hb_font_t *font, 1.118 + hb_codepoint_t unicode) 1.119 +{ 1.120 + hb_codepoint_t glyph; 1.121 + return hb_font_get_glyph (font, unicode, 0, &glyph) && hb_font_get_glyph_h_advance (font, glyph) == 0; 1.122 +} 1.123 + 1.124 +static void 1.125 +preprocess_text_hangul (const hb_ot_shape_plan_t *plan, 1.126 + hb_buffer_t *buffer, 1.127 + hb_font_t *font) 1.128 +{ 1.129 + HB_BUFFER_ALLOCATE_VAR (buffer, hangul_shaping_feature); 1.130 + 1.131 + /* Hangul syllables come in two shapes: LV, and LVT. Of those: 1.132 + * 1.133 + * - LV can be precomposed, or decomposed. Lets call those 1.134 + * <LV> and <L,V>, 1.135 + * - LVT can be fully precomposed, partically precomposed, or 1.136 + * fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>. 1.137 + * 1.138 + * The composition / decomposition is mechanical. However, not 1.139 + * all <L,V> sequences compose, and not all <LV,T> sequences 1.140 + * compose. 1.141 + * 1.142 + * Here are the specifics: 1.143 + * 1.144 + * - <L>: U+1100..115F, U+A960..A97F 1.145 + * - <V>: U+1160..11A7, U+D7B0..D7C7 1.146 + * - <T>: U+11A8..11FF, U+D7CB..D7FB 1.147 + * 1.148 + * - Only the <L,V> sequences for the 11xx ranges combine. 1.149 + * - Only <LV,T> sequences for T in U+11A8..11C3 combine. 1.150 + * 1.151 + * Here is what we want to accomplish in this shaper: 1.152 + * 1.153 + * - If the whole syllable can be precomposed, do that, 1.154 + * - Otherwise, fully decompose and apply ljmo/vjmo/tjmo features. 1.155 + * - If a valid syllable is followed by a Hangul tone mark, reorder the tone 1.156 + * mark to precede the whole syllable - unless it is a zero-width glyph, in 1.157 + * which case we leave it untouched, assuming it's designed to overstrike. 1.158 + * 1.159 + * That is, of the different possible syllables: 1.160 + * 1.161 + * <L> 1.162 + * <L,V> 1.163 + * <L,V,T> 1.164 + * <LV> 1.165 + * <LVT> 1.166 + * <LV, T> 1.167 + * 1.168 + * - <L> needs no work. 1.169 + * 1.170 + * - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we 1.171 + * should fully decompose them if font supports. 1.172 + * 1.173 + * - <L,V> and <L,V,T> we should compose if the whole thing can be composed. 1.174 + * 1.175 + * - <LV,T> we should compose if the whole thing can be composed, otherwise we should 1.176 + * decompose. 1.177 + */ 1.178 + 1.179 + buffer->clear_output (); 1.180 + unsigned int start = 0, end = 0; /* Extent of most recently seen syllable; 1.181 + * valid only if start < end 1.182 + */ 1.183 + unsigned int count = buffer->len; 1.184 + 1.185 + for (buffer->idx = 0; buffer->idx < count;) 1.186 + { 1.187 + hb_codepoint_t u = buffer->cur().codepoint; 1.188 + 1.189 + if (isHangulTone (u)) 1.190 + { 1.191 + /* 1.192 + * We could cache the width of the tone marks and the existence of dotted-circle, 1.193 + * but the use of the Hangul tone mark characters seems to be rare enough that 1.194 + * I didn't bother for now. 1.195 + */ 1.196 + if (start < end && end == buffer->out_len) 1.197 + { 1.198 + /* Tone mark follows a valid syllable; move it in front, unless it's zero width. */ 1.199 + buffer->next_glyph (); 1.200 + if (!is_zero_width_char (font, u)) 1.201 + { 1.202 + hb_glyph_info_t *info = buffer->out_info; 1.203 + hb_glyph_info_t tone = info[end]; 1.204 + memmove (&info[start + 1], &info[start], (end - start) * sizeof (hb_glyph_info_t)); 1.205 + info[start] = tone; 1.206 + } 1.207 + /* Merge clusters across the (possibly reordered) syllable+tone. 1.208 + * We want to merge even in the zero-width tone mark case here, 1.209 + * so that clustering behavior isn't dependent on how the tone mark 1.210 + * is handled by the font. 1.211 + */ 1.212 + buffer->merge_out_clusters (start, end + 1); 1.213 + } 1.214 + else 1.215 + { 1.216 + /* No valid syllable as base for tone mark; try to insert dotted circle. */ 1.217 + if (font->has_glyph (0x25cc)) 1.218 + { 1.219 + hb_codepoint_t chars[2]; 1.220 + if (!is_zero_width_char (font, u)) { 1.221 + chars[0] = u; 1.222 + chars[1] = 0x25cc; 1.223 + } else { 1.224 + chars[0] = 0x25cc; 1.225 + chars[1] = u; 1.226 + } 1.227 + buffer->replace_glyphs (1, 2, chars); 1.228 + } 1.229 + else 1.230 + { 1.231 + /* No dotted circle available in the font; just leave tone mark untouched. */ 1.232 + buffer->next_glyph (); 1.233 + } 1.234 + } 1.235 + start = end = buffer->out_len; 1.236 + continue; 1.237 + } 1.238 + 1.239 + start = buffer->out_len; /* Remember current position as a potential syllable start; 1.240 + * will only be used if we set end to a later position. 1.241 + */ 1.242 + 1.243 + if (isL (u) && buffer->idx + 1 < count) 1.244 + { 1.245 + hb_codepoint_t l = u; 1.246 + hb_codepoint_t v = buffer->cur(+1).codepoint; 1.247 + if (isV (v)) 1.248 + { 1.249 + /* Have <L,V> or <L,V,T>. */ 1.250 + hb_codepoint_t t = 0; 1.251 + unsigned int tindex = 0; 1.252 + if (buffer->idx + 2 < count) 1.253 + { 1.254 + t = buffer->cur(+2).codepoint; 1.255 + if (isT (t)) 1.256 + tindex = t - TBase; /* Only used if isCombiningT (t); otherwise invalid. */ 1.257 + else 1.258 + t = 0; /* The next character was not a trailing jamo. */ 1.259 + } 1.260 + 1.261 + /* We've got a syllable <L,V,T?>; see if it can potentially be composed. */ 1.262 + if (isCombiningL (l) && isCombiningV (v) && (t == 0 || isCombiningT (t))) 1.263 + { 1.264 + /* Try to compose; if this succeeds, end is set to start+1. */ 1.265 + hb_codepoint_t s = SBase + (l - LBase) * NCount + (v - VBase) * TCount + tindex; 1.266 + if (font->has_glyph (s)) 1.267 + { 1.268 + buffer->replace_glyphs (t ? 3 : 2, 1, &s); 1.269 + if (unlikely (buffer->in_error)) 1.270 + return; 1.271 + end = start + 1; 1.272 + continue; 1.273 + } 1.274 + } 1.275 + 1.276 + /* We didn't compose, either because it's an Old Hangul syllable without a 1.277 + * precomposed character in Unicode, or because the font didn't support the 1.278 + * necessary precomposed glyph. 1.279 + * Set jamo features on the individual glyphs, and advance past them. 1.280 + */ 1.281 + buffer->cur().hangul_shaping_feature() = LJMO; 1.282 + buffer->next_glyph (); 1.283 + buffer->cur().hangul_shaping_feature() = VJMO; 1.284 + buffer->next_glyph (); 1.285 + if (t) 1.286 + { 1.287 + buffer->cur().hangul_shaping_feature() = TJMO; 1.288 + buffer->next_glyph (); 1.289 + end = start + 3; 1.290 + } 1.291 + else 1.292 + end = start + 2; 1.293 + buffer->merge_out_clusters (start, end); 1.294 + continue; 1.295 + } 1.296 + } 1.297 + 1.298 + else if (isCombinedS (u)) 1.299 + { 1.300 + /* Have <LV>, <LVT>, or <LV,T> */ 1.301 + hb_codepoint_t s = u; 1.302 + bool has_glyph = font->has_glyph (s); 1.303 + unsigned int lindex = (s - SBase) / NCount; 1.304 + unsigned int nindex = (s - SBase) % NCount; 1.305 + unsigned int vindex = nindex / TCount; 1.306 + unsigned int tindex = nindex % TCount; 1.307 + 1.308 + if (!tindex && 1.309 + buffer->idx + 1 < count && 1.310 + isCombiningT (buffer->cur(+1).codepoint)) 1.311 + { 1.312 + /* <LV,T>, try to combine. */ 1.313 + unsigned int new_tindex = buffer->cur(+1).codepoint - TBase; 1.314 + hb_codepoint_t new_s = s + new_tindex; 1.315 + if (font->has_glyph (new_s)) 1.316 + { 1.317 + buffer->replace_glyphs (2, 1, &new_s); 1.318 + if (unlikely (buffer->in_error)) 1.319 + return; 1.320 + end = start + 1; 1.321 + continue; 1.322 + } 1.323 + } 1.324 + 1.325 + /* Otherwise, decompose if font doesn't support <LV> or <LVT>, 1.326 + * or if having non-combining <LV,T>. Note that we already handled 1.327 + * combining <LV,T> above. */ 1.328 + if (!has_glyph || 1.329 + (!tindex && 1.330 + buffer->idx + 1 < count && 1.331 + isT (buffer->cur(+1).codepoint))) 1.332 + { 1.333 + hb_codepoint_t decomposed[3] = {LBase + lindex, 1.334 + VBase + vindex, 1.335 + TBase + tindex}; 1.336 + if (font->has_glyph (decomposed[0]) && 1.337 + font->has_glyph (decomposed[1]) && 1.338 + (!tindex || font->has_glyph (decomposed[2]))) 1.339 + { 1.340 + unsigned int s_len = tindex ? 3 : 2; 1.341 + buffer->replace_glyphs (1, s_len, decomposed); 1.342 + if (unlikely (buffer->in_error)) 1.343 + return; 1.344 + 1.345 + /* We decomposed S: apply jamo features to the individual glyphs 1.346 + * that are now in buffer->out_info. 1.347 + */ 1.348 + hb_glyph_info_t *info = buffer->out_info; 1.349 + 1.350 + /* If we decomposed an LV because of a non-combining T following, 1.351 + * we want to include this T in the syllable. 1.352 + */ 1.353 + if (has_glyph && !tindex) 1.354 + { 1.355 + buffer->next_glyph (); 1.356 + s_len++; 1.357 + } 1.358 + end = start + s_len; 1.359 + 1.360 + unsigned int i = start; 1.361 + info[i++].hangul_shaping_feature() = LJMO; 1.362 + info[i++].hangul_shaping_feature() = VJMO; 1.363 + if (i < end) 1.364 + info[i++].hangul_shaping_feature() = TJMO; 1.365 + buffer->merge_out_clusters (start, end); 1.366 + continue; 1.367 + } 1.368 + } 1.369 + 1.370 + if (has_glyph) 1.371 + { 1.372 + /* We didn't decompose the S, so just advance past it. */ 1.373 + end = start + 1; 1.374 + buffer->next_glyph (); 1.375 + continue; 1.376 + } 1.377 + } 1.378 + 1.379 + /* Didn't find a recognizable syllable, so we leave end <= start; 1.380 + * this will prevent tone-mark reordering happening. 1.381 + */ 1.382 + buffer->next_glyph (); 1.383 + } 1.384 + buffer->swap_buffers (); 1.385 +} 1.386 + 1.387 +static void 1.388 +setup_masks_hangul (const hb_ot_shape_plan_t *plan, 1.389 + hb_buffer_t *buffer, 1.390 + hb_font_t *font HB_UNUSED) 1.391 +{ 1.392 + const hangul_shape_plan_t *hangul_plan = (const hangul_shape_plan_t *) plan->data; 1.393 + 1.394 + if (likely (hangul_plan)) 1.395 + { 1.396 + unsigned int count = buffer->len; 1.397 + hb_glyph_info_t *info = buffer->info; 1.398 + for (unsigned int i = 0; i < count; i++, info++) 1.399 + info->mask |= hangul_plan->mask_array[info->hangul_shaping_feature()]; 1.400 + } 1.401 + 1.402 + HB_BUFFER_DEALLOCATE_VAR (buffer, hangul_shaping_feature); 1.403 +} 1.404 + 1.405 + 1.406 +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul = 1.407 +{ 1.408 + "hangul", 1.409 + collect_features_hangul, 1.410 + NULL, /* override_features */ 1.411 + data_create_hangul, /* data_create */ 1.412 + data_destroy_hangul, /* data_destroy */ 1.413 + preprocess_text_hangul, 1.414 + HB_OT_SHAPE_NORMALIZATION_MODE_NONE, 1.415 + NULL, /* decompose */ 1.416 + NULL, /* compose */ 1.417 + setup_masks_hangul, /* setup_masks */ 1.418 + HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, 1.419 + false, /* fallback_position */ 1.420 +};