1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/harfbuzz/src/hb-ot-shape-complex-thai.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,378 @@ 1.4 +/* 1.5 + * Copyright © 2010,2012 Google, Inc. 1.6 + * 1.7 + * This is part of HarfBuzz, a text shaping library. 1.8 + * 1.9 + * Permission is hereby granted, without written agreement and without 1.10 + * license or royalty fees, to use, copy, modify, and distribute this 1.11 + * software and its documentation for any purpose, provided that the 1.12 + * above copyright notice and the following two paragraphs appear in 1.13 + * all copies of this software. 1.14 + * 1.15 + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 1.16 + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 1.17 + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 1.18 + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 1.19 + * DAMAGE. 1.20 + * 1.21 + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 1.22 + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 1.23 + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 1.24 + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 1.25 + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 1.26 + * 1.27 + * Google Author(s): Behdad Esfahbod 1.28 + */ 1.29 + 1.30 +#include "hb-ot-shape-complex-private.hh" 1.31 + 1.32 + 1.33 +/* Thai / Lao shaper */ 1.34 + 1.35 + 1.36 +/* PUA shaping */ 1.37 + 1.38 + 1.39 +enum thai_consonant_type_t 1.40 +{ 1.41 + NC, 1.42 + AC, 1.43 + RC, 1.44 + DC, 1.45 + NOT_CONSONANT, 1.46 + NUM_CONSONANT_TYPES = NOT_CONSONANT 1.47 +}; 1.48 + 1.49 +static thai_consonant_type_t 1.50 +get_consonant_type (hb_codepoint_t u) 1.51 +{ 1.52 + if (u == 0x0E1B || u == 0x0E1D || u == 0x0E1F/* || u == 0x0E2C*/) 1.53 + return AC; 1.54 + if (u == 0x0E0D || u == 0x0E10) 1.55 + return RC; 1.56 + if (u == 0x0E0E || u == 0x0E0F) 1.57 + return DC; 1.58 + if (hb_in_range<hb_codepoint_t> (u, 0x0E01, 0x0E2E)) 1.59 + return NC; 1.60 + return NOT_CONSONANT; 1.61 +} 1.62 + 1.63 + 1.64 +enum thai_mark_type_t 1.65 +{ 1.66 + AV, 1.67 + BV, 1.68 + T, 1.69 + NOT_MARK, 1.70 + NUM_MARK_TYPES = NOT_MARK 1.71 +}; 1.72 + 1.73 +static thai_mark_type_t 1.74 +get_mark_type (hb_codepoint_t u) 1.75 +{ 1.76 + if (u == 0x0E31 || hb_in_range<hb_codepoint_t> (u, 0x0E34, 0x0E37) || 1.77 + u == 0x0E47 || hb_in_range<hb_codepoint_t> (u, 0x0E4D, 0x0E4E)) 1.78 + return AV; 1.79 + if (hb_in_range<hb_codepoint_t> (u, 0x0E38, 0x0E3A)) 1.80 + return BV; 1.81 + if (hb_in_range<hb_codepoint_t> (u, 0x0E48, 0x0E4C)) 1.82 + return T; 1.83 + return NOT_MARK; 1.84 +} 1.85 + 1.86 + 1.87 +enum thai_action_t 1.88 +{ 1.89 + NOP, 1.90 + SD, /* Shift combining-mark down */ 1.91 + SL, /* Shift combining-mark left */ 1.92 + SDL, /* Shift combining-mark down-left */ 1.93 + RD /* Remove descender from base */ 1.94 +}; 1.95 + 1.96 +static hb_codepoint_t 1.97 +thai_pua_shape (hb_codepoint_t u, thai_action_t action, hb_font_t *font) 1.98 +{ 1.99 + struct thai_pua_mapping_t { 1.100 + hb_codepoint_t u; 1.101 + hb_codepoint_t win_pua; 1.102 + hb_codepoint_t mac_pua; 1.103 + } const *pua_mappings = NULL; 1.104 + static const thai_pua_mapping_t SD_mappings[] = { 1.105 + {0x0E48, 0xF70A, 0xF88B}, /* MAI EK */ 1.106 + {0x0E49, 0xF70B, 0xF88E}, /* MAI THO */ 1.107 + {0x0E4A, 0xF70C, 0xF891}, /* MAI TRI */ 1.108 + {0x0E4B, 0xF70D, 0xF894}, /* MAI CHATTAWA */ 1.109 + {0x0E4C, 0xF70E, 0xF897}, /* THANTHAKHAT */ 1.110 + {0x0E38, 0xF718, 0xF89B}, /* SARA U */ 1.111 + {0x0E39, 0xF719, 0xF89C}, /* SARA UU */ 1.112 + {0x0E3A, 0xF71A, 0xF89D}, /* PHINTHU */ 1.113 + {0x0000, 0x0000, 0x0000} 1.114 + }; 1.115 + static const thai_pua_mapping_t SDL_mappings[] = { 1.116 + {0x0E48, 0xF705, 0xF88C}, /* MAI EK */ 1.117 + {0x0E49, 0xF706, 0xF88F}, /* MAI THO */ 1.118 + {0x0E4A, 0xF707, 0xF892}, /* MAI TRI */ 1.119 + {0x0E4B, 0xF708, 0xF895}, /* MAI CHATTAWA */ 1.120 + {0x0E4C, 0xF709, 0xF898}, /* THANTHAKHAT */ 1.121 + {0x0000, 0x0000, 0x0000} 1.122 + }; 1.123 + static const thai_pua_mapping_t SL_mappings[] = { 1.124 + {0x0E48, 0xF713, 0xF88A}, /* MAI EK */ 1.125 + {0x0E49, 0xF714, 0xF88D}, /* MAI THO */ 1.126 + {0x0E4A, 0xF715, 0xF890}, /* MAI TRI */ 1.127 + {0x0E4B, 0xF716, 0xF893}, /* MAI CHATTAWA */ 1.128 + {0x0E4C, 0xF717, 0xF896}, /* THANTHAKHAT */ 1.129 + {0x0E31, 0xF710, 0xF884}, /* MAI HAN-AKAT */ 1.130 + {0x0E34, 0xF701, 0xF885}, /* SARA I */ 1.131 + {0x0E35, 0xF702, 0xF886}, /* SARA II */ 1.132 + {0x0E36, 0xF703, 0xF887}, /* SARA UE */ 1.133 + {0x0E37, 0xF704, 0xF888}, /* SARA UEE */ 1.134 + {0x0E47, 0xF712, 0xF889}, /* MAITAIKHU */ 1.135 + {0x0E4D, 0xF711, 0xF899}, /* NIKHAHIT */ 1.136 + {0x0000, 0x0000, 0x0000} 1.137 + }; 1.138 + static const thai_pua_mapping_t RD_mappings[] = { 1.139 + {0x0E0D, 0xF70F, 0xF89A}, /* YO YING */ 1.140 + {0x0E10, 0xF700, 0xF89E}, /* THO THAN */ 1.141 + {0x0000, 0x0000, 0x0000} 1.142 + }; 1.143 + 1.144 + switch (action) { 1.145 + default: assert (false); /* Fallthrough */ 1.146 + case NOP: return u; 1.147 + case SD: pua_mappings = SD_mappings; break; 1.148 + case SDL: pua_mappings = SDL_mappings; break; 1.149 + case SL: pua_mappings = SL_mappings; break; 1.150 + case RD: pua_mappings = RD_mappings; break; 1.151 + } 1.152 + for (; pua_mappings->u; pua_mappings++) 1.153 + if (pua_mappings->u == u) 1.154 + { 1.155 + hb_codepoint_t glyph; 1.156 + if (hb_font_get_glyph (font, pua_mappings->win_pua, 0, &glyph)) 1.157 + return pua_mappings->win_pua; 1.158 + if (hb_font_get_glyph (font, pua_mappings->mac_pua, 0, &glyph)) 1.159 + return pua_mappings->mac_pua; 1.160 + break; 1.161 + } 1.162 + return u; 1.163 +} 1.164 + 1.165 + 1.166 +static enum thai_above_state_t 1.167 +{ /* Cluster above looks like: */ 1.168 + T0, /* ⣤ */ 1.169 + T1, /* ⣼ */ 1.170 + T2, /* ⣾ */ 1.171 + T3, /* ⣿ */ 1.172 + NUM_ABOVE_STATES 1.173 +} thai_above_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] = 1.174 +{ 1.175 + T0, /* NC */ 1.176 + T1, /* AC */ 1.177 + T0, /* RC */ 1.178 + T0, /* DC */ 1.179 + T3, /* NOT_CONSONANT */ 1.180 +}; 1.181 + 1.182 +static const struct thai_above_state_machine_edge_t { 1.183 + thai_action_t action; 1.184 + thai_above_state_t next_state; 1.185 +} thai_above_state_machine[NUM_ABOVE_STATES][NUM_MARK_TYPES] = 1.186 +{ /*AV*/ /*BV*/ /*T*/ 1.187 +/*T0*/ {{NOP,T3}, {NOP,T0}, {SD, T3}}, 1.188 +/*T1*/ {{SL, T2}, {NOP,T1}, {SDL,T2}}, 1.189 +/*T2*/ {{NOP,T3}, {NOP,T2}, {SL, T3}}, 1.190 +/*T3*/ {{NOP,T3}, {NOP,T3}, {NOP,T3}}, 1.191 +}; 1.192 + 1.193 + 1.194 +static enum thai_below_state_t 1.195 +{ 1.196 + B0, /* No descender */ 1.197 + B1, /* Removable descender */ 1.198 + B2, /* Strict descender */ 1.199 + NUM_BELOW_STATES 1.200 +} thai_below_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] = 1.201 +{ 1.202 + B0, /* NC */ 1.203 + B0, /* AC */ 1.204 + B1, /* RC */ 1.205 + B2, /* DC */ 1.206 + B2, /* NOT_CONSONANT */ 1.207 +}; 1.208 + 1.209 +static const struct thai_below_state_machine_edge_t { 1.210 + thai_action_t action; 1.211 + thai_below_state_t next_state; 1.212 +} thai_below_state_machine[NUM_BELOW_STATES][NUM_MARK_TYPES] = 1.213 +{ /*AV*/ /*BV*/ /*T*/ 1.214 +/*B0*/ {{NOP,B0}, {NOP,B2}, {NOP, B0}}, 1.215 +/*B1*/ {{NOP,B1}, {RD, B2}, {NOP, B1}}, 1.216 +/*B2*/ {{NOP,B2}, {SD, B2}, {NOP, B2}}, 1.217 +}; 1.218 + 1.219 + 1.220 +static void 1.221 +do_thai_pua_shaping (const hb_ot_shape_plan_t *plan HB_UNUSED, 1.222 + hb_buffer_t *buffer, 1.223 + hb_font_t *font) 1.224 +{ 1.225 + thai_above_state_t above_state = thai_above_start_state[NOT_CONSONANT]; 1.226 + thai_below_state_t below_state = thai_below_start_state[NOT_CONSONANT]; 1.227 + unsigned int base = 0; 1.228 + 1.229 + hb_glyph_info_t *info = buffer->info; 1.230 + unsigned int count = buffer->len; 1.231 + for (unsigned int i = 0; i < count; i++) 1.232 + { 1.233 + thai_mark_type_t mt = get_mark_type (info[i].codepoint); 1.234 + 1.235 + if (mt == NOT_MARK) { 1.236 + thai_consonant_type_t ct = get_consonant_type (info[i].codepoint); 1.237 + above_state = thai_above_start_state[ct]; 1.238 + below_state = thai_below_start_state[ct]; 1.239 + base = i; 1.240 + continue; 1.241 + } 1.242 + 1.243 + const thai_above_state_machine_edge_t &above_edge = thai_above_state_machine[above_state][mt]; 1.244 + const thai_below_state_machine_edge_t &below_edge = thai_below_state_machine[below_state][mt]; 1.245 + above_state = above_edge.next_state; 1.246 + below_state = below_edge.next_state; 1.247 + 1.248 + /* At least one of the above/below actions is NOP. */ 1.249 + thai_action_t action = above_edge.action != NOP ? above_edge.action : below_edge.action; 1.250 + 1.251 + if (action == RD) 1.252 + info[base].codepoint = thai_pua_shape (info[base].codepoint, action, font); 1.253 + else 1.254 + info[i].codepoint = thai_pua_shape (info[i].codepoint, action, font); 1.255 + } 1.256 +} 1.257 + 1.258 + 1.259 +static void 1.260 +preprocess_text_thai (const hb_ot_shape_plan_t *plan, 1.261 + hb_buffer_t *buffer, 1.262 + hb_font_t *font) 1.263 +{ 1.264 + /* This function implements the shaping logic documented here: 1.265 + * 1.266 + * http://linux.thai.net/~thep/th-otf/shaping.html 1.267 + * 1.268 + * The first shaping rule listed there is needed even if the font has Thai 1.269 + * OpenType tables. The rest do fallback positioning based on PUA codepoints. 1.270 + * We implement that only if there exist no Thai GSUB in the font. 1.271 + */ 1.272 + 1.273 + /* The following is NOT specified in the MS OT Thai spec, however, it seems 1.274 + * to be what Uniscribe and other engines implement. According to Eric Muller: 1.275 + * 1.276 + * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the 1.277 + * NIKHAHIT backwards over any tone mark (0E48-0E4B). 1.278 + * 1.279 + * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32> 1.280 + * 1.281 + * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not 1.282 + * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably 1.283 + * not what a user wanted, but the rendering is nevertheless nikhahit above 1.284 + * chattawa. 1.285 + * 1.286 + * Same for Lao. 1.287 + * 1.288 + * Note: 1.289 + * 1.290 + * Uniscribe also does some below-marks reordering. Namely, it positions U+0E3A 1.291 + * after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A. 1.292 + * See unicode->modified_combining_class (). Lao does NOT have a U+0E3A 1.293 + * equivalent. 1.294 + */ 1.295 + 1.296 + 1.297 + /* 1.298 + * Here are the characters of significance: 1.299 + * 1.300 + * Thai Lao 1.301 + * SARA AM: U+0E33 U+0EB3 1.302 + * SARA AA: U+0E32 U+0EB2 1.303 + * Nikhahit: U+0E4D U+0ECD 1.304 + * 1.305 + * Testing shows that Uniscribe reorder the following marks: 1.306 + * Thai: <0E31,0E34..0E37,0E47..0E4E> 1.307 + * Lao: <0EB1,0EB4..0EB7,0EC7..0ECE> 1.308 + * 1.309 + * Note how the Lao versions are the same as Thai + 0x80. 1.310 + */ 1.311 + 1.312 + /* We only get one script at a time, so a script-agnostic implementation 1.313 + * is adequate here. */ 1.314 +#define IS_SARA_AM(x) (((x) & ~0x0080) == 0x0E33) 1.315 +#define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0xE33 + 0xE4D) 1.316 +#define SARA_AA_FROM_SARA_AM(x) ((x) - 1) 1.317 +#define IS_TONE_MARK(x) (hb_in_ranges<hb_codepoint_t> ((x) & ~0x0080, 0x0E34, 0x0E37, 0x0E47, 0x0E4E, 0x0E31, 0x0E31)) 1.318 + 1.319 + buffer->clear_output (); 1.320 + unsigned int count = buffer->len; 1.321 + for (buffer->idx = 0; buffer->idx < count;) 1.322 + { 1.323 + hb_codepoint_t u = buffer->cur().codepoint; 1.324 + if (likely (!IS_SARA_AM (u))) { 1.325 + buffer->next_glyph (); 1.326 + continue; 1.327 + } 1.328 + 1.329 + /* Is SARA AM. Decompose and reorder. */ 1.330 + hb_codepoint_t decomposed[2] = {hb_codepoint_t (NIKHAHIT_FROM_SARA_AM (u)), 1.331 + hb_codepoint_t (SARA_AA_FROM_SARA_AM (u))}; 1.332 + buffer->replace_glyphs (1, 2, decomposed); 1.333 + if (unlikely (buffer->in_error)) 1.334 + return; 1.335 + 1.336 + /* Ok, let's see... */ 1.337 + unsigned int end = buffer->out_len; 1.338 + unsigned int start = end - 2; 1.339 + while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint)) 1.340 + start--; 1.341 + 1.342 + if (start + 2 < end) 1.343 + { 1.344 + /* Move Nikhahit (end-2) to the beginning */ 1.345 + buffer->merge_out_clusters (start, end); 1.346 + hb_glyph_info_t t = buffer->out_info[end - 2]; 1.347 + memmove (buffer->out_info + start + 1, 1.348 + buffer->out_info + start, 1.349 + sizeof (buffer->out_info[0]) * (end - start - 2)); 1.350 + buffer->out_info[start] = t; 1.351 + } 1.352 + else 1.353 + { 1.354 + /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the 1.355 + * previous cluster. */ 1.356 + if (start) 1.357 + buffer->merge_out_clusters (start - 1, end); 1.358 + } 1.359 + } 1.360 + buffer->swap_buffers (); 1.361 + 1.362 + /* If font has Thai GSUB, we are done. */ 1.363 + if (plan->props.script == HB_SCRIPT_THAI && !plan->map.found_script[0]) 1.364 + do_thai_pua_shaping (plan, buffer, font); 1.365 +} 1.366 + 1.367 +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai = 1.368 +{ 1.369 + "thai", 1.370 + NULL, /* collect_features */ 1.371 + NULL, /* override_features */ 1.372 + NULL, /* data_create */ 1.373 + NULL, /* data_destroy */ 1.374 + preprocess_text_thai, 1.375 + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, 1.376 + NULL, /* decompose */ 1.377 + NULL, /* compose */ 1.378 + NULL, /* setup_masks */ 1.379 + HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT, 1.380 + false,/* fallback_position */ 1.381 +};