1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/harfbuzz/src/hb-ot-shape-complex-indic.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1809 @@ 1.4 +/* 1.5 + * Copyright © 2011,2012 Google, Inc. 1.6 + * 1.7 + * This is part of HarfBuzz, a text shaping library. 1.8 + * 1.9 + * Permission is hereby granted, without written agreement and without 1.10 + * license or royalty fees, to use, copy, modify, and distribute this 1.11 + * software and its documentation for any purpose, provided that the 1.12 + * above copyright notice and the following two paragraphs appear in 1.13 + * all copies of this software. 1.14 + * 1.15 + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 1.16 + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 1.17 + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 1.18 + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 1.19 + * DAMAGE. 1.20 + * 1.21 + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 1.22 + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 1.23 + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 1.24 + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 1.25 + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 1.26 + * 1.27 + * Google Author(s): Behdad Esfahbod 1.28 + */ 1.29 + 1.30 +#include "hb-ot-shape-complex-indic-private.hh" 1.31 +#include "hb-ot-layout-private.hh" 1.32 + 1.33 +/* buffer var allocations */ 1.34 +#define indic_category() complex_var_u8_0() /* indic_category_t */ 1.35 +#define indic_position() complex_var_u8_1() /* indic_position_t */ 1.36 + 1.37 + 1.38 +/* 1.39 + * Indic shaper. 1.40 + */ 1.41 + 1.42 + 1.43 +#define IN_HALF_BLOCK(u, Base) (((u) & ~0x7F) == (Base)) 1.44 + 1.45 +#define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900)) 1.46 +#define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980)) 1.47 +#define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00)) 1.48 +#define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80)) 1.49 +#define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00)) 1.50 +#define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80)) 1.51 +#define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00)) 1.52 +#define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80)) 1.53 +#define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00)) 1.54 +#define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80)) 1.55 +#define IS_KHMR(u) (IN_HALF_BLOCK (u, 0x1780)) 1.56 + 1.57 + 1.58 +#define MATRA_POS_LEFT(u) POS_PRE_M 1.59 +#define MATRA_POS_RIGHT(u) ( \ 1.60 + IS_DEVA(u) ? POS_AFTER_SUB : \ 1.61 + IS_BENG(u) ? POS_AFTER_POST : \ 1.62 + IS_GURU(u) ? POS_AFTER_POST : \ 1.63 + IS_GUJR(u) ? POS_AFTER_POST : \ 1.64 + IS_ORYA(u) ? POS_AFTER_POST : \ 1.65 + IS_TAML(u) ? POS_AFTER_POST : \ 1.66 + IS_TELU(u) ? (u <= 0x0C42 ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ 1.67 + IS_KNDA(u) ? (u < 0x0CC3 || u > 0xCD6 ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ 1.68 + IS_MLYM(u) ? POS_AFTER_POST : \ 1.69 + IS_SINH(u) ? POS_AFTER_SUB : \ 1.70 + IS_KHMR(u) ? POS_AFTER_POST : \ 1.71 + /*default*/ POS_AFTER_SUB \ 1.72 + ) 1.73 +#define MATRA_POS_TOP(u) ( /* BENG and MLYM don't have top matras. */ \ 1.74 + IS_DEVA(u) ? POS_AFTER_SUB : \ 1.75 + IS_GURU(u) ? POS_AFTER_POST : /* Deviate from spec */ \ 1.76 + IS_GUJR(u) ? POS_AFTER_SUB : \ 1.77 + IS_ORYA(u) ? POS_AFTER_MAIN : \ 1.78 + IS_TAML(u) ? POS_AFTER_SUB : \ 1.79 + IS_TELU(u) ? POS_BEFORE_SUB : \ 1.80 + IS_KNDA(u) ? POS_BEFORE_SUB : \ 1.81 + IS_SINH(u) ? POS_AFTER_SUB : \ 1.82 + IS_KHMR(u) ? POS_AFTER_POST : \ 1.83 + /*default*/ POS_AFTER_SUB \ 1.84 + ) 1.85 +#define MATRA_POS_BOTTOM(u) ( \ 1.86 + IS_DEVA(u) ? POS_AFTER_SUB : \ 1.87 + IS_BENG(u) ? POS_AFTER_SUB : \ 1.88 + IS_GURU(u) ? POS_AFTER_POST : \ 1.89 + IS_GUJR(u) ? POS_AFTER_POST : \ 1.90 + IS_ORYA(u) ? POS_AFTER_SUB : \ 1.91 + IS_TAML(u) ? POS_AFTER_POST : \ 1.92 + IS_TELU(u) ? POS_BEFORE_SUB : \ 1.93 + IS_KNDA(u) ? POS_BEFORE_SUB : \ 1.94 + IS_MLYM(u) ? POS_AFTER_POST : \ 1.95 + IS_SINH(u) ? POS_AFTER_SUB : \ 1.96 + IS_KHMR(u) ? POS_AFTER_POST : \ 1.97 + /*default*/ POS_AFTER_SUB \ 1.98 + ) 1.99 + 1.100 +static inline indic_position_t 1.101 +matra_position (hb_codepoint_t u, indic_position_t side) 1.102 +{ 1.103 + switch ((int) side) 1.104 + { 1.105 + case POS_PRE_C: return MATRA_POS_LEFT (u); 1.106 + case POS_POST_C: return MATRA_POS_RIGHT (u); 1.107 + case POS_ABOVE_C: return MATRA_POS_TOP (u); 1.108 + case POS_BELOW_C: return MATRA_POS_BOTTOM (u); 1.109 + }; 1.110 + return side; 1.111 +} 1.112 + 1.113 +/* XXX 1.114 + * This is a hack for now. We should move this data into the main Indic table. 1.115 + * Or completely remove it and just check in the tables. 1.116 + */ 1.117 +static const hb_codepoint_t ra_chars[] = { 1.118 + 0x0930, /* Devanagari */ 1.119 + 0x09B0, /* Bengali */ 1.120 + 0x09F0, /* Bengali */ 1.121 + 0x0A30, /* Gurmukhi */ /* No Reph */ 1.122 + 0x0AB0, /* Gujarati */ 1.123 + 0x0B30, /* Oriya */ 1.124 + 0x0BB0, /* Tamil */ /* No Reph */ 1.125 + 0x0C30, /* Telugu */ /* Reph formed only with ZWJ */ 1.126 + 0x0CB0, /* Kannada */ 1.127 + 0x0D30, /* Malayalam */ /* No Reph, Logical Repha */ 1.128 + 1.129 + 0x0DBB, /* Sinhala */ /* Reph formed only with ZWJ */ 1.130 + 1.131 + 0x179A, /* Khmer */ /* No Reph, Visual Repha */ 1.132 +}; 1.133 + 1.134 +static inline bool 1.135 +is_ra (hb_codepoint_t u) 1.136 +{ 1.137 + for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++) 1.138 + if (u == ra_chars[i]) 1.139 + return true; 1.140 + return false; 1.141 +} 1.142 + 1.143 +static inline bool 1.144 +is_one_of (const hb_glyph_info_t &info, unsigned int flags) 1.145 +{ 1.146 + /* If it ligated, all bets are off. */ 1.147 + if (_hb_glyph_info_ligated (&info)) return false; 1.148 + return !!(FLAG (info.indic_category()) & flags); 1.149 +} 1.150 + 1.151 +#define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ)) 1.152 +static inline bool 1.153 +is_joiner (const hb_glyph_info_t &info) 1.154 +{ 1.155 + return is_one_of (info, JOINER_FLAGS); 1.156 +} 1.157 + 1.158 +#define MEDIAL_FLAGS (FLAG (OT_CM) | FLAG (OT_CM2)) 1.159 + 1.160 +/* Note: 1.161 + * 1.162 + * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels 1.163 + * cannot happen in a consonant syllable. The plus side however is, we can call the 1.164 + * consonant syllable logic from the vowel syllable function and get it all right! */ 1.165 +#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_Ra) | MEDIAL_FLAGS | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE)) 1.166 +static inline bool 1.167 +is_consonant (const hb_glyph_info_t &info) 1.168 +{ 1.169 + return is_one_of (info, CONSONANT_FLAGS); 1.170 +} 1.171 + 1.172 +#define HALANT_OR_COENG_FLAGS (FLAG (OT_H) | FLAG (OT_Coeng)) 1.173 +static inline bool 1.174 +is_halant_or_coeng (const hb_glyph_info_t &info) 1.175 +{ 1.176 + return is_one_of (info, HALANT_OR_COENG_FLAGS); 1.177 +} 1.178 + 1.179 +static inline void 1.180 +set_indic_properties (hb_glyph_info_t &info) 1.181 +{ 1.182 + hb_codepoint_t u = info.codepoint; 1.183 + unsigned int type = hb_indic_get_categories (u); 1.184 + indic_category_t cat = (indic_category_t) (type & 0x7F); 1.185 + indic_position_t pos = (indic_position_t) (type >> 8); 1.186 + 1.187 + 1.188 + /* 1.189 + * Re-assign category 1.190 + */ 1.191 + 1.192 + 1.193 + /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe 1.194 + * treats U+0951..U+0954 all behave similarly. 1.195 + * TESTS: 1.196 + * U+092E,U+0947,U+0952 1.197 + * U+092E,U+0952,U+0947 1.198 + * U+092E,U+0947,U+0951 1.199 + * U+092E,U+0951,U+0947 1.200 + */ 1.201 + if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954))) 1.202 + cat = OT_A; 1.203 + 1.204 + if (unlikely (u == 0x17D1)) 1.205 + cat = OT_X; 1.206 + if (cat == OT_X && 1.207 + unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D3))) /* Khmer Various signs */ 1.208 + { 1.209 + /* These are like Top Matras. */ 1.210 + cat = OT_M; 1.211 + pos = POS_ABOVE_C; 1.212 + } 1.213 + if (u == 0x17C6) /* Khmer Bindu doesn't like to be repositioned. */ 1.214 + cat = OT_N; 1.215 + 1.216 + if (unlikely (u == 0x17D2)) cat = OT_Coeng; /* Khmer coeng */ 1.217 + else if (unlikely (u == 0x200C)) cat = OT_ZWNJ; 1.218 + else if (unlikely (u == 0x200D)) cat = OT_ZWJ; 1.219 + else if (unlikely (u == 0x25CC)) cat = OT_DOTTEDCIRCLE; 1.220 + else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK. Move it to the end. */ 1.221 + else if (unlikely (u == 0xA982)) cat = OT_SM; /* Javanese repha. */ 1.222 + else if (unlikely (u == 0xA9BE)) cat = OT_CM2; /* Javanese medial ya. */ 1.223 + else if (unlikely (u == 0xA9BD)) { cat = OT_M; pos = POS_POST_C; } /* Javanese vocalic r. */ 1.224 + 1.225 + if (cat == OT_Repha) { 1.226 + /* There are two kinds of characters marked as Repha: 1.227 + * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer) 1.228 + * - The ones that are GenCat=Lo is encoded logically, ie. beginning of syllable. (eg. Malayalam) 1.229 + * 1.230 + * We recategorize the first kind to look like a Nukta and attached to the base directly. 1.231 + */ 1.232 + if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) 1.233 + cat = OT_N; 1.234 + } 1.235 + 1.236 + 1.237 + 1.238 + /* 1.239 + * Re-assign position. 1.240 + */ 1.241 + 1.242 + if ((FLAG (cat) & CONSONANT_FLAGS)) 1.243 + { 1.244 + pos = POS_BASE_C; 1.245 + if (is_ra (u)) 1.246 + cat = OT_Ra; 1.247 + } 1.248 + else if (cat == OT_M) 1.249 + { 1.250 + pos = matra_position (u, pos); 1.251 + } 1.252 + else if ((FLAG (cat) & (FLAG (OT_SM) | FLAG (OT_VD) | FLAG (OT_A) | FLAG (OT_Avag)))) 1.253 + { 1.254 + pos = POS_SMVD; 1.255 + } 1.256 + 1.257 + if (unlikely (u == 0x0B01)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */ 1.258 + 1.259 + 1.260 + 1.261 + info.indic_category() = cat; 1.262 + info.indic_position() = pos; 1.263 +} 1.264 + 1.265 +/* 1.266 + * Things above this line should ideally be moved to the Indic table itself. 1.267 + */ 1.268 + 1.269 + 1.270 +/* 1.271 + * Indic configurations. Note that we do not want to keep every single script-specific 1.272 + * behavior in these tables necessarily. This should mainly be used for per-script 1.273 + * properties that are cheaper keeping here, than in the code. Ie. if, say, one and 1.274 + * only one script has an exception, that one script can be if'ed directly in the code, 1.275 + * instead of adding a new flag in these structs. 1.276 + */ 1.277 + 1.278 +enum base_position_t { 1.279 + BASE_POS_FIRST, 1.280 + BASE_POS_LAST_SINHALA, 1.281 + BASE_POS_LAST 1.282 +}; 1.283 +enum reph_position_t { 1.284 + REPH_POS_AFTER_MAIN = POS_AFTER_MAIN, 1.285 + REPH_POS_BEFORE_SUB = POS_BEFORE_SUB, 1.286 + REPH_POS_AFTER_SUB = POS_AFTER_SUB, 1.287 + REPH_POS_BEFORE_POST = POS_BEFORE_POST, 1.288 + REPH_POS_AFTER_POST = POS_AFTER_POST, 1.289 + REPH_POS_DONT_CARE = POS_RA_TO_BECOME_REPH 1.290 +}; 1.291 +enum reph_mode_t { 1.292 + REPH_MODE_IMPLICIT, /* Reph formed out of initial Ra,H sequence. */ 1.293 + REPH_MODE_EXPLICIT, /* Reph formed out of initial Ra,H,ZWJ sequence. */ 1.294 + REPH_MODE_VIS_REPHA, /* Encoded Repha character, no reordering needed. */ 1.295 + REPH_MODE_LOG_REPHA /* Encoded Repha character, needs reordering. */ 1.296 +}; 1.297 +enum blwf_mode_t { 1.298 + BLWF_MODE_PRE_AND_POST, /* Below-forms feature applied to pre-base and post-base. */ 1.299 + BLWF_MODE_POST_ONLY /* Below-forms feature applied to post-base only. */ 1.300 +}; 1.301 +enum pref_len_t { 1.302 + PREF_LEN_1 = 1, 1.303 + PREF_LEN_2 = 2, 1.304 + PREF_LEN_DONT_CARE = PREF_LEN_2 1.305 +}; 1.306 +struct indic_config_t 1.307 +{ 1.308 + hb_script_t script; 1.309 + bool has_old_spec; 1.310 + hb_codepoint_t virama; 1.311 + base_position_t base_pos; 1.312 + reph_position_t reph_pos; 1.313 + reph_mode_t reph_mode; 1.314 + blwf_mode_t blwf_mode; 1.315 + pref_len_t pref_len; 1.316 +}; 1.317 + 1.318 +static const indic_config_t indic_configs[] = 1.319 +{ 1.320 + /* Default. Should be first. */ 1.321 + {HB_SCRIPT_INVALID, false, 0,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_1}, 1.322 + {HB_SCRIPT_DEVANAGARI,true, 0x094D,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE}, 1.323 + {HB_SCRIPT_BENGALI, true, 0x09CD,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE}, 1.324 + {HB_SCRIPT_GURMUKHI, true, 0x0A4D,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE}, 1.325 + {HB_SCRIPT_GUJARATI, true, 0x0ACD,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE}, 1.326 + {HB_SCRIPT_ORIYA, true, 0x0B4D,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE}, 1.327 + {HB_SCRIPT_TAMIL, true, 0x0BCD,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_2}, 1.328 + {HB_SCRIPT_TELUGU, true, 0x0C4D,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY, PREF_LEN_2}, 1.329 + {HB_SCRIPT_KANNADA, true, 0x0CCD,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY, PREF_LEN_2}, 1.330 + {HB_SCRIPT_MALAYALAM, true, 0x0D4D,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST, PREF_LEN_2}, 1.331 + {HB_SCRIPT_SINHALA, false,0x0DCA,BASE_POS_LAST_SINHALA, 1.332 + REPH_POS_AFTER_MAIN, REPH_MODE_EXPLICIT, BLWF_MODE_PRE_AND_POST, PREF_LEN_DONT_CARE}, 1.333 + {HB_SCRIPT_KHMER, false,0x17D2,BASE_POS_FIRST,REPH_POS_DONT_CARE, REPH_MODE_VIS_REPHA,BLWF_MODE_PRE_AND_POST, PREF_LEN_2}, 1.334 + {HB_SCRIPT_JAVANESE, false,0xA9C0,BASE_POS_FIRST,REPH_POS_DONT_CARE, REPH_MODE_VIS_REPHA,BLWF_MODE_PRE_AND_POST, PREF_LEN_1}, 1.335 +}; 1.336 + 1.337 + 1.338 + 1.339 +/* 1.340 + * Indic shaper. 1.341 + */ 1.342 + 1.343 +struct feature_list_t { 1.344 + hb_tag_t tag; 1.345 + hb_ot_map_feature_flags_t flags; 1.346 +}; 1.347 + 1.348 +static const feature_list_t 1.349 +indic_features[] = 1.350 +{ 1.351 + /* 1.352 + * Basic features. 1.353 + * These features are applied in order, one at a time, after initial_reordering. 1.354 + */ 1.355 + {HB_TAG('n','u','k','t'), F_GLOBAL}, 1.356 + {HB_TAG('a','k','h','n'), F_GLOBAL}, 1.357 + {HB_TAG('r','p','h','f'), F_NONE}, 1.358 + {HB_TAG('r','k','r','f'), F_GLOBAL}, 1.359 + {HB_TAG('p','r','e','f'), F_NONE}, 1.360 + {HB_TAG('b','l','w','f'), F_NONE}, 1.361 + {HB_TAG('a','b','v','f'), F_NONE}, 1.362 + {HB_TAG('h','a','l','f'), F_NONE}, 1.363 + {HB_TAG('p','s','t','f'), F_NONE}, 1.364 + {HB_TAG('v','a','t','u'), F_GLOBAL}, 1.365 + {HB_TAG('c','j','c','t'), F_GLOBAL}, 1.366 + {HB_TAG('c','f','a','r'), F_NONE}, 1.367 + /* 1.368 + * Other features. 1.369 + * These features are applied all at once, after final_reordering. 1.370 + * Default Bengali font in Windows for example has intermixed 1.371 + * lookups for init,pres,abvs,blws features. 1.372 + */ 1.373 + {HB_TAG('i','n','i','t'), F_NONE}, 1.374 + {HB_TAG('p','r','e','s'), F_GLOBAL}, 1.375 + {HB_TAG('a','b','v','s'), F_GLOBAL}, 1.376 + {HB_TAG('b','l','w','s'), F_GLOBAL}, 1.377 + {HB_TAG('p','s','t','s'), F_GLOBAL}, 1.378 + {HB_TAG('h','a','l','n'), F_GLOBAL}, 1.379 + /* Positioning features, though we don't care about the types. */ 1.380 + {HB_TAG('d','i','s','t'), F_GLOBAL}, 1.381 + {HB_TAG('a','b','v','m'), F_GLOBAL}, 1.382 + {HB_TAG('b','l','w','m'), F_GLOBAL}, 1.383 +}; 1.384 + 1.385 +/* 1.386 + * Must be in the same order as the indic_features array. 1.387 + */ 1.388 +enum { 1.389 + _NUKT, 1.390 + _AKHN, 1.391 + RPHF, 1.392 + _RKRF, 1.393 + PREF, 1.394 + BLWF, 1.395 + ABVF, 1.396 + HALF, 1.397 + PSTF, 1.398 + _VATU, 1.399 + _CJCT, 1.400 + CFAR, 1.401 + 1.402 + INIT, 1.403 + _PRES, 1.404 + _ABVS, 1.405 + _BLWS, 1.406 + _PSTS, 1.407 + _HALN, 1.408 + _DIST, 1.409 + _ABVM, 1.410 + _BLWM, 1.411 + 1.412 + INDIC_NUM_FEATURES, 1.413 + INDIC_BASIC_FEATURES = INIT /* Don't forget to update this! */ 1.414 +}; 1.415 + 1.416 +static void 1.417 +setup_syllables (const hb_ot_shape_plan_t *plan, 1.418 + hb_font_t *font, 1.419 + hb_buffer_t *buffer); 1.420 +static void 1.421 +initial_reordering (const hb_ot_shape_plan_t *plan, 1.422 + hb_font_t *font, 1.423 + hb_buffer_t *buffer); 1.424 +static void 1.425 +final_reordering (const hb_ot_shape_plan_t *plan, 1.426 + hb_font_t *font, 1.427 + hb_buffer_t *buffer); 1.428 +static void 1.429 +clear_syllables (const hb_ot_shape_plan_t *plan, 1.430 + hb_font_t *font, 1.431 + hb_buffer_t *buffer); 1.432 + 1.433 +static void 1.434 +collect_features_indic (hb_ot_shape_planner_t *plan) 1.435 +{ 1.436 + hb_ot_map_builder_t *map = &plan->map; 1.437 + 1.438 + /* Do this before any lookups have been applied. */ 1.439 + map->add_gsub_pause (setup_syllables); 1.440 + 1.441 + map->add_global_bool_feature (HB_TAG('l','o','c','l')); 1.442 + /* The Indic specs do not require ccmp, but we apply it here since if 1.443 + * there is a use of it, it's typically at the beginning. */ 1.444 + map->add_global_bool_feature (HB_TAG('c','c','m','p')); 1.445 + 1.446 + 1.447 + unsigned int i = 0; 1.448 + map->add_gsub_pause (initial_reordering); 1.449 + for (; i < INDIC_BASIC_FEATURES; i++) { 1.450 + map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ); 1.451 + map->add_gsub_pause (NULL); 1.452 + } 1.453 + map->add_gsub_pause (final_reordering); 1.454 + for (; i < INDIC_NUM_FEATURES; i++) { 1.455 + map->add_feature (indic_features[i].tag, 1, indic_features[i].flags | F_MANUAL_ZWJ); 1.456 + } 1.457 + 1.458 + map->add_global_bool_feature (HB_TAG('c','a','l','t')); 1.459 + map->add_global_bool_feature (HB_TAG('c','l','i','g')); 1.460 + 1.461 + map->add_gsub_pause (clear_syllables); 1.462 +} 1.463 + 1.464 +static void 1.465 +override_features_indic (hb_ot_shape_planner_t *plan) 1.466 +{ 1.467 + /* Uniscribe does not apply 'kern' in Khmer. */ 1.468 + if (hb_options ().uniscribe_bug_compatible) 1.469 + { 1.470 + switch ((hb_tag_t) plan->props.script) 1.471 + { 1.472 + case HB_SCRIPT_KHMER: 1.473 + plan->map.add_feature (HB_TAG('k','e','r','n'), 0, F_GLOBAL); 1.474 + break; 1.475 + } 1.476 + } 1.477 + 1.478 + plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL); 1.479 +} 1.480 + 1.481 + 1.482 +struct would_substitute_feature_t 1.483 +{ 1.484 + inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_) 1.485 + { 1.486 + zero_context = zero_context_; 1.487 + map->get_stage_lookups (0/*GSUB*/, 1.488 + map->get_feature_stage (0/*GSUB*/, feature_tag), 1.489 + &lookups, &count); 1.490 + } 1.491 + 1.492 + inline bool would_substitute (const hb_codepoint_t *glyphs, 1.493 + unsigned int glyphs_count, 1.494 + hb_face_t *face) const 1.495 + { 1.496 + for (unsigned int i = 0; i < count; i++) 1.497 + if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context)) 1.498 + return true; 1.499 + return false; 1.500 + } 1.501 + 1.502 + private: 1.503 + const hb_ot_map_t::lookup_map_t *lookups; 1.504 + unsigned int count; 1.505 + bool zero_context; 1.506 +}; 1.507 + 1.508 +struct indic_shape_plan_t 1.509 +{ 1.510 + ASSERT_POD (); 1.511 + 1.512 + inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const 1.513 + { 1.514 + hb_codepoint_t glyph = virama_glyph; 1.515 + if (unlikely (virama_glyph == (hb_codepoint_t) -1)) 1.516 + { 1.517 + if (!config->virama || !font->get_glyph (config->virama, 0, &glyph)) 1.518 + glyph = 0; 1.519 + /* Technically speaking, the spec says we should apply 'locl' to virama too. 1.520 + * Maybe one day... */ 1.521 + 1.522 + /* Our get_glyph() function needs a font, so we can't get the virama glyph 1.523 + * during shape planning... Instead, overwrite it here. It's safe. Don't worry! */ 1.524 + (const_cast<indic_shape_plan_t *> (this))->virama_glyph = glyph; 1.525 + } 1.526 + 1.527 + *pglyph = glyph; 1.528 + return glyph != 0; 1.529 + } 1.530 + 1.531 + const indic_config_t *config; 1.532 + 1.533 + bool is_old_spec; 1.534 + hb_codepoint_t virama_glyph; 1.535 + 1.536 + would_substitute_feature_t rphf; 1.537 + would_substitute_feature_t pref; 1.538 + would_substitute_feature_t blwf; 1.539 + would_substitute_feature_t pstf; 1.540 + 1.541 + hb_mask_t mask_array[INDIC_NUM_FEATURES]; 1.542 +}; 1.543 + 1.544 +static void * 1.545 +data_create_indic (const hb_ot_shape_plan_t *plan) 1.546 +{ 1.547 + indic_shape_plan_t *indic_plan = (indic_shape_plan_t *) calloc (1, sizeof (indic_shape_plan_t)); 1.548 + if (unlikely (!indic_plan)) 1.549 + return NULL; 1.550 + 1.551 + indic_plan->config = &indic_configs[0]; 1.552 + for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++) 1.553 + if (plan->props.script == indic_configs[i].script) { 1.554 + indic_plan->config = &indic_configs[i]; 1.555 + break; 1.556 + } 1.557 + 1.558 + indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FF) != '2'); 1.559 + indic_plan->virama_glyph = (hb_codepoint_t) -1; 1.560 + 1.561 + /* Use zero-context would_substitute() matching for new-spec of the main 1.562 + * Indic scripts, but not for old-spec or scripts with one spec only. */ 1.563 + bool zero_context = indic_plan->config->has_old_spec || !indic_plan->is_old_spec; 1.564 + indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context); 1.565 + indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context); 1.566 + indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context); 1.567 + indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context); 1.568 + 1.569 + for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++) 1.570 + indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ? 1.571 + 0 : plan->map.get_1_mask (indic_features[i].tag); 1.572 + 1.573 + return indic_plan; 1.574 +} 1.575 + 1.576 +static void 1.577 +data_destroy_indic (void *data) 1.578 +{ 1.579 + free (data); 1.580 +} 1.581 + 1.582 +static indic_position_t 1.583 +consonant_position_from_face (const indic_shape_plan_t *indic_plan, 1.584 + const hb_codepoint_t consonant, 1.585 + const hb_codepoint_t virama, 1.586 + hb_face_t *face) 1.587 +{ 1.588 + /* For old-spec, the order of glyphs is Consonant,Virama, 1.589 + * whereas for new-spec, it's Virama,Consonant. However, 1.590 + * some broken fonts (like Free Sans) simply copied lookups 1.591 + * from old-spec to new-spec without modification. 1.592 + * And oddly enough, Uniscribe seems to respect those lookups. 1.593 + * Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds 1.594 + * base at 0. The font however, only has lookups matching 1.595 + * 930,94D in 'blwf', not the expected 94D,930 (with new-spec 1.596 + * table). As such, we simply match both sequences. Seems 1.597 + * to work. */ 1.598 + hb_codepoint_t glyphs[3] = {virama, consonant, virama}; 1.599 + if (indic_plan->blwf.would_substitute (glyphs , 2, face) || 1.600 + indic_plan->blwf.would_substitute (glyphs+1, 2, face)) 1.601 + return POS_BELOW_C; 1.602 + if (indic_plan->pstf.would_substitute (glyphs , 2, face) || 1.603 + indic_plan->pstf.would_substitute (glyphs+1, 2, face)) 1.604 + return POS_POST_C; 1.605 + unsigned int pref_len = indic_plan->config->pref_len; 1.606 + if ((pref_len == PREF_LEN_2 && 1.607 + (indic_plan->pref.would_substitute (glyphs , 2, face) || 1.608 + indic_plan->pref.would_substitute (glyphs+1, 2, face))) 1.609 + || (pref_len == PREF_LEN_1 && 1.610 + indic_plan->pref.would_substitute (glyphs+1, 1, face))) 1.611 + return POS_POST_C; 1.612 + return POS_BASE_C; 1.613 +} 1.614 + 1.615 + 1.616 +enum syllable_type_t { 1.617 + consonant_syllable, 1.618 + vowel_syllable, 1.619 + standalone_cluster, 1.620 + avagraha_cluster, 1.621 + broken_cluster, 1.622 + non_indic_cluster, 1.623 +}; 1.624 + 1.625 +#include "hb-ot-shape-complex-indic-machine.hh" 1.626 + 1.627 + 1.628 +static void 1.629 +setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, 1.630 + hb_buffer_t *buffer, 1.631 + hb_font_t *font HB_UNUSED) 1.632 +{ 1.633 + HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); 1.634 + HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); 1.635 + 1.636 + /* We cannot setup masks here. We save information about characters 1.637 + * and setup masks later on in a pause-callback. */ 1.638 + 1.639 + unsigned int count = buffer->len; 1.640 + for (unsigned int i = 0; i < count; i++) 1.641 + set_indic_properties (buffer->info[i]); 1.642 +} 1.643 + 1.644 +static void 1.645 +setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, 1.646 + hb_font_t *font HB_UNUSED, 1.647 + hb_buffer_t *buffer) 1.648 +{ 1.649 + find_syllables (buffer); 1.650 +} 1.651 + 1.652 +static int 1.653 +compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) 1.654 +{ 1.655 + int a = pa->indic_position(); 1.656 + int b = pb->indic_position(); 1.657 + 1.658 + return a < b ? -1 : a == b ? 0 : +1; 1.659 +} 1.660 + 1.661 + 1.662 + 1.663 +static void 1.664 +update_consonant_positions (const hb_ot_shape_plan_t *plan, 1.665 + hb_font_t *font, 1.666 + hb_buffer_t *buffer) 1.667 +{ 1.668 + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; 1.669 + 1.670 + if (indic_plan->config->base_pos != BASE_POS_LAST) 1.671 + return; 1.672 + 1.673 + hb_codepoint_t virama; 1.674 + if (indic_plan->get_virama_glyph (font, &virama)) 1.675 + { 1.676 + hb_face_t *face = font->face; 1.677 + unsigned int count = buffer->len; 1.678 + for (unsigned int i = 0; i < count; i++) 1.679 + if (buffer->info[i].indic_position() == POS_BASE_C) { 1.680 + hb_codepoint_t consonant = buffer->info[i].codepoint; 1.681 + buffer->info[i].indic_position() = consonant_position_from_face (indic_plan, consonant, virama, face); 1.682 + } 1.683 + } 1.684 +} 1.685 + 1.686 + 1.687 +/* Rules from: 1.688 + * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ 1.689 + 1.690 +static void 1.691 +initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, 1.692 + hb_face_t *face, 1.693 + hb_buffer_t *buffer, 1.694 + unsigned int start, unsigned int end) 1.695 +{ 1.696 + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; 1.697 + hb_glyph_info_t *info = buffer->info; 1.698 + 1.699 + 1.700 + /* 1. Find base consonant: 1.701 + * 1.702 + * The shaping engine finds the base consonant of the syllable, using the 1.703 + * following algorithm: starting from the end of the syllable, move backwards 1.704 + * until a consonant is found that does not have a below-base or post-base 1.705 + * form (post-base forms have to follow below-base forms), or that is not a 1.706 + * pre-base reordering Ra, or arrive at the first consonant. The consonant 1.707 + * stopped at will be the base. 1.708 + * 1.709 + * o If the syllable starts with Ra + Halant (in a script that has Reph) 1.710 + * and has more than one consonant, Ra is excluded from candidates for 1.711 + * base consonants. 1.712 + */ 1.713 + 1.714 + unsigned int base = end; 1.715 + bool has_reph = false; 1.716 + 1.717 + { 1.718 + /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 1.719 + * and has more than one consonant, Ra is excluded from candidates for 1.720 + * base consonants. */ 1.721 + unsigned int limit = start; 1.722 + if (indic_plan->config->reph_pos != REPH_POS_DONT_CARE && 1.723 + indic_plan->mask_array[RPHF] && 1.724 + start + 3 <= end && 1.725 + ( 1.726 + (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) || 1.727 + (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == OT_ZWJ) 1.728 + )) 1.729 + { 1.730 + /* See if it matches the 'rphf' feature. */ 1.731 + hb_codepoint_t glyphs[2] = {info[start].codepoint, info[start + 1].codepoint}; 1.732 + if (indic_plan->rphf.would_substitute (glyphs, ARRAY_LENGTH (glyphs), face)) 1.733 + { 1.734 + limit += 2; 1.735 + while (limit < end && is_joiner (info[limit])) 1.736 + limit++; 1.737 + base = start; 1.738 + has_reph = true; 1.739 + } 1.740 + } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == OT_Repha) 1.741 + { 1.742 + limit += 1; 1.743 + while (limit < end && is_joiner (info[limit])) 1.744 + limit++; 1.745 + base = start; 1.746 + has_reph = true; 1.747 + } 1.748 + 1.749 + switch (indic_plan->config->base_pos) 1.750 + { 1.751 + default: 1.752 + assert (false); 1.753 + /* fallthrough */ 1.754 + 1.755 + case BASE_POS_LAST: 1.756 + { 1.757 + /* -> starting from the end of the syllable, move backwards */ 1.758 + unsigned int i = end; 1.759 + bool seen_below = false; 1.760 + do { 1.761 + i--; 1.762 + /* -> until a consonant is found */ 1.763 + if (is_consonant (info[i])) 1.764 + { 1.765 + /* -> that does not have a below-base or post-base form 1.766 + * (post-base forms have to follow below-base forms), */ 1.767 + if (info[i].indic_position() != POS_BELOW_C && 1.768 + (info[i].indic_position() != POS_POST_C || seen_below)) 1.769 + { 1.770 + base = i; 1.771 + break; 1.772 + } 1.773 + if (info[i].indic_position() == POS_BELOW_C) 1.774 + seen_below = true; 1.775 + 1.776 + /* -> or that is not a pre-base reordering Ra, 1.777 + * 1.778 + * IMPLEMENTATION NOTES: 1.779 + * 1.780 + * Our pre-base reordering Ra's are marked POS_POST_C, so will be skipped 1.781 + * by the logic above already. 1.782 + */ 1.783 + 1.784 + /* -> or arrive at the first consonant. The consonant stopped at will 1.785 + * be the base. */ 1.786 + base = i; 1.787 + } 1.788 + else 1.789 + { 1.790 + /* A ZWJ after a Halant stops the base search, and requests an explicit 1.791 + * half form. 1.792 + * A ZWJ before a Halant, requests a subjoined form instead, and hence 1.793 + * search continues. This is particularly important for Bengali 1.794 + * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */ 1.795 + if (start < i && 1.796 + info[i].indic_category() == OT_ZWJ && 1.797 + info[i - 1].indic_category() == OT_H) 1.798 + break; 1.799 + } 1.800 + } while (i > limit); 1.801 + } 1.802 + break; 1.803 + 1.804 + case BASE_POS_LAST_SINHALA: 1.805 + { 1.806 + /* Sinhala base positioning is slightly different from main Indic, in that: 1.807 + * 1. It's ZWJ behavior is different, 1.808 + * 2. We don't need to look into the font for consonant positions. 1.809 + */ 1.810 + 1.811 + if (!has_reph) 1.812 + base = limit; 1.813 + 1.814 + /* Find the last base consonant that is not blocked by ZWJ. If there is 1.815 + * a ZWJ right before a base consonant, that would request a subjoined form. */ 1.816 + for (unsigned int i = limit; i < end; i++) 1.817 + if (is_consonant (info[i])) 1.818 + { 1.819 + if (limit < i && info[i - 1].indic_category() == OT_ZWJ) 1.820 + break; 1.821 + else 1.822 + base = i; 1.823 + } 1.824 + 1.825 + /* Mark all subsequent consonants as below. */ 1.826 + for (unsigned int i = base + 1; i < end; i++) 1.827 + if (is_consonant (info[i])) 1.828 + info[i].indic_position() = POS_BELOW_C; 1.829 + } 1.830 + break; 1.831 + 1.832 + case BASE_POS_FIRST: 1.833 + { 1.834 + /* The first consonant is always the base. */ 1.835 + 1.836 + assert (indic_plan->config->reph_mode == REPH_MODE_VIS_REPHA); 1.837 + assert (!has_reph); 1.838 + 1.839 + base = start; 1.840 + 1.841 + /* Mark all subsequent consonants as below. */ 1.842 + for (unsigned int i = base + 1; i < end; i++) 1.843 + if (is_consonant (info[i])) 1.844 + info[i].indic_position() = POS_BELOW_C; 1.845 + } 1.846 + break; 1.847 + } 1.848 + 1.849 + /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 1.850 + * and has more than one consonant, Ra is excluded from candidates for 1.851 + * base consonants. 1.852 + * 1.853 + * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */ 1.854 + if (has_reph && base == start && limit - base <= 2) { 1.855 + /* Have no other consonant, so Reph is not formed and Ra becomes base. */ 1.856 + has_reph = false; 1.857 + } 1.858 + } 1.859 + 1.860 + 1.861 + /* 2. Decompose and reorder Matras: 1.862 + * 1.863 + * Each matra and any syllable modifier sign in the cluster are moved to the 1.864 + * appropriate position relative to the consonant(s) in the cluster. The 1.865 + * shaping engine decomposes two- or three-part matras into their constituent 1.866 + * parts before any repositioning. Matra characters are classified by which 1.867 + * consonant in a conjunct they have affinity for and are reordered to the 1.868 + * following positions: 1.869 + * 1.870 + * o Before first half form in the syllable 1.871 + * o After subjoined consonants 1.872 + * o After post-form consonant 1.873 + * o After main consonant (for above marks) 1.874 + * 1.875 + * IMPLEMENTATION NOTES: 1.876 + * 1.877 + * The normalize() routine has already decomposed matras for us, so we don't 1.878 + * need to worry about that. 1.879 + */ 1.880 + 1.881 + 1.882 + /* 3. Reorder marks to canonical order: 1.883 + * 1.884 + * Adjacent nukta and halant or nukta and vedic sign are always repositioned 1.885 + * if necessary, so that the nukta is first. 1.886 + * 1.887 + * IMPLEMENTATION NOTES: 1.888 + * 1.889 + * We don't need to do this: the normalize() routine already did this for us. 1.890 + */ 1.891 + 1.892 + 1.893 + /* Reorder characters */ 1.894 + 1.895 + for (unsigned int i = start; i < base; i++) 1.896 + info[i].indic_position() = MIN (POS_PRE_C, (indic_position_t) info[i].indic_position()); 1.897 + 1.898 + if (base < end) 1.899 + info[base].indic_position() = POS_BASE_C; 1.900 + 1.901 + /* Mark final consonants. A final consonant is one appearing after a matra, 1.902 + * like in Khmer. */ 1.903 + for (unsigned int i = base + 1; i < end; i++) 1.904 + if (info[i].indic_category() == OT_M) { 1.905 + for (unsigned int j = i + 1; j < end; j++) 1.906 + if (is_consonant (info[j])) { 1.907 + info[j].indic_position() = POS_FINAL_C; 1.908 + break; 1.909 + } 1.910 + break; 1.911 + } 1.912 + 1.913 + /* Handle beginning Ra */ 1.914 + if (has_reph) 1.915 + info[start].indic_position() = POS_RA_TO_BECOME_REPH; 1.916 + 1.917 + /* For old-style Indic script tags, move the first post-base Halant after 1.918 + * last consonant. Only do this if there is *not* a Halant after last 1.919 + * consonant. Otherwise it becomes messy. */ 1.920 + if (indic_plan->is_old_spec) { 1.921 + for (unsigned int i = base + 1; i < end; i++) 1.922 + if (info[i].indic_category() == OT_H) { 1.923 + unsigned int j; 1.924 + for (j = end - 1; j > i; j--) 1.925 + if (is_consonant (info[j]) || info[j].indic_category() == OT_H) 1.926 + break; 1.927 + if (info[j].indic_category() != OT_H && j > i) { 1.928 + /* Move Halant to after last consonant. */ 1.929 + hb_glyph_info_t t = info[i]; 1.930 + memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); 1.931 + info[j] = t; 1.932 + } 1.933 + break; 1.934 + } 1.935 + } 1.936 + 1.937 + /* Attach misc marks to previous char to move with them. */ 1.938 + { 1.939 + indic_position_t last_pos = POS_START; 1.940 + for (unsigned int i = start; i < end; i++) 1.941 + { 1.942 + if ((FLAG (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | HALANT_OR_COENG_FLAGS))) 1.943 + { 1.944 + info[i].indic_position() = last_pos; 1.945 + if (unlikely (info[i].indic_category() == OT_H && 1.946 + info[i].indic_position() == POS_PRE_M)) 1.947 + { 1.948 + /* 1.949 + * Uniscribe doesn't move the Halant with Left Matra. 1.950 + * TEST: U+092B,U+093F,U+094DE 1.951 + * We follow. This is important for the Sinhala 1.952 + * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA 1.953 + * where U+0DD9 is a left matra and U+0DCA is the virama. 1.954 + * We don't want to move the virama with the left matra. 1.955 + * TEST: U+0D9A,U+0DDA 1.956 + */ 1.957 + for (unsigned int j = i; j > start; j--) 1.958 + if (info[j - 1].indic_position() != POS_PRE_M) { 1.959 + info[i].indic_position() = info[j - 1].indic_position(); 1.960 + break; 1.961 + } 1.962 + } 1.963 + } else if (info[i].indic_position() != POS_SMVD) { 1.964 + last_pos = (indic_position_t) info[i].indic_position(); 1.965 + } 1.966 + } 1.967 + } 1.968 + /* For post-base consonants let them own anything before them 1.969 + * since the last consonant or matra. */ 1.970 + { 1.971 + unsigned int last = base; 1.972 + for (unsigned int i = base + 1; i < end; i++) 1.973 + if (is_consonant (info[i])) 1.974 + { 1.975 + for (unsigned int j = last + 1; j < i; j++) 1.976 + if (info[j].indic_position() < POS_SMVD) 1.977 + info[j].indic_position() = info[i].indic_position(); 1.978 + last = i; 1.979 + } else if (info[i].indic_category() == OT_M) 1.980 + last = i; 1.981 + } 1.982 + 1.983 + 1.984 + { 1.985 + /* Use syllable() for sort accounting temporarily. */ 1.986 + unsigned int syllable = info[start].syllable(); 1.987 + for (unsigned int i = start; i < end; i++) 1.988 + info[i].syllable() = i - start; 1.989 + 1.990 + /* Sit tight, rock 'n roll! */ 1.991 + hb_bubble_sort (info + start, end - start, compare_indic_order); 1.992 + /* Find base again */ 1.993 + base = end; 1.994 + for (unsigned int i = start; i < end; i++) 1.995 + if (info[i].indic_position() == POS_BASE_C) 1.996 + { 1.997 + base = i; 1.998 + break; 1.999 + } 1.1000 + /* Things are out-of-control for post base positions, they may shuffle 1.1001 + * around like crazy. In old-spec mode, we move halants around, so in 1.1002 + * that case merge all clusters after base. Otherwise, check the sort 1.1003 + * order and merge as needed. 1.1004 + * For pre-base stuff, we handle cluster issues in final reordering. */ 1.1005 + if (indic_plan->is_old_spec || end - base > 127) 1.1006 + buffer->merge_clusters (base, end); 1.1007 + else 1.1008 + { 1.1009 + /* Note! syllable() is a one-byte field. */ 1.1010 + for (unsigned int i = base; i < end; i++) 1.1011 + if (info[i].syllable() != 255) 1.1012 + { 1.1013 + unsigned int max = i; 1.1014 + unsigned int j = start + info[i].syllable(); 1.1015 + while (j != i) 1.1016 + { 1.1017 + max = MAX (max, j); 1.1018 + unsigned int next = start + info[j].syllable(); 1.1019 + info[j].syllable() = 255; /* So we don't process j later again. */ 1.1020 + j = next; 1.1021 + } 1.1022 + if (i != max) 1.1023 + buffer->merge_clusters (i, max + 1); 1.1024 + } 1.1025 + } 1.1026 + 1.1027 + /* Put syllable back in. */ 1.1028 + for (unsigned int i = start; i < end; i++) 1.1029 + info[i].syllable() = syllable; 1.1030 + } 1.1031 + 1.1032 + /* Setup masks now */ 1.1033 + 1.1034 + { 1.1035 + hb_mask_t mask; 1.1036 + 1.1037 + /* Reph */ 1.1038 + for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++) 1.1039 + info[i].mask |= indic_plan->mask_array[RPHF]; 1.1040 + 1.1041 + /* Pre-base */ 1.1042 + mask = indic_plan->mask_array[HALF]; 1.1043 + if (!indic_plan->is_old_spec && 1.1044 + indic_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST) 1.1045 + mask |= indic_plan->mask_array[BLWF]; 1.1046 + for (unsigned int i = start; i < base; i++) 1.1047 + info[i].mask |= mask; 1.1048 + /* Base */ 1.1049 + mask = 0; 1.1050 + if (base < end) 1.1051 + info[base].mask |= mask; 1.1052 + /* Post-base */ 1.1053 + mask = indic_plan->mask_array[BLWF] | indic_plan->mask_array[ABVF] | indic_plan->mask_array[PSTF]; 1.1054 + for (unsigned int i = base + 1; i < end; i++) 1.1055 + info[i].mask |= mask; 1.1056 + } 1.1057 + 1.1058 + if (indic_plan->is_old_spec && 1.1059 + buffer->props.script == HB_SCRIPT_DEVANAGARI) 1.1060 + { 1.1061 + /* Old-spec eye-lash Ra needs special handling. From the 1.1062 + * spec: 1.1063 + * 1.1064 + * "The feature 'below-base form' is applied to consonants 1.1065 + * having below-base forms and following the base consonant. 1.1066 + * The exception is vattu, which may appear below half forms 1.1067 + * as well as below the base glyph. The feature 'below-base 1.1068 + * form' will be applied to all such occurrences of Ra as well." 1.1069 + * 1.1070 + * Test case: U+0924,U+094D,U+0930,U+094d,U+0915 1.1071 + * with Sanskrit 2003 font. 1.1072 + * 1.1073 + * However, note that Ra,Halant,ZWJ is the correct way to 1.1074 + * request eyelash form of Ra, so we wouldbn't inhibit it 1.1075 + * in that sequence. 1.1076 + * 1.1077 + * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915 1.1078 + */ 1.1079 + for (unsigned int i = start; i + 1 < base; i++) 1.1080 + if (info[i ].indic_category() == OT_Ra && 1.1081 + info[i+1].indic_category() == OT_H && 1.1082 + (i + 2 == base || 1.1083 + info[i+2].indic_category() != OT_ZWJ)) 1.1084 + { 1.1085 + info[i ].mask |= indic_plan->mask_array[BLWF]; 1.1086 + info[i+1].mask |= indic_plan->mask_array[BLWF]; 1.1087 + } 1.1088 + } 1.1089 + 1.1090 + unsigned int pref_len = indic_plan->config->pref_len; 1.1091 + if (indic_plan->mask_array[PREF] && base + pref_len < end) 1.1092 + { 1.1093 + assert (1 <= pref_len && pref_len <= 2); 1.1094 + /* Find a Halant,Ra sequence and mark it for pre-base reordering processing. */ 1.1095 + for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) { 1.1096 + hb_codepoint_t glyphs[2]; 1.1097 + for (unsigned int j = 0; j < pref_len; j++) 1.1098 + glyphs[j] = info[i + j].codepoint; 1.1099 + if (indic_plan->pref.would_substitute (glyphs, pref_len, face)) 1.1100 + { 1.1101 + for (unsigned int j = 0; j < pref_len; j++) 1.1102 + info[i++].mask |= indic_plan->mask_array[PREF]; 1.1103 + 1.1104 + /* Mark the subsequent stuff with 'cfar'. Used in Khmer. 1.1105 + * Read the feature spec. 1.1106 + * This allows distinguishing the following cases with MS Khmer fonts: 1.1107 + * U+1784,U+17D2,U+179A,U+17D2,U+1782 1.1108 + * U+1784,U+17D2,U+1782,U+17D2,U+179A 1.1109 + */ 1.1110 + if (indic_plan->mask_array[CFAR]) 1.1111 + for (; i < end; i++) 1.1112 + info[i].mask |= indic_plan->mask_array[CFAR]; 1.1113 + 1.1114 + break; 1.1115 + } 1.1116 + } 1.1117 + } 1.1118 + 1.1119 + /* Apply ZWJ/ZWNJ effects */ 1.1120 + for (unsigned int i = start + 1; i < end; i++) 1.1121 + if (is_joiner (info[i])) { 1.1122 + bool non_joiner = info[i].indic_category() == OT_ZWNJ; 1.1123 + unsigned int j = i; 1.1124 + 1.1125 + do { 1.1126 + j--; 1.1127 + 1.1128 + /* ZWJ/ZWNJ should disable CJCT. They do that by simply 1.1129 + * being there, since we don't skip them for the CJCT 1.1130 + * feature (ie. F_MANUAL_ZWJ) */ 1.1131 + 1.1132 + /* A ZWNJ disables HALF. */ 1.1133 + if (non_joiner) 1.1134 + info[j].mask &= ~indic_plan->mask_array[HALF]; 1.1135 + 1.1136 + } while (j > start && !is_consonant (info[j])); 1.1137 + } 1.1138 +} 1.1139 + 1.1140 + 1.1141 +static void 1.1142 +initial_reordering_vowel_syllable (const hb_ot_shape_plan_t *plan, 1.1143 + hb_face_t *face, 1.1144 + hb_buffer_t *buffer, 1.1145 + unsigned int start, unsigned int end) 1.1146 +{ 1.1147 + /* We made the vowels look like consonants. So let's call the consonant logic! */ 1.1148 + initial_reordering_consonant_syllable (plan, face, buffer, start, end); 1.1149 +} 1.1150 + 1.1151 +static void 1.1152 +initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan, 1.1153 + hb_face_t *face, 1.1154 + hb_buffer_t *buffer, 1.1155 + unsigned int start, unsigned int end) 1.1156 +{ 1.1157 + /* We treat NBSP/dotted-circle as if they are consonants, so we should just chain. 1.1158 + * Only if not in compatibility mode that is... */ 1.1159 + 1.1160 + if (hb_options ().uniscribe_bug_compatible) 1.1161 + { 1.1162 + /* For dotted-circle, this is what Uniscribe does: 1.1163 + * If dotted-circle is the last glyph, it just does nothing. 1.1164 + * Ie. It doesn't form Reph. */ 1.1165 + if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE) 1.1166 + return; 1.1167 + } 1.1168 + 1.1169 + initial_reordering_consonant_syllable (plan, face, buffer, start, end); 1.1170 +} 1.1171 + 1.1172 +static void 1.1173 +initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan, 1.1174 + hb_face_t *face, 1.1175 + hb_buffer_t *buffer, 1.1176 + unsigned int start, unsigned int end) 1.1177 +{ 1.1178 + /* We already inserted dotted-circles, so just call the standalone_cluster. */ 1.1179 + initial_reordering_standalone_cluster (plan, face, buffer, start, end); 1.1180 +} 1.1181 + 1.1182 +static void 1.1183 +initial_reordering_avagraha_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED, 1.1184 + hb_face_t *face HB_UNUSED, 1.1185 + hb_buffer_t *buffer HB_UNUSED, 1.1186 + unsigned int start HB_UNUSED, unsigned int end HB_UNUSED) 1.1187 +{ 1.1188 + /* Nothing to do right now. If we ever switch to using the output 1.1189 + * buffer in the reordering process, we'd need to next_glyph() here. */ 1.1190 +} 1.1191 + 1.1192 +static void 1.1193 +initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED, 1.1194 + hb_face_t *face HB_UNUSED, 1.1195 + hb_buffer_t *buffer HB_UNUSED, 1.1196 + unsigned int start HB_UNUSED, unsigned int end HB_UNUSED) 1.1197 +{ 1.1198 + /* Nothing to do right now. If we ever switch to using the output 1.1199 + * buffer in the reordering process, we'd need to next_glyph() here. */ 1.1200 +} 1.1201 + 1.1202 + 1.1203 +static void 1.1204 +initial_reordering_syllable (const hb_ot_shape_plan_t *plan, 1.1205 + hb_face_t *face, 1.1206 + hb_buffer_t *buffer, 1.1207 + unsigned int start, unsigned int end) 1.1208 +{ 1.1209 + syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F); 1.1210 + switch (syllable_type) { 1.1211 + case consonant_syllable: initial_reordering_consonant_syllable (plan, face, buffer, start, end); return; 1.1212 + case vowel_syllable: initial_reordering_vowel_syllable (plan, face, buffer, start, end); return; 1.1213 + case standalone_cluster: initial_reordering_standalone_cluster (plan, face, buffer, start, end); return; 1.1214 + case avagraha_cluster: initial_reordering_avagraha_cluster (plan, face, buffer, start, end); return; 1.1215 + case broken_cluster: initial_reordering_broken_cluster (plan, face, buffer, start, end); return; 1.1216 + case non_indic_cluster: initial_reordering_non_indic_cluster (plan, face, buffer, start, end); return; 1.1217 + } 1.1218 +} 1.1219 + 1.1220 +static inline void 1.1221 +insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED, 1.1222 + hb_font_t *font, 1.1223 + hb_buffer_t *buffer) 1.1224 +{ 1.1225 + /* Note: This loop is extra overhead, but should not be measurable. */ 1.1226 + bool has_broken_syllables = false; 1.1227 + unsigned int count = buffer->len; 1.1228 + for (unsigned int i = 0; i < count; i++) 1.1229 + if ((buffer->info[i].syllable() & 0x0F) == broken_cluster) { 1.1230 + has_broken_syllables = true; 1.1231 + break; 1.1232 + } 1.1233 + if (likely (!has_broken_syllables)) 1.1234 + return; 1.1235 + 1.1236 + 1.1237 + hb_codepoint_t dottedcircle_glyph; 1.1238 + if (!font->get_glyph (0x25CC, 0, &dottedcircle_glyph)) 1.1239 + return; 1.1240 + 1.1241 + hb_glyph_info_t dottedcircle = {0}; 1.1242 + dottedcircle.codepoint = 0x25CC; 1.1243 + set_indic_properties (dottedcircle); 1.1244 + dottedcircle.codepoint = dottedcircle_glyph; 1.1245 + 1.1246 + buffer->clear_output (); 1.1247 + 1.1248 + buffer->idx = 0; 1.1249 + unsigned int last_syllable = 0; 1.1250 + while (buffer->idx < buffer->len) 1.1251 + { 1.1252 + unsigned int syllable = buffer->cur().syllable(); 1.1253 + syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F); 1.1254 + if (unlikely (last_syllable != syllable && syllable_type == broken_cluster)) 1.1255 + { 1.1256 + last_syllable = syllable; 1.1257 + 1.1258 + hb_glyph_info_t info = dottedcircle; 1.1259 + info.cluster = buffer->cur().cluster; 1.1260 + info.mask = buffer->cur().mask; 1.1261 + info.syllable() = buffer->cur().syllable(); 1.1262 + 1.1263 + /* Insert dottedcircle after possible Repha. */ 1.1264 + while (buffer->idx < buffer->len && 1.1265 + last_syllable == buffer->cur().syllable() && 1.1266 + buffer->cur().indic_category() == OT_Repha) 1.1267 + buffer->next_glyph (); 1.1268 + 1.1269 + buffer->output_info (info); 1.1270 + } 1.1271 + else 1.1272 + buffer->next_glyph (); 1.1273 + } 1.1274 + 1.1275 + buffer->swap_buffers (); 1.1276 +} 1.1277 + 1.1278 +static void 1.1279 +initial_reordering (const hb_ot_shape_plan_t *plan, 1.1280 + hb_font_t *font, 1.1281 + hb_buffer_t *buffer) 1.1282 +{ 1.1283 + update_consonant_positions (plan, font, buffer); 1.1284 + insert_dotted_circles (plan, font, buffer); 1.1285 + 1.1286 + hb_glyph_info_t *info = buffer->info; 1.1287 + unsigned int count = buffer->len; 1.1288 + if (unlikely (!count)) return; 1.1289 + unsigned int last = 0; 1.1290 + unsigned int last_syllable = info[0].syllable(); 1.1291 + for (unsigned int i = 1; i < count; i++) 1.1292 + if (last_syllable != info[i].syllable()) { 1.1293 + initial_reordering_syllable (plan, font->face, buffer, last, i); 1.1294 + last = i; 1.1295 + last_syllable = info[last].syllable(); 1.1296 + } 1.1297 + initial_reordering_syllable (plan, font->face, buffer, last, count); 1.1298 +} 1.1299 + 1.1300 +static void 1.1301 +final_reordering_syllable (const hb_ot_shape_plan_t *plan, 1.1302 + hb_buffer_t *buffer, 1.1303 + unsigned int start, unsigned int end) 1.1304 +{ 1.1305 + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; 1.1306 + hb_glyph_info_t *info = buffer->info; 1.1307 + 1.1308 + /* 4. Final reordering: 1.1309 + * 1.1310 + * After the localized forms and basic shaping forms GSUB features have been 1.1311 + * applied (see below), the shaping engine performs some final glyph 1.1312 + * reordering before applying all the remaining font features to the entire 1.1313 + * cluster. 1.1314 + */ 1.1315 + 1.1316 + /* Find base again */ 1.1317 + unsigned int base; 1.1318 + for (base = start; base < end; base++) 1.1319 + if (info[base].indic_position() >= POS_BASE_C) { 1.1320 + if (start < base && info[base].indic_position() > POS_BASE_C) 1.1321 + base--; 1.1322 + break; 1.1323 + } 1.1324 + if (base == end && start < base && 1.1325 + info[base - 1].indic_category() != OT_ZWJ) 1.1326 + base--; 1.1327 + while (start < base && 1.1328 + (info[base].indic_category() == OT_H || 1.1329 + info[base].indic_category() == OT_N)) 1.1330 + base--; 1.1331 + 1.1332 + 1.1333 + /* o Reorder matras: 1.1334 + * 1.1335 + * If a pre-base matra character had been reordered before applying basic 1.1336 + * features, the glyph can be moved closer to the main consonant based on 1.1337 + * whether half-forms had been formed. Actual position for the matra is 1.1338 + * defined as “after last standalone halant glyph, after initial matra 1.1339 + * position and before the main consonant”. If ZWJ or ZWNJ follow this 1.1340 + * halant, position is moved after it. 1.1341 + */ 1.1342 + 1.1343 + if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */ 1.1344 + { 1.1345 + /* If we lost track of base, alas, position before last thingy. */ 1.1346 + unsigned int new_pos = base == end ? base - 2 : base - 1; 1.1347 + 1.1348 + /* Malayalam / Tamil do not have "half" forms or explicit virama forms. 1.1349 + * The glyphs formed by 'half' are Chillus or ligated explicit viramas. 1.1350 + * We want to position matra after them. 1.1351 + */ 1.1352 + if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL) 1.1353 + { 1.1354 + while (new_pos > start && 1.1355 + !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng))))) 1.1356 + new_pos--; 1.1357 + 1.1358 + /* If we found no Halant we are done. 1.1359 + * Otherwise only proceed if the Halant does 1.1360 + * not belong to the Matra itself! */ 1.1361 + if (is_halant_or_coeng (info[new_pos]) && 1.1362 + info[new_pos].indic_position() != POS_PRE_M) 1.1363 + { 1.1364 + /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ 1.1365 + if (new_pos + 1 < end && is_joiner (info[new_pos + 1])) 1.1366 + new_pos++; 1.1367 + } 1.1368 + else 1.1369 + new_pos = start; /* No move. */ 1.1370 + } 1.1371 + 1.1372 + if (start < new_pos && info[new_pos].indic_position () != POS_PRE_M) 1.1373 + { 1.1374 + /* Now go see if there's actually any matras... */ 1.1375 + for (unsigned int i = new_pos; i > start; i--) 1.1376 + if (info[i - 1].indic_position () == POS_PRE_M) 1.1377 + { 1.1378 + unsigned int old_pos = i - 1; 1.1379 + hb_glyph_info_t tmp = info[old_pos]; 1.1380 + memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0])); 1.1381 + info[new_pos] = tmp; 1.1382 + if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */ 1.1383 + base--; 1.1384 + buffer->merge_clusters (new_pos, MIN (end, base + 1)); 1.1385 + new_pos--; 1.1386 + } 1.1387 + } else { 1.1388 + for (unsigned int i = start; i < base; i++) 1.1389 + if (info[i].indic_position () == POS_PRE_M) { 1.1390 + buffer->merge_clusters (i, MIN (end, base + 1)); 1.1391 + break; 1.1392 + } 1.1393 + } 1.1394 + } 1.1395 + 1.1396 + 1.1397 + /* o Reorder reph: 1.1398 + * 1.1399 + * Reph’s original position is always at the beginning of the syllable, 1.1400 + * (i.e. it is not reordered at the character reordering stage). However, 1.1401 + * it will be reordered according to the basic-forms shaping results. 1.1402 + * Possible positions for reph, depending on the script, are; after main, 1.1403 + * before post-base consonant forms, and after post-base consonant forms. 1.1404 + */ 1.1405 + 1.1406 + /* Two cases: 1.1407 + * 1.1408 + * - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then 1.1409 + * we should only move it if the sequence ligated to the repha form. 1.1410 + * 1.1411 + * - If repha is encoded separately and in the logical position, we should only 1.1412 + * move it if it did NOT ligate. If it ligated, it's probably the font trying 1.1413 + * to make it work without the reordering. 1.1414 + */ 1.1415 + if (start + 1 < end && 1.1416 + info[start].indic_position() == POS_RA_TO_BECOME_REPH && 1.1417 + ((info[start].indic_category() == OT_Repha) ^ 1.1418 + _hb_glyph_info_ligated (&info[start]))) 1.1419 + { 1.1420 + unsigned int new_reph_pos; 1.1421 + reph_position_t reph_pos = indic_plan->config->reph_pos; 1.1422 + 1.1423 + assert (reph_pos != REPH_POS_DONT_CARE); 1.1424 + 1.1425 + /* 1. If reph should be positioned after post-base consonant forms, 1.1426 + * proceed to step 5. 1.1427 + */ 1.1428 + if (reph_pos == REPH_POS_AFTER_POST) 1.1429 + { 1.1430 + goto reph_step_5; 1.1431 + } 1.1432 + 1.1433 + /* 2. If the reph repositioning class is not after post-base: target 1.1434 + * position is after the first explicit halant glyph between the 1.1435 + * first post-reph consonant and last main consonant. If ZWJ or ZWNJ 1.1436 + * are following this halant, position is moved after it. If such 1.1437 + * position is found, this is the target position. Otherwise, 1.1438 + * proceed to the next step. 1.1439 + * 1.1440 + * Note: in old-implementation fonts, where classifications were 1.1441 + * fixed in shaping engine, there was no case where reph position 1.1442 + * will be found on this step. 1.1443 + */ 1.1444 + { 1.1445 + new_reph_pos = start + 1; 1.1446 + while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos])) 1.1447 + new_reph_pos++; 1.1448 + 1.1449 + if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) 1.1450 + { 1.1451 + /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ 1.1452 + if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) 1.1453 + new_reph_pos++; 1.1454 + goto reph_move; 1.1455 + } 1.1456 + } 1.1457 + 1.1458 + /* 3. If reph should be repositioned after the main consonant: find the 1.1459 + * first consonant not ligated with main, or find the first 1.1460 + * consonant that is not a potential pre-base reordering Ra. 1.1461 + */ 1.1462 + if (reph_pos == REPH_POS_AFTER_MAIN) 1.1463 + { 1.1464 + new_reph_pos = base; 1.1465 + while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN) 1.1466 + new_reph_pos++; 1.1467 + if (new_reph_pos < end) 1.1468 + goto reph_move; 1.1469 + } 1.1470 + 1.1471 + /* 4. If reph should be positioned before post-base consonant, find 1.1472 + * first post-base classified consonant not ligated with main. If no 1.1473 + * consonant is found, the target position should be before the 1.1474 + * first matra, syllable modifier sign or vedic sign. 1.1475 + */ 1.1476 + /* This is our take on what step 4 is trying to say (and failing, BADLY). */ 1.1477 + if (reph_pos == REPH_POS_AFTER_SUB) 1.1478 + { 1.1479 + new_reph_pos = base; 1.1480 + while (new_reph_pos < end && 1.1481 + !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD)))) 1.1482 + new_reph_pos++; 1.1483 + if (new_reph_pos < end) 1.1484 + goto reph_move; 1.1485 + } 1.1486 + 1.1487 + /* 5. If no consonant is found in steps 3 or 4, move reph to a position 1.1488 + * immediately before the first post-base matra, syllable modifier 1.1489 + * sign or vedic sign that has a reordering class after the intended 1.1490 + * reph position. For example, if the reordering position for reph 1.1491 + * is post-main, it will skip above-base matras that also have a 1.1492 + * post-main position. 1.1493 + */ 1.1494 + reph_step_5: 1.1495 + { 1.1496 + /* Copied from step 2. */ 1.1497 + new_reph_pos = start + 1; 1.1498 + while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos])) 1.1499 + new_reph_pos++; 1.1500 + 1.1501 + if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) 1.1502 + { 1.1503 + /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ 1.1504 + if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) 1.1505 + new_reph_pos++; 1.1506 + goto reph_move; 1.1507 + } 1.1508 + } 1.1509 + 1.1510 + /* 6. Otherwise, reorder reph to the end of the syllable. 1.1511 + */ 1.1512 + { 1.1513 + new_reph_pos = end - 1; 1.1514 + while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD) 1.1515 + new_reph_pos--; 1.1516 + 1.1517 + /* 1.1518 + * If the Reph is to be ending up after a Matra,Halant sequence, 1.1519 + * position it before that Halant so it can interact with the Matra. 1.1520 + * However, if it's a plain Consonant,Halant we shouldn't do that. 1.1521 + * Uniscribe doesn't do this. 1.1522 + * TEST: U+0930,U+094D,U+0915,U+094B,U+094D 1.1523 + */ 1.1524 + if (!hb_options ().uniscribe_bug_compatible && 1.1525 + unlikely (is_halant_or_coeng (info[new_reph_pos]))) { 1.1526 + for (unsigned int i = base + 1; i < new_reph_pos; i++) 1.1527 + if (info[i].indic_category() == OT_M) { 1.1528 + /* Ok, got it. */ 1.1529 + new_reph_pos--; 1.1530 + } 1.1531 + } 1.1532 + goto reph_move; 1.1533 + } 1.1534 + 1.1535 + reph_move: 1.1536 + { 1.1537 + buffer->merge_clusters (start, new_reph_pos + 1); 1.1538 + 1.1539 + /* Move */ 1.1540 + hb_glyph_info_t reph = info[start]; 1.1541 + memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0])); 1.1542 + info[new_reph_pos] = reph; 1.1543 + if (start < base && base <= new_reph_pos) 1.1544 + base--; 1.1545 + } 1.1546 + } 1.1547 + 1.1548 + 1.1549 + /* o Reorder pre-base reordering consonants: 1.1550 + * 1.1551 + * If a pre-base reordering consonant is found, reorder it according to 1.1552 + * the following rules: 1.1553 + */ 1.1554 + 1.1555 + if (indic_plan->mask_array[PREF] && base + 1 < end) /* Otherwise there can't be any pre-base reordering Ra. */ 1.1556 + { 1.1557 + unsigned int pref_len = indic_plan->config->pref_len; 1.1558 + for (unsigned int i = base + 1; i < end; i++) 1.1559 + if ((info[i].mask & indic_plan->mask_array[PREF]) != 0) 1.1560 + { 1.1561 + /* 1. Only reorder a glyph produced by substitution during application 1.1562 + * of the <pref> feature. (Note that a font may shape a Ra consonant with 1.1563 + * the feature generally but block it in certain contexts.) 1.1564 + */ 1.1565 + /* Note: We just check that something got substituted. We don't check that 1.1566 + * the <pref> feature actually did it... 1.1567 + * 1.1568 + * If pref len is longer than one, then only reorder if it ligated. If 1.1569 + * pref len is one, only reorder if it didn't ligate with other things. */ 1.1570 + if (_hb_glyph_info_substituted (&info[i]) && 1.1571 + ((pref_len == 1) ^ _hb_glyph_info_ligated (&info[i]))) 1.1572 + { 1.1573 + /* 1.1574 + * 2. Try to find a target position the same way as for pre-base matra. 1.1575 + * If it is found, reorder pre-base consonant glyph. 1.1576 + * 1.1577 + * 3. If position is not found, reorder immediately before main 1.1578 + * consonant. 1.1579 + */ 1.1580 + 1.1581 + unsigned int new_pos = base; 1.1582 + /* Malayalam / Tamil do not have "half" forms or explicit virama forms. 1.1583 + * The glyphs formed by 'half' are Chillus or ligated explicit viramas. 1.1584 + * We want to position matra after them. 1.1585 + */ 1.1586 + if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL) 1.1587 + { 1.1588 + while (new_pos > start && 1.1589 + !(is_one_of (info[new_pos - 1], FLAG(OT_M) | HALANT_OR_COENG_FLAGS))) 1.1590 + new_pos--; 1.1591 + 1.1592 + /* In Khmer coeng model, a H,Ra can go *after* matras. If it goes after a 1.1593 + * split matra, it should be reordered to *before* the left part of such matra. */ 1.1594 + if (new_pos > start && info[new_pos - 1].indic_category() == OT_M) 1.1595 + { 1.1596 + unsigned int old_pos = i; 1.1597 + for (unsigned int i = base + 1; i < old_pos; i++) 1.1598 + if (info[i].indic_category() == OT_M) 1.1599 + { 1.1600 + new_pos--; 1.1601 + break; 1.1602 + } 1.1603 + } 1.1604 + } 1.1605 + 1.1606 + if (new_pos > start && is_halant_or_coeng (info[new_pos - 1])) 1.1607 + { 1.1608 + /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ 1.1609 + if (new_pos < end && is_joiner (info[new_pos])) 1.1610 + new_pos++; 1.1611 + } 1.1612 + 1.1613 + { 1.1614 + unsigned int old_pos = i; 1.1615 + buffer->merge_clusters (new_pos, old_pos + 1); 1.1616 + hb_glyph_info_t tmp = info[old_pos]; 1.1617 + memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0])); 1.1618 + info[new_pos] = tmp; 1.1619 + if (new_pos <= base && base < old_pos) 1.1620 + base++; 1.1621 + } 1.1622 + } 1.1623 + 1.1624 + break; 1.1625 + } 1.1626 + } 1.1627 + 1.1628 + 1.1629 + /* Apply 'init' to the Left Matra if it's a word start. */ 1.1630 + if (info[start].indic_position () == POS_PRE_M && 1.1631 + (!start || 1.1632 + !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) & 1.1633 + FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))) 1.1634 + info[start].mask |= indic_plan->mask_array[INIT]; 1.1635 + 1.1636 + 1.1637 + /* 1.1638 + * Finish off the clusters and go home! 1.1639 + */ 1.1640 + if (hb_options ().uniscribe_bug_compatible) 1.1641 + { 1.1642 + switch ((hb_tag_t) plan->props.script) 1.1643 + { 1.1644 + case HB_SCRIPT_TAMIL: 1.1645 + case HB_SCRIPT_SINHALA: 1.1646 + break; 1.1647 + 1.1648 + default: 1.1649 + /* Uniscribe merges the entire cluster... Except for Tamil & Sinhala. 1.1650 + * This means, half forms are submerged into the main consonants cluster. 1.1651 + * This is unnecessary, and makes cursor positioning harder, but that's what 1.1652 + * Uniscribe does. */ 1.1653 + buffer->merge_clusters (start, end); 1.1654 + break; 1.1655 + } 1.1656 + } 1.1657 +} 1.1658 + 1.1659 + 1.1660 +static void 1.1661 +final_reordering (const hb_ot_shape_plan_t *plan, 1.1662 + hb_font_t *font HB_UNUSED, 1.1663 + hb_buffer_t *buffer) 1.1664 +{ 1.1665 + unsigned int count = buffer->len; 1.1666 + if (unlikely (!count)) return; 1.1667 + 1.1668 + hb_glyph_info_t *info = buffer->info; 1.1669 + unsigned int last = 0; 1.1670 + unsigned int last_syllable = info[0].syllable(); 1.1671 + for (unsigned int i = 1; i < count; i++) 1.1672 + if (last_syllable != info[i].syllable()) { 1.1673 + final_reordering_syllable (plan, buffer, last, i); 1.1674 + last = i; 1.1675 + last_syllable = info[last].syllable(); 1.1676 + } 1.1677 + final_reordering_syllable (plan, buffer, last, count); 1.1678 + 1.1679 + HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category); 1.1680 + HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position); 1.1681 +} 1.1682 + 1.1683 + 1.1684 +static void 1.1685 +clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, 1.1686 + hb_font_t *font HB_UNUSED, 1.1687 + hb_buffer_t *buffer) 1.1688 +{ 1.1689 + hb_glyph_info_t *info = buffer->info; 1.1690 + unsigned int count = buffer->len; 1.1691 + for (unsigned int i = 0; i < count; i++) 1.1692 + info[i].syllable() = 0; 1.1693 +} 1.1694 + 1.1695 + 1.1696 +static bool 1.1697 +decompose_indic (const hb_ot_shape_normalize_context_t *c, 1.1698 + hb_codepoint_t ab, 1.1699 + hb_codepoint_t *a, 1.1700 + hb_codepoint_t *b) 1.1701 +{ 1.1702 + switch (ab) 1.1703 + { 1.1704 + /* Don't decompose these. */ 1.1705 + case 0x0931 : return false; 1.1706 + case 0x0B94 : return false; 1.1707 + 1.1708 + 1.1709 + /* 1.1710 + * Decompose split matras that don't have Unicode decompositions. 1.1711 + */ 1.1712 + 1.1713 + case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true; 1.1714 + case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true; 1.1715 + case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true; 1.1716 + case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true; 1.1717 + case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true; 1.1718 + case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true; 1.1719 + case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true; 1.1720 + case 0x1925 : *a = 0x1920; *b= 0x1923; return true; 1.1721 + case 0x1926 : *a = 0x1920; *b= 0x1924; return true; 1.1722 + case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true; 1.1723 + case 0x1112E : *a = 0x11127; *b= 0x11131; return true; 1.1724 + case 0x1112F : *a = 0x11127; *b= 0x11132; return true; 1.1725 +#if 0 1.1726 + /* This one has no decomposition in Unicode, but needs no decomposition either. */ 1.1727 + /* case 0x0AC9 : return false; */ 1.1728 + case 0x0B57 : *a = no decomp, -> RIGHT; return true; 1.1729 + case 0x1C29 : *a = no decomp, -> LEFT; return true; 1.1730 + case 0xA9C0 : *a = no decomp, -> RIGHT; return true; 1.1731 + case 0x111BF : *a = no decomp, -> ABOVE; return true; 1.1732 +#endif 1.1733 + } 1.1734 + 1.1735 + if ((ab == 0x0DDA || hb_in_range<hb_codepoint_t> (ab, 0x0DDC, 0x0DDE))) 1.1736 + { 1.1737 + /* 1.1738 + * Sinhala split matras... Let the fun begin. 1.1739 + * 1.1740 + * These four characters have Unicode decompositions. However, Uniscribe 1.1741 + * decomposes them "Khmer-style", that is, it uses the character itself to 1.1742 + * get the second half. The first half of all four decompositions is always 1.1743 + * U+0DD9. 1.1744 + * 1.1745 + * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are 1.1746 + * broken with Uniscribe. But we need to support them. As such, we only 1.1747 + * do the Uniscribe-style decomposition if the character is transformed into 1.1748 + * its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to 1.1749 + * Unicode decomposition. 1.1750 + * 1.1751 + * Note that we can't unconditionally use Unicode decomposition. That would 1.1752 + * break some other fonts, that are designed to work with Uniscribe, and 1.1753 + * don't have positioning features for the Unicode-style decomposition. 1.1754 + * 1.1755 + * Argh... 1.1756 + * 1.1757 + * The Uniscribe behavior is now documented in the newly published Sinhala 1.1758 + * spec in 2012: 1.1759 + * 1.1760 + * http://www.microsoft.com/typography/OpenTypeDev/sinhala/intro.htm#shaping 1.1761 + */ 1.1762 + 1.1763 + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data; 1.1764 + 1.1765 + hb_codepoint_t glyph; 1.1766 + 1.1767 + if (hb_options ().uniscribe_bug_compatible || 1.1768 + (c->font->get_glyph (ab, 0, &glyph) && 1.1769 + indic_plan->pstf.would_substitute (&glyph, 1, c->font->face))) 1.1770 + { 1.1771 + /* Ok, safe to use Uniscribe-style decomposition. */ 1.1772 + *a = 0x0DD9; 1.1773 + *b = ab; 1.1774 + return true; 1.1775 + } 1.1776 + } 1.1777 + 1.1778 + return c->unicode->decompose (ab, a, b); 1.1779 +} 1.1780 + 1.1781 +static bool 1.1782 +compose_indic (const hb_ot_shape_normalize_context_t *c, 1.1783 + hb_codepoint_t a, 1.1784 + hb_codepoint_t b, 1.1785 + hb_codepoint_t *ab) 1.1786 +{ 1.1787 + /* Avoid recomposing split matras. */ 1.1788 + if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) 1.1789 + return false; 1.1790 + 1.1791 + /* Composition-exclusion exceptions that we want to recompose. */ 1.1792 + if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; } 1.1793 + 1.1794 + return c->unicode->compose (a, b, ab); 1.1795 +} 1.1796 + 1.1797 + 1.1798 +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = 1.1799 +{ 1.1800 + "indic", 1.1801 + collect_features_indic, 1.1802 + override_features_indic, 1.1803 + data_create_indic, 1.1804 + data_destroy_indic, 1.1805 + NULL, /* preprocess_text */ 1.1806 + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, 1.1807 + decompose_indic, 1.1808 + compose_indic, 1.1809 + setup_masks_indic, 1.1810 + HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, 1.1811 + false, /* fallback_position */ 1.1812 +};