1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/layout/base/nsBidi.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,892 @@ 1.4 +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- 1.5 + * 1.6 + * This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#ifndef nsBidi_h__ 1.11 +#define nsBidi_h__ 1.12 + 1.13 +#include "nsBidiUtils.h" 1.14 + 1.15 +// Bidi reordering engine from ICU 1.16 +/* 1.17 + * javadoc-style comments are intended to be transformed into HTML 1.18 + * using DOC++ - see 1.19 + * http://www.zib.de/Visual/software/doc++/index.html . 1.20 + * 1.21 + * The HTML documentation is created with 1.22 + * doc++ -H nsIBidi.h 1.23 + */ 1.24 + 1.25 +/** 1.26 + * @mainpage BIDI algorithm for Mozilla (from ICU) 1.27 + * 1.28 + * <h2>BIDI algorithm for Mozilla</h2> 1.29 + * 1.30 + * This is an implementation of the Unicode Bidirectional algorithm. 1.31 + * The algorithm is defined in the 1.32 + * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>, 1.33 + * version 5, also described in The Unicode Standard, Version 3.0 .<p> 1.34 + * 1.35 + * <h3>General remarks about the API:</h3> 1.36 + * 1.37 + * The <quote>limit</quote> of a sequence of characters is the position just after their 1.38 + * last character, i.e., one more than that position.<p> 1.39 + * 1.40 + * Some of the API functions provide access to <quote>runs</quote>. 1.41 + * Such a <quote>run</quote> is defined as a sequence of characters 1.42 + * that are at the same embedding level 1.43 + * after performing the BIDI algorithm.<p> 1.44 + * 1.45 + * @author Markus W. Scherer. Ported to Mozilla by Simon Montagu 1.46 + * @version 1.0 1.47 + */ 1.48 + 1.49 +/** 1.50 + * nsBidiLevel is the type of the level values in this 1.51 + * Bidi implementation. 
1.52 + * It holds an embedding level and indicates the visual direction 1.53 + * by its bit 0 (even/odd value).<p> 1.54 + * 1.55 + * It can also hold non-level values for the 1.56 + * <code>aParaLevel</code> and <code>aEmbeddingLevels</code> 1.57 + * arguments of <code>SetPara</code>; there: 1.58 + * <ul> 1.59 + * <li>bit 7 of an <code>aEmbeddingLevels[]</code> 1.60 + * value indicates whether the using application is 1.61 + * specifying the level of a character to <i>override</i> whatever the 1.62 + * Bidi implementation would resolve it to.</li> 1.63 + * <li><code>aParaLevel</code> can be set to the 1.64 + * pseudo-level values <code>NSBIDI_DEFAULT_LTR</code> 1.65 + * and <code>NSBIDI_DEFAULT_RTL</code>.</li></ul> 1.66 + * 1.67 + * @see nsIBidi::SetPara 1.68 + * 1.69 + * <p>The related constants are not real, valid level values. 1.70 + * <code>NSBIDI_DEFAULT_XXX</code> can be used to specify 1.71 + * a default for the paragraph level for 1.72 + * when the <code>SetPara</code> function 1.73 + * shall determine it but there is no 1.74 + * strongly typed character in the input.<p> 1.75 + * 1.76 + * Note that the value for <code>NSBIDI_DEFAULT_LTR</code> is even 1.77 + * and the one for <code>NSBIDI_DEFAULT_RTL</code> is odd, 1.78 + * just like with normal LTR and RTL level values - 1.79 + * these special values are designed that way. Also, the implementation 1.80 + * assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd. 1.81 + * 1.82 + * @see NSBIDI_DEFAULT_LTR 1.83 + * @see NSBIDI_DEFAULT_RTL 1.84 + * @see NSBIDI_LEVEL_OVERRIDE 1.85 + * @see NSBIDI_MAX_EXPLICIT_LEVEL 1.86 + */ 1.87 +typedef uint8_t nsBidiLevel; 1.88 + 1.89 +/** Paragraph level setting. 1.90 + * If there is no strong character, then set the paragraph level to 0 (left-to-right). 1.91 + */ 1.92 +#define NSBIDI_DEFAULT_LTR 0xfe 1.93 + 1.94 +/** Paragraph level setting. 1.95 + * If there is no strong character, then set the paragraph level to 1 (right-to-left). 
1.96 + */ 1.97 +#define NSBIDI_DEFAULT_RTL 0xff 1.98 + 1.99 +/** 1.100 + * Maximum explicit embedding level. 1.101 + * (The maximum resolved level can be up to <code>NSBIDI_MAX_EXPLICIT_LEVEL+1</code>). 1.102 + * 1.103 + */ 1.104 +#define NSBIDI_MAX_EXPLICIT_LEVEL 61 1.105 + 1.106 +/** Bit flag for level input. 1.107 + * Overrides directional properties. 1.108 + */ 1.109 +#define NSBIDI_LEVEL_OVERRIDE 0x80 1.110 + 1.111 +/** 1.112 + * <code>nsBidiDirection</code> values indicate the text direction. 1.113 + */ 1.114 +enum nsBidiDirection { 1.115 + /** All left-to-right text This is a 0 value. */ 1.116 + NSBIDI_LTR, 1.117 + /** All right-to-left text This is a 1 value. */ 1.118 + NSBIDI_RTL, 1.119 + /** Mixed-directional text. */ 1.120 + NSBIDI_MIXED 1.121 +}; 1.122 + 1.123 +typedef enum nsBidiDirection nsBidiDirection; 1.124 + 1.125 +/* miscellaneous definitions ------------------------------------------------ */ 1.126 +/** option flags for WriteReverse() */ 1.127 +/** 1.128 + * option bit for WriteReverse(): 1.129 + * keep combining characters after their base characters in RTL runs 1.130 + * 1.131 + * @see WriteReverse 1.132 + */ 1.133 +#define NSBIDI_KEEP_BASE_COMBINING 1 1.134 + 1.135 +/** 1.136 + * option bit for WriteReverse(): 1.137 + * replace characters with the "mirrored" property in RTL runs 1.138 + * by their mirror-image mappings 1.139 + * 1.140 + * @see WriteReverse 1.141 + */ 1.142 +#define NSBIDI_DO_MIRRORING 2 1.143 + 1.144 +/** 1.145 + * option bit for WriteReverse(): 1.146 + * remove Bidi control characters 1.147 + * 1.148 + * @see WriteReverse 1.149 + */ 1.150 +#define NSBIDI_REMOVE_BIDI_CONTROLS 8 1.151 + 1.152 +/* helper macros for each allocated array member */ 1.153 +#define GETDIRPROPSMEMORY(length) \ 1.154 + GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, \ 1.155 + mMayAllocateText, (length)) 1.156 + 1.157 +#define GETLEVELSMEMORY(length) \ 1.158 + GetMemory((void **)&mLevelsMemory, &mLevelsSize, \ 1.159 + mMayAllocateText, (length)) 
1.160 + 1.161 +#define GETRUNSMEMORY(length) \ 1.162 + GetMemory((void **)&mRunsMemory, &mRunsSize, \ 1.163 + mMayAllocateRuns, (length)*sizeof(Run)) 1.164 + 1.165 +/* additional macros used by constructor - always allow allocation */ 1.166 +#define GETINITIALDIRPROPSMEMORY(length) \ 1.167 + GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, \ 1.168 + true, (length)) 1.169 + 1.170 +#define GETINITIALLEVELSMEMORY(length) \ 1.171 + GetMemory((void **)&mLevelsMemory, &mLevelsSize, \ 1.172 + true, (length)) 1.173 + 1.174 +#define GETINITIALRUNSMEMORY(length) \ 1.175 + GetMemory((void **)&mRunsMemory, &mRunsSize, \ 1.176 + true, (length)*sizeof(Run)) 1.177 + 1.178 +/* 1.179 + * Sometimes, bit values are more appropriate 1.180 + * to deal with directionality properties. 1.181 + * Abbreviations in these macro names refer to names 1.182 + * used in the Bidi algorithm. 1.183 + */ 1.184 +typedef uint8_t DirProp; 1.185 + 1.186 +#define DIRPROP_FLAG(dir) (1UL<<(dir)) 1.187 + 1.188 +/* special flag for multiple runs from explicit embedding codes */ 1.189 +#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31) 1.190 + 1.191 +/* are there any characters that are LTR or RTL? 
*/ 1.192 +#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)) 1.193 +#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)) 1.194 + 1.195 +/* explicit embedding codes */ 1.196 +#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)) 1.197 +#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)) 1.198 +#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO)) 1.199 + 1.200 +#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF)) 1.201 +#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT) 1.202 + 1.203 +/* paragraph and segment separators */ 1.204 +#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S)) 1.205 + 1.206 +/* all types that are counted as White Space or Neutral in some steps */ 1.207 +#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT) 1.208 +#define MASK_N (DIRPROP_FLAG(O_N)|MASK_WS) 1.209 + 1.210 +/* all types that are included in a sequence of European Terminators for (W5) */ 1.211 +#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT) 1.212 + 1.213 +/* types that are neutrals or could becomes neutrals in (Wn) */ 1.214 +#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N) 1.215 + 1.216 +/* 1.217 + * These types may be changed to "e", 1.218 + * the embedding type (L or R) of the run, 1.219 + * in the Bidi algorithm (N2) 1.220 + */ 1.221 +#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N) 1.222 + 1.223 +/* the dirProp's L and R are defined to 0 and 1 values in nsCharType */ 1.224 +#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1)) 1.225 + 1.226 +#define IS_DEFAULT_LEVEL(level) (((level)&0xfe)==0xfe) 1.227 + 1.228 +/* handle surrogate pairs --------------------------------------------------- */ 1.229 + 1.230 +#define IS_FIRST_SURROGATE(uchar) (((uchar)&0xfc00)==0xd800) 1.231 +#define IS_SECOND_SURROGATE(uchar) (((uchar)&0xfc00)==0xdc00) 1.232 + 1.233 +/* get the UTF-32 value directly from the 
surrogate pseudo-characters */ 1.234 +#define SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 1.235 +#define GET_UTF_32(first, second) (((first)<<10UL)+(second)-SURROGATE_OFFSET) 1.236 + 1.237 + 1.238 +#define UTF_ERROR_VALUE 0xffff 1.239 +/* definitions with forward iteration --------------------------------------- */ 1.240 + 1.241 +/* 1.242 + * all the macros that go forward assume that 1.243 + * the initial offset is 0<=i<length; 1.244 + * they update the offset 1.245 + */ 1.246 + 1.247 +/* fast versions, no error-checking */ 1.248 + 1.249 +#define UTF16_APPEND_CHAR_UNSAFE(s, i, c){ \ 1.250 + if((uint32_t)(c)<=0xffff) { \ 1.251 + (s)[(i)++]=(char16_t)(c); \ 1.252 + } else { \ 1.253 + (s)[(i)++]=(char16_t)((c)>>10)+0xd7c0; \ 1.254 + (s)[(i)++]=(char16_t)(c)&0x3ff|0xdc00; \ 1.255 + } \ 1.256 +} 1.257 + 1.258 +/* safe versions with error-checking and optional regularity-checking */ 1.259 + 1.260 +#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ 1.261 + if((PRUInt32)(c)<=0xffff) { \ 1.262 + (s)[(i)++]=(char16_t)(c); \ 1.263 + } else if((PRUInt32)(c)<=0x10ffff) { \ 1.264 + if((i)+1<(length)) { \ 1.265 + (s)[(i)++]=(char16_t)((c)>>10)+0xd7c0; \ 1.266 + (s)[(i)++]=(char16_t)(c)&0x3ff|0xdc00; \ 1.267 + } else /* not enough space */ { \ 1.268 + (s)[(i)++]=UTF_ERROR_VALUE; \ 1.269 + } \ 1.270 + } else /* c>0x10ffff, write error value */ { \ 1.271 + (s)[(i)++]=UTF_ERROR_VALUE; \ 1.272 + } \ 1.273 +} 1.274 + 1.275 +/* definitions with backward iteration -------------------------------------- */ 1.276 + 1.277 +/* 1.278 + * all the macros that go backward assume that 1.279 + * the valid buffer range starts at offset 0 1.280 + * and that the initial offset is 0<i<=length; 1.281 + * they update the offset 1.282 + */ 1.283 + 1.284 +/* fast versions, no error-checking */ 1.285 + 1.286 +/* 1.287 + * Get a single code point from an offset that points behind the last 1.288 + * of the code units that belong to that code point. 1.289 + * Assume 0<=i<length. 
1.290 + */ 1.291 +#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ 1.292 + (c)=(s)[--(i)]; \ 1.293 + if(IS_SECOND_SURROGATE(c)) { \ 1.294 + (c)=GET_UTF_32((s)[--(i)], (c)); \ 1.295 + } \ 1.296 +} 1.297 + 1.298 +#define UTF16_BACK_1_UNSAFE(s, i) { \ 1.299 + if(IS_SECOND_SURROGATE((s)[--(i)])) { \ 1.300 + --(i); \ 1.301 + } \ 1.302 +} 1.303 + 1.304 +#define UTF16_BACK_N_UNSAFE(s, i, n) { \ 1.305 + int32_t __N=(n); \ 1.306 + while(__N>0) { \ 1.307 + UTF16_BACK_1_UNSAFE(s, i); \ 1.308 + --__N; \ 1.309 + } \ 1.310 +} 1.311 + 1.312 +/* safe versions with error-checking and optional regularity-checking */ 1.313 + 1.314 +#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 1.315 + (c)=(s)[--(i)]; \ 1.316 + if(IS_SECOND_SURROGATE(c)) { \ 1.317 + char16_t __c2; \ 1.318 + if((i)>(start) && IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 1.319 + --(i); \ 1.320 + (c)=GET_UTF_32(__c2, (c)); \ 1.321 + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() */ \ 1.322 + } else if(strict) {\ 1.323 + /* unmatched second surrogate */ \ 1.324 + (c)=UTF_ERROR_VALUE; \ 1.325 + } \ 1.326 + } else if(strict && IS_FIRST_SURROGATE(c)) { \ 1.327 + /* unmatched first surrogate */ \ 1.328 + (c)=UTF_ERROR_VALUE; \ 1.329 + /* else strict: (c)==0xfffe is caught by UTF_IS_ERROR() */ \ 1.330 + } \ 1.331 +} 1.332 + 1.333 +#define UTF16_BACK_1_SAFE(s, start, i) { \ 1.334 + if(IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && IS_FIRST_SURROGATE((s)[(i)-1])) { \ 1.335 + --(i); \ 1.336 + } \ 1.337 +} 1.338 + 1.339 +#define UTF16_BACK_N_SAFE(s, start, i, n) { \ 1.340 + int32_t __N=(n); \ 1.341 + while(__N>0 && (i)>(start)) { \ 1.342 + UTF16_BACK_1_SAFE(s, start, i); \ 1.343 + --__N; \ 1.344 + } \ 1.345 +} 1.346 + 1.347 +#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) 1.348 +#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) 1.349 +#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) 1.350 +#define UTF_BACK_1_SAFE(s, start, i) 
UTF16_BACK_1_SAFE(s, start, i) 1.351 +#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) 1.352 +#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) 1.353 +#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) 1.354 +#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 1.355 + 1.356 +#define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, false) 1.357 +#define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i) 1.358 +#define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n) 1.359 +#define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c) 1.360 + 1.361 +/* Run structure for reordering --------------------------------------------- */ 1.362 + 1.363 +typedef struct Run { 1.364 + int32_t logicalStart, /* first character of the run; b31 indicates even/odd level */ 1.365 + visualLimit; /* last visual position of the run +1 */ 1.366 +} Run; 1.367 + 1.368 +/* in a Run, logicalStart will get this bit set if the run level is odd */ 1.369 +#define INDEX_ODD_BIT (1UL<<31) 1.370 + 1.371 +#define MAKE_INDEX_ODD_PAIR(index, level) (index|((uint32_t)level<<31)) 1.372 +#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)level<<31)) 1.373 +#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT) 1.374 + 1.375 +#define GET_INDEX(x) (x&~INDEX_ODD_BIT) 1.376 +#define GET_ODD_BIT(x) ((uint32_t)x>>31) 1.377 +#define IS_ODD_RUN(x) ((x&INDEX_ODD_BIT)!=0) 1.378 +#define IS_EVEN_RUN(x) ((x&INDEX_ODD_BIT)==0) 1.379 + 1.380 +typedef uint32_t Flags; 1.381 + 1.382 +/** 1.383 + * This class holds information about a paragraph of text 1.384 + * with Bidi-algorithm-related details, or about one line of 1.385 + * such a paragraph.<p> 1.386 + * Reordering can be done on a line, or on a paragraph which is 1.387 + * then interpreted as one single line.<p> 1.388 + * 1.389 + * On construction, the class is initially empty. 
It is assigned 1.390 + * the Bidi properties of a paragraph by <code>SetPara</code> 1.391 + * or the Bidi properties of a line of a paragraph by 1.392 + * <code>SetLine</code>.<p> 1.393 + * A Bidi class can be reused for as long as it is not deallocated 1.394 + * by calling its destructor.<p> 1.395 + * <code>SetPara</code> will allocate additional memory for 1.396 + * internal structures as necessary. 1.397 + */ 1.398 +class nsBidi 1.399 +{ 1.400 +public: 1.401 + /** @brief Default constructor. 1.402 + * 1.403 + * The nsBidi object is initially empty. It is assigned 1.404 + * the Bidi properties of a paragraph by <code>SetPara()</code> 1.405 + * or the Bidi properties of a line of a paragraph by 1.406 + * <code>GetLine()</code>.<p> 1.407 + * This object can be reused for as long as it is not destroyed.<p> 1.408 + * <code>SetPara()</code> will allocate additional memory for 1.409 + * internal structures as necessary. 1.410 + * 1.411 + */ 1.412 + nsBidi(); 1.413 + 1.414 + /** @brief Destructor. */ 1.415 + virtual ~nsBidi(); 1.416 + 1.417 + 1.418 + /** 1.419 + * Perform the Unicode Bidi algorithm. It is defined in the 1.420 + * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>, 1.421 + * version 5, 1.422 + * also described in The Unicode Standard, Version 3.0 .<p> 1.423 + * 1.424 + * This function takes a single plain text paragraph with or without 1.425 + * externally specified embedding levels from <quote>styled</quote> text 1.426 + * and computes the left-right-directionality of each character.<p> 1.427 + * 1.428 + * If the entire paragraph consists of text of only one direction, then 1.429 + * the function may not perform all the steps described by the algorithm, 1.430 + * i.e., some levels may not be the same as if all steps were performed. 
1.431 + * This is not relevant for unidirectional text.<br> 1.432 + * For example, in pure LTR text with numbers the numbers would get 1.433 + * a resolved level of 2 higher than the surrounding text according to 1.434 + * the algorithm. This implementation may set all resolved levels to 1.435 + * the same value in such a case.<p> 1.436 + * 1.437 + * The text must be externally split into separate paragraphs (rule P1). 1.438 + * Paragraph separators (B) should appear at most at the very end. 1.439 + * 1.440 + * @param aText is a pointer to the single-paragraph text that the 1.441 + * Bidi algorithm will be performed on 1.442 + * (step (P1) of the algorithm is performed externally). 1.443 + * <strong>The text must be (at least) <code>aLength</code> long.</strong> 1.444 + * 1.445 + * @param aLength is the length of the text; if <code>aLength==-1</code> then 1.446 + * the text must be zero-terminated. 1.447 + * 1.448 + * @param aParaLevel specifies the default level for the paragraph; 1.449 + * it is typically 0 (LTR) or 1 (RTL). 1.450 + * If the function shall determine the paragraph level from the text, 1.451 + * then <code>aParaLevel</code> can be set to 1.452 + * either <code>NSBIDI_DEFAULT_LTR</code> 1.453 + * or <code>NSBIDI_DEFAULT_RTL</code>; 1.454 + * if there is no strongly typed character, then 1.455 + * the desired default is used (0 for LTR or 1 for RTL). 1.456 + * Any other value between 0 and <code>NSBIDI_MAX_EXPLICIT_LEVEL</code> is also valid, 1.457 + * with odd levels indicating RTL. 1.458 + * 1.459 + * @param aEmbeddingLevels (in) may be used to preset the embedding and override levels, 1.460 + * ignoring characters like LRE and PDF in the text. 
1.461 + * A level overrides the directional property of its corresponding 1.462 + * (same index) character if the level has the 1.463 + * <code>NSBIDI_LEVEL_OVERRIDE</code> bit set.<p> 1.464 + * Except for that bit, it must be 1.465 + * <code>aParaLevel<=aEmbeddingLevels[]<=NSBIDI_MAX_EXPLICIT_LEVEL</code>.<p> 1.466 + * <strong>Caution: </strong>A copy of this pointer, not of the levels, 1.467 + * will be stored in the <code>nsBidi</code> object; 1.468 + * the <code>aEmbeddingLevels</code> array must not be 1.469 + * deallocated before the <code>nsBidi</code> object is destroyed or reused, 1.470 + * and the <code>aEmbeddingLevels</code> 1.471 + * should not be modified to avoid unexpected results on subsequent Bidi operations. 1.472 + * However, the <code>SetPara</code> and 1.473 + * <code>SetLine</code> functions may modify some or all of the levels.<p> 1.474 + * After the <code>nsBidi</code> object is reused or destroyed, the caller 1.475 + * must take care of the deallocation of the <code>aEmbeddingLevels</code> array.<p> 1.476 + * <strong>The <code>aEmbeddingLevels</code> array must be 1.477 + * at least <code>aLength</code> long.</strong> 1.478 + */ 1.479 + nsresult SetPara(const char16_t *aText, int32_t aLength, nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels); 1.480 + 1.481 + /** 1.482 + * Get the directionality of the text. 1.483 + * 1.484 + * @param aDirection receives a <code>NSBIDI_XXX</code> value that indicates if the entire text 1.485 + * represented by this object is unidirectional, 1.486 + * and which direction, or if it is mixed-directional. 1.487 + * 1.488 + * @see nsBidiDirection 1.489 + */ 1.490 + nsresult GetDirection(nsBidiDirection* aDirection); 1.491 + 1.492 + /** 1.493 + * Get the paragraph level of the text. 
1.494 + * 1.495 + * @param aParaLevel receives a <code>NSBIDI_XXX</code> value indicating the paragraph level 1.496 + * 1.497 + * @see nsBidiLevel 1.498 + */ 1.499 + nsresult GetParaLevel(nsBidiLevel* aParaLevel); 1.500 + 1.501 +#ifdef FULL_BIDI_ENGINE 1.502 + /** 1.503 + * <code>SetLine</code> sets an <code>nsBidi</code> to 1.504 + * contain the reordering information, especially the resolved levels, 1.505 + * for all the characters in a line of text. This line of text is 1.506 + * specified by referring to an <code>nsBidi</code> object representing 1.507 + * this information for a paragraph of text, and by specifying 1.508 + * a range of indexes in this paragraph.<p> 1.509 + * In the new line object, the indexes will range from 0 to <code>aLimit-aStart</code>.<p> 1.510 + * 1.511 + * This is used after calling <code>SetPara</code> 1.512 + * for a paragraph, and after line-breaking on that paragraph. 1.513 + * It is not necessary if the paragraph is treated as a single line.<p> 1.514 + * 1.515 + * After line-breaking, rules (L1) and (L2) for the treatment of 1.516 + * trailing WS and for reordering are performed on 1.517 + * an <code>nsBidi</code> object that represents a line.<p> 1.518 + * 1.519 + * <strong>Important:</strong> the line <code>nsBidi</code> object shares data with 1.520 + * <code>aParaBidi</code>. 1.521 + * You must destroy or reuse this object before <code>aParaBidi</code>. 1.522 + * In other words, you must destroy or reuse the <code>nsBidi</code> object for a line 1.523 + * before the object for its parent paragraph. 1.524 + * 1.525 + * @param aParaBidi is the parent paragraph object. 1.526 + * 1.527 + * @param aStart is the line's first index into the paragraph text. 1.528 + * 1.529 + * @param aLimit is just behind the line's last index into the paragraph text 1.530 + * (its last index +1).<br> 1.531 + * It must be <code>0<=aStart<=aLimit<=</code>paragraph length. 
1.532 + * 1.533 + * @see SetPara 1.534 + */ 1.535 + nsresult SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit); 1.536 + 1.537 + /** 1.538 + * Get the length of the text. 1.539 + * 1.540 + * @param aLength receives the length of the text that the nsBidi object was created for. 1.541 + */ 1.542 + nsresult GetLength(int32_t* aLength); 1.543 + 1.544 + /** 1.545 + * Get the level for one character. 1.546 + * 1.547 + * @param aCharIndex the index of a character. 1.548 + * 1.549 + * @param aLevel receives the level for the character at aCharIndex. 1.550 + * 1.551 + * @see nsBidiLevel 1.552 + */ 1.553 + nsresult GetLevelAt(int32_t aCharIndex, nsBidiLevel* aLevel); 1.554 + 1.555 + /** 1.556 + * Get an array of levels for each character.<p> 1.557 + * 1.558 + * Note that this function may allocate memory under some 1.559 + * circumstances, unlike <code>GetLevelAt</code>. 1.560 + * 1.561 + * @param aLevels receives a pointer to the levels array for the text, 1.562 + * or <code>nullptr</code> if an error occurs. 1.563 + * 1.564 + * @see nsBidiLevel 1.565 + */ 1.566 + nsresult GetLevels(nsBidiLevel** aLevels); 1.567 +#endif // FULL_BIDI_ENGINE 1.568 + /** 1.569 + * Get the bidirectional type for one character. 1.570 + * 1.571 + * @param aCharIndex the index of a character. 1.572 + * 1.573 + * @param aType receives the bidirectional type of the character at aCharIndex. 1.574 + */ 1.575 + nsresult GetCharTypeAt(int32_t aCharIndex, nsCharType* aType); 1.576 + 1.577 + /** 1.578 + * Get a logical run. 1.579 + * This function returns information about a run and is used 1.580 + * to retrieve runs in logical order.<p> 1.581 + * This is especially useful for line-breaking on a paragraph. 1.582 + * 1.583 + * @param aLogicalStart is the first character of the run. 1.584 + * 1.585 + * @param aLogicalLimit will receive the limit of the run. 
1.586 + * The l-value that you point to here may be the 1.587 + * same expression (variable) as the one for 1.588 + * <code>aLogicalStart</code>. 1.589 + * This pointer can be <code>nullptr</code> if this 1.590 + * value is not necessary. 1.591 + * 1.592 + * @param aLevel will receive the level of the run. 1.593 + * This pointer can be <code>nullptr</code> if this 1.594 + * value is not necessary. 1.595 + */ 1.596 + nsresult GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimit, nsBidiLevel* aLevel); 1.597 + 1.598 + /** 1.599 + * Get the number of runs. 1.600 + * This function may invoke the actual reordering on the 1.601 + * <code>nsBidi</code> object, after <code>SetPara</code> 1.602 + * may have resolved only the levels of the text. Therefore, 1.603 + * <code>CountRuns</code> may have to allocate memory, 1.604 + * and may fail doing so. 1.605 + * 1.606 + * @param aRunCount will receive the number of runs. 1.607 + */ 1.608 + nsresult CountRuns(int32_t* aRunCount); 1.609 + 1.610 + /** 1.611 + * Get one run's logical start, length, and directionality, 1.612 + * which can be 0 for LTR or 1 for RTL. 1.613 + * In an RTL run, the character at the logical start is 1.614 + * visually on the right of the displayed run. 1.615 + * The length is the number of characters in the run.<p> 1.616 + * <code>CountRuns</code> should be called 1.617 + * before the runs are retrieved. 1.618 + * 1.619 + * @param aRunIndex is the number of the run in visual order, in the 1.620 + * range <code>[0..CountRuns-1]</code>. 1.621 + * 1.622 + * @param aLogicalStart is the first logical character index in the text. 1.623 + * The pointer may be <code>nullptr</code> if this index is not needed. 1.624 + * 1.625 + * @param aLength is the number of characters (at least one) in the run. 1.626 + * The pointer may be <code>nullptr</code> if this is not needed. 
1.627 + * 1.628 + * @param aDirection will receive the directionality of the run, 1.629 + * <code>NSBIDI_LTR==0</code> or <code>NSBIDI_RTL==1</code>, 1.630 + * never <code>NSBIDI_MIXED</code>. 1.631 + * 1.632 + * @see CountRuns<p> 1.633 + * 1.634 + * Example: 1.635 + * @code 1.636 + * int32_t i, count, logicalStart, visualIndex=0, length; 1.637 + * nsBidiDirection dir; 1.638 + * pBidi->CountRuns(&count); 1.639 + * for(i=0; i<count; ++i) { 1.640 + * pBidi->GetVisualRun(i, &logicalStart, &length, &dir); 1.641 + * if(NSBIDI_LTR==dir) { 1.642 + * do { // LTR 1.643 + * show_char(text[logicalStart++], visualIndex++); 1.644 + * } while(--length>0); 1.645 + * } else { 1.646 + * logicalStart+=length; // logicalLimit 1.647 + * do { // RTL 1.648 + * show_char(text[--logicalStart], visualIndex++); 1.649 + * } while(--length>0); 1.650 + * } 1.651 + * } 1.652 + * @endcode 1.653 + * 1.654 + * Note that in right-to-left runs, code like this places 1.655 + * modifier letters before base characters and second surrogates 1.656 + * before first ones. 1.657 + */ 1.658 + nsresult GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart, int32_t* aLength, nsBidiDirection* aDirection); 1.659 + 1.660 +#ifdef FULL_BIDI_ENGINE 1.661 + /** 1.662 + * Get the visual position from a logical text position. 1.663 + * If such a mapping is used many times on the same 1.664 + * <code>nsBidi</code> object, then calling 1.665 + * <code>GetLogicalMap</code> is more efficient.<p> 1.666 + * 1.667 + * Note that in right-to-left runs, this mapping places 1.668 + * modifier letters before base characters and second surrogates 1.669 + * before first ones. 1.670 + * 1.671 + * @param aLogicalIndex is the index of a character in the text. 1.672 + * 1.673 + * @param aVisualIndex will receive the visual position of this character. 
1.674 + * 1.675 + * @see GetLogicalMap 1.676 + * @see GetLogicalIndex 1.677 + */ 1.678 + nsresult GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex); 1.679 + 1.680 + /** 1.681 + * Get the logical text position from a visual position. 1.682 + * If such a mapping is used many times on the same 1.683 + * <code>nsBidi</code> object, then calling 1.684 + * <code>GetVisualMap</code> is more efficient.<p> 1.685 + * 1.686 + * This is the inverse function to <code>GetVisualIndex</code>. 1.687 + * 1.688 + * @param aVisualIndex is the visual position of a character. 1.689 + * 1.690 + * @param aLogicalIndex will receive the index of this character in the text. 1.691 + * 1.692 + * @see GetVisualMap 1.693 + * @see GetVisualIndex 1.694 + */ 1.695 + nsresult GetLogicalIndex(int32_t aVisualIndex, int32_t* aLogicalIndex); 1.696 + 1.697 + /** 1.698 + * Get a logical-to-visual index map (array) for the characters in the nsBidi 1.699 + * (paragraph or line) object. 1.700 + * 1.701 + * @param aIndexMap is a pointer to an array of <code>GetLength</code> 1.702 + * indexes which will reflect the reordering of the characters. 1.703 + * The array does not need to be initialized.<p> 1.704 + * The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>.<p> 1.705 + * 1.706 + * @see GetVisualMap 1.707 + * @see GetVisualIndex 1.708 + */ 1.709 + nsresult GetLogicalMap(int32_t *aIndexMap); 1.710 + 1.711 + /** 1.712 + * Get a visual-to-logical index map (array) for the characters in the nsBidi 1.713 + * (paragraph or line) object. 1.714 + * 1.715 + * @param aIndexMap is a pointer to an array of <code>GetLength</code> 1.716 + * indexes which will reflect the reordering of the characters. 
1.717 + * The array does not need to be initialized.<p> 1.718 + * The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>.<p> 1.719 + * 1.720 + * @see GetLogicalMap 1.721 + * @see GetLogicalIndex 1.722 + */ 1.723 + nsresult GetVisualMap(int32_t *aIndexMap); 1.724 + 1.725 + /** 1.726 + * This is a convenience function that does not use an nsBidi object. 1.727 + * It is intended for use when an application has determined the levels 1.728 + * of objects (character sequences) and just needs to have them reordered (L2). 1.729 + * This is equivalent to using <code>GetLogicalMap</code> on an 1.730 + * <code>nsBidi</code> object. 1.731 + * 1.732 + * @param aLevels is an array with <code>aLength</code> levels that have been determined by 1.733 + * the application. 1.734 + * 1.735 + * @param aLength is the number of levels in the array, or, semantically, 1.736 + * the number of objects to be reordered. 1.737 + * It must be <code>aLength>0</code>. 1.738 + * 1.739 + * @param aIndexMap is a pointer to an array of <code>aLength</code> 1.740 + * indexes which will reflect the reordering of the characters. 1.741 + * The array does not need to be initialized.<p> 1.742 + * The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>. 1.743 + */ 1.744 + static nsresult ReorderLogical(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap); 1.745 +#endif // FULL_BIDI_ENGINE 1.746 + /** 1.747 + * This is a convenience function that does not use an nsBidi object. 1.748 + * It is intended for use when an application has determined the levels 1.749 + * of objects (character sequences) and just needs to have them reordered (L2). 1.750 + * This is equivalent to using <code>GetVisualMap</code> on an 1.751 + * <code>nsBidi</code> object. 1.752 + * 1.753 + * @param aLevels is an array with <code>aLength</code> levels that have been determined by 1.754 + * the application. 
1.755 + * 1.756 + * @param aLength is the number of levels in the array, or, semantically, 1.757 + * the number of objects to be reordered. 1.758 + * It must be <code>aLength>0</code>. 1.759 + * 1.760 + * @param aIndexMap is a pointer to an array of <code>aLength</code> 1.761 + * indexes which will reflect the reordering of the characters. 1.762 + * The array does not need to be initialized.<p> 1.763 + * The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>. 1.764 + */ 1.765 + static nsresult ReorderVisual(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap); 1.766 + 1.767 +#ifdef FULL_BIDI_ENGINE 1.768 + /** 1.769 + * Invert an index map. 1.770 + * The one-to-one index mapping of the first map is inverted and written to 1.771 + * the second one. 1.772 + * 1.773 + * @param aSrcMap is an array with <code>aLength</code> indexes 1.774 + * which define the original mapping. 1.775 + * 1.776 + * @param aDestMap is an array with <code>aLength</code> indexes 1.777 + * which will be filled with the inverse mapping. 1.778 + * 1.779 + * @param aLength is the length of each array. 1.780 + */ 1.781 + nsresult InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength); 1.782 +#endif // FULL_BIDI_ENGINE 1.783 + /** 1.784 + * Reverse a Right-To-Left run of Unicode text. 1.785 + * 1.786 + * This function preserves the integrity of characters with multiple 1.787 + * code units and (optionally) modifier letters. 1.788 + * Characters can be replaced by mirror-image characters 1.789 + * in the destination buffer. Note that "real" mirroring has 1.790 + * to be done in a rendering engine by glyph selection 1.791 + * and that for many "mirrored" characters there are no 1.792 + * Unicode characters as mirror-image equivalents. 1.793 + * There are also options to insert or remove Bidi control 1.794 + * characters; see the description of the <code>aDestSize</code> 1.795 + * and <code>aOptions</code> parameters and of the option bit flags. 
1.796 + * 1.797 + * Since no Bidi controls are inserted here, this function will never 1.798 + * write more than <code>aSrcLength</code> characters to <code>aDest</code>. 1.799 + * 1.800 + * @param aSrc A pointer to the RTL run text. 1.801 + * 1.802 + * @param aSrcLength The length of the RTL run. 1.803 + * If the <code>NSBIDI_REMOVE_BIDI_CONTROLS</code> option 1.804 + * is set, then the destination length may be less than 1.805 + * <code>aSrcLength</code>. 1.806 + * If this option is not set, then the destination length 1.807 + * will be exactly <code>aSrcLength</code>. 1.808 + * 1.809 + * @param aDest A pointer to where the reordered text is to be copied. 1.810 + * <code>aSrc[aSrcLength]</code> and <code>aDest[aSrcLength]</code> 1.811 + * must not overlap. 1.812 + * 1.813 + * @param aOptions A bit set of options for the reordering that control 1.814 + * how the reordered text is written. 1.815 + * 1.816 + * @param aDestSize will receive the number of characters that were written to <code>aDest</code>. 
1.817 + */ 1.818 + nsresult WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize); 1.819 + 1.820 +protected: 1.821 + friend class nsBidiPresUtils; 1.822 + 1.823 + /** length of the current text */ 1.824 + int32_t mLength; 1.825 + 1.826 + /** sizes in bytes of the three allocated buffers declared below */ 1.827 + size_t mDirPropsSize, mLevelsSize, mRunsSize; 1.828 + 1.829 + /** allocated memory */ 1.830 + DirProp* mDirPropsMemory; 1.831 + nsBidiLevel* mLevelsMemory; 1.832 + Run* mRunsMemory; 1.833 + 1.834 + /** indicators for whether memory may be allocated after construction */ 1.835 + bool mMayAllocateText, mMayAllocateRuns; 1.836 + 1.837 + /** NOTE(review): presumably the directional properties and levels currently in use, which may alias mDirPropsMemory and mLevelsMemory - confirm against the implementation */ const DirProp* mDirProps; 1.838 + nsBidiLevel* mLevels; 1.839 + 1.840 + /** the paragraph level */ 1.841 + nsBidiLevel mParaLevel; 1.842 + 1.843 + /** mFlags is a bit set for which directional properties are in the text */ 1.844 + Flags mFlags; 1.845 + 1.846 + /** the overall paragraph or line directionality - see nsBidiDirection */ 1.847 + nsBidiDirection mDirection; 1.848 + 1.849 + /** characters after mTrailingWSStart are WS and are */ 1.850 + /* implicitly at mParaLevel (rule (L1)) - levels may not reflect that */ 1.851 + int32_t mTrailingWSStart; 1.852 + 1.853 + /** fields for line reordering */ 1.854 + int32_t mRunCount; /* ==-1: runs not set up yet */ 1.855 + Run* mRuns; 1.856 + 1.857 + /** for non-mixed text, we only need a tiny array of runs (no malloc()) */ 1.858 + Run mSimpleRuns[1]; 1.859 + 1.860 +private: /* internal helpers: only declared in this header, so their contracts must be read from the implementation file */ 1.861 + 1.862 + void Init(); 1.863 + 1.864 + bool GetMemory(void **aMemory, size_t* aSize, bool aMayAllocate, size_t aSizeNeeded); 1.865 + 1.866 + void Free(); 1.867 + 1.868 + void GetDirProps(const char16_t *aText); 1.869 + 1.870 + nsBidiDirection ResolveExplicitLevels(); 1.871 + 1.872 + nsresult CheckExplicitLevels(nsBidiDirection *aDirection); 1.873 + 1.874 + nsBidiDirection DirectionFromFlags(Flags aFlags); 1.875 + 1.876 + void ResolveImplicitLevels(int32_t aStart, int32_t aLimit, DirProp 
aSOR, DirProp aEOR); 1.877 + 1.878 + void AdjustWSLevels(); 1.879 + 1.880 + void SetTrailingWSStart(); 1.881 + 1.882 + bool GetRuns(); 1.883 + 1.884 + void GetSingleRun(nsBidiLevel aLevel); 1.885 + 1.886 + void ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel); 1.887 + 1.888 + static bool PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap, nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel); 1.889 + 1.890 + int32_t doWriteReverse(const char16_t *src, int32_t srcLength, 1.891 + char16_t *dest, uint16_t options); 1.892 + 1.893 +}; 1.894 + 1.895 +#endif // nsBidi_h__