Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- |
michael@0 | 2 | * |
michael@0 | 3 | * This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | #ifndef nsBidi_h__ |
michael@0 | 8 | #define nsBidi_h__ |
michael@0 | 9 | |
michael@0 | 10 | #include "nsBidiUtils.h" |
michael@0 | 11 | |
michael@0 | 12 | // Bidi reordering engine from ICU |
michael@0 | 13 | /* |
michael@0 | 14 | * javadoc-style comments are intended to be transformed into HTML |
michael@0 | 15 | * using DOC++ - see |
michael@0 | 16 | * http://www.zib.de/Visual/software/doc++/index.html . |
michael@0 | 17 | * |
michael@0 | 18 | * The HTML documentation is created with |
michael@0 | 19 | * doc++ -H nsBidi.h |
michael@0 | 20 | */ |
michael@0 | 21 | |
michael@0 | 22 | /** |
michael@0 | 23 | * @mainpage BIDI algorithm for Mozilla (from ICU) |
michael@0 | 24 | * |
michael@0 | 25 | * <h2>BIDI algorithm for Mozilla</h2> |
michael@0 | 26 | * |
michael@0 | 27 | * This is an implementation of the Unicode Bidirectional algorithm. |
michael@0 | 28 | * The algorithm is defined in the |
michael@0 | 29 | * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>, |
michael@0 | 30 | * version 5, also described in The Unicode Standard, Version 3.0 .<p> |
michael@0 | 31 | * |
michael@0 | 32 | * <h3>General remarks about the API:</h3> |
michael@0 | 33 | * |
michael@0 | 34 | * The <quote>limit</quote> of a sequence of characters is the position just after their |
michael@0 | 35 | * last character, i.e., one more than that position.<p> |
michael@0 | 36 | * |
michael@0 | 37 | * Some of the API functions provide access to <quote>runs</quote>. |
michael@0 | 38 | * Such a <quote>run</quote> is defined as a sequence of characters |
michael@0 | 39 | * that are at the same embedding level |
michael@0 | 40 | * after performing the BIDI algorithm.<p> |
michael@0 | 41 | * |
michael@0 | 42 | * @author Markus W. Scherer. Ported to Mozilla by Simon Montagu |
michael@0 | 43 | * @version 1.0 |
michael@0 | 44 | */ |
michael@0 | 45 | |
michael@0 | 46 | /** |
michael@0 | 47 | * nsBidiLevel is the type of the level values in this |
michael@0 | 48 | * Bidi implementation. |
michael@0 | 49 | * It holds an embedding level and indicates the visual direction |
michael@0 | 50 | * by its bit 0 (even value: left-to-right, odd value: right-to-left).<p> |
michael@0 | 51 | * |
michael@0 | 52 | * It can also hold non-level values for the |
michael@0 | 53 | * <code>aParaLevel</code> and <code>aEmbeddingLevels</code> |
michael@0 | 54 | * arguments of <code>SetPara</code>; there: |
michael@0 | 55 | * <ul> |
michael@0 | 56 | * <li>bit 7 (<code>NSBIDI_LEVEL_OVERRIDE</code>) of an <code>aEmbeddingLevels[]</code> |
michael@0 | 57 | * value indicates whether the using application is |
michael@0 | 58 | * specifying the level of a character to <i>override</i> whatever the |
michael@0 | 59 | * Bidi implementation would resolve it to.</li> |
michael@0 | 60 | * <li><code>aParaLevel</code> can be set to the |
michael@0 | 61 | * pseudo-level values <code>NSBIDI_DEFAULT_LTR</code> |
michael@0 | 62 | * and <code>NSBIDI_DEFAULT_RTL</code>.</li></ul> |
michael@0 | 63 | * |
michael@0 | 64 | * @see nsBidi::SetPara |
michael@0 | 65 | * |
michael@0 | 66 | * <p>The related constants are not real, valid level values. |
michael@0 | 67 | * <code>NSBIDI_DEFAULT_XXX</code> can be used to specify |
michael@0 | 68 | * a default for the paragraph level for |
michael@0 | 69 | * when the <code>SetPara</code> function |
michael@0 | 70 | * shall determine it but there is no |
michael@0 | 71 | * strongly typed character in the input.<p> |
michael@0 | 72 | * |
michael@0 | 73 | * Note that the value for <code>NSBIDI_DEFAULT_LTR</code> is even |
michael@0 | 74 | * and the one for <code>NSBIDI_DEFAULT_RTL</code> is odd, |
michael@0 | 75 | * just like with normal LTR and RTL level values - |
michael@0 | 76 | * these special values are designed that way. Also, the implementation |
michael@0 | 77 | * assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd. |
michael@0 | 78 | * |
michael@0 | 79 | * @see NSBIDI_DEFAULT_LTR |
michael@0 | 80 | * @see NSBIDI_DEFAULT_RTL |
michael@0 | 81 | * @see NSBIDI_LEVEL_OVERRIDE |
michael@0 | 82 | * @see NSBIDI_MAX_EXPLICIT_LEVEL |
michael@0 | 83 | */ |
michael@0 | 84 | typedef uint8_t nsBidiLevel; |
michael@0 | 85 | |
michael@0 | 86 | /** Paragraph level setting (pseudo-level for the aParaLevel argument of SetPara). |
michael@0 | 87 | * The paragraph level is determined from the text; if there is no strong character, then set the paragraph level to 0 (left-to-right). |
michael@0 | 88 | */ |
michael@0 | 89 | #define NSBIDI_DEFAULT_LTR 0xfe |
michael@0 | 90 | |
michael@0 | 91 | /** Paragraph level setting (pseudo-level for the aParaLevel argument of SetPara). |
michael@0 | 92 | * The paragraph level is determined from the text; if there is no strong character, then set the paragraph level to 1 (right-to-left). |
michael@0 | 93 | */ |
michael@0 | 94 | #define NSBIDI_DEFAULT_RTL 0xff |
michael@0 | 95 | |
michael@0 | 96 | /** |
michael@0 | 97 | * Maximum explicit embedding level. |
michael@0 | 98 | * (The maximum resolved level can be up to <code>NSBIDI_MAX_EXPLICIT_LEVEL+1</code>). |
michael@0 | 99 | * The implementation assumes that this value is odd. |
michael@0 | 100 | */ |
michael@0 | 101 | #define NSBIDI_MAX_EXPLICIT_LEVEL 61 |
michael@0 | 102 | |
michael@0 | 103 | /** Bit flag (bit 7) for level input. |
michael@0 | 104 | * When set in an <code>aEmbeddingLevels[]</code> value, the level overrides the directional property of the corresponding character. |
michael@0 | 105 | */ |
michael@0 | 106 | #define NSBIDI_LEVEL_OVERRIDE 0x80 |
michael@0 | 107 | |
michael@0 | 108 | /** |
michael@0 | 109 | * <code>nsBidiDirection</code> values indicate the text direction. |
michael@0 | 110 | */ |
michael@0 | 111 | enum nsBidiDirection { |
michael@0 | 112 | /** All left-to-right text. This is a 0 value. */ |
michael@0 | 113 | NSBIDI_LTR, |
michael@0 | 114 | /** All right-to-left text. This is a 1 value. */ |
michael@0 | 115 | NSBIDI_RTL, |
michael@0 | 116 | /** Mixed-directional text. This is a 2 value. */ |
michael@0 | 117 | NSBIDI_MIXED |
michael@0 | 118 | }; |
michael@0 | 119 | /* typedef so that the enum can be named without the enum keyword */ |
michael@0 | 120 | typedef enum nsBidiDirection nsBidiDirection; |
michael@0 | 121 | |
michael@0 | 122 | /* miscellaneous definitions ------------------------------------------------ */ |
michael@0 | 123 | /** option flags for WriteReverse(); each flag is a separate bit (values 1, 2 and 8; no flag with value 4 is defined here) */ |
michael@0 | 124 | /** |
michael@0 | 125 | * option bit for WriteReverse(): |
michael@0 | 126 | * keep combining characters after their base characters in RTL runs |
michael@0 | 127 | * |
michael@0 | 128 | * @see WriteReverse |
michael@0 | 129 | */ |
michael@0 | 130 | #define NSBIDI_KEEP_BASE_COMBINING 1 |
michael@0 | 131 | |
michael@0 | 132 | /** |
michael@0 | 133 | * option bit for WriteReverse(): |
michael@0 | 134 | * replace characters with the "mirrored" property in RTL runs |
michael@0 | 135 | * by their mirror-image mappings |
michael@0 | 136 | * |
michael@0 | 137 | * @see WriteReverse |
michael@0 | 138 | */ |
michael@0 | 139 | #define NSBIDI_DO_MIRRORING 2 |
michael@0 | 140 | |
michael@0 | 141 | /** |
michael@0 | 142 | * option bit for WriteReverse(): |
michael@0 | 143 | * remove Bidi control characters |
michael@0 | 144 | * |
michael@0 | 145 | * @see WriteReverse |
michael@0 | 146 | */ |
michael@0 | 147 | #define NSBIDI_REMOVE_BIDI_CONTROLS 8 |
michael@0 | 148 | |
michael@0 | 149 | /* helper macros for each allocated array member: each expands to a GetMemory() call on the matching m...Memory / m...Size names, so it can only be used where those names and GetMemory are in scope. The DirProps and Levels variants pass (length) through unchanged; the Runs variant passes (length)*sizeof(Run). */ |
michael@0 | 150 | #define GETDIRPROPSMEMORY(length) \ |
michael@0 | 151 | GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, \ |
michael@0 | 152 | mMayAllocateText, (length)) |
michael@0 | 153 | |
michael@0 | 154 | #define GETLEVELSMEMORY(length) \ |
michael@0 | 155 | GetMemory((void **)&mLevelsMemory, &mLevelsSize, \ |
michael@0 | 156 | mMayAllocateText, (length)) |
michael@0 | 157 | |
michael@0 | 158 | #define GETRUNSMEMORY(length) \ |
michael@0 | 159 | GetMemory((void **)&mRunsMemory, &mRunsSize, \ |
michael@0 | 160 | mMayAllocateRuns, (length)*sizeof(Run)) |
michael@0 | 161 | |
michael@0 | 162 | /* additional macros used by constructor - always allow allocation: identical to the macros above except that they pass true instead of mMayAllocateText / mMayAllocateRuns */ |
michael@0 | 163 | #define GETINITIALDIRPROPSMEMORY(length) \ |
michael@0 | 164 | GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, \ |
michael@0 | 165 | true, (length)) |
michael@0 | 166 | |
michael@0 | 167 | #define GETINITIALLEVELSMEMORY(length) \ |
michael@0 | 168 | GetMemory((void **)&mLevelsMemory, &mLevelsSize, \ |
michael@0 | 169 | true, (length)) |
michael@0 | 170 | |
michael@0 | 171 | #define GETINITIALRUNSMEMORY(length) \ |
michael@0 | 172 | GetMemory((void **)&mRunsMemory, &mRunsSize, \ |
michael@0 | 173 | true, (length)*sizeof(Run)) |
michael@0 | 174 | |
michael@0 | 175 | /* |
michael@0 | 176 | * Sometimes, bit values are more appropriate |
michael@0 | 177 | * to deal with directionality properties. |
michael@0 | 178 | * Abbreviations in these macro names refer to names |
michael@0 | 179 | * used in the Bidi algorithm. |
michael@0 | 180 | */ |
michael@0 | 181 | typedef uint8_t DirProp; |
michael@0 | 182 | /* single-bit mask for the directional property value dir: bit number dir of an unsigned long */ |
michael@0 | 183 | #define DIRPROP_FLAG(dir) (1UL<<(dir)) |
michael@0 | 184 | |
michael@0 | 185 | /* special flag (bit 31) for multiple runs from explicit embedding codes */ |
michael@0 | 186 | #define DIRPROP_FLAG_MULTI_RUNS (1UL<<31) |
michael@0 | 187 | |
michael@0 | 188 | /* are there any characters that are LTR or RTL? (each mask is an OR of DIRPROP_FLAG() bits) */ |
michael@0 | 189 | #define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)) |
michael@0 | 190 | #define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)) |
michael@0 | 191 | |
michael@0 | 192 | /* explicit embedding codes: left-to-right (LRE, LRO), right-to-left (RLE, RLO), and the two overrides (LRO, RLO) */ |
michael@0 | 193 | #define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)) |
michael@0 | 194 | #define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)) |
michael@0 | 195 | #define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO)) |
michael@0 | 196 | /* all explicit codes including PDF, and the same set together with BN */ |
michael@0 | 197 | #define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF)) |
michael@0 | 198 | #define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT) |
michael@0 | 199 | |
michael@0 | 200 | /* paragraph and segment separators */ |
michael@0 | 201 | #define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S)) |
michael@0 | 202 | |
michael@0 | 203 | /* all types that are counted as White Space or Neutral in some steps */ |
michael@0 | 204 | #define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT) |
michael@0 | 205 | #define MASK_N (DIRPROP_FLAG(O_N)|MASK_WS) |
michael@0 | 206 | |
michael@0 | 207 | /* all types that are included in a sequence of European Terminators for (W5) */ |
michael@0 | 208 | #define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT) |
michael@0 | 209 | |
michael@0 | 210 | /* types that are neutrals or could become neutrals in (Wn) */ |
michael@0 | 211 | #define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N) |
michael@0 | 212 | |
michael@0 | 213 | /* |
michael@0 | 214 | * These types may be changed to "e", |
michael@0 | 215 | * the embedding type (L or R) of the run, |
michael@0 | 216 | * in the Bidi algorithm (N2) |
michael@0 | 217 | */ |
michael@0 | 218 | #define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N) |
michael@0 | 219 | |
michael@0 | 220 | /* the dirProp's L and R are defined to 0 and 1 values in nsCharType, so bit 0 of a level can be used directly as a DirProp */ |
michael@0 | 221 | #define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1)) |
michael@0 | 222 | /* true exactly for the values 0xfe and 0xff, i.e. NSBIDI_DEFAULT_LTR and NSBIDI_DEFAULT_RTL */ |
michael@0 | 223 | #define IS_DEFAULT_LEVEL(level) (((level)&0xfe)==0xfe) |
michael@0 | 224 | |
michael@0 | 225 | /* handle surrogate pairs --------------------------------------------------- */ |
michael@0 | 226 | /* a first surrogate is a code unit in 0xd800..0xdbff, a second surrogate one in 0xdc00..0xdfff */ |
michael@0 | 227 | #define IS_FIRST_SURROGATE(uchar) (((uchar)&0xfc00)==0xd800) |
michael@0 | 228 | #define IS_SECOND_SURROGATE(uchar) (((uchar)&0xfc00)==0xdc00) |
michael@0 | 229 | |
michael@0 | 230 | /* get the UTF-32 value directly from the surrogate pseudo-characters: (first<<10)+second minus a constant that removes both surrogate bases and adds 0x10000 */ |
michael@0 | 231 | #define SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) |
michael@0 | 232 | #define GET_UTF_32(first, second) (((first)<<10UL)+(second)-SURROGATE_OFFSET) |
michael@0 | 233 | |
michael@0 | 234 | /* value stored by the *_SAFE macros in this file when they reject their input */ |
michael@0 | 235 | #define UTF_ERROR_VALUE 0xffff |
michael@0 | 236 | /* definitions with forward iteration --------------------------------------- */ |
michael@0 | 237 | |
michael@0 | 238 | /* |
michael@0 | 239 | * all the macros that go forward assume that |
michael@0 | 240 | * the initial offset is 0<=i<length; |
michael@0 | 241 | * they update the offset |
michael@0 | 242 | */ |
michael@0 | 243 | |
michael@0 | 244 | /* fast versions, no error-checking */ |
michael@0 | 245 | /* Append code point c to s at offset i: one code unit if c<=0xffff, otherwise a surrogate pair. i is advanced by the number of units written. No bounds or range check is made, and c is evaluated more than once. */ |
michael@0 | 246 | #define UTF16_APPEND_CHAR_UNSAFE(s, i, c){ \ |
michael@0 | 247 | if((uint32_t)(c)<=0xffff) { \ |
michael@0 | 248 | (s)[(i)++]=(char16_t)(c); \ |
michael@0 | 249 | } else { \ |
michael@0 | 250 | (s)[(i)++]=(char16_t)(((c)>>10)+0xd7c0); \ |
michael@0 | 251 | (s)[(i)++]=(char16_t)(((c)&0x3ff)|0xdc00); \ |
michael@0 | 252 | } \ |
michael@0 | 253 | } |
michael@0 | 254 | |
michael@0 | 255 | /* safe versions with error-checking and optional regularity-checking */ |
michael@0 | 256 | /* Append code point c to s at offset i. A surrogate pair is written only if (i)+1<(length); otherwise, and for c>0x10ffff, a single UTF_ERROR_VALUE is stored. The single-unit stores are not checked against length, so the caller must ensure i<length. c is evaluated more than once. */ |
michael@0 | 257 | #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ |
michael@0 | 258 | if((uint32_t)(c)<=0xffff) { \ |
michael@0 | 259 | (s)[(i)++]=(char16_t)(c); \ |
michael@0 | 260 | } else if((uint32_t)(c)<=0x10ffff) { \ |
michael@0 | 261 | if((i)+1<(length)) { \ |
michael@0 | 262 | (s)[(i)++]=(char16_t)(((c)>>10)+0xd7c0); \ |
michael@0 | 263 | (s)[(i)++]=(char16_t)(((c)&0x3ff)|0xdc00); \ |
michael@0 | 264 | } else /* not enough space */ { \ |
michael@0 | 265 | (s)[(i)++]=UTF_ERROR_VALUE; \ |
michael@0 | 266 | } \ |
michael@0 | 267 | } else /* c>0x10ffff, write error value */ { \ |
michael@0 | 268 | (s)[(i)++]=UTF_ERROR_VALUE; \ |
michael@0 | 269 | } \ |
michael@0 | 270 | } |
michael@0 | 271 | |
michael@0 | 272 | /* definitions with backward iteration -------------------------------------- */ |
michael@0 | 273 | |
michael@0 | 274 | /* |
michael@0 | 275 | * all the macros that go backward assume that |
michael@0 | 276 | * the valid buffer range starts at offset 0 |
michael@0 | 277 | * and that the initial offset is 0<i<=length; |
michael@0 | 278 | * they update the offset |
michael@0 | 279 | */ |
michael@0 | 280 | |
michael@0 | 281 | /* fast versions, no error-checking */ |
michael@0 | 282 | |
michael@0 | 283 | /* |
michael@0 | 284 | * Get a single code point from an offset that points behind the last |
michael@0 | 285 | * of the code units that belong to that code point. |
michael@0 | 286 | * Assume 0<i<=length; i is decremented before each read. |
michael@0 | 287 | */ |
michael@0 | 288 | #define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ |
michael@0 | 289 | (c)=(s)[--(i)]; \ |
michael@0 | 290 | if(IS_SECOND_SURROGATE(c)) { \ |
michael@0 | 291 | (c)=GET_UTF_32((s)[--(i)], (c)); \ |
michael@0 | 292 | } \ |
michael@0 | 293 | } |
michael@0 | 294 | /* move i back by one code unit, and by a second one if the unit just stepped over is a second surrogate */ |
michael@0 | 295 | #define UTF16_BACK_1_UNSAFE(s, i) { \ |
michael@0 | 296 | if(IS_SECOND_SURROGATE((s)[--(i)])) { \ |
michael@0 | 297 | --(i); \ |
michael@0 | 298 | } \ |
michael@0 | 299 | } |
michael@0 | 300 | /* apply UTF16_BACK_1_UNSAFE n times; n is evaluated once into a local counter */ |
michael@0 | 301 | #define UTF16_BACK_N_UNSAFE(s, i, n) { \ |
michael@0 | 302 | int32_t __N=(n); \ |
michael@0 | 303 | while(__N>0) { \ |
michael@0 | 304 | UTF16_BACK_1_UNSAFE(s, i); \ |
michael@0 | 305 | --__N; \ |
michael@0 | 306 | } \ |
michael@0 | 307 | } |
michael@0 | 308 | |
michael@0 | 309 | /* safe versions with error-checking and optional regularity-checking */ |
michael@0 | 310 | /* Read the code point that ends just before offset i, never reading below offset start for the first half of a pair. i is decremented unconditionally for the first read, so the caller must ensure i>start. With strict set, an unmatched surrogate yields UTF_ERROR_VALUE; otherwise the lone surrogate is left in c. */ |
michael@0 | 311 | #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ |
michael@0 | 312 | (c)=(s)[--(i)]; \ |
michael@0 | 313 | if(IS_SECOND_SURROGATE(c)) { \ |
michael@0 | 314 | char16_t __c2; \ |
michael@0 | 315 | if((i)>(start) && IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ |
michael@0 | 316 | --(i); \ |
michael@0 | 317 | (c)=GET_UTF_32(__c2, (c)); \ |
michael@0 | 318 | /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() */ \ |
michael@0 | 319 | } else if(strict) {\ |
michael@0 | 320 | /* unmatched second surrogate */ \ |
michael@0 | 321 | (c)=UTF_ERROR_VALUE; \ |
michael@0 | 322 | } \ |
michael@0 | 323 | } else if(strict && IS_FIRST_SURROGATE(c)) { \ |
michael@0 | 324 | /* unmatched first surrogate */ \ |
michael@0 | 325 | (c)=UTF_ERROR_VALUE; \ |
michael@0 | 326 | /* else strict: (c)==0xfffe is caught by UTF_IS_ERROR() */ \ |
michael@0 | 327 | } \ |
michael@0 | 328 | } |
michael@0 | 329 | /* move i back by one code unit, and by a second one only if that completes a valid surrogate pair without going below start */ |
michael@0 | 330 | #define UTF16_BACK_1_SAFE(s, start, i) { \ |
michael@0 | 331 | if(IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && IS_FIRST_SURROGATE((s)[(i)-1])) { \ |
michael@0 | 332 | --(i); \ |
michael@0 | 333 | } \ |
michael@0 | 334 | } |
michael@0 | 335 | /* apply UTF16_BACK_1_SAFE up to n times, stopping early once i reaches start */ |
michael@0 | 336 | #define UTF16_BACK_N_SAFE(s, start, i, n) { \ |
michael@0 | 337 | int32_t __N=(n); \ |
michael@0 | 338 | while(__N>0 && (i)>(start)) { \ |
michael@0 | 339 | UTF16_BACK_1_SAFE(s, start, i); \ |
michael@0 | 340 | --__N; \ |
michael@0 | 341 | } \ |
michael@0 | 342 | } |
michael@0 | 343 | /* UTF_* names without an encoding width: each forwards its arguments unchanged to the UTF16_* macro of the same name */ |
michael@0 | 344 | #define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) |
michael@0 | 345 | #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) |
michael@0 | 346 | #define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) |
michael@0 | 347 | #define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) |
michael@0 | 348 | #define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) |
michael@0 | 349 | #define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) |
michael@0 | 350 | #define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) |
michael@0 | 351 | #define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) |
michael@0 | 352 | /* default names without a _SAFE/_UNSAFE suffix: these select the _SAFE variants; UTF_PREV_CHAR passes false for strict */ |
michael@0 | 353 | #define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, false) |
michael@0 | 354 | #define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i) |
michael@0 | 355 | #define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n) |
michael@0 | 356 | #define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c) |
michael@0 | 357 | |
michael@0 | 358 | /* Run structure for reordering --------------------------------------------- */ |
michael@0 | 359 | /* one run: its logical start index (with the odd-level flag packed into bit 31) and its visual limit */ |
michael@0 | 360 | typedef struct Run { |
michael@0 | 361 | int32_t logicalStart, /* first character of the run; bit 31 (b31) indicates even/odd level: set if the level is odd */ |
michael@0 | 362 | visualLimit; /* last visual position of the run +1 */ |
michael@0 | 363 | } Run; |
michael@0 | 364 | |
michael@0 | 365 | /* in a Run, logicalStart will get this bit set if the run level is odd */ |
michael@0 | 366 | #define INDEX_ODD_BIT (1UL<<31) |
michael@0 | 367 | /* pack/unpack helpers: bit 0 of level is moved to bit 31 of the index; every macro argument is parenthesized so that expression arguments expand correctly */ |
michael@0 | 368 | #define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)(level)<<31)) |
michael@0 | 369 | #define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)(level)<<31)) |
michael@0 | 370 | #define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT) |
michael@0 | 371 | /* read-only accessors: the index without the odd bit, the odd bit as 0 or 1, and tests on the odd bit */ |
michael@0 | 372 | #define GET_INDEX(x) ((x)&~INDEX_ODD_BIT) |
michael@0 | 373 | #define GET_ODD_BIT(x) ((uint32_t)(x)>>31) |
michael@0 | 374 | #define IS_ODD_RUN(x) (((x)&INDEX_ODD_BIT)!=0) |
michael@0 | 375 | #define IS_EVEN_RUN(x) (((x)&INDEX_ODD_BIT)==0) |
michael@0 | 376 | /* 32-bit unsigned type. NOTE(review): presumably holds an OR-ed set of DIRPROP_FLAG() bits - confirm against the code that uses Flags */ |
michael@0 | 377 | typedef uint32_t Flags; |
michael@0 | 378 | |
michael@0 | 379 | /** |
michael@0 | 380 | * This class holds information about a paragraph of text |
michael@0 | 381 | * with Bidi-algorithm-related details, or about one line of |
michael@0 | 382 | * such a paragraph.<p> |
michael@0 | 383 | * Reordering can be done on a line, or on a paragraph which is |
michael@0 | 384 | * then interpreted as one single line.<p> |
michael@0 | 385 | * |
michael@0 | 386 | * On construction, the class is initially empty. It is assigned |
michael@0 | 387 | * the Bidi properties of a paragraph by <code>SetPara</code> |
michael@0 | 388 | * or the Bidi properties of a line of a paragraph by |
michael@0 | 389 | * <code>SetLine</code>.<p> |
michael@0 | 390 | * A Bidi class can be reused for as long as it is not deallocated |
michael@0 | 391 | * by calling its destructor.<p> |
michael@0 | 392 | * <code>SetPara</code> will allocate additional memory for |
michael@0 | 393 | * internal structures as necessary. |
michael@0 | 394 | */ |
michael@0 | 395 | class nsBidi |
michael@0 | 396 | { |
michael@0 | 397 | public: |
michael@0 | 398 | /** @brief Default constructor. |
michael@0 | 399 | * |
michael@0 | 400 | * The nsBidi object is initially empty. It is assigned |
michael@0 | 401 | * the Bidi properties of a paragraph by <code>SetPara()</code> |
michael@0 | 402 | * or the Bidi properties of a line of a paragraph by |
michael@0 | 403 | * <code>SetLine()</code>.<p> |
michael@0 | 404 | * This object can be reused for as long as it is not destroyed.<p> |
michael@0 | 405 | * <code>SetPara()</code> will allocate additional memory for |
michael@0 | 406 | * internal structures as necessary. |
michael@0 | 407 | * |
michael@0 | 408 | */ |
michael@0 | 409 | nsBidi(); |
michael@0 | 410 | |
michael@0 | 411 | /** @brief Destructor. */ |
michael@0 | 412 | virtual ~nsBidi(); |
michael@0 | 413 | |
michael@0 | 414 | |
michael@0 | 415 | /** |
michael@0 | 416 | * Perform the Unicode Bidi algorithm. It is defined in the |
michael@0 | 417 | * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>, |
michael@0 | 418 | * version 5, |
michael@0 | 419 | * also described in The Unicode Standard, Version 3.0 .<p> |
michael@0 | 420 | * |
michael@0 | 421 | * This function takes a single plain text paragraph with or without |
michael@0 | 422 | * externally specified embedding levels from <quote>styled</quote> text |
michael@0 | 423 | * and computes the left-right-directionality of each character.<p> |
michael@0 | 424 | * |
michael@0 | 425 | * If the entire paragraph consists of text of only one direction, then |
michael@0 | 426 | * the function may not perform all the steps described by the algorithm, |
michael@0 | 427 | * i.e., some levels may not be the same as if all steps were performed. |
michael@0 | 428 | * This is not relevant for unidirectional text.<br> |
michael@0 | 429 | * For example, in pure LTR text with numbers the numbers would get |
michael@0 | 430 | * a resolved level of 2 higher than the surrounding text according to |
michael@0 | 431 | * the algorithm. This implementation may set all resolved levels to |
michael@0 | 432 | * the same value in such a case.<p> |
michael@0 | 433 | * |
michael@0 | 434 | * The text must be externally split into separate paragraphs (rule P1). |
michael@0 | 435 | * Paragraph separators (B) should appear at most at the very end. |
michael@0 | 436 | * |
michael@0 | 437 | * @param aText is a pointer to the single-paragraph text that the |
michael@0 | 438 | * Bidi algorithm will be performed on |
michael@0 | 439 | * (step (P1) of the algorithm is performed externally). |
michael@0 | 440 | * <strong>The text must be (at least) <code>aLength</code> long.</strong> |
michael@0 | 441 | * |
michael@0 | 442 | * @param aLength is the length of the text; if <code>aLength==-1</code> then |
michael@0 | 443 | * the text must be zero-terminated. |
michael@0 | 444 | * |
michael@0 | 445 | * @param aParaLevel specifies the default level for the paragraph; |
michael@0 | 446 | * it is typically 0 (LTR) or 1 (RTL). |
michael@0 | 447 | * If the function shall determine the paragraph level from the text, |
michael@0 | 448 | * then <code>aParaLevel</code> can be set to |
michael@0 | 449 | * either <code>NSBIDI_DEFAULT_LTR</code> |
michael@0 | 450 | * or <code>NSBIDI_DEFAULT_RTL</code>; |
michael@0 | 451 | * if there is no strongly typed character, then |
michael@0 | 452 | * the desired default is used (0 for LTR or 1 for RTL). |
michael@0 | 453 | * Any other value between 0 and <code>NSBIDI_MAX_EXPLICIT_LEVEL</code> is also valid, |
michael@0 | 454 | * with odd levels indicating RTL. |
michael@0 | 455 | * |
michael@0 | 456 | * @param aEmbeddingLevels (in) may be used to preset the embedding and override levels, |
michael@0 | 457 | * ignoring characters like LRE and PDF in the text. |
michael@0 | 458 | * A level overrides the directional property of its corresponding |
michael@0 | 459 | * (same index) character if the level has the |
michael@0 | 460 | * <code>NSBIDI_LEVEL_OVERRIDE</code> bit set.<p> |
michael@0 | 461 | * Except for that bit, it must be |
michael@0 | 462 | * <code>aParaLevel<=aEmbeddingLevels[]<=NSBIDI_MAX_EXPLICIT_LEVEL</code>.<p> |
michael@0 | 463 | * <strong>Caution: </strong>A copy of this pointer, not of the levels, |
michael@0 | 464 | * will be stored in the <code>nsBidi</code> object; |
michael@0 | 465 | * the <code>aEmbeddingLevels</code> array must not be |
michael@0 | 466 | * deallocated before the <code>nsBidi</code> object is destroyed or reused, |
michael@0 | 467 | * and the <code>aEmbeddingLevels</code> |
michael@0 | 468 | * should not be modified to avoid unexpected results on subsequent Bidi operations. |
michael@0 | 469 | * However, the <code>SetPara</code> and |
michael@0 | 470 | * <code>SetLine</code> functions may modify some or all of the levels.<p> |
michael@0 | 471 | * After the <code>nsBidi</code> object is reused or destroyed, the caller |
michael@0 | 472 | * must take care of the deallocation of the <code>aEmbeddingLevels</code> array.<p> |
michael@0 | 473 | * <strong>The <code>aEmbeddingLevels</code> array must be |
michael@0 | 474 | * at least <code>aLength</code> long.</strong> |
michael@0 | 475 | */ |
michael@0 | 476 | nsresult SetPara(const char16_t *aText, int32_t aLength, nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels); |
michael@0 | 477 | |
michael@0 | 478 | /** |
michael@0 | 479 | * Get the directionality of the text. |
michael@0 | 480 | * |
michael@0 | 481 | * @param aDirection receives a <code>NSBIDI_XXX</code> value that indicates if the entire text |
michael@0 | 482 | * represented by this object is unidirectional, |
michael@0 | 483 | * and which direction, or if it is mixed-directional. |
michael@0 | 484 | * |
michael@0 | 485 | * @see nsBidiDirection |
michael@0 | 486 | */ |
michael@0 | 487 | nsresult GetDirection(nsBidiDirection* aDirection); |
michael@0 | 488 | |
michael@0 | 489 | /** |
michael@0 | 490 | * Get the paragraph level of the text. |
michael@0 | 491 | * |
michael@0 | 492 | * @param aParaLevel receives a <code>NSBIDI_XXX</code> value indicating the paragraph level |
michael@0 | 493 | * |
michael@0 | 494 | * @see nsBidiLevel |
michael@0 | 495 | */ |
michael@0 | 496 | nsresult GetParaLevel(nsBidiLevel* aParaLevel); |
michael@0 | 497 | |
michael@0 | 498 | #ifdef FULL_BIDI_ENGINE |
michael@0 | 499 | /** |
michael@0 | 500 | * <code>SetLine</code> sets an <code>nsBidi</code> to |
michael@0 | 501 | * contain the reordering information, especially the resolved levels, |
michael@0 | 502 | * for all the characters in a line of text. This line of text is |
michael@0 | 503 | * specified by referring to an <code>nsBidi</code> object representing |
michael@0 | 504 | * this information for a paragraph of text, and by specifying |
michael@0 | 505 | * a range of indexes in this paragraph.<p> |
michael@0 | 506 | * In the new line object, the indexes will range from 0 to <code>aLimit-aStart</code>.<p> |
michael@0 | 507 | * |
michael@0 | 508 | * This is used after calling <code>SetPara</code> |
michael@0 | 509 | * for a paragraph, and after line-breaking on that paragraph. |
michael@0 | 510 | * It is not necessary if the paragraph is treated as a single line.<p> |
michael@0 | 511 | * |
michael@0 | 512 | * After line-breaking, rules (L1) and (L2) for the treatment of |
michael@0 | 513 | * trailing WS and for reordering are performed on |
michael@0 | 514 | * an <code>nsBidi</code> object that represents a line.<p> |
michael@0 | 515 | * |
michael@0 | 516 | * <strong>Important:</strong> the line <code>nsBidi</code> object shares data with |
michael@0 | 517 | * <code>aParaBidi</code>. |
michael@0 | 518 | * You must destroy or reuse this object before <code>aParaBidi</code>. |
michael@0 | 519 | * In other words, you must destroy or reuse the <code>nsBidi</code> object for a line |
michael@0 | 520 | * before the object for its parent paragraph. |
michael@0 | 521 | * |
michael@0 | 522 | * @param aParaBidi is the parent paragraph object. |
michael@0 | 523 | * |
michael@0 | 524 | * @param aStart is the line's first index into the paragraph text. |
michael@0 | 525 | * |
michael@0 | 526 | * @param aLimit is just behind the line's last index into the paragraph text |
michael@0 | 527 | * (its last index +1).<br> |
michael@0 | 528 | * It must be <code>0<=aStart<=aLimit<=</code>paragraph length. |
michael@0 | 529 | * |
michael@0 | 530 | * @see SetPara |
michael@0 | 531 | */ |
michael@0 | 532 | nsresult SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit); |
michael@0 | 533 | |
michael@0 | 534 | /** |
michael@0 | 535 | * Get the length of the text. |
michael@0 | 536 | * |
michael@0 | 537 | * @param aLength receives the length of the text that the nsBidi object was created for. |
michael@0 | 538 | */ |
michael@0 | 539 | nsresult GetLength(int32_t* aLength); |
michael@0 | 540 | |
michael@0 | 541 | /** |
michael@0 | 542 | * Get the level for one character. |
michael@0 | 543 | * |
michael@0 | 544 | * @param aCharIndex the index of a character. |
michael@0 | 545 | * |
michael@0 | 546 | * @param aLevel receives the level for the character at aCharIndex. |
michael@0 | 547 | * |
michael@0 | 548 | * @see nsBidiLevel |
michael@0 | 549 | */ |
michael@0 | 550 | nsresult GetLevelAt(int32_t aCharIndex, nsBidiLevel* aLevel); |
michael@0 | 551 | |
michael@0 | 552 | /** |
michael@0 | 553 | * Get an array of levels for each character.<p> |
michael@0 | 554 | * |
michael@0 | 555 | * Note that this function may allocate memory under some |
michael@0 | 556 | * circumstances, unlike <code>GetLevelAt</code>. |
michael@0 | 557 | * |
michael@0 | 558 | * @param aLevels receives a pointer to the levels array for the text, |
michael@0 | 559 | * or <code>nullptr</code> if an error occurs. |
michael@0 | 560 | * |
michael@0 | 561 | * @see nsBidiLevel |
michael@0 | 562 | */ |
michael@0 | 563 | nsresult GetLevels(nsBidiLevel** aLevels); |
michael@0 | 564 | #endif // FULL_BIDI_ENGINE |
michael@0 | 565 | /** |
michael@0 | 566 | * Get the bidirectional type for one character. |
michael@0 | 567 | * |
michael@0 | 568 | * @param aCharIndex the index of a character. |
michael@0 | 569 | * |
michael@0 | 570 | * @param aType receives the bidirectional type of the character at aCharIndex. |
michael@0 | 571 | */ |
michael@0 | 572 | nsresult GetCharTypeAt(int32_t aCharIndex, nsCharType* aType); |
michael@0 | 573 | |
michael@0 | 574 | /** |
michael@0 | 575 | * Get a logical run. |
michael@0 | 576 | * This function returns information about a run and is used |
michael@0 | 577 | * to retrieve runs in logical order.<p> |
michael@0 | 578 | * This is especially useful for line-breaking on a paragraph. |
michael@0 | 579 | * |
michael@0 | 580 | * @param aLogicalStart is the first character of the run. |
michael@0 | 581 | * |
michael@0 | 582 | * @param aLogicalLimit will receive the limit of the run. |
michael@0 | 583 | * The l-value that you point to here may be the |
michael@0 | 584 | * same expression (variable) as the one for |
michael@0 | 585 | * <code>aLogicalStart</code>. |
michael@0 | 586 | * This pointer can be <code>nullptr</code> if this |
michael@0 | 587 | * value is not necessary. |
michael@0 | 588 | * |
michael@0 | 589 | * @param aLevel will receive the level of the run. |
michael@0 | 590 | * This pointer can be <code>nullptr</code> if this |
michael@0 | 591 | * value is not necessary. |
michael@0 | 592 | */ |
michael@0 | 593 | nsresult GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimit, nsBidiLevel* aLevel); |
michael@0 | 594 | |
michael@0 | 595 | /** |
michael@0 | 596 | * Get the number of runs. |
michael@0 | 597 | * This function may invoke the actual reordering on the |
michael@0 | 598 | * <code>nsBidi</code> object, after <code>SetPara</code> |
michael@0 | 599 | * may have resolved only the levels of the text. Therefore, |
michael@0 | 600 | * <code>CountRuns</code> may have to allocate memory, |
michael@0 | 601 | * and may fail doing so. |
michael@0 | 602 | * |
michael@0 | 603 | * @param aRunCount will receive the number of runs. |
michael@0 | 604 | */ |
michael@0 | 605 | nsresult CountRuns(int32_t* aRunCount); |
michael@0 | 606 | |
michael@0 | 607 | /** |
michael@0 | 608 | * Get one run's logical start, length, and directionality. |
michael@0 | 609 | * The directionality is 0 for LTR or 1 for RTL. |
michael@0 | 610 | * In an RTL run, the character at the logical start is |
michael@0 | 611 | * visually on the right of the displayed run. |
michael@0 | 612 | * The length is the number of characters in the run.<p> |
michael@0 | 613 | * <code>CountRuns</code> should be called |
michael@0 | 614 | * before the runs are retrieved. |
michael@0 | 615 | * |
michael@0 | 616 | * @param aRunIndex is the number of the run in visual order, in the |
michael@0 | 617 | * range <code>[0..CountRuns-1]</code>. |
michael@0 | 618 | * |
michael@0 | 619 | * @param aLogicalStart will receive the first logical character index of the run. |
michael@0 | 620 | * The pointer may be <code>nullptr</code> if this index is not needed. |
michael@0 | 621 | * |
michael@0 | 622 | * @param aLength will receive the number of characters (at least one) in the run. |
michael@0 | 623 | * The pointer may be <code>nullptr</code> if this is not needed. |
michael@0 | 624 | * |
michael@0 | 625 | * @param aDirection will receive the directionality of the run, |
michael@0 | 626 | * <code>NSBIDI_LTR==0</code> or <code>NSBIDI_RTL==1</code>, |
michael@0 | 627 | * never <code>NSBIDI_MIXED</code>. |
michael@0 | 628 | * |
michael@0 | 629 | * @see CountRuns<p> |
michael@0 | 630 | * |
michael@0 | 631 | * Example: |
michael@0 | 632 | * @code |
michael@0 | 633 | * int32_t i, count, logicalStart, visualIndex=0, length; |
michael@0 | 634 | * nsBidiDirection dir; |
michael@0 | 635 | * pBidi->CountRuns(&count); |
michael@0 | 636 | * for(i=0; i<count; ++i) { |
michael@0 | 637 | * pBidi->GetVisualRun(i, &logicalStart, &length, &dir); |
michael@0 | 638 | * if(NSBIDI_LTR==dir) { |
michael@0 | 639 | * do { // LTR |
michael@0 | 640 | * show_char(text[logicalStart++], visualIndex++); |
michael@0 | 641 | * } while(--length>0); |
michael@0 | 642 | * } else { |
michael@0 | 643 | * logicalStart+=length; // logicalLimit |
michael@0 | 644 | * do { // RTL |
michael@0 | 645 | * show_char(text[--logicalStart], visualIndex++); |
michael@0 | 646 | * } while(--length>0); |
michael@0 | 647 | * } |
michael@0 | 648 | * } |
michael@0 | 649 | * @endcode |
michael@0 | 650 | * |
michael@0 | 651 | * Note that in right-to-left runs, code like this places |
michael@0 | 652 | * modifier letters before base characters and second surrogates |
michael@0 | 653 | * before first ones. |
michael@0 | 654 | */ |
michael@0 | 655 | nsresult GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart, int32_t* aLength, nsBidiDirection* aDirection); |
michael@0 | 656 | |
michael@0 | 657 | #ifdef FULL_BIDI_ENGINE |
michael@0 | 658 | /** |
michael@0 | 659 | * Get the visual position from a logical text position. |
michael@0 | 660 | * If such a mapping is used many times on the same |
michael@0 | 661 | * <code>nsBidi</code> object, then calling |
michael@0 | 662 | * <code>GetLogicalMap</code> is more efficient.<p> |
michael@0 | 663 | * |
michael@0 | 664 | * Note that in right-to-left runs, this mapping places |
michael@0 | 665 | * modifier letters before base characters and second surrogates |
michael@0 | 666 | * before first ones. |
michael@0 | 667 | * |
michael@0 | 668 | * @param aLogicalIndex is the index of a character in the text. |
michael@0 | 669 | * |
michael@0 | 670 | * @param aVisualIndex will receive the visual position of this character. |
michael@0 | 671 | * |
michael@0 | 672 | * @see GetLogicalMap |
michael@0 | 673 | * @see GetLogicalIndex |
michael@0 | 674 | */ |
michael@0 | 675 | nsresult GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex); |
michael@0 | 676 | |
michael@0 | 677 | /** |
michael@0 | 678 | * Get the logical text position from a visual position. |
michael@0 | 679 | * If such a mapping is used many times on the same |
michael@0 | 680 | * <code>nsBidi</code> object, then calling |
michael@0 | 681 | * <code>GetVisualMap</code> is more efficient.<p> |
michael@0 | 682 | * |
michael@0 | 683 | * This is the inverse function to <code>GetVisualIndex</code>. |
michael@0 | 684 | * |
michael@0 | 685 | * @param aVisualIndex is the visual position of a character. |
michael@0 | 686 | * |
michael@0 | 687 | * @param aLogicalIndex will receive the index of this character in the text. |
michael@0 | 688 | * |
michael@0 | 689 | * @see GetVisualMap |
michael@0 | 690 | * @see GetVisualIndex |
michael@0 | 691 | */ |
michael@0 | 692 | nsresult GetLogicalIndex(int32_t aVisualIndex, int32_t* aLogicalIndex); |
michael@0 | 693 | |
michael@0 | 694 | /** |
michael@0 | 695 | * Get a logical-to-visual index map (array) for the characters in the nsBidi |
michael@0 | 696 | * (paragraph or line) object. |
michael@0 | 697 | * |
michael@0 | 698 | * @param aIndexMap is a pointer to an array of <code>GetLength</code> |
michael@0 | 699 | * indexes which will reflect the reordering of the characters. |
michael@0 | 700 | * The array does not need to be initialized.<p> |
michael@0 | 701 | * The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>.<p> |
michael@0 | 702 | * |
michael@0 | 703 | * @see GetVisualMap |
michael@0 | 704 | * @see GetVisualIndex |
michael@0 | 705 | */ |
michael@0 | 706 | nsresult GetLogicalMap(int32_t *aIndexMap); |
michael@0 | 707 | |
michael@0 | 708 | /** |
michael@0 | 709 | * Get a visual-to-logical index map (array) for the characters in the nsBidi |
michael@0 | 710 | * (paragraph or line) object. |
michael@0 | 711 | * |
michael@0 | 712 | * @param aIndexMap is a pointer to an array of <code>GetLength</code> |
michael@0 | 713 | * indexes which will reflect the reordering of the characters. |
michael@0 | 714 | * The array does not need to be initialized.<p> |
michael@0 | 715 | * The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>.<p> |
michael@0 | 716 | * |
michael@0 | 717 | * @see GetLogicalMap |
michael@0 | 718 | * @see GetLogicalIndex |
michael@0 | 719 | */ |
michael@0 | 720 | nsresult GetVisualMap(int32_t *aIndexMap); |
michael@0 | 721 | |
michael@0 | 722 | /** |
michael@0 | 723 | * This is a convenience function that does not use a nsBidi object. |
michael@0 | 724 | * It is intended to be used when an application has determined the levels |
michael@0 | 725 | * of objects (character sequences) and just needs to have them reordered (L2). |
michael@0 | 726 | * This is equivalent to using <code>GetLogicalMap</code> on a |
michael@0 | 727 | * <code>nsBidi</code> object. |
michael@0 | 728 | * |
michael@0 | 729 | * @param aLevels is an array with <code>aLength</code> levels that have been determined by |
michael@0 | 730 | * the application. |
michael@0 | 731 | * |
michael@0 | 732 | * @param aLength is the number of levels in the array, or, semantically, |
michael@0 | 733 | * the number of objects to be reordered. |
michael@0 | 734 | * It must be <code>aLength>0</code>. |
michael@0 | 735 | * |
michael@0 | 736 | * @param aIndexMap is a pointer to an array of <code>aLength</code> |
michael@0 | 737 | * indexes which will reflect the reordering of the characters. |
michael@0 | 738 | * The array does not need to be initialized.<p> |
michael@0 | 739 | * The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>. |
michael@0 | 740 | */ |
michael@0 | 741 | static nsresult ReorderLogical(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap); |
michael@0 | 742 | #endif // FULL_BIDI_ENGINE |
michael@0 | 743 | /** |
michael@0 | 744 | * This is a convenience function that does not use a nsBidi object. |
michael@0 | 745 | * It is intended to be used when an application has determined the levels |
michael@0 | 746 | * of objects (character sequences) and just needs to have them reordered (L2). |
michael@0 | 747 | * This is equivalent to using <code>GetVisualMap</code> on a |
michael@0 | 748 | * <code>nsBidi</code> object. |
michael@0 | 749 | * |
michael@0 | 750 | * @param aLevels is an array with <code>aLength</code> levels that have been determined by |
michael@0 | 751 | * the application. |
michael@0 | 752 | * |
michael@0 | 753 | * @param aLength is the number of levels in the array, or, semantically, |
michael@0 | 754 | * the number of objects to be reordered. |
michael@0 | 755 | * It must be <code>aLength>0</code>. |
michael@0 | 756 | * |
michael@0 | 757 | * @param aIndexMap is a pointer to an array of <code>aLength</code> |
michael@0 | 758 | * indexes which will reflect the reordering of the characters. |
michael@0 | 759 | * The array does not need to be initialized.<p> |
michael@0 | 760 | * The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>. |
michael@0 | 761 | */ |
michael@0 | 762 | static nsresult ReorderVisual(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap); |
michael@0 | 763 | |
michael@0 | 764 | #ifdef FULL_BIDI_ENGINE |
michael@0 | 765 | /** |
michael@0 | 766 | * Invert an index map. |
michael@0 | 767 | * The one-to-one index mapping of the first map is inverted and written to |
michael@0 | 768 | * the second one. |
michael@0 | 769 | * |
michael@0 | 770 | * @param aSrcMap is an array with <code>aLength</code> indexes |
michael@0 | 771 | * which define the original mapping. |
michael@0 | 772 | * |
michael@0 | 773 | * @param aDestMap is an array with <code>aLength</code> indexes |
michael@0 | 774 | * which will be filled with the inverse mapping. |
michael@0 | 775 | * |
michael@0 | 776 | * @param aLength is the length of each array. |
michael@0 | 777 | */ |
michael@0 | 778 | nsresult InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength); |
michael@0 | 779 | #endif // FULL_BIDI_ENGINE |
michael@0 | 780 | /** |
michael@0 | 781 | * Reverse a Right-To-Left run of Unicode text. |
michael@0 | 782 | * |
michael@0 | 783 | * This function preserves the integrity of characters with multiple |
michael@0 | 784 | * code units and (optionally) modifier letters. |
michael@0 | 785 | * Characters can be replaced by mirror-image characters |
michael@0 | 786 | * in the destination buffer. Note that "real" mirroring has |
michael@0 | 787 | * to be done in a rendering engine by glyph selection |
michael@0 | 788 | * and that for many "mirrored" characters there are no |
michael@0 | 789 | * Unicode characters as mirror-image equivalents. |
michael@0 | 790 | * There are also options to insert or remove Bidi control |
michael@0 | 791 | * characters; see the description of the <code>aDestSize</code> |
michael@0 | 792 | * and <code>aOptions</code> parameters and of the option bit flags. |
michael@0 | 793 | * |
michael@0 | 794 | * Since no Bidi controls are inserted here, this function will never |
michael@0 | 795 | * write more than <code>aSrcLength</code> characters to <code>aDest</code>. |
michael@0 | 796 | * |
michael@0 | 797 | * @param aSrc A pointer to the RTL run text. |
michael@0 | 798 | * |
michael@0 | 799 | * @param aSrcLength The length of the RTL run. |
michael@0 | 800 | * If the <code>NSBIDI_REMOVE_BIDI_CONTROLS</code> option |
michael@0 | 801 | * is set, then the destination length may be less than |
michael@0 | 802 | * <code>aSrcLength</code>. |
michael@0 | 803 | * If this option is not set, then the destination length |
michael@0 | 804 | * will be exactly <code>aSrcLength</code>. |
michael@0 | 805 | * |
michael@0 | 806 | * @param aDest A pointer to where the reordered text is to be copied. |
michael@0 | 807 | * The <code>aSrcLength</code>-character buffers at <code>aSrc</code> |
michael@0 | 808 | * and <code>aDest</code> must not overlap. |
michael@0 | 809 | * |
michael@0 | 810 | * @param aOptions A bit set of options for the reordering that control |
michael@0 | 811 | * how the reordered text is written. |
michael@0 | 812 | * |
michael@0 | 813 | * @param aDestSize will receive the number of characters that were written to <code>aDest</code>. |
michael@0 | 814 | */ |
michael@0 | 815 | nsresult WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize); |
michael@0 | 816 | |
michael@0 | 817 | protected: |
michael@0 | 818 | friend class nsBidiPresUtils; |
michael@0 | 819 | |
michael@0 | 820 | /** length of the current text */ |
michael@0 | 821 | int32_t mLength; |
michael@0 | 822 | |
michael@0 | 823 | /** memory sizes in bytes */ |
michael@0 | 824 | size_t mDirPropsSize, mLevelsSize, mRunsSize; |
michael@0 | 825 | |
michael@0 | 826 | /** allocated memory */ |
michael@0 | 827 | DirProp* mDirPropsMemory; |
michael@0 | 828 | nsBidiLevel* mLevelsMemory; |
michael@0 | 829 | Run* mRunsMemory; |
michael@0 | 830 | |
michael@0 | 831 | /** indicators for whether memory may be allocated after construction */ |
michael@0 | 832 | bool mMayAllocateText, mMayAllocateRuns; |
michael@0 | 833 | |
michael@0 | 834 | const DirProp* mDirProps; |
michael@0 | 835 | nsBidiLevel* mLevels; |
michael@0 | 836 | |
michael@0 | 837 | /** the paragraph level */ |
michael@0 | 838 | nsBidiLevel mParaLevel; |
michael@0 | 839 | |
michael@0 | 840 | /** flags is a bit set for which directional properties are in the text */ |
michael@0 | 841 | Flags mFlags; |
michael@0 | 842 | |
michael@0 | 843 | /** the overall paragraph or line directionality - see nsBidiDirection */ |
michael@0 | 844 | nsBidiDirection mDirection; |
michael@0 | 845 | |
michael@0 | 846 | /** characters after mTrailingWSStart are WS and are */ |
michael@0 | 847 | /* implicitly at mParaLevel (rule (L1)) - levels may not reflect that */ |
michael@0 | 848 | int32_t mTrailingWSStart; |
michael@0 | 849 | |
michael@0 | 850 | /** fields for line reordering */ |
michael@0 | 851 | int32_t mRunCount; /* ==-1: runs not set up yet */ |
michael@0 | 852 | Run* mRuns; |
michael@0 | 853 | |
michael@0 | 854 | /** for non-mixed text, we only need a tiny array of runs (no malloc()) */ |
michael@0 | 855 | Run mSimpleRuns[1]; |
michael@0 | 856 | |
michael@0 | 857 | private: |
michael@0 | 858 | |
michael@0 | 859 | void Init(); |
michael@0 | 860 | |
michael@0 | 861 | bool GetMemory(void **aMemory, size_t* aSize, bool aMayAllocate, size_t aSizeNeeded); |
michael@0 | 862 | |
michael@0 | 863 | void Free(); |
michael@0 | 864 | |
michael@0 | 865 | void GetDirProps(const char16_t *aText); |
michael@0 | 866 | |
michael@0 | 867 | nsBidiDirection ResolveExplicitLevels(); |
michael@0 | 868 | |
michael@0 | 869 | nsresult CheckExplicitLevels(nsBidiDirection *aDirection); |
michael@0 | 870 | |
michael@0 | 871 | nsBidiDirection DirectionFromFlags(Flags aFlags); |
michael@0 | 872 | |
michael@0 | 873 | void ResolveImplicitLevels(int32_t aStart, int32_t aLimit, DirProp aSOR, DirProp aEOR); |
michael@0 | 874 | |
michael@0 | 875 | void AdjustWSLevels(); |
michael@0 | 876 | |
michael@0 | 877 | void SetTrailingWSStart(); |
michael@0 | 878 | |
michael@0 | 879 | bool GetRuns(); |
michael@0 | 880 | |
michael@0 | 881 | void GetSingleRun(nsBidiLevel aLevel); |
michael@0 | 882 | |
michael@0 | 883 | void ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel); |
michael@0 | 884 | |
michael@0 | 885 | static bool PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap, nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel); |
michael@0 | 886 | |
michael@0 | 887 | int32_t doWriteReverse(const char16_t *src, int32_t srcLength, |
michael@0 | 888 | char16_t *dest, uint16_t options); |
michael@0 | 889 | |
michael@0 | 890 | }; |
michael@0 | 891 | |
michael@0 | 892 | #endif // nsBidi_h__ |