layout/base/nsBidi.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
michael@0 2 *
michael@0 3 * This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 #ifndef nsBidi_h__
michael@0 8 #define nsBidi_h__
michael@0 9
michael@0 10 #include "nsBidiUtils.h"
michael@0 11
michael@0 12 // Bidi reordering engine from ICU
michael@0 13 /*
michael@0 14 * javadoc-style comments are intended to be transformed into HTML
michael@0 15 * using DOC++ - see
michael@0 16 * http://www.zib.de/Visual/software/doc++/index.html .
michael@0 17 *
michael@0 18 * The HTML documentation is created with
michael@0 19 * doc++ -H nsBidi.h
michael@0 20 */
michael@0 21
michael@0 22 /**
michael@0 23 * @mainpage BIDI algorithm for Mozilla (from ICU)
michael@0 24 *
michael@0 25 * <h2>BIDI algorithm for Mozilla</h2>
michael@0 26 *
michael@0 27 * This is an implementation of the Unicode Bidirectional algorithm.
michael@0 28 * The algorithm is defined in the
michael@0 29 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
michael@0 30 * version 5, also described in The Unicode Standard, Version 3.0 .<p>
michael@0 31 *
michael@0 32 * <h3>General remarks about the API:</h3>
michael@0 33 *
michael@0 34 * The <quote>limit</quote> of a sequence of characters is the position just after their
michael@0 35 * last character, i.e., one more than that position.<p>
michael@0 36 *
michael@0 37 * Some of the API functions provide access to <quote>runs</quote>.
michael@0 38 * Such a <quote>run</quote> is defined as a sequence of characters
michael@0 39 * that are at the same embedding level
michael@0 40 * after performing the BIDI algorithm.<p>
michael@0 41 *
michael@0 42 * @author Markus W. Scherer. Ported to Mozilla by Simon Montagu
michael@0 43 * @version 1.0
michael@0 44 */
michael@0 45
michael@0 46 /**
michael@0 47 * nsBidiLevel is the type of the level values in this
michael@0 48 * Bidi implementation.
michael@0 49 * It holds an embedding level and indicates the visual direction
michael@0 50 * by its bit 0 (even/odd value).<p>
michael@0 51 *
michael@0 52 * It can also hold non-level values for the
michael@0 53 * <code>aParaLevel</code> and <code>aEmbeddingLevels</code>
michael@0 54 * arguments of <code>SetPara</code>; there:
michael@0 55 * <ul>
michael@0 56 * <li>bit 7 of an <code>aEmbeddingLevels[]</code>
michael@0 57 * value indicates whether the using application is
michael@0 58 * specifying the level of a character to <i>override</i> whatever the
michael@0 59 * Bidi implementation would resolve it to.</li>
michael@0 60 * <li><code>aParaLevel</code> can be set to the
michael@0 61 * pseudo-level values <code>NSBIDI_DEFAULT_LTR</code>
michael@0 62 * and <code>NSBIDI_DEFAULT_RTL</code>.</li></ul>
michael@0 63 *
michael@0 64 * @see nsBidi::SetPara
michael@0 65 *
michael@0 66 * <p>The related constants are not real, valid level values.
michael@0 67 * <code>NSBIDI_DEFAULT_XXX</code> can be used to specify
michael@0 68 * a default for the paragraph level for
michael@0 69 * when the <code>SetPara</code> function
michael@0 70 * shall determine it but there is no
michael@0 71 * strongly typed character in the input.<p>
michael@0 72 *
michael@0 73 * Note that the value for <code>NSBIDI_DEFAULT_LTR</code> is even
michael@0 74 * and the one for <code>NSBIDI_DEFAULT_RTL</code> is odd,
michael@0 75 * just like with normal LTR and RTL level values -
michael@0 76 * these special values are designed that way. Also, the implementation
michael@0 77 * assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
michael@0 78 *
michael@0 79 * @see NSBIDI_DEFAULT_LTR
michael@0 80 * @see NSBIDI_DEFAULT_RTL
michael@0 81 * @see NSBIDI_LEVEL_OVERRIDE
michael@0 82 * @see NSBIDI_MAX_EXPLICIT_LEVEL
michael@0 83 */
michael@0 84 typedef uint8_t nsBidiLevel;
michael@0 85
michael@0 86 /** Paragraph level setting.
michael@0 87 * If there is no strong character, then set the paragraph level to 0 (left-to-right).
michael@0 88 */
michael@0 89 #define NSBIDI_DEFAULT_LTR 0xfe
michael@0 90
michael@0 91 /** Paragraph level setting.
michael@0 92 * If there is no strong character, then set the paragraph level to 1 (right-to-left).
michael@0 93 */
michael@0 94 #define NSBIDI_DEFAULT_RTL 0xff
michael@0 95
michael@0 96 /**
michael@0 97 * Maximum explicit embedding level.
michael@0 98 * (The maximum resolved level can be up to <code>NSBIDI_MAX_EXPLICIT_LEVEL+1</code>).
michael@0 99 *
michael@0 100 */
michael@0 101 #define NSBIDI_MAX_EXPLICIT_LEVEL 61
michael@0 102
michael@0 103 /** Bit flag for level input.
michael@0 104 * Overrides directional properties.
michael@0 105 */
michael@0 106 #define NSBIDI_LEVEL_OVERRIDE 0x80
michael@0 107
/**
 * <code>nsBidiDirection</code> values indicate the text direction.
 * The numeric values of <code>NSBIDI_LTR</code> (0) and
 * <code>NSBIDI_RTL</code> (1) are part of the documented contract.
 */
typedef enum nsBidiDirection {
  /** All left-to-right text. This is a 0 value. */
  NSBIDI_LTR = 0,
  /** All right-to-left text. This is a 1 value. */
  NSBIDI_RTL = 1,
  /** Mixed-directional text. */
  NSBIDI_MIXED = 2
} nsBidiDirection;
michael@0 121
michael@0 122 /* miscellaneous definitions ------------------------------------------------ */
michael@0 123 /** option flags for WriteReverse() */
michael@0 124 /**
michael@0 125 * option bit for WriteReverse():
michael@0 126 * keep combining characters after their base characters in RTL runs
michael@0 127 *
michael@0 128 * @see WriteReverse
michael@0 129 */
michael@0 130 #define NSBIDI_KEEP_BASE_COMBINING 1
michael@0 131
michael@0 132 /**
michael@0 133 * option bit for WriteReverse():
michael@0 134 * replace characters with the "mirrored" property in RTL runs
michael@0 135 * by their mirror-image mappings
michael@0 136 *
michael@0 137 * @see WriteReverse
michael@0 138 */
michael@0 139 #define NSBIDI_DO_MIRRORING 2
michael@0 140
michael@0 141 /**
michael@0 142 * option bit for WriteReverse():
michael@0 143 * remove Bidi control characters
michael@0 144 *
michael@0 145 * @see WriteReverse
michael@0 146 */
michael@0 147 #define NSBIDI_REMOVE_BIDI_CONTROLS 8
michael@0 148
/*
 * Helper macros, one per internally allocated array.
 *
 * Every macro forwards to GetMemory() with the address of the array
 * pointer, the address of the matching size member, a flag telling
 * whether allocation is allowed, and the requested size. The m* members
 * are referenced by bare name, so the macros only expand correctly in a
 * scope where those names (and GetMemory) are visible.
 * NOTE(review): the runs variants multiply the count by sizeof(Run) while
 * the other variants pass the length through unchanged, which suggests
 * the size is measured in bytes - confirm against GetMemory().
 */
#define NSBIDI_GETMEMORY_(memory, size, mayAllocate, bytes) \
  GetMemory((void **)&(memory), &(size), (mayAllocate), (bytes))

#define GETDIRPROPSMEMORY(length) \
  NSBIDI_GETMEMORY_(mDirPropsMemory, mDirPropsSize, mMayAllocateText, (length))

#define GETLEVELSMEMORY(length) \
  NSBIDI_GETMEMORY_(mLevelsMemory, mLevelsSize, mMayAllocateText, (length))

#define GETRUNSMEMORY(length) \
  NSBIDI_GETMEMORY_(mRunsMemory, mRunsSize, mMayAllocateRuns, (length)*sizeof(Run))

/* additional macros used by constructor - always allow allocation */
#define GETINITIALDIRPROPSMEMORY(length) \
  NSBIDI_GETMEMORY_(mDirPropsMemory, mDirPropsSize, true, (length))

#define GETINITIALLEVELSMEMORY(length) \
  NSBIDI_GETMEMORY_(mLevelsMemory, mLevelsSize, true, (length))

#define GETINITIALRUNSMEMORY(length) \
  NSBIDI_GETMEMORY_(mRunsMemory, mRunsSize, true, (length)*sizeof(Run))
michael@0 174
michael@0 175 /*
michael@0 176 * Sometimes, bit values are more appropriate
michael@0 177 * to deal with directionality properties.
michael@0 178 * Abbreviations in these macro names refer to names
michael@0 179 * used in the Bidi algorithm.
michael@0 180 */
michael@0 181 typedef uint8_t DirProp;
michael@0 182
michael@0 183 #define DIRPROP_FLAG(dir) (1UL<<(dir))
michael@0 184
michael@0 185 /* special flag for multiple runs from explicit embedding codes */
michael@0 186 #define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
michael@0 187
michael@0 188 /* are there any characters that are LTR or RTL? */
michael@0 189 #define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
michael@0 190 #define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
michael@0 191
michael@0 192 /* explicit embedding codes */
michael@0 193 #define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
michael@0 194 #define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
michael@0 195 #define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))
michael@0 196
michael@0 197 #define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
michael@0 198 #define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
michael@0 199
michael@0 200 /* paragraph and segment separators */
michael@0 201 #define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
michael@0 202
michael@0 203 /* all types that are counted as White Space or Neutral in some steps */
michael@0 204 #define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
michael@0 205 #define MASK_N (DIRPROP_FLAG(O_N)|MASK_WS)
michael@0 206
michael@0 207 /* all types that are included in a sequence of European Terminators for (W5) */
michael@0 208 #define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)
michael@0 209
michael@0 210 /* types that are neutrals or could become neutrals in (Wn) */
michael@0 211 #define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)
michael@0 212
michael@0 213 /*
michael@0 214 * These types may be changed to "e",
michael@0 215 * the embedding type (L or R) of the run,
michael@0 216 * in the Bidi algorithm (N2)
michael@0 217 */
michael@0 218 #define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
michael@0 219
michael@0 220 /* the dirProp's L and R are defined to 0 and 1 values in nsCharType */
michael@0 221 #define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
michael@0 222
michael@0 223 #define IS_DEFAULT_LEVEL(level) (((level)&0xfe)==0xfe)
michael@0 224
michael@0 225 /* handle surrogate pairs --------------------------------------------------- */
michael@0 226
michael@0 227 #define IS_FIRST_SURROGATE(uchar) (((uchar)&0xfc00)==0xd800)
michael@0 228 #define IS_SECOND_SURROGATE(uchar) (((uchar)&0xfc00)==0xdc00)
michael@0 229
michael@0 230 /* get the UTF-32 value directly from the surrogate pseudo-characters */
michael@0 231 #define SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
michael@0 232 #define GET_UTF_32(first, second) (((first)<<10UL)+(second)-SURROGATE_OFFSET)
michael@0 233
michael@0 234
/* code unit written by the checked macros in place of invalid input */
#define UTF_ERROR_VALUE 0xffff
/* definitions with forward iteration --------------------------------------- */

/*
 * all the macros that go forward assume that
 * the initial offset is 0<=i<length;
 * they update the offset
 *
 * Each macro expands to a brace-enclosed block and evaluates its
 * arguments more than once, so pass side-effect-free expressions and
 * do not use the macro as the unbraced body of an if that has an else.
 */

/* fast versions, no error-checking */

/*
 * Append code point c to the UTF-16 buffer s at offset i.
 * Values up to 0xffff are stored as one code unit; anything larger is
 * stored as a lead/trail surrogate pair. i is advanced by the number of
 * code units written. Neither the buffer capacity nor the range of c
 * is checked.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
  if((uint32_t)(c)<=0xffff) { \
    (s)[(i)++]=(char16_t)(c); \
  } else { \
    (s)[(i)++]=(char16_t)(((c)>>10)+0xd7c0); \
    (s)[(i)++]=(char16_t)(((c)&0x3ff)|0xdc00); \
  } \
}

/* safe versions with error-checking and optional regularity-checking */

/*
 * Append code point c to the UTF-16 buffer s at offset i, where length
 * is the limit of the buffer.
 * - c<=0xffff: one code unit is written.
 * - c<=0x10ffff: a surrogate pair is written if two units fit below
 *   length, otherwise a single UTF_ERROR_VALUE is written.
 * - c>0x10ffff: a single UTF_ERROR_VALUE is written.
 * At least one code unit is always written, so i<length must hold on
 * entry.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
  if((uint32_t)(c)<=0xffff) { \
    (s)[(i)++]=(char16_t)(c); \
  } else if((uint32_t)(c)<=0x10ffff) { \
    if((i)+1<(length)) { \
      (s)[(i)++]=(char16_t)(((c)>>10)+0xd7c0); \
      (s)[(i)++]=(char16_t)(((c)&0x3ff)|0xdc00); \
    } else /* not enough space */ { \
      (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
  } else /* c>0x10ffff, write error value */ { \
    (s)[(i)++]=UTF_ERROR_VALUE; \
  } \
}
michael@0 271
michael@0 272 /* definitions with backward iteration -------------------------------------- */
michael@0 273
michael@0 274 /*
michael@0 275 * all the macros that go backward assume that
michael@0 276 * the valid buffer range starts at offset 0
michael@0 277 * and that the initial offset is 0<i<=length;
michael@0 278 * they update the offset
michael@0 279 */
michael@0 280
michael@0 281 /* fast versions, no error-checking */
michael@0 282
/*
 * Get a single code point from an offset that points behind the last
 * of the code units that belong to that code point.
 * Assume 0<i<=length: the offset is decremented before it is used, so
 * i==0 would read in front of the buffer.
 * On exit i is the offset of the first code unit that was read.
 * Nothing is validated: a second (trail) surrogate is always combined
 * with whatever code unit precedes it.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
  (c)=(s)[--(i)]; \
  if(IS_SECOND_SURROGATE(c)) { \
    (c)=GET_UTF_32((s)[--(i)], (c)); \
  } \
}

/*
 * Move i back by one code point: one code unit, or two if the unit
 * just before i is a second (trail) surrogate. Same precondition as
 * above, 0<i<=length.
 */
#define UTF16_BACK_1_UNSAFE(s, i) { \
  if(IS_SECOND_SURROGATE((s)[--(i)])) { \
    --(i); \
  } \
}

/*
 * Move i back by n code points without any bounds checking; n<=0 leaves
 * i unchanged. n is evaluated once; the local counter avoids names with
 * a double underscore, which are reserved for the implementation.
 */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
  int32_t nsBidiBackCount_=(n); \
  while(nsBidiBackCount_>0) { \
    UTF16_BACK_1_UNSAFE(s, i); \
    --nsBidiBackCount_; \
  } \
}
michael@0 308
/* safe versions with error-checking and optional regularity-checking */

/*
 * Read the code point that ends just before offset i and move i back to
 * its first code unit; start is the lowest valid offset.
 * - A second (trail) surrogate preceded, at an offset >= start, by a
 *   first (lead) surrogate is combined into one supplementary code point.
 * - Any other surrogate is unmatched: with strict set c becomes
 *   UTF_ERROR_VALUE, otherwise c is the lone surrogate itself.
 * Assumes start<i on entry. The strict argument is parenthesized on
 * every use so that compound expressions such as (a || b) keep their
 * meaning.
 * The original notes say values like 0xfffe are left to be caught by
 * UTF_IS_ERROR(); that macro is not defined in this part of the header.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
  (c)=(s)[--(i)]; \
  if(IS_SECOND_SURROGATE(c)) { \
    char16_t nsBidiLead_; \
    if((i)>(start) && IS_FIRST_SURROGATE(nsBidiLead_=(s)[(i)-1])) { \
      --(i); \
      (c)=GET_UTF_32(nsBidiLead_, (c)); \
    } else if((strict)) { \
      /* unmatched second surrogate */ \
      (c)=UTF_ERROR_VALUE; \
    } \
  } else if((strict) && IS_FIRST_SURROGATE(c)) { \
    /* unmatched first surrogate */ \
    (c)=UTF_ERROR_VALUE; \
  } \
}

/*
 * Move i back by one code point without stepping below start: a second
 * surrogate is only skipped together with the preceding unit when that
 * unit is a first surrogate at an offset >= start.
 */
#define UTF16_BACK_1_SAFE(s, start, i) { \
  if(IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && IS_FIRST_SURROGATE((s)[(i)-1])) { \
    --(i); \
  } \
}

/*
 * Move i back by up to n code points, stopping early once i reaches
 * start. n is evaluated once; the local counter avoids names with a
 * double underscore, which are reserved for the implementation.
 */
#define UTF16_BACK_N_SAFE(s, start, i, n) { \
  int32_t nsBidiBackCount_=(n); \
  while(nsBidiBackCount_>0 && (i)>(start)) { \
    UTF16_BACK_1_SAFE(s, start, i); \
    --nsBidiBackCount_; \
  } \
}
michael@0 343
michael@0 344 #define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
michael@0 345 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
michael@0 346 #define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
michael@0 347 #define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
michael@0 348 #define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
michael@0 349 #define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
michael@0 350 #define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
michael@0 351 #define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
michael@0 352
michael@0 353 #define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, false)
michael@0 354 #define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i)
michael@0 355 #define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n)
michael@0 356 #define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c)
michael@0 357
michael@0 358 /* Run structure for reordering --------------------------------------------- */
michael@0 359
/*
 * One directional run as used for reordering.
 */
typedef struct Run {
  /* first character of the run; b31 indicates even/odd level */
  int32_t logicalStart;
  /* last visual position of the run +1 */
  int32_t visualLimit;
} Run;
michael@0 364
/*
 * In a Run, logicalStart will get this bit (bit 31) set if the run
 * level is odd.
 *
 * All macro arguments are parenthesized so that expression arguments
 * (e.g. a|b, level&1) are taken as a whole. Because the level is shifted
 * left by 31 in a 32-bit type, only its lowest bit survives; that is the
 * bit that tells odd from even.
 */
#define INDEX_ODD_BIT (1UL<<31)

/* combine an index with the odd bit taken from a level */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)(level)<<31))
/* set the odd bit of the l-value x in place if the level is odd */
#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)(level)<<31))
/* clear the odd bit of the l-value x in place */
#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)

/* value of x with the odd bit masked out */
#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
/* 1 if the odd bit of x is set, else 0 */
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x) (((x)&INDEX_ODD_BIT)!=0)
#define IS_EVEN_RUN(x) (((x)&INDEX_ODD_BIT)==0)
michael@0 376
michael@0 377 typedef uint32_t Flags;
michael@0 378
michael@0 379 /**
michael@0 380 * This class holds information about a paragraph of text
michael@0 381 * with Bidi-algorithm-related details, or about one line of
michael@0 382 * such a paragraph.<p>
michael@0 383 * Reordering can be done on a line, or on a paragraph which is
michael@0 384 * then interpreted as one single line.<p>
michael@0 385 *
michael@0 386 * On construction, the class is initially empty. It is assigned
michael@0 387 * the Bidi properties of a paragraph by <code>SetPara</code>
michael@0 388 * or the Bidi properties of a line of a paragraph by
michael@0 389 * <code>SetLine</code>.<p>
michael@0 390 * A Bidi class can be reused for as long as it is not deallocated
michael@0 391 * by calling its destructor.<p>
michael@0 392 * <code>SetPara</code> will allocate additional memory for
michael@0 393 * internal structures as necessary.
michael@0 394 */
michael@0 395 class nsBidi
michael@0 396 {
michael@0 397 public:
michael@0 398 /** @brief Default constructor.
michael@0 399 *
michael@0 400 * The nsBidi object is initially empty. It is assigned
michael@0 401 * the Bidi properties of a paragraph by <code>SetPara()</code>
michael@0 402 * or the Bidi properties of a line of a paragraph by
michael@0 403 * <code>SetLine()</code>.<p>
michael@0 404 * This object can be reused for as long as it is not destroyed.<p>
michael@0 405 * <code>SetPara()</code> will allocate additional memory for
michael@0 406 * internal structures as necessary.
michael@0 407 *
michael@0 408 */
michael@0 409 nsBidi();
michael@0 410
michael@0 411 /** @brief Destructor. */
michael@0 412 virtual ~nsBidi();
michael@0 413
michael@0 414
michael@0 415 /**
michael@0 416 * Perform the Unicode Bidi algorithm. It is defined in the
michael@0 417 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
michael@0 418 * version 5,
michael@0 419 * also described in The Unicode Standard, Version 3.0 .<p>
michael@0 420 *
michael@0 421 * This function takes a single plain text paragraph with or without
michael@0 422 * externally specified embedding levels from <quote>styled</quote> text
michael@0 423 * and computes the left-right-directionality of each character.<p>
michael@0 424 *
michael@0 425 * If the entire paragraph consists of text of only one direction, then
michael@0 426 * the function may not perform all the steps described by the algorithm,
michael@0 427 * i.e., some levels may not be the same as if all steps were performed.
michael@0 428 * This is not relevant for unidirectional text.<br>
michael@0 429 * For example, in pure LTR text with numbers the numbers would get
michael@0 430 * a resolved level of 2 higher than the surrounding text according to
michael@0 431 * the algorithm. This implementation may set all resolved levels to
michael@0 432 * the same value in such a case.<p>
michael@0 433 *
michael@0 434 * The text must be externally split into separate paragraphs (rule P1).
michael@0 435 * Paragraph separators (B) should appear at most at the very end.
michael@0 436 *
michael@0 437 * @param aText is a pointer to the single-paragraph text that the
michael@0 438 * Bidi algorithm will be performed on
michael@0 439 * (step (P1) of the algorithm is performed externally).
michael@0 440 * <strong>The text must be (at least) <code>aLength</code> long.</strong>
michael@0 441 *
michael@0 442 * @param aLength is the length of the text; if <code>aLength==-1</code> then
michael@0 443 * the text must be zero-terminated.
michael@0 444 *
michael@0 445 * @param aParaLevel specifies the default level for the paragraph;
michael@0 446 * it is typically 0 (LTR) or 1 (RTL).
michael@0 447 * If the function shall determine the paragraph level from the text,
michael@0 448 * then <code>aParaLevel</code> can be set to
michael@0 449 * either <code>NSBIDI_DEFAULT_LTR</code>
michael@0 450 * or <code>NSBIDI_DEFAULT_RTL</code>;
michael@0 451 * if there is no strongly typed character, then
michael@0 452 * the desired default is used (0 for LTR or 1 for RTL).
michael@0 453 * Any other value between 0 and <code>NSBIDI_MAX_EXPLICIT_LEVEL</code> is also valid,
michael@0 454 * with odd levels indicating RTL.
michael@0 455 *
michael@0 456 * @param aEmbeddingLevels (in) may be used to preset the embedding and override levels,
michael@0 457 * ignoring characters like LRE and PDF in the text.
michael@0 458 * A level overrides the directional property of its corresponding
michael@0 459 * (same index) character if the level has the
michael@0 460 * <code>NSBIDI_LEVEL_OVERRIDE</code> bit set.<p>
michael@0 461 * Except for that bit, it must be
michael@0 462 * <code>aParaLevel<=aEmbeddingLevels[]<=NSBIDI_MAX_EXPLICIT_LEVEL</code>.<p>
michael@0 463 * <strong>Caution: </strong>A copy of this pointer, not of the levels,
michael@0 464 * will be stored in the <code>nsBidi</code> object;
michael@0 465 * the <code>aEmbeddingLevels</code> array must not be
michael@0 466 * deallocated before the <code>nsBidi</code> object is destroyed or reused,
michael@0 467 * and the <code>aEmbeddingLevels</code>
michael@0 468 * should not be modified to avoid unexpected results on subsequent Bidi operations.
michael@0 469 * However, the <code>SetPara</code> and
michael@0 470 * <code>SetLine</code> functions may modify some or all of the levels.<p>
michael@0 471 * After the <code>nsBidi</code> object is reused or destroyed, the caller
michael@0 472 * must take care of the deallocation of the <code>aEmbeddingLevels</code> array.<p>
michael@0 473 * <strong>The <code>aEmbeddingLevels</code> array must be
michael@0 474 * at least <code>aLength</code> long.</strong>
michael@0 475 */
michael@0 476 nsresult SetPara(const char16_t *aText, int32_t aLength, nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels);
michael@0 477
michael@0 478 /**
michael@0 479 * Get the directionality of the text.
michael@0 480 *
michael@0 481 * @param aDirection receives a <code>NSBIDI_XXX</code> value that indicates if the entire text
michael@0 482 * represented by this object is unidirectional,
michael@0 483 * and which direction, or if it is mixed-directional.
michael@0 484 *
michael@0 485 * @see nsBidiDirection
michael@0 486 */
michael@0 487 nsresult GetDirection(nsBidiDirection* aDirection);
michael@0 488
michael@0 489 /**
michael@0 490 * Get the paragraph level of the text.
michael@0 491 *
michael@0 492 * @param aParaLevel receives a <code>NSBIDI_XXX</code> value indicating the paragraph level
michael@0 493 *
michael@0 494 * @see nsBidiLevel
michael@0 495 */
michael@0 496 nsresult GetParaLevel(nsBidiLevel* aParaLevel);
michael@0 497
michael@0 498 #ifdef FULL_BIDI_ENGINE
michael@0 499 /**
michael@0 500 * <code>SetLine</code> sets an <code>nsBidi</code> to
michael@0 501 * contain the reordering information, especially the resolved levels,
michael@0 502 * for all the characters in a line of text. This line of text is
michael@0 503 * specified by referring to an <code>nsBidi</code> object representing
michael@0 504 * this information for a paragraph of text, and by specifying
michael@0 505 * a range of indexes in this paragraph.<p>
michael@0 506 * In the new line object, the indexes will range from 0 to <code>aLimit-aStart</code>.<p>
michael@0 507 *
michael@0 508 * This is used after calling <code>SetPara</code>
michael@0 509 * for a paragraph, and after line-breaking on that paragraph.
michael@0 510 * It is not necessary if the paragraph is treated as a single line.<p>
michael@0 511 *
michael@0 512 * After line-breaking, rules (L1) and (L2) for the treatment of
michael@0 513 * trailing WS and for reordering are performed on
michael@0 514 * an <code>nsBidi</code> object that represents a line.<p>
michael@0 515 *
michael@0 516 * <strong>Important:</strong> the line <code>nsBidi</code> object shares data with
michael@0 517 * <code>aParaBidi</code>.
michael@0 518 * You must destroy or reuse this object before <code>aParaBidi</code>.
michael@0 519 * In other words, you must destroy or reuse the <code>nsBidi</code> object for a line
michael@0 520 * before the object for its parent paragraph.
michael@0 521 *
michael@0 522 * @param aParaBidi is the parent paragraph object.
michael@0 523 *
michael@0 524 * @param aStart is the line's first index into the paragraph text.
michael@0 525 *
michael@0 526 * @param aLimit is just behind the line's last index into the paragraph text
michael@0 527 * (its last index +1).<br>
michael@0 528 * It must be <code>0<=aStart<=aLimit<=</code>paragraph length.
michael@0 529 *
michael@0 530 * @see SetPara
michael@0 531 */
michael@0 532 nsresult SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit);
michael@0 533
michael@0 534 /**
michael@0 535 * Get the length of the text.
michael@0 536 *
michael@0 537 * @param aLength receives the length of the text that the nsBidi object was created for.
michael@0 538 */
michael@0 539 nsresult GetLength(int32_t* aLength);
michael@0 540
michael@0 541 /**
michael@0 542 * Get the level for one character.
michael@0 543 *
michael@0 544 * @param aCharIndex the index of a character.
michael@0 545 *
michael@0 546 * @param aLevel receives the level for the character at aCharIndex.
michael@0 547 *
michael@0 548 * @see nsBidiLevel
michael@0 549 */
michael@0 550 nsresult GetLevelAt(int32_t aCharIndex, nsBidiLevel* aLevel);
michael@0 551
michael@0 552 /**
michael@0 553 * Get an array of levels for each character.<p>
michael@0 554 *
michael@0 555 * Note that this function may allocate memory under some
michael@0 556 * circumstances, unlike <code>GetLevelAt</code>.
michael@0 557 *
michael@0 558 * @param aLevels receives a pointer to the levels array for the text,
michael@0 559 * or <code>nullptr</code> if an error occurs.
michael@0 560 *
michael@0 561 * @see nsBidiLevel
michael@0 562 */
michael@0 563 nsresult GetLevels(nsBidiLevel** aLevels);
michael@0 564 #endif // FULL_BIDI_ENGINE
michael@0 565 /**
michael@0 566 * Get the bidirectional type for one character.
michael@0 567 *
michael@0 568 * @param aCharIndex the index of a character.
michael@0 569 *
michael@0 570 * @param aType receives the bidirectional type of the character at aCharIndex.
michael@0 571 */
michael@0 572 nsresult GetCharTypeAt(int32_t aCharIndex, nsCharType* aType);
michael@0 573
michael@0 574 /**
michael@0 575 * Get a logical run.
michael@0 576 * This function returns information about a run and is used
michael@0 577 * to retrieve runs in logical order.<p>
michael@0 578 * This is especially useful for line-breaking on a paragraph.
michael@0 579 *
michael@0 580 * @param aLogicalStart is the first character of the run.
michael@0 581 *
michael@0 582 * @param aLogicalLimit will receive the limit of the run.
michael@0 583 * The l-value that you point to here may be the
michael@0 584 * same expression (variable) as the one for
michael@0 585 * <code>aLogicalStart</code>.
michael@0 586 * This pointer can be <code>nullptr</code> if this
michael@0 587 * value is not necessary.
michael@0 588 *
michael@0 589 * @param aLevel will receive the level of the run.
michael@0 590 * This pointer can be <code>nullptr</code> if this
michael@0 591 * value is not necessary.
michael@0 592 */
michael@0 593 nsresult GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimit, nsBidiLevel* aLevel);
michael@0 594
michael@0 595 /**
michael@0 596 * Get the number of runs.
michael@0 597 * This function may invoke the actual reordering on the
michael@0 598 * <code>nsBidi</code> object, after <code>SetPara</code>
michael@0 599 * may have resolved only the levels of the text. Therefore,
michael@0 600 * <code>CountRuns</code> may have to allocate memory,
michael@0 601 * and may fail doing so.
michael@0 602 *
michael@0 603 * @param aRunCount will receive the number of runs.
michael@0 604 */
michael@0 605 nsresult CountRuns(int32_t* aRunCount);
michael@0 606
michael@0 607 /**
michael@0 608 * Get one run's logical start, length, and directionality,
michael@0 609 * which can be 0 for LTR or 1 for RTL.
michael@0 610 * In an RTL run, the character at the logical start is
michael@0 611 * visually on the right of the displayed run.
michael@0 612 * The length is the number of characters in the run.<p>
michael@0 613 * <code>CountRuns</code> should be called
michael@0 614 * before the runs are retrieved.
michael@0 615 *
michael@0 616 * @param aRunIndex is the number of the run in visual order, in the
michael@0 617 * range <code>[0..CountRuns-1]</code>.
michael@0 618 *
michael@0 619 * @param aLogicalStart is the first logical character index in the text.
michael@0 620 * The pointer may be <code>nullptr</code> if this index is not needed.
michael@0 621 *
michael@0 622 * @param aLength is the number of characters (at least one) in the run.
michael@0 623 * The pointer may be <code>nullptr</code> if this is not needed.
michael@0 624 *
michael@0 625 * @param aDirection will receive the directionality of the run,
michael@0 626 * <code>NSBIDI_LTR==0</code> or <code>NSBIDI_RTL==1</code>,
michael@0 627 * never <code>NSBIDI_MIXED</code>.
michael@0 628 *
michael@0 629 * @see CountRuns<p>
michael@0 630 *
michael@0 631 * Example:
michael@0 632 * @code
michael@0 633 * int32_t i, count, logicalStart, visualIndex=0, length;
michael@0 634 * nsBidiDirection dir;
michael@0 635 * pBidi->CountRuns(&count);
michael@0 636 * for(i=0; i<count; ++i) {
michael@0 637 * pBidi->GetVisualRun(i, &logicalStart, &length, &dir);
michael@0 638 * if(NSBIDI_LTR==dir) {
michael@0 639 * do { // LTR
michael@0 640 * show_char(text[logicalStart++], visualIndex++);
michael@0 641 * } while(--length>0);
michael@0 642 * } else {
michael@0 643 * logicalStart+=length; // logicalLimit
michael@0 644 * do { // RTL
michael@0 645 * show_char(text[--logicalStart], visualIndex++);
michael@0 646 * } while(--length>0);
michael@0 647 * }
michael@0 648 * }
michael@0 649 * @endcode
michael@0 650 *
michael@0 651 * Note that in right-to-left runs, code like this places
michael@0 652 * modifier letters before base characters and second surrogates
michael@0 653 * before first ones.
michael@0 654 */
michael@0 655 nsresult GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart, int32_t* aLength, nsBidiDirection* aDirection);
michael@0 656
michael@0 657 #ifdef FULL_BIDI_ENGINE
michael@0 658 /**
michael@0 659 * Get the visual position from a logical text position.
michael@0 660 * If such a mapping is used many times on the same
michael@0 661 * <code>nsBidi</code> object, then calling
michael@0 662 * <code>GetLogicalMap</code> is more efficient.<p>
michael@0 663 *
michael@0 664 * Note that in right-to-left runs, this mapping places
michael@0 665 * modifier letters before base characters and second surrogates
michael@0 666 * before first ones.
michael@0 667 *
michael@0 668 * @param aLogicalIndex is the index of a character in the text.
michael@0 669 *
michael@0 670 * @param aVisualIndex will receive the visual position of this character.
michael@0 671 *
michael@0 672 * @see GetLogicalMap
michael@0 673 * @see GetLogicalIndex
michael@0 674 */
michael@0 675 nsresult GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex);
michael@0 676
michael@0 677 /**
michael@0 678 * Get the logical text position from a visual position.
michael@0 679 * If such a mapping is used many times on the same
michael@0 680 * <code>nsBidi</code> object, then calling
michael@0 681 * <code>GetVisualMap</code> is more efficient.<p>
michael@0 682 *
michael@0 683 * This is the inverse function to <code>GetVisualIndex</code>.
michael@0 684 *
michael@0 685 * @param aVisualIndex is the visual position of a character.
michael@0 686 *
michael@0 687 * @param aLogicalIndex will receive the index of this character in the text.
michael@0 688 *
michael@0 689 * @see GetVisualMap
michael@0 690 * @see GetVisualIndex
michael@0 691 */
michael@0 692 nsresult GetLogicalIndex(int32_t aVisualIndex, int32_t* aLogicalIndex);
michael@0 693
michael@0 694 /**
michael@0 695 * Get a logical-to-visual index map (array) for the characters in the nsBidi
michael@0 696 * (paragraph or line) object.
michael@0 697 *
michael@0 698 * @param aIndexMap is a pointer to an array of <code>GetLength</code>
michael@0 699 * indexes which will reflect the reordering of the characters.
michael@0 700 * The array does not need to be initialized.<p>
michael@0 701 * The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>.<p>
michael@0 702 *
michael@0 703 * @see GetVisualMap
michael@0 704 * @see GetVisualIndex
michael@0 705 */
michael@0 706 nsresult GetLogicalMap(int32_t *aIndexMap);
michael@0 707
michael@0 708 /**
michael@0 709 * Get a visual-to-logical index map (array) for the characters in the nsBidi
michael@0 710 * (paragraph or line) object.
michael@0 711 *
michael@0 712 * @param aIndexMap is a pointer to an array of <code>GetLength</code>
michael@0 713 * indexes which will reflect the reordering of the characters.
michael@0 714 * The array does not need to be initialized.<p>
michael@0 715 * The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>.<p>
michael@0 716 *
michael@0 717 * @see GetLogicalMap
michael@0 718 * @see GetLogicalIndex
michael@0 719 */
michael@0 720 nsresult GetVisualMap(int32_t *aIndexMap);
michael@0 721
michael@0 722 /**
michael@0 723 * This is a convenience function that does not use an nsBidi object.
michael@0 724 * It is intended for use when an application has already determined the levels
michael@0 725 * of objects (character sequences) and just needs to have them reordered (L2).
michael@0 726 * This is equivalent to using <code>GetLogicalMap</code> on a
michael@0 727 * <code>nsBidi</code> object.
michael@0 728 *
michael@0 729 * @param aLevels is an array with <code>aLength</code> levels that have been determined by
michael@0 730 * the application.
michael@0 731 *
michael@0 732 * @param aLength is the number of levels in the array, or, semantically,
michael@0 733 * the number of objects to be reordered.
michael@0 734 * It must be <code>aLength>0</code>.
michael@0 735 *
michael@0 736 * @param aIndexMap is a pointer to an array of <code>aLength</code>
michael@0 737 * indexes which will reflect the reordering of the characters.
michael@0 738 * The array does not need to be initialized.<p>
michael@0 739 * The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>.
michael@0 740 */
michael@0 741 static nsresult ReorderLogical(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap);
michael@0 742 #endif // FULL_BIDI_ENGINE
michael@0 743 /**
michael@0 744 * This is a convenience function that does not use an nsBidi object.
michael@0 745 * It is intended for use when an application has already determined the levels
michael@0 746 * of objects (character sequences) and just needs to have them reordered (L2).
michael@0 747 * This is equivalent to using <code>GetVisualMap</code> on a
michael@0 748 * <code>nsBidi</code> object.
michael@0 749 *
michael@0 750 * @param aLevels is an array with <code>aLength</code> levels that have been determined by
michael@0 751 * the application.
michael@0 752 *
michael@0 753 * @param aLength is the number of levels in the array, or, semantically,
michael@0 754 * the number of objects to be reordered.
michael@0 755 * It must be <code>aLength>0</code>.
michael@0 756 *
michael@0 757 * @param aIndexMap is a pointer to an array of <code>aLength</code>
michael@0 758 * indexes which will reflect the reordering of the characters.
michael@0 759 * The array does not need to be initialized.<p>
michael@0 760 * The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>.
michael@0 761 */
michael@0 762 static nsresult ReorderVisual(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap);
michael@0 763
michael@0 764 #ifdef FULL_BIDI_ENGINE
michael@0 765 /**
michael@0 766 * Invert an index map.
michael@0 767 * The one-to-one index mapping of the first map is inverted and written to
michael@0 768 * the second one.
michael@0 769 *
michael@0 770 * @param aSrcMap is an array with <code>aLength</code> indexes
michael@0 771 * which define the original mapping.
michael@0 772 *
michael@0 773 * @param aDestMap is an array with <code>aLength</code> indexes
michael@0 774 * which will be filled with the inverse mapping.
michael@0 775 *
michael@0 776 * @param aLength is the length of each array.
michael@0 777 */
michael@0 778 nsresult InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength);
michael@0 779 #endif // FULL_BIDI_ENGINE
michael@0 780 /**
michael@0 781 * Reverse a Right-To-Left run of Unicode text.
michael@0 782 *
michael@0 783 * This function preserves the integrity of characters with multiple
michael@0 784 * code units and (optionally) modifier letters.
michael@0 785 * Characters can be replaced by mirror-image characters
michael@0 786 * in the destination buffer. Note that "real" mirroring has
michael@0 787 * to be done in a rendering engine by glyph selection
michael@0 788 * and that for many "mirrored" characters there are no
michael@0 789 * Unicode characters as mirror-image equivalents.
michael@0 790 * There are also options to insert or remove Bidi control
michael@0 791 * characters; see the description of the <code>aDestSize</code>
michael@0 792 * and <code>aOptions</code> parameters and of the option bit flags.
michael@0 793 *
michael@0 794 * Since no Bidi controls are inserted here, this function will never
michael@0 795 * write more than <code>aSrcLength</code> characters to <code>aDest</code>.
michael@0 796 *
michael@0 797 * @param aSrc A pointer to the RTL run text.
michael@0 798 *
michael@0 799 * @param aSrcLength The length of the RTL run.
michael@0 800 * If the <code>NSBIDI_REMOVE_BIDI_CONTROLS</code> option
michael@0 801 * is set, then the destination length may be less than
michael@0 802 * <code>aSrcLength</code>.
michael@0 803 * If this option is not set, then the destination length
michael@0 804 * will be exactly <code>aSrcLength</code>.
michael@0 805 *
michael@0 806 * @param aDest A pointer to where the reordered text is to be copied.
michael@0 807 * <code>aSrc[aSrcLength]</code> and <code>aDest[aSrcLength]</code>
michael@0 808 * must not overlap.
michael@0 809 *
michael@0 810 * @param aOptions A bit set of options for the reordering that control
michael@0 811 * how the reordered text is written.
michael@0 812 *
michael@0 813 * @param aDestSize will receive the number of characters that were written to <code>aDest</code>.
michael@0 814 */
michael@0 815 nsresult WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize);
michael@0 816
michael@0 817 protected:
michael@0 818 friend class nsBidiPresUtils;
michael@0 819
michael@0 820 /** length of the current text */
michael@0 821 int32_t mLength;
michael@0 822
michael@0 823 /** memory sizes in bytes */
michael@0 824 size_t mDirPropsSize, mLevelsSize, mRunsSize;
michael@0 825
michael@0 826 /** allocated memory */
michael@0 827 DirProp* mDirPropsMemory;
michael@0 828 nsBidiLevel* mLevelsMemory;
michael@0 829 Run* mRunsMemory;
michael@0 830
michael@0 831 /** indicators for whether memory may be allocated after construction */
michael@0 832 bool mMayAllocateText, mMayAllocateRuns;
michael@0 833
michael@0 834 const DirProp* mDirProps;
michael@0 835 nsBidiLevel* mLevels;
michael@0 836
michael@0 837 /** the paragraph level */
michael@0 838 nsBidiLevel mParaLevel;
michael@0 839
michael@0 840 /** flags is a bit set for which directional properties are in the text */
michael@0 841 Flags mFlags;
michael@0 842
michael@0 843 /** the overall paragraph or line directionality - see nsBidiDirection */
michael@0 844 nsBidiDirection mDirection;
michael@0 845
michael@0 846 /** characters after mTrailingWSStart are WS and are
michael@0 847 * implicitly at mParaLevel (rule (L1)) - levels may not reflect that */
michael@0 848 int32_t mTrailingWSStart;
michael@0 849
michael@0 850 /** fields for line reordering */
michael@0 851 int32_t mRunCount; /* ==-1: runs not set up yet */
michael@0 852 Run* mRuns;
michael@0 853
michael@0 854 /** for non-mixed text, we only need a tiny array of runs (no malloc()) */
michael@0 855 Run mSimpleRuns[1];
michael@0 856
michael@0 857 private:
michael@0 858
michael@0 859 void Init();
michael@0 860
michael@0 861 bool GetMemory(void **aMemory, size_t* aSize, bool aMayAllocate, size_t aSizeNeeded);
michael@0 862
michael@0 863 void Free();
michael@0 864
michael@0 865 void GetDirProps(const char16_t *aText);
michael@0 866
michael@0 867 nsBidiDirection ResolveExplicitLevels();
michael@0 868
michael@0 869 nsresult CheckExplicitLevels(nsBidiDirection *aDirection);
michael@0 870
michael@0 871 nsBidiDirection DirectionFromFlags(Flags aFlags);
michael@0 872
michael@0 873 void ResolveImplicitLevels(int32_t aStart, int32_t aLimit, DirProp aSOR, DirProp aEOR);
michael@0 874
michael@0 875 void AdjustWSLevels();
michael@0 876
michael@0 877 void SetTrailingWSStart();
michael@0 878
michael@0 879 bool GetRuns();
michael@0 880
michael@0 881 void GetSingleRun(nsBidiLevel aLevel);
michael@0 882
michael@0 883 void ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel);
michael@0 884
michael@0 885 static bool PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap, nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel);
michael@0 886
michael@0 887 int32_t doWriteReverse(const char16_t *src, int32_t srcLength,
michael@0 888 char16_t *dest, uint16_t options);
michael@0 889
michael@0 890 };
michael@0 891
michael@0 892 #endif // nsBidi_h__

mercurial