layout/base/nsBidi.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/layout/base/nsBidi.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,892 @@
     1.4 +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
     1.5 + *
     1.6 + * This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#ifndef nsBidi_h__
    1.11 +#define nsBidi_h__
    1.12 +
    1.13 +#include "nsBidiUtils.h"
    1.14 +
    1.15 +// Bidi reordering engine from ICU
    1.16 +/*
    1.17 + * javadoc-style comments are intended to be transformed into HTML
    1.18 + * using DOC++ - see
    1.19 + * http://www.zib.de/Visual/software/doc++/index.html .
    1.20 + *
    1.21 + * The HTML documentation is created with
    1.22 + *  doc++ -H nsBidi.h
    1.23 + */
    1.24 +
    1.25 +/**
    1.26 + * @mainpage BIDI algorithm for Mozilla (from ICU)
    1.27 + *
    1.28 + * <h2>BIDI algorithm for Mozilla</h2>
    1.29 + *
    1.30 + * This is an implementation of the Unicode Bidirectional algorithm.
    1.31 + * The algorithm is defined in the
    1.32 + * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
    1.33 + * version 5, also described in The Unicode Standard, Version 3.0 .<p>
    1.34 + *
    1.35 + * <h3>General remarks about the API:</h3>
    1.36 + *
    1.37 + * The <quote>limit</quote> of a sequence of characters is the position just after their
    1.38 + * last character, i.e., one more than that position.<p>
    1.39 + *
    1.40 + * Some of the API functions provide access to <quote>runs</quote>.
    1.41 + * Such a <quote>run</quote> is defined as a sequence of characters
    1.42 + * that are at the same embedding level
    1.43 + * after performing the BIDI algorithm.<p>
    1.44 + *
    1.45 + * @author Markus W. Scherer. Ported to Mozilla by Simon Montagu
    1.46 + * @version 1.0
    1.47 + */
    1.48 +
    1.49 +/**
    1.50 + * nsBidiLevel is the type of the level values in this
    1.51 + * Bidi implementation.
    1.52 + * It holds an embedding level and indicates the visual direction
    1.53 + * by its bit 0 (even/odd value).<p>
    1.54 + *
    1.55 + * It can also hold non-level values for the
    1.56 + * <code>aParaLevel</code> and <code>aEmbeddingLevels</code>
    1.57 + * arguments of <code>SetPara</code>; there:
    1.58 + * <ul>
    1.59 + * <li>bit 7 (<code>NSBIDI_LEVEL_OVERRIDE</code>) of an <code>aEmbeddingLevels[]</code>
    1.60 + * value indicates whether the calling application is
    1.61 + * specifying the level of a character to <i>override</i> whatever the
    1.62 + * Bidi implementation would resolve it to.</li>
    1.63 + * <li><code>aParaLevel</code> can be set to the
    1.64 + * pseudo-level values <code>NSBIDI_DEFAULT_LTR</code>
    1.65 + * and <code>NSBIDI_DEFAULT_RTL</code>.</li></ul>
    1.66 + *
    1.67 + * @see nsBidi::SetPara
    1.68 + *
    1.69 + * <p>The related constants are not real, valid level values.
    1.70 + * <code>NSBIDI_DEFAULT_XXX</code> can be used to specify
    1.71 + * the paragraph level to fall back on
    1.72 + * when the <code>SetPara</code> function
    1.73 + * is asked to determine the level from the text but there is no
    1.74 + * strongly typed character in the input.<p>
    1.75 + *
    1.76 + * Note that the value for <code>NSBIDI_DEFAULT_LTR</code> is even
    1.77 + * and the one for <code>NSBIDI_DEFAULT_RTL</code> is odd,
    1.78 + * just like with normal LTR and RTL level values -
    1.79 + * these special values are designed that way. Also, the implementation
    1.80 + * assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
    1.81 + *
    1.82 + * @see NSBIDI_DEFAULT_LTR
    1.83 + * @see NSBIDI_DEFAULT_RTL
    1.84 + * @see NSBIDI_LEVEL_OVERRIDE
    1.85 + * @see NSBIDI_MAX_EXPLICIT_LEVEL
    1.86 + */
    1.87 +typedef uint8_t nsBidiLevel;
    1.88 +
    1.89 +/** Paragraph level setting (pseudo-level, not a real level value; even, like LTR levels).
    1.90 + *  If there is no strong character, then set the paragraph level to 0 (left-to-right).
    1.91 + */
    1.92 +#define NSBIDI_DEFAULT_LTR 0xfe
    1.93 +
    1.94 +/** Paragraph level setting (pseudo-level, not a real level value; odd, like RTL levels).
    1.95 + *  If there is no strong character, then set the paragraph level to 1 (right-to-left).
    1.96 + */
    1.97 +#define NSBIDI_DEFAULT_RTL 0xff
    1.98 +
    1.99 +/**
   1.100 + * Maximum explicit embedding level.
   1.101 + * (The maximum resolved level can be up to <code>NSBIDI_MAX_EXPLICIT_LEVEL+1</code>).
   1.102 + * The implementation assumes that this value is odd.
   1.103 + */
   1.104 +#define NSBIDI_MAX_EXPLICIT_LEVEL 61
   1.105 +
   1.106 +/** Bit flag for level input (bit 7 of an <code>aEmbeddingLevels[]</code> value).
   1.107 + *  Overrides directional properties.
   1.108 + */
   1.109 +#define NSBIDI_LEVEL_OVERRIDE 0x80
   1.110 +
   1.111 +/**
   1.112 + * <code>nsBidiDirection</code> values indicate the text direction.
   1.113 + */
   1.114 +enum nsBidiDirection {
   1.115 +  /** All left-to-right text. This is a 0 value. */
   1.116 +  NSBIDI_LTR,
   1.117 +  /** All right-to-left text. This is a 1 value. */
   1.118 +  NSBIDI_RTL,
   1.119 +  /** Mixed-directional text. Follows NSBIDI_RTL, so this is a 2 value. */
   1.120 +  NSBIDI_MIXED
   1.121 +};
   1.122 +/* C-style alias so that the type can be named without the enum keyword. */
   1.123 +typedef enum nsBidiDirection nsBidiDirection;
   1.124 +
   1.125 +/* miscellaneous definitions ------------------------------------------------ */
   1.126 +/** option flags for WriteReverse(); each one below is a separate single-bit value */
   1.127 +/**
   1.128 + * option bit for WriteReverse():
   1.129 + * keep combining characters after their base characters in RTL runs
   1.130 + *
   1.131 + * @see WriteReverse
   1.132 + */
   1.133 +#define NSBIDI_KEEP_BASE_COMBINING       1
   1.134 +
   1.135 +/**
   1.136 + * option bit for WriteReverse():
   1.137 + * replace characters with the "mirrored" property in RTL runs
   1.138 + * by their mirror-image mappings
   1.139 + *
   1.140 + * @see WriteReverse
   1.141 + */
   1.142 +#define NSBIDI_DO_MIRRORING              2
   1.143 +
   1.144 +/**
   1.145 + * option bit for WriteReverse():
   1.146 + * remove Bidi control characters
   1.147 + * (note: the value skips 4, which is not defined in this part of the header)
   1.148 + * @see WriteReverse
   1.149 + */
   1.150 +#define NSBIDI_REMOVE_BIDI_CONTROLS      8
   1.151 +
   1.152 +/* helper macros for each allocated array member: each expands to one GetMemory() call and names the mXxxMemory/mXxxSize/mMayAllocateXxx variables unqualified, so those must be in scope where the macro is used */
   1.153 +#define GETDIRPROPSMEMORY(length) \
   1.154 +                                  GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, \
   1.155 +                                  mMayAllocateText, (length))
   1.156 +
   1.157 +#define GETLEVELSMEMORY(length) \
   1.158 +                                GetMemory((void **)&mLevelsMemory, &mLevelsSize, \
   1.159 +                                mMayAllocateText, (length))
   1.160 +/* the runs variants pass (length)*sizeof(Run) as the last argument, the others pass (length) unchanged */
   1.161 +#define GETRUNSMEMORY(length) \
   1.162 +                              GetMemory((void **)&mRunsMemory, &mRunsSize, \
   1.163 +                              mMayAllocateRuns, (length)*sizeof(Run))
   1.164 +
   1.165 +/* additional macros used by constructor - always allow allocation (they pass true instead of an mMayAllocateXxx flag) */
   1.166 +#define GETINITIALDIRPROPSMEMORY(length) \
   1.167 +                                         GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, \
   1.168 +                                         true, (length))
   1.169 +
   1.170 +#define GETINITIALLEVELSMEMORY(length) \
   1.171 +                                       GetMemory((void **)&mLevelsMemory, &mLevelsSize, \
   1.172 +                                       true, (length))
   1.173 +
   1.174 +#define GETINITIALRUNSMEMORY(length) \
   1.175 +                                     GetMemory((void **)&mRunsMemory, &mRunsSize, \
   1.176 +                                     true, (length)*sizeof(Run))
   1.177 +
   1.178 +/*
   1.179 + * Sometimes, bit values are more appropriate
   1.180 + * to deal with directionality properties.
   1.181 + * Abbreviations in these macro names refer to names
   1.182 + * used in the Bidi algorithm.
   1.183 + */
   1.184 +typedef uint8_t DirProp;
   1.185 +
   1.186 +#define DIRPROP_FLAG(dir) (1UL<<(dir))
   1.187 +
   1.188 +/* special flag for multiple runs from explicit embedding codes */
   1.189 +#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
   1.190 +
   1.191 +/* are there any characters that are LTR or RTL? */
   1.192 +#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
   1.193 +#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
   1.194 +
   1.195 +/* explicit embedding codes */
   1.196 +#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
   1.197 +#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
   1.198 +#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))
   1.199 +
   1.200 +#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
   1.201 +#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
   1.202 +
   1.203 +/* paragraph and segment separators */
   1.204 +#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
   1.205 +
   1.206 +/* all types that are counted as White Space or Neutral in some steps */
   1.207 +#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
   1.208 +#define MASK_N (DIRPROP_FLAG(O_N)|MASK_WS)
   1.209 +
   1.210 +/* all types that are included in a sequence of European Terminators for (W5) */
   1.211 +#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)
   1.212 +
   1.213 +/* types that are neutrals or could become neutrals in (Wn) */
   1.214 +#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)
   1.215 +
   1.216 +/*
   1.217 + * These types may be changed to "e",
   1.218 + * the embedding type (L or R) of the run,
   1.219 + * in the Bidi algorithm (N2)
   1.220 + */
   1.221 +#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
   1.222 +
   1.223 +/* the dirProp's L and R are defined to 0 and 1 values in nsCharType, so bit 0 of a level maps directly to L or R */
   1.224 +#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
   1.225 +/* true exactly for the two pseudo-levels NSBIDI_DEFAULT_LTR (0xfe) and NSBIDI_DEFAULT_RTL (0xff) */
   1.226 +#define IS_DEFAULT_LEVEL(level) (((level)&0xfe)==0xfe)
   1.227 +
   1.228 +/* handle surrogate pairs --------------------------------------------------- */
   1.229 +/* first surrogates are the code units 0xd800..0xdbff, second surrogates are 0xdc00..0xdfff */
   1.230 +#define IS_FIRST_SURROGATE(uchar) (((uchar)&0xfc00)==0xd800)
   1.231 +#define IS_SECOND_SURROGATE(uchar) (((uchar)&0xfc00)==0xdc00)
   1.232 +
   1.233 +/* get the UTF-32 value directly from the surrogate pseudo-characters */
   1.234 +#define SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
   1.235 +#define GET_UTF_32(first, second) (((first)<<10UL)+(second)-SURROGATE_OFFSET)
   1.236 +/* i.e. ((first-0xd800)<<10)+(second-0xdc00)+0x10000, with the three constants folded into SURROGATE_OFFSET */
   1.237 +/* code unit that the error paths of the _SAFE macros store or return instead of a code point */
   1.238 +#define UTF_ERROR_VALUE 0xffff
   1.239 +/* definitions with forward iteration --------------------------------------- */
   1.240 +
   1.241 +/*
   1.242 + * all the macros that go forward assume that
   1.243 + * the initial offset is 0<=i<length;
   1.244 + * they update the offset
   1.245 + */
   1.246 +
   1.247 +/* fast versions, no error-checking: no bounds test on s and no range check on c; s, i and c are evaluated more than once */
   1.248 +/* Appends c at s[i]: one code unit if c<=0xffff, else first surrogate (c>>10)+0xd7c0 (0xd7c0 == 0xd800-(0x10000>>10)) and second surrogate (c&0x3ff)|0xdc00; i advances by 1 or 2. */
   1.249 +#define UTF16_APPEND_CHAR_UNSAFE(s, i, c){ \
   1.250 +                                         if((uint32_t)(c)<=0xffff) { \
   1.251 +                                         (s)[(i)++]=(char16_t)(c); \
   1.252 +                                         } else { \
   1.253 +                                         (s)[(i)++]=(char16_t)((c)>>10)+0xd7c0; \
   1.254 +                                         (s)[(i)++]=(char16_t)(c)&0x3ff|0xdc00; \
   1.255 +                                         } \
   1.256 +}
   1.257 +
   1.258 +/* safe versions with error-checking and optional regularity-checking */
   1.259 +/* Appends c at s[i] and advances i: one unit if c<=0xffff; a surrogate pair if c<=0x10ffff and i+1<length; otherwise the single unit UTF_ERROR_VALUE. Casts use uint32_t, as in UTF16_APPEND_CHAR_UNSAFE. */
   1.260 +#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
   1.261 +                                                if((uint32_t)(c)<=0xffff) { \
   1.262 +                                                (s)[(i)++]=(char16_t)(c); \
   1.263 +                                                } else if((uint32_t)(c)<=0x10ffff) { \
   1.264 +                                                if((i)+1<(length)) { \
   1.265 +                                                (s)[(i)++]=(char16_t)((c)>>10)+0xd7c0; \
   1.266 +                                                (s)[(i)++]=(char16_t)(c)&0x3ff|0xdc00; \
   1.267 +                                                } else /* not enough space */ { \
   1.268 +                                                (s)[(i)++]=UTF_ERROR_VALUE; \
   1.269 +                                                } \
   1.270 +                                                } else /* c>0x10ffff, write error value */ { \
   1.271 +                                                (s)[(i)++]=UTF_ERROR_VALUE; \
   1.272 +                                                } \
   1.273 +}
   1.274 +
   1.275 +/* definitions with backward iteration -------------------------------------- */
   1.276 +
   1.277 +/*
   1.278 + * all the macros that go backward assume that
   1.279 + * the valid buffer range starts at offset 0
   1.280 + * and that the initial offset is 0<i<=length;
   1.281 + * they update the offset
   1.282 + */
   1.283 +
   1.284 +/* fast versions, no error-checking */
   1.285 +
   1.286 +/*
   1.287 + * Get a single code point from an offset that points behind the last
   1.288 + * of the code units that belong to that code point.
   1.289 + * Assume 0<i<=length (i is decremented before s is read).
   1.290 + */
   1.291 +#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
   1.292 +                                        (c)=(s)[--(i)]; \
   1.293 +                                        if(IS_SECOND_SURROGATE(c)) { \
   1.294 +                                        (c)=GET_UTF_32((s)[--(i)], (c)); \
   1.295 +                                        } \
   1.296 +}
   1.297 +/* Step i back over one code point: one code unit, or two when the unit stepped onto is a second surrogate. */
   1.298 +#define UTF16_BACK_1_UNSAFE(s, i) { \
   1.299 +                                  if(IS_SECOND_SURROGATE((s)[--(i)])) { \
   1.300 +                                  --(i); \
   1.301 +                                  } \
   1.302 +}
   1.303 +/* Step i back over n code points; n is evaluated once into a local counter, s and i on every iteration. */
   1.304 +#define UTF16_BACK_N_UNSAFE(s, i, n) { \
   1.305 +                                     int32_t __N=(n); \
   1.306 +                                     while(__N>0) { \
   1.307 +                                     UTF16_BACK_1_UNSAFE(s, i); \
   1.308 +                                     --__N; \
   1.309 +                                     } \
   1.310 +}
   1.311 +
   1.312 +/* safe versions with error-checking and optional regularity-checking */
   1.313 +/* UTF16_PREV_CHAR_SAFE: a first surrogate is only looked for while i>start; with strict set, an unpaired surrogate yields UTF_ERROR_VALUE, otherwise it is returned in c unchanged. */
   1.314 +#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
   1.315 +                                                     (c)=(s)[--(i)]; \
   1.316 +                                                     if(IS_SECOND_SURROGATE(c)) { \
   1.317 +                                                     char16_t __c2; \
   1.318 +                                                     if((i)>(start) && IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
   1.319 +                                                     --(i); \
   1.320 +                                                     (c)=GET_UTF_32(__c2, (c)); \
   1.321 +      /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() */ \
   1.322 +                                                     } else if(strict) {\
   1.323 +      /* unmatched second surrogate */ \
   1.324 +                                                     (c)=UTF_ERROR_VALUE; \
   1.325 +                                                     } \
   1.326 +                                                     } else if(strict && IS_FIRST_SURROGATE(c)) { \
   1.327 +      /* unmatched first surrogate */ \
   1.328 +                                                     (c)=UTF_ERROR_VALUE; \
   1.329 +  /* else strict: (c)==0xfffe is caught by UTF_IS_ERROR() */ \
   1.330 +                                                     } \
   1.331 +}
   1.332 +/* UTF16_BACK_1_SAFE: step i back one code unit, and a second one only when that completes a surrogate pair that begins at or after start. */
   1.333 +#define UTF16_BACK_1_SAFE(s, start, i) { \
   1.334 +                                       if(IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && IS_FIRST_SURROGATE((s)[(i)-1])) { \
   1.335 +                                       --(i); \
   1.336 +                                       } \
   1.337 +}
   1.338 +/* UTF16_BACK_N_SAFE: step i back over up to n code points, stopping early once i is no longer greater than start. */
   1.339 +#define UTF16_BACK_N_SAFE(s, start, i, n) { \
   1.340 +                                          int32_t __N=(n); \
   1.341 +                                          while(__N>0 && (i)>(start)) { \
   1.342 +                                          UTF16_BACK_1_SAFE(s, start, i); \
   1.343 +                                          --__N; \
   1.344 +                                          } \
   1.345 +}
   1.346 +/* Encoding-neutral UTF_* names; each one forwards, with identical arguments, to the UTF16_* macro of the same suffix. */
   1.347 +#define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
   1.348 +#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
   1.349 +#define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
   1.350 +#define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
   1.351 +#define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
   1.352 +#define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
   1.353 +#define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
   1.354 +#define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
   1.355 +/* The unsuffixed names select the _SAFE variants; UTF_PREV_CHAR passes false for strict. */
   1.356 +#define UTF_PREV_CHAR(s, start, i, c)                UTF_PREV_CHAR_SAFE(s, start, i, c, false)
   1.357 +#define UTF_BACK_1(s, start, i)                      UTF_BACK_1_SAFE(s, start, i)
   1.358 +#define UTF_BACK_N(s, start, i, n)                   UTF_BACK_N_SAFE(s, start, i, n)
   1.359 +#define UTF_APPEND_CHAR(s, i, length, c)             UTF_APPEND_CHAR_SAFE(s, i, length, c)
   1.360 +
   1.361 +/* Run structure for reordering --------------------------------------------- */
   1.362 +/* One entry per run; sizeof(Run) is the element size used by GETRUNSMEMORY and GETINITIALRUNSMEMORY. */
   1.363 +typedef struct Run {
   1.364 +  int32_t logicalStart,  /* logical index of the run's first character; bit 31 (INDEX_ODD_BIT) is set if the run level is odd */
   1.365 +  visualLimit;  /* last visual position of the run +1 */
   1.366 +} Run;
   1.367 +
   1.368 +/* in a Run, logicalStart will get this bit set if the run level is odd */
   1.369 +#define INDEX_ODD_BIT (1UL<<31)
   1.370 +/* build or update a logicalStart value: after the 32-bit shift only bit 0 of level remains, in bit 31 */
   1.371 +#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)(level)<<31))
   1.372 +#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=((uint32_t)(level)<<31))
   1.373 +#define REMOVE_ODD_BIT(x)          ((x)&=~INDEX_ODD_BIT)
   1.374 +/* read accessors; every parameter is parenthesized so that compound argument expressions expand as written */
   1.375 +#define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
   1.376 +#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
   1.377 +#define IS_ODD_RUN(x)  (((x)&INDEX_ODD_BIT)!=0)
   1.378 +#define IS_EVEN_RUN(x) (((x)&INDEX_ODD_BIT)==0)
   1.379 +/* 32-bit bit-set type; presumably holds OR-ed DIRPROP_FLAG() values - TODO confirm against its users */
   1.380 +typedef uint32_t Flags;
   1.381 +
   1.382 +/**
   1.383 + * This class holds information about a paragraph of text
   1.384 + * with Bidi-algorithm-related details, or about one line of
   1.385 + * such a paragraph.<p>
   1.386 + * Reordering can be done on a line, or on a paragraph which is
   1.387 + * then interpreted as one single line.<p>
   1.388 + *
   1.389 + * On construction, the class is initially empty. It is assigned
   1.390 + * the Bidi properties of a paragraph by <code>SetPara</code>
   1.391 + * or the Bidi properties of a line of a paragraph by
   1.392 + * <code>SetLine</code>.<p>
   1.393 + * A Bidi class can be reused for as long as it is not deallocated
   1.394 + * by calling its destructor.<p>
   1.395 + * <code>SetPara</code> will allocate additional memory for
   1.396 + * internal structures as necessary.
   1.397 + */
   1.398 +class nsBidi
   1.399 +{
   1.400 +public: 
   1.401 +  /** @brief Default constructor.
   1.402 +   * 
   1.403 +   * The nsBidi object is initially empty. It is assigned
   1.404 +   * the Bidi properties of a paragraph by <code>SetPara()</code>
   1.405 +   * or the Bidi properties of a line of a paragraph by
   1.406 +   * <code>SetLine()</code>.<p>
   1.407 +   * This object can be reused for as long as it is not destroyed.<p>
   1.408 +   * <code>SetPara()</code> will allocate additional memory for
   1.409 +   * internal structures as necessary.
   1.410 +   *
   1.411 +   */
   1.412 +  nsBidi();
   1.413 +
   1.414 +  /** @brief Destructor. */
   1.415 +  virtual ~nsBidi();
   1.416 +
   1.417 +
   1.418 +  /**
   1.419 +   * Perform the Unicode Bidi algorithm. It is defined in the
   1.420 +   * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
   1.421 +   * version 5,
   1.422 +   * also described in The Unicode Standard, Version 3.0 .<p>
   1.423 +   *
   1.424 +   * This function takes a single plain text paragraph with or without
   1.425 +   * externally specified embedding levels from <quote>styled</quote> text
   1.426 +   * and computes the left-right-directionality of each character.<p>
   1.427 +   *
   1.428 +   * If the entire paragraph consists of text of only one direction, then
   1.429 +   * the function may not perform all the steps described by the algorithm,
   1.430 +   * i.e., some levels may not be the same as if all steps were performed.
   1.431 +   * This is not relevant for unidirectional text.<br>
   1.432 +   * For example, in pure LTR text with numbers the numbers would get
   1.433 +   * a resolved level of 2 higher than the surrounding text according to
   1.434 +   * the algorithm. This implementation may set all resolved levels to
   1.435 +   * the same value in such a case.<p>
   1.436 +   *
   1.437 +   * The text must be externally split into separate paragraphs (rule P1).
   1.438 +   * Paragraph separators (B) should appear at most at the very end.
   1.439 +   *
   1.440 +   * @param aText is a pointer to the single-paragraph text that the
   1.441 +   *      Bidi algorithm will be performed on
   1.442 +   *      (step (P1) of the algorithm is performed externally).
   1.443 +   *      <strong>The text must be (at least) <code>aLength</code> long.</strong>
   1.444 +   *
   1.445 +   * @param aLength is the length of the text; if <code>aLength==-1</code> then
   1.446 +   *      the text must be zero-terminated.
   1.447 +   *
   1.448 +   * @param aParaLevel specifies the default level for the paragraph;
   1.449 +   *      it is typically 0 (LTR) or 1 (RTL).
   1.450 +   *      If the function shall determine the paragraph level from the text,
   1.451 +   *      then <code>aParaLevel</code> can be set to
   1.452 +   *      either <code>NSBIDI_DEFAULT_LTR</code>
   1.453 +   *      or <code>NSBIDI_DEFAULT_RTL</code>;
   1.454 +   *      if there is no strongly typed character, then
   1.455 +   *      the desired default is used (0 for LTR or 1 for RTL).
   1.456 +   *      Any other value between 0 and <code>NSBIDI_MAX_EXPLICIT_LEVEL</code> is also valid,
   1.457 +   *      with odd levels indicating RTL.
   1.458 +   *
   1.459 +   * @param aEmbeddingLevels (in) may be used to preset the embedding and override levels,
   1.460 +   *      ignoring characters like LRE and PDF in the text.
   1.461 +   *      A level overrides the directional property of its corresponding
   1.462 +   *      (same index) character if the level has the
   1.463 +   *      <code>NSBIDI_LEVEL_OVERRIDE</code> bit set.<p>
   1.464 +   *      Except for that bit, it must be
   1.465 +   *      <code>aParaLevel<=aEmbeddingLevels[]<=NSBIDI_MAX_EXPLICIT_LEVEL</code>.<p>
   1.466 +   *      <strong>Caution: </strong>A copy of this pointer, not of the levels,
   1.467 +   *      will be stored in the <code>nsBidi</code> object;
   1.468 +   *      the <code>aEmbeddingLevels</code> array must not be
   1.469 +   *      deallocated before the <code>nsBidi</code> object is destroyed or reused,
   1.470 +   *      and the <code>aEmbeddingLevels</code>
   1.471 +   *      should not be modified to avoid unexpected results on subsequent Bidi operations.
   1.472 +   *      However, the <code>SetPara</code> and
   1.473 +   *      <code>SetLine</code> functions may modify some or all of the levels.<p>
   1.474 +   *      After the <code>nsBidi</code> object is reused or destroyed, the caller
   1.475 +   *      must take care of the deallocation of the <code>aEmbeddingLevels</code> array.<p>
   1.476 +   *      <strong>The <code>aEmbeddingLevels</code> array must be
   1.477 +   *      at least <code>aLength</code> long.</strong>
   1.478 +   */
   1.479 +  nsresult SetPara(const char16_t *aText, int32_t aLength, nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels);
   1.480 +
   1.481 +  /**
   1.482 +   * Get the directionality of the text.
   1.483 +   *
   1.484 +   * @param aDirection receives a <code>NSBIDI_XXX</code> value that indicates if the entire text
   1.485 +   *       represented by this object is unidirectional,
   1.486 +   *       and which direction, or if it is mixed-directional.
   1.487 +   *
   1.488 +   * @see nsBidiDirection
   1.489 +   */
   1.490 +  nsresult GetDirection(nsBidiDirection* aDirection);
   1.491 +
   1.492 +  /**
   1.493 +   * Get the paragraph level of the text.
   1.494 +   *
   1.495 +   * @param aParaLevel receives a <code>NSBIDI_XXX</code> value indicating the paragraph level
   1.496 +   *
   1.497 +   * @see nsBidiLevel
   1.498 +   */
   1.499 +  nsresult GetParaLevel(nsBidiLevel* aParaLevel);
   1.500 +
   1.501 +#ifdef FULL_BIDI_ENGINE
   1.502 +  /**
   1.503 +   * <code>SetLine</code> sets an <code>nsBidi</code> to
   1.504 +   * contain the reordering information, especially the resolved levels,
   1.505 +   * for all the characters in a line of text. This line of text is
   1.506 +   * specified by referring to an <code>nsBidi</code> object representing
   1.507 +   * this information for a paragraph of text, and by specifying
   1.508 +   * a range of indexes in this paragraph.<p>
   1.509 +   * In the new line object, the indexes will range from 0 to <code>aLimit-aStart</code>.<p>
   1.510 +   *
   1.511 +   * This is used after calling <code>SetPara</code>
   1.512 +   * for a paragraph, and after line-breaking on that paragraph.
   1.513 +   * It is not necessary if the paragraph is treated as a single line.<p>
   1.514 +   *
   1.515 +   * After line-breaking, rules (L1) and (L2) for the treatment of
   1.516 +   * trailing WS and for reordering are performed on
   1.517 +   * an <code>nsBidi</code> object that represents a line.<p>
   1.518 +   *
   1.519 +   * <strong>Important:</strong> the line <code>nsBidi</code> object shares data with
   1.520 +   * <code>aParaBidi</code>.
   1.521 +   * You must destroy or reuse this object before <code>aParaBidi</code>.
   1.522 +   * In other words, you must destroy or reuse the <code>nsBidi</code> object for a line
   1.523 +   * before the object for its parent paragraph.
   1.524 +   *
   1.525 +   * @param aParaBidi is the parent paragraph object.
   1.526 +   *
   1.527 +   * @param aStart is the line's first index into the paragraph text.
   1.528 +   *
   1.529 +   * @param aLimit is just behind the line's last index into the paragraph text
   1.530 +   *      (its last index +1).<br>
   1.531 +   *      It must be <code>0<=aStart<=aLimit<=</code>paragraph length.
   1.532 +   *
   1.533 +   * @see SetPara
   1.534 +   */
   1.535 +  nsresult SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit);  
   1.536 +
   1.537 +  /**
   1.538 +   * Get the length of the text.
   1.539 +   *
   1.540 +   * @param aLength receives the length of the text that the nsBidi object was created for.
   1.541 +   */
   1.542 +  nsresult GetLength(int32_t* aLength);
   1.543 +
   1.544 +  /**
   1.545 +   * Get the level for one character.
   1.546 +   *
   1.547 +   * @param aCharIndex the index of a character.
   1.548 +   *
   1.549 +   * @param aLevel receives the level for the character at aCharIndex.
   1.550 +   *
   1.551 +   * @see nsBidiLevel
   1.552 +   */
   1.553 +  nsresult GetLevelAt(int32_t aCharIndex,  nsBidiLevel* aLevel);
   1.554 +
   1.555 +  /**
   1.556 +   * Get an array of levels for each character.<p>
   1.557 +   *
   1.558 +   * Note that this function may allocate memory under some
   1.559 +   * circumstances, unlike <code>GetLevelAt</code>.
   1.560 +   *
   1.561 +   * @param aLevels receives a pointer to the levels array for the text,
   1.562 +   *       or <code>nullptr</code> if an error occurs.
   1.563 +   *
   1.564 +   * @see nsBidiLevel
   1.565 +   */
   1.566 +  nsresult GetLevels(nsBidiLevel** aLevels);
   1.567 +#endif // FULL_BIDI_ENGINE
   1.568 +  /**
   1.569 +   * Get the bidirectional type for one character.
   1.570 +   *
   1.571 +   * @param aCharIndex the index of a character.
   1.572 +   *
   1.573 +   * @param aType receives the bidirectional type of the character at aCharIndex.
   1.574 +   */
   1.575 +  nsresult GetCharTypeAt(int32_t aCharIndex,  nsCharType* aType);
   1.576 +
   1.577 +  /**
   1.578 +   * Get a logical run.
   1.579 +   * This function returns information about a run and is used
   1.580 +   * to retrieve runs in logical order.<p>
   1.581 +   * This is especially useful for line-breaking on a paragraph.
   1.582 +   *
   1.583 +   * @param aLogicalStart is the first character of the run.
   1.584 +   *
   1.585 +   * @param aLogicalLimit will receive the limit of the run.
   1.586 +   *      The l-value that you point to here may be the
   1.587 +   *      same expression (variable) as the one for
   1.588 +   *      <code>aLogicalStart</code>.
   1.589 +   *      This pointer can be <code>nullptr</code> if this
   1.590 +   *      value is not necessary.
   1.591 +   *
   1.592 +   * @param aLevel will receive the level of the run.
   1.593 +   *      This pointer can be <code>nullptr</code> if this
   1.594 +   *      value is not necessary.
   1.595 +   */
   1.596 +  nsresult GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimit, nsBidiLevel* aLevel);
   1.597 +
   1.598 +  /**
   1.599 +   * Get the number of runs.
   1.600 +   * This function may invoke the actual reordering on the
   1.601 +   * <code>nsBidi</code> object, after <code>SetPara</code>
   1.602 +   * may have resolved only the levels of the text. Therefore,
   1.603 +   * <code>CountRuns</code> may have to allocate memory,
   1.604 +   * and may fail doing so.
   1.605 +   *
   1.606 +   * @param aRunCount will receive the number of runs.
   1.607 +   */
   1.608 +  nsresult CountRuns(int32_t* aRunCount);
   1.609 +
   1.610 +  /**
   1.611 +   * Get one run's logical start, length, and directionality,
   1.612 +   * which can be 0 for LTR or 1 for RTL.
   1.613 +   * In an RTL run, the character at the logical start is
   1.614 +   * visually on the right of the displayed run.
   1.615 +   * The length is the number of characters in the run.<p>
   1.616 +   * <code>CountRuns</code> should be called
   1.617 +   * before the runs are retrieved.
   1.618 +   *
   1.619 +   * @param aRunIndex is the number of the run in visual order, in the
   1.620 +   *      range <code>[0..CountRuns-1]</code>.
   1.621 +   *
   1.622 +   * @param aLogicalStart is the first logical character index in the text.
   1.623 +   *      The pointer may be <code>nullptr</code> if this index is not needed.
   1.624 +   *
   1.625 +   * @param aLength is the number of characters (at least one) in the run.
   1.626 +   *      The pointer may be <code>nullptr</code> if this is not needed.
   1.627 +   *
   1.628 +   * @param aDirection will receive the directionality of the run,
   1.629 +   *       <code>NSBIDI_LTR==0</code> or <code>NSBIDI_RTL==1</code>,
   1.630 +   *       never <code>NSBIDI_MIXED</code>.
   1.631 +   *
   1.632 +   * @see CountRuns<p>
   1.633 +   *
   1.634 +   * Example:
   1.635 +   * @code
   1.636 +   *  int32_t i, count, logicalStart, visualIndex=0, length;
   1.637 +   *  nsBidiDirection dir;
   1.638 +   *  pBidi->CountRuns(&count);
   1.639 +   *  for(i=0; i<count; ++i) {
   1.640 +   *    pBidi->GetVisualRun(i, &logicalStart, &length, &dir);
   1.641 +   *    if(NSBIDI_LTR==dir) {
   1.642 +   *      do { // LTR
   1.643 +   *        show_char(text[logicalStart++], visualIndex++);
   1.644 +   *      } while(--length>0);
   1.645 +   *    } else {
   1.646 +   *      logicalStart+=length;  // logicalLimit
   1.647 +   *      do { // RTL
   1.648 +   *        show_char(text[--logicalStart], visualIndex++);
   1.649 +   *      } while(--length>0);
   1.650 +   *    }
   1.651 +   *  }
   1.652 +   * @endcode
   1.653 +   *
   1.654 +   * Note that in right-to-left runs, code like this places
   1.655 +   * modifier letters before base characters and second surrogates
   1.656 +   * before first ones.
   1.657 +   */
   1.658 +  nsresult GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart, int32_t* aLength, nsBidiDirection* aDirection);
   1.659 +
   1.660 +#ifdef FULL_BIDI_ENGINE
   1.661 +  /**
   1.662 +   * Get the visual position from a logical text position.
   1.663 +   * If such a mapping is used many times on the same
   1.664 +   * <code>nsBidi</code> object, then calling
   1.665 +   * <code>GetLogicalMap</code> is more efficient.<p>
   1.666 +   *
   1.667 +   * Note that in right-to-left runs, this mapping places
   1.668 +   * modifier letters before base characters and second surrogates
   1.669 +   * before first ones.
   1.670 +   *
   1.671 +   * @param aLogicalIndex is the index of a character in the text.
   1.672 +   *
   1.673 +   * @param aVisualIndex will receive the visual position of this character.
   1.674 +   *
   1.675 +   * @see GetLogicalMap
   1.676 +   * @see GetLogicalIndex
   1.677 +   */
   1.678 +  nsresult GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex);
   1.679 +
   1.680 +  /**
   1.681 +   * Get the logical text position from a visual position.
   1.682 +   * If such a mapping is used many times on the same
   1.683 +   * <code>nsBidi</code> object, then calling
   1.684 +   * <code>GetVisualMap</code> is more efficient.<p>
   1.685 +   *
   1.686 +   * This is the inverse function to <code>GetVisualIndex</code>.
   1.687 +   *
   1.688 +   * @param aVisualIndex is the visual position of a character.
   1.689 +   *
   1.690 +   * @param aLogicalIndex will receive the index of this character in the text.
   1.691 +   *
   1.692 +   * @see GetVisualMap
   1.693 +   * @see GetVisualIndex
   1.694 +   */
   1.695 +  nsresult GetLogicalIndex(int32_t aVisualIndex, int32_t* aLogicalIndex);
   1.696 +
   1.697 +  /**
   1.698 +   * Get a logical-to-visual index map (array) for the characters in the nsBidi
   1.699 +   * (paragraph or line) object.
   1.700 +   *
   1.701 +   * @param aIndexMap is a pointer to an array of <code>GetLength</code>
   1.702 +   *      indexes which will reflect the reordering of the characters.
   1.703 +   *      The array does not need to be initialized.<p>
   1.704 +   *      The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>.<p>
   1.705 +   *
   1.706 +   * @see GetVisualMap
   1.707 +   * @see GetVisualIndex
   1.708 +   */
   1.709 +  nsresult GetLogicalMap(int32_t *aIndexMap);
   1.710 +
   1.711 +  /**
   1.712 +   * Get a visual-to-logical index map (array) for the characters in the nsBidi
   1.713 +   * (paragraph or line) object.
   1.714 +   *
   1.715 +   * @param aIndexMap is a pointer to an array of <code>GetLength</code>
   1.716 +   *      indexes which will reflect the reordering of the characters.
   1.717 +   *      The array does not need to be initialized.<p>
   1.718 +   *      The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>.<p>
   1.719 +   *
   1.720 +   * @see GetLogicalMap
   1.721 +   * @see GetLogicalIndex
   1.722 +   */
   1.723 +  nsresult GetVisualMap(int32_t *aIndexMap);
   1.724 +
   1.725 +  /**
   1.726 +   * This is a convenience function that does not use a nsBidi object.
   1.727 +   * It is intended for use when an application has determined the levels
   1.728 +   * of objects (character sequences) and just needs to have them reordered (L2).
   1.729 +   * This is equivalent to using <code>GetLogicalMap</code> on a
   1.730 +   * <code>nsBidi</code> object.
   1.731 +   *
   1.732 +   * @param aLevels is an array with <code>aLength</code> levels that have been determined by
   1.733 +   *      the application.
   1.734 +   *
   1.735 +   * @param aLength is the number of levels in the array, or, semantically,
   1.736 +   *      the number of objects to be reordered.
   1.737 +   *      It must be <code>aLength>0</code>.
   1.738 +   *
   1.739 +   * @param aIndexMap is a pointer to an array of <code>aLength</code>
   1.740 +   *      indexes which will reflect the reordering of the characters.
   1.741 +   *      The array does not need to be initialized.<p>
   1.742 +   *      The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>.
   1.743 +   */
   1.744 +  static nsresult ReorderLogical(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap);
   1.745 +#endif // FULL_BIDI_ENGINE
   1.746 +  /**
   1.747 +   * This is a convenience function that does not use a nsBidi object.
   1.748 +   * It is intended for use when an application has determined the levels
   1.749 +   * of objects (character sequences) and just needs to have them reordered (L2).
   1.750 +   * This is equivalent to using <code>GetVisualMap</code> on a
   1.751 +   * <code>nsBidi</code> object.
   1.752 +   *
   1.753 +   * @param aLevels is an array with <code>aLength</code> levels that have been determined by
   1.754 +   *      the application.
   1.755 +   *
   1.756 +   * @param aLength is the number of levels in the array, or, semantically,
   1.757 +   *      the number of objects to be reordered.
   1.758 +   *      It must be <code>aLength>0</code>.
   1.759 +   *
   1.760 +   * @param aIndexMap is a pointer to an array of <code>aLength</code>
   1.761 +   *      indexes which will reflect the reordering of the characters.
   1.762 +   *      The array does not need to be initialized.<p>
   1.763 +   *      The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>.
   1.764 +   */
   1.765 +  static nsresult ReorderVisual(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap);
   1.766 +
   1.767 +#ifdef FULL_BIDI_ENGINE
   1.768 +  /**
   1.769 +   * Invert an index map.
   1.770 +   * The one-to-one index mapping of the first map is inverted and written to
   1.771 +   * the second one.
   1.772 +   *
   1.773 +   * @param aSrcMap is an array with <code>aLength</code> indexes
   1.774 +   *      which define the original mapping.
   1.775 +   *
   1.776 +   * @param aDestMap is an array with <code>aLength</code> indexes
   1.777 +   *      which will be filled with the inverse mapping.
   1.778 +   *
   1.779 +   * @param aLength is the length of each array.
   1.780 +   */
   1.781 +  nsresult InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength);
   1.782 +#endif // FULL_BIDI_ENGINE
   1.783 +  /**
   1.784 +   * Reverse a Right-To-Left run of Unicode text.
   1.785 +   *
   1.786 +   * This function preserves the integrity of characters with multiple
   1.787 +   * code units and (optionally) modifier letters.
   1.788 +   * Characters can be replaced by mirror-image characters
   1.789 +   * in the destination buffer. Note that "real" mirroring has
   1.790 +   * to be done in a rendering engine by glyph selection
   1.791 +   * and that for many "mirrored" characters there are no
   1.792 +   * Unicode characters as mirror-image equivalents.
   1.793 +   * There are also options to insert or remove Bidi control
   1.794 +   * characters; see the description of the <code>aDestSize</code>
   1.795 +   * and <code>aOptions</code> parameters and of the option bit flags.
   1.796 +   *
   1.797 +   * Since no Bidi controls are inserted here, this function will never
   1.798 +   * write more than <code>aSrcLength</code> characters to <code>aDest</code>.
   1.799 +   *
   1.800 +   * @param aSrc A pointer to the RTL run text.
   1.801 +   *
   1.802 +   * @param aSrcLength The length of the RTL run.
   1.803 +   *                 If the <code>NSBIDI_REMOVE_BIDI_CONTROLS</code> option
   1.804 +   *                 is set, then the destination length may be less than
   1.805 +   *                 <code>aSrcLength</code>.
   1.806 +   *                 If this option is not set, then the destination length
   1.807 +   *                 will be exactly <code>aSrcLength</code>.
   1.808 +   *
   1.809 +   * @param aDest A pointer to where the reordered text is to be copied.
   1.810 +   *             The <code>aSrcLength</code>-character buffers at <code>aSrc</code>
   1.811 +   *             and <code>aDest</code> must not overlap.
   1.812 +   *
   1.813 +   * @param aOptions A bit set of options for the reordering that control
   1.814 +   *                how the reordered text is written.
   1.815 +   *
   1.816 +   * @param aDestSize will receive the number of characters that were written to <code>aDest</code>.
   1.817 +   */
   1.818 +  nsresult WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize);
   1.819 +
   1.820 +protected:
   1.821 +  friend class nsBidiPresUtils;
   1.822 +
   1.823 +  /** length of the current text */
   1.824 +  int32_t mLength;
   1.825 +
   1.826 +  /** memory sizes in bytes */
   1.827 +  size_t mDirPropsSize, mLevelsSize, mRunsSize;
   1.828 +
   1.829 +  /** allocated memory */
   1.830 +  DirProp* mDirPropsMemory;
   1.831 +  nsBidiLevel* mLevelsMemory;
   1.832 +  Run* mRunsMemory;
   1.833 +
   1.834 +  /** indicators for whether memory may be allocated after construction */
   1.835 +  bool mMayAllocateText, mMayAllocateRuns;
   1.836 +
   1.837 +  const DirProp* mDirProps;
   1.838 +  nsBidiLevel* mLevels;
   1.839 +
   1.840 +  /** the paragraph level */
   1.841 +  nsBidiLevel mParaLevel;
   1.842 +
   1.843 +  /** flags is a bit set for which directional properties are in the text */
   1.844 +  Flags mFlags;
   1.845 +
   1.846 +  /** the overall paragraph or line directionality - see nsBidiDirection */
   1.847 +  nsBidiDirection mDirection;
   1.848 +
   1.849 +  /** characters after trailingWSStart are WS and are
   1.850 +   *  implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
   1.851 +  int32_t mTrailingWSStart;
   1.852 +
   1.853 +  /** fields for line reordering */
   1.854 +  int32_t mRunCount;     /* ==-1: runs not set up yet */
   1.855 +  Run* mRuns;
   1.856 +
   1.857 +  /** for non-mixed text, we only need a tiny array of runs (no malloc()) */
   1.858 +  Run mSimpleRuns[1];
   1.859 +
   1.860 +private:
   1.861 +
   1.862 +  void Init();
   1.863 +
   1.864 +  bool GetMemory(void **aMemory, size_t* aSize, bool aMayAllocate, size_t aSizeNeeded);
   1.865 +
   1.866 +  void Free();
   1.867 +
   1.868 +  void GetDirProps(const char16_t *aText);
   1.869 +
   1.870 +  nsBidiDirection ResolveExplicitLevels();
   1.871 +
   1.872 +  nsresult CheckExplicitLevels(nsBidiDirection *aDirection);
   1.873 +
   1.874 +  nsBidiDirection DirectionFromFlags(Flags aFlags);
   1.875 +
   1.876 +  void ResolveImplicitLevels(int32_t aStart, int32_t aLimit, DirProp aSOR, DirProp aEOR);
   1.877 +
   1.878 +  void AdjustWSLevels();
   1.879 +
   1.880 +  void SetTrailingWSStart();
   1.881 +
   1.882 +  bool GetRuns();
   1.883 +
   1.884 +  void GetSingleRun(nsBidiLevel aLevel);
   1.885 +
   1.886 +  void ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel);
   1.887 +
   1.888 +  static bool PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap, nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel);
   1.889 +
   1.890 +  int32_t doWriteReverse(const char16_t *src, int32_t srcLength,
   1.891 +                         char16_t *dest, uint16_t options);
   1.892 +
   1.893 +};
   1.894 +
   1.895 +#endif // nsBidi_h__

mercurial