layout/base/nsBidi.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
     2  *
     3  * This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #ifndef nsBidi_h__
     8 #define nsBidi_h__
    10 #include "nsBidiUtils.h"
    12 // Bidi reordering engine from ICU
    13 /*
    14  * javadoc-style comments are intended to be transformed into HTML
    15  * using DOC++ - see
    16  * http://www.zib.de/Visual/software/doc++/index.html .
    17  *
    18  * The HTML documentation is created with
    19  *  doc++ -H nsBidi.h
    20  */
    22 /**
    23  * @mainpage BIDI algorithm for Mozilla (from ICU)
    24  *
    25  * <h2>BIDI algorithm for Mozilla</h2>
    26  *
    27  * This is an implementation of the Unicode Bidirectional algorithm.
    28  * The algorithm is defined in the
    29  * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
    30  * version 5, also described in The Unicode Standard, Version 3.0 .<p>
    31  *
    32  * <h3>General remarks about the API:</h3>
    33  *
    34  * The <quote>limit</quote> of a sequence of characters is the position just after their
    35  * last character, i.e., one more than that position.<p>
    36  *
    37  * Some of the API functions provide access to <quote>runs</quote>.
    38  * Such a <quote>run</quote> is defined as a sequence of characters
    39  * that are at the same embedding level
    40  * after performing the BIDI algorithm.<p>
    41  *
    42  * @author Markus W. Scherer. Ported to Mozilla by Simon Montagu
    43  * @version 1.0
    44  */
    46 /**
    47  * nsBidiLevel is the type of the level values in this
    48  * Bidi implementation.
    49  * It holds an embedding level and indicates the visual direction
    50  * by its bit 0 (even/odd value).<p>
    51  *
    52  * It can also hold non-level values for the
    53  * <code>aParaLevel</code> and <code>aEmbeddingLevels</code>
    54  * arguments of <code>SetPara</code>; there:
    55  * <ul>
    56  * <li>bit 7 of an <code>aEmbeddingLevels[]</code>
    57  * value indicates whether the using application is
    58  * specifying the level of a character to <i>override</i> whatever the
    59  * Bidi implementation would resolve it to.</li>
    60  * <li><code>aParaLevel</code> can be set to the
    61  * pseudo-level values <code>NSBIDI_DEFAULT_LTR</code>
    62  * and <code>NSBIDI_DEFAULT_RTL</code>.</li></ul>
    63  *
    64  * @see nsBidi::SetPara
    65  *
    66  * <p>The related constants are not real, valid level values.
    67  * <code>NSBIDI_DEFAULT_XXX</code> can be used to specify
    68  * a default for the paragraph level for
    69  * when the <code>SetPara</code> function
    70  * shall determine it but there is no
    71  * strongly typed character in the input.<p>
    72  *
    73  * Note that the value for <code>NSBIDI_DEFAULT_LTR</code> is even
    74  * and the one for <code>NSBIDI_DEFAULT_RTL</code> is odd,
    75  * just like with normal LTR and RTL level values -
    76  * these special values are designed that way. Also, the implementation
    77  * assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
    78  *
    79  * @see NSBIDI_DEFAULT_LTR
    80  * @see NSBIDI_DEFAULT_RTL
    81  * @see NSBIDI_LEVEL_OVERRIDE
    82  * @see NSBIDI_MAX_EXPLICIT_LEVEL
    83  */
    84 typedef uint8_t nsBidiLevel;
    86 /** Paragraph level setting.
    87  *  If there is no strong character, then set the paragraph level to 0 (left-to-right).
    88  */
    89 #define NSBIDI_DEFAULT_LTR 0xfe
    91 /** Paragraph level setting.
    92  *  If there is no strong character, then set the paragraph level to 1 (right-to-left).
    93  */
    94 #define NSBIDI_DEFAULT_RTL 0xff
    96 /**
    97  * Maximum explicit embedding level.
    98  * (The maximum resolved level can be up to <code>NSBIDI_MAX_EXPLICIT_LEVEL+1</code>).
    99  *
   100  */
   101 #define NSBIDI_MAX_EXPLICIT_LEVEL 61
   103 /** Bit flag for level input. 
   104  *  Overrides directional properties. 
   105  */
   106 #define NSBIDI_LEVEL_OVERRIDE 0x80
   108 /**
   109  * <code>nsBidiDirection</code> values indicate the text direction.
   110  */
   111 enum nsBidiDirection {
   112   /** All left-to-right text This is a 0 value. */
   113   NSBIDI_LTR,
   114   /** All right-to-left text This is a 1 value. */
   115   NSBIDI_RTL,
   116   /** Mixed-directional text. */
   117   NSBIDI_MIXED
   118 };
   120 typedef enum nsBidiDirection nsBidiDirection;
   122 /* miscellaneous definitions ------------------------------------------------ */
   123 /** option flags for WriteReverse() */
   124 /**
   125  * option bit for WriteReverse():
   126  * keep combining characters after their base characters in RTL runs
   127  *
   128  * @see WriteReverse
   129  */
   130 #define NSBIDI_KEEP_BASE_COMBINING       1
   132 /**
   133  * option bit for WriteReverse():
   134  * replace characters with the "mirrored" property in RTL runs
   135  * by their mirror-image mappings
   136  *
   137  * @see WriteReverse
   138  */
   139 #define NSBIDI_DO_MIRRORING              2
   141 /**
   142  * option bit for WriteReverse():
   143  * remove Bidi control characters
   144  *
   145  * @see WriteReverse
   146  */
   147 #define NSBIDI_REMOVE_BIDI_CONTROLS      8
   149 /* helper macros for each allocated array member */
   150 #define GETDIRPROPSMEMORY(length) \
   151                                   GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, \
   152                                   mMayAllocateText, (length))
   154 #define GETLEVELSMEMORY(length) \
   155                                 GetMemory((void **)&mLevelsMemory, &mLevelsSize, \
   156                                 mMayAllocateText, (length))
   158 #define GETRUNSMEMORY(length) \
   159                               GetMemory((void **)&mRunsMemory, &mRunsSize, \
   160                               mMayAllocateRuns, (length)*sizeof(Run))
   162 /* additional macros used by constructor - always allow allocation */
   163 #define GETINITIALDIRPROPSMEMORY(length) \
   164                                          GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, \
   165                                          true, (length))
   167 #define GETINITIALLEVELSMEMORY(length) \
   168                                        GetMemory((void **)&mLevelsMemory, &mLevelsSize, \
   169                                        true, (length))
   171 #define GETINITIALRUNSMEMORY(length) \
   172                                      GetMemory((void **)&mRunsMemory, &mRunsSize, \
   173                                      true, (length)*sizeof(Run))
   175 /*
   176  * Sometimes, bit values are more appropriate
   177  * to deal with directionality properties.
   178  * Abbreviations in these macro names refer to names
   179  * used in the Bidi algorithm.
   180  */
   181 typedef uint8_t DirProp;
   183 #define DIRPROP_FLAG(dir) (1UL<<(dir))
   185 /* special flag for multiple runs from explicit embedding codes */
   186 #define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
   188 /* are there any characters that are LTR or RTL? */
   189 #define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
   190 #define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
   192 /* explicit embedding codes */
   193 #define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
   194 #define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
   195 #define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))
   197 #define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
   198 #define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
   200 /* paragraph and segment separators */
   201 #define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
   203 /* all types that are counted as White Space or Neutral in some steps */
   204 #define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
   205 #define MASK_N (DIRPROP_FLAG(O_N)|MASK_WS)
   207 /* all types that are included in a sequence of European Terminators for (W5) */
   208 #define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)
   210 /* types that are neutrals or could becomes neutrals in (Wn) */
   211 #define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)
   213 /*
   214  * These types may be changed to "e",
   215  * the embedding type (L or R) of the run,
   216  * in the Bidi algorithm (N2)
   217  */
   218 #define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
   220 /* the dirProp's L and R are defined to 0 and 1 values in nsCharType */
   221 #define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
   223 #define IS_DEFAULT_LEVEL(level) (((level)&0xfe)==0xfe)
   225 /* handle surrogate pairs --------------------------------------------------- */
   227 #define IS_FIRST_SURROGATE(uchar) (((uchar)&0xfc00)==0xd800)
   228 #define IS_SECOND_SURROGATE(uchar) (((uchar)&0xfc00)==0xdc00)
   230 /* get the UTF-32 value directly from the surrogate pseudo-characters */
   231 #define SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
   232 #define GET_UTF_32(first, second) (((first)<<10UL)+(second)-SURROGATE_OFFSET)
   235 #define UTF_ERROR_VALUE 0xffff
   236 /* definitions with forward iteration --------------------------------------- */
   238 /*
   239  * all the macros that go forward assume that
   240  * the initial offset is 0<=i<length;
   241  * they update the offset
   242  */
   244 /* fast versions, no error-checking */
   246 #define UTF16_APPEND_CHAR_UNSAFE(s, i, c){ \
   247                                          if((uint32_t)(c)<=0xffff) { \
   248                                          (s)[(i)++]=(char16_t)(c); \
   249                                          } else { \
   250                                          (s)[(i)++]=(char16_t)((c)>>10)+0xd7c0; \
   251                                          (s)[(i)++]=(char16_t)(c)&0x3ff|0xdc00; \
   252                                          } \
   253 }
   255 /* safe versions with error-checking and optional regularity-checking */
   257 #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
   258                                                 if((PRUInt32)(c)<=0xffff) { \
   259                                                 (s)[(i)++]=(char16_t)(c); \
   260                                                 } else if((PRUInt32)(c)<=0x10ffff) { \
   261                                                 if((i)+1<(length)) { \
   262                                                 (s)[(i)++]=(char16_t)((c)>>10)+0xd7c0; \
   263                                                 (s)[(i)++]=(char16_t)(c)&0x3ff|0xdc00; \
   264                                                 } else /* not enough space */ { \
   265                                                 (s)[(i)++]=UTF_ERROR_VALUE; \
   266                                                 } \
   267                                                 } else /* c>0x10ffff, write error value */ { \
   268                                                 (s)[(i)++]=UTF_ERROR_VALUE; \
   269                                                 } \
   270 }
   272 /* definitions with backward iteration -------------------------------------- */
   274 /*
   275  * all the macros that go backward assume that
   276  * the valid buffer range starts at offset 0
   277  * and that the initial offset is 0<i<=length;
   278  * they update the offset
   279  */
   281 /* fast versions, no error-checking */
   283 /*
   284  * Get a single code point from an offset that points behind the last
   285  * of the code units that belong to that code point.
   286  * Assume 0<i<=length.
   287  */
   288 #define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
   289                                         (c)=(s)[--(i)]; \
   290                                         if(IS_SECOND_SURROGATE(c)) { \
   291                                         (c)=GET_UTF_32((s)[--(i)], (c)); \
   292                                         } \
   293 }
   295 #define UTF16_BACK_1_UNSAFE(s, i) { \
   296                                   if(IS_SECOND_SURROGATE((s)[--(i)])) { \
   297                                   --(i); \
   298                                   } \
   299 }
   301 #define UTF16_BACK_N_UNSAFE(s, i, n) { \
   302                                      int32_t __N=(n); \
   303                                      while(__N>0) { \
   304                                      UTF16_BACK_1_UNSAFE(s, i); \
   305                                      --__N; \
   306                                      } \
   307 }
   309 /* safe versions with error-checking and optional regularity-checking */
   311 #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
   312                                                      (c)=(s)[--(i)]; \
   313                                                      if(IS_SECOND_SURROGATE(c)) { \
   314                                                      char16_t __c2; \
   315                                                      if((i)>(start) && IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
   316                                                      --(i); \
   317                                                      (c)=GET_UTF_32(__c2, (c)); \
   318       /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() */ \
   319                                                      } else if(strict) {\
   320       /* unmatched second surrogate */ \
   321                                                      (c)=UTF_ERROR_VALUE; \
   322                                                      } \
   323                                                      } else if(strict && IS_FIRST_SURROGATE(c)) { \
   324       /* unmatched first surrogate */ \
   325                                                      (c)=UTF_ERROR_VALUE; \
   326   /* else strict: (c)==0xfffe is caught by UTF_IS_ERROR() */ \
   327                                                      } \
   328 }
   330 #define UTF16_BACK_1_SAFE(s, start, i) { \
   331                                        if(IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && IS_FIRST_SURROGATE((s)[(i)-1])) { \
   332                                        --(i); \
   333                                        } \
   334 }
   336 #define UTF16_BACK_N_SAFE(s, start, i, n) { \
   337                                           int32_t __N=(n); \
   338                                           while(__N>0 && (i)>(start)) { \
   339                                           UTF16_BACK_1_SAFE(s, start, i); \
   340                                           --__N; \
   341                                           } \
   342 }
   344 #define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
   345 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
   346 #define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
   347 #define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
   348 #define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
   349 #define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
   350 #define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
   351 #define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
   353 #define UTF_PREV_CHAR(s, start, i, c)                UTF_PREV_CHAR_SAFE(s, start, i, c, false)
   354 #define UTF_BACK_1(s, start, i)                      UTF_BACK_1_SAFE(s, start, i)
   355 #define UTF_BACK_N(s, start, i, n)                   UTF_BACK_N_SAFE(s, start, i, n)
   356 #define UTF_APPEND_CHAR(s, i, length, c)             UTF_APPEND_CHAR_SAFE(s, i, length, c)
   358 /* Run structure for reordering --------------------------------------------- */
   360 typedef struct Run {
   361   int32_t logicalStart,  /* first character of the run; b31 indicates even/odd level */
   362   visualLimit;  /* last visual position of the run +1 */
   363 } Run;
   365 /* in a Run, logicalStart will get this bit set if the run level is odd */
   366 #define INDEX_ODD_BIT (1UL<<31)
   368 #define MAKE_INDEX_ODD_PAIR(index, level) (index|((uint32_t)level<<31))
   369 #define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=((uint32_t)level<<31))
   370 #define REMOVE_ODD_BIT(x)          ((x)&=~INDEX_ODD_BIT)
   372 #define GET_INDEX(x)   (x&~INDEX_ODD_BIT)
   373 #define GET_ODD_BIT(x) ((uint32_t)x>>31)
   374 #define IS_ODD_RUN(x)  ((x&INDEX_ODD_BIT)!=0)
   375 #define IS_EVEN_RUN(x) ((x&INDEX_ODD_BIT)==0)
   377 typedef uint32_t Flags;
   379 /**
   380  * This class holds information about a paragraph of text
   381  * with Bidi-algorithm-related details, or about one line of
   382  * such a paragraph.<p>
   383  * Reordering can be done on a line, or on a paragraph which is
   384  * then interpreted as one single line.<p>
   385  *
   386  * On construction, the class is initially empty. It is assigned
   387  * the Bidi properties of a paragraph by <code>SetPara</code>
   388  * or the Bidi properties of a line of a paragraph by
   389  * <code>SetLine</code>.<p>
   390  * A Bidi class can be reused for as long as it is not deallocated
   391  * by calling its destructor.<p>
   392  * <code>SetPara</code> will allocate additional memory for
   393  * internal structures as necessary.
   394  */
   395 class nsBidi
   396 {
   397 public: 
   398   /** @brief Default constructor.
   399    * 
   400    * The nsBidi object is initially empty. It is assigned
   401    * the Bidi properties of a paragraph by <code>SetPara()</code>
   402    * or the Bidi properties of a line of a paragraph by
   403    * <code>SetLine()</code>.<p>
   404    * This object can be reused for as long as it is not destroyed.<p>
   405    * <code>SetPara()</code> will allocate additional memory for
   406    * internal structures as necessary.
   407    *
   408    */
   409   nsBidi();
   411   /** @brief Destructor. */
   412   virtual ~nsBidi();
   415   /**
   416    * Perform the Unicode Bidi algorithm. It is defined in the
   417    * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
   418    * version 5,
   419    * also described in The Unicode Standard, Version 3.0 .<p>
   420    *
   421    * This function takes a single plain text paragraph with or without
   422    * externally specified embedding levels from <quote>styled</quote> text
   423    * and computes the left-right-directionality of each character.<p>
   424    *
   425    * If the entire paragraph consists of text of only one direction, then
   426    * the function may not perform all the steps described by the algorithm,
   427    * i.e., some levels may not be the same as if all steps were performed.
   428    * This is not relevant for unidirectional text.<br>
   429    * For example, in pure LTR text with numbers the numbers would get
   430    * a resolved level of 2 higher than the surrounding text according to
   431    * the algorithm. This implementation may set all resolved levels to
   432    * the same value in such a case.<p>
   433    *
   434    * The text must be externally split into separate paragraphs (rule P1).
   435    * Paragraph separators (B) should appear at most at the very end.
   436    *
   437    * @param aText is a pointer to the single-paragraph text that the
   438    *      Bidi algorithm will be performed on
   439    *      (step (P1) of the algorithm is performed externally).
   440    *      <strong>The text must be (at least) <code>aLength</code> long.</strong>
   441    *
   442    * @param aLength is the length of the text; if <code>aLength==-1</code> then
   443    *      the text must be zero-terminated.
   444    *
   445    * @param aParaLevel specifies the default level for the paragraph;
   446    *      it is typically 0 (LTR) or 1 (RTL).
   447    *      If the function shall determine the paragraph level from the text,
   448    *      then <code>aParaLevel</code> can be set to
   449    *      either <code>NSBIDI_DEFAULT_LTR</code>
   450    *      or <code>NSBIDI_DEFAULT_RTL</code>;
   451    *      if there is no strongly typed character, then
   452    *      the desired default is used (0 for LTR or 1 for RTL).
   453    *      Any other value between 0 and <code>NSBIDI_MAX_EXPLICIT_LEVEL</code> is also valid,
   454    *      with odd levels indicating RTL.
   455    *
   456    * @param aEmbeddingLevels (in) may be used to preset the embedding and override levels,
   457    *      ignoring characters like LRE and PDF in the text.
   458    *      A level overrides the directional property of its corresponding
   459    *      (same index) character if the level has the
   460    *      <code>NSBIDI_LEVEL_OVERRIDE</code> bit set.<p>
   461    *      Except for that bit, it must be
   462    *      <code>aParaLevel<=aEmbeddingLevels[]<=NSBIDI_MAX_EXPLICIT_LEVEL</code>.<p>
   463    *      <strong>Caution: </strong>A copy of this pointer, not of the levels,
   464    *      will be stored in the <code>nsBidi</code> object;
   465    *      the <code>aEmbeddingLevels</code> array must not be
   466    *      deallocated before the <code>nsBidi</code> object is destroyed or reused,
   467    *      and the <code>aEmbeddingLevels</code>
   468    *      should not be modified to avoid unexpected results on subsequent Bidi operations.
   469    *      However, the <code>SetPara</code> and
   470    *      <code>SetLine</code> functions may modify some or all of the levels.<p>
   471    *      After the <code>nsBidi</code> object is reused or destroyed, the caller
   472    *      must take care of the deallocation of the <code>aEmbeddingLevels</code> array.<p>
   473    *      <strong>The <code>aEmbeddingLevels</code> array must be
   474    *      at least <code>aLength</code> long.</strong>
   475    */
   476   nsresult SetPara(const char16_t *aText, int32_t aLength, nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels);
   478   /**
   479    * Get the directionality of the text.
   480    *
   481    * @param aDirection receives a <code>NSBIDI_XXX</code> value that indicates if the entire text
   482    *       represented by this object is unidirectional,
   483    *       and which direction, or if it is mixed-directional.
   484    *
   485    * @see nsBidiDirection
   486    */
   487   nsresult GetDirection(nsBidiDirection* aDirection);
   489   /**
   490    * Get the paragraph level of the text.
   491    *
   492    * @param aParaLevel receives a <code>NSBIDI_XXX</code> value indicating the paragraph level
   493    *
   494    * @see nsBidiLevel
   495    */
   496   nsresult GetParaLevel(nsBidiLevel* aParaLevel);
   498 #ifdef FULL_BIDI_ENGINE
   499   /**
   500    * <code>SetLine</code> sets an <code>nsBidi</code> to
   501    * contain the reordering information, especially the resolved levels,
   502    * for all the characters in a line of text. This line of text is
   503    * specified by referring to an <code>nsBidi</code> object representing
   504    * this information for a paragraph of text, and by specifying
   505    * a range of indexes in this paragraph.<p>
   506    * In the new line object, the indexes will range from 0 to <code>aLimit-aStart</code>.<p>
   507    *
   508    * This is used after calling <code>SetPara</code>
   509    * for a paragraph, and after line-breaking on that paragraph.
   510    * It is not necessary if the paragraph is treated as a single line.<p>
   511    *
   512    * After line-breaking, rules (L1) and (L2) for the treatment of
   513    * trailing WS and for reordering are performed on
   514    * an <code>nsBidi</code> object that represents a line.<p>
   515    *
   516    * <strong>Important:</strong> the line <code>nsBidi</code> object shares data with
   517    * <code>aParaBidi</code>.
   518    * You must destroy or reuse this object before <code>aParaBidi</code>.
   519    * In other words, you must destroy or reuse the <code>nsBidi</code> object for a line
   520    * before the object for its parent paragraph.
   521    *
   522    * @param aParaBidi is the parent paragraph object.
   523    *
   524    * @param aStart is the line's first index into the paragraph text.
   525    *
   526    * @param aLimit is just behind the line's last index into the paragraph text
   527    *      (its last index +1).<br>
   528    *      It must be <code>0<=aStart<=aLimit<=</code>paragraph length.
   529    *
   530    * @see SetPara
   531    */
   532   nsresult SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit);  
   534   /**
   535    * Get the length of the text.
   536    *
   537    * @param aLength receives the length of the text that the nsBidi object was created for.
   538    */
   539   nsresult GetLength(int32_t* aLength);
   541   /**
   542    * Get the level for one character.
   543    *
   544    * @param aCharIndex the index of a character.
   545    *
   546    * @param aLevel receives the level for the character at aCharIndex.
   547    *
   548    * @see nsBidiLevel
   549    */
   550   nsresult GetLevelAt(int32_t aCharIndex,  nsBidiLevel* aLevel);
   552   /**
   553    * Get an array of levels for each character.<p>
   554    *
   555    * Note that this function may allocate memory under some
   556    * circumstances, unlike <code>GetLevelAt</code>.
   557    *
   558    * @param aLevels receives a pointer to the levels array for the text,
   559    *       or <code>nullptr</code> if an error occurs.
   560    *
   561    * @see nsBidiLevel
   562    */
   563   nsresult GetLevels(nsBidiLevel** aLevels);
   564 #endif // FULL_BIDI_ENGINE
   565   /**
   566    * Get the bidirectional type for one character.
   567    *
   568    * @param aCharIndex the index of a character.
   569    *
   570    * @param aType receives the bidirectional type of the character at aCharIndex.
   571    */
   572   nsresult GetCharTypeAt(int32_t aCharIndex,  nsCharType* aType);
   574   /**
   575    * Get a logical run.
   576    * This function returns information about a run and is used
   577    * to retrieve runs in logical order.<p>
   578    * This is especially useful for line-breaking on a paragraph.
   579    *
   580    * @param aLogicalStart is the first character of the run.
   581    *
   582    * @param aLogicalLimit will receive the limit of the run.
   583    *      The l-value that you point to here may be the
   584    *      same expression (variable) as the one for
   585    *      <code>aLogicalStart</code>.
   586    *      This pointer can be <code>nullptr</code> if this
   587    *      value is not necessary.
   588    *
   589    * @param aLevel will receive the level of the run.
   590    *      This pointer can be <code>nullptr</code> if this
   591    *      value is not necessary.
   592    */
   593   nsresult GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimit, nsBidiLevel* aLevel);
   595   /**
   596    * Get the number of runs.
   597    * This function may invoke the actual reordering on the
   598    * <code>nsBidi</code> object, after <code>SetPara</code>
   599    * may have resolved only the levels of the text. Therefore,
   600    * <code>CountRuns</code> may have to allocate memory,
   601    * and may fail doing so.
   602    *
   603    * @param aRunCount will receive the number of runs.
   604    */
   605   nsresult CountRuns(int32_t* aRunCount);
   607   /**
   608    * Get one run's logical start, length, and directionality,
   609    * which can be 0 for LTR or 1 for RTL.
   610    * In an RTL run, the character at the logical start is
   611    * visually on the right of the displayed run.
   612    * The length is the number of characters in the run.<p>
   613    * <code>CountRuns</code> should be called
   614    * before the runs are retrieved.
   615    *
   616    * @param aRunIndex is the number of the run in visual order, in the
   617    *      range <code>[0..CountRuns-1]</code>.
   618    *
   619    * @param aLogicalStart is the first logical character index in the text.
   620    *      The pointer may be <code>nullptr</code> if this index is not needed.
   621    *
   622    * @param aLength is the number of characters (at least one) in the run.
   623    *      The pointer may be <code>nullptr</code> if this is not needed.
   624    *
   625    * @param aDirection will receive the directionality of the run,
   626    *       <code>NSBIDI_LTR==0</code> or <code>NSBIDI_RTL==1</code>,
   627    *       never <code>NSBIDI_MIXED</code>.
   628    *
   629    * @see CountRuns<p>
   630    *
   631    * Example:
   632    * @code
   633    *  int32_t i, count, logicalStart, visualIndex=0, length;
   634    *  nsBidiDirection dir;
   635    *  pBidi->CountRuns(&count);
   636    *  for(i=0; i<count; ++i) {
   637    *    pBidi->GetVisualRun(i, &logicalStart, &length, &dir);
   638    *    if(NSBIDI_LTR==dir) {
   639    *      do { // LTR
   640    *        show_char(text[logicalStart++], visualIndex++);
   641    *      } while(--length>0);
   642    *    } else {
   643    *      logicalStart+=length;  // logicalLimit
   644    *      do { // RTL
   645    *        show_char(text[--logicalStart], visualIndex++);
   646    *      } while(--length>0);
   647    *    }
   648    *  }
   649    * @endcode
   650    *
   651    * Note that in right-to-left runs, code like this places
   652    * modifier letters before base characters and second surrogates
   653    * before first ones.
   654    */
   655   nsresult GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart, int32_t* aLength, nsBidiDirection* aDirection);
   657 #ifdef FULL_BIDI_ENGINE
   658   /**
   659    * Get the visual position from a logical text position.
   660    * If such a mapping is used many times on the same
   661    * <code>nsBidi</code> object, then calling
   662    * <code>GetLogicalMap</code> is more efficient.<p>
   663    *
   664    * Note that in right-to-left runs, this mapping places
   665    * modifier letters before base characters and second surrogates
   666    * before first ones.
   667    *
   668    * @param aLogicalIndex is the index of a character in the text.
   669    *
   670    * @param aVisualIndex will receive the visual position of this character.
   671    *
   672    * @see GetLogicalMap
   673    * @see GetLogicalIndex
   674    */
   675   nsresult GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex);
   677   /**
   678    * Get the logical text position from a visual position.
   679    * If such a mapping is used many times on the same
   680    * <code>nsBidi</code> object, then calling
   681    * <code>GetVisualMap</code> is more efficient.<p>
   682    *
   683    * This is the inverse function to <code>GetVisualIndex</code>.
   684    *
   685    * @param aVisualIndex is the visual position of a character.
   686    *
   687    * @param aLogicalIndex will receive the index of this character in the text.
   688    *
   689    * @see GetVisualMap
   690    * @see GetVisualIndex
   691    */
   692   nsresult GetLogicalIndex(int32_t aVisualIndex, int32_t* aLogicalIndex);
   694   /**
   695    * Get a logical-to-visual index map (array) for the characters in the nsBidi
   696    * (paragraph or line) object.
   697    *
   698    * @param aIndexMap is a pointer to an array of <code>GetLength</code>
   699    *      indexes which will reflect the reordering of the characters.
   700    *      The array does not need to be initialized.<p>
   701    *      The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>.<p>
   702    *
   703    * @see GetVisualMap
   704    * @see GetVisualIndex
   705    */
   706   nsresult GetLogicalMap(int32_t *aIndexMap);
   708   /**
   709    * Get a visual-to-logical index map (array) for the characters in the nsBidi
   710    * (paragraph or line) object.
   711    *
   712    * @param aIndexMap is a pointer to an array of <code>GetLength</code>
   713    *      indexes which will reflect the reordering of the characters.
   714    *      The array does not need to be initialized.<p>
   715    *      The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>.<p>
   716    *
   717    * @see GetLogicalMap
   718    * @see GetLogicalIndex
   719    */
   720   nsresult GetVisualMap(int32_t *aIndexMap);
   722   /**
   723    * This is a convenience function that does not use a nsBidi object.
   724    * It is intended to be used when an application has determined the levels
   725    * of objects (character sequences) and just needs to have them reordered (L2).
   726    * This is equivalent to using <code>GetLogicalMap</code> on a
   727    * <code>nsBidi</code> object.
   728    *
   729    * @param aLevels is an array with <code>aLength</code> levels that have been determined by
   730    *      the application.
   731    *
   732    * @param aLength is the number of levels in the array, or, semantically,
   733    *      the number of objects to be reordered.
   734    *      It must be <code>aLength>0</code>.
   735    *
   736    * @param aIndexMap is a pointer to an array of <code>aLength</code>
   737    *      indexes which will reflect the reordering of the characters.
   738    *      The array does not need to be initialized.<p>
   739    *      The index map will result in <code>aIndexMap[aLogicalIndex]==aVisualIndex</code>.
   740    */
   741   static nsresult ReorderLogical(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap);
   742 #endif // FULL_BIDI_ENGINE
   743   /**
   744    * This is a convenience function that does not use a nsBidi object.
   745    * It is intended to be used when an application has determined the levels
   746    * of objects (character sequences) and just needs to have them reordered (L2).
   747    * This is equivalent to using <code>GetVisualMap</code> on a
   748    * <code>nsBidi</code> object.
   749    *
   750    * @param aLevels is an array with <code>aLength</code> levels that have been determined by
   751    *      the application.
   752    *
   753    * @param aLength is the number of levels in the array, or, semantically,
   754    *      the number of objects to be reordered.
   755    *      It must be <code>aLength>0</code>.
   756    *
   757    * @param aIndexMap is a pointer to an array of <code>aLength</code>
   758    *      indexes which will reflect the reordering of the characters.
   759    *      The array does not need to be initialized.<p>
   760    *      The index map will result in <code>aIndexMap[aVisualIndex]==aLogicalIndex</code>.
   761    */
   762   static nsresult ReorderVisual(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap);
   764 #ifdef FULL_BIDI_ENGINE
   765   /**
   766    * Invert an index map.
   767    * The one-to-one index mapping of the first map is inverted and written to
   768    * the second one.
   769    *
   770    * @param aSrcMap is an array with <code>aLength</code> indexes
   771    *      which define the original mapping.
   772    *
   773    * @param aDestMap is an array with <code>aLength</code> indexes
   774    *      which will be filled with the inverse mapping.
   775    *
   776    * @param aLength is the length of each array.
   777    */
   778   nsresult InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength);
   779 #endif // FULL_BIDI_ENGINE
   780   /**
   781    * Reverse a Right-To-Left run of Unicode text.
   782    *
   783    * This function preserves the integrity of characters with multiple
   784    * code units and (optionally) modifier letters.
   785    * Characters can be replaced by mirror-image characters
   786    * in the destination buffer. Note that "real" mirroring has
   787    * to be done in a rendering engine by glyph selection
   788    * and that for many "mirrored" characters there are no
   789    * Unicode characters as mirror-image equivalents.
   790    * There are also options to insert or remove Bidi control
   791    * characters; see the description of the <code>aSrcLength</code>
   792    * and <code>aOptions</code> parameters and of the option bit flags.
   793    *
   794    * Since no Bidi controls are inserted here, this function will never
   795    * write more than <code>aSrcLength</code> characters to <code>aDest</code>.
   796    *
   797    * @param aSrc A pointer to the RTL run text.
   798    *
   799    * @param aSrcLength The length of the RTL run.
   800    *                 If the <code>NSBIDI_REMOVE_BIDI_CONTROLS</code> option
   801    *                 is set, then the destination length may be less than
   802    *                 <code>aSrcLength</code>.
   803    *                 If this option is not set, then the destination length
   804    *                 will be exactly <code>aSrcLength</code>.
   805    *
   806    * @param aDest A pointer to where the reordered text is to be copied.
   807    *             <code>aSrc[aSrcLength]</code> and <code>aDest[aSrcLength]</code>
   808    *             must not overlap.
   809    *
   810    * @param aOptions A bit set of options for the reordering that control
   811    *                how the reordered text is written.
   812    *
   813    * @param aDestSize will receive the number of characters that were written to <code>aDest</code>.
   814    */
   815   nsresult WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize);
   817 protected:
   818   friend class nsBidiPresUtils;
   820   /** length of the current text */
   821   int32_t mLength;
   823   /** memory sizes in bytes */
   824   size_t mDirPropsSize, mLevelsSize, mRunsSize;
   826   /** allocated memory */
   827   DirProp* mDirPropsMemory;
   828   nsBidiLevel* mLevelsMemory;
   829   Run* mRunsMemory;
   831   /** indicators for whether memory may be allocated after construction */
   832   bool mMayAllocateText, mMayAllocateRuns;
   834   const DirProp* mDirProps;
   835   nsBidiLevel* mLevels;
   837   /** the paragraph level */
   838   nsBidiLevel mParaLevel;
   840   /** mFlags is a bit set indicating which directional properties are in the text */
   841   Flags mFlags;
   843   /** the overall paragraph or line directionality - see nsBidiDirection */
   844   nsBidiDirection mDirection;
   846   /** characters after mTrailingWSStart are WS and are */
   847   /* implicitly at mParaLevel (rule (L1)) - levels may not reflect that */
   848   int32_t mTrailingWSStart;
   850   /** fields for line reordering */
   851   int32_t mRunCount;     /* ==-1: runs not set up yet */
   852   Run* mRuns;
   854   /** for non-mixed text, we only need a tiny array of runs (no malloc()) */
   855   Run mSimpleRuns[1];
   857 private:
   859   void Init();
   861   bool GetMemory(void **aMemory, size_t* aSize, bool aMayAllocate, size_t aSizeNeeded);
   863   void Free();
   865   void GetDirProps(const char16_t *aText);
   867   nsBidiDirection ResolveExplicitLevels();
   869   nsresult CheckExplicitLevels(nsBidiDirection *aDirection);
   871   nsBidiDirection DirectionFromFlags(Flags aFlags);
   873   void ResolveImplicitLevels(int32_t aStart, int32_t aLimit, DirProp aSOR, DirProp aEOR);
   875   void AdjustWSLevels();
   877   void SetTrailingWSStart();
   879   bool GetRuns();
   881   void GetSingleRun(nsBidiLevel aLevel);
   883   void ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel);
   885   static bool PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap, nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel);
   887   int32_t doWriteReverse(const char16_t *src, int32_t srcLength,
   888                          char16_t *dest, uint16_t options);
   890 };
   892 #endif // nsBidi_h__

mercurial