michael@0: /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- michael@0: * michael@0: * This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef nsBidi_h__ michael@0: #define nsBidi_h__ michael@0: michael@0: #include "nsBidiUtils.h" michael@0: michael@0: // Bidi reordering engine from ICU michael@0: /* michael@0: * javadoc-style comments are intended to be transformed into HTML michael@0: * using DOC++ - see michael@0: * http://www.zib.de/Visual/software/doc++/index.html . michael@0: * michael@0: * The HTML documentation is created with michael@0: * doc++ -H nsIBidi.h michael@0: */ michael@0: michael@0: /** michael@0: * @mainpage BIDI algorithm for Mozilla (from ICU) michael@0: * michael@0: *

BIDI algorithm for Mozilla

michael@0: * michael@0: * This is an implementation of the Unicode Bidirectional Algorithm. michael@0: * The algorithm is defined in michael@0: * Unicode Technical Report 9, michael@0: * version 5, and is also described in The Unicode Standard, Version 3.0.

michael@0: * michael@0: *

General remarks about the API:

michael@0: * michael@0: * The limit of a sequence of characters is the position just after its michael@0: * last character, i.e., one more than the index of that last character.

michael@0: * michael@0: * Some of the API functions provide access to runs. michael@0: * Such a run is defined as a sequence of characters michael@0: * that are at the same embedding level michael@0: * after performing the BIDI algorithm.

michael@0: * michael@0: * @author Markus W. Scherer. Ported to Mozilla by Simon Montagu michael@0: * @version 1.0 michael@0: */ michael@0: michael@0: /** michael@0: * nsBidiLevel is the type of the level values in this michael@0: * Bidi implementation. michael@0: * It holds an embedding level and indicates the visual direction michael@0: * by its bit 0 (even/odd value).

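michael@0: * michael@0: * It can also hold non-level values for the michael@0: * aParaLevel and aEmbeddingLevels michael@0: * arguments of SetPara; there: michael@0: * - an aEmbeddingLevels[] value may have the NSBIDI_LEVEL_OVERRIDE bit set, michael@0: * which overrides the directional property of the corresponding character; michael@0: * - aParaLevel may be set to one of the pseudo-level values michael@0: * NSBIDI_DEFAULT_LTR or NSBIDI_DEFAULT_RTL. michael@0: *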

michael@0: * michael@0: * @see nsIBidi::SetPara michael@0: * michael@0: *

The related constants are not real, valid level values. michael@0: * NSBIDI_DEFAULT_XXX can be used to specify michael@0: * a default paragraph level for the case michael@0: * where the SetPara function michael@0: * is asked to determine the level from the text but there is no michael@0: * strongly typed character in the input.

michael@0: * michael@0: * Note that the value for NSBIDI_DEFAULT_LTR is even michael@0: * and the one for NSBIDI_DEFAULT_RTL is odd, michael@0: * just like with normal LTR and RTL level values - michael@0: * these special values are designed that way. Also, the implementation michael@0: * assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd. michael@0: * michael@0: * @see NSBIDI_DEFAULT_LTR michael@0: * @see NSBIDI_DEFAULT_RTL michael@0: * @see NSBIDI_LEVEL_OVERRIDE michael@0: * @see NSBIDI_MAX_EXPLICIT_LEVEL michael@0: */ michael@0: typedef uint8_t nsBidiLevel; michael@0: michael@0: /** Paragraph level setting. michael@0: * If there is no strong character, then set the paragraph level to 0 (left-to-right). michael@0: */ michael@0: #define NSBIDI_DEFAULT_LTR 0xfe michael@0: michael@0: /** Paragraph level setting. michael@0: * If there is no strong character, then set the paragraph level to 1 (right-to-left). michael@0: */ michael@0: #define NSBIDI_DEFAULT_RTL 0xff michael@0: michael@0: /** michael@0: * Maximum explicit embedding level. michael@0: * (The maximum resolved level can be up to NSBIDI_MAX_EXPLICIT_LEVEL+1). michael@0: * michael@0: */ michael@0: #define NSBIDI_MAX_EXPLICIT_LEVEL 61 michael@0: michael@0: /** Bit flag for level input. michael@0: * Overrides directional properties. michael@0: */ michael@0: #define NSBIDI_LEVEL_OVERRIDE 0x80 michael@0: michael@0: /** michael@0: * nsBidiDirection values indicate the text direction. michael@0: */ michael@0: enum nsBidiDirection { michael@0: /** All left-to-right text This is a 0 value. */ michael@0: NSBIDI_LTR, michael@0: /** All right-to-left text This is a 1 value. */ michael@0: NSBIDI_RTL, michael@0: /** Mixed-directional text. 
*/ michael@0: NSBIDI_MIXED michael@0: }; michael@0: michael@0: typedef enum nsBidiDirection nsBidiDirection; michael@0: michael@0: /* miscellaneous definitions ------------------------------------------------ */ michael@0: /** option flags for WriteReverse() */ michael@0: /** michael@0: * option bit for WriteReverse(): michael@0: * keep combining characters after their base characters in RTL runs michael@0: * michael@0: * @see WriteReverse michael@0: */ michael@0: #define NSBIDI_KEEP_BASE_COMBINING 1 michael@0: michael@0: /** michael@0: * option bit for WriteReverse(): michael@0: * replace characters with the "mirrored" property in RTL runs michael@0: * by their mirror-image mappings michael@0: * michael@0: * @see WriteReverse michael@0: */ michael@0: #define NSBIDI_DO_MIRRORING 2 michael@0: michael@0: /** michael@0: * option bit for WriteReverse(): michael@0: * remove Bidi control characters michael@0: * michael@0: * @see WriteReverse michael@0: */ michael@0: #define NSBIDI_REMOVE_BIDI_CONTROLS 8 michael@0: michael@0: /* helper macros for each allocated array member */ michael@0: #define GETDIRPROPSMEMORY(length) \ michael@0: GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, \ michael@0: mMayAllocateText, (length)) michael@0: michael@0: #define GETLEVELSMEMORY(length) \ michael@0: GetMemory((void **)&mLevelsMemory, &mLevelsSize, \ michael@0: mMayAllocateText, (length)) michael@0: michael@0: #define GETRUNSMEMORY(length) \ michael@0: GetMemory((void **)&mRunsMemory, &mRunsSize, \ michael@0: mMayAllocateRuns, (length)*sizeof(Run)) michael@0: michael@0: /* additional macros used by constructor - always allow allocation */ michael@0: #define GETINITIALDIRPROPSMEMORY(length) \ michael@0: GetMemory((void **)&mDirPropsMemory, &mDirPropsSize, \ michael@0: true, (length)) michael@0: michael@0: #define GETINITIALLEVELSMEMORY(length) \ michael@0: GetMemory((void **)&mLevelsMemory, &mLevelsSize, \ michael@0: true, (length)) michael@0: michael@0: #define 
GETINITIALRUNSMEMORY(length) \ michael@0: GetMemory((void **)&mRunsMemory, &mRunsSize, \ michael@0: true, (length)*sizeof(Run)) michael@0: michael@0: /* michael@0: * Sometimes, bit values are more appropriate michael@0: * to deal with directionality properties. michael@0: * Abbreviations in these macro names refer to names michael@0: * used in the Bidi algorithm. michael@0: */ michael@0: typedef uint8_t DirProp; michael@0: michael@0: #define DIRPROP_FLAG(dir) (1UL<<(dir)) michael@0: michael@0: /* special flag for multiple runs from explicit embedding codes */ michael@0: #define DIRPROP_FLAG_MULTI_RUNS (1UL<<31) michael@0: michael@0: /* are there any characters that are LTR or RTL? */ michael@0: #define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)) michael@0: #define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)) michael@0: michael@0: /* explicit embedding codes */ michael@0: #define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)) michael@0: #define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)) michael@0: #define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO)) michael@0: michael@0: #define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF)) michael@0: #define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT) michael@0: michael@0: /* paragraph and segment separators */ michael@0: #define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S)) michael@0: michael@0: /* all types that are counted as White Space or Neutral in some steps */ michael@0: #define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT) michael@0: #define MASK_N (DIRPROP_FLAG(O_N)|MASK_WS) michael@0: michael@0: /* all types that are included in a sequence of European Terminators for (W5) */ michael@0: #define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT) michael@0: michael@0: /* types that are neutrals or could becomes neutrals in (Wn) */ michael@0: #define MASK_POSSIBLE_N 
(DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N) michael@0: michael@0: /* michael@0: * These types may be changed to "e", michael@0: * the embedding type (L or R) of the run, michael@0: * in the Bidi algorithm (N2) michael@0: */ michael@0: #define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N) michael@0: michael@0: /* the dirProp's L and R are defined to 0 and 1 values in nsCharType */ michael@0: #define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1)) michael@0: michael@0: #define IS_DEFAULT_LEVEL(level) (((level)&0xfe)==0xfe) michael@0: michael@0: /* handle surrogate pairs --------------------------------------------------- */ michael@0: michael@0: #define IS_FIRST_SURROGATE(uchar) (((uchar)&0xfc00)==0xd800) michael@0: #define IS_SECOND_SURROGATE(uchar) (((uchar)&0xfc00)==0xdc00) michael@0: michael@0: /* get the UTF-32 value directly from the surrogate pseudo-characters */ michael@0: #define SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) michael@0: #define GET_UTF_32(first, second) (((first)<<10UL)+(second)-SURROGATE_OFFSET) michael@0: michael@0: michael@0: #define UTF_ERROR_VALUE 0xffff michael@0: /* definitions with forward iteration --------------------------------------- */ michael@0: michael@0: /* michael@0: * all the macros that go forward assume that michael@0: * the initial offset is 0<=i>10)+0xd7c0; \ michael@0: (s)[(i)++]=(char16_t)(c)&0x3ff|0xdc00; \ michael@0: } \ michael@0: } michael@0: michael@0: /* safe versions with error-checking and optional regularity-checking */ michael@0: michael@0: #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ michael@0: if((PRUInt32)(c)<=0xffff) { \ michael@0: (s)[(i)++]=(char16_t)(c); \ michael@0: } else if((PRUInt32)(c)<=0x10ffff) { \ michael@0: if((i)+1<(length)) { \ michael@0: (s)[(i)++]=(char16_t)((c)>>10)+0xd7c0; \ michael@0: (s)[(i)++]=(char16_t)(c)&0x3ff|0xdc00; \ michael@0: } else /* not enough space */ { \ michael@0: (s)[(i)++]=UTF_ERROR_VALUE; \ michael@0: } \ michael@0: } else /* 
c>0x10ffff, write error value */ { \ michael@0: (s)[(i)++]=UTF_ERROR_VALUE; \ michael@0: } \ michael@0: } michael@0: michael@0: /* definitions with backward iteration -------------------------------------- */ michael@0: michael@0: /* michael@0: * all the macros that go backward assume that michael@0: * the valid buffer range starts at offset 0 michael@0: * and that the initial offset is 00) { \ michael@0: UTF16_BACK_1_UNSAFE(s, i); \ michael@0: --__N; \ michael@0: } \ michael@0: } michael@0: michael@0: /* safe versions with error-checking and optional regularity-checking */ michael@0: michael@0: #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ michael@0: (c)=(s)[--(i)]; \ michael@0: if(IS_SECOND_SURROGATE(c)) { \ michael@0: char16_t __c2; \ michael@0: if((i)>(start) && IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ michael@0: --(i); \ michael@0: (c)=GET_UTF_32(__c2, (c)); \ michael@0: /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() */ \ michael@0: } else if(strict) {\ michael@0: /* unmatched second surrogate */ \ michael@0: (c)=UTF_ERROR_VALUE; \ michael@0: } \ michael@0: } else if(strict && IS_FIRST_SURROGATE(c)) { \ michael@0: /* unmatched first surrogate */ \ michael@0: (c)=UTF_ERROR_VALUE; \ michael@0: /* else strict: (c)==0xfffe is caught by UTF_IS_ERROR() */ \ michael@0: } \ michael@0: } michael@0: michael@0: #define UTF16_BACK_1_SAFE(s, start, i) { \ michael@0: if(IS_SECOND_SURROGATE((s)[--(i)]) && (i)>(start) && IS_FIRST_SURROGATE((s)[(i)-1])) { \ michael@0: --(i); \ michael@0: } \ michael@0: } michael@0: michael@0: #define UTF16_BACK_N_SAFE(s, start, i, n) { \ michael@0: int32_t __N=(n); \ michael@0: while(__N>0 && (i)>(start)) { \ michael@0: UTF16_BACK_1_SAFE(s, start, i); \ michael@0: --__N; \ michael@0: } \ michael@0: } michael@0: michael@0: #define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) michael@0: #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) michael@0: #define 
UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) michael@0: #define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) michael@0: #define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) michael@0: #define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) michael@0: #define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) michael@0: #define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) michael@0: michael@0: #define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, false) michael@0: #define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i) michael@0: #define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n) michael@0: #define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c) michael@0: michael@0: /* Run structure for reordering --------------------------------------------- */ michael@0: michael@0: typedef struct Run { michael@0: int32_t logicalStart, /* first character of the run; b31 indicates even/odd level */ michael@0: visualLimit; /* last visual position of the run +1 */ michael@0: } Run; michael@0: michael@0: /* in a Run, logicalStart will get this bit set if the run level is odd */ michael@0: #define INDEX_ODD_BIT (1UL<<31) michael@0: michael@0: #define MAKE_INDEX_ODD_PAIR(index, level) (index|((uint32_t)level<<31)) michael@0: #define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)level<<31)) michael@0: #define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT) michael@0: michael@0: #define GET_INDEX(x) (x&~INDEX_ODD_BIT) michael@0: #define GET_ODD_BIT(x) ((uint32_t)x>>31) michael@0: #define IS_ODD_RUN(x) ((x&INDEX_ODD_BIT)!=0) michael@0: #define IS_EVEN_RUN(x) ((x&INDEX_ODD_BIT)==0) michael@0: michael@0: typedef uint32_t Flags; michael@0: michael@0: /** michael@0: * This class holds information about a paragraph of text michael@0: * with Bidi-algorithm-related details, or about one line of michael@0: * such a paragraph.

michael@0: * Reordering can be done on a line, or on a paragraph which is michael@0: * then interpreted as one single line.

michael@0: * michael@0: * On construction, the class is initially empty. It is assigned michael@0: * the Bidi properties of a paragraph by SetPara michael@0: * or the Bidi properties of a line of a paragraph by michael@0: * SetLine.

michael@0: * An nsBidi object can be reused for as long as it has not been michael@0: * destroyed by calling its destructor.

michael@0: * SetPara will allocate additional memory for michael@0: * internal structures as necessary. michael@0: */ michael@0: class nsBidi michael@0: { michael@0: public: michael@0: /** @brief Default constructor. michael@0: * michael@0: * The nsBidi object is initially empty. It is assigned michael@0: * the Bidi properties of a paragraph by SetPara() michael@0: * or the Bidi properties of a line of a paragraph by michael@0: * SetLine().

michael@0: * This object can be reused for as long as it is not destroyed.

michael@0: * SetPara() will allocate additional memory for michael@0: * internal structures as necessary. michael@0: * michael@0: */ michael@0: nsBidi(); michael@0: michael@0: /** @brief Destructor. */ michael@0: virtual ~nsBidi(); michael@0: michael@0: michael@0: /** michael@0: * Perform the Unicode Bidi algorithm. It is defined in the michael@0: * Unicode Technical Report 9, michael@0: * version 5, michael@0: * also described in The Unicode Standard, Version 3.0 .

michael@0: * michael@0: * This function takes a single plain text paragraph with or without michael@0: * externally specified embedding levels from styled text michael@0: * and computes the left-right-directionality of each character.

michael@0: * michael@0: * If the entire paragraph consists of text of only one direction, then michael@0: * the function may not perform all the steps described by the algorithm, michael@0: * i.e., some levels may not be the same as if all steps were performed. michael@0: * This is not relevant for unidirectional text.
michael@0: * For example, in pure LTR text with numbers the numbers would get michael@0: * a resolved level of 2 higher than the surrounding text according to michael@0: * the algorithm. This implementation may set all resolved levels to michael@0: * the same value in such a case.

michael@0: * michael@0: * The text must be externally split into separate paragraphs (rule P1). michael@0: * Paragraph separators (B) should appear at most at the very end. michael@0: * michael@0: * @param aText is a pointer to the single-paragraph text that the michael@0: * Bidi algorithm will be performed on michael@0: * (step (P1) of the algorithm is performed externally). michael@0: * The text must be (at least) aLength long. michael@0: * michael@0: * @param aLength is the length of the text; if aLength==-1 then michael@0: * the text must be zero-terminated. michael@0: * michael@0: * @param aParaLevel specifies the default level for the paragraph; michael@0: * it is typically 0 (LTR) or 1 (RTL). michael@0: * If the function shall determine the paragraph level from the text, michael@0: * then aParaLevel can be set to michael@0: * either NSBIDI_DEFAULT_LTR michael@0: * or NSBIDI_DEFAULT_RTL; michael@0: * if there is no strongly typed character, then michael@0: * the desired default is used (0 for LTR or 1 for RTL). michael@0: * Any other value between 0 and NSBIDI_MAX_EXPLICIT_LEVEL is also valid, michael@0: * with odd levels indicating RTL. michael@0: * michael@0: * @param aEmbeddingLevels (in) may be used to preset the embedding and override levels, michael@0: * ignoring characters like LRE and PDF in the text. michael@0: * A level overrides the directional property of its corresponding michael@0: * (same index) character if the level has the michael@0: * NSBIDI_LEVEL_OVERRIDE bit set.

michael@0: * Except for that bit, it must be michael@0: * aParaLevel<=aEmbeddingLevels[]<=NSBIDI_MAX_EXPLICIT_LEVEL.

michael@0: * Caution: A copy of this pointer, not of the levels, michael@0: * will be stored in the nsBidi object; michael@0: * the aEmbeddingLevels array must not be michael@0: * deallocated before the nsBidi object is destroyed or reused, michael@0: * and the aEmbeddingLevels array michael@0: * should not be modified by the caller, to avoid unexpected results on subsequent Bidi operations. michael@0: * However, the SetPara and michael@0: * SetLine functions may themselves modify some or all of the levels.

michael@0: * After the nsBidi object is reused or destroyed, the caller michael@0: * must take care of the deallocation of the aEmbeddingLevels array.

michael@0: * The aEmbeddingLevels array must be michael@0: * at least aLength long. michael@0: */ michael@0: nsresult SetPara(const char16_t *aText, int32_t aLength, nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels); michael@0: michael@0: /** michael@0: * Get the directionality of the text. michael@0: * michael@0: * @param aDirection receives a NSBIDI_XXX value that indicates if the entire text michael@0: * represented by this object is unidirectional, michael@0: * and which direction, or if it is mixed-directional. michael@0: * michael@0: * @see nsBidiDirection michael@0: */ michael@0: nsresult GetDirection(nsBidiDirection* aDirection); michael@0: michael@0: /** michael@0: * Get the paragraph level of the text. michael@0: * michael@0: * @param aParaLevel receives a NSBIDI_XXX value indicating the paragraph level michael@0: * michael@0: * @see nsBidiLevel michael@0: */ michael@0: nsresult GetParaLevel(nsBidiLevel* aParaLevel); michael@0: michael@0: #ifdef FULL_BIDI_ENGINE michael@0: /** michael@0: * SetLine sets an nsBidi to michael@0: * contain the reordering information, especially the resolved levels, michael@0: * for all the characters in a line of text. This line of text is michael@0: * specified by referring to an nsBidi object representing michael@0: * this information for a paragraph of text, and by specifying michael@0: * a range of indexes in this paragraph.

michael@0: * In the new line object, the indexes will range from 0 to aLimit-aStart.

michael@0: * michael@0: * This is used after calling SetPara michael@0: * for a paragraph, and after line-breaking on that paragraph. michael@0: * It is not necessary if the paragraph is treated as a single line.

michael@0: * michael@0: * After line-breaking, rules (L1) and (L2) for the treatment of michael@0: * trailing WS and for reordering are performed on michael@0: * an nsBidi object that represents a line.

michael@0: * michael@0: * Important: the line nsBidi object shares data with michael@0: * aParaBidi. michael@0: * You must destroy or reuse this object before aParaBidi. michael@0: * In other words, you must destroy or reuse the nsBidi object for a line michael@0: * before the object for its parent paragraph. michael@0: * michael@0: * @param aParaBidi is the parent paragraph object. michael@0: * michael@0: * @param aStart is the line's first index into the paragraph text. michael@0: * michael@0: * @param aLimit is just behind the line's last index into the paragraph text michael@0: * (its last index +1).
michael@0: * It must be 0<=aStart<=aLimit<=paragraph length. michael@0: * michael@0: * @see SetPara michael@0: */ michael@0: nsresult SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit); michael@0: michael@0: /** michael@0: * Get the length of the text. michael@0: * michael@0: * @param aLength receives the length of the text that the nsBidi object was created for. michael@0: */ michael@0: nsresult GetLength(int32_t* aLength); michael@0: michael@0: /** michael@0: * Get the level for one character. michael@0: * michael@0: * @param aCharIndex the index of a character. michael@0: * michael@0: * @param aLevel receives the level for the character at aCharIndex. michael@0: * michael@0: * @see nsBidiLevel michael@0: */ michael@0: nsresult GetLevelAt(int32_t aCharIndex, nsBidiLevel* aLevel); michael@0: michael@0: /** michael@0: * Get an array of levels for each character.

michael@0: * michael@0: * Note that this function may allocate memory under some michael@0: * circumstances, unlike GetLevelAt. michael@0: * michael@0: * @param aLevels receives a pointer to the levels array for the text, michael@0: * or nullptr if an error occurs. michael@0: * michael@0: * @see nsBidiLevel michael@0: */ michael@0: nsresult GetLevels(nsBidiLevel** aLevels); michael@0: #endif // FULL_BIDI_ENGINE michael@0: /** michael@0: * Get the bidirectional type for one character. michael@0: * michael@0: * @param aCharIndex the index of a character. michael@0: * michael@0: * @param aType receives the bidirectional type of the character at aCharIndex. michael@0: */ michael@0: nsresult GetCharTypeAt(int32_t aCharIndex, nsCharType* aType); michael@0: michael@0: /** michael@0: * Get a logical run. michael@0: * This function returns information about a run and is used michael@0: * to retrieve runs in logical order.

michael@0: * This is especially useful for line-breaking on a paragraph. michael@0: * michael@0: * @param aLogicalStart is the first character of the run. michael@0: * michael@0: * @param aLogicalLimit will receive the limit of the run. michael@0: * The l-value that you point to here may be the michael@0: * same expression (variable) as the one for michael@0: * aLogicalStart. michael@0: * This pointer can be nullptr if this michael@0: * value is not necessary. michael@0: * michael@0: * @param aLevel will receive the level of the run. michael@0: * This pointer can be nullptr if this michael@0: * value is not necessary. michael@0: */ michael@0: nsresult GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimit, nsBidiLevel* aLevel); michael@0: michael@0: /** michael@0: * Get the number of runs. michael@0: * This function may invoke the actual reordering on the michael@0: * nsBidi object, after SetPara michael@0: * may have resolved only the levels of the text. Therefore, michael@0: * CountRuns may have to allocate memory, michael@0: * and may fail doing so. michael@0: * michael@0: * @param aRunCount will receive the number of runs. michael@0: */ michael@0: nsresult CountRuns(int32_t* aRunCount); michael@0: michael@0: /** michael@0: * Get one run's logical start, length, and directionality, michael@0: * which can be 0 for LTR or 1 for RTL. michael@0: * In an RTL run, the character at the logical start is michael@0: * visually on the right of the displayed run. michael@0: * The length is the number of characters in the run.

michael@0: * CountRuns should be called michael@0: * before the runs are retrieved. michael@0: * michael@0: * @param aRunIndex is the number of the run in visual order, in the michael@0: * range [0..CountRuns-1]. michael@0: * michael@0: * @param aLogicalStart is the first logical character index in the text. michael@0: * The pointer may be nullptr if this index is not needed. michael@0: * michael@0: * @param aLength is the number of characters (at least one) in the run. michael@0: * The pointer may be nullptr if this is not needed. michael@0: * michael@0: * @param aDirection will receive the directionality of the run, michael@0: * NSBIDI_LTR==0 or NSBIDI_RTL==1, michael@0: * never NSBIDI_MIXED. michael@0: * michael@0: * @see CountRuns

michael@0: * michael@0: * Example: michael@0: * @code michael@0: * int32_t i, count, logicalStart, visualIndex=0, length; michael@0: * nsBidiDirection dir; michael@0: * pBidi->CountRuns(&count); michael@0: * for(i=0; i<count; ++i) { michael@0: * pBidi->GetVisualRun(i, &logicalStart, &length, &dir); michael@0: * if(NSBIDI_LTR==dir) { michael@0: * do { // LTR michael@0: * show_char(text[logicalStart++], visualIndex++); michael@0: * } while(--length>0); michael@0: * } else { michael@0: * logicalStart+=length; // logicalLimit michael@0: * do { // RTL michael@0: * show_char(text[--logicalStart], visualIndex++); michael@0: * } while(--length>0); michael@0: * } michael@0: * } michael@0: * @endcode michael@0: * michael@0: * Note that in right-to-left runs, code like this places michael@0: * modifier letters before base characters and second surrogates michael@0: * before first ones. michael@0: */ michael@0: nsresult GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart, int32_t* aLength, nsBidiDirection* aDirection); michael@0: michael@0: #ifdef FULL_BIDI_ENGINE michael@0: /** michael@0: * Get the visual position from a logical text position. michael@0: * If such a mapping is used many times on the same michael@0: * nsBidi object, then calling michael@0: * GetLogicalMap is more efficient.

michael@0: * michael@0: * Note that in right-to-left runs, this mapping places michael@0: * modifier letters before base characters and second surrogates michael@0: * before first ones. michael@0: * michael@0: * @param aLogicalIndex is the index of a character in the text. michael@0: * michael@0: * @param aVisualIndex will receive the visual position of this character. michael@0: * michael@0: * @see GetLogicalMap michael@0: * @see GetLogicalIndex michael@0: */ michael@0: nsresult GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex); michael@0: michael@0: /** michael@0: * Get the logical text position from a visual position. michael@0: * If such a mapping is used many times on the same michael@0: * nsBidi object, then calling michael@0: * GetVisualMap is more efficient.

michael@0: * michael@0: * This is the inverse function to GetVisualIndex. michael@0: * michael@0: * @param aVisualIndex is the visual position of a character. michael@0: * michael@0: * @param aLogicalIndex will receive the index of this character in the text. michael@0: * michael@0: * @see GetVisualMap michael@0: * @see GetVisualIndex michael@0: */ michael@0: nsresult GetLogicalIndex(int32_t aVisualIndex, int32_t* aLogicalIndex); michael@0: michael@0: /** michael@0: * Get a logical-to-visual index map (array) for the characters in the nsBidi michael@0: * (paragraph or line) object. michael@0: * michael@0: * @param aIndexMap is a pointer to an array of GetLength michael@0: * indexes which will reflect the reordering of the characters. michael@0: * The array does not need to be initialized.

michael@0: * The index map will result in aIndexMap[aLogicalIndex]==aVisualIndex.

michael@0: * michael@0: * @see GetVisualMap michael@0: * @see GetVisualIndex michael@0: */ michael@0: nsresult GetLogicalMap(int32_t *aIndexMap); michael@0: michael@0: /** michael@0: * Get a visual-to-logical index map (array) for the characters in the nsBidi michael@0: * (paragraph or line) object. michael@0: * michael@0: * @param aIndexMap is a pointer to an array of GetLength michael@0: * indexes which will reflect the reordering of the characters. michael@0: * The array does not need to be initialized.

michael@0: * The index map will result in aIndexMap[aVisualIndex]==aLogicalIndex.

michael@0: * michael@0: * @see GetLogicalMap michael@0: * @see GetLogicalIndex michael@0: */ michael@0: nsresult GetVisualMap(int32_t *aIndexMap); michael@0: michael@0: /** michael@0: * This is a convenience function that does not use a nsBidi object. michael@0: * It is intended to be used for when an application has determined the levels michael@0: * of objects (character sequences) and just needs to have them reordered (L2). michael@0: * This is equivalent to using GetLogicalMap on a michael@0: * nsBidi object. michael@0: * michael@0: * @param aLevels is an array with aLength levels that have been determined by michael@0: * the application. michael@0: * michael@0: * @param aLength is the number of levels in the array, or, semantically, michael@0: * the number of objects to be reordered. michael@0: * It must be aLength>0. michael@0: * michael@0: * @param aIndexMap is a pointer to an array of aLength michael@0: * indexes which will reflect the reordering of the characters. michael@0: * The array does not need to be initialized.

michael@0: * The index map will result in aIndexMap[aLogicalIndex]==aVisualIndex. michael@0: */ michael@0: static nsresult ReorderLogical(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap); michael@0: #endif // FULL_BIDI_ENGINE michael@0: /** michael@0: * This is a convenience function that does not use a nsBidi object. michael@0: * It is intended to be used for when an application has determined the levels michael@0: * of objects (character sequences) and just needs to have them reordered (L2). michael@0: * This is equivalent to using GetVisualMap on a michael@0: * nsBidi object. michael@0: * michael@0: * @param aLevels is an array with aLength levels that have been determined by michael@0: * the application. michael@0: * michael@0: * @param aLength is the number of levels in the array, or, semantically, michael@0: * the number of objects to be reordered. michael@0: * It must be aLength>0. michael@0: * michael@0: * @param aIndexMap is a pointer to an array of aLength michael@0: * indexes which will reflect the reordering of the characters. michael@0: * The array does not need to be initialized.

michael@0: * The index map will result in aIndexMap[aVisualIndex]==aLogicalIndex. michael@0: */ michael@0: static nsresult ReorderVisual(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap); michael@0: michael@0: #ifdef FULL_BIDI_ENGINE michael@0: /** michael@0: * Invert an index map. michael@0: * The one-to-one index mapping of the first map is inverted and written to michael@0: * the second one. michael@0: * michael@0: * @param aSrcMap is an array with aLength indexes michael@0: * which define the original mapping. michael@0: * michael@0: * @param aDestMap is an array with aLength indexes michael@0: * which will be filled with the inverse mapping. michael@0: * michael@0: * @param aLength is the length of each array. michael@0: */ michael@0: nsresult InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength); michael@0: #endif // FULL_BIDI_ENGINE michael@0: /** michael@0: * Reverse a Right-To-Left run of Unicode text. michael@0: * michael@0: * This function preserves the integrity of characters with multiple michael@0: * code units and (optionally) modifier letters. michael@0: * Characters can be replaced by mirror-image characters michael@0: * in the destination buffer. Note that "real" mirroring has michael@0: * to be done in a rendering engine by glyph selection michael@0: * and that for many "mirrored" characters there are no michael@0: * Unicode characters as mirror-image equivalents. michael@0: * There are also options to insert or remove Bidi control michael@0: * characters; see the description of the aDestSize michael@0: * and aOptions parameters and of the option bit flags. michael@0: * michael@0: * Since no Bidi controls are inserted here, this function will never michael@0: * write more than aSrcLength characters to aDest. michael@0: * michael@0: * @param aSrc A pointer to the RTL run text. michael@0: * michael@0: * @param aSrcLength The length of the RTL run. 
michael@0: * If the NSBIDI_REMOVE_BIDI_CONTROLS option michael@0: * is set, then the destination length may be less than michael@0: * aSrcLength. michael@0: * If this option is not set, then the destination length michael@0: * will be exactly aSrcLength. michael@0: * michael@0: * @param aDest A pointer to where the reordered text is to be copied. michael@0: * aSrc[aSrcLength] and aDest[aSrcLength] michael@0: * must not overlap. michael@0: * michael@0: * @param aOptions A bit set of options for the reordering that control michael@0: * how the reordered text is written. michael@0: * michael@0: * @param aDestSize will receive the number of characters that were written to aDest. michael@0: */ michael@0: nsresult WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize); michael@0: michael@0: protected: michael@0: friend class nsBidiPresUtils; michael@0: michael@0: /** length of the current text */ michael@0: int32_t mLength; michael@0: michael@0: /** memory sizes in bytes */ michael@0: size_t mDirPropsSize, mLevelsSize, mRunsSize; michael@0: michael@0: /** allocated memory */ michael@0: DirProp* mDirPropsMemory; michael@0: nsBidiLevel* mLevelsMemory; michael@0: Run* mRunsMemory; michael@0: michael@0: /** indicators for whether memory may be allocated after construction */ michael@0: bool mMayAllocateText, mMayAllocateRuns; michael@0: michael@0: const DirProp* mDirProps; michael@0: nsBidiLevel* mLevels; michael@0: michael@0: /** the paragraph level */ michael@0: nsBidiLevel mParaLevel; michael@0: michael@0: /** flags is a bit set for which directional properties are in the text */ michael@0: Flags mFlags; michael@0: michael@0: /** the overall paragraph or line directionality - see nsBidiDirection */ michael@0: nsBidiDirection mDirection; michael@0: michael@0: /** characters after mTrailingWSStart are WS and are */ michael@0: /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ michael@0: int32_t 
mTrailingWSStart; michael@0: michael@0: /** fields for line reordering */ michael@0: int32_t mRunCount; /* ==-1: runs not set up yet */ michael@0: Run* mRuns; michael@0: michael@0: /** for non-mixed text, we only need a tiny array of runs (no malloc()) */ michael@0: Run mSimpleRuns[1]; michael@0: michael@0: private: michael@0: michael@0: /* NOTE(review): the private helpers declared below carry no documentation in this header; their contracts are presumably described next to their definitions - confirm there before relying on them. */ void Init(); michael@0: michael@0: bool GetMemory(void **aMemory, size_t* aSize, bool aMayAllocate, size_t aSizeNeeded); michael@0: michael@0: void Free(); michael@0: michael@0: void GetDirProps(const char16_t *aText); michael@0: michael@0: nsBidiDirection ResolveExplicitLevels(); michael@0: michael@0: nsresult CheckExplicitLevels(nsBidiDirection *aDirection); michael@0: michael@0: nsBidiDirection DirectionFromFlags(Flags aFlags); michael@0: michael@0: void ResolveImplicitLevels(int32_t aStart, int32_t aLimit, DirProp aSOR, DirProp aEOR); michael@0: michael@0: void AdjustWSLevels(); michael@0: michael@0: void SetTrailingWSStart(); michael@0: michael@0: bool GetRuns(); michael@0: michael@0: void GetSingleRun(nsBidiLevel aLevel); michael@0: michael@0: void ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel); michael@0: michael@0: static bool PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap, nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel); michael@0: michael@0: int32_t doWriteReverse(const char16_t *src, int32_t srcLength, michael@0: char16_t *dest, uint16_t options); michael@0: michael@0: }; michael@0: michael@0: #endif // nsBidi_h__