intl/unicharutil/util/nsBidiUtils.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #ifndef nsBidiUtils_h__
     7 #define nsBidiUtils_h__
     9 #include "nsStringGlue.h"
    11    /**
    12     *  Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
    13     *  section BIDIRECTIONAL PROPERTIES
    14     *  for the detailed definition of the following categories
    15     *
    16     *  The values here must match the equivalents in %bidicategorycode in
    17     *  mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl
    18     */
    20 enum nsCharType   { 
    21   eCharType_LeftToRight              = 0, 
    22   eCharType_RightToLeft              = 1, 
    23   eCharType_EuropeanNumber           = 2,
    24   eCharType_EuropeanNumberSeparator  = 3,
    25   eCharType_EuropeanNumberTerminator = 4,
    26   eCharType_ArabicNumber             = 5,
    27   eCharType_CommonNumberSeparator    = 6,
    28   eCharType_BlockSeparator           = 7,
    29   eCharType_SegmentSeparator         = 8,
    30   eCharType_WhiteSpaceNeutral        = 9, 
    31   eCharType_OtherNeutral             = 10, 
    32   eCharType_LeftToRightEmbedding     = 11,
    33   eCharType_LeftToRightOverride      = 12,
    34   eCharType_RightToLeftArabic        = 13,
    35   eCharType_RightToLeftEmbedding     = 14,
    36   eCharType_RightToLeftOverride      = 15,
    37   eCharType_PopDirectionalFormat     = 16,
    38   eCharType_DirNonSpacingMark        = 17,
    39   eCharType_BoundaryNeutral          = 18,
    40   eCharType_CharTypeCount
    41 };
    43 /**
    44  * This specifies the language directional property of a character set.
    45  */
    46 typedef enum nsCharType nsCharType;
    48 /**
    49  * definitions of bidirection character types by category
    50  */
    52 #define CHARTYPE_IS_RTL(val) ( ( (val) == eCharType_RightToLeft) || ( (val) == eCharType_RightToLeftArabic) )
    54 #define CHARTYPE_IS_WEAK(val) ( ( (val) == eCharType_EuropeanNumberSeparator)    \
    55                            || ( (val) == eCharType_EuropeanNumberTerminator) \
    56                            || ( ( (val) > eCharType_ArabicNumber) && ( (val) != eCharType_RightToLeftArabic) ) )
    58   /**
    59    * Inspects a Unichar, converting numbers to Arabic or Hindi forms and returning them
    60    * @param aChar is the character
    61    * @param aPrevCharArabic is true if the previous character in the string is an Arabic char
    62    * @param aNumFlag specifies the conversion to perform:
    63    *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
    64    *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms (Unicode 0660-0669)
    65    *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms (Unicode 0030-0039)
    66    *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
    67    * @return the converted Unichar
    68    */
    69   char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, uint32_t aNumFlag);
    71   /**
    72    * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place
    73    * @param aBuffer is the string
    74    * @param aSize is the size of aBuffer
    75    * @param aNumFlag specifies the conversion to perform:
    76    *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
    77    *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms (Unicode 0660-0669)
    78    *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms (Unicode 0030-0039)
    79    *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
    80    */
    81   nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t  aNumFlag);
    83   /**
    84    * Give a UTF-32 codepoint
    85    * return true if the codepoint is a Bidi control character (LRM, RLM, ALM;
    86    * LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI).
    87    * Return false, otherwise
    88    */
    89 #define LRM_CHAR 0x200e
    90 #define LRE_CHAR 0x202a
    91 #define RLO_CHAR 0x202e
    92 #define LRI_CHAR 0x2066
    93 #define PDI_CHAR 0x2069
    94 #define ALM_CHAR 0x061C
    95    inline bool IsBidiControl(uint32_t aChar) {
    96      return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
    97              (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
    98              (aChar == ALM_CHAR) ||
    99              (aChar & 0xfffffe) == LRM_CHAR);
   100    }
   102   /**
   103    * Given an nsAString.
   104    * @return true if the string contains right-to-left characters
   105    */
   106    bool HasRTLChars(const nsAString& aString);
   108 // These values are shared with Preferences dialog
   109 //  ------------------
   110 //  If Pref values are to be changed
   111 //  in the XUL file of Prefs. the values
   112 //  Must be changed here too..
   113 //  ------------------
   114 //
   115 #define IBMBIDI_TEXTDIRECTION_STR       "bidi.direction"
   116 #define IBMBIDI_TEXTTYPE_STR            "bidi.texttype"
   117 #define IBMBIDI_NUMERAL_STR             "bidi.numeral"
   118 #define IBMBIDI_SUPPORTMODE_STR         "bidi.support"
   120 #define IBMBIDI_TEXTDIRECTION       1
   121 #define IBMBIDI_TEXTTYPE            2
   122 #define IBMBIDI_NUMERAL             4
   123 #define IBMBIDI_SUPPORTMODE         5
   125 //  ------------------
   126 //  Text Direction
   127 //  ------------------
   128 //  bidi.direction
   129 #define IBMBIDI_TEXTDIRECTION_LTR     1 //  1 = directionLTRBidi *
   130 #define IBMBIDI_TEXTDIRECTION_RTL     2 //  2 = directionRTLBidi
   131 //  ------------------
   132 //  Text Type
   133 //  ------------------
   134 //  bidi.texttype
   135 #define IBMBIDI_TEXTTYPE_CHARSET      1 //  1 = charsettexttypeBidi *
   136 #define IBMBIDI_TEXTTYPE_LOGICAL      2 //  2 = logicaltexttypeBidi
   137 #define IBMBIDI_TEXTTYPE_VISUAL       3 //  3 = visualtexttypeBidi
   138 //  ------------------
   139 //  Numeral Style
   140 //  ------------------
   141 //  bidi.numeral
   142 #define IBMBIDI_NUMERAL_NOMINAL       0 //  0 = nominalnumeralBidi *
   143 #define IBMBIDI_NUMERAL_REGULAR       1 //  1 = regularcontextnumeralBidi
   144 #define IBMBIDI_NUMERAL_HINDICONTEXT  2 //  2 = hindicontextnumeralBidi
   145 #define IBMBIDI_NUMERAL_ARABIC        3 //  3 = arabicnumeralBidi
   146 #define IBMBIDI_NUMERAL_HINDI         4 //  4 = hindinumeralBidi
   147 #define IBMBIDI_NUMERAL_PERSIANCONTEXT 5 // 5 = persiancontextnumeralBidi
   148 #define IBMBIDI_NUMERAL_PERSIAN       6 //  6 = persiannumeralBidi
   149 //  ------------------
   150 //  Support Mode
   151 //  ------------------
   152 //  bidi.support
   153 #define IBMBIDI_SUPPORTMODE_MOZILLA     1 //  1 = mozillaBidisupport *
   154 #define IBMBIDI_SUPPORTMODE_OSBIDI      2 //  2 = OsBidisupport
   155 #define IBMBIDI_SUPPORTMODE_DISABLE     3 //  3 = disableBidisupport
   157 #define IBMBIDI_DEFAULT_BIDI_OPTIONS              \
   158         ((IBMBIDI_TEXTDIRECTION_LTR<<0)         | \
   159          (IBMBIDI_TEXTTYPE_CHARSET<<4)          | \
   160          (IBMBIDI_NUMERAL_NOMINAL<<8)          | \
   161          (IBMBIDI_SUPPORTMODE_MOZILLA<<12))
   163 #define GET_BIDI_OPTION_DIRECTION(bo) (((bo)>>0) & 0x0000000F) /* 4 bits for DIRECTION */
   164 #define GET_BIDI_OPTION_TEXTTYPE(bo) (((bo)>>4) & 0x0000000F) /* 4 bits for TEXTTYPE */
   165 #define GET_BIDI_OPTION_NUMERAL(bo) (((bo)>>8) & 0x0000000F) /* 4 bits for NUMERAL */
   166 #define GET_BIDI_OPTION_SUPPORT(bo) (((bo)>>12) & 0x0000000F) /* 4 bits for SUPPORT */
   168 #define SET_BIDI_OPTION_DIRECTION(bo, dir) {(bo)=((bo) & 0xFFFFFFF0)|(((dir)& 0x0000000F)<<0);}
   169 #define SET_BIDI_OPTION_TEXTTYPE(bo, tt) {(bo)=((bo) & 0xFFFFFF0F)|(((tt)& 0x0000000F)<<4);}
   170 #define SET_BIDI_OPTION_NUMERAL(bo, num) {(bo)=((bo) & 0xFFFFF0FF)|(((num)& 0x0000000F)<<8);}
   171 #define SET_BIDI_OPTION_SUPPORT(bo, sup) {(bo)=((bo) & 0xFFFF0FFF)|(((sup)& 0x0000000F)<<12);}
   173 /* Constants related to the position of numerics in the codepage */
   174 #define START_HINDI_DIGITS              0x0660
   175 #define END_HINDI_DIGITS                0x0669
   176 #define START_ARABIC_DIGITS             0x0030
   177 #define END_ARABIC_DIGITS               0x0039
   178 #define START_FARSI_DIGITS              0x06f0
   179 #define END_FARSI_DIGITS                0x06f9
   180 #define IS_HINDI_DIGIT(u)   ( ( (u) >= START_HINDI_DIGITS )  && ( (u) <= END_HINDI_DIGITS ) )
   181 #define IS_ARABIC_DIGIT(u)  ( ( (u) >= START_ARABIC_DIGITS ) && ( (u) <= END_ARABIC_DIGITS ) )
   182 #define IS_FARSI_DIGIT(u)  ( ( (u) >= START_FARSI_DIGITS ) && ( (u) <= END_FARSI_DIGITS ) )
   183 /**
   184  * Arabic numeric separator and numeric formatting characters:
   185  *  U+0600;ARABIC NUMBER SIGN
   186  *  U+0601;ARABIC SIGN SANAH
   187  *  U+0602;ARABIC FOOTNOTE MARKER
   188  *  U+0603;ARABIC SIGN SAFHA
   189  *  U+066A;ARABIC PERCENT SIGN
   190  *  U+066B;ARABIC DECIMAL SEPARATOR
   191  *  U+066C;ARABIC THOUSANDS SEPARATOR
   192  *  U+06DD;ARABIC END OF AYAH
   193  */
   194 #define IS_ARABIC_SEPARATOR(u) ( ( /*(u) >= 0x0600 &&*/ (u) <= 0x0603 ) || \
   195                                  ( (u) >= 0x066A && (u) <= 0x066C ) || \
   196                                  ( (u) == 0x06DD ) )
   198 #define IS_BIDI_DIACRITIC(u) ( \
   199   ( (u) >= 0x0591 && (u) <= 0x05A1) || ( (u) >= 0x05A3 && (u) <= 0x05B9) \
   200     || ( (u) >= 0x05BB && (u) <= 0x05BD) || ( (u) == 0x05BF) || ( (u) == 0x05C1) \
   201     || ( (u) == 0x05C2) || ( (u) == 0x05C4) \
   202     || ( (u) >= 0x064B && (u) <= 0x0652) || ( (u) == 0x0670) \
   203     || ( (u) >= 0x06D7 && (u) <= 0x06E4) || ( (u) == 0x06E7) || ( (u) == 0x06E8) \
   204     || ( (u) >= 0x06EA && (u) <= 0x06ED) )
   206 #define IS_HEBREW_CHAR(c) (((0x0590 <= (c)) && ((c) <= 0x05FF)) || (((c) >= 0xfb1d) && ((c) <= 0xfb4f)))
   207 #define IS_ARABIC_CHAR(c) ( (0x0600 <= (c) && (c) <= 0x08FF) &&   \
   208                             ( (c) <= 0x06ff ||                    \
   209                               ((c) >= 0x0750 && (c) <= 0x077f) || \
   210                               (c) >= 0x08a0 ) )
   211 #define IS_ARABIC_ALPHABETIC(c) (IS_ARABIC_CHAR(c) && \
   212                                 !(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c)))
   214 /**
   215  * The codepoint ranges in the following macros are based on the blocks
   216  *  allocated, or planned to be allocated, to right-to-left characters in the
   217  *  BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
   218  *  according to
   219  *  http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and
   220  *  http://www.unicode.org/roadmaps/
   221  */
   223 #define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff))
   224 #define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \
   225                                      ((0xfe70 <= (c)) && ((c) <= 0xfefc)))
   226 #define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
   227                                 ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
   228 #define UCS2_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
   229                               (IS_RTL_PRESENTATION_FORM(c)))
   230 #define UTF32_CHAR_IS_BIDI(c)  ((IS_IN_BMP_RTL_BLOCK(c)) || \
   231                                (IS_RTL_PRESENTATION_FORM(c)) || \
   232                                (IS_IN_SMP_RTL_BLOCK(c)))
   233 #endif  /* nsBidiUtils_h__ */

mercurial