intl/unicharutil/util/nsBidiUtils.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/unicharutil/util/nsBidiUtils.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,233 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#ifndef nsBidiUtils_h__
    1.10 +#define nsBidiUtils_h__
    1.11 +
    1.12 +#include "nsStringGlue.h"
    1.13 +
    1.14 +   /**
    1.15 +    *  Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
    1.16 +    *  section BIDIRECTIONAL PROPERTIES
    1.17 +    *  for the detailed definition of the following categories
    1.18 +    *
    1.19 +    *  The values here must match the equivalents in %bidicategorycode in
    1.20 +    *  mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl
    1.21 +    */
    1.22 +
    1.23 +enum nsCharType   { 
    1.24 +  eCharType_LeftToRight              = 0, 
    1.25 +  eCharType_RightToLeft              = 1, 
    1.26 +  eCharType_EuropeanNumber           = 2,
    1.27 +  eCharType_EuropeanNumberSeparator  = 3,
    1.28 +  eCharType_EuropeanNumberTerminator = 4,
    1.29 +  eCharType_ArabicNumber             = 5,
    1.30 +  eCharType_CommonNumberSeparator    = 6,
    1.31 +  eCharType_BlockSeparator           = 7,
    1.32 +  eCharType_SegmentSeparator         = 8,
    1.33 +  eCharType_WhiteSpaceNeutral        = 9, 
    1.34 +  eCharType_OtherNeutral             = 10, 
    1.35 +  eCharType_LeftToRightEmbedding     = 11,
    1.36 +  eCharType_LeftToRightOverride      = 12,
    1.37 +  eCharType_RightToLeftArabic        = 13,
    1.38 +  eCharType_RightToLeftEmbedding     = 14,
    1.39 +  eCharType_RightToLeftOverride      = 15,
    1.40 +  eCharType_PopDirectionalFormat     = 16,
    1.41 +  eCharType_DirNonSpacingMark        = 17,
    1.42 +  eCharType_BoundaryNeutral          = 18,
    1.43 +  eCharType_CharTypeCount
    1.44 +};
    1.45 +
    1.46 +/**
    1.47 + * This specifies the language directional property of a character set.
    1.48 + */
    1.49 +typedef enum nsCharType nsCharType;
    1.50 +
    1.51 +/**
    1.52 + * definitions of bidirection character types by category
    1.53 + */
    1.54 +
    1.55 +#define CHARTYPE_IS_RTL(val) ( ( (val) == eCharType_RightToLeft) || ( (val) == eCharType_RightToLeftArabic) )
    1.56 +
    1.57 +#define CHARTYPE_IS_WEAK(val) ( ( (val) == eCharType_EuropeanNumberSeparator)    \
    1.58 +                           || ( (val) == eCharType_EuropeanNumberTerminator) \
    1.59 +                           || ( ( (val) > eCharType_ArabicNumber) && ( (val) != eCharType_RightToLeftArabic) ) )
    1.60 +
    1.61 +  /**
    1.62 +   * Inspects a Unichar, converting numbers to Arabic or Hindi forms and returning them
    1.63 +   * @param aChar is the character
    1.64 +   * @param aPrevCharArabic is true if the previous character in the string is an Arabic char
    1.65 +   * @param aNumFlag specifies the conversion to perform:
    1.66 +   *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
    1.67 +   *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms (Unicode 0660-0669)
    1.68 +   *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms (Unicode 0030-0039)
    1.69 +   *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
    1.70 +   * @return the converted Unichar
    1.71 +   */
    1.72 +  char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, uint32_t aNumFlag);
    1.73 +
    1.74 +  /**
    1.75 +   * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place
    1.76 +   * @param aBuffer is the string
    1.77 +   * @param aSize is the size of aBuffer
    1.78 +   * @param aNumFlag specifies the conversion to perform:
    1.79 +   *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
    1.80 +   *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms (Unicode 0660-0669)
    1.81 +   *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms (Unicode 0030-0039)
    1.82 +   *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
    1.83 +   */
    1.84 +  nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t  aNumFlag);
    1.85 +
    1.86 +  /**
    1.87 +   * Give a UTF-32 codepoint
    1.88 +   * return true if the codepoint is a Bidi control character (LRM, RLM, ALM;
    1.89 +   * LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI).
    1.90 +   * Return false, otherwise
    1.91 +   */
    1.92 +#define LRM_CHAR 0x200e
    1.93 +#define LRE_CHAR 0x202a
    1.94 +#define RLO_CHAR 0x202e
    1.95 +#define LRI_CHAR 0x2066
    1.96 +#define PDI_CHAR 0x2069
    1.97 +#define ALM_CHAR 0x061C
    1.98 +   inline bool IsBidiControl(uint32_t aChar) {
    1.99 +     return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
   1.100 +             (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
   1.101 +             (aChar == ALM_CHAR) ||
   1.102 +             (aChar & 0xfffffe) == LRM_CHAR);
   1.103 +   }
   1.104 +
   1.105 +  /**
   1.106 +   * Give an nsString.
   1.107 +   * @return true if the string contains right-to-left characters
   1.108 +   */
   1.109 +   bool HasRTLChars(const nsAString& aString);
   1.110 +
   1.111 +// These values are shared with Preferences dialog
   1.112 +//  ------------------
   1.113 +//  If Pref values are to be changed
   1.114 +//  in the XUL file of Prefs. the values
   1.115 +//  Must be changed here too..
   1.116 +//  ------------------
   1.117 +//
   1.118 +#define IBMBIDI_TEXTDIRECTION_STR       "bidi.direction"
   1.119 +#define IBMBIDI_TEXTTYPE_STR            "bidi.texttype"
   1.120 +#define IBMBIDI_NUMERAL_STR             "bidi.numeral"
   1.121 +#define IBMBIDI_SUPPORTMODE_STR         "bidi.support"
   1.122 +
   1.123 +#define IBMBIDI_TEXTDIRECTION       1
   1.124 +#define IBMBIDI_TEXTTYPE            2
   1.125 +#define IBMBIDI_NUMERAL             4
   1.126 +#define IBMBIDI_SUPPORTMODE         5
   1.127 +
   1.128 +//  ------------------
   1.129 +//  Text Direction
   1.130 +//  ------------------
   1.131 +//  bidi.direction
   1.132 +#define IBMBIDI_TEXTDIRECTION_LTR     1 //  1 = directionLTRBidi *
   1.133 +#define IBMBIDI_TEXTDIRECTION_RTL     2 //  2 = directionRTLBidi
   1.134 +//  ------------------
   1.135 +//  Text Type
   1.136 +//  ------------------
   1.137 +//  bidi.texttype
   1.138 +#define IBMBIDI_TEXTTYPE_CHARSET      1 //  1 = charsettexttypeBidi *
   1.139 +#define IBMBIDI_TEXTTYPE_LOGICAL      2 //  2 = logicaltexttypeBidi
   1.140 +#define IBMBIDI_TEXTTYPE_VISUAL       3 //  3 = visualtexttypeBidi
   1.141 +//  ------------------
   1.142 +//  Numeral Style
   1.143 +//  ------------------
   1.144 +//  bidi.numeral
   1.145 +#define IBMBIDI_NUMERAL_NOMINAL       0 //  0 = nominalnumeralBidi *
   1.146 +#define IBMBIDI_NUMERAL_REGULAR       1 //  1 = regularcontextnumeralBidi
   1.147 +#define IBMBIDI_NUMERAL_HINDICONTEXT  2 //  2 = hindicontextnumeralBidi
   1.148 +#define IBMBIDI_NUMERAL_ARABIC        3 //  3 = arabicnumeralBidi
   1.149 +#define IBMBIDI_NUMERAL_HINDI         4 //  4 = hindinumeralBidi
   1.150 +#define IBMBIDI_NUMERAL_PERSIANCONTEXT 5 // 5 = persiancontextnumeralBidi
   1.151 +#define IBMBIDI_NUMERAL_PERSIAN       6 //  6 = persiannumeralBidi
   1.152 +//  ------------------
   1.153 +//  Support Mode
   1.154 +//  ------------------
   1.155 +//  bidi.support
   1.156 +#define IBMBIDI_SUPPORTMODE_MOZILLA     1 //  1 = mozillaBidisupport *
   1.157 +#define IBMBIDI_SUPPORTMODE_OSBIDI      2 //  2 = OsBidisupport
   1.158 +#define IBMBIDI_SUPPORTMODE_DISABLE     3 //  3 = disableBidisupport
   1.159 +
   1.160 +#define IBMBIDI_DEFAULT_BIDI_OPTIONS              \
   1.161 +        ((IBMBIDI_TEXTDIRECTION_LTR<<0)         | \
   1.162 +         (IBMBIDI_TEXTTYPE_CHARSET<<4)          | \
   1.163 +         (IBMBIDI_NUMERAL_NOMINAL<<8)          | \
   1.164 +         (IBMBIDI_SUPPORTMODE_MOZILLA<<12))
   1.165 +
   1.166 +#define GET_BIDI_OPTION_DIRECTION(bo) (((bo)>>0) & 0x0000000F) /* 4 bits for DIRECTION */
   1.167 +#define GET_BIDI_OPTION_TEXTTYPE(bo) (((bo)>>4) & 0x0000000F) /* 4 bits for TEXTTYPE */
   1.168 +#define GET_BIDI_OPTION_NUMERAL(bo) (((bo)>>8) & 0x0000000F) /* 4 bits for NUMERAL */
   1.169 +#define GET_BIDI_OPTION_SUPPORT(bo) (((bo)>>12) & 0x0000000F) /* 4 bits for SUPPORT */
   1.170 +
   1.171 +#define SET_BIDI_OPTION_DIRECTION(bo, dir) {(bo)=((bo) & 0xFFFFFFF0)|(((dir)& 0x0000000F)<<0);}
   1.172 +#define SET_BIDI_OPTION_TEXTTYPE(bo, tt) {(bo)=((bo) & 0xFFFFFF0F)|(((tt)& 0x0000000F)<<4);}
   1.173 +#define SET_BIDI_OPTION_NUMERAL(bo, num) {(bo)=((bo) & 0xFFFFF0FF)|(((num)& 0x0000000F)<<8);}
   1.174 +#define SET_BIDI_OPTION_SUPPORT(bo, sup) {(bo)=((bo) & 0xFFFF0FFF)|(((sup)& 0x0000000F)<<12);}
   1.175 +
   1.176 +/* Constants related to the position of numerics in the codepage */
   1.177 +#define START_HINDI_DIGITS              0x0660
   1.178 +#define END_HINDI_DIGITS                0x0669
   1.179 +#define START_ARABIC_DIGITS             0x0030
   1.180 +#define END_ARABIC_DIGITS               0x0039
   1.181 +#define START_FARSI_DIGITS              0x06f0
   1.182 +#define END_FARSI_DIGITS                0x06f9
   1.183 +#define IS_HINDI_DIGIT(u)   ( ( (u) >= START_HINDI_DIGITS )  && ( (u) <= END_HINDI_DIGITS ) )
   1.184 +#define IS_ARABIC_DIGIT(u)  ( ( (u) >= START_ARABIC_DIGITS ) && ( (u) <= END_ARABIC_DIGITS ) )
   1.185 +#define IS_FARSI_DIGIT(u)  ( ( (u) >= START_FARSI_DIGITS ) && ( (u) <= END_FARSI_DIGITS ) )
   1.186 +/**
   1.187 + * Arabic numeric separator and numeric formatting characters:
   1.188 + *  U+0600;ARABIC NUMBER SIGN
   1.189 + *  U+0601;ARABIC SIGN SANAH
   1.190 + *  U+0602;ARABIC FOOTNOTE MARKER
   1.191 + *  U+0603;ARABIC SIGN SAFHA
   1.192 + *  U+066A;ARABIC PERCENT SIGN
   1.193 + *  U+066B;ARABIC DECIMAL SEPARATOR
   1.194 + *  U+066C;ARABIC THOUSANDS SEPARATOR
   1.195 + *  U+06DD;ARABIC END OF AYAH
   1.196 + */
   1.197 +#define IS_ARABIC_SEPARATOR(u) ( ( /*(u) >= 0x0600 &&*/ (u) <= 0x0603 ) || \
   1.198 +                                 ( (u) >= 0x066A && (u) <= 0x066C ) || \
   1.199 +                                 ( (u) == 0x06DD ) )
   1.200 +
   1.201 +#define IS_BIDI_DIACRITIC(u) ( \
   1.202 +  ( (u) >= 0x0591 && (u) <= 0x05A1) || ( (u) >= 0x05A3 && (u) <= 0x05B9) \
   1.203 +    || ( (u) >= 0x05BB && (u) <= 0x05BD) || ( (u) == 0x05BF) || ( (u) == 0x05C1) \
   1.204 +    || ( (u) == 0x05C2) || ( (u) == 0x05C4) \
   1.205 +    || ( (u) >= 0x064B && (u) <= 0x0652) || ( (u) == 0x0670) \
   1.206 +    || ( (u) >= 0x06D7 && (u) <= 0x06E4) || ( (u) == 0x06E7) || ( (u) == 0x06E8) \
   1.207 +    || ( (u) >= 0x06EA && (u) <= 0x06ED) )
   1.208 +
   1.209 +#define IS_HEBREW_CHAR(c) (((0x0590 <= (c)) && ((c) <= 0x05FF)) || (((c) >= 0xfb1d) && ((c) <= 0xfb4f)))
   1.210 +#define IS_ARABIC_CHAR(c) ( (0x0600 <= (c) && (c) <= 0x08FF) &&   \
   1.211 +                            ( (c) <= 0x06ff ||                    \
   1.212 +                              ((c) >= 0x0750 && (c) <= 0x077f) || \
   1.213 +                              (c) >= 0x08a0 ) )
   1.214 +#define IS_ARABIC_ALPHABETIC(c) (IS_ARABIC_CHAR(c) && \
   1.215 +                                !(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c)))
   1.216 +
   1.217 +/**
   1.218 + * The codepoint ranges in the following macros are based on the blocks
   1.219 + *  allocated, or planned to be allocated, to right-to-left characters in the
   1.220 + *  BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
   1.221 + *  according to
   1.222 + *  http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and
   1.223 + *  http://www.unicode.org/roadmaps/
   1.224 + */
   1.225 +
   1.226 +#define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff))
   1.227 +#define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \
   1.228 +                                     ((0xfe70 <= (c)) && ((c) <= 0xfefc)))
   1.229 +#define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
   1.230 +                                ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
   1.231 +#define UCS2_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
   1.232 +                              (IS_RTL_PRESENTATION_FORM(c)))
   1.233 +#define UTF32_CHAR_IS_BIDI(c)  ((IS_IN_BMP_RTL_BLOCK(c)) || \
   1.234 +                               (IS_RTL_PRESENTATION_FORM(c)) || \
   1.235 +                               (IS_IN_SMP_RTL_BLOCK(c)))
   1.236 +#endif  /* nsBidiUtils_h__ */

mercurial