intl/unicharutil/util/nsBidiUtils.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #ifndef nsBidiUtils_h__
michael@0 7 #define nsBidiUtils_h__
michael@0 8
michael@0 9 #include "nsStringGlue.h"
michael@0 10
michael@0 11 /**
michael@0 12 * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
michael@0 13 * section BIDIRECTIONAL PROPERTIES
michael@0 14 * for the detailed definition of the following categories
michael@0 15 *
michael@0 16 * The values here must match the equivalents in %bidicategorycode in
michael@0 17 * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl
michael@0 18 */
michael@0 19
/**
 * Bidirectional character type of a character.
 *
 * Every enumerator except the trailing count carries an explicit value; the
 * trailing comments give the corresponding Unicode bidi class abbreviation
 * for reference.
 */
typedef enum nsCharType {
  eCharType_LeftToRight              = 0,   /* L   */
  eCharType_RightToLeft              = 1,   /* R   */
  eCharType_EuropeanNumber           = 2,   /* EN  */
  eCharType_EuropeanNumberSeparator  = 3,   /* ES  */
  eCharType_EuropeanNumberTerminator = 4,   /* ET  */
  eCharType_ArabicNumber             = 5,   /* AN  */
  eCharType_CommonNumberSeparator    = 6,   /* CS  */
  eCharType_BlockSeparator           = 7,   /* B   */
  eCharType_SegmentSeparator         = 8,   /* S   */
  eCharType_WhiteSpaceNeutral        = 9,   /* WS  */
  eCharType_OtherNeutral             = 10,  /* ON  */
  eCharType_LeftToRightEmbedding     = 11,  /* LRE */
  eCharType_LeftToRightOverride      = 12,  /* LRO */
  eCharType_RightToLeftArabic        = 13,  /* AL  */
  eCharType_RightToLeftEmbedding     = 14,  /* RLE */
  eCharType_RightToLeftOverride      = 15,  /* RLO */
  eCharType_PopDirectionalFormat     = 16,  /* PDF */
  eCharType_DirNonSpacingMark        = 17,  /* NSM */
  eCharType_BoundaryNeutral          = 18,  /* BN  */
  /* Number of real character types; keep this entry last. */
  eCharType_CharTypeCount
} nsCharType;
michael@0 47
/**
 * Classification of bidirectional character types by category.
 *
 * Both macros take an nsCharType value and may evaluate it more than once,
 * so do not pass an expression with side effects.
 */

/* True for the two right-to-left types (RightToLeft and RightToLeftArabic). */
#define CHARTYPE_IS_RTL(val)                       \
  (((val) == eCharType_RightToLeft) ||             \
   ((val) == eCharType_RightToLeftArabic))

/*
 * True for the European number separator/terminator types and for every
 * type numbered above eCharType_ArabicNumber other than
 * eCharType_RightToLeftArabic.
 */
#define CHARTYPE_IS_WEAK(val)                          \
  (((val) == eCharType_EuropeanNumberSeparator) ||     \
   ((val) == eCharType_EuropeanNumberTerminator) ||    \
   (((val) != eCharType_RightToLeftArabic) &&          \
    ((val) > eCharType_ArabicNumber)))
michael@0 57
michael@0 58 /**
michael@0 59 * Inspects a Unichar, converting numbers to Arabic or Hindi forms and returning them
michael@0 60 * @param aChar is the character
michael@0 61 * @param aPrevCharArabic is true if the previous character in the string is an Arabic char
michael@0 62 * @param aNumFlag specifies the conversion to perform:
michael@0 63 * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion
michael@0 64 * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669)
michael@0 65 * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039)
michael@0 66 * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
michael@0 67 * @return the converted Unichar
michael@0 68 */
michael@0 69 char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, uint32_t aNumFlag);
michael@0 70
michael@0 71 /**
michael@0 72 * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place
michael@0 73 * @param aBuffer is the string
michael@0 74 * @param aSize is the size of aBuffer
michael@0 75 * @param aNumFlag specifies the conversion to perform:
michael@0 76 * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion
michael@0 77 * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669)
michael@0 78 * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039)
michael@0 79 * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
michael@0 80 */
michael@0 81 nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag);
michael@0 82
/**
 * Code points of the Bidi control characters tested by IsBidiControl().
 * LRE_CHAR..RLO_CHAR and LRI_CHAR..PDI_CHAR are the inclusive ends of two
 * contiguous ranges; RLM is LRM_CHAR + 1.
 */
#define LRM_CHAR 0x200e
#define LRE_CHAR 0x202a
#define RLO_CHAR 0x202e
#define LRI_CHAR 0x2066
#define PDI_CHAR 0x2069
#define ALM_CHAR 0x061C

/**
 * Given a UTF-32 codepoint, tell whether it is a Bidi control character.
 *
 * @param aChar the codepoint to test
 * @return true if aChar is one of LRM, RLM, ALM; LRE, RLE, PDF, LRO, RLO;
 *         LRI, RLI, FSI, PDI. Returns false otherwise, including for values
 *         that only match one of these in their low 24 bits.
 */
inline bool IsBidiControl(uint32_t aChar) {
  return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
          (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
          (aChar == ALM_CHAR) ||
          // Clearing bit 0 folds RLM (0x200f) onto LRM (0x200e). Every other
          // bit is kept so that e.g. 0x0100200e is not mistaken for LRM.
          (aChar & 0xfffffffeu) == LRM_CHAR);
}
michael@0 101
michael@0 102 /**
michael@0 103 * Given an nsAString.
michael@0 104 * @return true if the string contains right-to-left characters
michael@0 105 */
michael@0 106 bool HasRTLChars(const nsAString& aString);
michael@0 107
// ------------------
// Bidi preference names and identifiers.
//
// These values are shared with the Preferences dialog: if a pref value is
// changed in the XUL file of Prefs, it must be changed here too.
// ------------------

// Preference names
#define IBMBIDI_TEXTDIRECTION_STR  "bidi.direction"
#define IBMBIDI_TEXTTYPE_STR       "bidi.texttype"
#define IBMBIDI_NUMERAL_STR        "bidi.numeral"
#define IBMBIDI_SUPPORTMODE_STR    "bidi.support"

// Numeric identifiers, one per preference above
#define IBMBIDI_TEXTDIRECTION      1
#define IBMBIDI_TEXTTYPE           2
#define IBMBIDI_NUMERAL            4
#define IBMBIDI_SUPPORTMODE        5
michael@0 124
// Values of the individual bidi preferences. The trailing comment on each
// line repeats the numeric value and the name originally recorded next to
// it; entries marked with * are the ones used by
// IBMBIDI_DEFAULT_BIDI_OPTIONS.

// ---- Text Direction (bidi.direction) ----
#define IBMBIDI_TEXTDIRECTION_LTR       1  // 1 = directionLTRBidi *
#define IBMBIDI_TEXTDIRECTION_RTL       2  // 2 = directionRTLBidi

// ---- Text Type (bidi.texttype) ----
#define IBMBIDI_TEXTTYPE_CHARSET        1  // 1 = charsettexttypeBidi *
#define IBMBIDI_TEXTTYPE_LOGICAL        2  // 2 = logicaltexttypeBidi
#define IBMBIDI_TEXTTYPE_VISUAL         3  // 3 = visualtexttypeBidi

// ---- Numeral Style (bidi.numeral) ----
#define IBMBIDI_NUMERAL_NOMINAL         0  // 0 = nominalnumeralBidi *
#define IBMBIDI_NUMERAL_REGULAR         1  // 1 = regularcontextnumeralBidi
#define IBMBIDI_NUMERAL_HINDICONTEXT    2  // 2 = hindicontextnumeralBidi
#define IBMBIDI_NUMERAL_ARABIC          3  // 3 = arabicnumeralBidi
#define IBMBIDI_NUMERAL_HINDI           4  // 4 = hindinumeralBidi
#define IBMBIDI_NUMERAL_PERSIANCONTEXT  5  // 5 = persiancontextnumeralBidi
#define IBMBIDI_NUMERAL_PERSIAN         6  // 6 = persiannumeralBidi

// ---- Support Mode (bidi.support) ----
#define IBMBIDI_SUPPORTMODE_MOZILLA     1  // 1 = mozillaBidisupport *
#define IBMBIDI_SUPPORTMODE_OSBIDI      2  // 2 = OsBidisupport
#define IBMBIDI_SUPPORTMODE_DISABLE     3  // 3 = disableBidisupport
michael@0 156
/*
 * Default packed bidi options: direction in bits 0-3, text type in bits 4-7,
 * numeral style in bits 8-11 and support mode in bits 12-15.
 */
#define IBMBIDI_DEFAULT_BIDI_OPTIONS            \
  (((IBMBIDI_TEXTDIRECTION_LTR)   << 0)  |      \
   ((IBMBIDI_TEXTTYPE_CHARSET)    << 4)  |      \
   ((IBMBIDI_NUMERAL_NOMINAL)     << 8)  |      \
   ((IBMBIDI_SUPPORTMODE_MOZILLA) << 12))
michael@0 162
/*
 * Extract one 4-bit field from a packed bidi options word:
 * shift the field down to bit 0, then mask off everything above it.
 */
#define GET_BIDI_OPTION_DIRECTION(bo) (((bo) >> 0)  & 0xF) /* bits 0-3:   DIRECTION */
#define GET_BIDI_OPTION_TEXTTYPE(bo)  (((bo) >> 4)  & 0xF) /* bits 4-7:   TEXTTYPE  */
#define GET_BIDI_OPTION_NUMERAL(bo)   (((bo) >> 8)  & 0xF) /* bits 8-11:  NUMERAL   */
#define GET_BIDI_OPTION_SUPPORT(bo)   (((bo) >> 12) & 0xF) /* bits 12-15: SUPPORT   */
michael@0 167
/*
 * Store a 4-bit field into the packed bidi options word `bo` (an lvalue).
 * The new value is masked to 4 bits; the other fields of `bo` are preserved.
 *
 * Each macro is wrapped in do { ... } while (0) so that it behaves as a
 * single statement and can be used safely in an unbraced if/else. Callers
 * must terminate the invocation with a semicolon. `bo` is evaluated twice.
 */
#define SET_BIDI_OPTION_DIRECTION(bo, dir) \
  do { (bo) = ((bo) & 0xFFFFFFF0) | (((dir) & 0x0000000F) << 0); } while (0)
#define SET_BIDI_OPTION_TEXTTYPE(bo, tt) \
  do { (bo) = ((bo) & 0xFFFFFF0F) | (((tt) & 0x0000000F) << 4); } while (0)
#define SET_BIDI_OPTION_NUMERAL(bo, num) \
  do { (bo) = ((bo) & 0xFFFFF0FF) | (((num) & 0x0000000F) << 8); } while (0)
#define SET_BIDI_OPTION_SUPPORT(bo, sup) \
  do { (bo) = ((bo) & 0xFFFFFFFF0FFF & 0xFFFF0FFF) | (((sup) & 0x0000000F) << 12); } while (0)
michael@0 172
/*
 * Code point ranges of the decimal digit sets handled here, and predicates
 * testing membership in each (both ends inclusive). In this file's naming,
 * the "Arabic" digits are U+0030..U+0039, the "Hindi" digits are
 * U+0660..U+0669 and the "Farsi" digits are U+06F0..U+06F9.
 * Each predicate evaluates its argument twice.
 */
#define START_HINDI_DIGITS   0x0660
#define END_HINDI_DIGITS     0x0669
#define START_ARABIC_DIGITS  0x0030
#define END_ARABIC_DIGITS    0x0039
#define START_FARSI_DIGITS   0x06f0
#define END_FARSI_DIGITS     0x06f9

#define IS_HINDI_DIGIT(u) \
  ((START_HINDI_DIGITS <= (u)) && ((u) <= END_HINDI_DIGITS))
#define IS_ARABIC_DIGIT(u) \
  ((START_ARABIC_DIGITS <= (u)) && ((u) <= END_ARABIC_DIGITS))
#define IS_FARSI_DIGIT(u) \
  ((START_FARSI_DIGITS <= (u)) && ((u) <= END_FARSI_DIGITS))
/**
 * Arabic numeric separator and numeric formatting characters:
 *   U+0600;ARABIC NUMBER SIGN
 *   U+0601;ARABIC SIGN SANAH
 *   U+0602;ARABIC FOOTNOTE MARKER
 *   U+0603;ARABIC SIGN SAFHA
 *   U+066A;ARABIC PERCENT SIGN
 *   U+066B;ARABIC DECIMAL SEPARATOR
 *   U+066C;ARABIC THOUSANDS SEPARATOR
 *   U+06DD;ARABIC END OF AYAH
 *
 * The lower bound of the first range is checked explicitly so that the macro
 * is also correct when used on its own for code points below U+0600.
 * The argument may be evaluated more than once.
 */
#define IS_ARABIC_SEPARATOR(u) ( ( (u) >= 0x0600 && (u) <= 0x0603 ) || \
                                 ( (u) >= 0x066A && (u) <= 0x066C ) || \
                                 ( (u) == 0x06DD ) )
michael@0 197
/*
 * True when u lies in one of the listed code point ranges of the Hebrew
 * (U+0591..U+05C4) and Arabic (U+064B..U+06ED) blocks. All ranges are
 * inclusive. The argument may be evaluated more than once.
 */
#define IS_BIDI_DIACRITIC(u) (                     \
     ((0x0591 <= (u)) && ((u) <= 0x05A1))          \
  || ((0x05A3 <= (u)) && ((u) <= 0x05B9))          \
  || ((0x05BB <= (u)) && ((u) <= 0x05BD))          \
  || ((u) == 0x05BF)                               \
  || ((0x05C1 <= (u)) && ((u) <= 0x05C2))          \
  || ((u) == 0x05C4)                               \
  || ((0x064B <= (u)) && ((u) <= 0x0652))          \
  || ((u) == 0x0670)                               \
  || ((0x06D7 <= (u)) && ((u) <= 0x06E4))          \
  || ((0x06E7 <= (u)) && ((u) <= 0x06E8))          \
  || ((0x06EA <= (u)) && ((u) <= 0x06ED)) )
michael@0 205
/*
 * Script membership tests on a code point; all ranges are inclusive and the
 * argument may be evaluated more than once.
 *
 * IS_HEBREW_CHAR:       U+0590..U+05FF or U+FB1D..U+FB4F.
 * IS_ARABIC_CHAR:       U+0600..U+06FF, U+0750..U+077F or U+08A0..U+08FF.
 * IS_ARABIC_ALPHABETIC: an IS_ARABIC_CHAR code point that is not a Hindi
 *                       digit, a Farsi digit or an Arabic separator.
 */
#define IS_HEBREW_CHAR(c)                          \
  (((0x0590 <= (c)) && ((c) <= 0x05FF)) ||         \
   ((0xfb1d <= (c)) && ((c) <= 0xfb4f)))
#define IS_ARABIC_CHAR(c)                          \
  (((0x0600 <= (c)) && ((c) <= 0x06ff)) ||         \
   ((0x0750 <= (c)) && ((c) <= 0x077f)) ||         \
   ((0x08a0 <= (c)) && ((c) <= 0x08FF)))
#define IS_ARABIC_ALPHABETIC(c)                    \
  (IS_ARABIC_CHAR(c) &&                            \
   !IS_HINDI_DIGIT(c) &&                           \
   !IS_FARSI_DIGIT(c) &&                           \
   !IS_ARABIC_SEPARATOR(c))
michael@0 213
/**
 * The codepoint ranges in the following macros are based on the blocks
 * allocated, or planned to be allocated, to right-to-left characters in the
 * BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
 * according to
 * http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and
 * http://www.unicode.org/roadmaps/
 *
 * All ranges are inclusive; each macro may evaluate its argument more than
 * once.
 */

/* U+0590..U+08FF */
#define IS_IN_BMP_RTL_BLOCK(c) \
  ((0x590 <= (c)) && ((c) <= 0x8ff))

/* U+FB1D..U+FDFF and U+FE70..U+FEFC */
#define IS_RTL_PRESENTATION_FORM(c)            \
  (((0xfb1d <= (c)) && ((c) <= 0xfdff)) ||     \
   ((0xfe70 <= (c)) && ((c) <= 0xfefc)))

/* U+10800..U+10FFF and U+1E800..U+1EFFF */
#define IS_IN_SMP_RTL_BLOCK(c)                 \
  (((0x10800 <= (c)) && ((c) <= 0x10fff)) ||   \
   ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))

/* BMP-only test: RTL block or RTL presentation form. */
#define UCS2_CHAR_IS_BIDI(c)                   \
  ((IS_IN_BMP_RTL_BLOCK(c)) ||                 \
   (IS_RTL_PRESENTATION_FORM(c)))

/* Same as UCS2_CHAR_IS_BIDI, plus the SMP RTL blocks. */
#define UTF32_CHAR_IS_BIDI(c)                  \
  ((IS_IN_BMP_RTL_BLOCK(c)) ||                 \
   (IS_RTL_PRESENTATION_FORM(c)) ||            \
   (IS_IN_SMP_RTL_BLOCK(c)))
michael@0 233 #endif /* nsBidiUtils_h__ */

mercurial