Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #ifndef nsBidiUtils_h__ |
michael@0 | 7 | #define nsBidiUtils_h__ |
michael@0 | 8 | |
michael@0 | 9 | #include "nsStringGlue.h" |
michael@0 | 10 | |
michael@0 | 11 | /** |
michael@0 | 12 | * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt |
michael@0 | 13 | * section BIDIRECTIONAL PROPERTIES |
michael@0 | 14 | * for the detailed definition of the following categories |
michael@0 | 15 | * |
michael@0 | 16 | * The values here must match the equivalents in %bidicategorycode in |
michael@0 | 17 | * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl |
michael@0 | 18 | */ |
michael@0 | 19 | |
enum nsCharType {
  eCharType_LeftToRight              =  0,
  eCharType_RightToLeft              =  1,
  eCharType_EuropeanNumber           =  2,
  eCharType_EuropeanNumberSeparator  =  3,
  eCharType_EuropeanNumberTerminator =  4,
  eCharType_ArabicNumber             =  5,
  eCharType_CommonNumberSeparator    =  6,
  eCharType_BlockSeparator           =  7,
  eCharType_SegmentSeparator         =  8,
  eCharType_WhiteSpaceNeutral        =  9,
  eCharType_OtherNeutral             = 10,
  eCharType_LeftToRightEmbedding     = 11,
  eCharType_LeftToRightOverride      = 12,
  eCharType_RightToLeftArabic        = 13,
  eCharType_RightToLeftEmbedding     = 14,
  eCharType_RightToLeftOverride      = 15,
  eCharType_PopDirectionalFormat     = 16,
  eCharType_DirNonSpacingMark        = 17,
  eCharType_BoundaryNeutral          = 18,
  /* Not a category: one past the last real value (19). Keep it last. */
  eCharType_CharTypeCount
};

/**
 * The directional property of a character, usable without the `enum`
 * keyword.
 */
typedef enum nsCharType nsCharType;
michael@0 | 47 | |
/**
 * Predicates over nsCharType values, grouping the categories.
 * Both macros evaluate their argument more than once.
 */

/* True for the two right-to-left letter categories. */
#define CHARTYPE_IS_RTL(val)                                  \
  ( ( (val) == eCharType_RightToLeft) ||                      \
    ( (val) == eCharType_RightToLeftArabic) )

/*
 * True for the European number separator/terminator and for every category
 * numbered above eCharType_ArabicNumber, except eCharType_RightToLeftArabic.
 */
#define CHARTYPE_IS_WEAK(val)                                 \
  ( ( (val) == eCharType_EuropeanNumberSeparator) ||          \
    ( (val) == eCharType_EuropeanNumberTerminator) ||         \
    ( ( (val) > eCharType_ArabicNumber) &&                    \
      ( (val) != eCharType_RightToLeftArabic) ) )
michael@0 | 57 | |
michael@0 | 58 | /** |
michael@0 | 59 | * Inspects a Unichar, converting numbers to Arabic or Hindi forms and returning them |
michael@0 | 60 | * @param aChar is the character |
michael@0 | 61 | * @param aPrevCharArabic is true if the previous character in the string is an Arabic char |
michael@0 | 62 | * @param aNumFlag specifies the conversion to perform: |
michael@0 | 63 | * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion |
michael@0 | 64 | * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669) |
michael@0 | 65 | * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039) |
michael@0 | 66 | * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic |
michael@0 | 67 | * @return the converted Unichar |
michael@0 | 68 | */ |
michael@0 | 69 | char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, uint32_t aNumFlag); |
michael@0 | 70 | |
michael@0 | 71 | /** |
michael@0 | 72 | * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place |
michael@0 | 73 | * @param aBuffer is the string |
michael@0 | 74 | * @param aSize is the size of aBuffer |
michael@0 | 75 | * @param aNumFlag specifies the conversion to perform: |
michael@0 | 76 | * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion |
michael@0 | 77 | * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669) |
michael@0 | 78 | * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039) |
michael@0 | 79 | * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic |
michael@0 | 80 | */ |
michael@0 | 81 | nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag); |
michael@0 | 82 | |
/*
 * Code points used by IsBidiControl().
 */
#define LRM_CHAR 0x200e  /* RLM is the next code point, 0x200f */
#define LRE_CHAR 0x202a  /* start of the contiguous range ending at RLO_CHAR */
#define RLO_CHAR 0x202e
#define LRI_CHAR 0x2066  /* start of the contiguous range ending at PDI_CHAR */
#define PDI_CHAR 0x2069
#define ALM_CHAR 0x061C

/**
 * Given a UTF-32 codepoint, return true if the codepoint is a Bidi control
 * character (LRM, RLM, ALM; LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI).
 * Return false otherwise.
 *
 * @param aChar the codepoint to test
 * @return true if aChar is one of the characters listed above
 */
inline bool IsBidiControl(uint32_t aChar) {
  return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
          (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
          (aChar == ALM_CHAR) ||
          // LRM (0x200e) and RLM (0x200f) differ only in bit 0, so clear just
          // that bit before comparing. A narrower mask (0xfffffe) would also
          // discard bits 24-31 and accept values such as 0x0100200e.
          (aChar & 0xfffffffeU) == LRM_CHAR);
}
michael@0 | 101 | |
michael@0 | 102 | /** |
michael@0 | 103 | * Takes an nsAString. |
michael@0 | 104 | * @return true if the string contains right-to-left characters |
michael@0 | 105 | */ |
michael@0 | 106 | bool HasRTLChars(const nsAString& aString); |
michael@0 | 107 | |
// Bidi preference names and values.
//
// These values are shared with the Preferences dialog: if a pref value is
// changed in the XUL file of Prefs, it must be changed here too.
//
// A trailing '*' on a value marks the one used by
// IBMBIDI_DEFAULT_BIDI_OPTIONS below.

// Preference names.
#define IBMBIDI_TEXTDIRECTION_STR   "bidi.direction"
#define IBMBIDI_TEXTTYPE_STR        "bidi.texttype"
#define IBMBIDI_NUMERAL_STR         "bidi.numeral"
#define IBMBIDI_SUPPORTMODE_STR     "bidi.support"

// Numeric ids, paired by name with the preferences above (3 is not assigned).
#define IBMBIDI_TEXTDIRECTION       1
#define IBMBIDI_TEXTTYPE            2
#define IBMBIDI_NUMERAL             4
#define IBMBIDI_SUPPORTMODE         5

// Text Direction -- bidi.direction
#define IBMBIDI_TEXTDIRECTION_LTR       1  // 1 = directionLTRBidi *
#define IBMBIDI_TEXTDIRECTION_RTL       2  // 2 = directionRTLBidi

// Text Type -- bidi.texttype
#define IBMBIDI_TEXTTYPE_CHARSET        1  // 1 = charsettexttypeBidi *
#define IBMBIDI_TEXTTYPE_LOGICAL        2  // 2 = logicaltexttypeBidi
#define IBMBIDI_TEXTTYPE_VISUAL         3  // 3 = visualtexttypeBidi

// Numeral Style -- bidi.numeral
#define IBMBIDI_NUMERAL_NOMINAL         0  // 0 = nominalnumeralBidi *
#define IBMBIDI_NUMERAL_REGULAR         1  // 1 = regularcontextnumeralBidi
#define IBMBIDI_NUMERAL_HINDICONTEXT    2  // 2 = hindicontextnumeralBidi
#define IBMBIDI_NUMERAL_ARABIC          3  // 3 = arabicnumeralBidi
#define IBMBIDI_NUMERAL_HINDI           4  // 4 = hindinumeralBidi
#define IBMBIDI_NUMERAL_PERSIANCONTEXT  5  // 5 = persiancontextnumeralBidi
#define IBMBIDI_NUMERAL_PERSIAN         6  // 6 = persiannumeralBidi

// Support Mode -- bidi.support
#define IBMBIDI_SUPPORTMODE_MOZILLA     1  // 1 = mozillaBidisupport *
#define IBMBIDI_SUPPORTMODE_OSBIDI      2  // 2 = OsBidisupport
#define IBMBIDI_SUPPORTMODE_DISABLE     3  // 3 = disableBidisupport

// The starred defaults packed into one word, one 4-bit field per option:
// direction at bit 0, text type at bit 4, numeral at bit 8, support at bit 12.
#define IBMBIDI_DEFAULT_BIDI_OPTIONS          \
  ( (IBMBIDI_TEXTDIRECTION_LTR   <<  0) |     \
    (IBMBIDI_TEXTTYPE_CHARSET    <<  4) |     \
    (IBMBIDI_NUMERAL_NOMINAL     <<  8) |     \
    (IBMBIDI_SUPPORTMODE_MOZILLA << 12) )
michael@0 | 162 | |
/*
 * Accessors for the packed bidi options word. Each option occupies 4 bits:
 * DIRECTION in bits 0-3, TEXTTYPE in bits 4-7, NUMERAL in bits 8-11 and
 * SUPPORT in bits 12-15.
 */
#define GET_BIDI_OPTION_DIRECTION(bo) \
  (((bo) >> 0) & 0x0000000F)
#define GET_BIDI_OPTION_TEXTTYPE(bo) \
  (((bo) >> 4) & 0x0000000F)
#define GET_BIDI_OPTION_NUMERAL(bo) \
  (((bo) >> 8) & 0x0000000F)
#define GET_BIDI_OPTION_SUPPORT(bo) \
  (((bo) >> 12) & 0x0000000F)

/*
 * Setters: replace one 4-bit field of `bo` in place; the new value is
 * truncated to 4 bits. `bo` must be an lvalue and is evaluated twice.
 *
 * NOTE: each setter expands to a braced compound statement, not an
 * expression, so `if (c) SET_...(x, y); else ...` will not parse.
 */
#define SET_BIDI_OPTION_DIRECTION(bo, dir) \
  { (bo) = ((bo) & 0xFFFFFFF0) | (((dir) & 0x0000000F) << 0); }
#define SET_BIDI_OPTION_TEXTTYPE(bo, tt) \
  { (bo) = ((bo) & 0xFFFFFF0F) | (((tt) & 0x0000000F) << 4); }
#define SET_BIDI_OPTION_NUMERAL(bo, num) \
  { (bo) = ((bo) & 0xFFFFF0FF) | (((num) & 0x0000000F) << 8); }
#define SET_BIDI_OPTION_SUPPORT(bo, sup) \
  { (bo) = ((bo) & 0xFFFF0FFF) | (((sup) & 0x0000000F) << 12); }
michael@0 | 172 | |
/*
 * Digit ranges in Unicode, with matching membership tests.
 * In this file "Hindi" digits are U+0660-U+0669, "Arabic" digits are the
 * ASCII digits U+0030-U+0039, and "Farsi" digits are U+06F0-U+06F9.
 * Each IS_*_DIGIT macro evaluates its argument twice.
 */
#define START_HINDI_DIGITS   0x0660
#define END_HINDI_DIGITS     0x0669
#define IS_HINDI_DIGIT(u) \
  ( ( (u) >= START_HINDI_DIGITS ) && ( (u) <= END_HINDI_DIGITS ) )

#define START_ARABIC_DIGITS  0x0030
#define END_ARABIC_DIGITS    0x0039
#define IS_ARABIC_DIGIT(u) \
  ( ( (u) >= START_ARABIC_DIGITS ) && ( (u) <= END_ARABIC_DIGITS ) )

#define START_FARSI_DIGITS   0x06f0
#define END_FARSI_DIGITS     0x06f9
#define IS_FARSI_DIGIT(u) \
  ( ( (u) >= START_FARSI_DIGITS ) && ( (u) <= END_FARSI_DIGITS ) )
/**
 * Arabic numeric separator and numeric formatting characters:
 *  U+0600;ARABIC NUMBER SIGN
 *  U+0601;ARABIC SIGN SANAH
 *  U+0602;ARABIC FOOTNOTE MARKER
 *  U+0603;ARABIC SIGN SAFHA
 *  U+066A;ARABIC PERCENT SIGN
 *  U+066B;ARABIC DECIMAL SEPARATOR
 *  U+066C;ARABIC THOUSANDS SEPARATOR
 *  U+06DD;ARABIC END OF AYAH
 *
 * The first range checks its lower bound explicitly. Without that check
 * every value up to U+0603, including all of ASCII, would be reported as an
 * Arabic separator whenever the macro is used on a character that has not
 * already been range-checked.
 *
 * The argument is evaluated more than once.
 */
#define IS_ARABIC_SEPARATOR(u) ( ( (u) >= 0x0600 && (u) <= 0x0603 ) || \
                                 ( (u) >= 0x066A && (u) <= 0x066C ) || \
                                 ( (u) == 0x06DD ) )
michael@0 | 197 | |
/*
 * True when `u` falls in one of the ranges or single code points listed
 * below, one per line: the U+05xx entries come first, then the U+06xx ones.
 * The argument is evaluated more than once.
 */
#define IS_BIDI_DIACRITIC(u) (                 \
     ( (u) >= 0x0591 && (u) <= 0x05A1)         \
  || ( (u) >= 0x05A3 && (u) <= 0x05B9)         \
  || ( (u) >= 0x05BB && (u) <= 0x05BD)         \
  || ( (u) == 0x05BF)                          \
  || ( (u) == 0x05C1)                          \
  || ( (u) == 0x05C2)                          \
  || ( (u) == 0x05C4)                          \
  || ( (u) >= 0x064B && (u) <= 0x0652)         \
  || ( (u) == 0x0670)                          \
  || ( (u) >= 0x06D7 && (u) <= 0x06E4)         \
  || ( (u) == 0x06E7)                          \
  || ( (u) == 0x06E8)                          \
  || ( (u) >= 0x06EA && (u) <= 0x06ED) )
michael@0 | 205 | |
/* True for U+0590-U+05FF and for U+FB1D-U+FB4F. */
#define IS_HEBREW_CHAR(c)                              \
  ( ((0x0590 <= (c)) && ((c) <= 0x05FF)) ||            \
    (((c) >= 0xfb1d) && ((c) <= 0xfb4f)) )

/*
 * True for U+0600-U+06FF, U+0750-U+077F and U+08A0-U+08FF. The outer test
 * bounds the value to U+0600-U+08FF; the inner test then excludes
 * U+0700-U+074F and U+0780-U+089F.
 */
#define IS_ARABIC_CHAR(c)                              \
  ( (0x0600 <= (c) && (c) <= 0x08FF) &&                \
    ( (c) <= 0x06ff ||                                 \
      ((c) >= 0x0750 && (c) <= 0x077f) ||              \
      (c) >= 0x08a0 ) )

/*
 * True for an IS_ARABIC_CHAR code point that is not a Hindi digit, a Farsi
 * digit or an Arabic separator.
 */
#define IS_ARABIC_ALPHABETIC(c)                        \
  (IS_ARABIC_CHAR(c) &&                                \
   !(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c)))
michael@0 | 213 | |
/**
 * Range tests for right-to-left code points.
 *
 * The codepoint ranges in the following macros are based on the blocks
 * allocated, or planned to be allocated, to right-to-left characters in the
 * BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
 * according to
 * http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and
 * http://www.unicode.org/roadmaps/
 *
 * Every macro here evaluates its argument more than once.
 */

/* U+0590-U+08FF */
#define IS_IN_BMP_RTL_BLOCK(c) \
  ((0x590 <= (c)) && ((c) <= 0x8ff))

/* U+FB1D-U+FDFF and U+FE70-U+FEFC */
#define IS_RTL_PRESENTATION_FORM(c)               \
  ( ((0xfb1d <= (c)) && ((c) <= 0xfdff)) ||       \
    ((0xfe70 <= (c)) && ((c) <= 0xfefc)) )

/* U+10800-U+10FFF and U+1E800-U+1EFFF */
#define IS_IN_SMP_RTL_BLOCK(c)                    \
  ( ((0x10800 <= (c)) && ((c) <= 0x10fff)) ||     \
    ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)) )

/* BMP-only test: the SMP ranges are not consulted. */
#define UCS2_CHAR_IS_BIDI(c)                      \
  ( (IS_IN_BMP_RTL_BLOCK(c)) ||                   \
    (IS_RTL_PRESENTATION_FORM(c)) )

/* Same as UCS2_CHAR_IS_BIDI, plus the SMP ranges. */
#define UTF32_CHAR_IS_BIDI(c)                     \
  ( (IS_IN_BMP_RTL_BLOCK(c)) ||                   \
    (IS_RTL_PRESENTATION_FORM(c)) ||              \
    (IS_IN_SMP_RTL_BLOCK(c)) )
michael@0 | 233 | #endif /* nsBidiUtils_h__ */ |