|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #ifndef nsBidiUtils_h__ |
|
7 #define nsBidiUtils_h__ |
|
8 |
|
9 #include "nsStringGlue.h" |
|
10 |
|
/**
 * Bidirectional character categories, also described by the original
 * header as "the language directional property of a character set".
 *
 * For the detailed definition of each category, read the section
 * BIDIRECTIONAL PROPERTIES of
 *   ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
 *
 * The numeric values must match their equivalents in %bidicategorycode in
 *   mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl
 * so never renumber an entry without updating that script as well.
 */
typedef enum nsCharType {
  eCharType_LeftToRight              = 0,
  eCharType_RightToLeft              = 1,
  eCharType_EuropeanNumber           = 2,
  eCharType_EuropeanNumberSeparator  = 3,
  eCharType_EuropeanNumberTerminator = 4,
  eCharType_ArabicNumber             = 5,
  eCharType_CommonNumberSeparator    = 6,
  eCharType_BlockSeparator           = 7,
  eCharType_SegmentSeparator         = 8,
  eCharType_WhiteSpaceNeutral        = 9,
  eCharType_OtherNeutral             = 10,
  eCharType_LeftToRightEmbedding     = 11,
  eCharType_LeftToRightOverride      = 12,
  eCharType_RightToLeftArabic        = 13,
  eCharType_RightToLeftEmbedding     = 14,
  eCharType_RightToLeftOverride      = 15,
  eCharType_PopDirectionalFormat     = 16,
  eCharType_DirNonSpacingMark        = 17,
  eCharType_BoundaryNeutral          = 18,
  // Must stay last: its implicit value is the number of categories above.
  eCharType_CharTypeCount
} nsCharType;
|
47 |
|
/**
 * Classification of bidirectional character types by category.
 *
 * Both macros compare their argument against nsCharType enumerators and
 * evaluate it more than once, so do not pass an expression with side
 * effects.
 */

/* True when val is eCharType_RightToLeft or eCharType_RightToLeftArabic. */
#define CHARTYPE_IS_RTL(val)                 \
  (((val) == eCharType_RightToLeft) ||       \
   ((val) == eCharType_RightToLeftArabic))

/*
 * True when val is eCharType_EuropeanNumberSeparator,
 * eCharType_EuropeanNumberTerminator, or any value greater than
 * eCharType_ArabicNumber other than eCharType_RightToLeftArabic.
 */
#define CHARTYPE_IS_WEAK(val)                         \
  (((val) == eCharType_EuropeanNumberSeparator) ||    \
   ((val) == eCharType_EuropeanNumberTerminator) ||   \
   (((val) > eCharType_ArabicNumber) &&               \
    ((val) != eCharType_RightToLeftArabic)))
|
57 |
|
/**
 * Inspect a single Unichar and return it with numbers converted to their
 * Arabic or Hindi form.
 *
 * @param aChar           the character to inspect
 * @param aPrevCharArabic true if the previous character in the string is
 *                        an Arabic char
 * @param aNumFlag        the conversion to perform:
 *          IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
 *          IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms
 *                                        (Unicode 0660-0669)
 *          IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms
 *                                        (Unicode 0030-0039)
 *          IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to
 *                                        Hindi, otherwise to Arabic
 * @return the converted Unichar
 */
char16_t HandleNumberInChar(char16_t aChar,
                            bool aPrevCharArabic,
                            uint32_t aNumFlag);
|
70 |
|
71 /** |
|
72 * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place |
|
73 * @param aBuffer is the string |
|
74 * @param aSize is the size of aBuffer |
|
75 * @param aNumFlag specifies the conversion to perform: |
|
76 * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion |
|
77 * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669) |
|
78 * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039) |
|
79 * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic |
|
80 */ |
|
81 nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag); |
|
82 |
|
/**
 * Code points that delimit the Bidi control characters recognised by
 * IsBidiControl().
 */
#define LRM_CHAR 0x200e  // LRM; RLM is the next code point (U+200F)
#define LRE_CHAR 0x202a  // start of the LRE, RLE, PDF, LRO, RLO run
#define RLO_CHAR 0x202e  // end of that run
#define LRI_CHAR 0x2066  // start of the LRI, RLI, FSI, PDI run
#define PDI_CHAR 0x2069  // end of that run
#define ALM_CHAR 0x061C

/**
 * Given a UTF-32 codepoint, tell whether it is a Bidi control character.
 *
 * @param aChar the codepoint to test
 * @return true if aChar is one of LRM, RLM, ALM; LRE, RLE, PDF, LRO, RLO;
 *         LRI, RLI, FSI, PDI. Return false otherwise, including for
 *         values above the 24-bit range.
 */
inline bool IsBidiControl(uint32_t aChar) {
  return (LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
         (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
         (aChar == ALM_CHAR) ||
         // LRM (U+200E) and RLM (U+200F) differ only in bit 0, so clear
         // just that bit. The mask must keep all other 31 bits: a 24-bit
         // mask (0xfffffe) would also throw away bits 24-31 and accept
         // values such as 0x0100200E.
         ((aChar & 0xfffffffeU) == LRM_CHAR);
}
|
101 |
|
102 /** |
|
103 * Give an nsString. |
|
104 * @return true if the string contains right-to-left characters |
|
105 */ |
|
106 bool HasRTLChars(const nsAString& aString); |
|
107 |
|
// ---------------------------------------------------------------------
// The values below are shared with the Preferences dialog.
// If the pref values are changed in the XUL file of Prefs, they must be
// changed here too.
// ---------------------------------------------------------------------

// Preference name strings.
#define IBMBIDI_TEXTDIRECTION_STR  "bidi.direction"
#define IBMBIDI_TEXTTYPE_STR       "bidi.texttype"
#define IBMBIDI_NUMERAL_STR        "bidi.numeral"
#define IBMBIDI_SUPPORTMODE_STR    "bidi.support"

// Numeric constants named after the same four preferences.
// Note that the value 3 is not assigned to any of them here.
#define IBMBIDI_TEXTDIRECTION  1
#define IBMBIDI_TEXTTYPE       2
#define IBMBIDI_NUMERAL        4
#define IBMBIDI_SUPPORTMODE    5
|
124 |
|
// Values accepted by each bidi preference. The value marked with "*" in
// each group is the one used by IBMBIDI_DEFAULT_BIDI_OPTIONS below.

// ------------------
//  Text Direction (bidi.direction)
// ------------------
#define IBMBIDI_TEXTDIRECTION_LTR       1  // 1 = directionLTRBidi *
#define IBMBIDI_TEXTDIRECTION_RTL       2  // 2 = directionRTLBidi

// ------------------
//  Text Type (bidi.texttype)
// ------------------
#define IBMBIDI_TEXTTYPE_CHARSET        1  // 1 = charsettexttypeBidi *
#define IBMBIDI_TEXTTYPE_LOGICAL        2  // 2 = logicaltexttypeBidi
#define IBMBIDI_TEXTTYPE_VISUAL         3  // 3 = visualtexttypeBidi

// ------------------
//  Numeral Style (bidi.numeral)
// ------------------
#define IBMBIDI_NUMERAL_NOMINAL         0  // 0 = nominalnumeralBidi *
#define IBMBIDI_NUMERAL_REGULAR         1  // 1 = regularcontextnumeralBidi
#define IBMBIDI_NUMERAL_HINDICONTEXT    2  // 2 = hindicontextnumeralBidi
#define IBMBIDI_NUMERAL_ARABIC          3  // 3 = arabicnumeralBidi
#define IBMBIDI_NUMERAL_HINDI           4  // 4 = hindinumeralBidi
#define IBMBIDI_NUMERAL_PERSIANCONTEXT  5  // 5 = persiancontextnumeralBidi
#define IBMBIDI_NUMERAL_PERSIAN         6  // 6 = persiannumeralBidi

// ------------------
//  Support Mode (bidi.support)
// ------------------
#define IBMBIDI_SUPPORTMODE_MOZILLA     1  // 1 = mozillaBidisupport *
#define IBMBIDI_SUPPORTMODE_OSBIDI      2  // 2 = OsBidisupport
#define IBMBIDI_SUPPORTMODE_DISABLE     3  // 3 = disableBidisupport

// Default option word: one 4-bit field per preference, packed as
// direction (bits 0-3), text type (bits 4-7), numeral (bits 8-11) and
// support mode (bits 12-15).
#define IBMBIDI_DEFAULT_BIDI_OPTIONS        \
  ((IBMBIDI_TEXTDIRECTION_LTR   << 0)  |    \
   (IBMBIDI_TEXTTYPE_CHARSET    << 4)  |    \
   (IBMBIDI_NUMERAL_NOMINAL     << 8)  |    \
   (IBMBIDI_SUPPORTMODE_MOZILLA << 12))
|
162 |
|
/*
 * Accessors for the packed bidi option word. Each option occupies its own
 * 4-bit field: DIRECTION in bits 0-3, TEXTTYPE in bits 4-7, NUMERAL in
 * bits 8-11 and SUPPORT in bits 12-15.
 */

/* Extract one 4-bit field from the option word bo. */
#define GET_BIDI_OPTION_DIRECTION(bo) (((bo) >> 0)  & 0x0000000F) /* 4 bits for DIRECTION */
#define GET_BIDI_OPTION_TEXTTYPE(bo)  (((bo) >> 4)  & 0x0000000F) /* 4 bits for TEXTTYPE */
#define GET_BIDI_OPTION_NUMERAL(bo)   (((bo) >> 8)  & 0x0000000F) /* 4 bits for NUMERAL */
#define GET_BIDI_OPTION_SUPPORT(bo)   (((bo) >> 12) & 0x0000000F) /* 4 bits for SUPPORT */

/*
 * Store the low 4 bits of the second argument into the matching field of
 * bo, leaving the other bits of bo untouched. bo must be a modifiable
 * lvalue and is evaluated twice.
 *
 * The bodies are wrapped in do { } while (0) so that each macro behaves
 * like a single statement: "if (c) SET_...(bo, v); else ..." would not
 * compile with a bare { } block followed by ';'. Always terminate the
 * invocation with a semicolon.
 */
#define SET_BIDI_OPTION_DIRECTION(bo, dir) \
  do { (bo) = ((bo) & 0xFFFFFFF0) | (((dir) & 0x0000000F) << 0); } while (0)
#define SET_BIDI_OPTION_TEXTTYPE(bo, tt) \
  do { (bo) = ((bo) & 0xFFFFFF0F) | (((tt) & 0x0000000F) << 4); } while (0)
#define SET_BIDI_OPTION_NUMERAL(bo, num) \
  do { (bo) = ((bo) & 0xFFFFF0FF) | (((num) & 0x0000000F) << 8); } while (0)
#define SET_BIDI_OPTION_SUPPORT(bo, sup) \
  do { (bo) = ((bo) & 0xFFFF0FFF) | (((sup) & 0x0000000F) << 12); } while (0)
|
172 |
|
/*
 * Constants related to the position of numerics in the codepage.
 * Each START/END pair is an inclusive range of ten code points:
 *   HINDI  U+0660..U+0669
 *   ARABIC U+0030..U+0039 (the ASCII digits '0'..'9')
 *   FARSI  U+06F0..U+06F9
 */
#define START_HINDI_DIGITS   0x0660
#define END_HINDI_DIGITS     0x0669
#define START_ARABIC_DIGITS  0x0030
#define END_ARABIC_DIGITS    0x0039
#define START_FARSI_DIGITS   0x06f0
#define END_FARSI_DIGITS     0x06f9

/* Range tests for the digit blocks above; u is evaluated twice. */
#define IS_HINDI_DIGIT(u) \
  (((u) >= START_HINDI_DIGITS) && ((u) <= END_HINDI_DIGITS))
#define IS_ARABIC_DIGIT(u) \
  (((u) >= START_ARABIC_DIGITS) && ((u) <= END_ARABIC_DIGITS))
#define IS_FARSI_DIGIT(u) \
  (((u) >= START_FARSI_DIGITS) && ((u) <= END_FARSI_DIGITS))
|
/**
 * Arabic numeric separator and numeric formatting characters:
 *  U+0600;ARABIC NUMBER SIGN
 *  U+0601;ARABIC SIGN SANAH
 *  U+0602;ARABIC FOOTNOTE MARKER
 *  U+0603;ARABIC SIGN SAFHA
 *  U+066A;ARABIC PERCENT SIGN
 *  U+066B;ARABIC DECIMAL SEPARATOR
 *  U+066C;ARABIC THOUSANDS SEPARATOR
 *  U+06DD;ARABIC END OF AYAH
 *
 * The lower bound 0x0600 is checked explicitly so that the macro is
 * correct on its own; without it every value up to 0x0603 (for instance
 * all of ASCII) would be reported as an Arabic separator. u is evaluated
 * several times, so do not pass an expression with side effects.
 */
#define IS_ARABIC_SEPARATOR(u)               \
  (((u) >= 0x0600 && (u) <= 0x0603) ||       \
   ((u) >= 0x066A && (u) <= 0x066C) ||       \
   ((u) == 0x06DD))
|
197 |
|
/*
 * True when u falls in one of the listed code point ranges:
 * U+0591-05A1, U+05A3-05B9, U+05BB-05BD, U+05BF, U+05C1, U+05C2, U+05C4,
 * U+064B-0652, U+0670, U+06D7-06E4, U+06E7, U+06E8 and U+06EA-06ED.
 * u is evaluated several times.
 */
#define IS_BIDI_DIACRITIC(u)              \
  (((u) >= 0x0591 && (u) <= 0x05A1) ||    \
   ((u) >= 0x05A3 && (u) <= 0x05B9) ||    \
   ((u) >= 0x05BB && (u) <= 0x05BD) ||    \
   ((u) == 0x05BF) ||                     \
   ((u) == 0x05C1) ||                     \
   ((u) == 0x05C2) ||                     \
   ((u) == 0x05C4) ||                     \
   ((u) >= 0x064B && (u) <= 0x0652) ||    \
   ((u) == 0x0670) ||                     \
   ((u) >= 0x06D7 && (u) <= 0x06E4) ||    \
   ((u) == 0x06E7) ||                     \
   ((u) == 0x06E8) ||                     \
   ((u) >= 0x06EA && (u) <= 0x06ED))
|
205 |
|
/* True for U+0590..U+05FF and for U+FB1D..U+FB4F. */
#define IS_HEBREW_CHAR(c)                  \
  ((((c) >= 0x0590) && ((c) <= 0x05FF)) || \
   (((c) >= 0xfb1d) && ((c) <= 0xfb4f)))

/*
 * True for the parts of U+0600..U+08FF listed here:
 * U+0600..U+06FF, U+0750..U+077F and U+08A0..U+08FF.
 */
#define IS_ARABIC_CHAR(c)                  \
  (((0x0600 <= (c)) && ((c) <= 0x06ff)) || \
   ((0x0750 <= (c)) && ((c) <= 0x077f)) || \
   ((0x08a0 <= (c)) && ((c) <= 0x08FF)))

/*
 * True for an IS_ARABIC_CHAR code point that is neither a Hindi digit,
 * nor a Farsi digit, nor an Arabic separator.
 */
#define IS_ARABIC_ALPHABETIC(c) \
  (IS_ARABIC_CHAR(c) &&         \
   !IS_HINDI_DIGIT(c) &&        \
   !IS_FARSI_DIGIT(c) &&        \
   !IS_ARABIC_SEPARATOR(c))
|
213 |
|
/**
 * The codepoint ranges in the following macros are based on the blocks
 * allocated, or planned to be allocated, to right-to-left characters in
 * the BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual
 * Plane) according to
 *   http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt
 * and
 *   http://www.unicode.org/roadmaps/
 *
 * Every macro evaluates its argument more than once.
 */

/* U+0590..U+08FF */
#define IS_IN_BMP_RTL_BLOCK(c) \
  (((c) >= 0x590) && ((c) <= 0x8ff))

/* U+FB1D..U+FDFF and U+FE70..U+FEFC */
#define IS_RTL_PRESENTATION_FORM(c)        \
  ((((c) >= 0xfb1d) && ((c) <= 0xfdff)) || \
   (((c) >= 0xfe70) && ((c) <= 0xfefc)))

/* U+10800..U+10FFF and U+1E800..U+1EFFF */
#define IS_IN_SMP_RTL_BLOCK(c)               \
  ((((c) >= 0x10800) && ((c) <= 0x10fff)) || \
   (((c) >= 0x1e800) && ((c) <= 0x1eFFF)))

/* Test limited to the BMP ranges above. */
#define UCS2_CHAR_IS_BIDI(c)    \
  ((IS_IN_BMP_RTL_BLOCK(c)) ||  \
   (IS_RTL_PRESENTATION_FORM(c)))

/* Same test as UCS2_CHAR_IS_BIDI, extended with the SMP ranges. */
#define UTF32_CHAR_IS_BIDI(c)        \
  ((IS_IN_BMP_RTL_BLOCK(c)) ||       \
   (IS_RTL_PRESENTATION_FORM(c)) ||  \
   (IS_IN_SMP_RTL_BLOCK(c)))
|
233 #endif /* nsBidiUtils_h__ */ |