1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/unicharutil/util/nsBidiUtils.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,233 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#ifndef nsBidiUtils_h__ 1.10 +#define nsBidiUtils_h__ 1.11 + 1.12 +#include "nsStringGlue.h" 1.13 + 1.14 + /** 1.15 + * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt 1.16 + * section BIDIRECTIONAL PROPERTIES 1.17 + * for the detailed definition of the following categories 1.18 + * 1.19 + * The values here must match the equivalents in %bidicategorycode in 1.20 + * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl 1.21 + */ 1.22 + 1.23 +enum nsCharType { 1.24 + eCharType_LeftToRight = 0, 1.25 + eCharType_RightToLeft = 1, 1.26 + eCharType_EuropeanNumber = 2, 1.27 + eCharType_EuropeanNumberSeparator = 3, 1.28 + eCharType_EuropeanNumberTerminator = 4, 1.29 + eCharType_ArabicNumber = 5, 1.30 + eCharType_CommonNumberSeparator = 6, 1.31 + eCharType_BlockSeparator = 7, 1.32 + eCharType_SegmentSeparator = 8, 1.33 + eCharType_WhiteSpaceNeutral = 9, 1.34 + eCharType_OtherNeutral = 10, 1.35 + eCharType_LeftToRightEmbedding = 11, 1.36 + eCharType_LeftToRightOverride = 12, 1.37 + eCharType_RightToLeftArabic = 13, 1.38 + eCharType_RightToLeftEmbedding = 14, 1.39 + eCharType_RightToLeftOverride = 15, 1.40 + eCharType_PopDirectionalFormat = 16, 1.41 + eCharType_DirNonSpacingMark = 17, 1.42 + eCharType_BoundaryNeutral = 18, 1.43 + eCharType_CharTypeCount 1.44 +}; 1.45 + 1.46 +/** 1.47 + * This specifies the language directional property of a character set. 1.48 + */ 1.49 +typedef enum nsCharType nsCharType; 1.50 + 1.51 +/** 1.52 + * definitions of bidirection character types by category 1.53 + */ 1.54 + 1.55 +#define CHARTYPE_IS_RTL(val) ( ( (val) == eCharType_RightToLeft) || ( (val) == eCharType_RightToLeftArabic) ) 1.56 + 1.57 +#define CHARTYPE_IS_WEAK(val) ( ( (val) == eCharType_EuropeanNumberSeparator) \ 1.58 + || ( (val) == eCharType_EuropeanNumberTerminator) \ 1.59 + || ( ( (val) > eCharType_ArabicNumber) && ( (val) != eCharType_RightToLeftArabic) ) ) 1.60 + 1.61 + /** 1.62 + * Inspects a Unichar, converting numbers to Arabic or Hindi forms and returning them 1.63 + * @param aChar is the character 1.64 + * @param aPrevCharArabic is true if the previous character in the string is an Arabic char 1.65 + * @param aNumFlag specifies the conversion to perform: 1.66 + * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion 1.67 + * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669) 1.68 + * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039) 1.69 + * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic 1.70 + * @return the converted Unichar 1.71 + */ 1.72 + char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, uint32_t aNumFlag); 1.73 + 1.74 + /** 1.75 + * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place 1.76 + * @param aBuffer is the string 1.77 + * @param aSize is the size of aBuffer 1.78 + * @param aNumFlag specifies the conversion to perform: 1.79 + * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion 1.80 + * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669) 1.81 + * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039) 1.82 + * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic 1.83 + */ 1.84 + nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag); 1.85 + 1.86 + /** 1.87 + * Give a UTF-32 codepoint 1.88 + * return true if the codepoint is a Bidi control character (LRM, RLM, ALM; 1.89 + * LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI). 1.90 + * Return false, otherwise 1.91 + */ 1.92 +#define LRM_CHAR 0x200e 1.93 +#define LRE_CHAR 0x202a 1.94 +#define RLO_CHAR 0x202e 1.95 +#define LRI_CHAR 0x2066 1.96 +#define PDI_CHAR 0x2069 1.97 +#define ALM_CHAR 0x061C 1.98 + inline bool IsBidiControl(uint32_t aChar) { 1.99 + return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) || 1.100 + (LRI_CHAR <= aChar && aChar <= PDI_CHAR) || 1.101 + (aChar == ALM_CHAR) || 1.102 + (aChar & 0xfffffe) == LRM_CHAR); 1.103 + } 1.104 + 1.105 + /** 1.106 + * Give an nsString. 1.107 + * @return true if the string contains right-to-left characters 1.108 + */ 1.109 + bool HasRTLChars(const nsAString& aString); 1.110 + 1.111 +// These values are shared with Preferences dialog 1.112 +// ------------------ 1.113 +// If Pref values are to be changed 1.114 +// in the XUL file of Prefs. the values 1.115 +// Must be changed here too.. 1.116 +// ------------------ 1.117 +// 1.118 +#define IBMBIDI_TEXTDIRECTION_STR "bidi.direction" 1.119 +#define IBMBIDI_TEXTTYPE_STR "bidi.texttype" 1.120 +#define IBMBIDI_NUMERAL_STR "bidi.numeral" 1.121 +#define IBMBIDI_SUPPORTMODE_STR "bidi.support" 1.122 + 1.123 +#define IBMBIDI_TEXTDIRECTION 1 1.124 +#define IBMBIDI_TEXTTYPE 2 1.125 +#define IBMBIDI_NUMERAL 4 1.126 +#define IBMBIDI_SUPPORTMODE 5 1.127 + 1.128 +// ------------------ 1.129 +// Text Direction 1.130 +// ------------------ 1.131 +// bidi.direction 1.132 +#define IBMBIDI_TEXTDIRECTION_LTR 1 // 1 = directionLTRBidi * 1.133 +#define IBMBIDI_TEXTDIRECTION_RTL 2 // 2 = directionRTLBidi 1.134 +// ------------------ 1.135 +// Text Type 1.136 +// ------------------ 1.137 +// bidi.texttype 1.138 +#define IBMBIDI_TEXTTYPE_CHARSET 1 // 1 = charsettexttypeBidi * 1.139 +#define IBMBIDI_TEXTTYPE_LOGICAL 2 // 2 = logicaltexttypeBidi 1.140 +#define IBMBIDI_TEXTTYPE_VISUAL 3 // 3 = visualtexttypeBidi 1.141 +// ------------------ 1.142 +// Numeral Style 1.143 +// ------------------ 1.144 +// bidi.numeral 1.145 +#define IBMBIDI_NUMERAL_NOMINAL 0 // 0 = nominalnumeralBidi * 1.146 +#define IBMBIDI_NUMERAL_REGULAR 1 // 1 = regularcontextnumeralBidi 1.147 +#define IBMBIDI_NUMERAL_HINDICONTEXT 2 // 2 = hindicontextnumeralBidi 1.148 +#define IBMBIDI_NUMERAL_ARABIC 3 // 3 = arabicnumeralBidi 1.149 +#define IBMBIDI_NUMERAL_HINDI 4 // 4 = hindinumeralBidi 1.150 +#define IBMBIDI_NUMERAL_PERSIANCONTEXT 5 // 5 = persiancontextnumeralBidi 1.151 +#define IBMBIDI_NUMERAL_PERSIAN 6 // 6 = persiannumeralBidi 1.152 +// ------------------ 1.153 +// Support Mode 1.154 +// ------------------ 1.155 +// bidi.support 1.156 +#define IBMBIDI_SUPPORTMODE_MOZILLA 1 // 1 = mozillaBidisupport * 1.157 +#define IBMBIDI_SUPPORTMODE_OSBIDI 2 // 2 = OsBidisupport 1.158 +#define IBMBIDI_SUPPORTMODE_DISABLE 3 // 3 = disableBidisupport 1.159 + 1.160 +#define IBMBIDI_DEFAULT_BIDI_OPTIONS \ 1.161 + ((IBMBIDI_TEXTDIRECTION_LTR<<0) | \ 1.162 + (IBMBIDI_TEXTTYPE_CHARSET<<4) | \ 1.163 + (IBMBIDI_NUMERAL_NOMINAL<<8) | \ 1.164 + (IBMBIDI_SUPPORTMODE_MOZILLA<<12)) 1.165 + 1.166 +#define GET_BIDI_OPTION_DIRECTION(bo) (((bo)>>0) & 0x0000000F) /* 4 bits for DIRECTION */ 1.167 +#define GET_BIDI_OPTION_TEXTTYPE(bo) (((bo)>>4) & 0x0000000F) /* 4 bits for TEXTTYPE */ 1.168 +#define GET_BIDI_OPTION_NUMERAL(bo) (((bo)>>8) & 0x0000000F) /* 4 bits for NUMERAL */ 1.169 +#define GET_BIDI_OPTION_SUPPORT(bo) (((bo)>>12) & 0x0000000F) /* 4 bits for SUPPORT */ 1.170 + 1.171 +#define SET_BIDI_OPTION_DIRECTION(bo, dir) {(bo)=((bo) & 0xFFFFFFF0)|(((dir)& 0x0000000F)<<0);} 1.172 +#define SET_BIDI_OPTION_TEXTTYPE(bo, tt) {(bo)=((bo) & 0xFFFFFF0F)|(((tt)& 0x0000000F)<<4);} 1.173 +#define SET_BIDI_OPTION_NUMERAL(bo, num) {(bo)=((bo) & 0xFFFFF0FF)|(((num)& 0x0000000F)<<8);} 1.174 +#define SET_BIDI_OPTION_SUPPORT(bo, sup) {(bo)=((bo) & 0xFFFF0FFF)|(((sup)& 0x0000000F)<<12);} 1.175 + 1.176 +/* Constants related to the position of numerics in the codepage */ 1.177 +#define START_HINDI_DIGITS 0x0660 1.178 +#define END_HINDI_DIGITS 0x0669 1.179 +#define START_ARABIC_DIGITS 0x0030 1.180 +#define END_ARABIC_DIGITS 0x0039 1.181 +#define START_FARSI_DIGITS 0x06f0 1.182 +#define END_FARSI_DIGITS 0x06f9 1.183 +#define IS_HINDI_DIGIT(u) ( ( (u) >= START_HINDI_DIGITS ) && ( (u) <= END_HINDI_DIGITS ) ) 1.184 +#define IS_ARABIC_DIGIT(u) ( ( (u) >= START_ARABIC_DIGITS ) && ( (u) <= END_ARABIC_DIGITS ) ) 1.185 +#define IS_FARSI_DIGIT(u) ( ( (u) >= START_FARSI_DIGITS ) && ( (u) <= END_FARSI_DIGITS ) ) 1.186 +/** 1.187 + * Arabic numeric separator and numeric formatting characters: 1.188 + * U+0600;ARABIC NUMBER SIGN 1.189 + * U+0601;ARABIC SIGN SANAH 1.190 + * U+0602;ARABIC FOOTNOTE MARKER 1.191 + * U+0603;ARABIC SIGN SAFHA 1.192 + * U+066A;ARABIC PERCENT SIGN 1.193 + * U+066B;ARABIC DECIMAL SEPARATOR 1.194 + * U+066C;ARABIC THOUSANDS SEPARATOR 1.195 + * U+06DD;ARABIC END OF AYAH 1.196 + */ 1.197 +#define IS_ARABIC_SEPARATOR(u) ( ( /*(u) >= 0x0600 &&*/ (u) <= 0x0603 ) || \ 1.198 + ( (u) >= 0x066A && (u) <= 0x066C ) || \ 1.199 + ( (u) == 0x06DD ) ) 1.200 + 1.201 +#define IS_BIDI_DIACRITIC(u) ( \ 1.202 + ( (u) >= 0x0591 && (u) <= 0x05A1) || ( (u) >= 0x05A3 && (u) <= 0x05B9) \ 1.203 + || ( (u) >= 0x05BB && (u) <= 0x05BD) || ( (u) == 0x05BF) || ( (u) == 0x05C1) \ 1.204 + || ( (u) == 0x05C2) || ( (u) == 0x05C4) \ 1.205 + || ( (u) >= 0x064B && (u) <= 0x0652) || ( (u) == 0x0670) \ 1.206 + || ( (u) >= 0x06D7 && (u) <= 0x06E4) || ( (u) == 0x06E7) || ( (u) == 0x06E8) \ 1.207 + || ( (u) >= 0x06EA && (u) <= 0x06ED) ) 1.208 + 1.209 +#define IS_HEBREW_CHAR(c) (((0x0590 <= (c)) && ((c) <= 0x05FF)) || (((c) >= 0xfb1d) && ((c) <= 0xfb4f))) 1.210 +#define IS_ARABIC_CHAR(c) ( (0x0600 <= (c) && (c) <= 0x08FF) && \ 1.211 + ( (c) <= 0x06ff || \ 1.212 + ((c) >= 0x0750 && (c) <= 0x077f) || \ 1.213 + (c) >= 0x08a0 ) ) 1.214 +#define IS_ARABIC_ALPHABETIC(c) (IS_ARABIC_CHAR(c) && \ 1.215 + !(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c))) 1.216 + 1.217 +/** 1.218 + * The codepoint ranges in the following macros are based on the blocks 1.219 + * allocated, or planned to be allocated, to right-to-left characters in the 1.220 + * BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane) 1.221 + * according to 1.222 + * http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and 1.223 + * http://www.unicode.org/roadmaps/ 1.224 + */ 1.225 + 1.226 +#define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff)) 1.227 +#define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \ 1.228 + ((0xfe70 <= (c)) && ((c) <= 0xfefc))) 1.229 +#define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \ 1.230 + ((0x1e800 <= (c)) && ((c) <= 0x1eFFF))) 1.231 +#define UCS2_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \ 1.232 + (IS_RTL_PRESENTATION_FORM(c))) 1.233 +#define UTF32_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \ 1.234 + (IS_RTL_PRESENTATION_FORM(c)) || \ 1.235 + (IS_IN_SMP_RTL_BLOCK(c))) 1.236 +#endif /* nsBidiUtils_h__ */