michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef nsBidiUtils_h__ michael@0: #define nsBidiUtils_h__ michael@0: michael@0: #include "nsStringGlue.h" michael@0: michael@0: /** michael@0: * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt michael@0: * section BIDIRECTIONAL PROPERTIES michael@0: * for the detailed definition of the following categories michael@0: * michael@0: * The values here must match the equivalents in %bidicategorycode in michael@0: * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl michael@0: */ michael@0: michael@0: enum nsCharType { michael@0: eCharType_LeftToRight = 0, michael@0: eCharType_RightToLeft = 1, michael@0: eCharType_EuropeanNumber = 2, michael@0: eCharType_EuropeanNumberSeparator = 3, michael@0: eCharType_EuropeanNumberTerminator = 4, michael@0: eCharType_ArabicNumber = 5, michael@0: eCharType_CommonNumberSeparator = 6, michael@0: eCharType_BlockSeparator = 7, michael@0: eCharType_SegmentSeparator = 8, michael@0: eCharType_WhiteSpaceNeutral = 9, michael@0: eCharType_OtherNeutral = 10, michael@0: eCharType_LeftToRightEmbedding = 11, michael@0: eCharType_LeftToRightOverride = 12, michael@0: eCharType_RightToLeftArabic = 13, michael@0: eCharType_RightToLeftEmbedding = 14, michael@0: eCharType_RightToLeftOverride = 15, michael@0: eCharType_PopDirectionalFormat = 16, michael@0: eCharType_DirNonSpacingMark = 17, michael@0: eCharType_BoundaryNeutral = 18, michael@0: eCharType_CharTypeCount michael@0: }; michael@0: michael@0: /** michael@0: * This specifies the language directional property of a character set. 
michael@0: */ michael@0: typedef enum nsCharType nsCharType; michael@0: michael@0: /** michael@0: * definitions of bidirection character types by category michael@0: */ michael@0: michael@0: #define CHARTYPE_IS_RTL(val) ( ( (val) == eCharType_RightToLeft) || ( (val) == eCharType_RightToLeftArabic) ) michael@0: michael@0: #define CHARTYPE_IS_WEAK(val) ( ( (val) == eCharType_EuropeanNumberSeparator) \ michael@0: || ( (val) == eCharType_EuropeanNumberTerminator) \ michael@0: || ( ( (val) > eCharType_ArabicNumber) && ( (val) != eCharType_RightToLeftArabic) ) ) michael@0: michael@0: /** michael@0: * Inspects a Unichar, converting numbers to Arabic or Hindi forms and returning them michael@0: * @param aChar is the character michael@0: * @param aPrevCharArabic is true if the previous character in the string is an Arabic char michael@0: * @param aNumFlag specifies the conversion to perform: michael@0: * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion michael@0: * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669) michael@0: * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039) michael@0: * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic michael@0: * @return the converted Unichar michael@0: */ michael@0: char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, uint32_t aNumFlag); michael@0: michael@0: /** michael@0: * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place michael@0: * @param aBuffer is the string michael@0: * @param aSize is the size of aBuffer michael@0: * @param aNumFlag specifies the conversion to perform: michael@0: * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion michael@0: * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669) michael@0: * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039) michael@0: * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic michael@0: */ michael@0: 
nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag); michael@0: michael@0: /** michael@0: * Give a UTF-32 codepoint michael@0: * return true if the codepoint is a Bidi control character (LRM, RLM, ALM; michael@0: * LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI). michael@0: * Return false, otherwise michael@0: */ michael@0: #define LRM_CHAR 0x200e michael@0: #define LRE_CHAR 0x202a michael@0: #define RLO_CHAR 0x202e michael@0: #define LRI_CHAR 0x2066 michael@0: #define PDI_CHAR 0x2069 michael@0: #define ALM_CHAR 0x061C michael@0: inline bool IsBidiControl(uint32_t aChar) { michael@0: return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) || michael@0: (LRI_CHAR <= aChar && aChar <= PDI_CHAR) || michael@0: (aChar == ALM_CHAR) || michael@0: (aChar & 0xfffffe) == LRM_CHAR); michael@0: } michael@0: michael@0: /** michael@0: * Give an nsString. michael@0: * @return true if the string contains right-to-left characters michael@0: */ michael@0: bool HasRTLChars(const nsAString& aString); michael@0: michael@0: // These values are shared with Preferences dialog michael@0: // ------------------ michael@0: // If Pref values are to be changed michael@0: // in the XUL file of Prefs. the values michael@0: // Must be changed here too.. 
//  ------------------
//
// Names of the preferences that hold each bidi option.
#define IBMBIDI_TEXTDIRECTION_STR       "bidi.direction"
#define IBMBIDI_TEXTTYPE_STR            "bidi.texttype"
#define IBMBIDI_NUMERAL_STR             "bidi.numeral"
#define IBMBIDI_SUPPORTMODE_STR         "bidi.support"

#define IBMBIDI_TEXTDIRECTION       1
#define IBMBIDI_TEXTTYPE            2
#define IBMBIDI_NUMERAL             4
#define IBMBIDI_SUPPORTMODE         5

// In the tables below, the entries marked with a trailing '*' coincide with
// the values combined into IBMBIDI_DEFAULT_BIDI_OPTIONS.

//  ------------------
//  Text Direction
//  ------------------
//  bidi.direction
#define IBMBIDI_TEXTDIRECTION_LTR     1 //  1 = directionLTRBidi *
#define IBMBIDI_TEXTDIRECTION_RTL     2 //  2 = directionRTLBidi
//  ------------------
//  Text Type
//  ------------------
//  bidi.texttype
#define IBMBIDI_TEXTTYPE_CHARSET      1 //  1 = charsettexttypeBidi *
#define IBMBIDI_TEXTTYPE_LOGICAL      2 //  2 = logicaltexttypeBidi
#define IBMBIDI_TEXTTYPE_VISUAL       3 //  3 = visualtexttypeBidi
//  ------------------
//  Numeral Style
//  ------------------
//  bidi.numeral
#define IBMBIDI_NUMERAL_NOMINAL        0 //  0 = nominalnumeralBidi *
#define IBMBIDI_NUMERAL_REGULAR        1 //  1 = regularcontextnumeralBidi
#define IBMBIDI_NUMERAL_HINDICONTEXT   2 //  2 = hindicontextnumeralBidi
#define IBMBIDI_NUMERAL_ARABIC         3 //  3 = arabicnumeralBidi
#define IBMBIDI_NUMERAL_HINDI          4 //  4 = hindinumeralBidi
#define IBMBIDI_NUMERAL_PERSIANCONTEXT 5 //  5 = persiancontextnumeralBidi
#define IBMBIDI_NUMERAL_PERSIAN        6 //  6 = persiannumeralBidi
//  ------------------
//  Support Mode
//  ------------------
//  bidi.support
#define IBMBIDI_SUPPORTMODE_MOZILLA    1 //  1 = mozillaBidisupport *
#define IBMBIDI_SUPPORTMODE_OSBIDI     2 //  2 = OsBidisupport
#define IBMBIDI_SUPPORTMODE_DISABLE    3 //  3 = disableBidisupport

/*
 * Packed bidi options word: four 4-bit fields, from the least significant
 * nibble upwards: direction, text type, numeral style, support mode.
 */
#define IBMBIDI_DEFAULT_BIDI_OPTIONS \
        ((IBMBIDI_TEXTDIRECTION_LTR<<0)  | \
         (IBMBIDI_TEXTTYPE_CHARSET<<4)   | \
         (IBMBIDI_NUMERAL_NOMINAL<<8)    | \
         (IBMBIDI_SUPPORTMODE_MOZILLA<<12))

/* Extract one 4-bit field from a packed options word. */
#define GET_BIDI_OPTION_DIRECTION(bo) (((bo)>>0)  & 0x0000000F) /* 4 bits for DIRECTION */
#define GET_BIDI_OPTION_TEXTTYPE(bo)  (((bo)>>4)  & 0x0000000F) /* 4 bits for TEXTTYPE  */
#define GET_BIDI_OPTION_NUMERAL(bo)   (((bo)>>8)  & 0x0000000F) /* 4 bits for NUMERAL   */
#define GET_BIDI_OPTION_SUPPORT(bo)   (((bo)>>12) & 0x0000000F) /* 4 bits for SUPPORT   */

/*
 * Overwrite one 4-bit field of a packed options word in place; `bo` must be a
 * modifiable lvalue and the new value is truncated to its low 4 bits.
 *
 * NOTE(review): each macro expands to a braced statement block, not an
 * expression. Written as `if (c) SET_...(bo, v); else ...` the trailing
 * semicolon ends the `if` and the `else` no longer compiles; put braces around
 * such call sites.
 */
#define SET_BIDI_OPTION_DIRECTION(bo, dir) {(bo)=((bo) & 0xFFFFFFF0)|(((dir)& 0x0000000F)<<0);}
#define SET_BIDI_OPTION_TEXTTYPE(bo, tt)   {(bo)=((bo) & 0xFFFFFF0F)|(((tt)& 0x0000000F)<<4);}
#define SET_BIDI_OPTION_NUMERAL(bo, num)   {(bo)=((bo) & 0xFFFFF0FF)|(((num)& 0x0000000F)<<8);}
#define SET_BIDI_OPTION_SUPPORT(bo, sup)   {(bo)=((bo) & 0xFFFF0FFF)|(((sup)& 0x0000000F)<<12);}

/* Constants related to the position of numerics in the codepage */
#define START_HINDI_DIGITS              0x0660
#define END_HINDI_DIGITS                0x0669
#define START_ARABIC_DIGITS             0x0030
#define END_ARABIC_DIGITS               0x0039
#define START_FARSI_DIGITS              0x06f0
#define END_FARSI_DIGITS                0x06f9
/* Inclusive range tests; each evaluates its argument twice. */
#define IS_HINDI_DIGIT(u)   ( ( (u) >= START_HINDI_DIGITS )  && ( (u) <= END_HINDI_DIGITS ) )
#define IS_ARABIC_DIGIT(u)  ( ( (u) >= START_ARABIC_DIGITS ) && ( (u) <= END_ARABIC_DIGITS ) )
#define IS_FARSI_DIGIT(u)   ( ( (u) >= START_FARSI_DIGITS )  && ( (u) <= END_FARSI_DIGITS ) )
/**
 * Arabic numeric separator and numeric formatting characters:
 *  U+0600;ARABIC NUMBER SIGN
 *  U+0601;ARABIC SIGN SANAH
 *  U+0602;ARABIC FOOTNOTE MARKER
 *  U+0603;ARABIC SIGN SAFHA
 *  U+066A;ARABIC PERCENT SIGN
 *  U+066B;ARABIC DECIMAL SEPARATOR
 *  U+066C;ARABIC THOUSANDS SEPARATOR
 *  U+06DD;ARABIC END OF AYAH
 *
 * The lower bound of the first range is checked explicitly so that the macro
 * is correct on its own and does not report code points below U+0600 (such as
 * ASCII) as Arabic separators.
 */
#define IS_ARABIC_SEPARATOR(u) ( ( (u) >= 0x0600 && (u) <= 0x0603 ) || \
                                 ( (u) >= 0x066A && (u) <= 0x066C ) || \
                                 ( (u) == 0x06DD ) )

/* Code points in the listed ranges of 0x0591-0x05C4 and 0x064B-0x06ED. */
#define IS_BIDI_DIACRITIC(u) ( \
  ( (u) >= 0x0591 && (u) <= 0x05A1) || ( (u) >= 0x05A3 && (u) <= 0x05B9) \
    || ( (u) >= 0x05BB && (u) <= 0x05BD) || ( (u) == 0x05BF) || ( (u) == 0x05C1) \
    || ( (u) == 0x05C2) || ( (u) == 0x05C4) \
    || ( (u) >= 0x064B && (u) <= 0x0652) || ( (u) == 0x0670) \
    || ( (u) >= 0x06D7 && (u) <= 0x06E4) || ( (u) == 0x06E7) || ( (u) == 0x06E8) \
    || ( (u) >= 0x06EA && (u) <= 0x06ED) )

/* 0x0590-0x05FF, or 0xfb1d-0xfb4f. */
#define IS_HEBREW_CHAR(c) (((0x0590 <= (c)) && ((c) <= 0x05FF)) || (((c) >= 0xfb1d) && ((c) <= 0xfb4f)))
/* 0x0600-0x06ff, 0x0750-0x077f, or 0x08a0-0x08FF. */
#define IS_ARABIC_CHAR(c) ( (0x0600 <= (c) && (c) <= 0x08FF) && \
                            ( (c) <= 0x06ff ||                  \
                              ((c) >= 0x0750 && (c) <= 0x077f) || \
                              (c) >= 0x08a0 ) )
/* An IS_ARABIC_CHAR code point that is not a Hindi/Farsi digit or separator. */
#define IS_ARABIC_ALPHABETIC(c) (IS_ARABIC_CHAR(c) && \
                                !(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c)))

/**
 * The codepoint ranges in the following macros are based on the blocks
 * allocated, or planned to be allocated, to right-to-left characters in the
 * BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
 * according to
 * http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and
 * http://www.unicode.org/roadmaps/
 */

#define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff))
#define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \
                                     ((0xfe70 <= (c)) && ((c) <= 0xfefc)))
#define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
                                ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
/* BMP-only test, for 16-bit code units. */
#define UCS2_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
                              (IS_RTL_PRESENTATION_FORM(c)))
/* Same as UCS2_CHAR_IS_BIDI plus the SMP right-to-left blocks. */
#define UTF32_CHAR_IS_BIDI(c)  ((IS_IN_BMP_RTL_BLOCK(c)) ||       \
                               (IS_RTL_PRESENTATION_FORM(c)) || \
                               (IS_IN_SMP_RTL_BLOCK(c)))
#endif  /* nsBidiUtils_h__ */