michael@0: /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- michael@0: * This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef NS_UNICODEPROPERTIES_H michael@0: #define NS_UNICODEPROPERTIES_H michael@0: michael@0: #include "nsBidiUtils.h" michael@0: #include "nsIUGenCategory.h" michael@0: #include "nsUnicodeScriptCodes.h" michael@0: michael@0: const nsCharProps1& GetCharProps1(uint32_t aCh); michael@0: const nsCharProps2& GetCharProps2(uint32_t aCh); michael@0: michael@0: namespace mozilla { michael@0: michael@0: namespace unicode { michael@0: michael@0: extern nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[]; michael@0: michael@0: uint32_t GetMirroredChar(uint32_t aCh); michael@0: michael@0: inline uint8_t GetCombiningClass(uint32_t aCh) { michael@0: return GetCharProps1(aCh).mCombiningClass; michael@0: } michael@0: michael@0: // returns the detailed General Category in terms of HB_UNICODE_* values michael@0: inline uint8_t GetGeneralCategory(uint32_t aCh) { michael@0: return GetCharProps2(aCh).mCategory; michael@0: } michael@0: michael@0: // returns the simplified Gen Category as defined in nsIUGenCategory michael@0: inline nsIUGenCategory::nsUGenCategory GetGenCategory(uint32_t aCh) { michael@0: return sDetailedToGeneralCategory[GetGeneralCategory(aCh)]; michael@0: } michael@0: michael@0: inline uint8_t GetEastAsianWidth(uint32_t aCh) { michael@0: return GetCharProps2(aCh).mEAW; michael@0: } michael@0: michael@0: inline uint8_t GetScriptCode(uint32_t aCh) { michael@0: return GetCharProps2(aCh).mScriptCode; michael@0: } michael@0: michael@0: uint32_t GetScriptTagForCode(int32_t aScriptCode); michael@0: michael@0: inline nsCharType GetBidiCat(uint32_t aCh) { michael@0: return nsCharType(GetCharProps2(aCh).mBidiCategory); michael@0: } michael@0: michael@0: enum XidmodType { michael@0: XIDMOD_INCLUSION, michael@0: XIDMOD_RECOMMENDED, michael@0: XIDMOD_DEFAULT_IGNORABLE, michael@0: XIDMOD_HISTORIC, michael@0: XIDMOD_LIMITED_USE, michael@0: XIDMOD_NOT_NFKC, michael@0: XIDMOD_NOT_XID, michael@0: XIDMOD_OBSOLETE, michael@0: XIDMOD_TECHNICAL, michael@0: XIDMOD_NOT_CHARS michael@0: }; michael@0: michael@0: inline XidmodType GetIdentifierModification(uint32_t aCh) { michael@0: return XidmodType(GetCharProps2(aCh).mXidmod); michael@0: } michael@0: michael@0: inline bool IsRestrictedForIdentifiers(uint32_t aCh) { michael@0: XidmodType xm = GetIdentifierModification(aCh); michael@0: return (xm > XIDMOD_RECOMMENDED); michael@0: } michael@0: michael@0: /** michael@0: * Return the numeric value of the character. The value returned is the value michael@0: * of the Numeric_Value in field 7 of the UCD, or -1 if field 7 is empty. michael@0: * To restrict to decimal digits, the caller should also check whether michael@0: * GetGeneralCategory returns HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER michael@0: */ michael@0: inline int8_t GetNumericValue(uint32_t aCh) { michael@0: return GetCharProps2(aCh).mNumericValue; michael@0: } michael@0: michael@0: enum HanVariantType { michael@0: HVT_NotHan = 0x0, michael@0: HVT_SimplifiedOnly = 0x1, michael@0: HVT_TraditionalOnly = 0x2, michael@0: HVT_AnyHan = 0x3 michael@0: }; michael@0: michael@0: HanVariantType GetHanVariant(uint32_t aCh); michael@0: michael@0: uint32_t GetFullWidth(uint32_t aCh); michael@0: michael@0: bool IsClusterExtender(uint32_t aCh, uint8_t aCategory); michael@0: michael@0: inline bool IsClusterExtender(uint32_t aCh) { michael@0: return IsClusterExtender(aCh, GetGeneralCategory(aCh)); michael@0: } michael@0: michael@0: enum HSType { michael@0: HST_NONE = 0x00, michael@0: HST_L = 0x01, michael@0: HST_V = 0x02, michael@0: HST_T = 0x04, michael@0: HST_LV = 0x03, michael@0: HST_LVT = 0x07 michael@0: }; michael@0: michael@0: inline HSType GetHangulSyllableType(uint32_t aCh) { michael@0: return HSType(GetCharProps1(aCh).mHangulType); michael@0: } michael@0: michael@0: // Case mappings for the full Unicode range; michael@0: // note that it may be worth testing for ASCII chars and taking michael@0: // a separate fast-path before calling these, in perf-critical places michael@0: uint32_t GetUppercase(uint32_t aCh); michael@0: uint32_t GetLowercase(uint32_t aCh); michael@0: uint32_t GetTitlecaseForLower(uint32_t aCh); // maps LC to titlecase, UC unchanged michael@0: uint32_t GetTitlecaseForAll(uint32_t aCh); // maps both UC and LC to titlecase michael@0: michael@0: enum ShapingType { michael@0: SHAPING_DEFAULT = 0x0001, michael@0: SHAPING_ARABIC = 0x0002, michael@0: SHAPING_HEBREW = 0x0004, michael@0: SHAPING_HANGUL = 0x0008, michael@0: SHAPING_MONGOLIAN = 0x0010, michael@0: SHAPING_INDIC = 0x0020, michael@0: SHAPING_THAI = 0x0040 michael@0: }; michael@0: michael@0: int32_t ScriptShapingType(int32_t aScriptCode); michael@0: michael@0: // A simple iterator for a string of char16_t codepoints that advances michael@0: // by Unicode grapheme clusters michael@0: class ClusterIterator michael@0: { michael@0: public: michael@0: ClusterIterator(const char16_t* aText, uint32_t aLength) michael@0: : mPos(aText), mLimit(aText + aLength) michael@0: #ifdef DEBUG michael@0: , mText(aText) michael@0: #endif michael@0: { } michael@0: michael@0: operator const char16_t* () const { michael@0: return mPos; michael@0: } michael@0: michael@0: bool AtEnd() const { michael@0: return mPos >= mLimit; michael@0: } michael@0: michael@0: void Next(); michael@0: michael@0: private: michael@0: const char16_t* mPos; michael@0: const char16_t* mLimit; michael@0: #ifdef DEBUG michael@0: const char16_t* mText; michael@0: #endif michael@0: }; michael@0: michael@0: } // end namespace unicode michael@0: michael@0: } // end namespace mozilla michael@0: michael@0: #endif /* NS_UNICODEPROPERTIES_H */