1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/unicharutil/util/nsUnicodeProperties.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,168 @@ 1.4 +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- 1.5 + * This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#ifndef NS_UNICODEPROPERTIES_H 1.10 +#define NS_UNICODEPROPERTIES_H 1.11 + 1.12 +#include "nsBidiUtils.h" 1.13 +#include "nsIUGenCategory.h" 1.14 +#include "nsUnicodeScriptCodes.h" 1.15 + 1.16 +const nsCharProps1& GetCharProps1(uint32_t aCh); 1.17 +const nsCharProps2& GetCharProps2(uint32_t aCh); 1.18 + 1.19 +namespace mozilla { 1.20 + 1.21 +namespace unicode { 1.22 + 1.23 +extern nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[]; 1.24 + 1.25 +uint32_t GetMirroredChar(uint32_t aCh); 1.26 + 1.27 +inline uint8_t GetCombiningClass(uint32_t aCh) { 1.28 + return GetCharProps1(aCh).mCombiningClass; 1.29 +} 1.30 + 1.31 +// returns the detailed General Category in terms of HB_UNICODE_* values 1.32 +inline uint8_t GetGeneralCategory(uint32_t aCh) { 1.33 + return GetCharProps2(aCh).mCategory; 1.34 +} 1.35 + 1.36 +// returns the simplified Gen Category as defined in nsIUGenCategory 1.37 +inline nsIUGenCategory::nsUGenCategory GetGenCategory(uint32_t aCh) { 1.38 + return sDetailedToGeneralCategory[GetGeneralCategory(aCh)]; 1.39 +} 1.40 + 1.41 +inline uint8_t GetEastAsianWidth(uint32_t aCh) { 1.42 + return GetCharProps2(aCh).mEAW; 1.43 +} 1.44 + 1.45 +inline uint8_t GetScriptCode(uint32_t aCh) { 1.46 + return GetCharProps2(aCh).mScriptCode; 1.47 +} 1.48 + 1.49 +uint32_t GetScriptTagForCode(int32_t aScriptCode); 1.50 + 1.51 +inline nsCharType GetBidiCat(uint32_t aCh) { 1.52 + return nsCharType(GetCharProps2(aCh).mBidiCategory); 1.53 +} 1.54 + 1.55 +enum XidmodType { 1.56 + XIDMOD_INCLUSION, 1.57 + XIDMOD_RECOMMENDED, 1.58 + XIDMOD_DEFAULT_IGNORABLE, 1.59 + XIDMOD_HISTORIC, 1.60 + XIDMOD_LIMITED_USE, 1.61 + XIDMOD_NOT_NFKC, 1.62 + XIDMOD_NOT_XID, 1.63 + XIDMOD_OBSOLETE, 1.64 + XIDMOD_TECHNICAL, 1.65 + XIDMOD_NOT_CHARS 1.66 +}; 1.67 + 1.68 +inline XidmodType GetIdentifierModification(uint32_t aCh) { 1.69 + return XidmodType(GetCharProps2(aCh).mXidmod); 1.70 +} 1.71 + 1.72 +inline bool IsRestrictedForIdentifiers(uint32_t aCh) { 1.73 + XidmodType xm = GetIdentifierModification(aCh); 1.74 + return (xm > XIDMOD_RECOMMENDED); 1.75 +} 1.76 + 1.77 +/** 1.78 + * Return the numeric value of the character. The value returned is the value 1.79 + * of the Numeric_Value in field 7 of the UCD, or -1 if field 7 is empty. 1.80 + * To restrict to decimal digits, the caller should also check whether 1.81 + * GetGeneralCategory returns HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER 1.82 + */ 1.83 +inline int8_t GetNumericValue(uint32_t aCh) { 1.84 + return GetCharProps2(aCh).mNumericValue; 1.85 +} 1.86 + 1.87 +enum HanVariantType { 1.88 + HVT_NotHan = 0x0, 1.89 + HVT_SimplifiedOnly = 0x1, 1.90 + HVT_TraditionalOnly = 0x2, 1.91 + HVT_AnyHan = 0x3 1.92 +}; 1.93 + 1.94 +HanVariantType GetHanVariant(uint32_t aCh); 1.95 + 1.96 +uint32_t GetFullWidth(uint32_t aCh); 1.97 + 1.98 +bool IsClusterExtender(uint32_t aCh, uint8_t aCategory); 1.99 + 1.100 +inline bool IsClusterExtender(uint32_t aCh) { 1.101 + return IsClusterExtender(aCh, GetGeneralCategory(aCh)); 1.102 +} 1.103 + 1.104 +enum HSType { 1.105 + HST_NONE = 0x00, 1.106 + HST_L = 0x01, 1.107 + HST_V = 0x02, 1.108 + HST_T = 0x04, 1.109 + HST_LV = 0x03, 1.110 + HST_LVT = 0x07 1.111 +}; 1.112 + 1.113 +inline HSType GetHangulSyllableType(uint32_t aCh) { 1.114 + return HSType(GetCharProps1(aCh).mHangulType); 1.115 +} 1.116 + 1.117 +// Case mappings for the full Unicode range; 1.118 +// note that it may be worth testing for ASCII chars and taking 1.119 +// a separate fast-path before calling these, in perf-critical places 1.120 +uint32_t GetUppercase(uint32_t aCh); 1.121 +uint32_t GetLowercase(uint32_t aCh); 1.122 +uint32_t GetTitlecaseForLower(uint32_t aCh); // maps LC to titlecase, UC unchanged 1.123 +uint32_t GetTitlecaseForAll(uint32_t aCh); // maps both UC and LC to titlecase 1.124 + 1.125 +enum ShapingType { 1.126 + SHAPING_DEFAULT = 0x0001, 1.127 + SHAPING_ARABIC = 0x0002, 1.128 + SHAPING_HEBREW = 0x0004, 1.129 + SHAPING_HANGUL = 0x0008, 1.130 + SHAPING_MONGOLIAN = 0x0010, 1.131 + SHAPING_INDIC = 0x0020, 1.132 + SHAPING_THAI = 0x0040 1.133 +}; 1.134 + 1.135 +int32_t ScriptShapingType(int32_t aScriptCode); 1.136 + 1.137 +// A simple iterator for a string of char16_t codepoints that advances 1.138 +// by Unicode grapheme clusters 1.139 +class ClusterIterator 1.140 +{ 1.141 +public: 1.142 + ClusterIterator(const char16_t* aText, uint32_t aLength) 1.143 + : mPos(aText), mLimit(aText + aLength) 1.144 +#ifdef DEBUG 1.145 + , mText(aText) 1.146 +#endif 1.147 + { } 1.148 + 1.149 + operator const char16_t* () const { 1.150 + return mPos; 1.151 + } 1.152 + 1.153 + bool AtEnd() const { 1.154 + return mPos >= mLimit; 1.155 + } 1.156 + 1.157 + void Next(); 1.158 + 1.159 +private: 1.160 + const char16_t* mPos; 1.161 + const char16_t* mLimit; 1.162 +#ifdef DEBUG 1.163 + const char16_t* mText; 1.164 +#endif 1.165 +}; 1.166 + 1.167 +} // end namespace unicode 1.168 + 1.169 +} // end namespace mozilla 1.170 + 1.171 +#endif /* NS_UNICODEPROPERTIES_H */