|
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
5 |
|
6 #ifndef NS_UNICODEPROPERTIES_H |
|
7 #define NS_UNICODEPROPERTIES_H |
|
8 |
|
9 #include "nsBidiUtils.h" |
|
10 #include "nsIUGenCategory.h" |
|
11 #include "nsUnicodeScriptCodes.h" |
|
12 |
|
13 const nsCharProps1& GetCharProps1(uint32_t aCh); |
|
14 const nsCharProps2& GetCharProps2(uint32_t aCh); |
|
15 |
|
16 namespace mozilla { |
|
17 |
|
18 namespace unicode { |
|
19 |
|
20 extern nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[]; |
|
21 |
|
// Declared here, implemented elsewhere. Judging by the name, this yields the
// mirrored counterpart of aCh -- TODO confirm what is returned for characters
// that have no mirror.
uint32_t GetMirroredChar(uint32_t aCh);
|
23 |
|
24 inline uint8_t GetCombiningClass(uint32_t aCh) { |
|
25 return GetCharProps1(aCh).mCombiningClass; |
|
26 } |
|
27 |
|
28 // returns the detailed General Category in terms of HB_UNICODE_* values |
|
29 inline uint8_t GetGeneralCategory(uint32_t aCh) { |
|
30 return GetCharProps2(aCh).mCategory; |
|
31 } |
|
32 |
|
33 // returns the simplified Gen Category as defined in nsIUGenCategory |
|
34 inline nsIUGenCategory::nsUGenCategory GetGenCategory(uint32_t aCh) { |
|
35 return sDetailedToGeneralCategory[GetGeneralCategory(aCh)]; |
|
36 } |
|
37 |
|
38 inline uint8_t GetEastAsianWidth(uint32_t aCh) { |
|
39 return GetCharProps2(aCh).mEAW; |
|
40 } |
|
41 |
|
42 inline uint8_t GetScriptCode(uint32_t aCh) { |
|
43 return GetCharProps2(aCh).mScriptCode; |
|
44 } |
|
45 |
|
// Declared here, implemented elsewhere. Presumably converts a script code
// (such as the one GetScriptCode returns) into a 32-bit script tag --
// TODO confirm the tag format and the result for unknown codes.
uint32_t GetScriptTagForCode(int32_t aScriptCode);
|
47 |
|
48 inline nsCharType GetBidiCat(uint32_t aCh) { |
|
49 return nsCharType(GetCharProps2(aCh).mBidiCategory); |
|
50 } |
|
51 |
|
// Identifier-modification classes stored in nsCharProps2::mXidmod.
//
// The numeric order is significant: IsRestrictedForIdentifiers treats every
// value greater than XIDMOD_RECOMMENDED as restricted, so the two
// unrestricted classes must stay first.
enum XidmodType {
    XIDMOD_INCLUSION         = 0,
    XIDMOD_RECOMMENDED       = 1,
    XIDMOD_DEFAULT_IGNORABLE = 2,
    XIDMOD_HISTORIC          = 3,
    XIDMOD_LIMITED_USE       = 4,
    XIDMOD_NOT_NFKC          = 5,
    XIDMOD_NOT_XID           = 6,
    XIDMOD_OBSOLETE          = 7,
    XIDMOD_TECHNICAL         = 8,
    XIDMOD_NOT_CHARS         = 9
};
|
64 |
|
65 inline XidmodType GetIdentifierModification(uint32_t aCh) { |
|
66 return XidmodType(GetCharProps2(aCh).mXidmod); |
|
67 } |
|
68 |
|
69 inline bool IsRestrictedForIdentifiers(uint32_t aCh) { |
|
70 XidmodType xm = GetIdentifierModification(aCh); |
|
71 return (xm > XIDMOD_RECOMMENDED); |
|
72 } |
|
73 |
|
74 /** |
|
75 * Return the numeric value of the character. The value returned is the value |
|
76 * of the Numeric_Value in field 7 of the UCD, or -1 if field 7 is empty. |
|
77 * To restrict to decimal digits, the caller should also check whether |
|
78 * GetGeneralCategory returns HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER |
|
79 */ |
|
80 inline int8_t GetNumericValue(uint32_t aCh) { |
|
81 return GetCharProps2(aCh).mNumericValue; |
|
82 } |
|
83 |
|
// Result type of GetHanVariant. The two "only" values occupy separate bits
// and HVT_AnyHan is their union (0x3).
enum HanVariantType {
    HVT_NotHan          = 0,
    HVT_SimplifiedOnly  = 1 << 0,
    HVT_TraditionalOnly = 1 << 1,
    HVT_AnyHan          = HVT_SimplifiedOnly | HVT_TraditionalOnly
};
|
90 |
|
91 HanVariantType GetHanVariant(uint32_t aCh); |
|
92 |
|
93 uint32_t GetFullWidth(uint32_t aCh); |
|
94 |
|
95 bool IsClusterExtender(uint32_t aCh, uint8_t aCategory); |
|
96 |
|
97 inline bool IsClusterExtender(uint32_t aCh) { |
|
98 return IsClusterExtender(aCh, GetGeneralCategory(aCh)); |
|
99 } |
|
100 |
|
// Hangul syllable types, as stored in nsCharProps1::mHangulType.
// L, V and T each occupy their own bit; the LV and LVT values are the
// bitwise unions of their components.
enum HSType {
    HST_NONE = 0,
    HST_L    = 1 << 0,
    HST_V    = 1 << 1,
    HST_T    = 1 << 2,
    HST_LV   = HST_L | HST_V,
    HST_LVT  = HST_L | HST_V | HST_T
};
|
109 |
|
110 inline HSType GetHangulSyllableType(uint32_t aCh) { |
|
111 return HSType(GetCharProps1(aCh).mHangulType); |
|
112 } |
|
113 |
|
// Case mappings covering the full Unicode range.
// In performance-critical code it may pay off to test for ASCII characters
// first and handle them on a separate fast path before calling these.
uint32_t GetUppercase(uint32_t aCh);
uint32_t GetLowercase(uint32_t aCh);

// Maps lowercase to titlecase; uppercase input is returned unchanged.
uint32_t GetTitlecaseForLower(uint32_t aCh);

// Maps both uppercase and lowercase to titlecase.
uint32_t GetTitlecaseForAll(uint32_t aCh);
|
121 |
|
// Shaping categories; every value occupies its own bit.
enum ShapingType {
    SHAPING_DEFAULT   = 1 << 0,
    SHAPING_ARABIC    = 1 << 1,
    SHAPING_HEBREW    = 1 << 2,
    SHAPING_HANGUL    = 1 << 3,
    SHAPING_MONGOLIAN = 1 << 4,
    SHAPING_INDIC     = 1 << 5,
    SHAPING_THAI      = 1 << 6
};
|
131 |
|
// Declared here, implemented elsewhere. Presumably returns the ShapingType
// value(s) that apply to the given script code -- TODO confirm whether the
// result is a single value or a combination of bits.
int32_t ScriptShapingType(int32_t aScriptCode);
|
133 |
|
// Minimal iterator over a string of char16_t that moves forward one Unicode
// grapheme cluster at a time.
//
// The text is not copied: the iterator only keeps pointers into the
// caller's buffer, so that buffer has to outlive the iterator.
class ClusterIterator
{
public:
    // aText is the start of the string, aLength the number of char16_t
    // elements available from there.
    ClusterIterator(const char16_t* aText, uint32_t aLength)
        : mPos(aText)
        , mLimit(aText + aLength)
#ifdef DEBUG
        , mText(aText)
#endif
    {
    }

    // Implicit conversion that yields the current position in the text.
    operator const char16_t*() const
    {
        return mPos;
    }

    // True once the current position has reached (or passed) the limit.
    bool AtEnd() const
    {
        return !(mPos < mLimit);
    }

    // Moves on to the next cluster; defined out of line.
    void Next();

private:
    const char16_t* mPos;    // current position within the text
    const char16_t* mLimit;  // aText + aLength, i.e. one past the last element
#ifdef DEBUG
    const char16_t* mText;   // start of the text; DEBUG builds only
#endif
};
|
163 |
|
164 } // end namespace unicode |
|
165 |
|
166 } // end namespace mozilla |
|
167 |
|
168 #endif /* NS_UNICODEPROPERTIES_H */ |