|
1 /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 /* |
|
7 * This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted |
|
8 * for use within Mozilla Gecko, separate from a standard ICU build. |
|
9 * |
|
10 * The original ICU license of the code follows: |
|
11 * |
|
12 * ICU License - ICU 1.8.1 and later |
|
13 * |
|
14 * COPYRIGHT AND PERMISSION NOTICE |
|
15 * |
|
16 * Copyright (c) 1995-2009 International Business Machines Corporation and |
|
17 * others |
|
18 * |
|
19 * All rights reserved. |
|
20 * |
|
21 * Permission is hereby granted, free of charge, to any person obtaining a |
|
22 * copy of this software and associated documentation files (the "Software"), |
|
23 * to deal in the Software without restriction, including without limitation |
|
24 * the rights to use, copy, modify, merge, publish, distribute, and/or sell |
|
25 * copies of the Software, and to permit persons to whom the Software is |
|
26 * furnished to do so, provided that the above copyright notice(s) and this |
|
27 * permission notice appear in all copies of the Software and that both the |
|
28 * above copyright notice(s) and this permission notice appear in supporting |
|
29 * documentation. |
|
30 * |
|
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. |
|
34 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE |
|
35 * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, |
|
36 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
|
37 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
|
38 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS |
|
39 * SOFTWARE. |
|
40 * |
|
41 * Except as contained in this notice, the name of a copyright holder shall |
|
42 * not be used in advertising or otherwise to promote the sale, use or other |
|
43 * dealings in this Software without prior written authorization of the |
|
44 * copyright holder. |
|
45 * |
|
46 * All trademarks and registered trademarks mentioned herein are the property |
|
47 * of their respective owners. |
|
48 */ |
|
49 |
|
50 #include "gfxScriptItemizer.h" |
|
51 #include "nsUnicodeProperties.h" |
|
52 #include "nsCharTraits.h" |
|
53 #include "harfbuzz/hb.h" |
|
54 |
|
/*
 * Helpers for the paired-punctuation stack used by gfxScriptItemizer.
 *
 * The stack is a circular buffer: indices wrap modulo PAREN_STACK_DEPTH,
 * while the entry counters saturate at PAREN_STACK_DEPTH.
 * These macros expand to references to the itemizer's members (pushCount,
 * fixupCount, parenSP, parenStack), so they are only usable inside
 * gfxScriptItemizer member functions.
 */

/* Index arithmetic: wrap a stack index into [0, PAREN_STACK_DEPTH). */
#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
#define INC(sp,count) (MOD((sp) + (count)))
#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
#define INC1(sp) (INC(sp, 1))
#define DEC1(sp) (DEC(sp, 1))

/* Counter arithmetic: add one, but never exceed PAREN_STACK_DEPTH. */
#define LIMIT_INC(sp) (((sp) >= PAREN_STACK_DEPTH)? PAREN_STACK_DEPTH : (sp) + 1)

/* Stack state queries and accessors. */
#define STACK_IS_EMPTY() (pushCount <= 0)
#define STACK_IS_NOT_EMPTY() (!STACK_IS_EMPTY())
#define TOP() (parenStack[parenSP])

/* Forget any entries pending a script fixup (used at the start of a run). */
#define SYNC_FIXUP() (fixupCount = 0)
|
65 |
|
66 void |
|
67 gfxScriptItemizer::push(uint32_t endPairChar, int32_t scriptCode) |
|
68 { |
|
69 pushCount = LIMIT_INC(pushCount); |
|
70 fixupCount = LIMIT_INC(fixupCount); |
|
71 |
|
72 parenSP = INC1(parenSP); |
|
73 parenStack[parenSP].endPairChar = endPairChar; |
|
74 parenStack[parenSP].scriptCode = scriptCode; |
|
75 } |
|
76 |
|
77 void |
|
78 gfxScriptItemizer::pop() |
|
79 { |
|
80 if (STACK_IS_EMPTY()) { |
|
81 return; |
|
82 } |
|
83 |
|
84 if (fixupCount > 0) { |
|
85 fixupCount -= 1; |
|
86 } |
|
87 |
|
88 pushCount -= 1; |
|
89 parenSP = DEC1(parenSP); |
|
90 |
|
91 /* If the stack is now empty, reset the stack |
|
92 pointers to their initial values. |
|
93 */ |
|
94 if (STACK_IS_EMPTY()) { |
|
95 parenSP = -1; |
|
96 } |
|
97 } |
|
98 |
|
99 void |
|
100 gfxScriptItemizer::fixup(int32_t scriptCode) |
|
101 { |
|
102 int32_t fixupSP = DEC(parenSP, fixupCount); |
|
103 |
|
104 while (fixupCount-- > 0) { |
|
105 fixupSP = INC1(fixupSP); |
|
106 parenStack[fixupSP].scriptCode = scriptCode; |
|
107 } |
|
108 } |
|
109 |
|
110 static inline bool |
|
111 SameScript(int32_t runScript, int32_t currCharScript) |
|
112 { |
|
113 return runScript <= MOZ_SCRIPT_INHERITED || |
|
114 currCharScript <= MOZ_SCRIPT_INHERITED || |
|
115 currCharScript == runScript; |
|
116 } |
|
117 |
|
118 // Return whether the char has a mirrored-pair counterpart. |
|
119 // NOTE that this depends on the implementation of nsCharProps records in |
|
120 // nsUnicodeProperties, and may need to be updated if those structures change |
|
121 static inline bool |
|
122 HasMirroredChar(uint32_t aCh) |
|
123 { |
|
124 return GetCharProps1(aCh).mMirrorOffsetIndex != 0; |
|
125 } |
|
126 |
|
127 gfxScriptItemizer::gfxScriptItemizer(const char16_t *src, uint32_t length) |
|
128 : textPtr(src), textLength(length) |
|
129 { |
|
130 reset(); |
|
131 } |
|
132 |
|
133 void |
|
134 gfxScriptItemizer::SetText(const char16_t *src, uint32_t length) |
|
135 { |
|
136 textPtr = src; |
|
137 textLength = length; |
|
138 |
|
139 reset(); |
|
140 } |
|
141 |
|
142 bool |
|
143 gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, |
|
144 int32_t& aRunScript) |
|
145 { |
|
146 /* if we've fallen off the end of the text, we're done */ |
|
147 if (scriptLimit >= textLength) { |
|
148 return false; |
|
149 } |
|
150 |
|
151 SYNC_FIXUP(); |
|
152 scriptCode = MOZ_SCRIPT_COMMON; |
|
153 |
|
154 for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) { |
|
155 uint32_t ch; |
|
156 int32_t sc; |
|
157 uint32_t startOfChar = scriptLimit; |
|
158 |
|
159 ch = textPtr[scriptLimit]; |
|
160 |
|
161 /* decode UTF-16 (may be surrogate pair) */ |
|
162 if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) { |
|
163 uint32_t low = textPtr[scriptLimit + 1]; |
|
164 if (NS_IS_LOW_SURROGATE(low)) { |
|
165 ch = SURROGATE_TO_UCS4(ch, low); |
|
166 scriptLimit += 1; |
|
167 } |
|
168 } |
|
169 |
|
170 // Get the nsCharProps2 record for the current character, |
|
171 // so we can read the script and (if needed) the gen category |
|
172 // without needing to do two multi-level lookups. |
|
173 // NOTE that this means we're relying on an implementation detail |
|
174 // of the nsUnicodeProperties tables, and might have to revise this |
|
175 // if the nsCharProps records used there are modified in future. |
|
176 const nsCharProps2& charProps = GetCharProps2(ch); |
|
177 |
|
178 // Initialize gc to UNASSIGNED; we'll only set it to the true GC |
|
179 // if the character has script=COMMON, otherwise we don't care. |
|
180 uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; |
|
181 |
|
182 sc = charProps.mScriptCode; |
|
183 if (sc == MOZ_SCRIPT_COMMON) { |
|
184 /* |
|
185 * Paired character handling: |
|
186 * |
|
187 * if it's an open character, push it onto the stack. |
|
188 * if it's a close character, find the matching open on the |
|
189 * stack, and use that script code. Any non-matching open |
|
190 * characters above it on the stack will be popped. |
|
191 * |
|
192 * We only do this if the script is COMMON; for chars with |
|
193 * specific script assignments, we just use them as-is. |
|
194 */ |
|
195 gc = charProps.mCategory; |
|
196 if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) { |
|
197 uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch); |
|
198 if (endPairChar != ch) { |
|
199 push(endPairChar, scriptCode); |
|
200 } |
|
201 } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && |
|
202 HasMirroredChar(ch)) |
|
203 { |
|
204 while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) { |
|
205 pop(); |
|
206 } |
|
207 |
|
208 if (STACK_IS_NOT_EMPTY()) { |
|
209 sc = TOP().scriptCode; |
|
210 } |
|
211 } |
|
212 } |
|
213 |
|
214 if (SameScript(scriptCode, sc)) { |
|
215 if (scriptCode <= MOZ_SCRIPT_INHERITED && |
|
216 sc > MOZ_SCRIPT_INHERITED) |
|
217 { |
|
218 scriptCode = sc; |
|
219 fixup(scriptCode); |
|
220 } |
|
221 |
|
222 /* |
|
223 * if this character is a close paired character, |
|
224 * pop the matching open character from the stack |
|
225 */ |
|
226 if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && |
|
227 HasMirroredChar(ch)) { |
|
228 pop(); |
|
229 } |
|
230 } else { |
|
231 /* |
|
232 * reset scriptLimit in case it was advanced during reading a |
|
233 * multiple-code-unit character |
|
234 */ |
|
235 scriptLimit = startOfChar; |
|
236 |
|
237 break; |
|
238 } |
|
239 } |
|
240 |
|
241 aRunStart = scriptStart; |
|
242 aRunLimit = scriptLimit; |
|
243 aRunScript = scriptCode; |
|
244 |
|
245 return true; |
|
246 } |