Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 /*
7 * This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted
8 * for use within Mozilla Gecko, separate from a standard ICU build.
9 *
10 * The original ICU license of the code follows:
11 *
12 * ICU License - ICU 1.8.1 and later
13 *
14 * COPYRIGHT AND PERMISSION NOTICE
15 *
16 * Copyright (c) 1995-2009 International Business Machines Corporation and
17 * others
18 *
19 * All rights reserved.
20 *
21 * Permission is hereby granted, free of charge, to any person obtaining a
22 * copy of this software and associated documentation files (the "Software"),
23 * to deal in the Software without restriction, including without limitation
24 * the rights to use, copy, modify, merge, publish, distribute, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, provided that the above copyright notice(s) and this
27 * permission notice appear in all copies of the Software and that both the
28 * above copyright notice(s) and this permission notice appear in supporting
29 * documentation.
30 *
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
34 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
35 * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
36 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
37 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
38 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
39 * SOFTWARE.
40 *
41 * Except as contained in this notice, the name of a copyright holder shall
42 * not be used in advertising or otherwise to promote the sale, use or other
43 * dealings in this Software without prior written authorization of the
44 * copyright holder.
45 *
46 * All trademarks and registered trademarks mentioned herein are the property
47 * of their respective owners.
48 */
50 #include "gfxScriptItemizer.h"
51 #include "nsUnicodeProperties.h"
52 #include "nsCharTraits.h"
53 #include "harfbuzz/hb.h"
/*
 * Helper macros for the circular paren stack.
 *
 * STACK_IS_EMPTY, STACK_IS_NOT_EMPTY, TOP and SYNC_FIXUP refer to
 * pushCount, parenStack, parenSP and fixupCount by bare name, so they
 * only work where those names are in scope (the gfxScriptItemizer member
 * functions in this file).
 */

/* Reduce a stack index modulo PAREN_STACK_DEPTH. */
#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)

/* Increment a counter, saturating at PAREN_STACK_DEPTH. */
#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH) ? (sp) + 1 : PAREN_STACK_DEPTH)

/* Step a stack index forward by count (or by one), with wraparound. */
#define INC(sp, count) (MOD((sp) + (count)))
#define INC1(sp) (MOD((sp) + (1)))

/*
 * Step a stack index backward by count (or by one).  PAREN_STACK_DEPTH is
 * added before the subtraction so that the operand of % is the index moved
 * forward by (PAREN_STACK_DEPTH - count).
 */
#define DEC(sp, count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
#define DEC1(sp) (MOD((sp) + PAREN_STACK_DEPTH - (1)))

/* Emptiness is tracked through pushCount, not through parenSP. */
#define STACK_IS_EMPTY() (pushCount <= 0)
#define STACK_IS_NOT_EMPTY() (pushCount > 0)

/* The entry currently at the top of the stack. */
#define TOP() (parenStack[parenSP])

/* Forget any entries still waiting to have their script fixed up. */
#define SYNC_FIXUP() (fixupCount = 0)
66 void
67 gfxScriptItemizer::push(uint32_t endPairChar, int32_t scriptCode)
68 {
69 pushCount = LIMIT_INC(pushCount);
70 fixupCount = LIMIT_INC(fixupCount);
72 parenSP = INC1(parenSP);
73 parenStack[parenSP].endPairChar = endPairChar;
74 parenStack[parenSP].scriptCode = scriptCode;
75 }
77 void
78 gfxScriptItemizer::pop()
79 {
80 if (STACK_IS_EMPTY()) {
81 return;
82 }
84 if (fixupCount > 0) {
85 fixupCount -= 1;
86 }
88 pushCount -= 1;
89 parenSP = DEC1(parenSP);
91 /* If the stack is now empty, reset the stack
92 pointers to their initial values.
93 */
94 if (STACK_IS_EMPTY()) {
95 parenSP = -1;
96 }
97 }
99 void
100 gfxScriptItemizer::fixup(int32_t scriptCode)
101 {
102 int32_t fixupSP = DEC(parenSP, fixupCount);
104 while (fixupCount-- > 0) {
105 fixupSP = INC1(fixupSP);
106 parenStack[fixupSP].scriptCode = scriptCode;
107 }
108 }
110 static inline bool
111 SameScript(int32_t runScript, int32_t currCharScript)
112 {
113 return runScript <= MOZ_SCRIPT_INHERITED ||
114 currCharScript <= MOZ_SCRIPT_INHERITED ||
115 currCharScript == runScript;
116 }
118 // Return whether the char has a mirrored-pair counterpart.
119 // NOTE that this depends on the implementation of nsCharProps records in
120 // nsUnicodeProperties, and may need to be updated if those structures change
121 static inline bool
122 HasMirroredChar(uint32_t aCh)
123 {
124 return GetCharProps1(aCh).mMirrorOffsetIndex != 0;
125 }
127 gfxScriptItemizer::gfxScriptItemizer(const char16_t *src, uint32_t length)
128 : textPtr(src), textLength(length)
129 {
130 reset();
131 }
133 void
134 gfxScriptItemizer::SetText(const char16_t *src, uint32_t length)
135 {
136 textPtr = src;
137 textLength = length;
139 reset();
140 }
142 bool
143 gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
144 int32_t& aRunScript)
145 {
146 /* if we've fallen off the end of the text, we're done */
147 if (scriptLimit >= textLength) {
148 return false;
149 }
151 SYNC_FIXUP();
152 scriptCode = MOZ_SCRIPT_COMMON;
154 for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) {
155 uint32_t ch;
156 int32_t sc;
157 uint32_t startOfChar = scriptLimit;
159 ch = textPtr[scriptLimit];
161 /* decode UTF-16 (may be surrogate pair) */
162 if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
163 uint32_t low = textPtr[scriptLimit + 1];
164 if (NS_IS_LOW_SURROGATE(low)) {
165 ch = SURROGATE_TO_UCS4(ch, low);
166 scriptLimit += 1;
167 }
168 }
170 // Get the nsCharProps2 record for the current character,
171 // so we can read the script and (if needed) the gen category
172 // without needing to do two multi-level lookups.
173 // NOTE that this means we're relying on an implementation detail
174 // of the nsUnicodeProperties tables, and might have to revise this
175 // if the nsCharProps records used there are modified in future.
176 const nsCharProps2& charProps = GetCharProps2(ch);
178 // Initialize gc to UNASSIGNED; we'll only set it to the true GC
179 // if the character has script=COMMON, otherwise we don't care.
180 uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
182 sc = charProps.mScriptCode;
183 if (sc == MOZ_SCRIPT_COMMON) {
184 /*
185 * Paired character handling:
186 *
187 * if it's an open character, push it onto the stack.
188 * if it's a close character, find the matching open on the
189 * stack, and use that script code. Any non-matching open
190 * characters above it on the stack will be popped.
191 *
192 * We only do this if the script is COMMON; for chars with
193 * specific script assignments, we just use them as-is.
194 */
195 gc = charProps.mCategory;
196 if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
197 uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch);
198 if (endPairChar != ch) {
199 push(endPairChar, scriptCode);
200 }
201 } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
202 HasMirroredChar(ch))
203 {
204 while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
205 pop();
206 }
208 if (STACK_IS_NOT_EMPTY()) {
209 sc = TOP().scriptCode;
210 }
211 }
212 }
214 if (SameScript(scriptCode, sc)) {
215 if (scriptCode <= MOZ_SCRIPT_INHERITED &&
216 sc > MOZ_SCRIPT_INHERITED)
217 {
218 scriptCode = sc;
219 fixup(scriptCode);
220 }
222 /*
223 * if this character is a close paired character,
224 * pop the matching open character from the stack
225 */
226 if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
227 HasMirroredChar(ch)) {
228 pop();
229 }
230 } else {
231 /*
232 * reset scriptLimit in case it was advanced during reading a
233 * multiple-code-unit character
234 */
235 scriptLimit = startOfChar;
237 break;
238 }
239 }
241 aRunStart = scriptStart;
242 aRunLimit = scriptLimit;
243 aRunScript = scriptCode;
245 return true;
246 }