1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/thebes/gfxScriptItemizer.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,246 @@ 1.4 +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +/* 1.10 + * This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted 1.11 + * for use within Mozilla Gecko, separate from a standard ICU build. 1.12 + * 1.13 + * The original ICU license of the code follows: 1.14 + * 1.15 + * ICU License - ICU 1.8.1 and later 1.16 + * 1.17 + * COPYRIGHT AND PERMISSION NOTICE 1.18 + * 1.19 + * Copyright (c) 1995-2009 International Business Machines Corporation and 1.20 + * others 1.21 + * 1.22 + * All rights reserved. 1.23 + * 1.24 + * Permission is hereby granted, free of charge, to any person obtaining a 1.25 + * copy of this software and associated documentation files (the "Software"), 1.26 + * to deal in the Software without restriction, including without limitation 1.27 + * the rights to use, copy, modify, merge, publish, distribute, and/or sell 1.28 + * copies of the Software, and to permit persons to whom the Software is 1.29 + * furnished to do so, provided that the above copyright notice(s) and this 1.30 + * permission notice appear in all copies of the Software and that both the 1.31 + * above copyright notice(s) and this permission notice appear in supporting 1.32 + * documentation. 1.33 + * 1.34 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1.35 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1.36 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 1.37 + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE 1.38 + * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, 1.39 + * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 1.40 + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 1.41 + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 1.42 + * SOFTWARE. 1.43 + * 1.44 + * Except as contained in this notice, the name of a copyright holder shall 1.45 + * not be used in advertising or otherwise to promote the sale, use or other 1.46 + * dealings in this Software without prior written authorization of the 1.47 + * copyright holder. 1.48 + * 1.49 + * All trademarks and registered trademarks mentioned herein are the property 1.50 + * of their respective owners. 1.51 + */ 1.52 + 1.53 +#include "gfxScriptItemizer.h" 1.54 +#include "nsUnicodeProperties.h" 1.55 +#include "nsCharTraits.h" 1.56 +#include "harfbuzz/hb.h" 1.57 + 1.58 +#define MOD(sp) ((sp) % PAREN_STACK_DEPTH) 1.59 +#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH) 1.60 +#define INC(sp,count) (MOD((sp) + (count))) 1.61 +#define INC1(sp) (INC(sp, 1)) 1.62 +#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count))) 1.63 +#define DEC1(sp) (DEC(sp, 1)) 1.64 +#define STACK_IS_EMPTY() (pushCount <= 0) 1.65 +#define STACK_IS_NOT_EMPTY() (! STACK_IS_EMPTY()) 1.66 +#define TOP() (parenStack[parenSP]) 1.67 +#define SYNC_FIXUP() (fixupCount = 0) 1.68 + 1.69 +void 1.70 +gfxScriptItemizer::push(uint32_t endPairChar, int32_t scriptCode) 1.71 +{ 1.72 + pushCount = LIMIT_INC(pushCount); 1.73 + fixupCount = LIMIT_INC(fixupCount); 1.74 + 1.75 + parenSP = INC1(parenSP); 1.76 + parenStack[parenSP].endPairChar = endPairChar; 1.77 + parenStack[parenSP].scriptCode = scriptCode; 1.78 +} 1.79 + 1.80 +void 1.81 +gfxScriptItemizer::pop() 1.82 +{ 1.83 + if (STACK_IS_EMPTY()) { 1.84 + return; 1.85 + } 1.86 + 1.87 + if (fixupCount > 0) { 1.88 + fixupCount -= 1; 1.89 + } 1.90 + 1.91 + pushCount -= 1; 1.92 + parenSP = DEC1(parenSP); 1.93 + 1.94 + /* If the stack is now empty, reset the stack 1.95 + pointers to their initial values. 1.96 + */ 1.97 + if (STACK_IS_EMPTY()) { 1.98 + parenSP = -1; 1.99 + } 1.100 +} 1.101 + 1.102 +void 1.103 +gfxScriptItemizer::fixup(int32_t scriptCode) 1.104 +{ 1.105 + int32_t fixupSP = DEC(parenSP, fixupCount); 1.106 + 1.107 + while (fixupCount-- > 0) { 1.108 + fixupSP = INC1(fixupSP); 1.109 + parenStack[fixupSP].scriptCode = scriptCode; 1.110 + } 1.111 +} 1.112 + 1.113 +static inline bool 1.114 +SameScript(int32_t runScript, int32_t currCharScript) 1.115 +{ 1.116 + return runScript <= MOZ_SCRIPT_INHERITED || 1.117 + currCharScript <= MOZ_SCRIPT_INHERITED || 1.118 + currCharScript == runScript; 1.119 +} 1.120 + 1.121 +// Return whether the char has a mirrored-pair counterpart. 1.122 +// NOTE that this depends on the implementation of nsCharProps records in 1.123 +// nsUnicodeProperties, and may need to be updated if those structures change 1.124 +static inline bool 1.125 +HasMirroredChar(uint32_t aCh) 1.126 +{ 1.127 + return GetCharProps1(aCh).mMirrorOffsetIndex != 0; 1.128 +} 1.129 + 1.130 +gfxScriptItemizer::gfxScriptItemizer(const char16_t *src, uint32_t length) 1.131 + : textPtr(src), textLength(length) 1.132 +{ 1.133 + reset(); 1.134 +} 1.135 + 1.136 +void 1.137 +gfxScriptItemizer::SetText(const char16_t *src, uint32_t length) 1.138 +{ 1.139 + textPtr = src; 1.140 + textLength = length; 1.141 + 1.142 + reset(); 1.143 +} 1.144 + 1.145 +bool 1.146 +gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, 1.147 + int32_t& aRunScript) 1.148 +{ 1.149 + /* if we've fallen off the end of the text, we're done */ 1.150 + if (scriptLimit >= textLength) { 1.151 + return false; 1.152 + } 1.153 + 1.154 + SYNC_FIXUP(); 1.155 + scriptCode = MOZ_SCRIPT_COMMON; 1.156 + 1.157 + for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) { 1.158 + uint32_t ch; 1.159 + int32_t sc; 1.160 + uint32_t startOfChar = scriptLimit; 1.161 + 1.162 + ch = textPtr[scriptLimit]; 1.163 + 1.164 + /* decode UTF-16 (may be surrogate pair) */ 1.165 + if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) { 1.166 + uint32_t low = textPtr[scriptLimit + 1]; 1.167 + if (NS_IS_LOW_SURROGATE(low)) { 1.168 + ch = SURROGATE_TO_UCS4(ch, low); 1.169 + scriptLimit += 1; 1.170 + } 1.171 + } 1.172 + 1.173 + // Get the nsCharProps2 record for the current character, 1.174 + // so we can read the script and (if needed) the gen category 1.175 + // without needing to do two multi-level lookups. 1.176 + // NOTE that this means we're relying on an implementation detail 1.177 + // of the nsUnicodeProperties tables, and might have to revise this 1.178 + // if the nsCharProps records used there are modified in future. 1.179 + const nsCharProps2& charProps = GetCharProps2(ch); 1.180 + 1.181 + // Initialize gc to UNASSIGNED; we'll only set it to the true GC 1.182 + // if the character has script=COMMON, otherwise we don't care. 1.183 + uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; 1.184 + 1.185 + sc = charProps.mScriptCode; 1.186 + if (sc == MOZ_SCRIPT_COMMON) { 1.187 + /* 1.188 + * Paired character handling: 1.189 + * 1.190 + * if it's an open character, push it onto the stack. 1.191 + * if it's a close character, find the matching open on the 1.192 + * stack, and use that script code. Any non-matching open 1.193 + * characters above it on the stack will be popped. 1.194 + * 1.195 + * We only do this if the script is COMMON; for chars with 1.196 + * specific script assignments, we just use them as-is. 1.197 + */ 1.198 + gc = charProps.mCategory; 1.199 + if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) { 1.200 + uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch); 1.201 + if (endPairChar != ch) { 1.202 + push(endPairChar, scriptCode); 1.203 + } 1.204 + } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && 1.205 + HasMirroredChar(ch)) 1.206 + { 1.207 + while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) { 1.208 + pop(); 1.209 + } 1.210 + 1.211 + if (STACK_IS_NOT_EMPTY()) { 1.212 + sc = TOP().scriptCode; 1.213 + } 1.214 + } 1.215 + } 1.216 + 1.217 + if (SameScript(scriptCode, sc)) { 1.218 + if (scriptCode <= MOZ_SCRIPT_INHERITED && 1.219 + sc > MOZ_SCRIPT_INHERITED) 1.220 + { 1.221 + scriptCode = sc; 1.222 + fixup(scriptCode); 1.223 + } 1.224 + 1.225 + /* 1.226 + * if this character is a close paired character, 1.227 + * pop the matching open character from the stack 1.228 + */ 1.229 + if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && 1.230 + HasMirroredChar(ch)) { 1.231 + pop(); 1.232 + } 1.233 + } else { 1.234 + /* 1.235 + * reset scriptLimit in case it was advanced during reading a 1.236 + * multiple-code-unit character 1.237 + */ 1.238 + scriptLimit = startOfChar; 1.239 + 1.240 + break; 1.241 + } 1.242 + } 1.243 + 1.244 + aRunStart = scriptStart; 1.245 + aRunLimit = scriptLimit; 1.246 + aRunScript = scriptCode; 1.247 + 1.248 + return true; 1.249 +}