michael@0: /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* michael@0: * This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted michael@0: * for use within Mozilla Gecko, separate from a standard ICU build. michael@0: * michael@0: * The original ICU license of the code follows: michael@0: * michael@0: * ICU License - ICU 1.8.1 and later michael@0: * michael@0: * COPYRIGHT AND PERMISSION NOTICE michael@0: * michael@0: * Copyright (c) 1995-2009 International Business Machines Corporation and michael@0: * others michael@0: * michael@0: * All rights reserved. michael@0: * michael@0: * Permission is hereby granted, free of charge, to any person obtaining a michael@0: * copy of this software and associated documentation files (the "Software"), michael@0: * to deal in the Software without restriction, including without limitation michael@0: * the rights to use, copy, modify, merge, publish, distribute, and/or sell michael@0: * copies of the Software, and to permit persons to whom the Software is michael@0: * furnished to do so, provided that the above copyright notice(s) and this michael@0: * permission notice appear in all copies of the Software and that both the michael@0: * above copyright notice(s) and this permission notice appear in supporting michael@0: * documentation. michael@0: * michael@0: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR michael@0: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, michael@0: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. michael@0: * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE michael@0: * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, michael@0: * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, michael@0: * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, michael@0: * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS michael@0: * SOFTWARE. michael@0: * michael@0: * Except as contained in this notice, the name of a copyright holder shall michael@0: * not be used in advertising or otherwise to promote the sale, use or other michael@0: * dealings in this Software without prior written authorization of the michael@0: * copyright holder. michael@0: * michael@0: * All trademarks and registered trademarks mentioned herein are the property michael@0: * of their respective owners. michael@0: */ michael@0: michael@0: #include "gfxScriptItemizer.h" michael@0: #include "nsUnicodeProperties.h" michael@0: #include "nsCharTraits.h" michael@0: #include "harfbuzz/hb.h" michael@0: michael@0: #define MOD(sp) ((sp) % PAREN_STACK_DEPTH) michael@0: #define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH) michael@0: #define INC(sp,count) (MOD((sp) + (count))) michael@0: #define INC1(sp) (INC(sp, 1)) michael@0: #define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count))) michael@0: #define DEC1(sp) (DEC(sp, 1)) michael@0: #define STACK_IS_EMPTY() (pushCount <= 0) michael@0: #define STACK_IS_NOT_EMPTY() (! STACK_IS_EMPTY()) michael@0: #define TOP() (parenStack[parenSP]) michael@0: #define SYNC_FIXUP() (fixupCount = 0) michael@0: michael@0: void michael@0: gfxScriptItemizer::push(uint32_t endPairChar, int32_t scriptCode) michael@0: { michael@0: pushCount = LIMIT_INC(pushCount); michael@0: fixupCount = LIMIT_INC(fixupCount); michael@0: michael@0: parenSP = INC1(parenSP); michael@0: parenStack[parenSP].endPairChar = endPairChar; michael@0: parenStack[parenSP].scriptCode = scriptCode; michael@0: } michael@0: michael@0: void michael@0: gfxScriptItemizer::pop() michael@0: { michael@0: if (STACK_IS_EMPTY()) { michael@0: return; michael@0: } michael@0: michael@0: if (fixupCount > 0) { michael@0: fixupCount -= 1; michael@0: } michael@0: michael@0: pushCount -= 1; michael@0: parenSP = DEC1(parenSP); michael@0: michael@0: /* If the stack is now empty, reset the stack michael@0: pointers to their initial values. michael@0: */ michael@0: if (STACK_IS_EMPTY()) { michael@0: parenSP = -1; michael@0: } michael@0: } michael@0: michael@0: void michael@0: gfxScriptItemizer::fixup(int32_t scriptCode) michael@0: { michael@0: int32_t fixupSP = DEC(parenSP, fixupCount); michael@0: michael@0: while (fixupCount-- > 0) { michael@0: fixupSP = INC1(fixupSP); michael@0: parenStack[fixupSP].scriptCode = scriptCode; michael@0: } michael@0: } michael@0: michael@0: static inline bool michael@0: SameScript(int32_t runScript, int32_t currCharScript) michael@0: { michael@0: return runScript <= MOZ_SCRIPT_INHERITED || michael@0: currCharScript <= MOZ_SCRIPT_INHERITED || michael@0: currCharScript == runScript; michael@0: } michael@0: michael@0: // Return whether the char has a mirrored-pair counterpart. michael@0: // NOTE that this depends on the implementation of nsCharProps records in michael@0: // nsUnicodeProperties, and may need to be updated if those structures change michael@0: static inline bool michael@0: HasMirroredChar(uint32_t aCh) michael@0: { michael@0: return GetCharProps1(aCh).mMirrorOffsetIndex != 0; michael@0: } michael@0: michael@0: gfxScriptItemizer::gfxScriptItemizer(const char16_t *src, uint32_t length) michael@0: : textPtr(src), textLength(length) michael@0: { michael@0: reset(); michael@0: } michael@0: michael@0: void michael@0: gfxScriptItemizer::SetText(const char16_t *src, uint32_t length) michael@0: { michael@0: textPtr = src; michael@0: textLength = length; michael@0: michael@0: reset(); michael@0: } michael@0: michael@0: bool michael@0: gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit, michael@0: int32_t& aRunScript) michael@0: { michael@0: /* if we've fallen off the end of the text, we're done */ michael@0: if (scriptLimit >= textLength) { michael@0: return false; michael@0: } michael@0: michael@0: SYNC_FIXUP(); michael@0: scriptCode = MOZ_SCRIPT_COMMON; michael@0: michael@0: for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) { michael@0: uint32_t ch; michael@0: int32_t sc; michael@0: uint32_t startOfChar = scriptLimit; michael@0: michael@0: ch = textPtr[scriptLimit]; michael@0: michael@0: /* decode UTF-16 (may be surrogate pair) */ michael@0: if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) { michael@0: uint32_t low = textPtr[scriptLimit + 1]; michael@0: if (NS_IS_LOW_SURROGATE(low)) { michael@0: ch = SURROGATE_TO_UCS4(ch, low); michael@0: scriptLimit += 1; michael@0: } michael@0: } michael@0: michael@0: // Get the nsCharProps2 record for the current character, michael@0: // so we can read the script and (if needed) the gen category michael@0: // without needing to do two multi-level lookups. michael@0: // NOTE that this means we're relying on an implementation detail michael@0: // of the nsUnicodeProperties tables, and might have to revise this michael@0: // if the nsCharProps records used there are modified in future. michael@0: const nsCharProps2& charProps = GetCharProps2(ch); michael@0: michael@0: // Initialize gc to UNASSIGNED; we'll only set it to the true GC michael@0: // if the character has script=COMMON, otherwise we don't care. michael@0: uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; michael@0: michael@0: sc = charProps.mScriptCode; michael@0: if (sc == MOZ_SCRIPT_COMMON) { michael@0: /* michael@0: * Paired character handling: michael@0: * michael@0: * if it's an open character, push it onto the stack. michael@0: * if it's a close character, find the matching open on the michael@0: * stack, and use that script code. Any non-matching open michael@0: * characters above it on the stack will be popped. michael@0: * michael@0: * We only do this if the script is COMMON; for chars with michael@0: * specific script assignments, we just use them as-is. michael@0: */ michael@0: gc = charProps.mCategory; michael@0: if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) { michael@0: uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch); michael@0: if (endPairChar != ch) { michael@0: push(endPairChar, scriptCode); michael@0: } michael@0: } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && michael@0: HasMirroredChar(ch)) michael@0: { michael@0: while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) { michael@0: pop(); michael@0: } michael@0: michael@0: if (STACK_IS_NOT_EMPTY()) { michael@0: sc = TOP().scriptCode; michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (SameScript(scriptCode, sc)) { michael@0: if (scriptCode <= MOZ_SCRIPT_INHERITED && michael@0: sc > MOZ_SCRIPT_INHERITED) michael@0: { michael@0: scriptCode = sc; michael@0: fixup(scriptCode); michael@0: } michael@0: michael@0: /* michael@0: * if this character is a close paired character, michael@0: * pop the matching open character from the stack michael@0: */ michael@0: if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION && michael@0: HasMirroredChar(ch)) { michael@0: pop(); michael@0: } michael@0: } else { michael@0: /* michael@0: * reset scriptLimit in case it was advanced during reading a michael@0: * multiple-code-unit character michael@0: */ michael@0: scriptLimit = startOfChar; michael@0: michael@0: break; michael@0: } michael@0: } michael@0: michael@0: aRunStart = scriptStart; michael@0: aRunLimit = scriptLimit; michael@0: aRunScript = scriptCode; michael@0: michael@0: return true; michael@0: }