gfx/thebes/gfxScriptItemizer.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/thebes/gfxScriptItemizer.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,246 @@
     1.4 +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +/*
    1.10 + * This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted
    1.11 + * for use within Mozilla Gecko, separate from a standard ICU build.
    1.12 + *
    1.13 + * The original ICU license of the code follows:
    1.14 + *
    1.15 + * ICU License - ICU 1.8.1 and later
    1.16 + *
    1.17 + * COPYRIGHT AND PERMISSION NOTICE
    1.18 + * 
    1.19 + * Copyright (c) 1995-2009 International Business Machines Corporation and
    1.20 + * others
    1.21 + *
    1.22 + * All rights reserved.
    1.23 + *
    1.24 + * Permission is hereby granted, free of charge, to any person obtaining a
    1.25 + * copy of this software and associated documentation files (the "Software"),
    1.26 + * to deal in the Software without restriction, including without limitation
    1.27 + * the rights to use, copy, modify, merge, publish, distribute, and/or sell
    1.28 + * copies of the Software, and to permit persons to whom the Software is
    1.29 + * furnished to do so, provided that the above copyright notice(s) and this
    1.30 + * permission notice appear in all copies of the Software and that both the
    1.31 + * above copyright notice(s) and this permission notice appear in supporting
    1.32 + * documentation.
    1.33 + *
    1.34 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    1.35 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    1.36 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
    1.37 + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
    1.38 + * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
    1.39 + * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
    1.40 + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
    1.41 + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
    1.42 + * SOFTWARE.
    1.43 + *
    1.44 + * Except as contained in this notice, the name of a copyright holder shall
    1.45 + * not be used in advertising or otherwise to promote the sale, use or other
    1.46 + * dealings in this Software without prior written authorization of the
    1.47 + * copyright holder.
    1.48 + *
    1.49 + * All trademarks and registered trademarks mentioned herein are the property
    1.50 + * of their respective owners. 
    1.51 + */
    1.52 +
    1.53 +#include "gfxScriptItemizer.h"
    1.54 +#include "nsUnicodeProperties.h"
    1.55 +#include "nsCharTraits.h"
    1.56 +#include "harfbuzz/hb.h"
    1.57 +
    1.58 +#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
    1.59 +#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH)
    1.60 +#define INC(sp,count) (MOD((sp) + (count)))
    1.61 +#define INC1(sp) (INC(sp, 1))
    1.62 +#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
    1.63 +#define DEC1(sp) (DEC(sp, 1))
    1.64 +#define STACK_IS_EMPTY() (pushCount <= 0)
    1.65 +#define STACK_IS_NOT_EMPTY() (! STACK_IS_EMPTY())
    1.66 +#define TOP() (parenStack[parenSP])
    1.67 +#define SYNC_FIXUP() (fixupCount = 0)
    1.68 +
    1.69 +void
    1.70 +gfxScriptItemizer::push(uint32_t endPairChar, int32_t scriptCode)
    1.71 +{
    1.72 +    pushCount  = LIMIT_INC(pushCount);
    1.73 +    fixupCount = LIMIT_INC(fixupCount);
    1.74 +
    1.75 +    parenSP = INC1(parenSP);
    1.76 +    parenStack[parenSP].endPairChar = endPairChar;
    1.77 +    parenStack[parenSP].scriptCode = scriptCode;
    1.78 +}
    1.79 +
    1.80 +void
    1.81 +gfxScriptItemizer::pop()
    1.82 +{
    1.83 +    if (STACK_IS_EMPTY()) {
    1.84 +        return;
    1.85 +    }
    1.86 +
    1.87 +    if (fixupCount > 0) {
    1.88 +        fixupCount -= 1;
    1.89 +    }
    1.90 +
    1.91 +    pushCount -= 1;
    1.92 +    parenSP = DEC1(parenSP);
    1.93 +  
    1.94 +    /* If the stack is now empty, reset the stack
    1.95 +       pointers to their initial values.
    1.96 +     */
    1.97 +    if (STACK_IS_EMPTY()) {
    1.98 +        parenSP = -1;
    1.99 +    }
   1.100 +}
   1.101 +
   1.102 +void
   1.103 +gfxScriptItemizer::fixup(int32_t scriptCode)
   1.104 +{
   1.105 +    int32_t fixupSP = DEC(parenSP, fixupCount);
   1.106 +
   1.107 +    while (fixupCount-- > 0) {
   1.108 +        fixupSP = INC1(fixupSP);
   1.109 +        parenStack[fixupSP].scriptCode = scriptCode;
   1.110 +    }
   1.111 +}
   1.112 +
   1.113 +static inline bool
   1.114 +SameScript(int32_t runScript, int32_t currCharScript)
   1.115 +{
   1.116 +    return runScript <= MOZ_SCRIPT_INHERITED ||
   1.117 +           currCharScript <= MOZ_SCRIPT_INHERITED ||
   1.118 +           currCharScript == runScript;
   1.119 +}
   1.120 +
   1.121 +// Return whether the char has a mirrored-pair counterpart.
   1.122 +// NOTE that this depends on the implementation of nsCharProps records in
   1.123 +// nsUnicodeProperties, and may need to be updated if those structures change
   1.124 +static inline bool
   1.125 +HasMirroredChar(uint32_t aCh)
   1.126 +{
   1.127 +    return GetCharProps1(aCh).mMirrorOffsetIndex != 0;
   1.128 +}
   1.129 +
   1.130 +gfxScriptItemizer::gfxScriptItemizer(const char16_t *src, uint32_t length)
   1.131 +    : textPtr(src), textLength(length)
   1.132 +{
   1.133 +    reset();
   1.134 +}
   1.135 +
   1.136 +void
   1.137 +gfxScriptItemizer::SetText(const char16_t *src, uint32_t length)
   1.138 +{
   1.139 +    textPtr  = src;
   1.140 +    textLength = length;
   1.141 +
   1.142 +    reset();
   1.143 +}
   1.144 +
   1.145 +bool
   1.146 +gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
   1.147 +                        int32_t& aRunScript)
   1.148 +{
   1.149 +    /* if we've fallen off the end of the text, we're done */
   1.150 +    if (scriptLimit >= textLength) {
   1.151 +        return false;
   1.152 +    }
   1.153 +
   1.154 +    SYNC_FIXUP();
   1.155 +    scriptCode = MOZ_SCRIPT_COMMON;
   1.156 +
   1.157 +    for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) {
   1.158 +        uint32_t ch;
   1.159 +        int32_t sc;
   1.160 +        uint32_t startOfChar = scriptLimit;
   1.161 +
   1.162 +        ch = textPtr[scriptLimit];
   1.163 +
   1.164 +        /* decode UTF-16 (may be surrogate pair) */
   1.165 +        if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
   1.166 +            uint32_t low = textPtr[scriptLimit + 1];
   1.167 +            if (NS_IS_LOW_SURROGATE(low)) {
   1.168 +                ch = SURROGATE_TO_UCS4(ch, low);
   1.169 +                scriptLimit += 1;
   1.170 +            }
   1.171 +        }
   1.172 +
   1.173 +        // Get the nsCharProps2 record for the current character,
   1.174 +        // so we can read the script and (if needed) the gen category
   1.175 +        // without needing to do two multi-level lookups.
   1.176 +        // NOTE that this means we're relying on an implementation detail
   1.177 +        // of the nsUnicodeProperties tables, and might have to revise this
   1.178 +        // if the nsCharProps records used there are modified in future.
   1.179 +        const nsCharProps2& charProps = GetCharProps2(ch);
   1.180 +
   1.181 +        // Initialize gc to UNASSIGNED; we'll only set it to the true GC
   1.182 +        // if the character has script=COMMON, otherwise we don't care.
   1.183 +        uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
   1.184 +
   1.185 +        sc = charProps.mScriptCode;
   1.186 +        if (sc == MOZ_SCRIPT_COMMON) {
   1.187 +            /*
   1.188 +             * Paired character handling:
   1.189 +             *
   1.190 +             * if it's an open character, push it onto the stack.
   1.191 +             * if it's a close character, find the matching open on the
   1.192 +             * stack, and use that script code. Any non-matching open
   1.193 +             * characters above it on the stack will be popped.
   1.194 +             *
   1.195 +             * We only do this if the script is COMMON; for chars with
   1.196 +             * specific script assignments, we just use them as-is.
   1.197 +             */
   1.198 +            gc = charProps.mCategory;
   1.199 +            if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
   1.200 +                uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch);
   1.201 +                if (endPairChar != ch) {
   1.202 +                    push(endPairChar, scriptCode);
   1.203 +                }
   1.204 +            } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
   1.205 +                HasMirroredChar(ch))
   1.206 +            {
   1.207 +                while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
   1.208 +                    pop();
   1.209 +                }
   1.210 +
   1.211 +                if (STACK_IS_NOT_EMPTY()) {
   1.212 +                    sc = TOP().scriptCode;
   1.213 +                }
   1.214 +            }
   1.215 +        }
   1.216 +
   1.217 +        if (SameScript(scriptCode, sc)) {
   1.218 +            if (scriptCode <= MOZ_SCRIPT_INHERITED &&
   1.219 +                sc > MOZ_SCRIPT_INHERITED)
   1.220 +            {
   1.221 +                scriptCode = sc;
   1.222 +                fixup(scriptCode);
   1.223 +            }
   1.224 +
   1.225 +            /*
   1.226 +             * if this character is a close paired character,
   1.227 +             * pop the matching open character from the stack
   1.228 +             */
   1.229 +            if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
   1.230 +                HasMirroredChar(ch)) {
   1.231 +                pop();
   1.232 +            }
   1.233 +        } else {
   1.234 +            /*
   1.235 +             * reset scriptLimit in case it was advanced during reading a
   1.236 +             * multiple-code-unit character
   1.237 +             */
   1.238 +            scriptLimit = startOfChar;
   1.239 +
   1.240 +            break;
   1.241 +        }
   1.242 +    }
   1.243 +
   1.244 +    aRunStart = scriptStart;
   1.245 +    aRunLimit = scriptLimit;
   1.246 +    aRunScript = scriptCode;
   1.247 +
   1.248 +    return true;
   1.249 +}

mercurial