gfx/thebes/gfxScriptItemizer.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 /*
     7  * This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted
     8  * for use within Mozilla Gecko, separate from a standard ICU build.
     9  *
    10  * The original ICU license of the code follows:
    11  *
    12  * ICU License - ICU 1.8.1 and later
    13  *
    14  * COPYRIGHT AND PERMISSION NOTICE
    15  * 
    16  * Copyright (c) 1995-2009 International Business Machines Corporation and
    17  * others
    18  *
    19  * All rights reserved.
    20  *
    21  * Permission is hereby granted, free of charge, to any person obtaining a
    22  * copy of this software and associated documentation files (the "Software"),
    23  * to deal in the Software without restriction, including without limitation
    24  * the rights to use, copy, modify, merge, publish, distribute, and/or sell
    25  * copies of the Software, and to permit persons to whom the Software is
    26  * furnished to do so, provided that the above copyright notice(s) and this
    27  * permission notice appear in all copies of the Software and that both the
    28  * above copyright notice(s) and this permission notice appear in supporting
    29  * documentation.
    30  *
    31  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    32  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    33  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
    34  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
    35  * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
    36  * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
    37  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
    38  * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
    39  * SOFTWARE.
    40  *
    41  * Except as contained in this notice, the name of a copyright holder shall
    42  * not be used in advertising or otherwise to promote the sale, use or other
    43  * dealings in this Software without prior written authorization of the
    44  * copyright holder.
    45  *
    46  * All trademarks and registered trademarks mentioned herein are the property
    47  * of their respective owners. 
    48  */
    50 #include "gfxScriptItemizer.h"
    51 #include "nsUnicodeProperties.h"
    52 #include "nsCharTraits.h"
    53 #include "harfbuzz/hb.h"
    55 #define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
    56 #define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH)
    57 #define INC(sp,count) (MOD((sp) + (count)))
    58 #define INC1(sp) (INC(sp, 1))
    59 #define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
    60 #define DEC1(sp) (DEC(sp, 1))
    61 #define STACK_IS_EMPTY() (pushCount <= 0)
    62 #define STACK_IS_NOT_EMPTY() (! STACK_IS_EMPTY())
    63 #define TOP() (parenStack[parenSP])
    64 #define SYNC_FIXUP() (fixupCount = 0)
    66 void
    67 gfxScriptItemizer::push(uint32_t endPairChar, int32_t scriptCode)
    68 {
    69     pushCount  = LIMIT_INC(pushCount);
    70     fixupCount = LIMIT_INC(fixupCount);
    72     parenSP = INC1(parenSP);
    73     parenStack[parenSP].endPairChar = endPairChar;
    74     parenStack[parenSP].scriptCode = scriptCode;
    75 }
    77 void
    78 gfxScriptItemizer::pop()
    79 {
    80     if (STACK_IS_EMPTY()) {
    81         return;
    82     }
    84     if (fixupCount > 0) {
    85         fixupCount -= 1;
    86     }
    88     pushCount -= 1;
    89     parenSP = DEC1(parenSP);
    91     /* If the stack is now empty, reset the stack
    92        pointers to their initial values.
    93      */
    94     if (STACK_IS_EMPTY()) {
    95         parenSP = -1;
    96     }
    97 }
    99 void
   100 gfxScriptItemizer::fixup(int32_t scriptCode)
   101 {
   102     int32_t fixupSP = DEC(parenSP, fixupCount);
   104     while (fixupCount-- > 0) {
   105         fixupSP = INC1(fixupSP);
   106         parenStack[fixupSP].scriptCode = scriptCode;
   107     }
   108 }
   110 static inline bool
   111 SameScript(int32_t runScript, int32_t currCharScript)
   112 {
   113     return runScript <= MOZ_SCRIPT_INHERITED ||
   114            currCharScript <= MOZ_SCRIPT_INHERITED ||
   115            currCharScript == runScript;
   116 }
   118 // Return whether the char has a mirrored-pair counterpart.
   119 // NOTE that this depends on the implementation of nsCharProps records in
   120 // nsUnicodeProperties, and may need to be updated if those structures change
   121 static inline bool
   122 HasMirroredChar(uint32_t aCh)
   123 {
   124     return GetCharProps1(aCh).mMirrorOffsetIndex != 0;
   125 }
   127 gfxScriptItemizer::gfxScriptItemizer(const char16_t *src, uint32_t length)
   128     : textPtr(src), textLength(length)
   129 {
   130     reset();
   131 }
   133 void
   134 gfxScriptItemizer::SetText(const char16_t *src, uint32_t length)
   135 {
   136     textPtr  = src;
   137     textLength = length;
   139     reset();
   140 }
   142 bool
   143 gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
   144                         int32_t& aRunScript)
   145 {
   146     /* if we've fallen off the end of the text, we're done */
   147     if (scriptLimit >= textLength) {
   148         return false;
   149     }
   151     SYNC_FIXUP();
   152     scriptCode = MOZ_SCRIPT_COMMON;
   154     for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) {
   155         uint32_t ch;
   156         int32_t sc;
   157         uint32_t startOfChar = scriptLimit;
   159         ch = textPtr[scriptLimit];
   161         /* decode UTF-16 (may be surrogate pair) */
   162         if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
   163             uint32_t low = textPtr[scriptLimit + 1];
   164             if (NS_IS_LOW_SURROGATE(low)) {
   165                 ch = SURROGATE_TO_UCS4(ch, low);
   166                 scriptLimit += 1;
   167             }
   168         }
   170         // Get the nsCharProps2 record for the current character,
   171         // so we can read the script and (if needed) the gen category
   172         // without needing to do two multi-level lookups.
   173         // NOTE that this means we're relying on an implementation detail
   174         // of the nsUnicodeProperties tables, and might have to revise this
   175         // if the nsCharProps records used there are modified in future.
   176         const nsCharProps2& charProps = GetCharProps2(ch);
   178         // Initialize gc to UNASSIGNED; we'll only set it to the true GC
   179         // if the character has script=COMMON, otherwise we don't care.
   180         uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
   182         sc = charProps.mScriptCode;
   183         if (sc == MOZ_SCRIPT_COMMON) {
   184             /*
   185              * Paired character handling:
   186              *
   187              * if it's an open character, push it onto the stack.
   188              * if it's a close character, find the matching open on the
   189              * stack, and use that script code. Any non-matching open
   190              * characters above it on the stack will be popped.
   191              *
   192              * We only do this if the script is COMMON; for chars with
   193              * specific script assignments, we just use them as-is.
   194              */
   195             gc = charProps.mCategory;
   196             if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
   197                 uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch);
   198                 if (endPairChar != ch) {
   199                     push(endPairChar, scriptCode);
   200                 }
   201             } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
   202                 HasMirroredChar(ch))
   203             {
   204                 while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
   205                     pop();
   206                 }
   208                 if (STACK_IS_NOT_EMPTY()) {
   209                     sc = TOP().scriptCode;
   210                 }
   211             }
   212         }
   214         if (SameScript(scriptCode, sc)) {
   215             if (scriptCode <= MOZ_SCRIPT_INHERITED &&
   216                 sc > MOZ_SCRIPT_INHERITED)
   217             {
   218                 scriptCode = sc;
   219                 fixup(scriptCode);
   220             }
   222             /*
   223              * if this character is a close paired character,
   224              * pop the matching open character from the stack
   225              */
   226             if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
   227                 HasMirroredChar(ch)) {
   228                 pop();
   229             }
   230         } else {
   231             /*
   232              * reset scriptLimit in case it was advanced during reading a
   233              * multiple-code-unit character
   234              */
   235             scriptLimit = startOfChar;
   237             break;
   238         }
   239     }
   241     aRunStart = scriptStart;
   242     aRunLimit = scriptLimit;
   243     aRunScript = scriptCode;
   245     return true;
   246 }

mercurial