The Tor Browser: layout/generic/nsTextRunTransformations.cpp@b8a032363ba2 (annotated)

layout/generic/nsTextRunTransformations.cpp@b8a032363ba2 (annotated)

layout/generic/nsTextRunTransformations.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author: Michael Schloh von Bennewitz <michael@schloh.com>
date: Thu, 22 Jan 2015 13:21:57 +0100
branch: TOR_BUG_9701
changeset 15: b8a032363ba2
permissions: -rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include "nsTextRunTransformations.h"
 #include "mozilla/MemoryReporting.h"
 #include "nsGkAtoms.h"
 #include "nsStyleConsts.h"
 #include "nsStyleContext.h"
 #include "nsUnicodeProperties.h"
 #include "nsSpecialCasingData.h"
 #include "mozilla/gfx/2D.h"
 #include "nsTextFrameUtils.h"
 #include "nsIPersistentProperties2.h"
 #include "nsNetUtil.h"
 // Unicode characters needing special casing treatment in tr/az languages
 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE  0x0130
 #define LATIN_SMALL_LETTER_DOTLESS_I           0x0131
 // Greek sigma needs custom handling for the lowercase transform; for details
 // see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
 // nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120.
 #define GREEK_CAPITAL_LETTER_SIGMA             0x03A3
 #define GREEK_SMALL_LETTER_FINAL_SIGMA         0x03C2
 #define GREEK_SMALL_LETTER_SIGMA               0x03C3
 // Custom uppercase mapping for Greek; see bug 307039 for details
 #define GREEK_LOWER_ALPHA                      0x03B1
 #define GREEK_LOWER_ALPHA_TONOS                0x03AC
 #define GREEK_LOWER_ALPHA_OXIA                 0x1F71
 #define GREEK_LOWER_EPSILON                    0x03B5
 #define GREEK_LOWER_EPSILON_TONOS              0x03AD
 #define GREEK_LOWER_EPSILON_OXIA               0x1F73
 #define GREEK_LOWER_ETA                        0x03B7
 #define GREEK_LOWER_ETA_TONOS                  0x03AE
 #define GREEK_LOWER_ETA_OXIA                   0x1F75
 #define GREEK_LOWER_IOTA                       0x03B9
 #define GREEK_LOWER_IOTA_TONOS                 0x03AF
 #define GREEK_LOWER_IOTA_OXIA                  0x1F77
 #define GREEK_LOWER_IOTA_DIALYTIKA             0x03CA
 #define GREEK_LOWER_IOTA_DIALYTIKA_TONOS       0x0390
 #define GREEK_LOWER_IOTA_DIALYTIKA_OXIA        0x1FD3
 #define GREEK_LOWER_OMICRON                    0x03BF
 #define GREEK_LOWER_OMICRON_TONOS              0x03CC
 #define GREEK_LOWER_OMICRON_OXIA               0x1F79
 #define GREEK_LOWER_UPSILON                    0x03C5
 #define GREEK_LOWER_UPSILON_TONOS              0x03CD
 #define GREEK_LOWER_UPSILON_OXIA               0x1F7B
 #define GREEK_LOWER_UPSILON_DIALYTIKA          0x03CB
 #define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS    0x03B0
 #define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA     0x1FE3
 #define GREEK_LOWER_OMEGA                      0x03C9
 #define GREEK_LOWER_OMEGA_TONOS                0x03CE
 #define GREEK_LOWER_OMEGA_OXIA                 0x1F7D
 #define GREEK_UPPER_ALPHA                      0x0391
 #define GREEK_UPPER_EPSILON                    0x0395
 #define GREEK_UPPER_ETA                        0x0397
 #define GREEK_UPPER_IOTA                       0x0399
 #define GREEK_UPPER_IOTA_DIALYTIKA             0x03AA
 #define GREEK_UPPER_OMICRON                    0x039F
 #define GREEK_UPPER_UPSILON                    0x03A5
 #define GREEK_UPPER_UPSILON_DIALYTIKA          0x03AB
 #define GREEK_UPPER_OMEGA                      0x03A9
 #define GREEK_UPPER_ALPHA_TONOS                0x0386
 #define GREEK_UPPER_ALPHA_OXIA                 0x1FBB
 #define GREEK_UPPER_EPSILON_TONOS              0x0388
 #define GREEK_UPPER_EPSILON_OXIA               0x1FC9
 #define GREEK_UPPER_ETA_TONOS                  0x0389
 #define GREEK_UPPER_ETA_OXIA                   0x1FCB
 #define GREEK_UPPER_IOTA_TONOS                 0x038A
 #define GREEK_UPPER_IOTA_OXIA                  0x1FDB
 #define GREEK_UPPER_OMICRON_TONOS              0x038C
 #define GREEK_UPPER_OMICRON_OXIA               0x1FF9
 #define GREEK_UPPER_UPSILON_TONOS              0x038E
 #define GREEK_UPPER_UPSILON_OXIA               0x1FEB
 #define GREEK_UPPER_OMEGA_TONOS                0x038F
 #define GREEK_UPPER_OMEGA_OXIA                 0x1FFB
 #define COMBINING_ACUTE_ACCENT                 0x0301
 #define COMBINING_DIAERESIS                    0x0308
 #define COMBINING_ACUTE_TONE_MARK              0x0341
 #define COMBINING_GREEK_DIALYTIKA_TONOS        0x0344
 // When doing an Uppercase transform in Greek, we need to keep track of the
 // current state while iterating through the string, to recognize and process
 // diphthongs correctly. For clarity, we define a state for each vowel and
 // each vowel with accent, although a few of these do not actually need any
 // special treatment and could be folded into kStart.
 enum GreekCasingState {
   kStart,
   kAlpha,
   kEpsilon,
   kEta,
   kIota,
   kOmicron,
   kUpsilon,
   kOmega,
   kAlphaAcc,
   kEpsilonAcc,
   kEtaAcc,
   kIotaAcc,
   kOmicronAcc,
   kUpsilonAcc,
   kOmegaAcc,
   kOmicronUpsilon,
   kDiaeresis
 };
 static uint32_t
 GreekUpperCase(uint32_t aCh, GreekCasingState* aState)
 {
   switch (aCh) {
   case GREEK_UPPER_ALPHA:
   case GREEK_LOWER_ALPHA:
     *aState = kAlpha;
     return GREEK_UPPER_ALPHA;
   case GREEK_UPPER_EPSILON:
   case GREEK_LOWER_EPSILON:
     *aState = kEpsilon;
     return GREEK_UPPER_EPSILON;
   case GREEK_UPPER_ETA:
   case GREEK_LOWER_ETA:
     *aState = kEta;
     return GREEK_UPPER_ETA;
   case GREEK_UPPER_IOTA:
     *aState = kIota;
     return GREEK_UPPER_IOTA;
   case GREEK_UPPER_OMICRON:
   case GREEK_LOWER_OMICRON:
     *aState = kOmicron;
     return GREEK_UPPER_OMICRON;
   case GREEK_UPPER_UPSILON:
     switch (*aState) {
     case kOmicron:
       *aState = kOmicronUpsilon;
       break;
     default:
       *aState = kUpsilon;
       break;
     }
     return GREEK_UPPER_UPSILON;
   case GREEK_UPPER_OMEGA:
   case GREEK_LOWER_OMEGA:
     *aState = kOmega;
     return GREEK_UPPER_OMEGA;
   // iota and upsilon may be the second vowel of a diphthong
   case GREEK_LOWER_IOTA:
     switch (*aState) {
     case kAlphaAcc:
     case kEpsilonAcc:
     case kOmicronAcc:
     case kUpsilonAcc:
       *aState = kStart;
       return GREEK_UPPER_IOTA_DIALYTIKA;
     default:
       break;
     }
     *aState = kIota;
     return GREEK_UPPER_IOTA;
   case GREEK_LOWER_UPSILON:
     switch (*aState) {
     case kAlphaAcc:
     case kEpsilonAcc:
     case kEtaAcc:
     case kOmicronAcc:
       *aState = kStart;
       return GREEK_UPPER_UPSILON_DIALYTIKA;
     case kOmicron:
       *aState = kOmicronUpsilon;
       break;
     default:
       *aState = kUpsilon;
       break;
     }
     return GREEK_UPPER_UPSILON;
   case GREEK_UPPER_IOTA_DIALYTIKA:
   case GREEK_LOWER_IOTA_DIALYTIKA:
   case GREEK_UPPER_UPSILON_DIALYTIKA:
   case GREEK_LOWER_UPSILON_DIALYTIKA:
   case COMBINING_DIAERESIS:
     *aState = kDiaeresis;
     return ToUpperCase(aCh);
   // remove accent if it follows a vowel or diaeresis,
   // and set appropriate state for diphthong detection
   case COMBINING_ACUTE_ACCENT:
   case COMBINING_ACUTE_TONE_MARK:
     switch (*aState) {
     case kAlpha:
       *aState = kAlphaAcc;
       return uint32_t(-1); // omit this char from result string
     case kEpsilon:
       *aState = kEpsilonAcc;
       return uint32_t(-1);
     case kEta:
       *aState = kEtaAcc;
       return uint32_t(-1);
     case kIota:
       *aState = kIotaAcc;
       return uint32_t(-1);
     case kOmicron:
       *aState = kOmicronAcc;
       return uint32_t(-1);
     case kUpsilon:
       *aState = kUpsilonAcc;
       return uint32_t(-1);
     case kOmicronUpsilon:
       *aState = kStart; // this completed a diphthong
       return uint32_t(-1);
     case kOmega:
       *aState = kOmegaAcc;
       return uint32_t(-1);
     case kDiaeresis:
       *aState = kStart;
       return uint32_t(-1);
     default:
       break;
     }
     break;
   // combinations with dieresis+accent just strip the accent,
   // and reset to start state (don't form diphthong with following vowel)
   case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
   case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
     *aState = kStart;
     return GREEK_UPPER_IOTA_DIALYTIKA;
   case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
   case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
     *aState = kStart;
     return GREEK_UPPER_UPSILON_DIALYTIKA;
   case COMBINING_GREEK_DIALYTIKA_TONOS:
     *aState = kStart;
     return COMBINING_DIAERESIS;
   // strip accents from vowels, and note the vowel seen so that we can detect
   // diphthongs where diaeresis needs to be added
   case GREEK_LOWER_ALPHA_TONOS:
   case GREEK_LOWER_ALPHA_OXIA:
   case GREEK_UPPER_ALPHA_TONOS:
   case GREEK_UPPER_ALPHA_OXIA:
     *aState = kAlphaAcc;
     return GREEK_UPPER_ALPHA;
   case GREEK_LOWER_EPSILON_TONOS:
   case GREEK_LOWER_EPSILON_OXIA:
   case GREEK_UPPER_EPSILON_TONOS:
   case GREEK_UPPER_EPSILON_OXIA:
     *aState = kEpsilonAcc;
     return GREEK_UPPER_EPSILON;
   case GREEK_LOWER_ETA_TONOS:
   case GREEK_LOWER_ETA_OXIA:
   case GREEK_UPPER_ETA_TONOS:
   case GREEK_UPPER_ETA_OXIA:
     *aState = kEtaAcc;
     return GREEK_UPPER_ETA;
   case GREEK_LOWER_IOTA_TONOS:
   case GREEK_LOWER_IOTA_OXIA:
   case GREEK_UPPER_IOTA_TONOS:
   case GREEK_UPPER_IOTA_OXIA:
     *aState = kIotaAcc;
     return GREEK_UPPER_IOTA;
   case GREEK_LOWER_OMICRON_TONOS:
   case GREEK_LOWER_OMICRON_OXIA:
   case GREEK_UPPER_OMICRON_TONOS:
   case GREEK_UPPER_OMICRON_OXIA:
     *aState = kOmicronAcc;
     return GREEK_UPPER_OMICRON;
   case GREEK_LOWER_UPSILON_TONOS:
   case GREEK_LOWER_UPSILON_OXIA:
   case GREEK_UPPER_UPSILON_TONOS:
   case GREEK_UPPER_UPSILON_OXIA:
     switch (*aState) {
     case kOmicron:
       *aState = kStart; // this completed a diphthong
       break;
     default:
       *aState = kUpsilonAcc;
       break;
     }
     return GREEK_UPPER_UPSILON;
   case GREEK_LOWER_OMEGA_TONOS:
   case GREEK_LOWER_OMEGA_OXIA:
   case GREEK_UPPER_OMEGA_TONOS:
   case GREEK_UPPER_OMEGA_OXIA:
     *aState = kOmegaAcc;
     return GREEK_UPPER_OMEGA;
   }
   // all other characters just reset the state, and use standard mappings
   *aState = kStart;
   return ToUpperCase(aCh);
 }
 nsTransformedTextRun *
 nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams,
                              nsTransformingTextRunFactory* aFactory,
                              gfxFontGroup* aFontGroup,
                              const char16_t* aString, uint32_t aLength,
                              const uint32_t aFlags, nsStyleContext** aStyles,
                              bool aOwnsFactory)
 {
   NS_ASSERTION(!(aFlags & gfxTextRunFactory::TEXT_IS_8BIT),
                "didn't expect text to be marked as 8-bit here");
   void *storage = AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
   if (!storage) {
     return nullptr;
   }
   return new (storage) nsTransformedTextRun(aParams, aFactory, aFontGroup,
                                             aString, aLength,
                                             aFlags, aStyles, aOwnsFactory);
 }
 void
 nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
                                         bool* aCapitalization,
                                         gfxContext* aRefContext)
 {
   if (mCapitalize.IsEmpty()) {
     if (!mCapitalize.AppendElements(GetLength()))
       return;
     memset(mCapitalize.Elements(), 0, GetLength()*sizeof(bool));
   }
   memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength*sizeof(bool));
   mNeedsRebuild = true;
 }
 bool
 nsTransformedTextRun::SetPotentialLineBreaks(uint32_t aStart, uint32_t aLength,
                                              uint8_t* aBreakBefore,
                                              gfxContext* aRefContext)
 {
   bool changed = gfxTextRun::SetPotentialLineBreaks(aStart, aLength,
       aBreakBefore, aRefContext);
   if (changed) {
     mNeedsRebuild = true;
   }
   return changed;
 }
 size_t
 nsTransformedTextRun::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)
 {
   size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
   total += mStyles.SizeOfExcludingThis(aMallocSizeOf);
   total += mCapitalize.SizeOfExcludingThis(aMallocSizeOf);
   if (mOwnsFactory) {
     total += aMallocSizeOf(mFactory);
   }
   return total;
 }
 size_t
 nsTransformedTextRun::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
 {
   return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
 }
 nsTransformedTextRun*
 nsTransformingTextRunFactory::MakeTextRun(const char16_t* aString, uint32_t aLength,
                                           const gfxTextRunFactory::Parameters* aParams,
                                           gfxFontGroup* aFontGroup, uint32_t aFlags,
                                           nsStyleContext** aStyles, bool aOwnsFactory)
 {
   return nsTransformedTextRun::Create(aParams, this, aFontGroup,
                                       aString, aLength, aFlags, aStyles, aOwnsFactory);
 }
 nsTransformedTextRun*
 nsTransformingTextRunFactory::MakeTextRun(const uint8_t* aString, uint32_t aLength,
                                           const gfxTextRunFactory::Parameters* aParams,
                                           gfxFontGroup* aFontGroup, uint32_t aFlags,
                                           nsStyleContext** aStyles, bool aOwnsFactory)
 {
   // We'll only have a Unicode code path to minimize the amount of code needed
   // for these rarely used features
   NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString), aLength);
   return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
                      aFlags & ~(gfxFontGroup::TEXT_IS_PERSISTENT | gfxFontGroup::TEXT_IS_8BIT),
                      aStyles, aOwnsFactory);
 }
 void
 MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
                          const bool* aCharsToMerge, const bool* aDeletedChars)
 {
   aDest->ResetGlyphRuns();
   gfxTextRun::GlyphRunIterator iter(aSrc, 0, aSrc->GetLength());
   uint32_t offset = 0;
   nsAutoTArray<gfxTextRun::DetailedGlyph,2> glyphs;
   while (iter.NextRun()) {
     gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
     nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType,
                                      offset, false);
     if (NS_FAILED(rv))
       return;
     bool anyMissing = false;
     uint32_t mergeRunStart = iter.GetStringStart();
     const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs();
     gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
     uint32_t stringEnd = iter.GetStringEnd();
     for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) {
       const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
       if (g.IsSimpleGlyph()) {
         if (!anyMissing) {
           gfxTextRun::DetailedGlyph details;
           details.mGlyphID = g.GetSimpleGlyph();
           details.mAdvance = g.GetSimpleAdvance();
           details.mXOffset = 0;
           details.mYOffset = 0;
           glyphs.AppendElement(details);
         }
       } else {
         if (g.IsMissing()) {
           anyMissing = true;
           glyphs.Clear();
         }
         if (g.GetGlyphCount() > 0) {
           glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
         }
       }
       if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {
         // next char is supposed to merge with current, so loop without
         // writing current merged glyph to the destination
         continue;
       }
       // If the start of the merge run is actually a character that should
       // have been merged with the previous character (this can happen
       // if there's a font change in the middle of a case-mapped character,
       // that decomposed into a sequence of base+diacritics, for example),
       // just discard the entire merge run. See comment at start of this
       // function.
       NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart],
                        "unable to merge across a glyph run boundary, "
                        "glyph(s) discarded");
       if (!aCharsToMerge[mergeRunStart]) {
         if (anyMissing) {
           mergedGlyph.SetMissing(glyphs.Length());
         } else {
           mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
                                  mergedGlyph.IsLigatureGroupStart(),
                                  glyphs.Length());
         }
         aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
         ++offset;
         while (offset < aDest->GetLength() && aDeletedChars[offset]) {
           aDest->SetGlyphs(offset++, gfxTextRun::CompressedGlyph(), nullptr);
         }
       }
       glyphs.Clear();
       anyMissing = false;
       mergeRunStart = k + 1;
       if (mergeRunStart < stringEnd) {
         mergedGlyph = srcGlyphs[mergeRunStart];
       }
     }
     NS_ASSERTION(glyphs.Length() == 0,
                  "Leftover glyphs, don't request merging of the last character with its next!");
   }
   NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
 }
 gfxTextRunFactory::Parameters
 GetParametersForInner(nsTransformedTextRun* aTextRun, uint32_t* aFlags,
     gfxContext* aRefContext)
 {
   gfxTextRunFactory::Parameters params =
     { aRefContext, nullptr, nullptr,
       nullptr, 0, aTextRun->GetAppUnitsPerDevUnit()
     };
   *aFlags = aTextRun->GetFlags() & ~gfxFontGroup::TEXT_IS_PERSISTENT;
   return params;
 }
 void
 nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
     gfxContext* aRefContext)
 {
   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
   gfxFontStyle fontStyle = *fontGroup->GetStyle();
   fontStyle.size *= 0.8;
   nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle);
   if (!smallFont)
     return;
   uint32_t flags;
   gfxTextRunFactory::Parameters innerParams =
       GetParametersForInner(aTextRun, &flags, aRefContext);
   uint32_t length = aTextRun->GetLength();
   const char16_t* str = aTextRun->mString.BeginReading();
   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
   // Create a textrun so we can check cluster-start properties
   nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags));
   if (!inner.get())
     return;
   nsCaseTransformTextRunFactory uppercaseFactory(nullptr, true);
   aTextRun->ResetGlyphRuns();
   uint32_t runStart = 0;
   nsAutoTArray<nsStyleContext*,50> styleArray;
   nsAutoTArray<uint8_t,50> canBreakBeforeArray;
   enum RunCaseState {
     kUpperOrCaseless, // will be untouched by font-variant:small-caps
     kLowercase,       // will be uppercased and reduced
     kSpecialUpper     // specials: don't shrink, but apply uppercase mapping
   };
   RunCaseState runCase = kUpperOrCaseless;
   // Note that this loop runs from 0 to length *inclusive*, so the last
   // iteration is in effect beyond the end of the input text, to give a
   // chance to finish the last casing run we've found.
   // The last iteration, when i==length, must not attempt to look at the
   // character position [i] or the style data for styles[i], as this would
   // be beyond the valid length of the textrun or its style array.
   for (uint32_t i = 0; i <= length; ++i) {
     RunCaseState chCase = kUpperOrCaseless;
     // Unless we're at the end, figure out what treatment the current
     // character will need.
     if (i < length) {
       nsStyleContext* styleContext = styles[i];
       // Characters that aren't the start of a cluster are ignored here. They
       // get added to whatever lowercase/non-lowercase run we're in.
       if (!inner->IsClusterStart(i)) {
         chCase = runCase;
       } else {
         if (styleContext->StyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) {
           uint32_t ch = str[i];
           if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
             ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
           }
           uint32_t ch2 = ToUpperCase(ch);
           if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) {
             chCase = kLowercase;
           } else if (styleContext->StyleFont()->mLanguage == nsGkAtoms::el) {
             // In Greek, check for characters that will be modified by the
             // GreekUpperCase mapping - this catches accented capitals where
             // the accent is to be removed (bug 307039). These are handled by
             // a transformed child run using the full-size font.
             GreekCasingState state = kStart; // don't need exact context here
             ch2 = GreekUpperCase(ch, &state);
             if (ch != ch2) {
               chCase = kSpecialUpper;
             }
           }
         } else {
           // Don't transform the character! I.e., pretend that it's not lowercase
         }
       }
     }
     // At the end of the text, or when the current character needs different
     // casing treatment from the current run, finish the run-in-progress
     // and prepare to accumulate a new run.
     // Note that we do not look at any source data for offset [i] here,
     // as that would be invalid in the case where i==length.
     if ((i == length || runCase != chCase) && runStart < i) {
       nsAutoPtr<nsTransformedTextRun> transformedChild;
       nsAutoPtr<gfxTextRun> cachedChild;
       gfxTextRun* child;
       switch (runCase) {
       case kUpperOrCaseless:
         cachedChild =
           fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams,
                                  flags);
         child = cachedChild.get();
         break;
       case kLowercase:
         transformedChild =
           uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
                                        &innerParams, smallFont, flags,
                                        styleArray.Elements(), false);
         child = transformedChild;
         break;
       case kSpecialUpper:
         transformedChild =
           uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
                                        &innerParams, fontGroup, flags,
                                        styleArray.Elements(), false);
         child = transformedChild;
         break;
       }
       if (!child)
         return;
       // Copy potential linebreaks into child so they're preserved
       // (and also child will be shaped appropriately)
       NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart,
                    "lost some break-before values?");
       child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
           canBreakBeforeArray.Elements(), aRefContext);
       if (transformedChild) {
         transformedChild->FinishSettingProperties(aRefContext);
       }
       aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart);
       runStart = i;
       styleArray.Clear();
       canBreakBeforeArray.Clear();
     }
     if (i < length) {
       runCase = chCase;
       styleArray.AppendElement(styles[i]);
       canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
     }
   }
 }
 void
 nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
     gfxContext* aRefContext)
 {
   uint32_t length = aTextRun->GetLength();
   const char16_t* str = aTextRun->mString.BeginReading();
   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
   nsAutoString convertedString;
   nsAutoTArray<bool,50> charsToMergeArray;
   nsAutoTArray<bool,50> deletedCharsArray;
   nsAutoTArray<nsStyleContext*,50> styleArray;
   nsAutoTArray<uint8_t,50> canBreakBeforeArray;
   bool mergeNeeded = false;
   // Some languages have special casing conventions that differ from the
   // default Unicode mappings.
   // The enum values here are named for well-known exemplar languages that
   // exhibit the behavior in question; multiple lang tags may map to the
   // same setting here, if the behavior is shared by other languages.
   enum {
     eNone,    // default non-lang-specific behavior
     eTurkish, // preserve dotted/dotless-i distinction in uppercase
     eDutch,   // treat "ij" digraph as a unit for capitalization
     eGreek    // strip accent when uppercasing Greek vowels
   } languageSpecificCasing = eNone;
   const nsIAtom* lang = nullptr;
   bool capitalizeDutchIJ = false;
   bool prevIsLetter = false;
   uint32_t sigmaIndex = uint32_t(-1);
   nsIUGenCategory::nsUGenCategory cat;
   GreekCasingState greekState = kStart;
   uint32_t i;
   for (i = 0; i < length; ++i) {
     uint32_t ch = str[i];
     nsStyleContext* styleContext = styles[i];
     uint8_t style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
       : styleContext->StyleText()->mTextTransform;
     int extraChars = 0;
     const mozilla::unicode::MultiCharMapping *mcm;
     if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
       ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
     }
     if (lang != styleContext->StyleFont()->mLanguage) {
       lang = styleContext->StyleFont()->mLanguage;
       if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
           lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
           lang == nsGkAtoms::tt) {
         languageSpecificCasing = eTurkish;
       } else if (lang == nsGkAtoms::nl) {
         languageSpecificCasing = eDutch;
       } else if (lang == nsGkAtoms::el) {
         languageSpecificCasing = eGreek;
         greekState = kStart;
       } else {
         languageSpecificCasing = eNone;
       }
     }
     switch (style) {
     case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
       if (languageSpecificCasing == eTurkish) {
         if (ch == 'I') {
           ch = LATIN_SMALL_LETTER_DOTLESS_I;
           prevIsLetter = true;
           sigmaIndex = uint32_t(-1);
           break;
         }
         if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
           ch = 'i';
           prevIsLetter = true;
           sigmaIndex = uint32_t(-1);
           break;
         }
       }
       // Special lowercasing behavior for Greek Sigma: note that this is listed
       // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
       // language-specific mapping; it applies regardless of the language of
       // the element.
       //
       // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
       // the non-final form) whenever there is a following letter, or when the
       // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
       // LETTER); and to FINAL SIGMA when it is preceded by another letter but
       // not followed by one.
       //
       // To implement the context-sensitive nature of this mapping, we keep
       // track of whether the previous character was a letter. If not, CAPITAL
       // SIGMA will map directly to SMALL SIGMA. If the previous character
       // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
       // position in the converted string; if we then encounter another letter,
       // that FINAL SIGMA is replaced with a standard SMALL SIGMA.
       cat = mozilla::unicode::GetGenCategory(ch);
       // If sigmaIndex is not -1, it marks where we have provisionally mapped
       // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
       // need to change it to SMALL SIGMA.
       if (sigmaIndex != uint32_t(-1)) {
         if (cat == nsIUGenCategory::kLetter) {
           convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
         }
       }
       if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
         // If preceding char was a letter, map to FINAL instead of SMALL,
         // and note where it occurred by setting sigmaIndex; we'll change it
         // to standard SMALL SIGMA later if another letter follows
         if (prevIsLetter) {
           ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
           sigmaIndex = convertedString.Length();
         } else {
           // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
           // to SMALL SIGMA
           ch = GREEK_SMALL_LETTER_SIGMA;
           sigmaIndex = uint32_t(-1);
         }
         prevIsLetter = true;
         break;
       }
       // ignore diacritics for the purpose of contextual sigma mapping;
       // otherwise, reset prevIsLetter appropriately and clear the
       // sigmaIndex marker
       if (cat != nsIUGenCategory::kMark) {
         prevIsLetter = (cat == nsIUGenCategory::kLetter);
         sigmaIndex = uint32_t(-1);
       }
       mcm = mozilla::unicode::SpecialLower(ch);
       if (mcm) {
         int j = 0;
         while (j < 2 && mcm->mMappedChars[j + 1]) {
           convertedString.Append(mcm->mMappedChars[j]);
           ++extraChars;
           ++j;
         }
         ch = mcm->mMappedChars[j];
         break;
       }
       ch = ToLowerCase(ch);
       break;
     case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
       if (languageSpecificCasing == eTurkish && ch == 'i') {
         ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
         break;
       }
       if (languageSpecificCasing == eGreek) {
         ch = GreekUpperCase(ch, &greekState);
         break;
       }
       mcm = mozilla::unicode::SpecialUpper(ch);
       if (mcm) {
         int j = 0;
         while (j < 2 && mcm->mMappedChars[j + 1]) {
           convertedString.Append(mcm->mMappedChars[j]);
           ++extraChars;
           ++j;
         }
         ch = mcm->mMappedChars[j];
         break;
       }
       ch = ToUpperCase(ch);
       break;
     case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
       if (capitalizeDutchIJ && ch == 'j') {
         ch = 'J';
         capitalizeDutchIJ = false;
         break;
       }
       capitalizeDutchIJ = false;
       if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
         if (languageSpecificCasing == eTurkish && ch == 'i') {
           ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
           break;
         }
         if (languageSpecificCasing == eDutch && ch == 'i') {
           ch = 'I';
           capitalizeDutchIJ = true;
           break;
         }
         mcm = mozilla::unicode::SpecialTitle(ch);
         if (mcm) {
           int j = 0;
           while (j < 2 && mcm->mMappedChars[j + 1]) {
             convertedString.Append(mcm->mMappedChars[j]);
             ++extraChars;
             ++j;
           }
           ch = mcm->mMappedChars[j];
           break;
         }
         ch = ToTitleCase(ch);
       }
       break;
     case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:
       ch = mozilla::unicode::GetFullWidth(ch);
       break;
     default:
       break;
     }
     if (ch == uint32_t(-1)) {
       deletedCharsArray.AppendElement(true);
       mergeNeeded = true;
     } else {
       deletedCharsArray.AppendElement(false);
       charsToMergeArray.AppendElement(false);
       styleArray.AppendElement(styleContext);
       canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
       if (IS_IN_BMP(ch)) {
         convertedString.Append(ch);
       } else {
         convertedString.Append(H_SURROGATE(ch));
         convertedString.Append(L_SURROGATE(ch));
         ++i;
         deletedCharsArray.AppendElement(true); // not exactly deleted, but the
                                                // trailing surrogate is skipped
         ++extraChars;
       }
       while (extraChars-- > 0) {
         mergeNeeded = true;
         charsToMergeArray.AppendElement(true);
         styleArray.AppendElement(styleContext);
         canBreakBeforeArray.AppendElement(false);
       }
     }
   }
   uint32_t flags;
   gfxTextRunFactory::Parameters innerParams =
       GetParametersForInner(aTextRun, &flags, aRefContext);
   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
   nsAutoPtr<nsTransformedTextRun> transformedChild;
   nsAutoPtr<gfxTextRun> cachedChild;
   gfxTextRun* child;
   if (mInnerTransformingTextRunFactory) {
     transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
         convertedString.BeginReading(), convertedString.Length(),
         &innerParams, fontGroup, flags, styleArray.Elements(), false);
     child = transformedChild.get();
   } else {
     cachedChild = fontGroup->MakeTextRun(
         convertedString.BeginReading(), convertedString.Length(),
         &innerParams, flags);
     child = cachedChild.get();
   }
   if (!child)
     return;
   // Copy potential linebreaks into child so they're preserved
   // (and also child will be shaped appropriately)
   NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
                "Dropped characters or break-before values somewhere!");
   child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
       canBreakBeforeArray.Elements(), aRefContext);
   if (transformedChild) {
     transformedChild->FinishSettingProperties(aRefContext);
   }
   if (mergeNeeded) {
     // Now merge multiple characters into one multi-glyph character as required
     // and deal with skipping deleted accent chars
     NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
                  "source length mismatch");
     NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
                  "destination length mismatch");
     MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
                              deletedCharsArray.Elements());
   } else {
     // No merging to do, so just copy; this produces a more optimized textrun.
     // We can't steal the data because the child may be cached and stealing
     // the data would break the cache.
     aTextRun->ResetGlyphRuns();
     aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
   }
 }

The Tor Browser / annotate

layout/generic/nsTextRunTransformations.cpp@b8a032363ba2 (annotated)

layout/generic/nsTextRunTransformations.cpp