michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
michael@0:  * This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: #include "nsTextRunTransformations.h"
michael@0: 
michael@0: #include "mozilla/MemoryReporting.h"
michael@0: 
michael@0: #include "nsGkAtoms.h"
michael@0: #include "nsStyleConsts.h"
michael@0: #include "nsStyleContext.h"
michael@0: #include "nsUnicodeProperties.h"
michael@0: #include "nsSpecialCasingData.h"
michael@0: #include "mozilla/gfx/2D.h"
michael@0: #include "nsTextFrameUtils.h"
michael@0: #include "nsIPersistentProperties2.h"
michael@0: #include "nsNetUtil.h"
michael@0: 
michael@0: /* Unicode characters needing special casing treatment in tr/az and the other
michael@0:  * languages that nsCaseTransformTextRunFactory::RebuildTextRun() maps to its
michael@0:  * eTurkish setting (dotted/dotless i distinction). */
michael@0: #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE  0x0130
michael@0: #define LATIN_SMALL_LETTER_DOTLESS_I           0x0131
michael@0: 
michael@0: // Greek sigma needs custom handling for the lowercase transform; for details
michael@0: // see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
michael@0: // nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120.
michael@0: #define GREEK_CAPITAL_LETTER_SIGMA             0x03A3
michael@0: #define GREEK_SMALL_LETTER_FINAL_SIGMA         0x03C2
michael@0: #define GREEK_SMALL_LETTER_SIGMA               0x03C3
michael@0: 
michael@0: /* Custom uppercase mapping for Greek; see bug 307039 for details.
michael@0:  * Each accented vowel is listed under two code points (*_TONOS and *_OXIA);
michael@0:  * GreekUpperCase() treats both members of every such pair identically, and
michael@0:  * likewise treats COMBINING_ACUTE_ACCENT and COMBINING_ACUTE_TONE_MARK alike. */
michael@0: #define GREEK_LOWER_ALPHA                      0x03B1
michael@0: #define GREEK_LOWER_ALPHA_TONOS                0x03AC
michael@0: #define GREEK_LOWER_ALPHA_OXIA                 0x1F71
michael@0: #define GREEK_LOWER_EPSILON                    0x03B5
michael@0: #define GREEK_LOWER_EPSILON_TONOS              0x03AD
michael@0: #define GREEK_LOWER_EPSILON_OXIA               0x1F73
michael@0: #define GREEK_LOWER_ETA                        0x03B7
michael@0: #define GREEK_LOWER_ETA_TONOS                  0x03AE
michael@0: #define GREEK_LOWER_ETA_OXIA                   0x1F75
michael@0: #define GREEK_LOWER_IOTA                       0x03B9
michael@0: #define GREEK_LOWER_IOTA_TONOS                 0x03AF
michael@0: #define GREEK_LOWER_IOTA_OXIA                  0x1F77
michael@0: #define GREEK_LOWER_IOTA_DIALYTIKA             0x03CA
michael@0: #define GREEK_LOWER_IOTA_DIALYTIKA_TONOS       0x0390
michael@0: #define GREEK_LOWER_IOTA_DIALYTIKA_OXIA        0x1FD3
michael@0: #define GREEK_LOWER_OMICRON                    0x03BF
michael@0: #define GREEK_LOWER_OMICRON_TONOS              0x03CC
michael@0: #define GREEK_LOWER_OMICRON_OXIA               0x1F79
michael@0: #define GREEK_LOWER_UPSILON                    0x03C5
michael@0: #define GREEK_LOWER_UPSILON_TONOS              0x03CD
michael@0: #define GREEK_LOWER_UPSILON_OXIA               0x1F7B
michael@0: #define GREEK_LOWER_UPSILON_DIALYTIKA          0x03CB
michael@0: #define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS    0x03B0
michael@0: #define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA     0x1FE3
michael@0: #define GREEK_LOWER_OMEGA                      0x03C9
michael@0: #define GREEK_LOWER_OMEGA_TONOS                0x03CE
michael@0: #define GREEK_LOWER_OMEGA_OXIA                 0x1F7D
michael@0: #define GREEK_UPPER_ALPHA                      0x0391
michael@0: #define GREEK_UPPER_EPSILON                    0x0395
michael@0: #define GREEK_UPPER_ETA                        0x0397
michael@0: #define GREEK_UPPER_IOTA                       0x0399
michael@0: #define GREEK_UPPER_IOTA_DIALYTIKA             0x03AA
michael@0: #define GREEK_UPPER_OMICRON                    0x039F
michael@0: #define GREEK_UPPER_UPSILON                    0x03A5
michael@0: #define GREEK_UPPER_UPSILON_DIALYTIKA          0x03AB
michael@0: #define GREEK_UPPER_OMEGA                      0x03A9
michael@0: #define GREEK_UPPER_ALPHA_TONOS                0x0386
michael@0: #define GREEK_UPPER_ALPHA_OXIA                 0x1FBB
michael@0: #define GREEK_UPPER_EPSILON_TONOS              0x0388
michael@0: #define GREEK_UPPER_EPSILON_OXIA               0x1FC9
michael@0: #define GREEK_UPPER_ETA_TONOS                  0x0389
michael@0: #define GREEK_UPPER_ETA_OXIA                   0x1FCB
michael@0: #define GREEK_UPPER_IOTA_TONOS                 0x038A
michael@0: #define GREEK_UPPER_IOTA_OXIA                  0x1FDB
michael@0: #define GREEK_UPPER_OMICRON_TONOS              0x038C
michael@0: #define GREEK_UPPER_OMICRON_OXIA               0x1FF9
michael@0: #define GREEK_UPPER_UPSILON_TONOS              0x038E
michael@0: #define GREEK_UPPER_UPSILON_OXIA               0x1FEB
michael@0: #define GREEK_UPPER_OMEGA_TONOS                0x038F
michael@0: #define GREEK_UPPER_OMEGA_OXIA                 0x1FFB
michael@0: #define COMBINING_ACUTE_ACCENT                 0x0301
michael@0: #define COMBINING_DIAERESIS                    0x0308
michael@0: #define COMBINING_ACUTE_TONE_MARK              0x0341
michael@0: #define COMBINING_GREEK_DIALYTIKA_TONOS        0x0344
michael@0: 
michael@0: // When doing an Uppercase transform in Greek, we need to keep track of the
michael@0: // current state while iterating through the string, to recognize and process
michael@0: // diphthongs correctly. For clarity, we define a state for each vowel and
michael@0: // each vowel with accent, although a few of these do not actually need any
michael@0: // special treatment and could be folded into kStart.
michael@0: enum GreekCasingState {
michael@0:   kStart,          // no vowel context; also the state after any other character
michael@0:   kAlpha,          // kAlpha..kOmega: the named unaccented vowel was just seen
michael@0:   kEpsilon,
michael@0:   kEta,
michael@0:   kIota,
michael@0:   kOmicron,
michael@0:   kUpsilon,
michael@0:   kOmega,
michael@0:   kAlphaAcc,       // k*Acc: the named vowel was just seen with an acute accent,
michael@0:   kEpsilonAcc,     // either precomposed or as vowel + combining accent
michael@0:   kEtaAcc,
michael@0:   kIotaAcc,
michael@0:   kOmicronAcc,
michael@0:   kUpsilonAcc,
michael@0:   kOmegaAcc,
michael@0:   kOmicronUpsilon, // unaccented upsilon seen while the state was kOmicron
michael@0:   kDiaeresis       // iota/upsilon with dialytika, or a combining diaeresis, seen
michael@0: };
michael@0: 
michael@0: /**
michael@0:  * Uppercase one character using the Greek-specific rules (bug 307039):
michael@0:  * acute accents are dropped from vowels, and a diaeresis is added to an
michael@0:  * iota/upsilon that follows certain accented vowels.
michael@0:  *
michael@0:  * @param aCh     the character to convert.
michael@0:  * @param aState  in/out casing context. It is consulted to recognize
michael@0:  *                diphthongs and combining accents, and is always rewritten
michael@0:  *                before the function returns.
michael@0:  * @return the replacement character, or uint32_t(-1) when aCh is a combining
michael@0:  *         acute accent following a vowel or diaeresis and must be omitted
michael@0:  *         from the result string.
michael@0:  */
michael@0: static uint32_t
michael@0: GreekUpperCase(uint32_t aCh, GreekCasingState* aState)
michael@0: {
michael@0:   GreekCasingState& state = *aState;
michael@0:   // Sentinel telling the caller to drop this character from the output.
michael@0:   const uint32_t kOmitChar = uint32_t(-1);
michael@0: 
michael@0:   switch (aCh) {
michael@0:   // --- unaccented vowels: record which vowel was seen ---
michael@0:   case GREEK_LOWER_ALPHA:
michael@0:   case GREEK_UPPER_ALPHA:
michael@0:     state = kAlpha;
michael@0:     return GREEK_UPPER_ALPHA;
michael@0: 
michael@0:   case GREEK_LOWER_EPSILON:
michael@0:   case GREEK_UPPER_EPSILON:
michael@0:     state = kEpsilon;
michael@0:     return GREEK_UPPER_EPSILON;
michael@0: 
michael@0:   case GREEK_LOWER_ETA:
michael@0:   case GREEK_UPPER_ETA:
michael@0:     state = kEta;
michael@0:     return GREEK_UPPER_ETA;
michael@0: 
michael@0:   case GREEK_UPPER_IOTA:
michael@0:     state = kIota;
michael@0:     return GREEK_UPPER_IOTA;
michael@0: 
michael@0:   case GREEK_LOWER_OMICRON:
michael@0:   case GREEK_UPPER_OMICRON:
michael@0:     state = kOmicron;
michael@0:     return GREEK_UPPER_OMICRON;
michael@0: 
michael@0:   case GREEK_UPPER_UPSILON:
michael@0:     // omicron + upsilon is tracked as its own state
michael@0:     state = (state == kOmicron) ? kOmicronUpsilon : kUpsilon;
michael@0:     return GREEK_UPPER_UPSILON;
michael@0: 
michael@0:   case GREEK_LOWER_OMEGA:
michael@0:   case GREEK_UPPER_OMEGA:
michael@0:     state = kOmega;
michael@0:     return GREEK_UPPER_OMEGA;
michael@0: 
michael@0:   // --- lowercase iota/upsilon: possibly the second vowel of a diphthong ---
michael@0:   case GREEK_LOWER_IOTA:
michael@0:     if (state == kAlphaAcc || state == kEpsilonAcc ||
michael@0:         state == kOmicronAcc || state == kUpsilonAcc) {
michael@0:       state = kStart;
michael@0:       return GREEK_UPPER_IOTA_DIALYTIKA;
michael@0:     }
michael@0:     state = kIota;
michael@0:     return GREEK_UPPER_IOTA;
michael@0: 
michael@0:   case GREEK_LOWER_UPSILON:
michael@0:     if (state == kAlphaAcc || state == kEpsilonAcc ||
michael@0:         state == kEtaAcc || state == kOmicronAcc) {
michael@0:       state = kStart;
michael@0:       return GREEK_UPPER_UPSILON_DIALYTIKA;
michael@0:     }
michael@0:     state = (state == kOmicron) ? kOmicronUpsilon : kUpsilon;
michael@0:     return GREEK_UPPER_UPSILON;
michael@0: 
michael@0:   // --- anything already carrying a diaeresis ---
michael@0:   case GREEK_LOWER_IOTA_DIALYTIKA:
michael@0:   case GREEK_UPPER_IOTA_DIALYTIKA:
michael@0:   case GREEK_LOWER_UPSILON_DIALYTIKA:
michael@0:   case GREEK_UPPER_UPSILON_DIALYTIKA:
michael@0:   case COMBINING_DIAERESIS:
michael@0:     state = kDiaeresis;
michael@0:     return ToUpperCase(aCh);
michael@0: 
michael@0:   // --- combining acute: dropped when it follows a vowel or diaeresis, and
michael@0:   //     the state is advanced so a following iota/upsilon can be detected ---
michael@0:   case COMBINING_ACUTE_ACCENT:
michael@0:   case COMBINING_ACUTE_TONE_MARK: {
michael@0:     GreekCasingState afterAccent = kStart;
michael@0:     switch (state) {
michael@0:     case kAlpha:   afterAccent = kAlphaAcc;   break;
michael@0:     case kEpsilon: afterAccent = kEpsilonAcc; break;
michael@0:     case kEta:     afterAccent = kEtaAcc;     break;
michael@0:     case kIota:    afterAccent = kIotaAcc;    break;
michael@0:     case kOmicron: afterAccent = kOmicronAcc; break;
michael@0:     case kUpsilon: afterAccent = kUpsilonAcc; break;
michael@0:     case kOmega:   afterAccent = kOmegaAcc;   break;
michael@0:     case kOmicronUpsilon: // this completed a diphthong
michael@0:     case kDiaeresis:
michael@0:       afterAccent = kStart;
michael@0:       break;
michael@0:     default:
michael@0:       // accent in any other context: standard mapping, state reset
michael@0:       state = kStart;
michael@0:       return ToUpperCase(aCh);
michael@0:     }
michael@0:     state = afterAccent;
michael@0:     return kOmitChar;
michael@0:   }
michael@0: 
michael@0:   // --- diaeresis + accent: keep the diaeresis, drop the accent, and reset
michael@0:   //     (no diphthong is formed with the following vowel) ---
michael@0:   case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
michael@0:   case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
michael@0:     state = kStart;
michael@0:     return GREEK_UPPER_IOTA_DIALYTIKA;
michael@0: 
michael@0:   case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
michael@0:   case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
michael@0:     state = kStart;
michael@0:     return GREEK_UPPER_UPSILON_DIALYTIKA;
michael@0: 
michael@0:   case COMBINING_GREEK_DIALYTIKA_TONOS:
michael@0:     state = kStart;
michael@0:     return COMBINING_DIAERESIS;
michael@0: 
michael@0:   // --- precomposed accented vowels: emit the bare capital and remember the
michael@0:   //     vowel, so a following iota/upsilon can receive a diaeresis ---
michael@0:   case GREEK_LOWER_ALPHA_TONOS:
michael@0:   case GREEK_LOWER_ALPHA_OXIA:
michael@0:   case GREEK_UPPER_ALPHA_TONOS:
michael@0:   case GREEK_UPPER_ALPHA_OXIA:
michael@0:     state = kAlphaAcc;
michael@0:     return GREEK_UPPER_ALPHA;
michael@0: 
michael@0:   case GREEK_LOWER_EPSILON_TONOS:
michael@0:   case GREEK_LOWER_EPSILON_OXIA:
michael@0:   case GREEK_UPPER_EPSILON_TONOS:
michael@0:   case GREEK_UPPER_EPSILON_OXIA:
michael@0:     state = kEpsilonAcc;
michael@0:     return GREEK_UPPER_EPSILON;
michael@0: 
michael@0:   case GREEK_LOWER_ETA_TONOS:
michael@0:   case GREEK_LOWER_ETA_OXIA:
michael@0:   case GREEK_UPPER_ETA_TONOS:
michael@0:   case GREEK_UPPER_ETA_OXIA:
michael@0:     state = kEtaAcc;
michael@0:     return GREEK_UPPER_ETA;
michael@0: 
michael@0:   case GREEK_LOWER_IOTA_TONOS:
michael@0:   case GREEK_LOWER_IOTA_OXIA:
michael@0:   case GREEK_UPPER_IOTA_TONOS:
michael@0:   case GREEK_UPPER_IOTA_OXIA:
michael@0:     state = kIotaAcc;
michael@0:     return GREEK_UPPER_IOTA;
michael@0: 
michael@0:   case GREEK_LOWER_OMICRON_TONOS:
michael@0:   case GREEK_LOWER_OMICRON_OXIA:
michael@0:   case GREEK_UPPER_OMICRON_TONOS:
michael@0:   case GREEK_UPPER_OMICRON_OXIA:
michael@0:     state = kOmicronAcc;
michael@0:     return GREEK_UPPER_OMICRON;
michael@0: 
michael@0:   case GREEK_LOWER_UPSILON_TONOS:
michael@0:   case GREEK_LOWER_UPSILON_OXIA:
michael@0:   case GREEK_UPPER_UPSILON_TONOS:
michael@0:   case GREEK_UPPER_UPSILON_OXIA:
michael@0:     // after a plain omicron this completes a diphthong
michael@0:     state = (state == kOmicron) ? kStart : kUpsilonAcc;
michael@0:     return GREEK_UPPER_UPSILON;
michael@0: 
michael@0:   case GREEK_LOWER_OMEGA_TONOS:
michael@0:   case GREEK_LOWER_OMEGA_OXIA:
michael@0:   case GREEK_UPPER_OMEGA_TONOS:
michael@0:   case GREEK_UPPER_OMEGA_OXIA:
michael@0:     state = kOmegaAcc;
michael@0:     return GREEK_UPPER_OMEGA;
michael@0: 
michael@0:   default:
michael@0:     break;
michael@0:   }
michael@0: 
michael@0:   // every other character resets the context and takes the standard mapping
michael@0:   state = kStart;
michael@0:   return ToUpperCase(aCh);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Allocate storage for, and construct, an nsTransformedTextRun.
michael@0:  *
michael@0:  * All arguments are forwarded unchanged to the constructor. aFlags must not
michael@0:  * include gfxTextRunFactory::TEXT_IS_8BIT (asserted).
michael@0:  *
michael@0:  * @return the new text run, or nullptr if storage could not be allocated.
michael@0:  */
michael@0: nsTransformedTextRun *
michael@0: nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams,
michael@0:                              nsTransformingTextRunFactory* aFactory,
michael@0:                              gfxFontGroup* aFontGroup,
michael@0:                              const char16_t* aString, uint32_t aLength,
michael@0:                              const uint32_t aFlags, nsStyleContext** aStyles,
michael@0:                              bool aOwnsFactory)
michael@0: {
michael@0:   NS_ASSERTION(!(aFlags & gfxTextRunFactory::TEXT_IS_8BIT),
michael@0:                "didn't expect text to be marked as 8-bit here");
michael@0: 
michael@0:   // Construct in place only when the allocation succeeded.
michael@0:   if (void* buffer =
michael@0:         AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength)) {
michael@0:     return new (buffer) nsTransformedTextRun(aParams, aFactory, aFontGroup,
michael@0:                                              aString, aLength,
michael@0:                                              aFlags, aStyles, aOwnsFactory);
michael@0:   }
michael@0:   return nullptr;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Record per-character capitalization flags for the range
michael@0:  * [aStart, aStart + aLength) and mark the run as needing a rebuild.
michael@0:  *
michael@0:  * The flag array is created lazily, zero-filled, with one entry per
michael@0:  * character of the run; if that allocation fails nothing is changed.
michael@0:  *
michael@0:  * @param aStart          index of the first character to update.
michael@0:  * @param aLength         number of flags to copy.
michael@0:  * @param aCapitalization source array of at least aLength flags.
michael@0:  * @param aRefContext     unused by this method.
michael@0:  */
michael@0: void
michael@0: nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
michael@0:                                         bool* aCapitalization,
michael@0:                                         gfxContext* aRefContext)
michael@0: {
michael@0:   const uint32_t runLength = GetLength();
michael@0: 
michael@0:   // A range that does not fit inside the run would make the memcpy below
michael@0:   // write past the end of mCapitalize, so refuse it. The comparison is
michael@0:   // arranged so that aStart + aLength cannot overflow.
michael@0:   const bool rangeFits = aStart <= runLength && aLength <= runLength - aStart;
michael@0:   NS_ASSERTION(rangeFits, "capitalization range exceeds text run length");
michael@0:   if (!rangeFits) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   if (mCapitalize.IsEmpty()) {
michael@0:     if (!mCapitalize.AppendElements(runLength))
michael@0:       return;
michael@0:     memset(mCapitalize.Elements(), 0, runLength*sizeof(bool));
michael@0:   }
michael@0:   memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength*sizeof(bool));
michael@0:   mNeedsRebuild = true;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Forward to gfxTextRun::SetPotentialLineBreaks() and, when that reports a
michael@0:  * change, flag this run as needing a rebuild.
michael@0:  *
michael@0:  * @return the value reported by the base-class implementation.
michael@0:  */
michael@0: bool
michael@0: nsTransformedTextRun::SetPotentialLineBreaks(uint32_t aStart, uint32_t aLength,
michael@0:                                              uint8_t* aBreakBefore,
michael@0:                                              gfxContext* aRefContext)
michael@0: {
michael@0:   if (!gfxTextRun::SetPotentialLineBreaks(aStart, aLength,
michael@0:                                           aBreakBefore, aRefContext)) {
michael@0:     // nothing changed, so the existing glyph data is still valid
michael@0:     return false;
michael@0:   }
michael@0:   mNeedsRebuild = true;
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Memory reporting: the base-class measurement plus the mStyles and
michael@0:  * mCapitalize arrays, plus the factory object when this run owns it.
michael@0:  */
michael@0: size_t
michael@0: nsTransformedTextRun::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)
michael@0: {
michael@0:   size_t bytes = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
michael@0:   bytes += mStyles.SizeOfExcludingThis(aMallocSizeOf);
michael@0:   bytes += mCapitalize.SizeOfExcludingThis(aMallocSizeOf);
michael@0:   // The factory is only counted here when it belongs to this run.
michael@0:   return mOwnsFactory ? bytes + aMallocSizeOf(mFactory) : bytes;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Memory reporting: the size of this object itself plus everything counted
michael@0:  * by SizeOfExcludingThis().
michael@0:  */
michael@0: size_t
michael@0: nsTransformedTextRun::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
michael@0: {
michael@0:   const size_t selfBytes = aMallocSizeOf(this);
michael@0:   return selfBytes + SizeOfExcludingThis(aMallocSizeOf);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Create a transformed text run for 16-bit text, with this factory as the
michael@0:  * run's factory. Every other argument is handed to
michael@0:  * nsTransformedTextRun::Create() unchanged.
michael@0:  *
michael@0:  * @return whatever nsTransformedTextRun::Create() returns.
michael@0:  */
michael@0: nsTransformedTextRun*
michael@0: nsTransformingTextRunFactory::MakeTextRun(const char16_t* aString, uint32_t aLength,
michael@0:                                           const gfxTextRunFactory::Parameters* aParams,
michael@0:                                           gfxFontGroup* aFontGroup, uint32_t aFlags,
michael@0:                                           nsStyleContext** aStyles, bool aOwnsFactory)
michael@0: {
michael@0:   nsTransformedTextRun* const run =
michael@0:     nsTransformedTextRun::Create(aParams, this, aFontGroup,
michael@0:                                  aString, aLength, aFlags, aStyles,
michael@0:                                  aOwnsFactory);
michael@0:   return run;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Create a transformed text run for 8-bit text by widening it to UTF-16
michael@0:  * and delegating to the char16_t overload, so that only one (Unicode) code
michael@0:  * path is needed for these rarely used features.
michael@0:  *
michael@0:  * TEXT_IS_8BIT and TEXT_IS_PERSISTENT are cleared from the flags passed on;
michael@0:  * the latter presumably because the widened string is a local temporary
michael@0:  * (NOTE(review): confirm against the flag's definition).
michael@0:  */
michael@0: nsTransformedTextRun*
michael@0: nsTransformingTextRunFactory::MakeTextRun(const uint8_t* aString, uint32_t aLength,
michael@0:                                           const gfxTextRunFactory::Parameters* aParams,
michael@0:                                           gfxFontGroup* aFontGroup, uint32_t aFlags,
michael@0:                                           nsStyleContext** aStyles, bool aOwnsFactory)
michael@0: {
michael@0:   const char* narrowText = reinterpret_cast<const char*>(aString);
michael@0:   NS_ConvertASCIItoUTF16 wideText(narrowText, aLength);
michael@0: 
michael@0:   const uint32_t wideFlags =
michael@0:     aFlags & ~(gfxFontGroup::TEXT_IS_PERSISTENT | gfxFontGroup::TEXT_IS_8BIT);
michael@0: 
michael@0:   return MakeTextRun(wideText.get(), aLength, aParams, aFontGroup,
michael@0:                      wideFlags, aStyles, aOwnsFactory);
michael@0: }
michael@0: /**
michael@0:  * Rebuild the glyph data of aDest from aSrc, folding groups of source
michael@0:  * characters into single destination characters.
michael@0:  *
michael@0:  * aDest's glyph runs are reset, then aSrc is walked one glyph run at a time.
michael@0:  * Within a glyph run, a source character k whose successor has
michael@0:  * aCharsToMerge[k + 1] set is accumulated with that successor; when a group
michael@0:  * ends, all glyphs gathered for it are stored on one destination character.
michael@0:  * After each stored character, destination positions flagged in
michael@0:  * aDeletedChars receive empty glyph entries.
michael@0:  *
michael@0:  * Merging never crosses a glyph-run boundary: if a glyph run begins with a
michael@0:  * character that is flagged in aCharsToMerge, that leading group is dropped
michael@0:  * (with a warning) instead of being written to aDest.
michael@0:  *
michael@0:  * @param aDest         run that receives the merged glyph data.
michael@0:  * @param aSrc          run supplying glyph runs and glyphs.
michael@0:  * @param aCharsToMerge indexed by source character offset; true means the
michael@0:  *                      character merges with the one before it.
michael@0:  * @param aDeletedChars indexed by destination character offset; true means
michael@0:  *                      the destination character gets an empty glyph entry.
michael@0:  *
michael@0:  * Returns early, leaving aDest partially filled, if AddGlyphRun() fails.
michael@0:  */
michael@0: void
michael@0: MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
michael@0:                          const bool* aCharsToMerge, const bool* aDeletedChars)
michael@0: {
michael@0:   aDest->ResetGlyphRuns();
michael@0: 
michael@0:   gfxTextRun::GlyphRunIterator iter(aSrc, 0, aSrc->GetLength());
michael@0:   uint32_t offset = 0; // next character position to be written in aDest
michael@0:   nsAutoTArray<gfxTextRun::DetailedGlyph,2> glyphs; // glyphs of the group being merged
michael@0:   while (iter.NextRun()) {
michael@0:     gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
michael@0:     nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType,
michael@0:                                      offset, false);
michael@0:     if (NS_FAILED(rv))
michael@0:       return;
michael@0: 
michael@0:     bool anyMissing = false; // set once the current group contains a missing glyph
michael@0:     uint32_t mergeRunStart = iter.GetStringStart(); // source index where the current group began
michael@0:     const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs();
michael@0:     gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
michael@0:     uint32_t stringEnd = iter.GetStringEnd();
michael@0:     for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) {
michael@0:       const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
michael@0:       if (g.IsSimpleGlyph()) {
michael@0:         if (!anyMissing) { // simple glyphs are not collected once the group is marked missing
michael@0:           gfxTextRun::DetailedGlyph details;
michael@0:           details.mGlyphID = g.GetSimpleGlyph();
michael@0:           details.mAdvance = g.GetSimpleAdvance();
michael@0:           details.mXOffset = 0;
michael@0:           details.mYOffset = 0;
michael@0:           glyphs.AppendElement(details);
michael@0:         }
michael@0:       } else {
michael@0:         if (g.IsMissing()) {
michael@0:           anyMissing = true;
michael@0:           glyphs.Clear(); // discard what was gathered so far for this group
michael@0:         }
michael@0:         if (g.GetGlyphCount() > 0) {
michael@0:           glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
michael@0:         }
michael@0:       }
michael@0: 
michael@0:       if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {
michael@0:         // next char is supposed to merge with current, so loop without
michael@0:         // writing current merged glyph to the destination
michael@0:         continue;
michael@0:       }
michael@0: 
michael@0:       // If the start of the merge run is actually a character that should
michael@0:       // have been merged with the previous character (this can happen
michael@0:       // if there's a font change in the middle of a case-mapped character,
michael@0:       // that decomposed into a sequence of base+diacritics, for example),
michael@0:       // just discard the entire merge run. See the glyph-run boundary note
michael@0:       // in the comment at the start of this function.
michael@0:       NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart],
michael@0:                        "unable to merge across a glyph run boundary, "
michael@0:                        "glyph(s) discarded");
michael@0:       if (!aCharsToMerge[mergeRunStart]) {
michael@0:         if (anyMissing) {
michael@0:           mergedGlyph.SetMissing(glyphs.Length());
michael@0:         } else {
michael@0:           mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
michael@0:                                  mergedGlyph.IsLigatureGroupStart(),
michael@0:                                  glyphs.Length());
michael@0:         }
michael@0:         aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
michael@0:         ++offset;
michael@0: 
michael@0:         while (offset < aDest->GetLength() && aDeletedChars[offset]) { // skip over deleted positions
michael@0:           aDest->SetGlyphs(offset++, gfxTextRun::CompressedGlyph(), nullptr);
michael@0:         }
michael@0:       }
michael@0: 
michael@0:       glyphs.Clear(); // start a fresh merge group at the next source character
michael@0:       anyMissing = false;
michael@0:       mergeRunStart = k + 1;
michael@0:       if (mergeRunStart < stringEnd) {
michael@0:         mergedGlyph = srcGlyphs[mergeRunStart];
michael@0:       }
michael@0:     }
michael@0:     NS_ASSERTION(glyphs.Length() == 0,
michael@0:                  "Leftover glyphs, don't request merging of the last character with its next!");  
michael@0:   }
michael@0:   NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Build the factory parameters and flags used when creating inner (child)
michael@0:  * text runs for aTextRun.
michael@0:  *
michael@0:  * @param aTextRun    run whose flags and app-units-per-dev-unit are reused.
michael@0:  * @param aFlags      out: aTextRun's flags with TEXT_IS_PERSISTENT cleared.
michael@0:  * @param aRefContext stored as the first member of the returned parameters.
michael@0:  * @return parameters holding aRefContext and the run's
michael@0:  *         app-units-per-dev-unit; the remaining members are null/zero.
michael@0:  */
michael@0: gfxTextRunFactory::Parameters
michael@0: GetParametersForInner(nsTransformedTextRun* aTextRun, uint32_t* aFlags,
michael@0:     gfxContext* aRefContext)
michael@0: {
michael@0:   *aFlags = aTextRun->GetFlags() & ~gfxFontGroup::TEXT_IS_PERSISTENT;
michael@0: 
michael@0:   gfxTextRunFactory::Parameters innerParams = {
michael@0:     aRefContext,
michael@0:     nullptr,
michael@0:     nullptr,
michael@0:     nullptr,
michael@0:     0,
michael@0:     aTextRun->GetAppUnitsPerDevUnit()
michael@0:   };
michael@0:   return innerParams;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Rebuild the glyph data of aTextRun for font-variant:small-caps.
michael@0:  *
michael@0:  * The text is split into maximal runs of characters needing the same
michael@0:  * treatment (see RunCaseState below). Each run is shaped as a separate child
michael@0:  * text run and its glyph data is copied into aTextRun:
michael@0:  *  - kUpperOrCaseless: shaped as-is with the run's own font group;
michael@0:  *  - kLowercase: uppercased and shaped with a copy of the font group whose
michael@0:  *    size is scaled by 0.8;
michael@0:  *  - kSpecialUpper: uppercased but shaped with the full-size font group.
michael@0:  * Potential line breaks of aTextRun are copied into each child before its
michael@0:  * glyphs are taken.
michael@0:  *
michael@0:  * Returns without (fully) rebuilding the run if the reduced-size font group,
michael@0:  * the cluster-probing inner run, or any child run cannot be created.
michael@0:  *
michael@0:  * @param aTextRun    the transformed run whose glyph runs are rebuilt.
michael@0:  * @param aRefContext reference context passed on to every child run.
michael@0:  */
michael@0: void
michael@0: nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
michael@0:     gfxContext* aRefContext)
michael@0: {
michael@0:   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
michael@0:   gfxFontStyle fontStyle = *fontGroup->GetStyle();
michael@0:   fontStyle.size *= 0.8;
michael@0:   nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle);
michael@0:   if (!smallFont)
michael@0:     return;
michael@0: 
michael@0:   uint32_t flags;
michael@0:   gfxTextRunFactory::Parameters innerParams =
michael@0:       GetParametersForInner(aTextRun, &flags, aRefContext);
michael@0: 
michael@0:   uint32_t length = aTextRun->GetLength();
michael@0:   const char16_t* str = aTextRun->mString.BeginReading();
michael@0:   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
michael@0:   // Create a textrun so we can check cluster-start properties
michael@0:   nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags));
michael@0:   if (!inner.get())
michael@0:     return;
michael@0: 
michael@0:   // NOTE(review): the second argument is presumably the "all uppercase"
michael@0:   // switch (cf. mAllUppercase in nsCaseTransformTextRunFactory) - confirm.
michael@0:   nsCaseTransformTextRunFactory uppercaseFactory(nullptr, true);
michael@0: 
michael@0:   aTextRun->ResetGlyphRuns();
michael@0: 
michael@0:   uint32_t runStart = 0;
michael@0:   // Per-character data gathered for the run in progress; both arrays are
michael@0:   // emptied whenever a run is finished.
michael@0:   nsAutoTArray<nsStyleContext*,50> styleArray;
michael@0:   nsAutoTArray<uint8_t,50> canBreakBeforeArray;
michael@0: 
michael@0:   enum RunCaseState {
michael@0:     kUpperOrCaseless, // will be untouched by font-variant:small-caps
michael@0:     kLowercase,       // will be uppercased and reduced
michael@0:     kSpecialUpper     // specials: don't shrink, but apply uppercase mapping
michael@0:   };
michael@0:   RunCaseState runCase = kUpperOrCaseless;
michael@0: 
michael@0:   // Note that this loop runs from 0 to length *inclusive*, so the last
michael@0:   // iteration is in effect beyond the end of the input text, to give a
michael@0:   // chance to finish the last casing run we've found.
michael@0:   // The last iteration, when i==length, must not attempt to look at the
michael@0:   // character position [i] or the style data for styles[i], as this would
michael@0:   // be beyond the valid length of the textrun or its style array.
michael@0:   for (uint32_t i = 0; i <= length; ++i) {
michael@0:     RunCaseState chCase = kUpperOrCaseless;
michael@0:     // Unless we're at the end, figure out what treatment the current
michael@0:     // character will need.
michael@0:     if (i < length) {
michael@0:       nsStyleContext* styleContext = styles[i];
michael@0:       // Characters that aren't the start of a cluster are ignored here. They
michael@0:       // get added to whatever lowercase/non-lowercase run we're in.
michael@0:       if (!inner->IsClusterStart(i)) {
michael@0:         chCase = runCase;
michael@0:       } else {
michael@0:         if (styleContext->StyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) {
michael@0:           uint32_t ch = str[i];
michael@0:           // Combine a surrogate pair into one code point. Written as
michael@0:           // i + 1 < length so the bound never relies on length - 1.
michael@0:           if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < length && NS_IS_LOW_SURROGATE(str[i + 1])) {
michael@0:             ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
michael@0:           }
michael@0:           uint32_t ch2 = ToUpperCase(ch);
michael@0:           if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) {
michael@0:             chCase = kLowercase;
michael@0:           } else if (styleContext->StyleFont()->mLanguage == nsGkAtoms::el) {
michael@0:             // In Greek, check for characters that will be modified by the
michael@0:             // GreekUpperCase mapping - this catches accented capitals where
michael@0:             // the accent is to be removed (bug 307039). These are handled by
michael@0:             // a transformed child run using the full-size font.
michael@0:             GreekCasingState state = kStart; // don't need exact context here
michael@0:             ch2 = GreekUpperCase(ch, &state);
michael@0:             if (ch != ch2) {
michael@0:               chCase = kSpecialUpper;
michael@0:             }
michael@0:           }
michael@0:         } else {
michael@0:           // Don't transform the character! I.e., pretend that it's not lowercase
michael@0:         }
michael@0:       }
michael@0:     }
michael@0: 
michael@0:     // At the end of the text, or when the current character needs different
michael@0:     // casing treatment from the current run, finish the run-in-progress
michael@0:     // and prepare to accumulate a new run.
michael@0:     // Note that we do not look at any source data for offset [i] here,
michael@0:     // as that would be invalid in the case where i==length.
michael@0:     if ((i == length || runCase != chCase) && runStart < i) {
michael@0:       nsAutoPtr<nsTransformedTextRun> transformedChild;
michael@0:       nsAutoPtr<gfxTextRun> cachedChild;
michael@0:       // Start out null so the check after the switch is well defined even
michael@0:       // if runCase ever holds a value that no case below handles.
michael@0:       gfxTextRun* child = nullptr;
michael@0: 
michael@0:       switch (runCase) {
michael@0:       case kUpperOrCaseless:
michael@0:         cachedChild =
michael@0:           fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams,
michael@0:                                  flags);
michael@0:         child = cachedChild.get();
michael@0:         break;
michael@0:       case kLowercase:
michael@0:         transformedChild =
michael@0:           uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
michael@0:                                        &innerParams, smallFont, flags,
michael@0:                                        styleArray.Elements(), false);
michael@0:         child = transformedChild;
michael@0:         break;
michael@0:       case kSpecialUpper:
michael@0:         transformedChild =
michael@0:           uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
michael@0:                                        &innerParams, fontGroup, flags,
michael@0:                                        styleArray.Elements(), false);
michael@0:         child = transformedChild;
michael@0:         break;
michael@0:       }
michael@0:       if (!child)
michael@0:         return;
michael@0:       // Copy potential linebreaks into child so they're preserved
michael@0:       // (and also child will be shaped appropriately)
michael@0:       NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart,
michael@0:                    "lost some break-before values?");
michael@0:       child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
michael@0:           canBreakBeforeArray.Elements(), aRefContext);
michael@0:       if (transformedChild) {
michael@0:         transformedChild->FinishSettingProperties(aRefContext);
michael@0:       }
michael@0:       aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart);
michael@0: 
michael@0:       runStart = i;
michael@0:       styleArray.Clear();
michael@0:       canBreakBeforeArray.Clear();
michael@0:     }
michael@0: 
michael@0:     // Add the current character to the (possibly new) run in progress.
michael@0:     if (i < length) {
michael@0:       runCase = chCase;
michael@0:       styleArray.AppendElement(styles[i]);
michael@0:       canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
michael@0:     }
michael@0:   }
michael@0: }
michael@0: 
michael@0: void
michael@0: nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
michael@0:     gfxContext* aRefContext)
michael@0: {
michael@0:   uint32_t length = aTextRun->GetLength();
michael@0:   const char16_t* str = aTextRun->mString.BeginReading();
michael@0:   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
michael@0: 
michael@0:   nsAutoString convertedString;
michael@0:   nsAutoTArray<bool,50> charsToMergeArray;
michael@0:   nsAutoTArray<bool,50> deletedCharsArray;
michael@0:   nsAutoTArray<nsStyleContext*,50> styleArray;
michael@0:   nsAutoTArray<uint8_t,50> canBreakBeforeArray;
michael@0:   bool mergeNeeded = false;
michael@0: 
michael@0:   // Some languages have special casing conventions that differ from the
michael@0:   // default Unicode mappings.
michael@0:   // The enum values here are named for well-known exemplar languages that
michael@0:   // exhibit the behavior in question; multiple lang tags may map to the
michael@0:   // same setting here, if the behavior is shared by other languages.
michael@0:   enum {
michael@0:     eNone,    // default non-lang-specific behavior
michael@0:     eTurkish, // preserve dotted/dotless-i distinction in uppercase
michael@0:     eDutch,   // treat "ij" digraph as a unit for capitalization
michael@0:     eGreek    // strip accent when uppercasing Greek vowels
michael@0:   } languageSpecificCasing = eNone;
michael@0: 
michael@0:   const nsIAtom* lang = nullptr;
michael@0:   bool capitalizeDutchIJ = false;
michael@0:   bool prevIsLetter = false;
michael@0:   uint32_t sigmaIndex = uint32_t(-1);
michael@0:   nsIUGenCategory::nsUGenCategory cat;
michael@0:   GreekCasingState greekState = kStart;
michael@0:   uint32_t i;
michael@0:   for (i = 0; i < length; ++i) {
michael@0:     uint32_t ch = str[i];
michael@0:     nsStyleContext* styleContext = styles[i];
michael@0: 
michael@0:     uint8_t style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
michael@0:       : styleContext->StyleText()->mTextTransform;
michael@0:     int extraChars = 0;
michael@0:     const mozilla::unicode::MultiCharMapping *mcm;
michael@0: 
michael@0:     if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
michael@0:       ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
michael@0:     }
michael@0: 
michael@0:     if (lang != styleContext->StyleFont()->mLanguage) {
michael@0:       lang = styleContext->StyleFont()->mLanguage;
michael@0:       if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
michael@0:           lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
michael@0:           lang == nsGkAtoms::tt) {
michael@0:         languageSpecificCasing = eTurkish;
michael@0:       } else if (lang == nsGkAtoms::nl) {
michael@0:         languageSpecificCasing = eDutch;
michael@0:       } else if (lang == nsGkAtoms::el) {
michael@0:         languageSpecificCasing = eGreek;
michael@0:         greekState = kStart;
michael@0:       } else {
michael@0:         languageSpecificCasing = eNone;
michael@0:       }
michael@0:     }
michael@0: 
michael@0:     switch (style) {
michael@0:     case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
michael@0:       if (languageSpecificCasing == eTurkish) {
michael@0:         if (ch == 'I') {
michael@0:           ch = LATIN_SMALL_LETTER_DOTLESS_I;
michael@0:           prevIsLetter = true;
michael@0:           sigmaIndex = uint32_t(-1);
michael@0:           break;
michael@0:         }
michael@0:         if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
michael@0:           ch = 'i';
michael@0:           prevIsLetter = true;
michael@0:           sigmaIndex = uint32_t(-1);
michael@0:           break;
michael@0:         }
michael@0:       }
michael@0: 
michael@0:       // Special lowercasing behavior for Greek Sigma: note that this is listed
michael@0:       // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
michael@0:       // language-specific mapping; it applies regardless of the language of
michael@0:       // the element.
michael@0:       //
michael@0:       // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
michael@0:       // the non-final form) whenever there is a following letter, or when the
michael@0:       // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
michael@0:       // LETTER); and to FINAL SIGMA when it is preceded by another letter but
michael@0:       // not followed by one.
michael@0:       //
michael@0:       // To implement the context-sensitive nature of this mapping, we keep
michael@0:       // track of whether the previous character was a letter. If not, CAPITAL
michael@0:       // SIGMA will map directly to SMALL SIGMA. If the previous character
michael@0:       // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
michael@0:       // position in the converted string; if we then encounter another letter,
michael@0:       // that FINAL SIGMA is replaced with a standard SMALL SIGMA.
michael@0: 
michael@0:       cat = mozilla::unicode::GetGenCategory(ch);
michael@0: 
michael@0:       // If sigmaIndex is not -1, it marks where we have provisionally mapped
michael@0:       // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
michael@0:       // need to change it to SMALL SIGMA.
michael@0:       if (sigmaIndex != uint32_t(-1)) {
michael@0:         if (cat == nsIUGenCategory::kLetter) {
michael@0:           convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
michael@0:         }
michael@0:       }
michael@0: 
michael@0:       if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
michael@0:         // If preceding char was a letter, map to FINAL instead of SMALL,
michael@0:         // and note where it occurred by setting sigmaIndex; we'll change it
michael@0:         // to standard SMALL SIGMA later if another letter follows
michael@0:         if (prevIsLetter) {
michael@0:           ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
michael@0:           sigmaIndex = convertedString.Length();
michael@0:         } else {
michael@0:           // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
michael@0:           // to SMALL SIGMA
michael@0:           ch = GREEK_SMALL_LETTER_SIGMA;
michael@0:           sigmaIndex = uint32_t(-1);
michael@0:         }
michael@0:         prevIsLetter = true;
michael@0:         break;
michael@0:       }
michael@0: 
michael@0:       // ignore diacritics for the purpose of contextual sigma mapping;
michael@0:       // otherwise, reset prevIsLetter appropriately and clear the
michael@0:       // sigmaIndex marker
michael@0:       if (cat != nsIUGenCategory::kMark) {
michael@0:         prevIsLetter = (cat == nsIUGenCategory::kLetter);
michael@0:         sigmaIndex = uint32_t(-1);
michael@0:       }
michael@0: 
michael@0:       mcm = mozilla::unicode::SpecialLower(ch);
michael@0:       if (mcm) {
michael@0:         int j = 0;
michael@0:         while (j < 2 && mcm->mMappedChars[j + 1]) {
michael@0:           convertedString.Append(mcm->mMappedChars[j]);
michael@0:           ++extraChars;
michael@0:           ++j;
michael@0:         }
michael@0:         ch = mcm->mMappedChars[j];
michael@0:         break;
michael@0:       }
michael@0: 
michael@0:       ch = ToLowerCase(ch);
michael@0:       break;
michael@0: 
michael@0:     case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
michael@0:       if (languageSpecificCasing == eTurkish && ch == 'i') {
michael@0:         ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
michael@0:         break;
michael@0:       }
michael@0: 
michael@0:       if (languageSpecificCasing == eGreek) {
michael@0:         ch = GreekUpperCase(ch, &greekState);
michael@0:         break;
michael@0:       }
michael@0: 
michael@0:       mcm = mozilla::unicode::SpecialUpper(ch);
michael@0:       if (mcm) {
michael@0:         int j = 0;
michael@0:         while (j < 2 && mcm->mMappedChars[j + 1]) {
michael@0:           convertedString.Append(mcm->mMappedChars[j]);
michael@0:           ++extraChars;
michael@0:           ++j;
michael@0:         }
michael@0:         ch = mcm->mMappedChars[j];
michael@0:         break;
michael@0:       }
michael@0: 
michael@0:       ch = ToUpperCase(ch);
michael@0:       break;
michael@0: 
michael@0:     case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
michael@0:       if (capitalizeDutchIJ && ch == 'j') {
michael@0:         ch = 'J';
michael@0:         capitalizeDutchIJ = false;
michael@0:         break;
michael@0:       }
michael@0:       capitalizeDutchIJ = false;
michael@0:       if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
michael@0:         if (languageSpecificCasing == eTurkish && ch == 'i') {
michael@0:           ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
michael@0:           break;
michael@0:         }
michael@0:         if (languageSpecificCasing == eDutch && ch == 'i') {
michael@0:           ch = 'I';
michael@0:           capitalizeDutchIJ = true;
michael@0:           break;
michael@0:         }
michael@0: 
michael@0:         mcm = mozilla::unicode::SpecialTitle(ch);
michael@0:         if (mcm) {
michael@0:           int j = 0;
michael@0:           while (j < 2 && mcm->mMappedChars[j + 1]) {
michael@0:             convertedString.Append(mcm->mMappedChars[j]);
michael@0:             ++extraChars;
michael@0:             ++j;
michael@0:           }
michael@0:           ch = mcm->mMappedChars[j];
michael@0:           break;
michael@0:         }
michael@0: 
michael@0:         ch = ToTitleCase(ch);
michael@0:       }
michael@0:       break;
michael@0: 
michael@0:     case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:
michael@0:       ch = mozilla::unicode::GetFullWidth(ch);
michael@0:       break;
michael@0: 
michael@0:     default:
michael@0:       break;
michael@0:     }
michael@0: 
michael@0:     if (ch == uint32_t(-1)) {
michael@0:       deletedCharsArray.AppendElement(true);
michael@0:       mergeNeeded = true;
michael@0:     } else {
michael@0:       deletedCharsArray.AppendElement(false);
michael@0:       charsToMergeArray.AppendElement(false);
michael@0:       styleArray.AppendElement(styleContext);
michael@0:       canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
michael@0: 
michael@0:       if (IS_IN_BMP(ch)) {
michael@0:         convertedString.Append(ch);
michael@0:       } else {
michael@0:         convertedString.Append(H_SURROGATE(ch));
michael@0:         convertedString.Append(L_SURROGATE(ch));
michael@0:         ++i;
michael@0:         deletedCharsArray.AppendElement(true); // not exactly deleted, but the
michael@0:                                                // trailing surrogate is skipped
michael@0:         ++extraChars;
michael@0:       }
michael@0: 
michael@0:       while (extraChars-- > 0) {
michael@0:         mergeNeeded = true;
michael@0:         charsToMergeArray.AppendElement(true);
michael@0:         styleArray.AppendElement(styleContext);
michael@0:         canBreakBeforeArray.AppendElement(false);
michael@0:       }
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   uint32_t flags;
michael@0:   gfxTextRunFactory::Parameters innerParams =
michael@0:       GetParametersForInner(aTextRun, &flags, aRefContext);
michael@0:   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
michael@0: 
michael@0:   nsAutoPtr<nsTransformedTextRun> transformedChild;
michael@0:   nsAutoPtr<gfxTextRun> cachedChild;
michael@0:   gfxTextRun* child;
michael@0: 
michael@0:   if (mInnerTransformingTextRunFactory) {
michael@0:     transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
michael@0:         convertedString.BeginReading(), convertedString.Length(),
michael@0:         &innerParams, fontGroup, flags, styleArray.Elements(), false);
michael@0:     child = transformedChild.get();
michael@0:   } else {
michael@0:     cachedChild = fontGroup->MakeTextRun(
michael@0:         convertedString.BeginReading(), convertedString.Length(),
michael@0:         &innerParams, flags);
michael@0:     child = cachedChild.get();
michael@0:   }
michael@0:   if (!child)
michael@0:     return;
michael@0:   // Copy potential linebreaks into child so they're preserved
michael@0:   // (and also child will be shaped appropriately)
michael@0:   NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
michael@0:                "Dropped characters or break-before values somewhere!");
michael@0:   child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
michael@0:       canBreakBeforeArray.Elements(), aRefContext);
michael@0:   if (transformedChild) {
michael@0:     transformedChild->FinishSettingProperties(aRefContext);
michael@0:   }
michael@0: 
michael@0:   if (mergeNeeded) {
michael@0:     // Now merge multiple characters into one multi-glyph character as required
michael@0:     // and deal with skipping deleted accent chars
michael@0:     NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
michael@0:                  "source length mismatch");
michael@0:     NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
michael@0:                  "destination length mismatch");
michael@0:     MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
michael@0:                              deletedCharsArray.Elements());
michael@0:   } else {
michael@0:     // No merging to do, so just copy; this produces a more optimized textrun.
michael@0:     // We can't steal the data because the child may be cached and stealing
michael@0:     // the data would break the cache.
michael@0:     aTextRun->ResetGlyphRuns();
michael@0:     aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
michael@0:   }
michael@0: }