michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- michael@0: * This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsTextRunTransformations.h" michael@0: michael@0: #include "mozilla/MemoryReporting.h" michael@0: michael@0: #include "nsGkAtoms.h" michael@0: #include "nsStyleConsts.h" michael@0: #include "nsStyleContext.h" michael@0: #include "nsUnicodeProperties.h" michael@0: #include "nsSpecialCasingData.h" michael@0: #include "mozilla/gfx/2D.h" michael@0: #include "nsTextFrameUtils.h" michael@0: #include "nsIPersistentProperties2.h" michael@0: #include "nsNetUtil.h" michael@0: michael@0: // Unicode characters needing special casing treatment in tr/az languages michael@0: #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130 michael@0: #define LATIN_SMALL_LETTER_DOTLESS_I 0x0131 michael@0: michael@0: // Greek sigma needs custom handling for the lowercase transform; for details michael@0: // see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within michael@0: // nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120. michael@0: #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3 michael@0: #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2 michael@0: #define GREEK_SMALL_LETTER_SIGMA 0x03C3 michael@0: michael@0: // Custom uppercase mapping for Greek; see bug 307039 for details michael@0: #define GREEK_LOWER_ALPHA 0x03B1 michael@0: #define GREEK_LOWER_ALPHA_TONOS 0x03AC michael@0: #define GREEK_LOWER_ALPHA_OXIA 0x1F71 michael@0: #define GREEK_LOWER_EPSILON 0x03B5 michael@0: #define GREEK_LOWER_EPSILON_TONOS 0x03AD michael@0: #define GREEK_LOWER_EPSILON_OXIA 0x1F73 michael@0: #define GREEK_LOWER_ETA 0x03B7 michael@0: #define GREEK_LOWER_ETA_TONOS 0x03AE michael@0: #define GREEK_LOWER_ETA_OXIA 0x1F75 michael@0: #define GREEK_LOWER_IOTA 0x03B9 michael@0: #define GREEK_LOWER_IOTA_TONOS 0x03AF michael@0: #define GREEK_LOWER_IOTA_OXIA 0x1F77 michael@0: #define GREEK_LOWER_IOTA_DIALYTIKA 0x03CA michael@0: #define GREEK_LOWER_IOTA_DIALYTIKA_TONOS 0x0390 michael@0: #define GREEK_LOWER_IOTA_DIALYTIKA_OXIA 0x1FD3 michael@0: #define GREEK_LOWER_OMICRON 0x03BF michael@0: #define GREEK_LOWER_OMICRON_TONOS 0x03CC michael@0: #define GREEK_LOWER_OMICRON_OXIA 0x1F79 michael@0: #define GREEK_LOWER_UPSILON 0x03C5 michael@0: #define GREEK_LOWER_UPSILON_TONOS 0x03CD michael@0: #define GREEK_LOWER_UPSILON_OXIA 0x1F7B michael@0: #define GREEK_LOWER_UPSILON_DIALYTIKA 0x03CB michael@0: #define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS 0x03B0 michael@0: #define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA 0x1FE3 michael@0: #define GREEK_LOWER_OMEGA 0x03C9 michael@0: #define GREEK_LOWER_OMEGA_TONOS 0x03CE michael@0: #define GREEK_LOWER_OMEGA_OXIA 0x1F7D michael@0: #define GREEK_UPPER_ALPHA 0x0391 michael@0: #define GREEK_UPPER_EPSILON 0x0395 michael@0: #define GREEK_UPPER_ETA 0x0397 michael@0: #define GREEK_UPPER_IOTA 0x0399 michael@0: #define GREEK_UPPER_IOTA_DIALYTIKA 0x03AA michael@0: #define GREEK_UPPER_OMICRON 0x039F michael@0: #define GREEK_UPPER_UPSILON 0x03A5 michael@0: #define GREEK_UPPER_UPSILON_DIALYTIKA 0x03AB michael@0: #define GREEK_UPPER_OMEGA 0x03A9 michael@0: #define GREEK_UPPER_ALPHA_TONOS 0x0386 michael@0: #define GREEK_UPPER_ALPHA_OXIA 0x1FBB michael@0: #define GREEK_UPPER_EPSILON_TONOS 0x0388 michael@0: #define GREEK_UPPER_EPSILON_OXIA 0x1FC9 michael@0: #define GREEK_UPPER_ETA_TONOS 0x0389 michael@0: #define GREEK_UPPER_ETA_OXIA 0x1FCB michael@0: #define GREEK_UPPER_IOTA_TONOS 0x038A michael@0: #define GREEK_UPPER_IOTA_OXIA 0x1FDB michael@0: #define GREEK_UPPER_OMICRON_TONOS 0x038C michael@0: #define GREEK_UPPER_OMICRON_OXIA 0x1FF9 michael@0: #define GREEK_UPPER_UPSILON_TONOS 0x038E michael@0: #define GREEK_UPPER_UPSILON_OXIA 0x1FEB michael@0: #define GREEK_UPPER_OMEGA_TONOS 0x038F michael@0: #define GREEK_UPPER_OMEGA_OXIA 0x1FFB michael@0: #define COMBINING_ACUTE_ACCENT 0x0301 michael@0: #define COMBINING_DIAERESIS 0x0308 michael@0: #define COMBINING_ACUTE_TONE_MARK 0x0341 michael@0: #define COMBINING_GREEK_DIALYTIKA_TONOS 0x0344 michael@0: michael@0: // When doing an Uppercase transform in Greek, we need to keep track of the michael@0: // current state while iterating through the string, to recognize and process michael@0: // diphthongs correctly. For clarity, we define a state for each vowel and michael@0: // each vowel with accent, although a few of these do not actually need any michael@0: // special treatment and could be folded into kStart. michael@0: enum GreekCasingState { michael@0: kStart, michael@0: kAlpha, michael@0: kEpsilon, michael@0: kEta, michael@0: kIota, michael@0: kOmicron, michael@0: kUpsilon, michael@0: kOmega, michael@0: kAlphaAcc, michael@0: kEpsilonAcc, michael@0: kEtaAcc, michael@0: kIotaAcc, michael@0: kOmicronAcc, michael@0: kUpsilonAcc, michael@0: kOmegaAcc, michael@0: kOmicronUpsilon, michael@0: kDiaeresis michael@0: }; michael@0: michael@0: static uint32_t michael@0: GreekUpperCase(uint32_t aCh, GreekCasingState* aState) michael@0: { michael@0: switch (aCh) { michael@0: case GREEK_UPPER_ALPHA: michael@0: case GREEK_LOWER_ALPHA: michael@0: *aState = kAlpha; michael@0: return GREEK_UPPER_ALPHA; michael@0: michael@0: case GREEK_UPPER_EPSILON: michael@0: case GREEK_LOWER_EPSILON: michael@0: *aState = kEpsilon; michael@0: return GREEK_UPPER_EPSILON; michael@0: michael@0: case GREEK_UPPER_ETA: michael@0: case GREEK_LOWER_ETA: michael@0: *aState = kEta; michael@0: return GREEK_UPPER_ETA; michael@0: michael@0: case GREEK_UPPER_IOTA: michael@0: *aState = kIota; michael@0: return GREEK_UPPER_IOTA; michael@0: michael@0: case GREEK_UPPER_OMICRON: michael@0: case GREEK_LOWER_OMICRON: michael@0: *aState = kOmicron; michael@0: return GREEK_UPPER_OMICRON; michael@0: michael@0: case GREEK_UPPER_UPSILON: michael@0: switch (*aState) { michael@0: case kOmicron: michael@0: *aState = kOmicronUpsilon; michael@0: break; michael@0: default: michael@0: *aState = kUpsilon; michael@0: break; michael@0: } michael@0: return GREEK_UPPER_UPSILON; michael@0: michael@0: case GREEK_UPPER_OMEGA: michael@0: case GREEK_LOWER_OMEGA: michael@0: *aState = kOmega; michael@0: return GREEK_UPPER_OMEGA; michael@0: michael@0: // iota and upsilon may be the second vowel of a diphthong michael@0: case GREEK_LOWER_IOTA: michael@0: switch (*aState) { michael@0: case kAlphaAcc: michael@0: case kEpsilonAcc: michael@0: case kOmicronAcc: michael@0: case kUpsilonAcc: michael@0: *aState = kStart; michael@0: return GREEK_UPPER_IOTA_DIALYTIKA; michael@0: default: michael@0: break; michael@0: } michael@0: *aState = kIota; michael@0: return GREEK_UPPER_IOTA; michael@0: michael@0: case GREEK_LOWER_UPSILON: michael@0: switch (*aState) { michael@0: case kAlphaAcc: michael@0: case kEpsilonAcc: michael@0: case kEtaAcc: michael@0: case kOmicronAcc: michael@0: *aState = kStart; michael@0: return GREEK_UPPER_UPSILON_DIALYTIKA; michael@0: case kOmicron: michael@0: *aState = kOmicronUpsilon; michael@0: break; michael@0: default: michael@0: *aState = kUpsilon; michael@0: break; michael@0: } michael@0: return GREEK_UPPER_UPSILON; michael@0: michael@0: case GREEK_UPPER_IOTA_DIALYTIKA: michael@0: case GREEK_LOWER_IOTA_DIALYTIKA: michael@0: case GREEK_UPPER_UPSILON_DIALYTIKA: michael@0: case GREEK_LOWER_UPSILON_DIALYTIKA: michael@0: case COMBINING_DIAERESIS: michael@0: *aState = kDiaeresis; michael@0: return ToUpperCase(aCh); michael@0: michael@0: // remove accent if it follows a vowel or diaeresis, michael@0: // and set appropriate state for diphthong detection michael@0: case COMBINING_ACUTE_ACCENT: michael@0: case COMBINING_ACUTE_TONE_MARK: michael@0: switch (*aState) { michael@0: case kAlpha: michael@0: *aState = kAlphaAcc; michael@0: return uint32_t(-1); // omit this char from result string michael@0: case kEpsilon: michael@0: *aState = kEpsilonAcc; michael@0: return uint32_t(-1); michael@0: case kEta: michael@0: *aState = kEtaAcc; michael@0: return uint32_t(-1); michael@0: case kIota: michael@0: *aState = kIotaAcc; michael@0: return uint32_t(-1); michael@0: case kOmicron: michael@0: *aState = kOmicronAcc; michael@0: return uint32_t(-1); michael@0: case kUpsilon: michael@0: *aState = kUpsilonAcc; michael@0: return uint32_t(-1); michael@0: case kOmicronUpsilon: michael@0: *aState = kStart; // this completed a diphthong michael@0: return uint32_t(-1); michael@0: case kOmega: michael@0: *aState = kOmegaAcc; michael@0: return uint32_t(-1); michael@0: case kDiaeresis: michael@0: *aState = kStart; michael@0: return uint32_t(-1); michael@0: default: michael@0: break; michael@0: } michael@0: break; michael@0: michael@0: // combinations with dieresis+accent just strip the accent, michael@0: // and reset to start state (don't form diphthong with following vowel) michael@0: case GREEK_LOWER_IOTA_DIALYTIKA_TONOS: michael@0: case GREEK_LOWER_IOTA_DIALYTIKA_OXIA: michael@0: *aState = kStart; michael@0: return GREEK_UPPER_IOTA_DIALYTIKA; michael@0: michael@0: case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS: michael@0: case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA: michael@0: *aState = kStart; michael@0: return GREEK_UPPER_UPSILON_DIALYTIKA; michael@0: michael@0: case COMBINING_GREEK_DIALYTIKA_TONOS: michael@0: *aState = kStart; michael@0: return COMBINING_DIAERESIS; michael@0: michael@0: // strip accents from vowels, and note the vowel seen so that we can detect michael@0: // diphthongs where diaeresis needs to be added michael@0: case GREEK_LOWER_ALPHA_TONOS: michael@0: case GREEK_LOWER_ALPHA_OXIA: michael@0: case GREEK_UPPER_ALPHA_TONOS: michael@0: case GREEK_UPPER_ALPHA_OXIA: michael@0: *aState = kAlphaAcc; michael@0: return GREEK_UPPER_ALPHA; michael@0: michael@0: case GREEK_LOWER_EPSILON_TONOS: michael@0: case GREEK_LOWER_EPSILON_OXIA: michael@0: case GREEK_UPPER_EPSILON_TONOS: michael@0: case GREEK_UPPER_EPSILON_OXIA: michael@0: *aState = kEpsilonAcc; michael@0: return GREEK_UPPER_EPSILON; michael@0: michael@0: case GREEK_LOWER_ETA_TONOS: michael@0: case GREEK_LOWER_ETA_OXIA: michael@0: case GREEK_UPPER_ETA_TONOS: michael@0: case GREEK_UPPER_ETA_OXIA: michael@0: *aState = kEtaAcc; michael@0: return GREEK_UPPER_ETA; michael@0: michael@0: case GREEK_LOWER_IOTA_TONOS: michael@0: case GREEK_LOWER_IOTA_OXIA: michael@0: case GREEK_UPPER_IOTA_TONOS: michael@0: case GREEK_UPPER_IOTA_OXIA: michael@0: *aState = kIotaAcc; michael@0: return GREEK_UPPER_IOTA; michael@0: michael@0: case GREEK_LOWER_OMICRON_TONOS: michael@0: case GREEK_LOWER_OMICRON_OXIA: michael@0: case GREEK_UPPER_OMICRON_TONOS: michael@0: case GREEK_UPPER_OMICRON_OXIA: michael@0: *aState = kOmicronAcc; michael@0: return GREEK_UPPER_OMICRON; michael@0: michael@0: case GREEK_LOWER_UPSILON_TONOS: michael@0: case GREEK_LOWER_UPSILON_OXIA: michael@0: case GREEK_UPPER_UPSILON_TONOS: michael@0: case GREEK_UPPER_UPSILON_OXIA: michael@0: switch (*aState) { michael@0: case kOmicron: michael@0: *aState = kStart; // this completed a diphthong michael@0: break; michael@0: default: michael@0: *aState = kUpsilonAcc; michael@0: break; michael@0: } michael@0: return GREEK_UPPER_UPSILON; michael@0: michael@0: case GREEK_LOWER_OMEGA_TONOS: michael@0: case GREEK_LOWER_OMEGA_OXIA: michael@0: case GREEK_UPPER_OMEGA_TONOS: michael@0: case GREEK_UPPER_OMEGA_OXIA: michael@0: *aState = kOmegaAcc; michael@0: return GREEK_UPPER_OMEGA; michael@0: } michael@0: michael@0: // all other characters just reset the state, and use standard mappings michael@0: *aState = kStart; michael@0: return ToUpperCase(aCh); michael@0: } michael@0: michael@0: nsTransformedTextRun * michael@0: nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams, michael@0: nsTransformingTextRunFactory* aFactory, michael@0: gfxFontGroup* aFontGroup, michael@0: const char16_t* aString, uint32_t aLength, michael@0: const uint32_t aFlags, nsStyleContext** aStyles, michael@0: bool aOwnsFactory) michael@0: { michael@0: NS_ASSERTION(!(aFlags & gfxTextRunFactory::TEXT_IS_8BIT), michael@0: "didn't expect text to be marked as 8-bit here"); michael@0: michael@0: void *storage = AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength); michael@0: if (!storage) { michael@0: return nullptr; michael@0: } michael@0: michael@0: return new (storage) nsTransformedTextRun(aParams, aFactory, aFontGroup, michael@0: aString, aLength, michael@0: aFlags, aStyles, aOwnsFactory); michael@0: } michael@0: michael@0: void michael@0: nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength, michael@0: bool* aCapitalization, michael@0: gfxContext* aRefContext) michael@0: { michael@0: if (mCapitalize.IsEmpty()) { michael@0: if (!mCapitalize.AppendElements(GetLength())) michael@0: return; michael@0: memset(mCapitalize.Elements(), 0, GetLength()*sizeof(bool)); michael@0: } michael@0: memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength*sizeof(bool)); michael@0: mNeedsRebuild = true; michael@0: } michael@0: michael@0: bool michael@0: nsTransformedTextRun::SetPotentialLineBreaks(uint32_t aStart, uint32_t aLength, michael@0: uint8_t* aBreakBefore, michael@0: gfxContext* aRefContext) michael@0: { michael@0: bool changed = gfxTextRun::SetPotentialLineBreaks(aStart, aLength, michael@0: aBreakBefore, aRefContext); michael@0: if (changed) { michael@0: mNeedsRebuild = true; michael@0: } michael@0: return changed; michael@0: } michael@0: michael@0: size_t michael@0: nsTransformedTextRun::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) michael@0: { michael@0: size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf); michael@0: total += mStyles.SizeOfExcludingThis(aMallocSizeOf); michael@0: total += mCapitalize.SizeOfExcludingThis(aMallocSizeOf); michael@0: if (mOwnsFactory) { michael@0: total += aMallocSizeOf(mFactory); michael@0: } michael@0: return total; michael@0: } michael@0: michael@0: size_t michael@0: nsTransformedTextRun::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) michael@0: { michael@0: return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); michael@0: } michael@0: michael@0: nsTransformedTextRun* michael@0: nsTransformingTextRunFactory::MakeTextRun(const char16_t* aString, uint32_t aLength, michael@0: const gfxTextRunFactory::Parameters* aParams, michael@0: gfxFontGroup* aFontGroup, uint32_t aFlags, michael@0: nsStyleContext** aStyles, bool aOwnsFactory) michael@0: { michael@0: return nsTransformedTextRun::Create(aParams, this, aFontGroup, michael@0: aString, aLength, aFlags, aStyles, aOwnsFactory); michael@0: } michael@0: michael@0: nsTransformedTextRun* michael@0: nsTransformingTextRunFactory::MakeTextRun(const uint8_t* aString, uint32_t aLength, michael@0: const gfxTextRunFactory::Parameters* aParams, michael@0: gfxFontGroup* aFontGroup, uint32_t aFlags, michael@0: nsStyleContext** aStyles, bool aOwnsFactory) michael@0: { michael@0: // We'll only have a Unicode code path to minimize the amount of code needed michael@0: // for these rarely used features michael@0: NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast(aString), aLength); michael@0: return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup, michael@0: aFlags & ~(gfxFontGroup::TEXT_IS_PERSISTENT | gfxFontGroup::TEXT_IS_8BIT), michael@0: aStyles, aOwnsFactory); michael@0: } michael@0: michael@0: void michael@0: MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc, michael@0: const bool* aCharsToMerge, const bool* aDeletedChars) michael@0: { michael@0: aDest->ResetGlyphRuns(); michael@0: michael@0: gfxTextRun::GlyphRunIterator iter(aSrc, 0, aSrc->GetLength()); michael@0: uint32_t offset = 0; michael@0: nsAutoTArray glyphs; michael@0: while (iter.NextRun()) { michael@0: gfxTextRun::GlyphRun* run = iter.GetGlyphRun(); michael@0: nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType, michael@0: offset, false); michael@0: if (NS_FAILED(rv)) michael@0: return; michael@0: michael@0: bool anyMissing = false; michael@0: uint32_t mergeRunStart = iter.GetStringStart(); michael@0: const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs(); michael@0: gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart]; michael@0: uint32_t stringEnd = iter.GetStringEnd(); michael@0: for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) { michael@0: const gfxTextRun::CompressedGlyph g = srcGlyphs[k]; michael@0: if (g.IsSimpleGlyph()) { michael@0: if (!anyMissing) { michael@0: gfxTextRun::DetailedGlyph details; michael@0: details.mGlyphID = g.GetSimpleGlyph(); michael@0: details.mAdvance = g.GetSimpleAdvance(); michael@0: details.mXOffset = 0; michael@0: details.mYOffset = 0; michael@0: glyphs.AppendElement(details); michael@0: } michael@0: } else { michael@0: if (g.IsMissing()) { michael@0: anyMissing = true; michael@0: glyphs.Clear(); michael@0: } michael@0: if (g.GetGlyphCount() > 0) { michael@0: glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount()); michael@0: } michael@0: } michael@0: michael@0: if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) { michael@0: // next char is supposed to merge with current, so loop without michael@0: // writing current merged glyph to the destination michael@0: continue; michael@0: } michael@0: michael@0: // If the start of the merge run is actually a character that should michael@0: // have been merged with the previous character (this can happen michael@0: // if there's a font change in the middle of a case-mapped character, michael@0: // that decomposed into a sequence of base+diacritics, for example), michael@0: // just discard the entire merge run. See comment at start of this michael@0: // function. michael@0: NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart], michael@0: "unable to merge across a glyph run boundary, " michael@0: "glyph(s) discarded"); michael@0: if (!aCharsToMerge[mergeRunStart]) { michael@0: if (anyMissing) { michael@0: mergedGlyph.SetMissing(glyphs.Length()); michael@0: } else { michael@0: mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(), michael@0: mergedGlyph.IsLigatureGroupStart(), michael@0: glyphs.Length()); michael@0: } michael@0: aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements()); michael@0: ++offset; michael@0: michael@0: while (offset < aDest->GetLength() && aDeletedChars[offset]) { michael@0: aDest->SetGlyphs(offset++, gfxTextRun::CompressedGlyph(), nullptr); michael@0: } michael@0: } michael@0: michael@0: glyphs.Clear(); michael@0: anyMissing = false; michael@0: mergeRunStart = k + 1; michael@0: if (mergeRunStart < stringEnd) { michael@0: mergedGlyph = srcGlyphs[mergeRunStart]; michael@0: } michael@0: } michael@0: NS_ASSERTION(glyphs.Length() == 0, michael@0: "Leftover glyphs, don't request merging of the last character with its next!"); michael@0: } michael@0: NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations"); michael@0: } michael@0: michael@0: gfxTextRunFactory::Parameters michael@0: GetParametersForInner(nsTransformedTextRun* aTextRun, uint32_t* aFlags, michael@0: gfxContext* aRefContext) michael@0: { michael@0: gfxTextRunFactory::Parameters params = michael@0: { aRefContext, nullptr, nullptr, michael@0: nullptr, 0, aTextRun->GetAppUnitsPerDevUnit() michael@0: }; michael@0: *aFlags = aTextRun->GetFlags() & ~gfxFontGroup::TEXT_IS_PERSISTENT; michael@0: return params; michael@0: } michael@0: michael@0: void michael@0: nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, michael@0: gfxContext* aRefContext) michael@0: { michael@0: gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); michael@0: gfxFontStyle fontStyle = *fontGroup->GetStyle(); michael@0: fontStyle.size *= 0.8; michael@0: nsRefPtr smallFont = fontGroup->Copy(&fontStyle); michael@0: if (!smallFont) michael@0: return; michael@0: michael@0: uint32_t flags; michael@0: gfxTextRunFactory::Parameters innerParams = michael@0: GetParametersForInner(aTextRun, &flags, aRefContext); michael@0: michael@0: uint32_t length = aTextRun->GetLength(); michael@0: const char16_t* str = aTextRun->mString.BeginReading(); michael@0: nsRefPtr* styles = aTextRun->mStyles.Elements(); michael@0: // Create a textrun so we can check cluster-start properties michael@0: nsAutoPtr inner(fontGroup->MakeTextRun(str, length, &innerParams, flags)); michael@0: if (!inner.get()) michael@0: return; michael@0: michael@0: nsCaseTransformTextRunFactory uppercaseFactory(nullptr, true); michael@0: michael@0: aTextRun->ResetGlyphRuns(); michael@0: michael@0: uint32_t runStart = 0; michael@0: nsAutoTArray styleArray; michael@0: nsAutoTArray canBreakBeforeArray; michael@0: michael@0: enum RunCaseState { michael@0: kUpperOrCaseless, // will be untouched by font-variant:small-caps michael@0: kLowercase, // will be uppercased and reduced michael@0: kSpecialUpper // specials: don't shrink, but apply uppercase mapping michael@0: }; michael@0: RunCaseState runCase = kUpperOrCaseless; michael@0: michael@0: // Note that this loop runs from 0 to length *inclusive*, so the last michael@0: // iteration is in effect beyond the end of the input text, to give a michael@0: // chance to finish the last casing run we've found. michael@0: // The last iteration, when i==length, must not attempt to look at the michael@0: // character position [i] or the style data for styles[i], as this would michael@0: // be beyond the valid length of the textrun or its style array. michael@0: for (uint32_t i = 0; i <= length; ++i) { michael@0: RunCaseState chCase = kUpperOrCaseless; michael@0: // Unless we're at the end, figure out what treatment the current michael@0: // character will need. michael@0: if (i < length) { michael@0: nsStyleContext* styleContext = styles[i]; michael@0: // Characters that aren't the start of a cluster are ignored here. They michael@0: // get added to whatever lowercase/non-lowercase run we're in. michael@0: if (!inner->IsClusterStart(i)) { michael@0: chCase = runCase; michael@0: } else { michael@0: if (styleContext->StyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) { michael@0: uint32_t ch = str[i]; michael@0: if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { michael@0: ch = SURROGATE_TO_UCS4(ch, str[i + 1]); michael@0: } michael@0: uint32_t ch2 = ToUpperCase(ch); michael@0: if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) { michael@0: chCase = kLowercase; michael@0: } else if (styleContext->StyleFont()->mLanguage == nsGkAtoms::el) { michael@0: // In Greek, check for characters that will be modified by the michael@0: // GreekUpperCase mapping - this catches accented capitals where michael@0: // the accent is to be removed (bug 307039). These are handled by michael@0: // a transformed child run using the full-size font. michael@0: GreekCasingState state = kStart; // don't need exact context here michael@0: ch2 = GreekUpperCase(ch, &state); michael@0: if (ch != ch2) { michael@0: chCase = kSpecialUpper; michael@0: } michael@0: } michael@0: } else { michael@0: // Don't transform the character! I.e., pretend that it's not lowercase michael@0: } michael@0: } michael@0: } michael@0: michael@0: // At the end of the text, or when the current character needs different michael@0: // casing treatment from the current run, finish the run-in-progress michael@0: // and prepare to accumulate a new run. michael@0: // Note that we do not look at any source data for offset [i] here, michael@0: // as that would be invalid in the case where i==length. michael@0: if ((i == length || runCase != chCase) && runStart < i) { michael@0: nsAutoPtr transformedChild; michael@0: nsAutoPtr cachedChild; michael@0: gfxTextRun* child; michael@0: michael@0: switch (runCase) { michael@0: case kUpperOrCaseless: michael@0: cachedChild = michael@0: fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams, michael@0: flags); michael@0: child = cachedChild.get(); michael@0: break; michael@0: case kLowercase: michael@0: transformedChild = michael@0: uppercaseFactory.MakeTextRun(str + runStart, i - runStart, michael@0: &innerParams, smallFont, flags, michael@0: styleArray.Elements(), false); michael@0: child = transformedChild; michael@0: break; michael@0: case kSpecialUpper: michael@0: transformedChild = michael@0: uppercaseFactory.MakeTextRun(str + runStart, i - runStart, michael@0: &innerParams, fontGroup, flags, michael@0: styleArray.Elements(), false); michael@0: child = transformedChild; michael@0: break; michael@0: } michael@0: if (!child) michael@0: return; michael@0: // Copy potential linebreaks into child so they're preserved michael@0: // (and also child will be shaped appropriately) michael@0: NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart, michael@0: "lost some break-before values?"); michael@0: child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), michael@0: canBreakBeforeArray.Elements(), aRefContext); michael@0: if (transformedChild) { michael@0: transformedChild->FinishSettingProperties(aRefContext); michael@0: } michael@0: aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart); michael@0: michael@0: runStart = i; michael@0: styleArray.Clear(); michael@0: canBreakBeforeArray.Clear(); michael@0: } michael@0: michael@0: if (i < length) { michael@0: runCase = chCase; michael@0: styleArray.AppendElement(styles[i]); michael@0: canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); michael@0: } michael@0: } michael@0: } michael@0: michael@0: void michael@0: nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, michael@0: gfxContext* aRefContext) michael@0: { michael@0: uint32_t length = aTextRun->GetLength(); michael@0: const char16_t* str = aTextRun->mString.BeginReading(); michael@0: nsRefPtr* styles = aTextRun->mStyles.Elements(); michael@0: michael@0: nsAutoString convertedString; michael@0: nsAutoTArray charsToMergeArray; michael@0: nsAutoTArray deletedCharsArray; michael@0: nsAutoTArray styleArray; michael@0: nsAutoTArray canBreakBeforeArray; michael@0: bool mergeNeeded = false; michael@0: michael@0: // Some languages have special casing conventions that differ from the michael@0: // default Unicode mappings. michael@0: // The enum values here are named for well-known exemplar languages that michael@0: // exhibit the behavior in question; multiple lang tags may map to the michael@0: // same setting here, if the behavior is shared by other languages. michael@0: enum { michael@0: eNone, // default non-lang-specific behavior michael@0: eTurkish, // preserve dotted/dotless-i distinction in uppercase michael@0: eDutch, // treat "ij" digraph as a unit for capitalization michael@0: eGreek // strip accent when uppercasing Greek vowels michael@0: } languageSpecificCasing = eNone; michael@0: michael@0: const nsIAtom* lang = nullptr; michael@0: bool capitalizeDutchIJ = false; michael@0: bool prevIsLetter = false; michael@0: uint32_t sigmaIndex = uint32_t(-1); michael@0: nsIUGenCategory::nsUGenCategory cat; michael@0: GreekCasingState greekState = kStart; michael@0: uint32_t i; michael@0: for (i = 0; i < length; ++i) { michael@0: uint32_t ch = str[i]; michael@0: nsStyleContext* styleContext = styles[i]; michael@0: michael@0: uint8_t style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE michael@0: : styleContext->StyleText()->mTextTransform; michael@0: int extraChars = 0; michael@0: const mozilla::unicode::MultiCharMapping *mcm; michael@0: michael@0: if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { michael@0: ch = SURROGATE_TO_UCS4(ch, str[i + 1]); michael@0: } michael@0: michael@0: if (lang != styleContext->StyleFont()->mLanguage) { michael@0: lang = styleContext->StyleFont()->mLanguage; michael@0: if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az || michael@0: lang == nsGkAtoms::ba || lang == nsGkAtoms::crh || michael@0: lang == nsGkAtoms::tt) { michael@0: languageSpecificCasing = eTurkish; michael@0: } else if (lang == nsGkAtoms::nl) { michael@0: languageSpecificCasing = eDutch; michael@0: } else if (lang == nsGkAtoms::el) { michael@0: languageSpecificCasing = eGreek; michael@0: greekState = kStart; michael@0: } else { michael@0: languageSpecificCasing = eNone; michael@0: } michael@0: } michael@0: michael@0: switch (style) { michael@0: case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: michael@0: if (languageSpecificCasing == eTurkish) { michael@0: if (ch == 'I') { michael@0: ch = LATIN_SMALL_LETTER_DOTLESS_I; michael@0: prevIsLetter = true; michael@0: sigmaIndex = uint32_t(-1); michael@0: break; michael@0: } michael@0: if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { michael@0: ch = 'i'; michael@0: prevIsLetter = true; michael@0: sigmaIndex = uint32_t(-1); michael@0: break; michael@0: } michael@0: } michael@0: michael@0: // Special lowercasing behavior for Greek Sigma: note that this is listed michael@0: // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a michael@0: // language-specific mapping; it applies regardless of the language of michael@0: // the element. michael@0: // michael@0: // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e. michael@0: // the non-final form) whenever there is a following letter, or when the michael@0: // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a michael@0: // LETTER); and to FINAL SIGMA when it is preceded by another letter but michael@0: // not followed by one. michael@0: // michael@0: // To implement the context-sensitive nature of this mapping, we keep michael@0: // track of whether the previous character was a letter. If not, CAPITAL michael@0: // SIGMA will map directly to SMALL SIGMA. If the previous character michael@0: // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the michael@0: // position in the converted string; if we then encounter another letter, michael@0: // that FINAL SIGMA is replaced with a standard SMALL SIGMA. michael@0: michael@0: cat = mozilla::unicode::GetGenCategory(ch); michael@0: michael@0: // If sigmaIndex is not -1, it marks where we have provisionally mapped michael@0: // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we michael@0: // need to change it to SMALL SIGMA. michael@0: if (sigmaIndex != uint32_t(-1)) { michael@0: if (cat == nsIUGenCategory::kLetter) { michael@0: convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); michael@0: } michael@0: } michael@0: michael@0: if (ch == GREEK_CAPITAL_LETTER_SIGMA) { michael@0: // If preceding char was a letter, map to FINAL instead of SMALL, michael@0: // and note where it occurred by setting sigmaIndex; we'll change it michael@0: // to standard SMALL SIGMA later if another letter follows michael@0: if (prevIsLetter) { michael@0: ch = GREEK_SMALL_LETTER_FINAL_SIGMA; michael@0: sigmaIndex = convertedString.Length(); michael@0: } else { michael@0: // CAPITAL SIGMA not preceded by a letter is unconditionally mapped michael@0: // to SMALL SIGMA michael@0: ch = GREEK_SMALL_LETTER_SIGMA; michael@0: sigmaIndex = uint32_t(-1); michael@0: } michael@0: prevIsLetter = true; michael@0: break; michael@0: } michael@0: michael@0: // ignore diacritics for the purpose of contextual sigma mapping; michael@0: // otherwise, reset prevIsLetter appropriately and clear the michael@0: // sigmaIndex marker michael@0: if (cat != nsIUGenCategory::kMark) { michael@0: prevIsLetter = (cat == nsIUGenCategory::kLetter); michael@0: sigmaIndex = uint32_t(-1); michael@0: } michael@0: michael@0: mcm = mozilla::unicode::SpecialLower(ch); michael@0: if (mcm) { michael@0: int j = 0; michael@0: while (j < 2 && mcm->mMappedChars[j + 1]) { michael@0: convertedString.Append(mcm->mMappedChars[j]); michael@0: ++extraChars; michael@0: ++j; michael@0: } michael@0: ch = mcm->mMappedChars[j]; michael@0: break; michael@0: } michael@0: michael@0: ch = ToLowerCase(ch); michael@0: break; michael@0: michael@0: case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: michael@0: if (languageSpecificCasing == eTurkish && ch == 'i') { michael@0: ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; michael@0: break; michael@0: } michael@0: michael@0: if (languageSpecificCasing == eGreek) { michael@0: ch = GreekUpperCase(ch, &greekState); michael@0: break; michael@0: } michael@0: michael@0: mcm = mozilla::unicode::SpecialUpper(ch); michael@0: if (mcm) { michael@0: int j = 0; michael@0: while (j < 2 && mcm->mMappedChars[j + 1]) { michael@0: convertedString.Append(mcm->mMappedChars[j]); michael@0: ++extraChars; michael@0: ++j; michael@0: } michael@0: ch = mcm->mMappedChars[j]; michael@0: break; michael@0: } michael@0: michael@0: ch = ToUpperCase(ch); michael@0: break; michael@0: michael@0: case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: michael@0: if (capitalizeDutchIJ && ch == 'j') { michael@0: ch = 'J'; michael@0: capitalizeDutchIJ = false; michael@0: break; michael@0: } michael@0: capitalizeDutchIJ = false; michael@0: if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { michael@0: if (languageSpecificCasing == eTurkish && ch == 'i') { michael@0: ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; michael@0: break; michael@0: } michael@0: if (languageSpecificCasing == eDutch && ch == 'i') { michael@0: ch = 'I'; michael@0: capitalizeDutchIJ = true; michael@0: break; michael@0: } michael@0: michael@0: mcm = mozilla::unicode::SpecialTitle(ch); michael@0: if (mcm) { michael@0: int j = 0; michael@0: while (j < 2 && mcm->mMappedChars[j + 1]) { michael@0: convertedString.Append(mcm->mMappedChars[j]); michael@0: ++extraChars; michael@0: ++j; michael@0: } michael@0: ch = mcm->mMappedChars[j]; michael@0: break; michael@0: } michael@0: michael@0: ch = ToTitleCase(ch); michael@0: } michael@0: break; michael@0: michael@0: case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH: michael@0: ch = mozilla::unicode::GetFullWidth(ch); michael@0: break; michael@0: michael@0: default: michael@0: break; michael@0: } michael@0: michael@0: if (ch == uint32_t(-1)) { michael@0: deletedCharsArray.AppendElement(true); michael@0: mergeNeeded = true; michael@0: } else { michael@0: deletedCharsArray.AppendElement(false); michael@0: charsToMergeArray.AppendElement(false); michael@0: styleArray.AppendElement(styleContext); michael@0: canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); michael@0: michael@0: if (IS_IN_BMP(ch)) { michael@0: convertedString.Append(ch); michael@0: } else { michael@0: convertedString.Append(H_SURROGATE(ch)); michael@0: convertedString.Append(L_SURROGATE(ch)); michael@0: ++i; michael@0: deletedCharsArray.AppendElement(true); // not exactly deleted, but the michael@0: // trailing surrogate is skipped michael@0: ++extraChars; michael@0: } michael@0: michael@0: while (extraChars-- > 0) { michael@0: mergeNeeded = true; michael@0: charsToMergeArray.AppendElement(true); michael@0: styleArray.AppendElement(styleContext); michael@0: canBreakBeforeArray.AppendElement(false); michael@0: } michael@0: } michael@0: } michael@0: michael@0: uint32_t flags; michael@0: gfxTextRunFactory::Parameters innerParams = michael@0: GetParametersForInner(aTextRun, &flags, aRefContext); michael@0: gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); michael@0: michael@0: nsAutoPtr transformedChild; michael@0: nsAutoPtr cachedChild; michael@0: gfxTextRun* child; michael@0: michael@0: if (mInnerTransformingTextRunFactory) { michael@0: transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( michael@0: convertedString.BeginReading(), convertedString.Length(), michael@0: &innerParams, fontGroup, flags, styleArray.Elements(), false); michael@0: child = transformedChild.get(); michael@0: } else { michael@0: cachedChild = fontGroup->MakeTextRun( michael@0: convertedString.BeginReading(), convertedString.Length(), michael@0: &innerParams, flags); michael@0: child = cachedChild.get(); michael@0: } michael@0: if (!child) michael@0: return; michael@0: // Copy potential linebreaks into child so they're preserved michael@0: // (and also child will be shaped appropriately) michael@0: NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), michael@0: "Dropped characters or break-before values somewhere!"); michael@0: child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), michael@0: canBreakBeforeArray.Elements(), aRefContext); michael@0: if (transformedChild) { michael@0: transformedChild->FinishSettingProperties(aRefContext); michael@0: } michael@0: michael@0: if (mergeNeeded) { michael@0: // Now merge multiple characters into one multi-glyph character as required michael@0: // and deal with skipping deleted accent chars michael@0: NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(), michael@0: "source length mismatch"); michael@0: NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(), michael@0: "destination length mismatch"); michael@0: MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(), michael@0: deletedCharsArray.Elements()); michael@0: } else { michael@0: // No merging to do, so just copy; this produces a more optimized textrun. michael@0: // We can't steal the data because the child may be cached and stealing michael@0: // the data would break the cache. michael@0: aTextRun->ResetGlyphRuns(); michael@0: aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0); michael@0: } michael@0: }