/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsTextRunTransformations.h"

#include "mozilla/MemoryReporting.h"

#include "nsGkAtoms.h"
#include "nsStyleConsts.h"
#include "nsStyleContext.h"
#include "nsUnicodeProperties.h"
#include "nsSpecialCasingData.h"
#include "mozilla/gfx/2D.h"
#include "nsTextFrameUtils.h"
#include "nsIPersistentProperties2.h"
#include "nsNetUtil.h"

// Unicode characters needing special casing treatment in tr/az languages
#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE  0x0130
#define LATIN_SMALL_LETTER_DOTLESS_I           0x0131

// Greek sigma needs custom handling for the lowercase transform; for details
// see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
// nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120.
#define GREEK_CAPITAL_LETTER_SIGMA             0x03A3
#define GREEK_SMALL_LETTER_FINAL_SIGMA         0x03C2
#define GREEK_SMALL_LETTER_SIGMA               0x03C3

// Custom uppercase mapping for Greek; see bug 307039 for details

// Lowercase vowels: the plain letter, its precomposed form with tonos, and
// the precomposed form with oxia (the 0x1Fxx code points). Iota and upsilon
// additionally have dialytika and dialytika+accent forms.
#define GREEK_LOWER_ALPHA                      0x03B1
#define GREEK_LOWER_ALPHA_TONOS                0x03AC
#define GREEK_LOWER_ALPHA_OXIA                 0x1F71
#define GREEK_LOWER_EPSILON                    0x03B5
#define GREEK_LOWER_EPSILON_TONOS              0x03AD
#define GREEK_LOWER_EPSILON_OXIA               0x1F73
#define GREEK_LOWER_ETA                        0x03B7
#define GREEK_LOWER_ETA_TONOS                  0x03AE
#define GREEK_LOWER_ETA_OXIA                   0x1F75
#define GREEK_LOWER_IOTA                       0x03B9
#define GREEK_LOWER_IOTA_TONOS                 0x03AF
#define GREEK_LOWER_IOTA_OXIA                  0x1F77
#define GREEK_LOWER_IOTA_DIALYTIKA             0x03CA
#define GREEK_LOWER_IOTA_DIALYTIKA_TONOS       0x0390
#define GREEK_LOWER_IOTA_DIALYTIKA_OXIA        0x1FD3
#define GREEK_LOWER_OMICRON                    0x03BF
#define GREEK_LOWER_OMICRON_TONOS              0x03CC
#define GREEK_LOWER_OMICRON_OXIA               0x1F79
#define GREEK_LOWER_UPSILON                    0x03C5
#define GREEK_LOWER_UPSILON_TONOS              0x03CD
#define GREEK_LOWER_UPSILON_OXIA               0x1F7B
#define GREEK_LOWER_UPSILON_DIALYTIKA          0x03CB
#define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS    0x03B0
#define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA     0x1FE3
#define GREEK_LOWER_OMEGA                      0x03C9
#define GREEK_LOWER_OMEGA_TONOS                0x03CE
#define GREEK_LOWER_OMEGA_OXIA                 0x1F7D

// Unaccented uppercase vowels; these are the values GreekUpperCase() returns
// once any accent has been stripped.
#define GREEK_UPPER_ALPHA                      0x0391
#define GREEK_UPPER_EPSILON                    0x0395
#define GREEK_UPPER_ETA                        0x0397
#define GREEK_UPPER_IOTA                       0x0399
#define GREEK_UPPER_IOTA_DIALYTIKA             0x03AA
#define GREEK_UPPER_OMICRON                    0x039F
#define GREEK_UPPER_UPSILON                    0x03A5
#define GREEK_UPPER_UPSILON_DIALYTIKA          0x03AB
#define GREEK_UPPER_OMEGA                      0x03A9

// Precomposed accented uppercase vowels (tonos and oxia forms).
#define GREEK_UPPER_ALPHA_TONOS                0x0386
#define GREEK_UPPER_ALPHA_OXIA                 0x1FBB
#define GREEK_UPPER_EPSILON_TONOS              0x0388
#define GREEK_UPPER_EPSILON_OXIA               0x1FC9
#define GREEK_UPPER_ETA_TONOS                  0x0389
#define GREEK_UPPER_ETA_OXIA                   0x1FCB
#define GREEK_UPPER_IOTA_TONOS                 0x038A
#define GREEK_UPPER_IOTA_OXIA                  0x1FDB
#define GREEK_UPPER_OMICRON_TONOS              0x038C
#define GREEK_UPPER_OMICRON_OXIA               0x1FF9
#define GREEK_UPPER_UPSILON_TONOS              0x038E
#define GREEK_UPPER_UPSILON_OXIA               0x1FEB
#define GREEK_UPPER_OMEGA_TONOS                0x038F
#define GREEK_UPPER_OMEGA_OXIA                 0x1FFB

// Combining marks that GreekUpperCase() examines when they follow a vowel.
#define COMBINING_ACUTE_ACCENT                 0x0301
#define COMBINING_DIAERESIS                    0x0308
#define COMBINING_ACUTE_TONE_MARK              0x0341
#define COMBINING_GREEK_DIALYTIKA_TONOS        0x0344

// When doing an Uppercase transform in Greek, we need to keep track of the
// current state while iterating through the string, to recognize and process
// diphthongs correctly. For clarity, we define a state for each vowel and
// each vowel with accent, although a few of these do not actually need any
// special treatment and could be folded into kStart.
1.95 +enum GreekCasingState { 1.96 + kStart, 1.97 + kAlpha, 1.98 + kEpsilon, 1.99 + kEta, 1.100 + kIota, 1.101 + kOmicron, 1.102 + kUpsilon, 1.103 + kOmega, 1.104 + kAlphaAcc, 1.105 + kEpsilonAcc, 1.106 + kEtaAcc, 1.107 + kIotaAcc, 1.108 + kOmicronAcc, 1.109 + kUpsilonAcc, 1.110 + kOmegaAcc, 1.111 + kOmicronUpsilon, 1.112 + kDiaeresis 1.113 +}; 1.114 + 1.115 +static uint32_t 1.116 +GreekUpperCase(uint32_t aCh, GreekCasingState* aState) 1.117 +{ 1.118 + switch (aCh) { 1.119 + case GREEK_UPPER_ALPHA: 1.120 + case GREEK_LOWER_ALPHA: 1.121 + *aState = kAlpha; 1.122 + return GREEK_UPPER_ALPHA; 1.123 + 1.124 + case GREEK_UPPER_EPSILON: 1.125 + case GREEK_LOWER_EPSILON: 1.126 + *aState = kEpsilon; 1.127 + return GREEK_UPPER_EPSILON; 1.128 + 1.129 + case GREEK_UPPER_ETA: 1.130 + case GREEK_LOWER_ETA: 1.131 + *aState = kEta; 1.132 + return GREEK_UPPER_ETA; 1.133 + 1.134 + case GREEK_UPPER_IOTA: 1.135 + *aState = kIota; 1.136 + return GREEK_UPPER_IOTA; 1.137 + 1.138 + case GREEK_UPPER_OMICRON: 1.139 + case GREEK_LOWER_OMICRON: 1.140 + *aState = kOmicron; 1.141 + return GREEK_UPPER_OMICRON; 1.142 + 1.143 + case GREEK_UPPER_UPSILON: 1.144 + switch (*aState) { 1.145 + case kOmicron: 1.146 + *aState = kOmicronUpsilon; 1.147 + break; 1.148 + default: 1.149 + *aState = kUpsilon; 1.150 + break; 1.151 + } 1.152 + return GREEK_UPPER_UPSILON; 1.153 + 1.154 + case GREEK_UPPER_OMEGA: 1.155 + case GREEK_LOWER_OMEGA: 1.156 + *aState = kOmega; 1.157 + return GREEK_UPPER_OMEGA; 1.158 + 1.159 + // iota and upsilon may be the second vowel of a diphthong 1.160 + case GREEK_LOWER_IOTA: 1.161 + switch (*aState) { 1.162 + case kAlphaAcc: 1.163 + case kEpsilonAcc: 1.164 + case kOmicronAcc: 1.165 + case kUpsilonAcc: 1.166 + *aState = kStart; 1.167 + return GREEK_UPPER_IOTA_DIALYTIKA; 1.168 + default: 1.169 + break; 1.170 + } 1.171 + *aState = kIota; 1.172 + return GREEK_UPPER_IOTA; 1.173 + 1.174 + case GREEK_LOWER_UPSILON: 1.175 + switch (*aState) { 1.176 + case kAlphaAcc: 1.177 + case kEpsilonAcc: 
1.178 + case kEtaAcc: 1.179 + case kOmicronAcc: 1.180 + *aState = kStart; 1.181 + return GREEK_UPPER_UPSILON_DIALYTIKA; 1.182 + case kOmicron: 1.183 + *aState = kOmicronUpsilon; 1.184 + break; 1.185 + default: 1.186 + *aState = kUpsilon; 1.187 + break; 1.188 + } 1.189 + return GREEK_UPPER_UPSILON; 1.190 + 1.191 + case GREEK_UPPER_IOTA_DIALYTIKA: 1.192 + case GREEK_LOWER_IOTA_DIALYTIKA: 1.193 + case GREEK_UPPER_UPSILON_DIALYTIKA: 1.194 + case GREEK_LOWER_UPSILON_DIALYTIKA: 1.195 + case COMBINING_DIAERESIS: 1.196 + *aState = kDiaeresis; 1.197 + return ToUpperCase(aCh); 1.198 + 1.199 + // remove accent if it follows a vowel or diaeresis, 1.200 + // and set appropriate state for diphthong detection 1.201 + case COMBINING_ACUTE_ACCENT: 1.202 + case COMBINING_ACUTE_TONE_MARK: 1.203 + switch (*aState) { 1.204 + case kAlpha: 1.205 + *aState = kAlphaAcc; 1.206 + return uint32_t(-1); // omit this char from result string 1.207 + case kEpsilon: 1.208 + *aState = kEpsilonAcc; 1.209 + return uint32_t(-1); 1.210 + case kEta: 1.211 + *aState = kEtaAcc; 1.212 + return uint32_t(-1); 1.213 + case kIota: 1.214 + *aState = kIotaAcc; 1.215 + return uint32_t(-1); 1.216 + case kOmicron: 1.217 + *aState = kOmicronAcc; 1.218 + return uint32_t(-1); 1.219 + case kUpsilon: 1.220 + *aState = kUpsilonAcc; 1.221 + return uint32_t(-1); 1.222 + case kOmicronUpsilon: 1.223 + *aState = kStart; // this completed a diphthong 1.224 + return uint32_t(-1); 1.225 + case kOmega: 1.226 + *aState = kOmegaAcc; 1.227 + return uint32_t(-1); 1.228 + case kDiaeresis: 1.229 + *aState = kStart; 1.230 + return uint32_t(-1); 1.231 + default: 1.232 + break; 1.233 + } 1.234 + break; 1.235 + 1.236 + // combinations with dieresis+accent just strip the accent, 1.237 + // and reset to start state (don't form diphthong with following vowel) 1.238 + case GREEK_LOWER_IOTA_DIALYTIKA_TONOS: 1.239 + case GREEK_LOWER_IOTA_DIALYTIKA_OXIA: 1.240 + *aState = kStart; 1.241 + return GREEK_UPPER_IOTA_DIALYTIKA; 1.242 + 1.243 + case 
GREEK_LOWER_UPSILON_DIALYTIKA_TONOS: 1.244 + case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA: 1.245 + *aState = kStart; 1.246 + return GREEK_UPPER_UPSILON_DIALYTIKA; 1.247 + 1.248 + case COMBINING_GREEK_DIALYTIKA_TONOS: 1.249 + *aState = kStart; 1.250 + return COMBINING_DIAERESIS; 1.251 + 1.252 + // strip accents from vowels, and note the vowel seen so that we can detect 1.253 + // diphthongs where diaeresis needs to be added 1.254 + case GREEK_LOWER_ALPHA_TONOS: 1.255 + case GREEK_LOWER_ALPHA_OXIA: 1.256 + case GREEK_UPPER_ALPHA_TONOS: 1.257 + case GREEK_UPPER_ALPHA_OXIA: 1.258 + *aState = kAlphaAcc; 1.259 + return GREEK_UPPER_ALPHA; 1.260 + 1.261 + case GREEK_LOWER_EPSILON_TONOS: 1.262 + case GREEK_LOWER_EPSILON_OXIA: 1.263 + case GREEK_UPPER_EPSILON_TONOS: 1.264 + case GREEK_UPPER_EPSILON_OXIA: 1.265 + *aState = kEpsilonAcc; 1.266 + return GREEK_UPPER_EPSILON; 1.267 + 1.268 + case GREEK_LOWER_ETA_TONOS: 1.269 + case GREEK_LOWER_ETA_OXIA: 1.270 + case GREEK_UPPER_ETA_TONOS: 1.271 + case GREEK_UPPER_ETA_OXIA: 1.272 + *aState = kEtaAcc; 1.273 + return GREEK_UPPER_ETA; 1.274 + 1.275 + case GREEK_LOWER_IOTA_TONOS: 1.276 + case GREEK_LOWER_IOTA_OXIA: 1.277 + case GREEK_UPPER_IOTA_TONOS: 1.278 + case GREEK_UPPER_IOTA_OXIA: 1.279 + *aState = kIotaAcc; 1.280 + return GREEK_UPPER_IOTA; 1.281 + 1.282 + case GREEK_LOWER_OMICRON_TONOS: 1.283 + case GREEK_LOWER_OMICRON_OXIA: 1.284 + case GREEK_UPPER_OMICRON_TONOS: 1.285 + case GREEK_UPPER_OMICRON_OXIA: 1.286 + *aState = kOmicronAcc; 1.287 + return GREEK_UPPER_OMICRON; 1.288 + 1.289 + case GREEK_LOWER_UPSILON_TONOS: 1.290 + case GREEK_LOWER_UPSILON_OXIA: 1.291 + case GREEK_UPPER_UPSILON_TONOS: 1.292 + case GREEK_UPPER_UPSILON_OXIA: 1.293 + switch (*aState) { 1.294 + case kOmicron: 1.295 + *aState = kStart; // this completed a diphthong 1.296 + break; 1.297 + default: 1.298 + *aState = kUpsilonAcc; 1.299 + break; 1.300 + } 1.301 + return GREEK_UPPER_UPSILON; 1.302 + 1.303 + case GREEK_LOWER_OMEGA_TONOS: 1.304 + case 
GREEK_LOWER_OMEGA_OXIA: 1.305 + case GREEK_UPPER_OMEGA_TONOS: 1.306 + case GREEK_UPPER_OMEGA_OXIA: 1.307 + *aState = kOmegaAcc; 1.308 + return GREEK_UPPER_OMEGA; 1.309 + } 1.310 + 1.311 + // all other characters just reset the state, and use standard mappings 1.312 + *aState = kStart; 1.313 + return ToUpperCase(aCh); 1.314 +} 1.315 + 1.316 +nsTransformedTextRun * 1.317 +nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams, 1.318 + nsTransformingTextRunFactory* aFactory, 1.319 + gfxFontGroup* aFontGroup, 1.320 + const char16_t* aString, uint32_t aLength, 1.321 + const uint32_t aFlags, nsStyleContext** aStyles, 1.322 + bool aOwnsFactory) 1.323 +{ 1.324 + NS_ASSERTION(!(aFlags & gfxTextRunFactory::TEXT_IS_8BIT), 1.325 + "didn't expect text to be marked as 8-bit here"); 1.326 + 1.327 + void *storage = AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength); 1.328 + if (!storage) { 1.329 + return nullptr; 1.330 + } 1.331 + 1.332 + return new (storage) nsTransformedTextRun(aParams, aFactory, aFontGroup, 1.333 + aString, aLength, 1.334 + aFlags, aStyles, aOwnsFactory); 1.335 +} 1.336 + 1.337 +void 1.338 +nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength, 1.339 + bool* aCapitalization, 1.340 + gfxContext* aRefContext) 1.341 +{ 1.342 + if (mCapitalize.IsEmpty()) { 1.343 + if (!mCapitalize.AppendElements(GetLength())) 1.344 + return; 1.345 + memset(mCapitalize.Elements(), 0, GetLength()*sizeof(bool)); 1.346 + } 1.347 + memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength*sizeof(bool)); 1.348 + mNeedsRebuild = true; 1.349 +} 1.350 + 1.351 +bool 1.352 +nsTransformedTextRun::SetPotentialLineBreaks(uint32_t aStart, uint32_t aLength, 1.353 + uint8_t* aBreakBefore, 1.354 + gfxContext* aRefContext) 1.355 +{ 1.356 + bool changed = gfxTextRun::SetPotentialLineBreaks(aStart, aLength, 1.357 + aBreakBefore, aRefContext); 1.358 + if (changed) { 1.359 + mNeedsRebuild = true; 1.360 + } 1.361 + return changed; 1.362 +} 1.363 
+ 1.364 +size_t 1.365 +nsTransformedTextRun::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) 1.366 +{ 1.367 + size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf); 1.368 + total += mStyles.SizeOfExcludingThis(aMallocSizeOf); 1.369 + total += mCapitalize.SizeOfExcludingThis(aMallocSizeOf); 1.370 + if (mOwnsFactory) { 1.371 + total += aMallocSizeOf(mFactory); 1.372 + } 1.373 + return total; 1.374 +} 1.375 + 1.376 +size_t 1.377 +nsTransformedTextRun::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) 1.378 +{ 1.379 + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); 1.380 +} 1.381 + 1.382 +nsTransformedTextRun* 1.383 +nsTransformingTextRunFactory::MakeTextRun(const char16_t* aString, uint32_t aLength, 1.384 + const gfxTextRunFactory::Parameters* aParams, 1.385 + gfxFontGroup* aFontGroup, uint32_t aFlags, 1.386 + nsStyleContext** aStyles, bool aOwnsFactory) 1.387 +{ 1.388 + return nsTransformedTextRun::Create(aParams, this, aFontGroup, 1.389 + aString, aLength, aFlags, aStyles, aOwnsFactory); 1.390 +} 1.391 + 1.392 +nsTransformedTextRun* 1.393 +nsTransformingTextRunFactory::MakeTextRun(const uint8_t* aString, uint32_t aLength, 1.394 + const gfxTextRunFactory::Parameters* aParams, 1.395 + gfxFontGroup* aFontGroup, uint32_t aFlags, 1.396 + nsStyleContext** aStyles, bool aOwnsFactory) 1.397 +{ 1.398 + // We'll only have a Unicode code path to minimize the amount of code needed 1.399 + // for these rarely used features 1.400 + NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString), aLength); 1.401 + return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup, 1.402 + aFlags & ~(gfxFontGroup::TEXT_IS_PERSISTENT | gfxFontGroup::TEXT_IS_8BIT), 1.403 + aStyles, aOwnsFactory); 1.404 +} 1.405 + 1.406 +void 1.407 +MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc, 1.408 + const bool* aCharsToMerge, const bool* aDeletedChars) 1.409 +{ 1.410 + aDest->ResetGlyphRuns(); 1.411 + 1.412 + 
gfxTextRun::GlyphRunIterator iter(aSrc, 0, aSrc->GetLength()); 1.413 + uint32_t offset = 0; 1.414 + nsAutoTArray<gfxTextRun::DetailedGlyph,2> glyphs; 1.415 + while (iter.NextRun()) { 1.416 + gfxTextRun::GlyphRun* run = iter.GetGlyphRun(); 1.417 + nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType, 1.418 + offset, false); 1.419 + if (NS_FAILED(rv)) 1.420 + return; 1.421 + 1.422 + bool anyMissing = false; 1.423 + uint32_t mergeRunStart = iter.GetStringStart(); 1.424 + const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs(); 1.425 + gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart]; 1.426 + uint32_t stringEnd = iter.GetStringEnd(); 1.427 + for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) { 1.428 + const gfxTextRun::CompressedGlyph g = srcGlyphs[k]; 1.429 + if (g.IsSimpleGlyph()) { 1.430 + if (!anyMissing) { 1.431 + gfxTextRun::DetailedGlyph details; 1.432 + details.mGlyphID = g.GetSimpleGlyph(); 1.433 + details.mAdvance = g.GetSimpleAdvance(); 1.434 + details.mXOffset = 0; 1.435 + details.mYOffset = 0; 1.436 + glyphs.AppendElement(details); 1.437 + } 1.438 + } else { 1.439 + if (g.IsMissing()) { 1.440 + anyMissing = true; 1.441 + glyphs.Clear(); 1.442 + } 1.443 + if (g.GetGlyphCount() > 0) { 1.444 + glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount()); 1.445 + } 1.446 + } 1.447 + 1.448 + if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) { 1.449 + // next char is supposed to merge with current, so loop without 1.450 + // writing current merged glyph to the destination 1.451 + continue; 1.452 + } 1.453 + 1.454 + // If the start of the merge run is actually a character that should 1.455 + // have been merged with the previous character (this can happen 1.456 + // if there's a font change in the middle of a case-mapped character, 1.457 + // that decomposed into a sequence of base+diacritics, for example), 1.458 + // just discard the entire merge run. 
See comment at start of this 1.459 + // function. 1.460 + NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart], 1.461 + "unable to merge across a glyph run boundary, " 1.462 + "glyph(s) discarded"); 1.463 + if (!aCharsToMerge[mergeRunStart]) { 1.464 + if (anyMissing) { 1.465 + mergedGlyph.SetMissing(glyphs.Length()); 1.466 + } else { 1.467 + mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(), 1.468 + mergedGlyph.IsLigatureGroupStart(), 1.469 + glyphs.Length()); 1.470 + } 1.471 + aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements()); 1.472 + ++offset; 1.473 + 1.474 + while (offset < aDest->GetLength() && aDeletedChars[offset]) { 1.475 + aDest->SetGlyphs(offset++, gfxTextRun::CompressedGlyph(), nullptr); 1.476 + } 1.477 + } 1.478 + 1.479 + glyphs.Clear(); 1.480 + anyMissing = false; 1.481 + mergeRunStart = k + 1; 1.482 + if (mergeRunStart < stringEnd) { 1.483 + mergedGlyph = srcGlyphs[mergeRunStart]; 1.484 + } 1.485 + } 1.486 + NS_ASSERTION(glyphs.Length() == 0, 1.487 + "Leftover glyphs, don't request merging of the last character with its next!"); 1.488 + } 1.489 + NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations"); 1.490 +} 1.491 + 1.492 +gfxTextRunFactory::Parameters 1.493 +GetParametersForInner(nsTransformedTextRun* aTextRun, uint32_t* aFlags, 1.494 + gfxContext* aRefContext) 1.495 +{ 1.496 + gfxTextRunFactory::Parameters params = 1.497 + { aRefContext, nullptr, nullptr, 1.498 + nullptr, 0, aTextRun->GetAppUnitsPerDevUnit() 1.499 + }; 1.500 + *aFlags = aTextRun->GetFlags() & ~gfxFontGroup::TEXT_IS_PERSISTENT; 1.501 + return params; 1.502 +} 1.503 + 1.504 +void 1.505 +nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, 1.506 + gfxContext* aRefContext) 1.507 +{ 1.508 + gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); 1.509 + gfxFontStyle fontStyle = *fontGroup->GetStyle(); 1.510 + fontStyle.size *= 0.8; 1.511 + nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle); 1.512 + if (!smallFont) 1.513 + return; 1.514 + 
1.515 + uint32_t flags; 1.516 + gfxTextRunFactory::Parameters innerParams = 1.517 + GetParametersForInner(aTextRun, &flags, aRefContext); 1.518 + 1.519 + uint32_t length = aTextRun->GetLength(); 1.520 + const char16_t* str = aTextRun->mString.BeginReading(); 1.521 + nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); 1.522 + // Create a textrun so we can check cluster-start properties 1.523 + nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags)); 1.524 + if (!inner.get()) 1.525 + return; 1.526 + 1.527 + nsCaseTransformTextRunFactory uppercaseFactory(nullptr, true); 1.528 + 1.529 + aTextRun->ResetGlyphRuns(); 1.530 + 1.531 + uint32_t runStart = 0; 1.532 + nsAutoTArray<nsStyleContext*,50> styleArray; 1.533 + nsAutoTArray<uint8_t,50> canBreakBeforeArray; 1.534 + 1.535 + enum RunCaseState { 1.536 + kUpperOrCaseless, // will be untouched by font-variant:small-caps 1.537 + kLowercase, // will be uppercased and reduced 1.538 + kSpecialUpper // specials: don't shrink, but apply uppercase mapping 1.539 + }; 1.540 + RunCaseState runCase = kUpperOrCaseless; 1.541 + 1.542 + // Note that this loop runs from 0 to length *inclusive*, so the last 1.543 + // iteration is in effect beyond the end of the input text, to give a 1.544 + // chance to finish the last casing run we've found. 1.545 + // The last iteration, when i==length, must not attempt to look at the 1.546 + // character position [i] or the style data for styles[i], as this would 1.547 + // be beyond the valid length of the textrun or its style array. 1.548 + for (uint32_t i = 0; i <= length; ++i) { 1.549 + RunCaseState chCase = kUpperOrCaseless; 1.550 + // Unless we're at the end, figure out what treatment the current 1.551 + // character will need. 1.552 + if (i < length) { 1.553 + nsStyleContext* styleContext = styles[i]; 1.554 + // Characters that aren't the start of a cluster are ignored here. They 1.555 + // get added to whatever lowercase/non-lowercase run we're in. 
1.556 + if (!inner->IsClusterStart(i)) { 1.557 + chCase = runCase; 1.558 + } else { 1.559 + if (styleContext->StyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) { 1.560 + uint32_t ch = str[i]; 1.561 + if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { 1.562 + ch = SURROGATE_TO_UCS4(ch, str[i + 1]); 1.563 + } 1.564 + uint32_t ch2 = ToUpperCase(ch); 1.565 + if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) { 1.566 + chCase = kLowercase; 1.567 + } else if (styleContext->StyleFont()->mLanguage == nsGkAtoms::el) { 1.568 + // In Greek, check for characters that will be modified by the 1.569 + // GreekUpperCase mapping - this catches accented capitals where 1.570 + // the accent is to be removed (bug 307039). These are handled by 1.571 + // a transformed child run using the full-size font. 1.572 + GreekCasingState state = kStart; // don't need exact context here 1.573 + ch2 = GreekUpperCase(ch, &state); 1.574 + if (ch != ch2) { 1.575 + chCase = kSpecialUpper; 1.576 + } 1.577 + } 1.578 + } else { 1.579 + // Don't transform the character! I.e., pretend that it's not lowercase 1.580 + } 1.581 + } 1.582 + } 1.583 + 1.584 + // At the end of the text, or when the current character needs different 1.585 + // casing treatment from the current run, finish the run-in-progress 1.586 + // and prepare to accumulate a new run. 1.587 + // Note that we do not look at any source data for offset [i] here, 1.588 + // as that would be invalid in the case where i==length. 
1.589 + if ((i == length || runCase != chCase) && runStart < i) { 1.590 + nsAutoPtr<nsTransformedTextRun> transformedChild; 1.591 + nsAutoPtr<gfxTextRun> cachedChild; 1.592 + gfxTextRun* child; 1.593 + 1.594 + switch (runCase) { 1.595 + case kUpperOrCaseless: 1.596 + cachedChild = 1.597 + fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams, 1.598 + flags); 1.599 + child = cachedChild.get(); 1.600 + break; 1.601 + case kLowercase: 1.602 + transformedChild = 1.603 + uppercaseFactory.MakeTextRun(str + runStart, i - runStart, 1.604 + &innerParams, smallFont, flags, 1.605 + styleArray.Elements(), false); 1.606 + child = transformedChild; 1.607 + break; 1.608 + case kSpecialUpper: 1.609 + transformedChild = 1.610 + uppercaseFactory.MakeTextRun(str + runStart, i - runStart, 1.611 + &innerParams, fontGroup, flags, 1.612 + styleArray.Elements(), false); 1.613 + child = transformedChild; 1.614 + break; 1.615 + } 1.616 + if (!child) 1.617 + return; 1.618 + // Copy potential linebreaks into child so they're preserved 1.619 + // (and also child will be shaped appropriately) 1.620 + NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart, 1.621 + "lost some break-before values?"); 1.622 + child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), 1.623 + canBreakBeforeArray.Elements(), aRefContext); 1.624 + if (transformedChild) { 1.625 + transformedChild->FinishSettingProperties(aRefContext); 1.626 + } 1.627 + aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart); 1.628 + 1.629 + runStart = i; 1.630 + styleArray.Clear(); 1.631 + canBreakBeforeArray.Clear(); 1.632 + } 1.633 + 1.634 + if (i < length) { 1.635 + runCase = chCase; 1.636 + styleArray.AppendElement(styles[i]); 1.637 + canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); 1.638 + } 1.639 + } 1.640 +} 1.641 + 1.642 +void 1.643 +nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, 1.644 + gfxContext* aRefContext) 1.645 +{ 1.646 + uint32_t length = 
aTextRun->GetLength(); 1.647 + const char16_t* str = aTextRun->mString.BeginReading(); 1.648 + nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements(); 1.649 + 1.650 + nsAutoString convertedString; 1.651 + nsAutoTArray<bool,50> charsToMergeArray; 1.652 + nsAutoTArray<bool,50> deletedCharsArray; 1.653 + nsAutoTArray<nsStyleContext*,50> styleArray; 1.654 + nsAutoTArray<uint8_t,50> canBreakBeforeArray; 1.655 + bool mergeNeeded = false; 1.656 + 1.657 + // Some languages have special casing conventions that differ from the 1.658 + // default Unicode mappings. 1.659 + // The enum values here are named for well-known exemplar languages that 1.660 + // exhibit the behavior in question; multiple lang tags may map to the 1.661 + // same setting here, if the behavior is shared by other languages. 1.662 + enum { 1.663 + eNone, // default non-lang-specific behavior 1.664 + eTurkish, // preserve dotted/dotless-i distinction in uppercase 1.665 + eDutch, // treat "ij" digraph as a unit for capitalization 1.666 + eGreek // strip accent when uppercasing Greek vowels 1.667 + } languageSpecificCasing = eNone; 1.668 + 1.669 + const nsIAtom* lang = nullptr; 1.670 + bool capitalizeDutchIJ = false; 1.671 + bool prevIsLetter = false; 1.672 + uint32_t sigmaIndex = uint32_t(-1); 1.673 + nsIUGenCategory::nsUGenCategory cat; 1.674 + GreekCasingState greekState = kStart; 1.675 + uint32_t i; 1.676 + for (i = 0; i < length; ++i) { 1.677 + uint32_t ch = str[i]; 1.678 + nsStyleContext* styleContext = styles[i]; 1.679 + 1.680 + uint8_t style = mAllUppercase ? 
NS_STYLE_TEXT_TRANSFORM_UPPERCASE 1.681 + : styleContext->StyleText()->mTextTransform; 1.682 + int extraChars = 0; 1.683 + const mozilla::unicode::MultiCharMapping *mcm; 1.684 + 1.685 + if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { 1.686 + ch = SURROGATE_TO_UCS4(ch, str[i + 1]); 1.687 + } 1.688 + 1.689 + if (lang != styleContext->StyleFont()->mLanguage) { 1.690 + lang = styleContext->StyleFont()->mLanguage; 1.691 + if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az || 1.692 + lang == nsGkAtoms::ba || lang == nsGkAtoms::crh || 1.693 + lang == nsGkAtoms::tt) { 1.694 + languageSpecificCasing = eTurkish; 1.695 + } else if (lang == nsGkAtoms::nl) { 1.696 + languageSpecificCasing = eDutch; 1.697 + } else if (lang == nsGkAtoms::el) { 1.698 + languageSpecificCasing = eGreek; 1.699 + greekState = kStart; 1.700 + } else { 1.701 + languageSpecificCasing = eNone; 1.702 + } 1.703 + } 1.704 + 1.705 + switch (style) { 1.706 + case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: 1.707 + if (languageSpecificCasing == eTurkish) { 1.708 + if (ch == 'I') { 1.709 + ch = LATIN_SMALL_LETTER_DOTLESS_I; 1.710 + prevIsLetter = true; 1.711 + sigmaIndex = uint32_t(-1); 1.712 + break; 1.713 + } 1.714 + if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { 1.715 + ch = 'i'; 1.716 + prevIsLetter = true; 1.717 + sigmaIndex = uint32_t(-1); 1.718 + break; 1.719 + } 1.720 + } 1.721 + 1.722 + // Special lowercasing behavior for Greek Sigma: note that this is listed 1.723 + // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a 1.724 + // language-specific mapping; it applies regardless of the language of 1.725 + // the element. 1.726 + // 1.727 + // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e. 
1.728 + // the non-final form) whenever there is a following letter, or when the 1.729 + // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a 1.730 + // LETTER); and to FINAL SIGMA when it is preceded by another letter but 1.731 + // not followed by one. 1.732 + // 1.733 + // To implement the context-sensitive nature of this mapping, we keep 1.734 + // track of whether the previous character was a letter. If not, CAPITAL 1.735 + // SIGMA will map directly to SMALL SIGMA. If the previous character 1.736 + // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the 1.737 + // position in the converted string; if we then encounter another letter, 1.738 + // that FINAL SIGMA is replaced with a standard SMALL SIGMA. 1.739 + 1.740 + cat = mozilla::unicode::GetGenCategory(ch); 1.741 + 1.742 + // If sigmaIndex is not -1, it marks where we have provisionally mapped 1.743 + // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we 1.744 + // need to change it to SMALL SIGMA. 
1.745 + if (sigmaIndex != uint32_t(-1)) { 1.746 + if (cat == nsIUGenCategory::kLetter) { 1.747 + convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); 1.748 + } 1.749 + } 1.750 + 1.751 + if (ch == GREEK_CAPITAL_LETTER_SIGMA) { 1.752 + // If preceding char was a letter, map to FINAL instead of SMALL, 1.753 + // and note where it occurred by setting sigmaIndex; we'll change it 1.754 + // to standard SMALL SIGMA later if another letter follows 1.755 + if (prevIsLetter) { 1.756 + ch = GREEK_SMALL_LETTER_FINAL_SIGMA; 1.757 + sigmaIndex = convertedString.Length(); 1.758 + } else { 1.759 + // CAPITAL SIGMA not preceded by a letter is unconditionally mapped 1.760 + // to SMALL SIGMA 1.761 + ch = GREEK_SMALL_LETTER_SIGMA; 1.762 + sigmaIndex = uint32_t(-1); 1.763 + } 1.764 + prevIsLetter = true; 1.765 + break; 1.766 + } 1.767 + 1.768 + // ignore diacritics for the purpose of contextual sigma mapping; 1.769 + // otherwise, reset prevIsLetter appropriately and clear the 1.770 + // sigmaIndex marker 1.771 + if (cat != nsIUGenCategory::kMark) { 1.772 + prevIsLetter = (cat == nsIUGenCategory::kLetter); 1.773 + sigmaIndex = uint32_t(-1); 1.774 + } 1.775 + 1.776 + mcm = mozilla::unicode::SpecialLower(ch); 1.777 + if (mcm) { 1.778 + int j = 0; 1.779 + while (j < 2 && mcm->mMappedChars[j + 1]) { 1.780 + convertedString.Append(mcm->mMappedChars[j]); 1.781 + ++extraChars; 1.782 + ++j; 1.783 + } 1.784 + ch = mcm->mMappedChars[j]; 1.785 + break; 1.786 + } 1.787 + 1.788 + ch = ToLowerCase(ch); 1.789 + break; 1.790 + 1.791 + case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: 1.792 + if (languageSpecificCasing == eTurkish && ch == 'i') { 1.793 + ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; 1.794 + break; 1.795 + } 1.796 + 1.797 + if (languageSpecificCasing == eGreek) { 1.798 + ch = GreekUpperCase(ch, &greekState); 1.799 + break; 1.800 + } 1.801 + 1.802 + mcm = mozilla::unicode::SpecialUpper(ch); 1.803 + if (mcm) { 1.804 + int j = 0; 1.805 + while (j < 2 && mcm->mMappedChars[j + 1]) { 1.806 + 
convertedString.Append(mcm->mMappedChars[j]); 1.807 + ++extraChars; 1.808 + ++j; 1.809 + } 1.810 + ch = mcm->mMappedChars[j]; 1.811 + break; 1.812 + } 1.813 + 1.814 + ch = ToUpperCase(ch); 1.815 + break; 1.816 + 1.817 + case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: 1.818 + if (capitalizeDutchIJ && ch == 'j') { 1.819 + ch = 'J'; 1.820 + capitalizeDutchIJ = false; 1.821 + break; 1.822 + } 1.823 + capitalizeDutchIJ = false; 1.824 + if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { 1.825 + if (languageSpecificCasing == eTurkish && ch == 'i') { 1.826 + ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; 1.827 + break; 1.828 + } 1.829 + if (languageSpecificCasing == eDutch && ch == 'i') { 1.830 + ch = 'I'; 1.831 + capitalizeDutchIJ = true; 1.832 + break; 1.833 + } 1.834 + 1.835 + mcm = mozilla::unicode::SpecialTitle(ch); 1.836 + if (mcm) { 1.837 + int j = 0; 1.838 + while (j < 2 && mcm->mMappedChars[j + 1]) { 1.839 + convertedString.Append(mcm->mMappedChars[j]); 1.840 + ++extraChars; 1.841 + ++j; 1.842 + } 1.843 + ch = mcm->mMappedChars[j]; 1.844 + break; 1.845 + } 1.846 + 1.847 + ch = ToTitleCase(ch); 1.848 + } 1.849 + break; 1.850 + 1.851 + case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH: 1.852 + ch = mozilla::unicode::GetFullWidth(ch); 1.853 + break; 1.854 + 1.855 + default: 1.856 + break; 1.857 + } 1.858 + 1.859 + if (ch == uint32_t(-1)) { 1.860 + deletedCharsArray.AppendElement(true); 1.861 + mergeNeeded = true; 1.862 + } else { 1.863 + deletedCharsArray.AppendElement(false); 1.864 + charsToMergeArray.AppendElement(false); 1.865 + styleArray.AppendElement(styleContext); 1.866 + canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); 1.867 + 1.868 + if (IS_IN_BMP(ch)) { 1.869 + convertedString.Append(ch); 1.870 + } else { 1.871 + convertedString.Append(H_SURROGATE(ch)); 1.872 + convertedString.Append(L_SURROGATE(ch)); 1.873 + ++i; 1.874 + deletedCharsArray.AppendElement(true); // not exactly deleted, but the 1.875 + // trailing surrogate is skipped 1.876 + 
++extraChars; 1.877 + } 1.878 + 1.879 + while (extraChars-- > 0) { 1.880 + mergeNeeded = true; 1.881 + charsToMergeArray.AppendElement(true); 1.882 + styleArray.AppendElement(styleContext); 1.883 + canBreakBeforeArray.AppendElement(false); 1.884 + } 1.885 + } 1.886 + } 1.887 + 1.888 + uint32_t flags; 1.889 + gfxTextRunFactory::Parameters innerParams = 1.890 + GetParametersForInner(aTextRun, &flags, aRefContext); 1.891 + gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); 1.892 + 1.893 + nsAutoPtr<nsTransformedTextRun> transformedChild; 1.894 + nsAutoPtr<gfxTextRun> cachedChild; 1.895 + gfxTextRun* child; 1.896 + 1.897 + if (mInnerTransformingTextRunFactory) { 1.898 + transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( 1.899 + convertedString.BeginReading(), convertedString.Length(), 1.900 + &innerParams, fontGroup, flags, styleArray.Elements(), false); 1.901 + child = transformedChild.get(); 1.902 + } else { 1.903 + cachedChild = fontGroup->MakeTextRun( 1.904 + convertedString.BeginReading(), convertedString.Length(), 1.905 + &innerParams, flags); 1.906 + child = cachedChild.get(); 1.907 + } 1.908 + if (!child) 1.909 + return; 1.910 + // Copy potential linebreaks into child so they're preserved 1.911 + // (and also child will be shaped appropriately) 1.912 + NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), 1.913 + "Dropped characters or break-before values somewhere!"); 1.914 + child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), 1.915 + canBreakBeforeArray.Elements(), aRefContext); 1.916 + if (transformedChild) { 1.917 + transformedChild->FinishSettingProperties(aRefContext); 1.918 + } 1.919 + 1.920 + if (mergeNeeded) { 1.921 + // Now merge multiple characters into one multi-glyph character as required 1.922 + // and deal with skipping deleted accent chars 1.923 + NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(), 1.924 + "source length mismatch"); 1.925 + NS_ASSERTION(deletedCharsArray.Length() == 
aTextRun->GetLength(), 1.926 + "destination length mismatch"); 1.927 + MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(), 1.928 + deletedCharsArray.Elements()); 1.929 + } else { 1.930 + // No merging to do, so just copy; this produces a more optimized textrun. 1.931 + // We can't steal the data because the child may be cached and stealing 1.932 + // the data would break the cache. 1.933 + aTextRun->ResetGlyphRuns(); 1.934 + aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0); 1.935 + } 1.936 +}