Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsTextRunTransformations.h"
8 #include "mozilla/MemoryReporting.h"
10 #include "nsGkAtoms.h"
11 #include "nsStyleConsts.h"
12 #include "nsStyleContext.h"
13 #include "nsUnicodeProperties.h"
14 #include "nsSpecialCasingData.h"
15 #include "mozilla/gfx/2D.h"
16 #include "nsTextFrameUtils.h"
17 #include "nsIPersistentProperties2.h"
18 #include "nsNetUtil.h"
// Code points that need special casing treatment in tr/az languages.
#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE   0x0130
#define LATIN_SMALL_LETTER_DOTLESS_I            0x0131

// Greek sigma needs custom handling for the lowercase transform; for details
// see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
// nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120.
#define GREEK_CAPITAL_LETTER_SIGMA              0x03A3
#define GREEK_SMALL_LETTER_FINAL_SIGMA          0x03C2
#define GREEK_SMALL_LETTER_SIGMA                0x03C3

// Code points used by the custom uppercase mapping for Greek; see bug 307039
// for details. Each accented vowel is listed twice: the *_TONOS name is the
// U+03xx code point and the *_OXIA name is the U+1Fxx code point.

// -- lowercase vowels, plain and accented
#define GREEK_LOWER_ALPHA                       0x03B1
#define GREEK_LOWER_ALPHA_TONOS                 0x03AC
#define GREEK_LOWER_ALPHA_OXIA                  0x1F71
#define GREEK_LOWER_EPSILON                     0x03B5
#define GREEK_LOWER_EPSILON_TONOS               0x03AD
#define GREEK_LOWER_EPSILON_OXIA                0x1F73
#define GREEK_LOWER_ETA                         0x03B7
#define GREEK_LOWER_ETA_TONOS                   0x03AE
#define GREEK_LOWER_ETA_OXIA                    0x1F75
#define GREEK_LOWER_IOTA                        0x03B9
#define GREEK_LOWER_IOTA_TONOS                  0x03AF
#define GREEK_LOWER_IOTA_OXIA                   0x1F77
#define GREEK_LOWER_IOTA_DIALYTIKA              0x03CA
#define GREEK_LOWER_IOTA_DIALYTIKA_TONOS        0x0390
#define GREEK_LOWER_IOTA_DIALYTIKA_OXIA         0x1FD3
#define GREEK_LOWER_OMICRON                     0x03BF
#define GREEK_LOWER_OMICRON_TONOS               0x03CC
#define GREEK_LOWER_OMICRON_OXIA                0x1F79
#define GREEK_LOWER_UPSILON                     0x03C5
#define GREEK_LOWER_UPSILON_TONOS               0x03CD
#define GREEK_LOWER_UPSILON_OXIA                0x1F7B
#define GREEK_LOWER_UPSILON_DIALYTIKA           0x03CB
#define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS     0x03B0
#define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA      0x1FE3
#define GREEK_LOWER_OMEGA                       0x03C9
#define GREEK_LOWER_OMEGA_TONOS                 0x03CE
#define GREEK_LOWER_OMEGA_OXIA                  0x1F7D

// -- uppercase vowels without accent (with or without dialytika)
#define GREEK_UPPER_ALPHA                       0x0391
#define GREEK_UPPER_EPSILON                     0x0395
#define GREEK_UPPER_ETA                         0x0397
#define GREEK_UPPER_IOTA                        0x0399
#define GREEK_UPPER_IOTA_DIALYTIKA              0x03AA
#define GREEK_UPPER_OMICRON                     0x039F
#define GREEK_UPPER_UPSILON                     0x03A5
#define GREEK_UPPER_UPSILON_DIALYTIKA           0x03AB
#define GREEK_UPPER_OMEGA                       0x03A9

// -- uppercase vowels carrying an accent
#define GREEK_UPPER_ALPHA_TONOS                 0x0386
#define GREEK_UPPER_ALPHA_OXIA                  0x1FBB
#define GREEK_UPPER_EPSILON_TONOS               0x0388
#define GREEK_UPPER_EPSILON_OXIA                0x1FC9
#define GREEK_UPPER_ETA_TONOS                   0x0389
#define GREEK_UPPER_ETA_OXIA                    0x1FCB
#define GREEK_UPPER_IOTA_TONOS                  0x038A
#define GREEK_UPPER_IOTA_OXIA                   0x1FDB
#define GREEK_UPPER_OMICRON_TONOS               0x038C
#define GREEK_UPPER_OMICRON_OXIA                0x1FF9
#define GREEK_UPPER_UPSILON_TONOS               0x038E
#define GREEK_UPPER_UPSILON_OXIA                0x1FEB
#define GREEK_UPPER_OMEGA_TONOS                 0x038F
#define GREEK_UPPER_OMEGA_OXIA                  0x1FFB

// -- combining marks recognized by the Greek uppercase mapping
#define COMBINING_ACUTE_ACCENT                  0x0301
#define COMBINING_DIAERESIS                     0x0308
#define COMBINING_ACUTE_TONE_MARK               0x0341
#define COMBINING_GREEK_DIALYTIKA_TONOS         0x0344
// State carried from one character to the next by the Greek Uppercase
// transform. While iterating through the string we need to know what came
// immediately before, so that diphthongs are recognized and processed
// correctly. For clarity there is one state per vowel and one per accented
// vowel, although a few of these do not actually need any special treatment
// and could be folded into kStart.
enum GreekCasingState {
  // No relevant context: initial state, and the state after any character
  // that is not tracked.
  kStart,

  // The previous character was this vowel, without an accent.
  kAlpha,
  kEpsilon,
  kEta,
  kIota,
  kOmicron,
  kUpsilon,
  kOmega,

  // The previous character was this vowel with an accent (either a
  // precomposed accented letter, or the vowel followed by a combining accent).
  kAlphaAcc,
  kEpsilonAcc,
  kEtaAcc,
  kIotaAcc,
  kOmicronAcc,
  kUpsilonAcc,
  kOmegaAcc,

  // An unaccented upsilon was seen directly after an unaccented omicron.
  kOmicronUpsilon,

  // The previous character carried, or was, a diaeresis.
  kDiaeresis
};
112 static uint32_t
113 GreekUpperCase(uint32_t aCh, GreekCasingState* aState)
114 {
115 switch (aCh) {
116 case GREEK_UPPER_ALPHA:
117 case GREEK_LOWER_ALPHA:
118 *aState = kAlpha;
119 return GREEK_UPPER_ALPHA;
121 case GREEK_UPPER_EPSILON:
122 case GREEK_LOWER_EPSILON:
123 *aState = kEpsilon;
124 return GREEK_UPPER_EPSILON;
126 case GREEK_UPPER_ETA:
127 case GREEK_LOWER_ETA:
128 *aState = kEta;
129 return GREEK_UPPER_ETA;
131 case GREEK_UPPER_IOTA:
132 *aState = kIota;
133 return GREEK_UPPER_IOTA;
135 case GREEK_UPPER_OMICRON:
136 case GREEK_LOWER_OMICRON:
137 *aState = kOmicron;
138 return GREEK_UPPER_OMICRON;
140 case GREEK_UPPER_UPSILON:
141 switch (*aState) {
142 case kOmicron:
143 *aState = kOmicronUpsilon;
144 break;
145 default:
146 *aState = kUpsilon;
147 break;
148 }
149 return GREEK_UPPER_UPSILON;
151 case GREEK_UPPER_OMEGA:
152 case GREEK_LOWER_OMEGA:
153 *aState = kOmega;
154 return GREEK_UPPER_OMEGA;
156 // iota and upsilon may be the second vowel of a diphthong
157 case GREEK_LOWER_IOTA:
158 switch (*aState) {
159 case kAlphaAcc:
160 case kEpsilonAcc:
161 case kOmicronAcc:
162 case kUpsilonAcc:
163 *aState = kStart;
164 return GREEK_UPPER_IOTA_DIALYTIKA;
165 default:
166 break;
167 }
168 *aState = kIota;
169 return GREEK_UPPER_IOTA;
171 case GREEK_LOWER_UPSILON:
172 switch (*aState) {
173 case kAlphaAcc:
174 case kEpsilonAcc:
175 case kEtaAcc:
176 case kOmicronAcc:
177 *aState = kStart;
178 return GREEK_UPPER_UPSILON_DIALYTIKA;
179 case kOmicron:
180 *aState = kOmicronUpsilon;
181 break;
182 default:
183 *aState = kUpsilon;
184 break;
185 }
186 return GREEK_UPPER_UPSILON;
188 case GREEK_UPPER_IOTA_DIALYTIKA:
189 case GREEK_LOWER_IOTA_DIALYTIKA:
190 case GREEK_UPPER_UPSILON_DIALYTIKA:
191 case GREEK_LOWER_UPSILON_DIALYTIKA:
192 case COMBINING_DIAERESIS:
193 *aState = kDiaeresis;
194 return ToUpperCase(aCh);
196 // remove accent if it follows a vowel or diaeresis,
197 // and set appropriate state for diphthong detection
198 case COMBINING_ACUTE_ACCENT:
199 case COMBINING_ACUTE_TONE_MARK:
200 switch (*aState) {
201 case kAlpha:
202 *aState = kAlphaAcc;
203 return uint32_t(-1); // omit this char from result string
204 case kEpsilon:
205 *aState = kEpsilonAcc;
206 return uint32_t(-1);
207 case kEta:
208 *aState = kEtaAcc;
209 return uint32_t(-1);
210 case kIota:
211 *aState = kIotaAcc;
212 return uint32_t(-1);
213 case kOmicron:
214 *aState = kOmicronAcc;
215 return uint32_t(-1);
216 case kUpsilon:
217 *aState = kUpsilonAcc;
218 return uint32_t(-1);
219 case kOmicronUpsilon:
220 *aState = kStart; // this completed a diphthong
221 return uint32_t(-1);
222 case kOmega:
223 *aState = kOmegaAcc;
224 return uint32_t(-1);
225 case kDiaeresis:
226 *aState = kStart;
227 return uint32_t(-1);
228 default:
229 break;
230 }
231 break;
233 // combinations with dieresis+accent just strip the accent,
234 // and reset to start state (don't form diphthong with following vowel)
235 case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
236 case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
237 *aState = kStart;
238 return GREEK_UPPER_IOTA_DIALYTIKA;
240 case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
241 case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
242 *aState = kStart;
243 return GREEK_UPPER_UPSILON_DIALYTIKA;
245 case COMBINING_GREEK_DIALYTIKA_TONOS:
246 *aState = kStart;
247 return COMBINING_DIAERESIS;
249 // strip accents from vowels, and note the vowel seen so that we can detect
250 // diphthongs where diaeresis needs to be added
251 case GREEK_LOWER_ALPHA_TONOS:
252 case GREEK_LOWER_ALPHA_OXIA:
253 case GREEK_UPPER_ALPHA_TONOS:
254 case GREEK_UPPER_ALPHA_OXIA:
255 *aState = kAlphaAcc;
256 return GREEK_UPPER_ALPHA;
258 case GREEK_LOWER_EPSILON_TONOS:
259 case GREEK_LOWER_EPSILON_OXIA:
260 case GREEK_UPPER_EPSILON_TONOS:
261 case GREEK_UPPER_EPSILON_OXIA:
262 *aState = kEpsilonAcc;
263 return GREEK_UPPER_EPSILON;
265 case GREEK_LOWER_ETA_TONOS:
266 case GREEK_LOWER_ETA_OXIA:
267 case GREEK_UPPER_ETA_TONOS:
268 case GREEK_UPPER_ETA_OXIA:
269 *aState = kEtaAcc;
270 return GREEK_UPPER_ETA;
272 case GREEK_LOWER_IOTA_TONOS:
273 case GREEK_LOWER_IOTA_OXIA:
274 case GREEK_UPPER_IOTA_TONOS:
275 case GREEK_UPPER_IOTA_OXIA:
276 *aState = kIotaAcc;
277 return GREEK_UPPER_IOTA;
279 case GREEK_LOWER_OMICRON_TONOS:
280 case GREEK_LOWER_OMICRON_OXIA:
281 case GREEK_UPPER_OMICRON_TONOS:
282 case GREEK_UPPER_OMICRON_OXIA:
283 *aState = kOmicronAcc;
284 return GREEK_UPPER_OMICRON;
286 case GREEK_LOWER_UPSILON_TONOS:
287 case GREEK_LOWER_UPSILON_OXIA:
288 case GREEK_UPPER_UPSILON_TONOS:
289 case GREEK_UPPER_UPSILON_OXIA:
290 switch (*aState) {
291 case kOmicron:
292 *aState = kStart; // this completed a diphthong
293 break;
294 default:
295 *aState = kUpsilonAcc;
296 break;
297 }
298 return GREEK_UPPER_UPSILON;
300 case GREEK_LOWER_OMEGA_TONOS:
301 case GREEK_LOWER_OMEGA_OXIA:
302 case GREEK_UPPER_OMEGA_TONOS:
303 case GREEK_UPPER_OMEGA_OXIA:
304 *aState = kOmegaAcc;
305 return GREEK_UPPER_OMEGA;
306 }
308 // all other characters just reset the state, and use standard mappings
309 *aState = kStart;
310 return ToUpperCase(aCh);
311 }
313 nsTransformedTextRun *
314 nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams,
315 nsTransformingTextRunFactory* aFactory,
316 gfxFontGroup* aFontGroup,
317 const char16_t* aString, uint32_t aLength,
318 const uint32_t aFlags, nsStyleContext** aStyles,
319 bool aOwnsFactory)
320 {
321 NS_ASSERTION(!(aFlags & gfxTextRunFactory::TEXT_IS_8BIT),
322 "didn't expect text to be marked as 8-bit here");
324 void *storage = AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
325 if (!storage) {
326 return nullptr;
327 }
329 return new (storage) nsTransformedTextRun(aParams, aFactory, aFontGroup,
330 aString, aLength,
331 aFlags, aStyles, aOwnsFactory);
332 }
334 void
335 nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
336 bool* aCapitalization,
337 gfxContext* aRefContext)
338 {
339 if (mCapitalize.IsEmpty()) {
340 if (!mCapitalize.AppendElements(GetLength()))
341 return;
342 memset(mCapitalize.Elements(), 0, GetLength()*sizeof(bool));
343 }
344 memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength*sizeof(bool));
345 mNeedsRebuild = true;
346 }
348 bool
349 nsTransformedTextRun::SetPotentialLineBreaks(uint32_t aStart, uint32_t aLength,
350 uint8_t* aBreakBefore,
351 gfxContext* aRefContext)
352 {
353 bool changed = gfxTextRun::SetPotentialLineBreaks(aStart, aLength,
354 aBreakBefore, aRefContext);
355 if (changed) {
356 mNeedsRebuild = true;
357 }
358 return changed;
359 }
361 size_t
362 nsTransformedTextRun::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)
363 {
364 size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
365 total += mStyles.SizeOfExcludingThis(aMallocSizeOf);
366 total += mCapitalize.SizeOfExcludingThis(aMallocSizeOf);
367 if (mOwnsFactory) {
368 total += aMallocSizeOf(mFactory);
369 }
370 return total;
371 }
373 size_t
374 nsTransformedTextRun::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
375 {
376 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
377 }
379 nsTransformedTextRun*
380 nsTransformingTextRunFactory::MakeTextRun(const char16_t* aString, uint32_t aLength,
381 const gfxTextRunFactory::Parameters* aParams,
382 gfxFontGroup* aFontGroup, uint32_t aFlags,
383 nsStyleContext** aStyles, bool aOwnsFactory)
384 {
385 return nsTransformedTextRun::Create(aParams, this, aFontGroup,
386 aString, aLength, aFlags, aStyles, aOwnsFactory);
387 }
389 nsTransformedTextRun*
390 nsTransformingTextRunFactory::MakeTextRun(const uint8_t* aString, uint32_t aLength,
391 const gfxTextRunFactory::Parameters* aParams,
392 gfxFontGroup* aFontGroup, uint32_t aFlags,
393 nsStyleContext** aStyles, bool aOwnsFactory)
394 {
395 // We'll only have a Unicode code path to minimize the amount of code needed
396 // for these rarely used features
397 NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString), aLength);
398 return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
399 aFlags & ~(gfxFontGroup::TEXT_IS_PERSISTENT | gfxFontGroup::TEXT_IS_8BIT),
400 aStyles, aOwnsFactory);
401 }
403 void
404 MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
405 const bool* aCharsToMerge, const bool* aDeletedChars)
406 {
407 aDest->ResetGlyphRuns();
409 gfxTextRun::GlyphRunIterator iter(aSrc, 0, aSrc->GetLength());
410 uint32_t offset = 0;
411 nsAutoTArray<gfxTextRun::DetailedGlyph,2> glyphs;
412 while (iter.NextRun()) {
413 gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
414 nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType,
415 offset, false);
416 if (NS_FAILED(rv))
417 return;
419 bool anyMissing = false;
420 uint32_t mergeRunStart = iter.GetStringStart();
421 const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs();
422 gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
423 uint32_t stringEnd = iter.GetStringEnd();
424 for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) {
425 const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
426 if (g.IsSimpleGlyph()) {
427 if (!anyMissing) {
428 gfxTextRun::DetailedGlyph details;
429 details.mGlyphID = g.GetSimpleGlyph();
430 details.mAdvance = g.GetSimpleAdvance();
431 details.mXOffset = 0;
432 details.mYOffset = 0;
433 glyphs.AppendElement(details);
434 }
435 } else {
436 if (g.IsMissing()) {
437 anyMissing = true;
438 glyphs.Clear();
439 }
440 if (g.GetGlyphCount() > 0) {
441 glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
442 }
443 }
445 if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {
446 // next char is supposed to merge with current, so loop without
447 // writing current merged glyph to the destination
448 continue;
449 }
451 // If the start of the merge run is actually a character that should
452 // have been merged with the previous character (this can happen
453 // if there's a font change in the middle of a case-mapped character,
454 // that decomposed into a sequence of base+diacritics, for example),
455 // just discard the entire merge run. See comment at start of this
456 // function.
457 NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart],
458 "unable to merge across a glyph run boundary, "
459 "glyph(s) discarded");
460 if (!aCharsToMerge[mergeRunStart]) {
461 if (anyMissing) {
462 mergedGlyph.SetMissing(glyphs.Length());
463 } else {
464 mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
465 mergedGlyph.IsLigatureGroupStart(),
466 glyphs.Length());
467 }
468 aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
469 ++offset;
471 while (offset < aDest->GetLength() && aDeletedChars[offset]) {
472 aDest->SetGlyphs(offset++, gfxTextRun::CompressedGlyph(), nullptr);
473 }
474 }
476 glyphs.Clear();
477 anyMissing = false;
478 mergeRunStart = k + 1;
479 if (mergeRunStart < stringEnd) {
480 mergedGlyph = srcGlyphs[mergeRunStart];
481 }
482 }
483 NS_ASSERTION(glyphs.Length() == 0,
484 "Leftover glyphs, don't request merging of the last character with its next!");
485 }
486 NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
487 }
489 gfxTextRunFactory::Parameters
490 GetParametersForInner(nsTransformedTextRun* aTextRun, uint32_t* aFlags,
491 gfxContext* aRefContext)
492 {
493 gfxTextRunFactory::Parameters params =
494 { aRefContext, nullptr, nullptr,
495 nullptr, 0, aTextRun->GetAppUnitsPerDevUnit()
496 };
497 *aFlags = aTextRun->GetFlags() & ~gfxFontGroup::TEXT_IS_PERSISTENT;
498 return params;
499 }
501 void
502 nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
503 gfxContext* aRefContext)
504 {
505 gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
506 gfxFontStyle fontStyle = *fontGroup->GetStyle();
507 fontStyle.size *= 0.8;
508 nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle);
509 if (!smallFont)
510 return;
512 uint32_t flags;
513 gfxTextRunFactory::Parameters innerParams =
514 GetParametersForInner(aTextRun, &flags, aRefContext);
516 uint32_t length = aTextRun->GetLength();
517 const char16_t* str = aTextRun->mString.BeginReading();
518 nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
519 // Create a textrun so we can check cluster-start properties
520 nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags));
521 if (!inner.get())
522 return;
524 nsCaseTransformTextRunFactory uppercaseFactory(nullptr, true);
526 aTextRun->ResetGlyphRuns();
528 uint32_t runStart = 0;
529 nsAutoTArray<nsStyleContext*,50> styleArray;
530 nsAutoTArray<uint8_t,50> canBreakBeforeArray;
532 enum RunCaseState {
533 kUpperOrCaseless, // will be untouched by font-variant:small-caps
534 kLowercase, // will be uppercased and reduced
535 kSpecialUpper // specials: don't shrink, but apply uppercase mapping
536 };
537 RunCaseState runCase = kUpperOrCaseless;
539 // Note that this loop runs from 0 to length *inclusive*, so the last
540 // iteration is in effect beyond the end of the input text, to give a
541 // chance to finish the last casing run we've found.
542 // The last iteration, when i==length, must not attempt to look at the
543 // character position [i] or the style data for styles[i], as this would
544 // be beyond the valid length of the textrun or its style array.
545 for (uint32_t i = 0; i <= length; ++i) {
546 RunCaseState chCase = kUpperOrCaseless;
547 // Unless we're at the end, figure out what treatment the current
548 // character will need.
549 if (i < length) {
550 nsStyleContext* styleContext = styles[i];
551 // Characters that aren't the start of a cluster are ignored here. They
552 // get added to whatever lowercase/non-lowercase run we're in.
553 if (!inner->IsClusterStart(i)) {
554 chCase = runCase;
555 } else {
556 if (styleContext->StyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) {
557 uint32_t ch = str[i];
558 if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
559 ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
560 }
561 uint32_t ch2 = ToUpperCase(ch);
562 if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) {
563 chCase = kLowercase;
564 } else if (styleContext->StyleFont()->mLanguage == nsGkAtoms::el) {
565 // In Greek, check for characters that will be modified by the
566 // GreekUpperCase mapping - this catches accented capitals where
567 // the accent is to be removed (bug 307039). These are handled by
568 // a transformed child run using the full-size font.
569 GreekCasingState state = kStart; // don't need exact context here
570 ch2 = GreekUpperCase(ch, &state);
571 if (ch != ch2) {
572 chCase = kSpecialUpper;
573 }
574 }
575 } else {
576 // Don't transform the character! I.e., pretend that it's not lowercase
577 }
578 }
579 }
581 // At the end of the text, or when the current character needs different
582 // casing treatment from the current run, finish the run-in-progress
583 // and prepare to accumulate a new run.
584 // Note that we do not look at any source data for offset [i] here,
585 // as that would be invalid in the case where i==length.
586 if ((i == length || runCase != chCase) && runStart < i) {
587 nsAutoPtr<nsTransformedTextRun> transformedChild;
588 nsAutoPtr<gfxTextRun> cachedChild;
589 gfxTextRun* child;
591 switch (runCase) {
592 case kUpperOrCaseless:
593 cachedChild =
594 fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams,
595 flags);
596 child = cachedChild.get();
597 break;
598 case kLowercase:
599 transformedChild =
600 uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
601 &innerParams, smallFont, flags,
602 styleArray.Elements(), false);
603 child = transformedChild;
604 break;
605 case kSpecialUpper:
606 transformedChild =
607 uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
608 &innerParams, fontGroup, flags,
609 styleArray.Elements(), false);
610 child = transformedChild;
611 break;
612 }
613 if (!child)
614 return;
615 // Copy potential linebreaks into child so they're preserved
616 // (and also child will be shaped appropriately)
617 NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart,
618 "lost some break-before values?");
619 child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
620 canBreakBeforeArray.Elements(), aRefContext);
621 if (transformedChild) {
622 transformedChild->FinishSettingProperties(aRefContext);
623 }
624 aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart);
626 runStart = i;
627 styleArray.Clear();
628 canBreakBeforeArray.Clear();
629 }
631 if (i < length) {
632 runCase = chCase;
633 styleArray.AppendElement(styles[i]);
634 canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
635 }
636 }
637 }
639 void
640 nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
641 gfxContext* aRefContext)
642 {
643 uint32_t length = aTextRun->GetLength();
644 const char16_t* str = aTextRun->mString.BeginReading();
645 nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
647 nsAutoString convertedString;
648 nsAutoTArray<bool,50> charsToMergeArray;
649 nsAutoTArray<bool,50> deletedCharsArray;
650 nsAutoTArray<nsStyleContext*,50> styleArray;
651 nsAutoTArray<uint8_t,50> canBreakBeforeArray;
652 bool mergeNeeded = false;
654 // Some languages have special casing conventions that differ from the
655 // default Unicode mappings.
656 // The enum values here are named for well-known exemplar languages that
657 // exhibit the behavior in question; multiple lang tags may map to the
658 // same setting here, if the behavior is shared by other languages.
659 enum {
660 eNone, // default non-lang-specific behavior
661 eTurkish, // preserve dotted/dotless-i distinction in uppercase
662 eDutch, // treat "ij" digraph as a unit for capitalization
663 eGreek // strip accent when uppercasing Greek vowels
664 } languageSpecificCasing = eNone;
666 const nsIAtom* lang = nullptr;
667 bool capitalizeDutchIJ = false;
668 bool prevIsLetter = false;
669 uint32_t sigmaIndex = uint32_t(-1);
670 nsIUGenCategory::nsUGenCategory cat;
671 GreekCasingState greekState = kStart;
672 uint32_t i;
673 for (i = 0; i < length; ++i) {
674 uint32_t ch = str[i];
675 nsStyleContext* styleContext = styles[i];
677 uint8_t style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
678 : styleContext->StyleText()->mTextTransform;
679 int extraChars = 0;
680 const mozilla::unicode::MultiCharMapping *mcm;
682 if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
683 ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
684 }
686 if (lang != styleContext->StyleFont()->mLanguage) {
687 lang = styleContext->StyleFont()->mLanguage;
688 if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
689 lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
690 lang == nsGkAtoms::tt) {
691 languageSpecificCasing = eTurkish;
692 } else if (lang == nsGkAtoms::nl) {
693 languageSpecificCasing = eDutch;
694 } else if (lang == nsGkAtoms::el) {
695 languageSpecificCasing = eGreek;
696 greekState = kStart;
697 } else {
698 languageSpecificCasing = eNone;
699 }
700 }
702 switch (style) {
703 case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
704 if (languageSpecificCasing == eTurkish) {
705 if (ch == 'I') {
706 ch = LATIN_SMALL_LETTER_DOTLESS_I;
707 prevIsLetter = true;
708 sigmaIndex = uint32_t(-1);
709 break;
710 }
711 if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
712 ch = 'i';
713 prevIsLetter = true;
714 sigmaIndex = uint32_t(-1);
715 break;
716 }
717 }
719 // Special lowercasing behavior for Greek Sigma: note that this is listed
720 // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
721 // language-specific mapping; it applies regardless of the language of
722 // the element.
723 //
724 // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
725 // the non-final form) whenever there is a following letter, or when the
726 // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
727 // LETTER); and to FINAL SIGMA when it is preceded by another letter but
728 // not followed by one.
729 //
730 // To implement the context-sensitive nature of this mapping, we keep
731 // track of whether the previous character was a letter. If not, CAPITAL
732 // SIGMA will map directly to SMALL SIGMA. If the previous character
733 // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
734 // position in the converted string; if we then encounter another letter,
735 // that FINAL SIGMA is replaced with a standard SMALL SIGMA.
737 cat = mozilla::unicode::GetGenCategory(ch);
739 // If sigmaIndex is not -1, it marks where we have provisionally mapped
740 // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
741 // need to change it to SMALL SIGMA.
742 if (sigmaIndex != uint32_t(-1)) {
743 if (cat == nsIUGenCategory::kLetter) {
744 convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
745 }
746 }
748 if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
749 // If preceding char was a letter, map to FINAL instead of SMALL,
750 // and note where it occurred by setting sigmaIndex; we'll change it
751 // to standard SMALL SIGMA later if another letter follows
752 if (prevIsLetter) {
753 ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
754 sigmaIndex = convertedString.Length();
755 } else {
756 // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
757 // to SMALL SIGMA
758 ch = GREEK_SMALL_LETTER_SIGMA;
759 sigmaIndex = uint32_t(-1);
760 }
761 prevIsLetter = true;
762 break;
763 }
765 // ignore diacritics for the purpose of contextual sigma mapping;
766 // otherwise, reset prevIsLetter appropriately and clear the
767 // sigmaIndex marker
768 if (cat != nsIUGenCategory::kMark) {
769 prevIsLetter = (cat == nsIUGenCategory::kLetter);
770 sigmaIndex = uint32_t(-1);
771 }
773 mcm = mozilla::unicode::SpecialLower(ch);
774 if (mcm) {
775 int j = 0;
776 while (j < 2 && mcm->mMappedChars[j + 1]) {
777 convertedString.Append(mcm->mMappedChars[j]);
778 ++extraChars;
779 ++j;
780 }
781 ch = mcm->mMappedChars[j];
782 break;
783 }
785 ch = ToLowerCase(ch);
786 break;
788 case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
789 if (languageSpecificCasing == eTurkish && ch == 'i') {
790 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
791 break;
792 }
794 if (languageSpecificCasing == eGreek) {
795 ch = GreekUpperCase(ch, &greekState);
796 break;
797 }
799 mcm = mozilla::unicode::SpecialUpper(ch);
800 if (mcm) {
801 int j = 0;
802 while (j < 2 && mcm->mMappedChars[j + 1]) {
803 convertedString.Append(mcm->mMappedChars[j]);
804 ++extraChars;
805 ++j;
806 }
807 ch = mcm->mMappedChars[j];
808 break;
809 }
811 ch = ToUpperCase(ch);
812 break;
814 case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
815 if (capitalizeDutchIJ && ch == 'j') {
816 ch = 'J';
817 capitalizeDutchIJ = false;
818 break;
819 }
820 capitalizeDutchIJ = false;
821 if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
822 if (languageSpecificCasing == eTurkish && ch == 'i') {
823 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
824 break;
825 }
826 if (languageSpecificCasing == eDutch && ch == 'i') {
827 ch = 'I';
828 capitalizeDutchIJ = true;
829 break;
830 }
832 mcm = mozilla::unicode::SpecialTitle(ch);
833 if (mcm) {
834 int j = 0;
835 while (j < 2 && mcm->mMappedChars[j + 1]) {
836 convertedString.Append(mcm->mMappedChars[j]);
837 ++extraChars;
838 ++j;
839 }
840 ch = mcm->mMappedChars[j];
841 break;
842 }
844 ch = ToTitleCase(ch);
845 }
846 break;
848 case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:
849 ch = mozilla::unicode::GetFullWidth(ch);
850 break;
852 default:
853 break;
854 }
856 if (ch == uint32_t(-1)) {
857 deletedCharsArray.AppendElement(true);
858 mergeNeeded = true;
859 } else {
860 deletedCharsArray.AppendElement(false);
861 charsToMergeArray.AppendElement(false);
862 styleArray.AppendElement(styleContext);
863 canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
865 if (IS_IN_BMP(ch)) {
866 convertedString.Append(ch);
867 } else {
868 convertedString.Append(H_SURROGATE(ch));
869 convertedString.Append(L_SURROGATE(ch));
870 ++i;
871 deletedCharsArray.AppendElement(true); // not exactly deleted, but the
872 // trailing surrogate is skipped
873 ++extraChars;
874 }
876 while (extraChars-- > 0) {
877 mergeNeeded = true;
878 charsToMergeArray.AppendElement(true);
879 styleArray.AppendElement(styleContext);
880 canBreakBeforeArray.AppendElement(false);
881 }
882 }
883 }
885 uint32_t flags;
886 gfxTextRunFactory::Parameters innerParams =
887 GetParametersForInner(aTextRun, &flags, aRefContext);
888 gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
890 nsAutoPtr<nsTransformedTextRun> transformedChild;
891 nsAutoPtr<gfxTextRun> cachedChild;
892 gfxTextRun* child;
894 if (mInnerTransformingTextRunFactory) {
895 transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
896 convertedString.BeginReading(), convertedString.Length(),
897 &innerParams, fontGroup, flags, styleArray.Elements(), false);
898 child = transformedChild.get();
899 } else {
900 cachedChild = fontGroup->MakeTextRun(
901 convertedString.BeginReading(), convertedString.Length(),
902 &innerParams, flags);
903 child = cachedChild.get();
904 }
905 if (!child)
906 return;
907 // Copy potential linebreaks into child so they're preserved
908 // (and also child will be shaped appropriately)
909 NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
910 "Dropped characters or break-before values somewhere!");
911 child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
912 canBreakBeforeArray.Elements(), aRefContext);
913 if (transformedChild) {
914 transformedChild->FinishSettingProperties(aRefContext);
915 }
917 if (mergeNeeded) {
918 // Now merge multiple characters into one multi-glyph character as required
919 // and deal with skipping deleted accent chars
920 NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
921 "source length mismatch");
922 NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
923 "destination length mismatch");
924 MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
925 deletedCharsArray.Elements());
926 } else {
927 // No merging to do, so just copy; this produces a more optimized textrun.
928 // We can't steal the data because the child may be cached and stealing
929 // the data would break the cache.
930 aTextRun->ResetGlyphRuns();
931 aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
932 }
933 }