The Tor Browser: layout/generic/nsTextRunTransformations.cpp@b8a032363ba2

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-

     2  * This Source Code Form is subject to the terms of the Mozilla Public

     3  * License, v. 2.0. If a copy of the MPL was not distributed with this

     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     6 #include "nsTextRunTransformations.h"

     8 #include "mozilla/MemoryReporting.h"

    10 #include "nsGkAtoms.h"

    11 #include "nsStyleConsts.h"

    12 #include "nsStyleContext.h"

    13 #include "nsUnicodeProperties.h"

    14 #include "nsSpecialCasingData.h"

    15 #include "mozilla/gfx/2D.h"

    16 #include "nsTextFrameUtils.h"

    17 #include "nsIPersistentProperties2.h"

    18 #include "nsNetUtil.h"

    20 // Unicode characters needing special casing treatment in tr/az languages
       // (the lowercase transform in nsCaseTransformTextRunFactory::RebuildTextRun()
       // maps 'I' to the dotless form and the dotted capital to plain 'i' when the
       // Turkish-style casing mode is active).
    21 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE  0x0130
    22 #define LATIN_SMALL_LETTER_DOTLESS_I           0x0131

    24 // Greek sigma needs custom handling for the lowercase transform; for details
    25 // see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
    26 // nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120.
    27 #define GREEK_CAPITAL_LETTER_SIGMA             0x03A3
    28 #define GREEK_SMALL_LETTER_FINAL_SIGMA         0x03C2
    29 #define GREEK_SMALL_LETTER_SIGMA               0x03C3

    31 // Custom uppercase mapping for Greek; see bug 307039 for details
       //
       // Lowercase vowels. Each vowel is listed in its plain form followed by its
       // accented forms; every accent has two code points (a *_TONOS one in the
       // 0x03xx range and an *_OXIA one in the 0x1Fxx range), and GreekUpperCase()
       // handles both members of such a pair identically.
    32 #define GREEK_LOWER_ALPHA                      0x03B1
    33 #define GREEK_LOWER_ALPHA_TONOS                0x03AC
    34 #define GREEK_LOWER_ALPHA_OXIA                 0x1F71
    35 #define GREEK_LOWER_EPSILON                    0x03B5
    36 #define GREEK_LOWER_EPSILON_TONOS              0x03AD
    37 #define GREEK_LOWER_EPSILON_OXIA               0x1F73
    38 #define GREEK_LOWER_ETA                        0x03B7
    39 #define GREEK_LOWER_ETA_TONOS                  0x03AE
    40 #define GREEK_LOWER_ETA_OXIA                   0x1F75
    41 #define GREEK_LOWER_IOTA                       0x03B9
    42 #define GREEK_LOWER_IOTA_TONOS                 0x03AF
    43 #define GREEK_LOWER_IOTA_OXIA                  0x1F77
    44 #define GREEK_LOWER_IOTA_DIALYTIKA             0x03CA
    45 #define GREEK_LOWER_IOTA_DIALYTIKA_TONOS       0x0390
    46 #define GREEK_LOWER_IOTA_DIALYTIKA_OXIA        0x1FD3
    47 #define GREEK_LOWER_OMICRON                    0x03BF
    48 #define GREEK_LOWER_OMICRON_TONOS              0x03CC
    49 #define GREEK_LOWER_OMICRON_OXIA               0x1F79
    50 #define GREEK_LOWER_UPSILON                    0x03C5
    51 #define GREEK_LOWER_UPSILON_TONOS              0x03CD
    52 #define GREEK_LOWER_UPSILON_OXIA               0x1F7B
    53 #define GREEK_LOWER_UPSILON_DIALYTIKA          0x03CB
    54 #define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS    0x03B0
    55 #define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA     0x1FE3
    56 #define GREEK_LOWER_OMEGA                      0x03C9
    57 #define GREEK_LOWER_OMEGA_TONOS                0x03CE
    58 #define GREEK_LOWER_OMEGA_OXIA                 0x1F7D
       // Unaccented capitals (plus the two capitals with dialytika); apart from
       // uint32_t(-1), COMBINING_DIAERESIS and ToUpperCase() results, these are
       // the values GreekUpperCase() returns.
    59 #define GREEK_UPPER_ALPHA                      0x0391
    60 #define GREEK_UPPER_EPSILON                    0x0395
    61 #define GREEK_UPPER_ETA                        0x0397
    62 #define GREEK_UPPER_IOTA                       0x0399
    63 #define GREEK_UPPER_IOTA_DIALYTIKA             0x03AA
    64 #define GREEK_UPPER_OMICRON                    0x039F
    65 #define GREEK_UPPER_UPSILON                    0x03A5
    66 #define GREEK_UPPER_UPSILON_DIALYTIKA          0x03AB
    67 #define GREEK_UPPER_OMEGA                      0x03A9
       // Accented capitals; GreekUpperCase() maps these to the unaccented
       // capital of the same vowel.
    68 #define GREEK_UPPER_ALPHA_TONOS                0x0386
    69 #define GREEK_UPPER_ALPHA_OXIA                 0x1FBB
    70 #define GREEK_UPPER_EPSILON_TONOS              0x0388
    71 #define GREEK_UPPER_EPSILON_OXIA               0x1FC9
    72 #define GREEK_UPPER_ETA_TONOS                  0x0389
    73 #define GREEK_UPPER_ETA_OXIA                   0x1FCB
    74 #define GREEK_UPPER_IOTA_TONOS                 0x038A
    75 #define GREEK_UPPER_IOTA_OXIA                  0x1FDB
    76 #define GREEK_UPPER_OMICRON_TONOS              0x038C
    77 #define GREEK_UPPER_OMICRON_OXIA               0x1FF9
    78 #define GREEK_UPPER_UPSILON_TONOS              0x038E
    79 #define GREEK_UPPER_UPSILON_OXIA               0x1FEB
    80 #define GREEK_UPPER_OMEGA_TONOS                0x038F
    81 #define GREEK_UPPER_OMEGA_OXIA                 0x1FFB
       // Combining marks that GreekUpperCase() recognizes: the two acute marks
       // are dropped when they follow a vowel or diaeresis, and the combined
       // dialytika+tonos mark is reduced to a plain combining diaeresis.
    82 #define COMBINING_ACUTE_ACCENT                 0x0301
    83 #define COMBINING_DIAERESIS                    0x0308
    84 #define COMBINING_ACUTE_TONE_MARK              0x0341
    85 #define COMBINING_GREEK_DIALYTIKA_TONOS        0x0344

    87 // When doing an Uppercase transform in Greek, we need to keep track of the
    88 // current state while iterating through the string, to recognize and process
    89 // diphthongs correctly. For clarity, we define a state for each vowel and
    90 // each vowel with accent, although a few of these do not actually need any
    91 // special treatment and could be folded into kStart.
       //
       // The state describes the character(s) most recently passed to
       // GreekUpperCase(); callers start from kStart.
    92 enum GreekCasingState {
         // No relevant context: initial state, and the state after any character
         // that GreekUpperCase() has no special handling for.
    93   kStart,
         // The previous character was the named vowel without an accent.
    94   kAlpha,
    95   kEpsilon,
    96   kEta,
    97   kIota,
    98   kOmicron,
    99   kUpsilon,
   100   kOmega,
         // The previous vowel carried an acute accent (either precomposed, or a
         // combining accent that followed the plain vowel). A following lowercase
         // iota or upsilon may then receive a dialytika.
   101   kAlphaAcc,
   102   kEpsilonAcc,
   103   kEtaAcc,
   104   kIotaAcc,
   105   kOmicronAcc,
   106   kUpsilonAcc,
   107   kOmegaAcc,
         // Unaccented omicron immediately followed by unaccented upsilon; a
         // combining accent seen in this state is dropped and resets to kStart.
   108   kOmicronUpsilon,
         // The previous character was a combining diaeresis or an iota/upsilon
         // with dialytika; a following combining accent is dropped.
   109   kDiaeresis
   110 };

   112 static uint32_t

   113 GreekUpperCase(uint32_t aCh, GreekCasingState* aState)

   114 {

   115   switch (aCh) {

   116   case GREEK_UPPER_ALPHA:

   117   case GREEK_LOWER_ALPHA:

   118     *aState = kAlpha;

   119     return GREEK_UPPER_ALPHA;

   121   case GREEK_UPPER_EPSILON:

   122   case GREEK_LOWER_EPSILON:

   123     *aState = kEpsilon;

   124     return GREEK_UPPER_EPSILON;

   126   case GREEK_UPPER_ETA:

   127   case GREEK_LOWER_ETA:

   128     *aState = kEta;

   129     return GREEK_UPPER_ETA;

   131   case GREEK_UPPER_IOTA:

   132     *aState = kIota;

   133     return GREEK_UPPER_IOTA;

   135   case GREEK_UPPER_OMICRON:

   136   case GREEK_LOWER_OMICRON:

   137     *aState = kOmicron;

   138     return GREEK_UPPER_OMICRON;

   140   case GREEK_UPPER_UPSILON:

   141     switch (*aState) {

   142     case kOmicron:

   143       *aState = kOmicronUpsilon;

   144       break;

   145     default:

   146       *aState = kUpsilon;

   147       break;

   148     }

   149     return GREEK_UPPER_UPSILON;

   151   case GREEK_UPPER_OMEGA:

   152   case GREEK_LOWER_OMEGA:

   153     *aState = kOmega;

   154     return GREEK_UPPER_OMEGA;

   156   // iota and upsilon may be the second vowel of a diphthong

   157   case GREEK_LOWER_IOTA:

   158     switch (*aState) {

   159     case kAlphaAcc:

   160     case kEpsilonAcc:

   161     case kOmicronAcc:

   162     case kUpsilonAcc:

   163       *aState = kStart;

   164       return GREEK_UPPER_IOTA_DIALYTIKA;

   165     default:

   166       break;

   167     }

   168     *aState = kIota;

   169     return GREEK_UPPER_IOTA;

   171   case GREEK_LOWER_UPSILON:

   172     switch (*aState) {

   173     case kAlphaAcc:

   174     case kEpsilonAcc:

   175     case kEtaAcc:

   176     case kOmicronAcc:

   177       *aState = kStart;

   178       return GREEK_UPPER_UPSILON_DIALYTIKA;

   179     case kOmicron:

   180       *aState = kOmicronUpsilon;

   181       break;

   182     default:

   183       *aState = kUpsilon;

   184       break;

   185     }

   186     return GREEK_UPPER_UPSILON;

   188   case GREEK_UPPER_IOTA_DIALYTIKA:

   189   case GREEK_LOWER_IOTA_DIALYTIKA:

   190   case GREEK_UPPER_UPSILON_DIALYTIKA:

   191   case GREEK_LOWER_UPSILON_DIALYTIKA:

   192   case COMBINING_DIAERESIS:

   193     *aState = kDiaeresis;

   194     return ToUpperCase(aCh);

   196   // remove accent if it follows a vowel or diaeresis,

   197   // and set appropriate state for diphthong detection

   198   case COMBINING_ACUTE_ACCENT:

   199   case COMBINING_ACUTE_TONE_MARK:

   200     switch (*aState) {

   201     case kAlpha:

   202       *aState = kAlphaAcc;

   203       return uint32_t(-1); // omit this char from result string

   204     case kEpsilon:

   205       *aState = kEpsilonAcc;

   206       return uint32_t(-1);

   207     case kEta:

   208       *aState = kEtaAcc;

   209       return uint32_t(-1);

   210     case kIota:

   211       *aState = kIotaAcc;

   212       return uint32_t(-1);

   213     case kOmicron:

   214       *aState = kOmicronAcc;

   215       return uint32_t(-1);

   216     case kUpsilon:

   217       *aState = kUpsilonAcc;

   218       return uint32_t(-1);

   219     case kOmicronUpsilon:

   220       *aState = kStart; // this completed a diphthong

   221       return uint32_t(-1);

   222     case kOmega:

   223       *aState = kOmegaAcc;

   224       return uint32_t(-1);

   225     case kDiaeresis:

   226       *aState = kStart;

   227       return uint32_t(-1);

   228     default:

   229       break;

   230     }

   231     break;

   233   // combinations with dieresis+accent just strip the accent,

   234   // and reset to start state (don't form diphthong with following vowel)

   235   case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:

   236   case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:

   237     *aState = kStart;

   238     return GREEK_UPPER_IOTA_DIALYTIKA;

   240   case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:

   241   case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:

   242     *aState = kStart;

   243     return GREEK_UPPER_UPSILON_DIALYTIKA;

   245   case COMBINING_GREEK_DIALYTIKA_TONOS:

   246     *aState = kStart;

   247     return COMBINING_DIAERESIS;

   249   // strip accents from vowels, and note the vowel seen so that we can detect

   250   // diphthongs where diaeresis needs to be added

   251   case GREEK_LOWER_ALPHA_TONOS:

   252   case GREEK_LOWER_ALPHA_OXIA:

   253   case GREEK_UPPER_ALPHA_TONOS:

   254   case GREEK_UPPER_ALPHA_OXIA:

   255     *aState = kAlphaAcc;

   256     return GREEK_UPPER_ALPHA;

   258   case GREEK_LOWER_EPSILON_TONOS:

   259   case GREEK_LOWER_EPSILON_OXIA:

   260   case GREEK_UPPER_EPSILON_TONOS:

   261   case GREEK_UPPER_EPSILON_OXIA:

   262     *aState = kEpsilonAcc;

   263     return GREEK_UPPER_EPSILON;

   265   case GREEK_LOWER_ETA_TONOS:

   266   case GREEK_LOWER_ETA_OXIA:

   267   case GREEK_UPPER_ETA_TONOS:

   268   case GREEK_UPPER_ETA_OXIA:

   269     *aState = kEtaAcc;

   270     return GREEK_UPPER_ETA;

   272   case GREEK_LOWER_IOTA_TONOS:

   273   case GREEK_LOWER_IOTA_OXIA:

   274   case GREEK_UPPER_IOTA_TONOS:

   275   case GREEK_UPPER_IOTA_OXIA:

   276     *aState = kIotaAcc;

   277     return GREEK_UPPER_IOTA;

   279   case GREEK_LOWER_OMICRON_TONOS:

   280   case GREEK_LOWER_OMICRON_OXIA:

   281   case GREEK_UPPER_OMICRON_TONOS:

   282   case GREEK_UPPER_OMICRON_OXIA:

   283     *aState = kOmicronAcc;

   284     return GREEK_UPPER_OMICRON;

   286   case GREEK_LOWER_UPSILON_TONOS:

   287   case GREEK_LOWER_UPSILON_OXIA:

   288   case GREEK_UPPER_UPSILON_TONOS:

   289   case GREEK_UPPER_UPSILON_OXIA:

   290     switch (*aState) {

   291     case kOmicron:

   292       *aState = kStart; // this completed a diphthong

   293       break;

   294     default:

   295       *aState = kUpsilonAcc;

   296       break;

   297     }

   298     return GREEK_UPPER_UPSILON;

   300   case GREEK_LOWER_OMEGA_TONOS:

   301   case GREEK_LOWER_OMEGA_OXIA:

   302   case GREEK_UPPER_OMEGA_TONOS:

   303   case GREEK_UPPER_OMEGA_OXIA:

   304     *aState = kOmegaAcc;

   305     return GREEK_UPPER_OMEGA;

   306   }

   308   // all other characters just reset the state, and use standard mappings

   309   *aState = kStart;

   310   return ToUpperCase(aCh);

   311 }

   313 nsTransformedTextRun *

   314 nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams,

   315                              nsTransformingTextRunFactory* aFactory,

   316                              gfxFontGroup* aFontGroup,

   317                              const char16_t* aString, uint32_t aLength,

   318                              const uint32_t aFlags, nsStyleContext** aStyles,

   319                              bool aOwnsFactory)

   320 {

   321   NS_ASSERTION(!(aFlags & gfxTextRunFactory::TEXT_IS_8BIT),

   322                "didn't expect text to be marked as 8-bit here");

   324   void *storage = AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);

   325   if (!storage) {

   326     return nullptr;

   327   }

   329   return new (storage) nsTransformedTextRun(aParams, aFactory, aFontGroup,

   330                                             aString, aLength,

   331                                             aFlags, aStyles, aOwnsFactory);

   332 }

   334 void

   335 nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,

   336                                         bool* aCapitalization,

   337                                         gfxContext* aRefContext)

   338 {

   339   if (mCapitalize.IsEmpty()) {

   340     if (!mCapitalize.AppendElements(GetLength()))

   341       return;

   342     memset(mCapitalize.Elements(), 0, GetLength()*sizeof(bool));

   343   }

   344   memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength*sizeof(bool));

   345   mNeedsRebuild = true;

   346 }

   348 bool

   349 nsTransformedTextRun::SetPotentialLineBreaks(uint32_t aStart, uint32_t aLength,

   350                                              uint8_t* aBreakBefore,

   351                                              gfxContext* aRefContext)

   352 {

   353   bool changed = gfxTextRun::SetPotentialLineBreaks(aStart, aLength,

   354       aBreakBefore, aRefContext);

   355   if (changed) {

   356     mNeedsRebuild = true;

   357   }

   358   return changed;

   359 }

   361 size_t

   362 nsTransformedTextRun::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)

   363 {

   364   size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);

   365   total += mStyles.SizeOfExcludingThis(aMallocSizeOf);

   366   total += mCapitalize.SizeOfExcludingThis(aMallocSizeOf);

   367   if (mOwnsFactory) {

   368     total += aMallocSizeOf(mFactory);

   369   }

   370   return total;

   371 }

   373 size_t

   374 nsTransformedTextRun::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)

   375 {

   376   return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);

   377 }

   379 nsTransformedTextRun*

   380 nsTransformingTextRunFactory::MakeTextRun(const char16_t* aString, uint32_t aLength,

   381                                           const gfxTextRunFactory::Parameters* aParams,

   382                                           gfxFontGroup* aFontGroup, uint32_t aFlags,

   383                                           nsStyleContext** aStyles, bool aOwnsFactory)

   384 {

   385   return nsTransformedTextRun::Create(aParams, this, aFontGroup,

   386                                       aString, aLength, aFlags, aStyles, aOwnsFactory);

   387 }

   389 nsTransformedTextRun*

   390 nsTransformingTextRunFactory::MakeTextRun(const uint8_t* aString, uint32_t aLength,

   391                                           const gfxTextRunFactory::Parameters* aParams,

   392                                           gfxFontGroup* aFontGroup, uint32_t aFlags,

   393                                           nsStyleContext** aStyles, bool aOwnsFactory)

   394 {

   395   // We'll only have a Unicode code path to minimize the amount of code needed

   396   // for these rarely used features

   397   NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString), aLength);

   398   return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,

   399                      aFlags & ~(gfxFontGroup::TEXT_IS_PERSISTENT | gfxFontGroup::TEXT_IS_8BIT),

   400                      aStyles, aOwnsFactory);

   401 }

   403 void

   404 MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,

   405                          const bool* aCharsToMerge, const bool* aDeletedChars)

   406 {

   407   aDest->ResetGlyphRuns();

   409   gfxTextRun::GlyphRunIterator iter(aSrc, 0, aSrc->GetLength());

   410   uint32_t offset = 0;

   411   nsAutoTArray<gfxTextRun::DetailedGlyph,2> glyphs;

   412   while (iter.NextRun()) {

   413     gfxTextRun::GlyphRun* run = iter.GetGlyphRun();

   414     nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType,

   415                                      offset, false);

   416     if (NS_FAILED(rv))

   417       return;

   419     bool anyMissing = false;

   420     uint32_t mergeRunStart = iter.GetStringStart();

   421     const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs();

   422     gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];

   423     uint32_t stringEnd = iter.GetStringEnd();

   424     for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) {

   425       const gfxTextRun::CompressedGlyph g = srcGlyphs[k];

   426       if (g.IsSimpleGlyph()) {

   427         if (!anyMissing) {

   428           gfxTextRun::DetailedGlyph details;

   429           details.mGlyphID = g.GetSimpleGlyph();

   430           details.mAdvance = g.GetSimpleAdvance();

   431           details.mXOffset = 0;

   432           details.mYOffset = 0;

   433           glyphs.AppendElement(details);

   434         }

   435       } else {

   436         if (g.IsMissing()) {

   437           anyMissing = true;

   438           glyphs.Clear();

   439         }

   440         if (g.GetGlyphCount() > 0) {

   441           glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());

   442         }

   443       }

   445       if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {

   446         // next char is supposed to merge with current, so loop without

   447         // writing current merged glyph to the destination

   448         continue;

   449       }

   451       // If the start of the merge run is actually a character that should

   452       // have been merged with the previous character (this can happen

   453       // if there's a font change in the middle of a case-mapped character,

   454       // that decomposed into a sequence of base+diacritics, for example),

   455       // just discard the entire merge run. See comment at start of this

   456       // function.

   457       NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart],

   458                        "unable to merge across a glyph run boundary, "

   459                        "glyph(s) discarded");

   460       if (!aCharsToMerge[mergeRunStart]) {

   461         if (anyMissing) {

   462           mergedGlyph.SetMissing(glyphs.Length());

   463         } else {

   464           mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),

   465                                  mergedGlyph.IsLigatureGroupStart(),

   466                                  glyphs.Length());

   467         }

   468         aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());

   469         ++offset;

   471         while (offset < aDest->GetLength() && aDeletedChars[offset]) {

   472           aDest->SetGlyphs(offset++, gfxTextRun::CompressedGlyph(), nullptr);

   473         }

   474       }

   476       glyphs.Clear();

   477       anyMissing = false;

   478       mergeRunStart = k + 1;

   479       if (mergeRunStart < stringEnd) {

   480         mergedGlyph = srcGlyphs[mergeRunStart];

   481       }

   482     }

   483     NS_ASSERTION(glyphs.Length() == 0,

   484                  "Leftover glyphs, don't request merging of the last character with its next!");

   485   }

   486   NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");

   487 }

   489 gfxTextRunFactory::Parameters

   490 GetParametersForInner(nsTransformedTextRun* aTextRun, uint32_t* aFlags,

   491     gfxContext* aRefContext)

   492 {

   493   gfxTextRunFactory::Parameters params =

   494     { aRefContext, nullptr, nullptr,

   495       nullptr, 0, aTextRun->GetAppUnitsPerDevUnit()

   496     };

   497   *aFlags = aTextRun->GetFlags() & ~gfxFontGroup::TEXT_IS_PERSISTENT;

   498   return params;

   499 }

   501 void

   502 nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,

   503     gfxContext* aRefContext)

   504 {

   505   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

   506   gfxFontStyle fontStyle = *fontGroup->GetStyle();

   507   fontStyle.size *= 0.8;

   508   nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle);

   509   if (!smallFont)

   510     return;

   512   uint32_t flags;

   513   gfxTextRunFactory::Parameters innerParams =

   514       GetParametersForInner(aTextRun, &flags, aRefContext);

   516   uint32_t length = aTextRun->GetLength();

   517   const char16_t* str = aTextRun->mString.BeginReading();

   518   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

   519   // Create a textrun so we can check cluster-start properties

   520   nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags));

   521   if (!inner.get())

   522     return;

   524   nsCaseTransformTextRunFactory uppercaseFactory(nullptr, true);

   526   aTextRun->ResetGlyphRuns();

   528   uint32_t runStart = 0;

   529   nsAutoTArray<nsStyleContext*,50> styleArray;

   530   nsAutoTArray<uint8_t,50> canBreakBeforeArray;

   532   enum RunCaseState {

   533     kUpperOrCaseless, // will be untouched by font-variant:small-caps

   534     kLowercase,       // will be uppercased and reduced

   535     kSpecialUpper     // specials: don't shrink, but apply uppercase mapping

   536   };

   537   RunCaseState runCase = kUpperOrCaseless;

   539   // Note that this loop runs from 0 to length *inclusive*, so the last

   540   // iteration is in effect beyond the end of the input text, to give a

   541   // chance to finish the last casing run we've found.

   542   // The last iteration, when i==length, must not attempt to look at the

   543   // character position [i] or the style data for styles[i], as this would

   544   // be beyond the valid length of the textrun or its style array.

   545   for (uint32_t i = 0; i <= length; ++i) {

   546     RunCaseState chCase = kUpperOrCaseless;

   547     // Unless we're at the end, figure out what treatment the current

   548     // character will need.

   549     if (i < length) {

   550       nsStyleContext* styleContext = styles[i];

   551       // Characters that aren't the start of a cluster are ignored here. They

   552       // get added to whatever lowercase/non-lowercase run we're in.

   553       if (!inner->IsClusterStart(i)) {

   554         chCase = runCase;

   555       } else {

   556         if (styleContext->StyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) {

   557           uint32_t ch = str[i];

   558           if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {

   559             ch = SURROGATE_TO_UCS4(ch, str[i + 1]);

   560           }

   561           uint32_t ch2 = ToUpperCase(ch);

   562           if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) {

   563             chCase = kLowercase;

   564           } else if (styleContext->StyleFont()->mLanguage == nsGkAtoms::el) {

   565             // In Greek, check for characters that will be modified by the

   566             // GreekUpperCase mapping - this catches accented capitals where

   567             // the accent is to be removed (bug 307039). These are handled by

   568             // a transformed child run using the full-size font.

   569             GreekCasingState state = kStart; // don't need exact context here

   570             ch2 = GreekUpperCase(ch, &state);

   571             if (ch != ch2) {

   572               chCase = kSpecialUpper;

   573             }

   574           }

   575         } else {

   576           // Don't transform the character! I.e., pretend that it's not lowercase

   577         }

   578       }

   579     }

   581     // At the end of the text, or when the current character needs different

   582     // casing treatment from the current run, finish the run-in-progress

   583     // and prepare to accumulate a new run.

   584     // Note that we do not look at any source data for offset [i] here,

   585     // as that would be invalid in the case where i==length.

   586     if ((i == length || runCase != chCase) && runStart < i) {

   587       nsAutoPtr<nsTransformedTextRun> transformedChild;

   588       nsAutoPtr<gfxTextRun> cachedChild;

   589       gfxTextRun* child;

   591       switch (runCase) {

   592       case kUpperOrCaseless:

   593         cachedChild =

   594           fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams,

   595                                  flags);

   596         child = cachedChild.get();

   597         break;

   598       case kLowercase:

   599         transformedChild =

   600           uppercaseFactory.MakeTextRun(str + runStart, i - runStart,

   601                                        &innerParams, smallFont, flags,

   602                                        styleArray.Elements(), false);

   603         child = transformedChild;

   604         break;

   605       case kSpecialUpper:

   606         transformedChild =

   607           uppercaseFactory.MakeTextRun(str + runStart, i - runStart,

   608                                        &innerParams, fontGroup, flags,

   609                                        styleArray.Elements(), false);

   610         child = transformedChild;

   611         break;

   612       }

   613       if (!child)

   614         return;

   615       // Copy potential linebreaks into child so they're preserved

   616       // (and also child will be shaped appropriately)

   617       NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart,

   618                    "lost some break-before values?");

   619       child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),

   620           canBreakBeforeArray.Elements(), aRefContext);

   621       if (transformedChild) {

   622         transformedChild->FinishSettingProperties(aRefContext);

   623       }

   624       aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart);

   626       runStart = i;

   627       styleArray.Clear();

   628       canBreakBeforeArray.Clear();

   629     }

   631     if (i < length) {

   632       runCase = chCase;

   633       styleArray.AppendElement(styles[i]);

   634       canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

   635     }

   636   }

   637 }

   639 void

   640 nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,

   641     gfxContext* aRefContext)

   642 {

   643   uint32_t length = aTextRun->GetLength();

   644   const char16_t* str = aTextRun->mString.BeginReading();

   645   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();

   647   nsAutoString convertedString;

   648   nsAutoTArray<bool,50> charsToMergeArray;

   649   nsAutoTArray<bool,50> deletedCharsArray;

   650   nsAutoTArray<nsStyleContext*,50> styleArray;

   651   nsAutoTArray<uint8_t,50> canBreakBeforeArray;

   652   bool mergeNeeded = false;

   654   // Some languages have special casing conventions that differ from the

   655   // default Unicode mappings.

   656   // The enum values here are named for well-known exemplar languages that

   657   // exhibit the behavior in question; multiple lang tags may map to the

   658   // same setting here, if the behavior is shared by other languages.

   659   enum {

   660     eNone,    // default non-lang-specific behavior

   661     eTurkish, // preserve dotted/dotless-i distinction in uppercase

   662     eDutch,   // treat "ij" digraph as a unit for capitalization

   663     eGreek    // strip accent when uppercasing Greek vowels

   664   } languageSpecificCasing = eNone;

   666   const nsIAtom* lang = nullptr;

   667   bool capitalizeDutchIJ = false;

   668   bool prevIsLetter = false;

   669   uint32_t sigmaIndex = uint32_t(-1);

   670   nsIUGenCategory::nsUGenCategory cat;

   671   GreekCasingState greekState = kStart;

   672   uint32_t i;

   673   for (i = 0; i < length; ++i) {

   674     uint32_t ch = str[i];

   675     nsStyleContext* styleContext = styles[i];

   677     uint8_t style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE

   678       : styleContext->StyleText()->mTextTransform;

   679     int extraChars = 0;

   680     const mozilla::unicode::MultiCharMapping *mcm;

   682     if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {

   683       ch = SURROGATE_TO_UCS4(ch, str[i + 1]);

   684     }

   686     if (lang != styleContext->StyleFont()->mLanguage) {

   687       lang = styleContext->StyleFont()->mLanguage;

   688       if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||

   689           lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||

   690           lang == nsGkAtoms::tt) {

   691         languageSpecificCasing = eTurkish;

   692       } else if (lang == nsGkAtoms::nl) {

   693         languageSpecificCasing = eDutch;

   694       } else if (lang == nsGkAtoms::el) {

   695         languageSpecificCasing = eGreek;

   696         greekState = kStart;

   697       } else {

   698         languageSpecificCasing = eNone;

   699       }

   700     }

   702     switch (style) {

   703     case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:

   704       if (languageSpecificCasing == eTurkish) {

   705         if (ch == 'I') {

   706           ch = LATIN_SMALL_LETTER_DOTLESS_I;

   707           prevIsLetter = true;

   708           sigmaIndex = uint32_t(-1);

   709           break;

   710         }

   711         if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {

   712           ch = 'i';

   713           prevIsLetter = true;

   714           sigmaIndex = uint32_t(-1);

   715           break;

   716         }

   717       }

   719       // Special lowercasing behavior for Greek Sigma: note that this is listed

   720       // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a

   721       // language-specific mapping; it applies regardless of the language of

   722       // the element.

   723       //

   724       // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.

   725       // the non-final form) whenever there is a following letter, or when the

   726       // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a

   727       // LETTER); and to FINAL SIGMA when it is preceded by another letter but

   728       // not followed by one.

   729       //

   730       // To implement the context-sensitive nature of this mapping, we keep

   731       // track of whether the previous character was a letter. If not, CAPITAL

   732       // SIGMA will map directly to SMALL SIGMA. If the previous character

   733       // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the

   734       // position in the converted string; if we then encounter another letter,

   735       // that FINAL SIGMA is replaced with a standard SMALL SIGMA.

   737       cat = mozilla::unicode::GetGenCategory(ch);

   739       // If sigmaIndex is not -1, it marks where we have provisionally mapped

   740       // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we

   741       // need to change it to SMALL SIGMA.

   742       if (sigmaIndex != uint32_t(-1)) {

   743         if (cat == nsIUGenCategory::kLetter) {

   744           convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);

   745         }

   746       }

   748       if (ch == GREEK_CAPITAL_LETTER_SIGMA) {

   749         // If preceding char was a letter, map to FINAL instead of SMALL,

   750         // and note where it occurred by setting sigmaIndex; we'll change it

   751         // to standard SMALL SIGMA later if another letter follows

   752         if (prevIsLetter) {

   753           ch = GREEK_SMALL_LETTER_FINAL_SIGMA;

   754           sigmaIndex = convertedString.Length();

   755         } else {

   756           // CAPITAL SIGMA not preceded by a letter is unconditionally mapped

   757           // to SMALL SIGMA

   758           ch = GREEK_SMALL_LETTER_SIGMA;

   759           sigmaIndex = uint32_t(-1);

   760         }

   761         prevIsLetter = true;

   762         break;

   763       }

   765       // ignore diacritics for the purpose of contextual sigma mapping;

   766       // otherwise, reset prevIsLetter appropriately and clear the

   767       // sigmaIndex marker

   768       if (cat != nsIUGenCategory::kMark) {

   769         prevIsLetter = (cat == nsIUGenCategory::kLetter);

   770         sigmaIndex = uint32_t(-1);

   771       }

   773       mcm = mozilla::unicode::SpecialLower(ch);

   774       if (mcm) {

   775         int j = 0;

   776         while (j < 2 && mcm->mMappedChars[j + 1]) {

   777           convertedString.Append(mcm->mMappedChars[j]);

   778           ++extraChars;

   779           ++j;

   780         }

   781         ch = mcm->mMappedChars[j];

   782         break;

   783       }

   785       ch = ToLowerCase(ch);

   786       break;

   788     case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:

   789       if (languageSpecificCasing == eTurkish && ch == 'i') {

   790         ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;

   791         break;

   792       }

   794       if (languageSpecificCasing == eGreek) {

   795         ch = GreekUpperCase(ch, &greekState);

   796         break;

   797       }

   799       mcm = mozilla::unicode::SpecialUpper(ch);

   800       if (mcm) {

   801         int j = 0;

   802         while (j < 2 && mcm->mMappedChars[j + 1]) {

   803           convertedString.Append(mcm->mMappedChars[j]);

   804           ++extraChars;

   805           ++j;

   806         }

   807         ch = mcm->mMappedChars[j];

   808         break;

   809       }

   811       ch = ToUpperCase(ch);

   812       break;

   814     case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:

   815       if (capitalizeDutchIJ && ch == 'j') {

   816         ch = 'J';

   817         capitalizeDutchIJ = false;

   818         break;

   819       }

   820       capitalizeDutchIJ = false;

   821       if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {

   822         if (languageSpecificCasing == eTurkish && ch == 'i') {

   823           ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;

   824           break;

   825         }

   826         if (languageSpecificCasing == eDutch && ch == 'i') {

   827           ch = 'I';

   828           capitalizeDutchIJ = true;

   829           break;

   830         }

   832         mcm = mozilla::unicode::SpecialTitle(ch);

   833         if (mcm) {

   834           int j = 0;

   835           while (j < 2 && mcm->mMappedChars[j + 1]) {

   836             convertedString.Append(mcm->mMappedChars[j]);

   837             ++extraChars;

   838             ++j;

   839           }

   840           ch = mcm->mMappedChars[j];

   841           break;

   842         }

   844         ch = ToTitleCase(ch);

   845       }

   846       break;

   848     case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:

   849       ch = mozilla::unicode::GetFullWidth(ch);

   850       break;

   852     default:

   853       break;

   854     }

   856     if (ch == uint32_t(-1)) {

   857       deletedCharsArray.AppendElement(true);

   858       mergeNeeded = true;

   859     } else {

   860       deletedCharsArray.AppendElement(false);

   861       charsToMergeArray.AppendElement(false);

   862       styleArray.AppendElement(styleContext);

   863       canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));

   865       if (IS_IN_BMP(ch)) {

   866         convertedString.Append(ch);

   867       } else {

   868         convertedString.Append(H_SURROGATE(ch));

   869         convertedString.Append(L_SURROGATE(ch));

   870         ++i;

   871         deletedCharsArray.AppendElement(true); // not exactly deleted, but the

   872                                                // trailing surrogate is skipped

   873         ++extraChars;

   874       }

   876       while (extraChars-- > 0) {

   877         mergeNeeded = true;

   878         charsToMergeArray.AppendElement(true);

   879         styleArray.AppendElement(styleContext);

   880         canBreakBeforeArray.AppendElement(false);

   881       }

   882     }

   883   }

   885   uint32_t flags;

   886   gfxTextRunFactory::Parameters innerParams =

   887       GetParametersForInner(aTextRun, &flags, aRefContext);

   888   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();

   890   nsAutoPtr<nsTransformedTextRun> transformedChild;

   891   nsAutoPtr<gfxTextRun> cachedChild;

   892   gfxTextRun* child;

   894   if (mInnerTransformingTextRunFactory) {

   895     transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(

   896         convertedString.BeginReading(), convertedString.Length(),

   897         &innerParams, fontGroup, flags, styleArray.Elements(), false);

   898     child = transformedChild.get();

   899   } else {

   900     cachedChild = fontGroup->MakeTextRun(

   901         convertedString.BeginReading(), convertedString.Length(),

   902         &innerParams, flags);

   903     child = cachedChild.get();

   904   }

   905   if (!child)

   906     return;

   907   // Copy potential linebreaks into child so they're preserved

   908   // (and also child will be shaped appropriately)

   909   NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),

   910                "Dropped characters or break-before values somewhere!");

   911   child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),

   912       canBreakBeforeArray.Elements(), aRefContext);

   913   if (transformedChild) {

   914     transformedChild->FinishSettingProperties(aRefContext);

   915   }

   917   if (mergeNeeded) {

   918     // Now merge multiple characters into one multi-glyph character as required

   919     // and deal with skipping deleted accent chars

   920     NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),

   921                  "source length mismatch");

   922     NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),

   923                  "destination length mismatch");

   924     MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),

   925                              deletedCharsArray.Elements());

   926   } else {

   927     // No merging to do, so just copy; this produces a more optimized textrun.

   928     // We can't steal the data because the child may be cached and stealing

   929     // the data would break the cache.

   930     aTextRun->ResetGlyphRuns();

   931     aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);

   932   }

   933 }

The Tor Browser / file revision

layout/generic/nsTextRunTransformations.cpp@b8a032363ba2

layout/generic/nsTextRunTransformations.cpp