layout/generic/nsTextRunTransformations.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
     2  * This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #include "nsTextRunTransformations.h"
     8 #include "mozilla/MemoryReporting.h"
    10 #include "nsGkAtoms.h"
    11 #include "nsStyleConsts.h"
    12 #include "nsStyleContext.h"
    13 #include "nsUnicodeProperties.h"
    14 #include "nsSpecialCasingData.h"
    15 #include "mozilla/gfx/2D.h"
    16 #include "nsTextFrameUtils.h"
    17 #include "nsIPersistentProperties2.h"
    18 #include "nsNetUtil.h"
    20 // Unicode characters needing special casing treatment in tr/az languages
// (nsCaseTransformTextRunFactory::RebuildTextRun() selects the same Turkish
// casing behavior for the ba, crh and tt language tags as well.)
    21 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE  0x0130
    22 #define LATIN_SMALL_LETTER_DOTLESS_I           0x0131
    24 // Greek sigma needs custom handling for the lowercase transform; for details
    25 // see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
    26 // nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120.
    27 #define GREEK_CAPITAL_LETTER_SIGMA             0x03A3
    28 #define GREEK_SMALL_LETTER_FINAL_SIGMA         0x03C2
    29 #define GREEK_SMALL_LETTER_SIGMA               0x03C3
    31 // Custom uppercase mapping for Greek; see bug 307039 for details
// Lowercase vowels recognized as input by GreekUpperCase(). Each vowel has a
// plain, a TONOS and an OXIA form; iota and upsilon additionally have
// DIALYTIKA forms.
    32 #define GREEK_LOWER_ALPHA                      0x03B1
    33 #define GREEK_LOWER_ALPHA_TONOS                0x03AC
    34 #define GREEK_LOWER_ALPHA_OXIA                 0x1F71
    35 #define GREEK_LOWER_EPSILON                    0x03B5
    36 #define GREEK_LOWER_EPSILON_TONOS              0x03AD
    37 #define GREEK_LOWER_EPSILON_OXIA               0x1F73
    38 #define GREEK_LOWER_ETA                        0x03B7
    39 #define GREEK_LOWER_ETA_TONOS                  0x03AE
    40 #define GREEK_LOWER_ETA_OXIA                   0x1F75
    41 #define GREEK_LOWER_IOTA                       0x03B9
    42 #define GREEK_LOWER_IOTA_TONOS                 0x03AF
    43 #define GREEK_LOWER_IOTA_OXIA                  0x1F77
    44 #define GREEK_LOWER_IOTA_DIALYTIKA             0x03CA
    45 #define GREEK_LOWER_IOTA_DIALYTIKA_TONOS       0x0390
    46 #define GREEK_LOWER_IOTA_DIALYTIKA_OXIA        0x1FD3
    47 #define GREEK_LOWER_OMICRON                    0x03BF
    48 #define GREEK_LOWER_OMICRON_TONOS              0x03CC
    49 #define GREEK_LOWER_OMICRON_OXIA               0x1F79
    50 #define GREEK_LOWER_UPSILON                    0x03C5
    51 #define GREEK_LOWER_UPSILON_TONOS              0x03CD
    52 #define GREEK_LOWER_UPSILON_OXIA               0x1F7B
    53 #define GREEK_LOWER_UPSILON_DIALYTIKA          0x03CB
    54 #define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS    0x03B0
    55 #define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA     0x1FE3
    56 #define GREEK_LOWER_OMEGA                      0x03C9
    57 #define GREEK_LOWER_OMEGA_TONOS                0x03CE
    58 #define GREEK_LOWER_OMEGA_OXIA                 0x1F7D
// Uppercase vowels without an acute accent: these are the values
// GreekUpperCase() returns for the vowels above, and it accepts them as
// input too.
    59 #define GREEK_UPPER_ALPHA                      0x0391
    60 #define GREEK_UPPER_EPSILON                    0x0395
    61 #define GREEK_UPPER_ETA                        0x0397
    62 #define GREEK_UPPER_IOTA                       0x0399
    63 #define GREEK_UPPER_IOTA_DIALYTIKA             0x03AA
    64 #define GREEK_UPPER_OMICRON                    0x039F
    65 #define GREEK_UPPER_UPSILON                    0x03A5
    66 #define GREEK_UPPER_UPSILON_DIALYTIKA          0x03AB
    67 #define GREEK_UPPER_OMEGA                      0x03A9
// Accented uppercase vowels: accepted as input by GreekUpperCase() and
// mapped to the corresponding unaccented uppercase vowel.
    68 #define GREEK_UPPER_ALPHA_TONOS                0x0386
    69 #define GREEK_UPPER_ALPHA_OXIA                 0x1FBB
    70 #define GREEK_UPPER_EPSILON_TONOS              0x0388
    71 #define GREEK_UPPER_EPSILON_OXIA               0x1FC9
    72 #define GREEK_UPPER_ETA_TONOS                  0x0389
    73 #define GREEK_UPPER_ETA_OXIA                   0x1FCB
    74 #define GREEK_UPPER_IOTA_TONOS                 0x038A
    75 #define GREEK_UPPER_IOTA_OXIA                  0x1FDB
    76 #define GREEK_UPPER_OMICRON_TONOS              0x038C
    77 #define GREEK_UPPER_OMICRON_OXIA               0x1FF9
    78 #define GREEK_UPPER_UPSILON_TONOS              0x038E
    79 #define GREEK_UPPER_UPSILON_OXIA               0x1FEB
    80 #define GREEK_UPPER_OMEGA_TONOS                0x038F
    81 #define GREEK_UPPER_OMEGA_OXIA                 0x1FFB
// Combining marks that GreekUpperCase() handles specially: the two acute
// marks are dropped after a vowel or diaeresis, the diaeresis is tracked in
// the casing state, and the dialytika-tonos is replaced by a plain diaeresis.
    82 #define COMBINING_ACUTE_ACCENT                 0x0301
    83 #define COMBINING_DIAERESIS                    0x0308
    84 #define COMBINING_ACUTE_TONE_MARK              0x0341
    85 #define COMBINING_GREEK_DIALYTIKA_TONOS        0x0344
    87 // When doing an Uppercase transform in Greek, we need to keep track of the
    88 // current state while iterating through the string, to recognize and process
    89 // diphthongs correctly. For clarity, we define a state for each vowel and
    90 // each vowel with accent, although a few of these do not actually need any
    91 // special treatment and could be folded into kStart.
//
// The state is owned by the caller and updated by GreekUpperCase() on every
// character it processes.
    92 enum GreekCasingState {
  // No vowel context pending; set after any character without special
  // handling and after a completed diphthong.
    93   kStart,
  // The previous character was the named vowel without an accent.
    94   kAlpha,
    95   kEpsilon,
    96   kEta,
    97   kIota,
    98   kOmicron,
    99   kUpsilon,
   100   kOmega,
  // The previous vowel carried an acute accent (precomposed, or a combining
  // acute that followed the plain vowel). A following lowercase iota or
  // upsilon may then receive a dialytika.
   101   kAlphaAcc,
   102   kEpsilonAcc,
   103   kEtaAcc,
   104   kIotaAcc,
   105   kOmicronAcc,
   106   kUpsilonAcc,
   107   kOmegaAcc,
  // An unaccented upsilon directly followed an unaccented omicron.
   108   kOmicronUpsilon,
  // The previous character was a vowel with dialytika or a combining
  // diaeresis; a following combining acute is dropped.
   109   kDiaeresis
   110 };
   112 static uint32_t
   113 GreekUpperCase(uint32_t aCh, GreekCasingState* aState)
   114 {
   115   switch (aCh) {
   116   case GREEK_UPPER_ALPHA:
   117   case GREEK_LOWER_ALPHA:
   118     *aState = kAlpha;
   119     return GREEK_UPPER_ALPHA;
   121   case GREEK_UPPER_EPSILON:
   122   case GREEK_LOWER_EPSILON:
   123     *aState = kEpsilon;
   124     return GREEK_UPPER_EPSILON;
   126   case GREEK_UPPER_ETA:
   127   case GREEK_LOWER_ETA:
   128     *aState = kEta;
   129     return GREEK_UPPER_ETA;
   131   case GREEK_UPPER_IOTA:
   132     *aState = kIota;
   133     return GREEK_UPPER_IOTA;
   135   case GREEK_UPPER_OMICRON:
   136   case GREEK_LOWER_OMICRON:
   137     *aState = kOmicron;
   138     return GREEK_UPPER_OMICRON;
   140   case GREEK_UPPER_UPSILON:
   141     switch (*aState) {
   142     case kOmicron:
   143       *aState = kOmicronUpsilon;
   144       break;
   145     default:
   146       *aState = kUpsilon;
   147       break;
   148     }
   149     return GREEK_UPPER_UPSILON;
   151   case GREEK_UPPER_OMEGA:
   152   case GREEK_LOWER_OMEGA:
   153     *aState = kOmega;
   154     return GREEK_UPPER_OMEGA;
   156   // iota and upsilon may be the second vowel of a diphthong
   157   case GREEK_LOWER_IOTA:
   158     switch (*aState) {
   159     case kAlphaAcc:
   160     case kEpsilonAcc:
   161     case kOmicronAcc:
   162     case kUpsilonAcc:
   163       *aState = kStart;
   164       return GREEK_UPPER_IOTA_DIALYTIKA;
   165     default:
   166       break;
   167     }
   168     *aState = kIota;
   169     return GREEK_UPPER_IOTA;
   171   case GREEK_LOWER_UPSILON:
   172     switch (*aState) {
   173     case kAlphaAcc:
   174     case kEpsilonAcc:
   175     case kEtaAcc:
   176     case kOmicronAcc:
   177       *aState = kStart;
   178       return GREEK_UPPER_UPSILON_DIALYTIKA;
   179     case kOmicron:
   180       *aState = kOmicronUpsilon;
   181       break;
   182     default:
   183       *aState = kUpsilon;
   184       break;
   185     }
   186     return GREEK_UPPER_UPSILON;
   188   case GREEK_UPPER_IOTA_DIALYTIKA:
   189   case GREEK_LOWER_IOTA_DIALYTIKA:
   190   case GREEK_UPPER_UPSILON_DIALYTIKA:
   191   case GREEK_LOWER_UPSILON_DIALYTIKA:
   192   case COMBINING_DIAERESIS:
   193     *aState = kDiaeresis;
   194     return ToUpperCase(aCh);
   196   // remove accent if it follows a vowel or diaeresis,
   197   // and set appropriate state for diphthong detection
   198   case COMBINING_ACUTE_ACCENT:
   199   case COMBINING_ACUTE_TONE_MARK:
   200     switch (*aState) {
   201     case kAlpha:
   202       *aState = kAlphaAcc;
   203       return uint32_t(-1); // omit this char from result string
   204     case kEpsilon:
   205       *aState = kEpsilonAcc;
   206       return uint32_t(-1);
   207     case kEta:
   208       *aState = kEtaAcc;
   209       return uint32_t(-1);
   210     case kIota:
   211       *aState = kIotaAcc;
   212       return uint32_t(-1);
   213     case kOmicron:
   214       *aState = kOmicronAcc;
   215       return uint32_t(-1);
   216     case kUpsilon:
   217       *aState = kUpsilonAcc;
   218       return uint32_t(-1);
   219     case kOmicronUpsilon:
   220       *aState = kStart; // this completed a diphthong
   221       return uint32_t(-1);
   222     case kOmega:
   223       *aState = kOmegaAcc;
   224       return uint32_t(-1);
   225     case kDiaeresis:
   226       *aState = kStart;
   227       return uint32_t(-1);
   228     default:
   229       break;
   230     }
   231     break;
   233   // combinations with dieresis+accent just strip the accent,
   234   // and reset to start state (don't form diphthong with following vowel)
   235   case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
   236   case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
   237     *aState = kStart;
   238     return GREEK_UPPER_IOTA_DIALYTIKA;
   240   case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
   241   case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
   242     *aState = kStart;
   243     return GREEK_UPPER_UPSILON_DIALYTIKA;
   245   case COMBINING_GREEK_DIALYTIKA_TONOS:
   246     *aState = kStart;
   247     return COMBINING_DIAERESIS;
   249   // strip accents from vowels, and note the vowel seen so that we can detect
   250   // diphthongs where diaeresis needs to be added
   251   case GREEK_LOWER_ALPHA_TONOS:
   252   case GREEK_LOWER_ALPHA_OXIA:
   253   case GREEK_UPPER_ALPHA_TONOS:
   254   case GREEK_UPPER_ALPHA_OXIA:
   255     *aState = kAlphaAcc;
   256     return GREEK_UPPER_ALPHA;
   258   case GREEK_LOWER_EPSILON_TONOS:
   259   case GREEK_LOWER_EPSILON_OXIA:
   260   case GREEK_UPPER_EPSILON_TONOS:
   261   case GREEK_UPPER_EPSILON_OXIA:
   262     *aState = kEpsilonAcc;
   263     return GREEK_UPPER_EPSILON;
   265   case GREEK_LOWER_ETA_TONOS:
   266   case GREEK_LOWER_ETA_OXIA:
   267   case GREEK_UPPER_ETA_TONOS:
   268   case GREEK_UPPER_ETA_OXIA:
   269     *aState = kEtaAcc;
   270     return GREEK_UPPER_ETA;
   272   case GREEK_LOWER_IOTA_TONOS:
   273   case GREEK_LOWER_IOTA_OXIA:
   274   case GREEK_UPPER_IOTA_TONOS:
   275   case GREEK_UPPER_IOTA_OXIA:
   276     *aState = kIotaAcc;
   277     return GREEK_UPPER_IOTA;
   279   case GREEK_LOWER_OMICRON_TONOS:
   280   case GREEK_LOWER_OMICRON_OXIA:
   281   case GREEK_UPPER_OMICRON_TONOS:
   282   case GREEK_UPPER_OMICRON_OXIA:
   283     *aState = kOmicronAcc;
   284     return GREEK_UPPER_OMICRON;
   286   case GREEK_LOWER_UPSILON_TONOS:
   287   case GREEK_LOWER_UPSILON_OXIA:
   288   case GREEK_UPPER_UPSILON_TONOS:
   289   case GREEK_UPPER_UPSILON_OXIA:
   290     switch (*aState) {
   291     case kOmicron:
   292       *aState = kStart; // this completed a diphthong
   293       break;
   294     default:
   295       *aState = kUpsilonAcc;
   296       break;
   297     }
   298     return GREEK_UPPER_UPSILON;
   300   case GREEK_LOWER_OMEGA_TONOS:
   301   case GREEK_LOWER_OMEGA_OXIA:
   302   case GREEK_UPPER_OMEGA_TONOS:
   303   case GREEK_UPPER_OMEGA_OXIA:
   304     *aState = kOmegaAcc;
   305     return GREEK_UPPER_OMEGA;
   306   }
   308   // all other characters just reset the state, and use standard mappings
   309   *aState = kStart;
   310   return ToUpperCase(aCh);
   311 }
   313 nsTransformedTextRun *
   314 nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams,
   315                              nsTransformingTextRunFactory* aFactory,
   316                              gfxFontGroup* aFontGroup,
   317                              const char16_t* aString, uint32_t aLength,
   318                              const uint32_t aFlags, nsStyleContext** aStyles,
   319                              bool aOwnsFactory)
   320 {
   321   NS_ASSERTION(!(aFlags & gfxTextRunFactory::TEXT_IS_8BIT),
   322                "didn't expect text to be marked as 8-bit here");
   324   void *storage = AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
   325   if (!storage) {
   326     return nullptr;
   327   }
   329   return new (storage) nsTransformedTextRun(aParams, aFactory, aFontGroup,
   330                                             aString, aLength,
   331                                             aFlags, aStyles, aOwnsFactory);
   332 }
   334 void
   335 nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
   336                                         bool* aCapitalization,
   337                                         gfxContext* aRefContext)
   338 {
   339   if (mCapitalize.IsEmpty()) {
   340     if (!mCapitalize.AppendElements(GetLength()))
   341       return;
   342     memset(mCapitalize.Elements(), 0, GetLength()*sizeof(bool));
   343   }
   344   memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength*sizeof(bool));
   345   mNeedsRebuild = true;
   346 }
   348 bool
   349 nsTransformedTextRun::SetPotentialLineBreaks(uint32_t aStart, uint32_t aLength,
   350                                              uint8_t* aBreakBefore,
   351                                              gfxContext* aRefContext)
   352 {
   353   bool changed = gfxTextRun::SetPotentialLineBreaks(aStart, aLength,
   354       aBreakBefore, aRefContext);
   355   if (changed) {
   356     mNeedsRebuild = true;
   357   }
   358   return changed;
   359 }
   361 size_t
   362 nsTransformedTextRun::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf)
   363 {
   364   size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
   365   total += mStyles.SizeOfExcludingThis(aMallocSizeOf);
   366   total += mCapitalize.SizeOfExcludingThis(aMallocSizeOf);
   367   if (mOwnsFactory) {
   368     total += aMallocSizeOf(mFactory);
   369   }
   370   return total;
   371 }
   373 size_t
   374 nsTransformedTextRun::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
   375 {
   376   return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
   377 }
   379 nsTransformedTextRun*
   380 nsTransformingTextRunFactory::MakeTextRun(const char16_t* aString, uint32_t aLength,
   381                                           const gfxTextRunFactory::Parameters* aParams,
   382                                           gfxFontGroup* aFontGroup, uint32_t aFlags,
   383                                           nsStyleContext** aStyles, bool aOwnsFactory)
   384 {
   385   return nsTransformedTextRun::Create(aParams, this, aFontGroup,
   386                                       aString, aLength, aFlags, aStyles, aOwnsFactory);
   387 }
   389 nsTransformedTextRun*
   390 nsTransformingTextRunFactory::MakeTextRun(const uint8_t* aString, uint32_t aLength,
   391                                           const gfxTextRunFactory::Parameters* aParams,
   392                                           gfxFontGroup* aFontGroup, uint32_t aFlags,
   393                                           nsStyleContext** aStyles, bool aOwnsFactory)
   394 {
   395   // We'll only have a Unicode code path to minimize the amount of code needed
   396   // for these rarely used features
   397   NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString), aLength);
   398   return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
   399                      aFlags & ~(gfxFontGroup::TEXT_IS_PERSISTENT | gfxFontGroup::TEXT_IS_8BIT),
   400                      aStyles, aOwnsFactory);
   401 }
   403 void
   404 MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
   405                          const bool* aCharsToMerge, const bool* aDeletedChars)
   406 {
   407   aDest->ResetGlyphRuns();
   409   gfxTextRun::GlyphRunIterator iter(aSrc, 0, aSrc->GetLength());
   410   uint32_t offset = 0;
   411   nsAutoTArray<gfxTextRun::DetailedGlyph,2> glyphs;
   412   while (iter.NextRun()) {
   413     gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
   414     nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType,
   415                                      offset, false);
   416     if (NS_FAILED(rv))
   417       return;
   419     bool anyMissing = false;
   420     uint32_t mergeRunStart = iter.GetStringStart();
   421     const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs();
   422     gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
   423     uint32_t stringEnd = iter.GetStringEnd();
   424     for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) {
   425       const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
   426       if (g.IsSimpleGlyph()) {
   427         if (!anyMissing) {
   428           gfxTextRun::DetailedGlyph details;
   429           details.mGlyphID = g.GetSimpleGlyph();
   430           details.mAdvance = g.GetSimpleAdvance();
   431           details.mXOffset = 0;
   432           details.mYOffset = 0;
   433           glyphs.AppendElement(details);
   434         }
   435       } else {
   436         if (g.IsMissing()) {
   437           anyMissing = true;
   438           glyphs.Clear();
   439         }
   440         if (g.GetGlyphCount() > 0) {
   441           glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
   442         }
   443       }
   445       if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {
   446         // next char is supposed to merge with current, so loop without
   447         // writing current merged glyph to the destination
   448         continue;
   449       }
   451       // If the start of the merge run is actually a character that should
   452       // have been merged with the previous character (this can happen
   453       // if there's a font change in the middle of a case-mapped character,
   454       // that decomposed into a sequence of base+diacritics, for example),
   455       // just discard the entire merge run. See comment at start of this
   456       // function.
   457       NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart],
   458                        "unable to merge across a glyph run boundary, "
   459                        "glyph(s) discarded");
   460       if (!aCharsToMerge[mergeRunStart]) {
   461         if (anyMissing) {
   462           mergedGlyph.SetMissing(glyphs.Length());
   463         } else {
   464           mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
   465                                  mergedGlyph.IsLigatureGroupStart(),
   466                                  glyphs.Length());
   467         }
   468         aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
   469         ++offset;
   471         while (offset < aDest->GetLength() && aDeletedChars[offset]) {
   472           aDest->SetGlyphs(offset++, gfxTextRun::CompressedGlyph(), nullptr);
   473         }
   474       }
   476       glyphs.Clear();
   477       anyMissing = false;
   478       mergeRunStart = k + 1;
   479       if (mergeRunStart < stringEnd) {
   480         mergedGlyph = srcGlyphs[mergeRunStart];
   481       }
   482     }
   483     NS_ASSERTION(glyphs.Length() == 0,
   484                  "Leftover glyphs, don't request merging of the last character with its next!");  
   485   }
   486   NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
   487 }
   489 gfxTextRunFactory::Parameters
   490 GetParametersForInner(nsTransformedTextRun* aTextRun, uint32_t* aFlags,
   491     gfxContext* aRefContext)
   492 {
   493   gfxTextRunFactory::Parameters params =
   494     { aRefContext, nullptr, nullptr,
   495       nullptr, 0, aTextRun->GetAppUnitsPerDevUnit()
   496     };
   497   *aFlags = aTextRun->GetFlags() & ~gfxFontGroup::TEXT_IS_PERSISTENT;
   498   return params;
   499 }
   501 void
   502 nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
   503     gfxContext* aRefContext)
   504 {
   505   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
   506   gfxFontStyle fontStyle = *fontGroup->GetStyle();
   507   fontStyle.size *= 0.8;
   508   nsRefPtr<gfxFontGroup> smallFont = fontGroup->Copy(&fontStyle);
   509   if (!smallFont)
   510     return;
   512   uint32_t flags;
   513   gfxTextRunFactory::Parameters innerParams =
   514       GetParametersForInner(aTextRun, &flags, aRefContext);
   516   uint32_t length = aTextRun->GetLength();
   517   const char16_t* str = aTextRun->mString.BeginReading();
   518   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
   519   // Create a textrun so we can check cluster-start properties
   520   nsAutoPtr<gfxTextRun> inner(fontGroup->MakeTextRun(str, length, &innerParams, flags));
   521   if (!inner.get())
   522     return;
   524   nsCaseTransformTextRunFactory uppercaseFactory(nullptr, true);
   526   aTextRun->ResetGlyphRuns();
   528   uint32_t runStart = 0;
   529   nsAutoTArray<nsStyleContext*,50> styleArray;
   530   nsAutoTArray<uint8_t,50> canBreakBeforeArray;
   532   enum RunCaseState {
   533     kUpperOrCaseless, // will be untouched by font-variant:small-caps
   534     kLowercase,       // will be uppercased and reduced
   535     kSpecialUpper     // specials: don't shrink, but apply uppercase mapping
   536   };
   537   RunCaseState runCase = kUpperOrCaseless;
   539   // Note that this loop runs from 0 to length *inclusive*, so the last
   540   // iteration is in effect beyond the end of the input text, to give a
   541   // chance to finish the last casing run we've found.
   542   // The last iteration, when i==length, must not attempt to look at the
   543   // character position [i] or the style data for styles[i], as this would
   544   // be beyond the valid length of the textrun or its style array.
   545   for (uint32_t i = 0; i <= length; ++i) {
   546     RunCaseState chCase = kUpperOrCaseless;
   547     // Unless we're at the end, figure out what treatment the current
   548     // character will need.
   549     if (i < length) {
   550       nsStyleContext* styleContext = styles[i];
   551       // Characters that aren't the start of a cluster are ignored here. They
   552       // get added to whatever lowercase/non-lowercase run we're in.
   553       if (!inner->IsClusterStart(i)) {
   554         chCase = runCase;
   555       } else {
   556         if (styleContext->StyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) {
   557           uint32_t ch = str[i];
   558           if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
   559             ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
   560           }
   561           uint32_t ch2 = ToUpperCase(ch);
   562           if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) {
   563             chCase = kLowercase;
   564           } else if (styleContext->StyleFont()->mLanguage == nsGkAtoms::el) {
   565             // In Greek, check for characters that will be modified by the
   566             // GreekUpperCase mapping - this catches accented capitals where
   567             // the accent is to be removed (bug 307039). These are handled by
   568             // a transformed child run using the full-size font.
   569             GreekCasingState state = kStart; // don't need exact context here
   570             ch2 = GreekUpperCase(ch, &state);
   571             if (ch != ch2) {
   572               chCase = kSpecialUpper;
   573             }
   574           }
   575         } else {
   576           // Don't transform the character! I.e., pretend that it's not lowercase
   577         }
   578       }
   579     }
   581     // At the end of the text, or when the current character needs different
   582     // casing treatment from the current run, finish the run-in-progress
   583     // and prepare to accumulate a new run.
   584     // Note that we do not look at any source data for offset [i] here,
   585     // as that would be invalid in the case where i==length.
   586     if ((i == length || runCase != chCase) && runStart < i) {
   587       nsAutoPtr<nsTransformedTextRun> transformedChild;
   588       nsAutoPtr<gfxTextRun> cachedChild;
   589       gfxTextRun* child;
   591       switch (runCase) {
   592       case kUpperOrCaseless:
   593         cachedChild =
   594           fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams,
   595                                  flags);
   596         child = cachedChild.get();
   597         break;
   598       case kLowercase:
   599         transformedChild =
   600           uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
   601                                        &innerParams, smallFont, flags,
   602                                        styleArray.Elements(), false);
   603         child = transformedChild;
   604         break;
   605       case kSpecialUpper:
   606         transformedChild =
   607           uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
   608                                        &innerParams, fontGroup, flags,
   609                                        styleArray.Elements(), false);
   610         child = transformedChild;
   611         break;
   612       }
   613       if (!child)
   614         return;
   615       // Copy potential linebreaks into child so they're preserved
   616       // (and also child will be shaped appropriately)
   617       NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart,
   618                    "lost some break-before values?");
   619       child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
   620           canBreakBeforeArray.Elements(), aRefContext);
   621       if (transformedChild) {
   622         transformedChild->FinishSettingProperties(aRefContext);
   623       }
   624       aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart);
   626       runStart = i;
   627       styleArray.Clear();
   628       canBreakBeforeArray.Clear();
   629     }
   631     if (i < length) {
   632       runCase = chCase;
   633       styleArray.AppendElement(styles[i]);
   634       canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
   635     }
   636   }
   637 }
   639 void
   640 nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
   641     gfxContext* aRefContext)
   642 {
   643   uint32_t length = aTextRun->GetLength();
   644   const char16_t* str = aTextRun->mString.BeginReading();
   645   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
   647   nsAutoString convertedString;
   648   nsAutoTArray<bool,50> charsToMergeArray;
   649   nsAutoTArray<bool,50> deletedCharsArray;
   650   nsAutoTArray<nsStyleContext*,50> styleArray;
   651   nsAutoTArray<uint8_t,50> canBreakBeforeArray;
   652   bool mergeNeeded = false;
   654   // Some languages have special casing conventions that differ from the
   655   // default Unicode mappings.
   656   // The enum values here are named for well-known exemplar languages that
   657   // exhibit the behavior in question; multiple lang tags may map to the
   658   // same setting here, if the behavior is shared by other languages.
   659   enum {
   660     eNone,    // default non-lang-specific behavior
   661     eTurkish, // preserve dotted/dotless-i distinction in uppercase
   662     eDutch,   // treat "ij" digraph as a unit for capitalization
   663     eGreek    // strip accent when uppercasing Greek vowels
   664   } languageSpecificCasing = eNone;
   666   const nsIAtom* lang = nullptr;
   667   bool capitalizeDutchIJ = false;
   668   bool prevIsLetter = false;
   669   uint32_t sigmaIndex = uint32_t(-1);
   670   nsIUGenCategory::nsUGenCategory cat;
   671   GreekCasingState greekState = kStart;
   672   uint32_t i;
   673   for (i = 0; i < length; ++i) {
   674     uint32_t ch = str[i];
   675     nsStyleContext* styleContext = styles[i];
   677     uint8_t style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
   678       : styleContext->StyleText()->mTextTransform;
   679     int extraChars = 0;
   680     const mozilla::unicode::MultiCharMapping *mcm;
   682     if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
   683       ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
   684     }
   686     if (lang != styleContext->StyleFont()->mLanguage) {
   687       lang = styleContext->StyleFont()->mLanguage;
   688       if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
   689           lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
   690           lang == nsGkAtoms::tt) {
   691         languageSpecificCasing = eTurkish;
   692       } else if (lang == nsGkAtoms::nl) {
   693         languageSpecificCasing = eDutch;
   694       } else if (lang == nsGkAtoms::el) {
   695         languageSpecificCasing = eGreek;
   696         greekState = kStart;
   697       } else {
   698         languageSpecificCasing = eNone;
   699       }
   700     }
   702     switch (style) {
   703     case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
   704       if (languageSpecificCasing == eTurkish) {
   705         if (ch == 'I') {
   706           ch = LATIN_SMALL_LETTER_DOTLESS_I;
   707           prevIsLetter = true;
   708           sigmaIndex = uint32_t(-1);
   709           break;
   710         }
   711         if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
   712           ch = 'i';
   713           prevIsLetter = true;
   714           sigmaIndex = uint32_t(-1);
   715           break;
   716         }
   717       }
   719       // Special lowercasing behavior for Greek Sigma: note that this is listed
   720       // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
   721       // language-specific mapping; it applies regardless of the language of
   722       // the element.
   723       //
   724       // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
   725       // the non-final form) whenever there is a following letter, or when the
   726       // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
   727       // LETTER); and to FINAL SIGMA when it is preceded by another letter but
   728       // not followed by one.
   729       //
   730       // To implement the context-sensitive nature of this mapping, we keep
   731       // track of whether the previous character was a letter. If not, CAPITAL
   732       // SIGMA will map directly to SMALL SIGMA. If the previous character
   733       // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
   734       // position in the converted string; if we then encounter another letter,
   735       // that FINAL SIGMA is replaced with a standard SMALL SIGMA.
   737       cat = mozilla::unicode::GetGenCategory(ch);
   739       // If sigmaIndex is not -1, it marks where we have provisionally mapped
   740       // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
   741       // need to change it to SMALL SIGMA.
   742       if (sigmaIndex != uint32_t(-1)) {
   743         if (cat == nsIUGenCategory::kLetter) {
   744           convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
   745         }
   746       }
   748       if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
   749         // If preceding char was a letter, map to FINAL instead of SMALL,
   750         // and note where it occurred by setting sigmaIndex; we'll change it
   751         // to standard SMALL SIGMA later if another letter follows
   752         if (prevIsLetter) {
   753           ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
   754           sigmaIndex = convertedString.Length();
   755         } else {
   756           // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
   757           // to SMALL SIGMA
   758           ch = GREEK_SMALL_LETTER_SIGMA;
   759           sigmaIndex = uint32_t(-1);
   760         }
   761         prevIsLetter = true;
   762         break;
   763       }
   765       // ignore diacritics for the purpose of contextual sigma mapping;
   766       // otherwise, reset prevIsLetter appropriately and clear the
   767       // sigmaIndex marker
   768       if (cat != nsIUGenCategory::kMark) {
   769         prevIsLetter = (cat == nsIUGenCategory::kLetter);
   770         sigmaIndex = uint32_t(-1);
   771       }
   773       mcm = mozilla::unicode::SpecialLower(ch);
   774       if (mcm) {
   775         int j = 0;
   776         while (j < 2 && mcm->mMappedChars[j + 1]) {
   777           convertedString.Append(mcm->mMappedChars[j]);
   778           ++extraChars;
   779           ++j;
   780         }
   781         ch = mcm->mMappedChars[j];
   782         break;
   783       }
   785       ch = ToLowerCase(ch);
   786       break;
   788     case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
   789       if (languageSpecificCasing == eTurkish && ch == 'i') {
   790         ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
   791         break;
   792       }
   794       if (languageSpecificCasing == eGreek) {
   795         ch = GreekUpperCase(ch, &greekState);
   796         break;
   797       }
   799       mcm = mozilla::unicode::SpecialUpper(ch);
   800       if (mcm) {
   801         int j = 0;
   802         while (j < 2 && mcm->mMappedChars[j + 1]) {
   803           convertedString.Append(mcm->mMappedChars[j]);
   804           ++extraChars;
   805           ++j;
   806         }
   807         ch = mcm->mMappedChars[j];
   808         break;
   809       }
   811       ch = ToUpperCase(ch);
   812       break;
   814     case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
   815       if (capitalizeDutchIJ && ch == 'j') {
   816         ch = 'J';
   817         capitalizeDutchIJ = false;
   818         break;
   819       }
   820       capitalizeDutchIJ = false;
   821       if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
   822         if (languageSpecificCasing == eTurkish && ch == 'i') {
   823           ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
   824           break;
   825         }
   826         if (languageSpecificCasing == eDutch && ch == 'i') {
   827           ch = 'I';
   828           capitalizeDutchIJ = true;
   829           break;
   830         }
   832         mcm = mozilla::unicode::SpecialTitle(ch);
   833         if (mcm) {
   834           int j = 0;
   835           while (j < 2 && mcm->mMappedChars[j + 1]) {
   836             convertedString.Append(mcm->mMappedChars[j]);
   837             ++extraChars;
   838             ++j;
   839           }
   840           ch = mcm->mMappedChars[j];
   841           break;
   842         }
   844         ch = ToTitleCase(ch);
   845       }
   846       break;
   848     case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH:
   849       ch = mozilla::unicode::GetFullWidth(ch);
   850       break;
   852     default:
   853       break;
   854     }
   856     if (ch == uint32_t(-1)) {
   857       deletedCharsArray.AppendElement(true);
   858       mergeNeeded = true;
   859     } else {
   860       deletedCharsArray.AppendElement(false);
   861       charsToMergeArray.AppendElement(false);
   862       styleArray.AppendElement(styleContext);
   863       canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
   865       if (IS_IN_BMP(ch)) {
   866         convertedString.Append(ch);
   867       } else {
   868         convertedString.Append(H_SURROGATE(ch));
   869         convertedString.Append(L_SURROGATE(ch));
   870         ++i;
   871         deletedCharsArray.AppendElement(true); // not exactly deleted, but the
   872                                                // trailing surrogate is skipped
   873         ++extraChars;
   874       }
   876       while (extraChars-- > 0) {
   877         mergeNeeded = true;
   878         charsToMergeArray.AppendElement(true);
   879         styleArray.AppendElement(styleContext);
   880         canBreakBeforeArray.AppendElement(false);
   881       }
   882     }
   883   }
   885   uint32_t flags;
   886   gfxTextRunFactory::Parameters innerParams =
   887       GetParametersForInner(aTextRun, &flags, aRefContext);
   888   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
   890   nsAutoPtr<nsTransformedTextRun> transformedChild;
   891   nsAutoPtr<gfxTextRun> cachedChild;
   892   gfxTextRun* child;
   894   if (mInnerTransformingTextRunFactory) {
   895     transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
   896         convertedString.BeginReading(), convertedString.Length(),
   897         &innerParams, fontGroup, flags, styleArray.Elements(), false);
   898     child = transformedChild.get();
   899   } else {
   900     cachedChild = fontGroup->MakeTextRun(
   901         convertedString.BeginReading(), convertedString.Length(),
   902         &innerParams, flags);
   903     child = cachedChild.get();
   904   }
   905   if (!child)
   906     return;
   907   // Copy potential linebreaks into child so they're preserved
   908   // (and also child will be shaped appropriately)
   909   NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
   910                "Dropped characters or break-before values somewhere!");
   911   child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
   912       canBreakBeforeArray.Elements(), aRefContext);
   913   if (transformedChild) {
   914     transformedChild->FinishSettingProperties(aRefContext);
   915   }
   917   if (mergeNeeded) {
   918     // Now merge multiple characters into one multi-glyph character as required
   919     // and deal with skipping deleted accent chars
   920     NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
   921                  "source length mismatch");
   922     NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
   923                  "destination length mismatch");
   924     MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
   925                              deletedCharsArray.Elements());
   926   } else {
   927     // No merging to do, so just copy; this produces a more optimized textrun.
   928     // We can't steal the data because the child may be cached and stealing
   929     // the data would break the cache.
   930     aTextRun->ResetGlyphRuns();
   931     aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
   932   }
   933 }

mercurial