content/base/src/nsLineBreaker.cpp

Thu, 15 Jan 2015 21:03:48 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 21:03:48 +0100
branch
TOR_BUG_9701
changeset 11
deefc01c0e14
permissions
-rw-r--r--

Integrate friendly tips from Tor colleagues to make (or not) 4.5 alpha 3;
This includes removal of overloaded (but unused) methods, and addition of
an overlooked call to DataStruct::SetData(nsISupports, uint32_t, bool).

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #include "nsLineBreaker.h"
     7 #include "nsContentUtils.h"
     8 #include "nsILineBreaker.h"
     9 #include "gfxFont.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
    10 #include "nsHyphenationManager.h"
    11 #include "nsHyphenator.h"
    12 #include "mozilla/gfx/2D.h"
    14 nsLineBreaker::nsLineBreaker()
    15   : mCurrentWordLanguage(nullptr),
    16     mCurrentWordContainsMixedLang(false),
    17     mCurrentWordContainsComplexChar(false),
    18     mAfterBreakableSpace(false), mBreakHere(false),
    19     mWordBreak(nsILineBreaker::kWordBreak_Normal)
    20 {
    21 }
    23 nsLineBreaker::~nsLineBreaker()
    24 {
    25   NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!");
    26 }
    28 static void
    29 SetupCapitalization(const char16_t* aWord, uint32_t aLength,
    30                     bool* aCapitalization)
    31 {
    32   // Capitalize the first alphanumeric character after a space or start
    33   // of the word.
    34   // The only space character a word can contain is NBSP.
    35   bool capitalizeNextChar = true;
    36   for (uint32_t i = 0; i < aLength; ++i) {
    37     uint32_t ch = aWord[i];
    38     if (capitalizeNextChar) {
    39       if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength &&
    40           NS_IS_LOW_SURROGATE(aWord[i + 1])) {
    41         ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
    42       }
    43       if (nsContentUtils::IsAlphanumeric(ch)) {
    44         aCapitalization[i] = true;
    45         capitalizeNextChar = false;
    46       }
    47       if (!IS_IN_BMP(ch)) {
    48         ++i;
    49       }
    50     }
    51     if (ch == 0xA0 /*NBSP*/) {
    52       capitalizeNextChar = true;
    53     }
    54   }
    55 }
    57 nsresult
    58 nsLineBreaker::FlushCurrentWord()
    59 {
    60   uint32_t length = mCurrentWord.Length();
    61   nsAutoTArray<uint8_t,4000> breakState;
    62   if (!breakState.AppendElements(length))
    63     return NS_ERROR_OUT_OF_MEMORY;
    65   nsTArray<bool> capitalizationState;
    67   if (!mCurrentWordContainsComplexChar) {
    68     // For break-strict set everything internal to "break", otherwise
    69     // to "no break"!
    70     memset(breakState.Elements(),
    71            mWordBreak == nsILineBreaker::kWordBreak_BreakAll ?
    72              gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
    73              gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
    74            length*sizeof(uint8_t));
    75   } else {
    76     nsContentUtils::LineBreaker()->
    77       GetJISx4051Breaks(mCurrentWord.Elements(), length, mWordBreak,
    78                         breakState.Elements());
    79   }
    81   bool autoHyphenate = mCurrentWordLanguage &&
    82     !mCurrentWordContainsMixedLang;
    83   uint32_t i;
    84   for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
    85     TextItem* ti = &mTextItems[i];
    86     if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
    87       autoHyphenate = false;
    88     }
    89   }
    90   if (autoHyphenate) {
    91     nsRefPtr<nsHyphenator> hyphenator =
    92       nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
    93     if (hyphenator) {
    94       FindHyphenationPoints(hyphenator,
    95                             mCurrentWord.Elements(),
    96                             mCurrentWord.Elements() + length,
    97                             breakState.Elements());
    98     }
    99   }
   101   uint32_t offset = 0;
   102   for (i = 0; i < mTextItems.Length(); ++i) {
   103     TextItem* ti = &mTextItems[i];
   104     NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
   106     if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
   107       breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
   108     }
   109     if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
   110       uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0;
   111       memset(breakState.Elements() + offset + exclude,
   112              gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
   113              (ti->mLength - exclude)*sizeof(uint8_t));
   114     }
   116     // Don't set the break state for the first character of the word, because
   117     // it was already set correctly earlier and we don't know what the true
   118     // value should be.
   119     uint32_t skipSet = i == 0 ? 1 : 0;
   120     if (ti->mSink) {
   121       ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
   122                            breakState.Elements() + offset + skipSet);
   124       if (ti->mFlags & BREAK_NEED_CAPITALIZATION) {
   125         if (capitalizationState.Length() == 0) {
   126           if (!capitalizationState.AppendElements(length))
   127             return NS_ERROR_OUT_OF_MEMORY;
   128           memset(capitalizationState.Elements(), false, length*sizeof(bool));
   129           SetupCapitalization(mCurrentWord.Elements(), length,
   130                               capitalizationState.Elements());
   131         }
   132         ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
   133                                      capitalizationState.Elements() + offset);
   134       }
   135     }
   137     offset += ti->mLength;
   138   }
   140   mCurrentWord.Clear();
   141   mTextItems.Clear();
   142   mCurrentWordContainsComplexChar = false;
   143   mCurrentWordContainsMixedLang = false;
   144   mCurrentWordLanguage = nullptr;
   145   return NS_OK;
   146 }
   148 nsresult
   149 nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength,
   150                           uint32_t aFlags, nsILineBreakSink* aSink)
   151 {
         // Feed a chunk of UTF-16 text (aText[0..aLength)) into the line breaker.
         //
         // A word left unfinished by a previous call is continued first and is
         // flushed as soon as whitespace is seen. Break-before flags for this chunk
         // are then computed per character and reported to aSink (which may be
         // null) through SetBreaks(), plus SetCapitalization() when aFlags contains
         // BREAK_NEED_CAPITALIZATION. If the chunk ends in the middle of a word,
         // that word is stashed in mCurrentWord/mTextItems for a later flush.
         //
         // aHyphenationLanguage is recorded as the language of any pending word and
         // is used to look up a hyphenator when aFlags asks for auto-hyphenation.
         //
         // Returns NS_OK on success, NS_ERROR_OUT_OF_MEMORY when a buffer cannot be
         // grown, or the failure code from FlushCurrentWord().
         //
         // aLength is expected to be non-zero: the main loop below reads
         // aText[offset] before checking any bound.
   152   NS_ASSERTION(aLength > 0, "Appending empty text...");
   154   uint32_t offset = 0;
   156   // Continue the current word
   157   if (mCurrentWord.Length() > 0) {
   158     NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");
   160     while (offset < aLength && !IsSpace(aText[offset])) {
   161       mCurrentWord.AppendElement(aText[offset]);
   162       if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) {
   163         mCurrentWordContainsComplexChar = true;
   164       }
   165       UpdateCurrentWordLanguage(aHyphenationLanguage);
   166       ++offset;
   167     }
           // Record this chunk's contribution (sink positions 0..offset) so that
           // FlushCurrentWord() can report the word's breaks back to aSink.
   169     if (offset > 0) {
   170       mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
   171     }
           // The whole chunk was non-space: the word is still open, nothing more
           // to do until further text or a flush arrives.
   173     if (offset == aLength)
   174       return NS_OK;
   176     // We encountered whitespace, so we're done with this word
   177     nsresult rv = FlushCurrentWord();
   178     if (NS_FAILED(rv))
   179       return rv;
   180   }
         // breakState is indexed by position in aText; it is only sized when there
         // is a sink to report to.
   182   nsAutoTArray<uint8_t,4000> breakState;
   183   if (aSink) {
   184     if (!breakState.AppendElements(aLength))
   185       return NS_ERROR_OUT_OF_MEMORY;
   186   }
         // Likewise indexed by position in aText; starts out all-false.
   188   nsTArray<bool> capitalizationState;
   189   if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
   190     if (!capitalizationState.AppendElements(aLength))
   191       return NS_ERROR_OUT_OF_MEMORY;
   192     memset(capitalizationState.Elements(), false, aLength*sizeof(bool));
   193   }
         // start: first position of this chunk not consumed by the continued word.
   195   uint32_t start = offset;
         // True when there is no sink, or when aFlags is exactly the
         // suppress-initial / suppress-inside / skip-setting-no-breaks combination
         // and no break is pending from earlier input.
   196   bool noBreaksNeeded = !aSink ||
   197     (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) &&
   198      !mBreakHere && !mAfterBreakableSpace);
   199   if (noBreaksNeeded) {
   200     // Skip to the space before the last word, since either the break data
   201     // here is not needed, or no breaks are set in the sink and there cannot
   202     // be any breaks in this chunk; all we need is the context for the next
   203     // chunk (if any)
   204     offset = aLength;
   205     while (offset > start) {
   206       --offset;
   207       if (IsSpace(aText[offset]))
   208         break;
   209     }
   210   }
   211   uint32_t wordStart = offset;
   212   bool wordHasComplexChar = false;
         // A hyphenator is only looked up when auto-hyphenation is requested,
         // breaks inside the text are not suppressed, and a language is supplied.
   214   nsRefPtr<nsHyphenator> hyphenator;
   215   if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
   216       !(aFlags & BREAK_SUPPRESS_INSIDE) &&
   217       aHyphenationLanguage) {
   218     hyphenator = nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
   219   }
         // Each iteration handles the single character at aText[offset].
   221   for (;;) {
   222     char16_t ch = aText[offset];
   223     bool isSpace = IsSpace(ch);
   224     bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
           // A break is allowed before this character if one was pending
           // (mBreakHere), if the previous character was a breakable space and this
           // one is not, or if the word-break mode is break-all.
   226     if (aSink) {
   227       breakState[offset] =
   228         mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
   229         (mWordBreak == nsILineBreaker::kWordBreak_BreakAll)  ?
   230           gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
   231           gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
   232     }
   233     mBreakHere = false;
   234     mAfterBreakableSpace = isBreakableSpace;
   236     if (isSpace) {
             // The word [wordStart, offset) has just ended inside this chunk;
             // refine its break flags and capitalization in place.
   237       if (offset > wordStart && aSink) {
   238         if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
   239           if (wordHasComplexChar) {
   240             // Save current start-of-word state because GetJISx4051Breaks will
   241             // set it to false
   242             uint8_t currentStart = breakState[wordStart];
   243             nsContentUtils::LineBreaker()->
   244               GetJISx4051Breaks(aText + wordStart, offset - wordStart,
   245                                 mWordBreak,
   246                                 breakState.Elements() + wordStart);
   247             breakState[wordStart] = currentStart;
   248           }
   249           if (hyphenator) {
   250             FindHyphenationPoints(hyphenator,
   251                                   aText + wordStart, aText + offset,
   252                                   breakState.Elements() + wordStart);
   253           }
   254         }
   255         if (aFlags & BREAK_NEED_CAPITALIZATION) {
   256           SetupCapitalization(aText + wordStart, offset - wordStart,
   257                               capitalizationState.Elements() + wordStart);
   258         }
   259       }
   260       wordHasComplexChar = false;
   261       ++offset;
   262       if (offset >= aLength)
   263         break;
   264       wordStart = offset;
   265     } else {
   266       if (!wordHasComplexChar && IsComplexChar(ch)) {
   267         wordHasComplexChar = true;
   268       }
   269       ++offset;
   270       if (offset >= aLength) {
               // The chunk ended in the middle of a word: copy the word into
               // mCurrentWord and remember which sink positions it came from.
   271         // Save this word
   272         mCurrentWordContainsComplexChar = wordHasComplexChar;
   273         uint32_t len = offset - wordStart;
   274         char16_t* elems = mCurrentWord.AppendElements(len);
   275         if (!elems)
   276           return NS_ERROR_OUT_OF_MEMORY;
   277         memcpy(elems, aText + wordStart, sizeof(char16_t)*len);
   278         mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
               // Pulling offset back to wordStart + 1 makes the SetBreaks() call
               // below cover only the first character of the pending word; the
               // remaining characters are reported by FlushCurrentWord().
   279         // Ensure that the break-before for this word is written out
   280         offset = wordStart + 1;
   281         UpdateCurrentWordLanguage(aHyphenationLanguage);
   282         break;
   283       }
   284     }
   285   }
         // Report the positions [start, offset) of this chunk to the sink.
   287   if (!noBreaksNeeded) {
   288     // aSink must not be null
   289     aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
   290     if (aFlags & BREAK_NEED_CAPITALIZATION) {
   291       aSink->SetCapitalization(start, offset - start,
   292                                capitalizationState.Elements() + start);
   293     }
   294   }
   295   return NS_OK;
   296 }
   298 void
   299 nsLineBreaker::FindHyphenationPoints(nsHyphenator *aHyphenator,
   300                                      const char16_t *aTextStart,
   301                                      const char16_t *aTextLimit,
   302                                      uint8_t *aBreakState)
   303 {
   304   nsDependentSubstring string(aTextStart, aTextLimit);
   305   AutoFallibleTArray<bool,200> hyphens;
   306   if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
   307     for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
   308       if (hyphens[i]) {
   309         aBreakState[i + 1] =
   310           gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
   311       }
   312     }
   313   }
   314 }
   316 nsresult
   317 nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength,
   318                           uint32_t aFlags, nsILineBreakSink* aSink)
   319 {
         // 8-bit counterpart of the char16_t AppendText() overload: feed the bytes
         // aText[0..aLength) into the line breaker.
         //
         // When aFlags requests capitalization or auto-hyphenation the text is
         // widened to UTF-16 and handed to the char16_t overload. Otherwise a
         // pending word is continued (and flushed once whitespace is seen),
         // break-before flags are computed per byte and reported to aSink (which
         // may be null) through SetBreaks(), and a word left unfinished at the end
         // of the chunk is stashed in mCurrentWord/mTextItems.
         //
         // Returns NS_OK on success, NS_ERROR_OUT_OF_MEMORY when a buffer cannot be
         // grown, or the failure code from FlushCurrentWord() / the char16_t
         // overload.
         //
         // aLength is expected to be non-zero: the main loop below reads
         // aText[offset] before checking any bound.
   320   NS_ASSERTION(aLength > 0, "Appending empty text...");
   322   if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
   323     // Defer to the Unicode path if capitalization or hyphenation is required
           // NOTE(review): the conversion helper is named for ASCII input; confirm
           // that bytes >= 0x80 in aText are converted as intended.
   324     nsAutoString str;
   325     const char* cp = reinterpret_cast<const char*>(aText);
   326     CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
   327     return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
   328   }
   330   uint32_t offset = 0;
   332   // Continue the current word
   333   if (mCurrentWord.Length() > 0) {
   334     NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");
           // Unlike the char16_t overload, the word language is not updated here:
           // text that wants auto-hyphenation never reaches this point (see the
           // early return above), so items added below never carry that flag.
   336     while (offset < aLength && !IsSpace(aText[offset])) {
   337       mCurrentWord.AppendElement(aText[offset]);
   338       if (!mCurrentWordContainsComplexChar &&
   339           IsComplexASCIIChar(aText[offset])) {
   340         mCurrentWordContainsComplexChar = true;
   341       }
   342       ++offset;
   343     }
           // Record this chunk's contribution (sink positions 0..offset) so that
           // FlushCurrentWord() can report the word's breaks back to aSink.
   345     if (offset > 0) {
   346       mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
   347     }
   349     if (offset == aLength) {
   350       // We did not encounter whitespace so the word hasn't finished yet.
   351       return NS_OK;
   352     }
   354     // We encountered whitespace, so we're done with this word
   355     nsresult rv = FlushCurrentWord();
   356     if (NS_FAILED(rv))
   357       return rv;
   358   }
         // breakState is indexed by position in aText; it is only sized when there
         // is a sink to report to.
   360   nsAutoTArray<uint8_t,4000> breakState;
   361   if (aSink) {
   362     if (!breakState.AppendElements(aLength))
   363       return NS_ERROR_OUT_OF_MEMORY;
   364   }
         // start: first position of this chunk not consumed by the continued word.
   366   uint32_t start = offset;
         // True when there is no sink, or when aFlags is exactly the
         // suppress-initial / suppress-inside / skip-setting-no-breaks combination
         // and no break is pending from earlier input.
   367   bool noBreaksNeeded = !aSink ||
   368     (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) &&
   369      !mBreakHere && !mAfterBreakableSpace);
   370   if (noBreaksNeeded) {
   371     // Skip to the space before the last word, since either the break data
   372     // here is not needed, or no breaks are set in the sink and there cannot
   373     // be any breaks in this chunk; all we need is the context for the next
   374     // chunk (if any)
   375     offset = aLength;
   376     while (offset > start) {
   377       --offset;
   378       if (IsSpace(aText[offset]))
   379         break;
   380     }
   381   }
   382   uint32_t wordStart = offset;
   383   bool wordHasComplexChar = false;
         // Each iteration handles the single byte at aText[offset].
   385   for (;;) {
   386     uint8_t ch = aText[offset];
   387     bool isSpace = IsSpace(ch);
   388     bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
   390     if (aSink) {
   391       // Consider word-break style.  Since the break position of CJK scripts
   392       // will be set by nsILineBreaker, we don't consider CJK at this point.
   393       breakState[offset] =
   394         mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
   395         (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ?
   396           gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
   397           gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
   398     }
   399     mBreakHere = false;
   400     mAfterBreakableSpace = isBreakableSpace;
   402     if (isSpace) {
             // The word [wordStart, offset) has just ended inside this chunk; only
             // words containing a complex character need the line breaker service.
   403       if (offset > wordStart && wordHasComplexChar) {
   404         if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
   405           // Save current start-of-word state because GetJISx4051Breaks will
   406           // set it to false
   407           uint8_t currentStart = breakState[wordStart];
   408           nsContentUtils::LineBreaker()->
   409             GetJISx4051Breaks(aText + wordStart, offset - wordStart,
   410                               mWordBreak,
   411                               breakState.Elements() + wordStart);
   412           breakState[wordStart] = currentStart;
   413         }
   414         wordHasComplexChar = false;
   415       }
   417       ++offset;
   418       if (offset >= aLength)
   419         break;
   420       wordStart = offset;
   421     } else {
   422       if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
   423         wordHasComplexChar = true;
   424       }
   425       ++offset;
   426       if (offset >= aLength) {
               // The chunk ended in the middle of a word: widen the bytes into
               // mCurrentWord and remember which sink positions they came from.
   427         // Save this word
   428         mCurrentWordContainsComplexChar = wordHasComplexChar;
   429         uint32_t len = offset - wordStart;
   430         char16_t* elems = mCurrentWord.AppendElements(len);
   431         if (!elems)
   432           return NS_ERROR_OUT_OF_MEMORY;
   433         uint32_t i;
   434         for (i = wordStart; i < offset; ++i) {
   435           elems[i - wordStart] = aText[i];
   436         }
   437         mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
               // Pulling offset back to wordStart + 1 makes the SetBreaks() call
               // below cover only the first character of the pending word; the
               // remaining characters are reported by FlushCurrentWord().
   438         // Ensure that the break-before for this word is written out
   439         offset = wordStart + 1;
   440         break;
   441       }
   442     }
   443   }
         // Report the positions [start, offset) of this chunk to the sink;
         // noBreaksNeeded being false implies aSink is non-null.
   445   if (!noBreaksNeeded) {
   446     aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
   447   }
   448   return NS_OK;
   449 }
   451 void
   452 nsLineBreaker::UpdateCurrentWordLanguage(nsIAtom *aHyphenationLanguage)
   453 {
   454   if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
   455     mCurrentWordContainsMixedLang = true;
   456   } else {
   457     mCurrentWordLanguage = aHyphenationLanguage;
   458   }
   459 }
   461 nsresult
   462 nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags)
   463 {
   464   nsresult rv = FlushCurrentWord();
   465   if (NS_FAILED(rv))
   466     return rv;
   468   bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
   469   if (mAfterBreakableSpace && !isBreakableSpace) {
   470     mBreakHere = true;
   471   }
   472   mAfterBreakableSpace = isBreakableSpace;
   473   return NS_OK;
   474 }
   476 nsresult
   477 nsLineBreaker::Reset(bool* aTrailingBreak)
   478 {
   479   nsresult rv = FlushCurrentWord();
   480   if (NS_FAILED(rv))
   481     return rv;
   483   *aTrailingBreak = mBreakHere || mAfterBreakableSpace;
   484   mBreakHere = false;
   485   mAfterBreakableSpace = false;
   486   return NS_OK;
   487 }

mercurial