michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsLineBreaker.h" michael@0: #include "nsContentUtils.h" michael@0: #include "nsILineBreaker.h" michael@0: #include "gfxFont.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values michael@0: #include "nsHyphenationManager.h" michael@0: #include "nsHyphenator.h" michael@0: #include "mozilla/gfx/2D.h" michael@0: michael@0: nsLineBreaker::nsLineBreaker() michael@0: : mCurrentWordLanguage(nullptr), michael@0: mCurrentWordContainsMixedLang(false), michael@0: mCurrentWordContainsComplexChar(false), michael@0: mAfterBreakableSpace(false), mBreakHere(false), michael@0: mWordBreak(nsILineBreaker::kWordBreak_Normal) michael@0: { michael@0: } michael@0: michael@0: nsLineBreaker::~nsLineBreaker() michael@0: { michael@0: NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!"); michael@0: } michael@0: michael@0: static void michael@0: SetupCapitalization(const char16_t* aWord, uint32_t aLength, michael@0: bool* aCapitalization) michael@0: { michael@0: // Capitalize the first alphanumeric character after a space or start michael@0: // of the word. michael@0: // The only space character a word can contain is NBSP. michael@0: bool capitalizeNextChar = true; michael@0: for (uint32_t i = 0; i < aLength; ++i) { michael@0: uint32_t ch = aWord[i]; michael@0: if (capitalizeNextChar) { michael@0: if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength && michael@0: NS_IS_LOW_SURROGATE(aWord[i + 1])) { michael@0: ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]); michael@0: } michael@0: if (nsContentUtils::IsAlphanumeric(ch)) { michael@0: aCapitalization[i] = true; michael@0: capitalizeNextChar = false; michael@0: } michael@0: if (!IS_IN_BMP(ch)) { michael@0: ++i; michael@0: } michael@0: } michael@0: if (ch == 0xA0 /*NBSP*/) { michael@0: capitalizeNextChar = true; michael@0: } michael@0: } michael@0: } michael@0: michael@0: nsresult michael@0: nsLineBreaker::FlushCurrentWord() michael@0: { michael@0: uint32_t length = mCurrentWord.Length(); michael@0: nsAutoTArray breakState; michael@0: if (!breakState.AppendElements(length)) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: michael@0: nsTArray capitalizationState; michael@0: michael@0: if (!mCurrentWordContainsComplexChar) { michael@0: // For break-strict set everything internal to "break", otherwise michael@0: // to "no break"! michael@0: memset(breakState.Elements(), michael@0: mWordBreak == nsILineBreaker::kWordBreak_BreakAll ? michael@0: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL : michael@0: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE, michael@0: length*sizeof(uint8_t)); michael@0: } else { michael@0: nsContentUtils::LineBreaker()-> michael@0: GetJISx4051Breaks(mCurrentWord.Elements(), length, mWordBreak, michael@0: breakState.Elements()); michael@0: } michael@0: michael@0: bool autoHyphenate = mCurrentWordLanguage && michael@0: !mCurrentWordContainsMixedLang; michael@0: uint32_t i; michael@0: for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) { michael@0: TextItem* ti = &mTextItems[i]; michael@0: if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) { michael@0: autoHyphenate = false; michael@0: } michael@0: } michael@0: if (autoHyphenate) { michael@0: nsRefPtr hyphenator = michael@0: nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage); michael@0: if (hyphenator) { michael@0: FindHyphenationPoints(hyphenator, michael@0: mCurrentWord.Elements(), michael@0: mCurrentWord.Elements() + length, michael@0: breakState.Elements()); michael@0: } michael@0: } michael@0: michael@0: uint32_t offset = 0; michael@0: for (i = 0; i < mTextItems.Length(); ++i) { michael@0: TextItem* ti = &mTextItems[i]; michael@0: NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?"); michael@0: michael@0: if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) { michael@0: breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; michael@0: } michael@0: if (ti->mFlags & BREAK_SUPPRESS_INSIDE) { michael@0: uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0; michael@0: memset(breakState.Elements() + offset + exclude, michael@0: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE, michael@0: (ti->mLength - exclude)*sizeof(uint8_t)); michael@0: } michael@0: michael@0: // Don't set the break state for the first character of the word, because michael@0: // it was already set correctly earlier and we don't know what the true michael@0: // value should be. michael@0: uint32_t skipSet = i == 0 ? 1 : 0; michael@0: if (ti->mSink) { michael@0: ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet, michael@0: breakState.Elements() + offset + skipSet); michael@0: michael@0: if (ti->mFlags & BREAK_NEED_CAPITALIZATION) { michael@0: if (capitalizationState.Length() == 0) { michael@0: if (!capitalizationState.AppendElements(length)) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: memset(capitalizationState.Elements(), false, length*sizeof(bool)); michael@0: SetupCapitalization(mCurrentWord.Elements(), length, michael@0: capitalizationState.Elements()); michael@0: } michael@0: ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength, michael@0: capitalizationState.Elements() + offset); michael@0: } michael@0: } michael@0: michael@0: offset += ti->mLength; michael@0: } michael@0: michael@0: mCurrentWord.Clear(); michael@0: mTextItems.Clear(); michael@0: mCurrentWordContainsComplexChar = false; michael@0: mCurrentWordContainsMixedLang = false; michael@0: mCurrentWordLanguage = nullptr; michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult michael@0: nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength, michael@0: uint32_t aFlags, nsILineBreakSink* aSink) michael@0: { michael@0: NS_ASSERTION(aLength > 0, "Appending empty text..."); michael@0: michael@0: uint32_t offset = 0; michael@0: michael@0: // Continue the current word michael@0: if (mCurrentWord.Length() > 0) { michael@0: NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set"); michael@0: michael@0: while (offset < aLength && !IsSpace(aText[offset])) { michael@0: mCurrentWord.AppendElement(aText[offset]); michael@0: if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) { michael@0: mCurrentWordContainsComplexChar = true; michael@0: } michael@0: UpdateCurrentWordLanguage(aHyphenationLanguage); michael@0: ++offset; michael@0: } michael@0: michael@0: if (offset > 0) { michael@0: mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags)); michael@0: } michael@0: michael@0: if (offset == aLength) michael@0: return NS_OK; michael@0: michael@0: // We encountered whitespace, so we're done with this word michael@0: nsresult rv = FlushCurrentWord(); michael@0: if (NS_FAILED(rv)) michael@0: return rv; michael@0: } michael@0: michael@0: nsAutoTArray breakState; michael@0: if (aSink) { michael@0: if (!breakState.AppendElements(aLength)) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: nsTArray capitalizationState; michael@0: if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) { michael@0: if (!capitalizationState.AppendElements(aLength)) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: memset(capitalizationState.Elements(), false, aLength*sizeof(bool)); michael@0: } michael@0: michael@0: uint32_t start = offset; michael@0: bool noBreaksNeeded = !aSink || michael@0: (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) && michael@0: !mBreakHere && !mAfterBreakableSpace); michael@0: if (noBreaksNeeded) { michael@0: // Skip to the space before the last word, since either the break data michael@0: // here is not needed, or no breaks are set in the sink and there cannot michael@0: // be any breaks in this chunk; all we need is the context for the next michael@0: // chunk (if any) michael@0: offset = aLength; michael@0: while (offset > start) { michael@0: --offset; michael@0: if (IsSpace(aText[offset])) michael@0: break; michael@0: } michael@0: } michael@0: uint32_t wordStart = offset; michael@0: bool wordHasComplexChar = false; michael@0: michael@0: nsRefPtr hyphenator; michael@0: if ((aFlags & BREAK_USE_AUTO_HYPHENATION) && michael@0: !(aFlags & BREAK_SUPPRESS_INSIDE) && michael@0: aHyphenationLanguage) { michael@0: hyphenator = nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage); michael@0: } michael@0: michael@0: for (;;) { michael@0: char16_t ch = aText[offset]; michael@0: bool isSpace = IsSpace(ch); michael@0: bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE); michael@0: michael@0: if (aSink) { michael@0: breakState[offset] = michael@0: mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) || michael@0: (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ? michael@0: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL : michael@0: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; michael@0: } michael@0: mBreakHere = false; michael@0: mAfterBreakableSpace = isBreakableSpace; michael@0: michael@0: if (isSpace) { michael@0: if (offset > wordStart && aSink) { michael@0: if (!(aFlags & BREAK_SUPPRESS_INSIDE)) { michael@0: if (wordHasComplexChar) { michael@0: // Save current start-of-word state because GetJISx4051Breaks will michael@0: // set it to false michael@0: uint8_t currentStart = breakState[wordStart]; michael@0: nsContentUtils::LineBreaker()-> michael@0: GetJISx4051Breaks(aText + wordStart, offset - wordStart, michael@0: mWordBreak, michael@0: breakState.Elements() + wordStart); michael@0: breakState[wordStart] = currentStart; michael@0: } michael@0: if (hyphenator) { michael@0: FindHyphenationPoints(hyphenator, michael@0: aText + wordStart, aText + offset, michael@0: breakState.Elements() + wordStart); michael@0: } michael@0: } michael@0: if (aFlags & BREAK_NEED_CAPITALIZATION) { michael@0: SetupCapitalization(aText + wordStart, offset - wordStart, michael@0: capitalizationState.Elements() + wordStart); michael@0: } michael@0: } michael@0: wordHasComplexChar = false; michael@0: ++offset; michael@0: if (offset >= aLength) michael@0: break; michael@0: wordStart = offset; michael@0: } else { michael@0: if (!wordHasComplexChar && IsComplexChar(ch)) { michael@0: wordHasComplexChar = true; michael@0: } michael@0: ++offset; michael@0: if (offset >= aLength) { michael@0: // Save this word michael@0: mCurrentWordContainsComplexChar = wordHasComplexChar; michael@0: uint32_t len = offset - wordStart; michael@0: char16_t* elems = mCurrentWord.AppendElements(len); michael@0: if (!elems) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: memcpy(elems, aText + wordStart, sizeof(char16_t)*len); michael@0: mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags)); michael@0: // Ensure that the break-before for this word is written out michael@0: offset = wordStart + 1; michael@0: UpdateCurrentWordLanguage(aHyphenationLanguage); michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (!noBreaksNeeded) { michael@0: // aSink must not be null michael@0: aSink->SetBreaks(start, offset - start, breakState.Elements() + start); michael@0: if (aFlags & BREAK_NEED_CAPITALIZATION) { michael@0: aSink->SetCapitalization(start, offset - start, michael@0: capitalizationState.Elements() + start); michael@0: } michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: void michael@0: nsLineBreaker::FindHyphenationPoints(nsHyphenator *aHyphenator, michael@0: const char16_t *aTextStart, michael@0: const char16_t *aTextLimit, michael@0: uint8_t *aBreakState) michael@0: { michael@0: nsDependentSubstring string(aTextStart, aTextLimit); michael@0: AutoFallibleTArray hyphens; michael@0: if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) { michael@0: for (uint32_t i = 0; i + 1 < string.Length(); ++i) { michael@0: if (hyphens[i]) { michael@0: aBreakState[i + 1] = michael@0: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: nsresult michael@0: nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength, michael@0: uint32_t aFlags, nsILineBreakSink* aSink) michael@0: { michael@0: NS_ASSERTION(aLength > 0, "Appending empty text..."); michael@0: michael@0: if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) { michael@0: // Defer to the Unicode path if capitalization or hyphenation is required michael@0: nsAutoString str; michael@0: const char* cp = reinterpret_cast(aText); michael@0: CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str); michael@0: return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink); michael@0: } michael@0: michael@0: uint32_t offset = 0; michael@0: michael@0: // Continue the current word michael@0: if (mCurrentWord.Length() > 0) { michael@0: NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set"); michael@0: michael@0: while (offset < aLength && !IsSpace(aText[offset])) { michael@0: mCurrentWord.AppendElement(aText[offset]); michael@0: if (!mCurrentWordContainsComplexChar && michael@0: IsComplexASCIIChar(aText[offset])) { michael@0: mCurrentWordContainsComplexChar = true; michael@0: } michael@0: ++offset; michael@0: } michael@0: michael@0: if (offset > 0) { michael@0: mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags)); michael@0: } michael@0: michael@0: if (offset == aLength) { michael@0: // We did not encounter whitespace so the word hasn't finished yet. michael@0: return NS_OK; michael@0: } michael@0: michael@0: // We encountered whitespace, so we're done with this word michael@0: nsresult rv = FlushCurrentWord(); michael@0: if (NS_FAILED(rv)) michael@0: return rv; michael@0: } michael@0: michael@0: nsAutoTArray breakState; michael@0: if (aSink) { michael@0: if (!breakState.AppendElements(aLength)) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: uint32_t start = offset; michael@0: bool noBreaksNeeded = !aSink || michael@0: (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) && michael@0: !mBreakHere && !mAfterBreakableSpace); michael@0: if (noBreaksNeeded) { michael@0: // Skip to the space before the last word, since either the break data michael@0: // here is not needed, or no breaks are set in the sink and there cannot michael@0: // be any breaks in this chunk; all we need is the context for the next michael@0: // chunk (if any) michael@0: offset = aLength; michael@0: while (offset > start) { michael@0: --offset; michael@0: if (IsSpace(aText[offset])) michael@0: break; michael@0: } michael@0: } michael@0: uint32_t wordStart = offset; michael@0: bool wordHasComplexChar = false; michael@0: michael@0: for (;;) { michael@0: uint8_t ch = aText[offset]; michael@0: bool isSpace = IsSpace(ch); michael@0: bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE); michael@0: michael@0: if (aSink) { michael@0: // Consider word-break style. Since the break position of CJK scripts michael@0: // will be set by nsILineBreaker, we don't consider CJK at this point. michael@0: breakState[offset] = michael@0: mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) || michael@0: (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ? michael@0: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL : michael@0: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; michael@0: } michael@0: mBreakHere = false; michael@0: mAfterBreakableSpace = isBreakableSpace; michael@0: michael@0: if (isSpace) { michael@0: if (offset > wordStart && wordHasComplexChar) { michael@0: if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) { michael@0: // Save current start-of-word state because GetJISx4051Breaks will michael@0: // set it to false michael@0: uint8_t currentStart = breakState[wordStart]; michael@0: nsContentUtils::LineBreaker()-> michael@0: GetJISx4051Breaks(aText + wordStart, offset - wordStart, michael@0: mWordBreak, michael@0: breakState.Elements() + wordStart); michael@0: breakState[wordStart] = currentStart; michael@0: } michael@0: wordHasComplexChar = false; michael@0: } michael@0: michael@0: ++offset; michael@0: if (offset >= aLength) michael@0: break; michael@0: wordStart = offset; michael@0: } else { michael@0: if (!wordHasComplexChar && IsComplexASCIIChar(ch)) { michael@0: wordHasComplexChar = true; michael@0: } michael@0: ++offset; michael@0: if (offset >= aLength) { michael@0: // Save this word michael@0: mCurrentWordContainsComplexChar = wordHasComplexChar; michael@0: uint32_t len = offset - wordStart; michael@0: char16_t* elems = mCurrentWord.AppendElements(len); michael@0: if (!elems) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: uint32_t i; michael@0: for (i = wordStart; i < offset; ++i) { michael@0: elems[i - wordStart] = aText[i]; michael@0: } michael@0: mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags)); michael@0: // Ensure that the break-before for this word is written out michael@0: offset = wordStart + 1; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (!noBreaksNeeded) { michael@0: aSink->SetBreaks(start, offset - start, breakState.Elements() + start); michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: void michael@0: nsLineBreaker::UpdateCurrentWordLanguage(nsIAtom *aHyphenationLanguage) michael@0: { michael@0: if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) { michael@0: mCurrentWordContainsMixedLang = true; michael@0: } else { michael@0: mCurrentWordLanguage = aHyphenationLanguage; michael@0: } michael@0: } michael@0: michael@0: nsresult michael@0: nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) michael@0: { michael@0: nsresult rv = FlushCurrentWord(); michael@0: if (NS_FAILED(rv)) michael@0: return rv; michael@0: michael@0: bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE); michael@0: if (mAfterBreakableSpace && !isBreakableSpace) { michael@0: mBreakHere = true; michael@0: } michael@0: mAfterBreakableSpace = isBreakableSpace; michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult michael@0: nsLineBreaker::Reset(bool* aTrailingBreak) michael@0: { michael@0: nsresult rv = FlushCurrentWord(); michael@0: if (NS_FAILED(rv)) michael@0: return rv; michael@0: michael@0: *aTrailingBreak = mBreakHere || mAfterBreakableSpace; michael@0: mBreakHere = false; michael@0: mAfterBreakableSpace = false; michael@0: return NS_OK; michael@0: }