michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- michael@0: * This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsTextFrameUtils.h" michael@0: michael@0: #include "nsUnicharUtils.h" michael@0: #include "nsBidiUtils.h" michael@0: #include "nsIContent.h" michael@0: #include "nsStyleStruct.h" michael@0: #include "nsTextFragment.h" michael@0: #include michael@0: michael@0: static bool IsDiscardable(char16_t ch, uint32_t* aFlags) michael@0: { michael@0: // Unlike IS_DISCARDABLE, we don't discard \r. \r will be ignored by gfxTextRun michael@0: // and discarding it would force us to copy text in many cases of preformatted michael@0: // text containing \r\n. michael@0: if (ch == CH_SHY) { michael@0: *aFlags |= nsTextFrameUtils::TEXT_HAS_SHY; michael@0: return true; michael@0: } michael@0: return IsBidiControl(ch); michael@0: } michael@0: michael@0: static bool IsDiscardable(uint8_t ch, uint32_t* aFlags) michael@0: { michael@0: if (ch == CH_SHY) { michael@0: *aFlags |= nsTextFrameUtils::TEXT_HAS_SHY; michael@0: return true; michael@0: } michael@0: return false; michael@0: } michael@0: michael@0: char16_t* michael@0: nsTextFrameUtils::TransformText(const char16_t* aText, uint32_t aLength, michael@0: char16_t* aOutput, michael@0: CompressionMode aCompression, michael@0: uint8_t* aIncomingFlags, michael@0: gfxSkipChars* aSkipChars, michael@0: uint32_t* aAnalysisFlags) michael@0: { michael@0: uint32_t flags = 0; michael@0: char16_t* outputStart = aOutput; michael@0: michael@0: bool lastCharArabic = false; michael@0: michael@0: if (aCompression == COMPRESS_NONE || michael@0: aCompression == DISCARD_NEWLINE) { michael@0: // Skip discardables. michael@0: uint32_t i; michael@0: for (i = 0; i < aLength; ++i) { michael@0: char16_t ch = *aText++; michael@0: if (IsDiscardable(ch, &flags) || michael@0: (ch == '\n' && aCompression == DISCARD_NEWLINE)) { michael@0: aSkipChars->SkipChar(); michael@0: } else { michael@0: aSkipChars->KeepChar(); michael@0: if (ch > ' ') { michael@0: lastCharArabic = IS_ARABIC_CHAR(ch); michael@0: } else if (ch == '\t') { michael@0: flags |= TEXT_HAS_TAB; michael@0: } michael@0: *aOutput++ = ch; michael@0: } michael@0: } michael@0: if (lastCharArabic) { michael@0: *aIncomingFlags |= INCOMING_ARABICCHAR; michael@0: } else { michael@0: *aIncomingFlags &= ~INCOMING_ARABICCHAR; michael@0: } michael@0: *aIncomingFlags &= ~INCOMING_WHITESPACE; michael@0: } else { michael@0: bool inWhitespace = (*aIncomingFlags & INCOMING_WHITESPACE) != 0; michael@0: uint32_t i; michael@0: for (i = 0; i < aLength; ++i) { michael@0: char16_t ch = *aText++; michael@0: bool nowInWhitespace; michael@0: if (ch == ' ' && michael@0: (i + 1 >= aLength || michael@0: !IsSpaceCombiningSequenceTail(aText, aLength - (i + 1)))) { michael@0: nowInWhitespace = true; michael@0: } else if (ch == '\n' && aCompression == COMPRESS_WHITESPACE_NEWLINE) { michael@0: if (i > 0 && IS_CJ_CHAR(aText[-1]) && michael@0: i + 1 < aLength && IS_CJ_CHAR(aText[1])) { michael@0: // Discard newlines between CJK chars. michael@0: // XXX this really requires more context to get right! michael@0: aSkipChars->SkipChar(); michael@0: continue; michael@0: } michael@0: nowInWhitespace = true; michael@0: } else { michael@0: nowInWhitespace = ch == '\t'; michael@0: } michael@0: michael@0: if (!nowInWhitespace) { michael@0: if (IsDiscardable(ch, &flags)) { michael@0: aSkipChars->SkipChar(); michael@0: nowInWhitespace = inWhitespace; michael@0: } else { michael@0: *aOutput++ = ch; michael@0: aSkipChars->KeepChar(); michael@0: lastCharArabic = IS_ARABIC_CHAR(ch); michael@0: } michael@0: } else { michael@0: if (inWhitespace) { michael@0: aSkipChars->SkipChar(); michael@0: } else { michael@0: if (ch != ' ') { michael@0: flags |= TEXT_WAS_TRANSFORMED; michael@0: } michael@0: *aOutput++ = ' '; michael@0: aSkipChars->KeepChar(); michael@0: } michael@0: } michael@0: inWhitespace = nowInWhitespace; michael@0: } michael@0: if (lastCharArabic) { michael@0: *aIncomingFlags |= INCOMING_ARABICCHAR; michael@0: } else { michael@0: *aIncomingFlags &= ~INCOMING_ARABICCHAR; michael@0: } michael@0: if (inWhitespace) { michael@0: *aIncomingFlags |= INCOMING_WHITESPACE; michael@0: } else { michael@0: *aIncomingFlags &= ~INCOMING_WHITESPACE; michael@0: } michael@0: } michael@0: michael@0: if (outputStart + aLength != aOutput) { michael@0: flags |= TEXT_WAS_TRANSFORMED; michael@0: } michael@0: *aAnalysisFlags = flags; michael@0: return aOutput; michael@0: } michael@0: michael@0: uint8_t* michael@0: nsTextFrameUtils::TransformText(const uint8_t* aText, uint32_t aLength, michael@0: uint8_t* aOutput, michael@0: CompressionMode aCompression, michael@0: uint8_t* aIncomingFlags, michael@0: gfxSkipChars* aSkipChars, michael@0: uint32_t* aAnalysisFlags) michael@0: { michael@0: uint32_t flags = 0; michael@0: uint8_t* outputStart = aOutput; michael@0: michael@0: if (aCompression == COMPRESS_NONE || michael@0: aCompression == DISCARD_NEWLINE) { michael@0: // Skip discardables. michael@0: uint32_t i; michael@0: for (i = 0; i < aLength; ++i) { michael@0: uint8_t ch = *aText++; michael@0: if (IsDiscardable(ch, &flags) || michael@0: (ch == '\n' && aCompression == DISCARD_NEWLINE)) { michael@0: aSkipChars->SkipChar(); michael@0: } else { michael@0: aSkipChars->KeepChar(); michael@0: if (ch == '\t') { michael@0: flags |= TEXT_HAS_TAB; michael@0: } michael@0: *aOutput++ = ch; michael@0: } michael@0: } michael@0: *aIncomingFlags &= ~(INCOMING_ARABICCHAR | INCOMING_WHITESPACE); michael@0: } else { michael@0: bool inWhitespace = (*aIncomingFlags & INCOMING_WHITESPACE) != 0; michael@0: uint32_t i; michael@0: for (i = 0; i < aLength; ++i) { michael@0: uint8_t ch = *aText++; michael@0: bool nowInWhitespace = ch == ' ' || ch == '\t' || michael@0: (ch == '\n' && aCompression == COMPRESS_WHITESPACE_NEWLINE); michael@0: if (!nowInWhitespace) { michael@0: if (IsDiscardable(ch, &flags)) { michael@0: aSkipChars->SkipChar(); michael@0: nowInWhitespace = inWhitespace; michael@0: } else { michael@0: *aOutput++ = ch; michael@0: aSkipChars->KeepChar(); michael@0: } michael@0: } else { michael@0: if (inWhitespace) { michael@0: aSkipChars->SkipChar(); michael@0: } else { michael@0: if (ch != ' ') { michael@0: flags |= TEXT_WAS_TRANSFORMED; michael@0: } michael@0: *aOutput++ = ' '; michael@0: aSkipChars->KeepChar(); michael@0: } michael@0: } michael@0: inWhitespace = nowInWhitespace; michael@0: } michael@0: *aIncomingFlags &= ~INCOMING_ARABICCHAR; michael@0: if (inWhitespace) { michael@0: *aIncomingFlags |= INCOMING_WHITESPACE; michael@0: } else { michael@0: *aIncomingFlags &= ~INCOMING_WHITESPACE; michael@0: } michael@0: } michael@0: michael@0: if (outputStart + aLength != aOutput) { michael@0: flags |= TEXT_WAS_TRANSFORMED; michael@0: } michael@0: *aAnalysisFlags = flags; michael@0: return aOutput; michael@0: } michael@0: michael@0: uint32_t michael@0: nsTextFrameUtils::ComputeApproximateLengthWithWhitespaceCompression( michael@0: nsIContent *aContent, const nsStyleText *aStyleText) michael@0: { michael@0: const nsTextFragment *frag = aContent->GetText(); michael@0: // This is an approximation so we don't really need anything michael@0: // too fancy here. michael@0: uint32_t len; michael@0: if (aStyleText->WhiteSpaceIsSignificant()) { michael@0: len = frag->GetLength(); michael@0: } else { michael@0: bool is2b = frag->Is2b(); michael@0: union { michael@0: const char *s1b; michael@0: const char16_t *s2b; michael@0: } u; michael@0: if (is2b) { michael@0: u.s2b = frag->Get2b(); michael@0: } else { michael@0: u.s1b = frag->Get1b(); michael@0: } michael@0: bool prevWS = true; // more important to ignore blocks with michael@0: // only whitespace than get inline boundaries michael@0: // exactly right michael@0: len = 0; michael@0: for (uint32_t i = 0, i_end = frag->GetLength(); i < i_end; ++i) { michael@0: char16_t c = is2b ? u.s2b[i] : u.s1b[i]; michael@0: if (c == ' ' || c == '\n' || c == '\t' || c == '\r') { michael@0: if (!prevWS) { michael@0: ++len; michael@0: } michael@0: prevWS = true; michael@0: } else { michael@0: ++len; michael@0: prevWS = false; michael@0: } michael@0: } michael@0: } michael@0: return len; michael@0: } michael@0: michael@0: bool nsSkipCharsRunIterator::NextRun() { michael@0: do { michael@0: if (mRunLength) { michael@0: mIterator.AdvanceOriginal(mRunLength); michael@0: NS_ASSERTION(mRunLength > 0, "No characters in run (initial length too large?)"); michael@0: if (!mSkipped || mLengthIncludesSkipped) { michael@0: mRemainingLength -= mRunLength; michael@0: } michael@0: } michael@0: if (!mRemainingLength) michael@0: return false; michael@0: int32_t length; michael@0: mSkipped = mIterator.IsOriginalCharSkipped(&length); michael@0: mRunLength = std::min(length, mRemainingLength); michael@0: } while (!mVisitSkipped && mSkipped); michael@0: michael@0: return true; michael@0: }