michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* michael@0: * nsIContentSerializer implementation that can be used with an michael@0: * nsIDocumentEncoder to convert a DOM into plaintext in a nice way michael@0: * (eg for copy/paste as plaintext). michael@0: */ michael@0: michael@0: #include "nsPlainTextSerializer.h" michael@0: #include "nsLWBrkCIID.h" michael@0: #include "nsIServiceManager.h" michael@0: #include "nsGkAtoms.h" michael@0: #include "nsNameSpaceManager.h" michael@0: #include "nsTextFragment.h" michael@0: #include "nsContentUtils.h" michael@0: #include "nsReadableUtils.h" michael@0: #include "nsUnicharUtils.h" michael@0: #include "nsCRT.h" michael@0: #include "mozilla/dom/Element.h" michael@0: #include "mozilla/Preferences.h" michael@0: michael@0: using namespace mozilla; michael@0: using namespace mozilla::dom; michael@0: michael@0: #define PREF_STRUCTS "converter.html2txt.structs" michael@0: #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy" michael@0: michael@0: static const int32_t kTabSize=4; michael@0: static const int32_t kIndentSizeHeaders = 2; /* Indention of h1, if michael@0: mHeaderStrategy = 1 or = 2. michael@0: Indention of other headers michael@0: is derived from that. michael@0: XXX center h1? */ michael@0: static const int32_t kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1, michael@0: indent h(x+1) this many michael@0: columns more than h(x) */ michael@0: static const int32_t kIndentSizeList = kTabSize; michael@0: // Indention of non-first lines of ul and ol michael@0: static const int32_t kIndentSizeDD = kTabSize; // Indention of
michael@0: static const char16_t kNBSP = 160; michael@0: static const char16_t kSPACE = ' '; michael@0: michael@0: static int32_t HeaderLevel(nsIAtom* aTag); michael@0: static int32_t GetUnicharWidth(char16_t ucs); michael@0: static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n); michael@0: michael@0: // Someday may want to make this non-const: michael@0: static const uint32_t TagStackSize = 500; michael@0: static const uint32_t OLStackSize = 100; michael@0: michael@0: nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) michael@0: { michael@0: nsPlainTextSerializer* it = new nsPlainTextSerializer(); michael@0: if (!it) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: return CallQueryInterface(it, aSerializer); michael@0: } michael@0: michael@0: nsPlainTextSerializer::nsPlainTextSerializer() michael@0: : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant" michael@0: { michael@0: michael@0: mOutputString = nullptr; michael@0: mHeadLevel = 0; michael@0: mAtFirstColumn = true; michael@0: mIndent = 0; michael@0: mCiteQuoteLevel = 0; michael@0: mStructs = true; // will be read from prefs later michael@0: mHeaderStrategy = 1 /*indent increasingly*/; // ditto michael@0: mDontWrapAnyQuotes = false; // ditto michael@0: mHasWrittenCiteBlockquote = false; michael@0: mSpanLevel = 0; michael@0: for (int32_t i = 0; i <= 6; i++) { michael@0: mHeaderCounter[i] = 0; michael@0: } michael@0: michael@0: // Line breaker michael@0: mWrapColumn = 72; // XXX magic number, we expect someone to reset this michael@0: mCurrentLineWidth = 0; michael@0: michael@0: // Flow michael@0: mEmptyLines = 1; // The start of the document is an "empty line" in itself, michael@0: mInWhitespace = false; michael@0: mPreFormatted = false; michael@0: mStartedOutput = false; michael@0: michael@0: // initialize the tag stack to zero: michael@0: // The stack only ever contains pointers to static atoms, so they don't michael@0: // need refcounting. michael@0: mTagStack = new nsIAtom*[TagStackSize]; michael@0: mTagStackIndex = 0; michael@0: mIgnoreAboveIndex = (uint32_t)kNotFound; michael@0: michael@0: // initialize the OL stack, where numbers for ordered lists are kept michael@0: mOLStack = new int32_t[OLStackSize]; michael@0: mOLStackIndex = 0; michael@0: michael@0: mULCount = 0; michael@0: michael@0: mIgnoredChildNodeLevel = 0; michael@0: } michael@0: michael@0: nsPlainTextSerializer::~nsPlainTextSerializer() michael@0: { michael@0: delete[] mTagStack; michael@0: delete[] mOLStack; michael@0: NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!"); michael@0: } michael@0: michael@0: NS_IMPL_ISUPPORTS(nsPlainTextSerializer, michael@0: nsIContentSerializer) michael@0: michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn, michael@0: const char* aCharSet, bool aIsCopying, michael@0: bool aIsWholeDocument) michael@0: { michael@0: #ifdef DEBUG michael@0: // Check if the major control flags are set correctly. michael@0: if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) { michael@0: NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted, michael@0: "If you want format=flowed, you must combine it with " michael@0: "nsIDocumentEncoder::OutputFormatted"); michael@0: } michael@0: michael@0: if (aFlags & nsIDocumentEncoder::OutputFormatted) { michael@0: NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted), michael@0: "Can't do formatted and preformatted output at the same time!"); michael@0: } michael@0: #endif michael@0: michael@0: mFlags = aFlags; michael@0: mWrapColumn = aWrapColumn; michael@0: michael@0: // Only create a linebreaker if we will handle wrapping. michael@0: if (MayWrap()) { michael@0: mLineBreaker = nsContentUtils::LineBreaker(); michael@0: } michael@0: michael@0: // Set the line break character: michael@0: if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) michael@0: && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { michael@0: // Windows michael@0: mLineBreak.AssignLiteral("\r\n"); michael@0: } michael@0: else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { michael@0: // Mac michael@0: mLineBreak.Assign(char16_t('\r')); michael@0: } michael@0: else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { michael@0: // Unix/DOM michael@0: mLineBreak.Assign(char16_t('\n')); michael@0: } michael@0: else { michael@0: // Platform/default michael@0: mLineBreak.AssignLiteral(NS_LINEBREAK); michael@0: } michael@0: michael@0: mLineBreakDue = false; michael@0: mFloatingLines = -1; michael@0: michael@0: if (mFlags & nsIDocumentEncoder::OutputFormatted) { michael@0: // Get some prefs that controls how we do formatted output michael@0: mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs); michael@0: michael@0: mHeaderStrategy = michael@0: Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy); michael@0: michael@0: // DontWrapAnyQuotes is set according to whether plaintext mail michael@0: // is wrapping to window width -- see bug 134439. michael@0: // We'll only want this if we're wrapping and formatted. michael@0: if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) { michael@0: mDontWrapAnyQuotes = michael@0: Preferences::GetBool("mail.compose.wrap_to_window_width", michael@0: mDontWrapAnyQuotes); michael@0: } michael@0: } michael@0: michael@0: // XXX We should let the caller pass this in. michael@0: if (Preferences::GetBool("browser.frames.enabled")) { michael@0: mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent; michael@0: } michael@0: else { michael@0: mFlags |= nsIDocumentEncoder::OutputNoFramesContent; michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: bool michael@0: nsPlainTextSerializer::GetLastBool(const nsTArray& aStack) michael@0: { michael@0: uint32_t size = aStack.Length(); michael@0: if (size == 0) { michael@0: return false; michael@0: } michael@0: return aStack.ElementAt(size-1); michael@0: } michael@0: michael@0: void michael@0: nsPlainTextSerializer::SetLastBool(nsTArray& aStack, bool aValue) michael@0: { michael@0: uint32_t size = aStack.Length(); michael@0: if (size > 0) { michael@0: aStack.ElementAt(size-1) = aValue; michael@0: } michael@0: else { michael@0: NS_ERROR("There is no \"Last\" value"); michael@0: } michael@0: } michael@0: michael@0: void michael@0: nsPlainTextSerializer::PushBool(nsTArray& aStack, bool aValue) michael@0: { michael@0: aStack.AppendElement(bool(aValue)); michael@0: } michael@0: michael@0: bool michael@0: nsPlainTextSerializer::PopBool(nsTArray& aStack) michael@0: { michael@0: bool returnValue = false; michael@0: uint32_t size = aStack.Length(); michael@0: if (size > 0) { michael@0: returnValue = aStack.ElementAt(size-1); michael@0: aStack.RemoveElementAt(size-1); michael@0: } michael@0: return returnValue; michael@0: } michael@0: michael@0: bool michael@0: nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag) michael@0: { michael@0: // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, michael@0: // non-textual container element should be serialized as placeholder michael@0: // character and its child nodes should be ignored. See bug 895239. michael@0: if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) { michael@0: return false; michael@0: } michael@0: michael@0: return michael@0: (aTag == nsGkAtoms::audio) || michael@0: (aTag == nsGkAtoms::canvas) || michael@0: (aTag == nsGkAtoms::iframe) || michael@0: (aTag == nsGkAtoms::meter) || michael@0: (aTag == nsGkAtoms::progress) || michael@0: (aTag == nsGkAtoms::object) || michael@0: (aTag == nsGkAtoms::svg) || michael@0: (aTag == nsGkAtoms::video); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::AppendText(nsIContent* aText, michael@0: int32_t aStartOffset, michael@0: int32_t aEndOffset, michael@0: nsAString& aStr) michael@0: { michael@0: if (mIgnoreAboveIndex != (uint32_t)kNotFound) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); michael@0: if ( aStartOffset < 0 ) michael@0: return NS_ERROR_INVALID_ARG; michael@0: michael@0: NS_ENSURE_ARG(aText); michael@0: michael@0: nsresult rv = NS_OK; michael@0: michael@0: nsIContent* content = aText; michael@0: const nsTextFragment* frag; michael@0: if (!content || !(frag = content->GetText())) { michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: int32_t fragLength = frag->GetLength(); michael@0: int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength); michael@0: NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); michael@0: michael@0: int32_t length = endoffset - aStartOffset; michael@0: if (length <= 0) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsAutoString textstr; michael@0: if (frag->Is2b()) { michael@0: textstr.Assign(frag->Get2b() + aStartOffset, length); michael@0: } michael@0: else { michael@0: // AssignASCII is for 7-bit character only, so don't use it michael@0: const char *data = frag->Get1b(); michael@0: CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr); michael@0: } michael@0: michael@0: mOutputString = &aStr; michael@0: michael@0: // We have to split the string across newlines michael@0: // to match parser behavior michael@0: int32_t start = 0; michael@0: int32_t offset = textstr.FindCharInSet("\n\r"); michael@0: while (offset != kNotFound) { michael@0: michael@0: if (offset>start) { michael@0: // Pass in the line michael@0: DoAddText(false, michael@0: Substring(textstr, start, offset-start)); michael@0: } michael@0: michael@0: // Pass in a newline michael@0: DoAddText(true, mLineBreak); michael@0: michael@0: start = offset+1; michael@0: offset = textstr.FindCharInSet("\n\r", start); michael@0: } michael@0: michael@0: // Consume the last bit of the string if there's any left michael@0: if (start < length) { michael@0: if (start) { michael@0: DoAddText(false, Substring(textstr, start, length - start)); michael@0: } michael@0: else { michael@0: DoAddText(false, textstr); michael@0: } michael@0: } michael@0: michael@0: mOutputString = nullptr; michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection, michael@0: int32_t aStartOffset, michael@0: int32_t aEndOffset, michael@0: nsAString& aStr) michael@0: { michael@0: return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::AppendElementStart(Element* aElement, michael@0: Element* aOriginalElement, michael@0: nsAString& aStr) michael@0: { michael@0: NS_ENSURE_ARG(aElement); michael@0: michael@0: mElement = aElement; michael@0: michael@0: nsresult rv; michael@0: nsIAtom* id = GetIdForContent(mElement); michael@0: michael@0: bool isContainer = !nsContentUtils::IsHTMLVoid(id); michael@0: michael@0: mOutputString = &aStr; michael@0: michael@0: if (isContainer) { michael@0: rv = DoOpenContainer(id); michael@0: } michael@0: else { michael@0: rv = DoAddLeaf(id); michael@0: } michael@0: michael@0: mElement = nullptr; michael@0: mOutputString = nullptr; michael@0: michael@0: if (id == nsGkAtoms::head) { michael@0: ++mHeadLevel; michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::AppendElementEnd(Element* aElement, michael@0: nsAString& aStr) michael@0: { michael@0: NS_ENSURE_ARG(aElement); michael@0: michael@0: mElement = aElement; michael@0: michael@0: nsresult rv; michael@0: nsIAtom* id = GetIdForContent(mElement); michael@0: michael@0: bool isContainer = !nsContentUtils::IsHTMLVoid(id); michael@0: michael@0: mOutputString = &aStr; michael@0: michael@0: rv = NS_OK; michael@0: if (isContainer) { michael@0: rv = DoCloseContainer(id); michael@0: } michael@0: michael@0: mElement = nullptr; michael@0: mOutputString = nullptr; michael@0: michael@0: if (id == nsGkAtoms::head) { michael@0: NS_ASSERTION(mHeadLevel != 0, michael@0: "mHeadLevel being decremented below 0"); michael@0: --mHeadLevel; michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::Flush(nsAString& aStr) michael@0: { michael@0: mOutputString = &aStr; michael@0: FlushLine(); michael@0: mOutputString = nullptr; michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument, michael@0: nsAString& aStr) michael@0: { michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult michael@0: nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag) michael@0: { michael@0: // Check if we need output current node as placeholder character and ignore michael@0: // child nodes. michael@0: if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { michael@0: if (mIgnoredChildNodeLevel == 0) { michael@0: // Serialize current node as placeholder character michael@0: Write(NS_LITERAL_STRING("\xFFFC")); michael@0: } michael@0: // Ignore child nodes. michael@0: mIgnoredChildNodeLevel++; michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (mFlags & nsIDocumentEncoder::OutputRaw) { michael@0: // Raw means raw. Don't even think about doing anything fancy michael@0: // here like indenting, adding line breaks or any other michael@0: // characters such as list item bullets, quote characters michael@0: // around , etc. I mean it! Don't make me smack you! michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (mTagStackIndex < TagStackSize) { michael@0: mTagStack[mTagStackIndex++] = aTag; michael@0: } michael@0: michael@0: if (mIgnoreAboveIndex != (uint32_t)kNotFound) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: // Reset this so that
doesn't affect the whitespace michael@0: // above random
s below it.
michael@0:   mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
michael@0:                               aTag == nsGkAtoms::pre;
michael@0: 
michael@0:   bool isInCiteBlockquote = false;
michael@0: 
michael@0:   // XXX special-case 
so that we don't add additional michael@0: // newlines before the text. michael@0: if (aTag == nsGkAtoms::blockquote) { michael@0: nsAutoString value; michael@0: nsresult rv = GetAttributeValue(nsGkAtoms::type, value); michael@0: isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite"); michael@0: } michael@0: michael@0: if (mLineBreakDue && !isInCiteBlockquote) michael@0: EnsureVerticalSpace(mFloatingLines); michael@0: michael@0: // Check if this tag's content that should not be output michael@0: if ((aTag == nsGkAtoms::noscript && michael@0: !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) || michael@0: ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) && michael@0: !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) { michael@0: // Ignore everything that follows the current tag in michael@0: // question until a matching end tag is encountered. michael@0: mIgnoreAboveIndex = mTagStackIndex - 1; michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (aTag == nsGkAtoms::body) { michael@0: // Try to figure out here whether we have a michael@0: // preformatted style attribute. michael@0: // michael@0: // Trigger on the presence of a "pre-wrap" in the michael@0: // style attribute. That's a very simplistic way to do michael@0: // it, but better than nothing. michael@0: // Also set mWrapColumn to the value given there michael@0: // (which arguably we should only do if told to do so). michael@0: nsAutoString style; michael@0: int32_t whitespace; michael@0: if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) && michael@0: (kNotFound != (whitespace = style.Find("white-space:")))) { michael@0: michael@0: if (kNotFound != style.Find("pre-wrap", true, whitespace)) { michael@0: #ifdef DEBUG_preformatted michael@0: printf("Set mPreFormatted based on style pre-wrap\n"); michael@0: #endif michael@0: mPreFormatted = true; michael@0: int32_t widthOffset = style.Find("width:"); michael@0: if (widthOffset >= 0) { michael@0: // We have to search for the ch before the semicolon, michael@0: // not for the semicolon itself, because nsString::ToInteger() michael@0: // considers 'c' to be a valid numeric char (even if radix=10) michael@0: // but then gets confused if it sees it next to the number michael@0: // when the radix specified was 10, and returns an error code. michael@0: int32_t semiOffset = style.Find("ch", false, widthOffset+6); michael@0: int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6 michael@0: : style.Length() - widthOffset); michael@0: nsAutoString widthstr; michael@0: style.Mid(widthstr, widthOffset+6, length); michael@0: nsresult err; michael@0: int32_t col = widthstr.ToInteger(&err); michael@0: michael@0: if (NS_SUCCEEDED(err)) { michael@0: mWrapColumn = (uint32_t)col; michael@0: #ifdef DEBUG_preformatted michael@0: printf("Set wrap column to %d based on style\n", mWrapColumn); michael@0: #endif michael@0: } michael@0: } michael@0: } michael@0: else if (kNotFound != style.Find("pre", true, whitespace)) { michael@0: #ifdef DEBUG_preformatted michael@0: printf("Set mPreFormatted based on style pre\n"); michael@0: #endif michael@0: mPreFormatted = true; michael@0: mWrapColumn = 0; michael@0: } michael@0: } michael@0: else { michael@0: /* See comment at end of function. */ michael@0: mInWhitespace = true; michael@0: mPreFormatted = false; michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: // Keep this in sync with DoCloseContainer! michael@0: if (!DoOutput()) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (aTag == nsGkAtoms::p) michael@0: EnsureVerticalSpace(1); michael@0: else if (aTag == nsGkAtoms::pre) { michael@0: if (GetLastBool(mIsInCiteBlockquote)) michael@0: EnsureVerticalSpace(0); michael@0: else if (mHasWrittenCiteBlockquote) { michael@0: EnsureVerticalSpace(0); michael@0: mHasWrittenCiteBlockquote = false; michael@0: } michael@0: else michael@0: EnsureVerticalSpace(1); michael@0: } michael@0: else if (aTag == nsGkAtoms::tr) { michael@0: PushBool(mHasWrittenCellsForRow, false); michael@0: } michael@0: else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) { michael@0: // We must make sure that the content of two table cells get a michael@0: // space between them. michael@0: michael@0: // To make the separation between cells most obvious and michael@0: // importable, we use a TAB. michael@0: if (GetLastBool(mHasWrittenCellsForRow)) { michael@0: // Bypass |Write| so that the TAB isn't compressed away. michael@0: AddToLine(MOZ_UTF16("\t"), 1); michael@0: mInWhitespace = true; michael@0: } michael@0: else if (mHasWrittenCellsForRow.IsEmpty()) { michael@0: // We don't always see a (nor a ) before the
if we're michael@0: // copying part of a table michael@0: PushBool(mHasWrittenCellsForRow, true); // will never be popped michael@0: } michael@0: else { michael@0: SetLastBool(mHasWrittenCellsForRow, true); michael@0: } michael@0: } michael@0: else if (aTag == nsGkAtoms::ul) { michael@0: // Indent here to support nested lists, which aren't included in li :-( michael@0: EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); michael@0: // Must end the current line before we change indention michael@0: mIndent += kIndentSizeList; michael@0: mULCount++; michael@0: } michael@0: else if (aTag == nsGkAtoms::ol) { michael@0: EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); michael@0: if (mFlags & nsIDocumentEncoder::OutputFormatted) { michael@0: // Must end the current line before we change indention michael@0: if (mOLStackIndex < OLStackSize) { michael@0: nsAutoString startAttr; michael@0: int32_t startVal = 1; michael@0: if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) { michael@0: nsresult rv = NS_OK; michael@0: startVal = startAttr.ToInteger(&rv); michael@0: if (NS_FAILED(rv)) michael@0: startVal = 1; michael@0: } michael@0: mOLStack[mOLStackIndex++] = startVal; michael@0: } michael@0: } else { michael@0: mOLStackIndex++; michael@0: } michael@0: mIndent += kIndentSizeList; // see ul michael@0: } michael@0: else if (aTag == nsGkAtoms::li && michael@0: (mFlags & nsIDocumentEncoder::OutputFormatted)) { michael@0: if (mTagStackIndex > 1 && IsInOL()) { michael@0: if (mOLStackIndex > 0) { michael@0: nsAutoString valueAttr; michael@0: if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) { michael@0: nsresult rv = NS_OK; michael@0: int32_t valueAttrVal = valueAttr.ToInteger(&rv); michael@0: if (NS_SUCCEEDED(rv)) michael@0: mOLStack[mOLStackIndex-1] = valueAttrVal; michael@0: } michael@0: // This is what nsBulletFrame does for OLs: michael@0: mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10); michael@0: } michael@0: else { michael@0: mInIndentString.Append(char16_t('#')); michael@0: } michael@0: michael@0: mInIndentString.Append(char16_t('.')); michael@0: michael@0: } michael@0: else { michael@0: static char bulletCharArray[] = "*o+#"; michael@0: uint32_t index = mULCount > 0 ? (mULCount - 1) : 3; michael@0: char bulletChar = bulletCharArray[index % 4]; michael@0: mInIndentString.Append(char16_t(bulletChar)); michael@0: } michael@0: michael@0: mInIndentString.Append(char16_t(' ')); michael@0: } michael@0: else if (aTag == nsGkAtoms::dl) { michael@0: EnsureVerticalSpace(1); michael@0: } michael@0: else if (aTag == nsGkAtoms::dt) { michael@0: EnsureVerticalSpace(0); michael@0: } michael@0: else if (aTag == nsGkAtoms::dd) { michael@0: EnsureVerticalSpace(0); michael@0: mIndent += kIndentSizeDD; michael@0: } michael@0: else if (aTag == nsGkAtoms::span) { michael@0: ++mSpanLevel; michael@0: } michael@0: else if (aTag == nsGkAtoms::blockquote) { michael@0: // Push michael@0: PushBool(mIsInCiteBlockquote, isInCiteBlockquote); michael@0: if (isInCiteBlockquote) { michael@0: EnsureVerticalSpace(0); michael@0: mCiteQuoteLevel++; michael@0: } michael@0: else { michael@0: EnsureVerticalSpace(1); michael@0: mIndent += kTabSize; // Check for some maximum value? michael@0: } michael@0: } michael@0: else if (aTag == nsGkAtoms::q) { michael@0: Write(NS_LITERAL_STRING("\"")); michael@0: } michael@0: michael@0: // Else make sure we'll separate block level tags, michael@0: // even if we're about to leave, before doing any other formatting. michael@0: else if (nsContentUtils::IsHTMLBlock(aTag)) { michael@0: EnsureVerticalSpace(0); michael@0: } michael@0: michael@0: ////////////////////////////////////////////////////////////// michael@0: if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { michael@0: return NS_OK; michael@0: } michael@0: ////////////////////////////////////////////////////////////// michael@0: // The rest of this routine is formatted output stuff, michael@0: // which we should skip if we're not formatted: michael@0: ////////////////////////////////////////////////////////////// michael@0: michael@0: // Push on stack michael@0: bool currentNodeIsConverted = IsCurrentNodeConverted(); michael@0: michael@0: if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || michael@0: aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || michael@0: aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) michael@0: { michael@0: EnsureVerticalSpace(2); michael@0: if (mHeaderStrategy == 2) { // numbered michael@0: mIndent += kIndentSizeHeaders; michael@0: // Caching michael@0: int32_t level = HeaderLevel(aTag); michael@0: // Increase counter for current level michael@0: mHeaderCounter[level]++; michael@0: // Reset all lower levels michael@0: int32_t i; michael@0: michael@0: for (i = level + 1; i <= 6; i++) { michael@0: mHeaderCounter[i] = 0; michael@0: } michael@0: michael@0: // Construct numbers michael@0: nsAutoString leadup; michael@0: for (i = 1; i <= level; i++) { michael@0: leadup.AppendInt(mHeaderCounter[i]); michael@0: leadup.Append(char16_t('.')); michael@0: } michael@0: leadup.Append(char16_t(' ')); michael@0: Write(leadup); michael@0: } michael@0: else if (mHeaderStrategy == 1) { // indent increasingly michael@0: mIndent += kIndentSizeHeaders; michael@0: for (int32_t i = HeaderLevel(aTag); i > 1; i--) { michael@0: // for h(x), run x-1 times michael@0: mIndent += kIndentIncrementHeaders; michael@0: } michael@0: } michael@0: } michael@0: else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) { michael@0: nsAutoString url; michael@0: if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) michael@0: && !url.IsEmpty()) { michael@0: mURL = url; michael@0: } michael@0: } michael@0: else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) { michael@0: Write(NS_LITERAL_STRING("^")); michael@0: } michael@0: else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) { michael@0: Write(NS_LITERAL_STRING("_")); michael@0: } michael@0: else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { michael@0: Write(NS_LITERAL_STRING("|")); michael@0: } michael@0: else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) michael@0: && mStructs && !currentNodeIsConverted) { michael@0: Write(NS_LITERAL_STRING("*")); michael@0: } michael@0: else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) michael@0: && mStructs && !currentNodeIsConverted) { michael@0: Write(NS_LITERAL_STRING("/")); michael@0: } michael@0: else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { michael@0: Write(NS_LITERAL_STRING("_")); michael@0: } michael@0: michael@0: /* Container elements are always block elements, so we shouldn't michael@0: output any whitespace immediately after the container tag even if michael@0: there's extra whitespace there because the HTML is pretty-printed michael@0: or something. To ensure that happens, tell the serializer we're michael@0: already in whitespace so it won't output more. */ michael@0: mInWhitespace = true; michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult michael@0: nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag) michael@0: { michael@0: if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { michael@0: mIgnoredChildNodeLevel--; michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (mFlags & nsIDocumentEncoder::OutputRaw) { michael@0: // Raw means raw. Don't even think about doing anything fancy michael@0: // here like indenting, adding line breaks or any other michael@0: // characters such as list item bullets, quote characters michael@0: // around , etc. I mean it! Don't make me smack you! michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (mTagStackIndex > 0) { michael@0: --mTagStackIndex; michael@0: } michael@0: michael@0: if (mTagStackIndex >= mIgnoreAboveIndex) { michael@0: if (mTagStackIndex == mIgnoreAboveIndex) { michael@0: // We're dealing with the close tag whose matching michael@0: // open tag had set the mIgnoreAboveIndex value. michael@0: // Reset mIgnoreAboveIndex before discarding this tag. michael@0: mIgnoreAboveIndex = (uint32_t)kNotFound; michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: // End current line if we're ending a block level tag michael@0: if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) { michael@0: // We want the output to end with a new line, michael@0: // but in preformatted areas like text fields, michael@0: // we can't emit newlines that weren't there. michael@0: // So add the newline only in the case of formatted output. michael@0: if (mFlags & nsIDocumentEncoder::OutputFormatted) { michael@0: EnsureVerticalSpace(0); michael@0: } michael@0: else { michael@0: FlushLine(); michael@0: } michael@0: // We won't want to do anything with these in formatted mode either, michael@0: // so just return now: michael@0: return NS_OK; michael@0: } michael@0: michael@0: // Keep this in sync with DoOpenContainer! michael@0: if (!DoOutput()) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (aTag == nsGkAtoms::tr) { michael@0: PopBool(mHasWrittenCellsForRow); michael@0: // Should always end a line, but get no more whitespace michael@0: if (mFloatingLines < 0) michael@0: mFloatingLines = 0; michael@0: mLineBreakDue = true; michael@0: } michael@0: else if (((aTag == nsGkAtoms::li) || michael@0: (aTag == nsGkAtoms::dt)) && michael@0: (mFlags & nsIDocumentEncoder::OutputFormatted)) { michael@0: // Items that should always end a line, but get no more whitespace michael@0: if (mFloatingLines < 0) michael@0: mFloatingLines = 0; michael@0: mLineBreakDue = true; michael@0: } michael@0: else if (aTag == nsGkAtoms::pre) { michael@0: mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1; michael@0: mLineBreakDue = true; michael@0: } michael@0: else if (aTag == nsGkAtoms::ul) { michael@0: FlushLine(); michael@0: mIndent -= kIndentSizeList; michael@0: if (--mULCount + mOLStackIndex == 0) { michael@0: mFloatingLines = 1; michael@0: mLineBreakDue = true; michael@0: } michael@0: } michael@0: else if (aTag == nsGkAtoms::ol) { michael@0: FlushLine(); // Doing this after decreasing OLStackIndex would be wrong. michael@0: mIndent -= kIndentSizeList; michael@0: NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!"); michael@0: mOLStackIndex--; michael@0: if (mULCount + mOLStackIndex == 0) { michael@0: mFloatingLines = 1; michael@0: mLineBreakDue = true; michael@0: } michael@0: } michael@0: else if (aTag == nsGkAtoms::dl) { michael@0: mFloatingLines = 1; michael@0: mLineBreakDue = true; michael@0: } michael@0: else if (aTag == nsGkAtoms::dd) { michael@0: FlushLine(); michael@0: mIndent -= kIndentSizeDD; michael@0: } michael@0: else if (aTag == nsGkAtoms::span) { michael@0: NS_ASSERTION(mSpanLevel, "Span level will be negative!"); michael@0: --mSpanLevel; michael@0: } michael@0: else if (aTag == nsGkAtoms::div) { michael@0: if (mFloatingLines < 0) michael@0: mFloatingLines = 0; michael@0: mLineBreakDue = true; michael@0: } michael@0: else if (aTag == nsGkAtoms::blockquote) { michael@0: FlushLine(); // Is this needed? michael@0: michael@0: // Pop michael@0: bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote); michael@0: michael@0: if (isInCiteBlockquote) { michael@0: NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!"); michael@0: mCiteQuoteLevel--; michael@0: mFloatingLines = 0; michael@0: mHasWrittenCiteBlockquote = true; michael@0: } michael@0: else { michael@0: mIndent -= kTabSize; michael@0: mFloatingLines = 1; michael@0: } michael@0: mLineBreakDue = true; michael@0: } michael@0: else if (aTag == nsGkAtoms::q) { michael@0: Write(NS_LITERAL_STRING("\"")); michael@0: } michael@0: else if (nsContentUtils::IsHTMLBlock(aTag) michael@0: && aTag != nsGkAtoms::script) { michael@0: // All other blocks get 1 vertical space after them michael@0: // in formatted mode, otherwise 0. michael@0: // This is hard. Sometimes 0 is a better number, but michael@0: // how to know? michael@0: if (mFlags & nsIDocumentEncoder::OutputFormatted) michael@0: EnsureVerticalSpace(1); michael@0: else { michael@0: if (mFloatingLines < 0) michael@0: mFloatingLines = 0; michael@0: mLineBreakDue = true; michael@0: } michael@0: } michael@0: michael@0: ////////////////////////////////////////////////////////////// michael@0: if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { michael@0: return NS_OK; michael@0: } michael@0: ////////////////////////////////////////////////////////////// michael@0: // The rest of this routine is formatted output stuff, michael@0: // which we should skip if we're not formatted: michael@0: ////////////////////////////////////////////////////////////// michael@0: michael@0: // Pop the currentConverted stack michael@0: bool currentNodeIsConverted = IsCurrentNodeConverted(); michael@0: michael@0: if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || michael@0: aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || michael@0: aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) { michael@0: michael@0: if (mHeaderStrategy) { /*numbered or indent increasingly*/ michael@0: mIndent -= kIndentSizeHeaders; michael@0: } michael@0: if (mHeaderStrategy == 1 /*indent increasingly*/ ) { michael@0: for (int32_t i = HeaderLevel(aTag); i > 1; i--) { michael@0: // for h(x), run x-1 times michael@0: mIndent -= kIndentIncrementHeaders; michael@0: } michael@0: } michael@0: EnsureVerticalSpace(1); michael@0: } michael@0: else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) { michael@0: nsAutoString temp; michael@0: temp.AssignLiteral(" <"); michael@0: temp += mURL; michael@0: temp.Append(char16_t('>')); michael@0: Write(temp); michael@0: mURL.Truncate(); michael@0: } michael@0: else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) michael@0: && mStructs && !currentNodeIsConverted) { michael@0: Write(kSpace); michael@0: } michael@0: else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { michael@0: Write(NS_LITERAL_STRING("|")); michael@0: } michael@0: else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) michael@0: && mStructs && !currentNodeIsConverted) { michael@0: Write(NS_LITERAL_STRING("*")); michael@0: } michael@0: else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) michael@0: && mStructs && !currentNodeIsConverted) { michael@0: Write(NS_LITERAL_STRING("/")); michael@0: } michael@0: else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { michael@0: Write(NS_LITERAL_STRING("_")); michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: bool michael@0: nsPlainTextSerializer::MustSuppressLeaf() michael@0: { michael@0: if (mIgnoredChildNodeLevel > 0) { michael@0: return true; michael@0: } michael@0: michael@0: if ((mTagStackIndex > 1 && michael@0: mTagStack[mTagStackIndex-2] == nsGkAtoms::select) || michael@0: (mTagStackIndex > 0 && michael@0: mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) { michael@0: // Don't output the contents of SELECT elements; michael@0: // Might be nice, eventually, to output just the selected element. michael@0: // Read more in bug 31994. michael@0: return true; michael@0: } michael@0: michael@0: if (mTagStackIndex > 0 && michael@0: (mTagStack[mTagStackIndex-1] == nsGkAtoms::script || michael@0: mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) { michael@0: // Don't output the contents of