michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* michael@0: * nsIContentSerializer implementation that can be used with an michael@0: * nsIDocumentEncoder to convert a DOM into plaintext in a nice way michael@0: * (eg for copy/paste as plaintext). michael@0: */ michael@0: michael@0: #include "nsPlainTextSerializer.h" michael@0: #include "nsLWBrkCIID.h" michael@0: #include "nsIServiceManager.h" michael@0: #include "nsGkAtoms.h" michael@0: #include "nsNameSpaceManager.h" michael@0: #include "nsTextFragment.h" michael@0: #include "nsContentUtils.h" michael@0: #include "nsReadableUtils.h" michael@0: #include "nsUnicharUtils.h" michael@0: #include "nsCRT.h" michael@0: #include "mozilla/dom/Element.h" michael@0: #include "mozilla/Preferences.h" michael@0: michael@0: using namespace mozilla; michael@0: using namespace mozilla::dom; michael@0: michael@0: #define PREF_STRUCTS "converter.html2txt.structs" michael@0: #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy" michael@0: michael@0: static const int32_t kTabSize=4; michael@0: static const int32_t kIndentSizeHeaders = 2; /* Indention of h1, if michael@0: mHeaderStrategy = 1 or = 2. michael@0: Indention of other headers michael@0: is derived from that. michael@0: XXX center h1? */ michael@0: static const int32_t kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1, michael@0: indent h(x+1) this many michael@0: columns more than h(x) */ michael@0: static const int32_t kIndentSizeList = kTabSize; michael@0: // Indention of non-first lines of ul and ol michael@0: static const int32_t kIndentSizeDD = kTabSize; // Indention of

michael@0: static const char16_t kNBSP = 160; michael@0: static const char16_t kSPACE = ' '; michael@0: michael@0: static int32_t HeaderLevel(nsIAtom* aTag); michael@0: static int32_t GetUnicharWidth(char16_t ucs); michael@0: static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n); michael@0: michael@0: // Someday may want to make this non-const: michael@0: static const uint32_t TagStackSize = 500; michael@0: static const uint32_t OLStackSize = 100; michael@0: michael@0: nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) michael@0: { michael@0: nsPlainTextSerializer* it = new nsPlainTextSerializer(); michael@0: if (!it) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: return CallQueryInterface(it, aSerializer); michael@0: } michael@0: michael@0: nsPlainTextSerializer::nsPlainTextSerializer() michael@0: : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant" michael@0: { michael@0: michael@0: mOutputString = nullptr; michael@0: mHeadLevel = 0; michael@0: mAtFirstColumn = true; michael@0: mIndent = 0; michael@0: mCiteQuoteLevel = 0; michael@0: mStructs = true; // will be read from prefs later michael@0: mHeaderStrategy = 1 /*indent increasingly*/; // ditto michael@0: mDontWrapAnyQuotes = false; // ditto michael@0: mHasWrittenCiteBlockquote = false; michael@0: mSpanLevel = 0; michael@0: for (int32_t i = 0; i <= 6; i++) { michael@0: mHeaderCounter[i] = 0; michael@0: } michael@0: michael@0: // Line breaker michael@0: mWrapColumn = 72; // XXX magic number, we expect someone to reset this michael@0: mCurrentLineWidth = 0; michael@0: michael@0: // Flow michael@0: mEmptyLines = 1; // The start of the document is an "empty line" in itself, michael@0: mInWhitespace = false; michael@0: mPreFormatted = false; michael@0: mStartedOutput = false; michael@0: michael@0: // initialize the tag stack to zero: michael@0: // The stack only ever contains pointers to static atoms, so they don't michael@0: // need refcounting. michael@0: mTagStack = new nsIAtom*[TagStackSize]; michael@0: mTagStackIndex = 0; michael@0: mIgnoreAboveIndex = (uint32_t)kNotFound; michael@0: michael@0: // initialize the OL stack, where numbers for ordered lists are kept michael@0: mOLStack = new int32_t[OLStackSize]; michael@0: mOLStackIndex = 0; michael@0: michael@0: mULCount = 0; michael@0: michael@0: mIgnoredChildNodeLevel = 0; michael@0: } michael@0: michael@0: nsPlainTextSerializer::~nsPlainTextSerializer() michael@0: { michael@0: delete[] mTagStack; michael@0: delete[] mOLStack; michael@0: NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!"); michael@0: } michael@0: michael@0: NS_IMPL_ISUPPORTS(nsPlainTextSerializer, michael@0: nsIContentSerializer) michael@0: michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn, michael@0: const char* aCharSet, bool aIsCopying, michael@0: bool aIsWholeDocument) michael@0: { michael@0: #ifdef DEBUG michael@0: // Check if the major control flags are set correctly. michael@0: if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) { michael@0: NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted, michael@0: "If you want format=flowed, you must combine it with " michael@0: "nsIDocumentEncoder::OutputFormatted"); michael@0: } michael@0: michael@0: if (aFlags & nsIDocumentEncoder::OutputFormatted) { michael@0: NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted), michael@0: "Can't do formatted and preformatted output at the same time!"); michael@0: } michael@0: #endif michael@0: michael@0: mFlags = aFlags; michael@0: mWrapColumn = aWrapColumn; michael@0: michael@0: // Only create a linebreaker if we will handle wrapping. michael@0: if (MayWrap()) { michael@0: mLineBreaker = nsContentUtils::LineBreaker(); michael@0: } michael@0: michael@0: // Set the line break character: michael@0: if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) michael@0: && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { michael@0: // Windows michael@0: mLineBreak.AssignLiteral("\r\n"); michael@0: } michael@0: else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { michael@0: // Mac michael@0: mLineBreak.Assign(char16_t('\r')); michael@0: } michael@0: else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { michael@0: // Unix/DOM michael@0: mLineBreak.Assign(char16_t('\n')); michael@0: } michael@0: else { michael@0: // Platform/default michael@0: mLineBreak.AssignLiteral(NS_LINEBREAK); michael@0: } michael@0: michael@0: mLineBreakDue = false; michael@0: mFloatingLines = -1; michael@0: michael@0: if (mFlags & nsIDocumentEncoder::OutputFormatted) { michael@0: // Get some prefs that controls how we do formatted output michael@0: mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs); michael@0: michael@0: mHeaderStrategy = michael@0: Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy); michael@0: michael@0: // DontWrapAnyQuotes is set according to whether plaintext mail michael@0: // is wrapping to window width -- see bug 134439. michael@0: // We'll only want this if we're wrapping and formatted. michael@0: if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) { michael@0: mDontWrapAnyQuotes = michael@0: Preferences::GetBool("mail.compose.wrap_to_window_width", michael@0: mDontWrapAnyQuotes); michael@0: } michael@0: } michael@0: michael@0: // XXX We should let the caller pass this in. michael@0: if (Preferences::GetBool("browser.frames.enabled")) { michael@0: mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent; michael@0: } michael@0: else { michael@0: mFlags |= nsIDocumentEncoder::OutputNoFramesContent; michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: bool michael@0: nsPlainTextSerializer::GetLastBool(const nsTArray& aStack) michael@0: { michael@0: uint32_t size = aStack.Length(); michael@0: if (size == 0) { michael@0: return false; michael@0: } michael@0: return aStack.ElementAt(size-1); michael@0: } michael@0: michael@0: void michael@0: nsPlainTextSerializer::SetLastBool(nsTArray& aStack, bool aValue) michael@0: { michael@0: uint32_t size = aStack.Length(); michael@0: if (size > 0) { michael@0: aStack.ElementAt(size-1) = aValue; michael@0: } michael@0: else { michael@0: NS_ERROR("There is no \"Last\" value"); michael@0: } michael@0: } michael@0: michael@0: void michael@0: nsPlainTextSerializer::PushBool(nsTArray& aStack, bool aValue) michael@0: { michael@0: aStack.AppendElement(bool(aValue)); michael@0: } michael@0: michael@0: bool michael@0: nsPlainTextSerializer::PopBool(nsTArray& aStack) michael@0: { michael@0: bool returnValue = false; michael@0: uint32_t size = aStack.Length(); michael@0: if (size > 0) { michael@0: returnValue = aStack.ElementAt(size-1); michael@0: aStack.RemoveElementAt(size-1); michael@0: } michael@0: return returnValue; michael@0: } michael@0: michael@0: bool michael@0: nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag) michael@0: { michael@0: // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, michael@0: // non-textual container element should be serialized as placeholder michael@0: // character and its child nodes should be ignored. See bug 895239. michael@0: if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) { michael@0: return false; michael@0: } michael@0: michael@0: return michael@0: (aTag == nsGkAtoms::audio) || michael@0: (aTag == nsGkAtoms::canvas) || michael@0: (aTag == nsGkAtoms::iframe) || michael@0: (aTag == nsGkAtoms::meter) || michael@0: (aTag == nsGkAtoms::progress) || michael@0: (aTag == nsGkAtoms::object) || michael@0: (aTag == nsGkAtoms::svg) || michael@0: (aTag == nsGkAtoms::video); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::AppendText(nsIContent* aText, michael@0: int32_t aStartOffset, michael@0: int32_t aEndOffset, michael@0: nsAString& aStr) michael@0: { michael@0: if (mIgnoreAboveIndex != (uint32_t)kNotFound) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); michael@0: if ( aStartOffset < 0 ) michael@0: return NS_ERROR_INVALID_ARG; michael@0: michael@0: NS_ENSURE_ARG(aText); michael@0: michael@0: nsresult rv = NS_OK; michael@0: michael@0: nsIContent* content = aText; michael@0: const nsTextFragment* frag; michael@0: if (!content || !(frag = content->GetText())) { michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: int32_t fragLength = frag->GetLength(); michael@0: int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength); michael@0: NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); michael@0: michael@0: int32_t length = endoffset - aStartOffset; michael@0: if (length <= 0) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsAutoString textstr; michael@0: if (frag->Is2b()) { michael@0: textstr.Assign(frag->Get2b() + aStartOffset, length); michael@0: } michael@0: else { michael@0: // AssignASCII is for 7-bit character only, so don't use it michael@0: const char *data = frag->Get1b(); michael@0: CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr); michael@0: } michael@0: michael@0: mOutputString = &aStr; michael@0: michael@0: // We have to split the string across newlines michael@0: // to match parser behavior michael@0: int32_t start = 0; michael@0: int32_t offset = textstr.FindCharInSet("\n\r"); michael@0: while (offset != kNotFound) { michael@0: michael@0: if (offset>start) { michael@0: // Pass in the line michael@0: DoAddText(false, michael@0: Substring(textstr, start, offset-start)); michael@0: } michael@0: michael@0: // Pass in a newline michael@0: DoAddText(true, mLineBreak); michael@0: michael@0: start = offset+1; michael@0: offset = textstr.FindCharInSet("\n\r", start); michael@0: } michael@0: michael@0: // Consume the last bit of the string if there's any left michael@0: if (start < length) { michael@0: if (start) { michael@0: DoAddText(false, Substring(textstr, start, length - start)); michael@0: } michael@0: else { michael@0: DoAddText(false, textstr); michael@0: } michael@0: } michael@0: michael@0: mOutputString = nullptr; michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection, michael@0: int32_t aStartOffset, michael@0: int32_t aEndOffset, michael@0: nsAString& aStr) michael@0: { michael@0: return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::AppendElementStart(Element* aElement, michael@0: Element* aOriginalElement, michael@0: nsAString& aStr) michael@0: { michael@0: NS_ENSURE_ARG(aElement); michael@0: michael@0: mElement = aElement; michael@0: michael@0: nsresult rv; michael@0: nsIAtom* id = GetIdForContent(mElement); michael@0: michael@0: bool isContainer = !nsContentUtils::IsHTMLVoid(id); michael@0: michael@0: mOutputString = &aStr; michael@0: michael@0: if (isContainer) { michael@0: rv = DoOpenContainer(id); michael@0: } michael@0: else { michael@0: rv = DoAddLeaf(id); michael@0: } michael@0: michael@0: mElement = nullptr; michael@0: mOutputString = nullptr; michael@0: michael@0: if (id == nsGkAtoms::head) { michael@0: ++mHeadLevel; michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::AppendElementEnd(Element* aElement, michael@0: nsAString& aStr) michael@0: { michael@0: NS_ENSURE_ARG(aElement); michael@0: michael@0: mElement = aElement; michael@0: michael@0: nsresult rv; michael@0: nsIAtom* id = GetIdForContent(mElement); michael@0: michael@0: bool isContainer = !nsContentUtils::IsHTMLVoid(id); michael@0: michael@0: mOutputString = &aStr; michael@0: michael@0: rv = NS_OK; michael@0: if (isContainer) { michael@0: rv = DoCloseContainer(id); michael@0: } michael@0: michael@0: mElement = nullptr; michael@0: mOutputString = nullptr; michael@0: michael@0: if (id == nsGkAtoms::head) { michael@0: NS_ASSERTION(mHeadLevel != 0, michael@0: "mHeadLevel being decremented below 0"); michael@0: --mHeadLevel; michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::Flush(nsAString& aStr) michael@0: { michael@0: mOutputString = &aStr; michael@0: FlushLine(); michael@0: mOutputString = nullptr; michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument, michael@0: nsAString& aStr) michael@0: { michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult michael@0: nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag) michael@0: { michael@0: // Check if we need output current node as placeholder character and ignore michael@0: // child nodes. michael@0: if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { michael@0: if (mIgnoredChildNodeLevel == 0) { michael@0: // Serialize current node as placeholder character michael@0: Write(NS_LITERAL_STRING("\xFFFC")); michael@0: } michael@0: // Ignore child nodes. michael@0: mIgnoredChildNodeLevel++; michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (mFlags & nsIDocumentEncoder::OutputRaw) { michael@0: // Raw means raw. Don't even think about doing anything fancy michael@0: // here like indenting, adding line breaks or any other michael@0: // characters such as list item bullets, quote characters michael@0: // around , etc. I mean it! Don't make me smack you! michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (mTagStackIndex < TagStackSize) { michael@0: mTagStack[mTagStackIndex++] = aTag; michael@0: } michael@0: michael@0: if (mIgnoreAboveIndex != (uint32_t)kNotFound) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: // Reset this so that

doesn't affect the whitespace michael@0: // above random

s below it.
michael@0:   mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
michael@0:                               aTag == nsGkAtoms::pre;
michael@0: 
michael@0:   bool isInCiteBlockquote = false;
michael@0: 
michael@0:   // XXX special-case  so that we don't add additional
michael@0:   // newlines before the text.
michael@0:   if (aTag == nsGkAtoms::blockquote) {
michael@0:     nsAutoString value;
michael@0:     nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
michael@0:     isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
michael@0:   }
michael@0: 
michael@0:   if (mLineBreakDue && !isInCiteBlockquote)
michael@0:     EnsureVerticalSpace(mFloatingLines);
michael@0: 
michael@0:   // Check if this tag's content that should not be output
michael@0:   if ((aTag == nsGkAtoms::noscript &&
michael@0:        !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
michael@0:       ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
michael@0:        !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
michael@0:     // Ignore everything that follows the current tag in 
michael@0:     // question until a matching end tag is encountered.
michael@0:     mIgnoreAboveIndex = mTagStackIndex - 1;
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   if (aTag == nsGkAtoms::body) {
michael@0:     // Try to figure out here whether we have a
michael@0:     // preformatted style attribute.
michael@0:     //
michael@0:     // Trigger on the presence of a "pre-wrap" in the
michael@0:     // style attribute. That's a very simplistic way to do
michael@0:     // it, but better than nothing.
michael@0:     // Also set mWrapColumn to the value given there
michael@0:     // (which arguably we should only do if told to do so).
michael@0:     nsAutoString style;
michael@0:     int32_t whitespace;
michael@0:     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
michael@0:        (kNotFound != (whitespace = style.Find("white-space:")))) {
michael@0: 
michael@0:       if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
michael@0: #ifdef DEBUG_preformatted
michael@0:         printf("Set mPreFormatted based on style pre-wrap\n");
michael@0: #endif
michael@0:         mPreFormatted = true;
michael@0:         int32_t widthOffset = style.Find("width:");
michael@0:         if (widthOffset >= 0) {
michael@0:           // We have to search for the ch before the semicolon,
michael@0:           // not for the semicolon itself, because nsString::ToInteger()
michael@0:           // considers 'c' to be a valid numeric char (even if radix=10)
michael@0:           // but then gets confused if it sees it next to the number
michael@0:           // when the radix specified was 10, and returns an error code.
michael@0:           int32_t semiOffset = style.Find("ch", false, widthOffset+6);
michael@0:           int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
michael@0:                             : style.Length() - widthOffset);
michael@0:           nsAutoString widthstr;
michael@0:           style.Mid(widthstr, widthOffset+6, length);
michael@0:           nsresult err;
michael@0:           int32_t col = widthstr.ToInteger(&err);
michael@0: 
michael@0:           if (NS_SUCCEEDED(err)) {
michael@0:             mWrapColumn = (uint32_t)col;
michael@0: #ifdef DEBUG_preformatted
michael@0:             printf("Set wrap column to %d based on style\n", mWrapColumn);
michael@0: #endif
michael@0:           }
michael@0:         }
michael@0:       }
michael@0:       else if (kNotFound != style.Find("pre", true, whitespace)) {
michael@0: #ifdef DEBUG_preformatted
michael@0:         printf("Set mPreFormatted based on style pre\n");
michael@0: #endif
michael@0:         mPreFormatted = true;
michael@0:         mWrapColumn = 0;
michael@0:       }
michael@0:     } 
michael@0:     else {
michael@0:       /* See comment at end of function. */
michael@0:       mInWhitespace = true;
michael@0:       mPreFormatted = false;
michael@0:     }
michael@0: 
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // Keep this in sync with DoCloseContainer!
michael@0:   if (!DoOutput()) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   if (aTag == nsGkAtoms::p)
michael@0:     EnsureVerticalSpace(1);
michael@0:   else if (aTag == nsGkAtoms::pre) {
michael@0:     if (GetLastBool(mIsInCiteBlockquote))
michael@0:       EnsureVerticalSpace(0);
michael@0:     else if (mHasWrittenCiteBlockquote) {
michael@0:       EnsureVerticalSpace(0);
michael@0:       mHasWrittenCiteBlockquote = false;
michael@0:     }
michael@0:     else
michael@0:       EnsureVerticalSpace(1);
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::tr) {
michael@0:     PushBool(mHasWrittenCellsForRow, false);
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
michael@0:     // We must make sure that the content of two table cells get a
michael@0:     // space between them.
michael@0: 
michael@0:     // To make the separation between cells most obvious and
michael@0:     // importable, we use a TAB.
michael@0:     if (GetLastBool(mHasWrittenCellsForRow)) {
michael@0:       // Bypass |Write| so that the TAB isn't compressed away.
michael@0:       AddToLine(MOZ_UTF16("\t"), 1);
michael@0:       mInWhitespace = true;
michael@0:     }
michael@0:     else if (mHasWrittenCellsForRow.IsEmpty()) {
michael@0:       // We don't always see a  (nor a ) before the  if we're
michael@0:       // copying part of a table
michael@0:       PushBool(mHasWrittenCellsForRow, true); // will never be popped
michael@0:     }
michael@0:     else {
michael@0:       SetLastBool(mHasWrittenCellsForRow, true);
michael@0:     }
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::ul) {
michael@0:     // Indent here to support nested lists, which aren't included in li :-(
michael@0:     EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
michael@0:          // Must end the current line before we change indention
michael@0:     mIndent += kIndentSizeList;
michael@0:     mULCount++;
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::ol) {
michael@0:     EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
michael@0:     if (mFlags & nsIDocumentEncoder::OutputFormatted) {
michael@0:       // Must end the current line before we change indention
michael@0:       if (mOLStackIndex < OLStackSize) {
michael@0:         nsAutoString startAttr;
michael@0:         int32_t startVal = 1;
michael@0:         if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
michael@0:           nsresult rv = NS_OK;
michael@0:           startVal = startAttr.ToInteger(&rv);
michael@0:           if (NS_FAILED(rv))
michael@0:             startVal = 1;
michael@0:         }
michael@0:         mOLStack[mOLStackIndex++] = startVal;
michael@0:       }
michael@0:     } else {
michael@0:       mOLStackIndex++;
michael@0:     }
michael@0:     mIndent += kIndentSizeList;  // see ul
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::li &&
michael@0:            (mFlags & nsIDocumentEncoder::OutputFormatted)) {
michael@0:     if (mTagStackIndex > 1 && IsInOL()) {
michael@0:       if (mOLStackIndex > 0) {
michael@0:         nsAutoString valueAttr;
michael@0:         if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
michael@0:           nsresult rv = NS_OK;
michael@0:           int32_t valueAttrVal = valueAttr.ToInteger(&rv);
michael@0:           if (NS_SUCCEEDED(rv))
michael@0:             mOLStack[mOLStackIndex-1] = valueAttrVal;
michael@0:         }
michael@0:         // This is what nsBulletFrame does for OLs:
michael@0:         mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
michael@0:       }
michael@0:       else {
michael@0:         mInIndentString.Append(char16_t('#'));
michael@0:       }
michael@0: 
michael@0:       mInIndentString.Append(char16_t('.'));
michael@0: 
michael@0:     }
michael@0:     else {
michael@0:       static char bulletCharArray[] = "*o+#";
michael@0:       uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
michael@0:       char bulletChar = bulletCharArray[index % 4];
michael@0:       mInIndentString.Append(char16_t(bulletChar));
michael@0:     }
michael@0: 
michael@0:     mInIndentString.Append(char16_t(' '));
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::dl) {
michael@0:     EnsureVerticalSpace(1);
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::dt) {
michael@0:     EnsureVerticalSpace(0);
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::dd) {
michael@0:     EnsureVerticalSpace(0);
michael@0:     mIndent += kIndentSizeDD;
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::span) {
michael@0:     ++mSpanLevel;
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::blockquote) {
michael@0:     // Push
michael@0:     PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
michael@0:     if (isInCiteBlockquote) {
michael@0:       EnsureVerticalSpace(0);
michael@0:       mCiteQuoteLevel++;
michael@0:     }
michael@0:     else {
michael@0:       EnsureVerticalSpace(1);
michael@0:       mIndent += kTabSize; // Check for some maximum value?
michael@0:     }
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::q) {
michael@0:     Write(NS_LITERAL_STRING("\""));
michael@0:   }
michael@0: 
michael@0:   // Else make sure we'll separate block level tags,
michael@0:   // even if we're about to leave, before doing any other formatting.
michael@0:   else if (nsContentUtils::IsHTMLBlock(aTag)) {
michael@0:     EnsureVerticalSpace(0);
michael@0:   }
michael@0: 
michael@0:   //////////////////////////////////////////////////////////////
michael@0:   if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
michael@0:     return NS_OK;
michael@0:   }
michael@0:   //////////////////////////////////////////////////////////////
michael@0:   // The rest of this routine is formatted output stuff,
michael@0:   // which we should skip if we're not formatted:
michael@0:   //////////////////////////////////////////////////////////////
michael@0: 
michael@0:   // Push on stack
michael@0:   bool currentNodeIsConverted = IsCurrentNodeConverted();
michael@0: 
michael@0:   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
michael@0:       aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
michael@0:       aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
michael@0:   {
michael@0:     EnsureVerticalSpace(2);
michael@0:     if (mHeaderStrategy == 2) {  // numbered
michael@0:       mIndent += kIndentSizeHeaders;
michael@0:       // Caching
michael@0:       int32_t level = HeaderLevel(aTag);
michael@0:       // Increase counter for current level
michael@0:       mHeaderCounter[level]++;
michael@0:       // Reset all lower levels
michael@0:       int32_t i;
michael@0: 
michael@0:       for (i = level + 1; i <= 6; i++) {
michael@0:         mHeaderCounter[i] = 0;
michael@0:       }
michael@0: 
michael@0:       // Construct numbers
michael@0:       nsAutoString leadup;
michael@0:       for (i = 1; i <= level; i++) {
michael@0:         leadup.AppendInt(mHeaderCounter[i]);
michael@0:         leadup.Append(char16_t('.'));
michael@0:       }
michael@0:       leadup.Append(char16_t(' '));
michael@0:       Write(leadup);
michael@0:     }
michael@0:     else if (mHeaderStrategy == 1) { // indent increasingly
michael@0:       mIndent += kIndentSizeHeaders;
michael@0:       for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
michael@0:            // for h(x), run x-1 times
michael@0:         mIndent += kIndentIncrementHeaders;
michael@0:       }
michael@0:     }
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
michael@0:     nsAutoString url;
michael@0:     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
michael@0:         && !url.IsEmpty()) {
michael@0:       mURL = url;
michael@0:     }
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
michael@0:     Write(NS_LITERAL_STRING("^"));
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
michael@0:     Write(NS_LITERAL_STRING("_"));
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
michael@0:     Write(NS_LITERAL_STRING("|"));
michael@0:   }
michael@0:   else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
michael@0:            && mStructs && !currentNodeIsConverted) {
michael@0:     Write(NS_LITERAL_STRING("*"));
michael@0:   }
michael@0:   else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
michael@0:            && mStructs && !currentNodeIsConverted) {
michael@0:     Write(NS_LITERAL_STRING("/"));
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
michael@0:     Write(NS_LITERAL_STRING("_"));
michael@0:   }
michael@0: 
michael@0:   /* Container elements are always block elements, so we shouldn't
michael@0:      output any whitespace immediately after the container tag even if
michael@0:      there's extra whitespace there because the HTML is pretty-printed
michael@0:      or something. To ensure that happens, tell the serializer we're
michael@0:      already in whitespace so it won't output more. */
michael@0:   mInWhitespace = true;
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: nsresult
michael@0: nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag)
michael@0: {
michael@0:   if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) {
michael@0:     mIgnoredChildNodeLevel--;
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   if (mFlags & nsIDocumentEncoder::OutputRaw) {
michael@0:     // Raw means raw.  Don't even think about doing anything fancy
michael@0:     // here like indenting, adding line breaks or any other
michael@0:     // characters such as list item bullets, quote characters
michael@0:     // around , etc.  I mean it!  Don't make me smack you!
michael@0: 
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   if (mTagStackIndex > 0) {
michael@0:     --mTagStackIndex;
michael@0:   }
michael@0: 
michael@0:   if (mTagStackIndex >= mIgnoreAboveIndex) {
michael@0:     if (mTagStackIndex == mIgnoreAboveIndex) {
michael@0:       // We're dealing with the close tag whose matching
michael@0:       // open tag had set the mIgnoreAboveIndex value.
michael@0:       // Reset mIgnoreAboveIndex before discarding this tag.
michael@0:       mIgnoreAboveIndex = (uint32_t)kNotFound;
michael@0:     }
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // End current line if we're ending a block level tag
michael@0:   if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
michael@0:     // We want the output to end with a new line,
michael@0:     // but in preformatted areas like text fields,
michael@0:     // we can't emit newlines that weren't there.
michael@0:     // So add the newline only in the case of formatted output.
michael@0:     if (mFlags & nsIDocumentEncoder::OutputFormatted) {
michael@0:       EnsureVerticalSpace(0);
michael@0:     }
michael@0:     else {
michael@0:       FlushLine();
michael@0:     }
michael@0:     // We won't want to do anything with these in formatted mode either,
michael@0:     // so just return now:
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // Keep this in sync with DoOpenContainer!
michael@0:   if (!DoOutput()) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   if (aTag == nsGkAtoms::tr) {
michael@0:     PopBool(mHasWrittenCellsForRow);
michael@0:     // Should always end a line, but get no more whitespace
michael@0:     if (mFloatingLines < 0)
michael@0:       mFloatingLines = 0;
michael@0:     mLineBreakDue = true;
michael@0:   }
michael@0:   else if (((aTag == nsGkAtoms::li) ||
michael@0:             (aTag == nsGkAtoms::dt)) &&
michael@0:            (mFlags & nsIDocumentEncoder::OutputFormatted)) {
michael@0:     // Items that should always end a line, but get no more whitespace
michael@0:     if (mFloatingLines < 0)
michael@0:       mFloatingLines = 0;
michael@0:     mLineBreakDue = true;
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::pre) {
michael@0:     mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
michael@0:     mLineBreakDue = true;
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::ul) {
michael@0:     FlushLine();
michael@0:     mIndent -= kIndentSizeList;
michael@0:     if (--mULCount + mOLStackIndex == 0) {
michael@0:       mFloatingLines = 1;
michael@0:       mLineBreakDue = true;
michael@0:     }
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::ol) {
michael@0:     FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
michael@0:     mIndent -= kIndentSizeList;
michael@0:     NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
michael@0:     mOLStackIndex--;
michael@0:     if (mULCount + mOLStackIndex == 0) {
michael@0:       mFloatingLines = 1;
michael@0:       mLineBreakDue = true;
michael@0:     }
michael@0:   }  
michael@0:   else if (aTag == nsGkAtoms::dl) {
michael@0:     mFloatingLines = 1;
michael@0:     mLineBreakDue = true;
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::dd) {
michael@0:     FlushLine();
michael@0:     mIndent -= kIndentSizeDD;
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::span) {
michael@0:     NS_ASSERTION(mSpanLevel, "Span level will be negative!");
michael@0:     --mSpanLevel;
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::div) {
michael@0:     if (mFloatingLines < 0)
michael@0:       mFloatingLines = 0;
michael@0:     mLineBreakDue = true;
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::blockquote) {
michael@0:     FlushLine();    // Is this needed?
michael@0: 
michael@0:     // Pop
michael@0:     bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
michael@0: 
michael@0:     if (isInCiteBlockquote) {
michael@0:       NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
michael@0:       mCiteQuoteLevel--;
michael@0:       mFloatingLines = 0;
michael@0:       mHasWrittenCiteBlockquote = true;
michael@0:     }
michael@0:     else {
michael@0:       mIndent -= kTabSize;
michael@0:       mFloatingLines = 1;
michael@0:     }
michael@0:     mLineBreakDue = true;
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::q) {
michael@0:     Write(NS_LITERAL_STRING("\""));
michael@0:   }
michael@0:   else if (nsContentUtils::IsHTMLBlock(aTag)
michael@0:            && aTag != nsGkAtoms::script) {
michael@0:     // All other blocks get 1 vertical space after them
michael@0:     // in formatted mode, otherwise 0.
michael@0:     // This is hard. Sometimes 0 is a better number, but
michael@0:     // how to know?
michael@0:     if (mFlags & nsIDocumentEncoder::OutputFormatted)
michael@0:       EnsureVerticalSpace(1);
michael@0:     else {
michael@0:       if (mFloatingLines < 0)
michael@0:         mFloatingLines = 0;
michael@0:       mLineBreakDue = true;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   //////////////////////////////////////////////////////////////
michael@0:   if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
michael@0:     return NS_OK;
michael@0:   }
michael@0:   //////////////////////////////////////////////////////////////
michael@0:   // The rest of this routine is formatted output stuff,
michael@0:   // which we should skip if we're not formatted:
michael@0:   //////////////////////////////////////////////////////////////
michael@0: 
michael@0:   // Pop the currentConverted stack
michael@0:   bool currentNodeIsConverted = IsCurrentNodeConverted();
michael@0:   
michael@0:   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
michael@0:       aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
michael@0:       aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
michael@0:     
michael@0:     if (mHeaderStrategy) {  /*numbered or indent increasingly*/ 
michael@0:       mIndent -= kIndentSizeHeaders;
michael@0:     }
michael@0:     if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
michael@0:       for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
michael@0:            // for h(x), run x-1 times
michael@0:         mIndent -= kIndentIncrementHeaders;
michael@0:       }
michael@0:     }
michael@0:     EnsureVerticalSpace(1);
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
michael@0:     nsAutoString temp; 
michael@0:     temp.AssignLiteral(" <");
michael@0:     temp += mURL;
michael@0:     temp.Append(char16_t('>'));
michael@0:     Write(temp);
michael@0:     mURL.Truncate();
michael@0:   }
michael@0:   else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
michael@0:            && mStructs && !currentNodeIsConverted) {
michael@0:     Write(kSpace);
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
michael@0:     Write(NS_LITERAL_STRING("|"));
michael@0:   }
michael@0:   else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
michael@0:            && mStructs && !currentNodeIsConverted) {
michael@0:     Write(NS_LITERAL_STRING("*"));
michael@0:   }
michael@0:   else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
michael@0:            && mStructs && !currentNodeIsConverted) {
michael@0:     Write(NS_LITERAL_STRING("/"));
michael@0:   }
michael@0:   else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
michael@0:     Write(NS_LITERAL_STRING("_"));
michael@0:   }
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: bool
michael@0: nsPlainTextSerializer::MustSuppressLeaf()
michael@0: {
michael@0:   if (mIgnoredChildNodeLevel > 0) {
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   if ((mTagStackIndex > 1 &&
michael@0:        mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
michael@0:       (mTagStackIndex > 0 &&
michael@0:         mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
michael@0:     // Don't output the contents of SELECT elements;
michael@0:     // Might be nice, eventually, to output just the selected element.
michael@0:     // Read more in bug 31994.
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   if (mTagStackIndex > 0 &&
michael@0:       (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
michael@0:        mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
michael@0:     // Don't output the contents of