diff -r 000000000000 -r 6474c204b198 content/base/src/nsPlainTextSerializer.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/content/base/src/nsPlainTextSerializer.cpp Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,1963 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
+ * (eg for copy/paste as plaintext).
+ */
+
+#include "nsPlainTextSerializer.h"
+#include "nsLWBrkCIID.h"
+#include "nsIServiceManager.h"
+#include "nsGkAtoms.h"
+#include "nsNameSpaceManager.h"
+#include "nsTextFragment.h"
+#include "nsContentUtils.h"
+#include "nsReadableUtils.h"
+#include "nsUnicharUtils.h"
+#include "nsCRT.h"
+#include "mozilla/dom/Element.h"
+#include "mozilla/Preferences.h"
+
+using namespace mozilla;
+using namespace mozilla::dom;
+
+#define PREF_STRUCTS "converter.html2txt.structs"
+#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
+
+static const int32_t kTabSize=4;
+static const int32_t kIndentSizeHeaders = 2;  /* Indention of h1, if
+                                                mHeaderStrategy = 1 or = 2.
+                                                Indention of other headers
+                                                is derived from that.
+                                                XXX center h1? */
+static const int32_t kIndentIncrementHeaders = 2;  /* If mHeaderStrategy = 1,
+                                                     indent h(x+1) this many
+                                                     columns more than h(x) */
+static const int32_t kIndentSizeList = kTabSize;
+                               // Indention of non-first lines of ul and ol
+static const int32_t kIndentSizeDD = kTabSize;  // Indention of <dd>
+
+static const char16_t kNBSP = 160;
+static const char16_t kSPACE = ' ';
+
+static int32_t HeaderLevel(nsIAtom* aTag);
+static int32_t GetUnicharWidth(char16_t ucs);
+static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n);
+
+// Someday may want to make this non-const:
+static const uint32_t TagStackSize = 500;
+static const uint32_t OLStackSize = 100;
+
+// Factory: allocates a serializer and hands it back through |aSerializer|
+// via CallQueryInterface.
+nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
+{
+  nsPlainTextSerializer* it = new nsPlainTextSerializer();
+  if (!it) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  return CallQueryInterface(it, aSerializer);
+}
+
+// Puts every member into its default state and allocates the fixed-size
+// tag and OL stacks. Flags, wrap column and prefs are applied in Init().
+nsPlainTextSerializer::nsPlainTextSerializer()
+  : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
+{
+
+  mOutputString = nullptr;
+  mHeadLevel = 0;
+  mAtFirstColumn = true;
+  mIndent = 0;
+  mCiteQuoteLevel = 0;
+  mStructs = true;       // will be read from prefs later
+  mHeaderStrategy = 1 /*indent increasingly*/;   // ditto
+  mDontWrapAnyQuotes = false;                 // ditto
+  mHasWrittenCiteBlockquote = false;
+  mSpanLevel = 0;
+  for (int32_t i = 0; i <= 6; i++) {
+    mHeaderCounter[i] = 0;
+  }
+
+  // Line breaker
+  mWrapColumn = 72;     // XXX magic number, we expect someone to reset this
+  mCurrentLineWidth = 0;
+
+  // Flow
+  mEmptyLines = 1; // The start of the document is an "empty line" in itself,
+  mInWhitespace = false;
+  mPreFormatted = false;
+  mStartedOutput = false;
+
+  // initialize the tag stack to zero:
+  // The stack only ever contains pointers to static atoms, so they don't
+  // need refcounting.
+  mTagStack = new nsIAtom*[TagStackSize];
+  mTagStackIndex = 0;
+  mIgnoreAboveIndex = (uint32_t)kNotFound;
+
+  // initialize the OL stack, where numbers for ordered lists are kept
+  mOLStack = new int32_t[OLStackSize];
+  mOLStackIndex = 0;
+
+  mULCount = 0;
+
+  mIgnoredChildNodeLevel = 0;
+}
+
+// Releases the two stacks allocated in the constructor.
+nsPlainTextSerializer::~nsPlainTextSerializer()
+{
+  delete[] mTagStack;
+  delete[] mOLStack;
+  NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!");
+}
+
+NS_IMPL_ISUPPORTS(nsPlainTextSerializer,
+                  nsIContentSerializer)
+
+
+// Stores the encoder flags and wrap column, selects the line break string
+// from the CR/LF flags, and (for formatted output) reads the html2txt prefs.
+// aCharSet, aIsCopying and aIsWholeDocument are not used here.
+// Always returns NS_OK.
+NS_IMETHODIMP
+nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
+                            const char* aCharSet, bool aIsCopying,
+                            bool aIsWholeDocument)
+{
+#ifdef DEBUG
+  // Check if the major control flags are set correctly.
+  if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
+    NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
+                 "If you want format=flowed, you must combine it with "
+                 "nsIDocumentEncoder::OutputFormatted");
+  }
+
+  if (aFlags & nsIDocumentEncoder::OutputFormatted) {
+    NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
+                 "Can't do formatted and preformatted output at the same time!");
+  }
+#endif
+
+  mFlags = aFlags;
+  mWrapColumn = aWrapColumn;
+
+  // Only create a linebreaker if we will handle wrapping.
+  if (MayWrap()) {
+    mLineBreaker = nsContentUtils::LineBreaker();
+  }
+
+  // Set the line break character:
+  if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
+      && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
+    // Windows
+    mLineBreak.AssignLiteral("\r\n");
+  }
+  else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
+    // Mac
+    mLineBreak.Assign(char16_t('\r'));
+  }
+  else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
+    // Unix/DOM
+    mLineBreak.Assign(char16_t('\n'));
+  }
+  else {
+    // Platform/default
+    mLineBreak.AssignLiteral(NS_LINEBREAK);
+  }
+
+  mLineBreakDue = false;
+  mFloatingLines = -1;
+
+  if (mFlags & nsIDocumentEncoder::OutputFormatted) {
+    // Get some prefs that controls how we do formatted output
+    mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
+
+    mHeaderStrategy =
+      Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
+
+    // DontWrapAnyQuotes is set according to whether plaintext mail
+    // is wrapping to window width -- see bug 134439.
+    // We'll only want this if we're wrapping and formatted.
+    if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) {
+      mDontWrapAnyQuotes =
+        Preferences::GetBool("mail.compose.wrap_to_window_width",
+                             mDontWrapAnyQuotes);
+    }
+  }
+
+  // XXX We should let the caller pass this in.
+  if (Preferences::GetBool("browser.frames.enabled")) {
+    mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
+  }
+  else {
+    mFlags |= nsIDocumentEncoder::OutputNoFramesContent;
+  }
+
+  return NS_OK;
+}
+
+// Returns the top of |aStack|, or false when the stack is empty.
+bool
+nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack)
+{
+  uint32_t size = aStack.Length();
+  if (size == 0) {
+    return false;
+  }
+  return aStack.ElementAt(size-1);
+}
+
+// Overwrites the top of |aStack| with |aValue|; reports an error (and does
+// nothing else) when the stack is empty.
+void
+nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue)
+{
+  uint32_t size = aStack.Length();
+  if (size > 0) {
+    aStack.ElementAt(size-1) = aValue;
+  }
+  else {
+    NS_ERROR("There is no \"Last\" value");
+  }
+}
+
+// Appends |aValue| to |aStack|.
+void
+nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue)
+{
+    aStack.AppendElement(bool(aValue));
+}
+
+// Removes and returns the top of |aStack|; returns false when it is empty.
+bool
+nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack)
+{
+  bool returnValue = false;
+  uint32_t size = aStack.Length();
+  if (size > 0) {
+    returnValue = aStack.ElementAt(size-1);
+    aStack.RemoveElementAt(size-1);
+  }
+  return returnValue;
+}
+
+// Returns true when |aTag| is one of the non-textual containers listed below
+// and the OutputNonTextContentAsPlaceholder flag is set.
+bool
+nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag)
+{
+  // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set,
+  // non-textual container element should be serialized as placeholder
+  // character and its child nodes should be ignored. See bug 895239.
+  if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) {
+    return false;
+  }
+
+  return
+    (aTag == nsGkAtoms::audio) ||
+    (aTag == nsGkAtoms::canvas) ||
+    (aTag == nsGkAtoms::iframe) ||
+    (aTag == nsGkAtoms::meter) ||
+    (aTag == nsGkAtoms::progress) ||
+    (aTag == nsGkAtoms::object) ||
+    (aTag == nsGkAtoms::svg) ||
+    (aTag == nsGkAtoms::video);
+}
+
+// Serializes the [aStartOffset, aEndOffset) slice of |aText|'s text fragment
+// into |aStr|. aEndOffset == -1 means "to the end of the fragment"; a larger
+// end offset is clamped to the fragment length. The text is split at every
+// '\n' or '\r', and each of those is emitted as mLineBreak.
+// Returns NS_ERROR_INVALID_ARG for a negative start offset or null |aText|,
+// NS_ERROR_FAILURE when the node has no text fragment, NS_OK otherwise
+// (including when output is currently being ignored or the slice is empty).
+NS_IMETHODIMP
+nsPlainTextSerializer::AppendText(nsIContent* aText,
+                                  int32_t aStartOffset,
+                                  int32_t aEndOffset,
+                                  nsAString& aStr)
+{
+  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
+    return NS_OK;
+  }
+
+  NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
+  if ( aStartOffset < 0 )
+    return NS_ERROR_INVALID_ARG;
+
+  NS_ENSURE_ARG(aText);
+
+  nsresult rv = NS_OK;
+
+  nsIContent* content = aText;
+  const nsTextFragment* frag;
+  if (!content || !(frag = content->GetText())) {
+    return NS_ERROR_FAILURE;
+  }
+
+  int32_t fragLength = frag->GetLength();
+  int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
+  NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
+
+  int32_t length = endoffset - aStartOffset;
+  if (length <= 0) {
+    return NS_OK;
+  }
+
+  nsAutoString textstr;
+  if (frag->Is2b()) {
+    textstr.Assign(frag->Get2b() + aStartOffset, length);
+  }
+  else {
+    // AssignASCII is for 7-bit character only, so don't use it
+    const char *data = frag->Get1b();
+    CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
+  }
+
+  mOutputString = &aStr;
+
+  // We have to split the string across newlines
+  // to match parser behavior
+  int32_t start = 0;
+  int32_t offset = textstr.FindCharInSet("\n\r");
+  while (offset != kNotFound) {
+
+    if (offset>start) {
+      // Pass in the line
+      DoAddText(false,
+                Substring(textstr, start, offset-start));
+    }
+
+    // Pass in a newline
+    DoAddText(true, mLineBreak);
+
+    start = offset+1;
+    offset = textstr.FindCharInSet("\n\r", start);
+  }
+
+  // Consume the last bit of the string if there's any left
+  if (start < length) {
+    if (start) {
+      DoAddText(false, Substring(textstr, start, length - start));
+    }
+    else {
+      DoAddText(false, textstr);
+    }
+  }
+
+  mOutputString = nullptr;
+
+  return rv;
+}
+
+// CDATA sections are serialized exactly like text nodes.
+NS_IMETHODIMP
+nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
+                                          int32_t aStartOffset,
+                                          int32_t aEndOffset,
+                                          nsAString& aStr)
+{
+  return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
+}
+
+// Handles the start of |aElement|, writing into |aStr|: HTML void elements
+// go to DoAddLeaf, everything else to DoOpenContainer. Entering <head>
+// bumps mHeadLevel. aOriginalElement is not used here.
+NS_IMETHODIMP
+nsPlainTextSerializer::AppendElementStart(Element* aElement,
+                                          Element* aOriginalElement,
+                                          nsAString& aStr)
+{
+  NS_ENSURE_ARG(aElement);
+
+  mElement = aElement;
+
+  nsresult rv;
+  nsIAtom* id = GetIdForContent(mElement);
+
+  bool isContainer = !nsContentUtils::IsHTMLVoid(id);
+
+  mOutputString = &aStr;
+
+  if (isContainer) {
+    rv = DoOpenContainer(id);
+  }
+  else {
+    rv = DoAddLeaf(id);
+  }
+
+  mElement = nullptr;
+  mOutputString = nullptr;
+
+  if (id == nsGkAtoms::head) {
+    ++mHeadLevel;
+  }
+
+  return rv;
+}
+
+// Handles the end of |aElement|, writing into |aStr|: containers go to
+// DoCloseContainer, void elements need no work. Leaving <head> decrements
+// mHeadLevel.
+NS_IMETHODIMP
+nsPlainTextSerializer::AppendElementEnd(Element* aElement,
+                                        nsAString& aStr)
+{
+  NS_ENSURE_ARG(aElement);
+
+  mElement = aElement;
+
+  nsresult rv;
+  nsIAtom* id = GetIdForContent(mElement);
+
+  bool isContainer = !nsContentUtils::IsHTMLVoid(id);
+
+  mOutputString = &aStr;
+
+  rv = NS_OK;
+  if (isContainer) {
+    rv = DoCloseContainer(id);
+  }
+
+  mElement = nullptr;
+  mOutputString = nullptr;
+
+  if (id == nsGkAtoms::head) {
+    NS_ASSERTION(mHeadLevel != 0,
+                 "mHeadLevel being decremented below 0");
+    --mHeadLevel;
+  }
+
+  return rv;
+}
+
+// Flushes the pending line into |aStr|.
+NS_IMETHODIMP
+nsPlainTextSerializer::Flush(nsAString& aStr)
+{
+  mOutputString = &aStr;
+  FlushLine();
+  mOutputString = nullptr;
+  return NS_OK;
+}
+
+// Nothing is emitted at the start of a document.
+NS_IMETHODIMP
+nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument,
+                                           nsAString& aStr)
+{
+  return NS_OK;
+}
+
+// Handles the opening of container element |aTag| (mElement is the element
+// itself): tracks it on the tag stack, decides whether its content is
+// ignored, and emits the vertical space, indentation and (in formatted mode)
+// the textual markers that stand in for the tag. Always returns NS_OK.
+// Keep the per-tag bookkeeping in sync with DoCloseContainer.
+nsresult
+nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag)
+{
+  // Check if we need output current node as placeholder character and ignore
+  // child nodes.
+  if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) {
+    if (mIgnoredChildNodeLevel == 0) {
+      // Serialize current node as placeholder character
+      Write(NS_LITERAL_STRING("\xFFFC"));
+    }
+    // Ignore child nodes.
+    mIgnoredChildNodeLevel++;
+    return NS_OK;
+  }
+
+  if (mFlags & nsIDocumentEncoder::OutputRaw) {
+    // Raw means raw.  Don't even think about doing anything fancy
+    // here like indenting, adding line breaks or any other
+    // characters such as list item bullets, quote characters
+    // around <q>, etc.  I mean it!  Don't make me smack you!
+
+    return NS_OK;
+  }
+
+  if (mTagStackIndex < TagStackSize) {
+    mTagStack[mTagStackIndex++] = aTag;
+  }
+
+  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
+    return NS_OK;
+  }
+
+  // Reset this so that <blockquote type=cite> doesn't affect the whitespace
+  // above random <pre>s below it.
+  mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
+                              aTag == nsGkAtoms::pre;
+
+  bool isInCiteBlockquote = false;
+
+  // XXX special-case <blockquote type=cite> so that we don't add additional
+  // newlines before the text.
+  if (aTag == nsGkAtoms::blockquote) {
+    nsAutoString value;
+    nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
+    isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
+  }
+
+  if (mLineBreakDue && !isInCiteBlockquote)
+    EnsureVerticalSpace(mFloatingLines);
+
+  // Check if this tag's content that should not be output
+  if ((aTag == nsGkAtoms::noscript &&
+       !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
+      ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
+       !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
+    // Ignore everything that follows the current tag in
+    // question until a matching end tag is encountered.
+    mIgnoreAboveIndex = mTagStackIndex - 1;
+    return NS_OK;
+  }
+
+  if (aTag == nsGkAtoms::body) {
+    // Try to figure out here whether we have a
+    // preformatted style attribute.
+    //
+    // Trigger on the presence of a "pre-wrap" in the
+    // style attribute. That's a very simplistic way to do
+    // it, but better than nothing.
+    // Also set mWrapColumn to the value given there
+    // (which arguably we should only do if told to do so).
+    nsAutoString style;
+    int32_t whitespace;
+    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
+       (kNotFound != (whitespace = style.Find("white-space:")))) {
+
+      if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
+#ifdef DEBUG_preformatted
+        printf("Set mPreFormatted based on style pre-wrap\n");
+#endif
+        mPreFormatted = true;
+        int32_t widthOffset = style.Find("width:");
+        if (widthOffset >= 0) {
+          // We have to search for the ch before the semicolon,
+          // not for the semicolon itself, because nsString::ToInteger()
+          // considers 'c' to be a valid numeric char (even if radix=10)
+          // but then gets confused if it sees it next to the number
+          // when the radix specified was 10, and returns an error code.
+          int32_t semiOffset = style.Find("ch", false, widthOffset+6);
+          int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
+                            : style.Length() - widthOffset);
+          nsAutoString widthstr;
+          style.Mid(widthstr, widthOffset+6, length);
+          nsresult err;
+          int32_t col = widthstr.ToInteger(&err);
+
+          if (NS_SUCCEEDED(err)) {
+            mWrapColumn = (uint32_t)col;
+#ifdef DEBUG_preformatted
+            printf("Set wrap column to %d based on style\n", mWrapColumn);
+#endif
+          }
+        }
+      }
+      else if (kNotFound != style.Find("pre", true, whitespace)) {
+#ifdef DEBUG_preformatted
+        printf("Set mPreFormatted based on style pre\n");
+#endif
+        mPreFormatted = true;
+        mWrapColumn = 0;
+      }
+    }
+    else {
+      /* See comment at end of function. */
+      mInWhitespace = true;
+      mPreFormatted = false;
+    }
+
+    return NS_OK;
+  }
+
+  // Keep this in sync with DoCloseContainer!
+  if (!DoOutput()) {
+    return NS_OK;
+  }
+
+  if (aTag == nsGkAtoms::p)
+    EnsureVerticalSpace(1);
+  else if (aTag == nsGkAtoms::pre) {
+    if (GetLastBool(mIsInCiteBlockquote))
+      EnsureVerticalSpace(0);
+    else if (mHasWrittenCiteBlockquote) {
+      EnsureVerticalSpace(0);
+      mHasWrittenCiteBlockquote = false;
+    }
+    else
+      EnsureVerticalSpace(1);
+  }
+  else if (aTag == nsGkAtoms::tr) {
+    PushBool(mHasWrittenCellsForRow, false);
+  }
+  else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
+    // We must make sure that the content of two table cells get a
+    // space between them.
+
+    // To make the separation between cells most obvious and
+    // importable, we use a TAB.
+    if (GetLastBool(mHasWrittenCellsForRow)) {
+      // Bypass |Write| so that the TAB isn't compressed away.
+      AddToLine(MOZ_UTF16("\t"), 1);
+      mInWhitespace = true;
+    }
+    else if (mHasWrittenCellsForRow.IsEmpty()) {
+      // We don't always see a <tr> (nor a <table>) before the <td> if we're
+      // copying part of a table
+      PushBool(mHasWrittenCellsForRow, true); // will never be popped
+    }
+    else {
+      SetLastBool(mHasWrittenCellsForRow, true);
+    }
+  }
+  else if (aTag == nsGkAtoms::ul) {
+    // Indent here to support nested lists, which aren't included in li :-(
+    EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
+         // Must end the current line before we change indention
+    mIndent += kIndentSizeList;
+    mULCount++;
+  }
+  else if (aTag == nsGkAtoms::ol) {
+    EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
+    if (mFlags & nsIDocumentEncoder::OutputFormatted) {
+      // Must end the current line before we change indention
+      // NOTE(review): when the stack is full the level is not counted here,
+      // yet DoCloseContainer decrements on </ol>, so numbering can drift for
+      // lists nested deeper than OLStackSize.
+      if (mOLStackIndex < OLStackSize) {
+        nsAutoString startAttr;
+        int32_t startVal = 1;
+        if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
+          nsresult rv = NS_OK;
+          startVal = startAttr.ToInteger(&rv);
+          if (NS_FAILED(rv))
+            startVal = 1;
+        }
+        mOLStack[mOLStackIndex++] = startVal;
+      }
+    } else {
+      mOLStackIndex++;
+    }
+    mIndent += kIndentSizeList;  // see ul
+  }
+  else if (aTag == nsGkAtoms::li &&
+           (mFlags & nsIDocumentEncoder::OutputFormatted)) {
+    if (mTagStackIndex > 1 && IsInOL()) {
+      if (mOLStackIndex > 0) {
+        nsAutoString valueAttr;
+        if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
+          nsresult rv = NS_OK;
+          int32_t valueAttrVal = valueAttr.ToInteger(&rv);
+          if (NS_SUCCEEDED(rv))
+            mOLStack[mOLStackIndex-1] = valueAttrVal;
+        }
+        // This is what nsBulletFrame does for OLs:
+        mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
+      }
+      else {
+        mInIndentString.Append(char16_t('#'));
+      }
+
+      mInIndentString.Append(char16_t('.'));
+
+    }
+    else {
+      // Bullet character cycles with the <ul> nesting depth; '#' is used
+      // when we are not inside any <ul>.
+      static char bulletCharArray[] = "*o+#";
+      uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
+      char bulletChar = bulletCharArray[index % 4];
+      mInIndentString.Append(char16_t(bulletChar));
+    }
+
+    mInIndentString.Append(char16_t(' '));
+  }
+  else if (aTag == nsGkAtoms::dl) {
+    EnsureVerticalSpace(1);
+  }
+  else if (aTag == nsGkAtoms::dt) {
+    EnsureVerticalSpace(0);
+  }
+  else if (aTag == nsGkAtoms::dd) {
+    EnsureVerticalSpace(0);
+    mIndent += kIndentSizeDD;
+  }
+  else if (aTag == nsGkAtoms::span) {
+    ++mSpanLevel;
+  }
+  else if (aTag == nsGkAtoms::blockquote) {
+    // Push
+    PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
+    if (isInCiteBlockquote) {
+      EnsureVerticalSpace(0);
+      mCiteQuoteLevel++;
+    }
+    else {
+      EnsureVerticalSpace(1);
+      mIndent += kTabSize; // Check for some maximum value?
+    }
+  }
+  else if (aTag == nsGkAtoms::q) {
+    Write(NS_LITERAL_STRING("\""));
+  }
+
+  // Else make sure we'll separate block level tags,
+  // even if we're about to leave, before doing any other formatting.
+  else if (nsContentUtils::IsHTMLBlock(aTag)) {
+    EnsureVerticalSpace(0);
+  }
+
+  //////////////////////////////////////////////////////////////
+  if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
+    return NS_OK;
+  }
+  //////////////////////////////////////////////////////////////
+  // The rest of this routine is formatted output stuff,
+  // which we should skip if we're not formatted:
+  //////////////////////////////////////////////////////////////
+
+  // Push on stack
+  bool currentNodeIsConverted = IsCurrentNodeConverted();
+
+  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
+      aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
+      aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
+  {
+    EnsureVerticalSpace(2);
+    if (mHeaderStrategy == 2) {  // numbered
+      mIndent += kIndentSizeHeaders;
+      // Caching
+      int32_t level = HeaderLevel(aTag);
+      // Increase counter for current level
+      mHeaderCounter[level]++;
+      // Reset all lower levels
+      int32_t i;
+
+      for (i = level + 1; i <= 6; i++) {
+        mHeaderCounter[i] = 0;
+      }
+
+      // Construct numbers
+      nsAutoString leadup;
+      for (i = 1; i <= level; i++) {
+        leadup.AppendInt(mHeaderCounter[i]);
+        leadup.Append(char16_t('.'));
+      }
+      leadup.Append(char16_t(' '));
+      Write(leadup);
+    }
+    else if (mHeaderStrategy == 1) { // indent increasingly
+      mIndent += kIndentSizeHeaders;
+      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
+           // for h(x), run x-1 times
+        mIndent += kIndentIncrementHeaders;
+      }
+    }
+  }
+  else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
+    // Remember the link target; DoCloseContainer appends it after the text.
+    nsAutoString url;
+    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
+        && !url.IsEmpty()) {
+      mURL = url;
+    }
+  }
+  else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
+    Write(NS_LITERAL_STRING("^"));
+  }
+  else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
+    Write(NS_LITERAL_STRING("_"));
+  }
+  else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
+    Write(NS_LITERAL_STRING("|"));
+  }
+  else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
+           && mStructs && !currentNodeIsConverted) {
+    Write(NS_LITERAL_STRING("*"));
+  }
+  else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
+           && mStructs && !currentNodeIsConverted) {
+    Write(NS_LITERAL_STRING("/"));
+  }
+  else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
+    Write(NS_LITERAL_STRING("_"));
+  }
+
+  /* Container elements are always block elements, so we shouldn't
+     output any whitespace immediately after the container tag even if
+     there's extra whitespace there because the HTML is pretty-printed
+     or something. To ensure that happens, tell the serializer we're
+     already in whitespace so it won't output more. */
+  mInWhitespace = true;
+
+  return NS_OK;
+}
+
+// Handles the closing of container element |aTag| (mElement is the element
+// itself): pops it from the tag stack, ends any ignore range it started,
+// undoes the indentation/counters set up when it was opened and schedules
+// the trailing line breaks. In formatted mode it also writes the closing
+// textual markers (e.g. " <url>" after a link). Always returns NS_OK.
+// Keep the per-tag bookkeeping in sync with DoOpenContainer.
+nsresult
+nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag)
+{
+  if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) {
+    mIgnoredChildNodeLevel--;
+    return NS_OK;
+  }
+
+  if (mFlags & nsIDocumentEncoder::OutputRaw) {
+    // Raw means raw.  Don't even think about doing anything fancy
+    // here like indenting, adding line breaks or any other
+    // characters such as list item bullets, quote characters
+    // around <q>, etc.  I mean it!  Don't make me smack you!
+
+    return NS_OK;
+  }
+
+  if (mTagStackIndex > 0) {
+    --mTagStackIndex;
+  }
+
+  if (mTagStackIndex >= mIgnoreAboveIndex) {
+    if (mTagStackIndex == mIgnoreAboveIndex) {
+      // We're dealing with the close tag whose matching
+      // open tag had set the mIgnoreAboveIndex value.
+      // Reset mIgnoreAboveIndex before discarding this tag.
+      mIgnoreAboveIndex = (uint32_t)kNotFound;
+    }
+    return NS_OK;
+  }
+
+  // End current line if we're ending a block level tag
+  if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
+    // We want the output to end with a new line,
+    // but in preformatted areas like text fields,
+    // we can't emit newlines that weren't there.
+    // So add the newline only in the case of formatted output.
+    if (mFlags & nsIDocumentEncoder::OutputFormatted) {
+      EnsureVerticalSpace(0);
+    }
+    else {
+      FlushLine();
+    }
+    // We won't want to do anything with these in formatted mode either,
+    // so just return now:
+    return NS_OK;
+  }
+
+  // Keep this in sync with DoOpenContainer!
+  if (!DoOutput()) {
+    return NS_OK;
+  }
+
+  if (aTag == nsGkAtoms::tr) {
+    PopBool(mHasWrittenCellsForRow);
+    // Should always end a line, but get no more whitespace
+    if (mFloatingLines < 0)
+      mFloatingLines = 0;
+    mLineBreakDue = true;
+  }
+  else if (((aTag == nsGkAtoms::li) ||
+            (aTag == nsGkAtoms::dt)) &&
+           (mFlags & nsIDocumentEncoder::OutputFormatted)) {
+    // Items that should always end a line, but get no more whitespace
+    if (mFloatingLines < 0)
+      mFloatingLines = 0;
+    mLineBreakDue = true;
+  }
+  else if (aTag == nsGkAtoms::pre) {
+    mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
+    mLineBreakDue = true;
+  }
+  else if (aTag == nsGkAtoms::ul) {
+    FlushLine();
+    mIndent -= kIndentSizeList;
+    if (--mULCount + mOLStackIndex == 0) {
+      mFloatingLines = 1;
+      mLineBreakDue = true;
+    }
+  }
+  else if (aTag == nsGkAtoms::ol) {
+    FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
+    mIndent -= kIndentSizeList;
+    NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
+    // DoOpenContainer does not count an <ol> once the OL stack is full, so
+    // very deeply nested lists reach here with the level already at zero.
+    // Never decrement past zero: the level is used as an index into mOLStack
+    // (mOLStack[mOLStackIndex-1]), and a wrapped-around value would make
+    // that access run out of bounds. Same guard as the tag stack pop above.
+    if (mOLStackIndex > 0) {
+      mOLStackIndex--;
+    }
+    if (mULCount + mOLStackIndex == 0) {
+      mFloatingLines = 1;
+      mLineBreakDue = true;
+    }
+  }
+  else if (aTag == nsGkAtoms::dl) {
+    mFloatingLines = 1;
+    mLineBreakDue = true;
+  }
+  else if (aTag == nsGkAtoms::dd) {
+    FlushLine();
+    mIndent -= kIndentSizeDD;
+  }
+  else if (aTag == nsGkAtoms::span) {
+    NS_ASSERTION(mSpanLevel, "Span level will be negative!");
+    --mSpanLevel;
+  }
+  else if (aTag == nsGkAtoms::div) {
+    if (mFloatingLines < 0)
+      mFloatingLines = 0;
+    mLineBreakDue = true;
+  }
+  else if (aTag == nsGkAtoms::blockquote) {
+    FlushLine();    // Is this needed?
+
+    // Pop
+    bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
+
+    if (isInCiteBlockquote) {
+      NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
+      mCiteQuoteLevel--;
+      mFloatingLines = 0;
+      mHasWrittenCiteBlockquote = true;
+    }
+    else {
+      mIndent -= kTabSize;
+      mFloatingLines = 1;
+    }
+    mLineBreakDue = true;
+  }
+  else if (aTag == nsGkAtoms::q) {
+    Write(NS_LITERAL_STRING("\""));
+  }
+  else if (nsContentUtils::IsHTMLBlock(aTag)
+           && aTag != nsGkAtoms::script) {
+    // All other blocks get 1 vertical space after them
+    // in formatted mode, otherwise 0.
+    // This is hard. Sometimes 0 is a better number, but
+    // how to know?
+    if (mFlags & nsIDocumentEncoder::OutputFormatted)
+      EnsureVerticalSpace(1);
+    else {
+      if (mFloatingLines < 0)
+        mFloatingLines = 0;
+      mLineBreakDue = true;
+    }
+  }
+
+  //////////////////////////////////////////////////////////////
+  if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
+    return NS_OK;
+  }
+  //////////////////////////////////////////////////////////////
+  // The rest of this routine is formatted output stuff,
+  // which we should skip if we're not formatted:
+  //////////////////////////////////////////////////////////////
+
+  // Pop the currentConverted stack
+  bool currentNodeIsConverted = IsCurrentNodeConverted();
+
+  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
+      aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
+      aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
+
+    // Undo exactly the indentation DoOpenContainer added for this header.
+    if (mHeaderStrategy) {  /*numbered or indent increasingly*/
+      mIndent -= kIndentSizeHeaders;
+    }
+    if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
+      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
+           // for h(x), run x-1 times
+        mIndent -= kIndentIncrementHeaders;
+      }
+    }
+    EnsureVerticalSpace(1);
+  }
+  else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
+    // Append the link target remembered at open time as " <url>".
+    nsAutoString temp;
+    temp.AssignLiteral(" <");
+    temp += mURL;
+    temp.Append(char16_t('>'));
+    Write(temp);
+    mURL.Truncate();
+  }
+  else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
+           && mStructs && !currentNodeIsConverted) {
+    Write(kSpace);
+  }
+  else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
+    Write(NS_LITERAL_STRING("|"));
+  }
+  else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
+           && mStructs && !currentNodeIsConverted) {
+    Write(NS_LITERAL_STRING("*"));
+  }
+  else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
+           && mStructs && !currentNodeIsConverted) {
+    Write(NS_LITERAL_STRING("/"));
+  }
+  else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
+    Write(NS_LITERAL_STRING("_"));
+  }
+
+  return NS_OK;
+}
+
+bool
+nsPlainTextSerializer::MustSuppressLeaf()
+{
+  if (mIgnoredChildNodeLevel > 0) {
+    return true;
+  }
+
+  if ((mTagStackIndex > 1 &&
+       mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
+      (mTagStackIndex > 0 &&
+        mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
+    // Don't output the contents of SELECT elements;
+    // Might be nice, eventually, to output just the selected element.
+    // Read more in bug 31994.
+    return true;
+  }
+
+  if (mTagStackIndex > 0 &&
+      (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
+       mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
+    // Don't output the contents of