michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* michael@0: * nsIContentSerializer implementation that can be used with an michael@0: * nsIDocumentEncoder to convert a DOM into plaintext in a nice way michael@0: * (eg for copy/paste as plaintext). michael@0: */ michael@0: michael@0: #ifndef nsPlainTextSerializer_h__ michael@0: #define nsPlainTextSerializer_h__ michael@0: michael@0: #include "mozilla/Attributes.h" michael@0: #include "nsAutoPtr.h" michael@0: #include "nsCOMPtr.h" michael@0: #include "nsIAtom.h" michael@0: #include "nsIContentSerializer.h" michael@0: #include "nsIDocumentEncoder.h" michael@0: #include "nsILineBreaker.h" michael@0: #include "nsString.h" michael@0: #include "nsTArray.h" michael@0: michael@0: class nsIContent; michael@0: michael@0: namespace mozilla { michael@0: namespace dom { michael@0: class Element; michael@0: } // namespace dom michael@0: } // namespace mozilla michael@0: michael@0: class nsPlainTextSerializer : public nsIContentSerializer michael@0: { michael@0: public: michael@0: nsPlainTextSerializer(); michael@0: virtual ~nsPlainTextSerializer(); michael@0: michael@0: NS_DECL_ISUPPORTS michael@0: michael@0: // nsIContentSerializer michael@0: NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn, michael@0: const char* aCharSet, bool aIsCopying, michael@0: bool aIsWholeDocument) MOZ_OVERRIDE; michael@0: michael@0: NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset, michael@0: int32_t aEndOffset, nsAString& aStr) MOZ_OVERRIDE; michael@0: NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection, michael@0: int32_t aStartOffset, int32_t aEndOffset, michael@0: nsAString& aStr) MOZ_OVERRIDE; michael@0: NS_IMETHOD 
AppendProcessingInstruction(nsIContent* aPI, michael@0: int32_t aStartOffset, michael@0: int32_t aEndOffset, michael@0: nsAString& aStr) MOZ_OVERRIDE { return NS_OK; } michael@0: NS_IMETHOD AppendComment(nsIContent* aComment, int32_t aStartOffset, michael@0: int32_t aEndOffset, nsAString& aStr) MOZ_OVERRIDE { return NS_OK; } michael@0: NS_IMETHOD AppendDoctype(nsIContent *aDoctype, michael@0: nsAString& aStr) MOZ_OVERRIDE { return NS_OK; } michael@0: NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement, michael@0: mozilla::dom::Element* aOriginalElement, michael@0: nsAString& aStr) MOZ_OVERRIDE; michael@0: NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement, michael@0: nsAString& aStr) MOZ_OVERRIDE; michael@0: NS_IMETHOD Flush(nsAString& aStr) MOZ_OVERRIDE; michael@0: michael@0: NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument, michael@0: nsAString& aStr) MOZ_OVERRIDE; michael@0: michael@0: protected: michael@0: nsresult GetAttributeValue(nsIAtom* aName, nsString& aValueRet); michael@0: void AddToLine(const char16_t* aStringToAdd, int32_t aLength); michael@0: void EndLine(bool softlinebreak, bool aBreakBySpace = false); michael@0: void EnsureVerticalSpace(int32_t noOfRows); michael@0: void FlushLine(); michael@0: void OutputQuotesAndIndent(bool stripTrailingSpaces=false); michael@0: void Output(nsString& aString); michael@0: void Write(const nsAString& aString); michael@0: bool IsInPre(); michael@0: bool IsInOL(); michael@0: bool IsCurrentNodeConverted(); michael@0: bool MustSuppressLeaf(); michael@0: michael@0: /** michael@0: * Returns the local name of the element as an atom if the element is an michael@0: * HTML element and the atom is a static atom. Otherwise, nullptr is returned. 
michael@0: */ michael@0: static nsIAtom* GetIdForContent(nsIContent* aContent); michael@0: nsresult DoOpenContainer(nsIAtom* aTag); michael@0: nsresult DoCloseContainer(nsIAtom* aTag); michael@0: nsresult DoAddLeaf(nsIAtom* aTag); michael@0: void DoAddText(bool aIsWhitespace, const nsAString& aText); michael@0: michael@0: // Inlined functions michael@0: inline bool MayWrap() michael@0: { michael@0: return mWrapColumn && michael@0: ((mFlags & nsIDocumentEncoder::OutputFormatted) || michael@0: (mFlags & nsIDocumentEncoder::OutputWrap)); michael@0: } michael@0: michael@0: inline bool DoOutput() michael@0: { michael@0: return mHeadLevel == 0; michael@0: } michael@0: michael@0: // Stack handling functions michael@0: bool GetLastBool(const nsTArray& aStack); michael@0: void SetLastBool(nsTArray& aStack, bool aValue); michael@0: void PushBool(nsTArray& aStack, bool aValue); michael@0: bool PopBool(nsTArray& aStack); michael@0: michael@0: bool ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag); michael@0: michael@0: protected: michael@0: nsString mCurrentLine; michael@0: uint32_t mHeadLevel; michael@0: bool mAtFirstColumn; michael@0: michael@0: // Handling of quoted text (for mail): michael@0: // Quotes need to be wrapped differently from non-quoted text, michael@0: // because quoted text has a few extra characters (e.g. ">> ") michael@0: // which makes the line length longer. michael@0: // Mail can represent quotes in different ways: michael@0: // Not wrapped in any special tag (if mail.compose.wrap_to_window_width) michael@0: // or in a . michael@0: bool mDontWrapAnyQuotes; // no special quote markers michael@0: michael@0: bool mStructs; // Output structs (pref) michael@0: michael@0: // If we've just written out a cite blockquote, we need to remember it michael@0: // so we don't duplicate spaces before a
 <pre> (which mail uses to quote
michael@0:   // old messages).
michael@0:   bool             mHasWrittenCiteBlockquote;
michael@0: 
michael@0:   int32_t          mIndent;
michael@0:   // mInIndentString keeps a header that has to be written in the indent.
michael@0:   // That could be, for instance, the bullet in a bulleted list.
michael@0:   nsString         mInIndentString;
michael@0:   int32_t          mCiteQuoteLevel;
michael@0:   int32_t          mFlags;
michael@0:   int32_t          mFloatingLines; // To store the number of lazy line breaks
michael@0: 
michael@0:   // The wrap column is how many standard sized chars (western languages)
michael@0:   // should be allowed on a line. There could be fewer chars if the chars
michael@0:   // are wider than Latin chars, or more if the chars are narrower.
michael@0:   uint32_t         mWrapColumn;
michael@0: 
michael@0:   // The width of the line as it will appear on the screen (approx.) 
michael@0:   uint32_t         mCurrentLineWidth; 
michael@0: 
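michael@0:   // Treat quoted text as though it's preformatted -- don't wrap it.
michael@0:   // Having it on a pref is a temporary measure; see bug 69638.
michael@0:   // NOTE(review): this comment does not obviously describe the member
michael@0:   // declared next; it may be left over from a removed member -- confirm.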
michael@0:   int32_t          mSpanLevel;
michael@0: 
michael@0: 
michael@0:   int32_t          mEmptyLines; // Will be the number of empty lines before
michael@0:                                 // the current. 0 if we are starting a new
michael@0:                                 // line and -1 if we are in a line.
michael@0: 
michael@0:   bool             mInWhitespace;
michael@0:   bool             mPreFormatted;
michael@0:   bool             mStartedOutput; // we've produced at least a character
michael@0: 
michael@0:   // While handling a new tag, this variable records whether a line break
michael@0:   // is due because of a closing tag. It is set to true while closing tags,
michael@0:   // so opening tags are guaranteed to start with appropriate line breaks.
michael@0:   bool             mLineBreakDue; 
michael@0: 
michael@0:   nsString         mURL;
michael@0:   int32_t          mHeaderStrategy;    /* Header strategy (pref)
michael@0:                                           0 = no indentation
michael@0:                                           1 = indentation, increased with
michael@0:                                               header level (default)
michael@0:                                           2 = numbering and slight indentation */
michael@0:   int32_t          mHeaderCounter[7];  /* For header-numbering:
michael@0:                                           Number of previous headers of
michael@0:                                           the same depth and in the same
michael@0:                                           section.
michael@0:                                           mHeaderCounter[1] for <h1>

etc. */ michael@0: michael@0: nsRefPtr mElement; michael@0: michael@0: // For handling table rows michael@0: nsAutoTArray mHasWrittenCellsForRow; michael@0: michael@0: // Values gotten in OpenContainer that is (also) needed in CloseContainer michael@0: nsAutoTArray mIsInCiteBlockquote; michael@0: michael@0: // The output data michael@0: nsAString* mOutputString; michael@0: michael@0: // The tag stack: the stack of tags we're operating on, so we can nest. michael@0: // The stack only ever points to static atoms, so they don't need to be michael@0: // refcounted. michael@0: nsIAtom** mTagStack; michael@0: uint32_t mTagStackIndex; michael@0: michael@0: // Content in the stack above this index should be ignored: michael@0: uint32_t mIgnoreAboveIndex; michael@0: michael@0: // The stack for ordered lists michael@0: int32_t *mOLStack; michael@0: uint32_t mOLStackIndex; michael@0: michael@0: uint32_t mULCount; michael@0: michael@0: nsString mLineBreak; michael@0: nsCOMPtr mLineBreaker; michael@0: michael@0: // Conveniance constant. It would be nice to have it as a const static michael@0: // variable, but that causes issues with OpenBSD and module unloading. michael@0: const nsString kSpace; michael@0: michael@0: // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child michael@0: // nodes of specific nodes -