content/base/src/nsPlainTextSerializer.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/content/base/src/nsPlainTextSerializer.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,227 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +/*
    1.10 + * nsIContentSerializer implementation that can be used with an
    1.11 + * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
     1.12 + * (e.g. for copy/paste as plaintext).
    1.13 + */
    1.14 +
    1.15 +#ifndef nsPlainTextSerializer_h__
    1.16 +#define nsPlainTextSerializer_h__
    1.17 +
    1.18 +#include "mozilla/Attributes.h"
    1.19 +#include "nsAutoPtr.h"
    1.20 +#include "nsCOMPtr.h"
    1.21 +#include "nsIAtom.h"
    1.22 +#include "nsIContentSerializer.h"
    1.23 +#include "nsIDocumentEncoder.h"
    1.24 +#include "nsILineBreaker.h"
    1.25 +#include "nsString.h"
    1.26 +#include "nsTArray.h"
    1.27 +
    1.28 +class nsIContent;
    1.29 +
    1.30 +namespace mozilla {
    1.31 +namespace dom {
    1.32 +class Element;
    1.33 +} // namespace dom
    1.34 +} // namespace mozilla
    1.35 +
     1.36 +class nsPlainTextSerializer : public nsIContentSerializer // content serializer that converts a DOM into plaintext
     1.37 +{
     1.38 +public:
     1.39 +  nsPlainTextSerializer();
     1.40 +  virtual ~nsPlainTextSerializer();
     1.41 +
     1.42 +  NS_DECL_ISUPPORTS
     1.43 +
     1.44 +  // nsIContentSerializer interface overrides
     1.45 +  NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
     1.46 +                  const char* aCharSet, bool aIsCopying,
     1.47 +                  bool aIsWholeDocument) MOZ_OVERRIDE;
     1.48 +
     1.49 +  NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
     1.50 +                        int32_t aEndOffset, nsAString& aStr) MOZ_OVERRIDE;
     1.51 +  NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection,
     1.52 +                                int32_t aStartOffset, int32_t aEndOffset,
     1.53 +                                nsAString& aStr) MOZ_OVERRIDE;
     1.54 +  NS_IMETHOD AppendProcessingInstruction(nsIContent* aPI,
     1.55 +                                         int32_t aStartOffset,
     1.56 +                                         int32_t aEndOffset,
     1.57 +                                         nsAString& aStr) MOZ_OVERRIDE  { return NS_OK; } // no-op: returns NS_OK without touching aStr
     1.58 +  NS_IMETHOD AppendComment(nsIContent* aComment, int32_t aStartOffset,
     1.59 +                           int32_t aEndOffset, nsAString& aStr) MOZ_OVERRIDE  { return NS_OK; } // no-op: returns NS_OK without touching aStr
     1.60 +  NS_IMETHOD AppendDoctype(nsIContent *aDoctype,
     1.61 +                           nsAString& aStr) MOZ_OVERRIDE  { return NS_OK; } // no-op: returns NS_OK without touching aStr
     1.62 +  NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement,
     1.63 +                                mozilla::dom::Element* aOriginalElement,
     1.64 +                                nsAString& aStr) MOZ_OVERRIDE; 
     1.65 +  NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
     1.66 +                              nsAString& aStr) MOZ_OVERRIDE;
     1.67 +  NS_IMETHOD Flush(nsAString& aStr) MOZ_OVERRIDE;
     1.68 +
     1.69 +  NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument,
     1.70 +                                 nsAString& aStr) MOZ_OVERRIDE;
     1.71 +
     1.72 +protected:
     1.73 +  nsresult GetAttributeValue(nsIAtom* aName, nsString& aValueRet);
     1.74 +  void AddToLine(const char16_t* aStringToAdd, int32_t aLength);
     1.75 +  void EndLine(bool softlinebreak, bool aBreakBySpace = false);
     1.76 +  void EnsureVerticalSpace(int32_t noOfRows);
     1.77 +  void FlushLine();
     1.78 +  void OutputQuotesAndIndent(bool stripTrailingSpaces=false);
     1.79 +  void Output(nsString& aString);
     1.80 +  void Write(const nsAString& aString);
     1.81 +  bool IsInPre();
     1.82 +  bool IsInOL();
     1.83 +  bool IsCurrentNodeConverted();
     1.84 +  bool MustSuppressLeaf();
     1.85 +
     1.86 +  /**
     1.87 +   * Returns the local name of the element as an atom if the element is an
     1.88 +   * HTML element and the atom is a static atom. Otherwise, nullptr is returned.
     1.89 +   */
     1.90 +  static nsIAtom* GetIdForContent(nsIContent* aContent);
     1.91 +  nsresult DoOpenContainer(nsIAtom* aTag);
     1.92 +  nsresult DoCloseContainer(nsIAtom* aTag);
     1.93 +  nsresult DoAddLeaf(nsIAtom* aTag);
     1.94 +  void DoAddText(bool aIsWhitespace, const nsAString& aText);
     1.95 +
     1.96 +  // Inlined functions
     1.97 +  inline bool MayWrap() // true iff mWrapColumn is non-zero and OutputFormatted or OutputWrap is set in mFlags
     1.98 +  {
     1.99 +    return mWrapColumn &&
    1.100 +      ((mFlags & nsIDocumentEncoder::OutputFormatted) ||
    1.101 +       (mFlags & nsIDocumentEncoder::OutputWrap));
    1.102 +  }
    1.103 +
    1.104 +  inline bool DoOutput() // true iff mHeadLevel is 0
    1.105 +  {
    1.106 +    return mHeadLevel == 0;
    1.107 +  }
    1.108 +
    1.109 +  // Stack handling functions
    1.110 +  bool GetLastBool(const nsTArray<bool>& aStack);
    1.111 +  void SetLastBool(nsTArray<bool>& aStack, bool aValue);
    1.112 +  void PushBool(nsTArray<bool>& aStack, bool aValue);
    1.113 +  bool PopBool(nsTArray<bool>& aStack);
    1.114 +
    1.115 +  bool ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag);
    1.116 +
    1.117 +protected:
    1.118 +  nsString         mCurrentLine;
    1.119 +  uint32_t         mHeadLevel;
    1.120 +  bool             mAtFirstColumn;
    1.121 +
    1.122 +  // Handling of quoted text (for mail):
    1.123 +  // Quotes need to be wrapped differently from non-quoted text,
    1.124 +  // because quoted text has a few extra characters (e.g. ">> ")
    1.125 +  // which makes the line length longer.
    1.126 +  // Mail can represent quotes in different ways:
    1.127 +  // Not wrapped in any special tag (if mail.compose.wrap_to_window_width)
    1.128 +  // or in a <span>.
    1.129 +  bool             mDontWrapAnyQuotes;  // no special quote markers
    1.130 +
    1.131 +  bool             mStructs;            // Output structs (pref)
    1.132 +
    1.133 +  // If we've just written out a cite blockquote, we need to remember it
    1.134 +  // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote
    1.135 +  // old messages).
    1.136 +  bool             mHasWrittenCiteBlockquote;
    1.137 +
    1.138 +  int32_t          mIndent;
    1.139 +  // mInIndentString keeps a header that has to be written in the indent.
    1.140 +  // That could be, for instance, the bullet in a bulleted list.
    1.141 +  nsString         mInIndentString;
    1.142 +  int32_t          mCiteQuoteLevel;
    1.143 +  int32_t          mFlags;
    1.144 +  int32_t          mFloatingLines; // To store the number of lazy line breaks
    1.145 +
    1.146 +  // The wrap column is how many standard sized chars (western languages)
    1.147 +  // should be allowed on a line. There could be fewer chars if the chars
    1.148 +  // are wider than latin chars, or more if the chars are narrower.
    1.149 +  uint32_t         mWrapColumn;
    1.150 +
    1.151 +  // The width of the line as it will appear on the screen (approx.) 
    1.152 +  uint32_t         mCurrentLineWidth; 
    1.153 +
    1.154 +  // Treat quoted text as though it's preformatted -- don't wrap it.
    1.155 +  // Having it on a pref is a temporary measure, see bug 69638.
    1.156 +  int32_t          mSpanLevel; // NOTE(review): the comment above does not obviously describe mSpanLevel -- confirm
    1.157 +
    1.158 +
    1.159 +  int32_t          mEmptyLines; // Will be the number of empty lines before
    1.160 +                                // the current. 0 if we are starting a new
    1.161 +                                // line and -1 if we are in a line.
    1.162 +
    1.163 +  bool             mInWhitespace;
    1.164 +  bool             mPreFormatted;
    1.165 +  bool             mStartedOutput; // we've produced at least a character
    1.166 +
    1.167 +  // While handling a new tag, this variable records whether any line break
    1.168 +  // is due because of a closing tag. It is set to true while closing tags,
    1.169 +  // hence opening tags are guaranteed to start with appropriate line breaks.
    1.170 +  bool             mLineBreakDue; 
    1.171 +
    1.172 +  nsString         mURL;
    1.173 +  int32_t          mHeaderStrategy;    /* Header strategy (pref)
    1.174 +                                          0 = no indentation
    1.175 +                                          1 = indentation, increased with
    1.176 +                                              header level (default)
    1.177 +                                          2 = numbering and slight indentation */
    1.178 +  int32_t          mHeaderCounter[7];  /* For header-numbering:
    1.179 +                                          Number of previous headers of
    1.180 +                                          the same depth and in the same
    1.181 +                                          section.
    1.182 +                                          mHeaderCounter[1] for <h1> etc. */
    1.183 +
    1.184 +  nsRefPtr<mozilla::dom::Element> mElement;
    1.185 +
    1.186 +  // For handling table rows
    1.187 +  nsAutoTArray<bool, 8> mHasWrittenCellsForRow;
    1.188 +  
    1.189 +  // Values obtained in OpenContainer that are (also) needed in CloseContainer
    1.190 +  nsAutoTArray<bool, 8> mIsInCiteBlockquote;
    1.191 +
    1.192 +  // The output data
    1.193 +  nsAString*            mOutputString;
    1.194 +
    1.195 +  // The tag stack: the stack of tags we're operating on, so we can nest.
    1.196 +  // The stack only ever points to static atoms, so they don't need to be
    1.197 +  // refcounted.
    1.198 +  nsIAtom**        mTagStack;
    1.199 +  uint32_t         mTagStackIndex;
    1.200 +
    1.201 +  // Content in the stack above this index should be ignored:
    1.202 +  uint32_t          mIgnoreAboveIndex;
    1.203 +
    1.204 +  // The stack for ordered lists
    1.205 +  int32_t         *mOLStack;
    1.206 +  uint32_t         mOLStackIndex;
    1.207 +
    1.208 +  uint32_t         mULCount;
    1.209 +
    1.210 +  nsString                     mLineBreak;
    1.211 +  nsCOMPtr<nsILineBreaker>     mLineBreaker;
    1.212 +
    1.213 +  // Convenience constant. It would be nice to have it as a const static
    1.214 +  // variable, but that causes issues with OpenBSD and module unloading.
    1.215 +  const nsString          kSpace;
    1.216 +
    1.217 +  // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child
    1.218 +  // nodes of specific nodes - <iframe>, <canvas>, etc. should be ignored.
    1.219 +  // mIgnoredChildNodeLevel is used to tell if current node is an ignorable
    1.220 +  // child node. The initial value of mIgnoredChildNodeLevel is 0. When
    1.221 +  // serializer enters those specific nodes, mIgnoredChildNodeLevel increases
    1.222 +  // and is greater than 0. Otherwise when serializer leaves those nodes,
    1.223 +  // mIgnoredChildNodeLevel decreases.
    1.224 +  uint32_t mIgnoredChildNodeLevel;
    1.225 +};
   1.226 +
    1.227 +nsresult
    1.228 +NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer); // declared only; presumably creates a serializer and hands it back via aSerializer -- confirm against the definition
   1.229 +
   1.230 +#endif

mercurial