|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 /* |
|
7 * nsIContentSerializer implementation that can be used with an |
|
8 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way |
|
9 * (eg for copy/paste as plaintext). |
|
10 */ |
|
11 |
|
12 #ifndef nsPlainTextSerializer_h__ |
|
13 #define nsPlainTextSerializer_h__ |
|
14 |
|
15 #include "mozilla/Attributes.h" |
|
16 #include "nsAutoPtr.h" |
|
17 #include "nsCOMPtr.h" |
|
18 #include "nsIAtom.h" |
|
19 #include "nsIContentSerializer.h" |
|
20 #include "nsIDocumentEncoder.h" |
|
21 #include "nsILineBreaker.h" |
|
22 #include "nsString.h" |
|
23 #include "nsTArray.h" |
|
24 |
|
// Forward declarations for types that this header names only through raw
// pointers or smart-pointer template arguments.
class nsIContent;

namespace mozilla { namespace dom {
class Element;
} } // namespace mozilla::dom
|
32 |
|
33 class nsPlainTextSerializer : public nsIContentSerializer |
|
34 { |
|
35 public: |
|
36 nsPlainTextSerializer(); |
|
37 virtual ~nsPlainTextSerializer(); |
|
38 |
|
39 NS_DECL_ISUPPORTS |
|
40 |
|
41 // nsIContentSerializer |
|
42 NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn, |
|
43 const char* aCharSet, bool aIsCopying, |
|
44 bool aIsWholeDocument) MOZ_OVERRIDE; |
|
45 |
|
46 NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset, |
|
47 int32_t aEndOffset, nsAString& aStr) MOZ_OVERRIDE; |
|
48 NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection, |
|
49 int32_t aStartOffset, int32_t aEndOffset, |
|
50 nsAString& aStr) MOZ_OVERRIDE; |
|
51 NS_IMETHOD AppendProcessingInstruction(nsIContent* aPI, |
|
52 int32_t aStartOffset, |
|
53 int32_t aEndOffset, |
|
54 nsAString& aStr) MOZ_OVERRIDE { return NS_OK; } |
|
55 NS_IMETHOD AppendComment(nsIContent* aComment, int32_t aStartOffset, |
|
56 int32_t aEndOffset, nsAString& aStr) MOZ_OVERRIDE { return NS_OK; } |
|
57 NS_IMETHOD AppendDoctype(nsIContent *aDoctype, |
|
58 nsAString& aStr) MOZ_OVERRIDE { return NS_OK; } |
|
59 NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement, |
|
60 mozilla::dom::Element* aOriginalElement, |
|
61 nsAString& aStr) MOZ_OVERRIDE; |
|
62 NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement, |
|
63 nsAString& aStr) MOZ_OVERRIDE; |
|
64 NS_IMETHOD Flush(nsAString& aStr) MOZ_OVERRIDE; |
|
65 |
|
66 NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument, |
|
67 nsAString& aStr) MOZ_OVERRIDE; |
|
68 |
|
69 protected: |
|
70 nsresult GetAttributeValue(nsIAtom* aName, nsString& aValueRet); |
|
71 void AddToLine(const char16_t* aStringToAdd, int32_t aLength); |
|
72 void EndLine(bool softlinebreak, bool aBreakBySpace = false); |
|
73 void EnsureVerticalSpace(int32_t noOfRows); |
|
74 void FlushLine(); |
|
75 void OutputQuotesAndIndent(bool stripTrailingSpaces=false); |
|
76 void Output(nsString& aString); |
|
77 void Write(const nsAString& aString); |
|
78 bool IsInPre(); |
|
79 bool IsInOL(); |
|
80 bool IsCurrentNodeConverted(); |
|
81 bool MustSuppressLeaf(); |
|
82 |
|
83 /** |
|
84 * Returns the local name of the element as an atom if the element is an |
|
85 * HTML element and the atom is a static atom. Otherwise, nullptr is returned. |
|
86 */ |
|
87 static nsIAtom* GetIdForContent(nsIContent* aContent); |
|
88 nsresult DoOpenContainer(nsIAtom* aTag); |
|
89 nsresult DoCloseContainer(nsIAtom* aTag); |
|
90 nsresult DoAddLeaf(nsIAtom* aTag); |
|
91 void DoAddText(bool aIsWhitespace, const nsAString& aText); |
|
92 |
|
93 // Inlined functions |
|
94 inline bool MayWrap() |
|
95 { |
|
96 return mWrapColumn && |
|
97 ((mFlags & nsIDocumentEncoder::OutputFormatted) || |
|
98 (mFlags & nsIDocumentEncoder::OutputWrap)); |
|
99 } |
|
100 |
|
101 inline bool DoOutput() |
|
102 { |
|
103 return mHeadLevel == 0; |
|
104 } |
|
105 |
|
106 // Stack handling functions |
|
107 bool GetLastBool(const nsTArray<bool>& aStack); |
|
108 void SetLastBool(nsTArray<bool>& aStack, bool aValue); |
|
109 void PushBool(nsTArray<bool>& aStack, bool aValue); |
|
110 bool PopBool(nsTArray<bool>& aStack); |
|
111 |
|
112 bool ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag); |
|
113 |
|
114 protected: |
|
115 nsString mCurrentLine; |
|
116 uint32_t mHeadLevel; |
|
117 bool mAtFirstColumn; |
|
118 |
|
119 // Handling of quoted text (for mail): |
|
120 // Quotes need to be wrapped differently from non-quoted text, |
|
121 // because quoted text has a few extra characters (e.g. ">> ") |
|
122 // which makes the line length longer. |
|
123 // Mail can represent quotes in different ways: |
|
124 // Not wrapped in any special tag (if mail.compose.wrap_to_window_width) |
|
125 // or in a <span>. |
|
126 bool mDontWrapAnyQuotes; // no special quote markers |
|
127 |
|
128 bool mStructs; // Output structs (pref) |
|
129 |
|
130 // If we've just written out a cite blockquote, we need to remember it |
|
131 // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote |
|
132 // old messages). |
|
133 bool mHasWrittenCiteBlockquote; |
|
134 |
|
135 int32_t mIndent; |
|
136 // mInIndentString keeps a header that has to be written in the indent. |
|
137 // That could be, for instance, the bullet in a bulleted list. |
|
138 nsString mInIndentString; |
|
139 int32_t mCiteQuoteLevel; |
|
140 int32_t mFlags; |
|
141 int32_t mFloatingLines; // To store the number of lazy line breaks |
|
142 |
|
143 // The wrap column is how many standard sized chars (western languages) |
|
144 // should be allowed on a line. There could be less chars if the chars |
|
145 // are wider than latin chars of more if the chars are more narrow. |
|
146 uint32_t mWrapColumn; |
|
147 |
|
148 // The width of the line as it will appear on the screen (approx.) |
|
149 uint32_t mCurrentLineWidth; |
|
150 |
|
151 // Treat quoted text as though it's preformatted -- don't wrap it. |
|
152 // Having it on a pref is a temporary measure, See bug 69638. |
|
153 int32_t mSpanLevel; |
|
154 |
|
155 |
|
156 int32_t mEmptyLines; // Will be the number of empty lines before |
|
157 // the current. 0 if we are starting a new |
|
158 // line and -1 if we are in a line. |
|
159 |
|
160 bool mInWhitespace; |
|
161 bool mPreFormatted; |
|
162 bool mStartedOutput; // we've produced at least a character |
|
163 |
|
164 // While handling a new tag, this variable should remind if any line break |
|
165 // is due because of a closing tag. Setting it to "TRUE" while closing the tags. |
|
166 // Hence opening tags are guaranteed to start with appropriate line breaks. |
|
167 bool mLineBreakDue; |
|
168 |
|
169 nsString mURL; |
|
170 int32_t mHeaderStrategy; /* Header strategy (pref) |
|
171 0 = no indention |
|
172 1 = indention, increased with |
|
173 header level (default) |
|
174 2 = numbering and slight indention */ |
|
175 int32_t mHeaderCounter[7]; /* For header-numbering: |
|
176 Number of previous headers of |
|
177 the same depth and in the same |
|
178 section. |
|
179 mHeaderCounter[1] for <h1> etc. */ |
|
180 |
|
181 nsRefPtr<mozilla::dom::Element> mElement; |
|
182 |
|
183 // For handling table rows |
|
184 nsAutoTArray<bool, 8> mHasWrittenCellsForRow; |
|
185 |
|
186 // Values gotten in OpenContainer that is (also) needed in CloseContainer |
|
187 nsAutoTArray<bool, 8> mIsInCiteBlockquote; |
|
188 |
|
189 // The output data |
|
190 nsAString* mOutputString; |
|
191 |
|
192 // The tag stack: the stack of tags we're operating on, so we can nest. |
|
193 // The stack only ever points to static atoms, so they don't need to be |
|
194 // refcounted. |
|
195 nsIAtom** mTagStack; |
|
196 uint32_t mTagStackIndex; |
|
197 |
|
198 // Content in the stack above this index should be ignored: |
|
199 uint32_t mIgnoreAboveIndex; |
|
200 |
|
201 // The stack for ordered lists |
|
202 int32_t *mOLStack; |
|
203 uint32_t mOLStackIndex; |
|
204 |
|
205 uint32_t mULCount; |
|
206 |
|
207 nsString mLineBreak; |
|
208 nsCOMPtr<nsILineBreaker> mLineBreaker; |
|
209 |
|
210 // Conveniance constant. It would be nice to have it as a const static |
|
211 // variable, but that causes issues with OpenBSD and module unloading. |
|
212 const nsString kSpace; |
|
213 |
|
214 // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child |
|
215 // nodes of specific nodes - <iframe>, <canvas>, etc. should be ignored. |
|
216 // mIgnoredChildNodeLevel is used to tell if current node is an ignorable |
|
217 // child node. The initial value of mIgnoredChildNodeLevel is 0. When |
|
218 // serializer enters those specific nodes, mIgnoredChildNodeLevel increases |
|
219 // and is greater than 0. Otherwise when serializer leaves those nodes, |
|
220 // mIgnoredChildNodeLevel decreases. |
|
221 uint32_t mIgnoredChildNodeLevel; |
|
222 }; |
|
223 |
|
224 nsresult |
|
225 NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer); |
|
226 |
|
227 #endif |