| |
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
| |
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
| |
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
| |
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| |
5 |
| |
6 /* |
| |
7 * nsIContentSerializer implementation that can be used with an |
| |
8 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way |
| |
9 * (eg for copy/paste as plaintext). |
| |
10 */ |
| |
11 |
| |
12 #include "nsPlainTextSerializer.h" |
| |
13 #include "nsLWBrkCIID.h" |
| |
14 #include "nsIServiceManager.h" |
| |
15 #include "nsGkAtoms.h" |
| |
16 #include "nsNameSpaceManager.h" |
| |
17 #include "nsTextFragment.h" |
| |
18 #include "nsContentUtils.h" |
| |
19 #include "nsReadableUtils.h" |
| |
20 #include "nsUnicharUtils.h" |
| |
21 #include "nsCRT.h" |
| |
22 #include "mozilla/dom/Element.h" |
| |
23 #include "mozilla/Preferences.h" |
| |
24 |
| |
25 using namespace mozilla; |
| |
26 using namespace mozilla::dom; |
| |
27 |
| |
28 #define PREF_STRUCTS "converter.html2txt.structs" |
| |
29 #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy" |
| |
30 |
| |
// Basic indentation unit, in columns. The list and <dd> indentation below
// are defined in terms of it.
static const int32_t kTabSize = 4;

// Indentation of h1 when mHeaderStrategy is 1 or 2. The indentation of the
// other header levels is derived from this value.
// XXX center h1?
static const int32_t kIndentSizeHeaders = 2;

// When mHeaderStrategy is 1, h(x+1) is indented this many columns more
// than h(x).
static const int32_t kIndentIncrementHeaders = 2;

// Indentation of the non-first lines of ul and ol.
static const int32_t kIndentSizeList = kTabSize;

// Indentation of <dd>.
static const int32_t kIndentSizeDD = kTabSize;

// U+00A0 NO-BREAK SPACE.
static const char16_t kNBSP = 0x00A0;

// U+0020 SPACE.
static const char16_t kSPACE = ' ';
| |
45 |
| |
46 static int32_t HeaderLevel(nsIAtom* aTag); |
| |
47 static int32_t GetUnicharWidth(char16_t ucs); |
| |
48 static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n); |
| |
49 |
| |
50 // Someday may want to make this non-const: |
| |
51 static const uint32_t TagStackSize = 500; |
| |
52 static const uint32_t OLStackSize = 100; |
| |
53 |
| |
54 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) |
| |
55 { |
| |
56 nsPlainTextSerializer* it = new nsPlainTextSerializer(); |
| |
57 if (!it) { |
| |
58 return NS_ERROR_OUT_OF_MEMORY; |
| |
59 } |
| |
60 |
| |
61 return CallQueryInterface(it, aSerializer); |
| |
62 } |
| |
63 |
| |
64 nsPlainTextSerializer::nsPlainTextSerializer() |
| |
65 : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant" |
| |
66 { |
| |
67 |
| |
68 mOutputString = nullptr; |
| |
69 mHeadLevel = 0; |
| |
70 mAtFirstColumn = true; |
| |
71 mIndent = 0; |
| |
72 mCiteQuoteLevel = 0; |
| |
73 mStructs = true; // will be read from prefs later |
| |
74 mHeaderStrategy = 1 /*indent increasingly*/; // ditto |
| |
75 mDontWrapAnyQuotes = false; // ditto |
| |
76 mHasWrittenCiteBlockquote = false; |
| |
77 mSpanLevel = 0; |
| |
78 for (int32_t i = 0; i <= 6; i++) { |
| |
79 mHeaderCounter[i] = 0; |
| |
80 } |
| |
81 |
| |
82 // Line breaker |
| |
83 mWrapColumn = 72; // XXX magic number, we expect someone to reset this |
| |
84 mCurrentLineWidth = 0; |
| |
85 |
| |
86 // Flow |
| |
87 mEmptyLines = 1; // The start of the document is an "empty line" in itself, |
| |
88 mInWhitespace = false; |
| |
89 mPreFormatted = false; |
| |
90 mStartedOutput = false; |
| |
91 |
| |
92 // initialize the tag stack to zero: |
| |
93 // The stack only ever contains pointers to static atoms, so they don't |
| |
94 // need refcounting. |
| |
95 mTagStack = new nsIAtom*[TagStackSize]; |
| |
96 mTagStackIndex = 0; |
| |
97 mIgnoreAboveIndex = (uint32_t)kNotFound; |
| |
98 |
| |
99 // initialize the OL stack, where numbers for ordered lists are kept |
| |
100 mOLStack = new int32_t[OLStackSize]; |
| |
101 mOLStackIndex = 0; |
| |
102 |
| |
103 mULCount = 0; |
| |
104 |
| |
105 mIgnoredChildNodeLevel = 0; |
| |
106 } |
| |
107 |
| |
108 nsPlainTextSerializer::~nsPlainTextSerializer() |
| |
109 { |
| |
110 delete[] mTagStack; |
| |
111 delete[] mOLStack; |
| |
112 NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!"); |
| |
113 } |
| |
114 |
| |
115 NS_IMPL_ISUPPORTS(nsPlainTextSerializer, |
| |
116 nsIContentSerializer) |
| |
117 |
| |
118 |
| |
119 NS_IMETHODIMP |
| |
120 nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn, |
| |
121 const char* aCharSet, bool aIsCopying, |
| |
122 bool aIsWholeDocument) |
| |
123 { |
| |
124 #ifdef DEBUG |
| |
125 // Check if the major control flags are set correctly. |
| |
126 if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
| |
127 NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted, |
| |
128 "If you want format=flowed, you must combine it with " |
| |
129 "nsIDocumentEncoder::OutputFormatted"); |
| |
130 } |
| |
131 |
| |
132 if (aFlags & nsIDocumentEncoder::OutputFormatted) { |
| |
133 NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted), |
| |
134 "Can't do formatted and preformatted output at the same time!"); |
| |
135 } |
| |
136 #endif |
| |
137 |
| |
138 mFlags = aFlags; |
| |
139 mWrapColumn = aWrapColumn; |
| |
140 |
| |
141 // Only create a linebreaker if we will handle wrapping. |
| |
142 if (MayWrap()) { |
| |
143 mLineBreaker = nsContentUtils::LineBreaker(); |
| |
144 } |
| |
145 |
| |
146 // Set the line break character: |
| |
147 if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) |
| |
148 && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { |
| |
149 // Windows |
| |
150 mLineBreak.AssignLiteral("\r\n"); |
| |
151 } |
| |
152 else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { |
| |
153 // Mac |
| |
154 mLineBreak.Assign(char16_t('\r')); |
| |
155 } |
| |
156 else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { |
| |
157 // Unix/DOM |
| |
158 mLineBreak.Assign(char16_t('\n')); |
| |
159 } |
| |
160 else { |
| |
161 // Platform/default |
| |
162 mLineBreak.AssignLiteral(NS_LINEBREAK); |
| |
163 } |
| |
164 |
| |
165 mLineBreakDue = false; |
| |
166 mFloatingLines = -1; |
| |
167 |
| |
168 if (mFlags & nsIDocumentEncoder::OutputFormatted) { |
| |
169 // Get some prefs that controls how we do formatted output |
| |
170 mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs); |
| |
171 |
| |
172 mHeaderStrategy = |
| |
173 Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy); |
| |
174 |
| |
175 // DontWrapAnyQuotes is set according to whether plaintext mail |
| |
176 // is wrapping to window width -- see bug 134439. |
| |
177 // We'll only want this if we're wrapping and formatted. |
| |
178 if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) { |
| |
179 mDontWrapAnyQuotes = |
| |
180 Preferences::GetBool("mail.compose.wrap_to_window_width", |
| |
181 mDontWrapAnyQuotes); |
| |
182 } |
| |
183 } |
| |
184 |
| |
185 // XXX We should let the caller pass this in. |
| |
186 if (Preferences::GetBool("browser.frames.enabled")) { |
| |
187 mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent; |
| |
188 } |
| |
189 else { |
| |
190 mFlags |= nsIDocumentEncoder::OutputNoFramesContent; |
| |
191 } |
| |
192 |
| |
193 return NS_OK; |
| |
194 } |
| |
195 |
| |
196 bool |
| |
197 nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) |
| |
198 { |
| |
199 uint32_t size = aStack.Length(); |
| |
200 if (size == 0) { |
| |
201 return false; |
| |
202 } |
| |
203 return aStack.ElementAt(size-1); |
| |
204 } |
| |
205 |
| |
206 void |
| |
207 nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) |
| |
208 { |
| |
209 uint32_t size = aStack.Length(); |
| |
210 if (size > 0) { |
| |
211 aStack.ElementAt(size-1) = aValue; |
| |
212 } |
| |
213 else { |
| |
214 NS_ERROR("There is no \"Last\" value"); |
| |
215 } |
| |
216 } |
| |
217 |
| |
218 void |
| |
219 nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) |
| |
220 { |
| |
221 aStack.AppendElement(bool(aValue)); |
| |
222 } |
| |
223 |
| |
224 bool |
| |
225 nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) |
| |
226 { |
| |
227 bool returnValue = false; |
| |
228 uint32_t size = aStack.Length(); |
| |
229 if (size > 0) { |
| |
230 returnValue = aStack.ElementAt(size-1); |
| |
231 aStack.RemoveElementAt(size-1); |
| |
232 } |
| |
233 return returnValue; |
| |
234 } |
| |
235 |
| |
236 bool |
| |
237 nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag) |
| |
238 { |
| |
239 // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, |
| |
240 // non-textual container element should be serialized as placeholder |
| |
241 // character and its child nodes should be ignored. See bug 895239. |
| |
242 if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) { |
| |
243 return false; |
| |
244 } |
| |
245 |
| |
246 return |
| |
247 (aTag == nsGkAtoms::audio) || |
| |
248 (aTag == nsGkAtoms::canvas) || |
| |
249 (aTag == nsGkAtoms::iframe) || |
| |
250 (aTag == nsGkAtoms::meter) || |
| |
251 (aTag == nsGkAtoms::progress) || |
| |
252 (aTag == nsGkAtoms::object) || |
| |
253 (aTag == nsGkAtoms::svg) || |
| |
254 (aTag == nsGkAtoms::video); |
| |
255 } |
| |
256 |
| |
257 NS_IMETHODIMP |
| |
258 nsPlainTextSerializer::AppendText(nsIContent* aText, |
| |
259 int32_t aStartOffset, |
| |
260 int32_t aEndOffset, |
| |
261 nsAString& aStr) |
| |
262 { |
| |
263 if (mIgnoreAboveIndex != (uint32_t)kNotFound) { |
| |
264 return NS_OK; |
| |
265 } |
| |
266 |
| |
267 NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); |
| |
268 if ( aStartOffset < 0 ) |
| |
269 return NS_ERROR_INVALID_ARG; |
| |
270 |
| |
271 NS_ENSURE_ARG(aText); |
| |
272 |
| |
273 nsresult rv = NS_OK; |
| |
274 |
| |
275 nsIContent* content = aText; |
| |
276 const nsTextFragment* frag; |
| |
277 if (!content || !(frag = content->GetText())) { |
| |
278 return NS_ERROR_FAILURE; |
| |
279 } |
| |
280 |
| |
281 int32_t fragLength = frag->GetLength(); |
| |
282 int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength); |
| |
283 NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); |
| |
284 |
| |
285 int32_t length = endoffset - aStartOffset; |
| |
286 if (length <= 0) { |
| |
287 return NS_OK; |
| |
288 } |
| |
289 |
| |
290 nsAutoString textstr; |
| |
291 if (frag->Is2b()) { |
| |
292 textstr.Assign(frag->Get2b() + aStartOffset, length); |
| |
293 } |
| |
294 else { |
| |
295 // AssignASCII is for 7-bit character only, so don't use it |
| |
296 const char *data = frag->Get1b(); |
| |
297 CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr); |
| |
298 } |
| |
299 |
| |
300 mOutputString = &aStr; |
| |
301 |
| |
302 // We have to split the string across newlines |
| |
303 // to match parser behavior |
| |
304 int32_t start = 0; |
| |
305 int32_t offset = textstr.FindCharInSet("\n\r"); |
| |
306 while (offset != kNotFound) { |
| |
307 |
| |
308 if (offset>start) { |
| |
309 // Pass in the line |
| |
310 DoAddText(false, |
| |
311 Substring(textstr, start, offset-start)); |
| |
312 } |
| |
313 |
| |
314 // Pass in a newline |
| |
315 DoAddText(true, mLineBreak); |
| |
316 |
| |
317 start = offset+1; |
| |
318 offset = textstr.FindCharInSet("\n\r", start); |
| |
319 } |
| |
320 |
| |
321 // Consume the last bit of the string if there's any left |
| |
322 if (start < length) { |
| |
323 if (start) { |
| |
324 DoAddText(false, Substring(textstr, start, length - start)); |
| |
325 } |
| |
326 else { |
| |
327 DoAddText(false, textstr); |
| |
328 } |
| |
329 } |
| |
330 |
| |
331 mOutputString = nullptr; |
| |
332 |
| |
333 return rv; |
| |
334 } |
| |
335 |
| |
336 NS_IMETHODIMP |
| |
337 nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection, |
| |
338 int32_t aStartOffset, |
| |
339 int32_t aEndOffset, |
| |
340 nsAString& aStr) |
| |
341 { |
| |
342 return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr); |
| |
343 } |
| |
344 |
| |
345 NS_IMETHODIMP |
| |
346 nsPlainTextSerializer::AppendElementStart(Element* aElement, |
| |
347 Element* aOriginalElement, |
| |
348 nsAString& aStr) |
| |
349 { |
| |
350 NS_ENSURE_ARG(aElement); |
| |
351 |
| |
352 mElement = aElement; |
| |
353 |
| |
354 nsresult rv; |
| |
355 nsIAtom* id = GetIdForContent(mElement); |
| |
356 |
| |
357 bool isContainer = !nsContentUtils::IsHTMLVoid(id); |
| |
358 |
| |
359 mOutputString = &aStr; |
| |
360 |
| |
361 if (isContainer) { |
| |
362 rv = DoOpenContainer(id); |
| |
363 } |
| |
364 else { |
| |
365 rv = DoAddLeaf(id); |
| |
366 } |
| |
367 |
| |
368 mElement = nullptr; |
| |
369 mOutputString = nullptr; |
| |
370 |
| |
371 if (id == nsGkAtoms::head) { |
| |
372 ++mHeadLevel; |
| |
373 } |
| |
374 |
| |
375 return rv; |
| |
376 } |
| |
377 |
| |
378 NS_IMETHODIMP |
| |
379 nsPlainTextSerializer::AppendElementEnd(Element* aElement, |
| |
380 nsAString& aStr) |
| |
381 { |
| |
382 NS_ENSURE_ARG(aElement); |
| |
383 |
| |
384 mElement = aElement; |
| |
385 |
| |
386 nsresult rv; |
| |
387 nsIAtom* id = GetIdForContent(mElement); |
| |
388 |
| |
389 bool isContainer = !nsContentUtils::IsHTMLVoid(id); |
| |
390 |
| |
391 mOutputString = &aStr; |
| |
392 |
| |
393 rv = NS_OK; |
| |
394 if (isContainer) { |
| |
395 rv = DoCloseContainer(id); |
| |
396 } |
| |
397 |
| |
398 mElement = nullptr; |
| |
399 mOutputString = nullptr; |
| |
400 |
| |
401 if (id == nsGkAtoms::head) { |
| |
402 NS_ASSERTION(mHeadLevel != 0, |
| |
403 "mHeadLevel being decremented below 0"); |
| |
404 --mHeadLevel; |
| |
405 } |
| |
406 |
| |
407 return rv; |
| |
408 } |
| |
409 |
| |
410 NS_IMETHODIMP |
| |
411 nsPlainTextSerializer::Flush(nsAString& aStr) |
| |
412 { |
| |
413 mOutputString = &aStr; |
| |
414 FlushLine(); |
| |
415 mOutputString = nullptr; |
| |
416 return NS_OK; |
| |
417 } |
| |
418 |
| |
419 NS_IMETHODIMP |
| |
420 nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument, |
| |
421 nsAString& aStr) |
| |
422 { |
| |
423 return NS_OK; |
| |
424 } |
| |
425 |
| |
426 nsresult |
| |
427 nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag) |
| |
428 { |
| |
429 // Check if we need output current node as placeholder character and ignore |
| |
430 // child nodes. |
| |
431 if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { |
| |
432 if (mIgnoredChildNodeLevel == 0) { |
| |
433 // Serialize current node as placeholder character |
| |
434 Write(NS_LITERAL_STRING("\xFFFC")); |
| |
435 } |
| |
436 // Ignore child nodes. |
| |
437 mIgnoredChildNodeLevel++; |
| |
438 return NS_OK; |
| |
439 } |
| |
440 |
| |
441 if (mFlags & nsIDocumentEncoder::OutputRaw) { |
| |
442 // Raw means raw. Don't even think about doing anything fancy |
| |
443 // here like indenting, adding line breaks or any other |
| |
444 // characters such as list item bullets, quote characters |
| |
445 // around <q>, etc. I mean it! Don't make me smack you! |
| |
446 |
| |
447 return NS_OK; |
| |
448 } |
| |
449 |
| |
450 if (mTagStackIndex < TagStackSize) { |
| |
451 mTagStack[mTagStackIndex++] = aTag; |
| |
452 } |
| |
453 |
| |
454 if (mIgnoreAboveIndex != (uint32_t)kNotFound) { |
| |
455 return NS_OK; |
| |
456 } |
| |
457 |
| |
458 // Reset this so that <blockquote type=cite> doesn't affect the whitespace |
| |
459 // above random <pre>s below it. |
| |
460 mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote && |
| |
461 aTag == nsGkAtoms::pre; |
| |
462 |
| |
463 bool isInCiteBlockquote = false; |
| |
464 |
| |
465 // XXX special-case <blockquote type=cite> so that we don't add additional |
| |
466 // newlines before the text. |
| |
467 if (aTag == nsGkAtoms::blockquote) { |
| |
468 nsAutoString value; |
| |
469 nsresult rv = GetAttributeValue(nsGkAtoms::type, value); |
| |
470 isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite"); |
| |
471 } |
| |
472 |
| |
473 if (mLineBreakDue && !isInCiteBlockquote) |
| |
474 EnsureVerticalSpace(mFloatingLines); |
| |
475 |
| |
476 // Check if this tag's content that should not be output |
| |
477 if ((aTag == nsGkAtoms::noscript && |
| |
478 !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) || |
| |
479 ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) && |
| |
480 !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) { |
| |
481 // Ignore everything that follows the current tag in |
| |
482 // question until a matching end tag is encountered. |
| |
483 mIgnoreAboveIndex = mTagStackIndex - 1; |
| |
484 return NS_OK; |
| |
485 } |
| |
486 |
| |
487 if (aTag == nsGkAtoms::body) { |
| |
488 // Try to figure out here whether we have a |
| |
489 // preformatted style attribute. |
| |
490 // |
| |
491 // Trigger on the presence of a "pre-wrap" in the |
| |
492 // style attribute. That's a very simplistic way to do |
| |
493 // it, but better than nothing. |
| |
494 // Also set mWrapColumn to the value given there |
| |
495 // (which arguably we should only do if told to do so). |
| |
496 nsAutoString style; |
| |
497 int32_t whitespace; |
| |
498 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) && |
| |
499 (kNotFound != (whitespace = style.Find("white-space:")))) { |
| |
500 |
| |
501 if (kNotFound != style.Find("pre-wrap", true, whitespace)) { |
| |
502 #ifdef DEBUG_preformatted |
| |
503 printf("Set mPreFormatted based on style pre-wrap\n"); |
| |
504 #endif |
| |
505 mPreFormatted = true; |
| |
506 int32_t widthOffset = style.Find("width:"); |
| |
507 if (widthOffset >= 0) { |
| |
508 // We have to search for the ch before the semicolon, |
| |
509 // not for the semicolon itself, because nsString::ToInteger() |
| |
510 // considers 'c' to be a valid numeric char (even if radix=10) |
| |
511 // but then gets confused if it sees it next to the number |
| |
512 // when the radix specified was 10, and returns an error code. |
| |
513 int32_t semiOffset = style.Find("ch", false, widthOffset+6); |
| |
514 int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6 |
| |
515 : style.Length() - widthOffset); |
| |
516 nsAutoString widthstr; |
| |
517 style.Mid(widthstr, widthOffset+6, length); |
| |
518 nsresult err; |
| |
519 int32_t col = widthstr.ToInteger(&err); |
| |
520 |
| |
521 if (NS_SUCCEEDED(err)) { |
| |
522 mWrapColumn = (uint32_t)col; |
| |
523 #ifdef DEBUG_preformatted |
| |
524 printf("Set wrap column to %d based on style\n", mWrapColumn); |
| |
525 #endif |
| |
526 } |
| |
527 } |
| |
528 } |
| |
529 else if (kNotFound != style.Find("pre", true, whitespace)) { |
| |
530 #ifdef DEBUG_preformatted |
| |
531 printf("Set mPreFormatted based on style pre\n"); |
| |
532 #endif |
| |
533 mPreFormatted = true; |
| |
534 mWrapColumn = 0; |
| |
535 } |
| |
536 } |
| |
537 else { |
| |
538 /* See comment at end of function. */ |
| |
539 mInWhitespace = true; |
| |
540 mPreFormatted = false; |
| |
541 } |
| |
542 |
| |
543 return NS_OK; |
| |
544 } |
| |
545 |
| |
546 // Keep this in sync with DoCloseContainer! |
| |
547 if (!DoOutput()) { |
| |
548 return NS_OK; |
| |
549 } |
| |
550 |
| |
551 if (aTag == nsGkAtoms::p) |
| |
552 EnsureVerticalSpace(1); |
| |
553 else if (aTag == nsGkAtoms::pre) { |
| |
554 if (GetLastBool(mIsInCiteBlockquote)) |
| |
555 EnsureVerticalSpace(0); |
| |
556 else if (mHasWrittenCiteBlockquote) { |
| |
557 EnsureVerticalSpace(0); |
| |
558 mHasWrittenCiteBlockquote = false; |
| |
559 } |
| |
560 else |
| |
561 EnsureVerticalSpace(1); |
| |
562 } |
| |
563 else if (aTag == nsGkAtoms::tr) { |
| |
564 PushBool(mHasWrittenCellsForRow, false); |
| |
565 } |
| |
566 else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) { |
| |
567 // We must make sure that the content of two table cells get a |
| |
568 // space between them. |
| |
569 |
| |
570 // To make the separation between cells most obvious and |
| |
571 // importable, we use a TAB. |
| |
572 if (GetLastBool(mHasWrittenCellsForRow)) { |
| |
573 // Bypass |Write| so that the TAB isn't compressed away. |
| |
574 AddToLine(MOZ_UTF16("\t"), 1); |
| |
575 mInWhitespace = true; |
| |
576 } |
| |
577 else if (mHasWrittenCellsForRow.IsEmpty()) { |
| |
578 // We don't always see a <tr> (nor a <table>) before the <td> if we're |
| |
579 // copying part of a table |
| |
580 PushBool(mHasWrittenCellsForRow, true); // will never be popped |
| |
581 } |
| |
582 else { |
| |
583 SetLastBool(mHasWrittenCellsForRow, true); |
| |
584 } |
| |
585 } |
| |
586 else if (aTag == nsGkAtoms::ul) { |
| |
587 // Indent here to support nested lists, which aren't included in li :-( |
| |
588 EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); |
| |
589 // Must end the current line before we change indention |
| |
590 mIndent += kIndentSizeList; |
| |
591 mULCount++; |
| |
592 } |
| |
593 else if (aTag == nsGkAtoms::ol) { |
| |
594 EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); |
| |
595 if (mFlags & nsIDocumentEncoder::OutputFormatted) { |
| |
596 // Must end the current line before we change indention |
| |
597 if (mOLStackIndex < OLStackSize) { |
| |
598 nsAutoString startAttr; |
| |
599 int32_t startVal = 1; |
| |
600 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) { |
| |
601 nsresult rv = NS_OK; |
| |
602 startVal = startAttr.ToInteger(&rv); |
| |
603 if (NS_FAILED(rv)) |
| |
604 startVal = 1; |
| |
605 } |
| |
606 mOLStack[mOLStackIndex++] = startVal; |
| |
607 } |
| |
608 } else { |
| |
609 mOLStackIndex++; |
| |
610 } |
| |
611 mIndent += kIndentSizeList; // see ul |
| |
612 } |
| |
613 else if (aTag == nsGkAtoms::li && |
| |
614 (mFlags & nsIDocumentEncoder::OutputFormatted)) { |
| |
615 if (mTagStackIndex > 1 && IsInOL()) { |
| |
616 if (mOLStackIndex > 0) { |
| |
617 nsAutoString valueAttr; |
| |
618 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) { |
| |
619 nsresult rv = NS_OK; |
| |
620 int32_t valueAttrVal = valueAttr.ToInteger(&rv); |
| |
621 if (NS_SUCCEEDED(rv)) |
| |
622 mOLStack[mOLStackIndex-1] = valueAttrVal; |
| |
623 } |
| |
624 // This is what nsBulletFrame does for OLs: |
| |
625 mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10); |
| |
626 } |
| |
627 else { |
| |
628 mInIndentString.Append(char16_t('#')); |
| |
629 } |
| |
630 |
| |
631 mInIndentString.Append(char16_t('.')); |
| |
632 |
| |
633 } |
| |
634 else { |
| |
635 static char bulletCharArray[] = "*o+#"; |
| |
636 uint32_t index = mULCount > 0 ? (mULCount - 1) : 3; |
| |
637 char bulletChar = bulletCharArray[index % 4]; |
| |
638 mInIndentString.Append(char16_t(bulletChar)); |
| |
639 } |
| |
640 |
| |
641 mInIndentString.Append(char16_t(' ')); |
| |
642 } |
| |
643 else if (aTag == nsGkAtoms::dl) { |
| |
644 EnsureVerticalSpace(1); |
| |
645 } |
| |
646 else if (aTag == nsGkAtoms::dt) { |
| |
647 EnsureVerticalSpace(0); |
| |
648 } |
| |
649 else if (aTag == nsGkAtoms::dd) { |
| |
650 EnsureVerticalSpace(0); |
| |
651 mIndent += kIndentSizeDD; |
| |
652 } |
| |
653 else if (aTag == nsGkAtoms::span) { |
| |
654 ++mSpanLevel; |
| |
655 } |
| |
656 else if (aTag == nsGkAtoms::blockquote) { |
| |
657 // Push |
| |
658 PushBool(mIsInCiteBlockquote, isInCiteBlockquote); |
| |
659 if (isInCiteBlockquote) { |
| |
660 EnsureVerticalSpace(0); |
| |
661 mCiteQuoteLevel++; |
| |
662 } |
| |
663 else { |
| |
664 EnsureVerticalSpace(1); |
| |
665 mIndent += kTabSize; // Check for some maximum value? |
| |
666 } |
| |
667 } |
| |
668 else if (aTag == nsGkAtoms::q) { |
| |
669 Write(NS_LITERAL_STRING("\"")); |
| |
670 } |
| |
671 |
| |
672 // Else make sure we'll separate block level tags, |
| |
673 // even if we're about to leave, before doing any other formatting. |
| |
674 else if (nsContentUtils::IsHTMLBlock(aTag)) { |
| |
675 EnsureVerticalSpace(0); |
| |
676 } |
| |
677 |
| |
678 ////////////////////////////////////////////////////////////// |
| |
679 if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { |
| |
680 return NS_OK; |
| |
681 } |
| |
682 ////////////////////////////////////////////////////////////// |
| |
683 // The rest of this routine is formatted output stuff, |
| |
684 // which we should skip if we're not formatted: |
| |
685 ////////////////////////////////////////////////////////////// |
| |
686 |
| |
687 // Push on stack |
| |
688 bool currentNodeIsConverted = IsCurrentNodeConverted(); |
| |
689 |
| |
690 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || |
| |
691 aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || |
| |
692 aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) |
| |
693 { |
| |
694 EnsureVerticalSpace(2); |
| |
695 if (mHeaderStrategy == 2) { // numbered |
| |
696 mIndent += kIndentSizeHeaders; |
| |
697 // Caching |
| |
698 int32_t level = HeaderLevel(aTag); |
| |
699 // Increase counter for current level |
| |
700 mHeaderCounter[level]++; |
| |
701 // Reset all lower levels |
| |
702 int32_t i; |
| |
703 |
| |
704 for (i = level + 1; i <= 6; i++) { |
| |
705 mHeaderCounter[i] = 0; |
| |
706 } |
| |
707 |
| |
708 // Construct numbers |
| |
709 nsAutoString leadup; |
| |
710 for (i = 1; i <= level; i++) { |
| |
711 leadup.AppendInt(mHeaderCounter[i]); |
| |
712 leadup.Append(char16_t('.')); |
| |
713 } |
| |
714 leadup.Append(char16_t(' ')); |
| |
715 Write(leadup); |
| |
716 } |
| |
717 else if (mHeaderStrategy == 1) { // indent increasingly |
| |
718 mIndent += kIndentSizeHeaders; |
| |
719 for (int32_t i = HeaderLevel(aTag); i > 1; i--) { |
| |
720 // for h(x), run x-1 times |
| |
721 mIndent += kIndentIncrementHeaders; |
| |
722 } |
| |
723 } |
| |
724 } |
| |
725 else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) { |
| |
726 nsAutoString url; |
| |
727 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) |
| |
728 && !url.IsEmpty()) { |
| |
729 mURL = url; |
| |
730 } |
| |
731 } |
| |
732 else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) { |
| |
733 Write(NS_LITERAL_STRING("^")); |
| |
734 } |
| |
735 else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) { |
| |
736 Write(NS_LITERAL_STRING("_")); |
| |
737 } |
| |
738 else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { |
| |
739 Write(NS_LITERAL_STRING("|")); |
| |
740 } |
| |
741 else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) |
| |
742 && mStructs && !currentNodeIsConverted) { |
| |
743 Write(NS_LITERAL_STRING("*")); |
| |
744 } |
| |
745 else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) |
| |
746 && mStructs && !currentNodeIsConverted) { |
| |
747 Write(NS_LITERAL_STRING("/")); |
| |
748 } |
| |
749 else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { |
| |
750 Write(NS_LITERAL_STRING("_")); |
| |
751 } |
| |
752 |
| |
753 /* Container elements are always block elements, so we shouldn't |
| |
754 output any whitespace immediately after the container tag even if |
| |
755 there's extra whitespace there because the HTML is pretty-printed |
| |
756 or something. To ensure that happens, tell the serializer we're |
| |
757 already in whitespace so it won't output more. */ |
| |
758 mInWhitespace = true; |
| |
759 |
| |
760 return NS_OK; |
| |
761 } |
| |
762 |
| |
763 nsresult |
| |
764 nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag) |
| |
765 { |
| |
766 if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { |
| |
767 mIgnoredChildNodeLevel--; |
| |
768 return NS_OK; |
| |
769 } |
| |
770 |
| |
771 if (mFlags & nsIDocumentEncoder::OutputRaw) { |
| |
772 // Raw means raw. Don't even think about doing anything fancy |
| |
773 // here like indenting, adding line breaks or any other |
| |
774 // characters such as list item bullets, quote characters |
| |
775 // around <q>, etc. I mean it! Don't make me smack you! |
| |
776 |
| |
777 return NS_OK; |
| |
778 } |
| |
779 |
| |
780 if (mTagStackIndex > 0) { |
| |
781 --mTagStackIndex; |
| |
782 } |
| |
783 |
| |
784 if (mTagStackIndex >= mIgnoreAboveIndex) { |
| |
785 if (mTagStackIndex == mIgnoreAboveIndex) { |
| |
786 // We're dealing with the close tag whose matching |
| |
787 // open tag had set the mIgnoreAboveIndex value. |
| |
788 // Reset mIgnoreAboveIndex before discarding this tag. |
| |
789 mIgnoreAboveIndex = (uint32_t)kNotFound; |
| |
790 } |
| |
791 return NS_OK; |
| |
792 } |
| |
793 |
| |
794 // End current line if we're ending a block level tag |
| |
795 if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) { |
| |
796 // We want the output to end with a new line, |
| |
797 // but in preformatted areas like text fields, |
| |
798 // we can't emit newlines that weren't there. |
| |
799 // So add the newline only in the case of formatted output. |
| |
800 if (mFlags & nsIDocumentEncoder::OutputFormatted) { |
| |
801 EnsureVerticalSpace(0); |
| |
802 } |
| |
803 else { |
| |
804 FlushLine(); |
| |
805 } |
| |
806 // We won't want to do anything with these in formatted mode either, |
| |
807 // so just return now: |
| |
808 return NS_OK; |
| |
809 } |
| |
810 |
| |
811 // Keep this in sync with DoOpenContainer! |
| |
812 if (!DoOutput()) { |
| |
813 return NS_OK; |
| |
814 } |
| |
815 |
| |
816 if (aTag == nsGkAtoms::tr) { |
| |
817 PopBool(mHasWrittenCellsForRow); |
| |
818 // Should always end a line, but get no more whitespace |
| |
819 if (mFloatingLines < 0) |
| |
820 mFloatingLines = 0; |
| |
821 mLineBreakDue = true; |
| |
822 } |
| |
823 else if (((aTag == nsGkAtoms::li) || |
| |
824 (aTag == nsGkAtoms::dt)) && |
| |
825 (mFlags & nsIDocumentEncoder::OutputFormatted)) { |
| |
826 // Items that should always end a line, but get no more whitespace |
| |
827 if (mFloatingLines < 0) |
| |
828 mFloatingLines = 0; |
| |
829 mLineBreakDue = true; |
| |
830 } |
| |
831 else if (aTag == nsGkAtoms::pre) { |
| |
832 mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1; |
| |
833 mLineBreakDue = true; |
| |
834 } |
| |
835 else if (aTag == nsGkAtoms::ul) { |
| |
836 FlushLine(); |
| |
837 mIndent -= kIndentSizeList; |
| |
838 if (--mULCount + mOLStackIndex == 0) { |
| |
839 mFloatingLines = 1; |
| |
840 mLineBreakDue = true; |
| |
841 } |
| |
842 } |
| |
843 else if (aTag == nsGkAtoms::ol) { |
| |
844 FlushLine(); // Doing this after decreasing OLStackIndex would be wrong. |
| |
845 mIndent -= kIndentSizeList; |
| |
846 NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!"); |
| |
847 mOLStackIndex--; |
| |
848 if (mULCount + mOLStackIndex == 0) { |
| |
849 mFloatingLines = 1; |
| |
850 mLineBreakDue = true; |
| |
851 } |
| |
852 } |
| |
853 else if (aTag == nsGkAtoms::dl) { |
| |
854 mFloatingLines = 1; |
| |
855 mLineBreakDue = true; |
| |
856 } |
| |
857 else if (aTag == nsGkAtoms::dd) { |
| |
858 FlushLine(); |
| |
859 mIndent -= kIndentSizeDD; |
| |
860 } |
| |
861 else if (aTag == nsGkAtoms::span) { |
| |
862 NS_ASSERTION(mSpanLevel, "Span level will be negative!"); |
| |
863 --mSpanLevel; |
| |
864 } |
| |
865 else if (aTag == nsGkAtoms::div) { |
| |
866 if (mFloatingLines < 0) |
| |
867 mFloatingLines = 0; |
| |
868 mLineBreakDue = true; |
| |
869 } |
| |
870 else if (aTag == nsGkAtoms::blockquote) { |
| |
871 FlushLine(); // Is this needed? |
| |
872 |
| |
873 // Pop |
| |
874 bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote); |
| |
875 |
| |
876 if (isInCiteBlockquote) { |
| |
877 NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!"); |
| |
878 mCiteQuoteLevel--; |
| |
879 mFloatingLines = 0; |
| |
880 mHasWrittenCiteBlockquote = true; |
| |
881 } |
| |
882 else { |
| |
883 mIndent -= kTabSize; |
| |
884 mFloatingLines = 1; |
| |
885 } |
| |
886 mLineBreakDue = true; |
| |
887 } |
| |
888 else if (aTag == nsGkAtoms::q) { |
| |
889 Write(NS_LITERAL_STRING("\"")); |
| |
890 } |
| |
891 else if (nsContentUtils::IsHTMLBlock(aTag) |
| |
892 && aTag != nsGkAtoms::script) { |
| |
893 // All other blocks get 1 vertical space after them |
| |
894 // in formatted mode, otherwise 0. |
| |
895 // This is hard. Sometimes 0 is a better number, but |
| |
896 // how to know? |
| |
897 if (mFlags & nsIDocumentEncoder::OutputFormatted) |
| |
898 EnsureVerticalSpace(1); |
| |
899 else { |
| |
900 if (mFloatingLines < 0) |
| |
901 mFloatingLines = 0; |
| |
902 mLineBreakDue = true; |
| |
903 } |
| |
904 } |
| |
905 |
| |
906 ////////////////////////////////////////////////////////////// |
| |
907 if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { |
| |
908 return NS_OK; |
| |
909 } |
| |
910 ////////////////////////////////////////////////////////////// |
| |
911 // The rest of this routine is formatted output stuff, |
| |
912 // which we should skip if we're not formatted: |
| |
913 ////////////////////////////////////////////////////////////// |
| |
914 |
| |
915 // Pop the currentConverted stack |
| |
916 bool currentNodeIsConverted = IsCurrentNodeConverted(); |
| |
917 |
| |
918 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || |
| |
919 aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || |
| |
920 aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) { |
| |
921 |
| |
922 if (mHeaderStrategy) { /*numbered or indent increasingly*/ |
| |
923 mIndent -= kIndentSizeHeaders; |
| |
924 } |
| |
925 if (mHeaderStrategy == 1 /*indent increasingly*/ ) { |
| |
926 for (int32_t i = HeaderLevel(aTag); i > 1; i--) { |
| |
927 // for h(x), run x-1 times |
| |
928 mIndent -= kIndentIncrementHeaders; |
| |
929 } |
| |
930 } |
| |
931 EnsureVerticalSpace(1); |
| |
932 } |
| |
933 else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) { |
| |
934 nsAutoString temp; |
| |
935 temp.AssignLiteral(" <"); |
| |
936 temp += mURL; |
| |
937 temp.Append(char16_t('>')); |
| |
938 Write(temp); |
| |
939 mURL.Truncate(); |
| |
940 } |
| |
941 else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) |
| |
942 && mStructs && !currentNodeIsConverted) { |
| |
943 Write(kSpace); |
| |
944 } |
| |
945 else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { |
| |
946 Write(NS_LITERAL_STRING("|")); |
| |
947 } |
| |
948 else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) |
| |
949 && mStructs && !currentNodeIsConverted) { |
| |
950 Write(NS_LITERAL_STRING("*")); |
| |
951 } |
| |
952 else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) |
| |
953 && mStructs && !currentNodeIsConverted) { |
| |
954 Write(NS_LITERAL_STRING("/")); |
| |
955 } |
| |
956 else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { |
| |
957 Write(NS_LITERAL_STRING("_")); |
| |
958 } |
| |
959 |
| |
960 return NS_OK; |
| |
961 } |
| |
962 |
| |
963 bool |
| |
964 nsPlainTextSerializer::MustSuppressLeaf() |
| |
965 { |
| |
966 if (mIgnoredChildNodeLevel > 0) { |
| |
967 return true; |
| |
968 } |
| |
969 |
| |
970 if ((mTagStackIndex > 1 && |
| |
971 mTagStack[mTagStackIndex-2] == nsGkAtoms::select) || |
| |
972 (mTagStackIndex > 0 && |
| |
973 mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) { |
| |
974 // Don't output the contents of SELECT elements; |
| |
975 // Might be nice, eventually, to output just the selected element. |
| |
976 // Read more in bug 31994. |
| |
977 return true; |
| |
978 } |
| |
979 |
| |
980 if (mTagStackIndex > 0 && |
| |
981 (mTagStack[mTagStackIndex-1] == nsGkAtoms::script || |
| |
982 mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) { |
| |
983 // Don't output the contents of <script> or <style> tags; |
| |
984 return true; |
| |
985 } |
| |
986 |
| |
987 return false; |
| |
988 } |
| |
989 |
| |
990 void |
| |
991 nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText) |
| |
992 { |
| |
993 // If we don't want any output, just return |
| |
994 if (!DoOutput()) { |
| |
995 return; |
| |
996 } |
| |
997 |
| |
998 if (!aIsLineBreak) { |
| |
999 // Make sure to reset this, since it's no longer true. |
| |
1000 mHasWrittenCiteBlockquote = false; |
| |
1001 } |
| |
1002 |
| |
1003 if (mLineBreakDue) |
| |
1004 EnsureVerticalSpace(mFloatingLines); |
| |
1005 |
| |
1006 if (MustSuppressLeaf()) { |
| |
1007 return; |
| |
1008 } |
| |
1009 |
| |
1010 if (aIsLineBreak) { |
| |
1011 // The only times we want to pass along whitespace from the original |
| |
1012 // html source are if we're forced into preformatted mode via flags, |
| |
1013 // or if we're prettyprinting and we're inside a <pre>. |
| |
1014 // Otherwise, either we're collapsing to minimal text, or we're |
| |
1015 // prettyprinting to mimic the html format, and in neither case |
| |
1016 // does the formatting of the html source help us. |
| |
1017 if ((mFlags & nsIDocumentEncoder::OutputPreformatted) || |
| |
1018 (mPreFormatted && !mWrapColumn) || |
| |
1019 IsInPre()) { |
| |
1020 EnsureVerticalSpace(mEmptyLines+1); |
| |
1021 } |
| |
1022 else if (!mInWhitespace) { |
| |
1023 Write(kSpace); |
| |
1024 mInWhitespace = true; |
| |
1025 } |
| |
1026 return; |
| |
1027 } |
| |
1028 |
| |
1029 /* Check, if we are in a link (symbolized with mURL containing the URL) |
| |
1030 and the text is equal to the URL. In that case we don't want to output |
| |
1031 the URL twice so we scrap the text in mURL. */ |
| |
1032 if (!mURL.IsEmpty() && mURL.Equals(aText)) { |
| |
1033 mURL.Truncate(); |
| |
1034 } |
| |
1035 Write(aText); |
| |
1036 } |
| |
1037 |
| |
1038 nsresult |
| |
1039 nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag) |
| |
1040 { |
| |
1041 // If we don't want any output, just return |
| |
1042 if (!DoOutput()) { |
| |
1043 return NS_OK; |
| |
1044 } |
| |
1045 |
| |
1046 if (mLineBreakDue) |
| |
1047 EnsureVerticalSpace(mFloatingLines); |
| |
1048 |
| |
1049 if (MustSuppressLeaf()) { |
| |
1050 return NS_OK; |
| |
1051 } |
| |
1052 |
| |
1053 if (aTag == nsGkAtoms::br) { |
| |
1054 // Another egregious editor workaround, see bug 38194: |
| |
1055 // ignore the bogus br tags that the editor sticks here and there. |
| |
1056 nsAutoString tagAttr; |
| |
1057 if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr)) |
| |
1058 || !tagAttr.EqualsLiteral("_moz")) { |
| |
1059 EnsureVerticalSpace(mEmptyLines+1); |
| |
1060 } |
| |
1061 } |
| |
1062 else if (aTag == nsGkAtoms::hr && |
| |
1063 (mFlags & nsIDocumentEncoder::OutputFormatted)) { |
| |
1064 EnsureVerticalSpace(0); |
| |
1065 |
| |
1066 // Make a line of dashes as wide as the wrap width |
| |
1067 // XXX honoring percentage would be nice |
| |
1068 nsAutoString line; |
| |
1069 uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25); |
| |
1070 while (line.Length() < width) { |
| |
1071 line.Append(char16_t('-')); |
| |
1072 } |
| |
1073 Write(line); |
| |
1074 |
| |
1075 EnsureVerticalSpace(0); |
| |
1076 } |
| |
1077 else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) { |
| |
1078 Write(NS_LITERAL_STRING("\xFFFC")); |
| |
1079 } |
| |
1080 else if (aTag == nsGkAtoms::img) { |
| |
1081 /* Output (in decreasing order of preference) |
| |
1082 alt, title or nothing */ |
| |
1083 // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG> |
| |
1084 nsAutoString imageDescription; |
| |
1085 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, |
| |
1086 imageDescription))) { |
| |
1087 // If the alt attribute has an empty value (|alt=""|), output nothing |
| |
1088 } |
| |
1089 else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title, |
| |
1090 imageDescription)) |
| |
1091 && !imageDescription.IsEmpty()) { |
| |
1092 imageDescription = NS_LITERAL_STRING(" [") + |
| |
1093 imageDescription + |
| |
1094 NS_LITERAL_STRING("] "); |
| |
1095 } |
| |
1096 |
| |
1097 Write(imageDescription); |
| |
1098 } |
| |
1099 |
| |
1100 return NS_OK; |
| |
1101 } |
| |
1102 |
| |
1103 /** |
| |
1104 * Adds as many newline as necessary to get |noOfRows| empty lines |
| |
1105 * |
| |
1106 * noOfRows = -1 : Being in the middle of some line of text |
| |
1107 * noOfRows = 0 : Being at the start of a line |
| |
1108 * noOfRows = n>0 : Having n empty lines before the current line. |
| |
1109 */ |
| |
1110 void |
| |
1111 nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows) |
| |
1112 { |
| |
1113 // If we have something in the indent we probably want to output |
| |
1114 // it and it's not included in the count for empty lines so we don't |
| |
1115 // realize that we should start a new line. |
| |
1116 if (noOfRows >= 0 && !mInIndentString.IsEmpty()) { |
| |
1117 EndLine(false); |
| |
1118 mInWhitespace = true; |
| |
1119 } |
| |
1120 |
| |
1121 while(mEmptyLines < noOfRows) { |
| |
1122 EndLine(false); |
| |
1123 mInWhitespace = true; |
| |
1124 } |
| |
1125 mLineBreakDue = false; |
| |
1126 mFloatingLines = -1; |
| |
1127 } |
| |
1128 |
| |
1129 /** |
| |
1130 * This empties the current line cache without adding a NEWLINE. |
| |
1131 * Should not be used if line wrapping is of importance since |
| |
1132 * this function destroys the cache information. |
| |
1133 * |
| |
1134 * It will also write indentation and quotes if we believe us to be |
| |
1135 * at the start of the line. |
| |
1136 */ |
| |
1137 void |
| |
1138 nsPlainTextSerializer::FlushLine() |
| |
1139 { |
| |
1140 if (!mCurrentLine.IsEmpty()) { |
| |
1141 if (mAtFirstColumn) { |
| |
1142 OutputQuotesAndIndent(); // XXX: Should we always do this? Bug? |
| |
1143 } |
| |
1144 |
| |
1145 Output(mCurrentLine); |
| |
1146 mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty(); |
| |
1147 mCurrentLine.Truncate(); |
| |
1148 mCurrentLineWidth = 0; |
| |
1149 } |
| |
1150 } |
| |
1151 |
| |
1152 /** |
| |
1153 * Prints the text to output to our current output device (the string mOutputString). |
| |
1154 * The only logic here is to replace non breaking spaces with a normal space since |
| |
1155 * most (all?) receivers of the result won't understand the nbsp and even be |
| |
1156 * confused by it. |
| |
1157 */ |
| |
1158 void |
| |
1159 nsPlainTextSerializer::Output(nsString& aString) |
| |
1160 { |
| |
1161 if (!aString.IsEmpty()) { |
| |
1162 mStartedOutput = true; |
| |
1163 } |
| |
1164 |
| |
1165 if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) { |
| |
1166 // First, replace all nbsp characters with spaces, |
| |
1167 // which the unicode encoder won't do for us. |
| |
1168 aString.ReplaceChar(kNBSP, kSPACE); |
| |
1169 } |
| |
1170 mOutputString->Append(aString); |
| |
1171 } |
| |
1172 |
| |
1173 static bool |
| |
1174 IsSpaceStuffable(const char16_t *s) |
| |
1175 { |
| |
1176 if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP || |
| |
1177 nsCRT::strncmp(s, MOZ_UTF16("From "), 5) == 0) |
| |
1178 return true; |
| |
1179 else |
| |
1180 return false; |
| |
1181 } |
| |
1182 |
| |
1183 /** |
| |
1184 * This function adds a piece of text to the current stored line. If we are |
| |
1185 * wrapping text and the stored line will become too long, a suitable |
| |
1186 * location to wrap will be found and the line that's complete will be |
| |
1187 * output. |
| |
1188 */ |
| |
1189 void |
| |
1190 nsPlainTextSerializer::AddToLine(const char16_t * aLineFragment, |
| |
1191 int32_t aLineFragmentLength) |
| |
1192 { |
| |
1193 uint32_t prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent; |
| |
1194 |
| |
1195 if (mLineBreakDue) |
| |
1196 EnsureVerticalSpace(mFloatingLines); |
| |
1197 |
| |
1198 int32_t linelength = mCurrentLine.Length(); |
| |
1199 if (0 == linelength) { |
| |
1200 if (0 == aLineFragmentLength) { |
| |
1201 // Nothing at all. Are you kidding me? |
| |
1202 return; |
| |
1203 } |
| |
1204 |
| |
1205 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
| |
1206 if (IsSpaceStuffable(aLineFragment) |
| |
1207 && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway |
| |
1208 ) |
| |
1209 { |
| |
1210 // Space stuffing a la RFC 2646 (format=flowed). |
| |
1211 mCurrentLine.Append(char16_t(' ')); |
| |
1212 |
| |
1213 if (MayWrap()) { |
| |
1214 mCurrentLineWidth += GetUnicharWidth(' '); |
| |
1215 #ifdef DEBUG_wrapping |
| |
1216 NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(), |
| |
1217 mCurrentLine.Length()) == |
| |
1218 (int32_t)mCurrentLineWidth, |
| |
1219 "mCurrentLineWidth and reality out of sync!"); |
| |
1220 #endif |
| |
1221 } |
| |
1222 } |
| |
1223 } |
| |
1224 mEmptyLines=-1; |
| |
1225 } |
| |
1226 |
| |
1227 mCurrentLine.Append(aLineFragment, aLineFragmentLength); |
| |
1228 if (MayWrap()) { |
| |
1229 mCurrentLineWidth += GetUnicharStringWidth(aLineFragment, |
| |
1230 aLineFragmentLength); |
| |
1231 #ifdef DEBUG_wrapping |
| |
1232 NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(), |
| |
1233 mCurrentLine.Length()) == |
| |
1234 (int32_t)mCurrentLineWidth, |
| |
1235 "mCurrentLineWidth and reality out of sync!"); |
| |
1236 #endif |
| |
1237 } |
| |
1238 |
| |
1239 linelength = mCurrentLine.Length(); |
| |
1240 |
| |
1241 // Wrap? |
| |
1242 if (MayWrap()) |
| |
1243 { |
| |
1244 #ifdef DEBUG_wrapping |
| |
1245 NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(), |
| |
1246 mCurrentLine.Length()) == |
| |
1247 (int32_t)mCurrentLineWidth, |
| |
1248 "mCurrentLineWidth and reality out of sync!"); |
| |
1249 #endif |
| |
1250 // Yes, wrap! |
| |
1251 // The "+4" is to avoid wrap lines that only would be a couple |
| |
1252 // of letters too long. We give this bonus only if the |
| |
1253 // wrapcolumn is more than 20. |
| |
1254 uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0; |
| |
1255 |
| |
1256 // XXX: Should calculate prefixwidth with GetUnicharStringWidth |
| |
1257 while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) { |
| |
1258 // We go from the end removing one letter at a time until |
| |
1259 // we have a reasonable width |
| |
1260 int32_t goodSpace = mCurrentLine.Length(); |
| |
1261 uint32_t width = mCurrentLineWidth; |
| |
1262 while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) { |
| |
1263 goodSpace--; |
| |
1264 width -= GetUnicharWidth(mCurrentLine[goodSpace]); |
| |
1265 } |
| |
1266 |
| |
1267 goodSpace++; |
| |
1268 |
| |
1269 if (mLineBreaker) { |
| |
1270 goodSpace = mLineBreaker->Prev(mCurrentLine.get(), |
| |
1271 mCurrentLine.Length(), goodSpace); |
| |
1272 if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT && |
| |
1273 nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) { |
| |
1274 --goodSpace; // adjust the position since line breaker returns a position next to space |
| |
1275 } |
| |
1276 } |
| |
1277 // fallback if the line breaker is unavailable or failed |
| |
1278 if (!mLineBreaker) { |
| |
1279 goodSpace = mWrapColumn-prefixwidth; |
| |
1280 while (goodSpace >= 0 && |
| |
1281 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { |
| |
1282 goodSpace--; |
| |
1283 } |
| |
1284 } |
| |
1285 |
| |
1286 nsAutoString restOfLine; |
| |
1287 if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) { |
| |
1288 // If we don't found a good place to break, accept long line and |
| |
1289 // try to find another place to break |
| |
1290 goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1; |
| |
1291 if (mLineBreaker) { |
| |
1292 if ((uint32_t)goodSpace < mCurrentLine.Length()) |
| |
1293 goodSpace = mLineBreaker->Next(mCurrentLine.get(), |
| |
1294 mCurrentLine.Length(), goodSpace); |
| |
1295 if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) |
| |
1296 goodSpace = mCurrentLine.Length(); |
| |
1297 } |
| |
1298 // fallback if the line breaker is unavailable or failed |
| |
1299 if (!mLineBreaker) { |
| |
1300 goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth; |
| |
1301 while (goodSpace < linelength && |
| |
1302 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { |
| |
1303 goodSpace++; |
| |
1304 } |
| |
1305 } |
| |
1306 } |
| |
1307 |
| |
1308 if ((goodSpace < linelength) && (goodSpace > 0)) { |
| |
1309 // Found a place to break |
| |
1310 |
| |
1311 // -1 (trim a char at the break position) |
| |
1312 // only if the line break was a space. |
| |
1313 if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { |
| |
1314 mCurrentLine.Right(restOfLine, linelength-goodSpace-1); |
| |
1315 } |
| |
1316 else { |
| |
1317 mCurrentLine.Right(restOfLine, linelength-goodSpace); |
| |
1318 } |
| |
1319 // if breaker was U+0020, it has to consider for delsp=yes support |
| |
1320 bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' '; |
| |
1321 mCurrentLine.Truncate(goodSpace); |
| |
1322 EndLine(true, breakBySpace); |
| |
1323 mCurrentLine.Truncate(); |
| |
1324 // Space stuff new line? |
| |
1325 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
| |
1326 if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get()) |
| |
1327 && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway |
| |
1328 ) |
| |
1329 { |
| |
1330 // Space stuffing a la RFC 2646 (format=flowed). |
| |
1331 mCurrentLine.Append(char16_t(' ')); |
| |
1332 //XXX doesn't seem to work correctly for ' ' |
| |
1333 } |
| |
1334 } |
| |
1335 mCurrentLine.Append(restOfLine); |
| |
1336 mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(), |
| |
1337 mCurrentLine.Length()); |
| |
1338 linelength = mCurrentLine.Length(); |
| |
1339 mEmptyLines = -1; |
| |
1340 } |
| |
1341 else { |
| |
1342 // Nothing to do. Hopefully we get more data later |
| |
1343 // to use for a place to break line |
| |
1344 break; |
| |
1345 } |
| |
1346 } |
| |
1347 } |
| |
1348 else { |
| |
1349 // No wrapping. |
| |
1350 } |
| |
1351 } |
| |
1352 |
| |
1353 /** |
| |
1354 * Outputs the contents of mCurrentLine, and resets line specific |
| |
1355 * variables. Also adds an indentation and prefix if there is |
| |
1356 * one specified. Strips ending spaces from the line if it isn't |
| |
1357 * preformatted. |
| |
1358 */ |
| |
1359 void |
| |
1360 nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace) |
| |
1361 { |
| |
1362 uint32_t currentlinelength = mCurrentLine.Length(); |
| |
1363 |
| |
1364 if (aSoftlinebreak && 0 == currentlinelength) { |
| |
1365 // No meaning |
| |
1366 return; |
| |
1367 } |
| |
1368 |
| |
1369 /* In non-preformatted mode, remove spaces from the end of the line for |
| |
1370 * format=flowed compatibility. Don't do this for these special cases: |
| |
1371 * "-- ", the signature separator (RFC 2646) shouldn't be touched and |
| |
1372 * "- -- ", the OpenPGP dash-escaped signature separator in inline |
| |
1373 * signed messages according to the OpenPGP standard (RFC 2440). |
| |
1374 */ |
| |
1375 if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) && |
| |
1376 !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) && |
| |
1377 (aSoftlinebreak || |
| |
1378 !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) { |
| |
1379 // Remove spaces from the end of the line. |
| |
1380 while(currentlinelength > 0 && |
| |
1381 mCurrentLine[currentlinelength-1] == ' ') { |
| |
1382 --currentlinelength; |
| |
1383 } |
| |
1384 mCurrentLine.SetLength(currentlinelength); |
| |
1385 } |
| |
1386 |
| |
1387 if (aSoftlinebreak && |
| |
1388 (mFlags & nsIDocumentEncoder::OutputFormatFlowed) && |
| |
1389 (mIndent == 0)) { |
| |
1390 // Add the soft part of the soft linebreak (RFC 2646 4.1) |
| |
1391 // We only do this when there is no indentation since format=flowed |
| |
1392 // lines and indentation doesn't work well together. |
| |
1393 |
| |
1394 // If breaker character is ASCII space with RFC 3676 support (delsp=yes), |
| |
1395 // add twice space. |
| |
1396 if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace) |
| |
1397 mCurrentLine.Append(NS_LITERAL_STRING(" ")); |
| |
1398 else |
| |
1399 mCurrentLine.Append(char16_t(' ')); |
| |
1400 } |
| |
1401 |
| |
1402 if (aSoftlinebreak) { |
| |
1403 mEmptyLines=0; |
| |
1404 } |
| |
1405 else { |
| |
1406 // Hard break |
| |
1407 if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) { |
| |
1408 mEmptyLines=-1; |
| |
1409 } |
| |
1410 |
| |
1411 mEmptyLines++; |
| |
1412 } |
| |
1413 |
| |
1414 if (mAtFirstColumn) { |
| |
1415 // If we don't have anything "real" to output we have to |
| |
1416 // make sure the indent doesn't end in a space since that |
| |
1417 // would trick a format=flowed-aware receiver. |
| |
1418 bool stripTrailingSpaces = mCurrentLine.IsEmpty(); |
| |
1419 OutputQuotesAndIndent(stripTrailingSpaces); |
| |
1420 } |
| |
1421 |
| |
1422 mCurrentLine.Append(mLineBreak); |
| |
1423 Output(mCurrentLine); |
| |
1424 mCurrentLine.Truncate(); |
| |
1425 mCurrentLineWidth = 0; |
| |
1426 mAtFirstColumn=true; |
| |
1427 mInWhitespace=true; |
| |
1428 mLineBreakDue = false; |
| |
1429 mFloatingLines = -1; |
| |
1430 } |
| |
1431 |
| |
1432 |
| |
1433 /** |
| |
1434 * Outputs the calculated and stored indent and text in the indentation. That is |
| |
1435 * quote chars and numbers for numbered lists and such. It will also reset any |
| |
1436 * stored text to put in the indentation after using it. |
| |
1437 */ |
| |
1438 void |
| |
1439 nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */) |
| |
1440 { |
| |
1441 nsAutoString stringToOutput; |
| |
1442 |
| |
1443 // Put the mail quote "> " chars in, if appropriate: |
| |
1444 if (mCiteQuoteLevel > 0) { |
| |
1445 nsAutoString quotes; |
| |
1446 for(int i=0; i < mCiteQuoteLevel; i++) { |
| |
1447 quotes.Append(char16_t('>')); |
| |
1448 } |
| |
1449 if (!mCurrentLine.IsEmpty()) { |
| |
1450 /* Better don't output a space here, if the line is empty, |
| |
1451 in case a receiving f=f-aware UA thinks, this were a flowed line, |
| |
1452 which it isn't - it's just empty. |
| |
1453 (Flowed lines may be joined with the following one, |
| |
1454 so the empty line may be lost completely.) */ |
| |
1455 quotes.Append(char16_t(' ')); |
| |
1456 } |
| |
1457 stringToOutput = quotes; |
| |
1458 mAtFirstColumn = false; |
| |
1459 } |
| |
1460 |
| |
1461 // Indent if necessary |
| |
1462 int32_t indentwidth = mIndent - mInIndentString.Length(); |
| |
1463 if (indentwidth > 0 |
| |
1464 && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) |
| |
1465 // Don't make empty lines look flowed |
| |
1466 ) { |
| |
1467 nsAutoString spaces; |
| |
1468 for (int i=0; i < indentwidth; ++i) |
| |
1469 spaces.Append(char16_t(' ')); |
| |
1470 stringToOutput += spaces; |
| |
1471 mAtFirstColumn = false; |
| |
1472 } |
| |
1473 |
| |
1474 if (!mInIndentString.IsEmpty()) { |
| |
1475 stringToOutput += mInIndentString; |
| |
1476 mAtFirstColumn = false; |
| |
1477 mInIndentString.Truncate(); |
| |
1478 } |
| |
1479 |
| |
1480 if (stripTrailingSpaces) { |
| |
1481 int32_t lineLength = stringToOutput.Length(); |
| |
1482 while(lineLength > 0 && |
| |
1483 ' ' == stringToOutput[lineLength-1]) { |
| |
1484 --lineLength; |
| |
1485 } |
| |
1486 stringToOutput.SetLength(lineLength); |
| |
1487 } |
| |
1488 |
| |
1489 if (!stringToOutput.IsEmpty()) { |
| |
1490 Output(stringToOutput); |
| |
1491 } |
| |
1492 |
| |
1493 } |
| |
1494 |
| |
1495 /** |
| |
1496 * Write a string. This is the highlevel function to use to get text output. |
| |
1497 * By using AddToLine, Output, EndLine and other functions it handles quotation, |
| |
1498 * line wrapping, indentation, whitespace compression and other things. |
| |
1499 */ |
| |
1500 void |
| |
1501 nsPlainTextSerializer::Write(const nsAString& aStr) |
| |
1502 { |
| |
1503 // XXX Copy necessary to use nsString methods and gain |
| |
1504 // access to underlying buffer |
| |
1505 nsAutoString str(aStr); |
| |
1506 |
| |
1507 #ifdef DEBUG_wrapping |
| |
1508 printf("Write(%s): wrap col = %d\n", |
| |
1509 NS_ConvertUTF16toUTF8(str).get(), mWrapColumn); |
| |
1510 #endif |
| |
1511 |
| |
1512 int32_t bol = 0; |
| |
1513 int32_t newline; |
| |
1514 |
| |
1515 int32_t totLen = str.Length(); |
| |
1516 |
| |
1517 // If the string is empty, do nothing: |
| |
1518 if (totLen <= 0) return; |
| |
1519 |
| |
1520 // For Flowed text change nbsp-ses to spaces at end of lines to allow them |
| |
1521 // to be cut off along with usual spaces if required. (bug #125928) |
| |
1522 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
| |
1523 for (int32_t i = totLen-1; i >= 0; i--) { |
| |
1524 char16_t c = str[i]; |
| |
1525 if ('\n' == c || '\r' == c || ' ' == c || '\t' == c) |
| |
1526 continue; |
| |
1527 if (kNBSP == c) |
| |
1528 str.Replace(i, 1, ' '); |
| |
1529 else |
| |
1530 break; |
| |
1531 } |
| |
1532 } |
| |
1533 |
| |
1534 // We have two major codepaths here. One that does preformatted text and one |
| |
1535 // that does normal formatted text. The one for preformatted text calls |
| |
1536 // Output directly while the other code path goes through AddToLine. |
| |
1537 if ((mPreFormatted && !mWrapColumn) || IsInPre() |
| |
1538 || ((mSpanLevel > 0 || mDontWrapAnyQuotes) |
| |
1539 && mEmptyLines >= 0 && str.First() == char16_t('>'))) { |
| |
1540 // No intelligent wrapping. |
| |
1541 |
| |
1542 // This mustn't be mixed with intelligent wrapping without clearing |
| |
1543 // the mCurrentLine buffer before!!! |
| |
1544 NS_ASSERTION(mCurrentLine.IsEmpty(), |
| |
1545 "Mixed wrapping data and nonwrapping data on the same line"); |
| |
1546 if (!mCurrentLine.IsEmpty()) { |
| |
1547 FlushLine(); |
| |
1548 } |
| |
1549 |
| |
1550 // Put the mail quote "> " chars in, if appropriate. |
| |
1551 // Have to put it in before every line. |
| |
1552 while(bol<totLen) { |
| |
1553 bool outputQuotes = mAtFirstColumn; |
| |
1554 bool atFirstColumn = mAtFirstColumn; |
| |
1555 bool outputLineBreak = false; |
| |
1556 bool spacesOnly = true; |
| |
1557 |
| |
1558 // Find one of '\n' or '\r' using iterators since nsAString |
| |
1559 // doesn't have the old FindCharInSet function. |
| |
1560 nsAString::const_iterator iter; str.BeginReading(iter); |
| |
1561 nsAString::const_iterator done_searching; str.EndReading(done_searching); |
| |
1562 iter.advance(bol); |
| |
1563 int32_t new_newline = bol; |
| |
1564 newline = kNotFound; |
| |
1565 while(iter != done_searching) { |
| |
1566 if ('\n' == *iter || '\r' == *iter) { |
| |
1567 newline = new_newline; |
| |
1568 break; |
| |
1569 } |
| |
1570 if (' ' != *iter) |
| |
1571 spacesOnly = false; |
| |
1572 ++new_newline; |
| |
1573 ++iter; |
| |
1574 } |
| |
1575 |
| |
1576 // Done searching |
| |
1577 nsAutoString stringpart; |
| |
1578 if (newline == kNotFound) { |
| |
1579 // No new lines. |
| |
1580 stringpart.Assign(Substring(str, bol, totLen - bol)); |
| |
1581 if (!stringpart.IsEmpty()) { |
| |
1582 char16_t lastchar = stringpart[stringpart.Length()-1]; |
| |
1583 if ((lastchar == '\t') || (lastchar == ' ') || |
| |
1584 (lastchar == '\r') ||(lastchar == '\n')) { |
| |
1585 mInWhitespace = true; |
| |
1586 } |
| |
1587 else { |
| |
1588 mInWhitespace = false; |
| |
1589 } |
| |
1590 } |
| |
1591 mEmptyLines=-1; |
| |
1592 atFirstColumn = mAtFirstColumn && (totLen-bol)==0; |
| |
1593 bol = totLen; |
| |
1594 } |
| |
1595 else { |
| |
1596 // There is a newline |
| |
1597 stringpart.Assign(Substring(str, bol, newline-bol)); |
| |
1598 mInWhitespace = true; |
| |
1599 outputLineBreak = true; |
| |
1600 mEmptyLines=0; |
| |
1601 atFirstColumn = true; |
| |
1602 bol = newline+1; |
| |
1603 if ('\r' == *iter && bol < totLen && '\n' == *++iter) { |
| |
1604 // There was a CRLF in the input. This used to be illegal and |
| |
1605 // stripped by the parser. Apparently not anymore. Let's skip |
| |
1606 // over the LF. |
| |
1607 bol++; |
| |
1608 } |
| |
1609 } |
| |
1610 |
| |
1611 mCurrentLine.AssignLiteral(""); |
| |
1612 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
| |
1613 if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928 |
| |
1614 !stringpart.EqualsLiteral("-- ") && |
| |
1615 !stringpart.EqualsLiteral("- -- ")) |
| |
1616 stringpart.Trim(" ", false, true, true); |
| |
1617 if (IsSpaceStuffable(stringpart.get()) && stringpart[0] != '>') |
| |
1618 mCurrentLine.Append(char16_t(' ')); |
| |
1619 } |
| |
1620 mCurrentLine.Append(stringpart); |
| |
1621 |
| |
1622 if (outputQuotes) { |
| |
1623 // Note: this call messes with mAtFirstColumn |
| |
1624 OutputQuotesAndIndent(); |
| |
1625 } |
| |
1626 |
| |
1627 Output(mCurrentLine); |
| |
1628 if (outputLineBreak) { |
| |
1629 Output(mLineBreak); |
| |
1630 } |
| |
1631 mAtFirstColumn = atFirstColumn; |
| |
1632 } |
| |
1633 |
| |
1634 // Reset mCurrentLine. |
| |
1635 mCurrentLine.Truncate(); |
| |
1636 |
| |
1637 #ifdef DEBUG_wrapping |
| |
1638 printf("No wrapping: newline is %d, totLen is %d\n", |
| |
1639 newline, totLen); |
| |
1640 #endif |
| |
1641 return; |
| |
1642 } |
| |
1643 |
| |
1644 // Intelligent handling of text |
| |
1645 // If needed, strip out all "end of lines" |
| |
1646 // and multiple whitespace between words |
| |
1647 int32_t nextpos; |
| |
1648 const char16_t * offsetIntoBuffer = nullptr; |
| |
1649 |
| |
1650 while (bol < totLen) { // Loop over lines |
| |
1651 // Find a place where we may have to do whitespace compression |
| |
1652 nextpos = str.FindCharInSet(" \t\n\r", bol); |
| |
1653 #ifdef DEBUG_wrapping |
| |
1654 nsAutoString remaining; |
| |
1655 str.Right(remaining, totLen - bol); |
| |
1656 foo = ToNewCString(remaining); |
| |
1657 // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n", |
| |
1658 // bol, nextpos, totLen, foo); |
| |
1659 nsMemory::Free(foo); |
| |
1660 #endif |
| |
1661 |
| |
1662 if (nextpos == kNotFound) { |
| |
1663 // The rest of the string |
| |
1664 offsetIntoBuffer = str.get() + bol; |
| |
1665 AddToLine(offsetIntoBuffer, totLen-bol); |
| |
1666 bol=totLen; |
| |
1667 mInWhitespace=false; |
| |
1668 } |
| |
1669 else { |
| |
1670 // There's still whitespace left in the string |
| |
1671 if (nextpos != 0 && (nextpos + 1) < totLen) { |
| |
1672 offsetIntoBuffer = str.get() + nextpos; |
| |
1673 // skip '\n' if it is between CJ chars |
| |
1674 if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) { |
| |
1675 offsetIntoBuffer = str.get() + bol; |
| |
1676 AddToLine(offsetIntoBuffer, nextpos-bol); |
| |
1677 bol = nextpos + 1; |
| |
1678 continue; |
| |
1679 } |
| |
1680 } |
| |
1681 // If we're already in whitespace and not preformatted, just skip it: |
| |
1682 if (mInWhitespace && (nextpos == bol) && !mPreFormatted && |
| |
1683 !(mFlags & nsIDocumentEncoder::OutputPreformatted)) { |
| |
1684 // Skip whitespace |
| |
1685 bol++; |
| |
1686 continue; |
| |
1687 } |
| |
1688 |
| |
1689 if (nextpos == bol) { |
| |
1690 // Note that we are in whitespace. |
| |
1691 mInWhitespace = true; |
| |
1692 offsetIntoBuffer = str.get() + nextpos; |
| |
1693 AddToLine(offsetIntoBuffer, 1); |
| |
1694 bol++; |
| |
1695 continue; |
| |
1696 } |
| |
1697 |
| |
1698 mInWhitespace = true; |
| |
1699 |
| |
1700 offsetIntoBuffer = str.get() + bol; |
| |
1701 if (mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) { |
| |
1702 // Preserve the real whitespace character |
| |
1703 nextpos++; |
| |
1704 AddToLine(offsetIntoBuffer, nextpos-bol); |
| |
1705 bol = nextpos; |
| |
1706 } |
| |
1707 else { |
| |
1708 // Replace the whitespace with a space |
| |
1709 AddToLine(offsetIntoBuffer, nextpos-bol); |
| |
1710 AddToLine(kSpace.get(),1); |
| |
1711 bol = nextpos + 1; // Let's eat the whitespace |
| |
1712 } |
| |
1713 } |
| |
1714 } // Continue looping over the string |
| |
1715 } |
| |
1716 |
| |
1717 |
| |
1718 /** |
| |
1719 * Gets the value of an attribute in a string. If the function returns |
| |
1720 * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified. |
| |
1721 */ |
| |
1722 nsresult |
| |
1723 nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName, |
| |
1724 nsString& aValueRet) |
| |
1725 { |
| |
1726 if (mElement) { |
| |
1727 if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) { |
| |
1728 return NS_OK; |
| |
1729 } |
| |
1730 } |
| |
1731 |
| |
1732 return NS_ERROR_NOT_AVAILABLE; |
| |
1733 } |
| |
1734 |
| |
1735 /** |
| |
1736 * Returns true, if the element was inserted by Moz' TXT->HTML converter. |
| |
1737 * In this case, we should ignore it. |
| |
1738 */ |
| |
1739 bool |
| |
1740 nsPlainTextSerializer::IsCurrentNodeConverted() |
| |
1741 { |
| |
1742 nsAutoString value; |
| |
1743 nsresult rv = GetAttributeValue(nsGkAtoms::_class, value); |
| |
1744 return (NS_SUCCEEDED(rv) && |
| |
1745 (value.EqualsIgnoreCase("moz-txt", 7) || |
| |
1746 value.EqualsIgnoreCase("\"moz-txt", 8))); |
| |
1747 } |
| |
1748 |
| |
1749 |
| |
1750 // static |
| |
1751 nsIAtom* |
| |
1752 nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) |
| |
1753 { |
| |
1754 if (!aContent->IsHTML()) { |
| |
1755 return nullptr; |
| |
1756 } |
| |
1757 |
| |
1758 nsIAtom* localName = aContent->Tag(); |
| |
1759 return localName->IsStaticAtom() ? localName : nullptr; |
| |
1760 } |
| |
1761 |
| |
1762 /** |
| |
1763 * Returns true if we currently are inside a <pre>. The check is done |
| |
1764 * by traversing the tag stack looking for <pre> until we hit a block |
| |
1765 * level tag which is assumed to override any <pre>:s below it in |
| |
1766 * the stack. To do this correctly to a 100% would require access |
| |
1767 * to style which we don't support in this converter. |
| |
1768 */ |
| |
1769 bool |
| |
1770 nsPlainTextSerializer::IsInPre() |
| |
1771 { |
| |
1772 int32_t i = mTagStackIndex; |
| |
1773 while(i > 0) { |
| |
1774 if (mTagStack[i - 1] == nsGkAtoms::pre) |
| |
1775 return true; |
| |
1776 if (nsContentUtils::IsHTMLBlock(mTagStack[i - 1])) { |
| |
1777 // We assume that every other block overrides a <pre> |
| |
1778 return false; |
| |
1779 } |
| |
1780 --i; |
| |
1781 } |
| |
1782 |
| |
1783 // Not a <pre> in the whole stack |
| |
1784 return false; |
| |
1785 } |
| |
1786 |
| |
1787 /** |
| |
1788 * This method is required only to identify LI's inside OL. |
| |
1789 * Returns TRUE if we are inside an OL tag and FALSE otherwise. |
| |
1790 */ |
| |
1791 bool |
| |
1792 nsPlainTextSerializer::IsInOL() |
| |
1793 { |
| |
1794 int32_t i = mTagStackIndex; |
| |
1795 while(--i >= 0) { |
| |
1796 if (mTagStack[i] == nsGkAtoms::ol) |
| |
1797 return true; |
| |
1798 if (mTagStack[i] == nsGkAtoms::ul) { |
| |
1799 // If a UL is reached first, LI belongs the UL nested in OL. |
| |
1800 return false; |
| |
1801 } |
| |
1802 } |
| |
1803 // We may reach here for orphan LI's. |
| |
1804 return false; |
| |
1805 } |
| |
1806 |
| |
1807 /* |
| |
1808 @return 0 = no header, 1 = h1, ..., 6 = h6 |
| |
1809 */ |
| |
1810 int32_t HeaderLevel(nsIAtom* aTag) |
| |
1811 { |
| |
1812 if (aTag == nsGkAtoms::h1) { |
| |
1813 return 1; |
| |
1814 } |
| |
1815 if (aTag == nsGkAtoms::h2) { |
| |
1816 return 2; |
| |
1817 } |
| |
1818 if (aTag == nsGkAtoms::h3) { |
| |
1819 return 3; |
| |
1820 } |
| |
1821 if (aTag == nsGkAtoms::h4) { |
| |
1822 return 4; |
| |
1823 } |
| |
1824 if (aTag == nsGkAtoms::h5) { |
| |
1825 return 5; |
| |
1826 } |
| |
1827 if (aTag == nsGkAtoms::h6) { |
| |
1828 return 6; |
| |
1829 } |
| |
1830 return 0; |
| |
1831 } |
| |
1832 |
| |
1833 |
| |
1834 /* |
| |
1835 * This is an implementation of GetUnicharWidth() and |
| |
1836 * GetUnicharStringWidth() as defined in |
| |
1837 * "The Single UNIX Specification, Version 2, The Open Group, 1997" |
| |
1838 * <http://www.UNIX-systems.org/online.html> |
| |
1839 * |
| |
1840 * Markus Kuhn -- 2000-02-08 -- public domain |
| |
1841 * |
| |
1842 * Minor alterations to fit Mozilla's data types by Daniel Bratell |
| |
1843 */ |
| |
1844 |
| |
1845 /* These functions define the column width of an ISO 10646 character |
| |
1846 * as follows: |
| |
1847 * |
| |
1848 * - The null character (U+0000) has a column width of 0. |
| |
1849 * |
| |
1850 * - Other C0/C1 control characters and DEL will lead to a return |
| |
1851 * value of -1. |
| |
1852 * |
| |
1853 * - Non-spacing and enclosing combining characters (general |
| |
1854 * category code Mn or Me in the Unicode database) have a |
| |
1855 * column width of 0. |
| |
1856 * |
| |
1857 * - Spacing characters in the East Asian Wide (W) or East Asian |
| |
1858 * FullWidth (F) category as defined in Unicode Technical |
| |
1859 * Report #11 have a column width of 2. |
| |
1860 * |
| |
1861 * - All remaining characters (including all printable |
| |
1862 * ISO 8859-1 and WGL4 characters, Unicode control characters, |
| |
1863 * etc.) have a column width of 1. |
| |
1864 * |
| |
 * This implementation assumes that the char16_t code units it is given
 * (wchar_t in the original) are encoded in ISO 10646.
| |
1867 */ |
| |
1868 |
| |
/*
 * Returns the number of display columns taken by a single code unit:
 *   0  for U+0000 and for the non-spacing characters in the table below,
 *  -1  for C0/C1 control characters and DEL,
 *   2  for the wide/fullwidth East Asian ranges listed at the end,
 *   1  for everything else.
 */
int32_t GetUnicharWidth(char16_t ucs)
{
  /* Non-spacing character ranges; sorted ascending and non-overlapping,
     which the binary search below relies on. */
  static const struct interval {
    uint16_t first;
    uint16_t last;
  } combining[] = {
    { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
    { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
    { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
    { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
    { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
    { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
    { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
    { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
    { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
    { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
    { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
    { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
    { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
    { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
    { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
    { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
    { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
    { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
    { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
    { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
    { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
    { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
    { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
  };

  /* The null character takes no columns. */
  if (ucs == 0) {
    return 0;
  }

  /* 8-bit control characters: C0, DEL and C1. */
  const bool isC0Control = ucs < 32;
  const bool isDelOrC1Control = ucs >= 0x7f && ucs < 0xa0;
  if (isC0Control || isDelOrC1Control) {
    return -1;
  }

  /* Quick exit for Latin-1 etc.: everything below the first table entry. */
  if (ucs < combining[0].first) {
    return 1;
  }

  /* Binary search of the non-spacing table over the half-open
     index range [lo, hi). */
  int32_t lo = 0;
  int32_t hi = static_cast<int32_t>(sizeof(combining) / sizeof(combining[0]));
  while (lo < hi) {
    const int32_t probe = lo + (hi - lo) / 2;
    const interval& range = combining[probe];
    if (range.last < ucs) {
      lo = probe + 1;
    } else if (range.first > ucs) {
      hi = probe;
    } else {
      /* first <= ucs <= last: a non-spacing character. */
      return 0;
    }
  }

  /* From here on ucs is neither a combining nor a C0/C1 control character. */

  /* Fast path covering the majority of non-wide scripts. */
  if (ucs < 0x1100) {
    return 1;
  }

  const bool isWide =
    (ucs >= 0x1100 && ucs <= 0x115f) ||                /* Hangul Jamo */
    (ucs >= 0x2e80 && ucs <= 0xa4cf &&
     (ucs & ~0x0011) != 0x300a && ucs != 0x303f) ||    /* CJK ... Yi */
    (ucs >= 0xac00 && ucs <= 0xd7a3) ||                /* Hangul Syllables */
    (ucs >= 0xf900 && ucs <= 0xfaff) ||   /* CJK Compatibility Ideographs */
    (ucs >= 0xfe30 && ucs <= 0xfe6f) ||        /* CJK Compatibility Forms */
    (ucs >= 0xff00 && ucs <= 0xff5f) ||                /* Fullwidth Forms */
    (ucs >= 0xffe0 && ucs <= 0xffe6);

  return isWide ? 2 : 1;
}
| |
1949 |
| |
1950 |
| |
1951 int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n) |
| |
1952 { |
| |
1953 int32_t w, width = 0; |
| |
1954 |
| |
1955 for (;*pwcs && n-- > 0; pwcs++) |
| |
1956 if ((w = GetUnicharWidth(*pwcs)) < 0) |
| |
1957 ++width; // Taking 1 as the width of non-printable character, for bug# 94475. |
| |
1958 else |
| |
1959 width += w; |
| |
1960 |
| |
1961 return width; |
| |
1962 } |
| |
1963 |