|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 /* |
|
7 * nsIContentSerializer implementation that can be used with an |
|
8 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way |
|
9 * (e.g. for copy/paste as plaintext). |
|
10 */ |
|
11 |
|
12 #include "nsPlainTextSerializer.h" |
|
13 #include "nsLWBrkCIID.h" |
|
14 #include "nsIServiceManager.h" |
|
15 #include "nsGkAtoms.h" |
|
16 #include "nsNameSpaceManager.h" |
|
17 #include "nsTextFragment.h" |
|
18 #include "nsContentUtils.h" |
|
19 #include "nsReadableUtils.h" |
|
20 #include "nsUnicharUtils.h" |
|
21 #include "nsCRT.h" |
|
22 #include "mozilla/dom/Element.h" |
|
23 #include "mozilla/Preferences.h" |
|
24 |
|
25 using namespace mozilla; |
|
26 using namespace mozilla::dom; |
|
27 |
|
// Names of the preferences that Init() reads when formatted output is
// requested.
#define PREF_STRUCTS "converter.html2txt.structs"
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"

// Basic indentation unit, in columns (used for lists, <dd> and non-cite
// blockquotes).
static const int32_t kTabSize = 4;

// Indentation of h1, if mHeaderStrategy = 1 or = 2. The indentation of the
// other headers is derived from that.
// XXX center h1?
static const int32_t kIndentSizeHeaders = 2;

// If mHeaderStrategy = 1, indent h(x+1) this many columns more than h(x).
static const int32_t kIndentIncrementHeaders = 2;

// Indentation of non-first lines of ul and ol.
static const int32_t kIndentSizeList = kTabSize;

// Indentation of <dd>.
static const int32_t kIndentSizeDD = kTabSize;

// Character constants: U+00A0 (no-break space) and the plain ASCII space.
static const char16_t kNBSP = 160;
static const char16_t kSPACE = ' ';
|
46 static int32_t HeaderLevel(nsIAtom* aTag); |
|
47 static int32_t GetUnicharWidth(char16_t ucs); |
|
48 static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n); |
|
49 |
|
50 // Someday may want to make this non-const: |
|
51 static const uint32_t TagStackSize = 500; |
|
52 static const uint32_t OLStackSize = 100; |
|
53 |
|
54 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) |
|
55 { |
|
56 nsPlainTextSerializer* it = new nsPlainTextSerializer(); |
|
57 if (!it) { |
|
58 return NS_ERROR_OUT_OF_MEMORY; |
|
59 } |
|
60 |
|
61 return CallQueryInterface(it, aSerializer); |
|
62 } |
|
63 |
|
64 nsPlainTextSerializer::nsPlainTextSerializer() |
|
65 : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant" |
|
66 { |
|
67 |
|
68 mOutputString = nullptr; |
|
69 mHeadLevel = 0; |
|
70 mAtFirstColumn = true; |
|
71 mIndent = 0; |
|
72 mCiteQuoteLevel = 0; |
|
73 mStructs = true; // will be read from prefs later |
|
74 mHeaderStrategy = 1 /*indent increasingly*/; // ditto |
|
75 mDontWrapAnyQuotes = false; // ditto |
|
76 mHasWrittenCiteBlockquote = false; |
|
77 mSpanLevel = 0; |
|
78 for (int32_t i = 0; i <= 6; i++) { |
|
79 mHeaderCounter[i] = 0; |
|
80 } |
|
81 |
|
82 // Line breaker |
|
83 mWrapColumn = 72; // XXX magic number, we expect someone to reset this |
|
84 mCurrentLineWidth = 0; |
|
85 |
|
86 // Flow |
|
87 mEmptyLines = 1; // The start of the document is an "empty line" in itself, |
|
88 mInWhitespace = false; |
|
89 mPreFormatted = false; |
|
90 mStartedOutput = false; |
|
91 |
|
92 // initialize the tag stack to zero: |
|
93 // The stack only ever contains pointers to static atoms, so they don't |
|
94 // need refcounting. |
|
95 mTagStack = new nsIAtom*[TagStackSize]; |
|
96 mTagStackIndex = 0; |
|
97 mIgnoreAboveIndex = (uint32_t)kNotFound; |
|
98 |
|
99 // initialize the OL stack, where numbers for ordered lists are kept |
|
100 mOLStack = new int32_t[OLStackSize]; |
|
101 mOLStackIndex = 0; |
|
102 |
|
103 mULCount = 0; |
|
104 |
|
105 mIgnoredChildNodeLevel = 0; |
|
106 } |
|
107 |
|
108 nsPlainTextSerializer::~nsPlainTextSerializer() |
|
109 { |
|
110 delete[] mTagStack; |
|
111 delete[] mOLStack; |
|
112 NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!"); |
|
113 } |
|
114 |
|
115 NS_IMPL_ISUPPORTS(nsPlainTextSerializer, |
|
116 nsIContentSerializer) |
|
117 |
|
118 |
|
119 NS_IMETHODIMP |
|
120 nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn, |
|
121 const char* aCharSet, bool aIsCopying, |
|
122 bool aIsWholeDocument) |
|
123 { |
|
124 #ifdef DEBUG |
|
125 // Check if the major control flags are set correctly. |
|
126 if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
|
127 NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted, |
|
128 "If you want format=flowed, you must combine it with " |
|
129 "nsIDocumentEncoder::OutputFormatted"); |
|
130 } |
|
131 |
|
132 if (aFlags & nsIDocumentEncoder::OutputFormatted) { |
|
133 NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted), |
|
134 "Can't do formatted and preformatted output at the same time!"); |
|
135 } |
|
136 #endif |
|
137 |
|
138 mFlags = aFlags; |
|
139 mWrapColumn = aWrapColumn; |
|
140 |
|
141 // Only create a linebreaker if we will handle wrapping. |
|
142 if (MayWrap()) { |
|
143 mLineBreaker = nsContentUtils::LineBreaker(); |
|
144 } |
|
145 |
|
146 // Set the line break character: |
|
147 if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) |
|
148 && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { |
|
149 // Windows |
|
150 mLineBreak.AssignLiteral("\r\n"); |
|
151 } |
|
152 else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { |
|
153 // Mac |
|
154 mLineBreak.Assign(char16_t('\r')); |
|
155 } |
|
156 else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { |
|
157 // Unix/DOM |
|
158 mLineBreak.Assign(char16_t('\n')); |
|
159 } |
|
160 else { |
|
161 // Platform/default |
|
162 mLineBreak.AssignLiteral(NS_LINEBREAK); |
|
163 } |
|
164 |
|
165 mLineBreakDue = false; |
|
166 mFloatingLines = -1; |
|
167 |
|
168 if (mFlags & nsIDocumentEncoder::OutputFormatted) { |
|
169 // Get some prefs that controls how we do formatted output |
|
170 mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs); |
|
171 |
|
172 mHeaderStrategy = |
|
173 Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy); |
|
174 |
|
175 // DontWrapAnyQuotes is set according to whether plaintext mail |
|
176 // is wrapping to window width -- see bug 134439. |
|
177 // We'll only want this if we're wrapping and formatted. |
|
178 if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) { |
|
179 mDontWrapAnyQuotes = |
|
180 Preferences::GetBool("mail.compose.wrap_to_window_width", |
|
181 mDontWrapAnyQuotes); |
|
182 } |
|
183 } |
|
184 |
|
185 // XXX We should let the caller pass this in. |
|
186 if (Preferences::GetBool("browser.frames.enabled")) { |
|
187 mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent; |
|
188 } |
|
189 else { |
|
190 mFlags |= nsIDocumentEncoder::OutputNoFramesContent; |
|
191 } |
|
192 |
|
193 return NS_OK; |
|
194 } |
|
195 |
|
196 bool |
|
197 nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) |
|
198 { |
|
199 uint32_t size = aStack.Length(); |
|
200 if (size == 0) { |
|
201 return false; |
|
202 } |
|
203 return aStack.ElementAt(size-1); |
|
204 } |
|
205 |
|
206 void |
|
207 nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) |
|
208 { |
|
209 uint32_t size = aStack.Length(); |
|
210 if (size > 0) { |
|
211 aStack.ElementAt(size-1) = aValue; |
|
212 } |
|
213 else { |
|
214 NS_ERROR("There is no \"Last\" value"); |
|
215 } |
|
216 } |
|
217 |
|
218 void |
|
219 nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) |
|
220 { |
|
221 aStack.AppendElement(bool(aValue)); |
|
222 } |
|
223 |
|
224 bool |
|
225 nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) |
|
226 { |
|
227 bool returnValue = false; |
|
228 uint32_t size = aStack.Length(); |
|
229 if (size > 0) { |
|
230 returnValue = aStack.ElementAt(size-1); |
|
231 aStack.RemoveElementAt(size-1); |
|
232 } |
|
233 return returnValue; |
|
234 } |
|
235 |
|
236 bool |
|
237 nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag) |
|
238 { |
|
239 // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, |
|
240 // non-textual container element should be serialized as placeholder |
|
241 // character and its child nodes should be ignored. See bug 895239. |
|
242 if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) { |
|
243 return false; |
|
244 } |
|
245 |
|
246 return |
|
247 (aTag == nsGkAtoms::audio) || |
|
248 (aTag == nsGkAtoms::canvas) || |
|
249 (aTag == nsGkAtoms::iframe) || |
|
250 (aTag == nsGkAtoms::meter) || |
|
251 (aTag == nsGkAtoms::progress) || |
|
252 (aTag == nsGkAtoms::object) || |
|
253 (aTag == nsGkAtoms::svg) || |
|
254 (aTag == nsGkAtoms::video); |
|
255 } |
|
256 |
|
257 NS_IMETHODIMP |
|
258 nsPlainTextSerializer::AppendText(nsIContent* aText, |
|
259 int32_t aStartOffset, |
|
260 int32_t aEndOffset, |
|
261 nsAString& aStr) |
|
262 { |
|
263 if (mIgnoreAboveIndex != (uint32_t)kNotFound) { |
|
264 return NS_OK; |
|
265 } |
|
266 |
|
267 NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); |
|
268 if ( aStartOffset < 0 ) |
|
269 return NS_ERROR_INVALID_ARG; |
|
270 |
|
271 NS_ENSURE_ARG(aText); |
|
272 |
|
273 nsresult rv = NS_OK; |
|
274 |
|
275 nsIContent* content = aText; |
|
276 const nsTextFragment* frag; |
|
277 if (!content || !(frag = content->GetText())) { |
|
278 return NS_ERROR_FAILURE; |
|
279 } |
|
280 |
|
281 int32_t fragLength = frag->GetLength(); |
|
282 int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength); |
|
283 NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); |
|
284 |
|
285 int32_t length = endoffset - aStartOffset; |
|
286 if (length <= 0) { |
|
287 return NS_OK; |
|
288 } |
|
289 |
|
290 nsAutoString textstr; |
|
291 if (frag->Is2b()) { |
|
292 textstr.Assign(frag->Get2b() + aStartOffset, length); |
|
293 } |
|
294 else { |
|
295 // AssignASCII is for 7-bit character only, so don't use it |
|
296 const char *data = frag->Get1b(); |
|
297 CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr); |
|
298 } |
|
299 |
|
300 mOutputString = &aStr; |
|
301 |
|
302 // We have to split the string across newlines |
|
303 // to match parser behavior |
|
304 int32_t start = 0; |
|
305 int32_t offset = textstr.FindCharInSet("\n\r"); |
|
306 while (offset != kNotFound) { |
|
307 |
|
308 if (offset>start) { |
|
309 // Pass in the line |
|
310 DoAddText(false, |
|
311 Substring(textstr, start, offset-start)); |
|
312 } |
|
313 |
|
314 // Pass in a newline |
|
315 DoAddText(true, mLineBreak); |
|
316 |
|
317 start = offset+1; |
|
318 offset = textstr.FindCharInSet("\n\r", start); |
|
319 } |
|
320 |
|
321 // Consume the last bit of the string if there's any left |
|
322 if (start < length) { |
|
323 if (start) { |
|
324 DoAddText(false, Substring(textstr, start, length - start)); |
|
325 } |
|
326 else { |
|
327 DoAddText(false, textstr); |
|
328 } |
|
329 } |
|
330 |
|
331 mOutputString = nullptr; |
|
332 |
|
333 return rv; |
|
334 } |
|
335 |
|
336 NS_IMETHODIMP |
|
337 nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection, |
|
338 int32_t aStartOffset, |
|
339 int32_t aEndOffset, |
|
340 nsAString& aStr) |
|
341 { |
|
342 return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr); |
|
343 } |
|
344 |
|
345 NS_IMETHODIMP |
|
346 nsPlainTextSerializer::AppendElementStart(Element* aElement, |
|
347 Element* aOriginalElement, |
|
348 nsAString& aStr) |
|
349 { |
|
350 NS_ENSURE_ARG(aElement); |
|
351 |
|
352 mElement = aElement; |
|
353 |
|
354 nsresult rv; |
|
355 nsIAtom* id = GetIdForContent(mElement); |
|
356 |
|
357 bool isContainer = !nsContentUtils::IsHTMLVoid(id); |
|
358 |
|
359 mOutputString = &aStr; |
|
360 |
|
361 if (isContainer) { |
|
362 rv = DoOpenContainer(id); |
|
363 } |
|
364 else { |
|
365 rv = DoAddLeaf(id); |
|
366 } |
|
367 |
|
368 mElement = nullptr; |
|
369 mOutputString = nullptr; |
|
370 |
|
371 if (id == nsGkAtoms::head) { |
|
372 ++mHeadLevel; |
|
373 } |
|
374 |
|
375 return rv; |
|
376 } |
|
377 |
|
378 NS_IMETHODIMP |
|
379 nsPlainTextSerializer::AppendElementEnd(Element* aElement, |
|
380 nsAString& aStr) |
|
381 { |
|
382 NS_ENSURE_ARG(aElement); |
|
383 |
|
384 mElement = aElement; |
|
385 |
|
386 nsresult rv; |
|
387 nsIAtom* id = GetIdForContent(mElement); |
|
388 |
|
389 bool isContainer = !nsContentUtils::IsHTMLVoid(id); |
|
390 |
|
391 mOutputString = &aStr; |
|
392 |
|
393 rv = NS_OK; |
|
394 if (isContainer) { |
|
395 rv = DoCloseContainer(id); |
|
396 } |
|
397 |
|
398 mElement = nullptr; |
|
399 mOutputString = nullptr; |
|
400 |
|
401 if (id == nsGkAtoms::head) { |
|
402 NS_ASSERTION(mHeadLevel != 0, |
|
403 "mHeadLevel being decremented below 0"); |
|
404 --mHeadLevel; |
|
405 } |
|
406 |
|
407 return rv; |
|
408 } |
|
409 |
|
410 NS_IMETHODIMP |
|
411 nsPlainTextSerializer::Flush(nsAString& aStr) |
|
412 { |
|
413 mOutputString = &aStr; |
|
414 FlushLine(); |
|
415 mOutputString = nullptr; |
|
416 return NS_OK; |
|
417 } |
|
418 |
|
419 NS_IMETHODIMP |
|
420 nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument, |
|
421 nsAString& aStr) |
|
422 { |
|
423 return NS_OK; |
|
424 } |
|
425 |
|
426 nsresult |
|
427 nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag) |
|
428 { |
|
429 // Check if we need output current node as placeholder character and ignore |
|
430 // child nodes. |
|
431 if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { |
|
432 if (mIgnoredChildNodeLevel == 0) { |
|
433 // Serialize current node as placeholder character |
|
434 Write(NS_LITERAL_STRING("\xFFFC")); |
|
435 } |
|
436 // Ignore child nodes. |
|
437 mIgnoredChildNodeLevel++; |
|
438 return NS_OK; |
|
439 } |
|
440 |
|
441 if (mFlags & nsIDocumentEncoder::OutputRaw) { |
|
442 // Raw means raw. Don't even think about doing anything fancy |
|
443 // here like indenting, adding line breaks or any other |
|
444 // characters such as list item bullets, quote characters |
|
445 // around <q>, etc. I mean it! Don't make me smack you! |
|
446 |
|
447 return NS_OK; |
|
448 } |
|
449 |
|
450 if (mTagStackIndex < TagStackSize) { |
|
451 mTagStack[mTagStackIndex++] = aTag; |
|
452 } |
|
453 |
|
454 if (mIgnoreAboveIndex != (uint32_t)kNotFound) { |
|
455 return NS_OK; |
|
456 } |
|
457 |
|
458 // Reset this so that <blockquote type=cite> doesn't affect the whitespace |
|
459 // above random <pre>s below it. |
|
460 mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote && |
|
461 aTag == nsGkAtoms::pre; |
|
462 |
|
463 bool isInCiteBlockquote = false; |
|
464 |
|
465 // XXX special-case <blockquote type=cite> so that we don't add additional |
|
466 // newlines before the text. |
|
467 if (aTag == nsGkAtoms::blockquote) { |
|
468 nsAutoString value; |
|
469 nsresult rv = GetAttributeValue(nsGkAtoms::type, value); |
|
470 isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite"); |
|
471 } |
|
472 |
|
473 if (mLineBreakDue && !isInCiteBlockquote) |
|
474 EnsureVerticalSpace(mFloatingLines); |
|
475 |
|
476 // Check if this tag's content that should not be output |
|
477 if ((aTag == nsGkAtoms::noscript && |
|
478 !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) || |
|
479 ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) && |
|
480 !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) { |
|
481 // Ignore everything that follows the current tag in |
|
482 // question until a matching end tag is encountered. |
|
483 mIgnoreAboveIndex = mTagStackIndex - 1; |
|
484 return NS_OK; |
|
485 } |
|
486 |
|
487 if (aTag == nsGkAtoms::body) { |
|
488 // Try to figure out here whether we have a |
|
489 // preformatted style attribute. |
|
490 // |
|
491 // Trigger on the presence of a "pre-wrap" in the |
|
492 // style attribute. That's a very simplistic way to do |
|
493 // it, but better than nothing. |
|
494 // Also set mWrapColumn to the value given there |
|
495 // (which arguably we should only do if told to do so). |
|
496 nsAutoString style; |
|
497 int32_t whitespace; |
|
498 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) && |
|
499 (kNotFound != (whitespace = style.Find("white-space:")))) { |
|
500 |
|
501 if (kNotFound != style.Find("pre-wrap", true, whitespace)) { |
|
502 #ifdef DEBUG_preformatted |
|
503 printf("Set mPreFormatted based on style pre-wrap\n"); |
|
504 #endif |
|
505 mPreFormatted = true; |
|
506 int32_t widthOffset = style.Find("width:"); |
|
507 if (widthOffset >= 0) { |
|
508 // We have to search for the ch before the semicolon, |
|
509 // not for the semicolon itself, because nsString::ToInteger() |
|
510 // considers 'c' to be a valid numeric char (even if radix=10) |
|
511 // but then gets confused if it sees it next to the number |
|
512 // when the radix specified was 10, and returns an error code. |
|
513 int32_t semiOffset = style.Find("ch", false, widthOffset+6); |
|
514 int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6 |
|
515 : style.Length() - widthOffset); |
|
516 nsAutoString widthstr; |
|
517 style.Mid(widthstr, widthOffset+6, length); |
|
518 nsresult err; |
|
519 int32_t col = widthstr.ToInteger(&err); |
|
520 |
|
521 if (NS_SUCCEEDED(err)) { |
|
522 mWrapColumn = (uint32_t)col; |
|
523 #ifdef DEBUG_preformatted |
|
524 printf("Set wrap column to %d based on style\n", mWrapColumn); |
|
525 #endif |
|
526 } |
|
527 } |
|
528 } |
|
529 else if (kNotFound != style.Find("pre", true, whitespace)) { |
|
530 #ifdef DEBUG_preformatted |
|
531 printf("Set mPreFormatted based on style pre\n"); |
|
532 #endif |
|
533 mPreFormatted = true; |
|
534 mWrapColumn = 0; |
|
535 } |
|
536 } |
|
537 else { |
|
538 /* See comment at end of function. */ |
|
539 mInWhitespace = true; |
|
540 mPreFormatted = false; |
|
541 } |
|
542 |
|
543 return NS_OK; |
|
544 } |
|
545 |
|
546 // Keep this in sync with DoCloseContainer! |
|
547 if (!DoOutput()) { |
|
548 return NS_OK; |
|
549 } |
|
550 |
|
551 if (aTag == nsGkAtoms::p) |
|
552 EnsureVerticalSpace(1); |
|
553 else if (aTag == nsGkAtoms::pre) { |
|
554 if (GetLastBool(mIsInCiteBlockquote)) |
|
555 EnsureVerticalSpace(0); |
|
556 else if (mHasWrittenCiteBlockquote) { |
|
557 EnsureVerticalSpace(0); |
|
558 mHasWrittenCiteBlockquote = false; |
|
559 } |
|
560 else |
|
561 EnsureVerticalSpace(1); |
|
562 } |
|
563 else if (aTag == nsGkAtoms::tr) { |
|
564 PushBool(mHasWrittenCellsForRow, false); |
|
565 } |
|
566 else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) { |
|
567 // We must make sure that the content of two table cells get a |
|
568 // space between them. |
|
569 |
|
570 // To make the separation between cells most obvious and |
|
571 // importable, we use a TAB. |
|
572 if (GetLastBool(mHasWrittenCellsForRow)) { |
|
573 // Bypass |Write| so that the TAB isn't compressed away. |
|
574 AddToLine(MOZ_UTF16("\t"), 1); |
|
575 mInWhitespace = true; |
|
576 } |
|
577 else if (mHasWrittenCellsForRow.IsEmpty()) { |
|
578 // We don't always see a <tr> (nor a <table>) before the <td> if we're |
|
579 // copying part of a table |
|
580 PushBool(mHasWrittenCellsForRow, true); // will never be popped |
|
581 } |
|
582 else { |
|
583 SetLastBool(mHasWrittenCellsForRow, true); |
|
584 } |
|
585 } |
|
586 else if (aTag == nsGkAtoms::ul) { |
|
587 // Indent here to support nested lists, which aren't included in li :-( |
|
588 EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); |
|
589 // Must end the current line before we change indention |
|
590 mIndent += kIndentSizeList; |
|
591 mULCount++; |
|
592 } |
|
593 else if (aTag == nsGkAtoms::ol) { |
|
594 EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); |
|
595 if (mFlags & nsIDocumentEncoder::OutputFormatted) { |
|
596 // Must end the current line before we change indention |
|
597 if (mOLStackIndex < OLStackSize) { |
|
598 nsAutoString startAttr; |
|
599 int32_t startVal = 1; |
|
600 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) { |
|
601 nsresult rv = NS_OK; |
|
602 startVal = startAttr.ToInteger(&rv); |
|
603 if (NS_FAILED(rv)) |
|
604 startVal = 1; |
|
605 } |
|
606 mOLStack[mOLStackIndex++] = startVal; |
|
607 } |
|
608 } else { |
|
609 mOLStackIndex++; |
|
610 } |
|
611 mIndent += kIndentSizeList; // see ul |
|
612 } |
|
613 else if (aTag == nsGkAtoms::li && |
|
614 (mFlags & nsIDocumentEncoder::OutputFormatted)) { |
|
615 if (mTagStackIndex > 1 && IsInOL()) { |
|
616 if (mOLStackIndex > 0) { |
|
617 nsAutoString valueAttr; |
|
618 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) { |
|
619 nsresult rv = NS_OK; |
|
620 int32_t valueAttrVal = valueAttr.ToInteger(&rv); |
|
621 if (NS_SUCCEEDED(rv)) |
|
622 mOLStack[mOLStackIndex-1] = valueAttrVal; |
|
623 } |
|
624 // This is what nsBulletFrame does for OLs: |
|
625 mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10); |
|
626 } |
|
627 else { |
|
628 mInIndentString.Append(char16_t('#')); |
|
629 } |
|
630 |
|
631 mInIndentString.Append(char16_t('.')); |
|
632 |
|
633 } |
|
634 else { |
|
635 static char bulletCharArray[] = "*o+#"; |
|
636 uint32_t index = mULCount > 0 ? (mULCount - 1) : 3; |
|
637 char bulletChar = bulletCharArray[index % 4]; |
|
638 mInIndentString.Append(char16_t(bulletChar)); |
|
639 } |
|
640 |
|
641 mInIndentString.Append(char16_t(' ')); |
|
642 } |
|
643 else if (aTag == nsGkAtoms::dl) { |
|
644 EnsureVerticalSpace(1); |
|
645 } |
|
646 else if (aTag == nsGkAtoms::dt) { |
|
647 EnsureVerticalSpace(0); |
|
648 } |
|
649 else if (aTag == nsGkAtoms::dd) { |
|
650 EnsureVerticalSpace(0); |
|
651 mIndent += kIndentSizeDD; |
|
652 } |
|
653 else if (aTag == nsGkAtoms::span) { |
|
654 ++mSpanLevel; |
|
655 } |
|
656 else if (aTag == nsGkAtoms::blockquote) { |
|
657 // Push |
|
658 PushBool(mIsInCiteBlockquote, isInCiteBlockquote); |
|
659 if (isInCiteBlockquote) { |
|
660 EnsureVerticalSpace(0); |
|
661 mCiteQuoteLevel++; |
|
662 } |
|
663 else { |
|
664 EnsureVerticalSpace(1); |
|
665 mIndent += kTabSize; // Check for some maximum value? |
|
666 } |
|
667 } |
|
668 else if (aTag == nsGkAtoms::q) { |
|
669 Write(NS_LITERAL_STRING("\"")); |
|
670 } |
|
671 |
|
672 // Else make sure we'll separate block level tags, |
|
673 // even if we're about to leave, before doing any other formatting. |
|
674 else if (nsContentUtils::IsHTMLBlock(aTag)) { |
|
675 EnsureVerticalSpace(0); |
|
676 } |
|
677 |
|
678 ////////////////////////////////////////////////////////////// |
|
679 if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { |
|
680 return NS_OK; |
|
681 } |
|
682 ////////////////////////////////////////////////////////////// |
|
683 // The rest of this routine is formatted output stuff, |
|
684 // which we should skip if we're not formatted: |
|
685 ////////////////////////////////////////////////////////////// |
|
686 |
|
687 // Push on stack |
|
688 bool currentNodeIsConverted = IsCurrentNodeConverted(); |
|
689 |
|
690 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || |
|
691 aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || |
|
692 aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) |
|
693 { |
|
694 EnsureVerticalSpace(2); |
|
695 if (mHeaderStrategy == 2) { // numbered |
|
696 mIndent += kIndentSizeHeaders; |
|
697 // Caching |
|
698 int32_t level = HeaderLevel(aTag); |
|
699 // Increase counter for current level |
|
700 mHeaderCounter[level]++; |
|
701 // Reset all lower levels |
|
702 int32_t i; |
|
703 |
|
704 for (i = level + 1; i <= 6; i++) { |
|
705 mHeaderCounter[i] = 0; |
|
706 } |
|
707 |
|
708 // Construct numbers |
|
709 nsAutoString leadup; |
|
710 for (i = 1; i <= level; i++) { |
|
711 leadup.AppendInt(mHeaderCounter[i]); |
|
712 leadup.Append(char16_t('.')); |
|
713 } |
|
714 leadup.Append(char16_t(' ')); |
|
715 Write(leadup); |
|
716 } |
|
717 else if (mHeaderStrategy == 1) { // indent increasingly |
|
718 mIndent += kIndentSizeHeaders; |
|
719 for (int32_t i = HeaderLevel(aTag); i > 1; i--) { |
|
720 // for h(x), run x-1 times |
|
721 mIndent += kIndentIncrementHeaders; |
|
722 } |
|
723 } |
|
724 } |
|
725 else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) { |
|
726 nsAutoString url; |
|
727 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) |
|
728 && !url.IsEmpty()) { |
|
729 mURL = url; |
|
730 } |
|
731 } |
|
732 else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) { |
|
733 Write(NS_LITERAL_STRING("^")); |
|
734 } |
|
735 else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) { |
|
736 Write(NS_LITERAL_STRING("_")); |
|
737 } |
|
738 else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { |
|
739 Write(NS_LITERAL_STRING("|")); |
|
740 } |
|
741 else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) |
|
742 && mStructs && !currentNodeIsConverted) { |
|
743 Write(NS_LITERAL_STRING("*")); |
|
744 } |
|
745 else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) |
|
746 && mStructs && !currentNodeIsConverted) { |
|
747 Write(NS_LITERAL_STRING("/")); |
|
748 } |
|
749 else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { |
|
750 Write(NS_LITERAL_STRING("_")); |
|
751 } |
|
752 |
|
753 /* Container elements are always block elements, so we shouldn't |
|
754 output any whitespace immediately after the container tag even if |
|
755 there's extra whitespace there because the HTML is pretty-printed |
|
756 or something. To ensure that happens, tell the serializer we're |
|
757 already in whitespace so it won't output more. */ |
|
758 mInWhitespace = true; |
|
759 |
|
760 return NS_OK; |
|
761 } |
|
762 |
|
763 nsresult |
|
764 nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag) |
|
765 { |
|
766 if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { |
|
767 mIgnoredChildNodeLevel--; |
|
768 return NS_OK; |
|
769 } |
|
770 |
|
771 if (mFlags & nsIDocumentEncoder::OutputRaw) { |
|
772 // Raw means raw. Don't even think about doing anything fancy |
|
773 // here like indenting, adding line breaks or any other |
|
774 // characters such as list item bullets, quote characters |
|
775 // around <q>, etc. I mean it! Don't make me smack you! |
|
776 |
|
777 return NS_OK; |
|
778 } |
|
779 |
|
780 if (mTagStackIndex > 0) { |
|
781 --mTagStackIndex; |
|
782 } |
|
783 |
|
784 if (mTagStackIndex >= mIgnoreAboveIndex) { |
|
785 if (mTagStackIndex == mIgnoreAboveIndex) { |
|
786 // We're dealing with the close tag whose matching |
|
787 // open tag had set the mIgnoreAboveIndex value. |
|
788 // Reset mIgnoreAboveIndex before discarding this tag. |
|
789 mIgnoreAboveIndex = (uint32_t)kNotFound; |
|
790 } |
|
791 return NS_OK; |
|
792 } |
|
793 |
|
794 // End current line if we're ending a block level tag |
|
795 if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) { |
|
796 // We want the output to end with a new line, |
|
797 // but in preformatted areas like text fields, |
|
798 // we can't emit newlines that weren't there. |
|
799 // So add the newline only in the case of formatted output. |
|
800 if (mFlags & nsIDocumentEncoder::OutputFormatted) { |
|
801 EnsureVerticalSpace(0); |
|
802 } |
|
803 else { |
|
804 FlushLine(); |
|
805 } |
|
806 // We won't want to do anything with these in formatted mode either, |
|
807 // so just return now: |
|
808 return NS_OK; |
|
809 } |
|
810 |
|
811 // Keep this in sync with DoOpenContainer! |
|
812 if (!DoOutput()) { |
|
813 return NS_OK; |
|
814 } |
|
815 |
|
816 if (aTag == nsGkAtoms::tr) { |
|
817 PopBool(mHasWrittenCellsForRow); |
|
818 // Should always end a line, but get no more whitespace |
|
819 if (mFloatingLines < 0) |
|
820 mFloatingLines = 0; |
|
821 mLineBreakDue = true; |
|
822 } |
|
823 else if (((aTag == nsGkAtoms::li) || |
|
824 (aTag == nsGkAtoms::dt)) && |
|
825 (mFlags & nsIDocumentEncoder::OutputFormatted)) { |
|
826 // Items that should always end a line, but get no more whitespace |
|
827 if (mFloatingLines < 0) |
|
828 mFloatingLines = 0; |
|
829 mLineBreakDue = true; |
|
830 } |
|
831 else if (aTag == nsGkAtoms::pre) { |
|
832 mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1; |
|
833 mLineBreakDue = true; |
|
834 } |
|
835 else if (aTag == nsGkAtoms::ul) { |
|
836 FlushLine(); |
|
837 mIndent -= kIndentSizeList; |
|
838 if (--mULCount + mOLStackIndex == 0) { |
|
839 mFloatingLines = 1; |
|
840 mLineBreakDue = true; |
|
841 } |
|
842 } |
|
843 else if (aTag == nsGkAtoms::ol) { |
|
844 FlushLine(); // Doing this after decreasing OLStackIndex would be wrong. |
|
845 mIndent -= kIndentSizeList; |
|
846 NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!"); |
|
847 mOLStackIndex--; |
|
848 if (mULCount + mOLStackIndex == 0) { |
|
849 mFloatingLines = 1; |
|
850 mLineBreakDue = true; |
|
851 } |
|
852 } |
|
853 else if (aTag == nsGkAtoms::dl) { |
|
854 mFloatingLines = 1; |
|
855 mLineBreakDue = true; |
|
856 } |
|
857 else if (aTag == nsGkAtoms::dd) { |
|
858 FlushLine(); |
|
859 mIndent -= kIndentSizeDD; |
|
860 } |
|
861 else if (aTag == nsGkAtoms::span) { |
|
862 NS_ASSERTION(mSpanLevel, "Span level will be negative!"); |
|
863 --mSpanLevel; |
|
864 } |
|
865 else if (aTag == nsGkAtoms::div) { |
|
866 if (mFloatingLines < 0) |
|
867 mFloatingLines = 0; |
|
868 mLineBreakDue = true; |
|
869 } |
|
870 else if (aTag == nsGkAtoms::blockquote) { |
|
871 FlushLine(); // Is this needed? |
|
872 |
|
873 // Pop |
|
874 bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote); |
|
875 |
|
876 if (isInCiteBlockquote) { |
|
877 NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!"); |
|
878 mCiteQuoteLevel--; |
|
879 mFloatingLines = 0; |
|
880 mHasWrittenCiteBlockquote = true; |
|
881 } |
|
882 else { |
|
883 mIndent -= kTabSize; |
|
884 mFloatingLines = 1; |
|
885 } |
|
886 mLineBreakDue = true; |
|
887 } |
|
888 else if (aTag == nsGkAtoms::q) { |
|
889 Write(NS_LITERAL_STRING("\"")); |
|
890 } |
|
891 else if (nsContentUtils::IsHTMLBlock(aTag) |
|
892 && aTag != nsGkAtoms::script) { |
|
893 // All other blocks get 1 vertical space after them |
|
894 // in formatted mode, otherwise 0. |
|
895 // This is hard. Sometimes 0 is a better number, but |
|
896 // how to know? |
|
897 if (mFlags & nsIDocumentEncoder::OutputFormatted) |
|
898 EnsureVerticalSpace(1); |
|
899 else { |
|
900 if (mFloatingLines < 0) |
|
901 mFloatingLines = 0; |
|
902 mLineBreakDue = true; |
|
903 } |
|
904 } |
|
905 |
|
906 ////////////////////////////////////////////////////////////// |
|
907 if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { |
|
908 return NS_OK; |
|
909 } |
|
910 ////////////////////////////////////////////////////////////// |
|
911 // The rest of this routine is formatted output stuff, |
|
912 // which we should skip if we're not formatted: |
|
913 ////////////////////////////////////////////////////////////// |
|
914 |
|
915 // Pop the currentConverted stack |
|
916 bool currentNodeIsConverted = IsCurrentNodeConverted(); |
|
917 |
|
918 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || |
|
919 aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || |
|
920 aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) { |
|
921 |
|
922 if (mHeaderStrategy) { /*numbered or indent increasingly*/ |
|
923 mIndent -= kIndentSizeHeaders; |
|
924 } |
|
925 if (mHeaderStrategy == 1 /*indent increasingly*/ ) { |
|
926 for (int32_t i = HeaderLevel(aTag); i > 1; i--) { |
|
927 // for h(x), run x-1 times |
|
928 mIndent -= kIndentIncrementHeaders; |
|
929 } |
|
930 } |
|
931 EnsureVerticalSpace(1); |
|
932 } |
|
933 else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) { |
|
934 nsAutoString temp; |
|
935 temp.AssignLiteral(" <"); |
|
936 temp += mURL; |
|
937 temp.Append(char16_t('>')); |
|
938 Write(temp); |
|
939 mURL.Truncate(); |
|
940 } |
|
941 else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) |
|
942 && mStructs && !currentNodeIsConverted) { |
|
943 Write(kSpace); |
|
944 } |
|
945 else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { |
|
946 Write(NS_LITERAL_STRING("|")); |
|
947 } |
|
948 else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) |
|
949 && mStructs && !currentNodeIsConverted) { |
|
950 Write(NS_LITERAL_STRING("*")); |
|
951 } |
|
952 else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) |
|
953 && mStructs && !currentNodeIsConverted) { |
|
954 Write(NS_LITERAL_STRING("/")); |
|
955 } |
|
956 else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { |
|
957 Write(NS_LITERAL_STRING("_")); |
|
958 } |
|
959 |
|
960 return NS_OK; |
|
961 } |
|
962 |
|
963 bool |
|
964 nsPlainTextSerializer::MustSuppressLeaf() |
|
965 { |
|
966 if (mIgnoredChildNodeLevel > 0) { |
|
967 return true; |
|
968 } |
|
969 |
|
970 if ((mTagStackIndex > 1 && |
|
971 mTagStack[mTagStackIndex-2] == nsGkAtoms::select) || |
|
972 (mTagStackIndex > 0 && |
|
973 mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) { |
|
974 // Don't output the contents of SELECT elements; |
|
975 // Might be nice, eventually, to output just the selected element. |
|
976 // Read more in bug 31994. |
|
977 return true; |
|
978 } |
|
979 |
|
980 if (mTagStackIndex > 0 && |
|
981 (mTagStack[mTagStackIndex-1] == nsGkAtoms::script || |
|
982 mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) { |
|
983 // Don't output the contents of <script> or <style> tags; |
|
984 return true; |
|
985 } |
|
986 |
|
987 return false; |
|
988 } |
|
989 |
|
990 void |
|
991 nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText) |
|
992 { |
|
993 // If we don't want any output, just return |
|
994 if (!DoOutput()) { |
|
995 return; |
|
996 } |
|
997 |
|
998 if (!aIsLineBreak) { |
|
999 // Make sure to reset this, since it's no longer true. |
|
1000 mHasWrittenCiteBlockquote = false; |
|
1001 } |
|
1002 |
|
1003 if (mLineBreakDue) |
|
1004 EnsureVerticalSpace(mFloatingLines); |
|
1005 |
|
1006 if (MustSuppressLeaf()) { |
|
1007 return; |
|
1008 } |
|
1009 |
|
1010 if (aIsLineBreak) { |
|
1011 // The only times we want to pass along whitespace from the original |
|
1012 // html source are if we're forced into preformatted mode via flags, |
|
1013 // or if we're prettyprinting and we're inside a <pre>. |
|
1014 // Otherwise, either we're collapsing to minimal text, or we're |
|
1015 // prettyprinting to mimic the html format, and in neither case |
|
1016 // does the formatting of the html source help us. |
|
1017 if ((mFlags & nsIDocumentEncoder::OutputPreformatted) || |
|
1018 (mPreFormatted && !mWrapColumn) || |
|
1019 IsInPre()) { |
|
1020 EnsureVerticalSpace(mEmptyLines+1); |
|
1021 } |
|
1022 else if (!mInWhitespace) { |
|
1023 Write(kSpace); |
|
1024 mInWhitespace = true; |
|
1025 } |
|
1026 return; |
|
1027 } |
|
1028 |
|
1029 /* Check, if we are in a link (symbolized with mURL containing the URL) |
|
1030 and the text is equal to the URL. In that case we don't want to output |
|
1031 the URL twice so we scrap the text in mURL. */ |
|
1032 if (!mURL.IsEmpty() && mURL.Equals(aText)) { |
|
1033 mURL.Truncate(); |
|
1034 } |
|
1035 Write(aText); |
|
1036 } |
|
1037 |
|
1038 nsresult |
|
1039 nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag) |
|
1040 { |
|
1041 // If we don't want any output, just return |
|
1042 if (!DoOutput()) { |
|
1043 return NS_OK; |
|
1044 } |
|
1045 |
|
1046 if (mLineBreakDue) |
|
1047 EnsureVerticalSpace(mFloatingLines); |
|
1048 |
|
1049 if (MustSuppressLeaf()) { |
|
1050 return NS_OK; |
|
1051 } |
|
1052 |
|
1053 if (aTag == nsGkAtoms::br) { |
|
1054 // Another egregious editor workaround, see bug 38194: |
|
1055 // ignore the bogus br tags that the editor sticks here and there. |
|
1056 nsAutoString tagAttr; |
|
1057 if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr)) |
|
1058 || !tagAttr.EqualsLiteral("_moz")) { |
|
1059 EnsureVerticalSpace(mEmptyLines+1); |
|
1060 } |
|
1061 } |
|
1062 else if (aTag == nsGkAtoms::hr && |
|
1063 (mFlags & nsIDocumentEncoder::OutputFormatted)) { |
|
1064 EnsureVerticalSpace(0); |
|
1065 |
|
1066 // Make a line of dashes as wide as the wrap width |
|
1067 // XXX honoring percentage would be nice |
|
1068 nsAutoString line; |
|
1069 uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25); |
|
1070 while (line.Length() < width) { |
|
1071 line.Append(char16_t('-')); |
|
1072 } |
|
1073 Write(line); |
|
1074 |
|
1075 EnsureVerticalSpace(0); |
|
1076 } |
|
1077 else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) { |
|
1078 Write(NS_LITERAL_STRING("\xFFFC")); |
|
1079 } |
|
1080 else if (aTag == nsGkAtoms::img) { |
|
1081 /* Output (in decreasing order of preference) |
|
1082 alt, title or nothing */ |
|
1083 // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG> |
|
1084 nsAutoString imageDescription; |
|
1085 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, |
|
1086 imageDescription))) { |
|
1087 // If the alt attribute has an empty value (|alt=""|), output nothing |
|
1088 } |
|
1089 else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title, |
|
1090 imageDescription)) |
|
1091 && !imageDescription.IsEmpty()) { |
|
1092 imageDescription = NS_LITERAL_STRING(" [") + |
|
1093 imageDescription + |
|
1094 NS_LITERAL_STRING("] "); |
|
1095 } |
|
1096 |
|
1097 Write(imageDescription); |
|
1098 } |
|
1099 |
|
1100 return NS_OK; |
|
1101 } |
|
1102 |
|
1103 /** |
|
1104 * Adds as many newline as necessary to get |noOfRows| empty lines |
|
1105 * |
|
1106 * noOfRows = -1 : Being in the middle of some line of text |
|
1107 * noOfRows = 0 : Being at the start of a line |
|
1108 * noOfRows = n>0 : Having n empty lines before the current line. |
|
1109 */ |
|
1110 void |
|
1111 nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows) |
|
1112 { |
|
1113 // If we have something in the indent we probably want to output |
|
1114 // it and it's not included in the count for empty lines so we don't |
|
1115 // realize that we should start a new line. |
|
1116 if (noOfRows >= 0 && !mInIndentString.IsEmpty()) { |
|
1117 EndLine(false); |
|
1118 mInWhitespace = true; |
|
1119 } |
|
1120 |
|
1121 while(mEmptyLines < noOfRows) { |
|
1122 EndLine(false); |
|
1123 mInWhitespace = true; |
|
1124 } |
|
1125 mLineBreakDue = false; |
|
1126 mFloatingLines = -1; |
|
1127 } |
|
1128 |
|
1129 /** |
|
1130 * This empties the current line cache without adding a NEWLINE. |
|
1131 * Should not be used if line wrapping is of importance since |
|
1132 * this function destroys the cache information. |
|
1133 * |
|
1134 * It will also write indentation and quotes if we believe us to be |
|
1135 * at the start of the line. |
|
1136 */ |
|
1137 void |
|
1138 nsPlainTextSerializer::FlushLine() |
|
1139 { |
|
1140 if (!mCurrentLine.IsEmpty()) { |
|
1141 if (mAtFirstColumn) { |
|
1142 OutputQuotesAndIndent(); // XXX: Should we always do this? Bug? |
|
1143 } |
|
1144 |
|
1145 Output(mCurrentLine); |
|
1146 mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty(); |
|
1147 mCurrentLine.Truncate(); |
|
1148 mCurrentLineWidth = 0; |
|
1149 } |
|
1150 } |
|
1151 |
|
1152 /** |
|
1153 * Prints the text to output to our current output device (the string mOutputString). |
|
1154 * The only logic here is to replace non breaking spaces with a normal space since |
|
1155 * most (all?) receivers of the result won't understand the nbsp and even be |
|
1156 * confused by it. |
|
1157 */ |
|
1158 void |
|
1159 nsPlainTextSerializer::Output(nsString& aString) |
|
1160 { |
|
1161 if (!aString.IsEmpty()) { |
|
1162 mStartedOutput = true; |
|
1163 } |
|
1164 |
|
1165 if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) { |
|
1166 // First, replace all nbsp characters with spaces, |
|
1167 // which the unicode encoder won't do for us. |
|
1168 aString.ReplaceChar(kNBSP, kSPACE); |
|
1169 } |
|
1170 mOutputString->Append(aString); |
|
1171 } |
|
1172 |
|
1173 static bool |
|
1174 IsSpaceStuffable(const char16_t *s) |
|
1175 { |
|
1176 if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP || |
|
1177 nsCRT::strncmp(s, MOZ_UTF16("From "), 5) == 0) |
|
1178 return true; |
|
1179 else |
|
1180 return false; |
|
1181 } |
|
1182 |
|
1183 /** |
|
1184 * This function adds a piece of text to the current stored line. If we are |
|
1185 * wrapping text and the stored line will become too long, a suitable |
|
1186 * location to wrap will be found and the line that's complete will be |
|
1187 * output. |
|
1188 */ |
|
1189 void |
|
1190 nsPlainTextSerializer::AddToLine(const char16_t * aLineFragment, |
|
1191 int32_t aLineFragmentLength) |
|
1192 { |
|
1193 uint32_t prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent; |
|
1194 |
|
1195 if (mLineBreakDue) |
|
1196 EnsureVerticalSpace(mFloatingLines); |
|
1197 |
|
1198 int32_t linelength = mCurrentLine.Length(); |
|
1199 if (0 == linelength) { |
|
1200 if (0 == aLineFragmentLength) { |
|
1201 // Nothing at all. Are you kidding me? |
|
1202 return; |
|
1203 } |
|
1204 |
|
1205 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
|
1206 if (IsSpaceStuffable(aLineFragment) |
|
1207 && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway |
|
1208 ) |
|
1209 { |
|
1210 // Space stuffing a la RFC 2646 (format=flowed). |
|
1211 mCurrentLine.Append(char16_t(' ')); |
|
1212 |
|
1213 if (MayWrap()) { |
|
1214 mCurrentLineWidth += GetUnicharWidth(' '); |
|
1215 #ifdef DEBUG_wrapping |
|
1216 NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(), |
|
1217 mCurrentLine.Length()) == |
|
1218 (int32_t)mCurrentLineWidth, |
|
1219 "mCurrentLineWidth and reality out of sync!"); |
|
1220 #endif |
|
1221 } |
|
1222 } |
|
1223 } |
|
1224 mEmptyLines=-1; |
|
1225 } |
|
1226 |
|
1227 mCurrentLine.Append(aLineFragment, aLineFragmentLength); |
|
1228 if (MayWrap()) { |
|
1229 mCurrentLineWidth += GetUnicharStringWidth(aLineFragment, |
|
1230 aLineFragmentLength); |
|
1231 #ifdef DEBUG_wrapping |
|
1232 NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(), |
|
1233 mCurrentLine.Length()) == |
|
1234 (int32_t)mCurrentLineWidth, |
|
1235 "mCurrentLineWidth and reality out of sync!"); |
|
1236 #endif |
|
1237 } |
|
1238 |
|
1239 linelength = mCurrentLine.Length(); |
|
1240 |
|
1241 // Wrap? |
|
1242 if (MayWrap()) |
|
1243 { |
|
1244 #ifdef DEBUG_wrapping |
|
1245 NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(), |
|
1246 mCurrentLine.Length()) == |
|
1247 (int32_t)mCurrentLineWidth, |
|
1248 "mCurrentLineWidth and reality out of sync!"); |
|
1249 #endif |
|
1250 // Yes, wrap! |
|
1251 // The "+4" is to avoid wrap lines that only would be a couple |
|
1252 // of letters too long. We give this bonus only if the |
|
1253 // wrapcolumn is more than 20. |
|
1254 uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0; |
|
1255 |
|
1256 // XXX: Should calculate prefixwidth with GetUnicharStringWidth |
|
1257 while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) { |
|
1258 // We go from the end removing one letter at a time until |
|
1259 // we have a reasonable width |
|
1260 int32_t goodSpace = mCurrentLine.Length(); |
|
1261 uint32_t width = mCurrentLineWidth; |
|
1262 while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) { |
|
1263 goodSpace--; |
|
1264 width -= GetUnicharWidth(mCurrentLine[goodSpace]); |
|
1265 } |
|
1266 |
|
1267 goodSpace++; |
|
1268 |
|
1269 if (mLineBreaker) { |
|
1270 goodSpace = mLineBreaker->Prev(mCurrentLine.get(), |
|
1271 mCurrentLine.Length(), goodSpace); |
|
1272 if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT && |
|
1273 nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) { |
|
1274 --goodSpace; // adjust the position since line breaker returns a position next to space |
|
1275 } |
|
1276 } |
|
1277 // fallback if the line breaker is unavailable or failed |
|
1278 if (!mLineBreaker) { |
|
1279 goodSpace = mWrapColumn-prefixwidth; |
|
1280 while (goodSpace >= 0 && |
|
1281 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { |
|
1282 goodSpace--; |
|
1283 } |
|
1284 } |
|
1285 |
|
1286 nsAutoString restOfLine; |
|
1287 if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) { |
|
1288 // If we don't found a good place to break, accept long line and |
|
1289 // try to find another place to break |
|
1290 goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1; |
|
1291 if (mLineBreaker) { |
|
1292 if ((uint32_t)goodSpace < mCurrentLine.Length()) |
|
1293 goodSpace = mLineBreaker->Next(mCurrentLine.get(), |
|
1294 mCurrentLine.Length(), goodSpace); |
|
1295 if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) |
|
1296 goodSpace = mCurrentLine.Length(); |
|
1297 } |
|
1298 // fallback if the line breaker is unavailable or failed |
|
1299 if (!mLineBreaker) { |
|
1300 goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth; |
|
1301 while (goodSpace < linelength && |
|
1302 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { |
|
1303 goodSpace++; |
|
1304 } |
|
1305 } |
|
1306 } |
|
1307 |
|
1308 if ((goodSpace < linelength) && (goodSpace > 0)) { |
|
1309 // Found a place to break |
|
1310 |
|
1311 // -1 (trim a char at the break position) |
|
1312 // only if the line break was a space. |
|
1313 if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { |
|
1314 mCurrentLine.Right(restOfLine, linelength-goodSpace-1); |
|
1315 } |
|
1316 else { |
|
1317 mCurrentLine.Right(restOfLine, linelength-goodSpace); |
|
1318 } |
|
1319 // if breaker was U+0020, it has to consider for delsp=yes support |
|
1320 bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' '; |
|
1321 mCurrentLine.Truncate(goodSpace); |
|
1322 EndLine(true, breakBySpace); |
|
1323 mCurrentLine.Truncate(); |
|
1324 // Space stuff new line? |
|
1325 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
|
1326 if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get()) |
|
1327 && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway |
|
1328 ) |
|
1329 { |
|
1330 // Space stuffing a la RFC 2646 (format=flowed). |
|
1331 mCurrentLine.Append(char16_t(' ')); |
|
1332 //XXX doesn't seem to work correctly for ' ' |
|
1333 } |
|
1334 } |
|
1335 mCurrentLine.Append(restOfLine); |
|
1336 mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(), |
|
1337 mCurrentLine.Length()); |
|
1338 linelength = mCurrentLine.Length(); |
|
1339 mEmptyLines = -1; |
|
1340 } |
|
1341 else { |
|
1342 // Nothing to do. Hopefully we get more data later |
|
1343 // to use for a place to break line |
|
1344 break; |
|
1345 } |
|
1346 } |
|
1347 } |
|
1348 else { |
|
1349 // No wrapping. |
|
1350 } |
|
1351 } |
|
1352 |
|
1353 /** |
|
1354 * Outputs the contents of mCurrentLine, and resets line specific |
|
1355 * variables. Also adds an indentation and prefix if there is |
|
1356 * one specified. Strips ending spaces from the line if it isn't |
|
1357 * preformatted. |
|
1358 */ |
|
1359 void |
|
1360 nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace) |
|
1361 { |
|
1362 uint32_t currentlinelength = mCurrentLine.Length(); |
|
1363 |
|
1364 if (aSoftlinebreak && 0 == currentlinelength) { |
|
1365 // No meaning |
|
1366 return; |
|
1367 } |
|
1368 |
|
1369 /* In non-preformatted mode, remove spaces from the end of the line for |
|
1370 * format=flowed compatibility. Don't do this for these special cases: |
|
1371 * "-- ", the signature separator (RFC 2646) shouldn't be touched and |
|
1372 * "- -- ", the OpenPGP dash-escaped signature separator in inline |
|
1373 * signed messages according to the OpenPGP standard (RFC 2440). |
|
1374 */ |
|
1375 if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) && |
|
1376 !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) && |
|
1377 (aSoftlinebreak || |
|
1378 !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) { |
|
1379 // Remove spaces from the end of the line. |
|
1380 while(currentlinelength > 0 && |
|
1381 mCurrentLine[currentlinelength-1] == ' ') { |
|
1382 --currentlinelength; |
|
1383 } |
|
1384 mCurrentLine.SetLength(currentlinelength); |
|
1385 } |
|
1386 |
|
1387 if (aSoftlinebreak && |
|
1388 (mFlags & nsIDocumentEncoder::OutputFormatFlowed) && |
|
1389 (mIndent == 0)) { |
|
1390 // Add the soft part of the soft linebreak (RFC 2646 4.1) |
|
1391 // We only do this when there is no indentation since format=flowed |
|
1392 // lines and indentation doesn't work well together. |
|
1393 |
|
1394 // If breaker character is ASCII space with RFC 3676 support (delsp=yes), |
|
1395 // add twice space. |
|
1396 if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace) |
|
1397 mCurrentLine.Append(NS_LITERAL_STRING(" ")); |
|
1398 else |
|
1399 mCurrentLine.Append(char16_t(' ')); |
|
1400 } |
|
1401 |
|
1402 if (aSoftlinebreak) { |
|
1403 mEmptyLines=0; |
|
1404 } |
|
1405 else { |
|
1406 // Hard break |
|
1407 if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) { |
|
1408 mEmptyLines=-1; |
|
1409 } |
|
1410 |
|
1411 mEmptyLines++; |
|
1412 } |
|
1413 |
|
1414 if (mAtFirstColumn) { |
|
1415 // If we don't have anything "real" to output we have to |
|
1416 // make sure the indent doesn't end in a space since that |
|
1417 // would trick a format=flowed-aware receiver. |
|
1418 bool stripTrailingSpaces = mCurrentLine.IsEmpty(); |
|
1419 OutputQuotesAndIndent(stripTrailingSpaces); |
|
1420 } |
|
1421 |
|
1422 mCurrentLine.Append(mLineBreak); |
|
1423 Output(mCurrentLine); |
|
1424 mCurrentLine.Truncate(); |
|
1425 mCurrentLineWidth = 0; |
|
1426 mAtFirstColumn=true; |
|
1427 mInWhitespace=true; |
|
1428 mLineBreakDue = false; |
|
1429 mFloatingLines = -1; |
|
1430 } |
|
1431 |
|
1432 |
|
1433 /** |
|
1434 * Outputs the calculated and stored indent and text in the indentation. That is |
|
1435 * quote chars and numbers for numbered lists and such. It will also reset any |
|
1436 * stored text to put in the indentation after using it. |
|
1437 */ |
|
1438 void |
|
1439 nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */) |
|
1440 { |
|
1441 nsAutoString stringToOutput; |
|
1442 |
|
1443 // Put the mail quote "> " chars in, if appropriate: |
|
1444 if (mCiteQuoteLevel > 0) { |
|
1445 nsAutoString quotes; |
|
1446 for(int i=0; i < mCiteQuoteLevel; i++) { |
|
1447 quotes.Append(char16_t('>')); |
|
1448 } |
|
1449 if (!mCurrentLine.IsEmpty()) { |
|
1450 /* Better don't output a space here, if the line is empty, |
|
1451 in case a receiving f=f-aware UA thinks, this were a flowed line, |
|
1452 which it isn't - it's just empty. |
|
1453 (Flowed lines may be joined with the following one, |
|
1454 so the empty line may be lost completely.) */ |
|
1455 quotes.Append(char16_t(' ')); |
|
1456 } |
|
1457 stringToOutput = quotes; |
|
1458 mAtFirstColumn = false; |
|
1459 } |
|
1460 |
|
1461 // Indent if necessary |
|
1462 int32_t indentwidth = mIndent - mInIndentString.Length(); |
|
1463 if (indentwidth > 0 |
|
1464 && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) |
|
1465 // Don't make empty lines look flowed |
|
1466 ) { |
|
1467 nsAutoString spaces; |
|
1468 for (int i=0; i < indentwidth; ++i) |
|
1469 spaces.Append(char16_t(' ')); |
|
1470 stringToOutput += spaces; |
|
1471 mAtFirstColumn = false; |
|
1472 } |
|
1473 |
|
1474 if (!mInIndentString.IsEmpty()) { |
|
1475 stringToOutput += mInIndentString; |
|
1476 mAtFirstColumn = false; |
|
1477 mInIndentString.Truncate(); |
|
1478 } |
|
1479 |
|
1480 if (stripTrailingSpaces) { |
|
1481 int32_t lineLength = stringToOutput.Length(); |
|
1482 while(lineLength > 0 && |
|
1483 ' ' == stringToOutput[lineLength-1]) { |
|
1484 --lineLength; |
|
1485 } |
|
1486 stringToOutput.SetLength(lineLength); |
|
1487 } |
|
1488 |
|
1489 if (!stringToOutput.IsEmpty()) { |
|
1490 Output(stringToOutput); |
|
1491 } |
|
1492 |
|
1493 } |
|
1494 |
|
1495 /** |
|
1496 * Write a string. This is the highlevel function to use to get text output. |
|
1497 * By using AddToLine, Output, EndLine and other functions it handles quotation, |
|
1498 * line wrapping, indentation, whitespace compression and other things. |
|
1499 */ |
|
1500 void |
|
1501 nsPlainTextSerializer::Write(const nsAString& aStr) |
|
1502 { |
|
1503 // XXX Copy necessary to use nsString methods and gain |
|
1504 // access to underlying buffer |
|
1505 nsAutoString str(aStr); |
|
1506 |
|
1507 #ifdef DEBUG_wrapping |
|
1508 printf("Write(%s): wrap col = %d\n", |
|
1509 NS_ConvertUTF16toUTF8(str).get(), mWrapColumn); |
|
1510 #endif |
|
1511 |
|
1512 int32_t bol = 0; |
|
1513 int32_t newline; |
|
1514 |
|
1515 int32_t totLen = str.Length(); |
|
1516 |
|
1517 // If the string is empty, do nothing: |
|
1518 if (totLen <= 0) return; |
|
1519 |
|
1520 // For Flowed text change nbsp-ses to spaces at end of lines to allow them |
|
1521 // to be cut off along with usual spaces if required. (bug #125928) |
|
1522 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
|
1523 for (int32_t i = totLen-1; i >= 0; i--) { |
|
1524 char16_t c = str[i]; |
|
1525 if ('\n' == c || '\r' == c || ' ' == c || '\t' == c) |
|
1526 continue; |
|
1527 if (kNBSP == c) |
|
1528 str.Replace(i, 1, ' '); |
|
1529 else |
|
1530 break; |
|
1531 } |
|
1532 } |
|
1533 |
|
1534 // We have two major codepaths here. One that does preformatted text and one |
|
1535 // that does normal formatted text. The one for preformatted text calls |
|
1536 // Output directly while the other code path goes through AddToLine. |
|
1537 if ((mPreFormatted && !mWrapColumn) || IsInPre() |
|
1538 || ((mSpanLevel > 0 || mDontWrapAnyQuotes) |
|
1539 && mEmptyLines >= 0 && str.First() == char16_t('>'))) { |
|
1540 // No intelligent wrapping. |
|
1541 |
|
1542 // This mustn't be mixed with intelligent wrapping without clearing |
|
1543 // the mCurrentLine buffer before!!! |
|
1544 NS_ASSERTION(mCurrentLine.IsEmpty(), |
|
1545 "Mixed wrapping data and nonwrapping data on the same line"); |
|
1546 if (!mCurrentLine.IsEmpty()) { |
|
1547 FlushLine(); |
|
1548 } |
|
1549 |
|
1550 // Put the mail quote "> " chars in, if appropriate. |
|
1551 // Have to put it in before every line. |
|
1552 while(bol<totLen) { |
|
1553 bool outputQuotes = mAtFirstColumn; |
|
1554 bool atFirstColumn = mAtFirstColumn; |
|
1555 bool outputLineBreak = false; |
|
1556 bool spacesOnly = true; |
|
1557 |
|
1558 // Find one of '\n' or '\r' using iterators since nsAString |
|
1559 // doesn't have the old FindCharInSet function. |
|
1560 nsAString::const_iterator iter; str.BeginReading(iter); |
|
1561 nsAString::const_iterator done_searching; str.EndReading(done_searching); |
|
1562 iter.advance(bol); |
|
1563 int32_t new_newline = bol; |
|
1564 newline = kNotFound; |
|
1565 while(iter != done_searching) { |
|
1566 if ('\n' == *iter || '\r' == *iter) { |
|
1567 newline = new_newline; |
|
1568 break; |
|
1569 } |
|
1570 if (' ' != *iter) |
|
1571 spacesOnly = false; |
|
1572 ++new_newline; |
|
1573 ++iter; |
|
1574 } |
|
1575 |
|
1576 // Done searching |
|
1577 nsAutoString stringpart; |
|
1578 if (newline == kNotFound) { |
|
1579 // No new lines. |
|
1580 stringpart.Assign(Substring(str, bol, totLen - bol)); |
|
1581 if (!stringpart.IsEmpty()) { |
|
1582 char16_t lastchar = stringpart[stringpart.Length()-1]; |
|
1583 if ((lastchar == '\t') || (lastchar == ' ') || |
|
1584 (lastchar == '\r') ||(lastchar == '\n')) { |
|
1585 mInWhitespace = true; |
|
1586 } |
|
1587 else { |
|
1588 mInWhitespace = false; |
|
1589 } |
|
1590 } |
|
1591 mEmptyLines=-1; |
|
1592 atFirstColumn = mAtFirstColumn && (totLen-bol)==0; |
|
1593 bol = totLen; |
|
1594 } |
|
1595 else { |
|
1596 // There is a newline |
|
1597 stringpart.Assign(Substring(str, bol, newline-bol)); |
|
1598 mInWhitespace = true; |
|
1599 outputLineBreak = true; |
|
1600 mEmptyLines=0; |
|
1601 atFirstColumn = true; |
|
1602 bol = newline+1; |
|
1603 if ('\r' == *iter && bol < totLen && '\n' == *++iter) { |
|
1604 // There was a CRLF in the input. This used to be illegal and |
|
1605 // stripped by the parser. Apparently not anymore. Let's skip |
|
1606 // over the LF. |
|
1607 bol++; |
|
1608 } |
|
1609 } |
|
1610 |
|
1611 mCurrentLine.AssignLiteral(""); |
|
1612 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
|
1613 if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928 |
|
1614 !stringpart.EqualsLiteral("-- ") && |
|
1615 !stringpart.EqualsLiteral("- -- ")) |
|
1616 stringpart.Trim(" ", false, true, true); |
|
1617 if (IsSpaceStuffable(stringpart.get()) && stringpart[0] != '>') |
|
1618 mCurrentLine.Append(char16_t(' ')); |
|
1619 } |
|
1620 mCurrentLine.Append(stringpart); |
|
1621 |
|
1622 if (outputQuotes) { |
|
1623 // Note: this call messes with mAtFirstColumn |
|
1624 OutputQuotesAndIndent(); |
|
1625 } |
|
1626 |
|
1627 Output(mCurrentLine); |
|
1628 if (outputLineBreak) { |
|
1629 Output(mLineBreak); |
|
1630 } |
|
1631 mAtFirstColumn = atFirstColumn; |
|
1632 } |
|
1633 |
|
1634 // Reset mCurrentLine. |
|
1635 mCurrentLine.Truncate(); |
|
1636 |
|
1637 #ifdef DEBUG_wrapping |
|
1638 printf("No wrapping: newline is %d, totLen is %d\n", |
|
1639 newline, totLen); |
|
1640 #endif |
|
1641 return; |
|
1642 } |
|
1643 |
|
1644 // Intelligent handling of text |
|
1645 // If needed, strip out all "end of lines" |
|
1646 // and multiple whitespace between words |
|
1647 int32_t nextpos; |
|
1648 const char16_t * offsetIntoBuffer = nullptr; |
|
1649 |
|
1650 while (bol < totLen) { // Loop over lines |
|
1651 // Find a place where we may have to do whitespace compression |
|
1652 nextpos = str.FindCharInSet(" \t\n\r", bol); |
|
1653 #ifdef DEBUG_wrapping |
|
1654 nsAutoString remaining; |
|
1655 str.Right(remaining, totLen - bol); |
|
1656 foo = ToNewCString(remaining); |
|
1657 // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n", |
|
1658 // bol, nextpos, totLen, foo); |
|
1659 nsMemory::Free(foo); |
|
1660 #endif |
|
1661 |
|
1662 if (nextpos == kNotFound) { |
|
1663 // The rest of the string |
|
1664 offsetIntoBuffer = str.get() + bol; |
|
1665 AddToLine(offsetIntoBuffer, totLen-bol); |
|
1666 bol=totLen; |
|
1667 mInWhitespace=false; |
|
1668 } |
|
1669 else { |
|
1670 // There's still whitespace left in the string |
|
1671 if (nextpos != 0 && (nextpos + 1) < totLen) { |
|
1672 offsetIntoBuffer = str.get() + nextpos; |
|
1673 // skip '\n' if it is between CJ chars |
|
1674 if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) { |
|
1675 offsetIntoBuffer = str.get() + bol; |
|
1676 AddToLine(offsetIntoBuffer, nextpos-bol); |
|
1677 bol = nextpos + 1; |
|
1678 continue; |
|
1679 } |
|
1680 } |
|
1681 // If we're already in whitespace and not preformatted, just skip it: |
|
1682 if (mInWhitespace && (nextpos == bol) && !mPreFormatted && |
|
1683 !(mFlags & nsIDocumentEncoder::OutputPreformatted)) { |
|
1684 // Skip whitespace |
|
1685 bol++; |
|
1686 continue; |
|
1687 } |
|
1688 |
|
1689 if (nextpos == bol) { |
|
1690 // Note that we are in whitespace. |
|
1691 mInWhitespace = true; |
|
1692 offsetIntoBuffer = str.get() + nextpos; |
|
1693 AddToLine(offsetIntoBuffer, 1); |
|
1694 bol++; |
|
1695 continue; |
|
1696 } |
|
1697 |
|
1698 mInWhitespace = true; |
|
1699 |
|
1700 offsetIntoBuffer = str.get() + bol; |
|
1701 if (mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) { |
|
1702 // Preserve the real whitespace character |
|
1703 nextpos++; |
|
1704 AddToLine(offsetIntoBuffer, nextpos-bol); |
|
1705 bol = nextpos; |
|
1706 } |
|
1707 else { |
|
1708 // Replace the whitespace with a space |
|
1709 AddToLine(offsetIntoBuffer, nextpos-bol); |
|
1710 AddToLine(kSpace.get(),1); |
|
1711 bol = nextpos + 1; // Let's eat the whitespace |
|
1712 } |
|
1713 } |
|
1714 } // Continue looping over the string |
|
1715 } |
|
1716 |
|
1717 |
|
1718 /** |
|
1719 * Gets the value of an attribute in a string. If the function returns |
|
1720 * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified. |
|
1721 */ |
|
1722 nsresult |
|
1723 nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName, |
|
1724 nsString& aValueRet) |
|
1725 { |
|
1726 if (mElement) { |
|
1727 if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) { |
|
1728 return NS_OK; |
|
1729 } |
|
1730 } |
|
1731 |
|
1732 return NS_ERROR_NOT_AVAILABLE; |
|
1733 } |
|
1734 |
|
1735 /** |
|
1736 * Returns true, if the element was inserted by Moz' TXT->HTML converter. |
|
1737 * In this case, we should ignore it. |
|
1738 */ |
|
1739 bool |
|
1740 nsPlainTextSerializer::IsCurrentNodeConverted() |
|
1741 { |
|
1742 nsAutoString value; |
|
1743 nsresult rv = GetAttributeValue(nsGkAtoms::_class, value); |
|
1744 return (NS_SUCCEEDED(rv) && |
|
1745 (value.EqualsIgnoreCase("moz-txt", 7) || |
|
1746 value.EqualsIgnoreCase("\"moz-txt", 8))); |
|
1747 } |
|
1748 |
|
1749 |
|
1750 // static |
|
1751 nsIAtom* |
|
1752 nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) |
|
1753 { |
|
1754 if (!aContent->IsHTML()) { |
|
1755 return nullptr; |
|
1756 } |
|
1757 |
|
1758 nsIAtom* localName = aContent->Tag(); |
|
1759 return localName->IsStaticAtom() ? localName : nullptr; |
|
1760 } |
|
1761 |
|
1762 /** |
|
1763 * Returns true if we currently are inside a <pre>. The check is done |
|
1764 * by traversing the tag stack looking for <pre> until we hit a block |
|
1765 * level tag which is assumed to override any <pre>:s below it in |
|
1766 * the stack. To do this correctly to a 100% would require access |
|
1767 * to style which we don't support in this converter. |
|
1768 */ |
|
1769 bool |
|
1770 nsPlainTextSerializer::IsInPre() |
|
1771 { |
|
1772 int32_t i = mTagStackIndex; |
|
1773 while(i > 0) { |
|
1774 if (mTagStack[i - 1] == nsGkAtoms::pre) |
|
1775 return true; |
|
1776 if (nsContentUtils::IsHTMLBlock(mTagStack[i - 1])) { |
|
1777 // We assume that every other block overrides a <pre> |
|
1778 return false; |
|
1779 } |
|
1780 --i; |
|
1781 } |
|
1782 |
|
1783 // Not a <pre> in the whole stack |
|
1784 return false; |
|
1785 } |
|
1786 |
|
1787 /** |
|
1788 * This method is required only to identify LI's inside OL. |
|
1789 * Returns TRUE if we are inside an OL tag and FALSE otherwise. |
|
1790 */ |
|
1791 bool |
|
1792 nsPlainTextSerializer::IsInOL() |
|
1793 { |
|
1794 int32_t i = mTagStackIndex; |
|
1795 while(--i >= 0) { |
|
1796 if (mTagStack[i] == nsGkAtoms::ol) |
|
1797 return true; |
|
1798 if (mTagStack[i] == nsGkAtoms::ul) { |
|
1799 // If a UL is reached first, LI belongs the UL nested in OL. |
|
1800 return false; |
|
1801 } |
|
1802 } |
|
1803 // We may reach here for orphan LI's. |
|
1804 return false; |
|
1805 } |
|
1806 |
|
1807 /* |
|
1808 @return 0 = no header, 1 = h1, ..., 6 = h6 |
|
1809 */ |
|
1810 int32_t HeaderLevel(nsIAtom* aTag) |
|
1811 { |
|
1812 if (aTag == nsGkAtoms::h1) { |
|
1813 return 1; |
|
1814 } |
|
1815 if (aTag == nsGkAtoms::h2) { |
|
1816 return 2; |
|
1817 } |
|
1818 if (aTag == nsGkAtoms::h3) { |
|
1819 return 3; |
|
1820 } |
|
1821 if (aTag == nsGkAtoms::h4) { |
|
1822 return 4; |
|
1823 } |
|
1824 if (aTag == nsGkAtoms::h5) { |
|
1825 return 5; |
|
1826 } |
|
1827 if (aTag == nsGkAtoms::h6) { |
|
1828 return 6; |
|
1829 } |
|
1830 return 0; |
|
1831 } |
|
1832 |
|
1833 |
|
1834 /* |
|
1835 * This is an implementation of GetUnicharWidth() and |
|
1836 * GetUnicharStringWidth() as defined in |
|
1837 * "The Single UNIX Specification, Version 2, The Open Group, 1997" |
|
1838 * <http://www.UNIX-systems.org/online.html> |
|
1839 * |
|
1840 * Markus Kuhn -- 2000-02-08 -- public domain |
|
1841 * |
|
1842 * Minor alterations to fit Mozilla's data types by Daniel Bratell |
|
1843 */ |
|
1844 |
|
/* These functions define the column width of an ISO 10646 character
 * as follows:
 *
 *    - The null character (U+0000) has a column width of 0.
 *
 *    - Other C0/C1 control characters and DEL will lead to a return
 *      value of -1.
 *
 *    - Non-spacing and enclosing combining characters (general
 *      category code Mn or Me in the Unicode database) have a
 *      column width of 0.
 *
 *    - Spacing characters in the East Asian Wide (W) or East Asian
 *      FullWidth (F) category as defined in Unicode Technical
 *      Report #11 have a column width of 2.
 *
 *    - All remaining characters (including all printable
 *      ISO 8859-1 and WGL4 characters, Unicode control characters,
 *      etc.) have a column width of 1.
 *
 * This implementation assumes that char16_t code units are encoded
 * in ISO 10646; each code unit is classified on its own.
 */

int32_t GetUnicharWidth(char16_t ucs)
{
  /* sorted list of non-overlapping intervals of non-spacing characters */
  static const struct Range {
    uint16_t first;
    uint16_t last;
  } kNonSpacing[] = {
    { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
    { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
    { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
    { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
    { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
    { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
    { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
    { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
    { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
    { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
    { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
    { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
    { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
    { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
    { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
    { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
    { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
    { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
    { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
    { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
    { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
    { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
    { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
  };

  /* the null character occupies no column */
  if (ucs == 0) {
    return 0;
  }

  /* remaining C0 controls, DEL and C1 controls have no defined width */
  const bool isC0Control = ucs < 32;
  const bool isDelOrC1Control = ucs >= 0x7f && ucs < 0xa0;
  if (isC0Control || isDelOrC1Control) {
    return -1;
  }

  /* everything below the first table entry (Latin-1 etc.) is one column */
  if (ucs < kNonSpacing[0].first) {
    return 1;
  }

  /* binary search of the non-spacing table over the half-open range [lo, hi) */
  int32_t lo = 0;
  int32_t hi = sizeof(kNonSpacing) / sizeof(kNonSpacing[0]);
  while (lo < hi) {
    const int32_t probe = lo + (hi - lo) / 2;
    if (ucs > kNonSpacing[probe].last) {
      lo = probe + 1;
    } else if (ucs < kNonSpacing[probe].first) {
      hi = probe;
    } else {
      /* ucs lies inside this interval: non-spacing, zero columns */
      return 0;
    }
  }

  /* from here on, ucs is neither a combining nor a C0/C1 control character */

  /* fast exit for the majority of non-wide scripts */
  if (ucs < 0x1100) {
    return 1;
  }

  /* ucs >= 0x1100 is already established, so Hangul Jamo needs only its
     upper bound checked */
  const bool wide =
    ucs <= 0x115f ||                       /* Hangul Jamo */
    (ucs >= 0x2e80 && ucs <= 0xa4cf &&
     (ucs & ~0x0011) != 0x300a &&
     ucs != 0x303f) ||                     /* CJK ... Yi */
    (ucs >= 0xac00 && ucs <= 0xd7a3) ||    /* Hangul Syllables */
    (ucs >= 0xf900 && ucs <= 0xfaff) ||    /* CJK Compatibility Ideographs */
    (ucs >= 0xfe30 && ucs <= 0xfe6f) ||    /* CJK Compatibility Forms */
    (ucs >= 0xff00 && ucs <= 0xff5f) ||    /* Fullwidth Forms */
    (ucs >= 0xffe0 && ucs <= 0xffe6);

  return wide ? 2 : 1;
}
|
1949 |
|
1950 |
|
1951 int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n) |
|
1952 { |
|
1953 int32_t w, width = 0; |
|
1954 |
|
1955 for (;*pwcs && n-- > 0; pwcs++) |
|
1956 if ((w = GetUnicharWidth(*pwcs)) < 0) |
|
1957 ++width; // Taking 1 as the width of non-printable character, for bug# 94475. |
|
1958 else |
|
1959 width += w; |
|
1960 |
|
1961 return width; |
|
1962 } |
|
1963 |