Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 /*
7 * nsIContentSerializer implementation that can be used with an
8 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
9 * (eg for copy/paste as plaintext).
10 */
12 #include "nsPlainTextSerializer.h"
13 #include "nsLWBrkCIID.h"
14 #include "nsIServiceManager.h"
15 #include "nsGkAtoms.h"
16 #include "nsNameSpaceManager.h"
17 #include "nsTextFragment.h"
18 #include "nsContentUtils.h"
19 #include "nsReadableUtils.h"
20 #include "nsUnicharUtils.h"
21 #include "nsCRT.h"
22 #include "mozilla/dom/Element.h"
23 #include "mozilla/Preferences.h"
25 using namespace mozilla;
26 using namespace mozilla::dom;
// Names of the preferences consulted by Init() for formatted output.
#define PREF_STRUCTS "converter.html2txt.structs"
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"

// Basic indentation unit, in columns.
static const int32_t kTabSize = 4;

// Indention of h1, if mHeaderStrategy = 1 or = 2. Indention of other
// headers is derived from that. XXX center h1?
static const int32_t kIndentSizeHeaders = 2;

// If mHeaderStrategy = 1, indent h(x+1) this many columns more than h(x).
static const int32_t kIndentIncrementHeaders = 2;

// Indention of non-first lines of ul and ol.
static const int32_t kIndentSizeList = kTabSize;

// Indention of <dd>.
static const int32_t kIndentSizeDD = kTabSize;

// Code units for the no-break space (U+00A0) and the ordinary space.
static const char16_t kNBSP = 0x00A0;
static const char16_t kSPACE = ' ';
46 static int32_t HeaderLevel(nsIAtom* aTag);
47 static int32_t GetUnicharWidth(char16_t ucs);
48 static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n);
50 // Someday may want to make this non-const:
51 static const uint32_t TagStackSize = 500;
52 static const uint32_t OLStackSize = 100;
54 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
55 {
56 nsPlainTextSerializer* it = new nsPlainTextSerializer();
57 if (!it) {
58 return NS_ERROR_OUT_OF_MEMORY;
59 }
61 return CallQueryInterface(it, aSerializer);
62 }
64 nsPlainTextSerializer::nsPlainTextSerializer()
65 : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
66 {
68 mOutputString = nullptr;
69 mHeadLevel = 0;
70 mAtFirstColumn = true;
71 mIndent = 0;
72 mCiteQuoteLevel = 0;
73 mStructs = true; // will be read from prefs later
74 mHeaderStrategy = 1 /*indent increasingly*/; // ditto
75 mDontWrapAnyQuotes = false; // ditto
76 mHasWrittenCiteBlockquote = false;
77 mSpanLevel = 0;
78 for (int32_t i = 0; i <= 6; i++) {
79 mHeaderCounter[i] = 0;
80 }
82 // Line breaker
83 mWrapColumn = 72; // XXX magic number, we expect someone to reset this
84 mCurrentLineWidth = 0;
86 // Flow
87 mEmptyLines = 1; // The start of the document is an "empty line" in itself,
88 mInWhitespace = false;
89 mPreFormatted = false;
90 mStartedOutput = false;
92 // initialize the tag stack to zero:
93 // The stack only ever contains pointers to static atoms, so they don't
94 // need refcounting.
95 mTagStack = new nsIAtom*[TagStackSize];
96 mTagStackIndex = 0;
97 mIgnoreAboveIndex = (uint32_t)kNotFound;
99 // initialize the OL stack, where numbers for ordered lists are kept
100 mOLStack = new int32_t[OLStackSize];
101 mOLStackIndex = 0;
103 mULCount = 0;
105 mIgnoredChildNodeLevel = 0;
106 }
108 nsPlainTextSerializer::~nsPlainTextSerializer()
109 {
110 delete[] mTagStack;
111 delete[] mOLStack;
112 NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!");
113 }
115 NS_IMPL_ISUPPORTS(nsPlainTextSerializer,
116 nsIContentSerializer)
119 NS_IMETHODIMP
120 nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
121 const char* aCharSet, bool aIsCopying,
122 bool aIsWholeDocument)
123 {
124 #ifdef DEBUG
125 // Check if the major control flags are set correctly.
126 if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
127 NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
128 "If you want format=flowed, you must combine it with "
129 "nsIDocumentEncoder::OutputFormatted");
130 }
132 if (aFlags & nsIDocumentEncoder::OutputFormatted) {
133 NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
134 "Can't do formatted and preformatted output at the same time!");
135 }
136 #endif
138 mFlags = aFlags;
139 mWrapColumn = aWrapColumn;
141 // Only create a linebreaker if we will handle wrapping.
142 if (MayWrap()) {
143 mLineBreaker = nsContentUtils::LineBreaker();
144 }
146 // Set the line break character:
147 if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
148 && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
149 // Windows
150 mLineBreak.AssignLiteral("\r\n");
151 }
152 else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
153 // Mac
154 mLineBreak.Assign(char16_t('\r'));
155 }
156 else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
157 // Unix/DOM
158 mLineBreak.Assign(char16_t('\n'));
159 }
160 else {
161 // Platform/default
162 mLineBreak.AssignLiteral(NS_LINEBREAK);
163 }
165 mLineBreakDue = false;
166 mFloatingLines = -1;
168 if (mFlags & nsIDocumentEncoder::OutputFormatted) {
169 // Get some prefs that controls how we do formatted output
170 mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
172 mHeaderStrategy =
173 Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
175 // DontWrapAnyQuotes is set according to whether plaintext mail
176 // is wrapping to window width -- see bug 134439.
177 // We'll only want this if we're wrapping and formatted.
178 if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) {
179 mDontWrapAnyQuotes =
180 Preferences::GetBool("mail.compose.wrap_to_window_width",
181 mDontWrapAnyQuotes);
182 }
183 }
185 // XXX We should let the caller pass this in.
186 if (Preferences::GetBool("browser.frames.enabled")) {
187 mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
188 }
189 else {
190 mFlags |= nsIDocumentEncoder::OutputNoFramesContent;
191 }
193 return NS_OK;
194 }
196 bool
197 nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack)
198 {
199 uint32_t size = aStack.Length();
200 if (size == 0) {
201 return false;
202 }
203 return aStack.ElementAt(size-1);
204 }
206 void
207 nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue)
208 {
209 uint32_t size = aStack.Length();
210 if (size > 0) {
211 aStack.ElementAt(size-1) = aValue;
212 }
213 else {
214 NS_ERROR("There is no \"Last\" value");
215 }
216 }
218 void
219 nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue)
220 {
221 aStack.AppendElement(bool(aValue));
222 }
224 bool
225 nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack)
226 {
227 bool returnValue = false;
228 uint32_t size = aStack.Length();
229 if (size > 0) {
230 returnValue = aStack.ElementAt(size-1);
231 aStack.RemoveElementAt(size-1);
232 }
233 return returnValue;
234 }
236 bool
237 nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag)
238 {
239 // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set,
240 // non-textual container element should be serialized as placeholder
241 // character and its child nodes should be ignored. See bug 895239.
242 if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) {
243 return false;
244 }
246 return
247 (aTag == nsGkAtoms::audio) ||
248 (aTag == nsGkAtoms::canvas) ||
249 (aTag == nsGkAtoms::iframe) ||
250 (aTag == nsGkAtoms::meter) ||
251 (aTag == nsGkAtoms::progress) ||
252 (aTag == nsGkAtoms::object) ||
253 (aTag == nsGkAtoms::svg) ||
254 (aTag == nsGkAtoms::video);
255 }
257 NS_IMETHODIMP
258 nsPlainTextSerializer::AppendText(nsIContent* aText,
259 int32_t aStartOffset,
260 int32_t aEndOffset,
261 nsAString& aStr)
262 {
263 if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
264 return NS_OK;
265 }
267 NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
268 if ( aStartOffset < 0 )
269 return NS_ERROR_INVALID_ARG;
271 NS_ENSURE_ARG(aText);
273 nsresult rv = NS_OK;
275 nsIContent* content = aText;
276 const nsTextFragment* frag;
277 if (!content || !(frag = content->GetText())) {
278 return NS_ERROR_FAILURE;
279 }
281 int32_t fragLength = frag->GetLength();
282 int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
283 NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
285 int32_t length = endoffset - aStartOffset;
286 if (length <= 0) {
287 return NS_OK;
288 }
290 nsAutoString textstr;
291 if (frag->Is2b()) {
292 textstr.Assign(frag->Get2b() + aStartOffset, length);
293 }
294 else {
295 // AssignASCII is for 7-bit character only, so don't use it
296 const char *data = frag->Get1b();
297 CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
298 }
300 mOutputString = &aStr;
302 // We have to split the string across newlines
303 // to match parser behavior
304 int32_t start = 0;
305 int32_t offset = textstr.FindCharInSet("\n\r");
306 while (offset != kNotFound) {
308 if (offset>start) {
309 // Pass in the line
310 DoAddText(false,
311 Substring(textstr, start, offset-start));
312 }
314 // Pass in a newline
315 DoAddText(true, mLineBreak);
317 start = offset+1;
318 offset = textstr.FindCharInSet("\n\r", start);
319 }
321 // Consume the last bit of the string if there's any left
322 if (start < length) {
323 if (start) {
324 DoAddText(false, Substring(textstr, start, length - start));
325 }
326 else {
327 DoAddText(false, textstr);
328 }
329 }
331 mOutputString = nullptr;
333 return rv;
334 }
336 NS_IMETHODIMP
337 nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
338 int32_t aStartOffset,
339 int32_t aEndOffset,
340 nsAString& aStr)
341 {
342 return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
343 }
345 NS_IMETHODIMP
346 nsPlainTextSerializer::AppendElementStart(Element* aElement,
347 Element* aOriginalElement,
348 nsAString& aStr)
349 {
350 NS_ENSURE_ARG(aElement);
352 mElement = aElement;
354 nsresult rv;
355 nsIAtom* id = GetIdForContent(mElement);
357 bool isContainer = !nsContentUtils::IsHTMLVoid(id);
359 mOutputString = &aStr;
361 if (isContainer) {
362 rv = DoOpenContainer(id);
363 }
364 else {
365 rv = DoAddLeaf(id);
366 }
368 mElement = nullptr;
369 mOutputString = nullptr;
371 if (id == nsGkAtoms::head) {
372 ++mHeadLevel;
373 }
375 return rv;
376 }
378 NS_IMETHODIMP
379 nsPlainTextSerializer::AppendElementEnd(Element* aElement,
380 nsAString& aStr)
381 {
382 NS_ENSURE_ARG(aElement);
384 mElement = aElement;
386 nsresult rv;
387 nsIAtom* id = GetIdForContent(mElement);
389 bool isContainer = !nsContentUtils::IsHTMLVoid(id);
391 mOutputString = &aStr;
393 rv = NS_OK;
394 if (isContainer) {
395 rv = DoCloseContainer(id);
396 }
398 mElement = nullptr;
399 mOutputString = nullptr;
401 if (id == nsGkAtoms::head) {
402 NS_ASSERTION(mHeadLevel != 0,
403 "mHeadLevel being decremented below 0");
404 --mHeadLevel;
405 }
407 return rv;
408 }
410 NS_IMETHODIMP
411 nsPlainTextSerializer::Flush(nsAString& aStr)
412 {
413 mOutputString = &aStr;
414 FlushLine();
415 mOutputString = nullptr;
416 return NS_OK;
417 }
419 NS_IMETHODIMP
420 nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument,
421 nsAString& aStr)
422 {
423 return NS_OK;
424 }
426 nsresult
427 nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag)
428 {
429 // Check if we need output current node as placeholder character and ignore
430 // child nodes.
431 if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) {
432 if (mIgnoredChildNodeLevel == 0) {
433 // Serialize current node as placeholder character
434 Write(NS_LITERAL_STRING("\xFFFC"));
435 }
436 // Ignore child nodes.
437 mIgnoredChildNodeLevel++;
438 return NS_OK;
439 }
441 if (mFlags & nsIDocumentEncoder::OutputRaw) {
442 // Raw means raw. Don't even think about doing anything fancy
443 // here like indenting, adding line breaks or any other
444 // characters such as list item bullets, quote characters
445 // around <q>, etc. I mean it! Don't make me smack you!
447 return NS_OK;
448 }
450 if (mTagStackIndex < TagStackSize) {
451 mTagStack[mTagStackIndex++] = aTag;
452 }
454 if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
455 return NS_OK;
456 }
458 // Reset this so that <blockquote type=cite> doesn't affect the whitespace
459 // above random <pre>s below it.
460 mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
461 aTag == nsGkAtoms::pre;
463 bool isInCiteBlockquote = false;
465 // XXX special-case <blockquote type=cite> so that we don't add additional
466 // newlines before the text.
467 if (aTag == nsGkAtoms::blockquote) {
468 nsAutoString value;
469 nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
470 isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
471 }
473 if (mLineBreakDue && !isInCiteBlockquote)
474 EnsureVerticalSpace(mFloatingLines);
476 // Check if this tag's content that should not be output
477 if ((aTag == nsGkAtoms::noscript &&
478 !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
479 ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
480 !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
481 // Ignore everything that follows the current tag in
482 // question until a matching end tag is encountered.
483 mIgnoreAboveIndex = mTagStackIndex - 1;
484 return NS_OK;
485 }
487 if (aTag == nsGkAtoms::body) {
488 // Try to figure out here whether we have a
489 // preformatted style attribute.
490 //
491 // Trigger on the presence of a "pre-wrap" in the
492 // style attribute. That's a very simplistic way to do
493 // it, but better than nothing.
494 // Also set mWrapColumn to the value given there
495 // (which arguably we should only do if told to do so).
496 nsAutoString style;
497 int32_t whitespace;
498 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
499 (kNotFound != (whitespace = style.Find("white-space:")))) {
501 if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
502 #ifdef DEBUG_preformatted
503 printf("Set mPreFormatted based on style pre-wrap\n");
504 #endif
505 mPreFormatted = true;
506 int32_t widthOffset = style.Find("width:");
507 if (widthOffset >= 0) {
508 // We have to search for the ch before the semicolon,
509 // not for the semicolon itself, because nsString::ToInteger()
510 // considers 'c' to be a valid numeric char (even if radix=10)
511 // but then gets confused if it sees it next to the number
512 // when the radix specified was 10, and returns an error code.
513 int32_t semiOffset = style.Find("ch", false, widthOffset+6);
514 int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
515 : style.Length() - widthOffset);
516 nsAutoString widthstr;
517 style.Mid(widthstr, widthOffset+6, length);
518 nsresult err;
519 int32_t col = widthstr.ToInteger(&err);
521 if (NS_SUCCEEDED(err)) {
522 mWrapColumn = (uint32_t)col;
523 #ifdef DEBUG_preformatted
524 printf("Set wrap column to %d based on style\n", mWrapColumn);
525 #endif
526 }
527 }
528 }
529 else if (kNotFound != style.Find("pre", true, whitespace)) {
530 #ifdef DEBUG_preformatted
531 printf("Set mPreFormatted based on style pre\n");
532 #endif
533 mPreFormatted = true;
534 mWrapColumn = 0;
535 }
536 }
537 else {
538 /* See comment at end of function. */
539 mInWhitespace = true;
540 mPreFormatted = false;
541 }
543 return NS_OK;
544 }
546 // Keep this in sync with DoCloseContainer!
547 if (!DoOutput()) {
548 return NS_OK;
549 }
551 if (aTag == nsGkAtoms::p)
552 EnsureVerticalSpace(1);
553 else if (aTag == nsGkAtoms::pre) {
554 if (GetLastBool(mIsInCiteBlockquote))
555 EnsureVerticalSpace(0);
556 else if (mHasWrittenCiteBlockquote) {
557 EnsureVerticalSpace(0);
558 mHasWrittenCiteBlockquote = false;
559 }
560 else
561 EnsureVerticalSpace(1);
562 }
563 else if (aTag == nsGkAtoms::tr) {
564 PushBool(mHasWrittenCellsForRow, false);
565 }
566 else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
567 // We must make sure that the content of two table cells get a
568 // space between them.
570 // To make the separation between cells most obvious and
571 // importable, we use a TAB.
572 if (GetLastBool(mHasWrittenCellsForRow)) {
573 // Bypass |Write| so that the TAB isn't compressed away.
574 AddToLine(MOZ_UTF16("\t"), 1);
575 mInWhitespace = true;
576 }
577 else if (mHasWrittenCellsForRow.IsEmpty()) {
578 // We don't always see a <tr> (nor a <table>) before the <td> if we're
579 // copying part of a table
580 PushBool(mHasWrittenCellsForRow, true); // will never be popped
581 }
582 else {
583 SetLastBool(mHasWrittenCellsForRow, true);
584 }
585 }
586 else if (aTag == nsGkAtoms::ul) {
587 // Indent here to support nested lists, which aren't included in li :-(
588 EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
589 // Must end the current line before we change indention
590 mIndent += kIndentSizeList;
591 mULCount++;
592 }
593 else if (aTag == nsGkAtoms::ol) {
594 EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
595 if (mFlags & nsIDocumentEncoder::OutputFormatted) {
596 // Must end the current line before we change indention
597 if (mOLStackIndex < OLStackSize) {
598 nsAutoString startAttr;
599 int32_t startVal = 1;
600 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
601 nsresult rv = NS_OK;
602 startVal = startAttr.ToInteger(&rv);
603 if (NS_FAILED(rv))
604 startVal = 1;
605 }
606 mOLStack[mOLStackIndex++] = startVal;
607 }
608 } else {
609 mOLStackIndex++;
610 }
611 mIndent += kIndentSizeList; // see ul
612 }
613 else if (aTag == nsGkAtoms::li &&
614 (mFlags & nsIDocumentEncoder::OutputFormatted)) {
615 if (mTagStackIndex > 1 && IsInOL()) {
616 if (mOLStackIndex > 0) {
617 nsAutoString valueAttr;
618 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
619 nsresult rv = NS_OK;
620 int32_t valueAttrVal = valueAttr.ToInteger(&rv);
621 if (NS_SUCCEEDED(rv))
622 mOLStack[mOLStackIndex-1] = valueAttrVal;
623 }
624 // This is what nsBulletFrame does for OLs:
625 mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
626 }
627 else {
628 mInIndentString.Append(char16_t('#'));
629 }
631 mInIndentString.Append(char16_t('.'));
633 }
634 else {
635 static char bulletCharArray[] = "*o+#";
636 uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
637 char bulletChar = bulletCharArray[index % 4];
638 mInIndentString.Append(char16_t(bulletChar));
639 }
641 mInIndentString.Append(char16_t(' '));
642 }
643 else if (aTag == nsGkAtoms::dl) {
644 EnsureVerticalSpace(1);
645 }
646 else if (aTag == nsGkAtoms::dt) {
647 EnsureVerticalSpace(0);
648 }
649 else if (aTag == nsGkAtoms::dd) {
650 EnsureVerticalSpace(0);
651 mIndent += kIndentSizeDD;
652 }
653 else if (aTag == nsGkAtoms::span) {
654 ++mSpanLevel;
655 }
656 else if (aTag == nsGkAtoms::blockquote) {
657 // Push
658 PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
659 if (isInCiteBlockquote) {
660 EnsureVerticalSpace(0);
661 mCiteQuoteLevel++;
662 }
663 else {
664 EnsureVerticalSpace(1);
665 mIndent += kTabSize; // Check for some maximum value?
666 }
667 }
668 else if (aTag == nsGkAtoms::q) {
669 Write(NS_LITERAL_STRING("\""));
670 }
672 // Else make sure we'll separate block level tags,
673 // even if we're about to leave, before doing any other formatting.
674 else if (nsContentUtils::IsHTMLBlock(aTag)) {
675 EnsureVerticalSpace(0);
676 }
678 //////////////////////////////////////////////////////////////
679 if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
680 return NS_OK;
681 }
682 //////////////////////////////////////////////////////////////
683 // The rest of this routine is formatted output stuff,
684 // which we should skip if we're not formatted:
685 //////////////////////////////////////////////////////////////
687 // Push on stack
688 bool currentNodeIsConverted = IsCurrentNodeConverted();
690 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
691 aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
692 aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
693 {
694 EnsureVerticalSpace(2);
695 if (mHeaderStrategy == 2) { // numbered
696 mIndent += kIndentSizeHeaders;
697 // Caching
698 int32_t level = HeaderLevel(aTag);
699 // Increase counter for current level
700 mHeaderCounter[level]++;
701 // Reset all lower levels
702 int32_t i;
704 for (i = level + 1; i <= 6; i++) {
705 mHeaderCounter[i] = 0;
706 }
708 // Construct numbers
709 nsAutoString leadup;
710 for (i = 1; i <= level; i++) {
711 leadup.AppendInt(mHeaderCounter[i]);
712 leadup.Append(char16_t('.'));
713 }
714 leadup.Append(char16_t(' '));
715 Write(leadup);
716 }
717 else if (mHeaderStrategy == 1) { // indent increasingly
718 mIndent += kIndentSizeHeaders;
719 for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
720 // for h(x), run x-1 times
721 mIndent += kIndentIncrementHeaders;
722 }
723 }
724 }
725 else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
726 nsAutoString url;
727 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
728 && !url.IsEmpty()) {
729 mURL = url;
730 }
731 }
732 else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
733 Write(NS_LITERAL_STRING("^"));
734 }
735 else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
736 Write(NS_LITERAL_STRING("_"));
737 }
738 else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
739 Write(NS_LITERAL_STRING("|"));
740 }
741 else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
742 && mStructs && !currentNodeIsConverted) {
743 Write(NS_LITERAL_STRING("*"));
744 }
745 else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
746 && mStructs && !currentNodeIsConverted) {
747 Write(NS_LITERAL_STRING("/"));
748 }
749 else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
750 Write(NS_LITERAL_STRING("_"));
751 }
753 /* Container elements are always block elements, so we shouldn't
754 output any whitespace immediately after the container tag even if
755 there's extra whitespace there because the HTML is pretty-printed
756 or something. To ensure that happens, tell the serializer we're
757 already in whitespace so it won't output more. */
758 mInWhitespace = true;
760 return NS_OK;
761 }
763 nsresult
764 nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag)
765 {
766 if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) {
767 mIgnoredChildNodeLevel--;
768 return NS_OK;
769 }
771 if (mFlags & nsIDocumentEncoder::OutputRaw) {
772 // Raw means raw. Don't even think about doing anything fancy
773 // here like indenting, adding line breaks or any other
774 // characters such as list item bullets, quote characters
775 // around <q>, etc. I mean it! Don't make me smack you!
777 return NS_OK;
778 }
780 if (mTagStackIndex > 0) {
781 --mTagStackIndex;
782 }
784 if (mTagStackIndex >= mIgnoreAboveIndex) {
785 if (mTagStackIndex == mIgnoreAboveIndex) {
786 // We're dealing with the close tag whose matching
787 // open tag had set the mIgnoreAboveIndex value.
788 // Reset mIgnoreAboveIndex before discarding this tag.
789 mIgnoreAboveIndex = (uint32_t)kNotFound;
790 }
791 return NS_OK;
792 }
794 // End current line if we're ending a block level tag
795 if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
796 // We want the output to end with a new line,
797 // but in preformatted areas like text fields,
798 // we can't emit newlines that weren't there.
799 // So add the newline only in the case of formatted output.
800 if (mFlags & nsIDocumentEncoder::OutputFormatted) {
801 EnsureVerticalSpace(0);
802 }
803 else {
804 FlushLine();
805 }
806 // We won't want to do anything with these in formatted mode either,
807 // so just return now:
808 return NS_OK;
809 }
811 // Keep this in sync with DoOpenContainer!
812 if (!DoOutput()) {
813 return NS_OK;
814 }
816 if (aTag == nsGkAtoms::tr) {
817 PopBool(mHasWrittenCellsForRow);
818 // Should always end a line, but get no more whitespace
819 if (mFloatingLines < 0)
820 mFloatingLines = 0;
821 mLineBreakDue = true;
822 }
823 else if (((aTag == nsGkAtoms::li) ||
824 (aTag == nsGkAtoms::dt)) &&
825 (mFlags & nsIDocumentEncoder::OutputFormatted)) {
826 // Items that should always end a line, but get no more whitespace
827 if (mFloatingLines < 0)
828 mFloatingLines = 0;
829 mLineBreakDue = true;
830 }
831 else if (aTag == nsGkAtoms::pre) {
832 mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
833 mLineBreakDue = true;
834 }
835 else if (aTag == nsGkAtoms::ul) {
836 FlushLine();
837 mIndent -= kIndentSizeList;
838 if (--mULCount + mOLStackIndex == 0) {
839 mFloatingLines = 1;
840 mLineBreakDue = true;
841 }
842 }
843 else if (aTag == nsGkAtoms::ol) {
844 FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
845 mIndent -= kIndentSizeList;
846 NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
847 mOLStackIndex--;
848 if (mULCount + mOLStackIndex == 0) {
849 mFloatingLines = 1;
850 mLineBreakDue = true;
851 }
852 }
853 else if (aTag == nsGkAtoms::dl) {
854 mFloatingLines = 1;
855 mLineBreakDue = true;
856 }
857 else if (aTag == nsGkAtoms::dd) {
858 FlushLine();
859 mIndent -= kIndentSizeDD;
860 }
861 else if (aTag == nsGkAtoms::span) {
862 NS_ASSERTION(mSpanLevel, "Span level will be negative!");
863 --mSpanLevel;
864 }
865 else if (aTag == nsGkAtoms::div) {
866 if (mFloatingLines < 0)
867 mFloatingLines = 0;
868 mLineBreakDue = true;
869 }
870 else if (aTag == nsGkAtoms::blockquote) {
871 FlushLine(); // Is this needed?
873 // Pop
874 bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
876 if (isInCiteBlockquote) {
877 NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
878 mCiteQuoteLevel--;
879 mFloatingLines = 0;
880 mHasWrittenCiteBlockquote = true;
881 }
882 else {
883 mIndent -= kTabSize;
884 mFloatingLines = 1;
885 }
886 mLineBreakDue = true;
887 }
888 else if (aTag == nsGkAtoms::q) {
889 Write(NS_LITERAL_STRING("\""));
890 }
891 else if (nsContentUtils::IsHTMLBlock(aTag)
892 && aTag != nsGkAtoms::script) {
893 // All other blocks get 1 vertical space after them
894 // in formatted mode, otherwise 0.
895 // This is hard. Sometimes 0 is a better number, but
896 // how to know?
897 if (mFlags & nsIDocumentEncoder::OutputFormatted)
898 EnsureVerticalSpace(1);
899 else {
900 if (mFloatingLines < 0)
901 mFloatingLines = 0;
902 mLineBreakDue = true;
903 }
904 }
906 //////////////////////////////////////////////////////////////
907 if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
908 return NS_OK;
909 }
910 //////////////////////////////////////////////////////////////
911 // The rest of this routine is formatted output stuff,
912 // which we should skip if we're not formatted:
913 //////////////////////////////////////////////////////////////
915 // Pop the currentConverted stack
916 bool currentNodeIsConverted = IsCurrentNodeConverted();
918 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
919 aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
920 aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
922 if (mHeaderStrategy) { /*numbered or indent increasingly*/
923 mIndent -= kIndentSizeHeaders;
924 }
925 if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
926 for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
927 // for h(x), run x-1 times
928 mIndent -= kIndentIncrementHeaders;
929 }
930 }
931 EnsureVerticalSpace(1);
932 }
933 else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
934 nsAutoString temp;
935 temp.AssignLiteral(" <");
936 temp += mURL;
937 temp.Append(char16_t('>'));
938 Write(temp);
939 mURL.Truncate();
940 }
941 else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
942 && mStructs && !currentNodeIsConverted) {
943 Write(kSpace);
944 }
945 else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
946 Write(NS_LITERAL_STRING("|"));
947 }
948 else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
949 && mStructs && !currentNodeIsConverted) {
950 Write(NS_LITERAL_STRING("*"));
951 }
952 else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
953 && mStructs && !currentNodeIsConverted) {
954 Write(NS_LITERAL_STRING("/"));
955 }
956 else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
957 Write(NS_LITERAL_STRING("_"));
958 }
960 return NS_OK;
961 }
963 bool
964 nsPlainTextSerializer::MustSuppressLeaf()
965 {
966 if (mIgnoredChildNodeLevel > 0) {
967 return true;
968 }
970 if ((mTagStackIndex > 1 &&
971 mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
972 (mTagStackIndex > 0 &&
973 mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
974 // Don't output the contents of SELECT elements;
975 // Might be nice, eventually, to output just the selected element.
976 // Read more in bug 31994.
977 return true;
978 }
980 if (mTagStackIndex > 0 &&
981 (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
982 mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
983 // Don't output the contents of <script> or <style> tags;
984 return true;
985 }
987 return false;
988 }
990 void
991 nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText)
992 {
993 // If we don't want any output, just return
994 if (!DoOutput()) {
995 return;
996 }
998 if (!aIsLineBreak) {
999 // Make sure to reset this, since it's no longer true.
1000 mHasWrittenCiteBlockquote = false;
1001 }
1003 if (mLineBreakDue)
1004 EnsureVerticalSpace(mFloatingLines);
1006 if (MustSuppressLeaf()) {
1007 return;
1008 }
1010 if (aIsLineBreak) {
1011 // The only times we want to pass along whitespace from the original
1012 // html source are if we're forced into preformatted mode via flags,
1013 // or if we're prettyprinting and we're inside a <pre>.
1014 // Otherwise, either we're collapsing to minimal text, or we're
1015 // prettyprinting to mimic the html format, and in neither case
1016 // does the formatting of the html source help us.
1017 if ((mFlags & nsIDocumentEncoder::OutputPreformatted) ||
1018 (mPreFormatted && !mWrapColumn) ||
1019 IsInPre()) {
1020 EnsureVerticalSpace(mEmptyLines+1);
1021 }
1022 else if (!mInWhitespace) {
1023 Write(kSpace);
1024 mInWhitespace = true;
1025 }
1026 return;
1027 }
1029 /* Check, if we are in a link (symbolized with mURL containing the URL)
1030 and the text is equal to the URL. In that case we don't want to output
1031 the URL twice so we scrap the text in mURL. */
1032 if (!mURL.IsEmpty() && mURL.Equals(aText)) {
1033 mURL.Truncate();
1034 }
1035 Write(aText);
1036 }
1038 nsresult
1039 nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag)
1040 {
1041 // If we don't want any output, just return
1042 if (!DoOutput()) {
1043 return NS_OK;
1044 }
1046 if (mLineBreakDue)
1047 EnsureVerticalSpace(mFloatingLines);
1049 if (MustSuppressLeaf()) {
1050 return NS_OK;
1051 }
1053 if (aTag == nsGkAtoms::br) {
1054 // Another egregious editor workaround, see bug 38194:
1055 // ignore the bogus br tags that the editor sticks here and there.
1056 nsAutoString tagAttr;
1057 if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr))
1058 || !tagAttr.EqualsLiteral("_moz")) {
1059 EnsureVerticalSpace(mEmptyLines+1);
1060 }
1061 }
1062 else if (aTag == nsGkAtoms::hr &&
1063 (mFlags & nsIDocumentEncoder::OutputFormatted)) {
1064 EnsureVerticalSpace(0);
1066 // Make a line of dashes as wide as the wrap width
1067 // XXX honoring percentage would be nice
1068 nsAutoString line;
1069 uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25);
1070 while (line.Length() < width) {
1071 line.Append(char16_t('-'));
1072 }
1073 Write(line);
1075 EnsureVerticalSpace(0);
1076 }
1077 else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) {
1078 Write(NS_LITERAL_STRING("\xFFFC"));
1079 }
1080 else if (aTag == nsGkAtoms::img) {
1081 /* Output (in decreasing order of preference)
1082 alt, title or nothing */
1083 // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
1084 nsAutoString imageDescription;
1085 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt,
1086 imageDescription))) {
1087 // If the alt attribute has an empty value (|alt=""|), output nothing
1088 }
1089 else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title,
1090 imageDescription))
1091 && !imageDescription.IsEmpty()) {
1092 imageDescription = NS_LITERAL_STRING(" [") +
1093 imageDescription +
1094 NS_LITERAL_STRING("] ");
1095 }
1097 Write(imageDescription);
1098 }
1100 return NS_OK;
1101 }
1103 /**
1104 * Adds as many newline as necessary to get |noOfRows| empty lines
1105 *
1106 * noOfRows = -1 : Being in the middle of some line of text
1107 * noOfRows = 0 : Being at the start of a line
1108 * noOfRows = n>0 : Having n empty lines before the current line.
1109 */
1110 void
1111 nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows)
1112 {
1113 // If we have something in the indent we probably want to output
1114 // it and it's not included in the count for empty lines so we don't
1115 // realize that we should start a new line.
1116 if (noOfRows >= 0 && !mInIndentString.IsEmpty()) {
1117 EndLine(false);
1118 mInWhitespace = true;
1119 }
1121 while(mEmptyLines < noOfRows) {
1122 EndLine(false);
1123 mInWhitespace = true;
1124 }
1125 mLineBreakDue = false;
1126 mFloatingLines = -1;
1127 }
1129 /**
1130 * This empties the current line cache without adding a NEWLINE.
1131 * Should not be used if line wrapping is of importance since
1132 * this function destroys the cache information.
1133 *
1134 * It will also write indentation and quotes if we believe us to be
1135 * at the start of the line.
1136 */
1137 void
1138 nsPlainTextSerializer::FlushLine()
1139 {
1140 if (!mCurrentLine.IsEmpty()) {
1141 if (mAtFirstColumn) {
1142 OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
1143 }
1145 Output(mCurrentLine);
1146 mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty();
1147 mCurrentLine.Truncate();
1148 mCurrentLineWidth = 0;
1149 }
1150 }
1152 /**
1153 * Prints the text to output to our current output device (the string mOutputString).
1154 * The only logic here is to replace non breaking spaces with a normal space since
1155 * most (all?) receivers of the result won't understand the nbsp and even be
1156 * confused by it.
1157 */
1158 void
1159 nsPlainTextSerializer::Output(nsString& aString)
1160 {
1161 if (!aString.IsEmpty()) {
1162 mStartedOutput = true;
1163 }
1165 if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
1166 // First, replace all nbsp characters with spaces,
1167 // which the unicode encoder won't do for us.
1168 aString.ReplaceChar(kNBSP, kSPACE);
1169 }
1170 mOutputString->Append(aString);
1171 }
1173 static bool
1174 IsSpaceStuffable(const char16_t *s)
1175 {
1176 if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
1177 nsCRT::strncmp(s, MOZ_UTF16("From "), 5) == 0)
1178 return true;
1179 else
1180 return false;
1181 }
1183 /**
1184 * This function adds a piece of text to the current stored line. If we are
1185 * wrapping text and the stored line will become too long, a suitable
1186 * location to wrap will be found and the line that's complete will be
1187 * output.
1188 */
1189 void
1190 nsPlainTextSerializer::AddToLine(const char16_t * aLineFragment,
1191 int32_t aLineFragmentLength)
1192 {
1193 uint32_t prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent;
1195 if (mLineBreakDue)
1196 EnsureVerticalSpace(mFloatingLines);
1198 int32_t linelength = mCurrentLine.Length();
1199 if (0 == linelength) {
1200 if (0 == aLineFragmentLength) {
1201 // Nothing at all. Are you kidding me?
1202 return;
1203 }
1205 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1206 if (IsSpaceStuffable(aLineFragment)
1207 && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
1208 )
1209 {
1210 // Space stuffing a la RFC 2646 (format=flowed).
1211 mCurrentLine.Append(char16_t(' '));
1213 if (MayWrap()) {
1214 mCurrentLineWidth += GetUnicharWidth(' ');
1215 #ifdef DEBUG_wrapping
1216 NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
1217 mCurrentLine.Length()) ==
1218 (int32_t)mCurrentLineWidth,
1219 "mCurrentLineWidth and reality out of sync!");
1220 #endif
1221 }
1222 }
1223 }
1224 mEmptyLines=-1;
1225 }
1227 mCurrentLine.Append(aLineFragment, aLineFragmentLength);
1228 if (MayWrap()) {
1229 mCurrentLineWidth += GetUnicharStringWidth(aLineFragment,
1230 aLineFragmentLength);
1231 #ifdef DEBUG_wrapping
1232 NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
1233 mCurrentLine.Length()) ==
1234 (int32_t)mCurrentLineWidth,
1235 "mCurrentLineWidth and reality out of sync!");
1236 #endif
1237 }
1239 linelength = mCurrentLine.Length();
1241 // Wrap?
1242 if (MayWrap())
1243 {
1244 #ifdef DEBUG_wrapping
1245 NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
1246 mCurrentLine.Length()) ==
1247 (int32_t)mCurrentLineWidth,
1248 "mCurrentLineWidth and reality out of sync!");
1249 #endif
1250 // Yes, wrap!
1251 // The "+4" is to avoid wrap lines that only would be a couple
1252 // of letters too long. We give this bonus only if the
1253 // wrapcolumn is more than 20.
1254 uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0;
1256 // XXX: Should calculate prefixwidth with GetUnicharStringWidth
1257 while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {
1258 // We go from the end removing one letter at a time until
1259 // we have a reasonable width
1260 int32_t goodSpace = mCurrentLine.Length();
1261 uint32_t width = mCurrentLineWidth;
1262 while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
1263 goodSpace--;
1264 width -= GetUnicharWidth(mCurrentLine[goodSpace]);
1265 }
1267 goodSpace++;
1269 if (mLineBreaker) {
1270 goodSpace = mLineBreaker->Prev(mCurrentLine.get(),
1271 mCurrentLine.Length(), goodSpace);
1272 if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
1273 nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
1274 --goodSpace; // adjust the position since line breaker returns a position next to space
1275 }
1276 }
1277 // fallback if the line breaker is unavailable or failed
1278 if (!mLineBreaker) {
1279 goodSpace = mWrapColumn-prefixwidth;
1280 while (goodSpace >= 0 &&
1281 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1282 goodSpace--;
1283 }
1284 }
1286 nsAutoString restOfLine;
1287 if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
1288 // If we don't found a good place to break, accept long line and
1289 // try to find another place to break
1290 goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
1291 if (mLineBreaker) {
1292 if ((uint32_t)goodSpace < mCurrentLine.Length())
1293 goodSpace = mLineBreaker->Next(mCurrentLine.get(),
1294 mCurrentLine.Length(), goodSpace);
1295 if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
1296 goodSpace = mCurrentLine.Length();
1297 }
1298 // fallback if the line breaker is unavailable or failed
1299 if (!mLineBreaker) {
1300 goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
1301 while (goodSpace < linelength &&
1302 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1303 goodSpace++;
1304 }
1305 }
1306 }
1308 if ((goodSpace < linelength) && (goodSpace > 0)) {
1309 // Found a place to break
1311 // -1 (trim a char at the break position)
1312 // only if the line break was a space.
1313 if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1314 mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
1315 }
1316 else {
1317 mCurrentLine.Right(restOfLine, linelength-goodSpace);
1318 }
1319 // if breaker was U+0020, it has to consider for delsp=yes support
1320 bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' ';
1321 mCurrentLine.Truncate(goodSpace);
1322 EndLine(true, breakBySpace);
1323 mCurrentLine.Truncate();
1324 // Space stuff new line?
1325 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1326 if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get())
1327 && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
1328 )
1329 {
1330 // Space stuffing a la RFC 2646 (format=flowed).
1331 mCurrentLine.Append(char16_t(' '));
1332 //XXX doesn't seem to work correctly for ' '
1333 }
1334 }
1335 mCurrentLine.Append(restOfLine);
1336 mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(),
1337 mCurrentLine.Length());
1338 linelength = mCurrentLine.Length();
1339 mEmptyLines = -1;
1340 }
1341 else {
1342 // Nothing to do. Hopefully we get more data later
1343 // to use for a place to break line
1344 break;
1345 }
1346 }
1347 }
1348 else {
1349 // No wrapping.
1350 }
1351 }
1353 /**
1354 * Outputs the contents of mCurrentLine, and resets line specific
1355 * variables. Also adds an indentation and prefix if there is
1356 * one specified. Strips ending spaces from the line if it isn't
1357 * preformatted.
1358 */
1359 void
1360 nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace)
1361 {
1362 uint32_t currentlinelength = mCurrentLine.Length();
1364 if (aSoftlinebreak && 0 == currentlinelength) {
1365 // No meaning
1366 return;
1367 }
1369 /* In non-preformatted mode, remove spaces from the end of the line for
1370 * format=flowed compatibility. Don't do this for these special cases:
1371 * "-- ", the signature separator (RFC 2646) shouldn't be touched and
1372 * "- -- ", the OpenPGP dash-escaped signature separator in inline
1373 * signed messages according to the OpenPGP standard (RFC 2440).
1374 */
1375 if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
1376 !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) &&
1377 (aSoftlinebreak ||
1378 !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) {
1379 // Remove spaces from the end of the line.
1380 while(currentlinelength > 0 &&
1381 mCurrentLine[currentlinelength-1] == ' ') {
1382 --currentlinelength;
1383 }
1384 mCurrentLine.SetLength(currentlinelength);
1385 }
1387 if (aSoftlinebreak &&
1388 (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
1389 (mIndent == 0)) {
1390 // Add the soft part of the soft linebreak (RFC 2646 4.1)
1391 // We only do this when there is no indentation since format=flowed
1392 // lines and indentation doesn't work well together.
1394 // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
1395 // add twice space.
1396 if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace)
1397 mCurrentLine.Append(NS_LITERAL_STRING(" "));
1398 else
1399 mCurrentLine.Append(char16_t(' '));
1400 }
1402 if (aSoftlinebreak) {
1403 mEmptyLines=0;
1404 }
1405 else {
1406 // Hard break
1407 if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
1408 mEmptyLines=-1;
1409 }
1411 mEmptyLines++;
1412 }
1414 if (mAtFirstColumn) {
1415 // If we don't have anything "real" to output we have to
1416 // make sure the indent doesn't end in a space since that
1417 // would trick a format=flowed-aware receiver.
1418 bool stripTrailingSpaces = mCurrentLine.IsEmpty();
1419 OutputQuotesAndIndent(stripTrailingSpaces);
1420 }
1422 mCurrentLine.Append(mLineBreak);
1423 Output(mCurrentLine);
1424 mCurrentLine.Truncate();
1425 mCurrentLineWidth = 0;
1426 mAtFirstColumn=true;
1427 mInWhitespace=true;
1428 mLineBreakDue = false;
1429 mFloatingLines = -1;
1430 }
1433 /**
1434 * Outputs the calculated and stored indent and text in the indentation. That is
1435 * quote chars and numbers for numbered lists and such. It will also reset any
1436 * stored text to put in the indentation after using it.
1437 */
1438 void
1439 nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */)
1440 {
1441 nsAutoString stringToOutput;
1443 // Put the mail quote "> " chars in, if appropriate:
1444 if (mCiteQuoteLevel > 0) {
1445 nsAutoString quotes;
1446 for(int i=0; i < mCiteQuoteLevel; i++) {
1447 quotes.Append(char16_t('>'));
1448 }
1449 if (!mCurrentLine.IsEmpty()) {
1450 /* Better don't output a space here, if the line is empty,
1451 in case a receiving f=f-aware UA thinks, this were a flowed line,
1452 which it isn't - it's just empty.
1453 (Flowed lines may be joined with the following one,
1454 so the empty line may be lost completely.) */
1455 quotes.Append(char16_t(' '));
1456 }
1457 stringToOutput = quotes;
1458 mAtFirstColumn = false;
1459 }
1461 // Indent if necessary
1462 int32_t indentwidth = mIndent - mInIndentString.Length();
1463 if (indentwidth > 0
1464 && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
1465 // Don't make empty lines look flowed
1466 ) {
1467 nsAutoString spaces;
1468 for (int i=0; i < indentwidth; ++i)
1469 spaces.Append(char16_t(' '));
1470 stringToOutput += spaces;
1471 mAtFirstColumn = false;
1472 }
1474 if (!mInIndentString.IsEmpty()) {
1475 stringToOutput += mInIndentString;
1476 mAtFirstColumn = false;
1477 mInIndentString.Truncate();
1478 }
1480 if (stripTrailingSpaces) {
1481 int32_t lineLength = stringToOutput.Length();
1482 while(lineLength > 0 &&
1483 ' ' == stringToOutput[lineLength-1]) {
1484 --lineLength;
1485 }
1486 stringToOutput.SetLength(lineLength);
1487 }
1489 if (!stringToOutput.IsEmpty()) {
1490 Output(stringToOutput);
1491 }
1493 }
1495 /**
1496 * Write a string. This is the highlevel function to use to get text output.
1497 * By using AddToLine, Output, EndLine and other functions it handles quotation,
1498 * line wrapping, indentation, whitespace compression and other things.
1499 */
1500 void
1501 nsPlainTextSerializer::Write(const nsAString& aStr)
1502 {
1503 // XXX Copy necessary to use nsString methods and gain
1504 // access to underlying buffer
1505 nsAutoString str(aStr);
1507 #ifdef DEBUG_wrapping
1508 printf("Write(%s): wrap col = %d\n",
1509 NS_ConvertUTF16toUTF8(str).get(), mWrapColumn);
1510 #endif
1512 int32_t bol = 0;
1513 int32_t newline;
1515 int32_t totLen = str.Length();
1517 // If the string is empty, do nothing:
1518 if (totLen <= 0) return;
1520 // For Flowed text change nbsp-ses to spaces at end of lines to allow them
1521 // to be cut off along with usual spaces if required. (bug #125928)
1522 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1523 for (int32_t i = totLen-1; i >= 0; i--) {
1524 char16_t c = str[i];
1525 if ('\n' == c || '\r' == c || ' ' == c || '\t' == c)
1526 continue;
1527 if (kNBSP == c)
1528 str.Replace(i, 1, ' ');
1529 else
1530 break;
1531 }
1532 }
1534 // We have two major codepaths here. One that does preformatted text and one
1535 // that does normal formatted text. The one for preformatted text calls
1536 // Output directly while the other code path goes through AddToLine.
1537 if ((mPreFormatted && !mWrapColumn) || IsInPre()
1538 || ((mSpanLevel > 0 || mDontWrapAnyQuotes)
1539 && mEmptyLines >= 0 && str.First() == char16_t('>'))) {
1540 // No intelligent wrapping.
1542 // This mustn't be mixed with intelligent wrapping without clearing
1543 // the mCurrentLine buffer before!!!
1544 NS_ASSERTION(mCurrentLine.IsEmpty(),
1545 "Mixed wrapping data and nonwrapping data on the same line");
1546 if (!mCurrentLine.IsEmpty()) {
1547 FlushLine();
1548 }
1550 // Put the mail quote "> " chars in, if appropriate.
1551 // Have to put it in before every line.
1552 while(bol<totLen) {
1553 bool outputQuotes = mAtFirstColumn;
1554 bool atFirstColumn = mAtFirstColumn;
1555 bool outputLineBreak = false;
1556 bool spacesOnly = true;
1558 // Find one of '\n' or '\r' using iterators since nsAString
1559 // doesn't have the old FindCharInSet function.
1560 nsAString::const_iterator iter; str.BeginReading(iter);
1561 nsAString::const_iterator done_searching; str.EndReading(done_searching);
1562 iter.advance(bol);
1563 int32_t new_newline = bol;
1564 newline = kNotFound;
1565 while(iter != done_searching) {
1566 if ('\n' == *iter || '\r' == *iter) {
1567 newline = new_newline;
1568 break;
1569 }
1570 if (' ' != *iter)
1571 spacesOnly = false;
1572 ++new_newline;
1573 ++iter;
1574 }
1576 // Done searching
1577 nsAutoString stringpart;
1578 if (newline == kNotFound) {
1579 // No new lines.
1580 stringpart.Assign(Substring(str, bol, totLen - bol));
1581 if (!stringpart.IsEmpty()) {
1582 char16_t lastchar = stringpart[stringpart.Length()-1];
1583 if ((lastchar == '\t') || (lastchar == ' ') ||
1584 (lastchar == '\r') ||(lastchar == '\n')) {
1585 mInWhitespace = true;
1586 }
1587 else {
1588 mInWhitespace = false;
1589 }
1590 }
1591 mEmptyLines=-1;
1592 atFirstColumn = mAtFirstColumn && (totLen-bol)==0;
1593 bol = totLen;
1594 }
1595 else {
1596 // There is a newline
1597 stringpart.Assign(Substring(str, bol, newline-bol));
1598 mInWhitespace = true;
1599 outputLineBreak = true;
1600 mEmptyLines=0;
1601 atFirstColumn = true;
1602 bol = newline+1;
1603 if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
1604 // There was a CRLF in the input. This used to be illegal and
1605 // stripped by the parser. Apparently not anymore. Let's skip
1606 // over the LF.
1607 bol++;
1608 }
1609 }
1611 mCurrentLine.AssignLiteral("");
1612 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1613 if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928
1614 !stringpart.EqualsLiteral("-- ") &&
1615 !stringpart.EqualsLiteral("- -- "))
1616 stringpart.Trim(" ", false, true, true);
1617 if (IsSpaceStuffable(stringpart.get()) && stringpart[0] != '>')
1618 mCurrentLine.Append(char16_t(' '));
1619 }
1620 mCurrentLine.Append(stringpart);
1622 if (outputQuotes) {
1623 // Note: this call messes with mAtFirstColumn
1624 OutputQuotesAndIndent();
1625 }
1627 Output(mCurrentLine);
1628 if (outputLineBreak) {
1629 Output(mLineBreak);
1630 }
1631 mAtFirstColumn = atFirstColumn;
1632 }
1634 // Reset mCurrentLine.
1635 mCurrentLine.Truncate();
1637 #ifdef DEBUG_wrapping
1638 printf("No wrapping: newline is %d, totLen is %d\n",
1639 newline, totLen);
1640 #endif
1641 return;
1642 }
1644 // Intelligent handling of text
1645 // If needed, strip out all "end of lines"
1646 // and multiple whitespace between words
1647 int32_t nextpos;
1648 const char16_t * offsetIntoBuffer = nullptr;
1650 while (bol < totLen) { // Loop over lines
1651 // Find a place where we may have to do whitespace compression
1652 nextpos = str.FindCharInSet(" \t\n\r", bol);
1653 #ifdef DEBUG_wrapping
1654 nsAutoString remaining;
1655 str.Right(remaining, totLen - bol);
1656 foo = ToNewCString(remaining);
1657 // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
1658 // bol, nextpos, totLen, foo);
1659 nsMemory::Free(foo);
1660 #endif
1662 if (nextpos == kNotFound) {
1663 // The rest of the string
1664 offsetIntoBuffer = str.get() + bol;
1665 AddToLine(offsetIntoBuffer, totLen-bol);
1666 bol=totLen;
1667 mInWhitespace=false;
1668 }
1669 else {
1670 // There's still whitespace left in the string
1671 if (nextpos != 0 && (nextpos + 1) < totLen) {
1672 offsetIntoBuffer = str.get() + nextpos;
1673 // skip '\n' if it is between CJ chars
1674 if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
1675 offsetIntoBuffer = str.get() + bol;
1676 AddToLine(offsetIntoBuffer, nextpos-bol);
1677 bol = nextpos + 1;
1678 continue;
1679 }
1680 }
1681 // If we're already in whitespace and not preformatted, just skip it:
1682 if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
1683 !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1684 // Skip whitespace
1685 bol++;
1686 continue;
1687 }
1689 if (nextpos == bol) {
1690 // Note that we are in whitespace.
1691 mInWhitespace = true;
1692 offsetIntoBuffer = str.get() + nextpos;
1693 AddToLine(offsetIntoBuffer, 1);
1694 bol++;
1695 continue;
1696 }
1698 mInWhitespace = true;
1700 offsetIntoBuffer = str.get() + bol;
1701 if (mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1702 // Preserve the real whitespace character
1703 nextpos++;
1704 AddToLine(offsetIntoBuffer, nextpos-bol);
1705 bol = nextpos;
1706 }
1707 else {
1708 // Replace the whitespace with a space
1709 AddToLine(offsetIntoBuffer, nextpos-bol);
1710 AddToLine(kSpace.get(),1);
1711 bol = nextpos + 1; // Let's eat the whitespace
1712 }
1713 }
1714 } // Continue looping over the string
1715 }
1718 /**
1719 * Gets the value of an attribute in a string. If the function returns
1720 * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
1721 */
1722 nsresult
1723 nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName,
1724 nsString& aValueRet)
1725 {
1726 if (mElement) {
1727 if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
1728 return NS_OK;
1729 }
1730 }
1732 return NS_ERROR_NOT_AVAILABLE;
1733 }
1735 /**
1736 * Returns true, if the element was inserted by Moz' TXT->HTML converter.
1737 * In this case, we should ignore it.
1738 */
1739 bool
1740 nsPlainTextSerializer::IsCurrentNodeConverted()
1741 {
1742 nsAutoString value;
1743 nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
1744 return (NS_SUCCEEDED(rv) &&
1745 (value.EqualsIgnoreCase("moz-txt", 7) ||
1746 value.EqualsIgnoreCase("\"moz-txt", 8)));
1747 }
1750 // static
1751 nsIAtom*
1752 nsPlainTextSerializer::GetIdForContent(nsIContent* aContent)
1753 {
1754 if (!aContent->IsHTML()) {
1755 return nullptr;
1756 }
1758 nsIAtom* localName = aContent->Tag();
1759 return localName->IsStaticAtom() ? localName : nullptr;
1760 }
1762 /**
1763 * Returns true if we currently are inside a <pre>. The check is done
1764 * by traversing the tag stack looking for <pre> until we hit a block
1765 * level tag which is assumed to override any <pre>:s below it in
1766 * the stack. To do this correctly to a 100% would require access
1767 * to style which we don't support in this converter.
1768 */
1769 bool
1770 nsPlainTextSerializer::IsInPre()
1771 {
1772 int32_t i = mTagStackIndex;
1773 while(i > 0) {
1774 if (mTagStack[i - 1] == nsGkAtoms::pre)
1775 return true;
1776 if (nsContentUtils::IsHTMLBlock(mTagStack[i - 1])) {
1777 // We assume that every other block overrides a <pre>
1778 return false;
1779 }
1780 --i;
1781 }
1783 // Not a <pre> in the whole stack
1784 return false;
1785 }
1787 /**
1788 * This method is required only to identify LI's inside OL.
1789 * Returns TRUE if we are inside an OL tag and FALSE otherwise.
1790 */
1791 bool
1792 nsPlainTextSerializer::IsInOL()
1793 {
1794 int32_t i = mTagStackIndex;
1795 while(--i >= 0) {
1796 if (mTagStack[i] == nsGkAtoms::ol)
1797 return true;
1798 if (mTagStack[i] == nsGkAtoms::ul) {
1799 // If a UL is reached first, LI belongs the UL nested in OL.
1800 return false;
1801 }
1802 }
1803 // We may reach here for orphan LI's.
1804 return false;
1805 }
1807 /*
1808 @return 0 = no header, 1 = h1, ..., 6 = h6
1809 */
1810 int32_t HeaderLevel(nsIAtom* aTag)
1811 {
1812 if (aTag == nsGkAtoms::h1) {
1813 return 1;
1814 }
1815 if (aTag == nsGkAtoms::h2) {
1816 return 2;
1817 }
1818 if (aTag == nsGkAtoms::h3) {
1819 return 3;
1820 }
1821 if (aTag == nsGkAtoms::h4) {
1822 return 4;
1823 }
1824 if (aTag == nsGkAtoms::h5) {
1825 return 5;
1826 }
1827 if (aTag == nsGkAtoms::h6) {
1828 return 6;
1829 }
1830 return 0;
1831 }
1834 /*
1835 * This is an implementation of GetUnicharWidth() and
1836 * GetUnicharStringWidth() as defined in
1837 * "The Single UNIX Specification, Version 2, The Open Group, 1997"
1838 * <http://www.UNIX-systems.org/online.html>
1839 *
1840 * Markus Kuhn -- 2000-02-08 -- public domain
1841 *
1842 * Minor alterations to fit Mozilla's data types by Daniel Bratell
1843 */
/* Column width of a single UTF-16 code unit, determined as follows:
 *
 *    - The null character (U+0000) has a column width of 0.
 *
 *    - Other C0/C1 control characters and DEL yield a return value
 *      of -1.
 *
 *    - Code units listed in the table of non-spacing characters below
 *      (non-spacing and enclosing combining characters, general category
 *      code Mn or Me in the Unicode database) have a column width of 0.
 *
 *    - Spacing characters in the East Asian Wide (W) or East Asian
 *      FullWidth (F) category as defined in Unicode Technical
 *      Report #11 have a column width of 2.
 *
 *    - All remaining characters (including all printable
 *      ISO 8859-1 and WGL4 characters, Unicode control characters,
 *      etc.) have a column width of 1.
 *
 * The code unit is taken to be an ISO 10646 character value.
 */

int32_t GetUnicharWidth(char16_t ucs)
{
  /* sorted list of non-overlapping intervals of non-spacing characters */
  struct Range {
    uint16_t first;
    uint16_t last;
  };
  static const Range kCombining[] = {
    { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
    { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
    { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
    { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
    { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
    { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
    { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
    { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
    { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
    { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
    { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
    { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
    { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
    { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
    { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
    { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
    { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
    { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
    { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
    { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
    { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
    { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
    { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
  };

  /* the null character takes no column at all */
  if (ucs == 0) {
    return 0;
  }

  /* 8-bit control characters: C0, DEL and C1 */
  const bool isC0 = ucs < 32;
  const bool isDelOrC1 = ucs >= 0x7f && ucs < 0xa0;
  if (isC0 || isDelOrC1) {
    return -1;
  }

  /* quick exit for Latin-1 etc., which sort before the whole table */
  if (ucs < kCombining[0].first) {
    return 1;
  }

  /* binary search in the table of non-spacing characters */
  int32_t low = 0;
  int32_t high = sizeof(kCombining) / sizeof(kCombining[0]) - 1;
  while (low <= high) {
    const int32_t mid = (low + high) / 2;
    const Range& range = kCombining[mid];
    if (ucs > range.last) {
      low = mid + 1;
    } else if (ucs < range.first) {
      high = mid - 1;
    } else {
      return 0;
    }
  }

  /* from here on ucs is neither a combining nor a C0/C1 control character */

  /* fast test for majority of non-wide scripts */
  if (ucs < 0x1100) {
    return 1;
  }

  const bool isWide =
    (ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
    (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
     ucs != 0x303f) ||                  /* CJK ... Yi */
    (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
    (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
    (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
    (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
    (ucs >= 0xffe0 && ucs <= 0xffe6);

  return isWide ? 2 : 1;
}
1951 int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n)
1952 {
1953 int32_t w, width = 0;
1955 for (;*pwcs && n-- > 0; pwcs++)
1956 if ((w = GetUnicharWidth(*pwcs)) < 0)
1957 ++width; // Taking 1 as the width of non-printable character, for bug# 94475.
1958 else
1959 width += w;
1961 return width;
1962 }