1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/content/base/src/nsPlainTextSerializer.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1963 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +/* 1.10 + * nsIContentSerializer implementation that can be used with an 1.11 + * nsIDocumentEncoder to convert a DOM into plaintext in a nice way 1.12 + * (eg for copy/paste as plaintext). 1.13 + */ 1.14 + 1.15 +#include "nsPlainTextSerializer.h" 1.16 +#include "nsLWBrkCIID.h" 1.17 +#include "nsIServiceManager.h" 1.18 +#include "nsGkAtoms.h" 1.19 +#include "nsNameSpaceManager.h" 1.20 +#include "nsTextFragment.h" 1.21 +#include "nsContentUtils.h" 1.22 +#include "nsReadableUtils.h" 1.23 +#include "nsUnicharUtils.h" 1.24 +#include "nsCRT.h" 1.25 +#include "mozilla/dom/Element.h" 1.26 +#include "mozilla/Preferences.h" 1.27 + 1.28 +using namespace mozilla; 1.29 +using namespace mozilla::dom; 1.30 + 1.31 +#define PREF_STRUCTS "converter.html2txt.structs" 1.32 +#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy" 1.33 + 1.34 +static const int32_t kTabSize=4; 1.35 +static const int32_t kIndentSizeHeaders = 2; /* Indention of h1, if 1.36 + mHeaderStrategy = 1 or = 2. 1.37 + Indention of other headers 1.38 + is derived from that. 1.39 + XXX center h1? 
*/ 1.40 +static const int32_t kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1, 1.41 + indent h(x+1) this many 1.42 + columns more than h(x) */ 1.43 +static const int32_t kIndentSizeList = kTabSize; 1.44 + // Indention of non-first lines of ul and ol 1.45 +static const int32_t kIndentSizeDD = kTabSize; // Indention of <dd> 1.46 +static const char16_t kNBSP = 160; 1.47 +static const char16_t kSPACE = ' '; 1.48 + 1.49 +static int32_t HeaderLevel(nsIAtom* aTag); 1.50 +static int32_t GetUnicharWidth(char16_t ucs); 1.51 +static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n); 1.52 + 1.53 +// Someday may want to make this non-const: 1.54 +static const uint32_t TagStackSize = 500; 1.55 +static const uint32_t OLStackSize = 100; 1.56 + 1.57 +nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) 1.58 +{ 1.59 + nsPlainTextSerializer* it = new nsPlainTextSerializer(); 1.60 + if (!it) { 1.61 + return NS_ERROR_OUT_OF_MEMORY; 1.62 + } 1.63 + 1.64 + return CallQueryInterface(it, aSerializer); 1.65 +} 1.66 + 1.67 +nsPlainTextSerializer::nsPlainTextSerializer() 1.68 + : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant" 1.69 +{ 1.70 + 1.71 + mOutputString = nullptr; 1.72 + mHeadLevel = 0; 1.73 + mAtFirstColumn = true; 1.74 + mIndent = 0; 1.75 + mCiteQuoteLevel = 0; 1.76 + mStructs = true; // will be read from prefs later 1.77 + mHeaderStrategy = 1 /*indent increasingly*/; // ditto 1.78 + mDontWrapAnyQuotes = false; // ditto 1.79 + mHasWrittenCiteBlockquote = false; 1.80 + mSpanLevel = 0; 1.81 + for (int32_t i = 0; i <= 6; i++) { 1.82 + mHeaderCounter[i] = 0; 1.83 + } 1.84 + 1.85 + // Line breaker 1.86 + mWrapColumn = 72; // XXX magic number, we expect someone to reset this 1.87 + mCurrentLineWidth = 0; 1.88 + 1.89 + // Flow 1.90 + mEmptyLines = 1; // The start of the document is an "empty line" in itself, 1.91 + mInWhitespace = false; 1.92 + mPreFormatted = false; 1.93 + mStartedOutput = false; 1.94 + 1.95 + // initialize the tag stack to 
zero: 1.96 + // The stack only ever contains pointers to static atoms, so they don't 1.97 + // need refcounting. 1.98 + mTagStack = new nsIAtom*[TagStackSize]; 1.99 + mTagStackIndex = 0; 1.100 + mIgnoreAboveIndex = (uint32_t)kNotFound; 1.101 + 1.102 + // initialize the OL stack, where numbers for ordered lists are kept 1.103 + mOLStack = new int32_t[OLStackSize]; 1.104 + mOLStackIndex = 0; 1.105 + 1.106 + mULCount = 0; 1.107 + 1.108 + mIgnoredChildNodeLevel = 0; 1.109 +} 1.110 + 1.111 +nsPlainTextSerializer::~nsPlainTextSerializer() 1.112 +{ 1.113 + delete[] mTagStack; 1.114 + delete[] mOLStack; 1.115 + NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!"); 1.116 +} 1.117 + 1.118 +NS_IMPL_ISUPPORTS(nsPlainTextSerializer, 1.119 + nsIContentSerializer) 1.120 + 1.121 + 1.122 +NS_IMETHODIMP 1.123 +nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn, 1.124 + const char* aCharSet, bool aIsCopying, 1.125 + bool aIsWholeDocument) 1.126 +{ 1.127 +#ifdef DEBUG 1.128 + // Check if the major control flags are set correctly. 1.129 + if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) { 1.130 + NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted, 1.131 + "If you want format=flowed, you must combine it with " 1.132 + "nsIDocumentEncoder::OutputFormatted"); 1.133 + } 1.134 + 1.135 + if (aFlags & nsIDocumentEncoder::OutputFormatted) { 1.136 + NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted), 1.137 + "Can't do formatted and preformatted output at the same time!"); 1.138 + } 1.139 +#endif 1.140 + 1.141 + mFlags = aFlags; 1.142 + mWrapColumn = aWrapColumn; 1.143 + 1.144 + // Only create a linebreaker if we will handle wrapping. 
1.145 + if (MayWrap()) { 1.146 + mLineBreaker = nsContentUtils::LineBreaker(); 1.147 + } 1.148 + 1.149 + // Set the line break character: 1.150 + if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) 1.151 + && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { 1.152 + // Windows 1.153 + mLineBreak.AssignLiteral("\r\n"); 1.154 + } 1.155 + else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { 1.156 + // Mac 1.157 + mLineBreak.Assign(char16_t('\r')); 1.158 + } 1.159 + else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { 1.160 + // Unix/DOM 1.161 + mLineBreak.Assign(char16_t('\n')); 1.162 + } 1.163 + else { 1.164 + // Platform/default 1.165 + mLineBreak.AssignLiteral(NS_LINEBREAK); 1.166 + } 1.167 + 1.168 + mLineBreakDue = false; 1.169 + mFloatingLines = -1; 1.170 + 1.171 + if (mFlags & nsIDocumentEncoder::OutputFormatted) { 1.172 + // Get some prefs that controls how we do formatted output 1.173 + mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs); 1.174 + 1.175 + mHeaderStrategy = 1.176 + Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy); 1.177 + 1.178 + // DontWrapAnyQuotes is set according to whether plaintext mail 1.179 + // is wrapping to window width -- see bug 134439. 1.180 + // We'll only want this if we're wrapping and formatted. 1.181 + if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) { 1.182 + mDontWrapAnyQuotes = 1.183 + Preferences::GetBool("mail.compose.wrap_to_window_width", 1.184 + mDontWrapAnyQuotes); 1.185 + } 1.186 + } 1.187 + 1.188 + // XXX We should let the caller pass this in. 
1.189 + if (Preferences::GetBool("browser.frames.enabled")) { 1.190 + mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent; 1.191 + } 1.192 + else { 1.193 + mFlags |= nsIDocumentEncoder::OutputNoFramesContent; 1.194 + } 1.195 + 1.196 + return NS_OK; 1.197 +} 1.198 + 1.199 +bool 1.200 +nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) 1.201 +{ 1.202 + uint32_t size = aStack.Length(); 1.203 + if (size == 0) { 1.204 + return false; 1.205 + } 1.206 + return aStack.ElementAt(size-1); 1.207 +} 1.208 + 1.209 +void 1.210 +nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) 1.211 +{ 1.212 + uint32_t size = aStack.Length(); 1.213 + if (size > 0) { 1.214 + aStack.ElementAt(size-1) = aValue; 1.215 + } 1.216 + else { 1.217 + NS_ERROR("There is no \"Last\" value"); 1.218 + } 1.219 +} 1.220 + 1.221 +void 1.222 +nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) 1.223 +{ 1.224 + aStack.AppendElement(bool(aValue)); 1.225 +} 1.226 + 1.227 +bool 1.228 +nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) 1.229 +{ 1.230 + bool returnValue = false; 1.231 + uint32_t size = aStack.Length(); 1.232 + if (size > 0) { 1.233 + returnValue = aStack.ElementAt(size-1); 1.234 + aStack.RemoveElementAt(size-1); 1.235 + } 1.236 + return returnValue; 1.237 +} 1.238 + 1.239 +bool 1.240 +nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag) 1.241 +{ 1.242 + // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, 1.243 + // non-textual container element should be serialized as placeholder 1.244 + // character and its child nodes should be ignored. See bug 895239. 
1.245 + if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) { 1.246 + return false; 1.247 + } 1.248 + 1.249 + return 1.250 + (aTag == nsGkAtoms::audio) || 1.251 + (aTag == nsGkAtoms::canvas) || 1.252 + (aTag == nsGkAtoms::iframe) || 1.253 + (aTag == nsGkAtoms::meter) || 1.254 + (aTag == nsGkAtoms::progress) || 1.255 + (aTag == nsGkAtoms::object) || 1.256 + (aTag == nsGkAtoms::svg) || 1.257 + (aTag == nsGkAtoms::video); 1.258 +} 1.259 + 1.260 +NS_IMETHODIMP 1.261 +nsPlainTextSerializer::AppendText(nsIContent* aText, 1.262 + int32_t aStartOffset, 1.263 + int32_t aEndOffset, 1.264 + nsAString& aStr) 1.265 +{ 1.266 + if (mIgnoreAboveIndex != (uint32_t)kNotFound) { 1.267 + return NS_OK; 1.268 + } 1.269 + 1.270 + NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); 1.271 + if ( aStartOffset < 0 ) 1.272 + return NS_ERROR_INVALID_ARG; 1.273 + 1.274 + NS_ENSURE_ARG(aText); 1.275 + 1.276 + nsresult rv = NS_OK; 1.277 + 1.278 + nsIContent* content = aText; 1.279 + const nsTextFragment* frag; 1.280 + if (!content || !(frag = content->GetText())) { 1.281 + return NS_ERROR_FAILURE; 1.282 + } 1.283 + 1.284 + int32_t fragLength = frag->GetLength(); 1.285 + int32_t endoffset = (aEndOffset == -1) ? 
fragLength : std::min(aEndOffset, fragLength); 1.286 + NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); 1.287 + 1.288 + int32_t length = endoffset - aStartOffset; 1.289 + if (length <= 0) { 1.290 + return NS_OK; 1.291 + } 1.292 + 1.293 + nsAutoString textstr; 1.294 + if (frag->Is2b()) { 1.295 + textstr.Assign(frag->Get2b() + aStartOffset, length); 1.296 + } 1.297 + else { 1.298 + // AssignASCII is for 7-bit character only, so don't use it 1.299 + const char *data = frag->Get1b(); 1.300 + CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr); 1.301 + } 1.302 + 1.303 + mOutputString = &aStr; 1.304 + 1.305 + // We have to split the string across newlines 1.306 + // to match parser behavior 1.307 + int32_t start = 0; 1.308 + int32_t offset = textstr.FindCharInSet("\n\r"); 1.309 + while (offset != kNotFound) { 1.310 + 1.311 + if (offset>start) { 1.312 + // Pass in the line 1.313 + DoAddText(false, 1.314 + Substring(textstr, start, offset-start)); 1.315 + } 1.316 + 1.317 + // Pass in a newline 1.318 + DoAddText(true, mLineBreak); 1.319 + 1.320 + start = offset+1; 1.321 + offset = textstr.FindCharInSet("\n\r", start); 1.322 + } 1.323 + 1.324 + // Consume the last bit of the string if there's any left 1.325 + if (start < length) { 1.326 + if (start) { 1.327 + DoAddText(false, Substring(textstr, start, length - start)); 1.328 + } 1.329 + else { 1.330 + DoAddText(false, textstr); 1.331 + } 1.332 + } 1.333 + 1.334 + mOutputString = nullptr; 1.335 + 1.336 + return rv; 1.337 +} 1.338 + 1.339 +NS_IMETHODIMP 1.340 +nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection, 1.341 + int32_t aStartOffset, 1.342 + int32_t aEndOffset, 1.343 + nsAString& aStr) 1.344 +{ 1.345 + return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr); 1.346 +} 1.347 + 1.348 +NS_IMETHODIMP 1.349 +nsPlainTextSerializer::AppendElementStart(Element* aElement, 1.350 + Element* aOriginalElement, 1.351 + nsAString& aStr) 
1.352 +{ 1.353 + NS_ENSURE_ARG(aElement); 1.354 + 1.355 + mElement = aElement; 1.356 + 1.357 + nsresult rv; 1.358 + nsIAtom* id = GetIdForContent(mElement); 1.359 + 1.360 + bool isContainer = !nsContentUtils::IsHTMLVoid(id); 1.361 + 1.362 + mOutputString = &aStr; 1.363 + 1.364 + if (isContainer) { 1.365 + rv = DoOpenContainer(id); 1.366 + } 1.367 + else { 1.368 + rv = DoAddLeaf(id); 1.369 + } 1.370 + 1.371 + mElement = nullptr; 1.372 + mOutputString = nullptr; 1.373 + 1.374 + if (id == nsGkAtoms::head) { 1.375 + ++mHeadLevel; 1.376 + } 1.377 + 1.378 + return rv; 1.379 +} 1.380 + 1.381 +NS_IMETHODIMP 1.382 +nsPlainTextSerializer::AppendElementEnd(Element* aElement, 1.383 + nsAString& aStr) 1.384 +{ 1.385 + NS_ENSURE_ARG(aElement); 1.386 + 1.387 + mElement = aElement; 1.388 + 1.389 + nsresult rv; 1.390 + nsIAtom* id = GetIdForContent(mElement); 1.391 + 1.392 + bool isContainer = !nsContentUtils::IsHTMLVoid(id); 1.393 + 1.394 + mOutputString = &aStr; 1.395 + 1.396 + rv = NS_OK; 1.397 + if (isContainer) { 1.398 + rv = DoCloseContainer(id); 1.399 + } 1.400 + 1.401 + mElement = nullptr; 1.402 + mOutputString = nullptr; 1.403 + 1.404 + if (id == nsGkAtoms::head) { 1.405 + NS_ASSERTION(mHeadLevel != 0, 1.406 + "mHeadLevel being decremented below 0"); 1.407 + --mHeadLevel; 1.408 + } 1.409 + 1.410 + return rv; 1.411 +} 1.412 + 1.413 +NS_IMETHODIMP 1.414 +nsPlainTextSerializer::Flush(nsAString& aStr) 1.415 +{ 1.416 + mOutputString = &aStr; 1.417 + FlushLine(); 1.418 + mOutputString = nullptr; 1.419 + return NS_OK; 1.420 +} 1.421 + 1.422 +NS_IMETHODIMP 1.423 +nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument, 1.424 + nsAString& aStr) 1.425 +{ 1.426 + return NS_OK; 1.427 +} 1.428 + 1.429 +nsresult 1.430 +nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag) 1.431 +{ 1.432 + // Check if we need output current node as placeholder character and ignore 1.433 + // child nodes. 
1.434 + if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { 1.435 + if (mIgnoredChildNodeLevel == 0) { 1.436 + // Serialize current node as placeholder character 1.437 + Write(NS_LITERAL_STRING("\xFFFC")); 1.438 + } 1.439 + // Ignore child nodes. 1.440 + mIgnoredChildNodeLevel++; 1.441 + return NS_OK; 1.442 + } 1.443 + 1.444 + if (mFlags & nsIDocumentEncoder::OutputRaw) { 1.445 + // Raw means raw. Don't even think about doing anything fancy 1.446 + // here like indenting, adding line breaks or any other 1.447 + // characters such as list item bullets, quote characters 1.448 + // around <q>, etc. I mean it! Don't make me smack you! 1.449 + 1.450 + return NS_OK; 1.451 + } 1.452 + 1.453 + if (mTagStackIndex < TagStackSize) { 1.454 + mTagStack[mTagStackIndex++] = aTag; 1.455 + } 1.456 + 1.457 + if (mIgnoreAboveIndex != (uint32_t)kNotFound) { 1.458 + return NS_OK; 1.459 + } 1.460 + 1.461 + // Reset this so that <blockquote type=cite> doesn't affect the whitespace 1.462 + // above random <pre>s below it. 1.463 + mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote && 1.464 + aTag == nsGkAtoms::pre; 1.465 + 1.466 + bool isInCiteBlockquote = false; 1.467 + 1.468 + // XXX special-case <blockquote type=cite> so that we don't add additional 1.469 + // newlines before the text. 
1.470 + if (aTag == nsGkAtoms::blockquote) { 1.471 + nsAutoString value; 1.472 + nsresult rv = GetAttributeValue(nsGkAtoms::type, value); 1.473 + isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite"); 1.474 + } 1.475 + 1.476 + if (mLineBreakDue && !isInCiteBlockquote) 1.477 + EnsureVerticalSpace(mFloatingLines); 1.478 + 1.479 + // Check if this tag's content that should not be output 1.480 + if ((aTag == nsGkAtoms::noscript && 1.481 + !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) || 1.482 + ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) && 1.483 + !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) { 1.484 + // Ignore everything that follows the current tag in 1.485 + // question until a matching end tag is encountered. 1.486 + mIgnoreAboveIndex = mTagStackIndex - 1; 1.487 + return NS_OK; 1.488 + } 1.489 + 1.490 + if (aTag == nsGkAtoms::body) { 1.491 + // Try to figure out here whether we have a 1.492 + // preformatted style attribute. 1.493 + // 1.494 + // Trigger on the presence of a "pre-wrap" in the 1.495 + // style attribute. That's a very simplistic way to do 1.496 + // it, but better than nothing. 1.497 + // Also set mWrapColumn to the value given there 1.498 + // (which arguably we should only do if told to do so). 
1.499 + nsAutoString style; 1.500 + int32_t whitespace; 1.501 + if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) && 1.502 + (kNotFound != (whitespace = style.Find("white-space:")))) { 1.503 + 1.504 + if (kNotFound != style.Find("pre-wrap", true, whitespace)) { 1.505 +#ifdef DEBUG_preformatted 1.506 + printf("Set mPreFormatted based on style pre-wrap\n"); 1.507 +#endif 1.508 + mPreFormatted = true; 1.509 + int32_t widthOffset = style.Find("width:"); 1.510 + if (widthOffset >= 0) { 1.511 + // We have to search for the ch before the semicolon, 1.512 + // not for the semicolon itself, because nsString::ToInteger() 1.513 + // considers 'c' to be a valid numeric char (even if radix=10) 1.514 + // but then gets confused if it sees it next to the number 1.515 + // when the radix specified was 10, and returns an error code. 1.516 + int32_t semiOffset = style.Find("ch", false, widthOffset+6); 1.517 + int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6 1.518 + : style.Length() - widthOffset); 1.519 + nsAutoString widthstr; 1.520 + style.Mid(widthstr, widthOffset+6, length); 1.521 + nsresult err; 1.522 + int32_t col = widthstr.ToInteger(&err); 1.523 + 1.524 + if (NS_SUCCEEDED(err)) { 1.525 + mWrapColumn = (uint32_t)col; 1.526 +#ifdef DEBUG_preformatted 1.527 + printf("Set wrap column to %d based on style\n", mWrapColumn); 1.528 +#endif 1.529 + } 1.530 + } 1.531 + } 1.532 + else if (kNotFound != style.Find("pre", true, whitespace)) { 1.533 +#ifdef DEBUG_preformatted 1.534 + printf("Set mPreFormatted based on style pre\n"); 1.535 +#endif 1.536 + mPreFormatted = true; 1.537 + mWrapColumn = 0; 1.538 + } 1.539 + } 1.540 + else { 1.541 + /* See comment at end of function. */ 1.542 + mInWhitespace = true; 1.543 + mPreFormatted = false; 1.544 + } 1.545 + 1.546 + return NS_OK; 1.547 + } 1.548 + 1.549 + // Keep this in sync with DoCloseContainer! 
1.550 + if (!DoOutput()) { 1.551 + return NS_OK; 1.552 + } 1.553 + 1.554 + if (aTag == nsGkAtoms::p) 1.555 + EnsureVerticalSpace(1); 1.556 + else if (aTag == nsGkAtoms::pre) { 1.557 + if (GetLastBool(mIsInCiteBlockquote)) 1.558 + EnsureVerticalSpace(0); 1.559 + else if (mHasWrittenCiteBlockquote) { 1.560 + EnsureVerticalSpace(0); 1.561 + mHasWrittenCiteBlockquote = false; 1.562 + } 1.563 + else 1.564 + EnsureVerticalSpace(1); 1.565 + } 1.566 + else if (aTag == nsGkAtoms::tr) { 1.567 + PushBool(mHasWrittenCellsForRow, false); 1.568 + } 1.569 + else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) { 1.570 + // We must make sure that the content of two table cells get a 1.571 + // space between them. 1.572 + 1.573 + // To make the separation between cells most obvious and 1.574 + // importable, we use a TAB. 1.575 + if (GetLastBool(mHasWrittenCellsForRow)) { 1.576 + // Bypass |Write| so that the TAB isn't compressed away. 1.577 + AddToLine(MOZ_UTF16("\t"), 1); 1.578 + mInWhitespace = true; 1.579 + } 1.580 + else if (mHasWrittenCellsForRow.IsEmpty()) { 1.581 + // We don't always see a <tr> (nor a <table>) before the <td> if we're 1.582 + // copying part of a table 1.583 + PushBool(mHasWrittenCellsForRow, true); // will never be popped 1.584 + } 1.585 + else { 1.586 + SetLastBool(mHasWrittenCellsForRow, true); 1.587 + } 1.588 + } 1.589 + else if (aTag == nsGkAtoms::ul) { 1.590 + // Indent here to support nested lists, which aren't included in li :-( 1.591 + EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); 1.592 + // Must end the current line before we change indention 1.593 + mIndent += kIndentSizeList; 1.594 + mULCount++; 1.595 + } 1.596 + else if (aTag == nsGkAtoms::ol) { 1.597 + EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 
1 : 0); 1.598 + if (mFlags & nsIDocumentEncoder::OutputFormatted) { 1.599 + // Must end the current line before we change indention 1.600 + if (mOLStackIndex < OLStackSize) { 1.601 + nsAutoString startAttr; 1.602 + int32_t startVal = 1; 1.603 + if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) { 1.604 + nsresult rv = NS_OK; 1.605 + startVal = startAttr.ToInteger(&rv); 1.606 + if (NS_FAILED(rv)) 1.607 + startVal = 1; 1.608 + } 1.609 + mOLStack[mOLStackIndex++] = startVal; 1.610 + } 1.611 + } else { 1.612 + mOLStackIndex++; 1.613 + } 1.614 + mIndent += kIndentSizeList; // see ul 1.615 + } 1.616 + else if (aTag == nsGkAtoms::li && 1.617 + (mFlags & nsIDocumentEncoder::OutputFormatted)) { 1.618 + if (mTagStackIndex > 1 && IsInOL()) { 1.619 + if (mOLStackIndex > 0) { 1.620 + nsAutoString valueAttr; 1.621 + if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) { 1.622 + nsresult rv = NS_OK; 1.623 + int32_t valueAttrVal = valueAttr.ToInteger(&rv); 1.624 + if (NS_SUCCEEDED(rv)) 1.625 + mOLStack[mOLStackIndex-1] = valueAttrVal; 1.626 + } 1.627 + // This is what nsBulletFrame does for OLs: 1.628 + mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10); 1.629 + } 1.630 + else { 1.631 + mInIndentString.Append(char16_t('#')); 1.632 + } 1.633 + 1.634 + mInIndentString.Append(char16_t('.')); 1.635 + 1.636 + } 1.637 + else { 1.638 + static char bulletCharArray[] = "*o+#"; 1.639 + uint32_t index = mULCount > 0 ? 
(mULCount - 1) : 3; 1.640 + char bulletChar = bulletCharArray[index % 4]; 1.641 + mInIndentString.Append(char16_t(bulletChar)); 1.642 + } 1.643 + 1.644 + mInIndentString.Append(char16_t(' ')); 1.645 + } 1.646 + else if (aTag == nsGkAtoms::dl) { 1.647 + EnsureVerticalSpace(1); 1.648 + } 1.649 + else if (aTag == nsGkAtoms::dt) { 1.650 + EnsureVerticalSpace(0); 1.651 + } 1.652 + else if (aTag == nsGkAtoms::dd) { 1.653 + EnsureVerticalSpace(0); 1.654 + mIndent += kIndentSizeDD; 1.655 + } 1.656 + else if (aTag == nsGkAtoms::span) { 1.657 + ++mSpanLevel; 1.658 + } 1.659 + else if (aTag == nsGkAtoms::blockquote) { 1.660 + // Push 1.661 + PushBool(mIsInCiteBlockquote, isInCiteBlockquote); 1.662 + if (isInCiteBlockquote) { 1.663 + EnsureVerticalSpace(0); 1.664 + mCiteQuoteLevel++; 1.665 + } 1.666 + else { 1.667 + EnsureVerticalSpace(1); 1.668 + mIndent += kTabSize; // Check for some maximum value? 1.669 + } 1.670 + } 1.671 + else if (aTag == nsGkAtoms::q) { 1.672 + Write(NS_LITERAL_STRING("\"")); 1.673 + } 1.674 + 1.675 + // Else make sure we'll separate block level tags, 1.676 + // even if we're about to leave, before doing any other formatting. 
1.677 + else if (nsContentUtils::IsHTMLBlock(aTag)) { 1.678 + EnsureVerticalSpace(0); 1.679 + } 1.680 + 1.681 + ////////////////////////////////////////////////////////////// 1.682 + if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { 1.683 + return NS_OK; 1.684 + } 1.685 + ////////////////////////////////////////////////////////////// 1.686 + // The rest of this routine is formatted output stuff, 1.687 + // which we should skip if we're not formatted: 1.688 + ////////////////////////////////////////////////////////////// 1.689 + 1.690 + // Push on stack 1.691 + bool currentNodeIsConverted = IsCurrentNodeConverted(); 1.692 + 1.693 + if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || 1.694 + aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || 1.695 + aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) 1.696 + { 1.697 + EnsureVerticalSpace(2); 1.698 + if (mHeaderStrategy == 2) { // numbered 1.699 + mIndent += kIndentSizeHeaders; 1.700 + // Caching 1.701 + int32_t level = HeaderLevel(aTag); 1.702 + // Increase counter for current level 1.703 + mHeaderCounter[level]++; 1.704 + // Reset all lower levels 1.705 + int32_t i; 1.706 + 1.707 + for (i = level + 1; i <= 6; i++) { 1.708 + mHeaderCounter[i] = 0; 1.709 + } 1.710 + 1.711 + // Construct numbers 1.712 + nsAutoString leadup; 1.713 + for (i = 1; i <= level; i++) { 1.714 + leadup.AppendInt(mHeaderCounter[i]); 1.715 + leadup.Append(char16_t('.')); 1.716 + } 1.717 + leadup.Append(char16_t(' ')); 1.718 + Write(leadup); 1.719 + } 1.720 + else if (mHeaderStrategy == 1) { // indent increasingly 1.721 + mIndent += kIndentSizeHeaders; 1.722 + for (int32_t i = HeaderLevel(aTag); i > 1; i--) { 1.723 + // for h(x), run x-1 times 1.724 + mIndent += kIndentIncrementHeaders; 1.725 + } 1.726 + } 1.727 + } 1.728 + else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) { 1.729 + nsAutoString url; 1.730 + if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) 1.731 + && !url.IsEmpty()) { 1.732 + mURL = url; 1.733 + } 1.734 + } 
1.735 + else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) { 1.736 + Write(NS_LITERAL_STRING("^")); 1.737 + } 1.738 + else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) { 1.739 + Write(NS_LITERAL_STRING("_")); 1.740 + } 1.741 + else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { 1.742 + Write(NS_LITERAL_STRING("|")); 1.743 + } 1.744 + else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) 1.745 + && mStructs && !currentNodeIsConverted) { 1.746 + Write(NS_LITERAL_STRING("*")); 1.747 + } 1.748 + else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) 1.749 + && mStructs && !currentNodeIsConverted) { 1.750 + Write(NS_LITERAL_STRING("/")); 1.751 + } 1.752 + else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { 1.753 + Write(NS_LITERAL_STRING("_")); 1.754 + } 1.755 + 1.756 + /* Container elements are always block elements, so we shouldn't 1.757 + output any whitespace immediately after the container tag even if 1.758 + there's extra whitespace there because the HTML is pretty-printed 1.759 + or something. To ensure that happens, tell the serializer we're 1.760 + already in whitespace so it won't output more. */ 1.761 + mInWhitespace = true; 1.762 + 1.763 + return NS_OK; 1.764 +} 1.765 + 1.766 +nsresult 1.767 +nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag) 1.768 +{ 1.769 + if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { 1.770 + mIgnoredChildNodeLevel--; 1.771 + return NS_OK; 1.772 + } 1.773 + 1.774 + if (mFlags & nsIDocumentEncoder::OutputRaw) { 1.775 + // Raw means raw. Don't even think about doing anything fancy 1.776 + // here like indenting, adding line breaks or any other 1.777 + // characters such as list item bullets, quote characters 1.778 + // around <q>, etc. I mean it! Don't make me smack you! 
1.779 + 1.780 + return NS_OK; 1.781 + } 1.782 + 1.783 + if (mTagStackIndex > 0) { 1.784 + --mTagStackIndex; 1.785 + } 1.786 + 1.787 + if (mTagStackIndex >= mIgnoreAboveIndex) { 1.788 + if (mTagStackIndex == mIgnoreAboveIndex) { 1.789 + // We're dealing with the close tag whose matching 1.790 + // open tag had set the mIgnoreAboveIndex value. 1.791 + // Reset mIgnoreAboveIndex before discarding this tag. 1.792 + mIgnoreAboveIndex = (uint32_t)kNotFound; 1.793 + } 1.794 + return NS_OK; 1.795 + } 1.796 + 1.797 + // End current line if we're ending a block level tag 1.798 + if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) { 1.799 + // We want the output to end with a new line, 1.800 + // but in preformatted areas like text fields, 1.801 + // we can't emit newlines that weren't there. 1.802 + // So add the newline only in the case of formatted output. 1.803 + if (mFlags & nsIDocumentEncoder::OutputFormatted) { 1.804 + EnsureVerticalSpace(0); 1.805 + } 1.806 + else { 1.807 + FlushLine(); 1.808 + } 1.809 + // We won't want to do anything with these in formatted mode either, 1.810 + // so just return now: 1.811 + return NS_OK; 1.812 + } 1.813 + 1.814 + // Keep this in sync with DoOpenContainer! 1.815 + if (!DoOutput()) { 1.816 + return NS_OK; 1.817 + } 1.818 + 1.819 + if (aTag == nsGkAtoms::tr) { 1.820 + PopBool(mHasWrittenCellsForRow); 1.821 + // Should always end a line, but get no more whitespace 1.822 + if (mFloatingLines < 0) 1.823 + mFloatingLines = 0; 1.824 + mLineBreakDue = true; 1.825 + } 1.826 + else if (((aTag == nsGkAtoms::li) || 1.827 + (aTag == nsGkAtoms::dt)) && 1.828 + (mFlags & nsIDocumentEncoder::OutputFormatted)) { 1.829 + // Items that should always end a line, but get no more whitespace 1.830 + if (mFloatingLines < 0) 1.831 + mFloatingLines = 0; 1.832 + mLineBreakDue = true; 1.833 + } 1.834 + else if (aTag == nsGkAtoms::pre) { 1.835 + mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 
0 : 1; 1.836 + mLineBreakDue = true; 1.837 + } 1.838 + else if (aTag == nsGkAtoms::ul) { 1.839 + FlushLine(); 1.840 + mIndent -= kIndentSizeList; 1.841 + if (--mULCount + mOLStackIndex == 0) { 1.842 + mFloatingLines = 1; 1.843 + mLineBreakDue = true; 1.844 + } 1.845 + } 1.846 + else if (aTag == nsGkAtoms::ol) { 1.847 + FlushLine(); // Doing this after decreasing OLStackIndex would be wrong. 1.848 + mIndent -= kIndentSizeList; 1.849 + NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!"); 1.850 + mOLStackIndex--; 1.851 + if (mULCount + mOLStackIndex == 0) { 1.852 + mFloatingLines = 1; 1.853 + mLineBreakDue = true; 1.854 + } 1.855 + } 1.856 + else if (aTag == nsGkAtoms::dl) { 1.857 + mFloatingLines = 1; 1.858 + mLineBreakDue = true; 1.859 + } 1.860 + else if (aTag == nsGkAtoms::dd) { 1.861 + FlushLine(); 1.862 + mIndent -= kIndentSizeDD; 1.863 + } 1.864 + else if (aTag == nsGkAtoms::span) { 1.865 + NS_ASSERTION(mSpanLevel, "Span level will be negative!"); 1.866 + --mSpanLevel; 1.867 + } 1.868 + else if (aTag == nsGkAtoms::div) { 1.869 + if (mFloatingLines < 0) 1.870 + mFloatingLines = 0; 1.871 + mLineBreakDue = true; 1.872 + } 1.873 + else if (aTag == nsGkAtoms::blockquote) { 1.874 + FlushLine(); // Is this needed? 1.875 + 1.876 + // Pop 1.877 + bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote); 1.878 + 1.879 + if (isInCiteBlockquote) { 1.880 + NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!"); 1.881 + mCiteQuoteLevel--; 1.882 + mFloatingLines = 0; 1.883 + mHasWrittenCiteBlockquote = true; 1.884 + } 1.885 + else { 1.886 + mIndent -= kTabSize; 1.887 + mFloatingLines = 1; 1.888 + } 1.889 + mLineBreakDue = true; 1.890 + } 1.891 + else if (aTag == nsGkAtoms::q) { 1.892 + Write(NS_LITERAL_STRING("\"")); 1.893 + } 1.894 + else if (nsContentUtils::IsHTMLBlock(aTag) 1.895 + && aTag != nsGkAtoms::script) { 1.896 + // All other blocks get 1 vertical space after them 1.897 + // in formatted mode, otherwise 0. 1.898 + // This is hard. 
Sometimes 0 is a better number, but 1.899 + // how to know? 1.900 + if (mFlags & nsIDocumentEncoder::OutputFormatted) 1.901 + EnsureVerticalSpace(1); 1.902 + else { 1.903 + if (mFloatingLines < 0) 1.904 + mFloatingLines = 0; 1.905 + mLineBreakDue = true; 1.906 + } 1.907 + } 1.908 + 1.909 + ////////////////////////////////////////////////////////////// 1.910 + if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { 1.911 + return NS_OK; 1.912 + } 1.913 + ////////////////////////////////////////////////////////////// 1.914 + // The rest of this routine is formatted output stuff, 1.915 + // which we should skip if we're not formatted: 1.916 + ////////////////////////////////////////////////////////////// 1.917 + 1.918 + // Pop the currentConverted stack 1.919 + bool currentNodeIsConverted = IsCurrentNodeConverted(); 1.920 + 1.921 + if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || 1.922 + aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || 1.923 + aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) { 1.924 + 1.925 + if (mHeaderStrategy) { /*numbered or indent increasingly*/ 1.926 + mIndent -= kIndentSizeHeaders; 1.927 + } 1.928 + if (mHeaderStrategy == 1 /*indent increasingly*/ ) { 1.929 + for (int32_t i = HeaderLevel(aTag); i > 1; i--) { 1.930 + // for h(x), run x-1 times 1.931 + mIndent -= kIndentIncrementHeaders; 1.932 + } 1.933 + } 1.934 + EnsureVerticalSpace(1); 1.935 + } 1.936 + else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) { 1.937 + nsAutoString temp; 1.938 + temp.AssignLiteral(" <"); 1.939 + temp += mURL; 1.940 + temp.Append(char16_t('>')); 1.941 + Write(temp); 1.942 + mURL.Truncate(); 1.943 + } 1.944 + else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) 1.945 + && mStructs && !currentNodeIsConverted) { 1.946 + Write(kSpace); 1.947 + } 1.948 + else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { 1.949 + Write(NS_LITERAL_STRING("|")); 1.950 + } 1.951 + else if ((aTag == nsGkAtoms::strong || aTag == 
nsGkAtoms::b) 1.952 + && mStructs && !currentNodeIsConverted) { 1.953 + Write(NS_LITERAL_STRING("*")); 1.954 + } 1.955 + else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) 1.956 + && mStructs && !currentNodeIsConverted) { 1.957 + Write(NS_LITERAL_STRING("/")); 1.958 + } 1.959 + else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { 1.960 + Write(NS_LITERAL_STRING("_")); 1.961 + } 1.962 + 1.963 + return NS_OK; 1.964 +} 1.965 + 1.966 +bool 1.967 +nsPlainTextSerializer::MustSuppressLeaf() 1.968 +{ 1.969 + if (mIgnoredChildNodeLevel > 0) { 1.970 + return true; 1.971 + } 1.972 + 1.973 + if ((mTagStackIndex > 1 && 1.974 + mTagStack[mTagStackIndex-2] == nsGkAtoms::select) || 1.975 + (mTagStackIndex > 0 && 1.976 + mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) { 1.977 + // Don't output the contents of SELECT elements; 1.978 + // Might be nice, eventually, to output just the selected element. 1.979 + // Read more in bug 31994. 1.980 + return true; 1.981 + } 1.982 + 1.983 + if (mTagStackIndex > 0 && 1.984 + (mTagStack[mTagStackIndex-1] == nsGkAtoms::script || 1.985 + mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) { 1.986 + // Don't output the contents of <script> or <style> tags; 1.987 + return true; 1.988 + } 1.989 + 1.990 + return false; 1.991 +} 1.992 + 1.993 +void 1.994 +nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText) 1.995 +{ 1.996 + // If we don't want any output, just return 1.997 + if (!DoOutput()) { 1.998 + return; 1.999 + } 1.1000 + 1.1001 + if (!aIsLineBreak) { 1.1002 + // Make sure to reset this, since it's no longer true. 
1.1003 + mHasWrittenCiteBlockquote = false; 1.1004 + } 1.1005 + 1.1006 + if (mLineBreakDue) 1.1007 + EnsureVerticalSpace(mFloatingLines); 1.1008 + 1.1009 + if (MustSuppressLeaf()) { 1.1010 + return; 1.1011 + } 1.1012 + 1.1013 + if (aIsLineBreak) { 1.1014 + // The only times we want to pass along whitespace from the original 1.1015 + // html source are if we're forced into preformatted mode via flags, 1.1016 + // or if we're prettyprinting and we're inside a <pre>. 1.1017 + // Otherwise, either we're collapsing to minimal text, or we're 1.1018 + // prettyprinting to mimic the html format, and in neither case 1.1019 + // does the formatting of the html source help us. 1.1020 + if ((mFlags & nsIDocumentEncoder::OutputPreformatted) || 1.1021 + (mPreFormatted && !mWrapColumn) || 1.1022 + IsInPre()) { 1.1023 + EnsureVerticalSpace(mEmptyLines+1); 1.1024 + } 1.1025 + else if (!mInWhitespace) { 1.1026 + Write(kSpace); 1.1027 + mInWhitespace = true; 1.1028 + } 1.1029 + return; 1.1030 + } 1.1031 + 1.1032 + /* Check, if we are in a link (symbolized with mURL containing the URL) 1.1033 + and the text is equal to the URL. In that case we don't want to output 1.1034 + the URL twice so we scrap the text in mURL. */ 1.1035 + if (!mURL.IsEmpty() && mURL.Equals(aText)) { 1.1036 + mURL.Truncate(); 1.1037 + } 1.1038 + Write(aText); 1.1039 +} 1.1040 + 1.1041 +nsresult 1.1042 +nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag) 1.1043 +{ 1.1044 + // If we don't want any output, just return 1.1045 + if (!DoOutput()) { 1.1046 + return NS_OK; 1.1047 + } 1.1048 + 1.1049 + if (mLineBreakDue) 1.1050 + EnsureVerticalSpace(mFloatingLines); 1.1051 + 1.1052 + if (MustSuppressLeaf()) { 1.1053 + return NS_OK; 1.1054 + } 1.1055 + 1.1056 + if (aTag == nsGkAtoms::br) { 1.1057 + // Another egregious editor workaround, see bug 38194: 1.1058 + // ignore the bogus br tags that the editor sticks here and there. 
1.1059 + nsAutoString tagAttr; 1.1060 + if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr)) 1.1061 + || !tagAttr.EqualsLiteral("_moz")) { 1.1062 + EnsureVerticalSpace(mEmptyLines+1); 1.1063 + } 1.1064 + } 1.1065 + else if (aTag == nsGkAtoms::hr && 1.1066 + (mFlags & nsIDocumentEncoder::OutputFormatted)) { 1.1067 + EnsureVerticalSpace(0); 1.1068 + 1.1069 + // Make a line of dashes as wide as the wrap width 1.1070 + // XXX honoring percentage would be nice 1.1071 + nsAutoString line; 1.1072 + uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25); 1.1073 + while (line.Length() < width) { 1.1074 + line.Append(char16_t('-')); 1.1075 + } 1.1076 + Write(line); 1.1077 + 1.1078 + EnsureVerticalSpace(0); 1.1079 + } 1.1080 + else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) { 1.1081 + Write(NS_LITERAL_STRING("\xFFFC")); 1.1082 + } 1.1083 + else if (aTag == nsGkAtoms::img) { 1.1084 + /* Output (in decreasing order of preference) 1.1085 + alt, title or nothing */ 1.1086 + // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG> 1.1087 + nsAutoString imageDescription; 1.1088 + if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, 1.1089 + imageDescription))) { 1.1090 + // If the alt attribute has an empty value (|alt=""|), output nothing 1.1091 + } 1.1092 + else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title, 1.1093 + imageDescription)) 1.1094 + && !imageDescription.IsEmpty()) { 1.1095 + imageDescription = NS_LITERAL_STRING(" [") + 1.1096 + imageDescription + 1.1097 + NS_LITERAL_STRING("] "); 1.1098 + } 1.1099 + 1.1100 + Write(imageDescription); 1.1101 + } 1.1102 + 1.1103 + return NS_OK; 1.1104 +} 1.1105 + 1.1106 +/** 1.1107 + * Adds as many newline as necessary to get |noOfRows| empty lines 1.1108 + * 1.1109 + * noOfRows = -1 : Being in the middle of some line of text 1.1110 + * noOfRows = 0 : Being at the start of a line 1.1111 + * noOfRows = n>0 : Having n empty lines before the current line. 
1.1112 + */ 1.1113 +void 1.1114 +nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows) 1.1115 +{ 1.1116 + // If we have something in the indent we probably want to output 1.1117 + // it and it's not included in the count for empty lines so we don't 1.1118 + // realize that we should start a new line. 1.1119 + if (noOfRows >= 0 && !mInIndentString.IsEmpty()) { 1.1120 + EndLine(false); 1.1121 + mInWhitespace = true; 1.1122 + } 1.1123 + 1.1124 + while(mEmptyLines < noOfRows) { 1.1125 + EndLine(false); 1.1126 + mInWhitespace = true; 1.1127 + } 1.1128 + mLineBreakDue = false; 1.1129 + mFloatingLines = -1; 1.1130 +} 1.1131 + 1.1132 +/** 1.1133 + * This empties the current line cache without adding a NEWLINE. 1.1134 + * Should not be used if line wrapping is of importance since 1.1135 + * this function destroys the cache information. 1.1136 + * 1.1137 + * It will also write indentation and quotes if we believe us to be 1.1138 + * at the start of the line. 1.1139 + */ 1.1140 +void 1.1141 +nsPlainTextSerializer::FlushLine() 1.1142 +{ 1.1143 + if (!mCurrentLine.IsEmpty()) { 1.1144 + if (mAtFirstColumn) { 1.1145 + OutputQuotesAndIndent(); // XXX: Should we always do this? Bug? 1.1146 + } 1.1147 + 1.1148 + Output(mCurrentLine); 1.1149 + mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty(); 1.1150 + mCurrentLine.Truncate(); 1.1151 + mCurrentLineWidth = 0; 1.1152 + } 1.1153 +} 1.1154 + 1.1155 +/** 1.1156 + * Prints the text to output to our current output device (the string mOutputString). 1.1157 + * The only logic here is to replace non breaking spaces with a normal space since 1.1158 + * most (all?) receivers of the result won't understand the nbsp and even be 1.1159 + * confused by it. 
1.1160 + */ 1.1161 +void 1.1162 +nsPlainTextSerializer::Output(nsString& aString) 1.1163 +{ 1.1164 + if (!aString.IsEmpty()) { 1.1165 + mStartedOutput = true; 1.1166 + } 1.1167 + 1.1168 + if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) { 1.1169 + // First, replace all nbsp characters with spaces, 1.1170 + // which the unicode encoder won't do for us. 1.1171 + aString.ReplaceChar(kNBSP, kSPACE); 1.1172 + } 1.1173 + mOutputString->Append(aString); 1.1174 +} 1.1175 + 1.1176 +static bool 1.1177 +IsSpaceStuffable(const char16_t *s) 1.1178 +{ 1.1179 + if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP || 1.1180 + nsCRT::strncmp(s, MOZ_UTF16("From "), 5) == 0) 1.1181 + return true; 1.1182 + else 1.1183 + return false; 1.1184 +} 1.1185 + 1.1186 +/** 1.1187 + * This function adds a piece of text to the current stored line. If we are 1.1188 + * wrapping text and the stored line will become too long, a suitable 1.1189 + * location to wrap will be found and the line that's complete will be 1.1190 + * output. 1.1191 + */ 1.1192 +void 1.1193 +nsPlainTextSerializer::AddToLine(const char16_t * aLineFragment, 1.1194 + int32_t aLineFragmentLength) 1.1195 +{ 1.1196 + uint32_t prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent; 1.1197 + 1.1198 + if (mLineBreakDue) 1.1199 + EnsureVerticalSpace(mFloatingLines); 1.1200 + 1.1201 + int32_t linelength = mCurrentLine.Length(); 1.1202 + if (0 == linelength) { 1.1203 + if (0 == aLineFragmentLength) { 1.1204 + // Nothing at all. Are you kidding me? 1.1205 + return; 1.1206 + } 1.1207 + 1.1208 + if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { 1.1209 + if (IsSpaceStuffable(aLineFragment) 1.1210 + && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway 1.1211 + ) 1.1212 + { 1.1213 + // Space stuffing a la RFC 2646 (format=flowed). 
1.1214 + mCurrentLine.Append(char16_t(' ')); 1.1215 + 1.1216 + if (MayWrap()) { 1.1217 + mCurrentLineWidth += GetUnicharWidth(' '); 1.1218 +#ifdef DEBUG_wrapping 1.1219 + NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(), 1.1220 + mCurrentLine.Length()) == 1.1221 + (int32_t)mCurrentLineWidth, 1.1222 + "mCurrentLineWidth and reality out of sync!"); 1.1223 +#endif 1.1224 + } 1.1225 + } 1.1226 + } 1.1227 + mEmptyLines=-1; 1.1228 + } 1.1229 + 1.1230 + mCurrentLine.Append(aLineFragment, aLineFragmentLength); 1.1231 + if (MayWrap()) { 1.1232 + mCurrentLineWidth += GetUnicharStringWidth(aLineFragment, 1.1233 + aLineFragmentLength); 1.1234 +#ifdef DEBUG_wrapping 1.1235 + NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(), 1.1236 + mCurrentLine.Length()) == 1.1237 + (int32_t)mCurrentLineWidth, 1.1238 + "mCurrentLineWidth and reality out of sync!"); 1.1239 +#endif 1.1240 + } 1.1241 + 1.1242 + linelength = mCurrentLine.Length(); 1.1243 + 1.1244 + // Wrap? 1.1245 + if (MayWrap()) 1.1246 + { 1.1247 +#ifdef DEBUG_wrapping 1.1248 + NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(), 1.1249 + mCurrentLine.Length()) == 1.1250 + (int32_t)mCurrentLineWidth, 1.1251 + "mCurrentLineWidth and reality out of sync!"); 1.1252 +#endif 1.1253 + // Yes, wrap! 1.1254 + // The "+4" is to avoid wrap lines that only would be a couple 1.1255 + // of letters too long. We give this bonus only if the 1.1256 + // wrapcolumn is more than 20. 1.1257 + uint32_t bonuswidth = (mWrapColumn > 20) ? 
4 : 0; 1.1258 + 1.1259 + // XXX: Should calculate prefixwidth with GetUnicharStringWidth 1.1260 + while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) { 1.1261 + // We go from the end removing one letter at a time until 1.1262 + // we have a reasonable width 1.1263 + int32_t goodSpace = mCurrentLine.Length(); 1.1264 + uint32_t width = mCurrentLineWidth; 1.1265 + while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) { 1.1266 + goodSpace--; 1.1267 + width -= GetUnicharWidth(mCurrentLine[goodSpace]); 1.1268 + } 1.1269 + 1.1270 + goodSpace++; 1.1271 + 1.1272 + if (mLineBreaker) { 1.1273 + goodSpace = mLineBreaker->Prev(mCurrentLine.get(), 1.1274 + mCurrentLine.Length(), goodSpace); 1.1275 + if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT && 1.1276 + nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) { 1.1277 + --goodSpace; // adjust the position since line breaker returns a position next to space 1.1278 + } 1.1279 + } 1.1280 + // fallback if the line breaker is unavailable or failed 1.1281 + if (!mLineBreaker) { 1.1282 + goodSpace = mWrapColumn-prefixwidth; 1.1283 + while (goodSpace >= 0 && 1.1284 + !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { 1.1285 + goodSpace--; 1.1286 + } 1.1287 + } 1.1288 + 1.1289 + nsAutoString restOfLine; 1.1290 + if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) { 1.1291 + // If we don't found a good place to break, accept long line and 1.1292 + // try to find another place to break 1.1293 + goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1; 1.1294 + if (mLineBreaker) { 1.1295 + if ((uint32_t)goodSpace < mCurrentLine.Length()) 1.1296 + goodSpace = mLineBreaker->Next(mCurrentLine.get(), 1.1297 + mCurrentLine.Length(), goodSpace); 1.1298 + if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) 1.1299 + goodSpace = mCurrentLine.Length(); 1.1300 + } 1.1301 + // fallback if the line breaker is unavailable or failed 1.1302 + if (!mLineBreaker) { 1.1303 + goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth; 
1.1304 + while (goodSpace < linelength && 1.1305 + !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { 1.1306 + goodSpace++; 1.1307 + } 1.1308 + } 1.1309 + } 1.1310 + 1.1311 + if ((goodSpace < linelength) && (goodSpace > 0)) { 1.1312 + // Found a place to break 1.1313 + 1.1314 + // -1 (trim a char at the break position) 1.1315 + // only if the line break was a space. 1.1316 + if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { 1.1317 + mCurrentLine.Right(restOfLine, linelength-goodSpace-1); 1.1318 + } 1.1319 + else { 1.1320 + mCurrentLine.Right(restOfLine, linelength-goodSpace); 1.1321 + } 1.1322 + // if breaker was U+0020, it has to consider for delsp=yes support 1.1323 + bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' '; 1.1324 + mCurrentLine.Truncate(goodSpace); 1.1325 + EndLine(true, breakBySpace); 1.1326 + mCurrentLine.Truncate(); 1.1327 + // Space stuff new line? 1.1328 + if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { 1.1329 + if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get()) 1.1330 + && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway 1.1331 + ) 1.1332 + { 1.1333 + // Space stuffing a la RFC 2646 (format=flowed). 1.1334 + mCurrentLine.Append(char16_t(' ')); 1.1335 + //XXX doesn't seem to work correctly for ' ' 1.1336 + } 1.1337 + } 1.1338 + mCurrentLine.Append(restOfLine); 1.1339 + mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(), 1.1340 + mCurrentLine.Length()); 1.1341 + linelength = mCurrentLine.Length(); 1.1342 + mEmptyLines = -1; 1.1343 + } 1.1344 + else { 1.1345 + // Nothing to do. Hopefully we get more data later 1.1346 + // to use for a place to break line 1.1347 + break; 1.1348 + } 1.1349 + } 1.1350 + } 1.1351 + else { 1.1352 + // No wrapping. 1.1353 + } 1.1354 +} 1.1355 + 1.1356 +/** 1.1357 + * Outputs the contents of mCurrentLine, and resets line specific 1.1358 + * variables. Also adds an indentation and prefix if there is 1.1359 + * one specified. 
Strips ending spaces from the line if it isn't 1.1360 + * preformatted. 1.1361 + */ 1.1362 +void 1.1363 +nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace) 1.1364 +{ 1.1365 + uint32_t currentlinelength = mCurrentLine.Length(); 1.1366 + 1.1367 + if (aSoftlinebreak && 0 == currentlinelength) { 1.1368 + // No meaning 1.1369 + return; 1.1370 + } 1.1371 + 1.1372 + /* In non-preformatted mode, remove spaces from the end of the line for 1.1373 + * format=flowed compatibility. Don't do this for these special cases: 1.1374 + * "-- ", the signature separator (RFC 2646) shouldn't be touched and 1.1375 + * "- -- ", the OpenPGP dash-escaped signature separator in inline 1.1376 + * signed messages according to the OpenPGP standard (RFC 2440). 1.1377 + */ 1.1378 + if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) && 1.1379 + !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) && 1.1380 + (aSoftlinebreak || 1.1381 + !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) { 1.1382 + // Remove spaces from the end of the line. 1.1383 + while(currentlinelength > 0 && 1.1384 + mCurrentLine[currentlinelength-1] == ' ') { 1.1385 + --currentlinelength; 1.1386 + } 1.1387 + mCurrentLine.SetLength(currentlinelength); 1.1388 + } 1.1389 + 1.1390 + if (aSoftlinebreak && 1.1391 + (mFlags & nsIDocumentEncoder::OutputFormatFlowed) && 1.1392 + (mIndent == 0)) { 1.1393 + // Add the soft part of the soft linebreak (RFC 2646 4.1) 1.1394 + // We only do this when there is no indentation since format=flowed 1.1395 + // lines and indentation doesn't work well together. 1.1396 + 1.1397 + // If breaker character is ASCII space with RFC 3676 support (delsp=yes), 1.1398 + // add twice space. 
1.1399 + if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace) 1.1400 + mCurrentLine.Append(NS_LITERAL_STRING(" ")); 1.1401 + else 1.1402 + mCurrentLine.Append(char16_t(' ')); 1.1403 + } 1.1404 + 1.1405 + if (aSoftlinebreak) { 1.1406 + mEmptyLines=0; 1.1407 + } 1.1408 + else { 1.1409 + // Hard break 1.1410 + if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) { 1.1411 + mEmptyLines=-1; 1.1412 + } 1.1413 + 1.1414 + mEmptyLines++; 1.1415 + } 1.1416 + 1.1417 + if (mAtFirstColumn) { 1.1418 + // If we don't have anything "real" to output we have to 1.1419 + // make sure the indent doesn't end in a space since that 1.1420 + // would trick a format=flowed-aware receiver. 1.1421 + bool stripTrailingSpaces = mCurrentLine.IsEmpty(); 1.1422 + OutputQuotesAndIndent(stripTrailingSpaces); 1.1423 + } 1.1424 + 1.1425 + mCurrentLine.Append(mLineBreak); 1.1426 + Output(mCurrentLine); 1.1427 + mCurrentLine.Truncate(); 1.1428 + mCurrentLineWidth = 0; 1.1429 + mAtFirstColumn=true; 1.1430 + mInWhitespace=true; 1.1431 + mLineBreakDue = false; 1.1432 + mFloatingLines = -1; 1.1433 +} 1.1434 + 1.1435 + 1.1436 +/** 1.1437 + * Outputs the calculated and stored indent and text in the indentation. That is 1.1438 + * quote chars and numbers for numbered lists and such. It will also reset any 1.1439 + * stored text to put in the indentation after using it. 1.1440 + */ 1.1441 +void 1.1442 +nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */) 1.1443 +{ 1.1444 + nsAutoString stringToOutput; 1.1445 + 1.1446 + // Put the mail quote "> " chars in, if appropriate: 1.1447 + if (mCiteQuoteLevel > 0) { 1.1448 + nsAutoString quotes; 1.1449 + for(int i=0; i < mCiteQuoteLevel; i++) { 1.1450 + quotes.Append(char16_t('>')); 1.1451 + } 1.1452 + if (!mCurrentLine.IsEmpty()) { 1.1453 + /* Better don't output a space here, if the line is empty, 1.1454 + in case a receiving f=f-aware UA thinks, this were a flowed line, 1.1455 + which it isn't - it's just empty. 
1.1456 + (Flowed lines may be joined with the following one, 1.1457 + so the empty line may be lost completely.) */ 1.1458 + quotes.Append(char16_t(' ')); 1.1459 + } 1.1460 + stringToOutput = quotes; 1.1461 + mAtFirstColumn = false; 1.1462 + } 1.1463 + 1.1464 + // Indent if necessary 1.1465 + int32_t indentwidth = mIndent - mInIndentString.Length(); 1.1466 + if (indentwidth > 0 1.1467 + && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) 1.1468 + // Don't make empty lines look flowed 1.1469 + ) { 1.1470 + nsAutoString spaces; 1.1471 + for (int i=0; i < indentwidth; ++i) 1.1472 + spaces.Append(char16_t(' ')); 1.1473 + stringToOutput += spaces; 1.1474 + mAtFirstColumn = false; 1.1475 + } 1.1476 + 1.1477 + if (!mInIndentString.IsEmpty()) { 1.1478 + stringToOutput += mInIndentString; 1.1479 + mAtFirstColumn = false; 1.1480 + mInIndentString.Truncate(); 1.1481 + } 1.1482 + 1.1483 + if (stripTrailingSpaces) { 1.1484 + int32_t lineLength = stringToOutput.Length(); 1.1485 + while(lineLength > 0 && 1.1486 + ' ' == stringToOutput[lineLength-1]) { 1.1487 + --lineLength; 1.1488 + } 1.1489 + stringToOutput.SetLength(lineLength); 1.1490 + } 1.1491 + 1.1492 + if (!stringToOutput.IsEmpty()) { 1.1493 + Output(stringToOutput); 1.1494 + } 1.1495 + 1.1496 +} 1.1497 + 1.1498 +/** 1.1499 + * Write a string. This is the highlevel function to use to get text output. 1.1500 + * By using AddToLine, Output, EndLine and other functions it handles quotation, 1.1501 + * line wrapping, indentation, whitespace compression and other things. 
1.1502 + */ 1.1503 +void 1.1504 +nsPlainTextSerializer::Write(const nsAString& aStr) 1.1505 +{ 1.1506 + // XXX Copy necessary to use nsString methods and gain 1.1507 + // access to underlying buffer 1.1508 + nsAutoString str(aStr); 1.1509 + 1.1510 +#ifdef DEBUG_wrapping 1.1511 + printf("Write(%s): wrap col = %d\n", 1.1512 + NS_ConvertUTF16toUTF8(str).get(), mWrapColumn); 1.1513 +#endif 1.1514 + 1.1515 + int32_t bol = 0; 1.1516 + int32_t newline; 1.1517 + 1.1518 + int32_t totLen = str.Length(); 1.1519 + 1.1520 + // If the string is empty, do nothing: 1.1521 + if (totLen <= 0) return; 1.1522 + 1.1523 + // For Flowed text change nbsp-ses to spaces at end of lines to allow them 1.1524 + // to be cut off along with usual spaces if required. (bug #125928) 1.1525 + if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { 1.1526 + for (int32_t i = totLen-1; i >= 0; i--) { 1.1527 + char16_t c = str[i]; 1.1528 + if ('\n' == c || '\r' == c || ' ' == c || '\t' == c) 1.1529 + continue; 1.1530 + if (kNBSP == c) 1.1531 + str.Replace(i, 1, ' '); 1.1532 + else 1.1533 + break; 1.1534 + } 1.1535 + } 1.1536 + 1.1537 + // We have two major codepaths here. One that does preformatted text and one 1.1538 + // that does normal formatted text. The one for preformatted text calls 1.1539 + // Output directly while the other code path goes through AddToLine. 1.1540 + if ((mPreFormatted && !mWrapColumn) || IsInPre() 1.1541 + || ((mSpanLevel > 0 || mDontWrapAnyQuotes) 1.1542 + && mEmptyLines >= 0 && str.First() == char16_t('>'))) { 1.1543 + // No intelligent wrapping. 1.1544 + 1.1545 + // This mustn't be mixed with intelligent wrapping without clearing 1.1546 + // the mCurrentLine buffer before!!! 1.1547 + NS_ASSERTION(mCurrentLine.IsEmpty(), 1.1548 + "Mixed wrapping data and nonwrapping data on the same line"); 1.1549 + if (!mCurrentLine.IsEmpty()) { 1.1550 + FlushLine(); 1.1551 + } 1.1552 + 1.1553 + // Put the mail quote "> " chars in, if appropriate. 
1.1554 + // Have to put it in before every line. 1.1555 + while(bol<totLen) { 1.1556 + bool outputQuotes = mAtFirstColumn; 1.1557 + bool atFirstColumn = mAtFirstColumn; 1.1558 + bool outputLineBreak = false; 1.1559 + bool spacesOnly = true; 1.1560 + 1.1561 + // Find one of '\n' or '\r' using iterators since nsAString 1.1562 + // doesn't have the old FindCharInSet function. 1.1563 + nsAString::const_iterator iter; str.BeginReading(iter); 1.1564 + nsAString::const_iterator done_searching; str.EndReading(done_searching); 1.1565 + iter.advance(bol); 1.1566 + int32_t new_newline = bol; 1.1567 + newline = kNotFound; 1.1568 + while(iter != done_searching) { 1.1569 + if ('\n' == *iter || '\r' == *iter) { 1.1570 + newline = new_newline; 1.1571 + break; 1.1572 + } 1.1573 + if (' ' != *iter) 1.1574 + spacesOnly = false; 1.1575 + ++new_newline; 1.1576 + ++iter; 1.1577 + } 1.1578 + 1.1579 + // Done searching 1.1580 + nsAutoString stringpart; 1.1581 + if (newline == kNotFound) { 1.1582 + // No new lines. 1.1583 + stringpart.Assign(Substring(str, bol, totLen - bol)); 1.1584 + if (!stringpart.IsEmpty()) { 1.1585 + char16_t lastchar = stringpart[stringpart.Length()-1]; 1.1586 + if ((lastchar == '\t') || (lastchar == ' ') || 1.1587 + (lastchar == '\r') ||(lastchar == '\n')) { 1.1588 + mInWhitespace = true; 1.1589 + } 1.1590 + else { 1.1591 + mInWhitespace = false; 1.1592 + } 1.1593 + } 1.1594 + mEmptyLines=-1; 1.1595 + atFirstColumn = mAtFirstColumn && (totLen-bol)==0; 1.1596 + bol = totLen; 1.1597 + } 1.1598 + else { 1.1599 + // There is a newline 1.1600 + stringpart.Assign(Substring(str, bol, newline-bol)); 1.1601 + mInWhitespace = true; 1.1602 + outputLineBreak = true; 1.1603 + mEmptyLines=0; 1.1604 + atFirstColumn = true; 1.1605 + bol = newline+1; 1.1606 + if ('\r' == *iter && bol < totLen && '\n' == *++iter) { 1.1607 + // There was a CRLF in the input. This used to be illegal and 1.1608 + // stripped by the parser. Apparently not anymore. Let's skip 1.1609 + // over the LF. 
1.1610 + bol++; 1.1611 + } 1.1612 + } 1.1613 + 1.1614 + mCurrentLine.AssignLiteral(""); 1.1615 + if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { 1.1616 + if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928 1.1617 + !stringpart.EqualsLiteral("-- ") && 1.1618 + !stringpart.EqualsLiteral("- -- ")) 1.1619 + stringpart.Trim(" ", false, true, true); 1.1620 + if (IsSpaceStuffable(stringpart.get()) && stringpart[0] != '>') 1.1621 + mCurrentLine.Append(char16_t(' ')); 1.1622 + } 1.1623 + mCurrentLine.Append(stringpart); 1.1624 + 1.1625 + if (outputQuotes) { 1.1626 + // Note: this call messes with mAtFirstColumn 1.1627 + OutputQuotesAndIndent(); 1.1628 + } 1.1629 + 1.1630 + Output(mCurrentLine); 1.1631 + if (outputLineBreak) { 1.1632 + Output(mLineBreak); 1.1633 + } 1.1634 + mAtFirstColumn = atFirstColumn; 1.1635 + } 1.1636 + 1.1637 + // Reset mCurrentLine. 1.1638 + mCurrentLine.Truncate(); 1.1639 + 1.1640 +#ifdef DEBUG_wrapping 1.1641 + printf("No wrapping: newline is %d, totLen is %d\n", 1.1642 + newline, totLen); 1.1643 +#endif 1.1644 + return; 1.1645 + } 1.1646 + 1.1647 + // Intelligent handling of text 1.1648 + // If needed, strip out all "end of lines" 1.1649 + // and multiple whitespace between words 1.1650 + int32_t nextpos; 1.1651 + const char16_t * offsetIntoBuffer = nullptr; 1.1652 + 1.1653 + while (bol < totLen) { // Loop over lines 1.1654 + // Find a place where we may have to do whitespace compression 1.1655 + nextpos = str.FindCharInSet(" \t\n\r", bol); 1.1656 +#ifdef DEBUG_wrapping 1.1657 + nsAutoString remaining; 1.1658 + str.Right(remaining, totLen - bol); 1.1659 + foo = ToNewCString(remaining); 1.1660 + // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n", 1.1661 + // bol, nextpos, totLen, foo); 1.1662 + nsMemory::Free(foo); 1.1663 +#endif 1.1664 + 1.1665 + if (nextpos == kNotFound) { 1.1666 + // The rest of the string 1.1667 + offsetIntoBuffer = str.get() + bol; 1.1668 + AddToLine(offsetIntoBuffer, totLen-bol); 
1.1669 + bol=totLen; 1.1670 + mInWhitespace=false; 1.1671 + } 1.1672 + else { 1.1673 + // There's still whitespace left in the string 1.1674 + if (nextpos != 0 && (nextpos + 1) < totLen) { 1.1675 + offsetIntoBuffer = str.get() + nextpos; 1.1676 + // skip '\n' if it is between CJ chars 1.1677 + if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) { 1.1678 + offsetIntoBuffer = str.get() + bol; 1.1679 + AddToLine(offsetIntoBuffer, nextpos-bol); 1.1680 + bol = nextpos + 1; 1.1681 + continue; 1.1682 + } 1.1683 + } 1.1684 + // If we're already in whitespace and not preformatted, just skip it: 1.1685 + if (mInWhitespace && (nextpos == bol) && !mPreFormatted && 1.1686 + !(mFlags & nsIDocumentEncoder::OutputPreformatted)) { 1.1687 + // Skip whitespace 1.1688 + bol++; 1.1689 + continue; 1.1690 + } 1.1691 + 1.1692 + if (nextpos == bol) { 1.1693 + // Note that we are in whitespace. 1.1694 + mInWhitespace = true; 1.1695 + offsetIntoBuffer = str.get() + nextpos; 1.1696 + AddToLine(offsetIntoBuffer, 1); 1.1697 + bol++; 1.1698 + continue; 1.1699 + } 1.1700 + 1.1701 + mInWhitespace = true; 1.1702 + 1.1703 + offsetIntoBuffer = str.get() + bol; 1.1704 + if (mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) { 1.1705 + // Preserve the real whitespace character 1.1706 + nextpos++; 1.1707 + AddToLine(offsetIntoBuffer, nextpos-bol); 1.1708 + bol = nextpos; 1.1709 + } 1.1710 + else { 1.1711 + // Replace the whitespace with a space 1.1712 + AddToLine(offsetIntoBuffer, nextpos-bol); 1.1713 + AddToLine(kSpace.get(),1); 1.1714 + bol = nextpos + 1; // Let's eat the whitespace 1.1715 + } 1.1716 + } 1.1717 + } // Continue looping over the string 1.1718 +} 1.1719 + 1.1720 + 1.1721 +/** 1.1722 + * Gets the value of an attribute in a string. If the function returns 1.1723 + * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified. 
1.1724 + */ 1.1725 +nsresult 1.1726 +nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName, 1.1727 + nsString& aValueRet) 1.1728 +{ 1.1729 + if (mElement) { 1.1730 + if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) { 1.1731 + return NS_OK; 1.1732 + } 1.1733 + } 1.1734 + 1.1735 + return NS_ERROR_NOT_AVAILABLE; 1.1736 +} 1.1737 + 1.1738 +/** 1.1739 + * Returns true, if the element was inserted by Moz' TXT->HTML converter. 1.1740 + * In this case, we should ignore it. 1.1741 + */ 1.1742 +bool 1.1743 +nsPlainTextSerializer::IsCurrentNodeConverted() 1.1744 +{ 1.1745 + nsAutoString value; 1.1746 + nsresult rv = GetAttributeValue(nsGkAtoms::_class, value); 1.1747 + return (NS_SUCCEEDED(rv) && 1.1748 + (value.EqualsIgnoreCase("moz-txt", 7) || 1.1749 + value.EqualsIgnoreCase("\"moz-txt", 8))); 1.1750 +} 1.1751 + 1.1752 + 1.1753 +// static 1.1754 +nsIAtom* 1.1755 +nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) 1.1756 +{ 1.1757 + if (!aContent->IsHTML()) { 1.1758 + return nullptr; 1.1759 + } 1.1760 + 1.1761 + nsIAtom* localName = aContent->Tag(); 1.1762 + return localName->IsStaticAtom() ? localName : nullptr; 1.1763 +} 1.1764 + 1.1765 +/** 1.1766 + * Returns true if we currently are inside a <pre>. The check is done 1.1767 + * by traversing the tag stack looking for <pre> until we hit a block 1.1768 + * level tag which is assumed to override any <pre>:s below it in 1.1769 + * the stack. To do this correctly to a 100% would require access 1.1770 + * to style which we don't support in this converter. 
1.1771 + */ 1.1772 +bool 1.1773 +nsPlainTextSerializer::IsInPre() 1.1774 +{ 1.1775 + int32_t i = mTagStackIndex; 1.1776 + while(i > 0) { 1.1777 + if (mTagStack[i - 1] == nsGkAtoms::pre) 1.1778 + return true; 1.1779 + if (nsContentUtils::IsHTMLBlock(mTagStack[i - 1])) { 1.1780 + // We assume that every other block overrides a <pre> 1.1781 + return false; 1.1782 + } 1.1783 + --i; 1.1784 + } 1.1785 + 1.1786 + // Not a <pre> in the whole stack 1.1787 + return false; 1.1788 +} 1.1789 + 1.1790 +/** 1.1791 + * This method is required only to identify LI's inside OL. 1.1792 + * Returns TRUE if we are inside an OL tag and FALSE otherwise. 1.1793 + */ 1.1794 +bool 1.1795 +nsPlainTextSerializer::IsInOL() 1.1796 +{ 1.1797 + int32_t i = mTagStackIndex; 1.1798 + while(--i >= 0) { 1.1799 + if (mTagStack[i] == nsGkAtoms::ol) 1.1800 + return true; 1.1801 + if (mTagStack[i] == nsGkAtoms::ul) { 1.1802 + // If a UL is reached first, LI belongs the UL nested in OL. 1.1803 + return false; 1.1804 + } 1.1805 + } 1.1806 + // We may reach here for orphan LI's. 
1.1807 + return false; 1.1808 +} 1.1809 + 1.1810 +/* 1.1811 + @return 0 = no header, 1 = h1, ..., 6 = h6 1.1812 +*/ 1.1813 +int32_t HeaderLevel(nsIAtom* aTag) 1.1814 +{ 1.1815 + if (aTag == nsGkAtoms::h1) { 1.1816 + return 1; 1.1817 + } 1.1818 + if (aTag == nsGkAtoms::h2) { 1.1819 + return 2; 1.1820 + } 1.1821 + if (aTag == nsGkAtoms::h3) { 1.1822 + return 3; 1.1823 + } 1.1824 + if (aTag == nsGkAtoms::h4) { 1.1825 + return 4; 1.1826 + } 1.1827 + if (aTag == nsGkAtoms::h5) { 1.1828 + return 5; 1.1829 + } 1.1830 + if (aTag == nsGkAtoms::h6) { 1.1831 + return 6; 1.1832 + } 1.1833 + return 0; 1.1834 +} 1.1835 + 1.1836 + 1.1837 +/* 1.1838 + * This is an implementation of GetUnicharWidth() and 1.1839 + * GetUnicharStringWidth() as defined in 1.1840 + * "The Single UNIX Specification, Version 2, The Open Group, 1997" 1.1841 + * <http://www.UNIX-systems.org/online.html> 1.1842 + * 1.1843 + * Markus Kuhn -- 2000-02-08 -- public domain 1.1844 + * 1.1845 + * Minor alterations to fit Mozilla's data types by Daniel Bratell 1.1846 + */ 1.1847 + 1.1848 +/* These functions define the column width of an ISO 10646 character 1.1849 + * as follows: 1.1850 + * 1.1851 + * - The null character (U+0000) has a column width of 0. 1.1852 + * 1.1853 + * - Other C0/C1 control characters and DEL will lead to a return 1.1854 + * value of -1. 1.1855 + * 1.1856 + * - Non-spacing and enclosing combining characters (general 1.1857 + * category code Mn or Me in the Unicode database) have a 1.1858 + * column width of 0. 1.1859 + * 1.1860 + * - Spacing characters in the East Asian Wide (W) or East Asian 1.1861 + * FullWidth (F) category as defined in Unicode Technical 1.1862 + * Report #11 have a column width of 2. 1.1863 + * 1.1864 + * - All remaining characters (including all printable 1.1865 + * ISO 8859-1 and WGL4 characters, Unicode control characters, 1.1866 + * etc.) have a column width of 1. 
// (this line ends the block comment that is opened above this chunk) */
/*
 * This implementation works on single char16_t code units, which are
 * assumed to be encoded in ISO 10646. Characters outside the BMP are not
 * handled specially: each half of a surrogate pair is measured on its
 * own and falls through to the default width of 1.
 */

/**
 * Returns the number of display columns occupied by one code unit.
 *
 * @param ucs the code unit to measure
 * @return  0 for U+0000 and for the non-spacing characters in the table
 *            below,
 *         -1 for other C0/C1 control characters and DEL,
 *          2 for the wide / fullwidth ranges tested at the end,
 *          1 for everything else.
 */
int32_t GetUnicharWidth(char16_t ucs)
{
  /* sorted list of non-overlapping intervals of non-spacing characters */
  static const struct interval {
    uint16_t first;
    uint16_t last;
  } combining[] = {
    { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
    { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
    { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
    { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
    { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
    { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
    { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
    { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
    { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
    { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
    { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
    { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
    { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
    { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
    { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
    { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
    { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
    { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
    { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
    { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
    { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
    { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
    { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
  };

  /* test for 8-bit control characters */
  if (ucs == 0) {
    return 0;
  }
  if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) {
    return -1;
  }

  /* first quick check for Latin-1 etc. characters */
  if (ucs < combining[0].first) {
    return 1;
  }

  /* binary search in table of non-spacing characters */
  int32_t min = 0;
  int32_t max =
    static_cast<int32_t>(sizeof(combining) / sizeof(combining[0])) - 1;
  while (max >= min) {
    const int32_t mid = min + (max - min) / 2;
    if (combining[mid].last < ucs) {
      min = mid + 1;
    } else if (combining[mid].first > ucs) {
      max = mid - 1;
    } else {
      /* first <= ucs <= last: ucs is a non-spacing character */
      return 0;
    }
  }

  /* if we arrive here, ucs is not a combining or C0/C1 control character */

  /* fast test for majority of non-wide scripts */
  if (ucs < 0x1100) {
    return 1;
  }

  /* The comparison chain yields 0 or 1, so the result is 1 (narrow) or
   * 2 (wide). (ucs & ~0x0011) != 0x300a excludes U+300A, U+300B, U+301A
   * and U+301B from the CJK range. */
  return 1 +
    ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
     (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
      ucs != 0x303f) ||                  /* CJK ... Yi */
     (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
     (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
     (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
     (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
     (ucs >= 0xffe0 && ucs <= 0xffe6));
}


/**
 * Returns the total column width of at most n code units starting at pwcs.
 * Measuring stops early at the first U+0000 code unit.
 *
 * The length is checked before each read, so pwcs[n] is never accessed and
 * the buffer does not have to be NUL-terminated; when n <= 0 the pointer is
 * not dereferenced at all.
 *
 * @param pwcs first code unit to measure
 * @param n    maximum number of code units to measure
 * @return the sum of the widths, where a code unit for which
 *         GetUnicharWidth() returns a negative value counts as 1 column
 */
int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n)
{
  int32_t width = 0;

  for (int32_t i = 0; i < n && pwcs[i]; ++i) {
    const int32_t w = GetUnicharWidth(pwcs[i]);
    // Taking 1 as the width of non-printable character, for bug# 94475.
    width += (w < 0) ? 1 : w;
  }

  return width;
}