Thu, 15 Jan 2015 21:03:48 +0100
Integrate friendly tips from Tor colleagues to make (or not) 4.5 alpha 3;
This includes removal of overloaded (but unused) methods, and addition of
an overlooked call to DataStruct::SetData(nsISupports, uint32_t, bool).
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | /* |
michael@0 | 7 | * nsIContentSerializer implementation that can be used with an |
michael@0 | 8 | * nsIDocumentEncoder to convert a DOM into plaintext in a nice way |
michael@0 | 9 | * (eg for copy/paste as plaintext). |
michael@0 | 10 | */ |
michael@0 | 11 | |
michael@0 | 12 | #include "nsPlainTextSerializer.h" |
michael@0 | 13 | #include "nsLWBrkCIID.h" |
michael@0 | 14 | #include "nsIServiceManager.h" |
michael@0 | 15 | #include "nsGkAtoms.h" |
michael@0 | 16 | #include "nsNameSpaceManager.h" |
michael@0 | 17 | #include "nsTextFragment.h" |
michael@0 | 18 | #include "nsContentUtils.h" |
michael@0 | 19 | #include "nsReadableUtils.h" |
michael@0 | 20 | #include "nsUnicharUtils.h" |
michael@0 | 21 | #include "nsCRT.h" |
michael@0 | 22 | #include "mozilla/dom/Element.h" |
michael@0 | 23 | #include "mozilla/Preferences.h" |
michael@0 | 24 | |
michael@0 | 25 | using namespace mozilla; |
michael@0 | 26 | using namespace mozilla::dom; |
michael@0 | 27 | |
michael@0 | 28 | #define PREF_STRUCTS "converter.html2txt.structs" |
michael@0 | 29 | #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy" |
michael@0 | 30 | |
michael@0 | 31 | static const int32_t kTabSize=4; |
michael@0 | 32 | static const int32_t kIndentSizeHeaders = 2; /* Indention of h1, if |
michael@0 | 33 | mHeaderStrategy = 1 or = 2. |
michael@0 | 34 | Indention of other headers |
michael@0 | 35 | is derived from that. |
michael@0 | 36 | XXX center h1? */ |
michael@0 | 37 | static const int32_t kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1, |
michael@0 | 38 | indent h(x+1) this many |
michael@0 | 39 | columns more than h(x) */ |
michael@0 | 40 | static const int32_t kIndentSizeList = kTabSize; |
michael@0 | 41 | // Indention of non-first lines of ul and ol |
michael@0 | 42 | static const int32_t kIndentSizeDD = kTabSize; // Indention of <dd> |
michael@0 | 43 | static const char16_t kNBSP = 160; |
michael@0 | 44 | static const char16_t kSPACE = ' '; |
michael@0 | 45 | |
michael@0 | 46 | static int32_t HeaderLevel(nsIAtom* aTag); |
michael@0 | 47 | static int32_t GetUnicharWidth(char16_t ucs); |
michael@0 | 48 | static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n); |
michael@0 | 49 | |
michael@0 | 50 | // Someday may want to make this non-const: |
michael@0 | 51 | static const uint32_t TagStackSize = 500; |
michael@0 | 52 | static const uint32_t OLStackSize = 100; |
michael@0 | 53 | |
michael@0 | 54 | nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) |
michael@0 | 55 | { |
michael@0 | 56 | nsPlainTextSerializer* it = new nsPlainTextSerializer(); |
michael@0 | 57 | if (!it) { |
michael@0 | 58 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 59 | } |
michael@0 | 60 | |
michael@0 | 61 | return CallQueryInterface(it, aSerializer); |
michael@0 | 62 | } |
michael@0 | 63 | |
michael@0 | 64 | nsPlainTextSerializer::nsPlainTextSerializer() |
michael@0 | 65 | : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant" |
michael@0 | 66 | { |
michael@0 | 67 | |
michael@0 | 68 | mOutputString = nullptr; |
michael@0 | 69 | mHeadLevel = 0; |
michael@0 | 70 | mAtFirstColumn = true; |
michael@0 | 71 | mIndent = 0; |
michael@0 | 72 | mCiteQuoteLevel = 0; |
michael@0 | 73 | mStructs = true; // will be read from prefs later |
michael@0 | 74 | mHeaderStrategy = 1 /*indent increasingly*/; // ditto |
michael@0 | 75 | mDontWrapAnyQuotes = false; // ditto |
michael@0 | 76 | mHasWrittenCiteBlockquote = false; |
michael@0 | 77 | mSpanLevel = 0; |
michael@0 | 78 | for (int32_t i = 0; i <= 6; i++) { |
michael@0 | 79 | mHeaderCounter[i] = 0; |
michael@0 | 80 | } |
michael@0 | 81 | |
michael@0 | 82 | // Line breaker |
michael@0 | 83 | mWrapColumn = 72; // XXX magic number, we expect someone to reset this |
michael@0 | 84 | mCurrentLineWidth = 0; |
michael@0 | 85 | |
michael@0 | 86 | // Flow |
michael@0 | 87 | mEmptyLines = 1; // The start of the document is an "empty line" in itself, |
michael@0 | 88 | mInWhitespace = false; |
michael@0 | 89 | mPreFormatted = false; |
michael@0 | 90 | mStartedOutput = false; |
michael@0 | 91 | |
michael@0 | 92 | // initialize the tag stack to zero: |
michael@0 | 93 | // The stack only ever contains pointers to static atoms, so they don't |
michael@0 | 94 | // need refcounting. |
michael@0 | 95 | mTagStack = new nsIAtom*[TagStackSize]; |
michael@0 | 96 | mTagStackIndex = 0; |
michael@0 | 97 | mIgnoreAboveIndex = (uint32_t)kNotFound; |
michael@0 | 98 | |
michael@0 | 99 | // initialize the OL stack, where numbers for ordered lists are kept |
michael@0 | 100 | mOLStack = new int32_t[OLStackSize]; |
michael@0 | 101 | mOLStackIndex = 0; |
michael@0 | 102 | |
michael@0 | 103 | mULCount = 0; |
michael@0 | 104 | |
michael@0 | 105 | mIgnoredChildNodeLevel = 0; |
michael@0 | 106 | } |
michael@0 | 107 | |
michael@0 | 108 | nsPlainTextSerializer::~nsPlainTextSerializer() |
michael@0 | 109 | { |
michael@0 | 110 | delete[] mTagStack; |
michael@0 | 111 | delete[] mOLStack; |
michael@0 | 112 | NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!"); |
michael@0 | 113 | } |
michael@0 | 114 | |
michael@0 | 115 | NS_IMPL_ISUPPORTS(nsPlainTextSerializer, |
michael@0 | 116 | nsIContentSerializer) |
michael@0 | 117 | |
michael@0 | 118 | |
michael@0 | 119 | NS_IMETHODIMP |
michael@0 | 120 | nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn, |
michael@0 | 121 | const char* aCharSet, bool aIsCopying, |
michael@0 | 122 | bool aIsWholeDocument) |
michael@0 | 123 | { |
michael@0 | 124 | #ifdef DEBUG |
michael@0 | 125 | // Check if the major control flags are set correctly. |
michael@0 | 126 | if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
michael@0 | 127 | NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted, |
michael@0 | 128 | "If you want format=flowed, you must combine it with " |
michael@0 | 129 | "nsIDocumentEncoder::OutputFormatted"); |
michael@0 | 130 | } |
michael@0 | 131 | |
michael@0 | 132 | if (aFlags & nsIDocumentEncoder::OutputFormatted) { |
michael@0 | 133 | NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted), |
michael@0 | 134 | "Can't do formatted and preformatted output at the same time!"); |
michael@0 | 135 | } |
michael@0 | 136 | #endif |
michael@0 | 137 | |
michael@0 | 138 | mFlags = aFlags; |
michael@0 | 139 | mWrapColumn = aWrapColumn; |
michael@0 | 140 | |
michael@0 | 141 | // Only create a linebreaker if we will handle wrapping. |
michael@0 | 142 | if (MayWrap()) { |
michael@0 | 143 | mLineBreaker = nsContentUtils::LineBreaker(); |
michael@0 | 144 | } |
michael@0 | 145 | |
michael@0 | 146 | // Set the line break character: |
michael@0 | 147 | if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) |
michael@0 | 148 | && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { |
michael@0 | 149 | // Windows |
michael@0 | 150 | mLineBreak.AssignLiteral("\r\n"); |
michael@0 | 151 | } |
michael@0 | 152 | else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { |
michael@0 | 153 | // Mac |
michael@0 | 154 | mLineBreak.Assign(char16_t('\r')); |
michael@0 | 155 | } |
michael@0 | 156 | else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { |
michael@0 | 157 | // Unix/DOM |
michael@0 | 158 | mLineBreak.Assign(char16_t('\n')); |
michael@0 | 159 | } |
michael@0 | 160 | else { |
michael@0 | 161 | // Platform/default |
michael@0 | 162 | mLineBreak.AssignLiteral(NS_LINEBREAK); |
michael@0 | 163 | } |
michael@0 | 164 | |
michael@0 | 165 | mLineBreakDue = false; |
michael@0 | 166 | mFloatingLines = -1; |
michael@0 | 167 | |
michael@0 | 168 | if (mFlags & nsIDocumentEncoder::OutputFormatted) { |
michael@0 | 169 | // Get some prefs that controls how we do formatted output |
michael@0 | 170 | mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs); |
michael@0 | 171 | |
michael@0 | 172 | mHeaderStrategy = |
michael@0 | 173 | Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy); |
michael@0 | 174 | |
michael@0 | 175 | // DontWrapAnyQuotes is set according to whether plaintext mail |
michael@0 | 176 | // is wrapping to window width -- see bug 134439. |
michael@0 | 177 | // We'll only want this if we're wrapping and formatted. |
michael@0 | 178 | if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) { |
michael@0 | 179 | mDontWrapAnyQuotes = |
michael@0 | 180 | Preferences::GetBool("mail.compose.wrap_to_window_width", |
michael@0 | 181 | mDontWrapAnyQuotes); |
michael@0 | 182 | } |
michael@0 | 183 | } |
michael@0 | 184 | |
michael@0 | 185 | // XXX We should let the caller pass this in. |
michael@0 | 186 | if (Preferences::GetBool("browser.frames.enabled")) { |
michael@0 | 187 | mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent; |
michael@0 | 188 | } |
michael@0 | 189 | else { |
michael@0 | 190 | mFlags |= nsIDocumentEncoder::OutputNoFramesContent; |
michael@0 | 191 | } |
michael@0 | 192 | |
michael@0 | 193 | return NS_OK; |
michael@0 | 194 | } |
michael@0 | 195 | |
michael@0 | 196 | bool |
michael@0 | 197 | nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) |
michael@0 | 198 | { |
michael@0 | 199 | uint32_t size = aStack.Length(); |
michael@0 | 200 | if (size == 0) { |
michael@0 | 201 | return false; |
michael@0 | 202 | } |
michael@0 | 203 | return aStack.ElementAt(size-1); |
michael@0 | 204 | } |
michael@0 | 205 | |
michael@0 | 206 | void |
michael@0 | 207 | nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) |
michael@0 | 208 | { |
michael@0 | 209 | uint32_t size = aStack.Length(); |
michael@0 | 210 | if (size > 0) { |
michael@0 | 211 | aStack.ElementAt(size-1) = aValue; |
michael@0 | 212 | } |
michael@0 | 213 | else { |
michael@0 | 214 | NS_ERROR("There is no \"Last\" value"); |
michael@0 | 215 | } |
michael@0 | 216 | } |
michael@0 | 217 | |
michael@0 | 218 | void |
michael@0 | 219 | nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) |
michael@0 | 220 | { |
michael@0 | 221 | aStack.AppendElement(bool(aValue)); |
michael@0 | 222 | } |
michael@0 | 223 | |
michael@0 | 224 | bool |
michael@0 | 225 | nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) |
michael@0 | 226 | { |
michael@0 | 227 | bool returnValue = false; |
michael@0 | 228 | uint32_t size = aStack.Length(); |
michael@0 | 229 | if (size > 0) { |
michael@0 | 230 | returnValue = aStack.ElementAt(size-1); |
michael@0 | 231 | aStack.RemoveElementAt(size-1); |
michael@0 | 232 | } |
michael@0 | 233 | return returnValue; |
michael@0 | 234 | } |
michael@0 | 235 | |
michael@0 | 236 | bool |
michael@0 | 237 | nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag) |
michael@0 | 238 | { |
michael@0 | 239 | // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, |
michael@0 | 240 | // non-textual container element should be serialized as placeholder |
michael@0 | 241 | // character and its child nodes should be ignored. See bug 895239. |
michael@0 | 242 | if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) { |
michael@0 | 243 | return false; |
michael@0 | 244 | } |
michael@0 | 245 | |
michael@0 | 246 | return |
michael@0 | 247 | (aTag == nsGkAtoms::audio) || |
michael@0 | 248 | (aTag == nsGkAtoms::canvas) || |
michael@0 | 249 | (aTag == nsGkAtoms::iframe) || |
michael@0 | 250 | (aTag == nsGkAtoms::meter) || |
michael@0 | 251 | (aTag == nsGkAtoms::progress) || |
michael@0 | 252 | (aTag == nsGkAtoms::object) || |
michael@0 | 253 | (aTag == nsGkAtoms::svg) || |
michael@0 | 254 | (aTag == nsGkAtoms::video); |
michael@0 | 255 | } |
michael@0 | 256 | |
michael@0 | 257 | NS_IMETHODIMP |
michael@0 | 258 | nsPlainTextSerializer::AppendText(nsIContent* aText, |
michael@0 | 259 | int32_t aStartOffset, |
michael@0 | 260 | int32_t aEndOffset, |
michael@0 | 261 | nsAString& aStr) |
michael@0 | 262 | { |
michael@0 | 263 | if (mIgnoreAboveIndex != (uint32_t)kNotFound) { |
michael@0 | 264 | return NS_OK; |
michael@0 | 265 | } |
michael@0 | 266 | |
michael@0 | 267 | NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); |
michael@0 | 268 | if ( aStartOffset < 0 ) |
michael@0 | 269 | return NS_ERROR_INVALID_ARG; |
michael@0 | 270 | |
michael@0 | 271 | NS_ENSURE_ARG(aText); |
michael@0 | 272 | |
michael@0 | 273 | nsresult rv = NS_OK; |
michael@0 | 274 | |
michael@0 | 275 | nsIContent* content = aText; |
michael@0 | 276 | const nsTextFragment* frag; |
michael@0 | 277 | if (!content || !(frag = content->GetText())) { |
michael@0 | 278 | return NS_ERROR_FAILURE; |
michael@0 | 279 | } |
michael@0 | 280 | |
michael@0 | 281 | int32_t fragLength = frag->GetLength(); |
michael@0 | 282 | int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength); |
michael@0 | 283 | NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); |
michael@0 | 284 | |
michael@0 | 285 | int32_t length = endoffset - aStartOffset; |
michael@0 | 286 | if (length <= 0) { |
michael@0 | 287 | return NS_OK; |
michael@0 | 288 | } |
michael@0 | 289 | |
michael@0 | 290 | nsAutoString textstr; |
michael@0 | 291 | if (frag->Is2b()) { |
michael@0 | 292 | textstr.Assign(frag->Get2b() + aStartOffset, length); |
michael@0 | 293 | } |
michael@0 | 294 | else { |
michael@0 | 295 | // AssignASCII is for 7-bit character only, so don't use it |
michael@0 | 296 | const char *data = frag->Get1b(); |
michael@0 | 297 | CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr); |
michael@0 | 298 | } |
michael@0 | 299 | |
michael@0 | 300 | mOutputString = &aStr; |
michael@0 | 301 | |
michael@0 | 302 | // We have to split the string across newlines |
michael@0 | 303 | // to match parser behavior |
michael@0 | 304 | int32_t start = 0; |
michael@0 | 305 | int32_t offset = textstr.FindCharInSet("\n\r"); |
michael@0 | 306 | while (offset != kNotFound) { |
michael@0 | 307 | |
michael@0 | 308 | if (offset>start) { |
michael@0 | 309 | // Pass in the line |
michael@0 | 310 | DoAddText(false, |
michael@0 | 311 | Substring(textstr, start, offset-start)); |
michael@0 | 312 | } |
michael@0 | 313 | |
michael@0 | 314 | // Pass in a newline |
michael@0 | 315 | DoAddText(true, mLineBreak); |
michael@0 | 316 | |
michael@0 | 317 | start = offset+1; |
michael@0 | 318 | offset = textstr.FindCharInSet("\n\r", start); |
michael@0 | 319 | } |
michael@0 | 320 | |
michael@0 | 321 | // Consume the last bit of the string if there's any left |
michael@0 | 322 | if (start < length) { |
michael@0 | 323 | if (start) { |
michael@0 | 324 | DoAddText(false, Substring(textstr, start, length - start)); |
michael@0 | 325 | } |
michael@0 | 326 | else { |
michael@0 | 327 | DoAddText(false, textstr); |
michael@0 | 328 | } |
michael@0 | 329 | } |
michael@0 | 330 | |
michael@0 | 331 | mOutputString = nullptr; |
michael@0 | 332 | |
michael@0 | 333 | return rv; |
michael@0 | 334 | } |
michael@0 | 335 | |
michael@0 | 336 | NS_IMETHODIMP |
michael@0 | 337 | nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection, |
michael@0 | 338 | int32_t aStartOffset, |
michael@0 | 339 | int32_t aEndOffset, |
michael@0 | 340 | nsAString& aStr) |
michael@0 | 341 | { |
michael@0 | 342 | return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr); |
michael@0 | 343 | } |
michael@0 | 344 | |
michael@0 | 345 | NS_IMETHODIMP |
michael@0 | 346 | nsPlainTextSerializer::AppendElementStart(Element* aElement, |
michael@0 | 347 | Element* aOriginalElement, |
michael@0 | 348 | nsAString& aStr) |
michael@0 | 349 | { |
michael@0 | 350 | NS_ENSURE_ARG(aElement); |
michael@0 | 351 | |
michael@0 | 352 | mElement = aElement; |
michael@0 | 353 | |
michael@0 | 354 | nsresult rv; |
michael@0 | 355 | nsIAtom* id = GetIdForContent(mElement); |
michael@0 | 356 | |
michael@0 | 357 | bool isContainer = !nsContentUtils::IsHTMLVoid(id); |
michael@0 | 358 | |
michael@0 | 359 | mOutputString = &aStr; |
michael@0 | 360 | |
michael@0 | 361 | if (isContainer) { |
michael@0 | 362 | rv = DoOpenContainer(id); |
michael@0 | 363 | } |
michael@0 | 364 | else { |
michael@0 | 365 | rv = DoAddLeaf(id); |
michael@0 | 366 | } |
michael@0 | 367 | |
michael@0 | 368 | mElement = nullptr; |
michael@0 | 369 | mOutputString = nullptr; |
michael@0 | 370 | |
michael@0 | 371 | if (id == nsGkAtoms::head) { |
michael@0 | 372 | ++mHeadLevel; |
michael@0 | 373 | } |
michael@0 | 374 | |
michael@0 | 375 | return rv; |
michael@0 | 376 | } |
michael@0 | 377 | |
michael@0 | 378 | NS_IMETHODIMP |
michael@0 | 379 | nsPlainTextSerializer::AppendElementEnd(Element* aElement, |
michael@0 | 380 | nsAString& aStr) |
michael@0 | 381 | { |
michael@0 | 382 | NS_ENSURE_ARG(aElement); |
michael@0 | 383 | |
michael@0 | 384 | mElement = aElement; |
michael@0 | 385 | |
michael@0 | 386 | nsresult rv; |
michael@0 | 387 | nsIAtom* id = GetIdForContent(mElement); |
michael@0 | 388 | |
michael@0 | 389 | bool isContainer = !nsContentUtils::IsHTMLVoid(id); |
michael@0 | 390 | |
michael@0 | 391 | mOutputString = &aStr; |
michael@0 | 392 | |
michael@0 | 393 | rv = NS_OK; |
michael@0 | 394 | if (isContainer) { |
michael@0 | 395 | rv = DoCloseContainer(id); |
michael@0 | 396 | } |
michael@0 | 397 | |
michael@0 | 398 | mElement = nullptr; |
michael@0 | 399 | mOutputString = nullptr; |
michael@0 | 400 | |
michael@0 | 401 | if (id == nsGkAtoms::head) { |
michael@0 | 402 | NS_ASSERTION(mHeadLevel != 0, |
michael@0 | 403 | "mHeadLevel being decremented below 0"); |
michael@0 | 404 | --mHeadLevel; |
michael@0 | 405 | } |
michael@0 | 406 | |
michael@0 | 407 | return rv; |
michael@0 | 408 | } |
michael@0 | 409 | |
michael@0 | 410 | NS_IMETHODIMP |
michael@0 | 411 | nsPlainTextSerializer::Flush(nsAString& aStr) |
michael@0 | 412 | { |
michael@0 | 413 | mOutputString = &aStr; |
michael@0 | 414 | FlushLine(); |
michael@0 | 415 | mOutputString = nullptr; |
michael@0 | 416 | return NS_OK; |
michael@0 | 417 | } |
michael@0 | 418 | |
michael@0 | 419 | NS_IMETHODIMP |
michael@0 | 420 | nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument, |
michael@0 | 421 | nsAString& aStr) |
michael@0 | 422 | { |
michael@0 | 423 | return NS_OK; |
michael@0 | 424 | } |
michael@0 | 425 | |
michael@0 | 426 | nsresult |
michael@0 | 427 | nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag) |
michael@0 | 428 | { |
michael@0 | 429 | // Check if we need output current node as placeholder character and ignore |
michael@0 | 430 | // child nodes. |
michael@0 | 431 | if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { |
michael@0 | 432 | if (mIgnoredChildNodeLevel == 0) { |
michael@0 | 433 | // Serialize current node as placeholder character |
michael@0 | 434 | Write(NS_LITERAL_STRING("\xFFFC")); |
michael@0 | 435 | } |
michael@0 | 436 | // Ignore child nodes. |
michael@0 | 437 | mIgnoredChildNodeLevel++; |
michael@0 | 438 | return NS_OK; |
michael@0 | 439 | } |
michael@0 | 440 | |
michael@0 | 441 | if (mFlags & nsIDocumentEncoder::OutputRaw) { |
michael@0 | 442 | // Raw means raw. Don't even think about doing anything fancy |
michael@0 | 443 | // here like indenting, adding line breaks or any other |
michael@0 | 444 | // characters such as list item bullets, quote characters |
michael@0 | 445 | // around <q>, etc. I mean it! Don't make me smack you! |
michael@0 | 446 | |
michael@0 | 447 | return NS_OK; |
michael@0 | 448 | } |
michael@0 | 449 | |
michael@0 | 450 | if (mTagStackIndex < TagStackSize) { |
michael@0 | 451 | mTagStack[mTagStackIndex++] = aTag; |
michael@0 | 452 | } |
michael@0 | 453 | |
michael@0 | 454 | if (mIgnoreAboveIndex != (uint32_t)kNotFound) { |
michael@0 | 455 | return NS_OK; |
michael@0 | 456 | } |
michael@0 | 457 | |
michael@0 | 458 | // Reset this so that <blockquote type=cite> doesn't affect the whitespace |
michael@0 | 459 | // above random <pre>s below it. |
michael@0 | 460 | mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote && |
michael@0 | 461 | aTag == nsGkAtoms::pre; |
michael@0 | 462 | |
michael@0 | 463 | bool isInCiteBlockquote = false; |
michael@0 | 464 | |
michael@0 | 465 | // XXX special-case <blockquote type=cite> so that we don't add additional |
michael@0 | 466 | // newlines before the text. |
michael@0 | 467 | if (aTag == nsGkAtoms::blockquote) { |
michael@0 | 468 | nsAutoString value; |
michael@0 | 469 | nsresult rv = GetAttributeValue(nsGkAtoms::type, value); |
michael@0 | 470 | isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite"); |
michael@0 | 471 | } |
michael@0 | 472 | |
michael@0 | 473 | if (mLineBreakDue && !isInCiteBlockquote) |
michael@0 | 474 | EnsureVerticalSpace(mFloatingLines); |
michael@0 | 475 | |
michael@0 | 476 | // Check if this tag's content that should not be output |
michael@0 | 477 | if ((aTag == nsGkAtoms::noscript && |
michael@0 | 478 | !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) || |
michael@0 | 479 | ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) && |
michael@0 | 480 | !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) { |
michael@0 | 481 | // Ignore everything that follows the current tag in |
michael@0 | 482 | // question until a matching end tag is encountered. |
michael@0 | 483 | mIgnoreAboveIndex = mTagStackIndex - 1; |
michael@0 | 484 | return NS_OK; |
michael@0 | 485 | } |
michael@0 | 486 | |
michael@0 | 487 | if (aTag == nsGkAtoms::body) { |
michael@0 | 488 | // Try to figure out here whether we have a |
michael@0 | 489 | // preformatted style attribute. |
michael@0 | 490 | // |
michael@0 | 491 | // Trigger on the presence of a "pre-wrap" in the |
michael@0 | 492 | // style attribute. That's a very simplistic way to do |
michael@0 | 493 | // it, but better than nothing. |
michael@0 | 494 | // Also set mWrapColumn to the value given there |
michael@0 | 495 | // (which arguably we should only do if told to do so). |
michael@0 | 496 | nsAutoString style; |
michael@0 | 497 | int32_t whitespace; |
michael@0 | 498 | if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) && |
michael@0 | 499 | (kNotFound != (whitespace = style.Find("white-space:")))) { |
michael@0 | 500 | |
michael@0 | 501 | if (kNotFound != style.Find("pre-wrap", true, whitespace)) { |
michael@0 | 502 | #ifdef DEBUG_preformatted |
michael@0 | 503 | printf("Set mPreFormatted based on style pre-wrap\n"); |
michael@0 | 504 | #endif |
michael@0 | 505 | mPreFormatted = true; |
michael@0 | 506 | int32_t widthOffset = style.Find("width:"); |
michael@0 | 507 | if (widthOffset >= 0) { |
michael@0 | 508 | // We have to search for the ch before the semicolon, |
michael@0 | 509 | // not for the semicolon itself, because nsString::ToInteger() |
michael@0 | 510 | // considers 'c' to be a valid numeric char (even if radix=10) |
michael@0 | 511 | // but then gets confused if it sees it next to the number |
michael@0 | 512 | // when the radix specified was 10, and returns an error code. |
michael@0 | 513 | int32_t semiOffset = style.Find("ch", false, widthOffset+6); |
michael@0 | 514 | int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6 |
michael@0 | 515 | : style.Length() - widthOffset); |
michael@0 | 516 | nsAutoString widthstr; |
michael@0 | 517 | style.Mid(widthstr, widthOffset+6, length); |
michael@0 | 518 | nsresult err; |
michael@0 | 519 | int32_t col = widthstr.ToInteger(&err); |
michael@0 | 520 | |
michael@0 | 521 | if (NS_SUCCEEDED(err)) { |
michael@0 | 522 | mWrapColumn = (uint32_t)col; |
michael@0 | 523 | #ifdef DEBUG_preformatted |
michael@0 | 524 | printf("Set wrap column to %d based on style\n", mWrapColumn); |
michael@0 | 525 | #endif |
michael@0 | 526 | } |
michael@0 | 527 | } |
michael@0 | 528 | } |
michael@0 | 529 | else if (kNotFound != style.Find("pre", true, whitespace)) { |
michael@0 | 530 | #ifdef DEBUG_preformatted |
michael@0 | 531 | printf("Set mPreFormatted based on style pre\n"); |
michael@0 | 532 | #endif |
michael@0 | 533 | mPreFormatted = true; |
michael@0 | 534 | mWrapColumn = 0; |
michael@0 | 535 | } |
michael@0 | 536 | } |
michael@0 | 537 | else { |
michael@0 | 538 | /* See comment at end of function. */ |
michael@0 | 539 | mInWhitespace = true; |
michael@0 | 540 | mPreFormatted = false; |
michael@0 | 541 | } |
michael@0 | 542 | |
michael@0 | 543 | return NS_OK; |
michael@0 | 544 | } |
michael@0 | 545 | |
michael@0 | 546 | // Keep this in sync with DoCloseContainer! |
michael@0 | 547 | if (!DoOutput()) { |
michael@0 | 548 | return NS_OK; |
michael@0 | 549 | } |
michael@0 | 550 | |
michael@0 | 551 | if (aTag == nsGkAtoms::p) |
michael@0 | 552 | EnsureVerticalSpace(1); |
michael@0 | 553 | else if (aTag == nsGkAtoms::pre) { |
michael@0 | 554 | if (GetLastBool(mIsInCiteBlockquote)) |
michael@0 | 555 | EnsureVerticalSpace(0); |
michael@0 | 556 | else if (mHasWrittenCiteBlockquote) { |
michael@0 | 557 | EnsureVerticalSpace(0); |
michael@0 | 558 | mHasWrittenCiteBlockquote = false; |
michael@0 | 559 | } |
michael@0 | 560 | else |
michael@0 | 561 | EnsureVerticalSpace(1); |
michael@0 | 562 | } |
michael@0 | 563 | else if (aTag == nsGkAtoms::tr) { |
michael@0 | 564 | PushBool(mHasWrittenCellsForRow, false); |
michael@0 | 565 | } |
michael@0 | 566 | else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) { |
michael@0 | 567 | // We must make sure that the content of two table cells get a |
michael@0 | 568 | // space between them. |
michael@0 | 569 | |
michael@0 | 570 | // To make the separation between cells most obvious and |
michael@0 | 571 | // importable, we use a TAB. |
michael@0 | 572 | if (GetLastBool(mHasWrittenCellsForRow)) { |
michael@0 | 573 | // Bypass |Write| so that the TAB isn't compressed away. |
michael@0 | 574 | AddToLine(MOZ_UTF16("\t"), 1); |
michael@0 | 575 | mInWhitespace = true; |
michael@0 | 576 | } |
michael@0 | 577 | else if (mHasWrittenCellsForRow.IsEmpty()) { |
michael@0 | 578 | // We don't always see a <tr> (nor a <table>) before the <td> if we're |
michael@0 | 579 | // copying part of a table |
michael@0 | 580 | PushBool(mHasWrittenCellsForRow, true); // will never be popped |
michael@0 | 581 | } |
michael@0 | 582 | else { |
michael@0 | 583 | SetLastBool(mHasWrittenCellsForRow, true); |
michael@0 | 584 | } |
michael@0 | 585 | } |
michael@0 | 586 | else if (aTag == nsGkAtoms::ul) { |
michael@0 | 587 | // Indent here to support nested lists, which aren't included in li :-( |
michael@0 | 588 | EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); |
michael@0 | 589 | // Must end the current line before we change indention |
michael@0 | 590 | mIndent += kIndentSizeList; |
michael@0 | 591 | mULCount++; |
michael@0 | 592 | } |
michael@0 | 593 | else if (aTag == nsGkAtoms::ol) { |
michael@0 | 594 | EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); |
michael@0 | 595 | if (mFlags & nsIDocumentEncoder::OutputFormatted) { |
michael@0 | 596 | // Must end the current line before we change indention |
michael@0 | 597 | if (mOLStackIndex < OLStackSize) { |
michael@0 | 598 | nsAutoString startAttr; |
michael@0 | 599 | int32_t startVal = 1; |
michael@0 | 600 | if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) { |
michael@0 | 601 | nsresult rv = NS_OK; |
michael@0 | 602 | startVal = startAttr.ToInteger(&rv); |
michael@0 | 603 | if (NS_FAILED(rv)) |
michael@0 | 604 | startVal = 1; |
michael@0 | 605 | } |
michael@0 | 606 | mOLStack[mOLStackIndex++] = startVal; |
michael@0 | 607 | } |
michael@0 | 608 | } else { |
michael@0 | 609 | mOLStackIndex++; |
michael@0 | 610 | } |
michael@0 | 611 | mIndent += kIndentSizeList; // see ul |
michael@0 | 612 | } |
michael@0 | 613 | else if (aTag == nsGkAtoms::li && |
michael@0 | 614 | (mFlags & nsIDocumentEncoder::OutputFormatted)) { |
michael@0 | 615 | if (mTagStackIndex > 1 && IsInOL()) { |
michael@0 | 616 | if (mOLStackIndex > 0) { |
michael@0 | 617 | nsAutoString valueAttr; |
michael@0 | 618 | if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) { |
michael@0 | 619 | nsresult rv = NS_OK; |
michael@0 | 620 | int32_t valueAttrVal = valueAttr.ToInteger(&rv); |
michael@0 | 621 | if (NS_SUCCEEDED(rv)) |
michael@0 | 622 | mOLStack[mOLStackIndex-1] = valueAttrVal; |
michael@0 | 623 | } |
michael@0 | 624 | // This is what nsBulletFrame does for OLs: |
michael@0 | 625 | mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10); |
michael@0 | 626 | } |
michael@0 | 627 | else { |
michael@0 | 628 | mInIndentString.Append(char16_t('#')); |
michael@0 | 629 | } |
michael@0 | 630 | |
michael@0 | 631 | mInIndentString.Append(char16_t('.')); |
michael@0 | 632 | |
michael@0 | 633 | } |
michael@0 | 634 | else { |
michael@0 | 635 | static char bulletCharArray[] = "*o+#"; |
michael@0 | 636 | uint32_t index = mULCount > 0 ? (mULCount - 1) : 3; |
michael@0 | 637 | char bulletChar = bulletCharArray[index % 4]; |
michael@0 | 638 | mInIndentString.Append(char16_t(bulletChar)); |
michael@0 | 639 | } |
michael@0 | 640 | |
michael@0 | 641 | mInIndentString.Append(char16_t(' ')); |
michael@0 | 642 | } |
michael@0 | 643 | else if (aTag == nsGkAtoms::dl) { |
michael@0 | 644 | EnsureVerticalSpace(1); |
michael@0 | 645 | } |
michael@0 | 646 | else if (aTag == nsGkAtoms::dt) { |
michael@0 | 647 | EnsureVerticalSpace(0); |
michael@0 | 648 | } |
michael@0 | 649 | else if (aTag == nsGkAtoms::dd) { |
michael@0 | 650 | EnsureVerticalSpace(0); |
michael@0 | 651 | mIndent += kIndentSizeDD; |
michael@0 | 652 | } |
michael@0 | 653 | else if (aTag == nsGkAtoms::span) { |
michael@0 | 654 | ++mSpanLevel; |
michael@0 | 655 | } |
michael@0 | 656 | else if (aTag == nsGkAtoms::blockquote) { |
michael@0 | 657 | // Push |
michael@0 | 658 | PushBool(mIsInCiteBlockquote, isInCiteBlockquote); |
michael@0 | 659 | if (isInCiteBlockquote) { |
michael@0 | 660 | EnsureVerticalSpace(0); |
michael@0 | 661 | mCiteQuoteLevel++; |
michael@0 | 662 | } |
michael@0 | 663 | else { |
michael@0 | 664 | EnsureVerticalSpace(1); |
michael@0 | 665 | mIndent += kTabSize; // Check for some maximum value? |
michael@0 | 666 | } |
michael@0 | 667 | } |
michael@0 | 668 | else if (aTag == nsGkAtoms::q) { |
michael@0 | 669 | Write(NS_LITERAL_STRING("\"")); |
michael@0 | 670 | } |
michael@0 | 671 | |
michael@0 | 672 | // Else make sure we'll separate block level tags, |
michael@0 | 673 | // even if we're about to leave, before doing any other formatting. |
michael@0 | 674 | else if (nsContentUtils::IsHTMLBlock(aTag)) { |
michael@0 | 675 | EnsureVerticalSpace(0); |
michael@0 | 676 | } |
michael@0 | 677 | |
michael@0 | 678 | ////////////////////////////////////////////////////////////// |
michael@0 | 679 | if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { |
michael@0 | 680 | return NS_OK; |
michael@0 | 681 | } |
michael@0 | 682 | ////////////////////////////////////////////////////////////// |
michael@0 | 683 | // The rest of this routine is formatted output stuff, |
michael@0 | 684 | // which we should skip if we're not formatted: |
michael@0 | 685 | ////////////////////////////////////////////////////////////// |
michael@0 | 686 | |
michael@0 | 687 | // Push on stack |
michael@0 | 688 | bool currentNodeIsConverted = IsCurrentNodeConverted(); |
michael@0 | 689 | |
michael@0 | 690 | if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || |
michael@0 | 691 | aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || |
michael@0 | 692 | aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) |
michael@0 | 693 | { |
michael@0 | 694 | EnsureVerticalSpace(2); |
michael@0 | 695 | if (mHeaderStrategy == 2) { // numbered |
michael@0 | 696 | mIndent += kIndentSizeHeaders; |
michael@0 | 697 | // Caching |
michael@0 | 698 | int32_t level = HeaderLevel(aTag); |
michael@0 | 699 | // Increase counter for current level |
michael@0 | 700 | mHeaderCounter[level]++; |
michael@0 | 701 | // Reset all lower levels |
michael@0 | 702 | int32_t i; |
michael@0 | 703 | |
michael@0 | 704 | for (i = level + 1; i <= 6; i++) { |
michael@0 | 705 | mHeaderCounter[i] = 0; |
michael@0 | 706 | } |
michael@0 | 707 | |
michael@0 | 708 | // Construct numbers |
michael@0 | 709 | nsAutoString leadup; |
michael@0 | 710 | for (i = 1; i <= level; i++) { |
michael@0 | 711 | leadup.AppendInt(mHeaderCounter[i]); |
michael@0 | 712 | leadup.Append(char16_t('.')); |
michael@0 | 713 | } |
michael@0 | 714 | leadup.Append(char16_t(' ')); |
michael@0 | 715 | Write(leadup); |
michael@0 | 716 | } |
michael@0 | 717 | else if (mHeaderStrategy == 1) { // indent increasingly |
michael@0 | 718 | mIndent += kIndentSizeHeaders; |
michael@0 | 719 | for (int32_t i = HeaderLevel(aTag); i > 1; i--) { |
michael@0 | 720 | // for h(x), run x-1 times |
michael@0 | 721 | mIndent += kIndentIncrementHeaders; |
michael@0 | 722 | } |
michael@0 | 723 | } |
michael@0 | 724 | } |
michael@0 | 725 | else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) { |
michael@0 | 726 | nsAutoString url; |
michael@0 | 727 | if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) |
michael@0 | 728 | && !url.IsEmpty()) { |
michael@0 | 729 | mURL = url; |
michael@0 | 730 | } |
michael@0 | 731 | } |
michael@0 | 732 | else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) { |
michael@0 | 733 | Write(NS_LITERAL_STRING("^")); |
michael@0 | 734 | } |
michael@0 | 735 | else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) { |
michael@0 | 736 | Write(NS_LITERAL_STRING("_")); |
michael@0 | 737 | } |
michael@0 | 738 | else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { |
michael@0 | 739 | Write(NS_LITERAL_STRING("|")); |
michael@0 | 740 | } |
michael@0 | 741 | else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) |
michael@0 | 742 | && mStructs && !currentNodeIsConverted) { |
michael@0 | 743 | Write(NS_LITERAL_STRING("*")); |
michael@0 | 744 | } |
michael@0 | 745 | else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) |
michael@0 | 746 | && mStructs && !currentNodeIsConverted) { |
michael@0 | 747 | Write(NS_LITERAL_STRING("/")); |
michael@0 | 748 | } |
michael@0 | 749 | else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { |
michael@0 | 750 | Write(NS_LITERAL_STRING("_")); |
michael@0 | 751 | } |
michael@0 | 752 | |
michael@0 | 753 | /* Container elements are always block elements, so we shouldn't |
michael@0 | 754 | output any whitespace immediately after the container tag even if |
michael@0 | 755 | there's extra whitespace there because the HTML is pretty-printed |
michael@0 | 756 | or something. To ensure that happens, tell the serializer we're |
michael@0 | 757 | already in whitespace so it won't output more. */ |
michael@0 | 758 | mInWhitespace = true; |
michael@0 | 759 | |
michael@0 | 760 | return NS_OK; |
michael@0 | 761 | } |
michael@0 | 762 | |
michael@0 | 763 | nsresult |
michael@0 | 764 | nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag) |
michael@0 | 765 | { |
michael@0 | 766 | if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { |
michael@0 | 767 | mIgnoredChildNodeLevel--; |
michael@0 | 768 | return NS_OK; |
michael@0 | 769 | } |
michael@0 | 770 | |
michael@0 | 771 | if (mFlags & nsIDocumentEncoder::OutputRaw) { |
michael@0 | 772 | // Raw means raw. Don't even think about doing anything fancy |
michael@0 | 773 | // here like indenting, adding line breaks or any other |
michael@0 | 774 | // characters such as list item bullets, quote characters |
michael@0 | 775 | // around <q>, etc. I mean it! Don't make me smack you! |
michael@0 | 776 | |
michael@0 | 777 | return NS_OK; |
michael@0 | 778 | } |
michael@0 | 779 | |
michael@0 | 780 | if (mTagStackIndex > 0) { |
michael@0 | 781 | --mTagStackIndex; |
michael@0 | 782 | } |
michael@0 | 783 | |
michael@0 | 784 | if (mTagStackIndex >= mIgnoreAboveIndex) { |
michael@0 | 785 | if (mTagStackIndex == mIgnoreAboveIndex) { |
michael@0 | 786 | // We're dealing with the close tag whose matching |
michael@0 | 787 | // open tag had set the mIgnoreAboveIndex value. |
michael@0 | 788 | // Reset mIgnoreAboveIndex before discarding this tag. |
michael@0 | 789 | mIgnoreAboveIndex = (uint32_t)kNotFound; |
michael@0 | 790 | } |
michael@0 | 791 | return NS_OK; |
michael@0 | 792 | } |
michael@0 | 793 | |
michael@0 | 794 | // End current line if we're ending a block level tag |
michael@0 | 795 | if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) { |
michael@0 | 796 | // We want the output to end with a new line, |
michael@0 | 797 | // but in preformatted areas like text fields, |
michael@0 | 798 | // we can't emit newlines that weren't there. |
michael@0 | 799 | // So add the newline only in the case of formatted output. |
michael@0 | 800 | if (mFlags & nsIDocumentEncoder::OutputFormatted) { |
michael@0 | 801 | EnsureVerticalSpace(0); |
michael@0 | 802 | } |
michael@0 | 803 | else { |
michael@0 | 804 | FlushLine(); |
michael@0 | 805 | } |
michael@0 | 806 | // We won't want to do anything with these in formatted mode either, |
michael@0 | 807 | // so just return now: |
michael@0 | 808 | return NS_OK; |
michael@0 | 809 | } |
michael@0 | 810 | |
michael@0 | 811 | // Keep this in sync with DoOpenContainer! |
michael@0 | 812 | if (!DoOutput()) { |
michael@0 | 813 | return NS_OK; |
michael@0 | 814 | } |
michael@0 | 815 | |
michael@0 | 816 | if (aTag == nsGkAtoms::tr) { |
michael@0 | 817 | PopBool(mHasWrittenCellsForRow); |
michael@0 | 818 | // Should always end a line, but get no more whitespace |
michael@0 | 819 | if (mFloatingLines < 0) |
michael@0 | 820 | mFloatingLines = 0; |
michael@0 | 821 | mLineBreakDue = true; |
michael@0 | 822 | } |
michael@0 | 823 | else if (((aTag == nsGkAtoms::li) || |
michael@0 | 824 | (aTag == nsGkAtoms::dt)) && |
michael@0 | 825 | (mFlags & nsIDocumentEncoder::OutputFormatted)) { |
michael@0 | 826 | // Items that should always end a line, but get no more whitespace |
michael@0 | 827 | if (mFloatingLines < 0) |
michael@0 | 828 | mFloatingLines = 0; |
michael@0 | 829 | mLineBreakDue = true; |
michael@0 | 830 | } |
michael@0 | 831 | else if (aTag == nsGkAtoms::pre) { |
michael@0 | 832 | mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1; |
michael@0 | 833 | mLineBreakDue = true; |
michael@0 | 834 | } |
michael@0 | 835 | else if (aTag == nsGkAtoms::ul) { |
michael@0 | 836 | FlushLine(); |
michael@0 | 837 | mIndent -= kIndentSizeList; |
michael@0 | 838 | if (--mULCount + mOLStackIndex == 0) { |
michael@0 | 839 | mFloatingLines = 1; |
michael@0 | 840 | mLineBreakDue = true; |
michael@0 | 841 | } |
michael@0 | 842 | } |
michael@0 | 843 | else if (aTag == nsGkAtoms::ol) { |
michael@0 | 844 | FlushLine(); // Doing this after decreasing OLStackIndex would be wrong. |
michael@0 | 845 | mIndent -= kIndentSizeList; |
michael@0 | 846 | NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!"); |
michael@0 | 847 | mOLStackIndex--; |
michael@0 | 848 | if (mULCount + mOLStackIndex == 0) { |
michael@0 | 849 | mFloatingLines = 1; |
michael@0 | 850 | mLineBreakDue = true; |
michael@0 | 851 | } |
michael@0 | 852 | } |
michael@0 | 853 | else if (aTag == nsGkAtoms::dl) { |
michael@0 | 854 | mFloatingLines = 1; |
michael@0 | 855 | mLineBreakDue = true; |
michael@0 | 856 | } |
michael@0 | 857 | else if (aTag == nsGkAtoms::dd) { |
michael@0 | 858 | FlushLine(); |
michael@0 | 859 | mIndent -= kIndentSizeDD; |
michael@0 | 860 | } |
michael@0 | 861 | else if (aTag == nsGkAtoms::span) { |
michael@0 | 862 | NS_ASSERTION(mSpanLevel, "Span level will be negative!"); |
michael@0 | 863 | --mSpanLevel; |
michael@0 | 864 | } |
michael@0 | 865 | else if (aTag == nsGkAtoms::div) { |
michael@0 | 866 | if (mFloatingLines < 0) |
michael@0 | 867 | mFloatingLines = 0; |
michael@0 | 868 | mLineBreakDue = true; |
michael@0 | 869 | } |
michael@0 | 870 | else if (aTag == nsGkAtoms::blockquote) { |
michael@0 | 871 | FlushLine(); // Is this needed? |
michael@0 | 872 | |
michael@0 | 873 | // Pop |
michael@0 | 874 | bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote); |
michael@0 | 875 | |
michael@0 | 876 | if (isInCiteBlockquote) { |
michael@0 | 877 | NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!"); |
michael@0 | 878 | mCiteQuoteLevel--; |
michael@0 | 879 | mFloatingLines = 0; |
michael@0 | 880 | mHasWrittenCiteBlockquote = true; |
michael@0 | 881 | } |
michael@0 | 882 | else { |
michael@0 | 883 | mIndent -= kTabSize; |
michael@0 | 884 | mFloatingLines = 1; |
michael@0 | 885 | } |
michael@0 | 886 | mLineBreakDue = true; |
michael@0 | 887 | } |
michael@0 | 888 | else if (aTag == nsGkAtoms::q) { |
michael@0 | 889 | Write(NS_LITERAL_STRING("\"")); |
michael@0 | 890 | } |
michael@0 | 891 | else if (nsContentUtils::IsHTMLBlock(aTag) |
michael@0 | 892 | && aTag != nsGkAtoms::script) { |
michael@0 | 893 | // All other blocks get 1 vertical space after them |
michael@0 | 894 | // in formatted mode, otherwise 0. |
michael@0 | 895 | // This is hard. Sometimes 0 is a better number, but |
michael@0 | 896 | // how to know? |
michael@0 | 897 | if (mFlags & nsIDocumentEncoder::OutputFormatted) |
michael@0 | 898 | EnsureVerticalSpace(1); |
michael@0 | 899 | else { |
michael@0 | 900 | if (mFloatingLines < 0) |
michael@0 | 901 | mFloatingLines = 0; |
michael@0 | 902 | mLineBreakDue = true; |
michael@0 | 903 | } |
michael@0 | 904 | } |
michael@0 | 905 | |
michael@0 | 906 | ////////////////////////////////////////////////////////////// |
michael@0 | 907 | if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { |
michael@0 | 908 | return NS_OK; |
michael@0 | 909 | } |
michael@0 | 910 | ////////////////////////////////////////////////////////////// |
michael@0 | 911 | // The rest of this routine is formatted output stuff, |
michael@0 | 912 | // which we should skip if we're not formatted: |
michael@0 | 913 | ////////////////////////////////////////////////////////////// |
michael@0 | 914 | |
michael@0 | 915 | // Pop the currentConverted stack |
michael@0 | 916 | bool currentNodeIsConverted = IsCurrentNodeConverted(); |
michael@0 | 917 | |
michael@0 | 918 | if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || |
michael@0 | 919 | aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || |
michael@0 | 920 | aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) { |
michael@0 | 921 | |
michael@0 | 922 | if (mHeaderStrategy) { /*numbered or indent increasingly*/ |
michael@0 | 923 | mIndent -= kIndentSizeHeaders; |
michael@0 | 924 | } |
michael@0 | 925 | if (mHeaderStrategy == 1 /*indent increasingly*/ ) { |
michael@0 | 926 | for (int32_t i = HeaderLevel(aTag); i > 1; i--) { |
michael@0 | 927 | // for h(x), run x-1 times |
michael@0 | 928 | mIndent -= kIndentIncrementHeaders; |
michael@0 | 929 | } |
michael@0 | 930 | } |
michael@0 | 931 | EnsureVerticalSpace(1); |
michael@0 | 932 | } |
michael@0 | 933 | else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) { |
michael@0 | 934 | nsAutoString temp; |
michael@0 | 935 | temp.AssignLiteral(" <"); |
michael@0 | 936 | temp += mURL; |
michael@0 | 937 | temp.Append(char16_t('>')); |
michael@0 | 938 | Write(temp); |
michael@0 | 939 | mURL.Truncate(); |
michael@0 | 940 | } |
michael@0 | 941 | else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) |
michael@0 | 942 | && mStructs && !currentNodeIsConverted) { |
michael@0 | 943 | Write(kSpace); |
michael@0 | 944 | } |
michael@0 | 945 | else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { |
michael@0 | 946 | Write(NS_LITERAL_STRING("|")); |
michael@0 | 947 | } |
michael@0 | 948 | else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) |
michael@0 | 949 | && mStructs && !currentNodeIsConverted) { |
michael@0 | 950 | Write(NS_LITERAL_STRING("*")); |
michael@0 | 951 | } |
michael@0 | 952 | else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) |
michael@0 | 953 | && mStructs && !currentNodeIsConverted) { |
michael@0 | 954 | Write(NS_LITERAL_STRING("/")); |
michael@0 | 955 | } |
michael@0 | 956 | else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { |
michael@0 | 957 | Write(NS_LITERAL_STRING("_")); |
michael@0 | 958 | } |
michael@0 | 959 | |
michael@0 | 960 | return NS_OK; |
michael@0 | 961 | } |
michael@0 | 962 | |
michael@0 | 963 | bool |
michael@0 | 964 | nsPlainTextSerializer::MustSuppressLeaf() |
michael@0 | 965 | { |
michael@0 | 966 | if (mIgnoredChildNodeLevel > 0) { |
michael@0 | 967 | return true; |
michael@0 | 968 | } |
michael@0 | 969 | |
michael@0 | 970 | if ((mTagStackIndex > 1 && |
michael@0 | 971 | mTagStack[mTagStackIndex-2] == nsGkAtoms::select) || |
michael@0 | 972 | (mTagStackIndex > 0 && |
michael@0 | 973 | mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) { |
michael@0 | 974 | // Don't output the contents of SELECT elements; |
michael@0 | 975 | // Might be nice, eventually, to output just the selected element. |
michael@0 | 976 | // Read more in bug 31994. |
michael@0 | 977 | return true; |
michael@0 | 978 | } |
michael@0 | 979 | |
michael@0 | 980 | if (mTagStackIndex > 0 && |
michael@0 | 981 | (mTagStack[mTagStackIndex-1] == nsGkAtoms::script || |
michael@0 | 982 | mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) { |
michael@0 | 983 | // Don't output the contents of <script> or <style> tags; |
michael@0 | 984 | return true; |
michael@0 | 985 | } |
michael@0 | 986 | |
michael@0 | 987 | return false; |
michael@0 | 988 | } |
michael@0 | 989 | |
michael@0 | 990 | void |
michael@0 | 991 | nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText) |
michael@0 | 992 | { |
michael@0 | 993 | // If we don't want any output, just return |
michael@0 | 994 | if (!DoOutput()) { |
michael@0 | 995 | return; |
michael@0 | 996 | } |
michael@0 | 997 | |
michael@0 | 998 | if (!aIsLineBreak) { |
michael@0 | 999 | // Make sure to reset this, since it's no longer true. |
michael@0 | 1000 | mHasWrittenCiteBlockquote = false; |
michael@0 | 1001 | } |
michael@0 | 1002 | |
michael@0 | 1003 | if (mLineBreakDue) |
michael@0 | 1004 | EnsureVerticalSpace(mFloatingLines); |
michael@0 | 1005 | |
michael@0 | 1006 | if (MustSuppressLeaf()) { |
michael@0 | 1007 | return; |
michael@0 | 1008 | } |
michael@0 | 1009 | |
michael@0 | 1010 | if (aIsLineBreak) { |
michael@0 | 1011 | // The only times we want to pass along whitespace from the original |
michael@0 | 1012 | // html source are if we're forced into preformatted mode via flags, |
michael@0 | 1013 | // or if we're prettyprinting and we're inside a <pre>. |
michael@0 | 1014 | // Otherwise, either we're collapsing to minimal text, or we're |
michael@0 | 1015 | // prettyprinting to mimic the html format, and in neither case |
michael@0 | 1016 | // does the formatting of the html source help us. |
michael@0 | 1017 | if ((mFlags & nsIDocumentEncoder::OutputPreformatted) || |
michael@0 | 1018 | (mPreFormatted && !mWrapColumn) || |
michael@0 | 1019 | IsInPre()) { |
michael@0 | 1020 | EnsureVerticalSpace(mEmptyLines+1); |
michael@0 | 1021 | } |
michael@0 | 1022 | else if (!mInWhitespace) { |
michael@0 | 1023 | Write(kSpace); |
michael@0 | 1024 | mInWhitespace = true; |
michael@0 | 1025 | } |
michael@0 | 1026 | return; |
michael@0 | 1027 | } |
michael@0 | 1028 | |
michael@0 | 1029 | /* Check, if we are in a link (symbolized with mURL containing the URL) |
michael@0 | 1030 | and the text is equal to the URL. In that case we don't want to output |
michael@0 | 1031 | the URL twice so we scrap the text in mURL. */ |
michael@0 | 1032 | if (!mURL.IsEmpty() && mURL.Equals(aText)) { |
michael@0 | 1033 | mURL.Truncate(); |
michael@0 | 1034 | } |
michael@0 | 1035 | Write(aText); |
michael@0 | 1036 | } |
michael@0 | 1037 | |
michael@0 | 1038 | nsresult |
michael@0 | 1039 | nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag) |
michael@0 | 1040 | { |
michael@0 | 1041 | // If we don't want any output, just return |
michael@0 | 1042 | if (!DoOutput()) { |
michael@0 | 1043 | return NS_OK; |
michael@0 | 1044 | } |
michael@0 | 1045 | |
michael@0 | 1046 | if (mLineBreakDue) |
michael@0 | 1047 | EnsureVerticalSpace(mFloatingLines); |
michael@0 | 1048 | |
michael@0 | 1049 | if (MustSuppressLeaf()) { |
michael@0 | 1050 | return NS_OK; |
michael@0 | 1051 | } |
michael@0 | 1052 | |
michael@0 | 1053 | if (aTag == nsGkAtoms::br) { |
michael@0 | 1054 | // Another egregious editor workaround, see bug 38194: |
michael@0 | 1055 | // ignore the bogus br tags that the editor sticks here and there. |
michael@0 | 1056 | nsAutoString tagAttr; |
michael@0 | 1057 | if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr)) |
michael@0 | 1058 | || !tagAttr.EqualsLiteral("_moz")) { |
michael@0 | 1059 | EnsureVerticalSpace(mEmptyLines+1); |
michael@0 | 1060 | } |
michael@0 | 1061 | } |
michael@0 | 1062 | else if (aTag == nsGkAtoms::hr && |
michael@0 | 1063 | (mFlags & nsIDocumentEncoder::OutputFormatted)) { |
michael@0 | 1064 | EnsureVerticalSpace(0); |
michael@0 | 1065 | |
michael@0 | 1066 | // Make a line of dashes as wide as the wrap width |
michael@0 | 1067 | // XXX honoring percentage would be nice |
michael@0 | 1068 | nsAutoString line; |
michael@0 | 1069 | uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25); |
michael@0 | 1070 | while (line.Length() < width) { |
michael@0 | 1071 | line.Append(char16_t('-')); |
michael@0 | 1072 | } |
michael@0 | 1073 | Write(line); |
michael@0 | 1074 | |
michael@0 | 1075 | EnsureVerticalSpace(0); |
michael@0 | 1076 | } |
michael@0 | 1077 | else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) { |
michael@0 | 1078 | Write(NS_LITERAL_STRING("\xFFFC")); |
michael@0 | 1079 | } |
michael@0 | 1080 | else if (aTag == nsGkAtoms::img) { |
michael@0 | 1081 | /* Output (in decreasing order of preference) |
michael@0 | 1082 | alt, title or nothing */ |
michael@0 | 1083 | // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG> |
michael@0 | 1084 | nsAutoString imageDescription; |
michael@0 | 1085 | if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, |
michael@0 | 1086 | imageDescription))) { |
michael@0 | 1087 | // If the alt attribute has an empty value (|alt=""|), output nothing |
michael@0 | 1088 | } |
michael@0 | 1089 | else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title, |
michael@0 | 1090 | imageDescription)) |
michael@0 | 1091 | && !imageDescription.IsEmpty()) { |
michael@0 | 1092 | imageDescription = NS_LITERAL_STRING(" [") + |
michael@0 | 1093 | imageDescription + |
michael@0 | 1094 | NS_LITERAL_STRING("] "); |
michael@0 | 1095 | } |
michael@0 | 1096 | |
michael@0 | 1097 | Write(imageDescription); |
michael@0 | 1098 | } |
michael@0 | 1099 | |
michael@0 | 1100 | return NS_OK; |
michael@0 | 1101 | } |
michael@0 | 1102 | |
michael@0 | 1103 | /** |
michael@0 | 1104 | * Adds as many newline as necessary to get |noOfRows| empty lines |
michael@0 | 1105 | * |
michael@0 | 1106 | * noOfRows = -1 : Being in the middle of some line of text |
michael@0 | 1107 | * noOfRows = 0 : Being at the start of a line |
michael@0 | 1108 | * noOfRows = n>0 : Having n empty lines before the current line. |
michael@0 | 1109 | */ |
michael@0 | 1110 | void |
michael@0 | 1111 | nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows) |
michael@0 | 1112 | { |
michael@0 | 1113 | // If we have something in the indent we probably want to output |
michael@0 | 1114 | // it and it's not included in the count for empty lines so we don't |
michael@0 | 1115 | // realize that we should start a new line. |
michael@0 | 1116 | if (noOfRows >= 0 && !mInIndentString.IsEmpty()) { |
michael@0 | 1117 | EndLine(false); |
michael@0 | 1118 | mInWhitespace = true; |
michael@0 | 1119 | } |
michael@0 | 1120 | |
michael@0 | 1121 | while(mEmptyLines < noOfRows) { |
michael@0 | 1122 | EndLine(false); |
michael@0 | 1123 | mInWhitespace = true; |
michael@0 | 1124 | } |
michael@0 | 1125 | mLineBreakDue = false; |
michael@0 | 1126 | mFloatingLines = -1; |
michael@0 | 1127 | } |
michael@0 | 1128 | |
michael@0 | 1129 | /** |
michael@0 | 1130 | * This empties the current line cache without adding a NEWLINE. |
michael@0 | 1131 | * Should not be used if line wrapping is of importance since |
michael@0 | 1132 | * this function destroys the cache information. |
michael@0 | 1133 | * |
michael@0 | 1134 | * It will also write indentation and quotes if we believe us to be |
michael@0 | 1135 | * at the start of the line. |
michael@0 | 1136 | */ |
michael@0 | 1137 | void |
michael@0 | 1138 | nsPlainTextSerializer::FlushLine() |
michael@0 | 1139 | { |
michael@0 | 1140 | if (!mCurrentLine.IsEmpty()) { |
michael@0 | 1141 | if (mAtFirstColumn) { |
michael@0 | 1142 | OutputQuotesAndIndent(); // XXX: Should we always do this? Bug? |
michael@0 | 1143 | } |
michael@0 | 1144 | |
michael@0 | 1145 | Output(mCurrentLine); |
michael@0 | 1146 | mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty(); |
michael@0 | 1147 | mCurrentLine.Truncate(); |
michael@0 | 1148 | mCurrentLineWidth = 0; |
michael@0 | 1149 | } |
michael@0 | 1150 | } |
michael@0 | 1151 | |
michael@0 | 1152 | /** |
michael@0 | 1153 | * Prints the text to output to our current output device (the string mOutputString). |
michael@0 | 1154 | * The only logic here is to replace non breaking spaces with a normal space since |
michael@0 | 1155 | * most (all?) receivers of the result won't understand the nbsp and even be |
michael@0 | 1156 | * confused by it. |
michael@0 | 1157 | */ |
michael@0 | 1158 | void |
michael@0 | 1159 | nsPlainTextSerializer::Output(nsString& aString) |
michael@0 | 1160 | { |
michael@0 | 1161 | if (!aString.IsEmpty()) { |
michael@0 | 1162 | mStartedOutput = true; |
michael@0 | 1163 | } |
michael@0 | 1164 | |
michael@0 | 1165 | if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) { |
michael@0 | 1166 | // First, replace all nbsp characters with spaces, |
michael@0 | 1167 | // which the unicode encoder won't do for us. |
michael@0 | 1168 | aString.ReplaceChar(kNBSP, kSPACE); |
michael@0 | 1169 | } |
michael@0 | 1170 | mOutputString->Append(aString); |
michael@0 | 1171 | } |
michael@0 | 1172 | |
michael@0 | 1173 | static bool |
michael@0 | 1174 | IsSpaceStuffable(const char16_t *s) |
michael@0 | 1175 | { |
michael@0 | 1176 | if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP || |
michael@0 | 1177 | nsCRT::strncmp(s, MOZ_UTF16("From "), 5) == 0) |
michael@0 | 1178 | return true; |
michael@0 | 1179 | else |
michael@0 | 1180 | return false; |
michael@0 | 1181 | } |
michael@0 | 1182 | |
michael@0 | 1183 | /** |
michael@0 | 1184 | * This function adds a piece of text to the current stored line. If we are |
michael@0 | 1185 | * wrapping text and the stored line will become too long, a suitable |
michael@0 | 1186 | * location to wrap will be found and the line that's complete will be |
michael@0 | 1187 | * output. |
michael@0 | 1188 | */ |
michael@0 | 1189 | void |
michael@0 | 1190 | nsPlainTextSerializer::AddToLine(const char16_t * aLineFragment, |
michael@0 | 1191 | int32_t aLineFragmentLength) |
michael@0 | 1192 | { |
michael@0 | 1193 | uint32_t prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent; |
michael@0 | 1194 | |
michael@0 | 1195 | if (mLineBreakDue) |
michael@0 | 1196 | EnsureVerticalSpace(mFloatingLines); |
michael@0 | 1197 | |
michael@0 | 1198 | int32_t linelength = mCurrentLine.Length(); |
michael@0 | 1199 | if (0 == linelength) { |
michael@0 | 1200 | if (0 == aLineFragmentLength) { |
michael@0 | 1201 | // Nothing at all. Are you kidding me? |
michael@0 | 1202 | return; |
michael@0 | 1203 | } |
michael@0 | 1204 | |
michael@0 | 1205 | if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
michael@0 | 1206 | if (IsSpaceStuffable(aLineFragment) |
michael@0 | 1207 | && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway |
michael@0 | 1208 | ) |
michael@0 | 1209 | { |
michael@0 | 1210 | // Space stuffing a la RFC 2646 (format=flowed). |
michael@0 | 1211 | mCurrentLine.Append(char16_t(' ')); |
michael@0 | 1212 | |
michael@0 | 1213 | if (MayWrap()) { |
michael@0 | 1214 | mCurrentLineWidth += GetUnicharWidth(' '); |
michael@0 | 1215 | #ifdef DEBUG_wrapping |
michael@0 | 1216 | NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(), |
michael@0 | 1217 | mCurrentLine.Length()) == |
michael@0 | 1218 | (int32_t)mCurrentLineWidth, |
michael@0 | 1219 | "mCurrentLineWidth and reality out of sync!"); |
michael@0 | 1220 | #endif |
michael@0 | 1221 | } |
michael@0 | 1222 | } |
michael@0 | 1223 | } |
michael@0 | 1224 | mEmptyLines=-1; |
michael@0 | 1225 | } |
michael@0 | 1226 | |
michael@0 | 1227 | mCurrentLine.Append(aLineFragment, aLineFragmentLength); |
michael@0 | 1228 | if (MayWrap()) { |
michael@0 | 1229 | mCurrentLineWidth += GetUnicharStringWidth(aLineFragment, |
michael@0 | 1230 | aLineFragmentLength); |
michael@0 | 1231 | #ifdef DEBUG_wrapping |
michael@0 | 1232 | NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(), |
michael@0 | 1233 | mCurrentLine.Length()) == |
michael@0 | 1234 | (int32_t)mCurrentLineWidth, |
michael@0 | 1235 | "mCurrentLineWidth and reality out of sync!"); |
michael@0 | 1236 | #endif |
michael@0 | 1237 | } |
michael@0 | 1238 | |
michael@0 | 1239 | linelength = mCurrentLine.Length(); |
michael@0 | 1240 | |
michael@0 | 1241 | // Wrap? |
michael@0 | 1242 | if (MayWrap()) |
michael@0 | 1243 | { |
michael@0 | 1244 | #ifdef DEBUG_wrapping |
michael@0 | 1245 | NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(), |
michael@0 | 1246 | mCurrentLine.Length()) == |
michael@0 | 1247 | (int32_t)mCurrentLineWidth, |
michael@0 | 1248 | "mCurrentLineWidth and reality out of sync!"); |
michael@0 | 1249 | #endif |
michael@0 | 1250 | // Yes, wrap! |
michael@0 | 1251 | // The "+4" is to avoid wrap lines that only would be a couple |
michael@0 | 1252 | // of letters too long. We give this bonus only if the |
michael@0 | 1253 | // wrapcolumn is more than 20. |
michael@0 | 1254 | uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0; |
michael@0 | 1255 | |
michael@0 | 1256 | // XXX: Should calculate prefixwidth with GetUnicharStringWidth |
michael@0 | 1257 | while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) { |
michael@0 | 1258 | // We go from the end removing one letter at a time until |
michael@0 | 1259 | // we have a reasonable width |
michael@0 | 1260 | int32_t goodSpace = mCurrentLine.Length(); |
michael@0 | 1261 | uint32_t width = mCurrentLineWidth; |
michael@0 | 1262 | while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) { |
michael@0 | 1263 | goodSpace--; |
michael@0 | 1264 | width -= GetUnicharWidth(mCurrentLine[goodSpace]); |
michael@0 | 1265 | } |
michael@0 | 1266 | |
michael@0 | 1267 | goodSpace++; |
michael@0 | 1268 | |
michael@0 | 1269 | if (mLineBreaker) { |
michael@0 | 1270 | goodSpace = mLineBreaker->Prev(mCurrentLine.get(), |
michael@0 | 1271 | mCurrentLine.Length(), goodSpace); |
michael@0 | 1272 | if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT && |
michael@0 | 1273 | nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) { |
michael@0 | 1274 | --goodSpace; // adjust the position since line breaker returns a position next to space |
michael@0 | 1275 | } |
michael@0 | 1276 | } |
michael@0 | 1277 | // fallback if the line breaker is unavailable or failed |
michael@0 | 1278 | if (!mLineBreaker) { |
michael@0 | 1279 | goodSpace = mWrapColumn-prefixwidth; |
michael@0 | 1280 | while (goodSpace >= 0 && |
michael@0 | 1281 | !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { |
michael@0 | 1282 | goodSpace--; |
michael@0 | 1283 | } |
michael@0 | 1284 | } |
michael@0 | 1285 | |
michael@0 | 1286 | nsAutoString restOfLine; |
michael@0 | 1287 | if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) { |
michael@0 | 1288 | // If we don't found a good place to break, accept long line and |
michael@0 | 1289 | // try to find another place to break |
michael@0 | 1290 | goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1; |
michael@0 | 1291 | if (mLineBreaker) { |
michael@0 | 1292 | if ((uint32_t)goodSpace < mCurrentLine.Length()) |
michael@0 | 1293 | goodSpace = mLineBreaker->Next(mCurrentLine.get(), |
michael@0 | 1294 | mCurrentLine.Length(), goodSpace); |
michael@0 | 1295 | if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) |
michael@0 | 1296 | goodSpace = mCurrentLine.Length(); |
michael@0 | 1297 | } |
michael@0 | 1298 | // fallback if the line breaker is unavailable or failed |
michael@0 | 1299 | if (!mLineBreaker) { |
michael@0 | 1300 | goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth; |
michael@0 | 1301 | while (goodSpace < linelength && |
michael@0 | 1302 | !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { |
michael@0 | 1303 | goodSpace++; |
michael@0 | 1304 | } |
michael@0 | 1305 | } |
michael@0 | 1306 | } |
michael@0 | 1307 | |
michael@0 | 1308 | if ((goodSpace < linelength) && (goodSpace > 0)) { |
michael@0 | 1309 | // Found a place to break |
michael@0 | 1310 | |
michael@0 | 1311 | // -1 (trim a char at the break position) |
michael@0 | 1312 | // only if the line break was a space. |
michael@0 | 1313 | if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) { |
michael@0 | 1314 | mCurrentLine.Right(restOfLine, linelength-goodSpace-1); |
michael@0 | 1315 | } |
michael@0 | 1316 | else { |
michael@0 | 1317 | mCurrentLine.Right(restOfLine, linelength-goodSpace); |
michael@0 | 1318 | } |
michael@0 | 1319 | // if breaker was U+0020, it has to consider for delsp=yes support |
michael@0 | 1320 | bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' '; |
michael@0 | 1321 | mCurrentLine.Truncate(goodSpace); |
michael@0 | 1322 | EndLine(true, breakBySpace); |
michael@0 | 1323 | mCurrentLine.Truncate(); |
michael@0 | 1324 | // Space stuff new line? |
michael@0 | 1325 | if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
michael@0 | 1326 | if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get()) |
michael@0 | 1327 | && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway |
michael@0 | 1328 | ) |
michael@0 | 1329 | { |
michael@0 | 1330 | // Space stuffing a la RFC 2646 (format=flowed). |
michael@0 | 1331 | mCurrentLine.Append(char16_t(' ')); |
michael@0 | 1332 | //XXX doesn't seem to work correctly for ' ' |
michael@0 | 1333 | } |
michael@0 | 1334 | } |
michael@0 | 1335 | mCurrentLine.Append(restOfLine); |
michael@0 | 1336 | mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(), |
michael@0 | 1337 | mCurrentLine.Length()); |
michael@0 | 1338 | linelength = mCurrentLine.Length(); |
michael@0 | 1339 | mEmptyLines = -1; |
michael@0 | 1340 | } |
michael@0 | 1341 | else { |
michael@0 | 1342 | // Nothing to do. Hopefully we get more data later |
michael@0 | 1343 | // to use for a place to break line |
michael@0 | 1344 | break; |
michael@0 | 1345 | } |
michael@0 | 1346 | } |
michael@0 | 1347 | } |
michael@0 | 1348 | else { |
michael@0 | 1349 | // No wrapping. |
michael@0 | 1350 | } |
michael@0 | 1351 | } |
michael@0 | 1352 | |
michael@0 | 1353 | /** |
michael@0 | 1354 | * Outputs the contents of mCurrentLine, and resets line specific |
michael@0 | 1355 | * variables. Also adds an indentation and prefix if there is |
michael@0 | 1356 | * one specified. Strips ending spaces from the line if it isn't |
michael@0 | 1357 | * preformatted. |
michael@0 | 1358 | */ |
michael@0 | 1359 | void |
michael@0 | 1360 | nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace) |
michael@0 | 1361 | { |
michael@0 | 1362 | uint32_t currentlinelength = mCurrentLine.Length(); |
michael@0 | 1363 | |
michael@0 | 1364 | if (aSoftlinebreak && 0 == currentlinelength) { |
michael@0 | 1365 | // No meaning |
michael@0 | 1366 | return; |
michael@0 | 1367 | } |
michael@0 | 1368 | |
michael@0 | 1369 | /* In non-preformatted mode, remove spaces from the end of the line for |
michael@0 | 1370 | * format=flowed compatibility. Don't do this for these special cases: |
michael@0 | 1371 | * "-- ", the signature separator (RFC 2646) shouldn't be touched and |
michael@0 | 1372 | * "- -- ", the OpenPGP dash-escaped signature separator in inline |
michael@0 | 1373 | * signed messages according to the OpenPGP standard (RFC 2440). |
michael@0 | 1374 | */ |
michael@0 | 1375 | if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) && |
michael@0 | 1376 | !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) && |
michael@0 | 1377 | (aSoftlinebreak || |
michael@0 | 1378 | !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) { |
michael@0 | 1379 | // Remove spaces from the end of the line. |
michael@0 | 1380 | while(currentlinelength > 0 && |
michael@0 | 1381 | mCurrentLine[currentlinelength-1] == ' ') { |
michael@0 | 1382 | --currentlinelength; |
michael@0 | 1383 | } |
michael@0 | 1384 | mCurrentLine.SetLength(currentlinelength); |
michael@0 | 1385 | } |
michael@0 | 1386 | |
michael@0 | 1387 | if (aSoftlinebreak && |
michael@0 | 1388 | (mFlags & nsIDocumentEncoder::OutputFormatFlowed) && |
michael@0 | 1389 | (mIndent == 0)) { |
michael@0 | 1390 | // Add the soft part of the soft linebreak (RFC 2646 4.1) |
michael@0 | 1391 | // We only do this when there is no indentation since format=flowed |
michael@0 | 1392 | // lines and indentation doesn't work well together. |
michael@0 | 1393 | |
michael@0 | 1394 | // If breaker character is ASCII space with RFC 3676 support (delsp=yes), |
michael@0 | 1395 | // add twice space. |
michael@0 | 1396 | if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace) |
michael@0 | 1397 | mCurrentLine.Append(NS_LITERAL_STRING(" ")); |
michael@0 | 1398 | else |
michael@0 | 1399 | mCurrentLine.Append(char16_t(' ')); |
michael@0 | 1400 | } |
michael@0 | 1401 | |
michael@0 | 1402 | if (aSoftlinebreak) { |
michael@0 | 1403 | mEmptyLines=0; |
michael@0 | 1404 | } |
michael@0 | 1405 | else { |
michael@0 | 1406 | // Hard break |
michael@0 | 1407 | if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) { |
michael@0 | 1408 | mEmptyLines=-1; |
michael@0 | 1409 | } |
michael@0 | 1410 | |
michael@0 | 1411 | mEmptyLines++; |
michael@0 | 1412 | } |
michael@0 | 1413 | |
michael@0 | 1414 | if (mAtFirstColumn) { |
michael@0 | 1415 | // If we don't have anything "real" to output we have to |
michael@0 | 1416 | // make sure the indent doesn't end in a space since that |
michael@0 | 1417 | // would trick a format=flowed-aware receiver. |
michael@0 | 1418 | bool stripTrailingSpaces = mCurrentLine.IsEmpty(); |
michael@0 | 1419 | OutputQuotesAndIndent(stripTrailingSpaces); |
michael@0 | 1420 | } |
michael@0 | 1421 | |
michael@0 | 1422 | mCurrentLine.Append(mLineBreak); |
michael@0 | 1423 | Output(mCurrentLine); |
michael@0 | 1424 | mCurrentLine.Truncate(); |
michael@0 | 1425 | mCurrentLineWidth = 0; |
michael@0 | 1426 | mAtFirstColumn=true; |
michael@0 | 1427 | mInWhitespace=true; |
michael@0 | 1428 | mLineBreakDue = false; |
michael@0 | 1429 | mFloatingLines = -1; |
michael@0 | 1430 | } |
michael@0 | 1431 | |
michael@0 | 1432 | |
michael@0 | 1433 | /** |
michael@0 | 1434 | * Outputs the calculated and stored indent and text in the indentation. That is |
michael@0 | 1435 | * quote chars and numbers for numbered lists and such. It will also reset any |
michael@0 | 1436 | * stored text to put in the indentation after using it. |
michael@0 | 1437 | */ |
michael@0 | 1438 | void |
michael@0 | 1439 | nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */) |
michael@0 | 1440 | { |
michael@0 | 1441 | nsAutoString stringToOutput; |
michael@0 | 1442 | |
michael@0 | 1443 | // Put the mail quote "> " chars in, if appropriate: |
michael@0 | 1444 | if (mCiteQuoteLevel > 0) { |
michael@0 | 1445 | nsAutoString quotes; |
michael@0 | 1446 | for(int i=0; i < mCiteQuoteLevel; i++) { |
michael@0 | 1447 | quotes.Append(char16_t('>')); |
michael@0 | 1448 | } |
michael@0 | 1449 | if (!mCurrentLine.IsEmpty()) { |
michael@0 | 1450 | /* Better don't output a space here, if the line is empty, |
michael@0 | 1451 | in case a receiving f=f-aware UA thinks, this were a flowed line, |
michael@0 | 1452 | which it isn't - it's just empty. |
michael@0 | 1453 | (Flowed lines may be joined with the following one, |
michael@0 | 1454 | so the empty line may be lost completely.) */ |
michael@0 | 1455 | quotes.Append(char16_t(' ')); |
michael@0 | 1456 | } |
michael@0 | 1457 | stringToOutput = quotes; |
michael@0 | 1458 | mAtFirstColumn = false; |
michael@0 | 1459 | } |
michael@0 | 1460 | |
michael@0 | 1461 | // Indent if necessary |
michael@0 | 1462 | int32_t indentwidth = mIndent - mInIndentString.Length(); |
michael@0 | 1463 | if (indentwidth > 0 |
michael@0 | 1464 | && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) |
michael@0 | 1465 | // Don't make empty lines look flowed |
michael@0 | 1466 | ) { |
michael@0 | 1467 | nsAutoString spaces; |
michael@0 | 1468 | for (int i=0; i < indentwidth; ++i) |
michael@0 | 1469 | spaces.Append(char16_t(' ')); |
michael@0 | 1470 | stringToOutput += spaces; |
michael@0 | 1471 | mAtFirstColumn = false; |
michael@0 | 1472 | } |
michael@0 | 1473 | |
michael@0 | 1474 | if (!mInIndentString.IsEmpty()) { |
michael@0 | 1475 | stringToOutput += mInIndentString; |
michael@0 | 1476 | mAtFirstColumn = false; |
michael@0 | 1477 | mInIndentString.Truncate(); |
michael@0 | 1478 | } |
michael@0 | 1479 | |
michael@0 | 1480 | if (stripTrailingSpaces) { |
michael@0 | 1481 | int32_t lineLength = stringToOutput.Length(); |
michael@0 | 1482 | while(lineLength > 0 && |
michael@0 | 1483 | ' ' == stringToOutput[lineLength-1]) { |
michael@0 | 1484 | --lineLength; |
michael@0 | 1485 | } |
michael@0 | 1486 | stringToOutput.SetLength(lineLength); |
michael@0 | 1487 | } |
michael@0 | 1488 | |
michael@0 | 1489 | if (!stringToOutput.IsEmpty()) { |
michael@0 | 1490 | Output(stringToOutput); |
michael@0 | 1491 | } |
michael@0 | 1492 | |
michael@0 | 1493 | } |
michael@0 | 1494 | |
michael@0 | 1495 | /** |
michael@0 | 1496 | * Write a string. This is the highlevel function to use to get text output. |
michael@0 | 1497 | * By using AddToLine, Output, EndLine and other functions it handles quotation, |
michael@0 | 1498 | * line wrapping, indentation, whitespace compression and other things. |
michael@0 | 1499 | */ |
michael@0 | 1500 | void |
michael@0 | 1501 | nsPlainTextSerializer::Write(const nsAString& aStr) |
michael@0 | 1502 | { |
michael@0 | 1503 | // XXX Copy necessary to use nsString methods and gain |
michael@0 | 1504 | // access to underlying buffer |
michael@0 | 1505 | nsAutoString str(aStr); |
michael@0 | 1506 | |
michael@0 | 1507 | #ifdef DEBUG_wrapping |
michael@0 | 1508 | printf("Write(%s): wrap col = %d\n", |
michael@0 | 1509 | NS_ConvertUTF16toUTF8(str).get(), mWrapColumn); |
michael@0 | 1510 | #endif |
michael@0 | 1511 | |
michael@0 | 1512 | int32_t bol = 0; |
michael@0 | 1513 | int32_t newline; |
michael@0 | 1514 | |
michael@0 | 1515 | int32_t totLen = str.Length(); |
michael@0 | 1516 | |
michael@0 | 1517 | // If the string is empty, do nothing: |
michael@0 | 1518 | if (totLen <= 0) return; |
michael@0 | 1519 | |
michael@0 | 1520 | // For Flowed text change nbsp-ses to spaces at end of lines to allow them |
michael@0 | 1521 | // to be cut off along with usual spaces if required. (bug #125928) |
michael@0 | 1522 | if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
michael@0 | 1523 | for (int32_t i = totLen-1; i >= 0; i--) { |
michael@0 | 1524 | char16_t c = str[i]; |
michael@0 | 1525 | if ('\n' == c || '\r' == c || ' ' == c || '\t' == c) |
michael@0 | 1526 | continue; |
michael@0 | 1527 | if (kNBSP == c) |
michael@0 | 1528 | str.Replace(i, 1, ' '); |
michael@0 | 1529 | else |
michael@0 | 1530 | break; |
michael@0 | 1531 | } |
michael@0 | 1532 | } |
michael@0 | 1533 | |
michael@0 | 1534 | // We have two major codepaths here. One that does preformatted text and one |
michael@0 | 1535 | // that does normal formatted text. The one for preformatted text calls |
michael@0 | 1536 | // Output directly while the other code path goes through AddToLine. |
michael@0 | 1537 | if ((mPreFormatted && !mWrapColumn) || IsInPre() |
michael@0 | 1538 | || ((mSpanLevel > 0 || mDontWrapAnyQuotes) |
michael@0 | 1539 | && mEmptyLines >= 0 && str.First() == char16_t('>'))) { |
michael@0 | 1540 | // No intelligent wrapping. |
michael@0 | 1541 | |
michael@0 | 1542 | // This mustn't be mixed with intelligent wrapping without clearing |
michael@0 | 1543 | // the mCurrentLine buffer before!!! |
michael@0 | 1544 | NS_ASSERTION(mCurrentLine.IsEmpty(), |
michael@0 | 1545 | "Mixed wrapping data and nonwrapping data on the same line"); |
michael@0 | 1546 | if (!mCurrentLine.IsEmpty()) { |
michael@0 | 1547 | FlushLine(); |
michael@0 | 1548 | } |
michael@0 | 1549 | |
michael@0 | 1550 | // Put the mail quote "> " chars in, if appropriate. |
michael@0 | 1551 | // Have to put it in before every line. |
michael@0 | 1552 | while(bol<totLen) { |
michael@0 | 1553 | bool outputQuotes = mAtFirstColumn; |
michael@0 | 1554 | bool atFirstColumn = mAtFirstColumn; |
michael@0 | 1555 | bool outputLineBreak = false; |
michael@0 | 1556 | bool spacesOnly = true; |
michael@0 | 1557 | |
michael@0 | 1558 | // Find one of '\n' or '\r' using iterators since nsAString |
michael@0 | 1559 | // doesn't have the old FindCharInSet function. |
michael@0 | 1560 | nsAString::const_iterator iter; str.BeginReading(iter); |
michael@0 | 1561 | nsAString::const_iterator done_searching; str.EndReading(done_searching); |
michael@0 | 1562 | iter.advance(bol); |
michael@0 | 1563 | int32_t new_newline = bol; |
michael@0 | 1564 | newline = kNotFound; |
michael@0 | 1565 | while(iter != done_searching) { |
michael@0 | 1566 | if ('\n' == *iter || '\r' == *iter) { |
michael@0 | 1567 | newline = new_newline; |
michael@0 | 1568 | break; |
michael@0 | 1569 | } |
michael@0 | 1570 | if (' ' != *iter) |
michael@0 | 1571 | spacesOnly = false; |
michael@0 | 1572 | ++new_newline; |
michael@0 | 1573 | ++iter; |
michael@0 | 1574 | } |
michael@0 | 1575 | |
michael@0 | 1576 | // Done searching |
michael@0 | 1577 | nsAutoString stringpart; |
michael@0 | 1578 | if (newline == kNotFound) { |
michael@0 | 1579 | // No new lines. |
michael@0 | 1580 | stringpart.Assign(Substring(str, bol, totLen - bol)); |
michael@0 | 1581 | if (!stringpart.IsEmpty()) { |
michael@0 | 1582 | char16_t lastchar = stringpart[stringpart.Length()-1]; |
michael@0 | 1583 | if ((lastchar == '\t') || (lastchar == ' ') || |
michael@0 | 1584 | (lastchar == '\r') ||(lastchar == '\n')) { |
michael@0 | 1585 | mInWhitespace = true; |
michael@0 | 1586 | } |
michael@0 | 1587 | else { |
michael@0 | 1588 | mInWhitespace = false; |
michael@0 | 1589 | } |
michael@0 | 1590 | } |
michael@0 | 1591 | mEmptyLines=-1; |
michael@0 | 1592 | atFirstColumn = mAtFirstColumn && (totLen-bol)==0; |
michael@0 | 1593 | bol = totLen; |
michael@0 | 1594 | } |
michael@0 | 1595 | else { |
michael@0 | 1596 | // There is a newline |
michael@0 | 1597 | stringpart.Assign(Substring(str, bol, newline-bol)); |
michael@0 | 1598 | mInWhitespace = true; |
michael@0 | 1599 | outputLineBreak = true; |
michael@0 | 1600 | mEmptyLines=0; |
michael@0 | 1601 | atFirstColumn = true; |
michael@0 | 1602 | bol = newline+1; |
michael@0 | 1603 | if ('\r' == *iter && bol < totLen && '\n' == *++iter) { |
michael@0 | 1604 | // There was a CRLF in the input. This used to be illegal and |
michael@0 | 1605 | // stripped by the parser. Apparently not anymore. Let's skip |
michael@0 | 1606 | // over the LF. |
michael@0 | 1607 | bol++; |
michael@0 | 1608 | } |
michael@0 | 1609 | } |
michael@0 | 1610 | |
michael@0 | 1611 | mCurrentLine.AssignLiteral(""); |
michael@0 | 1612 | if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) { |
michael@0 | 1613 | if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928 |
michael@0 | 1614 | !stringpart.EqualsLiteral("-- ") && |
michael@0 | 1615 | !stringpart.EqualsLiteral("- -- ")) |
michael@0 | 1616 | stringpart.Trim(" ", false, true, true); |
michael@0 | 1617 | if (IsSpaceStuffable(stringpart.get()) && stringpart[0] != '>') |
michael@0 | 1618 | mCurrentLine.Append(char16_t(' ')); |
michael@0 | 1619 | } |
michael@0 | 1620 | mCurrentLine.Append(stringpart); |
michael@0 | 1621 | |
michael@0 | 1622 | if (outputQuotes) { |
michael@0 | 1623 | // Note: this call messes with mAtFirstColumn |
michael@0 | 1624 | OutputQuotesAndIndent(); |
michael@0 | 1625 | } |
michael@0 | 1626 | |
michael@0 | 1627 | Output(mCurrentLine); |
michael@0 | 1628 | if (outputLineBreak) { |
michael@0 | 1629 | Output(mLineBreak); |
michael@0 | 1630 | } |
michael@0 | 1631 | mAtFirstColumn = atFirstColumn; |
michael@0 | 1632 | } |
michael@0 | 1633 | |
michael@0 | 1634 | // Reset mCurrentLine. |
michael@0 | 1635 | mCurrentLine.Truncate(); |
michael@0 | 1636 | |
michael@0 | 1637 | #ifdef DEBUG_wrapping |
michael@0 | 1638 | printf("No wrapping: newline is %d, totLen is %d\n", |
michael@0 | 1639 | newline, totLen); |
michael@0 | 1640 | #endif |
michael@0 | 1641 | return; |
michael@0 | 1642 | } |
michael@0 | 1643 | |
michael@0 | 1644 | // Intelligent handling of text |
michael@0 | 1645 | // If needed, strip out all "end of lines" |
michael@0 | 1646 | // and multiple whitespace between words |
michael@0 | 1647 | int32_t nextpos; |
michael@0 | 1648 | const char16_t * offsetIntoBuffer = nullptr; |
michael@0 | 1649 | |
michael@0 | 1650 | while (bol < totLen) { // Loop over lines |
michael@0 | 1651 | // Find a place where we may have to do whitespace compression |
michael@0 | 1652 | nextpos = str.FindCharInSet(" \t\n\r", bol); |
michael@0 | 1653 | #ifdef DEBUG_wrapping |
michael@0 | 1654 | nsAutoString remaining; |
michael@0 | 1655 | str.Right(remaining, totLen - bol); |
michael@0 | 1656 | foo = ToNewCString(remaining); |
michael@0 | 1657 | // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n", |
michael@0 | 1658 | // bol, nextpos, totLen, foo); |
michael@0 | 1659 | nsMemory::Free(foo); |
michael@0 | 1660 | #endif |
michael@0 | 1661 | |
michael@0 | 1662 | if (nextpos == kNotFound) { |
michael@0 | 1663 | // The rest of the string |
michael@0 | 1664 | offsetIntoBuffer = str.get() + bol; |
michael@0 | 1665 | AddToLine(offsetIntoBuffer, totLen-bol); |
michael@0 | 1666 | bol=totLen; |
michael@0 | 1667 | mInWhitespace=false; |
michael@0 | 1668 | } |
michael@0 | 1669 | else { |
michael@0 | 1670 | // There's still whitespace left in the string |
michael@0 | 1671 | if (nextpos != 0 && (nextpos + 1) < totLen) { |
michael@0 | 1672 | offsetIntoBuffer = str.get() + nextpos; |
michael@0 | 1673 | // skip '\n' if it is between CJ chars |
michael@0 | 1674 | if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) { |
michael@0 | 1675 | offsetIntoBuffer = str.get() + bol; |
michael@0 | 1676 | AddToLine(offsetIntoBuffer, nextpos-bol); |
michael@0 | 1677 | bol = nextpos + 1; |
michael@0 | 1678 | continue; |
michael@0 | 1679 | } |
michael@0 | 1680 | } |
michael@0 | 1681 | // If we're already in whitespace and not preformatted, just skip it: |
michael@0 | 1682 | if (mInWhitespace && (nextpos == bol) && !mPreFormatted && |
michael@0 | 1683 | !(mFlags & nsIDocumentEncoder::OutputPreformatted)) { |
michael@0 | 1684 | // Skip whitespace |
michael@0 | 1685 | bol++; |
michael@0 | 1686 | continue; |
michael@0 | 1687 | } |
michael@0 | 1688 | |
michael@0 | 1689 | if (nextpos == bol) { |
michael@0 | 1690 | // Note that we are in whitespace. |
michael@0 | 1691 | mInWhitespace = true; |
michael@0 | 1692 | offsetIntoBuffer = str.get() + nextpos; |
michael@0 | 1693 | AddToLine(offsetIntoBuffer, 1); |
michael@0 | 1694 | bol++; |
michael@0 | 1695 | continue; |
michael@0 | 1696 | } |
michael@0 | 1697 | |
michael@0 | 1698 | mInWhitespace = true; |
michael@0 | 1699 | |
michael@0 | 1700 | offsetIntoBuffer = str.get() + bol; |
michael@0 | 1701 | if (mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) { |
michael@0 | 1702 | // Preserve the real whitespace character |
michael@0 | 1703 | nextpos++; |
michael@0 | 1704 | AddToLine(offsetIntoBuffer, nextpos-bol); |
michael@0 | 1705 | bol = nextpos; |
michael@0 | 1706 | } |
michael@0 | 1707 | else { |
michael@0 | 1708 | // Replace the whitespace with a space |
michael@0 | 1709 | AddToLine(offsetIntoBuffer, nextpos-bol); |
michael@0 | 1710 | AddToLine(kSpace.get(),1); |
michael@0 | 1711 | bol = nextpos + 1; // Let's eat the whitespace |
michael@0 | 1712 | } |
michael@0 | 1713 | } |
michael@0 | 1714 | } // Continue looping over the string |
michael@0 | 1715 | } |
michael@0 | 1716 | |
michael@0 | 1717 | |
michael@0 | 1718 | /** |
michael@0 | 1719 | * Gets the value of an attribute in a string. If the function returns |
michael@0 | 1720 | * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified. |
michael@0 | 1721 | */ |
michael@0 | 1722 | nsresult |
michael@0 | 1723 | nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName, |
michael@0 | 1724 | nsString& aValueRet) |
michael@0 | 1725 | { |
michael@0 | 1726 | if (mElement) { |
michael@0 | 1727 | if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) { |
michael@0 | 1728 | return NS_OK; |
michael@0 | 1729 | } |
michael@0 | 1730 | } |
michael@0 | 1731 | |
michael@0 | 1732 | return NS_ERROR_NOT_AVAILABLE; |
michael@0 | 1733 | } |
michael@0 | 1734 | |
michael@0 | 1735 | /** |
michael@0 | 1736 | * Returns true, if the element was inserted by Moz' TXT->HTML converter. |
michael@0 | 1737 | * In this case, we should ignore it. |
michael@0 | 1738 | */ |
michael@0 | 1739 | bool |
michael@0 | 1740 | nsPlainTextSerializer::IsCurrentNodeConverted() |
michael@0 | 1741 | { |
michael@0 | 1742 | nsAutoString value; |
michael@0 | 1743 | nsresult rv = GetAttributeValue(nsGkAtoms::_class, value); |
michael@0 | 1744 | return (NS_SUCCEEDED(rv) && |
michael@0 | 1745 | (value.EqualsIgnoreCase("moz-txt", 7) || |
michael@0 | 1746 | value.EqualsIgnoreCase("\"moz-txt", 8))); |
michael@0 | 1747 | } |
michael@0 | 1748 | |
michael@0 | 1749 | |
michael@0 | 1750 | // static |
michael@0 | 1751 | nsIAtom* |
michael@0 | 1752 | nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) |
michael@0 | 1753 | { |
michael@0 | 1754 | if (!aContent->IsHTML()) { |
michael@0 | 1755 | return nullptr; |
michael@0 | 1756 | } |
michael@0 | 1757 | |
michael@0 | 1758 | nsIAtom* localName = aContent->Tag(); |
michael@0 | 1759 | return localName->IsStaticAtom() ? localName : nullptr; |
michael@0 | 1760 | } |
michael@0 | 1761 | |
michael@0 | 1762 | /** |
michael@0 | 1763 | * Returns true if we currently are inside a <pre>. The check is done |
michael@0 | 1764 | * by traversing the tag stack looking for <pre> until we hit a block |
michael@0 | 1765 | * level tag which is assumed to override any <pre>:s below it in |
michael@0 | 1766 | * the stack. To do this correctly to a 100% would require access |
michael@0 | 1767 | * to style which we don't support in this converter. |
michael@0 | 1768 | */ |
michael@0 | 1769 | bool |
michael@0 | 1770 | nsPlainTextSerializer::IsInPre() |
michael@0 | 1771 | { |
michael@0 | 1772 | int32_t i = mTagStackIndex; |
michael@0 | 1773 | while(i > 0) { |
michael@0 | 1774 | if (mTagStack[i - 1] == nsGkAtoms::pre) |
michael@0 | 1775 | return true; |
michael@0 | 1776 | if (nsContentUtils::IsHTMLBlock(mTagStack[i - 1])) { |
michael@0 | 1777 | // We assume that every other block overrides a <pre> |
michael@0 | 1778 | return false; |
michael@0 | 1779 | } |
michael@0 | 1780 | --i; |
michael@0 | 1781 | } |
michael@0 | 1782 | |
michael@0 | 1783 | // Not a <pre> in the whole stack |
michael@0 | 1784 | return false; |
michael@0 | 1785 | } |
michael@0 | 1786 | |
michael@0 | 1787 | /** |
michael@0 | 1788 | * This method is required only to identify LI's inside OL. |
michael@0 | 1789 | * Returns TRUE if we are inside an OL tag and FALSE otherwise. |
michael@0 | 1790 | */ |
michael@0 | 1791 | bool |
michael@0 | 1792 | nsPlainTextSerializer::IsInOL() |
michael@0 | 1793 | { |
michael@0 | 1794 | int32_t i = mTagStackIndex; |
michael@0 | 1795 | while(--i >= 0) { |
michael@0 | 1796 | if (mTagStack[i] == nsGkAtoms::ol) |
michael@0 | 1797 | return true; |
michael@0 | 1798 | if (mTagStack[i] == nsGkAtoms::ul) { |
michael@0 | 1799 | // If a UL is reached first, LI belongs the UL nested in OL. |
michael@0 | 1800 | return false; |
michael@0 | 1801 | } |
michael@0 | 1802 | } |
michael@0 | 1803 | // We may reach here for orphan LI's. |
michael@0 | 1804 | return false; |
michael@0 | 1805 | } |
michael@0 | 1806 | |
michael@0 | 1807 | /* |
michael@0 | 1808 | @return 0 = no header, 1 = h1, ..., 6 = h6 |
michael@0 | 1809 | */ |
michael@0 | 1810 | int32_t HeaderLevel(nsIAtom* aTag) |
michael@0 | 1811 | { |
michael@0 | 1812 | if (aTag == nsGkAtoms::h1) { |
michael@0 | 1813 | return 1; |
michael@0 | 1814 | } |
michael@0 | 1815 | if (aTag == nsGkAtoms::h2) { |
michael@0 | 1816 | return 2; |
michael@0 | 1817 | } |
michael@0 | 1818 | if (aTag == nsGkAtoms::h3) { |
michael@0 | 1819 | return 3; |
michael@0 | 1820 | } |
michael@0 | 1821 | if (aTag == nsGkAtoms::h4) { |
michael@0 | 1822 | return 4; |
michael@0 | 1823 | } |
michael@0 | 1824 | if (aTag == nsGkAtoms::h5) { |
michael@0 | 1825 | return 5; |
michael@0 | 1826 | } |
michael@0 | 1827 | if (aTag == nsGkAtoms::h6) { |
michael@0 | 1828 | return 6; |
michael@0 | 1829 | } |
michael@0 | 1830 | return 0; |
michael@0 | 1831 | } |
michael@0 | 1832 | |
michael@0 | 1833 | |
michael@0 | 1834 | /* |
michael@0 | 1835 | * This is an implementation of GetUnicharWidth() and |
michael@0 | 1836 | * GetUnicharStringWidth() as defined in |
michael@0 | 1837 | * "The Single UNIX Specification, Version 2, The Open Group, 1997" |
michael@0 | 1838 | * <http://www.UNIX-systems.org/online.html> |
michael@0 | 1839 | * |
michael@0 | 1840 | * Markus Kuhn -- 2000-02-08 -- public domain |
michael@0 | 1841 | * |
michael@0 | 1842 | * Minor alterations to fit Mozilla's data types by Daniel Bratell |
michael@0 | 1843 | */ |
michael@0 | 1844 | |
/* Column width of a single UTF-16 code unit:
 *
 *    - The null character (U+0000) has a column width of 0.
 *
 *    - Other C0/C1 control characters and DEL lead to a return
 *      value of -1.
 *
 *    - Non-spacing and enclosing combining characters (the ranges listed
 *      in the table below) have a column width of 0.
 *
 *    - Spacing characters in the East Asian Wide (W) or East Asian
 *      FullWidth (F) ranges tested at the end of the function have a
 *      column width of 2.
 *
 *    - All remaining characters (including all printable
 *      ISO 8859-1 and WGL4 characters, Unicode control characters,
 *      etc.) have a column width of 1.
 *
 * The argument is a single char16_t, so only values up to U+FFFF are
 * classified here.
 */

int32_t GetUnicharWidth(char16_t ucs)
{
  struct interval {
    uint16_t first;
    uint16_t last;
  };

  /* sorted list of non-overlapping intervals of non-spacing characters */
  static const interval combining[] = {
    { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
    { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
    { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
    { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
    { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
    { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
    { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
    { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
    { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
    { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
    { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
    { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
    { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
    { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
    { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
    { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
    { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
    { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
    { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
    { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
    { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
    { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
    { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
  };

  /* the null character occupies no column */
  if (ucs == 0) {
    return 0;
  }

  /* remaining C0 controls, DEL and the C1 controls */
  const bool isC0Control = ucs < 32;
  const bool isDelOrC1Control = ucs >= 0x7f && ucs < 0xa0;
  if (isC0Control || isDelOrC1Control) {
    return -1;
  }

  /* everything below the first combining range (Latin-1 etc.) is width 1 */
  if (ucs < combining[0].first) {
    return 1;
  }

  /* binary search of the combining table over the half-open range [lo, hi) */
  int32_t lo = 0;
  int32_t hi = sizeof(combining) / sizeof(combining[0]);
  while (lo < hi) {
    const int32_t probe = lo + (hi - lo) / 2;
    if (ucs > combining[probe].last) {
      lo = probe + 1;
    } else if (ucs < combining[probe].first) {
      hi = probe;
    } else {
      return 0;  /* inside a non-spacing interval */
    }
  }

  /* from here on ucs is neither a combining nor a C0/C1 control character */

  /* fast test for majority of non-wide scripts */
  if (ucs < 0x1100) {
    return 1;
  }

  const bool isWide =
    (ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
    (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
     ucs != 0x303f) ||                  /* CJK ... Yi */
    (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
    (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
    (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
    (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
    (ucs >= 0xffe0 && ucs <= 0xffe6);

  return isWide ? 2 : 1;
}
michael@0 | 1949 | |
michael@0 | 1950 | |
michael@0 | 1951 | int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n) |
michael@0 | 1952 | { |
michael@0 | 1953 | int32_t w, width = 0; |
michael@0 | 1954 | |
michael@0 | 1955 | for (;*pwcs && n-- > 0; pwcs++) |
michael@0 | 1956 | if ((w = GetUnicharWidth(*pwcs)) < 0) |
michael@0 | 1957 | ++width; // Taking 1 as the width of non-printable character, for bug# 94475. |
michael@0 | 1958 | else |
michael@0 | 1959 | width += w; |
michael@0 | 1960 | |
michael@0 | 1961 | return width; |
michael@0 | 1962 | } |
michael@0 | 1963 |