content/base/src/nsPlainTextSerializer.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 /*
michael@0 7 * nsIContentSerializer implementation that can be used with an
michael@0 8 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
michael@0 9 * (eg for copy/paste as plaintext).
michael@0 10 */
michael@0 11
michael@0 12 #include "nsPlainTextSerializer.h"
michael@0 13 #include "nsLWBrkCIID.h"
michael@0 14 #include "nsIServiceManager.h"
michael@0 15 #include "nsGkAtoms.h"
michael@0 16 #include "nsNameSpaceManager.h"
michael@0 17 #include "nsTextFragment.h"
michael@0 18 #include "nsContentUtils.h"
michael@0 19 #include "nsReadableUtils.h"
michael@0 20 #include "nsUnicharUtils.h"
michael@0 21 #include "nsCRT.h"
michael@0 22 #include "mozilla/dom/Element.h"
michael@0 23 #include "mozilla/Preferences.h"
michael@0 24
michael@0 25 using namespace mozilla;
michael@0 26 using namespace mozilla::dom;
michael@0 27
// Names of the preferences read by Init() when formatted output is requested.
#define PREF_STRUCTS "converter.html2txt.structs"
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"

// Number of columns in one tab stop; several indentation sizes derive from it.
static const int32_t kTabSize = 4;

// Indentation of h1 when mHeaderStrategy is 1 or 2.  The indentation of the
// other header levels is derived from this value.
// XXX center h1?
static const int32_t kIndentSizeHeaders = 2;

// When mHeaderStrategy is 1, h(x+1) is indented this many columns more
// than h(x).
static const int32_t kIndentIncrementHeaders = 2;

// Indentation of non-first lines of ul and ol.
static const int32_t kIndentSizeList = kTabSize;

// Indentation of <dd>.
static const int32_t kIndentSizeDD = kTabSize;

// U+00A0 NO-BREAK SPACE and the ordinary space character.
static const char16_t kNBSP = 0x00A0;
static const char16_t kSPACE = ' ';
michael@0 45
michael@0 46 static int32_t HeaderLevel(nsIAtom* aTag);
michael@0 47 static int32_t GetUnicharWidth(char16_t ucs);
michael@0 48 static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n);
michael@0 49
michael@0 50 // Someday may want to make this non-const:
michael@0 51 static const uint32_t TagStackSize = 500;
michael@0 52 static const uint32_t OLStackSize = 100;
michael@0 53
michael@0 54 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
michael@0 55 {
michael@0 56 nsPlainTextSerializer* it = new nsPlainTextSerializer();
michael@0 57 if (!it) {
michael@0 58 return NS_ERROR_OUT_OF_MEMORY;
michael@0 59 }
michael@0 60
michael@0 61 return CallQueryInterface(it, aSerializer);
michael@0 62 }
michael@0 63
michael@0 64 nsPlainTextSerializer::nsPlainTextSerializer()
michael@0 65 : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
michael@0 66 {
michael@0 67
michael@0 68 mOutputString = nullptr;
michael@0 69 mHeadLevel = 0;
michael@0 70 mAtFirstColumn = true;
michael@0 71 mIndent = 0;
michael@0 72 mCiteQuoteLevel = 0;
michael@0 73 mStructs = true; // will be read from prefs later
michael@0 74 mHeaderStrategy = 1 /*indent increasingly*/; // ditto
michael@0 75 mDontWrapAnyQuotes = false; // ditto
michael@0 76 mHasWrittenCiteBlockquote = false;
michael@0 77 mSpanLevel = 0;
michael@0 78 for (int32_t i = 0; i <= 6; i++) {
michael@0 79 mHeaderCounter[i] = 0;
michael@0 80 }
michael@0 81
michael@0 82 // Line breaker
michael@0 83 mWrapColumn = 72; // XXX magic number, we expect someone to reset this
michael@0 84 mCurrentLineWidth = 0;
michael@0 85
michael@0 86 // Flow
michael@0 87 mEmptyLines = 1; // The start of the document is an "empty line" in itself,
michael@0 88 mInWhitespace = false;
michael@0 89 mPreFormatted = false;
michael@0 90 mStartedOutput = false;
michael@0 91
michael@0 92 // initialize the tag stack to zero:
michael@0 93 // The stack only ever contains pointers to static atoms, so they don't
michael@0 94 // need refcounting.
michael@0 95 mTagStack = new nsIAtom*[TagStackSize];
michael@0 96 mTagStackIndex = 0;
michael@0 97 mIgnoreAboveIndex = (uint32_t)kNotFound;
michael@0 98
michael@0 99 // initialize the OL stack, where numbers for ordered lists are kept
michael@0 100 mOLStack = new int32_t[OLStackSize];
michael@0 101 mOLStackIndex = 0;
michael@0 102
michael@0 103 mULCount = 0;
michael@0 104
michael@0 105 mIgnoredChildNodeLevel = 0;
michael@0 106 }
michael@0 107
michael@0 108 nsPlainTextSerializer::~nsPlainTextSerializer()
michael@0 109 {
michael@0 110 delete[] mTagStack;
michael@0 111 delete[] mOLStack;
michael@0 112 NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!");
michael@0 113 }
michael@0 114
michael@0 115 NS_IMPL_ISUPPORTS(nsPlainTextSerializer,
michael@0 116 nsIContentSerializer)
michael@0 117
michael@0 118
michael@0 119 NS_IMETHODIMP
michael@0 120 nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
michael@0 121 const char* aCharSet, bool aIsCopying,
michael@0 122 bool aIsWholeDocument)
michael@0 123 {
michael@0 124 #ifdef DEBUG
michael@0 125 // Check if the major control flags are set correctly.
michael@0 126 if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
michael@0 127 NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
michael@0 128 "If you want format=flowed, you must combine it with "
michael@0 129 "nsIDocumentEncoder::OutputFormatted");
michael@0 130 }
michael@0 131
michael@0 132 if (aFlags & nsIDocumentEncoder::OutputFormatted) {
michael@0 133 NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
michael@0 134 "Can't do formatted and preformatted output at the same time!");
michael@0 135 }
michael@0 136 #endif
michael@0 137
michael@0 138 mFlags = aFlags;
michael@0 139 mWrapColumn = aWrapColumn;
michael@0 140
michael@0 141 // Only create a linebreaker if we will handle wrapping.
michael@0 142 if (MayWrap()) {
michael@0 143 mLineBreaker = nsContentUtils::LineBreaker();
michael@0 144 }
michael@0 145
michael@0 146 // Set the line break character:
michael@0 147 if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
michael@0 148 && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
michael@0 149 // Windows
michael@0 150 mLineBreak.AssignLiteral("\r\n");
michael@0 151 }
michael@0 152 else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
michael@0 153 // Mac
michael@0 154 mLineBreak.Assign(char16_t('\r'));
michael@0 155 }
michael@0 156 else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
michael@0 157 // Unix/DOM
michael@0 158 mLineBreak.Assign(char16_t('\n'));
michael@0 159 }
michael@0 160 else {
michael@0 161 // Platform/default
michael@0 162 mLineBreak.AssignLiteral(NS_LINEBREAK);
michael@0 163 }
michael@0 164
michael@0 165 mLineBreakDue = false;
michael@0 166 mFloatingLines = -1;
michael@0 167
michael@0 168 if (mFlags & nsIDocumentEncoder::OutputFormatted) {
michael@0 169 // Get some prefs that controls how we do formatted output
michael@0 170 mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
michael@0 171
michael@0 172 mHeaderStrategy =
michael@0 173 Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
michael@0 174
michael@0 175 // DontWrapAnyQuotes is set according to whether plaintext mail
michael@0 176 // is wrapping to window width -- see bug 134439.
michael@0 177 // We'll only want this if we're wrapping and formatted.
michael@0 178 if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) {
michael@0 179 mDontWrapAnyQuotes =
michael@0 180 Preferences::GetBool("mail.compose.wrap_to_window_width",
michael@0 181 mDontWrapAnyQuotes);
michael@0 182 }
michael@0 183 }
michael@0 184
michael@0 185 // XXX We should let the caller pass this in.
michael@0 186 if (Preferences::GetBool("browser.frames.enabled")) {
michael@0 187 mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
michael@0 188 }
michael@0 189 else {
michael@0 190 mFlags |= nsIDocumentEncoder::OutputNoFramesContent;
michael@0 191 }
michael@0 192
michael@0 193 return NS_OK;
michael@0 194 }
michael@0 195
michael@0 196 bool
michael@0 197 nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack)
michael@0 198 {
michael@0 199 uint32_t size = aStack.Length();
michael@0 200 if (size == 0) {
michael@0 201 return false;
michael@0 202 }
michael@0 203 return aStack.ElementAt(size-1);
michael@0 204 }
michael@0 205
michael@0 206 void
michael@0 207 nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue)
michael@0 208 {
michael@0 209 uint32_t size = aStack.Length();
michael@0 210 if (size > 0) {
michael@0 211 aStack.ElementAt(size-1) = aValue;
michael@0 212 }
michael@0 213 else {
michael@0 214 NS_ERROR("There is no \"Last\" value");
michael@0 215 }
michael@0 216 }
michael@0 217
michael@0 218 void
michael@0 219 nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue)
michael@0 220 {
michael@0 221 aStack.AppendElement(bool(aValue));
michael@0 222 }
michael@0 223
michael@0 224 bool
michael@0 225 nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack)
michael@0 226 {
michael@0 227 bool returnValue = false;
michael@0 228 uint32_t size = aStack.Length();
michael@0 229 if (size > 0) {
michael@0 230 returnValue = aStack.ElementAt(size-1);
michael@0 231 aStack.RemoveElementAt(size-1);
michael@0 232 }
michael@0 233 return returnValue;
michael@0 234 }
michael@0 235
michael@0 236 bool
michael@0 237 nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag)
michael@0 238 {
michael@0 239 // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set,
michael@0 240 // non-textual container element should be serialized as placeholder
michael@0 241 // character and its child nodes should be ignored. See bug 895239.
michael@0 242 if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) {
michael@0 243 return false;
michael@0 244 }
michael@0 245
michael@0 246 return
michael@0 247 (aTag == nsGkAtoms::audio) ||
michael@0 248 (aTag == nsGkAtoms::canvas) ||
michael@0 249 (aTag == nsGkAtoms::iframe) ||
michael@0 250 (aTag == nsGkAtoms::meter) ||
michael@0 251 (aTag == nsGkAtoms::progress) ||
michael@0 252 (aTag == nsGkAtoms::object) ||
michael@0 253 (aTag == nsGkAtoms::svg) ||
michael@0 254 (aTag == nsGkAtoms::video);
michael@0 255 }
michael@0 256
michael@0 257 NS_IMETHODIMP
michael@0 258 nsPlainTextSerializer::AppendText(nsIContent* aText,
michael@0 259 int32_t aStartOffset,
michael@0 260 int32_t aEndOffset,
michael@0 261 nsAString& aStr)
michael@0 262 {
michael@0 263 if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
michael@0 264 return NS_OK;
michael@0 265 }
michael@0 266
michael@0 267 NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
michael@0 268 if ( aStartOffset < 0 )
michael@0 269 return NS_ERROR_INVALID_ARG;
michael@0 270
michael@0 271 NS_ENSURE_ARG(aText);
michael@0 272
michael@0 273 nsresult rv = NS_OK;
michael@0 274
michael@0 275 nsIContent* content = aText;
michael@0 276 const nsTextFragment* frag;
michael@0 277 if (!content || !(frag = content->GetText())) {
michael@0 278 return NS_ERROR_FAILURE;
michael@0 279 }
michael@0 280
michael@0 281 int32_t fragLength = frag->GetLength();
michael@0 282 int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
michael@0 283 NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
michael@0 284
michael@0 285 int32_t length = endoffset - aStartOffset;
michael@0 286 if (length <= 0) {
michael@0 287 return NS_OK;
michael@0 288 }
michael@0 289
michael@0 290 nsAutoString textstr;
michael@0 291 if (frag->Is2b()) {
michael@0 292 textstr.Assign(frag->Get2b() + aStartOffset, length);
michael@0 293 }
michael@0 294 else {
michael@0 295 // AssignASCII is for 7-bit character only, so don't use it
michael@0 296 const char *data = frag->Get1b();
michael@0 297 CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
michael@0 298 }
michael@0 299
michael@0 300 mOutputString = &aStr;
michael@0 301
michael@0 302 // We have to split the string across newlines
michael@0 303 // to match parser behavior
michael@0 304 int32_t start = 0;
michael@0 305 int32_t offset = textstr.FindCharInSet("\n\r");
michael@0 306 while (offset != kNotFound) {
michael@0 307
michael@0 308 if (offset>start) {
michael@0 309 // Pass in the line
michael@0 310 DoAddText(false,
michael@0 311 Substring(textstr, start, offset-start));
michael@0 312 }
michael@0 313
michael@0 314 // Pass in a newline
michael@0 315 DoAddText(true, mLineBreak);
michael@0 316
michael@0 317 start = offset+1;
michael@0 318 offset = textstr.FindCharInSet("\n\r", start);
michael@0 319 }
michael@0 320
michael@0 321 // Consume the last bit of the string if there's any left
michael@0 322 if (start < length) {
michael@0 323 if (start) {
michael@0 324 DoAddText(false, Substring(textstr, start, length - start));
michael@0 325 }
michael@0 326 else {
michael@0 327 DoAddText(false, textstr);
michael@0 328 }
michael@0 329 }
michael@0 330
michael@0 331 mOutputString = nullptr;
michael@0 332
michael@0 333 return rv;
michael@0 334 }
michael@0 335
michael@0 336 NS_IMETHODIMP
michael@0 337 nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
michael@0 338 int32_t aStartOffset,
michael@0 339 int32_t aEndOffset,
michael@0 340 nsAString& aStr)
michael@0 341 {
michael@0 342 return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
michael@0 343 }
michael@0 344
michael@0 345 NS_IMETHODIMP
michael@0 346 nsPlainTextSerializer::AppendElementStart(Element* aElement,
michael@0 347 Element* aOriginalElement,
michael@0 348 nsAString& aStr)
michael@0 349 {
michael@0 350 NS_ENSURE_ARG(aElement);
michael@0 351
michael@0 352 mElement = aElement;
michael@0 353
michael@0 354 nsresult rv;
michael@0 355 nsIAtom* id = GetIdForContent(mElement);
michael@0 356
michael@0 357 bool isContainer = !nsContentUtils::IsHTMLVoid(id);
michael@0 358
michael@0 359 mOutputString = &aStr;
michael@0 360
michael@0 361 if (isContainer) {
michael@0 362 rv = DoOpenContainer(id);
michael@0 363 }
michael@0 364 else {
michael@0 365 rv = DoAddLeaf(id);
michael@0 366 }
michael@0 367
michael@0 368 mElement = nullptr;
michael@0 369 mOutputString = nullptr;
michael@0 370
michael@0 371 if (id == nsGkAtoms::head) {
michael@0 372 ++mHeadLevel;
michael@0 373 }
michael@0 374
michael@0 375 return rv;
michael@0 376 }
michael@0 377
michael@0 378 NS_IMETHODIMP
michael@0 379 nsPlainTextSerializer::AppendElementEnd(Element* aElement,
michael@0 380 nsAString& aStr)
michael@0 381 {
michael@0 382 NS_ENSURE_ARG(aElement);
michael@0 383
michael@0 384 mElement = aElement;
michael@0 385
michael@0 386 nsresult rv;
michael@0 387 nsIAtom* id = GetIdForContent(mElement);
michael@0 388
michael@0 389 bool isContainer = !nsContentUtils::IsHTMLVoid(id);
michael@0 390
michael@0 391 mOutputString = &aStr;
michael@0 392
michael@0 393 rv = NS_OK;
michael@0 394 if (isContainer) {
michael@0 395 rv = DoCloseContainer(id);
michael@0 396 }
michael@0 397
michael@0 398 mElement = nullptr;
michael@0 399 mOutputString = nullptr;
michael@0 400
michael@0 401 if (id == nsGkAtoms::head) {
michael@0 402 NS_ASSERTION(mHeadLevel != 0,
michael@0 403 "mHeadLevel being decremented below 0");
michael@0 404 --mHeadLevel;
michael@0 405 }
michael@0 406
michael@0 407 return rv;
michael@0 408 }
michael@0 409
michael@0 410 NS_IMETHODIMP
michael@0 411 nsPlainTextSerializer::Flush(nsAString& aStr)
michael@0 412 {
michael@0 413 mOutputString = &aStr;
michael@0 414 FlushLine();
michael@0 415 mOutputString = nullptr;
michael@0 416 return NS_OK;
michael@0 417 }
michael@0 418
michael@0 419 NS_IMETHODIMP
michael@0 420 nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument,
michael@0 421 nsAString& aStr)
michael@0 422 {
michael@0 423 return NS_OK;
michael@0 424 }
michael@0 425
michael@0 426 nsresult
michael@0 427 nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag)
michael@0 428 {
michael@0 429 // Check if we need output current node as placeholder character and ignore
michael@0 430 // child nodes.
michael@0 431 if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) {
michael@0 432 if (mIgnoredChildNodeLevel == 0) {
michael@0 433 // Serialize current node as placeholder character
michael@0 434 Write(NS_LITERAL_STRING("\xFFFC"));
michael@0 435 }
michael@0 436 // Ignore child nodes.
michael@0 437 mIgnoredChildNodeLevel++;
michael@0 438 return NS_OK;
michael@0 439 }
michael@0 440
michael@0 441 if (mFlags & nsIDocumentEncoder::OutputRaw) {
michael@0 442 // Raw means raw. Don't even think about doing anything fancy
michael@0 443 // here like indenting, adding line breaks or any other
michael@0 444 // characters such as list item bullets, quote characters
michael@0 445 // around <q>, etc. I mean it! Don't make me smack you!
michael@0 446
michael@0 447 return NS_OK;
michael@0 448 }
michael@0 449
michael@0 450 if (mTagStackIndex < TagStackSize) {
michael@0 451 mTagStack[mTagStackIndex++] = aTag;
michael@0 452 }
michael@0 453
michael@0 454 if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
michael@0 455 return NS_OK;
michael@0 456 }
michael@0 457
michael@0 458 // Reset this so that <blockquote type=cite> doesn't affect the whitespace
michael@0 459 // above random <pre>s below it.
michael@0 460 mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
michael@0 461 aTag == nsGkAtoms::pre;
michael@0 462
michael@0 463 bool isInCiteBlockquote = false;
michael@0 464
michael@0 465 // XXX special-case <blockquote type=cite> so that we don't add additional
michael@0 466 // newlines before the text.
michael@0 467 if (aTag == nsGkAtoms::blockquote) {
michael@0 468 nsAutoString value;
michael@0 469 nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
michael@0 470 isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
michael@0 471 }
michael@0 472
michael@0 473 if (mLineBreakDue && !isInCiteBlockquote)
michael@0 474 EnsureVerticalSpace(mFloatingLines);
michael@0 475
michael@0 476 // Check if this tag's content that should not be output
michael@0 477 if ((aTag == nsGkAtoms::noscript &&
michael@0 478 !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
michael@0 479 ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
michael@0 480 !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
michael@0 481 // Ignore everything that follows the current tag in
michael@0 482 // question until a matching end tag is encountered.
michael@0 483 mIgnoreAboveIndex = mTagStackIndex - 1;
michael@0 484 return NS_OK;
michael@0 485 }
michael@0 486
michael@0 487 if (aTag == nsGkAtoms::body) {
michael@0 488 // Try to figure out here whether we have a
michael@0 489 // preformatted style attribute.
michael@0 490 //
michael@0 491 // Trigger on the presence of a "pre-wrap" in the
michael@0 492 // style attribute. That's a very simplistic way to do
michael@0 493 // it, but better than nothing.
michael@0 494 // Also set mWrapColumn to the value given there
michael@0 495 // (which arguably we should only do if told to do so).
michael@0 496 nsAutoString style;
michael@0 497 int32_t whitespace;
michael@0 498 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
michael@0 499 (kNotFound != (whitespace = style.Find("white-space:")))) {
michael@0 500
michael@0 501 if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
michael@0 502 #ifdef DEBUG_preformatted
michael@0 503 printf("Set mPreFormatted based on style pre-wrap\n");
michael@0 504 #endif
michael@0 505 mPreFormatted = true;
michael@0 506 int32_t widthOffset = style.Find("width:");
michael@0 507 if (widthOffset >= 0) {
michael@0 508 // We have to search for the ch before the semicolon,
michael@0 509 // not for the semicolon itself, because nsString::ToInteger()
michael@0 510 // considers 'c' to be a valid numeric char (even if radix=10)
michael@0 511 // but then gets confused if it sees it next to the number
michael@0 512 // when the radix specified was 10, and returns an error code.
michael@0 513 int32_t semiOffset = style.Find("ch", false, widthOffset+6);
michael@0 514 int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
michael@0 515 : style.Length() - widthOffset);
michael@0 516 nsAutoString widthstr;
michael@0 517 style.Mid(widthstr, widthOffset+6, length);
michael@0 518 nsresult err;
michael@0 519 int32_t col = widthstr.ToInteger(&err);
michael@0 520
michael@0 521 if (NS_SUCCEEDED(err)) {
michael@0 522 mWrapColumn = (uint32_t)col;
michael@0 523 #ifdef DEBUG_preformatted
michael@0 524 printf("Set wrap column to %d based on style\n", mWrapColumn);
michael@0 525 #endif
michael@0 526 }
michael@0 527 }
michael@0 528 }
michael@0 529 else if (kNotFound != style.Find("pre", true, whitespace)) {
michael@0 530 #ifdef DEBUG_preformatted
michael@0 531 printf("Set mPreFormatted based on style pre\n");
michael@0 532 #endif
michael@0 533 mPreFormatted = true;
michael@0 534 mWrapColumn = 0;
michael@0 535 }
michael@0 536 }
michael@0 537 else {
michael@0 538 /* See comment at end of function. */
michael@0 539 mInWhitespace = true;
michael@0 540 mPreFormatted = false;
michael@0 541 }
michael@0 542
michael@0 543 return NS_OK;
michael@0 544 }
michael@0 545
michael@0 546 // Keep this in sync with DoCloseContainer!
michael@0 547 if (!DoOutput()) {
michael@0 548 return NS_OK;
michael@0 549 }
michael@0 550
michael@0 551 if (aTag == nsGkAtoms::p)
michael@0 552 EnsureVerticalSpace(1);
michael@0 553 else if (aTag == nsGkAtoms::pre) {
michael@0 554 if (GetLastBool(mIsInCiteBlockquote))
michael@0 555 EnsureVerticalSpace(0);
michael@0 556 else if (mHasWrittenCiteBlockquote) {
michael@0 557 EnsureVerticalSpace(0);
michael@0 558 mHasWrittenCiteBlockquote = false;
michael@0 559 }
michael@0 560 else
michael@0 561 EnsureVerticalSpace(1);
michael@0 562 }
michael@0 563 else if (aTag == nsGkAtoms::tr) {
michael@0 564 PushBool(mHasWrittenCellsForRow, false);
michael@0 565 }
michael@0 566 else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
michael@0 567 // We must make sure that the content of two table cells get a
michael@0 568 // space between them.
michael@0 569
michael@0 570 // To make the separation between cells most obvious and
michael@0 571 // importable, we use a TAB.
michael@0 572 if (GetLastBool(mHasWrittenCellsForRow)) {
michael@0 573 // Bypass |Write| so that the TAB isn't compressed away.
michael@0 574 AddToLine(MOZ_UTF16("\t"), 1);
michael@0 575 mInWhitespace = true;
michael@0 576 }
michael@0 577 else if (mHasWrittenCellsForRow.IsEmpty()) {
michael@0 578 // We don't always see a <tr> (nor a <table>) before the <td> if we're
michael@0 579 // copying part of a table
michael@0 580 PushBool(mHasWrittenCellsForRow, true); // will never be popped
michael@0 581 }
michael@0 582 else {
michael@0 583 SetLastBool(mHasWrittenCellsForRow, true);
michael@0 584 }
michael@0 585 }
michael@0 586 else if (aTag == nsGkAtoms::ul) {
michael@0 587 // Indent here to support nested lists, which aren't included in li :-(
michael@0 588 EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
michael@0 589 // Must end the current line before we change indention
michael@0 590 mIndent += kIndentSizeList;
michael@0 591 mULCount++;
michael@0 592 }
michael@0 593 else if (aTag == nsGkAtoms::ol) {
michael@0 594 EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
michael@0 595 if (mFlags & nsIDocumentEncoder::OutputFormatted) {
michael@0 596 // Must end the current line before we change indention
michael@0 597 if (mOLStackIndex < OLStackSize) {
michael@0 598 nsAutoString startAttr;
michael@0 599 int32_t startVal = 1;
michael@0 600 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
michael@0 601 nsresult rv = NS_OK;
michael@0 602 startVal = startAttr.ToInteger(&rv);
michael@0 603 if (NS_FAILED(rv))
michael@0 604 startVal = 1;
michael@0 605 }
michael@0 606 mOLStack[mOLStackIndex++] = startVal;
michael@0 607 }
michael@0 608 } else {
michael@0 609 mOLStackIndex++;
michael@0 610 }
michael@0 611 mIndent += kIndentSizeList; // see ul
michael@0 612 }
michael@0 613 else if (aTag == nsGkAtoms::li &&
michael@0 614 (mFlags & nsIDocumentEncoder::OutputFormatted)) {
michael@0 615 if (mTagStackIndex > 1 && IsInOL()) {
michael@0 616 if (mOLStackIndex > 0) {
michael@0 617 nsAutoString valueAttr;
michael@0 618 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
michael@0 619 nsresult rv = NS_OK;
michael@0 620 int32_t valueAttrVal = valueAttr.ToInteger(&rv);
michael@0 621 if (NS_SUCCEEDED(rv))
michael@0 622 mOLStack[mOLStackIndex-1] = valueAttrVal;
michael@0 623 }
michael@0 624 // This is what nsBulletFrame does for OLs:
michael@0 625 mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
michael@0 626 }
michael@0 627 else {
michael@0 628 mInIndentString.Append(char16_t('#'));
michael@0 629 }
michael@0 630
michael@0 631 mInIndentString.Append(char16_t('.'));
michael@0 632
michael@0 633 }
michael@0 634 else {
michael@0 635 static char bulletCharArray[] = "*o+#";
michael@0 636 uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
michael@0 637 char bulletChar = bulletCharArray[index % 4];
michael@0 638 mInIndentString.Append(char16_t(bulletChar));
michael@0 639 }
michael@0 640
michael@0 641 mInIndentString.Append(char16_t(' '));
michael@0 642 }
michael@0 643 else if (aTag == nsGkAtoms::dl) {
michael@0 644 EnsureVerticalSpace(1);
michael@0 645 }
michael@0 646 else if (aTag == nsGkAtoms::dt) {
michael@0 647 EnsureVerticalSpace(0);
michael@0 648 }
michael@0 649 else if (aTag == nsGkAtoms::dd) {
michael@0 650 EnsureVerticalSpace(0);
michael@0 651 mIndent += kIndentSizeDD;
michael@0 652 }
michael@0 653 else if (aTag == nsGkAtoms::span) {
michael@0 654 ++mSpanLevel;
michael@0 655 }
michael@0 656 else if (aTag == nsGkAtoms::blockquote) {
michael@0 657 // Push
michael@0 658 PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
michael@0 659 if (isInCiteBlockquote) {
michael@0 660 EnsureVerticalSpace(0);
michael@0 661 mCiteQuoteLevel++;
michael@0 662 }
michael@0 663 else {
michael@0 664 EnsureVerticalSpace(1);
michael@0 665 mIndent += kTabSize; // Check for some maximum value?
michael@0 666 }
michael@0 667 }
michael@0 668 else if (aTag == nsGkAtoms::q) {
michael@0 669 Write(NS_LITERAL_STRING("\""));
michael@0 670 }
michael@0 671
michael@0 672 // Else make sure we'll separate block level tags,
michael@0 673 // even if we're about to leave, before doing any other formatting.
michael@0 674 else if (nsContentUtils::IsHTMLBlock(aTag)) {
michael@0 675 EnsureVerticalSpace(0);
michael@0 676 }
michael@0 677
michael@0 678 //////////////////////////////////////////////////////////////
michael@0 679 if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
michael@0 680 return NS_OK;
michael@0 681 }
michael@0 682 //////////////////////////////////////////////////////////////
michael@0 683 // The rest of this routine is formatted output stuff,
michael@0 684 // which we should skip if we're not formatted:
michael@0 685 //////////////////////////////////////////////////////////////
michael@0 686
michael@0 687 // Push on stack
michael@0 688 bool currentNodeIsConverted = IsCurrentNodeConverted();
michael@0 689
michael@0 690 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
michael@0 691 aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
michael@0 692 aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
michael@0 693 {
michael@0 694 EnsureVerticalSpace(2);
michael@0 695 if (mHeaderStrategy == 2) { // numbered
michael@0 696 mIndent += kIndentSizeHeaders;
michael@0 697 // Caching
michael@0 698 int32_t level = HeaderLevel(aTag);
michael@0 699 // Increase counter for current level
michael@0 700 mHeaderCounter[level]++;
michael@0 701 // Reset all lower levels
michael@0 702 int32_t i;
michael@0 703
michael@0 704 for (i = level + 1; i <= 6; i++) {
michael@0 705 mHeaderCounter[i] = 0;
michael@0 706 }
michael@0 707
michael@0 708 // Construct numbers
michael@0 709 nsAutoString leadup;
michael@0 710 for (i = 1; i <= level; i++) {
michael@0 711 leadup.AppendInt(mHeaderCounter[i]);
michael@0 712 leadup.Append(char16_t('.'));
michael@0 713 }
michael@0 714 leadup.Append(char16_t(' '));
michael@0 715 Write(leadup);
michael@0 716 }
michael@0 717 else if (mHeaderStrategy == 1) { // indent increasingly
michael@0 718 mIndent += kIndentSizeHeaders;
michael@0 719 for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
michael@0 720 // for h(x), run x-1 times
michael@0 721 mIndent += kIndentIncrementHeaders;
michael@0 722 }
michael@0 723 }
michael@0 724 }
michael@0 725 else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
michael@0 726 nsAutoString url;
michael@0 727 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
michael@0 728 && !url.IsEmpty()) {
michael@0 729 mURL = url;
michael@0 730 }
michael@0 731 }
michael@0 732 else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
michael@0 733 Write(NS_LITERAL_STRING("^"));
michael@0 734 }
michael@0 735 else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
michael@0 736 Write(NS_LITERAL_STRING("_"));
michael@0 737 }
michael@0 738 else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
michael@0 739 Write(NS_LITERAL_STRING("|"));
michael@0 740 }
michael@0 741 else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
michael@0 742 && mStructs && !currentNodeIsConverted) {
michael@0 743 Write(NS_LITERAL_STRING("*"));
michael@0 744 }
michael@0 745 else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
michael@0 746 && mStructs && !currentNodeIsConverted) {
michael@0 747 Write(NS_LITERAL_STRING("/"));
michael@0 748 }
michael@0 749 else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
michael@0 750 Write(NS_LITERAL_STRING("_"));
michael@0 751 }
michael@0 752
michael@0 753 /* Container elements are always block elements, so we shouldn't
michael@0 754 output any whitespace immediately after the container tag even if
michael@0 755 there's extra whitespace there because the HTML is pretty-printed
michael@0 756 or something. To ensure that happens, tell the serializer we're
michael@0 757 already in whitespace so it won't output more. */
michael@0 758 mInWhitespace = true;
michael@0 759
michael@0 760 return NS_OK;
michael@0 761 }
michael@0 762
michael@0 763 nsresult
michael@0 764 nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag)
michael@0 765 {
michael@0 766 if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) {
michael@0 767 mIgnoredChildNodeLevel--;
michael@0 768 return NS_OK;
michael@0 769 }
michael@0 770
michael@0 771 if (mFlags & nsIDocumentEncoder::OutputRaw) {
michael@0 772 // Raw means raw. Don't even think about doing anything fancy
michael@0 773 // here like indenting, adding line breaks or any other
michael@0 774 // characters such as list item bullets, quote characters
michael@0 775 // around <q>, etc. I mean it! Don't make me smack you!
michael@0 776
michael@0 777 return NS_OK;
michael@0 778 }
michael@0 779
michael@0 780 if (mTagStackIndex > 0) {
michael@0 781 --mTagStackIndex;
michael@0 782 }
michael@0 783
michael@0 784 if (mTagStackIndex >= mIgnoreAboveIndex) {
michael@0 785 if (mTagStackIndex == mIgnoreAboveIndex) {
michael@0 786 // We're dealing with the close tag whose matching
michael@0 787 // open tag had set the mIgnoreAboveIndex value.
michael@0 788 // Reset mIgnoreAboveIndex before discarding this tag.
michael@0 789 mIgnoreAboveIndex = (uint32_t)kNotFound;
michael@0 790 }
michael@0 791 return NS_OK;
michael@0 792 }
michael@0 793
michael@0 794 // End current line if we're ending a block level tag
michael@0 795 if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
michael@0 796 // We want the output to end with a new line,
michael@0 797 // but in preformatted areas like text fields,
michael@0 798 // we can't emit newlines that weren't there.
michael@0 799 // So add the newline only in the case of formatted output.
michael@0 800 if (mFlags & nsIDocumentEncoder::OutputFormatted) {
michael@0 801 EnsureVerticalSpace(0);
michael@0 802 }
michael@0 803 else {
michael@0 804 FlushLine();
michael@0 805 }
michael@0 806 // We won't want to do anything with these in formatted mode either,
michael@0 807 // so just return now:
michael@0 808 return NS_OK;
michael@0 809 }
michael@0 810
michael@0 811 // Keep this in sync with DoOpenContainer!
michael@0 812 if (!DoOutput()) {
michael@0 813 return NS_OK;
michael@0 814 }
michael@0 815
michael@0 816 if (aTag == nsGkAtoms::tr) {
michael@0 817 PopBool(mHasWrittenCellsForRow);
michael@0 818 // Should always end a line, but get no more whitespace
michael@0 819 if (mFloatingLines < 0)
michael@0 820 mFloatingLines = 0;
michael@0 821 mLineBreakDue = true;
michael@0 822 }
michael@0 823 else if (((aTag == nsGkAtoms::li) ||
michael@0 824 (aTag == nsGkAtoms::dt)) &&
michael@0 825 (mFlags & nsIDocumentEncoder::OutputFormatted)) {
michael@0 826 // Items that should always end a line, but get no more whitespace
michael@0 827 if (mFloatingLines < 0)
michael@0 828 mFloatingLines = 0;
michael@0 829 mLineBreakDue = true;
michael@0 830 }
michael@0 831 else if (aTag == nsGkAtoms::pre) {
michael@0 832 mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
michael@0 833 mLineBreakDue = true;
michael@0 834 }
michael@0 835 else if (aTag == nsGkAtoms::ul) {
michael@0 836 FlushLine();
michael@0 837 mIndent -= kIndentSizeList;
michael@0 838 if (--mULCount + mOLStackIndex == 0) {
michael@0 839 mFloatingLines = 1;
michael@0 840 mLineBreakDue = true;
michael@0 841 }
michael@0 842 }
michael@0 843 else if (aTag == nsGkAtoms::ol) {
michael@0 844 FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
michael@0 845 mIndent -= kIndentSizeList;
michael@0 846 NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
michael@0 847 mOLStackIndex--;
michael@0 848 if (mULCount + mOLStackIndex == 0) {
michael@0 849 mFloatingLines = 1;
michael@0 850 mLineBreakDue = true;
michael@0 851 }
michael@0 852 }
michael@0 853 else if (aTag == nsGkAtoms::dl) {
michael@0 854 mFloatingLines = 1;
michael@0 855 mLineBreakDue = true;
michael@0 856 }
michael@0 857 else if (aTag == nsGkAtoms::dd) {
michael@0 858 FlushLine();
michael@0 859 mIndent -= kIndentSizeDD;
michael@0 860 }
michael@0 861 else if (aTag == nsGkAtoms::span) {
michael@0 862 NS_ASSERTION(mSpanLevel, "Span level will be negative!");
michael@0 863 --mSpanLevel;
michael@0 864 }
michael@0 865 else if (aTag == nsGkAtoms::div) {
michael@0 866 if (mFloatingLines < 0)
michael@0 867 mFloatingLines = 0;
michael@0 868 mLineBreakDue = true;
michael@0 869 }
michael@0 870 else if (aTag == nsGkAtoms::blockquote) {
michael@0 871 FlushLine(); // Is this needed?
michael@0 872
michael@0 873 // Pop
michael@0 874 bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
michael@0 875
michael@0 876 if (isInCiteBlockquote) {
michael@0 877 NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
michael@0 878 mCiteQuoteLevel--;
michael@0 879 mFloatingLines = 0;
michael@0 880 mHasWrittenCiteBlockquote = true;
michael@0 881 }
michael@0 882 else {
michael@0 883 mIndent -= kTabSize;
michael@0 884 mFloatingLines = 1;
michael@0 885 }
michael@0 886 mLineBreakDue = true;
michael@0 887 }
michael@0 888 else if (aTag == nsGkAtoms::q) {
michael@0 889 Write(NS_LITERAL_STRING("\""));
michael@0 890 }
michael@0 891 else if (nsContentUtils::IsHTMLBlock(aTag)
michael@0 892 && aTag != nsGkAtoms::script) {
michael@0 893 // All other blocks get 1 vertical space after them
michael@0 894 // in formatted mode, otherwise 0.
michael@0 895 // This is hard. Sometimes 0 is a better number, but
michael@0 896 // how to know?
michael@0 897 if (mFlags & nsIDocumentEncoder::OutputFormatted)
michael@0 898 EnsureVerticalSpace(1);
michael@0 899 else {
michael@0 900 if (mFloatingLines < 0)
michael@0 901 mFloatingLines = 0;
michael@0 902 mLineBreakDue = true;
michael@0 903 }
michael@0 904 }
michael@0 905
michael@0 906 //////////////////////////////////////////////////////////////
michael@0 907 if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
michael@0 908 return NS_OK;
michael@0 909 }
michael@0 910 //////////////////////////////////////////////////////////////
michael@0 911 // The rest of this routine is formatted output stuff,
michael@0 912 // which we should skip if we're not formatted:
michael@0 913 //////////////////////////////////////////////////////////////
michael@0 914
michael@0 915 // Pop the currentConverted stack
michael@0 916 bool currentNodeIsConverted = IsCurrentNodeConverted();
michael@0 917
michael@0 918 if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
michael@0 919 aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
michael@0 920 aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
michael@0 921
michael@0 922 if (mHeaderStrategy) { /*numbered or indent increasingly*/
michael@0 923 mIndent -= kIndentSizeHeaders;
michael@0 924 }
michael@0 925 if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
michael@0 926 for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
michael@0 927 // for h(x), run x-1 times
michael@0 928 mIndent -= kIndentIncrementHeaders;
michael@0 929 }
michael@0 930 }
michael@0 931 EnsureVerticalSpace(1);
michael@0 932 }
michael@0 933 else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
michael@0 934 nsAutoString temp;
michael@0 935 temp.AssignLiteral(" <");
michael@0 936 temp += mURL;
michael@0 937 temp.Append(char16_t('>'));
michael@0 938 Write(temp);
michael@0 939 mURL.Truncate();
michael@0 940 }
michael@0 941 else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
michael@0 942 && mStructs && !currentNodeIsConverted) {
michael@0 943 Write(kSpace);
michael@0 944 }
michael@0 945 else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
michael@0 946 Write(NS_LITERAL_STRING("|"));
michael@0 947 }
michael@0 948 else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
michael@0 949 && mStructs && !currentNodeIsConverted) {
michael@0 950 Write(NS_LITERAL_STRING("*"));
michael@0 951 }
michael@0 952 else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
michael@0 953 && mStructs && !currentNodeIsConverted) {
michael@0 954 Write(NS_LITERAL_STRING("/"));
michael@0 955 }
michael@0 956 else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
michael@0 957 Write(NS_LITERAL_STRING("_"));
michael@0 958 }
michael@0 959
michael@0 960 return NS_OK;
michael@0 961 }
michael@0 962
michael@0 963 bool
michael@0 964 nsPlainTextSerializer::MustSuppressLeaf()
michael@0 965 {
michael@0 966 if (mIgnoredChildNodeLevel > 0) {
michael@0 967 return true;
michael@0 968 }
michael@0 969
michael@0 970 if ((mTagStackIndex > 1 &&
michael@0 971 mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
michael@0 972 (mTagStackIndex > 0 &&
michael@0 973 mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
michael@0 974 // Don't output the contents of SELECT elements;
michael@0 975 // Might be nice, eventually, to output just the selected element.
michael@0 976 // Read more in bug 31994.
michael@0 977 return true;
michael@0 978 }
michael@0 979
michael@0 980 if (mTagStackIndex > 0 &&
michael@0 981 (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
michael@0 982 mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
michael@0 983 // Don't output the contents of <script> or <style> tags;
michael@0 984 return true;
michael@0 985 }
michael@0 986
michael@0 987 return false;
michael@0 988 }
michael@0 989
michael@0 990 void
michael@0 991 nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText)
michael@0 992 {
michael@0 993 // If we don't want any output, just return
michael@0 994 if (!DoOutput()) {
michael@0 995 return;
michael@0 996 }
michael@0 997
michael@0 998 if (!aIsLineBreak) {
michael@0 999 // Make sure to reset this, since it's no longer true.
michael@0 1000 mHasWrittenCiteBlockquote = false;
michael@0 1001 }
michael@0 1002
michael@0 1003 if (mLineBreakDue)
michael@0 1004 EnsureVerticalSpace(mFloatingLines);
michael@0 1005
michael@0 1006 if (MustSuppressLeaf()) {
michael@0 1007 return;
michael@0 1008 }
michael@0 1009
michael@0 1010 if (aIsLineBreak) {
michael@0 1011 // The only times we want to pass along whitespace from the original
michael@0 1012 // html source are if we're forced into preformatted mode via flags,
michael@0 1013 // or if we're prettyprinting and we're inside a <pre>.
michael@0 1014 // Otherwise, either we're collapsing to minimal text, or we're
michael@0 1015 // prettyprinting to mimic the html format, and in neither case
michael@0 1016 // does the formatting of the html source help us.
michael@0 1017 if ((mFlags & nsIDocumentEncoder::OutputPreformatted) ||
michael@0 1018 (mPreFormatted && !mWrapColumn) ||
michael@0 1019 IsInPre()) {
michael@0 1020 EnsureVerticalSpace(mEmptyLines+1);
michael@0 1021 }
michael@0 1022 else if (!mInWhitespace) {
michael@0 1023 Write(kSpace);
michael@0 1024 mInWhitespace = true;
michael@0 1025 }
michael@0 1026 return;
michael@0 1027 }
michael@0 1028
michael@0 1029 /* Check, if we are in a link (symbolized with mURL containing the URL)
michael@0 1030 and the text is equal to the URL. In that case we don't want to output
michael@0 1031 the URL twice so we scrap the text in mURL. */
michael@0 1032 if (!mURL.IsEmpty() && mURL.Equals(aText)) {
michael@0 1033 mURL.Truncate();
michael@0 1034 }
michael@0 1035 Write(aText);
michael@0 1036 }
michael@0 1037
michael@0 1038 nsresult
michael@0 1039 nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag)
michael@0 1040 {
michael@0 1041 // If we don't want any output, just return
michael@0 1042 if (!DoOutput()) {
michael@0 1043 return NS_OK;
michael@0 1044 }
michael@0 1045
michael@0 1046 if (mLineBreakDue)
michael@0 1047 EnsureVerticalSpace(mFloatingLines);
michael@0 1048
michael@0 1049 if (MustSuppressLeaf()) {
michael@0 1050 return NS_OK;
michael@0 1051 }
michael@0 1052
michael@0 1053 if (aTag == nsGkAtoms::br) {
michael@0 1054 // Another egregious editor workaround, see bug 38194:
michael@0 1055 // ignore the bogus br tags that the editor sticks here and there.
michael@0 1056 nsAutoString tagAttr;
michael@0 1057 if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr))
michael@0 1058 || !tagAttr.EqualsLiteral("_moz")) {
michael@0 1059 EnsureVerticalSpace(mEmptyLines+1);
michael@0 1060 }
michael@0 1061 }
michael@0 1062 else if (aTag == nsGkAtoms::hr &&
michael@0 1063 (mFlags & nsIDocumentEncoder::OutputFormatted)) {
michael@0 1064 EnsureVerticalSpace(0);
michael@0 1065
michael@0 1066 // Make a line of dashes as wide as the wrap width
michael@0 1067 // XXX honoring percentage would be nice
michael@0 1068 nsAutoString line;
michael@0 1069 uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25);
michael@0 1070 while (line.Length() < width) {
michael@0 1071 line.Append(char16_t('-'));
michael@0 1072 }
michael@0 1073 Write(line);
michael@0 1074
michael@0 1075 EnsureVerticalSpace(0);
michael@0 1076 }
michael@0 1077 else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) {
michael@0 1078 Write(NS_LITERAL_STRING("\xFFFC"));
michael@0 1079 }
michael@0 1080 else if (aTag == nsGkAtoms::img) {
michael@0 1081 /* Output (in decreasing order of preference)
michael@0 1082 alt, title or nothing */
michael@0 1083 // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
michael@0 1084 nsAutoString imageDescription;
michael@0 1085 if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt,
michael@0 1086 imageDescription))) {
michael@0 1087 // If the alt attribute has an empty value (|alt=""|), output nothing
michael@0 1088 }
michael@0 1089 else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title,
michael@0 1090 imageDescription))
michael@0 1091 && !imageDescription.IsEmpty()) {
michael@0 1092 imageDescription = NS_LITERAL_STRING(" [") +
michael@0 1093 imageDescription +
michael@0 1094 NS_LITERAL_STRING("] ");
michael@0 1095 }
michael@0 1096
michael@0 1097 Write(imageDescription);
michael@0 1098 }
michael@0 1099
michael@0 1100 return NS_OK;
michael@0 1101 }
michael@0 1102
michael@0 1103 /**
michael@0 1104 * Adds as many newline as necessary to get |noOfRows| empty lines
michael@0 1105 *
michael@0 1106 * noOfRows = -1 : Being in the middle of some line of text
michael@0 1107 * noOfRows = 0 : Being at the start of a line
michael@0 1108 * noOfRows = n>0 : Having n empty lines before the current line.
michael@0 1109 */
michael@0 1110 void
michael@0 1111 nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows)
michael@0 1112 {
michael@0 1113 // If we have something in the indent we probably want to output
michael@0 1114 // it and it's not included in the count for empty lines so we don't
michael@0 1115 // realize that we should start a new line.
michael@0 1116 if (noOfRows >= 0 && !mInIndentString.IsEmpty()) {
michael@0 1117 EndLine(false);
michael@0 1118 mInWhitespace = true;
michael@0 1119 }
michael@0 1120
michael@0 1121 while(mEmptyLines < noOfRows) {
michael@0 1122 EndLine(false);
michael@0 1123 mInWhitespace = true;
michael@0 1124 }
michael@0 1125 mLineBreakDue = false;
michael@0 1126 mFloatingLines = -1;
michael@0 1127 }
michael@0 1128
michael@0 1129 /**
michael@0 1130 * This empties the current line cache without adding a NEWLINE.
michael@0 1131 * Should not be used if line wrapping is of importance since
michael@0 1132 * this function destroys the cache information.
michael@0 1133 *
michael@0 1134 * It will also write indentation and quotes if we believe us to be
michael@0 1135 * at the start of the line.
michael@0 1136 */
michael@0 1137 void
michael@0 1138 nsPlainTextSerializer::FlushLine()
michael@0 1139 {
michael@0 1140 if (!mCurrentLine.IsEmpty()) {
michael@0 1141 if (mAtFirstColumn) {
michael@0 1142 OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
michael@0 1143 }
michael@0 1144
michael@0 1145 Output(mCurrentLine);
michael@0 1146 mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty();
michael@0 1147 mCurrentLine.Truncate();
michael@0 1148 mCurrentLineWidth = 0;
michael@0 1149 }
michael@0 1150 }
michael@0 1151
michael@0 1152 /**
michael@0 1153 * Prints the text to output to our current output device (the string mOutputString).
michael@0 1154 * The only logic here is to replace non breaking spaces with a normal space since
michael@0 1155 * most (all?) receivers of the result won't understand the nbsp and even be
michael@0 1156 * confused by it.
michael@0 1157 */
michael@0 1158 void
michael@0 1159 nsPlainTextSerializer::Output(nsString& aString)
michael@0 1160 {
michael@0 1161 if (!aString.IsEmpty()) {
michael@0 1162 mStartedOutput = true;
michael@0 1163 }
michael@0 1164
michael@0 1165 if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
michael@0 1166 // First, replace all nbsp characters with spaces,
michael@0 1167 // which the unicode encoder won't do for us.
michael@0 1168 aString.ReplaceChar(kNBSP, kSPACE);
michael@0 1169 }
michael@0 1170 mOutputString->Append(aString);
michael@0 1171 }
michael@0 1172
michael@0 1173 static bool
michael@0 1174 IsSpaceStuffable(const char16_t *s)
michael@0 1175 {
michael@0 1176 if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
michael@0 1177 nsCRT::strncmp(s, MOZ_UTF16("From "), 5) == 0)
michael@0 1178 return true;
michael@0 1179 else
michael@0 1180 return false;
michael@0 1181 }
michael@0 1182
michael@0 1183 /**
michael@0 1184 * This function adds a piece of text to the current stored line. If we are
michael@0 1185 * wrapping text and the stored line will become too long, a suitable
michael@0 1186 * location to wrap will be found and the line that's complete will be
michael@0 1187 * output.
michael@0 1188 */
michael@0 1189 void
michael@0 1190 nsPlainTextSerializer::AddToLine(const char16_t * aLineFragment,
michael@0 1191 int32_t aLineFragmentLength)
michael@0 1192 {
michael@0 1193 uint32_t prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent;
michael@0 1194
michael@0 1195 if (mLineBreakDue)
michael@0 1196 EnsureVerticalSpace(mFloatingLines);
michael@0 1197
michael@0 1198 int32_t linelength = mCurrentLine.Length();
michael@0 1199 if (0 == linelength) {
michael@0 1200 if (0 == aLineFragmentLength) {
michael@0 1201 // Nothing at all. Are you kidding me?
michael@0 1202 return;
michael@0 1203 }
michael@0 1204
michael@0 1205 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
michael@0 1206 if (IsSpaceStuffable(aLineFragment)
michael@0 1207 && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
michael@0 1208 )
michael@0 1209 {
michael@0 1210 // Space stuffing a la RFC 2646 (format=flowed).
michael@0 1211 mCurrentLine.Append(char16_t(' '));
michael@0 1212
michael@0 1213 if (MayWrap()) {
michael@0 1214 mCurrentLineWidth += GetUnicharWidth(' ');
michael@0 1215 #ifdef DEBUG_wrapping
michael@0 1216 NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
michael@0 1217 mCurrentLine.Length()) ==
michael@0 1218 (int32_t)mCurrentLineWidth,
michael@0 1219 "mCurrentLineWidth and reality out of sync!");
michael@0 1220 #endif
michael@0 1221 }
michael@0 1222 }
michael@0 1223 }
michael@0 1224 mEmptyLines=-1;
michael@0 1225 }
michael@0 1226
michael@0 1227 mCurrentLine.Append(aLineFragment, aLineFragmentLength);
michael@0 1228 if (MayWrap()) {
michael@0 1229 mCurrentLineWidth += GetUnicharStringWidth(aLineFragment,
michael@0 1230 aLineFragmentLength);
michael@0 1231 #ifdef DEBUG_wrapping
michael@0 1232 NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
michael@0 1233 mCurrentLine.Length()) ==
michael@0 1234 (int32_t)mCurrentLineWidth,
michael@0 1235 "mCurrentLineWidth and reality out of sync!");
michael@0 1236 #endif
michael@0 1237 }
michael@0 1238
michael@0 1239 linelength = mCurrentLine.Length();
michael@0 1240
michael@0 1241 // Wrap?
michael@0 1242 if (MayWrap())
michael@0 1243 {
michael@0 1244 #ifdef DEBUG_wrapping
michael@0 1245 NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
michael@0 1246 mCurrentLine.Length()) ==
michael@0 1247 (int32_t)mCurrentLineWidth,
michael@0 1248 "mCurrentLineWidth and reality out of sync!");
michael@0 1249 #endif
michael@0 1250 // Yes, wrap!
michael@0 1251 // The "+4" is to avoid wrap lines that only would be a couple
michael@0 1252 // of letters too long. We give this bonus only if the
michael@0 1253 // wrapcolumn is more than 20.
michael@0 1254 uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0;
michael@0 1255
michael@0 1256 // XXX: Should calculate prefixwidth with GetUnicharStringWidth
michael@0 1257 while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {
michael@0 1258 // We go from the end removing one letter at a time until
michael@0 1259 // we have a reasonable width
michael@0 1260 int32_t goodSpace = mCurrentLine.Length();
michael@0 1261 uint32_t width = mCurrentLineWidth;
michael@0 1262 while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
michael@0 1263 goodSpace--;
michael@0 1264 width -= GetUnicharWidth(mCurrentLine[goodSpace]);
michael@0 1265 }
michael@0 1266
michael@0 1267 goodSpace++;
michael@0 1268
michael@0 1269 if (mLineBreaker) {
michael@0 1270 goodSpace = mLineBreaker->Prev(mCurrentLine.get(),
michael@0 1271 mCurrentLine.Length(), goodSpace);
michael@0 1272 if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
michael@0 1273 nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
michael@0 1274 --goodSpace; // adjust the position since line breaker returns a position next to space
michael@0 1275 }
michael@0 1276 }
michael@0 1277 // fallback if the line breaker is unavailable or failed
michael@0 1278 if (!mLineBreaker) {
michael@0 1279 goodSpace = mWrapColumn-prefixwidth;
michael@0 1280 while (goodSpace >= 0 &&
michael@0 1281 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
michael@0 1282 goodSpace--;
michael@0 1283 }
michael@0 1284 }
michael@0 1285
michael@0 1286 nsAutoString restOfLine;
michael@0 1287 if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
michael@0 1288 // If we don't found a good place to break, accept long line and
michael@0 1289 // try to find another place to break
michael@0 1290 goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
michael@0 1291 if (mLineBreaker) {
michael@0 1292 if ((uint32_t)goodSpace < mCurrentLine.Length())
michael@0 1293 goodSpace = mLineBreaker->Next(mCurrentLine.get(),
michael@0 1294 mCurrentLine.Length(), goodSpace);
michael@0 1295 if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
michael@0 1296 goodSpace = mCurrentLine.Length();
michael@0 1297 }
michael@0 1298 // fallback if the line breaker is unavailable or failed
michael@0 1299 if (!mLineBreaker) {
michael@0 1300 goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
michael@0 1301 while (goodSpace < linelength &&
michael@0 1302 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
michael@0 1303 goodSpace++;
michael@0 1304 }
michael@0 1305 }
michael@0 1306 }
michael@0 1307
michael@0 1308 if ((goodSpace < linelength) && (goodSpace > 0)) {
michael@0 1309 // Found a place to break
michael@0 1310
michael@0 1311 // -1 (trim a char at the break position)
michael@0 1312 // only if the line break was a space.
michael@0 1313 if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
michael@0 1314 mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
michael@0 1315 }
michael@0 1316 else {
michael@0 1317 mCurrentLine.Right(restOfLine, linelength-goodSpace);
michael@0 1318 }
michael@0 1319 // if breaker was U+0020, it has to consider for delsp=yes support
michael@0 1320 bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' ';
michael@0 1321 mCurrentLine.Truncate(goodSpace);
michael@0 1322 EndLine(true, breakBySpace);
michael@0 1323 mCurrentLine.Truncate();
michael@0 1324 // Space stuff new line?
michael@0 1325 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
michael@0 1326 if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get())
michael@0 1327 && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
michael@0 1328 )
michael@0 1329 {
michael@0 1330 // Space stuffing a la RFC 2646 (format=flowed).
michael@0 1331 mCurrentLine.Append(char16_t(' '));
michael@0 1332 //XXX doesn't seem to work correctly for ' '
michael@0 1333 }
michael@0 1334 }
michael@0 1335 mCurrentLine.Append(restOfLine);
michael@0 1336 mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(),
michael@0 1337 mCurrentLine.Length());
michael@0 1338 linelength = mCurrentLine.Length();
michael@0 1339 mEmptyLines = -1;
michael@0 1340 }
michael@0 1341 else {
michael@0 1342 // Nothing to do. Hopefully we get more data later
michael@0 1343 // to use for a place to break line
michael@0 1344 break;
michael@0 1345 }
michael@0 1346 }
michael@0 1347 }
michael@0 1348 else {
michael@0 1349 // No wrapping.
michael@0 1350 }
michael@0 1351 }
michael@0 1352
michael@0 1353 /**
michael@0 1354 * Outputs the contents of mCurrentLine, and resets line specific
michael@0 1355 * variables. Also adds an indentation and prefix if there is
michael@0 1356 * one specified. Strips ending spaces from the line if it isn't
michael@0 1357 * preformatted.
michael@0 1358 */
michael@0 1359 void
michael@0 1360 nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace)
michael@0 1361 {
michael@0 1362 uint32_t currentlinelength = mCurrentLine.Length();
michael@0 1363
michael@0 1364 if (aSoftlinebreak && 0 == currentlinelength) {
michael@0 1365 // No meaning
michael@0 1366 return;
michael@0 1367 }
michael@0 1368
michael@0 1369 /* In non-preformatted mode, remove spaces from the end of the line for
michael@0 1370 * format=flowed compatibility. Don't do this for these special cases:
michael@0 1371 * "-- ", the signature separator (RFC 2646) shouldn't be touched and
michael@0 1372 * "- -- ", the OpenPGP dash-escaped signature separator in inline
michael@0 1373 * signed messages according to the OpenPGP standard (RFC 2440).
michael@0 1374 */
michael@0 1375 if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
michael@0 1376 !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) &&
michael@0 1377 (aSoftlinebreak ||
michael@0 1378 !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) {
michael@0 1379 // Remove spaces from the end of the line.
michael@0 1380 while(currentlinelength > 0 &&
michael@0 1381 mCurrentLine[currentlinelength-1] == ' ') {
michael@0 1382 --currentlinelength;
michael@0 1383 }
michael@0 1384 mCurrentLine.SetLength(currentlinelength);
michael@0 1385 }
michael@0 1386
michael@0 1387 if (aSoftlinebreak &&
michael@0 1388 (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
michael@0 1389 (mIndent == 0)) {
michael@0 1390 // Add the soft part of the soft linebreak (RFC 2646 4.1)
michael@0 1391 // We only do this when there is no indentation since format=flowed
michael@0 1392 // lines and indentation doesn't work well together.
michael@0 1393
michael@0 1394 // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
michael@0 1395 // add twice space.
michael@0 1396 if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace)
michael@0 1397 mCurrentLine.Append(NS_LITERAL_STRING(" "));
michael@0 1398 else
michael@0 1399 mCurrentLine.Append(char16_t(' '));
michael@0 1400 }
michael@0 1401
michael@0 1402 if (aSoftlinebreak) {
michael@0 1403 mEmptyLines=0;
michael@0 1404 }
michael@0 1405 else {
michael@0 1406 // Hard break
michael@0 1407 if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
michael@0 1408 mEmptyLines=-1;
michael@0 1409 }
michael@0 1410
michael@0 1411 mEmptyLines++;
michael@0 1412 }
michael@0 1413
michael@0 1414 if (mAtFirstColumn) {
michael@0 1415 // If we don't have anything "real" to output we have to
michael@0 1416 // make sure the indent doesn't end in a space since that
michael@0 1417 // would trick a format=flowed-aware receiver.
michael@0 1418 bool stripTrailingSpaces = mCurrentLine.IsEmpty();
michael@0 1419 OutputQuotesAndIndent(stripTrailingSpaces);
michael@0 1420 }
michael@0 1421
michael@0 1422 mCurrentLine.Append(mLineBreak);
michael@0 1423 Output(mCurrentLine);
michael@0 1424 mCurrentLine.Truncate();
michael@0 1425 mCurrentLineWidth = 0;
michael@0 1426 mAtFirstColumn=true;
michael@0 1427 mInWhitespace=true;
michael@0 1428 mLineBreakDue = false;
michael@0 1429 mFloatingLines = -1;
michael@0 1430 }
michael@0 1431
michael@0 1432
michael@0 1433 /**
michael@0 1434 * Outputs the calculated and stored indent and text in the indentation. That is
michael@0 1435 * quote chars and numbers for numbered lists and such. It will also reset any
michael@0 1436 * stored text to put in the indentation after using it.
michael@0 1437 */
michael@0 1438 void
michael@0 1439 nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */)
michael@0 1440 {
michael@0 1441 nsAutoString stringToOutput;
michael@0 1442
michael@0 1443 // Put the mail quote "> " chars in, if appropriate:
michael@0 1444 if (mCiteQuoteLevel > 0) {
michael@0 1445 nsAutoString quotes;
michael@0 1446 for(int i=0; i < mCiteQuoteLevel; i++) {
michael@0 1447 quotes.Append(char16_t('>'));
michael@0 1448 }
michael@0 1449 if (!mCurrentLine.IsEmpty()) {
michael@0 1450 /* Better don't output a space here, if the line is empty,
michael@0 1451 in case a receiving f=f-aware UA thinks, this were a flowed line,
michael@0 1452 which it isn't - it's just empty.
michael@0 1453 (Flowed lines may be joined with the following one,
michael@0 1454 so the empty line may be lost completely.) */
michael@0 1455 quotes.Append(char16_t(' '));
michael@0 1456 }
michael@0 1457 stringToOutput = quotes;
michael@0 1458 mAtFirstColumn = false;
michael@0 1459 }
michael@0 1460
michael@0 1461 // Indent if necessary
michael@0 1462 int32_t indentwidth = mIndent - mInIndentString.Length();
michael@0 1463 if (indentwidth > 0
michael@0 1464 && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
michael@0 1465 // Don't make empty lines look flowed
michael@0 1466 ) {
michael@0 1467 nsAutoString spaces;
michael@0 1468 for (int i=0; i < indentwidth; ++i)
michael@0 1469 spaces.Append(char16_t(' '));
michael@0 1470 stringToOutput += spaces;
michael@0 1471 mAtFirstColumn = false;
michael@0 1472 }
michael@0 1473
michael@0 1474 if (!mInIndentString.IsEmpty()) {
michael@0 1475 stringToOutput += mInIndentString;
michael@0 1476 mAtFirstColumn = false;
michael@0 1477 mInIndentString.Truncate();
michael@0 1478 }
michael@0 1479
michael@0 1480 if (stripTrailingSpaces) {
michael@0 1481 int32_t lineLength = stringToOutput.Length();
michael@0 1482 while(lineLength > 0 &&
michael@0 1483 ' ' == stringToOutput[lineLength-1]) {
michael@0 1484 --lineLength;
michael@0 1485 }
michael@0 1486 stringToOutput.SetLength(lineLength);
michael@0 1487 }
michael@0 1488
michael@0 1489 if (!stringToOutput.IsEmpty()) {
michael@0 1490 Output(stringToOutput);
michael@0 1491 }
michael@0 1492
michael@0 1493 }
michael@0 1494
michael@0 1495 /**
michael@0 1496 * Write a string. This is the highlevel function to use to get text output.
michael@0 1497 * By using AddToLine, Output, EndLine and other functions it handles quotation,
michael@0 1498 * line wrapping, indentation, whitespace compression and other things.
michael@0 1499 */
michael@0 1500 void
michael@0 1501 nsPlainTextSerializer::Write(const nsAString& aStr)
michael@0 1502 {
michael@0 1503 // XXX Copy necessary to use nsString methods and gain
michael@0 1504 // access to underlying buffer
michael@0 1505 nsAutoString str(aStr);
michael@0 1506
michael@0 1507 #ifdef DEBUG_wrapping
michael@0 1508 printf("Write(%s): wrap col = %d\n",
michael@0 1509 NS_ConvertUTF16toUTF8(str).get(), mWrapColumn);
michael@0 1510 #endif
michael@0 1511
michael@0 1512 int32_t bol = 0;
michael@0 1513 int32_t newline;
michael@0 1514
michael@0 1515 int32_t totLen = str.Length();
michael@0 1516
michael@0 1517 // If the string is empty, do nothing:
michael@0 1518 if (totLen <= 0) return;
michael@0 1519
michael@0 1520 // For Flowed text change nbsp-ses to spaces at end of lines to allow them
michael@0 1521 // to be cut off along with usual spaces if required. (bug #125928)
michael@0 1522 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
michael@0 1523 for (int32_t i = totLen-1; i >= 0; i--) {
michael@0 1524 char16_t c = str[i];
michael@0 1525 if ('\n' == c || '\r' == c || ' ' == c || '\t' == c)
michael@0 1526 continue;
michael@0 1527 if (kNBSP == c)
michael@0 1528 str.Replace(i, 1, ' ');
michael@0 1529 else
michael@0 1530 break;
michael@0 1531 }
michael@0 1532 }
michael@0 1533
michael@0 1534 // We have two major codepaths here. One that does preformatted text and one
michael@0 1535 // that does normal formatted text. The one for preformatted text calls
michael@0 1536 // Output directly while the other code path goes through AddToLine.
michael@0 1537 if ((mPreFormatted && !mWrapColumn) || IsInPre()
michael@0 1538 || ((mSpanLevel > 0 || mDontWrapAnyQuotes)
michael@0 1539 && mEmptyLines >= 0 && str.First() == char16_t('>'))) {
michael@0 1540 // No intelligent wrapping.
michael@0 1541
michael@0 1542 // This mustn't be mixed with intelligent wrapping without clearing
michael@0 1543 // the mCurrentLine buffer before!!!
michael@0 1544 NS_ASSERTION(mCurrentLine.IsEmpty(),
michael@0 1545 "Mixed wrapping data and nonwrapping data on the same line");
michael@0 1546 if (!mCurrentLine.IsEmpty()) {
michael@0 1547 FlushLine();
michael@0 1548 }
michael@0 1549
michael@0 1550 // Put the mail quote "> " chars in, if appropriate.
michael@0 1551 // Have to put it in before every line.
michael@0 1552 while(bol<totLen) {
michael@0 1553 bool outputQuotes = mAtFirstColumn;
michael@0 1554 bool atFirstColumn = mAtFirstColumn;
michael@0 1555 bool outputLineBreak = false;
michael@0 1556 bool spacesOnly = true;
michael@0 1557
michael@0 1558 // Find one of '\n' or '\r' using iterators since nsAString
michael@0 1559 // doesn't have the old FindCharInSet function.
michael@0 1560 nsAString::const_iterator iter; str.BeginReading(iter);
michael@0 1561 nsAString::const_iterator done_searching; str.EndReading(done_searching);
michael@0 1562 iter.advance(bol);
michael@0 1563 int32_t new_newline = bol;
michael@0 1564 newline = kNotFound;
michael@0 1565 while(iter != done_searching) {
michael@0 1566 if ('\n' == *iter || '\r' == *iter) {
michael@0 1567 newline = new_newline;
michael@0 1568 break;
michael@0 1569 }
michael@0 1570 if (' ' != *iter)
michael@0 1571 spacesOnly = false;
michael@0 1572 ++new_newline;
michael@0 1573 ++iter;
michael@0 1574 }
michael@0 1575
michael@0 1576 // Done searching
michael@0 1577 nsAutoString stringpart;
michael@0 1578 if (newline == kNotFound) {
michael@0 1579 // No new lines.
michael@0 1580 stringpart.Assign(Substring(str, bol, totLen - bol));
michael@0 1581 if (!stringpart.IsEmpty()) {
michael@0 1582 char16_t lastchar = stringpart[stringpart.Length()-1];
michael@0 1583 if ((lastchar == '\t') || (lastchar == ' ') ||
michael@0 1584 (lastchar == '\r') ||(lastchar == '\n')) {
michael@0 1585 mInWhitespace = true;
michael@0 1586 }
michael@0 1587 else {
michael@0 1588 mInWhitespace = false;
michael@0 1589 }
michael@0 1590 }
michael@0 1591 mEmptyLines=-1;
michael@0 1592 atFirstColumn = mAtFirstColumn && (totLen-bol)==0;
michael@0 1593 bol = totLen;
michael@0 1594 }
michael@0 1595 else {
michael@0 1596 // There is a newline
michael@0 1597 stringpart.Assign(Substring(str, bol, newline-bol));
michael@0 1598 mInWhitespace = true;
michael@0 1599 outputLineBreak = true;
michael@0 1600 mEmptyLines=0;
michael@0 1601 atFirstColumn = true;
michael@0 1602 bol = newline+1;
michael@0 1603 if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
michael@0 1604 // There was a CRLF in the input. This used to be illegal and
michael@0 1605 // stripped by the parser. Apparently not anymore. Let's skip
michael@0 1606 // over the LF.
michael@0 1607 bol++;
michael@0 1608 }
michael@0 1609 }
michael@0 1610
michael@0 1611 mCurrentLine.AssignLiteral("");
michael@0 1612 if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
michael@0 1613 if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928
michael@0 1614 !stringpart.EqualsLiteral("-- ") &&
michael@0 1615 !stringpart.EqualsLiteral("- -- "))
michael@0 1616 stringpart.Trim(" ", false, true, true);
michael@0 1617 if (IsSpaceStuffable(stringpart.get()) && stringpart[0] != '>')
michael@0 1618 mCurrentLine.Append(char16_t(' '));
michael@0 1619 }
michael@0 1620 mCurrentLine.Append(stringpart);
michael@0 1621
michael@0 1622 if (outputQuotes) {
michael@0 1623 // Note: this call messes with mAtFirstColumn
michael@0 1624 OutputQuotesAndIndent();
michael@0 1625 }
michael@0 1626
michael@0 1627 Output(mCurrentLine);
michael@0 1628 if (outputLineBreak) {
michael@0 1629 Output(mLineBreak);
michael@0 1630 }
michael@0 1631 mAtFirstColumn = atFirstColumn;
michael@0 1632 }
michael@0 1633
michael@0 1634 // Reset mCurrentLine.
michael@0 1635 mCurrentLine.Truncate();
michael@0 1636
michael@0 1637 #ifdef DEBUG_wrapping
michael@0 1638 printf("No wrapping: newline is %d, totLen is %d\n",
michael@0 1639 newline, totLen);
michael@0 1640 #endif
michael@0 1641 return;
michael@0 1642 }
michael@0 1643
michael@0 1644 // Intelligent handling of text
michael@0 1645 // If needed, strip out all "end of lines"
michael@0 1646 // and multiple whitespace between words
michael@0 1647 int32_t nextpos;
michael@0 1648 const char16_t * offsetIntoBuffer = nullptr;
michael@0 1649
michael@0 1650 while (bol < totLen) { // Loop over lines
michael@0 1651 // Find a place where we may have to do whitespace compression
michael@0 1652 nextpos = str.FindCharInSet(" \t\n\r", bol);
michael@0 1653 #ifdef DEBUG_wrapping
michael@0 1654 nsAutoString remaining;
michael@0 1655 str.Right(remaining, totLen - bol);
michael@0 1656 foo = ToNewCString(remaining);
michael@0 1657 // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
michael@0 1658 // bol, nextpos, totLen, foo);
michael@0 1659 nsMemory::Free(foo);
michael@0 1660 #endif
michael@0 1661
michael@0 1662 if (nextpos == kNotFound) {
michael@0 1663 // The rest of the string
michael@0 1664 offsetIntoBuffer = str.get() + bol;
michael@0 1665 AddToLine(offsetIntoBuffer, totLen-bol);
michael@0 1666 bol=totLen;
michael@0 1667 mInWhitespace=false;
michael@0 1668 }
michael@0 1669 else {
michael@0 1670 // There's still whitespace left in the string
michael@0 1671 if (nextpos != 0 && (nextpos + 1) < totLen) {
michael@0 1672 offsetIntoBuffer = str.get() + nextpos;
michael@0 1673 // skip '\n' if it is between CJ chars
michael@0 1674 if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
michael@0 1675 offsetIntoBuffer = str.get() + bol;
michael@0 1676 AddToLine(offsetIntoBuffer, nextpos-bol);
michael@0 1677 bol = nextpos + 1;
michael@0 1678 continue;
michael@0 1679 }
michael@0 1680 }
michael@0 1681 // If we're already in whitespace and not preformatted, just skip it:
michael@0 1682 if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
michael@0 1683 !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
michael@0 1684 // Skip whitespace
michael@0 1685 bol++;
michael@0 1686 continue;
michael@0 1687 }
michael@0 1688
michael@0 1689 if (nextpos == bol) {
michael@0 1690 // Note that we are in whitespace.
michael@0 1691 mInWhitespace = true;
michael@0 1692 offsetIntoBuffer = str.get() + nextpos;
michael@0 1693 AddToLine(offsetIntoBuffer, 1);
michael@0 1694 bol++;
michael@0 1695 continue;
michael@0 1696 }
michael@0 1697
michael@0 1698 mInWhitespace = true;
michael@0 1699
michael@0 1700 offsetIntoBuffer = str.get() + bol;
michael@0 1701 if (mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
michael@0 1702 // Preserve the real whitespace character
michael@0 1703 nextpos++;
michael@0 1704 AddToLine(offsetIntoBuffer, nextpos-bol);
michael@0 1705 bol = nextpos;
michael@0 1706 }
michael@0 1707 else {
michael@0 1708 // Replace the whitespace with a space
michael@0 1709 AddToLine(offsetIntoBuffer, nextpos-bol);
michael@0 1710 AddToLine(kSpace.get(),1);
michael@0 1711 bol = nextpos + 1; // Let's eat the whitespace
michael@0 1712 }
michael@0 1713 }
michael@0 1714 } // Continue looping over the string
michael@0 1715 }
michael@0 1716
michael@0 1717
michael@0 1718 /**
michael@0 1719 * Gets the value of an attribute in a string. If the function returns
michael@0 1720 * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
michael@0 1721 */
michael@0 1722 nsresult
michael@0 1723 nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName,
michael@0 1724 nsString& aValueRet)
michael@0 1725 {
michael@0 1726 if (mElement) {
michael@0 1727 if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
michael@0 1728 return NS_OK;
michael@0 1729 }
michael@0 1730 }
michael@0 1731
michael@0 1732 return NS_ERROR_NOT_AVAILABLE;
michael@0 1733 }
michael@0 1734
michael@0 1735 /**
michael@0 1736 * Returns true, if the element was inserted by Moz' TXT->HTML converter.
michael@0 1737 * In this case, we should ignore it.
michael@0 1738 */
michael@0 1739 bool
michael@0 1740 nsPlainTextSerializer::IsCurrentNodeConverted()
michael@0 1741 {
michael@0 1742 nsAutoString value;
michael@0 1743 nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
michael@0 1744 return (NS_SUCCEEDED(rv) &&
michael@0 1745 (value.EqualsIgnoreCase("moz-txt", 7) ||
michael@0 1746 value.EqualsIgnoreCase("\"moz-txt", 8)));
michael@0 1747 }
michael@0 1748
michael@0 1749
michael@0 1750 // static
michael@0 1751 nsIAtom*
michael@0 1752 nsPlainTextSerializer::GetIdForContent(nsIContent* aContent)
michael@0 1753 {
michael@0 1754 if (!aContent->IsHTML()) {
michael@0 1755 return nullptr;
michael@0 1756 }
michael@0 1757
michael@0 1758 nsIAtom* localName = aContent->Tag();
michael@0 1759 return localName->IsStaticAtom() ? localName : nullptr;
michael@0 1760 }
michael@0 1761
michael@0 1762 /**
michael@0 1763 * Returns true if we currently are inside a <pre>. The check is done
michael@0 1764 * by traversing the tag stack looking for <pre> until we hit a block
michael@0 1765 * level tag which is assumed to override any <pre>:s below it in
michael@0 1766 * the stack. To do this correctly to a 100% would require access
michael@0 1767 * to style which we don't support in this converter.
michael@0 1768 */
michael@0 1769 bool
michael@0 1770 nsPlainTextSerializer::IsInPre()
michael@0 1771 {
michael@0 1772 int32_t i = mTagStackIndex;
michael@0 1773 while(i > 0) {
michael@0 1774 if (mTagStack[i - 1] == nsGkAtoms::pre)
michael@0 1775 return true;
michael@0 1776 if (nsContentUtils::IsHTMLBlock(mTagStack[i - 1])) {
michael@0 1777 // We assume that every other block overrides a <pre>
michael@0 1778 return false;
michael@0 1779 }
michael@0 1780 --i;
michael@0 1781 }
michael@0 1782
michael@0 1783 // Not a <pre> in the whole stack
michael@0 1784 return false;
michael@0 1785 }
michael@0 1786
michael@0 1787 /**
michael@0 1788 * This method is required only to identify LI's inside OL.
michael@0 1789 * Returns TRUE if we are inside an OL tag and FALSE otherwise.
michael@0 1790 */
michael@0 1791 bool
michael@0 1792 nsPlainTextSerializer::IsInOL()
michael@0 1793 {
michael@0 1794 int32_t i = mTagStackIndex;
michael@0 1795 while(--i >= 0) {
michael@0 1796 if (mTagStack[i] == nsGkAtoms::ol)
michael@0 1797 return true;
michael@0 1798 if (mTagStack[i] == nsGkAtoms::ul) {
michael@0 1799 // If a UL is reached first, LI belongs the UL nested in OL.
michael@0 1800 return false;
michael@0 1801 }
michael@0 1802 }
michael@0 1803 // We may reach here for orphan LI's.
michael@0 1804 return false;
michael@0 1805 }
michael@0 1806
michael@0 1807 /*
michael@0 1808 @return 0 = no header, 1 = h1, ..., 6 = h6
michael@0 1809 */
michael@0 1810 int32_t HeaderLevel(nsIAtom* aTag)
michael@0 1811 {
michael@0 1812 if (aTag == nsGkAtoms::h1) {
michael@0 1813 return 1;
michael@0 1814 }
michael@0 1815 if (aTag == nsGkAtoms::h2) {
michael@0 1816 return 2;
michael@0 1817 }
michael@0 1818 if (aTag == nsGkAtoms::h3) {
michael@0 1819 return 3;
michael@0 1820 }
michael@0 1821 if (aTag == nsGkAtoms::h4) {
michael@0 1822 return 4;
michael@0 1823 }
michael@0 1824 if (aTag == nsGkAtoms::h5) {
michael@0 1825 return 5;
michael@0 1826 }
michael@0 1827 if (aTag == nsGkAtoms::h6) {
michael@0 1828 return 6;
michael@0 1829 }
michael@0 1830 return 0;
michael@0 1831 }
michael@0 1832
michael@0 1833
michael@0 1834 /*
michael@0 1835 * This is an implementation of wcwidth() and wcswidth() (named
michael@0 1836 * GetUnicharWidth() and GetUnicharStringWidth() here) as defined in
michael@0 1837 * "The Single UNIX Specification, Version 2, The Open Group, 1997"
michael@0 1838 * <http://www.UNIX-systems.org/online.html>
michael@0 1839 *
michael@0 1840 * Markus Kuhn -- 2000-02-08 -- public domain
michael@0 1841 *
michael@0 1842 * Minor alterations to fit Mozilla's data types by Daniel Bratell
michael@0 1843 */
michael@0 1844
/* Returns the column width of a single UTF-16 code unit:
 *
 *    - The null character (U+0000) has a column width of 0.
 *
 *    - Other C0/C1 control characters and DEL lead to a return
 *      value of -1.
 *
 *    - Characters listed in the table of non-spacing characters below
 *      (non-spacing and enclosing combining characters, general
 *      category code Mn or Me in the Unicode database) have a
 *      column width of 0.
 *
 *    - Spacing characters in the East Asian Wide (W) or East Asian
 *      FullWidth (F) ranges tested below (Unicode Technical
 *      Report #11) have a column width of 2.
 *
 *    - All remaining characters (including all printable
 *      ISO 8859-1 and WGL4 characters, Unicode control characters,
 *      etc.) have a column width of 1.
 *
 * The argument is a single char16_t, so only values up to U+FFFF can be
 * classified.
 */

int32_t GetUnicharWidth(char16_t ucs)
{
  /* sorted list of non-overlapping intervals of non-spacing characters */
  static const struct CodeRange {
    uint16_t first;
    uint16_t last;
  } kZeroWidth[] = {
    { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
    { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
    { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
    { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
    { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
    { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
    { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
    { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
    { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
    { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
    { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
    { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
    { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
    { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
    { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
    { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
    { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
    { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
    { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
    { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
    { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
    { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
    { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
    { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
    { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
  };

  /* the null character occupies no column */
  if (ucs == 0) {
    return 0;
  }

  /* 8-bit control characters: C0, DEL and C1 */
  const bool isC0Control = ucs < 32;
  const bool isDelOrC1Control = ucs >= 0x7f && ucs < 0xa0;
  if (isC0Control || isDelOrC1Control) {
    return -1;
  }

  /* quick exit for Latin-1 etc.: everything below the first table entry */
  if (ucs < kZeroWidth[0].first) {
    return 1;
  }

  /* binary search in the table of non-spacing characters */
  int32_t lo = 0;
  int32_t hi = sizeof(kZeroWidth) / sizeof(kZeroWidth[0]) - 1;
  while (lo <= hi) {
    const int32_t probe = (lo + hi) / 2;
    const CodeRange& range = kZeroWidth[probe];
    if (ucs > range.last) {
      lo = probe + 1;
    } else if (ucs < range.first) {
      hi = probe - 1;
    } else {
      /* first <= ucs <= last: a non-spacing character */
      return 0;
    }
  }

  /* from here on, ucs is neither a combining nor a C0/C1 control character */

  /* fast test for majority of non-wide scripts */
  if (ucs < 0x1100) {
    return 1;
  }

  /* ucs >= 0x1100 is guaranteed by the test above */
  const bool isHangulJamo = ucs <= 0x115f;
  /* CJK ... Yi, except U+300A/300B/301A/301B and U+303F */
  const bool isCjkToYi = ucs >= 0x2e80 && ucs <= 0xa4cf &&
                         (ucs & ~0x0011) != 0x300a && ucs != 0x303f;
  const bool isHangulSyllable = ucs >= 0xac00 && ucs <= 0xd7a3;
  const bool isCjkCompatIdeograph = ucs >= 0xf900 && ucs <= 0xfaff;
  const bool isCjkCompatForm = ucs >= 0xfe30 && ucs <= 0xfe6f;
  const bool isFullwidthForm = (ucs >= 0xff00 && ucs <= 0xff5f) ||
                               (ucs >= 0xffe0 && ucs <= 0xffe6);

  if (isHangulJamo || isCjkToYi || isHangulSyllable ||
      isCjkCompatIdeograph || isCjkCompatForm || isFullwidthForm) {
    return 2;
  }
  return 1;
}
michael@0 1949
michael@0 1950
michael@0 1951 int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n)
michael@0 1952 {
michael@0 1953 int32_t w, width = 0;
michael@0 1954
michael@0 1955 for (;*pwcs && n-- > 0; pwcs++)
michael@0 1956 if ((w = GetUnicharWidth(*pwcs)) < 0)
michael@0 1957 ++width; // Taking 1 as the width of non-printable character, for bug# 94475.
michael@0 1958 else
michael@0 1959 width += w;
michael@0 1960
michael@0 1961 return width;
michael@0 1962 }
michael@0 1963

mercurial