content/base/src/nsPlainTextSerializer.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/content/base/src/nsPlainTextSerializer.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1963 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +/*
    1.10 + * nsIContentSerializer implementation that can be used with an
    1.11 + * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
    1.12 + * (eg for copy/paste as plaintext).
    1.13 + */
    1.14 +
    1.15 +#include "nsPlainTextSerializer.h"
    1.16 +#include "nsLWBrkCIID.h"
    1.17 +#include "nsIServiceManager.h"
    1.18 +#include "nsGkAtoms.h"
    1.19 +#include "nsNameSpaceManager.h"
    1.20 +#include "nsTextFragment.h"
    1.21 +#include "nsContentUtils.h"
    1.22 +#include "nsReadableUtils.h"
    1.23 +#include "nsUnicharUtils.h"
    1.24 +#include "nsCRT.h"
    1.25 +#include "mozilla/dom/Element.h"
    1.26 +#include "mozilla/Preferences.h"
    1.27 +
    1.28 +using namespace mozilla;
    1.29 +using namespace mozilla::dom;
    1.30 +
    1.31 +#define PREF_STRUCTS "converter.html2txt.structs"
    1.32 +#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
    1.33 +
    1.34 +// Column counts used for tabs and for indenting headers, lists and <dd>.
    1.35 +static const int32_t kTabSize = 4;
    1.36 +// Indentation of h1 when mHeaderStrategy is 1 or 2; the indentation of the
    1.37 +// other header levels is derived from it.  XXX center h1?
    1.38 +static const int32_t kIndentSizeHeaders = 2;
    1.39 +// With mHeaderStrategy == 1, h(x+1) is indented this many columns more
    1.40 +// than h(x).
    1.41 +static const int32_t kIndentIncrementHeaders = 2;
    1.42 +// Indentation of the non-first lines of ul and ol.
    1.43 +static const int32_t kIndentSizeList = kTabSize;
    1.44 +// Indentation of <dd>.
    1.45 +static const int32_t kIndentSizeDD = kTabSize;
    1.46 +static const char16_t kNBSP = 160;
    1.47 +static const char16_t kSPACE = ' ';
    1.48 +
    1.49 +static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n);
    1.50 +static int32_t GetUnicharWidth(char16_t ucs);
    1.51 +static int32_t HeaderLevel(nsIAtom* aTag);
    1.52 +
    1.53 +// Capacities of the tag stack and the OL stack; may become non-const someday.
    1.54 +static const uint32_t TagStackSize = 500;
    1.55 +static const uint32_t OLStackSize = 100;
    1.56 +
    1.57 +nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
    1.58 +{
    1.59 +  // Factory: creates a serializer and returns it, via QI, in *aSerializer.
    1.60 +  nsPlainTextSerializer* it = new nsPlainTextSerializer(); // NOTE(review): assumes infallible |new| - confirm
    1.61 +  NS_ADDREF(it);  // hold a reference of our own across the QI
    1.62 +  nsresult rv = CallQueryInterface(it, aSerializer);
    1.63 +  NS_RELEASE(it); // if the QI failed, this destroys |it| instead of leaking it
    1.64 +  return rv;
    1.65 +}
    1.66 +
    1.67 +nsPlainTextSerializer::nsPlainTextSerializer()
    1.68 +  : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
    1.69 +{
    1.70 +  // Establishes the initial serializer state; Init() later overrides some of it.
    1.71 +  mOutputString = nullptr;
    1.72 +  mHeadLevel = 0;
    1.73 +  mAtFirstColumn = true;
    1.74 +  mIndent = 0;
    1.75 +  mCiteQuoteLevel = 0;
    1.76 +  mStructs = true;       // will be read from prefs later
    1.77 +  mHeaderStrategy = 1 /*indent increasingly*/;   // ditto
    1.78 +  mDontWrapAnyQuotes = false;                 // ditto
    1.79 +  mHasWrittenCiteBlockquote = false;
    1.80 +  mSpanLevel = 0;
    1.81 +  for (int32_t i = 0; i <= 6; i++) {
    1.82 +    mHeaderCounter[i] = 0;
    1.83 +  }
    1.84 +
    1.85 +  // Init() assigns these too; preset so they are never read uninitialized.
    1.86 +  mFlags = 0;
    1.87 +  mLineBreakDue = false;
    1.88 +  mFloatingLines = -1;
    1.89 +
    1.90 +  // Line breaker
    1.91 +  mWrapColumn = 72;     // XXX magic number, we expect someone to reset this
    1.92 +  mCurrentLineWidth = 0;
    1.93 +
    1.94 +  // Flow
    1.95 +  mEmptyLines = 1; // The start of the document is an "empty line" in itself,
    1.96 +  mInWhitespace = false;
    1.97 +  mPreFormatted = false;
    1.98 +  mStartedOutput = false;
    1.99 +
   1.100 +  // Tag stack: it only ever holds pointers to static atoms, so no refcounting.
   1.101 +  mTagStack = new nsIAtom*[TagStackSize];
   1.102 +  mTagStackIndex = 0;
   1.103 +  mIgnoreAboveIndex = (uint32_t)kNotFound;
   1.104 +  // OL stack, where numbers for ordered lists are kept
   1.105 +  mOLStack = new int32_t[OLStackSize];
   1.106 +  mOLStackIndex = 0;
   1.107 +  mULCount = 0;
   1.108 +  mIgnoredChildNodeLevel = 0;
   1.109 +}
   1.110 +
   1.111 +nsPlainTextSerializer::~nsPlainTextSerializer()
   1.112 +{
   1.113 +  NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!");
   1.114 +  delete[] mOLStack;   // both stacks come from new[] in the constructor
   1.115 +  delete[] mTagStack;
   1.116 +}
   1.117 +
   1.118 +NS_IMPL_ISUPPORTS(nsPlainTextSerializer,
   1.119 +                  nsIContentSerializer) // the one interface listed for this class
   1.120 +
   1.121 +
   1.122 +NS_IMETHODIMP 
   1.123 +nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
   1.124 +                            const char* aCharSet, bool aIsCopying,
   1.125 +                            bool aIsWholeDocument)
   1.126 +{
   1.127 +  // Prepares the serializer for a run: records the output flags and the wrap
   1.128 +  // column, selects the line break sequence and reads the formatting prefs.
   1.129 +  // aCharSet, aIsCopying and aIsWholeDocument are not consulted here.
   1.130 +#ifdef DEBUG
   1.131 +  // Check if the major control flags are set correctly.
   1.132 +  const bool flowedRequested =
   1.133 +    (aFlags & nsIDocumentEncoder::OutputFormatFlowed) != 0;
   1.134 +  const bool formattedRequested =
   1.135 +    (aFlags & nsIDocumentEncoder::OutputFormatted) != 0;
   1.136 +  if (flowedRequested) {
   1.137 +    NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
   1.138 +                 "If you want format=flowed, you must combine it with "
   1.139 +                 "nsIDocumentEncoder::OutputFormatted");
   1.140 +  }
   1.141 +  if (formattedRequested) {
   1.142 +    NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
   1.143 +                 "Can't do formatted and preformatted output at the same time!");
   1.144 +  }
   1.145 +#endif
   1.146 +
   1.147 +  mFlags = aFlags;
   1.148 +  mWrapColumn = aWrapColumn;
   1.149 +  if (MayWrap()) {
   1.150 +    // Wrapping is the only thing a line breaker is needed for.
   1.151 +    mLineBreaker = nsContentUtils::LineBreaker();
   1.152 +  }
   1.153 +
   1.154 +  // Pick the line break sequence from the CR/LF flag combination.
   1.155 +  const bool wantCR = (mFlags & nsIDocumentEncoder::OutputCRLineBreak) != 0;
   1.156 +  const bool wantLF = (mFlags & nsIDocumentEncoder::OutputLFLineBreak) != 0;
   1.157 +  if (wantCR && wantLF) {
   1.158 +    mLineBreak.AssignLiteral("\r\n");        // Windows
   1.159 +  } else if (wantCR) {
   1.160 +    mLineBreak.Assign(char16_t('\r'));       // Mac
   1.161 +  } else if (wantLF) {
   1.162 +    mLineBreak.Assign(char16_t('\n'));       // Unix/DOM
   1.163 +  } else {
   1.164 +    mLineBreak.AssignLiteral(NS_LINEBREAK);  // Platform/default
   1.165 +  }
   1.166 +
   1.167 +  mLineBreakDue = false;
   1.168 +  mFloatingLines = -1;
   1.169 +
   1.170 +  const bool formatted = (mFlags & nsIDocumentEncoder::OutputFormatted) != 0;
   1.171 +  if (formatted) {
   1.172 +    // Prefs that control how formatted output is produced.
   1.173 +    mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
   1.174 +    mHeaderStrategy =
   1.175 +      Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
   1.176 +
   1.177 +    // DontWrapAnyQuotes is set according to whether plaintext mail
   1.178 +    // is wrapping to window width -- see bug 134439.
   1.179 +    // It is only wanted when we are both wrapping and formatted.
   1.180 +    const bool wrapping =
   1.181 +      (mFlags & nsIDocumentEncoder::OutputWrap) || mWrapColumn > 0;
   1.182 +    if (wrapping) {
   1.183 +      mDontWrapAnyQuotes =
   1.184 +        Preferences::GetBool("mail.compose.wrap_to_window_width",
   1.185 +                             mDontWrapAnyQuotes);
   1.186 +    }
   1.187 +  }
   1.188 +
   1.189 +  // XXX We should let the caller pass this in.
   1.190 +  if (!Preferences::GetBool("browser.frames.enabled")) {
   1.191 +    mFlags |= nsIDocumentEncoder::OutputNoFramesContent;
   1.192 +  } else {
   1.193 +    mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
   1.194 +  }
   1.195 +
   1.196 +  return NS_OK;
   1.197 +}
   1.198 +
   1.199 +bool
   1.200 +nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack)
   1.201 +{
   1.202 +  // Peeks at the top of a bool stack; an empty stack reads as false.
   1.203 +  if (aStack.IsEmpty()) {
   1.204 +    return false;
   1.205 +  }
   1.206 +  return aStack.ElementAt(aStack.Length() - 1);
   1.207 +}
   1.208 +
   1.209 +void
   1.210 +nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue)
   1.211 +{
   1.212 +  // Overwrites the top of a bool stack. With an empty stack there is nothing
   1.213 +  // to overwrite, which is reported through NS_ERROR.
   1.214 +  if (aStack.IsEmpty()) {
   1.215 +    NS_ERROR("There is no \"Last\" value");
   1.216 +    return;
   1.217 +  }
   1.218 +  aStack.ElementAt(aStack.Length() - 1) = aValue;
   1.219 +}
   1.220 +
   1.221 +void
   1.222 +nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue)
   1.223 +{
   1.224 +  aStack.AppendElement(aValue); // aValue becomes the new top of the stack
   1.225 +}
   1.226 +
   1.227 +bool
   1.228 +nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack)
   1.229 +{
   1.230 +  // Removes and returns the top of a bool stack; an empty stack yields false.
   1.231 +  if (aStack.IsEmpty()) {
   1.232 +    return false;
   1.233 +  }
   1.234 +  const bool top = aStack.ElementAt(aStack.Length() - 1);
   1.235 +  aStack.RemoveElementAt(aStack.Length() - 1);
   1.236 +  return top;
   1.237 +}
   1.238 +
   1.239 +bool
   1.240 +nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag)
   1.241 +{
   1.242 +  // Tells whether aTag is a non-textual container that has to be serialized
   1.243 +  // as a placeholder character with its child nodes ignored. That is only
   1.244 +  // done under OutputNonTextContentAsPlaceholder. See bug 895239.
   1.245 +  if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) {
   1.246 +    return false;
   1.247 +  }
   1.248 +  nsIAtom* const nonTextTags[] = {
   1.249 +    nsGkAtoms::audio, nsGkAtoms::canvas, nsGkAtoms::iframe, nsGkAtoms::meter,
   1.250 +    nsGkAtoms::progress, nsGkAtoms::object, nsGkAtoms::svg, nsGkAtoms::video
   1.251 +  };
   1.252 +  for (uint32_t i = 0; i < sizeof(nonTextTags) / sizeof(nonTextTags[0]); ++i) {
   1.253 +    if (aTag == nonTextTags[i]) {
   1.254 +      return true;
   1.255 +    }
   1.256 +  }
   1.257 +  return false;
   1.258 +}
   1.259 +
   1.260 +NS_IMETHODIMP 
   1.261 +nsPlainTextSerializer::AppendText(nsIContent* aText,
   1.262 +                                  int32_t aStartOffset,
   1.263 +                                  int32_t aEndOffset, 
   1.264 +                                  nsAString& aStr)
   1.265 +{
   1.266 +  // Serializes the characters [aStartOffset, aEndOffset) of the text node
   1.267 +  // aText into aStr; aEndOffset == -1 means "up to the end of the text".
   1.268 +  // Nothing is output while an ignored element is open.
   1.269 +  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
   1.270 +    return NS_OK;
   1.271 +  }
   1.272 +
   1.273 +  NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
   1.274 +  if (aStartOffset < 0) {
   1.275 +    return NS_ERROR_INVALID_ARG;
   1.276 +  }
   1.277 +
   1.278 +  NS_ENSURE_ARG(aText);
   1.279 +
   1.280 +  const nsTextFragment* frag = aText->GetText();
   1.281 +  if (!frag) {
   1.282 +    return NS_ERROR_FAILURE;
   1.283 +  }
   1.284 +
   1.285 +  // Clamp the end of the requested range to the fragment.
   1.286 +  const int32_t fragLength = frag->GetLength();
   1.287 +  int32_t endoffset = fragLength;
   1.288 +  if (aEndOffset != -1) {
   1.289 +    endoffset = std::min(aEndOffset, fragLength);
   1.290 +  }
   1.291 +  NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
   1.292 +
   1.293 +  const int32_t length = endoffset - aStartOffset;
   1.294 +  if (length <= 0) {
   1.295 +    return NS_OK;
   1.296 +  }
   1.297 +
   1.298 +  // Copy the range into a 16-bit string, whatever the fragment's storage is.
   1.299 +  nsAutoString textstr;
   1.300 +  if (!frag->Is2b()) {
   1.301 +    // AssignASCII is for 7-bit character only, so don't use it
   1.302 +    const char* data = frag->Get1b();
   1.303 +    CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
   1.304 +  } else {
   1.305 +    textstr.Assign(frag->Get2b() + aStartOffset, length);
   1.306 +  }
   1.307 +
   1.308 +  mOutputString = &aStr;
   1.309 +
   1.310 +  // We have to split the string across newlines to match parser behavior:
   1.311 +  // each \n or \r ends a piece and is replaced by our own line break.
   1.312 +  int32_t start = 0;
   1.313 +  for (int32_t newline = textstr.FindCharInSet("\n\r");
   1.314 +       newline != kNotFound;
   1.315 +       newline = textstr.FindCharInSet("\n\r", start)) {
   1.316 +    if (newline > start) {
   1.317 +      // Pass in the line
   1.318 +      DoAddText(false, Substring(textstr, start, newline - start));
   1.319 +    }
   1.320 +    // Pass in a newline
   1.321 +    DoAddText(true, mLineBreak);
   1.322 +    start = newline + 1;
   1.323 +  }
   1.324 +
   1.325 +  // Consume the last bit of the string if there's any left
   1.326 +  if (start < length) {
   1.327 +    if (start == 0) {
   1.328 +      // No line break was found: hand over the string as a whole.
   1.329 +      DoAddText(false, textstr);
   1.330 +    } else {
   1.331 +      DoAddText(false, Substring(textstr, start, length - start));
   1.332 +    }
   1.333 +  }
   1.334 +
   1.335 +  mOutputString = nullptr;
   1.336 +  return NS_OK;
   1.337 +}
   1.338 +
   1.339 +NS_IMETHODIMP
   1.340 +nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
   1.341 +                                          int32_t aStartOffset,
   1.342 +                                          int32_t aEndOffset,
   1.343 +                                          nsAString& aStr)
   1.344 +{ // A CDATA section is serialized exactly like a text node: see AppendText().
   1.345 +  return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
   1.346 +}
   1.347 +
   1.348 +NS_IMETHODIMP
   1.349 +nsPlainTextSerializer::AppendElementStart(Element* aElement,
   1.350 +                                          Element* aOriginalElement,
   1.351 +                                          nsAString& aStr)
   1.352 +{
   1.353 +  // Serializes the start of aElement into aStr: void elements are added as
   1.354 +  // leaves, everything else is opened as a container. aOriginalElement is
   1.355 +  // not used.
   1.356 +  NS_ENSURE_ARG(aElement);
   1.357 +
   1.358 +  mElement = aElement;
   1.359 +
   1.360 +  nsIAtom* const tag = GetIdForContent(mElement);
   1.361 +  const bool isVoid = nsContentUtils::IsHTMLVoid(tag);
   1.362 +
   1.363 +  // mElement and mOutputString are only set for the duration of the call
   1.364 +  // that does the actual work.
   1.365 +  mOutputString = &aStr;
   1.366 +  const nsresult rv = isVoid
   1.367 +    ? DoAddLeaf(tag)
   1.368 +    : DoOpenContainer(tag);
   1.369 +  mOutputString = nullptr;
   1.370 +  mElement = nullptr;
   1.371 +
   1.372 +  // The head level is bumped only after the element has been handled, so a
   1.373 +  // <head> start tag itself is still processed at the previous level.
   1.374 +  if (tag == nsGkAtoms::head) {
   1.375 +    ++mHeadLevel;
   1.376 +  }
   1.377 +
   1.378 +  return rv;
   1.379 +}
   1.380 + 
   1.381 +NS_IMETHODIMP 
   1.382 +nsPlainTextSerializer::AppendElementEnd(Element* aElement,
   1.383 +                                        nsAString& aStr)
   1.384 +{
   1.385 +  // Serializes the end of aElement into aStr. Only containers have something
   1.386 +  // to close; for a void element no close handler runs and the result is
   1.387 +  // NS_OK.
   1.388 +  NS_ENSURE_ARG(aElement);
   1.389 +
   1.390 +  mElement = aElement;
   1.391 +
   1.392 +  nsIAtom* const tag = GetIdForContent(mElement);
   1.393 +  const bool isVoid = nsContentUtils::IsHTMLVoid(tag);
   1.394 +  nsresult rv = NS_OK;
   1.395 +
   1.396 +  mOutputString = &aStr;
   1.397 +  if (!isVoid) {
   1.398 +    rv = DoCloseContainer(tag);
   1.399 +  }
   1.400 +  mOutputString = nullptr;
   1.401 +  mElement = nullptr;
   1.402 +
   1.403 +  // Leaving a <head>: undo the increment done by AppendElementStart().
   1.404 +  if (tag == nsGkAtoms::head) {
   1.405 +    NS_ASSERTION(mHeadLevel != 0,
   1.406 +                 "mHeadLevel being decremented below 0");
   1.407 +    --mHeadLevel;
   1.408 +  }
   1.409 +
   1.410 +  return rv;
   1.411 +}
   1.412 +
   1.413 +NS_IMETHODIMP // Flush: runs FlushLine() with aStr installed as mOutputString.
   1.414 +nsPlainTextSerializer::Flush(nsAString& aStr)
   1.415 +{
   1.416 +  mOutputString = &aStr;   // aStr is the output string while FlushLine() runs
   1.417 +  FlushLine();
   1.418 +  mOutputString = nullptr; // detach from aStr again before returning
   1.419 +  return NS_OK;
   1.420 +}
   1.421 +
   1.422 +NS_IMETHODIMP
   1.423 +nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument,
   1.424 +                                           nsAString& aStr)
   1.425 +{
   1.426 +  return NS_OK; // no-op: neither argument is used and nothing is written
   1.427 +}
   1.428 +
   1.429 +nsresult
   1.430 +nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag)
   1.431 +{
   1.432 +  // Handles the start tag of container aTag. First check whether the current
   1.433 +  // node has to be output as a placeholder character, ignoring its child nodes.
   1.434 +  if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) {
   1.435 +    if (mIgnoredChildNodeLevel == 0) {
   1.436 +      // Serialize current node as placeholder character (outermost node only)
   1.437 +      Write(NS_LITERAL_STRING("\xFFFC"));
   1.438 +    }
   1.439 +    // Ignore child nodes.
   1.440 +    mIgnoredChildNodeLevel++;
   1.441 +    return NS_OK;
   1.442 +  }
   1.443 +
   1.444 +  if (mFlags & nsIDocumentEncoder::OutputRaw) {
   1.445 +    // Raw means raw.  Don't even think about doing anything fancy
   1.446 +    // here like indenting, adding line breaks or any other
   1.447 +    // characters such as list item bullets, quote characters
   1.448 +    // around <q>, etc.  I mean it!  Don't make me smack you!
   1.449 +
   1.450 +    return NS_OK;
   1.451 +  }
   1.452 +  // Record the open tag; once the stack is full, further tags are not recorded.
   1.453 +  if (mTagStackIndex < TagStackSize) {
   1.454 +    mTagStack[mTagStackIndex++] = aTag;
   1.455 +  }
   1.456 +  // Nothing more to do while an ignored element (see mIgnoreAboveIndex) is open.
   1.457 +  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
   1.458 +    return NS_OK;
   1.459 +  }
   1.460 +
   1.461 +  // Reset this so that <blockquote type=cite> doesn't affect the whitespace
   1.462 +  // above random <pre>s below it.
   1.463 +  mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
   1.464 +                              aTag == nsGkAtoms::pre;
   1.465 +
   1.466 +  bool isInCiteBlockquote = false;
   1.467 +
   1.468 +  // XXX special-case <blockquote type=cite> so that we don't add additional
   1.469 +  // newlines before the text.
   1.470 +  if (aTag == nsGkAtoms::blockquote) {
   1.471 +    nsAutoString value;
   1.472 +    nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
   1.473 +    isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
   1.474 +  }
   1.475 +  // Emit the vertical space that DoCloseContainer left pending, if any.
   1.476 +  if (mLineBreakDue && !isInCiteBlockquote)
   1.477 +    EnsureVerticalSpace(mFloatingLines);
   1.478 +
   1.479 +  // Check whether this tag's content should not be output
   1.480 +  if ((aTag == nsGkAtoms::noscript &&
   1.481 +       !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
   1.482 +      ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
   1.483 +       !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
   1.484 +    // Ignore everything that follows the current tag in 
   1.485 +    // question until a matching end tag is encountered.
   1.486 +    mIgnoreAboveIndex = mTagStackIndex - 1;
   1.487 +    return NS_OK;
   1.488 +  }
   1.489 +
   1.490 +  if (aTag == nsGkAtoms::body) {
   1.491 +    // Try to figure out here whether we have a
   1.492 +    // preformatted style attribute.
   1.493 +    //
   1.494 +    // Trigger on the presence of a "pre-wrap" in the
   1.495 +    // style attribute. That's a very simplistic way to do
   1.496 +    // it, but better than nothing.
   1.497 +    // Also set mWrapColumn to the value given there
   1.498 +    // (which arguably we should only do if told to do so).
   1.499 +    nsAutoString style;
   1.500 +    int32_t whitespace;
   1.501 +    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
   1.502 +       (kNotFound != (whitespace = style.Find("white-space:")))) {
   1.503 +
   1.504 +      if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
   1.505 +#ifdef DEBUG_preformatted
   1.506 +        printf("Set mPreFormatted based on style pre-wrap\n");
   1.507 +#endif
   1.508 +        mPreFormatted = true;
   1.509 +        int32_t widthOffset = style.Find("width:");
   1.510 +        if (widthOffset >= 0) {
   1.511 +          // We have to search for the ch before the semicolon,
   1.512 +          // not for the semicolon itself, because nsString::ToInteger()
   1.513 +          // considers 'c' to be a valid numeric char (even if radix=10)
   1.514 +          // but then gets confused if it sees it next to the number
   1.515 +          // when the radix specified was 10, and returns an error code.
   1.516 +          int32_t semiOffset = style.Find("ch", false, widthOffset+6); // 6 == strlen("width:")
   1.517 +          int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
   1.518 +                            : style.Length() - widthOffset);
   1.519 +          nsAutoString widthstr;
   1.520 +          style.Mid(widthstr, widthOffset+6, length);
   1.521 +          nsresult err;
   1.522 +          int32_t col = widthstr.ToInteger(&err);
   1.523 +          // NOTE(review): a negative col would wrap around in the cast below.
   1.524 +          if (NS_SUCCEEDED(err)) {
   1.525 +            mWrapColumn = (uint32_t)col;
   1.526 +#ifdef DEBUG_preformatted
   1.527 +            printf("Set wrap column to %d based on style\n", mWrapColumn);
   1.528 +#endif
   1.529 +          }
   1.530 +        }
   1.531 +      }
   1.532 +      else if (kNotFound != style.Find("pre", true, whitespace)) {
   1.533 +#ifdef DEBUG_preformatted
   1.534 +        printf("Set mPreFormatted based on style pre\n");
   1.535 +#endif
   1.536 +        mPreFormatted = true;
   1.537 +        mWrapColumn = 0;
   1.538 +      }
   1.539 +    } 
   1.540 +    else {
   1.541 +      /* See comment at end of function. */
   1.542 +      mInWhitespace = true;
   1.543 +      mPreFormatted = false;
   1.544 +    }
   1.545 +
   1.546 +    return NS_OK;
   1.547 +  }
   1.548 +
   1.549 +  // Keep this in sync with DoCloseContainer!
   1.550 +  if (!DoOutput()) {
   1.551 +    return NS_OK;
   1.552 +  }
   1.553 +
   1.554 +  if (aTag == nsGkAtoms::p)
   1.555 +    EnsureVerticalSpace(1);
   1.556 +  else if (aTag == nsGkAtoms::pre) {
   1.557 +    if (GetLastBool(mIsInCiteBlockquote))
   1.558 +      EnsureVerticalSpace(0);
   1.559 +    else if (mHasWrittenCiteBlockquote) {
   1.560 +      EnsureVerticalSpace(0);
   1.561 +      mHasWrittenCiteBlockquote = false;
   1.562 +    }
   1.563 +    else
   1.564 +      EnsureVerticalSpace(1);
   1.565 +  }
   1.566 +  else if (aTag == nsGkAtoms::tr) {
   1.567 +    PushBool(mHasWrittenCellsForRow, false); // no cell written yet for this row
   1.568 +  }
   1.569 +  else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
   1.570 +    // We must make sure that the content of two table cells get a
   1.571 +    // space between them.
   1.572 +
   1.573 +    // To make the separation between cells most obvious and
   1.574 +    // importable, we use a TAB.
   1.575 +    if (GetLastBool(mHasWrittenCellsForRow)) {
   1.576 +      // Bypass |Write| so that the TAB isn't compressed away.
   1.577 +      AddToLine(MOZ_UTF16("\t"), 1);
   1.578 +      mInWhitespace = true;
   1.579 +    }
   1.580 +    else if (mHasWrittenCellsForRow.IsEmpty()) {
   1.581 +      // We don't always see a <tr> (nor a <table>) before the <td> if we're
   1.582 +      // copying part of a table
   1.583 +      PushBool(mHasWrittenCellsForRow, true); // will never be popped
   1.584 +    }
   1.585 +    else {
   1.586 +      SetLastBool(mHasWrittenCellsForRow, true);
   1.587 +    }
   1.588 +  }
   1.589 +  else if (aTag == nsGkAtoms::ul) {
   1.590 +    // Indent here to support nested lists, which aren't included in li :-(
   1.591 +    EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
   1.592 +         // Must end the current line before we change indentation
   1.593 +    mIndent += kIndentSizeList;
   1.594 +    mULCount++;
   1.595 +  }
   1.596 +  else if (aTag == nsGkAtoms::ol) {
   1.597 +    EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
   1.598 +    if (mFlags & nsIDocumentEncoder::OutputFormatted) {
   1.599 +      // Must end the current line before we change indentation
   1.600 +      if (mOLStackIndex < OLStackSize) {
   1.601 +        nsAutoString startAttr;
   1.602 +        int32_t startVal = 1;
   1.603 +        if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
   1.604 +          nsresult rv = NS_OK;
   1.605 +          startVal = startAttr.ToInteger(&rv);
   1.606 +          if (NS_FAILED(rv))
   1.607 +            startVal = 1;
   1.608 +        }
   1.609 +        mOLStack[mOLStackIndex++] = startVal;
   1.610 +      }
   1.611 +    } else { // unformatted output: only the nesting depth is tracked
   1.612 +      mOLStackIndex++;
   1.613 +    }
   1.614 +    mIndent += kIndentSizeList;  // see ul
   1.615 +  }
   1.616 +  else if (aTag == nsGkAtoms::li &&
   1.617 +           (mFlags & nsIDocumentEncoder::OutputFormatted)) {
   1.618 +    if (mTagStackIndex > 1 && IsInOL()) {
   1.619 +      if (mOLStackIndex > 0) {
   1.620 +        nsAutoString valueAttr;
   1.621 +        if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
   1.622 +          nsresult rv = NS_OK;
   1.623 +          int32_t valueAttrVal = valueAttr.ToInteger(&rv);
   1.624 +          if (NS_SUCCEEDED(rv))
   1.625 +            mOLStack[mOLStackIndex-1] = valueAttrVal;
   1.626 +        }
   1.627 +        // This is what nsBulletFrame does for OLs:
   1.628 +        mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
   1.629 +      }
   1.630 +      else {
   1.631 +        mInIndentString.Append(char16_t('#'));
   1.632 +      }
   1.633 +
   1.634 +      mInIndentString.Append(char16_t('.'));
   1.635 +
   1.636 +    }
   1.637 +    else {
   1.638 +      static char bulletCharArray[] = "*o+#";
   1.639 +      uint32_t index = mULCount > 0 ? (mULCount - 1) : 3; // '#' when in no <ul>
   1.640 +      char bulletChar = bulletCharArray[index % 4]; // cycles with <ul> depth
   1.641 +      mInIndentString.Append(char16_t(bulletChar));
   1.642 +    }
   1.643 +
   1.644 +    mInIndentString.Append(char16_t(' '));
   1.645 +  }
   1.646 +  else if (aTag == nsGkAtoms::dl) {
   1.647 +    EnsureVerticalSpace(1);
   1.648 +  }
   1.649 +  else if (aTag == nsGkAtoms::dt) {
   1.650 +    EnsureVerticalSpace(0);
   1.651 +  }
   1.652 +  else if (aTag == nsGkAtoms::dd) {
   1.653 +    EnsureVerticalSpace(0);
   1.654 +    mIndent += kIndentSizeDD;
   1.655 +  }
   1.656 +  else if (aTag == nsGkAtoms::span) {
   1.657 +    ++mSpanLevel;
   1.658 +  }
   1.659 +  else if (aTag == nsGkAtoms::blockquote) {
   1.660 +    // Push
   1.661 +    PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
   1.662 +    if (isInCiteBlockquote) {
   1.663 +      EnsureVerticalSpace(0);
   1.664 +      mCiteQuoteLevel++;
   1.665 +    }
   1.666 +    else {
   1.667 +      EnsureVerticalSpace(1);
   1.668 +      mIndent += kTabSize; // Check for some maximum value?
   1.669 +    }
   1.670 +  }
   1.671 +  else if (aTag == nsGkAtoms::q) {
   1.672 +    Write(NS_LITERAL_STRING("\""));
   1.673 +  }
   1.674 +
   1.675 +  // Else make sure we'll separate block level tags,
   1.676 +  // even if we're about to leave, before doing any other formatting.
   1.677 +  else if (nsContentUtils::IsHTMLBlock(aTag)) {
   1.678 +    EnsureVerticalSpace(0);
   1.679 +  }
   1.680 +
   1.681 +  //////////////////////////////////////////////////////////////
   1.682 +  if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
   1.683 +    return NS_OK;
   1.684 +  }
   1.685 +  //////////////////////////////////////////////////////////////
   1.686 +  // The rest of this routine is formatted output stuff,
   1.687 +  // which we should skip if we're not formatted:
   1.688 +  //////////////////////////////////////////////////////////////
   1.689 +
   1.690 +  // Push on stack
   1.691 +  bool currentNodeIsConverted = IsCurrentNodeConverted();
   1.692 +
   1.693 +  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
   1.694 +      aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
   1.695 +      aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
   1.696 +  {
   1.697 +    EnsureVerticalSpace(2);
   1.698 +    if (mHeaderStrategy == 2) {  // numbered
   1.699 +      mIndent += kIndentSizeHeaders;
   1.700 +      // Caching
   1.701 +      int32_t level = HeaderLevel(aTag);
   1.702 +      // Increase counter for current level
   1.703 +      mHeaderCounter[level]++;
   1.704 +      // Reset all lower levels
   1.705 +      int32_t i;
   1.706 +
   1.707 +      for (i = level + 1; i <= 6; i++) {
   1.708 +        mHeaderCounter[i] = 0;
   1.709 +      }
   1.710 +
   1.711 +      // Construct numbers
   1.712 +      nsAutoString leadup;
   1.713 +      for (i = 1; i <= level; i++) {
   1.714 +        leadup.AppendInt(mHeaderCounter[i]);
   1.715 +        leadup.Append(char16_t('.'));
   1.716 +      }
   1.717 +      leadup.Append(char16_t(' '));
   1.718 +      Write(leadup);
   1.719 +    }
   1.720 +    else if (mHeaderStrategy == 1) { // indent increasingly
   1.721 +      mIndent += kIndentSizeHeaders;
   1.722 +      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
   1.723 +           // for h(x), run x-1 times
   1.724 +        mIndent += kIndentIncrementHeaders;
   1.725 +      }
   1.726 +    }
   1.727 +  }
   1.728 +  else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
   1.729 +    nsAutoString url;
   1.730 +    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
   1.731 +        && !url.IsEmpty()) {
   1.732 +      mURL = url;
   1.733 +    }
   1.734 +  }
   1.735 +  else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
   1.736 +    Write(NS_LITERAL_STRING("^"));
   1.737 +  }
   1.738 +  else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
   1.739 +    Write(NS_LITERAL_STRING("_"));
   1.740 +  }
   1.741 +  else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
   1.742 +    Write(NS_LITERAL_STRING("|"));
   1.743 +  }
   1.744 +  else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
   1.745 +           && mStructs && !currentNodeIsConverted) {
   1.746 +    Write(NS_LITERAL_STRING("*"));
   1.747 +  }
   1.748 +  else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
   1.749 +           && mStructs && !currentNodeIsConverted) {
   1.750 +    Write(NS_LITERAL_STRING("/"));
   1.751 +  }
   1.752 +  else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
   1.753 +    Write(NS_LITERAL_STRING("_"));
   1.754 +  }
   1.755 +
   1.756 +  /* Container elements are always block elements, so we shouldn't
   1.757 +     output any whitespace immediately after the container tag even if
   1.758 +     there's extra whitespace there because the HTML is pretty-printed
   1.759 +     or something. To ensure that happens, tell the serializer we're
   1.760 +     already in whitespace so it won't output more. */
   1.761 +  mInWhitespace = true;
   1.762 +
   1.763 +  return NS_OK;
   1.764 +}
   1.765 +// Handles the close tag of container aTag: unwinds indentation, list and quote state and queues trailing line breaks or markers. Always returns NS_OK.
   1.766 +nsresult
   1.767 +nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag)
   1.768 +{
   1.769 +  if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) { // note: tests mElement's tag, not aTag
   1.770 +    mIgnoredChildNodeLevel--;
   1.771 +    return NS_OK;
   1.772 +  }
   1.773 +
   1.774 +  if (mFlags & nsIDocumentEncoder::OutputRaw) {
   1.775 +    // Raw means raw.  Don't even think about doing anything fancy
   1.776 +    // here like indenting, adding line breaks or any other
   1.777 +    // characters such as list item bullets, quote characters
   1.778 +    // around <q>, etc.  I mean it!  Don't make me smack you!
   1.779 +
   1.780 +    return NS_OK;
   1.781 +  }
   1.782 +
   1.783 +  if (mTagStackIndex > 0) { // never let the index drop below zero
   1.784 +    --mTagStackIndex;
   1.785 +  }
   1.786 +
   1.787 +  if (mTagStackIndex >= mIgnoreAboveIndex) { // tags at or above mIgnoreAboveIndex produce no output
   1.788 +    if (mTagStackIndex == mIgnoreAboveIndex) {
   1.789 +      // We're dealing with the close tag whose matching
   1.790 +      // open tag had set the mIgnoreAboveIndex value.
   1.791 +      // Reset mIgnoreAboveIndex before discarding this tag.
   1.792 +      mIgnoreAboveIndex = (uint32_t)kNotFound;
   1.793 +    }
   1.794 +    return NS_OK;
   1.795 +  }
   1.796 +
   1.797 +  // End current line if we're ending a block level tag
   1.798 +  if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
   1.799 +    // We want the output to end with a new line,
   1.800 +    // but in preformatted areas like text fields,
   1.801 +    // we can't emit newlines that weren't there.
   1.802 +    // So add the newline only in the case of formatted output.
   1.803 +    if (mFlags & nsIDocumentEncoder::OutputFormatted) {
   1.804 +      EnsureVerticalSpace(0);
   1.805 +    }
   1.806 +    else {
   1.807 +      FlushLine();
   1.808 +    }
   1.809 +    // We won't want to do anything with these in formatted mode either,
   1.810 +    // so just return now:
   1.811 +    return NS_OK;
   1.812 +  }
   1.813 +
   1.814 +  // Keep this in sync with DoOpenContainer!
   1.815 +  if (!DoOutput()) {
   1.816 +    return NS_OK;
   1.817 +  }
   1.818 +
   1.819 +  if (aTag == nsGkAtoms::tr) {
   1.820 +    PopBool(mHasWrittenCellsForRow); // discard this row's entry; the popped value is not needed
   1.821 +    // Should always end a line, but get no more whitespace
   1.822 +    if (mFloatingLines < 0)
   1.823 +      mFloatingLines = 0;
   1.824 +    mLineBreakDue = true;
   1.825 +  }
   1.826 +  else if (((aTag == nsGkAtoms::li) ||
   1.827 +            (aTag == nsGkAtoms::dt)) &&
   1.828 +           (mFlags & nsIDocumentEncoder::OutputFormatted)) {
   1.829 +    // Items that should always end a line, but get no more whitespace
   1.830 +    if (mFloatingLines < 0)
   1.831 +      mFloatingLines = 0;
   1.832 +    mLineBreakDue = true;
   1.833 +  }
   1.834 +  else if (aTag == nsGkAtoms::pre) {
   1.835 +    mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1; // no empty line after </pre> within a cite blockquote, one otherwise
   1.836 +    mLineBreakDue = true;
   1.837 +  }
   1.838 +  else if (aTag == nsGkAtoms::ul) {
   1.839 +    FlushLine();
   1.840 +    mIndent -= kIndentSizeList;
   1.841 +    if (--mULCount + mOLStackIndex == 0) { // no list left open: request one empty line
   1.842 +      mFloatingLines = 1;
   1.843 +      mLineBreakDue = true;
   1.844 +    }
   1.845 +  }
   1.846 +  else if (aTag == nsGkAtoms::ol) {
   1.847 +    FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
   1.848 +    mIndent -= kIndentSizeList;
   1.849 +    NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
   1.850 +    mOLStackIndex--;
   1.851 +    if (mULCount + mOLStackIndex == 0) { // no list left open: request one empty line
   1.852 +      mFloatingLines = 1;
   1.853 +      mLineBreakDue = true;
   1.854 +    }
   1.855 +  }  
   1.856 +  else if (aTag == nsGkAtoms::dl) {
   1.857 +    mFloatingLines = 1;
   1.858 +    mLineBreakDue = true;
   1.859 +  }
   1.860 +  else if (aTag == nsGkAtoms::dd) {
   1.861 +    FlushLine();
   1.862 +    mIndent -= kIndentSizeDD;
   1.863 +  }
   1.864 +  else if (aTag == nsGkAtoms::span) {
   1.865 +    NS_ASSERTION(mSpanLevel, "Span level will be negative!");
   1.866 +    --mSpanLevel;
   1.867 +  }
   1.868 +  else if (aTag == nsGkAtoms::div) {
   1.869 +    if (mFloatingLines < 0)
   1.870 +      mFloatingLines = 0;
   1.871 +    mLineBreakDue = true;
   1.872 +  }
   1.873 +  else if (aTag == nsGkAtoms::blockquote) {
   1.874 +    FlushLine();    // Is this needed?
   1.875 +
   1.876 +    // Pop this blockquote's "is cite blockquote" flag
   1.877 +    bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
   1.878 +
   1.879 +    if (isInCiteBlockquote) {
   1.880 +      NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
   1.881 +      mCiteQuoteLevel--;
   1.882 +      mFloatingLines = 0;
   1.883 +      mHasWrittenCiteBlockquote = true;
   1.884 +    }
   1.885 +    else {
   1.886 +      mIndent -= kTabSize;
   1.887 +      mFloatingLines = 1;
   1.888 +    }
   1.889 +    mLineBreakDue = true;
   1.890 +  }
   1.891 +  else if (aTag == nsGkAtoms::q) {
   1.892 +    Write(NS_LITERAL_STRING("\""));
   1.893 +  }
   1.894 +  else if (nsContentUtils::IsHTMLBlock(aTag)
   1.895 +           && aTag != nsGkAtoms::script) {
   1.896 +    // All other blocks get 1 vertical space after them
   1.897 +    // in formatted mode, otherwise 0.
   1.898 +    // This is hard. Sometimes 0 is a better number, but
   1.899 +    // how to know?
   1.900 +    if (mFlags & nsIDocumentEncoder::OutputFormatted)
   1.901 +      EnsureVerticalSpace(1);
   1.902 +    else {
   1.903 +      if (mFloatingLines < 0)
   1.904 +        mFloatingLines = 0;
   1.905 +      mLineBreakDue = true;
   1.906 +    }
   1.907 +  }
   1.908 +
   1.909 +  //////////////////////////////////////////////////////////////
   1.910 +  if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
   1.911 +    return NS_OK;
   1.912 +  }
   1.913 +  //////////////////////////////////////////////////////////////
   1.914 +  // The rest of this routine is formatted output stuff,
   1.915 +  // which we should skip if we're not formatted:
   1.916 +  //////////////////////////////////////////////////////////////
   1.917 +
   1.918 +  // Converted nodes get no structure markers below. NOTE(review): the old comment said this pops the currentConverted stack - confirm.
   1.919 +  bool currentNodeIsConverted = IsCurrentNodeConverted();
   1.920 +  
   1.921 +  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
   1.922 +      aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
   1.923 +      aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
   1.924 +    
   1.925 +    if (mHeaderStrategy) {  /*numbered or indent increasingly*/ 
   1.926 +      mIndent -= kIndentSizeHeaders;
   1.927 +    }
   1.928 +    if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
   1.929 +      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
   1.930 +           // for h(x), run x-1 times
   1.931 +        mIndent -= kIndentIncrementHeaders;
   1.932 +      }
   1.933 +    }
   1.934 +    EnsureVerticalSpace(1);
   1.935 +  }
   1.936 +  else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
   1.937 +    nsAutoString temp; // builds " <URL>" to append after the link text
   1.938 +    temp.AssignLiteral(" <");
   1.939 +    temp += mURL;
   1.940 +    temp.Append(char16_t('>'));
   1.941 +    Write(temp);
   1.942 +    mURL.Truncate();
   1.943 +  }
   1.944 +  else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
   1.945 +           && mStructs && !currentNodeIsConverted) {
   1.946 +    Write(kSpace);
   1.947 +  }
   1.948 +  else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
   1.949 +    Write(NS_LITERAL_STRING("|"));
   1.950 +  }
   1.951 +  else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
   1.952 +           && mStructs && !currentNodeIsConverted) {
   1.953 +    Write(NS_LITERAL_STRING("*"));
   1.954 +  }
   1.955 +  else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
   1.956 +           && mStructs && !currentNodeIsConverted) {
   1.957 +    Write(NS_LITERAL_STRING("/"));
   1.958 +  }
   1.959 +  else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
   1.960 +    Write(NS_LITERAL_STRING("_"));
   1.961 +  }
   1.962 +
   1.963 +  return NS_OK;
   1.964 +}
   1.965 +// Returns true when leaf content must be dropped: mIgnoredChildNodeLevel is positive, or the tag stack shows <select> (top or one below) or <script>/<style> (top).
   1.966 +bool
   1.967 +nsPlainTextSerializer::MustSuppressLeaf()
   1.968 +{
   1.969 +  if (mIgnoredChildNodeLevel > 0) {
   1.970 +    return true;
   1.971 +  }
   1.972 +
   1.973 +  if ((mTagStackIndex > 1 &&
   1.974 +       mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
   1.975 +      (mTagStackIndex > 0 &&
   1.976 +        mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
   1.977 +    // Don't output the contents of SELECT elements;
   1.978 +    // Might be nice, eventually, to output just the selected element.
   1.979 +    // Read more in bug 31994.
   1.980 +    return true;
   1.981 +  }
   1.982 +
   1.983 +  if (mTagStackIndex > 0 &&
   1.984 +      (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
   1.985 +       mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
   1.986 +    // Don't output the contents of <script> or <style> tags;
   1.987 +    return true;
   1.988 +  }
   1.989 +
   1.990 +  return false;
   1.991 +}
   1.992 +// Adds aText to the output or, when aIsLineBreak is true, a source line break (kept as a newline only in preformatted contexts, otherwise collapsed to one space).
   1.993 +void
   1.994 +nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText)
   1.995 +{
   1.996 +  // If we don't want any output, just return
   1.997 +  if (!DoOutput()) {
   1.998 +    return;
   1.999 +  }
  1.1000 +
  1.1001 +  if (!aIsLineBreak) {
  1.1002 +    // Make sure to reset this, since it's no longer true.
  1.1003 +    mHasWrittenCiteBlockquote = false;
  1.1004 +  }
  1.1005 +
  1.1006 +  if (mLineBreakDue) // emit the line breaks queued earlier before adding anything new
  1.1007 +    EnsureVerticalSpace(mFloatingLines);
  1.1008 +
  1.1009 +  if (MustSuppressLeaf()) {
  1.1010 +    return;
  1.1011 +  }
  1.1012 +
  1.1013 +  if (aIsLineBreak) {
  1.1014 +    // The only times we want to pass along whitespace from the original
  1.1015 +    // html source are if we're forced into preformatted mode via flags,
  1.1016 +    // or if we're prettyprinting and we're inside a <pre>.
  1.1017 +    // Otherwise, either we're collapsing to minimal text, or we're
  1.1018 +    // prettyprinting to mimic the html format, and in neither case
  1.1019 +    // does the formatting of the html source help us.
  1.1020 +    if ((mFlags & nsIDocumentEncoder::OutputPreformatted) ||
  1.1021 +        (mPreFormatted && !mWrapColumn) ||
  1.1022 +        IsInPre()) {
  1.1023 +      EnsureVerticalSpace(mEmptyLines+1);
  1.1024 +    }
  1.1025 +    else if (!mInWhitespace) { // collapse the break into a single space
  1.1026 +      Write(kSpace);
  1.1027 +      mInWhitespace = true;
  1.1028 +    }
  1.1029 +    return;
  1.1030 +  }
  1.1031 +
  1.1032 +  /* Check, if we are in a link (symbolized with mURL containing the URL)
  1.1033 +     and the text is equal to the URL. In that case we don't want to output
  1.1034 +     the URL twice so we scrap the text in mURL. */
  1.1035 +  if (!mURL.IsEmpty() && mURL.Equals(aText)) {
  1.1036 +    mURL.Truncate();
  1.1037 +  }
  1.1038 +  Write(aText);
  1.1039 +}
  1.1040 +// Serializes leaf element aTag. Checked in order: <br>, <hr> (formatted mode only), placeholder mode (any other leaf), <img>. Always returns NS_OK.
  1.1041 +nsresult
  1.1042 +nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag)
  1.1043 +{
  1.1044 +  // If we don't want any output, just return
  1.1045 +  if (!DoOutput()) {
  1.1046 +    return NS_OK;
  1.1047 +  }
  1.1048 +
  1.1049 +  if (mLineBreakDue) // emit the line breaks queued earlier before adding anything new
  1.1050 +    EnsureVerticalSpace(mFloatingLines);
  1.1051 +
  1.1052 +  if (MustSuppressLeaf()) {
  1.1053 +    return NS_OK;
  1.1054 +  }
  1.1055 +
  1.1056 +  if (aTag == nsGkAtoms::br) {
  1.1057 +    // Another egregious editor workaround, see bug 38194:
  1.1058 +    // ignore the bogus br tags that the editor sticks here and there.
  1.1059 +    nsAutoString tagAttr;
  1.1060 +    if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr))
  1.1061 +        || !tagAttr.EqualsLiteral("_moz")) {
  1.1062 +      EnsureVerticalSpace(mEmptyLines+1);
  1.1063 +    }
  1.1064 +  }
  1.1065 +  else if (aTag == nsGkAtoms::hr &&
  1.1066 +           (mFlags & nsIDocumentEncoder::OutputFormatted)) {
  1.1067 +    EnsureVerticalSpace(0);
  1.1068 +
  1.1069 +    // Make a line of dashes as wide as the wrap width (25 when no wrap column is set)
  1.1070 +    // XXX honoring percentage would be nice
  1.1071 +    nsAutoString line;
  1.1072 +    uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25);
  1.1073 +    while (line.Length() < width) {
  1.1074 +      line.Append(char16_t('-'));
  1.1075 +    }
  1.1076 +    Write(line);
  1.1077 +
  1.1078 +    EnsureVerticalSpace(0);
  1.1079 +  }
  1.1080 +  else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) {
  1.1081 +    Write(NS_LITERAL_STRING("\xFFFC")); // U+FFFC OBJECT REPLACEMENT CHARACTER
  1.1082 +  }
  1.1083 +  else if (aTag == nsGkAtoms::img) {
  1.1084 +    /* Output (in decreasing order of preference)
  1.1085 +       alt, title or nothing */
  1.1086 +    // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
  1.1087 +    nsAutoString imageDescription;
  1.1088 +    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt,
  1.1089 +                                       imageDescription))) {
  1.1090 +      // alt was retrieved: it is written as-is below, so an empty value (|alt=""|) outputs nothing
  1.1091 +    }
  1.1092 +    else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title,
  1.1093 +                                            imageDescription))
  1.1094 +             && !imageDescription.IsEmpty()) {
  1.1095 +      imageDescription = NS_LITERAL_STRING(" [") +
  1.1096 +                         imageDescription +
  1.1097 +                         NS_LITERAL_STRING("] ");
  1.1098 +    }
  1.1099 +   
  1.1100 +    Write(imageDescription);
  1.1101 +  }
  1.1102 +
  1.1103 +  return NS_OK;
  1.1104 +}
  1.1105 +
  1.1106 +/**
  1.1107 + * Adds as many newlines as necessary to end up with |noOfRows| empty lines,
  1.1108 + * then clears mLineBreakDue and resets mFloatingLines to -1.
  1.1109 + * noOfRows = -1    :   Being in the middle of some line of text
  1.1110 + * noOfRows =  0    :   Being at the start of a line
  1.1111 + * noOfRows =  n>0  :   Having n empty lines before the current line.
  1.1112 + */
  1.1113 +void
  1.1114 +nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows)
  1.1115 +{
  1.1116 +  // If we have something in the indent we probably want to output
  1.1117 +  // it and it's not included in the count for empty lines so we don't
  1.1118 +  // realize that we should start a new line.
  1.1119 +  if (noOfRows >= 0 && !mInIndentString.IsEmpty()) {
  1.1120 +    EndLine(false);
  1.1121 +    mInWhitespace = true;
  1.1122 +  }
  1.1123 +
  1.1124 +  while(mEmptyLines < noOfRows) { // each hard EndLine() raises mEmptyLines
  1.1125 +    EndLine(false);
  1.1126 +    mInWhitespace = true;
  1.1127 +  }
  1.1128 +  mLineBreakDue = false;
  1.1129 +  mFloatingLines = -1;
  1.1130 +}
  1.1131 +
  1.1132 +/**
  1.1133 + * Writes the current line cache (mCurrentLine) to the output without adding
  1.1134 + * a NEWLINE, then empties it. Should not be used if line wrapping is of
  1.1135 + * importance, since this function destroys the cache information.
  1.1136 + *
  1.1137 + * It also writes indentation and quotes first if we believe we are at
  1.1138 + * the start of the line (mAtFirstColumn). Does nothing if the cache is empty.
  1.1139 + */
  1.1140 +void
  1.1141 +nsPlainTextSerializer::FlushLine()
  1.1142 +{
  1.1143 +  if (!mCurrentLine.IsEmpty()) {
  1.1144 +    if (mAtFirstColumn) {
  1.1145 +      OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
  1.1146 +    }
  1.1147 +
  1.1148 +    Output(mCurrentLine);
  1.1149 +    mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty(); // mCurrentLine is non-empty in this branch, so this clears the flag
  1.1150 +    mCurrentLine.Truncate();
  1.1151 +    mCurrentLineWidth = 0;
  1.1152 +  }
  1.1153 +}
  1.1154 +
  1.1155 +/**
  1.1156 + * Appends aString to our current output device (the string mOutputString).
  1.1157 + * Unless OutputPersistNBSP is set, non-breaking spaces are first replaced with
  1.1158 + * normal spaces, since most (all?) receivers of the result won't understand
  1.1159 + * the nbsp and may even be confused by it. Note: aString is modified in place.
  1.1160 + */
  1.1161 +void 
  1.1162 +nsPlainTextSerializer::Output(nsString& aString)
  1.1163 +{
  1.1164 +  if (!aString.IsEmpty()) {
  1.1165 +    mStartedOutput = true; // remember that something non-empty has been emitted
  1.1166 +  }
  1.1167 +
  1.1168 +  if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
  1.1169 +    // First, replace all nbsp characters with spaces,
  1.1170 +    // which the unicode encoder won't do for us.
  1.1171 +    aString.ReplaceChar(kNBSP, kSPACE);
  1.1172 +  }
  1.1173 +  mOutputString->Append(aString);
  1.1174 +}
  1.1175 +// True if text at s starts with '>', ' ', NBSP or "From " (callers then space-stuff the line). NOTE(review): compares up to 5 chars; assumes s is null-terminated or long enough - confirm.
  1.1176 +static bool
  1.1177 +IsSpaceStuffable(const char16_t *s)
  1.1178 +{
  1.1179 +  if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
  1.1180 +      nsCRT::strncmp(s, MOZ_UTF16("From "), 5) == 0)
  1.1181 +    return true;
  1.1182 +  else
  1.1183 +    return false;
  1.1184 +}
  1.1185 +
  1.1186 +/**
  1.1187 + * Adds a piece of text (aLineFragment, aLineFragmentLength char16_t units) to
  1.1188 + * the currently stored line. If we are wrapping text and the stored line
  1.1189 + * becomes too long, a suitable location to wrap is found and every completed
  1.1190 + * line is output through EndLine(); the remainder stays in mCurrentLine.
  1.1191 + */
  1.1192 +void
  1.1193 +nsPlainTextSerializer::AddToLine(const char16_t * aLineFragment, 
  1.1194 +                                 int32_t aLineFragmentLength)
  1.1195 +{
  1.1196 +  uint32_t prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent; // quote chars plus one space, plus indentation
  1.1197 +  
  1.1198 +  if (mLineBreakDue)
  1.1199 +    EnsureVerticalSpace(mFloatingLines);
  1.1200 +
  1.1201 +  int32_t linelength = mCurrentLine.Length();
  1.1202 +  if (0 == linelength) {
  1.1203 +    if (0 == aLineFragmentLength) {
  1.1204 +      // Nothing at all. Are you kidding me?
  1.1205 +      return;
  1.1206 +    }
  1.1207 +
  1.1208 +    if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
  1.1209 +      if (IsSpaceStuffable(aLineFragment)
  1.1210 +         && mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
  1.1211 +         )
  1.1212 +        {
  1.1213 +          // Space stuffing a la RFC 2646 (format=flowed).
  1.1214 +          mCurrentLine.Append(char16_t(' '));
  1.1215 +          
  1.1216 +          if (MayWrap()) {
  1.1217 +            mCurrentLineWidth += GetUnicharWidth(' ');
  1.1218 +#ifdef DEBUG_wrapping
  1.1219 +            NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
  1.1220 +                                               mCurrentLine.Length()) ==
  1.1221 +                         (int32_t)mCurrentLineWidth,
  1.1222 +                         "mCurrentLineWidth and reality out of sync!");
  1.1223 +#endif
  1.1224 +          }
  1.1225 +        }
  1.1226 +    }
  1.1227 +    mEmptyLines=-1; // the line is about to receive text, so it is no longer empty
  1.1228 +  }
  1.1229 +    
  1.1230 +  mCurrentLine.Append(aLineFragment, aLineFragmentLength);
  1.1231 +  if (MayWrap()) {
  1.1232 +    mCurrentLineWidth += GetUnicharStringWidth(aLineFragment,
  1.1233 +                                               aLineFragmentLength);
  1.1234 +#ifdef DEBUG_wrapping
  1.1235 +    NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
  1.1236 +                                       mCurrentLine.Length()) ==
  1.1237 +                 (int32_t)mCurrentLineWidth,
  1.1238 +                 "mCurrentLineWidth and reality out of sync!");
  1.1239 +#endif
  1.1240 +  }
  1.1241 +
  1.1242 +  linelength = mCurrentLine.Length();
  1.1243 +
  1.1244 +  //  Wrap?
  1.1245 +  if (MayWrap())
  1.1246 +  {
  1.1247 +#ifdef DEBUG_wrapping
  1.1248 +    NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
  1.1249 +                                  mCurrentLine.Length()) ==
  1.1250 +                 (int32_t)mCurrentLineWidth,
  1.1251 +                 "mCurrentLineWidth and reality out of sync!");
  1.1252 +#endif
  1.1253 +    // Yes, wrap!
  1.1254 +    // The "+4" is to avoid wrap lines that only would be a couple
  1.1255 +    // of letters too long. We give this bonus only if the
  1.1256 +    // wrapcolumn is more than 20.
  1.1257 +    uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0;
  1.1258 +
  1.1259 +    // XXX: Should calculate prefixwidth with GetUnicharStringWidth
  1.1260 +    while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {      
  1.1261 +      // We go from the end removing one letter at a time until
  1.1262 +      // we have a reasonable width
  1.1263 +      int32_t goodSpace = mCurrentLine.Length();
  1.1264 +      uint32_t width = mCurrentLineWidth;
  1.1265 +      while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
  1.1266 +        goodSpace--;
  1.1267 +        width -= GetUnicharWidth(mCurrentLine[goodSpace]);
  1.1268 +      }
  1.1269 +
  1.1270 +      goodSpace++;
  1.1271 +      
  1.1272 +      if (mLineBreaker) {
  1.1273 +        goodSpace = mLineBreaker->Prev(mCurrentLine.get(), 
  1.1274 +                                    mCurrentLine.Length(), goodSpace);
  1.1275 +        if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
  1.1276 +            nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
  1.1277 +          --goodSpace;    // adjust the position since line breaker returns a position next to space
  1.1278 +        }
  1.1279 +      }
  1.1280 +      // fallback if the line breaker is unavailable or failed
  1.1281 +      if (!mLineBreaker) {
  1.1282 +        goodSpace = mWrapColumn-prefixwidth;
  1.1283 +        while (goodSpace >= 0 &&
  1.1284 +               !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
  1.1285 +          goodSpace--;
  1.1286 +        }
  1.1287 +      }
  1.1288 +      
  1.1289 +      nsAutoString restOfLine;
  1.1290 +      if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
  1.1291 +        // If we didn't find a good place to break, accept a long line and
  1.1292 +        // try to find another place to break further to the right
  1.1293 +        goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
  1.1294 +        if (mLineBreaker) {
  1.1295 +          if ((uint32_t)goodSpace < mCurrentLine.Length())
  1.1296 +            goodSpace = mLineBreaker->Next(mCurrentLine.get(), 
  1.1297 +                                           mCurrentLine.Length(), goodSpace);
  1.1298 +          if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
  1.1299 +            goodSpace = mCurrentLine.Length();
  1.1300 +        }
  1.1301 +        // fallback if the line breaker is unavailable or failed
  1.1302 +        if (!mLineBreaker) {
  1.1303 +          goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
  1.1304 +          while (goodSpace < linelength &&
  1.1305 +                 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
  1.1306 +            goodSpace++;
  1.1307 +          }
  1.1308 +        }
  1.1309 +      }
  1.1310 +      
  1.1311 +      if ((goodSpace < linelength) && (goodSpace > 0)) {
  1.1312 +        // Found a place to break
  1.1313 +
  1.1314 +        // -1 (trim a char at the break position)
  1.1315 +        // only if the line break was a space.
  1.1316 +        if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
  1.1317 +          mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
  1.1318 +        }
  1.1319 +        else {
  1.1320 +          mCurrentLine.Right(restOfLine, linelength-goodSpace);
  1.1321 +        }
  1.1322 +        // if the break character was U+0020, EndLine() must know for delsp=yes support
  1.1323 +        bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' ';
  1.1324 +        mCurrentLine.Truncate(goodSpace); 
  1.1325 +        EndLine(true, breakBySpace);
  1.1326 +        mCurrentLine.Truncate();
  1.1327 +        // Space stuff new line?
  1.1328 +        if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
  1.1329 +          if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get())
  1.1330 +              && mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
  1.1331 +            )
  1.1332 +          {
  1.1333 +            // Space stuffing a la RFC 2646 (format=flowed).
  1.1334 +            mCurrentLine.Append(char16_t(' '));
  1.1335 +            //XXX doesn't seem to work correctly for ' '
  1.1336 +          }
  1.1337 +        }
  1.1338 +        mCurrentLine.Append(restOfLine);
  1.1339 +        mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(),
  1.1340 +                                                  mCurrentLine.Length());
  1.1341 +        linelength = mCurrentLine.Length();
  1.1342 +        mEmptyLines = -1;
  1.1343 +      } 
  1.1344 +      else {
  1.1345 +        // Nothing to do. Hopefully we get more data later
  1.1346 +        // to use for a place to break line
  1.1347 +        break;
  1.1348 +      }
  1.1349 +    }
  1.1350 +  } 
  1.1351 +  else {
  1.1352 +    // No wrapping.
  1.1353 +  }
  1.1354 +}
  1.1355 +
  1.1356 +/**
  1.1357 + * Outputs the contents of mCurrentLine followed by mLineBreak, and resets
  1.1358 + * line specific variables. Also adds an indentation and prefix if there is
  1.1359 + * one specified. Strips ending spaces from the line if it isn't preformatted.
  1.1360 + * aSoftlinebreak: the break comes from wrapping; aBreakBySpace: the wrap point was a U+0020.
  1.1361 + */
  1.1362 +void
  1.1363 +nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace)
  1.1364 +{
  1.1365 +  uint32_t currentlinelength = mCurrentLine.Length();
  1.1366 +
  1.1367 +  if (aSoftlinebreak && 0 == currentlinelength) {
  1.1368 +    // No meaning
  1.1369 +    return;
  1.1370 +  }
  1.1371 +
  1.1372 +  /* In non-preformatted mode, remove spaces from the end of the line for
  1.1373 +   * format=flowed compatibility. Don't do this for these special cases:
  1.1374 +   * "-- ", the signature separator (RFC 2646) shouldn't be touched and
  1.1375 +   * "- -- ", the OpenPGP dash-escaped signature separator in inline
  1.1376 +   * signed messages according to the OpenPGP standard (RFC 2440).
  1.1377 +   */  
  1.1378 +  if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
  1.1379 +      !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) &&
  1.1380 +     (aSoftlinebreak || 
  1.1381 +     !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) {
  1.1382 +    // Remove spaces from the end of the line.
  1.1383 +    while(currentlinelength > 0 &&
  1.1384 +          mCurrentLine[currentlinelength-1] == ' ') {
  1.1385 +      --currentlinelength;
  1.1386 +    }
  1.1387 +    mCurrentLine.SetLength(currentlinelength);
  1.1388 +  }
  1.1389 +  
  1.1390 +  if (aSoftlinebreak &&
  1.1391 +     (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
  1.1392 +     (mIndent == 0)) {
  1.1393 +    // Add the soft part of the soft linebreak (RFC 2646 4.1)
  1.1394 +    // We only do this when there is no indentation since format=flowed
  1.1395 +    // lines and indentation don't work well together.
  1.1396 +
  1.1397 +    // If the break character was an ASCII space and RFC 3676 delsp=yes
  1.1398 +    // support is requested, append two spaces instead of one.
  1.1399 +    if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace)
  1.1400 +      mCurrentLine.Append(NS_LITERAL_STRING("  "));
  1.1401 +    else
  1.1402 +      mCurrentLine.Append(char16_t(' '));
  1.1403 +  }
  1.1404 +
  1.1405 +  if (aSoftlinebreak) {
  1.1406 +    mEmptyLines=0;
  1.1407 +  } 
  1.1408 +  else {
  1.1409 +    // Hard break: a line with content restarts the empty-line count
  1.1410 +    if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
  1.1411 +      mEmptyLines=-1;
  1.1412 +    }
  1.1413 +
  1.1414 +    mEmptyLines++;
  1.1415 +  }
  1.1416 +
  1.1417 +  if (mAtFirstColumn) {
  1.1418 +    // If we don't have anything "real" to output we have to
  1.1419 +    // make sure the indent doesn't end in a space since that
  1.1420 +    // would trick a format=flowed-aware receiver.
  1.1421 +    bool stripTrailingSpaces = mCurrentLine.IsEmpty();
  1.1422 +    OutputQuotesAndIndent(stripTrailingSpaces);
  1.1423 +  }
  1.1424 +
  1.1425 +  mCurrentLine.Append(mLineBreak);
  1.1426 +  Output(mCurrentLine);
  1.1427 +  mCurrentLine.Truncate();
  1.1428 +  mCurrentLineWidth = 0;
  1.1429 +  mAtFirstColumn=true;
  1.1430 +  mInWhitespace=true;
  1.1431 +  mLineBreakDue = false;
  1.1432 +  mFloatingLines = -1;
  1.1433 +}
  1.1434 +
  1.1435 +
  1.1436 +/**
  1.1437 + * Outputs the calculated and stored indent and the text in the indentation, that
  1.1438 + * is, quote chars and mInIndentString (numbers for numbered lists and such). Resets
  1.1439 + * mInIndentString after using it. stripTrailingSpaces trims trailing ' ' from the prefix.
  1.1440 + */
  1.1441 +void
  1.1442 +nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */)
  1.1443 +{
  1.1444 +  nsAutoString stringToOutput;
  1.1445 +  
  1.1446 +  // Put the mail quote "> " chars in, if appropriate:
  1.1447 +  if (mCiteQuoteLevel > 0) {
  1.1448 +    nsAutoString quotes;
  1.1449 +    for(int i=0; i < mCiteQuoteLevel; i++) {
  1.1450 +      quotes.Append(char16_t('>'));
  1.1451 +    }
  1.1452 +    if (!mCurrentLine.IsEmpty()) {
  1.1453 +      /* Better not to output a space here if the line is empty,
  1.1454 +         in case a receiving f=f-aware UA thinks this is a flowed line,
  1.1455 +         which it isn't - it's just empty.
  1.1456 +         (Flowed lines may be joined with the following one,
  1.1457 +         so the empty line may be lost completely.) */
  1.1458 +      quotes.Append(char16_t(' '));
  1.1459 +    }
  1.1460 +    stringToOutput = quotes;
  1.1461 +    mAtFirstColumn = false;
  1.1462 +  }
  1.1463 +  
  1.1464 +  // Indent if necessary; mInIndentString takes up part of the indent width
  1.1465 +  int32_t indentwidth = mIndent - mInIndentString.Length();
  1.1466 +  if (indentwidth > 0
  1.1467 +      && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
  1.1468 +      // Don't make empty lines look flowed
  1.1469 +      ) {
  1.1470 +    nsAutoString spaces;
  1.1471 +    for (int i=0; i < indentwidth; ++i)
  1.1472 +      spaces.Append(char16_t(' '));
  1.1473 +    stringToOutput += spaces;
  1.1474 +    mAtFirstColumn = false;
  1.1475 +  }
  1.1476 +  
  1.1477 +  if (!mInIndentString.IsEmpty()) {
  1.1478 +    stringToOutput += mInIndentString;
  1.1479 +    mAtFirstColumn = false;
  1.1480 +    mInIndentString.Truncate();
  1.1481 +  }
  1.1482 +
  1.1483 +  if (stripTrailingSpaces) {
  1.1484 +    int32_t lineLength = stringToOutput.Length();
  1.1485 +    while(lineLength > 0 &&
  1.1486 +          ' ' == stringToOutput[lineLength-1]) {
  1.1487 +      --lineLength;
  1.1488 +    }
  1.1489 +    stringToOutput.SetLength(lineLength);
  1.1490 +  }
  1.1491 +
  1.1492 +  if (!stringToOutput.IsEmpty()) {
  1.1493 +    Output(stringToOutput);
  1.1494 +  }
  1.1495 +    
  1.1496 +}
  1.1497 +
  1.1498 +/**
  1.1499 + * Write a string. This is the highlevel function to use to get text output.
  1.1500 + * By using AddToLine, Output, EndLine and other functions it handles quotation,
  1.1501 + * line wrapping, indentation, whitespace compression and other things.
  1.1502 + */
  1.1503 +void
  1.1504 +nsPlainTextSerializer::Write(const nsAString& aStr)
  1.1505 +{
  1.1506 +  // XXX Copy necessary to use nsString methods and gain
  1.1507 +  // access to underlying buffer
  1.1508 +  nsAutoString str(aStr);
  1.1509 +
  1.1510 +#ifdef DEBUG_wrapping
  1.1511 +  printf("Write(%s): wrap col = %d\n",
  1.1512 +         NS_ConvertUTF16toUTF8(str).get(), mWrapColumn);
  1.1513 +#endif
  1.1514 +
  1.1515 +  int32_t bol = 0;
  1.1516 +  int32_t newline;
  1.1517 +  
  1.1518 +  int32_t totLen = str.Length();
  1.1519 +
  1.1520 +  // If the string is empty, do nothing:
  1.1521 +  if (totLen <= 0) return;
  1.1522 +
  1.1523 +  // For Flowed text change nbsp-ses to spaces at end of lines to allow them
  1.1524 +  // to be cut off along with usual spaces if required. (bug #125928)
  1.1525 +  if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
  1.1526 +    for (int32_t i = totLen-1; i >= 0; i--) {
  1.1527 +      char16_t c = str[i];
  1.1528 +      if ('\n' == c || '\r' == c || ' ' == c || '\t' == c)
  1.1529 +        continue;
  1.1530 +      if (kNBSP == c)
  1.1531 +        str.Replace(i, 1, ' ');
  1.1532 +      else
  1.1533 +        break;
  1.1534 +    }
  1.1535 +  }
  1.1536 +
  1.1537 +  // We have two major codepaths here. One that does preformatted text and one
  1.1538 +  // that does normal formatted text. The one for preformatted text calls
  1.1539 +  // Output directly while the other code path goes through AddToLine.
  1.1540 +  if ((mPreFormatted && !mWrapColumn) || IsInPre()
  1.1541 +      || ((mSpanLevel > 0 || mDontWrapAnyQuotes)
  1.1542 +          && mEmptyLines >= 0 && str.First() == char16_t('>'))) {
  1.1543 +    // No intelligent wrapping.
  1.1544 +
  1.1545 +    // This mustn't be mixed with intelligent wrapping without clearing
  1.1546 +    // the mCurrentLine buffer before!!!
  1.1547 +    NS_ASSERTION(mCurrentLine.IsEmpty(),
  1.1548 +                 "Mixed wrapping data and nonwrapping data on the same line");
  1.1549 +    if (!mCurrentLine.IsEmpty()) {
  1.1550 +      FlushLine();
  1.1551 +    }
  1.1552 +
  1.1553 +    // Put the mail quote "> " chars in, if appropriate.
  1.1554 +    // Have to put it in before every line.
  1.1555 +    while(bol<totLen) {
  1.1556 +      bool outputQuotes = mAtFirstColumn;
  1.1557 +      bool atFirstColumn = mAtFirstColumn;
  1.1558 +      bool outputLineBreak = false;
  1.1559 +      bool spacesOnly = true;
  1.1560 +
  1.1561 +      // Find one of '\n' or '\r' using iterators since nsAString
  1.1562 +      // doesn't have the old FindCharInSet function.
  1.1563 +      nsAString::const_iterator iter;           str.BeginReading(iter);
  1.1564 +      nsAString::const_iterator done_searching; str.EndReading(done_searching);
  1.1565 +      iter.advance(bol); 
  1.1566 +      int32_t new_newline = bol;
  1.1567 +      newline = kNotFound;
  1.1568 +      while(iter != done_searching) {
  1.1569 +        if ('\n' == *iter || '\r' == *iter) {
  1.1570 +          newline = new_newline;
  1.1571 +          break;
  1.1572 +        }
  1.1573 +        if (' ' != *iter)
  1.1574 +          spacesOnly = false;
  1.1575 +        ++new_newline;
  1.1576 +        ++iter;
  1.1577 +      }
  1.1578 +
  1.1579 +      // Done searching
  1.1580 +      nsAutoString stringpart;
  1.1581 +      if (newline == kNotFound) {
  1.1582 +        // No new lines.
  1.1583 +        stringpart.Assign(Substring(str, bol, totLen - bol));
  1.1584 +        if (!stringpart.IsEmpty()) {
  1.1585 +          char16_t lastchar = stringpart[stringpart.Length()-1];
  1.1586 +          if ((lastchar == '\t') || (lastchar == ' ') ||
  1.1587 +             (lastchar == '\r') ||(lastchar == '\n')) {
  1.1588 +            mInWhitespace = true;
  1.1589 +          } 
  1.1590 +          else {
  1.1591 +            mInWhitespace = false;
  1.1592 +          }
  1.1593 +        }
  1.1594 +        mEmptyLines=-1;
  1.1595 +        atFirstColumn = mAtFirstColumn && (totLen-bol)==0;
  1.1596 +        bol = totLen;
  1.1597 +      } 
  1.1598 +      else {
  1.1599 +        // There is a newline
  1.1600 +        stringpart.Assign(Substring(str, bol, newline-bol));
  1.1601 +        mInWhitespace = true;
  1.1602 +        outputLineBreak = true;
  1.1603 +        mEmptyLines=0;
  1.1604 +        atFirstColumn = true;
  1.1605 +        bol = newline+1;
  1.1606 +        if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
  1.1607 +          // There was a CRLF in the input. This used to be illegal and
  1.1608 +          // stripped by the parser. Apparently not anymore. Let's skip
  1.1609 +          // over the LF.
  1.1610 +          bol++;
  1.1611 +        }
  1.1612 +      }
  1.1613 +
  1.1614 +      mCurrentLine.AssignLiteral("");
  1.1615 +      if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
  1.1616 +        if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928
  1.1617 +            !stringpart.EqualsLiteral("-- ") &&
  1.1618 +            !stringpart.EqualsLiteral("- -- "))
  1.1619 +          stringpart.Trim(" ", false, true, true);
  1.1620 +        if (IsSpaceStuffable(stringpart.get()) && stringpart[0] != '>')
  1.1621 +          mCurrentLine.Append(char16_t(' '));
  1.1622 +      }
  1.1623 +      mCurrentLine.Append(stringpart);
  1.1624 +
  1.1625 +      if (outputQuotes) {
  1.1626 +        // Note: this call messes with mAtFirstColumn
  1.1627 +        OutputQuotesAndIndent();
  1.1628 +      }
  1.1629 +
  1.1630 +      Output(mCurrentLine);
  1.1631 +      if (outputLineBreak) {
  1.1632 +        Output(mLineBreak);
  1.1633 +      }
  1.1634 +      mAtFirstColumn = atFirstColumn;
  1.1635 +    }
  1.1636 +
  1.1637 +    // Reset mCurrentLine.
  1.1638 +    mCurrentLine.Truncate();
  1.1639 +
  1.1640 +#ifdef DEBUG_wrapping
  1.1641 +    printf("No wrapping: newline is %d, totLen is %d\n",
  1.1642 +           newline, totLen);
  1.1643 +#endif
  1.1644 +    return;
  1.1645 +  }
  1.1646 +
  1.1647 +  // Intelligent handling of text
  1.1648 +  // If needed, strip out all "end of lines"
  1.1649 +  // and multiple whitespace between words
  1.1650 +  int32_t nextpos;
  1.1651 +  const char16_t * offsetIntoBuffer = nullptr;
  1.1652 +  
  1.1653 +  while (bol < totLen) {    // Loop over lines
  1.1654 +    // Find a place where we may have to do whitespace compression
  1.1655 +    nextpos = str.FindCharInSet(" \t\n\r", bol);
  1.1656 +#ifdef DEBUG_wrapping
  1.1657 +    nsAutoString remaining;
  1.1658 +    str.Right(remaining, totLen - bol);
  1.1659 +    foo = ToNewCString(remaining);
  1.1660 +    //    printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
  1.1661 +    //           bol, nextpos, totLen, foo);
  1.1662 +    nsMemory::Free(foo);
  1.1663 +#endif
  1.1664 +
  1.1665 +    if (nextpos == kNotFound) {
  1.1666 +      // The rest of the string
  1.1667 +      offsetIntoBuffer = str.get() + bol;
  1.1668 +      AddToLine(offsetIntoBuffer, totLen-bol);
  1.1669 +      bol=totLen;
  1.1670 +      mInWhitespace=false;
  1.1671 +    } 
  1.1672 +    else {
  1.1673 +      // There's still whitespace left in the string
  1.1674 +      if (nextpos != 0 && (nextpos + 1) < totLen) {
  1.1675 +        offsetIntoBuffer = str.get() + nextpos;
  1.1676 +        // skip '\n' if it is between CJ chars
  1.1677 +        if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
  1.1678 +          offsetIntoBuffer = str.get() + bol;
  1.1679 +          AddToLine(offsetIntoBuffer, nextpos-bol);
  1.1680 +          bol = nextpos + 1;
  1.1681 +          continue;
  1.1682 +        }
  1.1683 +      }
  1.1684 +      // If we're already in whitespace and not preformatted, just skip it:
  1.1685 +      if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
  1.1686 +          !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
  1.1687 +        // Skip whitespace
  1.1688 +        bol++;
  1.1689 +        continue;
  1.1690 +      }
  1.1691 +
  1.1692 +      if (nextpos == bol) {
  1.1693 +        // Note that we are in whitespace.
  1.1694 +        mInWhitespace = true;
  1.1695 +        offsetIntoBuffer = str.get() + nextpos;
  1.1696 +        AddToLine(offsetIntoBuffer, 1);
  1.1697 +        bol++;
  1.1698 +        continue;
  1.1699 +      }
  1.1700 +      
  1.1701 +      mInWhitespace = true;
  1.1702 +      
  1.1703 +      offsetIntoBuffer = str.get() + bol;
  1.1704 +      if (mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
  1.1705 +        // Preserve the real whitespace character
  1.1706 +        nextpos++;
  1.1707 +        AddToLine(offsetIntoBuffer, nextpos-bol);
  1.1708 +        bol = nextpos;
  1.1709 +      } 
  1.1710 +      else {
  1.1711 +        // Replace the whitespace with a space
  1.1712 +        AddToLine(offsetIntoBuffer, nextpos-bol);
  1.1713 +        AddToLine(kSpace.get(),1);
  1.1714 +        bol = nextpos + 1; // Let's eat the whitespace
  1.1715 +      }
  1.1716 +    }
  1.1717 +  } // Continue looping over the string
  1.1718 +}
  1.1719 +
  1.1720 +
  1.1721 +/**
  1.1722 + * Gets the value of an attribute in a string. If the function returns
  1.1723 + * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
  1.1724 + */
  1.1725 +nsresult
  1.1726 +nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName,
  1.1727 +                                         nsString& aValueRet)
  1.1728 +{
  1.1729 +  if (mElement) {
  1.1730 +    if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
  1.1731 +      return NS_OK;
  1.1732 +    }
  1.1733 +  }
  1.1734 +
  1.1735 +  return NS_ERROR_NOT_AVAILABLE;
  1.1736 +}
  1.1737 +
  1.1738 +/**
  1.1739 + * Returns true, if the element was inserted by Moz' TXT->HTML converter.
  1.1740 + * In this case, we should ignore it.
  1.1741 + */
  1.1742 +bool 
  1.1743 +nsPlainTextSerializer::IsCurrentNodeConverted()
  1.1744 +{
  1.1745 +  nsAutoString value;
  1.1746 +  nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
  1.1747 +  return (NS_SUCCEEDED(rv) &&
  1.1748 +          (value.EqualsIgnoreCase("moz-txt", 7) ||
  1.1749 +           value.EqualsIgnoreCase("\"moz-txt", 8)));
  1.1750 +}
  1.1751 +
  1.1752 +
  1.1753 +// static
  1.1754 +nsIAtom*
  1.1755 +nsPlainTextSerializer::GetIdForContent(nsIContent* aContent)
  1.1756 +{
  1.1757 +  if (!aContent->IsHTML()) {
  1.1758 +    return nullptr;
  1.1759 +  }
  1.1760 +
  1.1761 +  nsIAtom* localName = aContent->Tag();
  1.1762 +  return localName->IsStaticAtom() ? localName : nullptr;
  1.1763 +}
  1.1764 +
  1.1765 +/**
  1.1766 + * Returns true if we currently are inside a <pre>. The check is done
  1.1767 + * by traversing the tag stack looking for <pre> until we hit a block
  1.1768 + * level tag which is assumed to override any <pre>:s below it in
  1.1769 + * the stack. To do this correctly to a 100% would require access
  1.1770 + * to style which we don't support in this converter.
  1.1771 + */  
  1.1772 +bool
  1.1773 +nsPlainTextSerializer::IsInPre()
  1.1774 +{
  1.1775 +  int32_t i = mTagStackIndex;
  1.1776 +  while(i > 0) {
  1.1777 +    if (mTagStack[i - 1] == nsGkAtoms::pre)
  1.1778 +      return true;
  1.1779 +    if (nsContentUtils::IsHTMLBlock(mTagStack[i - 1])) {
  1.1780 +      // We assume that every other block overrides a <pre>
  1.1781 +      return false;
  1.1782 +    }
  1.1783 +    --i;
  1.1784 +  }
  1.1785 +
  1.1786 +  // Not a <pre> in the whole stack
  1.1787 +  return false;
  1.1788 +}
  1.1789 +
  1.1790 +/**
  1.1791 + * This method is required only to identify LI's inside OL.
  1.1792 + * Returns TRUE if we are inside an OL tag and FALSE otherwise.
  1.1793 + */
  1.1794 +bool
  1.1795 +nsPlainTextSerializer::IsInOL()
  1.1796 +{
  1.1797 +  int32_t i = mTagStackIndex;
  1.1798 +  while(--i >= 0) {
  1.1799 +    if (mTagStack[i] == nsGkAtoms::ol)
  1.1800 +      return true;
  1.1801 +    if (mTagStack[i] == nsGkAtoms::ul) {
  1.1802 +      // If a UL is reached first, LI belongs the UL nested in OL.
  1.1803 +      return false;
  1.1804 +    }
  1.1805 +  }
  1.1806 +  // We may reach here for orphan LI's.
  1.1807 +  return false;
  1.1808 +}
  1.1809 +
  1.1810 +/*
  1.1811 +  @return 0 = no header, 1 = h1, ..., 6 = h6
  1.1812 +*/
  1.1813 +int32_t HeaderLevel(nsIAtom* aTag)
  1.1814 +{
  1.1815 +  if (aTag == nsGkAtoms::h1) {
  1.1816 +    return 1;
  1.1817 +  }
  1.1818 +  if (aTag == nsGkAtoms::h2) {
  1.1819 +    return 2;
  1.1820 +  }
  1.1821 +  if (aTag == nsGkAtoms::h3) {
  1.1822 +    return 3;
  1.1823 +  }
  1.1824 +  if (aTag == nsGkAtoms::h4) {
  1.1825 +    return 4;
  1.1826 +  }
  1.1827 +  if (aTag == nsGkAtoms::h5) {
  1.1828 +    return 5;
  1.1829 +  }
  1.1830 +  if (aTag == nsGkAtoms::h6) {
  1.1831 +    return 6;
  1.1832 +  }
  1.1833 +  return 0;
  1.1834 +}
  1.1835 +
  1.1836 +
  1.1837 +/*
  1.1838 + * This is an implementation of GetUnicharWidth() and
  1.1839 + * GetUnicharStringWidth() as defined in
  1.1840 + * "The Single UNIX Specification, Version 2, The Open Group, 1997"
  1.1841 + * <http://www.UNIX-systems.org/online.html>
  1.1842 + *
  1.1843 + * Markus Kuhn -- 2000-02-08 -- public domain
  1.1844 + *
  1.1845 + * Minor alterations to fit Mozilla's data types by Daniel Bratell
  1.1846 + */
  1.1847 +
  /* Column width of a single ISO 10646 character, determined as follows:
   *
   *    - The null character (U+0000) has a column width of 0.
   *
   *    - Other C0/C1 control characters and DEL yield a return value of -1.
   *
   *    - Non-spacing and enclosing combining characters (general category
   *      code Mn or Me in the Unicode database, as captured by the table
   *      below) have a column width of 0.
   *
   *    - Spacing characters in the East Asian Wide (W) or East Asian
   *      FullWidth (F) category as defined in Unicode Technical Report #11
   *      have a column width of 2.
   *
   *    - Everything else (including all printable ISO 8859-1 and WGL4
   *      characters, Unicode control characters, etc.) has a column
   *      width of 1.
   *
   * The argument is interpreted as an ISO 10646 value in the 16-bit range.
   *
   * @param ucs  character to measure
   * @return -1, 0, 1 or 2 according to the rules above
   */
  int32_t GetUnicharWidth(char16_t ucs)
  {
    /* Sorted, non-overlapping [first, last] ranges of non-spacing characters. */
    struct Range {
      uint16_t first;
      uint16_t last;
    };
    static const Range kCombining[] = {
      { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
      { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
      { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
      { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
      { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
      { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
      { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
      { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
      { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
      { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
      { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
      { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
      { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
      { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
      { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
      { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
      { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
      { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
      { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
      { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
      { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
      { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
      { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
      { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
      { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
      { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
      { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
      { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
      { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
      { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
      { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
    };

    /* U+0000 is the one control character with a defined (zero) width. */
    if (ucs == 0) {
      return 0;
    }

    /* Remaining 8-bit control characters: C0, then DEL and C1. */
    const bool isC0 = ucs < 32;
    const bool isDelOrC1 = ucs >= 0x7f && ucs < 0xa0;
    if (isC0 || isDelOrC1) {
      return -1;
    }

    /* Quick exit for Latin-1 etc.: everything below the first table entry. */
    if (ucs < kCombining[0].first) {
      return 1;
    }

    /* Binary search of the non-spacing table. */
    int32_t lo = 0;
    int32_t hi = sizeof(kCombining) / sizeof(kCombining[0]) - 1;
    while (lo <= hi) {
      const int32_t probe = (lo + hi) / 2;
      const Range& range = kCombining[probe];
      if (ucs > range.last) {
        lo = probe + 1;
      } else if (ucs < range.first) {
        hi = probe - 1;
      } else {
        /* first <= ucs <= last: a combining character. */
        return 0;
      }
    }

    /* From here on ucs is neither combining nor a C0/C1 control character. */

    /* Fast path for the majority of non-wide scripts. */
    if (ucs < 0x1100) {
      return 1;
    }

    const bool hangulJamo = ucs >= 0x1100 && ucs <= 0x115f;
    /* CJK ... Yi, minus a handful of excluded code points. */
    const bool cjkToYi = ucs >= 0x2e80 && ucs <= 0xa4cf &&
                         (ucs & ~0x0011) != 0x300a && ucs != 0x303f;
    const bool hangulSyllables = ucs >= 0xac00 && ucs <= 0xd7a3;
    const bool cjkCompatIdeographs = ucs >= 0xf900 && ucs <= 0xfaff;
    const bool cjkCompatForms = ucs >= 0xfe30 && ucs <= 0xfe6f;
    const bool fullwidthForms = (ucs >= 0xff00 && ucs <= 0xff5f) ||
                                (ucs >= 0xffe0 && ucs <= 0xffe6);

    const bool isWide = hangulJamo || cjkToYi || hangulSyllables ||
                        cjkCompatIdeographs || cjkCompatForms || fullwidthForms;
    return isWide ? 2 : 1;
  }
  1.1952 +
  1.1953 +
  1.1954 +int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n)
  1.1955 +{
  1.1956 +  int32_t w, width = 0;
  1.1957 +
  1.1958 +  for (;*pwcs && n-- > 0; pwcs++)
  1.1959 +    if ((w = GetUnicharWidth(*pwcs)) < 0)
  1.1960 +      ++width; // Taking 1 as the width of non-printable character, for bug# 94475.
  1.1961 +    else
  1.1962 +      width += w;
  1.1963 +
  1.1964 +  return width;
  1.1965 +}
  1.1966 +

mercurial