content/base/src/nsPlainTextSerializer.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 /*
     7  * nsIContentSerializer implementation that can be used with an
     8  * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
     9  * (eg for copy/paste as plaintext).
    10  */
    12 #include "nsPlainTextSerializer.h"
    13 #include "nsLWBrkCIID.h"
    14 #include "nsIServiceManager.h"
    15 #include "nsGkAtoms.h"
    16 #include "nsNameSpaceManager.h"
    17 #include "nsTextFragment.h"
    18 #include "nsContentUtils.h"
    19 #include "nsReadableUtils.h"
    20 #include "nsUnicharUtils.h"
    21 #include "nsCRT.h"
    22 #include "mozilla/dom/Element.h"
    23 #include "mozilla/Preferences.h"
    25 using namespace mozilla;
    26 using namespace mozilla::dom;
    28 #define PREF_STRUCTS "converter.html2txt.structs"
    29 #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
       // The two preference names above are read in Init() (formatted output
       // only) to seed mStructs and mHeaderStrategy respectively.

       // Base indentation unit, in columns; the list, <dd> and non-cite
       // <blockquote> indents below are all expressed in terms of it.
    31 static const  int32_t kTabSize=4;
    32 static const  int32_t kIndentSizeHeaders = 2;  /* Indention of h1, if
    33                                                 mHeaderStrategy = 1 or = 2.
    34                                                 Indention of other headers
    35                                                 is derived from that.
    36                                                 XXX center h1? */
    37 static const  int32_t kIndentIncrementHeaders = 2;  /* If mHeaderStrategy = 1,
    38                                                 indent h(x+1) this many
    39                                                 columns more than h(x) */
    40 static const  int32_t kIndentSizeList = kTabSize;
    41                                // Indention of non-first lines of ul and ol
    42 static const  int32_t kIndentSizeDD = kTabSize;  // Indention of <dd>
       // 160 == 0xA0, the code point of NO-BREAK SPACE.
    43 static const  char16_t  kNBSP = 160;
    44 static const  char16_t kSPACE = ' ';
       // File-local helpers; their definitions are not in this part of the file.
    46 static int32_t HeaderLevel(nsIAtom* aTag);
    47 static int32_t GetUnicharWidth(char16_t ucs);
    48 static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n);
       // Capacities of the fixed-size arrays mTagStack and mOLStack that the
       // constructor allocates with new[].
    50 // Someday may want to make this non-const:
    51 static const uint32_t TagStackSize = 500;
    52 static const uint32_t OLStackSize = 100;
    54 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
    55 {
    56   nsPlainTextSerializer* it = new nsPlainTextSerializer();
    57   if (!it) {
    58     return NS_ERROR_OUT_OF_MEMORY;
    59   }
    61   return CallQueryInterface(it, aSerializer);
    62 }
    64 nsPlainTextSerializer::nsPlainTextSerializer()
    65   : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
    66 {
    68   mOutputString = nullptr;
    69   mHeadLevel = 0;
    70   mAtFirstColumn = true;
    71   mIndent = 0;
    72   mCiteQuoteLevel = 0;
    73   mStructs = true;       // will be read from prefs later
    74   mHeaderStrategy = 1 /*indent increasingly*/;   // ditto
    75   mDontWrapAnyQuotes = false;                 // ditto
    76   mHasWrittenCiteBlockquote = false;
    77   mSpanLevel = 0;
    78   for (int32_t i = 0; i <= 6; i++) {
    79     mHeaderCounter[i] = 0;
    80   }
    82   // Line breaker
    83   mWrapColumn = 72;     // XXX magic number, we expect someone to reset this
    84   mCurrentLineWidth = 0;
    86   // Flow
    87   mEmptyLines = 1; // The start of the document is an "empty line" in itself,
    88   mInWhitespace = false;
    89   mPreFormatted = false;
    90   mStartedOutput = false;
    92   // initialize the tag stack to zero:
    93   // The stack only ever contains pointers to static atoms, so they don't
    94   // need refcounting.
    95   mTagStack = new nsIAtom*[TagStackSize];
    96   mTagStackIndex = 0;
    97   mIgnoreAboveIndex = (uint32_t)kNotFound;
    99   // initialize the OL stack, where numbers for ordered lists are kept
   100   mOLStack = new int32_t[OLStackSize];
   101   mOLStackIndex = 0;
   103   mULCount = 0;
   105   mIgnoredChildNodeLevel = 0;
   106 }
   108 nsPlainTextSerializer::~nsPlainTextSerializer()
   109 {
   110   delete[] mTagStack;
   111   delete[] mOLStack;
   112   NS_WARN_IF_FALSE(mHeadLevel == 0, "Wrong head level!");
   113 }
   115 NS_IMPL_ISUPPORTS(nsPlainTextSerializer,
   116                   nsIContentSerializer)
       // XPCOM boilerplate for this class; nsIContentSerializer is the only
       // interface listed here.
   119 NS_IMETHODIMP 
   120 nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
   121                             const char* aCharSet, bool aIsCopying,
   122                             bool aIsWholeDocument)
   123 {
   124 #ifdef DEBUG
   125   // Check if the major control flags are set correctly.
   126   if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
   127     NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
   128                  "If you want format=flowed, you must combine it with "
   129                  "nsIDocumentEncoder::OutputFormatted");
   130   }
   132   if (aFlags & nsIDocumentEncoder::OutputFormatted) {
   133     NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
   134                  "Can't do formatted and preformatted output at the same time!");
   135   }
   136 #endif
   138   mFlags = aFlags;
   139   mWrapColumn = aWrapColumn;
   141   // Only create a linebreaker if we will handle wrapping.
   142   if (MayWrap()) {
   143     mLineBreaker = nsContentUtils::LineBreaker();
   144   }
   146   // Set the line break character:
   147   if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
   148       && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
   149     // Windows
   150     mLineBreak.AssignLiteral("\r\n");
   151   }
   152   else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
   153     // Mac
   154     mLineBreak.Assign(char16_t('\r'));
   155   }
   156   else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
   157     // Unix/DOM
   158     mLineBreak.Assign(char16_t('\n'));
   159   }
   160   else {
   161     // Platform/default
   162     mLineBreak.AssignLiteral(NS_LINEBREAK);
   163   }
   165   mLineBreakDue = false;
   166   mFloatingLines = -1;
   168   if (mFlags & nsIDocumentEncoder::OutputFormatted) {
   169     // Get some prefs that controls how we do formatted output
   170     mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
   172     mHeaderStrategy =
   173       Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
   175     // DontWrapAnyQuotes is set according to whether plaintext mail
   176     // is wrapping to window width -- see bug 134439.
   177     // We'll only want this if we're wrapping and formatted.
   178     if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) {
   179       mDontWrapAnyQuotes =
   180         Preferences::GetBool("mail.compose.wrap_to_window_width",
   181                              mDontWrapAnyQuotes);
   182     }
   183   }
   185   // XXX We should let the caller pass this in.
   186   if (Preferences::GetBool("browser.frames.enabled")) {
   187     mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
   188   }
   189   else {
   190     mFlags |= nsIDocumentEncoder::OutputNoFramesContent;
   191   }
   193   return NS_OK;
   194 }
   196 bool
   197 nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack)
   198 {
   199   uint32_t size = aStack.Length();
   200   if (size == 0) {
   201     return false;
   202   }
   203   return aStack.ElementAt(size-1);
   204 }
   206 void
   207 nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue)
   208 {
   209   uint32_t size = aStack.Length();
   210   if (size > 0) {
   211     aStack.ElementAt(size-1) = aValue;
   212   }
   213   else {
   214     NS_ERROR("There is no \"Last\" value");
   215   }
   216 }
   218 void
   219 nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue)
   220 {
   221     aStack.AppendElement(bool(aValue));
   222 }
   224 bool
   225 nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack)
   226 {
   227   bool returnValue = false;
   228   uint32_t size = aStack.Length();
   229   if (size > 0) {
   230     returnValue = aStack.ElementAt(size-1);
   231     aStack.RemoveElementAt(size-1);
   232   }
   233   return returnValue;
   234 }
   236 bool
   237 nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag)
   238 {
   239   // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set,
   240   // non-textual container element should be serialized as placeholder
   241   // character and its child nodes should be ignored. See bug 895239.
   242   if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) {
   243     return false;
   244   }
   246   return
   247     (aTag == nsGkAtoms::audio) ||
   248     (aTag == nsGkAtoms::canvas) ||
   249     (aTag == nsGkAtoms::iframe) ||
   250     (aTag == nsGkAtoms::meter) ||
   251     (aTag == nsGkAtoms::progress) ||
   252     (aTag == nsGkAtoms::object) ||
   253     (aTag == nsGkAtoms::svg) ||
   254     (aTag == nsGkAtoms::video);
   255 }
   257 NS_IMETHODIMP 
   258 nsPlainTextSerializer::AppendText(nsIContent* aText,
   259                                   int32_t aStartOffset,
   260                                   int32_t aEndOffset, 
   261                                   nsAString& aStr)
   262 {
   263   if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
   264     return NS_OK;
   265   }
   267   NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
   268   if ( aStartOffset < 0 )
   269     return NS_ERROR_INVALID_ARG;
   271   NS_ENSURE_ARG(aText);
   273   nsresult rv = NS_OK;
   275   nsIContent* content = aText;
   276   const nsTextFragment* frag;
   277   if (!content || !(frag = content->GetText())) {
   278     return NS_ERROR_FAILURE;
   279   }
   281   int32_t fragLength = frag->GetLength();
   282   int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
   283   NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
   285   int32_t length = endoffset - aStartOffset;
   286   if (length <= 0) {
   287     return NS_OK;
   288   }
   290   nsAutoString textstr;
   291   if (frag->Is2b()) {
   292     textstr.Assign(frag->Get2b() + aStartOffset, length);
   293   }
   294   else {
   295     // AssignASCII is for 7-bit character only, so don't use it
   296     const char *data = frag->Get1b();
   297     CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
   298   }
   300   mOutputString = &aStr;
   302   // We have to split the string across newlines
   303   // to match parser behavior
   304   int32_t start = 0;
   305   int32_t offset = textstr.FindCharInSet("\n\r");
   306   while (offset != kNotFound) {
   308     if (offset>start) {
   309       // Pass in the line
   310       DoAddText(false,
   311                 Substring(textstr, start, offset-start));
   312     }
   314     // Pass in a newline
   315     DoAddText(true, mLineBreak);
   317     start = offset+1;
   318     offset = textstr.FindCharInSet("\n\r", start);
   319   }
   321   // Consume the last bit of the string if there's any left
   322   if (start < length) {
   323     if (start) {
   324       DoAddText(false, Substring(textstr, start, length - start));
   325     }
   326     else {
   327       DoAddText(false, textstr);
   328     }
   329   }
   331   mOutputString = nullptr;
   333   return rv;
   334 }
   336 NS_IMETHODIMP
   337 nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
   338                                           int32_t aStartOffset,
   339                                           int32_t aEndOffset,
   340                                           nsAString& aStr)
   341 {
   342   return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
   343 }
   345 NS_IMETHODIMP
   346 nsPlainTextSerializer::AppendElementStart(Element* aElement,
   347                                           Element* aOriginalElement,
   348                                           nsAString& aStr)
   349 {
   350   NS_ENSURE_ARG(aElement);
   352   mElement = aElement;
   354   nsresult rv;
   355   nsIAtom* id = GetIdForContent(mElement);
   357   bool isContainer = !nsContentUtils::IsHTMLVoid(id);
   359   mOutputString = &aStr;
   361   if (isContainer) {
   362     rv = DoOpenContainer(id);
   363   }
   364   else {
   365     rv = DoAddLeaf(id);
   366   }
   368   mElement = nullptr;
   369   mOutputString = nullptr;
   371   if (id == nsGkAtoms::head) {
   372     ++mHeadLevel;
   373   }
   375   return rv;
   376 } 
   378 NS_IMETHODIMP 
   379 nsPlainTextSerializer::AppendElementEnd(Element* aElement,
   380                                         nsAString& aStr)
   381 {
   382   NS_ENSURE_ARG(aElement);
   384   mElement = aElement;
   386   nsresult rv;
   387   nsIAtom* id = GetIdForContent(mElement);
   389   bool isContainer = !nsContentUtils::IsHTMLVoid(id);
   391   mOutputString = &aStr;
   393   rv = NS_OK;
   394   if (isContainer) {
   395     rv = DoCloseContainer(id);
   396   }
   398   mElement = nullptr;
   399   mOutputString = nullptr;
   401   if (id == nsGkAtoms::head) {
   402     NS_ASSERTION(mHeadLevel != 0,
   403                  "mHeadLevel being decremented below 0");
   404     --mHeadLevel;
   405   }
   407   return rv;
   408 }
   410 NS_IMETHODIMP 
   411 nsPlainTextSerializer::Flush(nsAString& aStr)
   412 {
   413   mOutputString = &aStr;
   414   FlushLine();
   415   mOutputString = nullptr;
   416   return NS_OK;
   417 }
   419 NS_IMETHODIMP
   420 nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument,
   421                                            nsAString& aStr)
   422 {
   423   return NS_OK;
   424 }
   426 nsresult
   427 nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag)
   428 {
         // Handles the start tag of a container (non-void) element.
         //
         // Called from AppendElementStart, which sets mElement and mOutputString
         // for the duration of the call. Depending on aTag and mFlags this
         // updates the tag stack, the indentation and list/header counters, the
         // pending vertical space, and writes leading markers (bullets, header
         // numbers, quote marks, structure characters).
         //
         // aTag: atom of the element being opened.
         // Returns NS_OK on every path.
         //
         // The per-tag handling below has to mirror DoCloseContainer.

   429   // Check if we need output current node as placeholder character and ignore
   430   // child nodes.
   431   if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) {
   432     if (mIgnoredChildNodeLevel == 0) {
   433       // Serialize current node as placeholder character
   434       Write(NS_LITERAL_STRING("\xFFFC"));
   435     }
   436     // Ignore child nodes.
   437     mIgnoredChildNodeLevel++;
   438     return NS_OK;
   439   }
   441   if (mFlags & nsIDocumentEncoder::OutputRaw) {
   442     // Raw means raw.  Don't even think about doing anything fancy
   443     // here like indenting, adding line breaks or any other
   444     // characters such as list item bullets, quote characters
   445     // around <q>, etc.  I mean it!  Don't make me smack you!
   447     return NS_OK;
   448   }
         // Record the tag on the tag stack. When the stack is already full
         // (TagStackSize entries) the tag is not recorded, while
         // DoCloseContainer still decrements the index for every close tag.
   450   if (mTagStackIndex < TagStackSize) {
   451     mTagStack[mTagStackIndex++] = aTag;
   452   }
         // Inside an ignored subtree (mIgnoreAboveIndex is set further down)
         // nothing but the tag stack is maintained.
   454   if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
   455     return NS_OK;
   456   }
   458   // Reset this so that <blockquote type=cite> doesn't affect the whitespace
   459   // above random <pre>s below it.
   460   mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
   461                               aTag == nsGkAtoms::pre;
   463   bool isInCiteBlockquote = false;
   465   // XXX special-case <blockquote type=cite> so that we don't add additional
   466   // newlines before the text.
   467   if (aTag == nsGkAtoms::blockquote) {
   468     nsAutoString value;
   469     nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
   470     isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
   471   }
         // A line break requested earlier through mLineBreakDue/mFloatingLines
         // (DoCloseContainer sets these) is applied now, except directly in
         // front of a cite blockquote.
   473   if (mLineBreakDue && !isInCiteBlockquote)
   474     EnsureVerticalSpace(mFloatingLines);
   476   // Check if this tag's content that should not be output
   477   if ((aTag == nsGkAtoms::noscript &&
   478        !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
   479       ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
   480        !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
   481     // Ignore everything that follows the current tag in 
   482     // question until a matching end tag is encountered.
   483     mIgnoreAboveIndex = mTagStackIndex - 1;
   484     return NS_OK;
   485   }
   487   if (aTag == nsGkAtoms::body) {
   488     // Try to figure out here whether we have a
   489     // preformatted style attribute.
   490     //
   491     // Trigger on the presence of a "pre-wrap" in the
   492     // style attribute. That's a very simplistic way to do
   493     // it, but better than nothing.
   494     // Also set mWrapColumn to the value given there
   495     // (which arguably we should only do if told to do so).
   496     nsAutoString style;
   497     int32_t whitespace;
   498     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
   499        (kNotFound != (whitespace = style.Find("white-space:")))) {
   501       if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
   502 #ifdef DEBUG_preformatted
   503         printf("Set mPreFormatted based on style pre-wrap\n");
   504 #endif
   505         mPreFormatted = true;
   506         int32_t widthOffset = style.Find("width:");
   507         if (widthOffset >= 0) {
   508           // We have to search for the ch before the semicolon,
   509           // not for the semicolon itself, because nsString::ToInteger()
   510           // considers 'c' to be a valid numeric char (even if radix=10)
   511           // but then gets confused if it sees it next to the number
   512           // when the radix specified was 10, and returns an error code.
                 // The 6 used below is the length of "width:".
                 // NOTE(review): the fallback length (no "ch" found) does not
                 // subtract those 6 characters; presumably Mid() clamps the
                 // count to the end of the string -- confirm.
   513           int32_t semiOffset = style.Find("ch", false, widthOffset+6);
   514           int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
   515                             : style.Length() - widthOffset);
   516           nsAutoString widthstr;
   517           style.Mid(widthstr, widthOffset+6, length);
   518           nsresult err;
   519           int32_t col = widthstr.ToInteger(&err);
   521           if (NS_SUCCEEDED(err)) {
                   // NOTE(review): col is not range-checked before the cast,
                   // so a negative width would turn into a very large unsigned
                   // value here.
   522             mWrapColumn = (uint32_t)col;
   523 #ifdef DEBUG_preformatted
   524             printf("Set wrap column to %d based on style\n", mWrapColumn);
   525 #endif
   526           }
   527         }
   528       }
   529       else if (kNotFound != style.Find("pre", true, whitespace)) {
   530 #ifdef DEBUG_preformatted
   531         printf("Set mPreFormatted based on style pre\n");
   532 #endif
   533         mPreFormatted = true;
   534         mWrapColumn = 0;
   535       }
   536     } 
   537     else {
   538       /* See comment at end of function. */
   539       mInWhitespace = true;
   540       mPreFormatted = false;
   541     }
   543     return NS_OK;
   544   }
   546   // Keep this in sync with DoCloseContainer!
   547   if (!DoOutput()) {
   548     return NS_OK;
   549   }
         // Per-tag handling that applies to both formatted and unformatted
         // output.
   551   if (aTag == nsGkAtoms::p)
   552     EnsureVerticalSpace(1);
   553   else if (aTag == nsGkAtoms::pre) {
   554     if (GetLastBool(mIsInCiteBlockquote))
   555       EnsureVerticalSpace(0);
   556     else if (mHasWrittenCiteBlockquote) {
   557       EnsureVerticalSpace(0);
   558       mHasWrittenCiteBlockquote = false;
   559     }
   560     else
   561       EnsureVerticalSpace(1);
   562   }
   563   else if (aTag == nsGkAtoms::tr) {
           // New row: no cell written yet. Popped again by DoCloseContainer.
   564     PushBool(mHasWrittenCellsForRow, false);
   565   }
   566   else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
   567     // We must make sure that the content of two table cells get a
   568     // space between them.
   570     // To make the separation between cells most obvious and
   571     // importable, we use a TAB.
   572     if (GetLastBool(mHasWrittenCellsForRow)) {
   573       // Bypass |Write| so that the TAB isn't compressed away.
   574       AddToLine(MOZ_UTF16("\t"), 1);
   575       mInWhitespace = true;
   576     }
   577     else if (mHasWrittenCellsForRow.IsEmpty()) {
   578       // We don't always see a <tr> (nor a <table>) before the <td> if we're
   579       // copying part of a table
   580       PushBool(mHasWrittenCellsForRow, true); // will never be popped
   581     }
   582     else {
             // First cell of the current row: remember that a cell exists.
   583       SetLastBool(mHasWrittenCellsForRow, true);
   584     }
   585   }
   586   else if (aTag == nsGkAtoms::ul) {
   587     // Indent here to support nested lists, which aren't included in li :-(
           // A blank line is only requested in front of an outermost list.
   588     EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
   589          // Must end the current line before we change indention
   590     mIndent += kIndentSizeList;
   591     mULCount++;
   592   }
   593   else if (aTag == nsGkAtoms::ol) {
   594     EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
   595     if (mFlags & nsIDocumentEncoder::OutputFormatted) {
   596       // Must end the current line before we change indention
             // NOTE(review): once OLStackSize lists are open, a further <ol>
             // does not advance mOLStackIndex, but DoCloseContainer decrements
             // it for every </ol>, so the two can get out of step.
   597       if (mOLStackIndex < OLStackSize) {
   598         nsAutoString startAttr;
   599         int32_t startVal = 1;
   600         if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
   601           nsresult rv = NS_OK;
   602           startVal = startAttr.ToInteger(&rv);
   603           if (NS_FAILED(rv))
   604             startVal = 1;
   605         }
   606         mOLStack[mOLStackIndex++] = startVal;
   607       }
   608     } else {
             // Unformatted output only tracks the nesting depth; no number is
             // stored in mOLStack.
   609       mOLStackIndex++;
   610     }
   611     mIndent += kIndentSizeList;  // see ul
   612   }
   613   else if (aTag == nsGkAtoms::li &&
   614            (mFlags & nsIDocumentEncoder::OutputFormatted)) {
           // Build the item marker in mInIndentString: "<number>. " inside an
           // ordered list, otherwise a bullet character followed by a space.
   615     if (mTagStackIndex > 1 && IsInOL()) {
   616       if (mOLStackIndex > 0) {
   617         nsAutoString valueAttr;
   618         if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
   619           nsresult rv = NS_OK;
   620           int32_t valueAttrVal = valueAttr.ToInteger(&rv);
   621           if (NS_SUCCEEDED(rv))
   622             mOLStack[mOLStackIndex-1] = valueAttrVal;
   623         }
   624         // This is what nsBulletFrame does for OLs:
   625         mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
   626       }
   627       else {
   628         mInIndentString.Append(char16_t('#'));
   629       }
   631       mInIndentString.Append(char16_t('.'));
   633     }
   634     else {
             // The bullet cycles through "*o+#" with the <ul> nesting depth;
             // with no open <ul> (mULCount == 0) index 3, '#', is used.
   635       static char bulletCharArray[] = "*o+#";
   636       uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
   637       char bulletChar = bulletCharArray[index % 4];
   638       mInIndentString.Append(char16_t(bulletChar));
   639     }
   641     mInIndentString.Append(char16_t(' '));
   642   }
   643   else if (aTag == nsGkAtoms::dl) {
   644     EnsureVerticalSpace(1);
   645   }
   646   else if (aTag == nsGkAtoms::dt) {
   647     EnsureVerticalSpace(0);
   648   }
   649   else if (aTag == nsGkAtoms::dd) {
   650     EnsureVerticalSpace(0);
   651     mIndent += kIndentSizeDD;
   652   }
   653   else if (aTag == nsGkAtoms::span) {
   654     ++mSpanLevel;
   655   }
   656   else if (aTag == nsGkAtoms::blockquote) {
   657     // Push
   658     PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
   659     if (isInCiteBlockquote) {
   660       EnsureVerticalSpace(0);
   661       mCiteQuoteLevel++;
   662     }
   663     else {
   664       EnsureVerticalSpace(1);
   665       mIndent += kTabSize; // Check for some maximum value?
   666     }
   667   }
   668   else if (aTag == nsGkAtoms::q) {
   669     Write(NS_LITERAL_STRING("\""));
   670   }
   672   // Else make sure we'll separate block level tags,
   673   // even if we're about to leave, before doing any other formatting.
   674   else if (nsContentUtils::IsHTMLBlock(aTag)) {
   675     EnsureVerticalSpace(0);
   676   }
   678   //////////////////////////////////////////////////////////////
   679   if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
   680     return NS_OK;
   681   }
   682   //////////////////////////////////////////////////////////////
   683   // The rest of this routine is formatted output stuff,
   684   // which we should skip if we're not formatted:
   685   //////////////////////////////////////////////////////////////
   687   // Push on stack
   688   bool currentNodeIsConverted = IsCurrentNodeConverted();
   690   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
   691       aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
   692       aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
   693   {
   694     EnsureVerticalSpace(2);
   695     if (mHeaderStrategy == 2) {  // numbered
   696       mIndent += kIndentSizeHeaders;
   697       // Caching
   698       int32_t level = HeaderLevel(aTag);
   699       // Increase counter for current level
   700       mHeaderCounter[level]++;
   701       // Reset all lower levels
   702       int32_t i;
   704       for (i = level + 1; i <= 6; i++) {
   705         mHeaderCounter[i] = 0;
   706       }
   708       // Construct numbers
             // One "<counter>." per level from 1 up to this header's level,
             // followed by a single space.
   709       nsAutoString leadup;
   710       for (i = 1; i <= level; i++) {
   711         leadup.AppendInt(mHeaderCounter[i]);
   712         leadup.Append(char16_t('.'));
   713       }
   714       leadup.Append(char16_t(' '));
   715       Write(leadup);
   716     }
   717     else if (mHeaderStrategy == 1) { // indent increasingly
   718       mIndent += kIndentSizeHeaders;
   719       for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
   720            // for h(x), run x-1 times
   721         mIndent += kIndentIncrementHeaders;
   722       }
   723     }
   724   }
   725   else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
           // Remember a non-empty href in mURL; DoCloseContainer checks mURL
           // when the matching </a> is closed.
   726     nsAutoString url;
   727     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
   728         && !url.IsEmpty()) {
   729       mURL = url;
   730     }
   731   }
         // Structure characters written in front of inline markup when
         // mStructs is enabled and the node is not already converted.
   732   else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
   733     Write(NS_LITERAL_STRING("^"));
   734   }
   735   else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
   736     Write(NS_LITERAL_STRING("_"));
   737   }
   738   else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
   739     Write(NS_LITERAL_STRING("|"));
   740   }
   741   else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
   742            && mStructs && !currentNodeIsConverted) {
   743     Write(NS_LITERAL_STRING("*"));
   744   }
   745   else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
   746            && mStructs && !currentNodeIsConverted) {
   747     Write(NS_LITERAL_STRING("/"));
   748   }
   749   else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
   750     Write(NS_LITERAL_STRING("_"));
   751   }
   753   /* Container elements are always block elements, so we shouldn't
   754      output any whitespace immediately after the container tag even if
   755      there's extra whitespace there because the HTML is pretty-printed
   756      or something. To ensure that happens, tell the serializer we're
   757      already in whitespace so it won't output more. */
   758   mInWhitespace = true;
   760   return NS_OK;
   761 }
   763 nsresult
   764 nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag)
   765 {
   766   if (ShouldReplaceContainerWithPlaceholder(mElement->Tag())) {
   767     mIgnoredChildNodeLevel--;
   768     return NS_OK;
   769   }
   771   if (mFlags & nsIDocumentEncoder::OutputRaw) {
   772     // Raw means raw.  Don't even think about doing anything fancy
   773     // here like indenting, adding line breaks or any other
   774     // characters such as list item bullets, quote characters
   775     // around <q>, etc.  I mean it!  Don't make me smack you!
   777     return NS_OK;
   778   }
   780   if (mTagStackIndex > 0) {
   781     --mTagStackIndex;
   782   }
   784   if (mTagStackIndex >= mIgnoreAboveIndex) {
   785     if (mTagStackIndex == mIgnoreAboveIndex) {
   786       // We're dealing with the close tag whose matching
   787       // open tag had set the mIgnoreAboveIndex value.
   788       // Reset mIgnoreAboveIndex before discarding this tag.
   789       mIgnoreAboveIndex = (uint32_t)kNotFound;
   790     }
   791     return NS_OK;
   792   }
   794   // End current line if we're ending a block level tag
   795   if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
   796     // We want the output to end with a new line,
   797     // but in preformatted areas like text fields,
   798     // we can't emit newlines that weren't there.
   799     // So add the newline only in the case of formatted output.
   800     if (mFlags & nsIDocumentEncoder::OutputFormatted) {
   801       EnsureVerticalSpace(0);
   802     }
   803     else {
   804       FlushLine();
   805     }
   806     // We won't want to do anything with these in formatted mode either,
   807     // so just return now:
   808     return NS_OK;
   809   }
   811   // Keep this in sync with DoOpenContainer!
   812   if (!DoOutput()) {
   813     return NS_OK;
   814   }
   816   if (aTag == nsGkAtoms::tr) {
   817     PopBool(mHasWrittenCellsForRow);
   818     // Should always end a line, but get no more whitespace
   819     if (mFloatingLines < 0)
   820       mFloatingLines = 0;
   821     mLineBreakDue = true;
   822   }
   823   else if (((aTag == nsGkAtoms::li) ||
   824             (aTag == nsGkAtoms::dt)) &&
   825            (mFlags & nsIDocumentEncoder::OutputFormatted)) {
   826     // Items that should always end a line, but get no more whitespace
   827     if (mFloatingLines < 0)
   828       mFloatingLines = 0;
   829     mLineBreakDue = true;
   830   }
   831   else if (aTag == nsGkAtoms::pre) {
   832     mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
   833     mLineBreakDue = true;
   834   }
   835   else if (aTag == nsGkAtoms::ul) {
   836     FlushLine();
   837     mIndent -= kIndentSizeList;
   838     if (--mULCount + mOLStackIndex == 0) {
   839       mFloatingLines = 1;
   840       mLineBreakDue = true;
   841     }
   842   }
   843   else if (aTag == nsGkAtoms::ol) {
   844     FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
   845     mIndent -= kIndentSizeList;
   846     NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
   847     mOLStackIndex--;
   848     if (mULCount + mOLStackIndex == 0) {
   849       mFloatingLines = 1;
   850       mLineBreakDue = true;
   851     }
   852   }  
   853   else if (aTag == nsGkAtoms::dl) {
   854     mFloatingLines = 1;
   855     mLineBreakDue = true;
   856   }
   857   else if (aTag == nsGkAtoms::dd) {
   858     FlushLine();
   859     mIndent -= kIndentSizeDD;
   860   }
   861   else if (aTag == nsGkAtoms::span) {
   862     NS_ASSERTION(mSpanLevel, "Span level will be negative!");
   863     --mSpanLevel;
   864   }
   865   else if (aTag == nsGkAtoms::div) {
   866     if (mFloatingLines < 0)
   867       mFloatingLines = 0;
   868     mLineBreakDue = true;
   869   }
   870   else if (aTag == nsGkAtoms::blockquote) {
   871     FlushLine();    // Is this needed?
   873     // Pop
   874     bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
   876     if (isInCiteBlockquote) {
   877       NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
   878       mCiteQuoteLevel--;
   879       mFloatingLines = 0;
   880       mHasWrittenCiteBlockquote = true;
   881     }
   882     else {
   883       mIndent -= kTabSize;
   884       mFloatingLines = 1;
   885     }
   886     mLineBreakDue = true;
   887   }
   888   else if (aTag == nsGkAtoms::q) {
   889     Write(NS_LITERAL_STRING("\""));
   890   }
   891   else if (nsContentUtils::IsHTMLBlock(aTag)
   892            && aTag != nsGkAtoms::script) {
   893     // All other blocks get 1 vertical space after them
   894     // in formatted mode, otherwise 0.
   895     // This is hard. Sometimes 0 is a better number, but
   896     // how to know?
   897     if (mFlags & nsIDocumentEncoder::OutputFormatted)
   898       EnsureVerticalSpace(1);
   899     else {
   900       if (mFloatingLines < 0)
   901         mFloatingLines = 0;
   902       mLineBreakDue = true;
   903     }
   904   }
   906   //////////////////////////////////////////////////////////////
   907   if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
   908     return NS_OK;
   909   }
   910   //////////////////////////////////////////////////////////////
   911   // The rest of this routine is formatted output stuff,
   912   // which we should skip if we're not formatted:
   913   //////////////////////////////////////////////////////////////
   915   // Pop the currentConverted stack
   916   bool currentNodeIsConverted = IsCurrentNodeConverted();
   918   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
   919       aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
   920       aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
   922     if (mHeaderStrategy) {  /*numbered or indent increasingly*/ 
   923       mIndent -= kIndentSizeHeaders;
   924     }
   925     if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
   926       for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
   927            // for h(x), run x-1 times
   928         mIndent -= kIndentIncrementHeaders;
   929       }
   930     }
   931     EnsureVerticalSpace(1);
   932   }
   933   else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
   934     nsAutoString temp; 
   935     temp.AssignLiteral(" <");
   936     temp += mURL;
   937     temp.Append(char16_t('>'));
   938     Write(temp);
   939     mURL.Truncate();
   940   }
   941   else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
   942            && mStructs && !currentNodeIsConverted) {
   943     Write(kSpace);
   944   }
   945   else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
   946     Write(NS_LITERAL_STRING("|"));
   947   }
   948   else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
   949            && mStructs && !currentNodeIsConverted) {
   950     Write(NS_LITERAL_STRING("*"));
   951   }
   952   else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
   953            && mStructs && !currentNodeIsConverted) {
   954     Write(NS_LITERAL_STRING("/"));
   955   }
   956   else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
   957     Write(NS_LITERAL_STRING("_"));
   958   }
   960   return NS_OK;
   961 }
   963 bool
   964 nsPlainTextSerializer::MustSuppressLeaf()
   965 {
   966   if (mIgnoredChildNodeLevel > 0) {
   967     return true;
   968   }
   970   if ((mTagStackIndex > 1 &&
   971        mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
   972       (mTagStackIndex > 0 &&
   973         mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
   974     // Don't output the contents of SELECT elements;
   975     // Might be nice, eventually, to output just the selected element.
   976     // Read more in bug 31994.
   977     return true;
   978   }
   980   if (mTagStackIndex > 0 &&
   981       (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
   982        mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
   983     // Don't output the contents of <script> or <style> tags;
   984     return true;
   985   }
   987   return false;
   988 }
   990 void
   991 nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText)
   992 {
   993   // If we don't want any output, just return
   994   if (!DoOutput()) {
   995     return;
   996   }
   998   if (!aIsLineBreak) {
   999     // Make sure to reset this, since it's no longer true.
  1000     mHasWrittenCiteBlockquote = false;
  1003   if (mLineBreakDue)
  1004     EnsureVerticalSpace(mFloatingLines);
  1006   if (MustSuppressLeaf()) {
  1007     return;
  1010   if (aIsLineBreak) {
  1011     // The only times we want to pass along whitespace from the original
  1012     // html source are if we're forced into preformatted mode via flags,
  1013     // or if we're prettyprinting and we're inside a <pre>.
  1014     // Otherwise, either we're collapsing to minimal text, or we're
  1015     // prettyprinting to mimic the html format, and in neither case
  1016     // does the formatting of the html source help us.
  1017     if ((mFlags & nsIDocumentEncoder::OutputPreformatted) ||
  1018         (mPreFormatted && !mWrapColumn) ||
  1019         IsInPre()) {
  1020       EnsureVerticalSpace(mEmptyLines+1);
  1022     else if (!mInWhitespace) {
  1023       Write(kSpace);
  1024       mInWhitespace = true;
  1026     return;
  1029   /* Check, if we are in a link (symbolized with mURL containing the URL)
  1030      and the text is equal to the URL. In that case we don't want to output
  1031      the URL twice so we scrap the text in mURL. */
  1032   if (!mURL.IsEmpty() && mURL.Equals(aText)) {
  1033     mURL.Truncate();
  1035   Write(aText);
  1038 nsresult
  1039 nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag)
  1041   // If we don't want any output, just return
  1042   if (!DoOutput()) {
  1043     return NS_OK;
  1046   if (mLineBreakDue)
  1047     EnsureVerticalSpace(mFloatingLines);
  1049   if (MustSuppressLeaf()) {
  1050     return NS_OK;
  1053   if (aTag == nsGkAtoms::br) {
  1054     // Another egregious editor workaround, see bug 38194:
  1055     // ignore the bogus br tags that the editor sticks here and there.
  1056     nsAutoString tagAttr;
  1057     if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr))
  1058         || !tagAttr.EqualsLiteral("_moz")) {
  1059       EnsureVerticalSpace(mEmptyLines+1);
  1062   else if (aTag == nsGkAtoms::hr &&
  1063            (mFlags & nsIDocumentEncoder::OutputFormatted)) {
  1064     EnsureVerticalSpace(0);
  1066     // Make a line of dashes as wide as the wrap width
  1067     // XXX honoring percentage would be nice
  1068     nsAutoString line;
  1069     uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25);
  1070     while (line.Length() < width) {
  1071       line.Append(char16_t('-'));
  1073     Write(line);
  1075     EnsureVerticalSpace(0);
  1077   else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) {
  1078     Write(NS_LITERAL_STRING("\xFFFC"));
  1080   else if (aTag == nsGkAtoms::img) {
  1081     /* Output (in decreasing order of preference)
  1082        alt, title or nothing */
  1083     // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
  1084     nsAutoString imageDescription;
  1085     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt,
  1086                                        imageDescription))) {
  1087       // If the alt attribute has an empty value (|alt=""|), output nothing
  1089     else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title,
  1090                                             imageDescription))
  1091              && !imageDescription.IsEmpty()) {
  1092       imageDescription = NS_LITERAL_STRING(" [") +
  1093                          imageDescription +
  1094                          NS_LITERAL_STRING("] ");
  1097     Write(imageDescription);
  1100   return NS_OK;
  1103 /**
  1104  * Adds as many newline as necessary to get |noOfRows| empty lines
  1106  * noOfRows = -1    :   Being in the middle of some line of text
  1107  * noOfRows =  0    :   Being at the start of a line
  1108  * noOfRows =  n>0  :   Having n empty lines before the current line.
  1109  */
  1110 void
  1111 nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows)
  1113   // If we have something in the indent we probably want to output
  1114   // it and it's not included in the count for empty lines so we don't
  1115   // realize that we should start a new line.
  1116   if (noOfRows >= 0 && !mInIndentString.IsEmpty()) {
  1117     EndLine(false);
  1118     mInWhitespace = true;
  1121   while(mEmptyLines < noOfRows) {
  1122     EndLine(false);
  1123     mInWhitespace = true;
  1125   mLineBreakDue = false;
  1126   mFloatingLines = -1;
  1129 /**
  1130  * This empties the current line cache without adding a NEWLINE.
  1131  * Should not be used if line wrapping is of importance since
  1132  * this function destroys the cache information.
  1134  * It will also write indentation and quotes if we believe us to be
  1135  * at the start of the line.
  1136  */
  1137 void
  1138 nsPlainTextSerializer::FlushLine()
  1140   if (!mCurrentLine.IsEmpty()) {
  1141     if (mAtFirstColumn) {
  1142       OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
  1145     Output(mCurrentLine);
  1146     mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty();
  1147     mCurrentLine.Truncate();
  1148     mCurrentLineWidth = 0;
  1152 /**
  1153  * Prints the text to output to our current output device (the string mOutputString).
  1154  * The only logic here is to replace non breaking spaces with a normal space since
  1155  * most (all?) receivers of the result won't understand the nbsp and even be
  1156  * confused by it.
  1157  */
  1158 void 
  1159 nsPlainTextSerializer::Output(nsString& aString)
  1161   if (!aString.IsEmpty()) {
  1162     mStartedOutput = true;
  1165   if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
  1166     // First, replace all nbsp characters with spaces,
  1167     // which the unicode encoder won't do for us.
  1168     aString.ReplaceChar(kNBSP, kSPACE);
  1170   mOutputString->Append(aString);
  1173 static bool
  1174 IsSpaceStuffable(const char16_t *s)
  1176   if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
  1177       nsCRT::strncmp(s, MOZ_UTF16("From "), 5) == 0)
  1178     return true;
  1179   else
  1180     return false;
  1183 /**
  1184  * This function adds a piece of text to the current stored line. If we are
  1185  * wrapping text and the stored line will become too long, a suitable
  1186  * location to wrap will be found and the line that's complete will be
  1187  * output.
  1188  */
  1189 void
  1190 nsPlainTextSerializer::AddToLine(const char16_t * aLineFragment, 
  1191                                  int32_t aLineFragmentLength)
  1193   uint32_t prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent;
  1195   if (mLineBreakDue)
  1196     EnsureVerticalSpace(mFloatingLines);
  1198   int32_t linelength = mCurrentLine.Length();
  1199   if (0 == linelength) {
  1200     if (0 == aLineFragmentLength) {
  1201       // Nothing at all. Are you kidding me?
  1202       return;
  1205     if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
  1206       if (IsSpaceStuffable(aLineFragment)
  1207          && mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
  1210           // Space stuffing a la RFC 2646 (format=flowed).
  1211           mCurrentLine.Append(char16_t(' '));
  1213           if (MayWrap()) {
  1214             mCurrentLineWidth += GetUnicharWidth(' ');
  1215 #ifdef DEBUG_wrapping
  1216             NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
  1217                                                mCurrentLine.Length()) ==
  1218                          (int32_t)mCurrentLineWidth,
  1219                          "mCurrentLineWidth and reality out of sync!");
  1220 #endif
  1224     mEmptyLines=-1;
  1227   mCurrentLine.Append(aLineFragment, aLineFragmentLength);
  1228   if (MayWrap()) {
  1229     mCurrentLineWidth += GetUnicharStringWidth(aLineFragment,
  1230                                                aLineFragmentLength);
  1231 #ifdef DEBUG_wrapping
  1232     NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
  1233                                        mCurrentLine.Length()) ==
  1234                  (int32_t)mCurrentLineWidth,
  1235                  "mCurrentLineWidth and reality out of sync!");
  1236 #endif
  1239   linelength = mCurrentLine.Length();
  1241   //  Wrap?
  1242   if (MayWrap())
  1244 #ifdef DEBUG_wrapping
  1245     NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
  1246                                   mCurrentLine.Length()) ==
  1247                  (int32_t)mCurrentLineWidth,
  1248                  "mCurrentLineWidth and reality out of sync!");
  1249 #endif
  1250     // Yes, wrap!
  1251     // The "+4" is to avoid wrap lines that only would be a couple
  1252     // of letters too long. We give this bonus only if the
  1253     // wrapcolumn is more than 20.
  1254     uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0;
  1256     // XXX: Should calculate prefixwidth with GetUnicharStringWidth
  1257     while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {      
  1258       // We go from the end removing one letter at a time until
  1259       // we have a reasonable width
  1260       int32_t goodSpace = mCurrentLine.Length();
  1261       uint32_t width = mCurrentLineWidth;
  1262       while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
  1263         goodSpace--;
  1264         width -= GetUnicharWidth(mCurrentLine[goodSpace]);
  1267       goodSpace++;
  1269       if (mLineBreaker) {
  1270         goodSpace = mLineBreaker->Prev(mCurrentLine.get(), 
  1271                                     mCurrentLine.Length(), goodSpace);
  1272         if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
  1273             nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
  1274           --goodSpace;    // adjust the position since line breaker returns a position next to space
  1277       // fallback if the line breaker is unavailable or failed
  1278       if (!mLineBreaker) {
  1279         goodSpace = mWrapColumn-prefixwidth;
  1280         while (goodSpace >= 0 &&
  1281                !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
  1282           goodSpace--;
  1286       nsAutoString restOfLine;
  1287       if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
  1288         // If we don't found a good place to break, accept long line and
  1289         // try to find another place to break
  1290         goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
  1291         if (mLineBreaker) {
  1292           if ((uint32_t)goodSpace < mCurrentLine.Length())
  1293             goodSpace = mLineBreaker->Next(mCurrentLine.get(), 
  1294                                            mCurrentLine.Length(), goodSpace);
  1295           if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
  1296             goodSpace = mCurrentLine.Length();
  1298         // fallback if the line breaker is unavailable or failed
  1299         if (!mLineBreaker) {
  1300           goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
  1301           while (goodSpace < linelength &&
  1302                  !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
  1303             goodSpace++;
  1308       if ((goodSpace < linelength) && (goodSpace > 0)) {
  1309         // Found a place to break
  1311         // -1 (trim a char at the break position)
  1312         // only if the line break was a space.
  1313         if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
  1314           mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
  1316         else {
  1317           mCurrentLine.Right(restOfLine, linelength-goodSpace);
  1319         // if breaker was U+0020, it has to consider for delsp=yes support
  1320         bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' ';
  1321         mCurrentLine.Truncate(goodSpace); 
  1322         EndLine(true, breakBySpace);
  1323         mCurrentLine.Truncate();
  1324         // Space stuff new line?
  1325         if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
  1326           if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get())
  1327               && mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
  1330             // Space stuffing a la RFC 2646 (format=flowed).
  1331             mCurrentLine.Append(char16_t(' '));
  1332             //XXX doesn't seem to work correctly for ' '
  1335         mCurrentLine.Append(restOfLine);
  1336         mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(),
  1337                                                   mCurrentLine.Length());
  1338         linelength = mCurrentLine.Length();
  1339         mEmptyLines = -1;
  1341       else {
  1342         // Nothing to do. Hopefully we get more data later
  1343         // to use for a place to break line
  1344         break;
  1348   else {
  1349     // No wrapping.
  1353 /**
  1354  * Outputs the contents of mCurrentLine, and resets line specific
  1355  * variables. Also adds an indentation and prefix if there is
  1356  * one specified. Strips ending spaces from the line if it isn't
  1357  * preformatted.
  1358  */
  1359 void
  1360 nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace)
  1362   uint32_t currentlinelength = mCurrentLine.Length();
  1364   if (aSoftlinebreak && 0 == currentlinelength) {
  1365     // No meaning
  1366     return;
  1369   /* In non-preformatted mode, remove spaces from the end of the line for
  1370    * format=flowed compatibility. Don't do this for these special cases:
  1371    * "-- ", the signature separator (RFC 2646) shouldn't be touched and
  1372    * "- -- ", the OpenPGP dash-escaped signature separator in inline
  1373    * signed messages according to the OpenPGP standard (RFC 2440).
  1374    */  
  1375   if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
  1376       !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) &&
  1377      (aSoftlinebreak || 
  1378      !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) {
  1379     // Remove spaces from the end of the line.
  1380     while(currentlinelength > 0 &&
  1381           mCurrentLine[currentlinelength-1] == ' ') {
  1382       --currentlinelength;
  1384     mCurrentLine.SetLength(currentlinelength);
  1387   if (aSoftlinebreak &&
  1388      (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
  1389      (mIndent == 0)) {
  1390     // Add the soft part of the soft linebreak (RFC 2646 4.1)
  1391     // We only do this when there is no indentation since format=flowed
  1392     // lines and indentation doesn't work well together.
  1394     // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
  1395     // add twice space.
  1396     if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace)
  1397       mCurrentLine.Append(NS_LITERAL_STRING("  "));
  1398     else
  1399       mCurrentLine.Append(char16_t(' '));
  1402   if (aSoftlinebreak) {
  1403     mEmptyLines=0;
  1405   else {
  1406     // Hard break
  1407     if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
  1408       mEmptyLines=-1;
  1411     mEmptyLines++;
  1414   if (mAtFirstColumn) {
  1415     // If we don't have anything "real" to output we have to
  1416     // make sure the indent doesn't end in a space since that
  1417     // would trick a format=flowed-aware receiver.
  1418     bool stripTrailingSpaces = mCurrentLine.IsEmpty();
  1419     OutputQuotesAndIndent(stripTrailingSpaces);
  1422   mCurrentLine.Append(mLineBreak);
  1423   Output(mCurrentLine);
  1424   mCurrentLine.Truncate();
  1425   mCurrentLineWidth = 0;
  1426   mAtFirstColumn=true;
  1427   mInWhitespace=true;
  1428   mLineBreakDue = false;
  1429   mFloatingLines = -1;
  1433 /**
  1434  * Outputs the calculated and stored indent and text in the indentation. That is
  1435  * quote chars and numbers for numbered lists and such. It will also reset any
  1436  * stored text to put in the indentation after using it.
  1437  */
  1438 void
  1439 nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */)
  1441   nsAutoString stringToOutput;
  1443   // Put the mail quote "> " chars in, if appropriate:
  1444   if (mCiteQuoteLevel > 0) {
  1445     nsAutoString quotes;
  1446     for(int i=0; i < mCiteQuoteLevel; i++) {
  1447       quotes.Append(char16_t('>'));
  1449     if (!mCurrentLine.IsEmpty()) {
  1450       /* Better don't output a space here, if the line is empty,
  1451          in case a receiving f=f-aware UA thinks, this were a flowed line,
  1452          which it isn't - it's just empty.
  1453          (Flowed lines may be joined with the following one,
  1454          so the empty line may be lost completely.) */
  1455       quotes.Append(char16_t(' '));
  1457     stringToOutput = quotes;
  1458     mAtFirstColumn = false;
  1461   // Indent if necessary
  1462   int32_t indentwidth = mIndent - mInIndentString.Length();
  1463   if (indentwidth > 0
  1464       && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
  1465       // Don't make empty lines look flowed
  1466       ) {
  1467     nsAutoString spaces;
  1468     for (int i=0; i < indentwidth; ++i)
  1469       spaces.Append(char16_t(' '));
  1470     stringToOutput += spaces;
  1471     mAtFirstColumn = false;
  1474   if (!mInIndentString.IsEmpty()) {
  1475     stringToOutput += mInIndentString;
  1476     mAtFirstColumn = false;
  1477     mInIndentString.Truncate();
  1480   if (stripTrailingSpaces) {
  1481     int32_t lineLength = stringToOutput.Length();
  1482     while(lineLength > 0 &&
  1483           ' ' == stringToOutput[lineLength-1]) {
  1484       --lineLength;
  1486     stringToOutput.SetLength(lineLength);
  1489   if (!stringToOutput.IsEmpty()) {
  1490     Output(stringToOutput);
  1495 /**
  1496  * Write a string. This is the highlevel function to use to get text output.
  1497  * By using AddToLine, Output, EndLine and other functions it handles quotation,
  1498  * line wrapping, indentation, whitespace compression and other things.
  1499  */
  1500 void
  1501 nsPlainTextSerializer::Write(const nsAString& aStr)
  1503   // XXX Copy necessary to use nsString methods and gain
  1504   // access to underlying buffer
  1505   nsAutoString str(aStr);
  1507 #ifdef DEBUG_wrapping
  1508   printf("Write(%s): wrap col = %d\n",
  1509          NS_ConvertUTF16toUTF8(str).get(), mWrapColumn);
  1510 #endif
  1512   int32_t bol = 0;
  1513   int32_t newline;
  1515   int32_t totLen = str.Length();
  1517   // If the string is empty, do nothing:
  1518   if (totLen <= 0) return;
  1520   // For Flowed text change nbsp-ses to spaces at end of lines to allow them
  1521   // to be cut off along with usual spaces if required. (bug #125928)
  1522   if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
  1523     for (int32_t i = totLen-1; i >= 0; i--) {
  1524       char16_t c = str[i];
  1525       if ('\n' == c || '\r' == c || ' ' == c || '\t' == c)
  1526         continue;
  1527       if (kNBSP == c)
  1528         str.Replace(i, 1, ' ');
  1529       else
  1530         break;
  1534   // We have two major codepaths here. One that does preformatted text and one
  1535   // that does normal formatted text. The one for preformatted text calls
  1536   // Output directly while the other code path goes through AddToLine.
  1537   if ((mPreFormatted && !mWrapColumn) || IsInPre()
  1538       || ((mSpanLevel > 0 || mDontWrapAnyQuotes)
  1539           && mEmptyLines >= 0 && str.First() == char16_t('>'))) {
  1540     // No intelligent wrapping.
  1542     // This mustn't be mixed with intelligent wrapping without clearing
  1543     // the mCurrentLine buffer before!!!
  1544     NS_ASSERTION(mCurrentLine.IsEmpty(),
  1545                  "Mixed wrapping data and nonwrapping data on the same line");
  1546     if (!mCurrentLine.IsEmpty()) {
  1547       FlushLine();
  1550     // Put the mail quote "> " chars in, if appropriate.
  1551     // Have to put it in before every line.
  1552     while(bol<totLen) {
  1553       bool outputQuotes = mAtFirstColumn;
  1554       bool atFirstColumn = mAtFirstColumn;
  1555       bool outputLineBreak = false;
  1556       bool spacesOnly = true;
  1558       // Find one of '\n' or '\r' using iterators since nsAString
  1559       // doesn't have the old FindCharInSet function.
  1560       nsAString::const_iterator iter;           str.BeginReading(iter);
  1561       nsAString::const_iterator done_searching; str.EndReading(done_searching);
  1562       iter.advance(bol); 
  1563       int32_t new_newline = bol;
  1564       newline = kNotFound;
  1565       while(iter != done_searching) {
  1566         if ('\n' == *iter || '\r' == *iter) {
  1567           newline = new_newline;
  1568           break;
  1570         if (' ' != *iter)
  1571           spacesOnly = false;
  1572         ++new_newline;
  1573         ++iter;
  1576       // Done searching
  1577       nsAutoString stringpart;
  1578       if (newline == kNotFound) {
  1579         // No new lines.
  1580         stringpart.Assign(Substring(str, bol, totLen - bol));
  1581         if (!stringpart.IsEmpty()) {
  1582           char16_t lastchar = stringpart[stringpart.Length()-1];
  1583           if ((lastchar == '\t') || (lastchar == ' ') ||
  1584              (lastchar == '\r') ||(lastchar == '\n')) {
  1585             mInWhitespace = true;
  1587           else {
  1588             mInWhitespace = false;
  1591         mEmptyLines=-1;
  1592         atFirstColumn = mAtFirstColumn && (totLen-bol)==0;
  1593         bol = totLen;
  1595       else {
  1596         // There is a newline
  1597         stringpart.Assign(Substring(str, bol, newline-bol));
  1598         mInWhitespace = true;
  1599         outputLineBreak = true;
  1600         mEmptyLines=0;
  1601         atFirstColumn = true;
  1602         bol = newline+1;
  1603         if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
  1604           // There was a CRLF in the input. This used to be illegal and
  1605           // stripped by the parser. Apparently not anymore. Let's skip
  1606           // over the LF.
  1607           bol++;
  1611       mCurrentLine.AssignLiteral("");
  1612       if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
  1613         if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928
  1614             !stringpart.EqualsLiteral("-- ") &&
  1615             !stringpart.EqualsLiteral("- -- "))
  1616           stringpart.Trim(" ", false, true, true);
  1617         if (IsSpaceStuffable(stringpart.get()) && stringpart[0] != '>')
  1618           mCurrentLine.Append(char16_t(' '));
  1620       mCurrentLine.Append(stringpart);
  1622       if (outputQuotes) {
  1623         // Note: this call messes with mAtFirstColumn
  1624         OutputQuotesAndIndent();
  1627       Output(mCurrentLine);
  1628       if (outputLineBreak) {
  1629         Output(mLineBreak);
  1631       mAtFirstColumn = atFirstColumn;
  1634     // Reset mCurrentLine.
  1635     mCurrentLine.Truncate();
  1637 #ifdef DEBUG_wrapping
  1638     printf("No wrapping: newline is %d, totLen is %d\n",
  1639            newline, totLen);
  1640 #endif
  1641     return;
  1644   // Intelligent handling of text
  1645   // If needed, strip out all "end of lines"
  1646   // and multiple whitespace between words
  1647   int32_t nextpos;
  1648   const char16_t * offsetIntoBuffer = nullptr;
  1650   while (bol < totLen) {    // Loop over lines
  1651     // Find a place where we may have to do whitespace compression
  1652     nextpos = str.FindCharInSet(" \t\n\r", bol);
  1653 #ifdef DEBUG_wrapping
  1654     nsAutoString remaining;
  1655     str.Right(remaining, totLen - bol);
  1656     foo = ToNewCString(remaining);
  1657     //    printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
  1658     //           bol, nextpos, totLen, foo);
  1659     nsMemory::Free(foo);
  1660 #endif
  1662     if (nextpos == kNotFound) {
  1663       // The rest of the string
  1664       offsetIntoBuffer = str.get() + bol;
  1665       AddToLine(offsetIntoBuffer, totLen-bol);
  1666       bol=totLen;
  1667       mInWhitespace=false;
  1669     else {
  1670       // There's still whitespace left in the string
  1671       if (nextpos != 0 && (nextpos + 1) < totLen) {
  1672         offsetIntoBuffer = str.get() + nextpos;
  1673         // skip '\n' if it is between CJ chars
  1674         if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
  1675           offsetIntoBuffer = str.get() + bol;
  1676           AddToLine(offsetIntoBuffer, nextpos-bol);
  1677           bol = nextpos + 1;
  1678           continue;
  1681       // If we're already in whitespace and not preformatted, just skip it:
  1682       if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
  1683           !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
  1684         // Skip whitespace
  1685         bol++;
  1686         continue;
  1689       if (nextpos == bol) {
  1690         // Note that we are in whitespace.
  1691         mInWhitespace = true;
  1692         offsetIntoBuffer = str.get() + nextpos;
  1693         AddToLine(offsetIntoBuffer, 1);
  1694         bol++;
  1695         continue;
  1698       mInWhitespace = true;
  1700       offsetIntoBuffer = str.get() + bol;
  1701       if (mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
  1702         // Preserve the real whitespace character
  1703         nextpos++;
  1704         AddToLine(offsetIntoBuffer, nextpos-bol);
  1705         bol = nextpos;
  1707       else {
  1708         // Replace the whitespace with a space
  1709         AddToLine(offsetIntoBuffer, nextpos-bol);
  1710         AddToLine(kSpace.get(),1);
  1711         bol = nextpos + 1; // Let's eat the whitespace
  1714   } // Continue looping over the string
  1718 /**
  1719  * Gets the value of an attribute in a string. If the function returns
  1720  * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
  1721  */
  1722 nsresult
  1723 nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName,
  1724                                          nsString& aValueRet)
  1726   if (mElement) {
  1727     if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
  1728       return NS_OK;
  1732   return NS_ERROR_NOT_AVAILABLE;
  1735 /**
  1736  * Returns true, if the element was inserted by Moz' TXT->HTML converter.
  1737  * In this case, we should ignore it.
  1738  */
  1739 bool 
  1740 nsPlainTextSerializer::IsCurrentNodeConverted()
  1742   nsAutoString value;
  1743   nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
  1744   return (NS_SUCCEEDED(rv) &&
  1745           (value.EqualsIgnoreCase("moz-txt", 7) ||
  1746            value.EqualsIgnoreCase("\"moz-txt", 8)));
  1750 // static
  1751 nsIAtom*
  1752 nsPlainTextSerializer::GetIdForContent(nsIContent* aContent)
  1754   if (!aContent->IsHTML()) {
  1755     return nullptr;
  1758   nsIAtom* localName = aContent->Tag();
  1759   return localName->IsStaticAtom() ? localName : nullptr;
  1762 /**
  1763  * Returns true if we currently are inside a <pre>. The check is done
  1764  * by traversing the tag stack looking for <pre> until we hit a block
  1765  * level tag which is assumed to override any <pre>:s below it in
  1766  * the stack. To do this correctly to a 100% would require access
  1767  * to style which we don't support in this converter.
  1768  */  
  1769 bool
  1770 nsPlainTextSerializer::IsInPre()
  1772   int32_t i = mTagStackIndex;
  1773   while(i > 0) {
  1774     if (mTagStack[i - 1] == nsGkAtoms::pre)
  1775       return true;
  1776     if (nsContentUtils::IsHTMLBlock(mTagStack[i - 1])) {
  1777       // We assume that every other block overrides a <pre>
  1778       return false;
  1780     --i;
  1783   // Not a <pre> in the whole stack
  1784   return false;
  1787 /**
  1788  * This method is required only to identify LI's inside OL.
  1789  * Returns TRUE if we are inside an OL tag and FALSE otherwise.
  1790  */
  1791 bool
  1792 nsPlainTextSerializer::IsInOL()
  1794   int32_t i = mTagStackIndex;
  1795   while(--i >= 0) {
  1796     if (mTagStack[i] == nsGkAtoms::ol)
  1797       return true;
  1798     if (mTagStack[i] == nsGkAtoms::ul) {
  1799       // If a UL is reached first, LI belongs the UL nested in OL.
  1800       return false;
  1803   // We may reach here for orphan LI's.
  1804   return false;
  1807 /*
  1808   @return 0 = no header, 1 = h1, ..., 6 = h6
  1809 */
  1810 int32_t HeaderLevel(nsIAtom* aTag)
  1812   if (aTag == nsGkAtoms::h1) {
  1813     return 1;
  1815   if (aTag == nsGkAtoms::h2) {
  1816     return 2;
  1818   if (aTag == nsGkAtoms::h3) {
  1819     return 3;
  1821   if (aTag == nsGkAtoms::h4) {
  1822     return 4;
  1824   if (aTag == nsGkAtoms::h5) {
  1825     return 5;
  1827   if (aTag == nsGkAtoms::h6) {
  1828     return 6;
  1830   return 0;
  /*
   * This is an implementation of GetUnicharWidth() and
   * GetUnicharStringWidth() as defined in
   * "The Single UNIX Specification, Version 2, The Open Group, 1997"
   * <http://www.UNIX-systems.org/online.html>
   *
   * Markus Kuhn -- 2000-02-08 -- public domain
   *
   * Minor alterations to fit Mozilla's data types by Daniel Bratell
   */

  /* These functions define the column width of an ISO 10646 character
   * as follows:
   *
   *    - The null character (U+0000) has a column width of 0.
   *
   *    - Other C0/C1 control characters and DEL will lead to a return
   *      value of -1.
   *
   *    - Non-spacing and enclosing combining characters (general
   *      category code Mn or Me in the Unicode database) have a
   *      column width of 0.
   *
   *    - Spacing characters in the East Asian Wide (W) or East Asian
   *      FullWidth (F) category as defined in Unicode Technical
   *      Report #11 have a column width of 2.
   *
   *    - All remaining characters (including all printable
   *      ISO 8859-1 and WGL4 characters, Unicode control characters,
   *      etc.) have a column width of 1.
   *
   * This implementation assumes that wchar_t characters are encoded
   * in ISO 10646.
   *
   * Note that the argument is a single 16-bit code unit; values in the
   * surrogate range match none of the tables below and therefore yield 1.
   *
   * @param ucs  the code unit to measure
   * @return -1, 0, 1 or 2 according to the rules above
   */

  int32_t GetUnicharWidth(char16_t ucs)
  {
    /* sorted list of non-overlapping intervals of non-spacing characters */
    static const struct interval {
      uint16_t first;
      uint16_t last;
    } combining[] = {
      { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
      { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
      { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
      { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
      { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
      { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
      { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
      { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
      { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
      { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
      { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
      { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
      { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
      { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
      { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
      { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
      { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
      { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
      { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
      { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
      { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
      { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
      { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
      { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
      { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
      { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
      { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
      { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
      { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
      { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
      { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
    };
    int32_t min = 0;
    int32_t max =
      static_cast<int32_t>(sizeof(combining) / sizeof(combining[0])) - 1;

    /* test for 8-bit control characters */
    if (ucs == 0) {
      return 0;
    }
    if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) {
      return -1;
    }

    /* first quick check for Latin-1 etc. characters */
    if (ucs < combining[0].first) {
      return 1;
    }

    /* binary search in table of non-spacing characters */
    while (max >= min) {
      const int32_t mid = min + (max - min) / 2;
      if (combining[mid].last < ucs) {
        min = mid + 1;
      } else if (combining[mid].first > ucs) {
        max = mid - 1;
      } else {
        /* first <= ucs <= last: ucs lies inside this interval */
        return 0;
      }
    }

    /* if we arrive here, ucs is not a combining or C0/C1 control character */

    /* fast test for majority of non-wide scripts */
    if (ucs < 0x1100) {
      return 1;
    }

    const bool isWide =
      (ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
      (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
       ucs != 0x303f) ||                  /* CJK ... Yi */
      (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
      (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
      (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
      (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
      (ucs >= 0xffe0 && ucs <= 0xffe6);

    return isWide ? 2 : 1;
  }
  1951 int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n)
  1953   int32_t w, width = 0;
  1955   for (;*pwcs && n-- > 0; pwcs++)
  1956     if ((w = GetUnicharWidth(*pwcs)) < 0)
  1957       ++width; // Taking 1 as the width of non-printable character, for bug# 94475.
  1958     else
  1959       width += w;
  1961   return width;

mercurial