michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* vim: set ts=2 sw=2 et tw=80: */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: /*
michael@0:  * nsIContentSerializer implementation that can be used with an
michael@0:  * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
michael@0:  * string that could be parsed into more or less the original DOM.
michael@0:  */
michael@0: 
michael@0: #include "nsHTMLContentSerializer.h"
michael@0: 
michael@0: #include "nsIDOMElement.h"
michael@0: #include "nsIContent.h"
michael@0: #include "nsIDocument.h"
michael@0: #include "nsNameSpaceManager.h"
michael@0: #include "nsString.h"
michael@0: #include "nsUnicharUtils.h"
michael@0: #include "nsXPIDLString.h"
michael@0: #include "nsIServiceManager.h"
michael@0: #include "nsIDocumentEncoder.h"
michael@0: #include "nsGkAtoms.h"
michael@0: #include "nsIURI.h"
michael@0: #include "nsNetUtil.h"
michael@0: #include "nsEscape.h"
michael@0: #include "nsITextToSubURI.h"
michael@0: #include "nsCRT.h"
michael@0: #include "nsIParserService.h"
michael@0: #include "nsContentUtils.h"
michael@0: #include "nsLWBrkCIID.h"
michael@0: #include "nsIScriptElement.h"
michael@0: #include "nsAttrName.h"
michael@0: #include "nsIDocShell.h"
michael@0: #include "nsIEditor.h"
michael@0: #include "nsIHTMLEditor.h"
michael@0: #include "mozilla/dom/Element.h"
michael@0: #include "nsParserConstants.h"
michael@0: 
michael@0: using namespace mozilla::dom;
michael@0: 
michael@0: /**
michael@0:  * Factory function for the HTML content serializer.
michael@0:  *
michael@0:  * Allocates an nsHTMLContentSerializer and hands it to the caller through
michael@0:  * CallQueryInterface().
michael@0:  *
michael@0:  * @param aSerializer  out-parameter that receives the new serializer.
michael@0:  * @return NS_ERROR_OUT_OF_MEMORY when the allocation yields null, otherwise
michael@0:  *         the result of CallQueryInterface().
michael@0:  */
michael@0: nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer)
michael@0: {
michael@0:   nsHTMLContentSerializer* serializer = new nsHTMLContentSerializer();
michael@0:   return serializer ? CallQueryInterface(serializer, aSerializer)
michael@0:                     : NS_ERROR_OUT_OF_MEMORY;
michael@0: }
michael@0: 
michael@0: nsHTMLContentSerializer::nsHTMLContentSerializer()
michael@0: {
michael@0:     mIsHTMLSerializer = true;
michael@0: }
michael@0: 
michael@0: // Destructor: no explicit cleanup is performed by this class.
michael@0: nsHTMLContentSerializer::~nsHTMLContentSerializer()
michael@0: {
michael@0:   // Intentionally empty.
michael@0: }
michael@0: 
michael@0: 
michael@0: /**
michael@0:  * Called at the start of a document.  This implementation writes nothing
michael@0:  * to aStr and ignores aDocument.
michael@0:  *
michael@0:  * @return always NS_OK.
michael@0:  */
michael@0: NS_IMETHODIMP
michael@0: nsHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument,
michael@0:                                              nsAString& aStr)
michael@0: {
michael@0:   // Nothing to emit.
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: void 
michael@0: nsHTMLContentSerializer::SerializeHTMLAttributes(nsIContent* aContent,
michael@0:                                                  nsIContent *aOriginalElement,
michael@0:                                                  nsAString& aTagPrefix,
michael@0:                                                  const nsAString& aTagNamespaceURI,
michael@0:                                                  nsIAtom* aTagName,
michael@0:                                                  int32_t aNamespace,
michael@0:                                                  nsAString& aStr)
michael@0: {
michael@0:   int32_t count = aContent->GetAttrCount();
michael@0:   if (!count)
michael@0:     return;
michael@0: 
michael@0:   nsresult rv;
michael@0:   nsAutoString valueStr;
michael@0:   NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
michael@0: 
michael@0:   for (int32_t index = count; index > 0;) {
michael@0:     --index;
michael@0:     const nsAttrName* name = aContent->GetAttrNameAt(index);
michael@0:     int32_t namespaceID = name->NamespaceID();
michael@0:     nsIAtom* attrName = name->LocalName();
michael@0: 
michael@0:     // Filter out any attribute starting with [-|_]moz
michael@0:     nsDependentAtomString attrNameStr(attrName);
michael@0:     if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) ||
michael@0:         StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) {
michael@0:       continue;
michael@0:     }
michael@0:     aContent->GetAttr(namespaceID, attrName, valueStr);
michael@0: 
michael@0:     // 
michael@0:     // Filter out special case of <br type="_moz"> or <br _moz*>,
michael@0:     // used by the editor.  Bug 16988.  Yuck.
michael@0:     //
michael@0:     if (aTagName == nsGkAtoms::br && aNamespace == kNameSpaceID_XHTML &&
michael@0:         attrName == nsGkAtoms::type && namespaceID == kNameSpaceID_None &&
michael@0:         StringBeginsWith(valueStr, _mozStr)) {
michael@0:       continue;
michael@0:     }
michael@0: 
michael@0:     if (mIsCopying && mIsFirstChildOfOL &&
michael@0:         aTagName == nsGkAtoms::li && aNamespace == kNameSpaceID_XHTML &&
michael@0:         attrName == nsGkAtoms::value && namespaceID == kNameSpaceID_None){
michael@0:       // This is handled separately in SerializeLIValueAttribute()
michael@0:       continue;
michael@0:     }
michael@0:     bool isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
michael@0:     
michael@0:     if (((attrName == nsGkAtoms::href &&
michael@0:           (namespaceID == kNameSpaceID_None ||
michael@0:            namespaceID == kNameSpaceID_XLink)) ||
michael@0:          (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) {
michael@0:       // Make all links absolute when converting only the selection:
michael@0:       if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
michael@0:         // Would be nice to handle OBJECT and APPLET tags,
michael@0:         // but that gets more complicated since we have to
michael@0:         // search the tag list for CODEBASE as well.
michael@0:         // For now, just leave them relative.
michael@0:         nsCOMPtr<nsIURI> uri = aContent->GetBaseURI();
michael@0:         if (uri) {
michael@0:           nsAutoString absURI;
michael@0:           rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
michael@0:           if (NS_SUCCEEDED(rv)) {
michael@0:             valueStr = absURI;
michael@0:           }
michael@0:         }
michael@0:       }
michael@0:       // Need to escape URI.
michael@0:       nsAutoString tempURI(valueStr);
michael@0:       if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
michael@0:         valueStr = tempURI;
michael@0:     }
michael@0: 
michael@0:     if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
michael@0:         aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content
michael@0:         && namespaceID == kNameSpaceID_None) {
michael@0:       // If we're serializing a <meta http-equiv="content-type">,
michael@0:       // use the proper value, rather than what's in the document.
michael@0:       nsAutoString header;
michael@0:       aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
michael@0:       if (header.LowerCaseEqualsLiteral("content-type")) {
michael@0:         valueStr = NS_LITERAL_STRING("text/html; charset=") +
michael@0:           NS_ConvertASCIItoUTF16(mCharset);
michael@0:       }
michael@0:     }
michael@0: 
michael@0:     nsDependentAtomString nameStr(attrName);
michael@0:     nsAutoString prefix;
michael@0:     if (namespaceID == kNameSpaceID_XML) {
michael@0:       prefix.Assign(NS_LITERAL_STRING("xml"));
michael@0:     } else if (namespaceID == kNameSpaceID_XLink) {
michael@0:       prefix.Assign(NS_LITERAL_STRING("xlink"));
michael@0:     }
michael@0: 
michael@0:     // Expand shorthand attribute.
michael@0:     if (aNamespace == kNameSpaceID_XHTML &&
michael@0:         namespaceID == kNameSpaceID_None &&
michael@0:         IsShorthandAttr(attrName, aTagName) &&
michael@0:         valueStr.IsEmpty()) {
michael@0:       valueStr = nameStr;
michael@0:     }
michael@0:     SerializeAttr(prefix, nameStr, valueStr, aStr, !isJS);
michael@0:   }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Append the start tag of aElement (name plus attributes) to aStr.
michael@0:  *
michael@0:  * Besides writing "<name attrs>", this handles the pretty-printing
michael@0:  * whitespace around the tag, tracks <ol>/<li> state while copying so list
michael@0:  * item ordinals can be serialized, and bumps mDisableEntityEncoding for
michael@0:  * script, style, noscript and noframes elements.
michael@0:  *
michael@0:  * @param aElement          element being serialized; must not be null.
michael@0:  * @param aOriginalElement  passed on to IsFirstChildOfOL(),
michael@0:  *                          SerializeHTMLAttributes() and AfterElementStart().
michael@0:  * @param aStr              output string.
michael@0:  * @return NS_OK, including when CheckElementStart() declines the element.
michael@0:  */
michael@0: NS_IMETHODIMP
michael@0: nsHTMLContentSerializer::AppendElementStart(Element* aElement,
michael@0:                                             Element* aOriginalElement,
michael@0:                                             nsAString& aStr)
michael@0: {
michael@0:   NS_ENSURE_ARG(aElement);
michael@0: 
michael@0:   nsIContent* content = aElement;
michael@0: 
michael@0:   // CheckElementStart() receives forceFormat by reference and may set it.
michael@0:   bool forceFormat = false;
michael@0:   if (!CheckElementStart(content, forceFormat, aStr)) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   nsIAtom* tagName = content->Tag();
michael@0:   const int32_t tagNS = content->GetNameSpaceID();
michael@0:   const bool tagIsHTML = (tagNS == kNameSpaceID_XHTML);
michael@0: 
michael@0:   const bool breakBeforeOpen = LineBreakBeforeOpen(tagNS, tagName);
michael@0: 
michael@0:   // Whitespace in front of the tag.
michael@0:   if (!mPreLevel && !mDoRaw && (mDoFormat || forceFormat)) {
michael@0:     if (mColPos && breakBeforeOpen) {
michael@0:       AppendNewLineToString(aStr);
michael@0:     } else {
michael@0:       MaybeAddNewlineForRootNode(aStr);
michael@0:     }
michael@0: 
michael@0:     if (!mColPos) {
michael@0:       AppendIndentation(aStr);
michael@0:     } else if (mAddSpace) {
michael@0:       AppendToString(char16_t(' '), aStr);
michael@0:       mAddSpace = false;
michael@0:     }
michael@0:   } else if (mAddSpace) {
michael@0:     AppendToString(char16_t(' '), aStr);
michael@0:     mAddSpace = false;
michael@0:   } else {
michael@0:     MaybeAddNewlineForRootNode(aStr);
michael@0:   }
michael@0:   // Always reset to avoid false newlines in case
michael@0:   // MaybeAddNewlineForRootNode wasn't called.
michael@0:   mAddNewlineForRootNode = false;
michael@0: 
michael@0:   AppendToString(kLessThan, aStr);
michael@0:   AppendToString(nsDependentAtomString(tagName), aStr);
michael@0: 
michael@0:   MaybeEnterInPreContent(content);
michael@0: 
michael@0:   // For block elements, we increase the indentation.  The formatting
michael@0:   // condition is evaluated afresh here, after MaybeEnterInPreContent().
michael@0:   if (!mPreLevel && !mDoRaw && (mDoFormat || forceFormat)) {
michael@0:     IncrIndentation(tagName);
michael@0:   }
michael@0: 
michael@0:   // Need to keep track of OL and LI elements in order to get the ordinal
michael@0:   // number for the LI.
michael@0:   if (mIsCopying && tagIsHTML) {
michael@0:     if (tagName == nsGkAtoms::ol) {
michael@0:       // We are copying and the current node is an OL: remember its start
michael@0:       // attribute value on the OL state stack.
michael@0:       nsAutoString startAttr;
michael@0:       aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, startAttr);
michael@0: 
michael@0:       int32_t startVal = 0;
michael@0:       if (!startAttr.IsEmpty()) {
michael@0:         nsresult parseRv = NS_OK;
michael@0:         const int32_t parsed = startAttr.ToInteger(&parseRv);
michael@0:         // If OL has a "start" attribute, the first LI has to start with
michael@0:         // that value.  One is subtracted because every LI increments the
michael@0:         // counter before using it.  If ToInteger() fails, fall back to 0.
michael@0:         startVal = NS_SUCCEEDED(parseRv) ? parsed - 1 : 0;
michael@0:       }
michael@0:       mOLStateStack.AppendElement(olState(startVal, true));
michael@0:     } else if (tagName == nsGkAtoms::li) {
michael@0:       mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
michael@0:       if (mIsFirstChildOfOL) {
michael@0:         // If OL is the parent of this LI, its "value" attribute is
michael@0:         // serialized in a different manner.
michael@0:         SerializeLIValueAttribute(aElement, aStr);
michael@0:       }
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // Even an LI handled above has to go through this, to serialize its
michael@0:   // attributes other than "value".
michael@0:   nsAutoString unusedPrefix;
michael@0:   SerializeHTMLAttributes(content, aOriginalElement, unusedPrefix,
michael@0:                           EmptyString(), tagName, tagNS, aStr);
michael@0: 
michael@0:   AppendToString(kGreaterThan, aStr);
michael@0: 
michael@0:   // Entity encoding is switched off for the content of these elements;
michael@0:   // AppendElementEnd() performs the matching decrement.
michael@0:   if (tagIsHTML &&
michael@0:       (tagName == nsGkAtoms::script ||
michael@0:        tagName == nsGkAtoms::style ||
michael@0:        tagName == nsGkAtoms::noscript ||
michael@0:        tagName == nsGkAtoms::noframes)) {
michael@0:     ++mDisableEntityEncoding;
michael@0:   }
michael@0: 
michael@0:   if (!mPreLevel && !mDoRaw && (mDoFormat || forceFormat) &&
michael@0:       LineBreakAfterOpen(tagNS, tagName)) {
michael@0:     AppendNewLineToString(aStr);
michael@0:   }
michael@0: 
michael@0:   AfterElementStart(content, aOriginalElement, aStr);
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0:   
michael@0: /**
michael@0:  * Append the end tag of aElement to aStr.
michael@0:  *
michael@0:  * No end tag is written for a script element reported as malformed, nor
michael@0:  * for XHTML-namespace elements that the parser service reports as
michael@0:  * non-containers.  The method also undoes the bookkeeping done when the
michael@0:  * element was opened: entity-encoding suppression, indentation and the
michael@0:  * OL state stack.
michael@0:  *
michael@0:  * @param aElement  element being closed; must not be null.
michael@0:  * @param aStr      output string.
michael@0:  * @return NS_OK.
michael@0:  */
michael@0: NS_IMETHODIMP 
michael@0: nsHTMLContentSerializer::AppendElementEnd(Element* aElement,
michael@0:                                           nsAString& aStr)
michael@0: {
michael@0:   NS_ENSURE_ARG(aElement);
michael@0: 
michael@0:   nsIContent* content = aElement;
michael@0: 
michael@0:   nsIAtom *name = content->Tag();
michael@0:   int32_t ns = content->GetNameSpaceID();
michael@0: 
michael@0:   // Matches the increment done in AppendElementStart() for these elements.
michael@0:   if (ns == kNameSpaceID_XHTML &&
michael@0:       (name == nsGkAtoms::script ||
michael@0:        name == nsGkAtoms::style ||
michael@0:        name == nsGkAtoms::noscript ||
michael@0:        name == nsGkAtoms::noframes)) {
michael@0:     --mDisableEntityEncoding;
michael@0:   }
michael@0: 
michael@0:   // Formatting is forced for elements carrying the mozdirty attribute,
michael@0:   // unless OutputIgnoreMozDirty is set.
michael@0:   bool forceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
michael@0:                      content->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
michael@0: 
michael@0:   if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
michael@0:     DecrIndentation(name);
michael@0:   }
michael@0: 
michael@0:   if (name == nsGkAtoms::script) {
michael@0:     nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement);
michael@0: 
michael@0:     if (script && script->IsMalformed()) {
michael@0:       // We're looking at a malformed script tag. This means that the end tag
michael@0:       // was missing in the source. Imitate that here by not serializing the end
michael@0:       // tag.
michael@0:       --mPreLevel;
michael@0:       return NS_OK;
michael@0:     }
michael@0:   }
michael@0:   else if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) {
michael@0:     NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
michael@0:     /* At this point there should always be a state to remove, because every
michael@0:     OL opening tag is supposed to push an olState object onto the stack. */
michael@0:     if (!mOLStateStack.IsEmpty()) {
michael@0:       mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1);
michael@0:     }
michael@0:   }
michael@0:   
michael@0:   if (ns == kNameSpaceID_XHTML) {
michael@0:     nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0: 
michael@0:     if (parserService) {
michael@0:       // Start from "is a container" so that a call which fails without
michael@0:       // writing the out-parameter cannot leave us reading an uninitialized
michael@0:       // value; in that case the end tag is still emitted.
michael@0:       bool isContainer = true;
michael@0: 
michael@0:       parserService->
michael@0:         IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name),
michael@0:                     isContainer);
michael@0:       if (!isContainer) {
michael@0:         // Non-container elements get no end tag.
michael@0:         return NS_OK;
michael@0:       }
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // Whitespace in front of the end tag.
michael@0:   if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
michael@0: 
michael@0:     bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name);
michael@0: 
michael@0:     if (mColPos && lineBreakBeforeClose) {
michael@0:       AppendNewLineToString(aStr);
michael@0:     }
michael@0:     if (!mColPos) {
michael@0:       AppendIndentation(aStr);
michael@0:     }
michael@0:     else if (mAddSpace) {
michael@0:       AppendToString(char16_t(' '), aStr);
michael@0:       mAddSpace = false;
michael@0:     }
michael@0:   }
michael@0:   else if (mAddSpace) {
michael@0:     AppendToString(char16_t(' '), aStr);
michael@0:     mAddSpace = false;
michael@0:   }
michael@0: 
michael@0:   AppendToString(kEndTag, aStr);
michael@0:   AppendToString(nsDependentAtomString(name), aStr);
michael@0:   AppendToString(kGreaterThan, aStr);
michael@0: 
michael@0:   MaybeLeaveFromPreContent(content);
michael@0: 
michael@0:   // Evaluated again here, after MaybeLeaveFromPreContent().
michael@0:   if ((mDoFormat || forceFormat) && !mPreLevel
michael@0:       && !mDoRaw && LineBreakAfterClose(ns, name)) {
michael@0:     AppendNewLineToString(aStr);
michael@0:   }
michael@0:   else {
michael@0:     MaybeFlagNewlineForRootNode(aElement);
michael@0:   }
michael@0: 
michael@0:   if (name == nsGkAtoms::body && ns == kNameSpaceID_XHTML) {
michael@0:     --mInBody;
michael@0:   }
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: static const uint16_t kValNBSP = 160; // highest code point the entity tables cover (the "&nbsp;" slot)
michael@0: static const char* kEntities[] = { // indexed by code point; nullptr = no replacement; selected when mInAttribute is false
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0-9
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 10-19
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 20-29
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "&amp;", nullptr, // 30-39 (38 = '&')
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 40-49
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 50-59
michael@0:   "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 60-69 (60 = '<', 62 = '>')
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 70-79
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 80-89
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 90-99
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 100-109
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 110-119
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 120-129
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 130-139
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 140-149
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 150-159
michael@0:   "&nbsp;" // 160 (kValNBSP)
michael@0: };
michael@0: 
michael@0: static const char* kAttrEntities[] = { // indexed by code point; nullptr = no replacement; selected when mInAttribute is true
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0-9
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 10-19
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 20-29
michael@0:   nullptr, nullptr, nullptr, nullptr, "&quot;", nullptr, nullptr, nullptr, "&amp;", nullptr, // 30-39 (34 = '"', 38 = '&')
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 40-49
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 50-59
michael@0:   "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 60-69 (60 = '<', 62 = '>')
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 70-79
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 80-89
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 90-99
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 100-109
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 110-119
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 120-129
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 130-139
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 140-149
michael@0:   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 150-159
michael@0:   "&nbsp;" // 160 (kValNBSP)
michael@0: };
michael@0: 
michael@0: /**
michael@0:  * Scan aStr forward from aIndex for the first character that has a
michael@0:  * replacement in aEntityTable.
michael@0:  *
michael@0:  * @param aStr          text being scanned.
michael@0:  * @param aLen          number of characters of aStr to consider.
michael@0:  * @param aIndex        position at which the scan starts.
michael@0:  * @param aEntityTable  table indexed by code point; it is only consulted
michael@0:  *                      for values up to and including kValNBSP.
michael@0:  * @param aEntity       set to the replacement text when a match is found;
michael@0:  *                      left untouched otherwise.
michael@0:  * @return the index of the matching character, or the position where the
michael@0:  *         scan stopped (aLen when the scan ran to the end).
michael@0:  */
michael@0: uint32_t FindNextBasicEntity(const nsAString& aStr,
michael@0:                              const uint32_t aLen,
michael@0:                              uint32_t aIndex,
michael@0:                              const char** aEntityTable,
michael@0:                              const char** aEntity)
michael@0: {
michael@0:   while (aIndex < aLen) {
michael@0:     const char16_t ch = aStr[aIndex];
michael@0:     // Characters above kValNBSP have no slot in the table.
michael@0:     const char* replacement = (ch <= kValNBSP) ? aEntityTable[ch] : nullptr;
michael@0:     if (replacement) {
michael@0:       *aEntity = replacement;
michael@0:       break;
michael@0:     }
michael@0:     ++aIndex;
michael@0:   }
michael@0:   return aIndex;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Append aStr to aOutputStr, replacing characters with entity references
michael@0:  * according to the encoder flags.
michael@0:  *
michael@0:  *  - Nothing is written while mBodyOnly is set and we are outside <body>.
michael@0:  *  - While mDisableEntityEncoding is non-zero the text is copied verbatim.
michael@0:  *  - If none of the Latin1/HTML/W3C entity flags is set:
michael@0:  *      - with OutputEncodeBasicEntities, only the characters listed in
michael@0:  *        kEntities / kAttrEntities are replaced;
michael@0:  *      - otherwise the nsXMLContentSerializer implementation is used.
michael@0:  *  - If any of the Latin1/HTML/W3C flags is set, the basic table is applied
michael@0:  *    first, then the parser service (Latin1/HTML), then mEntityConverter
michael@0:  *    (W3C), one character at a time.
michael@0:  *
michael@0:  * @param aStr        text to translate.
michael@0:  * @param aOutputStr  string the result is appended to.
michael@0:  */
michael@0: void
michael@0: nsHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
michael@0:                                                      nsAString& aOutputStr)
michael@0: {
michael@0:   if (mBodyOnly && !mInBody) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   if (mDisableEntityEncoding) {
michael@0:     aOutputStr.Append(aStr);
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   const bool wantsExtendedEntities =
michael@0:     (mFlags & (nsIDocumentEncoder::OutputEncodeLatin1Entities |
michael@0:                nsIDocumentEncoder::OutputEncodeHTMLEntities   |
michael@0:                nsIDocumentEncoder::OutputEncodeW3CEntities)) != 0;
michael@0: 
michael@0:   if (!wantsExtendedEntities) {
michael@0:     if (!(mFlags & nsIDocumentEncoder::OutputEncodeBasicEntities)) {
michael@0:       // No HTML-specific entity flag at all: defer to the XML serializer.
michael@0:       nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr);
michael@0:       return;
michael@0:     }
michael@0: 
michael@0:     // Basic entities only: copy runs of ordinary text and substitute the
michael@0:     // table entry for each character that has one.
michael@0:     const char** basicTable = mInAttribute ? kAttrEntities : kEntities;
michael@0:     const uint32_t length = aStr.Length();
michael@0:     uint32_t runStart = 0;
michael@0:     uint32_t pos = 0;
michael@0:     while (pos < length) {
michael@0:       const char* entity = nullptr;
michael@0:       pos = FindNextBasicEntity(aStr, length, pos, basicTable, &entity);
michael@0:       if (pos != runStart) {
michael@0:         aOutputStr.Append(Substring(aStr, runStart, pos - runStart));
michael@0:       }
michael@0:       if (entity) {
michael@0:         aOutputStr.AppendASCII(entity);
michael@0:         runStart = pos + 1;
michael@0:       }
michael@0:       ++pos;
michael@0:     }
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // Extended entities.
michael@0:   nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0:   if (!parserService) {
michael@0:     NS_ERROR("Can't get parser service");
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   const char** basicTable = mInAttribute ? kAttrEntities : kEntities;
michael@0: 
michael@0:   // Scratch buffer for names returned by the parser service.  It lives
michael@0:   // outside the loop because namedEntity below points into it.
michael@0:   nsAutoCString entityName;
michael@0: 
michael@0:   nsReadingIterator<char16_t> cursor;
michael@0:   nsReadingIterator<char16_t> textEnd;
michael@0:   aStr.BeginReading(cursor);
michael@0:   aStr.EndReading(textEnd);
michael@0: 
michael@0:   // for each chunk of |aStr|...
michael@0:   while (cursor != textEnd) {
michael@0:     const char16_t* const chunkStart = cursor.get();
michael@0:     const uint32_t chunkLength = cursor.size_forward();
michael@0:     const char16_t* const chunkEnd = chunkStart + chunkLength;
michael@0: 
michael@0:     const char* namedEntity = nullptr;      // bare name; needs '&' and ';'
michael@0:     const char* tableEntity = nullptr;      // complete text from basicTable
michael@0:     char* converterEntity = nullptr;        // complete text, must be freed
michael@0:     uint32_t unitsReplaced = 0;             // UTF-16 units converterEntity
michael@0:                                             //  stands for
michael@0:     uint32_t consumed = 0;                  // units copied through verbatim
michael@0: 
michael@0:     // for each character in this chunk, check if it
michael@0:     // needs to be replaced
michael@0:     for (const char16_t* cur = chunkStart; cur < chunkEnd; ++cur, ++consumed) {
michael@0:       const char16_t val = *cur;
michael@0:       if (val <= kValNBSP && basicTable[val]) {
michael@0:         tableEntity = basicTable[val];
michael@0:         break;
michael@0:       } else if (val > 127 &&
michael@0:                  ((val < 256 &&
michael@0:                    mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
michael@0:                   mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
michael@0:         entityName.Truncate();
michael@0:         parserService->HTMLConvertUnicodeToEntity(val, entityName);
michael@0: 
michael@0:         if (!entityName.IsEmpty()) {
michael@0:           namedEntity = entityName.get();
michael@0:           break;
michael@0:         }
michael@0:       } else if (val > 127 &&
michael@0:                  mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
michael@0:                  mEntityConverter) {
michael@0:         if (NS_IS_HIGH_SURROGATE(val) &&
michael@0:             cur + 1 < chunkEnd &&
michael@0:             NS_IS_LOW_SURROGATE(*(cur + 1))) {
michael@0:           const char16_t low = *(cur + 1);
michael@0:           const uint32_t valUTF32 = SURROGATE_TO_UCS4(val, low);
michael@0:           if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
michael@0:                             nsIEntityConverter::entityW3C, &converterEntity))) {
michael@0:             unitsReplaced = 2;
michael@0:             break;
michael@0:           }
michael@0:           // No entity for this pair: step over the high surrogate here;
michael@0:           // the loop increment steps over the low one.
michael@0:           ++cur;
michael@0:           ++consumed;
michael@0:         } else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
michael@0:                                   nsIEntityConverter::entityW3C,
michael@0:                                   &converterEntity))) {
michael@0:           unitsReplaced = 1;
michael@0:           break;
michael@0:         }
michael@0:       }
michael@0:     }
michael@0: 
michael@0:     // Copy the untouched text, then the replacement (if any).
michael@0:     aOutputStr.Append(chunkStart, consumed);
michael@0:     if (namedEntity) {
michael@0:       aOutputStr.Append(char16_t('&'));
michael@0:       AppendASCIItoUTF16(namedEntity, aOutputStr);
michael@0:       aOutputStr.Append(char16_t(';'));
michael@0:       ++consumed;
michael@0:     } else if (tableEntity) {
michael@0:       aOutputStr.AppendASCII(tableEntity);
michael@0:       ++consumed;
michael@0:     } else if (converterEntity) {
michael@0:       // if it comes from nsIEntityConverter, it already has '&' and ';'
michael@0:       AppendASCIItoUTF16(converterEntity, aOutputStr);
michael@0:       nsMemory::Free(converterEntity);
michael@0:       consumed += unitsReplaced;
michael@0:     }
michael@0: 
michael@0:     cursor.advance(int32_t(consumed));
michael@0:   }
michael@0: }