michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* vim: set ts=2 sw=2 et tw=80: */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0:
michael@0: /*
michael@0: * nsIContentSerializer implementation that can be used with an
michael@0: * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
michael@0: * string that could be parsed into more or less the original DOM.
michael@0: */
michael@0:
michael@0: #include "nsHTMLContentSerializer.h"
michael@0:
michael@0: #include "nsIDOMElement.h"
michael@0: #include "nsIContent.h"
michael@0: #include "nsIDocument.h"
michael@0: #include "nsNameSpaceManager.h"
michael@0: #include "nsString.h"
michael@0: #include "nsUnicharUtils.h"
michael@0: #include "nsXPIDLString.h"
michael@0: #include "nsIServiceManager.h"
michael@0: #include "nsIDocumentEncoder.h"
michael@0: #include "nsGkAtoms.h"
michael@0: #include "nsIURI.h"
michael@0: #include "nsNetUtil.h"
michael@0: #include "nsEscape.h"
michael@0: #include "nsITextToSubURI.h"
michael@0: #include "nsCRT.h"
michael@0: #include "nsIParserService.h"
michael@0: #include "nsContentUtils.h"
michael@0: #include "nsLWBrkCIID.h"
michael@0: #include "nsIScriptElement.h"
michael@0: #include "nsAttrName.h"
michael@0: #include "nsIDocShell.h"
michael@0: #include "nsIEditor.h"
michael@0: #include "nsIHTMLEditor.h"
michael@0: #include "mozilla/dom/Element.h"
michael@0: #include "nsParserConstants.h"
michael@0:
michael@0: using namespace mozilla::dom;
michael@0:
michael@0: nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer)
michael@0: {
michael@0: nsHTMLContentSerializer* it = new nsHTMLContentSerializer();
michael@0: if (!it) {
michael@0: return NS_ERROR_OUT_OF_MEMORY;
michael@0: }
michael@0:
michael@0: return CallQueryInterface(it, aSerializer);
michael@0: }
michael@0:
michael@0: nsHTMLContentSerializer::nsHTMLContentSerializer()
michael@0: {
michael@0: mIsHTMLSerializer = true;
michael@0: }
michael@0:
michael@0: nsHTMLContentSerializer::~nsHTMLContentSerializer()
michael@0: {
michael@0: }
michael@0:
michael@0:
michael@0: NS_IMETHODIMP
michael@0: nsHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument,
michael@0: nsAString& aStr)
michael@0: {
michael@0: return NS_OK;
michael@0: }
michael@0:
michael@0: void
michael@0: nsHTMLContentSerializer::SerializeHTMLAttributes(nsIContent* aContent,
michael@0: nsIContent *aOriginalElement,
michael@0: nsAString& aTagPrefix,
michael@0: const nsAString& aTagNamespaceURI,
michael@0: nsIAtom* aTagName,
michael@0: int32_t aNamespace,
michael@0: nsAString& aStr)
michael@0: {
michael@0: int32_t count = aContent->GetAttrCount();
michael@0: if (!count)
michael@0: return;
michael@0:
michael@0: nsresult rv;
michael@0: nsAutoString valueStr;
michael@0: NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
michael@0:
michael@0: for (int32_t index = count; index > 0;) {
michael@0: --index;
michael@0: const nsAttrName* name = aContent->GetAttrNameAt(index);
michael@0: int32_t namespaceID = name->NamespaceID();
michael@0: nsIAtom* attrName = name->LocalName();
michael@0:
michael@0: // Filter out any attribute starting with [-|_]moz
michael@0: nsDependentAtomString attrNameStr(attrName);
michael@0: if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) ||
michael@0: StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) {
michael@0: continue;
michael@0: }
michael@0: aContent->GetAttr(namespaceID, attrName, valueStr);
michael@0:
michael@0: //
michael@0: // Filter out special case of
or
,
michael@0: // used by the editor. Bug 16988. Yuck.
michael@0: //
michael@0: if (aTagName == nsGkAtoms::br && aNamespace == kNameSpaceID_XHTML &&
michael@0: attrName == nsGkAtoms::type && namespaceID == kNameSpaceID_None &&
michael@0: StringBeginsWith(valueStr, _mozStr)) {
michael@0: continue;
michael@0: }
michael@0:
michael@0: if (mIsCopying && mIsFirstChildOfOL &&
michael@0: aTagName == nsGkAtoms::li && aNamespace == kNameSpaceID_XHTML &&
michael@0: attrName == nsGkAtoms::value && namespaceID == kNameSpaceID_None){
michael@0: // This is handled separately in SerializeLIValueAttribute()
michael@0: continue;
michael@0: }
michael@0: bool isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
michael@0:
michael@0: if (((attrName == nsGkAtoms::href &&
michael@0: (namespaceID == kNameSpaceID_None ||
michael@0: namespaceID == kNameSpaceID_XLink)) ||
michael@0: (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) {
michael@0: // Make all links absolute when converting only the selection:
michael@0: if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
michael@0: // Would be nice to handle OBJECT and APPLET tags,
michael@0: // but that gets more complicated since we have to
michael@0: // search the tag list for CODEBASE as well.
michael@0: // For now, just leave them relative.
michael@0: nsCOMPtr uri = aContent->GetBaseURI();
michael@0: if (uri) {
michael@0: nsAutoString absURI;
michael@0: rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
michael@0: if (NS_SUCCEEDED(rv)) {
michael@0: valueStr = absURI;
michael@0: }
michael@0: }
michael@0: }
michael@0: // Need to escape URI.
michael@0: nsAutoString tempURI(valueStr);
michael@0: if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
michael@0: valueStr = tempURI;
michael@0: }
michael@0:
michael@0: if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
michael@0: aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content
michael@0: && namespaceID == kNameSpaceID_None) {
michael@0: // If we're serializing a ,
michael@0: // use the proper value, rather than what's in the document.
michael@0: nsAutoString header;
michael@0: aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
michael@0: if (header.LowerCaseEqualsLiteral("content-type")) {
michael@0: valueStr = NS_LITERAL_STRING("text/html; charset=") +
michael@0: NS_ConvertASCIItoUTF16(mCharset);
michael@0: }
michael@0: }
michael@0:
michael@0: nsDependentAtomString nameStr(attrName);
michael@0: nsAutoString prefix;
michael@0: if (namespaceID == kNameSpaceID_XML) {
michael@0: prefix.Assign(NS_LITERAL_STRING("xml"));
michael@0: } else if (namespaceID == kNameSpaceID_XLink) {
michael@0: prefix.Assign(NS_LITERAL_STRING("xlink"));
michael@0: }
michael@0:
michael@0: // Expand shorthand attribute.
michael@0: if (aNamespace == kNameSpaceID_XHTML &&
michael@0: namespaceID == kNameSpaceID_None &&
michael@0: IsShorthandAttr(attrName, aTagName) &&
michael@0: valueStr.IsEmpty()) {
michael@0: valueStr = nameStr;
michael@0: }
michael@0: SerializeAttr(prefix, nameStr, valueStr, aStr, !isJS);
michael@0: }
michael@0: }
michael@0:
michael@0: NS_IMETHODIMP
michael@0: nsHTMLContentSerializer::AppendElementStart(Element* aElement,
michael@0: Element* aOriginalElement,
michael@0: nsAString& aStr)
michael@0: {
michael@0: NS_ENSURE_ARG(aElement);
michael@0:
michael@0: nsIContent* content = aElement;
michael@0:
michael@0: bool forceFormat = false;
michael@0: if (!CheckElementStart(content, forceFormat, aStr)) {
michael@0: return NS_OK;
michael@0: }
michael@0:
michael@0: nsIAtom *name = content->Tag();
michael@0: int32_t ns = content->GetNameSpaceID();
michael@0:
michael@0: bool lineBreakBeforeOpen = LineBreakBeforeOpen(ns, name);
michael@0:
michael@0: if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
michael@0: if (mColPos && lineBreakBeforeOpen) {
michael@0: AppendNewLineToString(aStr);
michael@0: }
michael@0: else {
michael@0: MaybeAddNewlineForRootNode(aStr);
michael@0: }
michael@0: if (!mColPos) {
michael@0: AppendIndentation(aStr);
michael@0: }
michael@0: else if (mAddSpace) {
michael@0: AppendToString(char16_t(' '), aStr);
michael@0: mAddSpace = false;
michael@0: }
michael@0: }
michael@0: else if (mAddSpace) {
michael@0: AppendToString(char16_t(' '), aStr);
michael@0: mAddSpace = false;
michael@0: }
michael@0: else {
michael@0: MaybeAddNewlineForRootNode(aStr);
michael@0: }
michael@0: // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode wasn't
michael@0: // called
michael@0: mAddNewlineForRootNode = false;
michael@0:
michael@0: AppendToString(kLessThan, aStr);
michael@0:
michael@0: AppendToString(nsDependentAtomString(name), aStr);
michael@0:
michael@0: MaybeEnterInPreContent(content);
michael@0:
michael@0: // for block elements, we increase the indentation
michael@0: if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw)
michael@0: IncrIndentation(name);
michael@0:
michael@0: // Need to keep track of OL and LI elements in order to get ordinal number
michael@0: // for the LI.
michael@0: if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML){
michael@0: // We are copying and current node is an OL;
michael@0: // Store its start attribute value in olState->startVal.
michael@0: nsAutoString start;
michael@0: int32_t startAttrVal = 0;
michael@0:
michael@0: aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start);
michael@0: if (!start.IsEmpty()){
michael@0: nsresult rv = NS_OK;
michael@0: startAttrVal = start.ToInteger(&rv);
michael@0: //If OL has "start" attribute, first LI element has to start with that value
michael@0: //Therefore subtracting 1 as all the LI elements are incrementing it before using it;
michael@0: //In failure of ToInteger(), default StartAttrValue to 0.
michael@0: if (NS_SUCCEEDED(rv))
michael@0: startAttrVal--;
michael@0: else
michael@0: startAttrVal = 0;
michael@0: }
michael@0: mOLStateStack.AppendElement(olState(startAttrVal, true));
michael@0: }
michael@0:
michael@0: if (mIsCopying && name == nsGkAtoms::li && ns == kNameSpaceID_XHTML) {
michael@0: mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
michael@0: if (mIsFirstChildOfOL){
michael@0: // If OL is parent of this LI, serialize attributes in different manner.
michael@0: SerializeLIValueAttribute(aElement, aStr);
michael@0: }
michael@0: }
michael@0:
michael@0: // Even LI passed above have to go through this
michael@0: // for serializing attributes other than "value".
michael@0: nsAutoString dummyPrefix;
michael@0: SerializeHTMLAttributes(content,
michael@0: aOriginalElement,
michael@0: dummyPrefix,
michael@0: EmptyString(),
michael@0: name,
michael@0: ns,
michael@0: aStr);
michael@0:
michael@0: AppendToString(kGreaterThan, aStr);
michael@0:
michael@0: if (ns == kNameSpaceID_XHTML &&
michael@0: (name == nsGkAtoms::script ||
michael@0: name == nsGkAtoms::style ||
michael@0: name == nsGkAtoms::noscript ||
michael@0: name == nsGkAtoms::noframes)) {
michael@0: ++mDisableEntityEncoding;
michael@0: }
michael@0:
michael@0: if ((mDoFormat || forceFormat) && !mPreLevel &&
michael@0: !mDoRaw && LineBreakAfterOpen(ns, name)) {
michael@0: AppendNewLineToString(aStr);
michael@0: }
michael@0:
michael@0: AfterElementStart(content, aOriginalElement, aStr);
michael@0:
michael@0: return NS_OK;
michael@0: }
michael@0:
michael@0: NS_IMETHODIMP
michael@0: nsHTMLContentSerializer::AppendElementEnd(Element* aElement,
michael@0: nsAString& aStr)
michael@0: {
michael@0: NS_ENSURE_ARG(aElement);
michael@0:
michael@0: nsIContent* content = aElement;
michael@0:
michael@0: nsIAtom *name = content->Tag();
michael@0: int32_t ns = content->GetNameSpaceID();
michael@0:
michael@0: if (ns == kNameSpaceID_XHTML &&
michael@0: (name == nsGkAtoms::script ||
michael@0: name == nsGkAtoms::style ||
michael@0: name == nsGkAtoms::noscript ||
michael@0: name == nsGkAtoms::noframes)) {
michael@0: --mDisableEntityEncoding;
michael@0: }
michael@0:
michael@0: bool forceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
michael@0: content->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
michael@0:
michael@0: if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
michael@0: DecrIndentation(name);
michael@0: }
michael@0:
michael@0: if (name == nsGkAtoms::script) {
michael@0: nsCOMPtr script = do_QueryInterface(aElement);
michael@0:
michael@0: if (script && script->IsMalformed()) {
michael@0: // We're looking at a malformed script tag. This means that the end tag
michael@0: // was missing in the source. Imitate that here by not serializing the end
michael@0: // tag.
michael@0: --mPreLevel;
michael@0: return NS_OK;
michael@0: }
michael@0: }
michael@0: else if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) {
michael@0: NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
michael@0: /* Though at this point we must always have an state to be deleted as all
michael@0: the OL opening tags are supposed to push an olState object to the stack*/
michael@0: if (!mOLStateStack.IsEmpty()) {
michael@0: mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1);
michael@0: }
michael@0: }
michael@0:
michael@0: if (ns == kNameSpaceID_XHTML) {
michael@0: nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0:
michael@0: if (parserService) {
michael@0: bool isContainer;
michael@0:
michael@0: parserService->
michael@0: IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name),
michael@0: isContainer);
michael@0: if (!isContainer) {
michael@0: return NS_OK;
michael@0: }
michael@0: }
michael@0: }
michael@0:
michael@0: if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
michael@0:
michael@0: bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name);
michael@0:
michael@0: if (mColPos && lineBreakBeforeClose) {
michael@0: AppendNewLineToString(aStr);
michael@0: }
michael@0: if (!mColPos) {
michael@0: AppendIndentation(aStr);
michael@0: }
michael@0: else if (mAddSpace) {
michael@0: AppendToString(char16_t(' '), aStr);
michael@0: mAddSpace = false;
michael@0: }
michael@0: }
michael@0: else if (mAddSpace) {
michael@0: AppendToString(char16_t(' '), aStr);
michael@0: mAddSpace = false;
michael@0: }
michael@0:
michael@0: AppendToString(kEndTag, aStr);
michael@0: AppendToString(nsDependentAtomString(name), aStr);
michael@0: AppendToString(kGreaterThan, aStr);
michael@0:
michael@0: MaybeLeaveFromPreContent(content);
michael@0:
michael@0: if ((mDoFormat || forceFormat) && !mPreLevel
michael@0: && !mDoRaw && LineBreakAfterClose(ns, name)) {
michael@0: AppendNewLineToString(aStr);
michael@0: }
michael@0: else {
michael@0: MaybeFlagNewlineForRootNode(aElement);
michael@0: }
michael@0:
michael@0: if (name == nsGkAtoms::body && ns == kNameSpaceID_XHTML) {
michael@0: --mInBody;
michael@0: }
michael@0:
michael@0: return NS_OK;
michael@0: }
michael@0:
michael@0: static const uint16_t kValNBSP = 160;
michael@0: static const char* kEntities[] = {
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "&", nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: "<", nullptr, ">", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
michael@0: " "
michael@0: };
michael@0:
// Replacement text for attribute values, indexed by UTF-16 code unit value
// (0..160). A null entry means the character is written unchanged.
// Entries: '"' (34), '&' (38), '<' (60), '>' (62) and NBSP (160).
static const char* kAttrEntities[] = {
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, "&quot;", nullptr, nullptr, nullptr, "&amp;", nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  "&lt;", nullptr, "&gt;", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  "&nbsp;"
};
michael@0:
michael@0: uint32_t FindNextBasicEntity(const nsAString& aStr,
michael@0: const uint32_t aLen,
michael@0: uint32_t aIndex,
michael@0: const char** aEntityTable,
michael@0: const char** aEntity)
michael@0: {
michael@0: for (; aIndex < aLen; ++aIndex) {
michael@0: // for each character in this chunk, check if it
michael@0: // needs to be replaced
michael@0: char16_t val = aStr[aIndex];
michael@0: if (val <= kValNBSP && aEntityTable[val]) {
michael@0: *aEntity = aEntityTable[val];
michael@0: return aIndex;
michael@0: }
michael@0: }
michael@0: return aIndex;
michael@0: }
michael@0:
michael@0: void
michael@0: nsHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
michael@0: nsAString& aOutputStr)
michael@0: {
michael@0: if (mBodyOnly && !mInBody) {
michael@0: return;
michael@0: }
michael@0:
michael@0: if (mDisableEntityEncoding) {
michael@0: aOutputStr.Append(aStr);
michael@0: return;
michael@0: }
michael@0:
michael@0: bool nonBasicEntities =
michael@0: !!(mFlags & (nsIDocumentEncoder::OutputEncodeLatin1Entities |
michael@0: nsIDocumentEncoder::OutputEncodeHTMLEntities |
michael@0: nsIDocumentEncoder::OutputEncodeW3CEntities));
michael@0:
michael@0: if (!nonBasicEntities &&
michael@0: (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities))) {
michael@0: const char **entityTable = mInAttribute ? kAttrEntities : kEntities;
michael@0: uint32_t start = 0;
michael@0: const uint32_t len = aStr.Length();
michael@0: for (uint32_t i = 0; i < len; ++i) {
michael@0: const char* entity = nullptr;
michael@0: i = FindNextBasicEntity(aStr, len, i, entityTable, &entity);
michael@0: uint32_t normalTextLen = i - start;
michael@0: if (normalTextLen) {
michael@0: aOutputStr.Append(Substring(aStr, start, normalTextLen));
michael@0: }
michael@0: if (entity) {
michael@0: aOutputStr.AppendASCII(entity);
michael@0: start = i + 1;
michael@0: }
michael@0: }
michael@0: return;
michael@0: } else if (nonBasicEntities) {
michael@0: nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0:
michael@0: if (!parserService) {
michael@0: NS_ERROR("Can't get parser service");
michael@0: return;
michael@0: }
michael@0:
michael@0: nsReadingIterator done_reading;
michael@0: aStr.EndReading(done_reading);
michael@0:
michael@0: // for each chunk of |aString|...
michael@0: uint32_t advanceLength = 0;
michael@0: nsReadingIterator iter;
michael@0:
michael@0: const char **entityTable = mInAttribute ? kAttrEntities : kEntities;
michael@0: nsAutoCString entityReplacement;
michael@0:
michael@0: for (aStr.BeginReading(iter);
michael@0: iter != done_reading;
michael@0: iter.advance(int32_t(advanceLength))) {
michael@0: uint32_t fragmentLength = iter.size_forward();
michael@0: uint32_t lengthReplaced = 0; // the number of UTF-16 codepoints
michael@0: // replaced by a particular entity
michael@0: const char16_t* c = iter.get();
michael@0: const char16_t* fragmentStart = c;
michael@0: const char16_t* fragmentEnd = c + fragmentLength;
michael@0: const char* entityText = nullptr;
michael@0: const char* fullConstEntityText = nullptr;
michael@0: char* fullEntityText = nullptr;
michael@0:
michael@0: advanceLength = 0;
michael@0: // for each character in this chunk, check if it
michael@0: // needs to be replaced
michael@0: for (; c < fragmentEnd; c++, advanceLength++) {
michael@0: char16_t val = *c;
michael@0: if (val <= kValNBSP && entityTable[val]) {
michael@0: fullConstEntityText = entityTable[val];
michael@0: break;
michael@0: } else if (val > 127 &&
michael@0: ((val < 256 &&
michael@0: mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
michael@0: mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
michael@0: entityReplacement.Truncate();
michael@0: parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);
michael@0:
michael@0: if (!entityReplacement.IsEmpty()) {
michael@0: entityText = entityReplacement.get();
michael@0: break;
michael@0: }
michael@0: }
michael@0: else if (val > 127 &&
michael@0: mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
michael@0: mEntityConverter) {
michael@0: if (NS_IS_HIGH_SURROGATE(val) &&
michael@0: c + 1 < fragmentEnd &&
michael@0: NS_IS_LOW_SURROGATE(*(c + 1))) {
michael@0: uint32_t valUTF32 = SURROGATE_TO_UCS4(val, *(++c));
michael@0: if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
michael@0: nsIEntityConverter::entityW3C, &fullEntityText))) {
michael@0: lengthReplaced = 2;
michael@0: break;
michael@0: }
michael@0: else {
michael@0: advanceLength++;
michael@0: }
michael@0: }
michael@0: else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
michael@0: nsIEntityConverter::entityW3C,
michael@0: &fullEntityText))) {
michael@0: lengthReplaced = 1;
michael@0: break;
michael@0: }
michael@0: }
michael@0: }
michael@0:
michael@0: aOutputStr.Append(fragmentStart, advanceLength);
michael@0: if (entityText) {
michael@0: aOutputStr.Append(char16_t('&'));
michael@0: AppendASCIItoUTF16(entityText, aOutputStr);
michael@0: aOutputStr.Append(char16_t(';'));
michael@0: advanceLength++;
michael@0: }
michael@0: else if (fullConstEntityText) {
michael@0: aOutputStr.AppendASCII(fullConstEntityText);
michael@0: ++advanceLength;
michael@0: }
michael@0: // if it comes from nsIEntityConverter, it already has '&' and ';'
michael@0: else if (fullEntityText) {
michael@0: AppendASCIItoUTF16(fullEntityText, aOutputStr);
michael@0: nsMemory::Free(fullEntityText);
michael@0: advanceLength += lengthReplaced;
michael@0: }
michael@0: }
michael@0: } else {
michael@0: nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr);
michael@0: }
michael@0: }