michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* vim: set ts=2 sw=2 et tw=80: */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: /*
michael@0:  * nsIContentSerializer implementation that can be used with an
michael@0:  * nsIDocumentEncoder to convert an XHTML (not HTML!) DOM to an XHTML
michael@0:  * string that could be parsed into more or less the original DOM.
michael@0:  */
michael@0: 
michael@0: #include "nsXHTMLContentSerializer.h"
michael@0: 
michael@0: #include "nsIDOMElement.h"
michael@0: #include "nsIContent.h"
michael@0: #include "nsIDocument.h"
michael@0: #include "nsNameSpaceManager.h"
michael@0: #include "nsString.h"
michael@0: #include "nsUnicharUtils.h"
michael@0: #include "nsXPIDLString.h"
michael@0: #include "nsIServiceManager.h"
michael@0: #include "nsIDocumentEncoder.h"
michael@0: #include "nsGkAtoms.h"
michael@0: #include "nsIURI.h"
michael@0: #include "nsNetUtil.h"
michael@0: #include "nsEscape.h"
michael@0: #include "nsITextToSubURI.h"
michael@0: #include "nsCRT.h"
michael@0: #include "nsIParserService.h"
michael@0: #include "nsContentUtils.h"
michael@0: #include "nsLWBrkCIID.h"
michael@0: #include "nsIScriptElement.h"
michael@0: #include "nsAttrName.h"
michael@0: #include "nsParserConstants.h"
michael@0: 
michael@0: static const int32_t kLongLineLen = 128; // HasLongLines() reports true when any line is longer than this many characters
michael@0: 
michael@0: #define kXMLNS "xmlns" // literal handed to AssignLiteral() in SerializeAttributes() to build namespace declarations
michael@0: 
michael@0: // Factory function: creates a new nsXHTMLContentSerializer and hands it out
michael@0: // through aSerializer as an nsIContentSerializer.
michael@0: //
michael@0: // @param aSerializer  out-param filled in by CallQueryInterface().
michael@0: // @return NS_ERROR_OUT_OF_MEMORY if the allocation yields null, otherwise
michael@0: //         the result of the QueryInterface call.
michael@0: nsresult NS_NewXHTMLContentSerializer(nsIContentSerializer** aSerializer)
michael@0: {
michael@0:   // Hold a strong reference for the duration of the QueryInterface call.
michael@0:   // With a raw pointer the freshly created object (refcount 0) would be
michael@0:   // leaked if the QueryInterface ever failed; the nsCOMPtr releases it.
michael@0:   nsCOMPtr<nsIContentSerializer> it = new nsXHTMLContentSerializer();
michael@0:   if (!it) {
michael@0:     return NS_ERROR_OUT_OF_MEMORY;
michael@0:   }
michael@0: 
michael@0:   return CallQueryInterface(it.get(), aSerializer);
michael@0: }
michael@0: 
michael@0: nsXHTMLContentSerializer::nsXHTMLContentSerializer() // default constructor; the remaining per-run state is set up in Init()
michael@0:   : mIsHTMLSerializer(false) // XHTML-only methods in this file assert that this flag is false
michael@0: {
michael@0: }
michael@0: 
michael@0: nsXHTMLContentSerializer::~nsXHTMLContentSerializer() // destructor; only checks an invariant
michael@0: {
michael@0:   NS_ASSERTION(mOLStateStack.IsEmpty(), "Expected OL State stack to be empty"); // entries are pushed in SerializeAttributes() and popped in CheckElementEnd()
michael@0: }
michael@0: 
michael@0: // Prepares the serializer for a serialization run.
michael@0: //
michael@0: // Forwards to nsXMLContentSerializer::Init() and then resets the XHTML
michael@0: // specific state (copy mode, body tracking, entity encoding, OL tracking).
michael@0: //
michael@0: // @param aFlags       nsIDocumentEncoder output flags.
michael@0: // @param aWrapColumn  forwarded to the base class.
michael@0: // @param aCharSet     forwarded to the base class.
michael@0: // @param aIsCopying   stored in mIsCopying.
michael@0: // @param aRewriteEncodingDeclaration  stored in mRewriteEncodingDeclaration.
michael@0: // @return the base class failure code, or NS_OK.
michael@0: NS_IMETHODIMP
michael@0: nsXHTMLContentSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
michael@0:                               const char* aCharSet, bool aIsCopying,
michael@0:                               bool aRewriteEncodingDeclaration)
michael@0: {
michael@0:   // The previous version of the HTML serializer wrapped implicitly, so
michael@0:   // formatted output keeps turning wrapping on to stay compatible with
michael@0:   // the existing calling code.
michael@0:   // XXXLJ perhaps should we remove this default settings later ?
michael@0:   if (aFlags & nsIDocumentEncoder::OutputFormatted) {
michael@0:     aFlags |= nsIDocumentEncoder::OutputWrap;
michael@0:   }
michael@0: 
michael@0:   nsresult rv = nsXMLContentSerializer::Init(aFlags, aWrapColumn, aCharSet,
michael@0:                                              aIsCopying,
michael@0:                                              aRewriteEncodingDeclaration);
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   // Reset the per-run state.
michael@0:   mRewriteEncodingDeclaration = aRewriteEncodingDeclaration;
michael@0:   mIsCopying = aIsCopying;
michael@0:   mIsFirstChildOfOL = false;
michael@0:   mInBody = 0;
michael@0:   mDisableEntityEncoding = 0;
michael@0:   mBodyOnly = (mFlags & nsIDocumentEncoder::OutputBodyOnly) != 0;
michael@0: 
michael@0:   // The entity converter is only created when W3C entity encoding was
michael@0:   // requested.
michael@0:   if (mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities) {
michael@0:     mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID);
michael@0:   }
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Scans |text| line by line ('\n' separated) and reports whether any line
michael@0: // is longer than kLongLineLen characters. When that is the case we presume
michael@0: // the formatting is wonky (e.g. the node has been edited) and the caller
michael@0: // had better rewrap the whole text node.
michael@0: //
michael@0: // @param text                the string to inspect.
michael@0: // @param aLastNewlineOffset  receives the offset of the last '\n' found,
michael@0: //                            or kNotFound when there is none.
michael@0: // @return true if at least one line exceeds kLongLineLen.
michael@0: bool
michael@0: nsXHTMLContentSerializer::HasLongLines(const nsString& text, int32_t& aLastNewlineOffset)
michael@0: {
michael@0:   const uint32_t length = text.Length();
michael@0:   bool foundLongLine = false;
michael@0:   aLastNewlineOffset = kNotFound;
michael@0: 
michael@0:   uint32_t lineStart = 0;
michael@0:   while (lineStart < length) {
michael@0:     int32_t lineEnd = text.FindChar('\n', lineStart);
michael@0:     if (lineEnd >= 0) {
michael@0:       aLastNewlineOffset = lineEnd;
michael@0:     }
michael@0:     else {
michael@0:       // No further newline: the last line runs to the end of the string.
michael@0:       lineEnd = text.Length();
michael@0:     }
michael@0: 
michael@0:     if (int32_t(lineEnd - lineStart) > kLongLineLen) {
michael@0:       foundLongLine = true;
michael@0:     }
michael@0: 
michael@0:     // Keep scanning even after a long line was found so that
michael@0:     // aLastNewlineOffset ends up at the last newline.
michael@0:     lineStart = lineEnd + 1;
michael@0:   }
michael@0: 
michael@0:   return foundLongLine;
michael@0: }
michael@0: 
michael@0: // Serializes the [aStartOffset, aEndOffset) range of a text node into aStr.
michael@0: //
michael@0: // The output routine depends on the current mode: verbatim (inside <pre>
michael@0: // or raw mode), formatted + wrapped, wrapped, or - when none of those is
michael@0: // active - wrapped only if the text contains over-long lines.
michael@0: //
michael@0: // @return NS_ERROR_FAILURE if the text data cannot be extracted, NS_OK
michael@0: //         otherwise.
michael@0: NS_IMETHODIMP
michael@0: nsXHTMLContentSerializer::AppendText(nsIContent* aText,
michael@0:                                      int32_t aStartOffset,
michael@0:                                      int32_t aEndOffset,
michael@0:                                      nsAString& aStr)
michael@0: {
michael@0:   NS_ENSURE_ARG(aText);
michael@0: 
michael@0:   nsAutoString data;
michael@0:   nsresult rv = AppendTextData(aText, aStartOffset, aEndOffset, data, true);
michael@0:   if (NS_FAILED(rv)) {
michael@0:     return NS_ERROR_FAILURE;
michael@0:   }
michael@0: 
michael@0:   if (mPreLevel > 0 || mDoRaw) {
michael@0:     AppendToStringConvertLF(data, aStr);
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   if (mDoFormat) {
michael@0:     AppendToStringFormatedWrapped(data, aStr);
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   if (mDoWrap) {
michael@0:     AppendToStringWrapped(data, aStr);
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   int32_t lastNewlineOffset = kNotFound;
michael@0:   if (!HasLongLines(data, lastNewlineOffset)) {
michael@0:     AppendToStringConvertLF(data, aStr);
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // We have long lines: rewrap this text node, switching wrapping on only
michael@0:   // for the duration of the call.
michael@0:   mDoWrap = true;
michael@0:   AppendToStringWrapped(data, aStr);
michael@0:   mDoWrap = false;
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: nsresult
michael@0: nsXHTMLContentSerializer::EscapeURI(nsIContent* aContent, const nsAString& aURI, nsAString& aEscapedURI)
michael@0: {
michael@0:   // URL escape %xx cannot be used in JS.
michael@0:   // No escaping if the scheme is 'javascript'.
michael@0:   if (IsJavaScript(aContent, nsGkAtoms::href, kNameSpaceID_None, aURI)) {
michael@0:     aEscapedURI = aURI;
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // nsITextToSubURI does charset convert plus uri escape
michael@0:   // This is needed to convert to a document charset which is needed to support existing browsers.
michael@0:   // But we eventually want to use UTF-8 instead of a document charset, then the code would be much simpler.
michael@0:   // See HTML 4.01 spec, "Appendix B.2.1 Non-ASCII characters in URI attribute values"
michael@0:   nsCOMPtr<nsITextToSubURI> textToSubURI;
michael@0:   nsAutoString uri(aURI); // in order to use FindCharInSet()
michael@0:   nsresult rv = NS_OK;
michael@0: 
michael@0:   if (!mCharset.IsEmpty() && !IsASCII(uri)) {
michael@0:     textToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv);
michael@0:     NS_ENSURE_SUCCESS(rv, rv);
michael@0:   }
michael@0: 
michael@0:   int32_t start = 0;
michael@0:   int32_t end;
michael@0:   nsAutoString part;
michael@0:   nsXPIDLCString escapedURI;
michael@0:   aEscapedURI.Truncate(0);
michael@0: 
michael@0:   // Loop and escape parts by avoiding escaping reserved characters
michael@0:   // (and '%', '#', as well as '[' and ']' for IPv6 address literals).
michael@0:   while ((end = uri.FindCharInSet("%#;/?:@&=+$,[]", start)) != -1) {
michael@0:     part = Substring(aURI, start, (end-start));
michael@0:     if (textToSubURI && !IsASCII(part)) {
michael@0:       rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
michael@0:       NS_ENSURE_SUCCESS(rv, rv);
michael@0:     }
michael@0:     else {
michael@0:       escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path));
michael@0:     }
michael@0:     AppendASCIItoUTF16(escapedURI, aEscapedURI);
michael@0: 
michael@0:     // Append a reserved character without escaping.
michael@0:     part = Substring(aURI, end, 1);
michael@0:     aEscapedURI.Append(part);
michael@0:     start = end + 1;
michael@0:   }
michael@0: 
michael@0:   if (start < (int32_t) aURI.Length()) {
michael@0:     // Escape the remaining part.
michael@0:     part = Substring(aURI, start, aURI.Length()-start);
michael@0:     if (textToSubURI) {
michael@0:       rv = textToSubURI->ConvertAndEscape(mCharset.get(), part.get(), getter_Copies(escapedURI));
michael@0:       NS_ENSURE_SUCCESS(rv, rv);
michael@0:     }
michael@0:     else {
michael@0:       escapedURI.Adopt(nsEscape(NS_ConvertUTF16toUTF8(part).get(), url_Path));
michael@0:     }
michael@0:     AppendASCIItoUTF16(escapedURI, aEscapedURI);
michael@0:   }
michael@0: 
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: // Serializes the namespace declaration (if one is needed) and all the
michael@0: // attributes of an element into aStr.
michael@0: //
michael@0: // For XHTML elements this additionally:
michael@0: //  - tracks OL/LI state while copying so LI ordinals can be emitted,
michael@0: //  - drops editor-internal attributes (_moz*, -moz*, <br type="_moz...">),
michael@0: //  - optionally makes href/src absolute and escapes them,
michael@0: //  - rewrites the content-type <meta> value to the output charset,
michael@0: //  - expands shorthand (boolean) attributes to name="name".
michael@0: //
michael@0: // @param aContent          element whose attributes are read.
michael@0: // @param aOriginalElement  element used for namespace bookkeeping and the
michael@0: //                          OL first-child check.
michael@0: // @param aTagPrefix        prefix of the element's tag.
michael@0: // @param aTagNamespaceURI  namespace URI of the element's tag.
michael@0: // @param aTagName          local name of the element.
michael@0: // @param aStr              output string.
michael@0: // @param aSkipAttr         index of an attribute to leave out.
michael@0: // @param aAddNSAttr        whether a namespace declaration for the tag
michael@0: //                          itself has to be written.
michael@0: void
michael@0: nsXHTMLContentSerializer::SerializeAttributes(nsIContent* aContent,
michael@0:                                               nsIContent *aOriginalElement,
michael@0:                                               nsAString& aTagPrefix,
michael@0:                                               const nsAString& aTagNamespaceURI,
michael@0:                                               nsIAtom* aTagName,
michael@0:                                               nsAString& aStr,
michael@0:                                               uint32_t aSkipAttr,
michael@0:                                               bool aAddNSAttr)
michael@0: {
michael@0:   nsAutoString prefixStr, uriStr, valueStr;
michael@0:   nsAutoString xmlnsStr;
michael@0:   xmlnsStr.AssignLiteral(kXMLNS);
michael@0: 
michael@0:   // this method is not called by nsHTMLContentSerializer
michael@0:   // so we don't have to check HTML element, just XHTML
michael@0:   const int32_t contentNamespaceID = aContent->GetNameSpaceID();
michael@0:   const bool isXHTMLElement = (kNameSpaceID_XHTML == contentNamespaceID);
michael@0: 
michael@0:   if (mIsCopying && isXHTMLElement) {
michael@0:     // OL and LI elements are tracked so that the ordinal number of each LI
michael@0:     // can be worked out.
michael@0:     if (aTagName == nsGkAtoms::ol) {
michael@0:       // Remember the OL's "start" value on the state stack.
michael@0:       nsAutoString start;
michael@0:       aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start);
michael@0: 
michael@0:       int32_t startAttrVal = 0;
michael@0:       if (!start.IsEmpty()) {
michael@0:         nsresult parseResult = NS_OK;
michael@0:         startAttrVal = start.ToInteger(&parseResult);
michael@0:         // The first LI has to show the "start" value, and every LI
michael@0:         // increments the counter before using it, hence the -1.
michael@0:         // If ToInteger() fails the counter falls back to 0.
michael@0:         startAttrVal = NS_SUCCEEDED(parseResult) ? startAttrVal - 1 : 0;
michael@0:       }
michael@0:       mOLStateStack.AppendElement(olState(startAttrVal, true));
michael@0:     }
michael@0:     else if (aTagName == nsGkAtoms::li) {
michael@0:       mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
michael@0:       if (mIsFirstChildOfOL) {
michael@0:         // The "value" attribute of such an LI is serialized separately.
michael@0:         SerializeLIValueAttribute(aContent, aStr);
michael@0:       }
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // If a new namespace declaration is required, serialize it and push it
michael@0:   // on the namespace stack.
michael@0:   if (aAddNSAttr) {
michael@0:     if (!aTagPrefix.IsEmpty()) {
michael@0:       // Prefixed namespace declaration
michael@0:       SerializeAttr(xmlnsStr, aTagPrefix, aTagNamespaceURI, aStr, true);
michael@0:     } else {
michael@0:       // Default namespace declaration
michael@0:       SerializeAttr(EmptyString(), xmlnsStr, aTagNamespaceURI, aStr, true);
michael@0:     }
michael@0:     PushNameSpaceDecl(aTagPrefix, aTagNamespaceURI, aOriginalElement);
michael@0:   }
michael@0: 
michael@0:   NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
michael@0: 
michael@0:   const uint32_t count = aContent->GetAttrCount();
michael@0: 
michael@0:   // Now serialize each of the attributes
michael@0:   // XXX Unfortunately we need a namespace manager to get
michael@0:   // attribute URIs.
michael@0:   for (uint32_t index = 0; index < count; ++index) {
michael@0:     if (index == aSkipAttr) {
michael@0:       continue;
michael@0:     }
michael@0: 
michael@0:     const nsAttrName* name = aContent->GetAttrNameAt(index);
michael@0:     const int32_t namespaceID = name->NamespaceID();
michael@0:     nsIAtom* attrName = name->LocalName();
michael@0:     nsIAtom* attrPrefix = name->GetPrefix();
michael@0: 
michael@0:     // Attributes whose name starts with [-|_]moz are never written out.
michael@0:     nsDependentAtomString nameStr(attrName);
michael@0:     if (StringBeginsWith(nameStr, NS_LITERAL_STRING("_moz")) ||
michael@0:         StringBeginsWith(nameStr, NS_LITERAL_STRING("-moz"))) {
michael@0:       continue;
michael@0:     }
michael@0: 
michael@0:     if (attrPrefix) {
michael@0:       attrPrefix->ToString(prefixStr);
michael@0:     }
michael@0:     else {
michael@0:       prefixStr.Truncate();
michael@0:     }
michael@0: 
michael@0:     bool addNSAttr = false;
michael@0:     if (kNameSpaceID_XMLNS != namespaceID) {
michael@0:       nsContentUtils::NameSpaceManager()->GetNameSpaceURI(namespaceID, uriStr);
michael@0:       addNSAttr = ConfirmPrefix(prefixStr, uriStr, aOriginalElement, true);
michael@0:     }
michael@0: 
michael@0:     aContent->GetAttr(namespaceID, attrName, valueStr);
michael@0: 
michael@0:     bool isJS = false;
michael@0: 
michael@0:     if (!isXHTMLElement) {
michael@0:       isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
michael@0:     }
michael@0:     else {
michael@0:       const bool unqualified = (namespaceID == kNameSpaceID_None);
michael@0: 
michael@0:       //
michael@0:       // Filter out special case of <br type="_moz"> or <br _moz*>,
michael@0:       // used by the editor.  Bug 16988.  Yuck.
michael@0:       //
michael@0:       if (unqualified && aTagName == nsGkAtoms::br &&
michael@0:           attrName == nsGkAtoms::type &&
michael@0:           StringBeginsWith(valueStr, _mozStr)) {
michael@0:         continue;
michael@0:       }
michael@0: 
michael@0:       // Already written by SerializeLIValueAttribute() above.
michael@0:       if (mIsCopying && mIsFirstChildOfOL &&
michael@0:           aTagName == nsGkAtoms::li && attrName == nsGkAtoms::value) {
michael@0:         continue;
michael@0:       }
michael@0: 
michael@0:       isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
michael@0: 
michael@0:       if (unqualified &&
michael@0:           (attrName == nsGkAtoms::href || attrName == nsGkAtoms::src)) {
michael@0:         // Make all links absolute when converting only the selection:
michael@0:         if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
michael@0:           // Would be nice to handle OBJECT and APPLET tags,
michael@0:           // but that gets more complicated since we have to
michael@0:           // search the tag list for CODEBASE as well.
michael@0:           // For now, just leave them relative.
michael@0:           nsCOMPtr<nsIURI> uri = aContent->GetBaseURI();
michael@0:           if (uri) {
michael@0:             nsAutoString absURI;
michael@0:             if (NS_SUCCEEDED(NS_MakeAbsoluteURI(absURI, valueStr, uri))) {
michael@0:               valueStr = absURI;
michael@0:             }
michael@0:           }
michael@0:         }
michael@0: 
michael@0:         // Escape the URI; if escaping fails the unescaped value is kept.
michael@0:         nsAutoString tempURI(valueStr);
michael@0:         if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr))) {
michael@0:           valueStr = tempURI;
michael@0:         }
michael@0:       }
michael@0: 
michael@0:       if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
michael@0:           attrName == nsGkAtoms::content) {
michael@0:         // For a <meta http-equiv="content-type"> write the charset that is
michael@0:         // actually being used, not what the document says.
michael@0:         nsAutoString header;
michael@0:         aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
michael@0:         if (header.LowerCaseEqualsLiteral("content-type")) {
michael@0:           valueStr = NS_LITERAL_STRING("text/html; charset=") +
michael@0:             NS_ConvertASCIItoUTF16(mCharset);
michael@0:         }
michael@0:       }
michael@0: 
michael@0:       // Expand shorthand attribute.
michael@0:       if (unqualified && IsShorthandAttr(attrName, aTagName) &&
michael@0:           valueStr.IsEmpty()) {
michael@0:         valueStr = nameStr;
michael@0:       }
michael@0:     }
michael@0: 
michael@0:     SerializeAttr(prefixStr, nameStr, valueStr, aStr, !isJS);
michael@0: 
michael@0:     if (addNSAttr) {
michael@0:       NS_ASSERTION(!prefixStr.IsEmpty(),
michael@0:                    "Namespaced attributes must have a prefix");
michael@0:       SerializeAttr(xmlnsStr, prefixStr, uriStr, aStr, true);
michael@0:       PushNameSpaceDecl(prefixStr, uriStr, aOriginalElement);
michael@0:     }
michael@0:   }
michael@0: }
michael@0: 
michael@0: 
michael@0: // Writes the end of an element's start tag: " />" for a childless,
michael@0: // non-container XHTML element, ">" otherwise. Non-XHTML elements are
michael@0: // delegated to nsXMLContentSerializer.
michael@0: //
michael@0: // @param aOriginalElement  element being serialized.
michael@0: // @param aName             local name of the element.
michael@0: // @param aNamespaceID      namespace of the element.
michael@0: // @param aStr              output string.
michael@0: void 
michael@0: nsXHTMLContentSerializer::AppendEndOfElementStart(nsIContent *aOriginalElement,
michael@0:                                                   nsIAtom * aName,
michael@0:                                                   int32_t aNamespaceID,
michael@0:                                                   nsAString& aStr)
michael@0: {
michael@0:   // this method is not called by nsHTMLContentSerializer
michael@0:   // so we don't have to check HTML element, just XHTML
michael@0:   NS_ASSERTION(!mIsHTMLSerializer, "nsHTMLContentSerializer shouldn't call this method !");
michael@0: 
michael@0:   if (kNameSpaceID_XHTML != aNamespaceID) {
michael@0:     nsXMLContentSerializer::AppendEndOfElementStart(aOriginalElement, aName,
michael@0:                                                     aNamespaceID, aStr);
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // for non empty elements, even if they are not a container, we always
michael@0:   // serialize their content, because the XHTML element could contain non XHTML
michael@0:   // nodes useful in some context, like in an XSLT stylesheet
michael@0:   if (HasNoChildren(aOriginalElement)) {
michael@0: 
michael@0:     nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0: 
michael@0:     if (parserService) {
michael@0:       // Default to "container" so that a failing IsContainer() call never
michael@0:       // leaves us reading an uninitialized flag; in that case the element
michael@0:       // is written with a separate end tag, matching the default used by
michael@0:       // CheckElementEnd when no answer is available.
michael@0:       bool isContainer = true;
michael@0:       nsresult rv = parserService->
michael@0:         IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(aName),
michael@0:                     isContainer);
michael@0:       if (NS_SUCCEEDED(rv) && !isContainer) {
michael@0:         // for backward compatibility with HTML 4 user agents
michael@0:         // only non-container HTML elements can be closed immediately,
michael@0:         // and a space is added before />
michael@0:         AppendToString(NS_LITERAL_STRING(" />"), aStr);
michael@0:         return;
michael@0:       }
michael@0:     }
michael@0:   }
michael@0:   AppendToString(kGreaterThan, aStr);
michael@0: }
michael@0: 
michael@0: // Hook run after an element's start tag has been written.
michael@0: //
michael@0: // When the encoding declaration is being rewritten and the element is an
michael@0: // XHTML <head> without a content-type <meta> child, a
michael@0: // <meta http-equiv="content-type"> carrying the output charset is
michael@0: // inserted right after the start tag.
michael@0: //
michael@0: // @param aContent          element whose start tag was just written.
michael@0: // @param aOriginalElement  unused here.
michael@0: // @param aStr              output string.
michael@0: void
michael@0: nsXHTMLContentSerializer::AfterElementStart(nsIContent * aContent,
michael@0:                                             nsIContent *aOriginalElement,
michael@0:                                             nsAString& aStr)
michael@0: {
michael@0:   nsIAtom *name = aContent->Tag();
michael@0:   const bool isXHTMLHead =
michael@0:     aContent->GetNameSpaceID() == kNameSpaceID_XHTML &&
michael@0:     name == nsGkAtoms::head;
michael@0:   if (!isXHTMLHead || !mRewriteEncodingDeclaration) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // Look for an existing content-type meta child. If there is one it gets
michael@0:   // the correct charset when its attributes are serialized, so nothing
michael@0:   // has to be inserted here.
michael@0:   nsIContent* child = aContent->GetFirstChild();
michael@0:   for (; child; child = child->GetNextSibling()) {
michael@0:     if (!child->IsHTML(nsGkAtoms::meta) ||
michael@0:         !child->HasAttr(kNameSpaceID_None, nsGkAtoms::content)) {
michael@0:       continue;
michael@0:     }
michael@0: 
michael@0:     nsAutoString header;
michael@0:     child->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
michael@0:     if (header.LowerCaseEqualsLiteral("content-type")) {
michael@0:       return;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // None found: emit our own declaration.
michael@0:   AppendNewLineToString(aStr);
michael@0:   if (mDoFormat) {
michael@0:     AppendIndentation(aStr);
michael@0:   }
michael@0:   AppendToString(NS_LITERAL_STRING("<meta http-equiv=\"content-type\""),
michael@0:                  aStr);
michael@0:   AppendToString(NS_LITERAL_STRING(" content=\"text/html; charset="), aStr);
michael@0:   AppendToString(NS_ConvertASCIItoUTF16(mCharset), aStr);
michael@0:   if (!mIsHTMLSerializer) {
michael@0:     AppendToString(NS_LITERAL_STRING("\" />"), aStr);
michael@0:   }
michael@0:   else {
michael@0:     AppendToString(NS_LITERAL_STRING("\">"), aStr);
michael@0:   }
michael@0: }
michael@0: 
michael@0: // Hook run after an element's end tag has been written: leaving an XHTML
michael@0: // <body> decrements the body nesting counter that CheckElementStart
michael@0: // increments.
michael@0: //
michael@0: // @param aContent  element whose end tag was just written.
michael@0: // @param aStr      unused here.
michael@0: void
michael@0: nsXHTMLContentSerializer::AfterElementEnd(nsIContent * aContent,
michael@0:                                           nsAString& aStr)
michael@0: {
michael@0:   NS_ASSERTION(!mIsHTMLSerializer, "nsHTMLContentSerializer shouldn't call this method !");
michael@0: 
michael@0:   // this method is not called by nsHTMLContentSerializer
michael@0:   // so we don't have to check HTML element, just XHTML
michael@0:   const bool isXHTMLBody =
michael@0:     aContent->GetNameSpaceID() == kNameSpaceID_XHTML &&
michael@0:     aContent->Tag() == nsGkAtoms::body;
michael@0:   if (isXHTMLBody) {
michael@0:     --mInBody;
michael@0:   }
michael@0: }
michael@0: 
michael@0: 
michael@0: // Writes the document prologue by delegating to nsXMLContentSerializer,
michael@0: // unless only the body is being output, in which case nothing is written.
michael@0: //
michael@0: // @return NS_OK in body-only mode, otherwise the base class result.
michael@0: NS_IMETHODIMP
michael@0: nsXHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument,
michael@0:                                               nsAString& aStr)
michael@0: {
michael@0:   if (mBodyOnly) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   return nsXMLContentSerializer::AppendDocumentStart(aDocument, aStr);
michael@0: }
michael@0: 
michael@0: // Decides whether the start tag of aContent has to be serialized.
michael@0: //
michael@0: // @param aContent      element about to be serialized.
michael@0: // @param aForceFormat  set to true when the element carries the editor's
michael@0: //                      _moz_dirty attribute and that attribute is not
michael@0: //                      being ignored.
michael@0: // @param aStr          output string; receives a newline for a <br>
michael@0: //                      inside <pre> when OutputNoFormattingInPre is set.
michael@0: // @return false when the element has been fully handled here (the <br>
michael@0: //         case), true otherwise.
michael@0: bool
michael@0: nsXHTMLContentSerializer::CheckElementStart(nsIContent * aContent,
michael@0:                                             bool & aForceFormat,
michael@0:                                             nsAString& aStr)
michael@0: {
michael@0:   // The editor emits _moz_dirty to ask for pretty printing of this element
michael@0:   // even when pretty printing is otherwise off.
michael@0:   const bool honorMozDirty =
michael@0:     !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty);
michael@0:   aForceFormat = honorMozDirty &&
michael@0:                  aContent->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
michael@0: 
michael@0:   nsIAtom *name = aContent->Tag();
michael@0:   if (aContent->GetNameSpaceID() != kNameSpaceID_XHTML) {
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   const bool rawBreakInPre =
michael@0:     name == nsGkAtoms::br && mPreLevel > 0 &&
michael@0:     (mFlags & nsIDocumentEncoder::OutputNoFormattingInPre);
michael@0:   if (rawBreakInPre) {
michael@0:     AppendNewLineToString(aStr);
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   if (name == nsGkAtoms::body) {
michael@0:     ++mInBody;
michael@0:   }
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: // Decides whether an end tag has to be written for aContent.
michael@0: //
michael@0: // @param aContent      element being closed.
michael@0: // @param aForceFormat  set to true when the element carries the editor's
michael@0: //                      _moz_dirty attribute and that attribute is not
michael@0: //                      being ignored.
michael@0: // @param aStr          output string (only forwarded to the base class).
michael@0: // @return false for a childless non-container XHTML element (already
michael@0: //         closed by AppendEndOfElementStart), true for any other XHTML
michael@0: //         element, and the base class answer for non-XHTML elements.
michael@0: bool
michael@0: nsXHTMLContentSerializer::CheckElementEnd(nsIContent * aContent,
michael@0:                                           bool & aForceFormat,
michael@0:                                           nsAString& aStr)
michael@0: {
michael@0:   NS_ASSERTION(!mIsHTMLSerializer, "nsHTMLContentSerializer shouldn't call this method !");
michael@0: 
michael@0:   aForceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
michael@0:                  aContent->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
michael@0: 
michael@0:   nsIAtom *name = aContent->Tag();
michael@0:   int32_t namespaceID = aContent->GetNameSpaceID();
michael@0: 
michael@0:   // this method is not called by nsHTMLContentSerializer
michael@0:   // so we don't have to check HTML element, just XHTML
michael@0:   if (namespaceID == kNameSpaceID_XHTML) {
michael@0:     if (mIsCopying && name == nsGkAtoms::ol) {
michael@0:       NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
michael@0:       /* Though at this point we must always have a state to be deleted, as all
michael@0:       the OL opening tags are supposed to push an olState object to the stack */
michael@0:       if (!mOLStateStack.IsEmpty()) {
michael@0:         mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1);
michael@0:       }
michael@0:     }
michael@0: 
michael@0:     if (HasNoChildren(aContent)) {
michael@0:       nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0: 
michael@0:       if (parserService) {
michael@0:         // Default to "container" so that a failing IsContainer() call never
michael@0:         // leaves us reading an uninitialized flag; an end tag is then
michael@0:         // written, matching the default used by AppendEndOfElementStart
michael@0:         // when no answer is available.
michael@0:         bool isContainer = true;
michael@0:         nsresult rv = parserService->
michael@0:           IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name),
michael@0:                       isContainer);
michael@0:         if (NS_SUCCEEDED(rv) && !isContainer) {
michael@0:           // non-container HTML elements are already closed,
michael@0:           // see AppendEndOfElementStart
michael@0:           return false;
michael@0:         }
michael@0:       }
michael@0:     }
michael@0:     // for backward compatibility with old HTML user agents,
michael@0:     // empty elements should have an ending tag, so we mustn't call
michael@0:     // nsXMLContentSerializer::CheckElementEnd
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   // aForceFormat has already been computed above, so the value produced by
michael@0:   // the base class is discarded.
michael@0:   bool dummyFormat;
michael@0:   return nsXMLContentSerializer::CheckElementEnd(aContent, dummyFormat, aStr);
michael@0: }
michael@0: 
michael@0: // Appends aStr to aOutputStr, translating entities unless that has been
michael@0: // disabled. In body-only mode nothing is appended while outside <body>.
michael@0: //
michael@0: // @param aStr        text to append.
michael@0: // @param aOutputStr  output string.
michael@0: void
michael@0: nsXHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
michael@0:                                                      nsAString& aOutputStr)
michael@0: {
michael@0:   const bool outsideBody = mBodyOnly && !mInBody;
michael@0:   if (outsideBody) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   if (!mDisableEntityEncoding) {
michael@0:     nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr);
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // Entity encoding is switched off: copy the text through verbatim.
michael@0:   aOutputStr.Append(aStr);
michael@0: }
michael@0: 
michael@0: // Tells whether aAttrName is a shorthand (boolean) attribute on the
michael@0: // element named aElementName, i.e. one that SerializeAttributes expands
michael@0: // to name="name" when its value is empty.
michael@0: //
michael@0: // Each attribute is looked at once; the answer is then simply whether the
michael@0: // element is one of those the attribute is a shorthand on.
michael@0: //
michael@0: // @param aAttrName     attribute local name.
michael@0: // @param aElementName  element local name.
michael@0: // @return true for a known shorthand attribute/element pair.
michael@0: bool
michael@0: nsXHTMLContentSerializer::IsShorthandAttr(const nsIAtom* aAttrName,
michael@0:                                           const nsIAtom* aElementName)
michael@0: {
michael@0:   // checked
michael@0:   if (aAttrName == nsGkAtoms::checked) {
michael@0:     return aElementName == nsGkAtoms::input;
michael@0:   }
michael@0: 
michael@0:   // compact
michael@0:   if (aAttrName == nsGkAtoms::compact) {
michael@0:     return aElementName == nsGkAtoms::dir ||
michael@0:            aElementName == nsGkAtoms::dl ||
michael@0:            aElementName == nsGkAtoms::menu ||
michael@0:            aElementName == nsGkAtoms::ol ||
michael@0:            aElementName == nsGkAtoms::ul;
michael@0:   }
michael@0: 
michael@0:   // declare
michael@0:   if (aAttrName == nsGkAtoms::declare) {
michael@0:     return aElementName == nsGkAtoms::object;
michael@0:   }
michael@0: 
michael@0:   // defer
michael@0:   if (aAttrName == nsGkAtoms::defer) {
michael@0:     return aElementName == nsGkAtoms::script;
michael@0:   }
michael@0: 
michael@0:   // disabled
michael@0:   if (aAttrName == nsGkAtoms::disabled) {
michael@0:     return aElementName == nsGkAtoms::button ||
michael@0:            aElementName == nsGkAtoms::input ||
michael@0:            aElementName == nsGkAtoms::optgroup ||
michael@0:            aElementName == nsGkAtoms::option ||
michael@0:            aElementName == nsGkAtoms::select ||
michael@0:            aElementName == nsGkAtoms::textarea;
michael@0:   }
michael@0: 
michael@0:   // ismap
michael@0:   if (aAttrName == nsGkAtoms::ismap) {
michael@0:     return aElementName == nsGkAtoms::img ||
michael@0:            aElementName == nsGkAtoms::input;
michael@0:   }
michael@0: 
michael@0:   // multiple
michael@0:   if (aAttrName == nsGkAtoms::multiple) {
michael@0:     return aElementName == nsGkAtoms::select;
michael@0:   }
michael@0: 
michael@0:   // noresize
michael@0:   if (aAttrName == nsGkAtoms::noresize) {
michael@0:     return aElementName == nsGkAtoms::frame;
michael@0:   }
michael@0: 
michael@0:   // noshade
michael@0:   if (aAttrName == nsGkAtoms::noshade) {
michael@0:     return aElementName == nsGkAtoms::hr;
michael@0:   }
michael@0: 
michael@0:   // nowrap
michael@0:   if (aAttrName == nsGkAtoms::nowrap) {
michael@0:     return aElementName == nsGkAtoms::td ||
michael@0:            aElementName == nsGkAtoms::th;
michael@0:   }
michael@0: 
michael@0:   // readonly
michael@0:   if (aAttrName == nsGkAtoms::readonly) {
michael@0:     return aElementName == nsGkAtoms::input ||
michael@0:            aElementName == nsGkAtoms::textarea;
michael@0:   }
michael@0: 
michael@0:   // selected
michael@0:   if (aAttrName == nsGkAtoms::selected) {
michael@0:     return aElementName == nsGkAtoms::option;
michael@0:   }
michael@0: 
michael@0:   // autoplay, muted and controls
michael@0:   if (aAttrName == nsGkAtoms::autoplay ||
michael@0:       aAttrName == nsGkAtoms::muted ||
michael@0:       aAttrName == nsGkAtoms::controls) {
michael@0:     return aElementName == nsGkAtoms::video ||
michael@0:            aElementName == nsGkAtoms::audio;
michael@0:   }
michael@0: 
michael@0:   return false;
michael@0: }
michael@0: 
michael@0: // Tells whether a line break is wanted before the start tag of the given
michael@0: // element.
michael@0: //
michael@0: // @param aNamespaceID  namespace of the element.
michael@0: // @param aName         local name of the element.
michael@0: // @return mAddSpace for non-XHTML elements; true for a fixed set of XHTML
michael@0: //         elements; otherwise whether the parser service classifies the
michael@0: //         element as a block, falling back to mAddSpace when the parser
michael@0: //         service is unavailable or cannot answer.
michael@0: bool
michael@0: nsXHTMLContentSerializer::LineBreakBeforeOpen(int32_t aNamespaceID, nsIAtom* aName)
michael@0: {
michael@0: 
michael@0:   if (aNamespaceID != kNameSpaceID_XHTML) {
michael@0:     return mAddSpace;
michael@0:   }
michael@0: 
michael@0:   if (aName == nsGkAtoms::title ||
michael@0:       aName == nsGkAtoms::meta  ||
michael@0:       aName == nsGkAtoms::link  ||
michael@0:       aName == nsGkAtoms::style ||
michael@0:       aName == nsGkAtoms::select ||
michael@0:       aName == nsGkAtoms::option ||
michael@0:       aName == nsGkAtoms::script ||
michael@0:       aName == nsGkAtoms::html) {
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0: 
michael@0:   if (parserService) {
michael@0:     // Initialize the flag and check the call's result so that a failing
michael@0:     // IsBlock() can never make us return an uninitialized value.
michael@0:     bool res = false;
michael@0:     nsresult rv = parserService->
michael@0:       IsBlock(parserService->HTMLCaseSensitiveAtomTagToId(aName), res);
michael@0:     if (NS_SUCCEEDED(rv)) {
michael@0:       return res;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   return mAddSpace;
michael@0: }
michael@0: 
michael@0: // Tells whether a line break is wanted after the start tag of the given
michael@0: // element. Only a fixed set of XHTML elements qualifies.
michael@0: //
michael@0: // @param aNamespaceID  namespace of the element.
michael@0: // @param aName         local name of the element.
michael@0: // @return true for the listed XHTML elements, false otherwise.
michael@0: bool 
michael@0: nsXHTMLContentSerializer::LineBreakAfterOpen(int32_t aNamespaceID, nsIAtom* aName)
michael@0: {
michael@0:   if (aNamespaceID != kNameSpaceID_XHTML) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   return aName == nsGkAtoms::html ||
michael@0:          aName == nsGkAtoms::head ||
michael@0:          aName == nsGkAtoms::body ||
michael@0:          aName == nsGkAtoms::ul ||
michael@0:          aName == nsGkAtoms::ol ||
michael@0:          aName == nsGkAtoms::dl ||
michael@0:          aName == nsGkAtoms::table ||
michael@0:          aName == nsGkAtoms::tbody ||
michael@0:          aName == nsGkAtoms::tr ||
michael@0:          aName == nsGkAtoms::br ||
michael@0:          aName == nsGkAtoms::meta ||
michael@0:          aName == nsGkAtoms::link ||
michael@0:          aName == nsGkAtoms::script ||
michael@0:          aName == nsGkAtoms::select ||
michael@0:          aName == nsGkAtoms::map ||
michael@0:          aName == nsGkAtoms::area ||
michael@0:          aName == nsGkAtoms::style;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Tells whether a line break should precede the end tag of the element
michael@0:  * named aName in namespace aNamespaceID.
michael@0:  *
michael@0:  * Only XHTML elements from the fixed list below yield true; everything else,
michael@0:  * including any element outside the XHTML namespace, yields false.
michael@0:  */
michael@0: bool
michael@0: nsXHTMLContentSerializer::LineBreakBeforeClose(int32_t aNamespaceID, nsIAtom* aName)
michael@0: {
michael@0:   if (aNamespaceID != kNameSpaceID_XHTML) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   return aName == nsGkAtoms::html   ||
michael@0:          aName == nsGkAtoms::head   ||
michael@0:          aName == nsGkAtoms::body   ||
michael@0:          aName == nsGkAtoms::ul     ||
michael@0:          aName == nsGkAtoms::ol     ||
michael@0:          aName == nsGkAtoms::dl     ||
michael@0:          aName == nsGkAtoms::select ||
michael@0:          aName == nsGkAtoms::table  ||
michael@0:          aName == nsGkAtoms::tbody;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Tells whether a line break should follow the end tag of the element named
michael@0:  * aName in namespace aNamespaceID.
michael@0:  *
michael@0:  * Elements outside the XHTML namespace yield false. For XHTML elements a
michael@0:  * fixed list of tags always yields true; every other tag yields the flag
michael@0:  * filled in by the parser service's IsBlock(), or false when no parser
michael@0:  * service can be obtained.
michael@0:  */
michael@0: bool
michael@0: nsXHTMLContentSerializer::LineBreakAfterClose(int32_t aNamespaceID, nsIAtom* aName)
michael@0: {
michael@0:   if (aNamespaceID != kNameSpaceID_XHTML) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   // Tags that are always followed by a line break.
michael@0:   const bool alwaysBreaks = aName == nsGkAtoms::html       ||
michael@0:                             aName == nsGkAtoms::head       ||
michael@0:                             aName == nsGkAtoms::body       ||
michael@0:                             aName == nsGkAtoms::tr         ||
michael@0:                             aName == nsGkAtoms::th         ||
michael@0:                             aName == nsGkAtoms::td         ||
michael@0:                             aName == nsGkAtoms::pre        ||
michael@0:                             aName == nsGkAtoms::title      ||
michael@0:                             aName == nsGkAtoms::li         ||
michael@0:                             aName == nsGkAtoms::dt         ||
michael@0:                             aName == nsGkAtoms::dd         ||
michael@0:                             aName == nsGkAtoms::blockquote ||
michael@0:                             aName == nsGkAtoms::select     ||
michael@0:                             aName == nsGkAtoms::option     ||
michael@0:                             aName == nsGkAtoms::p          ||
michael@0:                             aName == nsGkAtoms::map        ||
michael@0:                             aName == nsGkAtoms::div;
michael@0:   if (alwaysBreaks) {
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   nsIParserService* parserService = nsContentUtils::GetParserService();
michael@0:   if (!parserService) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   // Any other tag: ask the parser service whether it is a block element.
michael@0:   bool isBlock;
michael@0:   parserService->
michael@0:     IsBlock(parserService->HTMLCaseSensitiveAtomTagToId(aName), isBlock);
michael@0:   return isBlock;
michael@0: }
michael@0: 
michael@0: 
michael@0: /**
michael@0:  * Increments mPreLevel when aNode is an XHTML <pre>, <script>, <style>,
michael@0:  * <noscript> or <noframes> element. Any other node leaves mPreLevel
michael@0:  * untouched.
michael@0:  */
michael@0: void
michael@0: nsXHTMLContentSerializer::MaybeEnterInPreContent(nsIContent* aNode)
michael@0: {
michael@0:   if (aNode->GetNameSpaceID() != kNameSpaceID_XHTML) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   nsIAtom* tag = aNode->Tag();
michael@0:   const bool isPreLike = tag == nsGkAtoms::pre      ||
michael@0:                          tag == nsGkAtoms::script   ||
michael@0:                          tag == nsGkAtoms::style    ||
michael@0:                          tag == nsGkAtoms::noscript ||
michael@0:                          tag == nsGkAtoms::noframes;
michael@0:   if (!isPreLike) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   ++mPreLevel;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Decrements mPreLevel when aNode is an XHTML <pre>, <script>, <style>,
michael@0:  * <noscript> or <noframes> element. Any other node leaves mPreLevel
michael@0:  * untouched.
michael@0:  */
michael@0: void
michael@0: nsXHTMLContentSerializer::MaybeLeaveFromPreContent(nsIContent* aNode)
michael@0: {
michael@0:   if (aNode->GetNameSpaceID() != kNameSpaceID_XHTML) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   nsIAtom* tag = aNode->Tag();
michael@0:   const bool isPreLike = tag == nsGkAtoms::pre      ||
michael@0:                          tag == nsGkAtoms::script   ||
michael@0:                          tag == nsGkAtoms::style    ||
michael@0:                          tag == nsGkAtoms::noscript ||
michael@0:                          tag == nsGkAtoms::noframes;
michael@0:   if (!isPreLike) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   --mPreLevel;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Serializes a "value" attribute for aElement into aStr when one is needed.
michael@0:  *
michael@0:  * We are copying and we are at the "first" LI node of an OL in the selected
michael@0:  * range. It may not be the first LI child of the OL, but it is the first one
michael@0:  * in the selected range. We get here only once per OL.
michael@0:  *
michael@0:  * Starting at aElement and walking back through its previous siblings, the
michael@0:  * nearest LI carrying a non-empty "value" attribute is looked up, while the
michael@0:  * LI elements without one that are visited on the way are counted. The
michael@0:  * attribute written, if any, is derived from that value (or from the start
michael@0:  * value recorded on top of mOLStateStack) plus the count.
michael@0:  */
michael@0: void
michael@0: nsXHTMLContentSerializer::SerializeLIValueAttribute(nsIContent* aElement,
michael@0:                                                     nsAString& aStr)
michael@0: {
michael@0:   int32_t startVal = 0;
michael@0: 
michael@0:   if (!mOLStateStack.IsEmpty()) {
michael@0:     olState& topState = mOLStateStack[mOLStateStack.Length() - 1];
michael@0:     // isFirstListItem should be true only before the serialization of the
michael@0:     // first item in the list.
michael@0:     topState.isFirstListItem = false;
michael@0:     startVal = topState.startVal;
michael@0:   }
michael@0: 
michael@0:   nsAutoString foundValue;
michael@0:   bool found = false;
michael@0:   // Number of LI elements without a "value" attribute visited so far,
michael@0:   // aElement itself included.
michael@0:   int32_t offset = 0;
michael@0: 
michael@0:   // Traverse aElement and its previous siblings until we find an LI with a
michael@0:   // "value" attribute.
michael@0:   nsCOMPtr<nsIDOMNode> cursor = do_QueryInterface(aElement);
michael@0:   while (cursor && !found) {
michael@0:     // The query yields null for nodes that are not elements (e.g. text).
michael@0:     nsCOMPtr<nsIDOMElement> asElement = do_QueryInterface(cursor);
michael@0:     if (asElement) {
michael@0:       nsAutoString tagName;
michael@0:       asElement->GetTagName(tagName);
michael@0:       if (tagName.LowerCaseEqualsLiteral("li")) {
michael@0:         asElement->GetAttribute(NS_LITERAL_STRING("value"), foundValue);
michael@0:         if (!foundValue.IsEmpty()) {
michael@0:           found = true;
michael@0:           nsresult rv = NS_OK;
michael@0:           startVal = foundValue.ToInteger(&rv);
michael@0:         }
michael@0:         else {
michael@0:           ++offset;
michael@0:         }
michael@0:       }
michael@0:     }
michael@0:     nsCOMPtr<nsIDOMNode> previous;
michael@0:     cursor->GetPreviousSibling(getter_AddRefs(previous));
michael@0:     cursor.swap(previous);
michael@0:   }
michael@0: 
michael@0:   if (found && offset == 0) {
michael@0:     // The LI itself has the value attribute and we did not need to traverse
michael@0:     // back. Just serialize the value attribute like for other tags.
michael@0:     SerializeAttr(EmptyString(), NS_LITERAL_STRING("value"), foundValue, aStr, false);
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   if (offset == 0 || (offset == 1 && !found)) {
michael@0:     // offset == 1 && !found: the LI is the first LI child of the OL and has
michael@0:     // no "value" attribute. Do not add one, to keep the changes minimal.
michael@0:     // offset == 0 here: no LI was visited at all, so there is nothing to write.
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // The LI had no "value" attribute of its own: synthesize one from the
michael@0:   // starting value and the number of LI elements counted.
michael@0:   nsAutoString computedValue;
michael@0:   computedValue.AppendInt(startVal + offset);
michael@0:   SerializeAttr(EmptyString(), NS_LITERAL_STRING("value"), computedValue, aStr, false);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Returns true when aElement's parent node is named "ol" (compared
michael@0:  * case-insensitively) and the entry on top of mOLStateStack still has
michael@0:  * isFirstListItem set. Returns false when there is no parent, the parent is
michael@0:  * not an "ol", or the stack is empty.
michael@0:  */
michael@0: bool
michael@0: nsXHTMLContentSerializer::IsFirstChildOfOL(nsIContent* aElement)
michael@0: {
michael@0:   nsCOMPtr<nsIDOMNode> self = do_QueryInterface(aElement);
michael@0: 
michael@0:   nsCOMPtr<nsIDOMNode> parent;
michael@0:   self->GetParentNode(getter_AddRefs(parent));
michael@0:   if (!parent) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   nsAutoString parentName;
michael@0:   parent->GetNodeName(parentName);
michael@0:   if (!parentName.LowerCaseEqualsLiteral("ol")) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   if (mOLStateStack.IsEmpty()) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   return mOLStateStack[mOLStateStack.Length() - 1].isFirstListItem;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Returns true when aContent has no children other than zero-length text
michael@0:  * nodes. Any child that is not a text node, or any text node with a
michael@0:  * non-zero TextLength(), makes the result false.
michael@0:  */
michael@0: bool
michael@0: nsXHTMLContentSerializer::HasNoChildren(nsIContent * aContent) {
michael@0: 
michael@0:   nsIContent* kid = aContent->GetFirstChild();
michael@0:   while (kid) {
michael@0:     const bool isText = kid->IsNodeOfType(nsINode::eTEXT);
michael@0:     if (!isText || kid->TextLength()) {
michael@0:       return false;
michael@0:     }
michael@0:     kid = kid->GetNextSibling();
michael@0:   }
michael@0: 
michael@0:   return true;
michael@0: }