michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsString.h" michael@0: #include "nsIComponentManager.h" michael@0: #include "nsCOMPtr.h" michael@0: #include "nsXPCOM.h" michael@0: #include "nsISupportsPrimitives.h" michael@0: #include "nsXPIDLString.h" michael@0: #include "nsScriptLoader.h" michael@0: #include "nsEscape.h" michael@0: #include "nsIParser.h" michael@0: #include "nsIDTD.h" michael@0: #include "nsNetCID.h" michael@0: #include "nsNetUtil.h" michael@0: #include "nsParserCIID.h" michael@0: #include "nsContentUtils.h" michael@0: #include "nsIContentSink.h" michael@0: #include "nsIDocumentEncoder.h" michael@0: #include "nsIDOMDocumentFragment.h" michael@0: #include "nsIFragmentContentSink.h" michael@0: #include "nsIDOMDocument.h" michael@0: #include "nsIDOMNodeList.h" michael@0: #include "nsIDOMNode.h" michael@0: #include "nsIDOMElement.h" michael@0: #include "nsIDocument.h" michael@0: #include "nsIContent.h" michael@0: #include "nsAttrName.h" michael@0: #include "nsHTMLParts.h" michael@0: #include "nsContentCID.h" michael@0: #include "nsIScriptableUnescapeHTML.h" michael@0: #include "nsParserUtils.h" michael@0: #include "nsAutoPtr.h" michael@0: #include "nsTreeSanitizer.h" michael@0: #include "nsHtml5Module.h" michael@0: #include "mozilla/dom/DocumentFragment.h" michael@0: michael@0: #define XHTML_DIV_TAG "div xmlns=\"http://www.w3.org/1999/xhtml\"" michael@0: michael@0: using namespace mozilla::dom; michael@0: michael@0: NS_IMPL_ISUPPORTS(nsParserUtils, michael@0: nsIScriptableUnescapeHTML, michael@0: nsIParserUtils) michael@0: michael@0: static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID); michael@0: michael@0: michael@0: michael@0: NS_IMETHODIMP michael@0: nsParserUtils::ConvertToPlainText(const nsAString& aFromStr, michael@0: uint32_t aFlags, michael@0: uint32_t aWrapCol, michael@0: nsAString& aToStr) michael@0: { michael@0: return nsContentUtils::ConvertToPlainText(aFromStr, michael@0: aToStr, michael@0: aFlags, michael@0: aWrapCol); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsParserUtils::Unescape(const nsAString& aFromStr, michael@0: nsAString& aToStr) michael@0: { michael@0: return nsContentUtils::ConvertToPlainText(aFromStr, michael@0: aToStr, michael@0: nsIDocumentEncoder::OutputSelectionOnly | michael@0: nsIDocumentEncoder::OutputAbsoluteLinks, michael@0: 0); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsParserUtils::Sanitize(const nsAString& aFromStr, michael@0: uint32_t aFlags, michael@0: nsAString& aToStr) michael@0: { michael@0: nsCOMPtr uri; michael@0: NS_NewURI(getter_AddRefs(uri), "about:blank"); michael@0: nsCOMPtr principal = michael@0: do_CreateInstance("@mozilla.org/nullprincipal;1"); michael@0: nsCOMPtr domDocument; michael@0: nsresult rv = NS_NewDOMDocument(getter_AddRefs(domDocument), michael@0: EmptyString(), michael@0: EmptyString(), michael@0: nullptr, michael@0: uri, michael@0: uri, michael@0: principal, michael@0: true, michael@0: nullptr, michael@0: DocumentFlavorHTML); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: nsCOMPtr document = do_QueryInterface(domDocument); michael@0: rv = nsContentUtils::ParseDocumentHTML(aFromStr, document, false); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: nsTreeSanitizer sanitizer(aFlags); michael@0: sanitizer.Sanitize(document); michael@0: michael@0: nsCOMPtr encoder = michael@0: do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html"); michael@0: michael@0: encoder->NativeInit(document, michael@0: NS_LITERAL_STRING("text/html"), michael@0: nsIDocumentEncoder::OutputDontRewriteEncodingDeclaration | michael@0: nsIDocumentEncoder::OutputNoScriptContent | michael@0: nsIDocumentEncoder::OutputEncodeBasicEntities | michael@0: nsIDocumentEncoder::OutputLFLineBreak | michael@0: nsIDocumentEncoder::OutputRaw); michael@0: michael@0: return encoder->EncodeToString(aToStr); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsParserUtils::ParseFragment(const nsAString& aFragment, michael@0: bool aIsXML, michael@0: nsIURI* aBaseURI, michael@0: nsIDOMElement* aContextElement, michael@0: nsIDOMDocumentFragment** aReturn) michael@0: { michael@0: return nsParserUtils::ParseFragment(aFragment, michael@0: 0, michael@0: aIsXML, michael@0: aBaseURI, michael@0: aContextElement, michael@0: aReturn); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsParserUtils::ParseFragment(const nsAString& aFragment, michael@0: uint32_t aFlags, michael@0: bool aIsXML, michael@0: nsIURI* aBaseURI, michael@0: nsIDOMElement* aContextElement, michael@0: nsIDOMDocumentFragment** aReturn) michael@0: { michael@0: NS_ENSURE_ARG(aContextElement); michael@0: *aReturn = nullptr; michael@0: michael@0: nsCOMPtr document; michael@0: nsCOMPtr domDocument; michael@0: nsCOMPtr contextNode; michael@0: contextNode = do_QueryInterface(aContextElement); michael@0: contextNode->GetOwnerDocument(getter_AddRefs(domDocument)); michael@0: document = do_QueryInterface(domDocument); michael@0: NS_ENSURE_TRUE(document, NS_ERROR_NOT_AVAILABLE); michael@0: michael@0: nsAutoScriptBlockerSuppressNodeRemoved autoBlocker; michael@0: michael@0: // stop scripts michael@0: nsRefPtr loader; michael@0: bool scripts_enabled = false; michael@0: if (document) { michael@0: loader = document->ScriptLoader(); michael@0: scripts_enabled = loader->GetEnabled(); michael@0: } michael@0: if (scripts_enabled) { michael@0: loader->SetEnabled(false); michael@0: } michael@0: michael@0: // Wrap things in a div or body for parsing, but it won't show up in michael@0: // the fragment. michael@0: nsAutoTArray tagStack; michael@0: nsAutoCString base, spec; michael@0: if (aIsXML) { michael@0: // XHTML michael@0: if (aBaseURI) { michael@0: base.Append(NS_LITERAL_CSTRING(XHTML_DIV_TAG)); michael@0: base.Append(NS_LITERAL_CSTRING(" xml:base=\"")); michael@0: aBaseURI->GetSpec(spec); michael@0: // nsEscapeHTML is good enough, because we only need to get michael@0: // quotes, ampersands, and angle brackets michael@0: char* escapedSpec = nsEscapeHTML(spec.get()); michael@0: if (escapedSpec) michael@0: base += escapedSpec; michael@0: NS_Free(escapedSpec); michael@0: base.Append(NS_LITERAL_CSTRING("\"")); michael@0: tagStack.AppendElement(NS_ConvertUTF8toUTF16(base)); michael@0: } else { michael@0: tagStack.AppendElement(NS_LITERAL_STRING(XHTML_DIV_TAG)); michael@0: } michael@0: } michael@0: michael@0: nsresult rv = NS_OK; michael@0: nsCOMPtr fragment; michael@0: if (aIsXML) { michael@0: rv = nsContentUtils::ParseFragmentXML(aFragment, michael@0: document, michael@0: tagStack, michael@0: true, michael@0: aReturn); michael@0: fragment = do_QueryInterface(*aReturn); michael@0: } else { michael@0: NS_ADDREF(*aReturn = new DocumentFragment(document->NodeInfoManager())); michael@0: fragment = do_QueryInterface(*aReturn); michael@0: rv = nsContentUtils::ParseFragmentHTML(aFragment, michael@0: fragment, michael@0: nsGkAtoms::body, michael@0: kNameSpaceID_XHTML, michael@0: false, michael@0: true); michael@0: // Now, set the base URI on all subtree roots. michael@0: if (aBaseURI) { michael@0: aBaseURI->GetSpec(spec); michael@0: nsAutoString spec16; michael@0: CopyUTF8toUTF16(spec, spec16); michael@0: nsIContent* node = fragment->GetFirstChild(); michael@0: while (node) { michael@0: if (node->IsElement()) { michael@0: node->SetAttr(kNameSpaceID_XML, michael@0: nsGkAtoms::base, michael@0: nsGkAtoms::xml, michael@0: spec16, michael@0: false); michael@0: } michael@0: node = node->GetNextSibling(); michael@0: } michael@0: } michael@0: } michael@0: if (fragment) { michael@0: nsTreeSanitizer sanitizer(aFlags); michael@0: sanitizer.Sanitize(fragment); michael@0: } michael@0: michael@0: if (scripts_enabled) { michael@0: loader->SetEnabled(true); michael@0: } michael@0: michael@0: return rv; michael@0: }