1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/parser/html/nsParserUtils.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,234 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "nsString.h" 1.10 +#include "nsIComponentManager.h" 1.11 +#include "nsCOMPtr.h" 1.12 +#include "nsXPCOM.h" 1.13 +#include "nsISupportsPrimitives.h" 1.14 +#include "nsXPIDLString.h" 1.15 +#include "nsScriptLoader.h" 1.16 +#include "nsEscape.h" 1.17 +#include "nsIParser.h" 1.18 +#include "nsIDTD.h" 1.19 +#include "nsNetCID.h" 1.20 +#include "nsNetUtil.h" 1.21 +#include "nsParserCIID.h" 1.22 +#include "nsContentUtils.h" 1.23 +#include "nsIContentSink.h" 1.24 +#include "nsIDocumentEncoder.h" 1.25 +#include "nsIDOMDocumentFragment.h" 1.26 +#include "nsIFragmentContentSink.h" 1.27 +#include "nsIDOMDocument.h" 1.28 +#include "nsIDOMNodeList.h" 1.29 +#include "nsIDOMNode.h" 1.30 +#include "nsIDOMElement.h" 1.31 +#include "nsIDocument.h" 1.32 +#include "nsIContent.h" 1.33 +#include "nsAttrName.h" 1.34 +#include "nsHTMLParts.h" 1.35 +#include "nsContentCID.h" 1.36 +#include "nsIScriptableUnescapeHTML.h" 1.37 +#include "nsParserUtils.h" 1.38 +#include "nsAutoPtr.h" 1.39 +#include "nsTreeSanitizer.h" 1.40 +#include "nsHtml5Module.h" 1.41 +#include "mozilla/dom/DocumentFragment.h" 1.42 + 1.43 +#define XHTML_DIV_TAG "div xmlns=\"http://www.w3.org/1999/xhtml\"" 1.44 + 1.45 +using namespace mozilla::dom; 1.46 + 1.47 +NS_IMPL_ISUPPORTS(nsParserUtils, 1.48 + nsIScriptableUnescapeHTML, 1.49 + nsIParserUtils) 1.50 + 1.51 +static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID); 1.52 + 1.53 + 1.54 + 1.55 +NS_IMETHODIMP 1.56 +nsParserUtils::ConvertToPlainText(const nsAString& aFromStr, 1.57 + uint32_t aFlags, 1.58 + uint32_t aWrapCol, 1.59 + nsAString& aToStr) 1.60 +{ 1.61 + return nsContentUtils::ConvertToPlainText(aFromStr, 1.62 + aToStr, 1.63 + aFlags, 1.64 + aWrapCol); 1.65 +} 1.66 + 1.67 +NS_IMETHODIMP 1.68 +nsParserUtils::Unescape(const nsAString& aFromStr, 1.69 + nsAString& aToStr) 1.70 +{ 1.71 + return nsContentUtils::ConvertToPlainText(aFromStr, 1.72 + aToStr, 1.73 + nsIDocumentEncoder::OutputSelectionOnly | 1.74 + nsIDocumentEncoder::OutputAbsoluteLinks, 1.75 + 0); 1.76 +} 1.77 + 1.78 +NS_IMETHODIMP 1.79 +nsParserUtils::Sanitize(const nsAString& aFromStr, 1.80 + uint32_t aFlags, 1.81 + nsAString& aToStr) 1.82 +{ 1.83 + nsCOMPtr<nsIURI> uri; 1.84 + NS_NewURI(getter_AddRefs(uri), "about:blank"); 1.85 + nsCOMPtr<nsIPrincipal> principal = 1.86 + do_CreateInstance("@mozilla.org/nullprincipal;1"); 1.87 + nsCOMPtr<nsIDOMDocument> domDocument; 1.88 + nsresult rv = NS_NewDOMDocument(getter_AddRefs(domDocument), 1.89 + EmptyString(), 1.90 + EmptyString(), 1.91 + nullptr, 1.92 + uri, 1.93 + uri, 1.94 + principal, 1.95 + true, 1.96 + nullptr, 1.97 + DocumentFlavorHTML); 1.98 + NS_ENSURE_SUCCESS(rv, rv); 1.99 + 1.100 + nsCOMPtr<nsIDocument> document = do_QueryInterface(domDocument); 1.101 + rv = nsContentUtils::ParseDocumentHTML(aFromStr, document, false); 1.102 + NS_ENSURE_SUCCESS(rv, rv); 1.103 + 1.104 + nsTreeSanitizer sanitizer(aFlags); 1.105 + sanitizer.Sanitize(document); 1.106 + 1.107 + nsCOMPtr<nsIDocumentEncoder> encoder = 1.108 + do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html"); 1.109 + 1.110 + encoder->NativeInit(document, 1.111 + NS_LITERAL_STRING("text/html"), 1.112 + nsIDocumentEncoder::OutputDontRewriteEncodingDeclaration | 1.113 + nsIDocumentEncoder::OutputNoScriptContent | 1.114 + nsIDocumentEncoder::OutputEncodeBasicEntities | 1.115 + nsIDocumentEncoder::OutputLFLineBreak | 1.116 + nsIDocumentEncoder::OutputRaw); 1.117 + 1.118 + return encoder->EncodeToString(aToStr); 1.119 +} 1.120 + 1.121 +NS_IMETHODIMP 1.122 +nsParserUtils::ParseFragment(const nsAString& aFragment, 1.123 + bool aIsXML, 1.124 + nsIURI* aBaseURI, 1.125 + nsIDOMElement* aContextElement, 1.126 + nsIDOMDocumentFragment** aReturn) 1.127 +{ 1.128 + return nsParserUtils::ParseFragment(aFragment, 1.129 + 0, 1.130 + aIsXML, 1.131 + aBaseURI, 1.132 + aContextElement, 1.133 + aReturn); 1.134 +} 1.135 + 1.136 +NS_IMETHODIMP 1.137 +nsParserUtils::ParseFragment(const nsAString& aFragment, 1.138 + uint32_t aFlags, 1.139 + bool aIsXML, 1.140 + nsIURI* aBaseURI, 1.141 + nsIDOMElement* aContextElement, 1.142 + nsIDOMDocumentFragment** aReturn) 1.143 +{ 1.144 + NS_ENSURE_ARG(aContextElement); 1.145 + *aReturn = nullptr; 1.146 + 1.147 + nsCOMPtr<nsIDocument> document; 1.148 + nsCOMPtr<nsIDOMDocument> domDocument; 1.149 + nsCOMPtr<nsIDOMNode> contextNode; 1.150 + contextNode = do_QueryInterface(aContextElement); 1.151 + contextNode->GetOwnerDocument(getter_AddRefs(domDocument)); 1.152 + document = do_QueryInterface(domDocument); 1.153 + NS_ENSURE_TRUE(document, NS_ERROR_NOT_AVAILABLE); 1.154 + 1.155 + nsAutoScriptBlockerSuppressNodeRemoved autoBlocker; 1.156 + 1.157 + // stop scripts 1.158 + nsRefPtr<nsScriptLoader> loader; 1.159 + bool scripts_enabled = false; 1.160 + if (document) { 1.161 + loader = document->ScriptLoader(); 1.162 + scripts_enabled = loader->GetEnabled(); 1.163 + } 1.164 + if (scripts_enabled) { 1.165 + loader->SetEnabled(false); 1.166 + } 1.167 + 1.168 + // Wrap things in a div or body for parsing, but it won't show up in 1.169 + // the fragment. 1.170 + nsAutoTArray<nsString, 2> tagStack; 1.171 + nsAutoCString base, spec; 1.172 + if (aIsXML) { 1.173 + // XHTML 1.174 + if (aBaseURI) { 1.175 + base.Append(NS_LITERAL_CSTRING(XHTML_DIV_TAG)); 1.176 + base.Append(NS_LITERAL_CSTRING(" xml:base=\"")); 1.177 + aBaseURI->GetSpec(spec); 1.178 + // nsEscapeHTML is good enough, because we only need to get 1.179 + // quotes, ampersands, and angle brackets 1.180 + char* escapedSpec = nsEscapeHTML(spec.get()); 1.181 + if (escapedSpec) 1.182 + base += escapedSpec; 1.183 + NS_Free(escapedSpec); 1.184 + base.Append(NS_LITERAL_CSTRING("\"")); 1.185 + tagStack.AppendElement(NS_ConvertUTF8toUTF16(base)); 1.186 + } else { 1.187 + tagStack.AppendElement(NS_LITERAL_STRING(XHTML_DIV_TAG)); 1.188 + } 1.189 + } 1.190 + 1.191 + nsresult rv = NS_OK; 1.192 + nsCOMPtr<nsIContent> fragment; 1.193 + if (aIsXML) { 1.194 + rv = nsContentUtils::ParseFragmentXML(aFragment, 1.195 + document, 1.196 + tagStack, 1.197 + true, 1.198 + aReturn); 1.199 + fragment = do_QueryInterface(*aReturn); 1.200 + } else { 1.201 + NS_ADDREF(*aReturn = new DocumentFragment(document->NodeInfoManager())); 1.202 + fragment = do_QueryInterface(*aReturn); 1.203 + rv = nsContentUtils::ParseFragmentHTML(aFragment, 1.204 + fragment, 1.205 + nsGkAtoms::body, 1.206 + kNameSpaceID_XHTML, 1.207 + false, 1.208 + true); 1.209 + // Now, set the base URI on all subtree roots. 1.210 + if (aBaseURI) { 1.211 + aBaseURI->GetSpec(spec); 1.212 + nsAutoString spec16; 1.213 + CopyUTF8toUTF16(spec, spec16); 1.214 + nsIContent* node = fragment->GetFirstChild(); 1.215 + while (node) { 1.216 + if (node->IsElement()) { 1.217 + node->SetAttr(kNameSpaceID_XML, 1.218 + nsGkAtoms::base, 1.219 + nsGkAtoms::xml, 1.220 + spec16, 1.221 + false); 1.222 + } 1.223 + node = node->GetNextSibling(); 1.224 + } 1.225 + } 1.226 + } 1.227 + if (fragment) { 1.228 + nsTreeSanitizer sanitizer(aFlags); 1.229 + sanitizer.Sanitize(fragment); 1.230 + } 1.231 + 1.232 + if (scripts_enabled) { 1.233 + loader->SetEnabled(true); 1.234 + } 1.235 + 1.236 + return rv; 1.237 +}