parser/html/nsParserUtils.cpp

Wed, 31 Dec 2014 13:27:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 13:27:57 +0100
branch
TOR_BUG_3246
changeset 6
8bccb770b82d
permissions
-rw-r--r--

Ignore runtime configuration files generated during quality assurance.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "nsString.h"
michael@0 7 #include "nsIComponentManager.h"
michael@0 8 #include "nsCOMPtr.h"
michael@0 9 #include "nsXPCOM.h"
michael@0 10 #include "nsISupportsPrimitives.h"
michael@0 11 #include "nsXPIDLString.h"
michael@0 12 #include "nsScriptLoader.h"
michael@0 13 #include "nsEscape.h"
michael@0 14 #include "nsIParser.h"
michael@0 15 #include "nsIDTD.h"
michael@0 16 #include "nsNetCID.h"
michael@0 17 #include "nsNetUtil.h"
michael@0 18 #include "nsParserCIID.h"
michael@0 19 #include "nsContentUtils.h"
michael@0 20 #include "nsIContentSink.h"
michael@0 21 #include "nsIDocumentEncoder.h"
michael@0 22 #include "nsIDOMDocumentFragment.h"
michael@0 23 #include "nsIFragmentContentSink.h"
michael@0 24 #include "nsIDOMDocument.h"
michael@0 25 #include "nsIDOMNodeList.h"
michael@0 26 #include "nsIDOMNode.h"
michael@0 27 #include "nsIDOMElement.h"
michael@0 28 #include "nsIDocument.h"
michael@0 29 #include "nsIContent.h"
michael@0 30 #include "nsAttrName.h"
michael@0 31 #include "nsHTMLParts.h"
michael@0 32 #include "nsContentCID.h"
michael@0 33 #include "nsIScriptableUnescapeHTML.h"
michael@0 34 #include "nsParserUtils.h"
michael@0 35 #include "nsAutoPtr.h"
michael@0 36 #include "nsTreeSanitizer.h"
michael@0 37 #include "nsHtml5Module.h"
michael@0 38 #include "mozilla/dom/DocumentFragment.h"
michael@0 39
michael@0 40 #define XHTML_DIV_TAG "div xmlns=\"http://www.w3.org/1999/xhtml\""
michael@0 41
michael@0 42 using namespace mozilla::dom;
michael@0 43
michael@0 44 NS_IMPL_ISUPPORTS(nsParserUtils,
michael@0 45 nsIScriptableUnescapeHTML,
michael@0 46 nsIParserUtils)
michael@0 47
michael@0 48 static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
michael@0 49
michael@0 50
michael@0 51
michael@0 52 NS_IMETHODIMP
michael@0 53 nsParserUtils::ConvertToPlainText(const nsAString& aFromStr,
michael@0 54 uint32_t aFlags,
michael@0 55 uint32_t aWrapCol,
michael@0 56 nsAString& aToStr)
michael@0 57 {
michael@0 58 return nsContentUtils::ConvertToPlainText(aFromStr,
michael@0 59 aToStr,
michael@0 60 aFlags,
michael@0 61 aWrapCol);
michael@0 62 }
michael@0 63
michael@0 64 NS_IMETHODIMP
michael@0 65 nsParserUtils::Unescape(const nsAString& aFromStr,
michael@0 66 nsAString& aToStr)
michael@0 67 {
michael@0 68 return nsContentUtils::ConvertToPlainText(aFromStr,
michael@0 69 aToStr,
michael@0 70 nsIDocumentEncoder::OutputSelectionOnly |
michael@0 71 nsIDocumentEncoder::OutputAbsoluteLinks,
michael@0 72 0);
michael@0 73 }
michael@0 74
michael@0 75 NS_IMETHODIMP
michael@0 76 nsParserUtils::Sanitize(const nsAString& aFromStr,
michael@0 77 uint32_t aFlags,
michael@0 78 nsAString& aToStr)
michael@0 79 {
michael@0 80 nsCOMPtr<nsIURI> uri;
michael@0 81 NS_NewURI(getter_AddRefs(uri), "about:blank");
michael@0 82 nsCOMPtr<nsIPrincipal> principal =
michael@0 83 do_CreateInstance("@mozilla.org/nullprincipal;1");
michael@0 84 nsCOMPtr<nsIDOMDocument> domDocument;
michael@0 85 nsresult rv = NS_NewDOMDocument(getter_AddRefs(domDocument),
michael@0 86 EmptyString(),
michael@0 87 EmptyString(),
michael@0 88 nullptr,
michael@0 89 uri,
michael@0 90 uri,
michael@0 91 principal,
michael@0 92 true,
michael@0 93 nullptr,
michael@0 94 DocumentFlavorHTML);
michael@0 95 NS_ENSURE_SUCCESS(rv, rv);
michael@0 96
michael@0 97 nsCOMPtr<nsIDocument> document = do_QueryInterface(domDocument);
michael@0 98 rv = nsContentUtils::ParseDocumentHTML(aFromStr, document, false);
michael@0 99 NS_ENSURE_SUCCESS(rv, rv);
michael@0 100
michael@0 101 nsTreeSanitizer sanitizer(aFlags);
michael@0 102 sanitizer.Sanitize(document);
michael@0 103
michael@0 104 nsCOMPtr<nsIDocumentEncoder> encoder =
michael@0 105 do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html");
michael@0 106
michael@0 107 encoder->NativeInit(document,
michael@0 108 NS_LITERAL_STRING("text/html"),
michael@0 109 nsIDocumentEncoder::OutputDontRewriteEncodingDeclaration |
michael@0 110 nsIDocumentEncoder::OutputNoScriptContent |
michael@0 111 nsIDocumentEncoder::OutputEncodeBasicEntities |
michael@0 112 nsIDocumentEncoder::OutputLFLineBreak |
michael@0 113 nsIDocumentEncoder::OutputRaw);
michael@0 114
michael@0 115 return encoder->EncodeToString(aToStr);
michael@0 116 }
michael@0 117
michael@0 118 NS_IMETHODIMP
michael@0 119 nsParserUtils::ParseFragment(const nsAString& aFragment,
michael@0 120 bool aIsXML,
michael@0 121 nsIURI* aBaseURI,
michael@0 122 nsIDOMElement* aContextElement,
michael@0 123 nsIDOMDocumentFragment** aReturn)
michael@0 124 {
michael@0 125 return nsParserUtils::ParseFragment(aFragment,
michael@0 126 0,
michael@0 127 aIsXML,
michael@0 128 aBaseURI,
michael@0 129 aContextElement,
michael@0 130 aReturn);
michael@0 131 }
michael@0 132
michael@0 133 NS_IMETHODIMP
michael@0 134 nsParserUtils::ParseFragment(const nsAString& aFragment,
michael@0 135 uint32_t aFlags,
michael@0 136 bool aIsXML,
michael@0 137 nsIURI* aBaseURI,
michael@0 138 nsIDOMElement* aContextElement,
michael@0 139 nsIDOMDocumentFragment** aReturn)
michael@0 140 {
michael@0 141 NS_ENSURE_ARG(aContextElement);
michael@0 142 *aReturn = nullptr;
michael@0 143
michael@0 144 nsCOMPtr<nsIDocument> document;
michael@0 145 nsCOMPtr<nsIDOMDocument> domDocument;
michael@0 146 nsCOMPtr<nsIDOMNode> contextNode;
michael@0 147 contextNode = do_QueryInterface(aContextElement);
michael@0 148 contextNode->GetOwnerDocument(getter_AddRefs(domDocument));
michael@0 149 document = do_QueryInterface(domDocument);
michael@0 150 NS_ENSURE_TRUE(document, NS_ERROR_NOT_AVAILABLE);
michael@0 151
michael@0 152 nsAutoScriptBlockerSuppressNodeRemoved autoBlocker;
michael@0 153
michael@0 154 // stop scripts
michael@0 155 nsRefPtr<nsScriptLoader> loader;
michael@0 156 bool scripts_enabled = false;
michael@0 157 if (document) {
michael@0 158 loader = document->ScriptLoader();
michael@0 159 scripts_enabled = loader->GetEnabled();
michael@0 160 }
michael@0 161 if (scripts_enabled) {
michael@0 162 loader->SetEnabled(false);
michael@0 163 }
michael@0 164
michael@0 165 // Wrap things in a div or body for parsing, but it won't show up in
michael@0 166 // the fragment.
michael@0 167 nsAutoTArray<nsString, 2> tagStack;
michael@0 168 nsAutoCString base, spec;
michael@0 169 if (aIsXML) {
michael@0 170 // XHTML
michael@0 171 if (aBaseURI) {
michael@0 172 base.Append(NS_LITERAL_CSTRING(XHTML_DIV_TAG));
michael@0 173 base.Append(NS_LITERAL_CSTRING(" xml:base=\""));
michael@0 174 aBaseURI->GetSpec(spec);
michael@0 175 // nsEscapeHTML is good enough, because we only need to get
michael@0 176 // quotes, ampersands, and angle brackets
michael@0 177 char* escapedSpec = nsEscapeHTML(spec.get());
michael@0 178 if (escapedSpec)
michael@0 179 base += escapedSpec;
michael@0 180 NS_Free(escapedSpec);
michael@0 181 base.Append(NS_LITERAL_CSTRING("\""));
michael@0 182 tagStack.AppendElement(NS_ConvertUTF8toUTF16(base));
michael@0 183 } else {
michael@0 184 tagStack.AppendElement(NS_LITERAL_STRING(XHTML_DIV_TAG));
michael@0 185 }
michael@0 186 }
michael@0 187
michael@0 188 nsresult rv = NS_OK;
michael@0 189 nsCOMPtr<nsIContent> fragment;
michael@0 190 if (aIsXML) {
michael@0 191 rv = nsContentUtils::ParseFragmentXML(aFragment,
michael@0 192 document,
michael@0 193 tagStack,
michael@0 194 true,
michael@0 195 aReturn);
michael@0 196 fragment = do_QueryInterface(*aReturn);
michael@0 197 } else {
michael@0 198 NS_ADDREF(*aReturn = new DocumentFragment(document->NodeInfoManager()));
michael@0 199 fragment = do_QueryInterface(*aReturn);
michael@0 200 rv = nsContentUtils::ParseFragmentHTML(aFragment,
michael@0 201 fragment,
michael@0 202 nsGkAtoms::body,
michael@0 203 kNameSpaceID_XHTML,
michael@0 204 false,
michael@0 205 true);
michael@0 206 // Now, set the base URI on all subtree roots.
michael@0 207 if (aBaseURI) {
michael@0 208 aBaseURI->GetSpec(spec);
michael@0 209 nsAutoString spec16;
michael@0 210 CopyUTF8toUTF16(spec, spec16);
michael@0 211 nsIContent* node = fragment->GetFirstChild();
michael@0 212 while (node) {
michael@0 213 if (node->IsElement()) {
michael@0 214 node->SetAttr(kNameSpaceID_XML,
michael@0 215 nsGkAtoms::base,
michael@0 216 nsGkAtoms::xml,
michael@0 217 spec16,
michael@0 218 false);
michael@0 219 }
michael@0 220 node = node->GetNextSibling();
michael@0 221 }
michael@0 222 }
michael@0 223 }
michael@0 224 if (fragment) {
michael@0 225 nsTreeSanitizer sanitizer(aFlags);
michael@0 226 sanitizer.Sanitize(fragment);
michael@0 227 }
michael@0 228
michael@0 229 if (scripts_enabled) {
michael@0 230 loader->SetEnabled(true);
michael@0 231 }
michael@0 232
michael@0 233 return rv;
michael@0 234 }

mercurial