parser/html/nsParserUtils.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/parser/html/nsParserUtils.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,234 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "nsString.h"
    1.10 +#include "nsIComponentManager.h"
    1.11 +#include "nsCOMPtr.h"
    1.12 +#include "nsXPCOM.h"
    1.13 +#include "nsISupportsPrimitives.h"
    1.14 +#include "nsXPIDLString.h"
    1.15 +#include "nsScriptLoader.h"
    1.16 +#include "nsEscape.h"
    1.17 +#include "nsIParser.h"
    1.18 +#include "nsIDTD.h"
    1.19 +#include "nsNetCID.h"
    1.20 +#include "nsNetUtil.h"
    1.21 +#include "nsParserCIID.h"
    1.22 +#include "nsContentUtils.h"
    1.23 +#include "nsIContentSink.h"
    1.24 +#include "nsIDocumentEncoder.h"
    1.25 +#include "nsIDOMDocumentFragment.h"
    1.26 +#include "nsIFragmentContentSink.h"
    1.27 +#include "nsIDOMDocument.h"
    1.28 +#include "nsIDOMNodeList.h"
    1.29 +#include "nsIDOMNode.h"
    1.30 +#include "nsIDOMElement.h"
    1.31 +#include "nsIDocument.h"
    1.32 +#include "nsIContent.h"
    1.33 +#include "nsAttrName.h"
    1.34 +#include "nsHTMLParts.h"
    1.35 +#include "nsContentCID.h"
    1.36 +#include "nsIScriptableUnescapeHTML.h"
    1.37 +#include "nsParserUtils.h"
    1.38 +#include "nsAutoPtr.h"
    1.39 +#include "nsTreeSanitizer.h"
    1.40 +#include "nsHtml5Module.h"
    1.41 +#include "mozilla/dom/DocumentFragment.h"
    1.42 +
    1.43 +#define XHTML_DIV_TAG "div xmlns=\"http://www.w3.org/1999/xhtml\""
    1.44 +
    1.45 +using namespace mozilla::dom;
    1.46 +
    1.47 +NS_IMPL_ISUPPORTS(nsParserUtils,
    1.48 +                  nsIScriptableUnescapeHTML,
    1.49 +                  nsIParserUtils)
    1.50 +
    1.51 +static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
    1.52 +
    1.53 +
    1.54 +
    1.55 +NS_IMETHODIMP
    1.56 +nsParserUtils::ConvertToPlainText(const nsAString& aFromStr,
    1.57 +                                  uint32_t aFlags,
    1.58 +                                  uint32_t aWrapCol,
    1.59 +                                  nsAString& aToStr)
    1.60 +{
    1.61 +  return nsContentUtils::ConvertToPlainText(aFromStr,
    1.62 +    aToStr,
    1.63 +    aFlags,
    1.64 +    aWrapCol);
    1.65 +}
    1.66 +
    1.67 +NS_IMETHODIMP
    1.68 +nsParserUtils::Unescape(const nsAString& aFromStr,
    1.69 +                        nsAString& aToStr)
    1.70 +{
    1.71 +  return nsContentUtils::ConvertToPlainText(aFromStr,
    1.72 +    aToStr,
    1.73 +    nsIDocumentEncoder::OutputSelectionOnly |
    1.74 +    nsIDocumentEncoder::OutputAbsoluteLinks,
    1.75 +    0);
    1.76 +}
    1.77 +
    1.78 +NS_IMETHODIMP
    1.79 +nsParserUtils::Sanitize(const nsAString& aFromStr,
    1.80 +                        uint32_t aFlags,
    1.81 +                        nsAString& aToStr)
    1.82 +{
    1.83 +  nsCOMPtr<nsIURI> uri;
    1.84 +  NS_NewURI(getter_AddRefs(uri), "about:blank");
    1.85 +  nsCOMPtr<nsIPrincipal> principal =
    1.86 +    do_CreateInstance("@mozilla.org/nullprincipal;1");
    1.87 +  nsCOMPtr<nsIDOMDocument> domDocument;
    1.88 +  nsresult rv = NS_NewDOMDocument(getter_AddRefs(domDocument),
    1.89 +                                  EmptyString(),
    1.90 +                                  EmptyString(),
    1.91 +                                  nullptr,
    1.92 +                                  uri,
    1.93 +                                  uri,
    1.94 +                                  principal,
    1.95 +                                  true,
    1.96 +                                  nullptr,
    1.97 +                                  DocumentFlavorHTML);
    1.98 +  NS_ENSURE_SUCCESS(rv, rv);
    1.99 +
   1.100 +  nsCOMPtr<nsIDocument> document = do_QueryInterface(domDocument);
   1.101 +  rv = nsContentUtils::ParseDocumentHTML(aFromStr, document, false);
   1.102 +  NS_ENSURE_SUCCESS(rv, rv);
   1.103 +
   1.104 +  nsTreeSanitizer sanitizer(aFlags);
   1.105 +  sanitizer.Sanitize(document);
   1.106 +
   1.107 +  nsCOMPtr<nsIDocumentEncoder> encoder =
   1.108 +    do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html");
   1.109 +
   1.110 +  encoder->NativeInit(document,
   1.111 +                      NS_LITERAL_STRING("text/html"),
   1.112 +                      nsIDocumentEncoder::OutputDontRewriteEncodingDeclaration |
   1.113 +                      nsIDocumentEncoder::OutputNoScriptContent |
   1.114 +                      nsIDocumentEncoder::OutputEncodeBasicEntities |
   1.115 +                      nsIDocumentEncoder::OutputLFLineBreak |
   1.116 +                      nsIDocumentEncoder::OutputRaw);
   1.117 +
   1.118 +  return encoder->EncodeToString(aToStr);
   1.119 +}
   1.120 +
   1.121 +NS_IMETHODIMP
   1.122 +nsParserUtils::ParseFragment(const nsAString& aFragment,
   1.123 +                             bool aIsXML,
   1.124 +                             nsIURI* aBaseURI,
   1.125 +                             nsIDOMElement* aContextElement,
   1.126 +                             nsIDOMDocumentFragment** aReturn)
   1.127 +{
   1.128 +  return nsParserUtils::ParseFragment(aFragment,
   1.129 +                                      0,
   1.130 +                                      aIsXML,
   1.131 +                                      aBaseURI,
   1.132 +                                      aContextElement,
   1.133 +                                      aReturn);
   1.134 +}
   1.135 +
   1.136 +NS_IMETHODIMP
   1.137 +nsParserUtils::ParseFragment(const nsAString& aFragment,
   1.138 +                             uint32_t aFlags,
   1.139 +                             bool aIsXML,
   1.140 +                             nsIURI* aBaseURI,
   1.141 +                             nsIDOMElement* aContextElement,
   1.142 +                             nsIDOMDocumentFragment** aReturn)
   1.143 +{
   1.144 +  NS_ENSURE_ARG(aContextElement);
   1.145 +  *aReturn = nullptr;
   1.146 +
   1.147 +  nsCOMPtr<nsIDocument> document;
   1.148 +  nsCOMPtr<nsIDOMDocument> domDocument;
   1.149 +  nsCOMPtr<nsIDOMNode> contextNode;
   1.150 +  contextNode = do_QueryInterface(aContextElement);
   1.151 +  contextNode->GetOwnerDocument(getter_AddRefs(domDocument));
   1.152 +  document = do_QueryInterface(domDocument);
   1.153 +  NS_ENSURE_TRUE(document, NS_ERROR_NOT_AVAILABLE);
   1.154 +
   1.155 +  nsAutoScriptBlockerSuppressNodeRemoved autoBlocker;
   1.156 +
   1.157 +  // stop scripts
   1.158 +  nsRefPtr<nsScriptLoader> loader;
   1.159 +  bool scripts_enabled = false;
   1.160 +  if (document) {
   1.161 +    loader = document->ScriptLoader();
   1.162 +    scripts_enabled = loader->GetEnabled();
   1.163 +  }
   1.164 +  if (scripts_enabled) {
   1.165 +    loader->SetEnabled(false);
   1.166 +  }
   1.167 +
   1.168 +  // Wrap things in a div or body for parsing, but it won't show up in
   1.169 +  // the fragment.
   1.170 +  nsAutoTArray<nsString, 2> tagStack;
   1.171 +  nsAutoCString base, spec;
   1.172 +  if (aIsXML) {
   1.173 +    // XHTML
   1.174 +    if (aBaseURI) {
   1.175 +      base.Append(NS_LITERAL_CSTRING(XHTML_DIV_TAG));
   1.176 +      base.Append(NS_LITERAL_CSTRING(" xml:base=\""));
   1.177 +      aBaseURI->GetSpec(spec);
   1.178 +      // nsEscapeHTML is good enough, because we only need to get
   1.179 +      // quotes, ampersands, and angle brackets
   1.180 +      char* escapedSpec = nsEscapeHTML(spec.get());
   1.181 +      if (escapedSpec)
   1.182 +        base += escapedSpec;
   1.183 +      NS_Free(escapedSpec);
   1.184 +      base.Append(NS_LITERAL_CSTRING("\""));
   1.185 +      tagStack.AppendElement(NS_ConvertUTF8toUTF16(base));
   1.186 +    }  else {
   1.187 +      tagStack.AppendElement(NS_LITERAL_STRING(XHTML_DIV_TAG));
   1.188 +    }
   1.189 +  }
   1.190 +
   1.191 +  nsresult rv = NS_OK;
   1.192 +  nsCOMPtr<nsIContent> fragment;
   1.193 +  if (aIsXML) {
   1.194 +    rv = nsContentUtils::ParseFragmentXML(aFragment,
   1.195 +                                          document,
   1.196 +                                          tagStack,
   1.197 +                                          true,
   1.198 +                                          aReturn);
   1.199 +    fragment = do_QueryInterface(*aReturn);
   1.200 +  } else {
   1.201 +    NS_ADDREF(*aReturn = new DocumentFragment(document->NodeInfoManager()));
   1.202 +    fragment = do_QueryInterface(*aReturn);
   1.203 +    rv = nsContentUtils::ParseFragmentHTML(aFragment,
   1.204 +                                           fragment,
   1.205 +                                           nsGkAtoms::body,
   1.206 +                                           kNameSpaceID_XHTML,
   1.207 +                                           false,
   1.208 +                                           true);
   1.209 +    // Now, set the base URI on all subtree roots.
   1.210 +    if (aBaseURI) {
   1.211 +      aBaseURI->GetSpec(spec);
   1.212 +      nsAutoString spec16;
   1.213 +      CopyUTF8toUTF16(spec, spec16);
   1.214 +      nsIContent* node = fragment->GetFirstChild();
   1.215 +      while (node) {
   1.216 +        if (node->IsElement()) {
   1.217 +          node->SetAttr(kNameSpaceID_XML,
   1.218 +                        nsGkAtoms::base,
   1.219 +                        nsGkAtoms::xml,
   1.220 +                        spec16,
   1.221 +                        false);
   1.222 +        }
   1.223 +        node = node->GetNextSibling();
   1.224 +      }
   1.225 +    }
   1.226 +  }
   1.227 +  if (fragment) {
   1.228 +    nsTreeSanitizer sanitizer(aFlags);
   1.229 +    sanitizer.Sanitize(fragment);
   1.230 +  }
   1.231 +
   1.232 +  if (scripts_enabled) {
   1.233 +    loader->SetEnabled(true);
   1.234 +  }
   1.235 +
   1.236 +  return rv;
   1.237 +}

mercurial