parser/html/nsParserUtils.cpp

Wed, 31 Dec 2014 13:27:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 13:27:57 +0100
branch
TOR_BUG_3246
changeset 6
8bccb770b82d
permissions
-rw-r--r--

Ignore runtime configuration files generated during quality assurance.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #include "nsString.h"
     7 #include "nsIComponentManager.h"
     8 #include "nsCOMPtr.h"
     9 #include "nsXPCOM.h"
    10 #include "nsISupportsPrimitives.h"
    11 #include "nsXPIDLString.h"
    12 #include "nsScriptLoader.h"
    13 #include "nsEscape.h"
    14 #include "nsIParser.h"
    15 #include "nsIDTD.h"
    16 #include "nsNetCID.h"
    17 #include "nsNetUtil.h"
    18 #include "nsParserCIID.h"
    19 #include "nsContentUtils.h"
    20 #include "nsIContentSink.h"
    21 #include "nsIDocumentEncoder.h"
    22 #include "nsIDOMDocumentFragment.h"
    23 #include "nsIFragmentContentSink.h"
    24 #include "nsIDOMDocument.h"
    25 #include "nsIDOMNodeList.h"
    26 #include "nsIDOMNode.h"
    27 #include "nsIDOMElement.h"
    28 #include "nsIDocument.h"
    29 #include "nsIContent.h"
    30 #include "nsAttrName.h"
    31 #include "nsHTMLParts.h"
    32 #include "nsContentCID.h"
    33 #include "nsIScriptableUnescapeHTML.h"
    34 #include "nsParserUtils.h"
    35 #include "nsAutoPtr.h"
    36 #include "nsTreeSanitizer.h"
    37 #include "nsHtml5Module.h"
    38 #include "mozilla/dom/DocumentFragment.h"
    40 #define XHTML_DIV_TAG "div xmlns=\"http://www.w3.org/1999/xhtml\""
    42 using namespace mozilla::dom;
    44 NS_IMPL_ISUPPORTS(nsParserUtils,
    45                   nsIScriptableUnescapeHTML,
    46                   nsIParserUtils)
    48 static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
    52 NS_IMETHODIMP
    53 nsParserUtils::ConvertToPlainText(const nsAString& aFromStr,
    54                                   uint32_t aFlags,
    55                                   uint32_t aWrapCol,
    56                                   nsAString& aToStr)
    57 {
    58   return nsContentUtils::ConvertToPlainText(aFromStr,
    59     aToStr,
    60     aFlags,
    61     aWrapCol);
    62 }
    64 NS_IMETHODIMP
    65 nsParserUtils::Unescape(const nsAString& aFromStr,
    66                         nsAString& aToStr)
    67 {
    68   return nsContentUtils::ConvertToPlainText(aFromStr,
    69     aToStr,
    70     nsIDocumentEncoder::OutputSelectionOnly |
    71     nsIDocumentEncoder::OutputAbsoluteLinks,
    72     0);
    73 }
    75 NS_IMETHODIMP
    76 nsParserUtils::Sanitize(const nsAString& aFromStr,
    77                         uint32_t aFlags,
    78                         nsAString& aToStr)
    79 {
    80   nsCOMPtr<nsIURI> uri;
    81   NS_NewURI(getter_AddRefs(uri), "about:blank");
    82   nsCOMPtr<nsIPrincipal> principal =
    83     do_CreateInstance("@mozilla.org/nullprincipal;1");
    84   nsCOMPtr<nsIDOMDocument> domDocument;
    85   nsresult rv = NS_NewDOMDocument(getter_AddRefs(domDocument),
    86                                   EmptyString(),
    87                                   EmptyString(),
    88                                   nullptr,
    89                                   uri,
    90                                   uri,
    91                                   principal,
    92                                   true,
    93                                   nullptr,
    94                                   DocumentFlavorHTML);
    95   NS_ENSURE_SUCCESS(rv, rv);
    97   nsCOMPtr<nsIDocument> document = do_QueryInterface(domDocument);
    98   rv = nsContentUtils::ParseDocumentHTML(aFromStr, document, false);
    99   NS_ENSURE_SUCCESS(rv, rv);
   101   nsTreeSanitizer sanitizer(aFlags);
   102   sanitizer.Sanitize(document);
   104   nsCOMPtr<nsIDocumentEncoder> encoder =
   105     do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html");
   107   encoder->NativeInit(document,
   108                       NS_LITERAL_STRING("text/html"),
   109                       nsIDocumentEncoder::OutputDontRewriteEncodingDeclaration |
   110                       nsIDocumentEncoder::OutputNoScriptContent |
   111                       nsIDocumentEncoder::OutputEncodeBasicEntities |
   112                       nsIDocumentEncoder::OutputLFLineBreak |
   113                       nsIDocumentEncoder::OutputRaw);
   115   return encoder->EncodeToString(aToStr);
   116 }
   118 NS_IMETHODIMP
   119 nsParserUtils::ParseFragment(const nsAString& aFragment,
   120                              bool aIsXML,
   121                              nsIURI* aBaseURI,
   122                              nsIDOMElement* aContextElement,
   123                              nsIDOMDocumentFragment** aReturn)
   124 {
   125   return nsParserUtils::ParseFragment(aFragment,
   126                                       0,
   127                                       aIsXML,
   128                                       aBaseURI,
   129                                       aContextElement,
   130                                       aReturn);
   131 }
   133 NS_IMETHODIMP
   134 nsParserUtils::ParseFragment(const nsAString& aFragment,
   135                              uint32_t aFlags,
   136                              bool aIsXML,
   137                              nsIURI* aBaseURI,
   138                              nsIDOMElement* aContextElement,
   139                              nsIDOMDocumentFragment** aReturn)
   140 {
   141   NS_ENSURE_ARG(aContextElement);
   142   *aReturn = nullptr;
   144   nsCOMPtr<nsIDocument> document;
   145   nsCOMPtr<nsIDOMDocument> domDocument;
   146   nsCOMPtr<nsIDOMNode> contextNode;
   147   contextNode = do_QueryInterface(aContextElement);
   148   contextNode->GetOwnerDocument(getter_AddRefs(domDocument));
   149   document = do_QueryInterface(domDocument);
   150   NS_ENSURE_TRUE(document, NS_ERROR_NOT_AVAILABLE);
   152   nsAutoScriptBlockerSuppressNodeRemoved autoBlocker;
   154   // stop scripts
   155   nsRefPtr<nsScriptLoader> loader;
   156   bool scripts_enabled = false;
   157   if (document) {
   158     loader = document->ScriptLoader();
   159     scripts_enabled = loader->GetEnabled();
   160   }
   161   if (scripts_enabled) {
   162     loader->SetEnabled(false);
   163   }
   165   // Wrap things in a div or body for parsing, but it won't show up in
   166   // the fragment.
   167   nsAutoTArray<nsString, 2> tagStack;
   168   nsAutoCString base, spec;
   169   if (aIsXML) {
   170     // XHTML
   171     if (aBaseURI) {
   172       base.Append(NS_LITERAL_CSTRING(XHTML_DIV_TAG));
   173       base.Append(NS_LITERAL_CSTRING(" xml:base=\""));
   174       aBaseURI->GetSpec(spec);
   175       // nsEscapeHTML is good enough, because we only need to get
   176       // quotes, ampersands, and angle brackets
   177       char* escapedSpec = nsEscapeHTML(spec.get());
   178       if (escapedSpec)
   179         base += escapedSpec;
   180       NS_Free(escapedSpec);
   181       base.Append(NS_LITERAL_CSTRING("\""));
   182       tagStack.AppendElement(NS_ConvertUTF8toUTF16(base));
   183     }  else {
   184       tagStack.AppendElement(NS_LITERAL_STRING(XHTML_DIV_TAG));
   185     }
   186   }
   188   nsresult rv = NS_OK;
   189   nsCOMPtr<nsIContent> fragment;
   190   if (aIsXML) {
   191     rv = nsContentUtils::ParseFragmentXML(aFragment,
   192                                           document,
   193                                           tagStack,
   194                                           true,
   195                                           aReturn);
   196     fragment = do_QueryInterface(*aReturn);
   197   } else {
   198     NS_ADDREF(*aReturn = new DocumentFragment(document->NodeInfoManager()));
   199     fragment = do_QueryInterface(*aReturn);
   200     rv = nsContentUtils::ParseFragmentHTML(aFragment,
   201                                            fragment,
   202                                            nsGkAtoms::body,
   203                                            kNameSpaceID_XHTML,
   204                                            false,
   205                                            true);
   206     // Now, set the base URI on all subtree roots.
   207     if (aBaseURI) {
   208       aBaseURI->GetSpec(spec);
   209       nsAutoString spec16;
   210       CopyUTF8toUTF16(spec, spec16);
   211       nsIContent* node = fragment->GetFirstChild();
   212       while (node) {
   213         if (node->IsElement()) {
   214           node->SetAttr(kNameSpaceID_XML,
   215                         nsGkAtoms::base,
   216                         nsGkAtoms::xml,
   217                         spec16,
   218                         false);
   219         }
   220         node = node->GetNextSibling();
   221       }
   222     }
   223   }
   224   if (fragment) {
   225     nsTreeSanitizer sanitizer(aFlags);
   226     sanitizer.Sanitize(fragment);
   227   }
   229   if (scripts_enabled) {
   230     loader->SetEnabled(true);
   231   }
   233   return rv;
   234 }

mercurial