|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "nsString.h" |
|
7 #include "nsIComponentManager.h" |
|
8 #include "nsCOMPtr.h" |
|
9 #include "nsXPCOM.h" |
|
10 #include "nsISupportsPrimitives.h" |
|
11 #include "nsXPIDLString.h" |
|
12 #include "nsScriptLoader.h" |
|
13 #include "nsEscape.h" |
|
14 #include "nsIParser.h" |
|
15 #include "nsIDTD.h" |
|
16 #include "nsNetCID.h" |
|
17 #include "nsNetUtil.h" |
|
18 #include "nsParserCIID.h" |
|
19 #include "nsContentUtils.h" |
|
20 #include "nsIContentSink.h" |
|
21 #include "nsIDocumentEncoder.h" |
|
22 #include "nsIDOMDocumentFragment.h" |
|
23 #include "nsIFragmentContentSink.h" |
|
24 #include "nsIDOMDocument.h" |
|
25 #include "nsIDOMNodeList.h" |
|
26 #include "nsIDOMNode.h" |
|
27 #include "nsIDOMElement.h" |
|
28 #include "nsIDocument.h" |
|
29 #include "nsIContent.h" |
|
30 #include "nsAttrName.h" |
|
31 #include "nsHTMLParts.h" |
|
32 #include "nsContentCID.h" |
|
33 #include "nsIScriptableUnescapeHTML.h" |
|
34 #include "nsParserUtils.h" |
|
35 #include "nsAutoPtr.h" |
|
36 #include "nsTreeSanitizer.h" |
|
37 #include "nsHtml5Module.h" |
|
38 #include "mozilla/dom/DocumentFragment.h" |
|
39 |
|
// Tag text (element name plus the XHTML namespace declaration) that
// ParseFragment pushes on the tag stack as the wrapper element when it
// parses a fragment as XML.
40 #define XHTML_DIV_TAG "div xmlns=\"http://www.w3.org/1999/xhtml\"" |
|
41 |
|
// Makes mozilla::dom names (e.g. DocumentFragment, used by ParseFragment)
// available unqualified in this file.
42 using namespace mozilla::dom; |
|
43 |
|
// XPCOM nsISupports boilerplate for nsParserUtils, listing the two
// interfaces passed to the macro below.
44 NS_IMPL_ISUPPORTS(nsParserUtils, |
|
45 nsIScriptableUnescapeHTML, |
|
46 nsIParserUtils) |
|
47 |
|
// Class-ID constant defined from NS_PARSER_CID.
// NOTE(review): nothing in the code visible here references kCParserCID;
// confirm whether it is still needed before removing it.
48 static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID); |
|
49 |
|
50 |
|
51 |
|
52 NS_IMETHODIMP |
|
53 nsParserUtils::ConvertToPlainText(const nsAString& aFromStr, |
|
54 uint32_t aFlags, |
|
55 uint32_t aWrapCol, |
|
56 nsAString& aToStr) |
|
57 { |
|
58 return nsContentUtils::ConvertToPlainText(aFromStr, |
|
59 aToStr, |
|
60 aFlags, |
|
61 aWrapCol); |
|
62 } |
|
63 |
|
64 NS_IMETHODIMP |
|
65 nsParserUtils::Unescape(const nsAString& aFromStr, |
|
66 nsAString& aToStr) |
|
67 { |
|
68 return nsContentUtils::ConvertToPlainText(aFromStr, |
|
69 aToStr, |
|
70 nsIDocumentEncoder::OutputSelectionOnly | |
|
71 nsIDocumentEncoder::OutputAbsoluteLinks, |
|
72 0); |
|
73 } |
|
74 |
|
75 NS_IMETHODIMP |
|
76 nsParserUtils::Sanitize(const nsAString& aFromStr, |
|
77 uint32_t aFlags, |
|
78 nsAString& aToStr) |
|
79 { |
|
80 nsCOMPtr<nsIURI> uri; |
|
81 NS_NewURI(getter_AddRefs(uri), "about:blank"); |
|
82 nsCOMPtr<nsIPrincipal> principal = |
|
83 do_CreateInstance("@mozilla.org/nullprincipal;1"); |
|
84 nsCOMPtr<nsIDOMDocument> domDocument; |
|
85 nsresult rv = NS_NewDOMDocument(getter_AddRefs(domDocument), |
|
86 EmptyString(), |
|
87 EmptyString(), |
|
88 nullptr, |
|
89 uri, |
|
90 uri, |
|
91 principal, |
|
92 true, |
|
93 nullptr, |
|
94 DocumentFlavorHTML); |
|
95 NS_ENSURE_SUCCESS(rv, rv); |
|
96 |
|
97 nsCOMPtr<nsIDocument> document = do_QueryInterface(domDocument); |
|
98 rv = nsContentUtils::ParseDocumentHTML(aFromStr, document, false); |
|
99 NS_ENSURE_SUCCESS(rv, rv); |
|
100 |
|
101 nsTreeSanitizer sanitizer(aFlags); |
|
102 sanitizer.Sanitize(document); |
|
103 |
|
104 nsCOMPtr<nsIDocumentEncoder> encoder = |
|
105 do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html"); |
|
106 |
|
107 encoder->NativeInit(document, |
|
108 NS_LITERAL_STRING("text/html"), |
|
109 nsIDocumentEncoder::OutputDontRewriteEncodingDeclaration | |
|
110 nsIDocumentEncoder::OutputNoScriptContent | |
|
111 nsIDocumentEncoder::OutputEncodeBasicEntities | |
|
112 nsIDocumentEncoder::OutputLFLineBreak | |
|
113 nsIDocumentEncoder::OutputRaw); |
|
114 |
|
115 return encoder->EncodeToString(aToStr); |
|
116 } |
|
117 |
|
118 NS_IMETHODIMP |
|
119 nsParserUtils::ParseFragment(const nsAString& aFragment, |
|
120 bool aIsXML, |
|
121 nsIURI* aBaseURI, |
|
122 nsIDOMElement* aContextElement, |
|
123 nsIDOMDocumentFragment** aReturn) |
|
124 { |
|
125 return nsParserUtils::ParseFragment(aFragment, |
|
126 0, |
|
127 aIsXML, |
|
128 aBaseURI, |
|
129 aContextElement, |
|
130 aReturn); |
|
131 } |
|
132 |
|
133 NS_IMETHODIMP |
|
134 nsParserUtils::ParseFragment(const nsAString& aFragment, |
|
135 uint32_t aFlags, |
|
136 bool aIsXML, |
|
137 nsIURI* aBaseURI, |
|
138 nsIDOMElement* aContextElement, |
|
139 nsIDOMDocumentFragment** aReturn) |
|
140 { |
|
141 NS_ENSURE_ARG(aContextElement); |
|
142 *aReturn = nullptr; |
|
143 |
|
144 nsCOMPtr<nsIDocument> document; |
|
145 nsCOMPtr<nsIDOMDocument> domDocument; |
|
146 nsCOMPtr<nsIDOMNode> contextNode; |
|
147 contextNode = do_QueryInterface(aContextElement); |
|
148 contextNode->GetOwnerDocument(getter_AddRefs(domDocument)); |
|
149 document = do_QueryInterface(domDocument); |
|
150 NS_ENSURE_TRUE(document, NS_ERROR_NOT_AVAILABLE); |
|
151 |
|
152 nsAutoScriptBlockerSuppressNodeRemoved autoBlocker; |
|
153 |
|
154 // stop scripts |
|
155 nsRefPtr<nsScriptLoader> loader; |
|
156 bool scripts_enabled = false; |
|
157 if (document) { |
|
158 loader = document->ScriptLoader(); |
|
159 scripts_enabled = loader->GetEnabled(); |
|
160 } |
|
161 if (scripts_enabled) { |
|
162 loader->SetEnabled(false); |
|
163 } |
|
164 |
|
165 // Wrap things in a div or body for parsing, but it won't show up in |
|
166 // the fragment. |
|
167 nsAutoTArray<nsString, 2> tagStack; |
|
168 nsAutoCString base, spec; |
|
169 if (aIsXML) { |
|
170 // XHTML |
|
171 if (aBaseURI) { |
|
172 base.Append(NS_LITERAL_CSTRING(XHTML_DIV_TAG)); |
|
173 base.Append(NS_LITERAL_CSTRING(" xml:base=\"")); |
|
174 aBaseURI->GetSpec(spec); |
|
175 // nsEscapeHTML is good enough, because we only need to get |
|
176 // quotes, ampersands, and angle brackets |
|
177 char* escapedSpec = nsEscapeHTML(spec.get()); |
|
178 if (escapedSpec) |
|
179 base += escapedSpec; |
|
180 NS_Free(escapedSpec); |
|
181 base.Append(NS_LITERAL_CSTRING("\"")); |
|
182 tagStack.AppendElement(NS_ConvertUTF8toUTF16(base)); |
|
183 } else { |
|
184 tagStack.AppendElement(NS_LITERAL_STRING(XHTML_DIV_TAG)); |
|
185 } |
|
186 } |
|
187 |
|
188 nsresult rv = NS_OK; |
|
189 nsCOMPtr<nsIContent> fragment; |
|
190 if (aIsXML) { |
|
191 rv = nsContentUtils::ParseFragmentXML(aFragment, |
|
192 document, |
|
193 tagStack, |
|
194 true, |
|
195 aReturn); |
|
196 fragment = do_QueryInterface(*aReturn); |
|
197 } else { |
|
198 NS_ADDREF(*aReturn = new DocumentFragment(document->NodeInfoManager())); |
|
199 fragment = do_QueryInterface(*aReturn); |
|
200 rv = nsContentUtils::ParseFragmentHTML(aFragment, |
|
201 fragment, |
|
202 nsGkAtoms::body, |
|
203 kNameSpaceID_XHTML, |
|
204 false, |
|
205 true); |
|
206 // Now, set the base URI on all subtree roots. |
|
207 if (aBaseURI) { |
|
208 aBaseURI->GetSpec(spec); |
|
209 nsAutoString spec16; |
|
210 CopyUTF8toUTF16(spec, spec16); |
|
211 nsIContent* node = fragment->GetFirstChild(); |
|
212 while (node) { |
|
213 if (node->IsElement()) { |
|
214 node->SetAttr(kNameSpaceID_XML, |
|
215 nsGkAtoms::base, |
|
216 nsGkAtoms::xml, |
|
217 spec16, |
|
218 false); |
|
219 } |
|
220 node = node->GetNextSibling(); |
|
221 } |
|
222 } |
|
223 } |
|
224 if (fragment) { |
|
225 nsTreeSanitizer sanitizer(aFlags); |
|
226 sanitizer.Sanitize(fragment); |
|
227 } |
|
228 |
|
229 if (scripts_enabled) { |
|
230 loader->SetEnabled(true); |
|
231 } |
|
232 |
|
233 return rv; |
|
234 } |