Wed, 31 Dec 2014 13:27:57 +0100
Ignore runtime configuration files generated during quality assurance.
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsFeedSniffer.h"
9 #include "nsNetCID.h"
10 #include "nsXPCOM.h"
11 #include "nsCOMPtr.h"
12 #include "nsStringStream.h"
14 #include "nsBrowserCompsCID.h"
16 #include "nsICategoryManager.h"
17 #include "nsIServiceManager.h"
18 #include "nsComponentManagerUtils.h"
19 #include "nsServiceManagerUtils.h"
21 #include "nsIStreamConverterService.h"
22 #include "nsIStreamConverter.h"
24 #include "nsIStreamListener.h"
26 #include "nsIHttpChannel.h"
27 #include "nsIMIMEHeaderParam.h"
29 #include "nsMimeTypes.h"
30 #include "nsIURI.h"
31 #include <algorithm>
// Content types that are taken as a reliable indication of a feed.
#define TYPE_ATOM       "application/atom+xml"
#define TYPE_RSS        "application/rss+xml"
// Internal type reported by the sniffer when the content looks like a feed.
#define TYPE_MAYBE_FEED "application/vnd.mozilla.maybe.feed"

// Namespace URIs that must both be present for a document to be sniffed as
// RSS 1.0.
#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
#define NS_RSS "http://purl.org/rss/1.0/"

// Upper bound on the number of bytes examined while sniffing.
#define MAX_BYTES 512u
42 NS_IMPL_ISUPPORTS(nsFeedSniffer,
43 nsIContentSniffer,
44 nsIStreamListener,
45 nsIRequestObserver)
47 nsresult
48 nsFeedSniffer::ConvertEncodedData(nsIRequest* request,
49 const uint8_t* data,
50 uint32_t length)
51 {
52 nsresult rv = NS_OK;
54 mDecodedData = "";
55 nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(request));
56 if (!httpChannel)
57 return NS_ERROR_NO_INTERFACE;
59 nsAutoCString contentEncoding;
60 httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"),
61 contentEncoding);
62 if (!contentEncoding.IsEmpty()) {
63 nsCOMPtr<nsIStreamConverterService> converterService(do_GetService(NS_STREAMCONVERTERSERVICE_CONTRACTID));
64 if (converterService) {
65 ToLowerCase(contentEncoding);
67 nsCOMPtr<nsIStreamListener> converter;
68 rv = converterService->AsyncConvertData(contentEncoding.get(),
69 "uncompressed", this, nullptr,
70 getter_AddRefs(converter));
71 NS_ENSURE_SUCCESS(rv, rv);
73 converter->OnStartRequest(request, nullptr);
75 nsCOMPtr<nsIStringInputStream> rawStream =
76 do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID);
77 if (!rawStream)
78 return NS_ERROR_FAILURE;
80 rv = rawStream->SetData((const char*)data, length);
81 NS_ENSURE_SUCCESS(rv, rv);
83 rv = converter->OnDataAvailable(request, nullptr, rawStream, 0, length);
84 NS_ENSURE_SUCCESS(rv, rv);
86 converter->OnStopRequest(request, nullptr, NS_OK);
87 }
88 }
89 return rv;
90 }
92 template<int N>
93 static bool
94 StringBeginsWithLowercaseLiteral(nsAString& aString,
95 const char (&aSubstring)[N])
96 {
97 return StringHead(aString, N).LowerCaseEqualsLiteral(aSubstring);
98 }
100 bool
101 HasAttachmentDisposition(nsIHttpChannel* httpChannel)
102 {
103 if (!httpChannel)
104 return false;
106 uint32_t disp;
107 nsresult rv = httpChannel->GetContentDisposition(&disp);
109 if (NS_SUCCEEDED(rv) && disp == nsIChannel::DISPOSITION_ATTACHMENT)
110 return true;
112 return false;
113 }
/**
 * Scans the half-open range [begin, end) for a character.
 *
 * @return a pointer to the first occurrence of |c|, or nullptr if the range
 *         is empty or does not contain |c|
 */
static const char*
FindChar(char c, const char *begin, const char *end)
{
  // An empty (or inverted) range never matches.
  if (!(begin < end))
    return nullptr;

  const char* match = std::find(begin, end, c);
  return match == end ? nullptr : match;
}
129 /**
130 *
131 * Determine if a substring is the "documentElement" in the document.
132 *
133 * All of our sniffed substrings: <rss, <feed, <rdf:RDF must be the "document"
134 * element within the XML DOM, i.e. the root container element. Otherwise,
135 * it's possible that someone embedded one of these tags inside a document of
136 * another type, e.g. a HTML document, and we don't want to show the preview
137 * page if the document isn't actually a feed.
138 *
139 * @param start
140 * The beginning of the data being sniffed
141 * @param end
142 * The end of the data being sniffed, right before the substring that
143 * was found.
144 * @returns true if the found substring is the documentElement, false
145 * otherwise.
146 */
147 static bool
148 IsDocumentElement(const char *start, const char* end)
149 {
150 // For every tag in the buffer, check to see if it's a PI, Doctype or
151 // comment, our desired substring or something invalid.
152 while ( (start = FindChar('<', start, end)) ) {
153 ++start;
154 if (start >= end)
155 return false;
157 // Check to see if the character following the '<' is either '?' or '!'
158 // (processing instruction or doctype or comment)... these are valid nodes
159 // to have in the prologue.
160 if (*start != '?' && *start != '!')
161 return false;
163 // Now advance the iterator until the '>' (We do this because we don't want
164 // to sniff indicator substrings that are embedded within other nodes, e.g.
165 // comments: <!-- <rdf:RDF .. > -->
166 start = FindChar('>', start, end);
167 if (!start)
168 return false;
170 ++start;
171 }
172 return true;
173 }
175 /**
176 * Determines whether or not a string exists as the root element in an XML data
177 * string buffer.
178 * @param dataString
179 * The data being sniffed
180 * @param substring
181 * The substring being tested for existence and root-ness.
182 * @returns true if the substring exists and is the documentElement, false
183 * otherwise.
184 */
185 static bool
186 ContainsTopLevelSubstring(nsACString& dataString, const char *substring)
187 {
188 int32_t offset = dataString.Find(substring);
189 if (offset == -1)
190 return false;
192 const char *begin = dataString.BeginReading();
194 // Only do the validation when we find the substring.
195 return IsDocumentElement(begin, begin + offset);
196 }
198 NS_IMETHODIMP
199 nsFeedSniffer::GetMIMETypeFromContent(nsIRequest* request,
200 const uint8_t* data,
201 uint32_t length,
202 nsACString& sniffedType)
203 {
204 nsCOMPtr<nsIHttpChannel> channel(do_QueryInterface(request));
205 if (!channel)
206 return NS_ERROR_NO_INTERFACE;
208 // Check that this is a GET request, since you can't subscribe to a POST...
209 nsAutoCString method;
210 channel->GetRequestMethod(method);
211 if (!method.Equals("GET")) {
212 sniffedType.Truncate();
213 return NS_OK;
214 }
216 // We need to find out if this is a load of a view-source document. In this
217 // case we do not want to override the content type, since the source display
218 // does not need to be converted from feed format to XUL. More importantly,
219 // we don't want to change the content type from something
220 // nsContentDLF::CreateInstance knows about (e.g. application/xml, text/html
221 // etc) to something that only the application fe knows about (maybe.feed)
222 // thus deactivating syntax highlighting.
223 nsCOMPtr<nsIURI> originalURI;
224 channel->GetOriginalURI(getter_AddRefs(originalURI));
226 nsAutoCString scheme;
227 originalURI->GetScheme(scheme);
228 if (scheme.EqualsLiteral("view-source")) {
229 sniffedType.Truncate();
230 return NS_OK;
231 }
233 // Check the Content-Type to see if it is set correctly. If it is set to
234 // something specific that we think is a reliable indication of a feed, don't
235 // bother sniffing since we assume the site maintainer knows what they're
236 // doing.
237 nsAutoCString contentType;
238 channel->GetContentType(contentType);
239 bool noSniff = contentType.EqualsLiteral(TYPE_RSS) ||
240 contentType.EqualsLiteral(TYPE_ATOM);
242 // Check to see if this was a feed request from the location bar or from
243 // the feed: protocol. This is also a reliable indication.
244 // The value of the header doesn't matter.
245 if (!noSniff) {
246 nsAutoCString sniffHeader;
247 nsresult foundHeader =
248 channel->GetRequestHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"),
249 sniffHeader);
250 noSniff = NS_SUCCEEDED(foundHeader);
251 }
253 if (noSniff) {
254 // check for an attachment after we have a likely feed.
255 if(HasAttachmentDisposition(channel)) {
256 sniffedType.Truncate();
257 return NS_OK;
258 }
260 // set the feed header as a response header, since we have good metadata
261 // telling us that the feed is supposed to be RSS or Atom
262 channel->SetResponseHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"),
263 NS_LITERAL_CSTRING("1"), false);
264 sniffedType.AssignLiteral(TYPE_MAYBE_FEED);
265 return NS_OK;
266 }
268 // Don't sniff arbitrary types. Limit sniffing to situations that
269 // we think can reasonably arise.
270 if (!contentType.EqualsLiteral(TEXT_HTML) &&
271 !contentType.EqualsLiteral(APPLICATION_OCTET_STREAM) &&
272 // Same criterion as XMLHttpRequest. Should we be checking for "+xml"
273 // and check for text/xml and application/xml by hand instead?
274 contentType.Find("xml") == -1) {
275 sniffedType.Truncate();
276 return NS_OK;
277 }
279 // Now we need to potentially decompress data served with
280 // Content-Encoding: gzip
281 nsresult rv = ConvertEncodedData(request, data, length);
282 if (NS_FAILED(rv))
283 return rv;
285 // We cap the number of bytes to scan at MAX_BYTES to prevent picking up
286 // false positives by accidentally reading document content, e.g. a "how to
287 // make a feed" page.
288 const char* testData;
289 if (mDecodedData.IsEmpty()) {
290 testData = (const char*)data;
291 length = std::min(length, MAX_BYTES);
292 } else {
293 testData = mDecodedData.get();
294 length = std::min(mDecodedData.Length(), MAX_BYTES);
295 }
297 // The strategy here is based on that described in:
298 // http://blogs.msdn.com/rssteam/articles/PublishersGuide.aspx
299 // for interoperarbility purposes.
301 // Thus begins the actual sniffing.
302 nsDependentCSubstring dataString((const char*)testData, length);
304 bool isFeed = false;
306 // RSS 0.91/0.92/2.0
307 isFeed = ContainsTopLevelSubstring(dataString, "<rss");
309 // Atom 1.0
310 if (!isFeed)
311 isFeed = ContainsTopLevelSubstring(dataString, "<feed");
313 // RSS 1.0
314 if (!isFeed) {
315 isFeed = ContainsTopLevelSubstring(dataString, "<rdf:RDF") &&
316 dataString.Find(NS_RDF) != -1 &&
317 dataString.Find(NS_RSS) != -1;
318 }
320 // If we sniffed a feed, coerce our internal type
321 if (isFeed && !HasAttachmentDisposition(channel))
322 sniffedType.AssignLiteral(TYPE_MAYBE_FEED);
323 else
324 sniffedType.Truncate();
325 return NS_OK;
326 }
328 NS_IMETHODIMP
329 nsFeedSniffer::OnStartRequest(nsIRequest* request, nsISupports* context)
330 {
331 return NS_OK;
332 }
334 NS_METHOD
335 nsFeedSniffer::AppendSegmentToString(nsIInputStream* inputStream,
336 void* closure,
337 const char* rawSegment,
338 uint32_t toOffset,
339 uint32_t count,
340 uint32_t* writeCount)
341 {
342 nsCString* decodedData = static_cast<nsCString*>(closure);
343 decodedData->Append(rawSegment, count);
344 *writeCount = count;
345 return NS_OK;
346 }
348 NS_IMETHODIMP
349 nsFeedSniffer::OnDataAvailable(nsIRequest* request, nsISupports* context,
350 nsIInputStream* stream, uint64_t offset,
351 uint32_t count)
352 {
353 uint32_t read;
354 return stream->ReadSegments(AppendSegmentToString, &mDecodedData, count,
355 &read);
356 }
358 NS_IMETHODIMP
359 nsFeedSniffer::OnStopRequest(nsIRequest* request, nsISupports* context,
360 nsresult status)
361 {
362 return NS_OK;
363 }