Wed, 31 Dec 2014 06:55:46 +0100
Added tag TORBROWSER_REPLICA for changeset 6474c204b198
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #include "nsUnknownDecoder.h" |
michael@0 | 7 | #include "nsIPipe.h" |
michael@0 | 8 | #include "nsIInputStream.h" |
michael@0 | 9 | #include "nsIOutputStream.h" |
michael@0 | 10 | #include "nsMimeTypes.h" |
michael@0 | 11 | #include "nsIPrefService.h" |
michael@0 | 12 | #include "nsIPrefBranch.h" |
michael@0 | 13 | |
michael@0 | 14 | #include "nsCRT.h" |
michael@0 | 15 | |
michael@0 | 16 | #include "nsIMIMEService.h" |
michael@0 | 17 | |
michael@0 | 18 | #include "nsIViewSourceChannel.h" |
michael@0 | 19 | #include "nsIHttpChannel.h" |
michael@0 | 20 | #include "nsNetCID.h" |
michael@0 | 21 | #include "nsNetUtil.h" |
michael@0 | 22 | |
michael@0 | 23 | |
michael@0 | 24 | #define MAX_BUFFER_SIZE 512 |
michael@0 | 25 | |
michael@0 | 26 | nsUnknownDecoder::nsUnknownDecoder() |
michael@0 | 27 | : mBuffer(nullptr) |
michael@0 | 28 | , mBufferLen(0) |
michael@0 | 29 | , mRequireHTMLsuffix(false) |
michael@0 | 30 | { |
michael@0 | 31 | nsCOMPtr<nsIPrefBranch> prefs = do_GetService(NS_PREFSERVICE_CONTRACTID); |
michael@0 | 32 | if (prefs) { |
michael@0 | 33 | bool val; |
michael@0 | 34 | if (NS_SUCCEEDED(prefs->GetBoolPref("security.requireHTMLsuffix", &val))) |
michael@0 | 35 | mRequireHTMLsuffix = val; |
michael@0 | 36 | } |
michael@0 | 37 | } |
michael@0 | 38 | |
michael@0 | 39 | nsUnknownDecoder::~nsUnknownDecoder() |
michael@0 | 40 | { |
michael@0 | 41 | if (mBuffer) { |
michael@0 | 42 | delete [] mBuffer; |
michael@0 | 43 | mBuffer = nullptr; |
michael@0 | 44 | } |
michael@0 | 45 | } |
michael@0 | 46 | |
michael@0 | 47 | // ---- |
michael@0 | 48 | // |
michael@0 | 49 | // nsISupports implementation... |
michael@0 | 50 | // |
michael@0 | 51 | // ---- |
michael@0 | 52 | |
michael@0 | 53 | NS_IMPL_ADDREF(nsUnknownDecoder) |
michael@0 | 54 | NS_IMPL_RELEASE(nsUnknownDecoder) |
michael@0 | 55 | |
michael@0 | 56 | NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder) |
michael@0 | 57 | NS_INTERFACE_MAP_ENTRY(nsIStreamConverter) |
michael@0 | 58 | NS_INTERFACE_MAP_ENTRY(nsIStreamListener) |
michael@0 | 59 | NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) |
michael@0 | 60 | NS_INTERFACE_MAP_ENTRY(nsIContentSniffer) |
michael@0 | 61 | NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIStreamListener) |
michael@0 | 62 | NS_INTERFACE_MAP_END |
michael@0 | 63 | |
michael@0 | 64 | |
michael@0 | 65 | // ---- |
michael@0 | 66 | // |
michael@0 | 67 | // nsIStreamConverter methods... |
michael@0 | 68 | // |
michael@0 | 69 | // ---- |
michael@0 | 70 | |
michael@0 | 71 | NS_IMETHODIMP |
michael@0 | 72 | nsUnknownDecoder::Convert(nsIInputStream *aFromStream, |
michael@0 | 73 | const char *aFromType, |
michael@0 | 74 | const char *aToType, |
michael@0 | 75 | nsISupports *aCtxt, |
michael@0 | 76 | nsIInputStream **aResultStream) |
michael@0 | 77 | { |
michael@0 | 78 | return NS_ERROR_NOT_IMPLEMENTED; |
michael@0 | 79 | } |
michael@0 | 80 | |
michael@0 | 81 | NS_IMETHODIMP |
michael@0 | 82 | nsUnknownDecoder::AsyncConvertData(const char *aFromType, |
michael@0 | 83 | const char *aToType, |
michael@0 | 84 | nsIStreamListener *aListener, |
michael@0 | 85 | nsISupports *aCtxt) |
michael@0 | 86 | { |
michael@0 | 87 | NS_ASSERTION(aListener && aFromType && aToType, |
michael@0 | 88 | "null pointer passed into multi mixed converter"); |
michael@0 | 89 | // hook up our final listener. this guy gets the various On*() calls we want to throw |
michael@0 | 90 | // at him. |
michael@0 | 91 | // |
michael@0 | 92 | mNextListener = aListener; |
michael@0 | 93 | return (aListener) ? NS_OK : NS_ERROR_FAILURE; |
michael@0 | 94 | } |
michael@0 | 95 | |
michael@0 | 96 | // ---- |
michael@0 | 97 | // |
michael@0 | 98 | // nsIStreamListener methods... |
michael@0 | 99 | // |
michael@0 | 100 | // ---- |
michael@0 | 101 | |
michael@0 | 102 | NS_IMETHODIMP |
michael@0 | 103 | nsUnknownDecoder::OnDataAvailable(nsIRequest* request, |
michael@0 | 104 | nsISupports *aCtxt, |
michael@0 | 105 | nsIInputStream *aStream, |
michael@0 | 106 | uint64_t aSourceOffset, |
michael@0 | 107 | uint32_t aCount) |
michael@0 | 108 | { |
michael@0 | 109 | nsresult rv = NS_OK; |
michael@0 | 110 | |
michael@0 | 111 | if (!mNextListener) return NS_ERROR_FAILURE; |
michael@0 | 112 | |
michael@0 | 113 | if (mContentType.IsEmpty()) { |
michael@0 | 114 | uint32_t count, len; |
michael@0 | 115 | |
michael@0 | 116 | // If the buffer has not been allocated by now, just fail... |
michael@0 | 117 | if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 118 | |
michael@0 | 119 | // |
michael@0 | 120 | // Determine how much of the stream should be read to fill up the |
michael@0 | 121 | // sniffer buffer... |
michael@0 | 122 | // |
michael@0 | 123 | if (mBufferLen + aCount >= MAX_BUFFER_SIZE) { |
michael@0 | 124 | count = MAX_BUFFER_SIZE-mBufferLen; |
michael@0 | 125 | } else { |
michael@0 | 126 | count = aCount; |
michael@0 | 127 | } |
michael@0 | 128 | |
michael@0 | 129 | // Read the data into the buffer... |
michael@0 | 130 | rv = aStream->Read((mBuffer+mBufferLen), count, &len); |
michael@0 | 131 | if (NS_FAILED(rv)) return rv; |
michael@0 | 132 | |
michael@0 | 133 | mBufferLen += len; |
michael@0 | 134 | aCount -= len; |
michael@0 | 135 | |
michael@0 | 136 | if (aCount) { |
michael@0 | 137 | // |
michael@0 | 138 | // Adjust the source offset... The call to FireListenerNotifications(...) |
michael@0 | 139 | // will make the first OnDataAvailable(...) call with an offset of 0. |
michael@0 | 140 | // So, this offset needs to be adjusted to reflect that... |
michael@0 | 141 | // |
michael@0 | 142 | aSourceOffset += mBufferLen; |
michael@0 | 143 | |
michael@0 | 144 | DetermineContentType(request); |
michael@0 | 145 | |
michael@0 | 146 | rv = FireListenerNotifications(request, aCtxt); |
michael@0 | 147 | } |
michael@0 | 148 | } |
michael@0 | 149 | |
michael@0 | 150 | // Must not fire ODA again if it failed once |
michael@0 | 151 | if (aCount && NS_SUCCEEDED(rv)) { |
michael@0 | 152 | NS_ASSERTION(!mContentType.IsEmpty(), |
michael@0 | 153 | "Content type should be known by now."); |
michael@0 | 154 | |
michael@0 | 155 | rv = mNextListener->OnDataAvailable(request, aCtxt, aStream, |
michael@0 | 156 | aSourceOffset, aCount); |
michael@0 | 157 | } |
michael@0 | 158 | |
michael@0 | 159 | return rv; |
michael@0 | 160 | } |
michael@0 | 161 | |
michael@0 | 162 | // ---- |
michael@0 | 163 | // |
michael@0 | 164 | // nsIRequestObserver methods... |
michael@0 | 165 | // |
michael@0 | 166 | // ---- |
michael@0 | 167 | |
michael@0 | 168 | NS_IMETHODIMP |
michael@0 | 169 | nsUnknownDecoder::OnStartRequest(nsIRequest* request, nsISupports *aCtxt) |
michael@0 | 170 | { |
michael@0 | 171 | nsresult rv = NS_OK; |
michael@0 | 172 | |
michael@0 | 173 | if (!mNextListener) return NS_ERROR_FAILURE; |
michael@0 | 174 | |
michael@0 | 175 | // Allocate the sniffer buffer... |
michael@0 | 176 | if (NS_SUCCEEDED(rv) && !mBuffer) { |
michael@0 | 177 | mBuffer = new char[MAX_BUFFER_SIZE]; |
michael@0 | 178 | |
michael@0 | 179 | if (!mBuffer) { |
michael@0 | 180 | rv = NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 181 | } |
michael@0 | 182 | } |
michael@0 | 183 | |
michael@0 | 184 | // Do not pass the OnStartRequest on to the next listener (yet)... |
michael@0 | 185 | return rv; |
michael@0 | 186 | } |
michael@0 | 187 | |
michael@0 | 188 | NS_IMETHODIMP |
michael@0 | 189 | nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsISupports *aCtxt, |
michael@0 | 190 | nsresult aStatus) |
michael@0 | 191 | { |
michael@0 | 192 | nsresult rv = NS_OK; |
michael@0 | 193 | |
michael@0 | 194 | if (!mNextListener) return NS_ERROR_FAILURE; |
michael@0 | 195 | |
michael@0 | 196 | // |
michael@0 | 197 | // The total amount of data is less than the size of the sniffer buffer. |
michael@0 | 198 | // Analyze the buffer now... |
michael@0 | 199 | // |
michael@0 | 200 | if (mContentType.IsEmpty()) { |
michael@0 | 201 | DetermineContentType(request); |
michael@0 | 202 | |
michael@0 | 203 | rv = FireListenerNotifications(request, aCtxt); |
michael@0 | 204 | |
michael@0 | 205 | if (NS_FAILED(rv)) { |
michael@0 | 206 | aStatus = rv; |
michael@0 | 207 | } |
michael@0 | 208 | } |
michael@0 | 209 | |
michael@0 | 210 | rv = mNextListener->OnStopRequest(request, aCtxt, aStatus); |
michael@0 | 211 | mNextListener = 0; |
michael@0 | 212 | |
michael@0 | 213 | return rv; |
michael@0 | 214 | } |
michael@0 | 215 | |
michael@0 | 216 | // ---- |
michael@0 | 217 | // |
michael@0 | 218 | // nsIContentSniffer methods... |
michael@0 | 219 | // |
michael@0 | 220 | // ---- |
michael@0 | 221 | NS_IMETHODIMP |
michael@0 | 222 | nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest, |
michael@0 | 223 | const uint8_t* aData, |
michael@0 | 224 | uint32_t aLength, |
michael@0 | 225 | nsACString& type) |
michael@0 | 226 | { |
michael@0 | 227 | mBuffer = const_cast<char*>(reinterpret_cast<const char*>(aData)); |
michael@0 | 228 | mBufferLen = aLength; |
michael@0 | 229 | DetermineContentType(aRequest); |
michael@0 | 230 | mBuffer = nullptr; |
michael@0 | 231 | mBufferLen = 0; |
michael@0 | 232 | type.Assign(mContentType); |
michael@0 | 233 | mContentType.Truncate(); |
michael@0 | 234 | return type.IsEmpty() ? NS_ERROR_NOT_AVAILABLE : NS_OK; |
michael@0 | 235 | } |
michael@0 | 236 | |
michael@0 | 237 | |
michael@0 | 238 | // Actual sniffing code |
michael@0 | 239 | |
michael@0 | 240 | bool nsUnknownDecoder::AllowSniffing(nsIRequest* aRequest) |
michael@0 | 241 | { |
michael@0 | 242 | if (!mRequireHTMLsuffix) { |
michael@0 | 243 | return true; |
michael@0 | 244 | } |
michael@0 | 245 | |
michael@0 | 246 | nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest); |
michael@0 | 247 | if (!channel) { |
michael@0 | 248 | NS_ERROR("QI failed"); |
michael@0 | 249 | return false; |
michael@0 | 250 | } |
michael@0 | 251 | |
michael@0 | 252 | nsCOMPtr<nsIURI> uri; |
michael@0 | 253 | if (NS_FAILED(channel->GetURI(getter_AddRefs(uri))) || !uri) { |
michael@0 | 254 | return false; |
michael@0 | 255 | } |
michael@0 | 256 | |
michael@0 | 257 | bool isLocalFile = false; |
michael@0 | 258 | if (NS_FAILED(uri->SchemeIs("file", &isLocalFile)) || isLocalFile) { |
michael@0 | 259 | return false; |
michael@0 | 260 | } |
michael@0 | 261 | |
michael@0 | 262 | return true; |
michael@0 | 263 | } |
michael@0 | 264 | |
michael@0 | 265 | /** |
michael@0 | 266 | * This is the array of sniffer entries that depend on "magic numbers" |
michael@0 | 267 | * in the file. Each entry has either a type associated with it (set |
michael@0 | 268 | * these with the SNIFFER_ENTRY macro) or a function to be executed |
michael@0 | 269 | * (set these with the SNIFFER_ENTRY_WITH_FUNC macro). The function |
michael@0 | 270 | * should take a single nsIRequest* and returns bool -- true if |
michael@0 | 271 | * it sets mContentType, false otherwise |
michael@0 | 272 | */ |
michael@0 | 273 | nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = { |
michael@0 | 274 | SNIFFER_ENTRY("%PDF-", APPLICATION_PDF), |
michael@0 | 275 | |
michael@0 | 276 | SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT), |
michael@0 | 277 | |
michael@0 | 278 | // Files that start with mailbox delimiters let's provisionally call |
michael@0 | 279 | // text/plain |
michael@0 | 280 | SNIFFER_ENTRY("From", TEXT_PLAIN), |
michael@0 | 281 | SNIFFER_ENTRY(">From", TEXT_PLAIN), |
michael@0 | 282 | |
michael@0 | 283 | // If the buffer begins with "#!" or "%!" then it is a script of |
michael@0 | 284 | // some sort... "Scripts" can include arbitrary data to be passed |
michael@0 | 285 | // to an interpreter, so we need to decide whether we can call this |
michael@0 | 286 | // text or whether it's data. |
michael@0 | 287 | SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff), |
michael@0 | 288 | |
michael@0 | 289 | // XXXbz should (and can) we also include the various ways that <?xml can |
michael@0 | 290 | // appear as UTF-16 and such? See http://www.w3.org/TR/REC-xml#sec-guessing |
michael@0 | 291 | SNIFFER_ENTRY_WITH_FUNC("<?xml", &nsUnknownDecoder::SniffForXML) |
michael@0 | 292 | }; |
michael@0 | 293 | |
michael@0 | 294 | uint32_t nsUnknownDecoder::sSnifferEntryNum = |
michael@0 | 295 | sizeof(nsUnknownDecoder::sSnifferEntries) / |
michael@0 | 296 | sizeof(nsUnknownDecoder::nsSnifferEntry); |
michael@0 | 297 | |
michael@0 | 298 | void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest) |
michael@0 | 299 | { |
michael@0 | 300 | NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known."); |
michael@0 | 301 | if (!mContentType.IsEmpty()) return; |
michael@0 | 302 | |
michael@0 | 303 | // First, run through all the types we can detect reliably based on |
michael@0 | 304 | // magic numbers |
michael@0 | 305 | uint32_t i; |
michael@0 | 306 | for (i = 0; i < sSnifferEntryNum; ++i) { |
michael@0 | 307 | if (mBufferLen >= sSnifferEntries[i].mByteLen && // enough data |
michael@0 | 308 | memcmp(mBuffer, sSnifferEntries[i].mBytes, sSnifferEntries[i].mByteLen) == 0) { // and type matches |
michael@0 | 309 | NS_ASSERTION(sSnifferEntries[i].mMimeType || |
michael@0 | 310 | sSnifferEntries[i].mContentTypeSniffer, |
michael@0 | 311 | "Must have either a type string or a function to set the type"); |
michael@0 | 312 | NS_ASSERTION(!sSnifferEntries[i].mMimeType || |
michael@0 | 313 | !sSnifferEntries[i].mContentTypeSniffer, |
michael@0 | 314 | "Both a type string and a type sniffing function set;" |
michael@0 | 315 | " using type string"); |
michael@0 | 316 | if (sSnifferEntries[i].mMimeType) { |
michael@0 | 317 | mContentType = sSnifferEntries[i].mMimeType; |
michael@0 | 318 | NS_ASSERTION(!mContentType.IsEmpty(), |
michael@0 | 319 | "Content type should be known by now."); |
michael@0 | 320 | return; |
michael@0 | 321 | } |
michael@0 | 322 | if ((this->*(sSnifferEntries[i].mContentTypeSniffer))(aRequest)) { |
michael@0 | 323 | NS_ASSERTION(!mContentType.IsEmpty(), |
michael@0 | 324 | "Content type should be known by now."); |
michael@0 | 325 | return; |
michael@0 | 326 | } |
michael@0 | 327 | } |
michael@0 | 328 | } |
michael@0 | 329 | |
michael@0 | 330 | NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest, |
michael@0 | 331 | (const uint8_t*)mBuffer, mBufferLen, mContentType); |
michael@0 | 332 | if (!mContentType.IsEmpty()) { |
michael@0 | 333 | return; |
michael@0 | 334 | } |
michael@0 | 335 | |
michael@0 | 336 | if (SniffForHTML(aRequest)) { |
michael@0 | 337 | NS_ASSERTION(!mContentType.IsEmpty(), |
michael@0 | 338 | "Content type should be known by now."); |
michael@0 | 339 | return; |
michael@0 | 340 | } |
michael@0 | 341 | |
michael@0 | 342 | // We don't know what this is yet. Before we just give up, try |
michael@0 | 343 | // the URI from the request. |
michael@0 | 344 | if (SniffURI(aRequest)) { |
michael@0 | 345 | NS_ASSERTION(!mContentType.IsEmpty(), |
michael@0 | 346 | "Content type should be known by now."); |
michael@0 | 347 | return; |
michael@0 | 348 | } |
michael@0 | 349 | |
michael@0 | 350 | LastDitchSniff(aRequest); |
michael@0 | 351 | NS_ASSERTION(!mContentType.IsEmpty(), |
michael@0 | 352 | "Content type should be known by now."); |
michael@0 | 353 | } |
michael@0 | 354 | |
michael@0 | 355 | bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest) |
michael@0 | 356 | { |
michael@0 | 357 | /* |
michael@0 | 358 | * To prevent a possible attack, we will not consider this to be |
michael@0 | 359 | * html content if it comes from the local file system and our prefs |
michael@0 | 360 | * are set right |
michael@0 | 361 | */ |
michael@0 | 362 | if (!AllowSniffing(aRequest)) { |
michael@0 | 363 | return false; |
michael@0 | 364 | } |
michael@0 | 365 | |
michael@0 | 366 | // Now look for HTML. |
michael@0 | 367 | const char* str = mBuffer; |
michael@0 | 368 | const char* end = mBuffer + mBufferLen; |
michael@0 | 369 | |
michael@0 | 370 | // skip leading whitespace |
michael@0 | 371 | while (str != end && nsCRT::IsAsciiSpace(*str)) { |
michael@0 | 372 | ++str; |
michael@0 | 373 | } |
michael@0 | 374 | |
michael@0 | 375 | // did we find something like a start tag? |
michael@0 | 376 | if (str == end || *str != '<' || ++str == end) { |
michael@0 | 377 | return false; |
michael@0 | 378 | } |
michael@0 | 379 | |
michael@0 | 380 | // If we seem to be SGML or XML and we got down here, just pretend we're HTML |
michael@0 | 381 | if (*str == '!' || *str == '?') { |
michael@0 | 382 | mContentType = TEXT_HTML; |
michael@0 | 383 | return true; |
michael@0 | 384 | } |
michael@0 | 385 | |
michael@0 | 386 | uint32_t bufSize = end - str; |
michael@0 | 387 | // We use sizeof(_tagstr) below because that's the length of _tagstr |
michael@0 | 388 | // with the one char " " or ">" appended. |
michael@0 | 389 | #define MATCHES_TAG(_tagstr) \ |
michael@0 | 390 | (bufSize >= sizeof(_tagstr) && \ |
michael@0 | 391 | (PL_strncasecmp(str, _tagstr " ", sizeof(_tagstr)) == 0 || \ |
michael@0 | 392 | PL_strncasecmp(str, _tagstr ">", sizeof(_tagstr)) == 0)) |
michael@0 | 393 | |
michael@0 | 394 | if (MATCHES_TAG("html") || |
michael@0 | 395 | MATCHES_TAG("frameset") || |
michael@0 | 396 | MATCHES_TAG("body") || |
michael@0 | 397 | MATCHES_TAG("head") || |
michael@0 | 398 | MATCHES_TAG("script") || |
michael@0 | 399 | MATCHES_TAG("iframe") || |
michael@0 | 400 | MATCHES_TAG("a") || |
michael@0 | 401 | MATCHES_TAG("img") || |
michael@0 | 402 | MATCHES_TAG("table") || |
michael@0 | 403 | MATCHES_TAG("title") || |
michael@0 | 404 | MATCHES_TAG("link") || |
michael@0 | 405 | MATCHES_TAG("base") || |
michael@0 | 406 | MATCHES_TAG("style") || |
michael@0 | 407 | MATCHES_TAG("div") || |
michael@0 | 408 | MATCHES_TAG("p") || |
michael@0 | 409 | MATCHES_TAG("font") || |
michael@0 | 410 | MATCHES_TAG("applet") || |
michael@0 | 411 | MATCHES_TAG("meta") || |
michael@0 | 412 | MATCHES_TAG("center") || |
michael@0 | 413 | MATCHES_TAG("form") || |
michael@0 | 414 | MATCHES_TAG("isindex") || |
michael@0 | 415 | MATCHES_TAG("h1") || |
michael@0 | 416 | MATCHES_TAG("h2") || |
michael@0 | 417 | MATCHES_TAG("h3") || |
michael@0 | 418 | MATCHES_TAG("h4") || |
michael@0 | 419 | MATCHES_TAG("h5") || |
michael@0 | 420 | MATCHES_TAG("h6") || |
michael@0 | 421 | MATCHES_TAG("b") || |
michael@0 | 422 | MATCHES_TAG("pre")) { |
michael@0 | 423 | |
michael@0 | 424 | mContentType = TEXT_HTML; |
michael@0 | 425 | return true; |
michael@0 | 426 | } |
michael@0 | 427 | |
michael@0 | 428 | #undef MATCHES_TAG |
michael@0 | 429 | |
michael@0 | 430 | return false; |
michael@0 | 431 | } |
michael@0 | 432 | |
michael@0 | 433 | bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest) |
michael@0 | 434 | { |
michael@0 | 435 | // Just like HTML, this should be able to be shut off. |
michael@0 | 436 | if (!AllowSniffing(aRequest)) { |
michael@0 | 437 | return false; |
michael@0 | 438 | } |
michael@0 | 439 | |
michael@0 | 440 | // First see whether we can glean anything from the uri... |
michael@0 | 441 | if (!SniffURI(aRequest)) { |
michael@0 | 442 | // Oh well; just generic XML will have to do |
michael@0 | 443 | mContentType = TEXT_XML; |
michael@0 | 444 | } |
michael@0 | 445 | |
michael@0 | 446 | return true; |
michael@0 | 447 | } |
michael@0 | 448 | |
michael@0 | 449 | bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest) |
michael@0 | 450 | { |
michael@0 | 451 | nsCOMPtr<nsIMIMEService> mimeService(do_GetService("@mozilla.org/mime;1")); |
michael@0 | 452 | if (mimeService) { |
michael@0 | 453 | nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest); |
michael@0 | 454 | if (channel) { |
michael@0 | 455 | nsCOMPtr<nsIURI> uri; |
michael@0 | 456 | nsresult result = channel->GetURI(getter_AddRefs(uri)); |
michael@0 | 457 | if (NS_SUCCEEDED(result) && uri) { |
michael@0 | 458 | nsAutoCString type; |
michael@0 | 459 | result = mimeService->GetTypeFromURI(uri, type); |
michael@0 | 460 | if (NS_SUCCEEDED(result)) { |
michael@0 | 461 | mContentType = type; |
michael@0 | 462 | return true; |
michael@0 | 463 | } |
michael@0 | 464 | } |
michael@0 | 465 | } |
michael@0 | 466 | } |
michael@0 | 467 | |
michael@0 | 468 | return false; |
michael@0 | 469 | } |
michael@0 | 470 | |
michael@0 | 471 | // This macro is based on RFC 2046 Section 4.1.2. Treat any char 0-31 |
michael@0 | 472 | // except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by |
michael@0 | 473 | // encodings like Shift_JIS) as non-text |
michael@0 | 474 | #define IS_TEXT_CHAR(ch) \ |
michael@0 | 475 | (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27) |
michael@0 | 476 | |
michael@0 | 477 | bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest) |
michael@0 | 478 | { |
michael@0 | 479 | // All we can do now is try to guess whether this is text/plain or |
michael@0 | 480 | // application/octet-stream |
michael@0 | 481 | |
michael@0 | 482 | // First, check for a BOM. If we see one, assume this is text/plain |
michael@0 | 483 | // in whatever encoding. If there is a BOM _and_ text we will |
michael@0 | 484 | // always have at least 4 bytes in the buffer (since the 2-byte BOMs |
michael@0 | 485 | // are for 2-byte encodings and the UTF-8 BOM is 3 bytes). |
michael@0 | 486 | if (mBufferLen >= 4) { |
michael@0 | 487 | const unsigned char* buf = (const unsigned char*)mBuffer; |
michael@0 | 488 | if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16, Big Endian |
michael@0 | 489 | (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16 or UCS-4, Little Endian |
michael@0 | 490 | (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) || // UTF-8 |
michael@0 | 491 | (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF)) { // UCS-4, Big Endian |
michael@0 | 492 | |
michael@0 | 493 | mContentType = TEXT_PLAIN; |
michael@0 | 494 | return true; |
michael@0 | 495 | } |
michael@0 | 496 | } |
michael@0 | 497 | |
michael@0 | 498 | // Now see whether the buffer has any non-text chars. If not, then let's |
michael@0 | 499 | // just call it text/plain... |
michael@0 | 500 | // |
michael@0 | 501 | uint32_t i; |
michael@0 | 502 | for (i = 0; i < mBufferLen && IS_TEXT_CHAR(mBuffer[i]); i++) { |
michael@0 | 503 | continue; |
michael@0 | 504 | } |
michael@0 | 505 | |
michael@0 | 506 | if (i == mBufferLen) { |
michael@0 | 507 | mContentType = TEXT_PLAIN; |
michael@0 | 508 | } |
michael@0 | 509 | else { |
michael@0 | 510 | mContentType = APPLICATION_OCTET_STREAM; |
michael@0 | 511 | } |
michael@0 | 512 | |
michael@0 | 513 | return true; |
michael@0 | 514 | } |
michael@0 | 515 | |
michael@0 | 516 | |
michael@0 | 517 | nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request, |
michael@0 | 518 | nsISupports *aCtxt) |
michael@0 | 519 | { |
michael@0 | 520 | nsresult rv = NS_OK; |
michael@0 | 521 | |
michael@0 | 522 | if (!mNextListener) return NS_ERROR_FAILURE; |
michael@0 | 523 | |
michael@0 | 524 | if (!mContentType.IsEmpty()) { |
michael@0 | 525 | nsCOMPtr<nsIViewSourceChannel> viewSourceChannel = |
michael@0 | 526 | do_QueryInterface(request); |
michael@0 | 527 | if (viewSourceChannel) { |
michael@0 | 528 | rv = viewSourceChannel->SetOriginalContentType(mContentType); |
michael@0 | 529 | } else { |
michael@0 | 530 | nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv); |
michael@0 | 531 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 532 | // Set the new content type on the channel... |
michael@0 | 533 | rv = channel->SetContentType(mContentType); |
michael@0 | 534 | } |
michael@0 | 535 | } |
michael@0 | 536 | |
michael@0 | 537 | NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!"); |
michael@0 | 538 | |
michael@0 | 539 | if (NS_FAILED(rv)) { |
michael@0 | 540 | // Cancel the request to make sure it has the correct status if |
michael@0 | 541 | // mNextListener looks at it. |
michael@0 | 542 | request->Cancel(rv); |
michael@0 | 543 | mNextListener->OnStartRequest(request, aCtxt); |
michael@0 | 544 | return rv; |
michael@0 | 545 | } |
michael@0 | 546 | } |
michael@0 | 547 | |
michael@0 | 548 | // Fire the OnStartRequest(...) |
michael@0 | 549 | rv = mNextListener->OnStartRequest(request, aCtxt); |
michael@0 | 550 | |
michael@0 | 551 | if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 552 | |
michael@0 | 553 | // If the request was canceled, then we need to treat that equivalently |
michael@0 | 554 | // to an error returned by OnStartRequest. |
michael@0 | 555 | if (NS_SUCCEEDED(rv)) |
michael@0 | 556 | request->GetStatus(&rv); |
michael@0 | 557 | |
michael@0 | 558 | // Fire the first OnDataAvailable for the data that was read from the |
michael@0 | 559 | // stream into the sniffer buffer... |
michael@0 | 560 | if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) { |
michael@0 | 561 | uint32_t len = 0; |
michael@0 | 562 | nsCOMPtr<nsIInputStream> in; |
michael@0 | 563 | nsCOMPtr<nsIOutputStream> out; |
michael@0 | 564 | |
michael@0 | 565 | // Create a pipe and fill it with the data from the sniffer buffer. |
michael@0 | 566 | rv = NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out), |
michael@0 | 567 | MAX_BUFFER_SIZE, MAX_BUFFER_SIZE); |
michael@0 | 568 | |
michael@0 | 569 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 570 | rv = out->Write(mBuffer, mBufferLen, &len); |
michael@0 | 571 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 572 | if (len == mBufferLen) { |
michael@0 | 573 | rv = mNextListener->OnDataAvailable(request, aCtxt, in, 0, len); |
michael@0 | 574 | } else { |
michael@0 | 575 | NS_ERROR("Unable to write all the data into the pipe."); |
michael@0 | 576 | rv = NS_ERROR_FAILURE; |
michael@0 | 577 | } |
michael@0 | 578 | } |
michael@0 | 579 | } |
michael@0 | 580 | } |
michael@0 | 581 | |
michael@0 | 582 | delete [] mBuffer; |
michael@0 | 583 | mBuffer = nullptr; |
michael@0 | 584 | mBufferLen = 0; |
michael@0 | 585 | |
michael@0 | 586 | return rv; |
michael@0 | 587 | } |
michael@0 | 588 | |
michael@0 | 589 | void |
michael@0 | 590 | nsBinaryDetector::DetermineContentType(nsIRequest* aRequest) |
michael@0 | 591 | { |
michael@0 | 592 | nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(aRequest); |
michael@0 | 593 | if (!httpChannel) { |
michael@0 | 594 | return; |
michael@0 | 595 | } |
michael@0 | 596 | |
michael@0 | 597 | // It's an HTTP channel. Check for the text/plain mess |
michael@0 | 598 | nsAutoCString contentTypeHdr; |
michael@0 | 599 | httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Type"), |
michael@0 | 600 | contentTypeHdr); |
michael@0 | 601 | nsAutoCString contentType; |
michael@0 | 602 | httpChannel->GetContentType(contentType); |
michael@0 | 603 | |
michael@0 | 604 | // Make sure to do a case-sensitive exact match comparison here. Apache |
michael@0 | 605 | // 1.x just sends text/plain for "unknown", while Apache 2.x sends |
michael@0 | 606 | // text/plain with a ISO-8859-1 charset. Debian's Apache version, just to |
michael@0 | 607 | // be different, sends text/plain with iso-8859-1 charset. For extra fun, |
michael@0 | 608 | // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8. Don't do general |
michael@0 | 609 | // case-insensitive comparison, since we really want to apply this crap as |
michael@0 | 610 | // rarely as we can. |
michael@0 | 611 | if (!contentType.EqualsLiteral("text/plain") || |
michael@0 | 612 | (!contentTypeHdr.EqualsLiteral("text/plain") && |
michael@0 | 613 | !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") && |
michael@0 | 614 | !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") && |
michael@0 | 615 | !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) { |
michael@0 | 616 | return; |
michael@0 | 617 | } |
michael@0 | 618 | |
michael@0 | 619 | // Check whether we have content-encoding. If we do, don't try to |
michael@0 | 620 | // detect the type. |
michael@0 | 621 | // XXXbz we could improve this by doing a local decompress if we |
michael@0 | 622 | // wanted, I'm sure. |
michael@0 | 623 | nsAutoCString contentEncoding; |
michael@0 | 624 | httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"), |
michael@0 | 625 | contentEncoding); |
michael@0 | 626 | if (!contentEncoding.IsEmpty()) { |
michael@0 | 627 | return; |
michael@0 | 628 | } |
michael@0 | 629 | |
michael@0 | 630 | LastDitchSniff(aRequest); |
michael@0 | 631 | if (mContentType.Equals(APPLICATION_OCTET_STREAM)) { |
michael@0 | 632 | // We want to guess at it instead |
michael@0 | 633 | mContentType = APPLICATION_GUESS_FROM_EXT; |
michael@0 | 634 | } else { |
michael@0 | 635 | // Let the text/plain type we already have be, so that other content |
michael@0 | 636 | // sniffers can also get a shot at this data. |
michael@0 | 637 | mContentType.Truncate(); |
michael@0 | 638 | } |
michael@0 | 639 | } |