diff -r 000000000000 -r 6474c204b198 netwerk/streamconv/converters/nsUnknownDecoder.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/netwerk/streamconv/converters/nsUnknownDecoder.cpp Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,639 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsUnknownDecoder.h" +#include "nsIPipe.h" +#include "nsIInputStream.h" +#include "nsIOutputStream.h" +#include "nsMimeTypes.h" +#include "nsIPrefService.h" +#include "nsIPrefBranch.h" + +#include "nsCRT.h" + +#include "nsIMIMEService.h" + +#include "nsIViewSourceChannel.h" +#include "nsIHttpChannel.h" +#include "nsNetCID.h" +#include "nsNetUtil.h" + + +#define MAX_BUFFER_SIZE 512 + +nsUnknownDecoder::nsUnknownDecoder() + : mBuffer(nullptr) + , mBufferLen(0) + , mRequireHTMLsuffix(false) +{ + nsCOMPtr prefs = do_GetService(NS_PREFSERVICE_CONTRACTID); + if (prefs) { + bool val; + if (NS_SUCCEEDED(prefs->GetBoolPref("security.requireHTMLsuffix", &val))) + mRequireHTMLsuffix = val; + } +} + +nsUnknownDecoder::~nsUnknownDecoder() +{ + if (mBuffer) { + delete [] mBuffer; + mBuffer = nullptr; + } +} + +// ---- +// +// nsISupports implementation... +// +// ---- + +NS_IMPL_ADDREF(nsUnknownDecoder) +NS_IMPL_RELEASE(nsUnknownDecoder) + +NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder) + NS_INTERFACE_MAP_ENTRY(nsIStreamConverter) + NS_INTERFACE_MAP_ENTRY(nsIStreamListener) + NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) + NS_INTERFACE_MAP_ENTRY(nsIContentSniffer) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIStreamListener) +NS_INTERFACE_MAP_END + + +// ---- +// +// nsIStreamConverter methods... +// +// ---- + +NS_IMETHODIMP +nsUnknownDecoder::Convert(nsIInputStream *aFromStream, + const char *aFromType, + const char *aToType, + nsISupports *aCtxt, + nsIInputStream **aResultStream) +{ + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsUnknownDecoder::AsyncConvertData(const char *aFromType, + const char *aToType, + nsIStreamListener *aListener, + nsISupports *aCtxt) +{ + NS_ASSERTION(aListener && aFromType && aToType, + "null pointer passed into multi mixed converter"); + // hook up our final listener. this guy gets the various On*() calls we want to throw + // at him. + // + mNextListener = aListener; + return (aListener) ? NS_OK : NS_ERROR_FAILURE; +} + +// ---- +// +// nsIStreamListener methods... +// +// ---- + +NS_IMETHODIMP +nsUnknownDecoder::OnDataAvailable(nsIRequest* request, + nsISupports *aCtxt, + nsIInputStream *aStream, + uint64_t aSourceOffset, + uint32_t aCount) +{ + nsresult rv = NS_OK; + + if (!mNextListener) return NS_ERROR_FAILURE; + + if (mContentType.IsEmpty()) { + uint32_t count, len; + + // If the buffer has not been allocated by now, just fail... + if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; + + // + // Determine how much of the stream should be read to fill up the + // sniffer buffer... + // + if (mBufferLen + aCount >= MAX_BUFFER_SIZE) { + count = MAX_BUFFER_SIZE-mBufferLen; + } else { + count = aCount; + } + + // Read the data into the buffer... + rv = aStream->Read((mBuffer+mBufferLen), count, &len); + if (NS_FAILED(rv)) return rv; + + mBufferLen += len; + aCount -= len; + + if (aCount) { + // + // Adjust the source offset... The call to FireListenerNotifications(...) + // will make the first OnDataAvailable(...) call with an offset of 0. + // So, this offset needs to be adjusted to reflect that... + // + aSourceOffset += mBufferLen; + + DetermineContentType(request); + + rv = FireListenerNotifications(request, aCtxt); + } + } + + // Must not fire ODA again if it failed once + if (aCount && NS_SUCCEEDED(rv)) { + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); + + rv = mNextListener->OnDataAvailable(request, aCtxt, aStream, + aSourceOffset, aCount); + } + + return rv; +} + +// ---- +// +// nsIRequestObserver methods... +// +// ---- + +NS_IMETHODIMP +nsUnknownDecoder::OnStartRequest(nsIRequest* request, nsISupports *aCtxt) +{ + nsresult rv = NS_OK; + + if (!mNextListener) return NS_ERROR_FAILURE; + + // Allocate the sniffer buffer... + if (NS_SUCCEEDED(rv) && !mBuffer) { + mBuffer = new char[MAX_BUFFER_SIZE]; + + if (!mBuffer) { + rv = NS_ERROR_OUT_OF_MEMORY; + } + } + + // Do not pass the OnStartRequest on to the next listener (yet)... + return rv; +} + +NS_IMETHODIMP +nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsISupports *aCtxt, + nsresult aStatus) +{ + nsresult rv = NS_OK; + + if (!mNextListener) return NS_ERROR_FAILURE; + + // + // The total amount of data is less than the size of the sniffer buffer. + // Analyze the buffer now... + // + if (mContentType.IsEmpty()) { + DetermineContentType(request); + + rv = FireListenerNotifications(request, aCtxt); + + if (NS_FAILED(rv)) { + aStatus = rv; + } + } + + rv = mNextListener->OnStopRequest(request, aCtxt, aStatus); + mNextListener = 0; + + return rv; +} + +// ---- +// +// nsIContentSniffer methods... +// +// ---- +NS_IMETHODIMP +nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest, + const uint8_t* aData, + uint32_t aLength, + nsACString& type) +{ + mBuffer = const_cast(reinterpret_cast(aData)); + mBufferLen = aLength; + DetermineContentType(aRequest); + mBuffer = nullptr; + mBufferLen = 0; + type.Assign(mContentType); + mContentType.Truncate(); + return type.IsEmpty() ? NS_ERROR_NOT_AVAILABLE : NS_OK; +} + + +// Actual sniffing code + +bool nsUnknownDecoder::AllowSniffing(nsIRequest* aRequest) +{ + if (!mRequireHTMLsuffix) { + return true; + } + + nsCOMPtr channel = do_QueryInterface(aRequest); + if (!channel) { + NS_ERROR("QI failed"); + return false; + } + + nsCOMPtr uri; + if (NS_FAILED(channel->GetURI(getter_AddRefs(uri))) || !uri) { + return false; + } + + bool isLocalFile = false; + if (NS_FAILED(uri->SchemeIs("file", &isLocalFile)) || isLocalFile) { + return false; + } + + return true; +} + +/** + * This is the array of sniffer entries that depend on "magic numbers" + * in the file. Each entry has either a type associated with it (set + * these with the SNIFFER_ENTRY macro) or a function to be executed + * (set these with the SNIFFER_ENTRY_WITH_FUNC macro). The function + * should take a single nsIRequest* and returns bool -- true if + * it sets mContentType, false otherwise + */ +nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = { + SNIFFER_ENTRY("%PDF-", APPLICATION_PDF), + + SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT), + + // Files that start with mailbox delimiters let's provisionally call + // text/plain + SNIFFER_ENTRY("From", TEXT_PLAIN), + SNIFFER_ENTRY(">From", TEXT_PLAIN), + + // If the buffer begins with "#!" or "%!" then it is a script of + // some sort... "Scripts" can include arbitrary data to be passed + // to an interpreter, so we need to decide whether we can call this + // text or whether it's data. + SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff), + + // XXXbz should (and can) we also include the various ways that = sSnifferEntries[i].mByteLen && // enough data + memcmp(mBuffer, sSnifferEntries[i].mBytes, sSnifferEntries[i].mByteLen) == 0) { // and type matches + NS_ASSERTION(sSnifferEntries[i].mMimeType || + sSnifferEntries[i].mContentTypeSniffer, + "Must have either a type string or a function to set the type"); + NS_ASSERTION(!sSnifferEntries[i].mMimeType || + !sSnifferEntries[i].mContentTypeSniffer, + "Both a type string and a type sniffing function set;" + " using type string"); + if (sSnifferEntries[i].mMimeType) { + mContentType = sSnifferEntries[i].mMimeType; + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); + return; + } + if ((this->*(sSnifferEntries[i].mContentTypeSniffer))(aRequest)) { + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); + return; + } + } + } + + NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest, + (const uint8_t*)mBuffer, mBufferLen, mContentType); + if (!mContentType.IsEmpty()) { + return; + } + + if (SniffForHTML(aRequest)) { + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); + return; + } + + // We don't know what this is yet. Before we just give up, try + // the URI from the request. + if (SniffURI(aRequest)) { + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); + return; + } + + LastDitchSniff(aRequest); + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); +} + +bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest) +{ + /* + * To prevent a possible attack, we will not consider this to be + * html content if it comes from the local file system and our prefs + * are set right + */ + if (!AllowSniffing(aRequest)) { + return false; + } + + // Now look for HTML. + const char* str = mBuffer; + const char* end = mBuffer + mBufferLen; + + // skip leading whitespace + while (str != end && nsCRT::IsAsciiSpace(*str)) { + ++str; + } + + // did we find something like a start tag? + if (str == end || *str != '<' || ++str == end) { + return false; + } + + // If we seem to be SGML or XML and we got down here, just pretend we're HTML + if (*str == '!' || *str == '?') { + mContentType = TEXT_HTML; + return true; + } + + uint32_t bufSize = end - str; + // We use sizeof(_tagstr) below because that's the length of _tagstr + // with the one char " " or ">" appended. +#define MATCHES_TAG(_tagstr) \ + (bufSize >= sizeof(_tagstr) && \ + (PL_strncasecmp(str, _tagstr " ", sizeof(_tagstr)) == 0 || \ + PL_strncasecmp(str, _tagstr ">", sizeof(_tagstr)) == 0)) + + if (MATCHES_TAG("html") || + MATCHES_TAG("frameset") || + MATCHES_TAG("body") || + MATCHES_TAG("head") || + MATCHES_TAG("script") || + MATCHES_TAG("iframe") || + MATCHES_TAG("a") || + MATCHES_TAG("img") || + MATCHES_TAG("table") || + MATCHES_TAG("title") || + MATCHES_TAG("link") || + MATCHES_TAG("base") || + MATCHES_TAG("style") || + MATCHES_TAG("div") || + MATCHES_TAG("p") || + MATCHES_TAG("font") || + MATCHES_TAG("applet") || + MATCHES_TAG("meta") || + MATCHES_TAG("center") || + MATCHES_TAG("form") || + MATCHES_TAG("isindex") || + MATCHES_TAG("h1") || + MATCHES_TAG("h2") || + MATCHES_TAG("h3") || + MATCHES_TAG("h4") || + MATCHES_TAG("h5") || + MATCHES_TAG("h6") || + MATCHES_TAG("b") || + MATCHES_TAG("pre")) { + + mContentType = TEXT_HTML; + return true; + } + +#undef MATCHES_TAG + + return false; +} + +bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest) +{ + // Just like HTML, this should be able to be shut off. + if (!AllowSniffing(aRequest)) { + return false; + } + + // First see whether we can glean anything from the uri... + if (!SniffURI(aRequest)) { + // Oh well; just generic XML will have to do + mContentType = TEXT_XML; + } + + return true; +} + +bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest) +{ + nsCOMPtr mimeService(do_GetService("@mozilla.org/mime;1")); + if (mimeService) { + nsCOMPtr channel = do_QueryInterface(aRequest); + if (channel) { + nsCOMPtr uri; + nsresult result = channel->GetURI(getter_AddRefs(uri)); + if (NS_SUCCEEDED(result) && uri) { + nsAutoCString type; + result = mimeService->GetTypeFromURI(uri, type); + if (NS_SUCCEEDED(result)) { + mContentType = type; + return true; + } + } + } + } + + return false; +} + +// This macro is based on RFC 2046 Section 4.1.2. Treat any char 0-31 +// except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by +// encodings like Shift_JIS) as non-text +#define IS_TEXT_CHAR(ch) \ + (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27) + +bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest) +{ + // All we can do now is try to guess whether this is text/plain or + // application/octet-stream + + // First, check for a BOM. If we see one, assume this is text/plain + // in whatever encoding. If there is a BOM _and_ text we will + // always have at least 4 bytes in the buffer (since the 2-byte BOMs + // are for 2-byte encodings and the UTF-8 BOM is 3 bytes). + if (mBufferLen >= 4) { + const unsigned char* buf = (const unsigned char*)mBuffer; + if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16, Big Endian + (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16 or UCS-4, Little Endian + (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) || // UTF-8 + (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF)) { // UCS-4, Big Endian + + mContentType = TEXT_PLAIN; + return true; + } + } + + // Now see whether the buffer has any non-text chars. If not, then let's + // just call it text/plain... + // + uint32_t i; + for (i = 0; i < mBufferLen && IS_TEXT_CHAR(mBuffer[i]); i++) { + continue; + } + + if (i == mBufferLen) { + mContentType = TEXT_PLAIN; + } + else { + mContentType = APPLICATION_OCTET_STREAM; + } + + return true; +} + + +nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request, + nsISupports *aCtxt) +{ + nsresult rv = NS_OK; + + if (!mNextListener) return NS_ERROR_FAILURE; + + if (!mContentType.IsEmpty()) { + nsCOMPtr viewSourceChannel = + do_QueryInterface(request); + if (viewSourceChannel) { + rv = viewSourceChannel->SetOriginalContentType(mContentType); + } else { + nsCOMPtr channel = do_QueryInterface(request, &rv); + if (NS_SUCCEEDED(rv)) { + // Set the new content type on the channel... + rv = channel->SetContentType(mContentType); + } + } + + NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!"); + + if (NS_FAILED(rv)) { + // Cancel the request to make sure it has the correct status if + // mNextListener looks at it. + request->Cancel(rv); + mNextListener->OnStartRequest(request, aCtxt); + return rv; + } + } + + // Fire the OnStartRequest(...) + rv = mNextListener->OnStartRequest(request, aCtxt); + + if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; + + // If the request was canceled, then we need to treat that equivalently + // to an error returned by OnStartRequest. + if (NS_SUCCEEDED(rv)) + request->GetStatus(&rv); + + // Fire the first OnDataAvailable for the data that was read from the + // stream into the sniffer buffer... + if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) { + uint32_t len = 0; + nsCOMPtr in; + nsCOMPtr out; + + // Create a pipe and fill it with the data from the sniffer buffer. + rv = NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out), + MAX_BUFFER_SIZE, MAX_BUFFER_SIZE); + + if (NS_SUCCEEDED(rv)) { + rv = out->Write(mBuffer, mBufferLen, &len); + if (NS_SUCCEEDED(rv)) { + if (len == mBufferLen) { + rv = mNextListener->OnDataAvailable(request, aCtxt, in, 0, len); + } else { + NS_ERROR("Unable to write all the data into the pipe."); + rv = NS_ERROR_FAILURE; + } + } + } + } + + delete [] mBuffer; + mBuffer = nullptr; + mBufferLen = 0; + + return rv; +} + +void +nsBinaryDetector::DetermineContentType(nsIRequest* aRequest) +{ + nsCOMPtr httpChannel = do_QueryInterface(aRequest); + if (!httpChannel) { + return; + } + + // It's an HTTP channel. Check for the text/plain mess + nsAutoCString contentTypeHdr; + httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Type"), + contentTypeHdr); + nsAutoCString contentType; + httpChannel->GetContentType(contentType); + + // Make sure to do a case-sensitive exact match comparison here. Apache + // 1.x just sends text/plain for "unknown", while Apache 2.x sends + // text/plain with a ISO-8859-1 charset. Debian's Apache version, just to + // be different, sends text/plain with iso-8859-1 charset. For extra fun, + // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8. Don't do general + // case-insensitive comparison, since we really want to apply this crap as + // rarely as we can. + if (!contentType.EqualsLiteral("text/plain") || + (!contentTypeHdr.EqualsLiteral("text/plain") && + !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") && + !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") && + !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) { + return; + } + + // Check whether we have content-encoding. If we do, don't try to + // detect the type. + // XXXbz we could improve this by doing a local decompress if we + // wanted, I'm sure. + nsAutoCString contentEncoding; + httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"), + contentEncoding); + if (!contentEncoding.IsEmpty()) { + return; + } + + LastDitchSniff(aRequest); + if (mContentType.Equals(APPLICATION_OCTET_STREAM)) { + // We want to guess at it instead + mContentType = APPLICATION_GUESS_FROM_EXT; + } else { + // Let the text/plain type we already have be, so that other content + // sniffers can also get a shot at this data. + mContentType.Truncate(); + } +}