michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "mozilla/DebugOnly.h" michael@0: michael@0: #include "nsUnicharStreamLoader.h" michael@0: #include "nsIInputStream.h" michael@0: #include "nsICharsetConverterManager.h" michael@0: #include "nsServiceManagerUtils.h" michael@0: #include michael@0: michael@0: // 1024 bytes is specified in michael@0: // http://www.whatwg.org/specs/web-apps/current-work/#charset for HTML; for michael@0: // other resource types (e.g. CSS) typically fewer bytes are fine too, since michael@0: // they only look at things right at the beginning of the data. michael@0: #define SNIFFING_BUFFER_SIZE 1024 michael@0: michael@0: using namespace mozilla; michael@0: michael@0: NS_IMETHODIMP michael@0: nsUnicharStreamLoader::Init(nsIUnicharStreamLoaderObserver *aObserver) michael@0: { michael@0: NS_ENSURE_ARG_POINTER(aObserver); michael@0: michael@0: mObserver = aObserver; michael@0: michael@0: if (!mRawData.SetCapacity(SNIFFING_BUFFER_SIZE, fallible_t())) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult michael@0: nsUnicharStreamLoader::Create(nsISupports *aOuter, michael@0: REFNSIID aIID, michael@0: void **aResult) michael@0: { michael@0: if (aOuter) return NS_ERROR_NO_AGGREGATION; michael@0: michael@0: nsUnicharStreamLoader* it = new nsUnicharStreamLoader(); michael@0: NS_ADDREF(it); michael@0: nsresult rv = it->QueryInterface(aIID, aResult); michael@0: NS_RELEASE(it); michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMPL_ISUPPORTS(nsUnicharStreamLoader, nsIUnicharStreamLoader, michael@0: nsIRequestObserver, nsIStreamListener) michael@0: michael@0: /* readonly attribute nsIChannel channel; */ michael@0: NS_IMETHODIMP michael@0: nsUnicharStreamLoader::GetChannel(nsIChannel **aChannel) michael@0: { michael@0: NS_IF_ADDREF(*aChannel = mChannel); michael@0: return NS_OK; michael@0: } michael@0: michael@0: /* readonly attribute nsACString charset */ michael@0: NS_IMETHODIMP michael@0: nsUnicharStreamLoader::GetCharset(nsACString& aCharset) michael@0: { michael@0: aCharset = mCharset; michael@0: return NS_OK; michael@0: } michael@0: michael@0: /* nsIRequestObserver implementation */ michael@0: NS_IMETHODIMP michael@0: nsUnicharStreamLoader::OnStartRequest(nsIRequest*, nsISupports*) michael@0: { michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsUnicharStreamLoader::OnStopRequest(nsIRequest *aRequest, michael@0: nsISupports *aContext, michael@0: nsresult aStatus) michael@0: { michael@0: if (!mObserver) { michael@0: NS_ERROR("nsUnicharStreamLoader::OnStopRequest called before ::Init"); michael@0: return NS_ERROR_UNEXPECTED; michael@0: } michael@0: michael@0: mContext = aContext; michael@0: mChannel = do_QueryInterface(aRequest); michael@0: michael@0: nsresult rv = NS_OK; michael@0: if (mRawData.Length() > 0 && NS_SUCCEEDED(aStatus)) { michael@0: NS_ABORT_IF_FALSE(mBuffer.Length() == 0, michael@0: "should not have both decoded and raw data"); michael@0: rv = DetermineCharset(); michael@0: } michael@0: michael@0: if (NS_FAILED(rv)) { michael@0: // Call the observer but pass it no data. michael@0: mObserver->OnStreamComplete(this, mContext, rv, EmptyString()); michael@0: } else { michael@0: mObserver->OnStreamComplete(this, mContext, aStatus, mBuffer); michael@0: } michael@0: michael@0: mObserver = nullptr; michael@0: mDecoder = nullptr; michael@0: mContext = nullptr; michael@0: mChannel = nullptr; michael@0: mCharset.Truncate(); michael@0: mBuffer.Truncate(); michael@0: return rv; michael@0: } michael@0: michael@0: /* nsIStreamListener implementation */ michael@0: NS_IMETHODIMP michael@0: nsUnicharStreamLoader::OnDataAvailable(nsIRequest *aRequest, michael@0: nsISupports *aContext, michael@0: nsIInputStream *aInputStream, michael@0: uint64_t aSourceOffset, michael@0: uint32_t aCount) michael@0: { michael@0: if (!mObserver) { michael@0: NS_ERROR("nsUnicharStreamLoader::OnDataAvailable called before ::Init"); michael@0: return NS_ERROR_UNEXPECTED; michael@0: } michael@0: michael@0: mContext = aContext; michael@0: mChannel = do_QueryInterface(aRequest); michael@0: michael@0: nsresult rv = NS_OK; michael@0: if (mDecoder) { michael@0: // process everything we've got michael@0: uint32_t dummy; michael@0: aInputStream->ReadSegments(WriteSegmentFun, this, aCount, &dummy); michael@0: } else { michael@0: // no decoder yet. Read up to SNIFFING_BUFFER_SIZE octets into michael@0: // mRawData (this is the cutoff specified in michael@0: // draft-abarth-mime-sniff-06). If we can get that much, then go michael@0: // ahead and fire charset detection and read the rest. Otherwise michael@0: // wait for more data. michael@0: michael@0: uint32_t haveRead = mRawData.Length(); michael@0: uint32_t toRead = std::min(SNIFFING_BUFFER_SIZE - haveRead, aCount); michael@0: uint32_t n; michael@0: char *here = mRawData.BeginWriting() + haveRead; michael@0: michael@0: rv = aInputStream->Read(here, toRead, &n); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: mRawData.SetLength(haveRead + n); michael@0: if (mRawData.Length() == SNIFFING_BUFFER_SIZE) { michael@0: rv = DetermineCharset(); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: // process what's left michael@0: uint32_t dummy; michael@0: aInputStream->ReadSegments(WriteSegmentFun, this, aCount - n, &dummy); michael@0: } michael@0: } else { michael@0: NS_ABORT_IF_FALSE(n == aCount, "didn't read as much as was available"); michael@0: } michael@0: } michael@0: } michael@0: michael@0: mContext = nullptr; michael@0: mChannel = nullptr; michael@0: return rv; michael@0: } michael@0: michael@0: /* internal */ michael@0: static NS_DEFINE_CID(kCharsetConverterManagerCID, michael@0: NS_ICHARSETCONVERTERMANAGER_CID); michael@0: michael@0: nsresult michael@0: nsUnicharStreamLoader::DetermineCharset() michael@0: { michael@0: nsresult rv = mObserver->OnDetermineCharset(this, mContext, michael@0: mRawData, mCharset); michael@0: if (NS_FAILED(rv) || mCharset.IsEmpty()) { michael@0: // The observer told us nothing useful michael@0: mCharset.AssignLiteral("UTF-8"); michael@0: } michael@0: michael@0: // Create the decoder for this character set michael@0: nsCOMPtr ccm = michael@0: do_GetService(kCharsetConverterManagerCID, &rv); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: // Sadly, nsIUnicharStreamLoader is exposed to extensions, so we can't michael@0: // assume mozilla::css::Loader to be the only caller. Since legacy michael@0: // charset alias code doesn't know about the replacement encoding, michael@0: // special-case it here, but let other stuff go through legacy alias michael@0: // resolution for now. michael@0: if (mCharset.EqualsLiteral("replacement")) { michael@0: rv = ccm->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mDecoder)); michael@0: } else { michael@0: rv = ccm->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mDecoder)); michael@0: } michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: // Process the data into mBuffer michael@0: uint32_t dummy; michael@0: rv = WriteSegmentFun(nullptr, this, michael@0: mRawData.BeginReading(), michael@0: 0, mRawData.Length(), michael@0: &dummy); michael@0: mRawData.Truncate(); michael@0: return rv; michael@0: } michael@0: michael@0: NS_METHOD michael@0: nsUnicharStreamLoader::WriteSegmentFun(nsIInputStream *, michael@0: void *aClosure, michael@0: const char *aSegment, michael@0: uint32_t, michael@0: uint32_t aCount, michael@0: uint32_t *aWriteCount) michael@0: { michael@0: nsUnicharStreamLoader* self = static_cast(aClosure); michael@0: michael@0: uint32_t haveRead = self->mBuffer.Length(); michael@0: int32_t srcLen = aCount; michael@0: int32_t dstLen; michael@0: self->mDecoder->GetMaxLength(aSegment, srcLen, &dstLen); michael@0: michael@0: uint32_t capacity = haveRead + dstLen; michael@0: if (!self->mBuffer.SetCapacity(capacity, fallible_t())) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: DebugOnly rv = michael@0: self->mDecoder->Convert(aSegment, michael@0: &srcLen, michael@0: self->mBuffer.BeginWriting() + haveRead, michael@0: &dstLen); michael@0: MOZ_ASSERT(NS_SUCCEEDED(rv)); michael@0: MOZ_ASSERT(srcLen == static_cast(aCount)); michael@0: haveRead += dstLen; michael@0: michael@0: self->mBuffer.SetLength(haveRead); michael@0: *aWriteCount = aCount; michael@0: return NS_OK; michael@0: }