michael@0: /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsConverterInputStream.h" michael@0: #include "nsIInputStream.h" michael@0: #include "nsICharsetConverterManager.h" michael@0: #include "nsReadLine.h" michael@0: #include "nsStreamUtils.h" michael@0: #include "nsServiceManagerUtils.h" michael@0: #include michael@0: michael@0: #define CONVERTER_BUFFER_SIZE 8192 michael@0: michael@0: NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream, michael@0: nsIUnicharInputStream, nsIUnicharLineInputStream) michael@0: michael@0: michael@0: NS_IMETHODIMP michael@0: nsConverterInputStream::Init(nsIInputStream* aStream, michael@0: const char *aCharset, michael@0: int32_t aBufferSize, michael@0: char16_t aReplacementChar) michael@0: { michael@0: static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); michael@0: michael@0: if (!aCharset) michael@0: aCharset = "UTF-8"; michael@0: michael@0: nsresult rv; michael@0: michael@0: if (aBufferSize <=0) aBufferSize=CONVERTER_BUFFER_SIZE; michael@0: michael@0: // get the decoder michael@0: nsCOMPtr ccm = michael@0: do_GetService(kCharsetConverterManagerCID, &rv); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: rv = ccm->GetUnicodeDecoder(aCharset ? aCharset : "ISO-8859-1", getter_AddRefs(mConverter)); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: // set up our buffers michael@0: if (!mByteData.SetCapacity(aBufferSize) || michael@0: !mUnicharData.SetCapacity(aBufferSize)) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: mInput = aStream; michael@0: mReplacementChar = aReplacementChar; michael@0: if (!aReplacementChar || michael@0: aReplacementChar != mConverter->GetCharacterForUnMapped()) { michael@0: mConverter->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsConverterInputStream::Close() michael@0: { michael@0: nsresult rv = mInput ? mInput->Close() : NS_OK; michael@0: mLineBuffer = nullptr; michael@0: mInput = nullptr; michael@0: mConverter = nullptr; michael@0: mByteData.Clear(); michael@0: mUnicharData.Clear(); michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsConverterInputStream::Read(char16_t* aBuf, michael@0: uint32_t aCount, michael@0: uint32_t *aReadCount) michael@0: { michael@0: NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); michael@0: uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; michael@0: if (0 == readCount) { michael@0: // Fill the unichar buffer michael@0: readCount = Fill(&mLastErrorCode); michael@0: if (readCount == 0) { michael@0: *aReadCount = 0; michael@0: return mLastErrorCode; michael@0: } michael@0: } michael@0: if (readCount > aCount) { michael@0: readCount = aCount; michael@0: } michael@0: memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, michael@0: readCount * sizeof(char16_t)); michael@0: mUnicharDataOffset += readCount; michael@0: *aReadCount = readCount; michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, michael@0: void* aClosure, michael@0: uint32_t aCount, uint32_t *aReadCount) michael@0: { michael@0: NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); michael@0: uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset; michael@0: nsresult rv; michael@0: if (0 == bytesToWrite) { michael@0: // Fill the unichar buffer michael@0: bytesToWrite = Fill(&rv); michael@0: if (bytesToWrite <= 0) { michael@0: *aReadCount = 0; michael@0: return rv; michael@0: } michael@0: } michael@0: michael@0: if (bytesToWrite > aCount) michael@0: bytesToWrite = aCount; michael@0: michael@0: uint32_t bytesWritten; michael@0: uint32_t totalBytesWritten = 0; michael@0: michael@0: while (bytesToWrite) { michael@0: rv = aWriter(this, aClosure, michael@0: mUnicharData.Elements() + mUnicharDataOffset, michael@0: totalBytesWritten, bytesToWrite, &bytesWritten); michael@0: if (NS_FAILED(rv)) { michael@0: // don't propagate errors to the caller michael@0: break; michael@0: } michael@0: michael@0: bytesToWrite -= bytesWritten; michael@0: totalBytesWritten += bytesWritten; michael@0: mUnicharDataOffset += bytesWritten; michael@0: michael@0: } michael@0: michael@0: *aReadCount = totalBytesWritten; michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString, michael@0: uint32_t* aReadCount) michael@0: { michael@0: NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); michael@0: uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; michael@0: if (0 == readCount) { michael@0: // Fill the unichar buffer michael@0: readCount = Fill(&mLastErrorCode); michael@0: if (readCount == 0) { michael@0: *aReadCount = 0; michael@0: return mLastErrorCode; michael@0: } michael@0: } michael@0: if (readCount > aCount) { michael@0: readCount = aCount; michael@0: } michael@0: const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; michael@0: aString.Assign(buf, readCount); michael@0: mUnicharDataOffset += readCount; michael@0: *aReadCount = readCount; michael@0: return NS_OK; michael@0: } michael@0: michael@0: uint32_t michael@0: nsConverterInputStream::Fill(nsresult * aErrorCode) michael@0: { michael@0: if (nullptr == mInput) { michael@0: // We already closed the stream! michael@0: *aErrorCode = NS_BASE_STREAM_CLOSED; michael@0: return 0; michael@0: } michael@0: michael@0: if (NS_FAILED(mLastErrorCode)) { michael@0: // We failed to completely convert last time, and error-recovery michael@0: // is disabled. We will fare no better this time, so... michael@0: *aErrorCode = mLastErrorCode; michael@0: return 0; michael@0: } michael@0: michael@0: // We assume a many to one conversion and are using equal sizes for michael@0: // the two buffers. However if an error happens at the very start michael@0: // of a byte buffer we may end up in a situation where n bytes lead michael@0: // to n+1 unicode chars. Thus we need to keep track of the leftover michael@0: // bytes as we convert. michael@0: michael@0: uint32_t nb; michael@0: *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb); michael@0: if (nb == 0 && mLeftOverBytes == 0) { michael@0: // No more data michael@0: *aErrorCode = NS_OK; michael@0: return 0; michael@0: } michael@0: michael@0: NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(), michael@0: "mByteData is lying to us somewhere"); michael@0: michael@0: // Now convert as much of the byte buffer to unicode as possible michael@0: mUnicharDataOffset = 0; michael@0: mUnicharDataLength = 0; michael@0: uint32_t srcConsumed = 0; michael@0: do { michael@0: int32_t srcLen = mByteData.Length() - srcConsumed; michael@0: int32_t dstLen = mUnicharData.Capacity() - mUnicharDataLength; michael@0: *aErrorCode = mConverter->Convert(mByteData.Elements()+srcConsumed, michael@0: &srcLen, michael@0: mUnicharData.Elements()+mUnicharDataLength, michael@0: &dstLen); michael@0: mUnicharDataLength += dstLen; michael@0: // XXX if srcLen is negative, we want to drop the _first_ byte in michael@0: // the erroneous byte sequence and try again. This is not quite michael@0: // possible right now -- see bug 160784 michael@0: srcConsumed += srcLen; michael@0: if (NS_FAILED(*aErrorCode) && mReplacementChar) { michael@0: NS_ASSERTION(0 < mUnicharData.Capacity() - mUnicharDataLength, michael@0: "Decoder returned an error but filled the output buffer! " michael@0: "Should not happen."); michael@0: mUnicharData.Elements()[mUnicharDataLength++] = mReplacementChar; michael@0: ++srcConsumed; michael@0: // XXX this is needed to make sure we don't underrun our buffer; michael@0: // bug 160784 again michael@0: srcConsumed = std::max(srcConsumed, 0); michael@0: mConverter->Reset(); michael@0: } michael@0: NS_ASSERTION(srcConsumed <= mByteData.Length(), michael@0: "Whoa. The converter should have returned NS_OK_UDEC_MOREINPUT before this point!"); michael@0: } while (mReplacementChar && michael@0: NS_FAILED(*aErrorCode) && michael@0: mUnicharData.Capacity() > mUnicharDataLength); michael@0: michael@0: mLeftOverBytes = mByteData.Length() - srcConsumed; michael@0: michael@0: return mUnicharDataLength; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) michael@0: { michael@0: if (!mLineBuffer) { michael@0: mLineBuffer = new nsLineBuffer; michael@0: } michael@0: return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult); michael@0: }