michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsUnicharInputStream.h" michael@0: #include "nsIInputStream.h" michael@0: #include "nsIServiceManager.h" michael@0: #include "nsString.h" michael@0: #include "nsTArray.h" michael@0: #include "nsAutoPtr.h" michael@0: #include "nsCRT.h" michael@0: #include "nsStreamUtils.h" michael@0: #include "nsUTF8Utils.h" michael@0: #include "mozilla/Attributes.h" michael@0: #include michael@0: #if defined(XP_WIN) michael@0: #include michael@0: #else michael@0: #include michael@0: #endif michael@0: michael@0: #define STRING_BUFFER_SIZE 8192 michael@0: michael@0: class StringUnicharInputStream MOZ_FINAL : public nsIUnicharInputStream { michael@0: public: michael@0: StringUnicharInputStream(const nsAString& aString) : michael@0: mString(aString), mPos(0), mLen(aString.Length()) { } michael@0: michael@0: NS_DECL_ISUPPORTS michael@0: NS_DECL_NSIUNICHARINPUTSTREAM michael@0: michael@0: nsString mString; michael@0: uint32_t mPos; michael@0: uint32_t mLen; michael@0: michael@0: private: michael@0: ~StringUnicharInputStream() { } michael@0: }; michael@0: michael@0: NS_IMETHODIMP michael@0: StringUnicharInputStream::Read(char16_t* aBuf, michael@0: uint32_t aCount, michael@0: uint32_t *aReadCount) michael@0: { michael@0: if (mPos >= mLen) { michael@0: *aReadCount = 0; michael@0: return NS_OK; michael@0: } michael@0: nsAString::const_iterator iter; michael@0: mString.BeginReading(iter); michael@0: const char16_t* us = iter.get(); michael@0: uint32_t amount = mLen - mPos; michael@0: if (amount > aCount) { michael@0: amount = aCount; michael@0: } michael@0: memcpy(aBuf, us + mPos, sizeof(char16_t) * amount); michael@0: mPos += amount; michael@0: *aReadCount = amount; michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, michael@0: void* aClosure, michael@0: uint32_t aCount, uint32_t *aReadCount) michael@0: { michael@0: uint32_t bytesWritten; michael@0: uint32_t totalBytesWritten = 0; michael@0: michael@0: nsresult rv; michael@0: aCount = XPCOM_MIN(mString.Length() - mPos, aCount); michael@0: michael@0: nsAString::const_iterator iter; michael@0: mString.BeginReading(iter); michael@0: michael@0: while (aCount) { michael@0: rv = aWriter(this, aClosure, iter.get() + mPos, michael@0: totalBytesWritten, aCount, &bytesWritten); michael@0: michael@0: if (NS_FAILED(rv)) { michael@0: // don't propagate errors to the caller michael@0: break; michael@0: } michael@0: michael@0: aCount -= bytesWritten; michael@0: totalBytesWritten += bytesWritten; michael@0: mPos += bytesWritten; michael@0: } michael@0: michael@0: *aReadCount = totalBytesWritten; michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: StringUnicharInputStream::ReadString(uint32_t aCount, nsAString& aString, michael@0: uint32_t* aReadCount) michael@0: { michael@0: if (mPos >= mLen) { michael@0: *aReadCount = 0; michael@0: return NS_OK; michael@0: } michael@0: uint32_t amount = mLen - mPos; michael@0: if (amount > aCount) { michael@0: amount = aCount; michael@0: } michael@0: aString = Substring(mString, mPos, amount); michael@0: mPos += amount; michael@0: *aReadCount = amount; michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult StringUnicharInputStream::Close() michael@0: { michael@0: mPos = mLen; michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMPL_ISUPPORTS(StringUnicharInputStream, nsIUnicharInputStream) michael@0: michael@0: //---------------------------------------------------------------------- michael@0: michael@0: class UTF8InputStream MOZ_FINAL : public nsIUnicharInputStream { michael@0: public: michael@0: UTF8InputStream(); michael@0: nsresult Init(nsIInputStream* aStream); michael@0: michael@0: NS_DECL_ISUPPORTS michael@0: NS_DECL_NSIUNICHARINPUTSTREAM michael@0: michael@0: private: michael@0: ~UTF8InputStream(); michael@0: michael@0: protected: michael@0: int32_t Fill(nsresult * aErrorCode); michael@0: michael@0: static void CountValidUTF8Bytes(const char *aBuf, uint32_t aMaxBytes, uint32_t& aValidUTF8bytes, uint32_t& aValidUTF16CodeUnits); michael@0: michael@0: nsCOMPtr mInput; michael@0: FallibleTArray mByteData; michael@0: FallibleTArray mUnicharData; michael@0: michael@0: uint32_t mByteDataOffset; michael@0: uint32_t mUnicharDataOffset; michael@0: uint32_t mUnicharDataLength; michael@0: }; michael@0: michael@0: UTF8InputStream::UTF8InputStream() : michael@0: mByteDataOffset(0), michael@0: mUnicharDataOffset(0), michael@0: mUnicharDataLength(0) michael@0: { michael@0: } michael@0: michael@0: nsresult michael@0: UTF8InputStream::Init(nsIInputStream* aStream) michael@0: { michael@0: if (!mByteData.SetCapacity(STRING_BUFFER_SIZE) || michael@0: !mUnicharData.SetCapacity(STRING_BUFFER_SIZE)) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: mInput = aStream; michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMPL_ISUPPORTS(UTF8InputStream,nsIUnicharInputStream) michael@0: michael@0: UTF8InputStream::~UTF8InputStream() michael@0: { michael@0: Close(); michael@0: } michael@0: michael@0: nsresult UTF8InputStream::Close() michael@0: { michael@0: mInput = nullptr; michael@0: mByteData.Clear(); michael@0: mUnicharData.Clear(); michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult UTF8InputStream::Read(char16_t* aBuf, michael@0: uint32_t aCount, michael@0: uint32_t *aReadCount) michael@0: { michael@0: NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); michael@0: uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; michael@0: nsresult errorCode; michael@0: if (0 == readCount) { michael@0: // Fill the unichar buffer michael@0: int32_t bytesRead = Fill(&errorCode); michael@0: if (bytesRead <= 0) { michael@0: *aReadCount = 0; michael@0: return errorCode; michael@0: } michael@0: readCount = bytesRead; michael@0: } michael@0: if (readCount > aCount) { michael@0: readCount = aCount; michael@0: } michael@0: memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, michael@0: readCount * sizeof(char16_t)); michael@0: mUnicharDataOffset += readCount; michael@0: *aReadCount = readCount; michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, michael@0: void* aClosure, michael@0: uint32_t aCount, uint32_t *aReadCount) michael@0: { michael@0: NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); michael@0: uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset; michael@0: nsresult rv = NS_OK; michael@0: if (0 == bytesToWrite) { michael@0: // Fill the unichar buffer michael@0: int32_t bytesRead = Fill(&rv); michael@0: if (bytesRead <= 0) { michael@0: *aReadCount = 0; michael@0: return rv; michael@0: } michael@0: bytesToWrite = bytesRead; michael@0: } michael@0: michael@0: if (bytesToWrite > aCount) michael@0: bytesToWrite = aCount; michael@0: michael@0: uint32_t bytesWritten; michael@0: uint32_t totalBytesWritten = 0; michael@0: michael@0: while (bytesToWrite) { michael@0: rv = aWriter(this, aClosure, michael@0: mUnicharData.Elements() + mUnicharDataOffset, michael@0: totalBytesWritten, bytesToWrite, &bytesWritten); michael@0: michael@0: if (NS_FAILED(rv)) { michael@0: // don't propagate errors to the caller michael@0: break; michael@0: } michael@0: michael@0: bytesToWrite -= bytesWritten; michael@0: totalBytesWritten += bytesWritten; michael@0: mUnicharDataOffset += bytesWritten; michael@0: } michael@0: michael@0: *aReadCount = totalBytesWritten; michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: UTF8InputStream::ReadString(uint32_t aCount, nsAString& aString, michael@0: uint32_t* aReadCount) michael@0: { michael@0: NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); michael@0: uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; michael@0: nsresult errorCode; michael@0: if (0 == readCount) { michael@0: // Fill the unichar buffer michael@0: int32_t bytesRead = Fill(&errorCode); michael@0: if (bytesRead <= 0) { michael@0: *aReadCount = 0; michael@0: return errorCode; michael@0: } michael@0: readCount = bytesRead; michael@0: } michael@0: if (readCount > aCount) { michael@0: readCount = aCount; michael@0: } michael@0: const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; michael@0: aString.Assign(buf, readCount); michael@0: michael@0: mUnicharDataOffset += readCount; michael@0: *aReadCount = readCount; michael@0: return NS_OK; michael@0: } michael@0: michael@0: int32_t UTF8InputStream::Fill(nsresult * aErrorCode) michael@0: { michael@0: if (nullptr == mInput) { michael@0: // We already closed the stream! michael@0: *aErrorCode = NS_BASE_STREAM_CLOSED; michael@0: return -1; michael@0: } michael@0: michael@0: NS_ASSERTION(mByteData.Length() >= mByteDataOffset, "unsigned madness"); michael@0: uint32_t remainder = mByteData.Length() - mByteDataOffset; michael@0: mByteDataOffset = remainder; michael@0: uint32_t nb; michael@0: *aErrorCode = NS_FillArray(mByteData, mInput, remainder, &nb); michael@0: if (nb == 0) { michael@0: // Because we assume a many to one conversion, the lingering data michael@0: // in the byte buffer must be a partial conversion michael@0: // fragment. Because we know that we have received no more new michael@0: // data to add to it, we can't convert it. Therefore, we discard michael@0: // it. michael@0: return nb; michael@0: } michael@0: NS_ASSERTION(remainder + nb == mByteData.Length(), "bad nb"); michael@0: michael@0: // Now convert as much of the byte buffer to unicode as possible michael@0: uint32_t srcLen, dstLen; michael@0: CountValidUTF8Bytes(mByteData.Elements(),remainder + nb, srcLen, dstLen); michael@0: michael@0: // the number of UCS2 characters should always be <= the number of michael@0: // UTF8 chars michael@0: NS_ASSERTION( (remainder+nb >= srcLen), "cannot be longer than out buffer"); michael@0: NS_ASSERTION(dstLen <= mUnicharData.Capacity(), michael@0: "Ouch. I would overflow my buffer if I wasn't so careful."); michael@0: if (dstLen > mUnicharData.Capacity()) return 0; michael@0: michael@0: ConvertUTF8toUTF16 converter(mUnicharData.Elements()); michael@0: michael@0: nsASingleFragmentCString::const_char_iterator start = mByteData.Elements(); michael@0: nsASingleFragmentCString::const_char_iterator end = mByteData.Elements() + srcLen; michael@0: michael@0: copy_string(start, end, converter); michael@0: if (converter.Length() != dstLen) { michael@0: *aErrorCode = NS_BASE_STREAM_BAD_CONVERSION; michael@0: return -1; michael@0: } michael@0: michael@0: mUnicharDataOffset = 0; michael@0: mUnicharDataLength = dstLen; michael@0: mByteDataOffset = srcLen; michael@0: michael@0: return dstLen; michael@0: } michael@0: michael@0: void michael@0: UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, uint32_t aMaxBytes, uint32_t& aValidUTF8bytes, uint32_t& aValidUTF16CodeUnits) michael@0: { michael@0: const char *c = aBuffer; michael@0: const char *end = aBuffer + aMaxBytes; michael@0: const char *lastchar = c; // pre-initialize in case of 0-length buffer michael@0: uint32_t utf16length = 0; michael@0: while (c < end && *c) { michael@0: lastchar = c; michael@0: utf16length++; michael@0: michael@0: if (UTF8traits::isASCII(*c)) michael@0: c++; michael@0: else if (UTF8traits::is2byte(*c)) michael@0: c += 2; michael@0: else if (UTF8traits::is3byte(*c)) michael@0: c += 3; michael@0: else if (UTF8traits::is4byte(*c)) { michael@0: c += 4; michael@0: utf16length++; // add 1 more because this will be converted to a michael@0: // surrogate pair. michael@0: } michael@0: else if (UTF8traits::is5byte(*c)) michael@0: c += 5; michael@0: else if (UTF8traits::is6byte(*c)) michael@0: c += 6; michael@0: else { michael@0: NS_WARNING("Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()"); michael@0: break; // Otherwise we go into an infinite loop. But what happens now? michael@0: } michael@0: } michael@0: if (c > end) { michael@0: c = lastchar; michael@0: utf16length--; michael@0: } michael@0: michael@0: aValidUTF8bytes = c - aBuffer; michael@0: aValidUTF16CodeUnits = utf16length; michael@0: } michael@0: michael@0: NS_IMPL_QUERY_INTERFACE(nsSimpleUnicharStreamFactory, michael@0: nsIFactory, michael@0: nsISimpleUnicharStreamFactory) michael@0: michael@0: NS_IMETHODIMP_(MozExternalRefCountType) nsSimpleUnicharStreamFactory::AddRef() { return 2; } michael@0: NS_IMETHODIMP_(MozExternalRefCountType) nsSimpleUnicharStreamFactory::Release() { return 1; } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSimpleUnicharStreamFactory::CreateInstance(nsISupports* aOuter, REFNSIID aIID, michael@0: void **aResult) michael@0: { michael@0: return NS_ERROR_NOT_IMPLEMENTED; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSimpleUnicharStreamFactory::LockFactory(bool aLock) michael@0: { michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSimpleUnicharStreamFactory::CreateInstanceFromString(const nsAString& aString, michael@0: nsIUnicharInputStream* *aResult) michael@0: { michael@0: StringUnicharInputStream* it = new StringUnicharInputStream(aString); michael@0: if (!it) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: NS_ADDREF(*aResult = it); michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSimpleUnicharStreamFactory::CreateInstanceFromUTF8Stream(nsIInputStream* aStreamToWrap, michael@0: nsIUnicharInputStream* *aResult) michael@0: { michael@0: *aResult = nullptr; michael@0: michael@0: // Create converter input stream michael@0: nsRefPtr it = new UTF8InputStream(); michael@0: if (!it) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: michael@0: nsresult rv = it->Init(aStreamToWrap); michael@0: if (NS_FAILED(rv)) michael@0: return rv; michael@0: michael@0: NS_ADDREF(*aResult = it); michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsSimpleUnicharStreamFactory* michael@0: nsSimpleUnicharStreamFactory::GetInstance() michael@0: { michael@0: static const nsSimpleUnicharStreamFactory kInstance; michael@0: return const_cast(&kInstance); michael@0: }