intl/uconv/src/nsConverterInputStream.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "nsConverterInputStream.h"
michael@0 7 #include "nsIInputStream.h"
michael@0 8 #include "nsICharsetConverterManager.h"
michael@0 9 #include "nsReadLine.h"
michael@0 10 #include "nsStreamUtils.h"
michael@0 11 #include "nsServiceManagerUtils.h"
michael@0 12 #include <algorithm>
michael@0 13
michael@0 14 #define CONVERTER_BUFFER_SIZE 8192
michael@0 15
michael@0 16 NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
michael@0 17 nsIUnicharInputStream, nsIUnicharLineInputStream)
michael@0 18
michael@0 19
michael@0 20 NS_IMETHODIMP
michael@0 21 nsConverterInputStream::Init(nsIInputStream* aStream,
michael@0 22 const char *aCharset,
michael@0 23 int32_t aBufferSize,
michael@0 24 char16_t aReplacementChar)
michael@0 25 {
michael@0 26 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
michael@0 27
michael@0 28 if (!aCharset)
michael@0 29 aCharset = "UTF-8";
michael@0 30
michael@0 31 nsresult rv;
michael@0 32
michael@0 33 if (aBufferSize <=0) aBufferSize=CONVERTER_BUFFER_SIZE;
michael@0 34
michael@0 35 // get the decoder
michael@0 36 nsCOMPtr<nsICharsetConverterManager> ccm =
michael@0 37 do_GetService(kCharsetConverterManagerCID, &rv);
michael@0 38 if (NS_FAILED(rv)) return rv;
michael@0 39
michael@0 40 rv = ccm->GetUnicodeDecoder(aCharset ? aCharset : "ISO-8859-1", getter_AddRefs(mConverter));
michael@0 41 if (NS_FAILED(rv)) return rv;
michael@0 42
michael@0 43 // set up our buffers
michael@0 44 if (!mByteData.SetCapacity(aBufferSize) ||
michael@0 45 !mUnicharData.SetCapacity(aBufferSize)) {
michael@0 46 return NS_ERROR_OUT_OF_MEMORY;
michael@0 47 }
michael@0 48
michael@0 49 mInput = aStream;
michael@0 50 mReplacementChar = aReplacementChar;
michael@0 51 if (!aReplacementChar ||
michael@0 52 aReplacementChar != mConverter->GetCharacterForUnMapped()) {
michael@0 53 mConverter->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
michael@0 54 }
michael@0 55
michael@0 56 return NS_OK;
michael@0 57 }
michael@0 58
michael@0 59 NS_IMETHODIMP
michael@0 60 nsConverterInputStream::Close()
michael@0 61 {
michael@0 62 nsresult rv = mInput ? mInput->Close() : NS_OK;
michael@0 63 mLineBuffer = nullptr;
michael@0 64 mInput = nullptr;
michael@0 65 mConverter = nullptr;
michael@0 66 mByteData.Clear();
michael@0 67 mUnicharData.Clear();
michael@0 68 return rv;
michael@0 69 }
michael@0 70
michael@0 71 NS_IMETHODIMP
michael@0 72 nsConverterInputStream::Read(char16_t* aBuf,
michael@0 73 uint32_t aCount,
michael@0 74 uint32_t *aReadCount)
michael@0 75 {
michael@0 76 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
michael@0 77 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
michael@0 78 if (0 == readCount) {
michael@0 79 // Fill the unichar buffer
michael@0 80 readCount = Fill(&mLastErrorCode);
michael@0 81 if (readCount == 0) {
michael@0 82 *aReadCount = 0;
michael@0 83 return mLastErrorCode;
michael@0 84 }
michael@0 85 }
michael@0 86 if (readCount > aCount) {
michael@0 87 readCount = aCount;
michael@0 88 }
michael@0 89 memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
michael@0 90 readCount * sizeof(char16_t));
michael@0 91 mUnicharDataOffset += readCount;
michael@0 92 *aReadCount = readCount;
michael@0 93 return NS_OK;
michael@0 94 }
michael@0 95
michael@0 96 NS_IMETHODIMP
michael@0 97 nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
michael@0 98 void* aClosure,
michael@0 99 uint32_t aCount, uint32_t *aReadCount)
michael@0 100 {
michael@0 101 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
michael@0 102 uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
michael@0 103 nsresult rv;
michael@0 104 if (0 == bytesToWrite) {
michael@0 105 // Fill the unichar buffer
michael@0 106 bytesToWrite = Fill(&rv);
michael@0 107 if (bytesToWrite <= 0) {
michael@0 108 *aReadCount = 0;
michael@0 109 return rv;
michael@0 110 }
michael@0 111 }
michael@0 112
michael@0 113 if (bytesToWrite > aCount)
michael@0 114 bytesToWrite = aCount;
michael@0 115
michael@0 116 uint32_t bytesWritten;
michael@0 117 uint32_t totalBytesWritten = 0;
michael@0 118
michael@0 119 while (bytesToWrite) {
michael@0 120 rv = aWriter(this, aClosure,
michael@0 121 mUnicharData.Elements() + mUnicharDataOffset,
michael@0 122 totalBytesWritten, bytesToWrite, &bytesWritten);
michael@0 123 if (NS_FAILED(rv)) {
michael@0 124 // don't propagate errors to the caller
michael@0 125 break;
michael@0 126 }
michael@0 127
michael@0 128 bytesToWrite -= bytesWritten;
michael@0 129 totalBytesWritten += bytesWritten;
michael@0 130 mUnicharDataOffset += bytesWritten;
michael@0 131
michael@0 132 }
michael@0 133
michael@0 134 *aReadCount = totalBytesWritten;
michael@0 135
michael@0 136 return NS_OK;
michael@0 137 }
michael@0 138
michael@0 139 NS_IMETHODIMP
michael@0 140 nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
michael@0 141 uint32_t* aReadCount)
michael@0 142 {
michael@0 143 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
michael@0 144 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
michael@0 145 if (0 == readCount) {
michael@0 146 // Fill the unichar buffer
michael@0 147 readCount = Fill(&mLastErrorCode);
michael@0 148 if (readCount == 0) {
michael@0 149 *aReadCount = 0;
michael@0 150 return mLastErrorCode;
michael@0 151 }
michael@0 152 }
michael@0 153 if (readCount > aCount) {
michael@0 154 readCount = aCount;
michael@0 155 }
michael@0 156 const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
michael@0 157 aString.Assign(buf, readCount);
michael@0 158 mUnicharDataOffset += readCount;
michael@0 159 *aReadCount = readCount;
michael@0 160 return NS_OK;
michael@0 161 }
michael@0 162
michael@0 163 uint32_t
michael@0 164 nsConverterInputStream::Fill(nsresult * aErrorCode)
michael@0 165 {
michael@0 166 if (nullptr == mInput) {
michael@0 167 // We already closed the stream!
michael@0 168 *aErrorCode = NS_BASE_STREAM_CLOSED;
michael@0 169 return 0;
michael@0 170 }
michael@0 171
michael@0 172 if (NS_FAILED(mLastErrorCode)) {
michael@0 173 // We failed to completely convert last time, and error-recovery
michael@0 174 // is disabled. We will fare no better this time, so...
michael@0 175 *aErrorCode = mLastErrorCode;
michael@0 176 return 0;
michael@0 177 }
michael@0 178
michael@0 179 // We assume a many to one conversion and are using equal sizes for
michael@0 180 // the two buffers. However if an error happens at the very start
michael@0 181 // of a byte buffer we may end up in a situation where n bytes lead
michael@0 182 // to n+1 unicode chars. Thus we need to keep track of the leftover
michael@0 183 // bytes as we convert.
michael@0 184
michael@0 185 uint32_t nb;
michael@0 186 *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
michael@0 187 if (nb == 0 && mLeftOverBytes == 0) {
michael@0 188 // No more data
michael@0 189 *aErrorCode = NS_OK;
michael@0 190 return 0;
michael@0 191 }
michael@0 192
michael@0 193 NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
michael@0 194 "mByteData is lying to us somewhere");
michael@0 195
michael@0 196 // Now convert as much of the byte buffer to unicode as possible
michael@0 197 mUnicharDataOffset = 0;
michael@0 198 mUnicharDataLength = 0;
michael@0 199 uint32_t srcConsumed = 0;
michael@0 200 do {
michael@0 201 int32_t srcLen = mByteData.Length() - srcConsumed;
michael@0 202 int32_t dstLen = mUnicharData.Capacity() - mUnicharDataLength;
michael@0 203 *aErrorCode = mConverter->Convert(mByteData.Elements()+srcConsumed,
michael@0 204 &srcLen,
michael@0 205 mUnicharData.Elements()+mUnicharDataLength,
michael@0 206 &dstLen);
michael@0 207 mUnicharDataLength += dstLen;
michael@0 208 // XXX if srcLen is negative, we want to drop the _first_ byte in
michael@0 209 // the erroneous byte sequence and try again. This is not quite
michael@0 210 // possible right now -- see bug 160784
michael@0 211 srcConsumed += srcLen;
michael@0 212 if (NS_FAILED(*aErrorCode) && mReplacementChar) {
michael@0 213 NS_ASSERTION(0 < mUnicharData.Capacity() - mUnicharDataLength,
michael@0 214 "Decoder returned an error but filled the output buffer! "
michael@0 215 "Should not happen.");
michael@0 216 mUnicharData.Elements()[mUnicharDataLength++] = mReplacementChar;
michael@0 217 ++srcConsumed;
michael@0 218 // XXX this is needed to make sure we don't underrun our buffer;
michael@0 219 // bug 160784 again
michael@0 220 srcConsumed = std::max<uint32_t>(srcConsumed, 0);
michael@0 221 mConverter->Reset();
michael@0 222 }
michael@0 223 NS_ASSERTION(srcConsumed <= mByteData.Length(),
michael@0 224 "Whoa. The converter should have returned NS_OK_UDEC_MOREINPUT before this point!");
michael@0 225 } while (mReplacementChar &&
michael@0 226 NS_FAILED(*aErrorCode) &&
michael@0 227 mUnicharData.Capacity() > mUnicharDataLength);
michael@0 228
michael@0 229 mLeftOverBytes = mByteData.Length() - srcConsumed;
michael@0 230
michael@0 231 return mUnicharDataLength;
michael@0 232 }
michael@0 233
michael@0 234 NS_IMETHODIMP
michael@0 235 nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult)
michael@0 236 {
michael@0 237 if (!mLineBuffer) {
michael@0 238 mLineBuffer = new nsLineBuffer<char16_t>;
michael@0 239 }
michael@0 240 return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult);
michael@0 241 }

mercurial