|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "nsConverterInputStream.h" |
|
7 #include "nsIInputStream.h" |
|
8 #include "nsICharsetConverterManager.h" |
|
9 #include "nsReadLine.h" |
|
10 #include "nsStreamUtils.h" |
|
11 #include "nsServiceManagerUtils.h" |
|
12 #include <algorithm> |
|
13 |
|
14 #define CONVERTER_BUFFER_SIZE 8192 |
|
15 |
|
16 NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream, |
|
17 nsIUnicharInputStream, nsIUnicharLineInputStream) |
|
18 |
|
19 |
|
20 NS_IMETHODIMP |
|
21 nsConverterInputStream::Init(nsIInputStream* aStream, |
|
22 const char *aCharset, |
|
23 int32_t aBufferSize, |
|
24 char16_t aReplacementChar) |
|
25 { |
|
26 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); |
|
27 |
|
28 if (!aCharset) |
|
29 aCharset = "UTF-8"; |
|
30 |
|
31 nsresult rv; |
|
32 |
|
33 if (aBufferSize <=0) aBufferSize=CONVERTER_BUFFER_SIZE; |
|
34 |
|
35 // get the decoder |
|
36 nsCOMPtr<nsICharsetConverterManager> ccm = |
|
37 do_GetService(kCharsetConverterManagerCID, &rv); |
|
38 if (NS_FAILED(rv)) return rv; |
|
39 |
|
40 rv = ccm->GetUnicodeDecoder(aCharset ? aCharset : "ISO-8859-1", getter_AddRefs(mConverter)); |
|
41 if (NS_FAILED(rv)) return rv; |
|
42 |
|
43 // set up our buffers |
|
44 if (!mByteData.SetCapacity(aBufferSize) || |
|
45 !mUnicharData.SetCapacity(aBufferSize)) { |
|
46 return NS_ERROR_OUT_OF_MEMORY; |
|
47 } |
|
48 |
|
49 mInput = aStream; |
|
50 mReplacementChar = aReplacementChar; |
|
51 if (!aReplacementChar || |
|
52 aReplacementChar != mConverter->GetCharacterForUnMapped()) { |
|
53 mConverter->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); |
|
54 } |
|
55 |
|
56 return NS_OK; |
|
57 } |
|
58 |
|
59 NS_IMETHODIMP |
|
60 nsConverterInputStream::Close() |
|
61 { |
|
62 nsresult rv = mInput ? mInput->Close() : NS_OK; |
|
63 mLineBuffer = nullptr; |
|
64 mInput = nullptr; |
|
65 mConverter = nullptr; |
|
66 mByteData.Clear(); |
|
67 mUnicharData.Clear(); |
|
68 return rv; |
|
69 } |
|
70 |
|
71 NS_IMETHODIMP |
|
72 nsConverterInputStream::Read(char16_t* aBuf, |
|
73 uint32_t aCount, |
|
74 uint32_t *aReadCount) |
|
75 { |
|
76 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); |
|
77 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; |
|
78 if (0 == readCount) { |
|
79 // Fill the unichar buffer |
|
80 readCount = Fill(&mLastErrorCode); |
|
81 if (readCount == 0) { |
|
82 *aReadCount = 0; |
|
83 return mLastErrorCode; |
|
84 } |
|
85 } |
|
86 if (readCount > aCount) { |
|
87 readCount = aCount; |
|
88 } |
|
89 memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, |
|
90 readCount * sizeof(char16_t)); |
|
91 mUnicharDataOffset += readCount; |
|
92 *aReadCount = readCount; |
|
93 return NS_OK; |
|
94 } |
|
95 |
|
96 NS_IMETHODIMP |
|
97 nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, |
|
98 void* aClosure, |
|
99 uint32_t aCount, uint32_t *aReadCount) |
|
100 { |
|
101 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); |
|
102 uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset; |
|
103 nsresult rv; |
|
104 if (0 == bytesToWrite) { |
|
105 // Fill the unichar buffer |
|
106 bytesToWrite = Fill(&rv); |
|
107 if (bytesToWrite <= 0) { |
|
108 *aReadCount = 0; |
|
109 return rv; |
|
110 } |
|
111 } |
|
112 |
|
113 if (bytesToWrite > aCount) |
|
114 bytesToWrite = aCount; |
|
115 |
|
116 uint32_t bytesWritten; |
|
117 uint32_t totalBytesWritten = 0; |
|
118 |
|
119 while (bytesToWrite) { |
|
120 rv = aWriter(this, aClosure, |
|
121 mUnicharData.Elements() + mUnicharDataOffset, |
|
122 totalBytesWritten, bytesToWrite, &bytesWritten); |
|
123 if (NS_FAILED(rv)) { |
|
124 // don't propagate errors to the caller |
|
125 break; |
|
126 } |
|
127 |
|
128 bytesToWrite -= bytesWritten; |
|
129 totalBytesWritten += bytesWritten; |
|
130 mUnicharDataOffset += bytesWritten; |
|
131 |
|
132 } |
|
133 |
|
134 *aReadCount = totalBytesWritten; |
|
135 |
|
136 return NS_OK; |
|
137 } |
|
138 |
|
139 NS_IMETHODIMP |
|
140 nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString, |
|
141 uint32_t* aReadCount) |
|
142 { |
|
143 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); |
|
144 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; |
|
145 if (0 == readCount) { |
|
146 // Fill the unichar buffer |
|
147 readCount = Fill(&mLastErrorCode); |
|
148 if (readCount == 0) { |
|
149 *aReadCount = 0; |
|
150 return mLastErrorCode; |
|
151 } |
|
152 } |
|
153 if (readCount > aCount) { |
|
154 readCount = aCount; |
|
155 } |
|
156 const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; |
|
157 aString.Assign(buf, readCount); |
|
158 mUnicharDataOffset += readCount; |
|
159 *aReadCount = readCount; |
|
160 return NS_OK; |
|
161 } |
|
162 |
|
163 uint32_t |
|
164 nsConverterInputStream::Fill(nsresult * aErrorCode) |
|
165 { |
|
166 if (nullptr == mInput) { |
|
167 // We already closed the stream! |
|
168 *aErrorCode = NS_BASE_STREAM_CLOSED; |
|
169 return 0; |
|
170 } |
|
171 |
|
172 if (NS_FAILED(mLastErrorCode)) { |
|
173 // We failed to completely convert last time, and error-recovery |
|
174 // is disabled. We will fare no better this time, so... |
|
175 *aErrorCode = mLastErrorCode; |
|
176 return 0; |
|
177 } |
|
178 |
|
179 // We assume a many to one conversion and are using equal sizes for |
|
180 // the two buffers. However if an error happens at the very start |
|
181 // of a byte buffer we may end up in a situation where n bytes lead |
|
182 // to n+1 unicode chars. Thus we need to keep track of the leftover |
|
183 // bytes as we convert. |
|
184 |
|
185 uint32_t nb; |
|
186 *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb); |
|
187 if (nb == 0 && mLeftOverBytes == 0) { |
|
188 // No more data |
|
189 *aErrorCode = NS_OK; |
|
190 return 0; |
|
191 } |
|
192 |
|
193 NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(), |
|
194 "mByteData is lying to us somewhere"); |
|
195 |
|
196 // Now convert as much of the byte buffer to unicode as possible |
|
197 mUnicharDataOffset = 0; |
|
198 mUnicharDataLength = 0; |
|
199 uint32_t srcConsumed = 0; |
|
200 do { |
|
201 int32_t srcLen = mByteData.Length() - srcConsumed; |
|
202 int32_t dstLen = mUnicharData.Capacity() - mUnicharDataLength; |
|
203 *aErrorCode = mConverter->Convert(mByteData.Elements()+srcConsumed, |
|
204 &srcLen, |
|
205 mUnicharData.Elements()+mUnicharDataLength, |
|
206 &dstLen); |
|
207 mUnicharDataLength += dstLen; |
|
208 // XXX if srcLen is negative, we want to drop the _first_ byte in |
|
209 // the erroneous byte sequence and try again. This is not quite |
|
210 // possible right now -- see bug 160784 |
|
211 srcConsumed += srcLen; |
|
212 if (NS_FAILED(*aErrorCode) && mReplacementChar) { |
|
213 NS_ASSERTION(0 < mUnicharData.Capacity() - mUnicharDataLength, |
|
214 "Decoder returned an error but filled the output buffer! " |
|
215 "Should not happen."); |
|
216 mUnicharData.Elements()[mUnicharDataLength++] = mReplacementChar; |
|
217 ++srcConsumed; |
|
218 // XXX this is needed to make sure we don't underrun our buffer; |
|
219 // bug 160784 again |
|
220 srcConsumed = std::max<uint32_t>(srcConsumed, 0); |
|
221 mConverter->Reset(); |
|
222 } |
|
223 NS_ASSERTION(srcConsumed <= mByteData.Length(), |
|
224 "Whoa. The converter should have returned NS_OK_UDEC_MOREINPUT before this point!"); |
|
225 } while (mReplacementChar && |
|
226 NS_FAILED(*aErrorCode) && |
|
227 mUnicharData.Capacity() > mUnicharDataLength); |
|
228 |
|
229 mLeftOverBytes = mByteData.Length() - srcConsumed; |
|
230 |
|
231 return mUnicharDataLength; |
|
232 } |
|
233 |
|
234 NS_IMETHODIMP |
|
235 nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) |
|
236 { |
|
237 if (!mLineBuffer) { |
|
238 mLineBuffer = new nsLineBuffer<char16_t>; |
|
239 } |
|
240 return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult); |
|
241 } |