|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "nsUnicharInputStream.h" |
|
7 #include "nsIInputStream.h" |
|
8 #include "nsIServiceManager.h" |
|
9 #include "nsString.h" |
|
10 #include "nsTArray.h" |
|
11 #include "nsAutoPtr.h" |
|
12 #include "nsCRT.h" |
|
13 #include "nsStreamUtils.h" |
|
14 #include "nsUTF8Utils.h" |
|
15 #include "mozilla/Attributes.h" |
|
16 #include <fcntl.h> |
|
17 #if defined(XP_WIN) |
|
18 #include <io.h> |
|
19 #else |
|
20 #include <unistd.h> |
|
21 #endif |
|
22 |
|
// Capacity requested in UTF8InputStream::Init() for each of its two buffers:
// the raw byte buffer (counted in bytes) and the converted buffer (counted
// in char16_t elements).
#define STRING_BUFFER_SIZE 8192
|
24 |
|
25 class StringUnicharInputStream MOZ_FINAL : public nsIUnicharInputStream { |
|
26 public: |
|
27 StringUnicharInputStream(const nsAString& aString) : |
|
28 mString(aString), mPos(0), mLen(aString.Length()) { } |
|
29 |
|
30 NS_DECL_ISUPPORTS |
|
31 NS_DECL_NSIUNICHARINPUTSTREAM |
|
32 |
|
33 nsString mString; |
|
34 uint32_t mPos; |
|
35 uint32_t mLen; |
|
36 |
|
37 private: |
|
38 ~StringUnicharInputStream() { } |
|
39 }; |
|
40 |
|
41 NS_IMETHODIMP |
|
42 StringUnicharInputStream::Read(char16_t* aBuf, |
|
43 uint32_t aCount, |
|
44 uint32_t *aReadCount) |
|
45 { |
|
46 if (mPos >= mLen) { |
|
47 *aReadCount = 0; |
|
48 return NS_OK; |
|
49 } |
|
50 nsAString::const_iterator iter; |
|
51 mString.BeginReading(iter); |
|
52 const char16_t* us = iter.get(); |
|
53 uint32_t amount = mLen - mPos; |
|
54 if (amount > aCount) { |
|
55 amount = aCount; |
|
56 } |
|
57 memcpy(aBuf, us + mPos, sizeof(char16_t) * amount); |
|
58 mPos += amount; |
|
59 *aReadCount = amount; |
|
60 return NS_OK; |
|
61 } |
|
62 |
|
63 NS_IMETHODIMP |
|
64 StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, |
|
65 void* aClosure, |
|
66 uint32_t aCount, uint32_t *aReadCount) |
|
67 { |
|
68 uint32_t bytesWritten; |
|
69 uint32_t totalBytesWritten = 0; |
|
70 |
|
71 nsresult rv; |
|
72 aCount = XPCOM_MIN(mString.Length() - mPos, aCount); |
|
73 |
|
74 nsAString::const_iterator iter; |
|
75 mString.BeginReading(iter); |
|
76 |
|
77 while (aCount) { |
|
78 rv = aWriter(this, aClosure, iter.get() + mPos, |
|
79 totalBytesWritten, aCount, &bytesWritten); |
|
80 |
|
81 if (NS_FAILED(rv)) { |
|
82 // don't propagate errors to the caller |
|
83 break; |
|
84 } |
|
85 |
|
86 aCount -= bytesWritten; |
|
87 totalBytesWritten += bytesWritten; |
|
88 mPos += bytesWritten; |
|
89 } |
|
90 |
|
91 *aReadCount = totalBytesWritten; |
|
92 |
|
93 return NS_OK; |
|
94 } |
|
95 |
|
96 NS_IMETHODIMP |
|
97 StringUnicharInputStream::ReadString(uint32_t aCount, nsAString& aString, |
|
98 uint32_t* aReadCount) |
|
99 { |
|
100 if (mPos >= mLen) { |
|
101 *aReadCount = 0; |
|
102 return NS_OK; |
|
103 } |
|
104 uint32_t amount = mLen - mPos; |
|
105 if (amount > aCount) { |
|
106 amount = aCount; |
|
107 } |
|
108 aString = Substring(mString, mPos, amount); |
|
109 mPos += amount; |
|
110 *aReadCount = amount; |
|
111 return NS_OK; |
|
112 } |
|
113 |
|
114 nsresult StringUnicharInputStream::Close() |
|
115 { |
|
116 mPos = mLen; |
|
117 return NS_OK; |
|
118 } |
|
119 |
|
120 NS_IMPL_ISUPPORTS(StringUnicharInputStream, nsIUnicharInputStream) |
|
121 |
|
122 //---------------------------------------------------------------------- |
|
123 |
|
124 class UTF8InputStream MOZ_FINAL : public nsIUnicharInputStream { |
|
125 public: |
|
126 UTF8InputStream(); |
|
127 nsresult Init(nsIInputStream* aStream); |
|
128 |
|
129 NS_DECL_ISUPPORTS |
|
130 NS_DECL_NSIUNICHARINPUTSTREAM |
|
131 |
|
132 private: |
|
133 ~UTF8InputStream(); |
|
134 |
|
135 protected: |
|
136 int32_t Fill(nsresult * aErrorCode); |
|
137 |
|
138 static void CountValidUTF8Bytes(const char *aBuf, uint32_t aMaxBytes, uint32_t& aValidUTF8bytes, uint32_t& aValidUTF16CodeUnits); |
|
139 |
|
140 nsCOMPtr<nsIInputStream> mInput; |
|
141 FallibleTArray<char> mByteData; |
|
142 FallibleTArray<char16_t> mUnicharData; |
|
143 |
|
144 uint32_t mByteDataOffset; |
|
145 uint32_t mUnicharDataOffset; |
|
146 uint32_t mUnicharDataLength; |
|
147 }; |
|
148 |
|
149 UTF8InputStream::UTF8InputStream() : |
|
150 mByteDataOffset(0), |
|
151 mUnicharDataOffset(0), |
|
152 mUnicharDataLength(0) |
|
153 { |
|
154 } |
|
155 |
|
156 nsresult |
|
157 UTF8InputStream::Init(nsIInputStream* aStream) |
|
158 { |
|
159 if (!mByteData.SetCapacity(STRING_BUFFER_SIZE) || |
|
160 !mUnicharData.SetCapacity(STRING_BUFFER_SIZE)) { |
|
161 return NS_ERROR_OUT_OF_MEMORY; |
|
162 } |
|
163 mInput = aStream; |
|
164 |
|
165 return NS_OK; |
|
166 } |
|
167 |
|
168 NS_IMPL_ISUPPORTS(UTF8InputStream,nsIUnicharInputStream) |
|
169 |
|
170 UTF8InputStream::~UTF8InputStream() |
|
171 { |
|
172 Close(); |
|
173 } |
|
174 |
|
175 nsresult UTF8InputStream::Close() |
|
176 { |
|
177 mInput = nullptr; |
|
178 mByteData.Clear(); |
|
179 mUnicharData.Clear(); |
|
180 return NS_OK; |
|
181 } |
|
182 |
|
183 nsresult UTF8InputStream::Read(char16_t* aBuf, |
|
184 uint32_t aCount, |
|
185 uint32_t *aReadCount) |
|
186 { |
|
187 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); |
|
188 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; |
|
189 nsresult errorCode; |
|
190 if (0 == readCount) { |
|
191 // Fill the unichar buffer |
|
192 int32_t bytesRead = Fill(&errorCode); |
|
193 if (bytesRead <= 0) { |
|
194 *aReadCount = 0; |
|
195 return errorCode; |
|
196 } |
|
197 readCount = bytesRead; |
|
198 } |
|
199 if (readCount > aCount) { |
|
200 readCount = aCount; |
|
201 } |
|
202 memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, |
|
203 readCount * sizeof(char16_t)); |
|
204 mUnicharDataOffset += readCount; |
|
205 *aReadCount = readCount; |
|
206 return NS_OK; |
|
207 } |
|
208 |
|
209 NS_IMETHODIMP |
|
210 UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, |
|
211 void* aClosure, |
|
212 uint32_t aCount, uint32_t *aReadCount) |
|
213 { |
|
214 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); |
|
215 uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset; |
|
216 nsresult rv = NS_OK; |
|
217 if (0 == bytesToWrite) { |
|
218 // Fill the unichar buffer |
|
219 int32_t bytesRead = Fill(&rv); |
|
220 if (bytesRead <= 0) { |
|
221 *aReadCount = 0; |
|
222 return rv; |
|
223 } |
|
224 bytesToWrite = bytesRead; |
|
225 } |
|
226 |
|
227 if (bytesToWrite > aCount) |
|
228 bytesToWrite = aCount; |
|
229 |
|
230 uint32_t bytesWritten; |
|
231 uint32_t totalBytesWritten = 0; |
|
232 |
|
233 while (bytesToWrite) { |
|
234 rv = aWriter(this, aClosure, |
|
235 mUnicharData.Elements() + mUnicharDataOffset, |
|
236 totalBytesWritten, bytesToWrite, &bytesWritten); |
|
237 |
|
238 if (NS_FAILED(rv)) { |
|
239 // don't propagate errors to the caller |
|
240 break; |
|
241 } |
|
242 |
|
243 bytesToWrite -= bytesWritten; |
|
244 totalBytesWritten += bytesWritten; |
|
245 mUnicharDataOffset += bytesWritten; |
|
246 } |
|
247 |
|
248 *aReadCount = totalBytesWritten; |
|
249 |
|
250 return NS_OK; |
|
251 } |
|
252 |
|
253 NS_IMETHODIMP |
|
254 UTF8InputStream::ReadString(uint32_t aCount, nsAString& aString, |
|
255 uint32_t* aReadCount) |
|
256 { |
|
257 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); |
|
258 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; |
|
259 nsresult errorCode; |
|
260 if (0 == readCount) { |
|
261 // Fill the unichar buffer |
|
262 int32_t bytesRead = Fill(&errorCode); |
|
263 if (bytesRead <= 0) { |
|
264 *aReadCount = 0; |
|
265 return errorCode; |
|
266 } |
|
267 readCount = bytesRead; |
|
268 } |
|
269 if (readCount > aCount) { |
|
270 readCount = aCount; |
|
271 } |
|
272 const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; |
|
273 aString.Assign(buf, readCount); |
|
274 |
|
275 mUnicharDataOffset += readCount; |
|
276 *aReadCount = readCount; |
|
277 return NS_OK; |
|
278 } |
|
279 |
|
280 int32_t UTF8InputStream::Fill(nsresult * aErrorCode) |
|
281 { |
|
282 if (nullptr == mInput) { |
|
283 // We already closed the stream! |
|
284 *aErrorCode = NS_BASE_STREAM_CLOSED; |
|
285 return -1; |
|
286 } |
|
287 |
|
288 NS_ASSERTION(mByteData.Length() >= mByteDataOffset, "unsigned madness"); |
|
289 uint32_t remainder = mByteData.Length() - mByteDataOffset; |
|
290 mByteDataOffset = remainder; |
|
291 uint32_t nb; |
|
292 *aErrorCode = NS_FillArray(mByteData, mInput, remainder, &nb); |
|
293 if (nb == 0) { |
|
294 // Because we assume a many to one conversion, the lingering data |
|
295 // in the byte buffer must be a partial conversion |
|
296 // fragment. Because we know that we have received no more new |
|
297 // data to add to it, we can't convert it. Therefore, we discard |
|
298 // it. |
|
299 return nb; |
|
300 } |
|
301 NS_ASSERTION(remainder + nb == mByteData.Length(), "bad nb"); |
|
302 |
|
303 // Now convert as much of the byte buffer to unicode as possible |
|
304 uint32_t srcLen, dstLen; |
|
305 CountValidUTF8Bytes(mByteData.Elements(),remainder + nb, srcLen, dstLen); |
|
306 |
|
307 // the number of UCS2 characters should always be <= the number of |
|
308 // UTF8 chars |
|
309 NS_ASSERTION( (remainder+nb >= srcLen), "cannot be longer than out buffer"); |
|
310 NS_ASSERTION(dstLen <= mUnicharData.Capacity(), |
|
311 "Ouch. I would overflow my buffer if I wasn't so careful."); |
|
312 if (dstLen > mUnicharData.Capacity()) return 0; |
|
313 |
|
314 ConvertUTF8toUTF16 converter(mUnicharData.Elements()); |
|
315 |
|
316 nsASingleFragmentCString::const_char_iterator start = mByteData.Elements(); |
|
317 nsASingleFragmentCString::const_char_iterator end = mByteData.Elements() + srcLen; |
|
318 |
|
319 copy_string(start, end, converter); |
|
320 if (converter.Length() != dstLen) { |
|
321 *aErrorCode = NS_BASE_STREAM_BAD_CONVERSION; |
|
322 return -1; |
|
323 } |
|
324 |
|
325 mUnicharDataOffset = 0; |
|
326 mUnicharDataLength = dstLen; |
|
327 mByteDataOffset = srcLen; |
|
328 |
|
329 return dstLen; |
|
330 } |
|
331 |
|
332 void |
|
333 UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, uint32_t aMaxBytes, uint32_t& aValidUTF8bytes, uint32_t& aValidUTF16CodeUnits) |
|
334 { |
|
335 const char *c = aBuffer; |
|
336 const char *end = aBuffer + aMaxBytes; |
|
337 const char *lastchar = c; // pre-initialize in case of 0-length buffer |
|
338 uint32_t utf16length = 0; |
|
339 while (c < end && *c) { |
|
340 lastchar = c; |
|
341 utf16length++; |
|
342 |
|
343 if (UTF8traits::isASCII(*c)) |
|
344 c++; |
|
345 else if (UTF8traits::is2byte(*c)) |
|
346 c += 2; |
|
347 else if (UTF8traits::is3byte(*c)) |
|
348 c += 3; |
|
349 else if (UTF8traits::is4byte(*c)) { |
|
350 c += 4; |
|
351 utf16length++; // add 1 more because this will be converted to a |
|
352 // surrogate pair. |
|
353 } |
|
354 else if (UTF8traits::is5byte(*c)) |
|
355 c += 5; |
|
356 else if (UTF8traits::is6byte(*c)) |
|
357 c += 6; |
|
358 else { |
|
359 NS_WARNING("Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()"); |
|
360 break; // Otherwise we go into an infinite loop. But what happens now? |
|
361 } |
|
362 } |
|
363 if (c > end) { |
|
364 c = lastchar; |
|
365 utf16length--; |
|
366 } |
|
367 |
|
368 aValidUTF8bytes = c - aBuffer; |
|
369 aValidUTF16CodeUnits = utf16length; |
|
370 } |
|
371 |
|
372 NS_IMPL_QUERY_INTERFACE(nsSimpleUnicharStreamFactory, |
|
373 nsIFactory, |
|
374 nsISimpleUnicharStreamFactory) |
|
375 |
|
376 NS_IMETHODIMP_(MozExternalRefCountType) nsSimpleUnicharStreamFactory::AddRef() { return 2; } |
|
377 NS_IMETHODIMP_(MozExternalRefCountType) nsSimpleUnicharStreamFactory::Release() { return 1; } |
|
378 |
|
379 NS_IMETHODIMP |
|
380 nsSimpleUnicharStreamFactory::CreateInstance(nsISupports* aOuter, REFNSIID aIID, |
|
381 void **aResult) |
|
382 { |
|
383 return NS_ERROR_NOT_IMPLEMENTED; |
|
384 } |
|
385 |
|
386 NS_IMETHODIMP |
|
387 nsSimpleUnicharStreamFactory::LockFactory(bool aLock) |
|
388 { |
|
389 return NS_OK; |
|
390 } |
|
391 |
|
392 NS_IMETHODIMP |
|
393 nsSimpleUnicharStreamFactory::CreateInstanceFromString(const nsAString& aString, |
|
394 nsIUnicharInputStream* *aResult) |
|
395 { |
|
396 StringUnicharInputStream* it = new StringUnicharInputStream(aString); |
|
397 if (!it) { |
|
398 return NS_ERROR_OUT_OF_MEMORY; |
|
399 } |
|
400 |
|
401 NS_ADDREF(*aResult = it); |
|
402 return NS_OK; |
|
403 } |
|
404 |
|
405 NS_IMETHODIMP |
|
406 nsSimpleUnicharStreamFactory::CreateInstanceFromUTF8Stream(nsIInputStream* aStreamToWrap, |
|
407 nsIUnicharInputStream* *aResult) |
|
408 { |
|
409 *aResult = nullptr; |
|
410 |
|
411 // Create converter input stream |
|
412 nsRefPtr<UTF8InputStream> it = new UTF8InputStream(); |
|
413 if (!it) |
|
414 return NS_ERROR_OUT_OF_MEMORY; |
|
415 |
|
416 nsresult rv = it->Init(aStreamToWrap); |
|
417 if (NS_FAILED(rv)) |
|
418 return rv; |
|
419 |
|
420 NS_ADDREF(*aResult = it); |
|
421 return NS_OK; |
|
422 } |
|
423 |
|
424 nsSimpleUnicharStreamFactory* |
|
425 nsSimpleUnicharStreamFactory::GetInstance() |
|
426 { |
|
427 static const nsSimpleUnicharStreamFactory kInstance; |
|
428 return const_cast<nsSimpleUnicharStreamFactory*>(&kInstance); |
|
429 } |