1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/xpcom/io/nsUnicharInputStream.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,429 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "nsUnicharInputStream.h" 1.10 +#include "nsIInputStream.h" 1.11 +#include "nsIServiceManager.h" 1.12 +#include "nsString.h" 1.13 +#include "nsTArray.h" 1.14 +#include "nsAutoPtr.h" 1.15 +#include "nsCRT.h" 1.16 +#include "nsStreamUtils.h" 1.17 +#include "nsUTF8Utils.h" 1.18 +#include "mozilla/Attributes.h" 1.19 +#include <fcntl.h> 1.20 +#if defined(XP_WIN) 1.21 +#include <io.h> 1.22 +#else 1.23 +#include <unistd.h> 1.24 +#endif 1.25 + 1.26 +#define STRING_BUFFER_SIZE 8192 1.27 + 1.28 +class StringUnicharInputStream MOZ_FINAL : public nsIUnicharInputStream { 1.29 +public: 1.30 + StringUnicharInputStream(const nsAString& aString) : 1.31 + mString(aString), mPos(0), mLen(aString.Length()) { } 1.32 + 1.33 + NS_DECL_ISUPPORTS 1.34 + NS_DECL_NSIUNICHARINPUTSTREAM 1.35 + 1.36 + nsString mString; 1.37 + uint32_t mPos; 1.38 + uint32_t mLen; 1.39 + 1.40 +private: 1.41 + ~StringUnicharInputStream() { } 1.42 +}; 1.43 + 1.44 +NS_IMETHODIMP 1.45 +StringUnicharInputStream::Read(char16_t* aBuf, 1.46 + uint32_t aCount, 1.47 + uint32_t *aReadCount) 1.48 +{ 1.49 + if (mPos >= mLen) { 1.50 + *aReadCount = 0; 1.51 + return NS_OK; 1.52 + } 1.53 + nsAString::const_iterator iter; 1.54 + mString.BeginReading(iter); 1.55 + const char16_t* us = iter.get(); 1.56 + uint32_t amount = mLen - mPos; 1.57 + if (amount > aCount) { 1.58 + amount = aCount; 1.59 + } 1.60 + memcpy(aBuf, us + mPos, sizeof(char16_t) * amount); 1.61 + mPos += amount; 1.62 + *aReadCount = amount; 1.63 + return NS_OK; 1.64 +} 1.65 + 1.66 +NS_IMETHODIMP 1.67 +StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, 1.68 + void* aClosure, 1.69 + uint32_t aCount, uint32_t *aReadCount) 1.70 +{ 1.71 + uint32_t bytesWritten; 1.72 + uint32_t totalBytesWritten = 0; 1.73 + 1.74 + nsresult rv; 1.75 + aCount = XPCOM_MIN(mString.Length() - mPos, aCount); 1.76 + 1.77 + nsAString::const_iterator iter; 1.78 + mString.BeginReading(iter); 1.79 + 1.80 + while (aCount) { 1.81 + rv = aWriter(this, aClosure, iter.get() + mPos, 1.82 + totalBytesWritten, aCount, &bytesWritten); 1.83 + 1.84 + if (NS_FAILED(rv)) { 1.85 + // don't propagate errors to the caller 1.86 + break; 1.87 + } 1.88 + 1.89 + aCount -= bytesWritten; 1.90 + totalBytesWritten += bytesWritten; 1.91 + mPos += bytesWritten; 1.92 + } 1.93 + 1.94 + *aReadCount = totalBytesWritten; 1.95 + 1.96 + return NS_OK; 1.97 +} 1.98 + 1.99 +NS_IMETHODIMP 1.100 +StringUnicharInputStream::ReadString(uint32_t aCount, nsAString& aString, 1.101 + uint32_t* aReadCount) 1.102 +{ 1.103 + if (mPos >= mLen) { 1.104 + *aReadCount = 0; 1.105 + return NS_OK; 1.106 + } 1.107 + uint32_t amount = mLen - mPos; 1.108 + if (amount > aCount) { 1.109 + amount = aCount; 1.110 + } 1.111 + aString = Substring(mString, mPos, amount); 1.112 + mPos += amount; 1.113 + *aReadCount = amount; 1.114 + return NS_OK; 1.115 +} 1.116 + 1.117 +nsresult StringUnicharInputStream::Close() 1.118 +{ 1.119 + mPos = mLen; 1.120 + return NS_OK; 1.121 +} 1.122 + 1.123 +NS_IMPL_ISUPPORTS(StringUnicharInputStream, nsIUnicharInputStream) 1.124 + 1.125 +//---------------------------------------------------------------------- 1.126 + 1.127 +class UTF8InputStream MOZ_FINAL : public nsIUnicharInputStream { 1.128 +public: 1.129 + UTF8InputStream(); 1.130 + nsresult Init(nsIInputStream* aStream); 1.131 + 1.132 + NS_DECL_ISUPPORTS 1.133 + NS_DECL_NSIUNICHARINPUTSTREAM 1.134 + 1.135 +private: 1.136 + ~UTF8InputStream(); 1.137 + 1.138 +protected: 1.139 + int32_t Fill(nsresult * aErrorCode); 1.140 + 1.141 + static void CountValidUTF8Bytes(const char *aBuf, uint32_t aMaxBytes, uint32_t& aValidUTF8bytes, uint32_t& aValidUTF16CodeUnits); 1.142 + 1.143 + nsCOMPtr<nsIInputStream> mInput; 1.144 + FallibleTArray<char> mByteData; 1.145 + FallibleTArray<char16_t> mUnicharData; 1.146 + 1.147 + uint32_t mByteDataOffset; 1.148 + uint32_t mUnicharDataOffset; 1.149 + uint32_t mUnicharDataLength; 1.150 +}; 1.151 + 1.152 +UTF8InputStream::UTF8InputStream() : 1.153 + mByteDataOffset(0), 1.154 + mUnicharDataOffset(0), 1.155 + mUnicharDataLength(0) 1.156 +{ 1.157 +} 1.158 + 1.159 +nsresult 1.160 +UTF8InputStream::Init(nsIInputStream* aStream) 1.161 +{ 1.162 + if (!mByteData.SetCapacity(STRING_BUFFER_SIZE) || 1.163 + !mUnicharData.SetCapacity(STRING_BUFFER_SIZE)) { 1.164 + return NS_ERROR_OUT_OF_MEMORY; 1.165 + } 1.166 + mInput = aStream; 1.167 + 1.168 + return NS_OK; 1.169 +} 1.170 + 1.171 +NS_IMPL_ISUPPORTS(UTF8InputStream,nsIUnicharInputStream) 1.172 + 1.173 +UTF8InputStream::~UTF8InputStream() 1.174 +{ 1.175 + Close(); 1.176 +} 1.177 + 1.178 +nsresult UTF8InputStream::Close() 1.179 +{ 1.180 + mInput = nullptr; 1.181 + mByteData.Clear(); 1.182 + mUnicharData.Clear(); 1.183 + return NS_OK; 1.184 +} 1.185 + 1.186 +nsresult UTF8InputStream::Read(char16_t* aBuf, 1.187 + uint32_t aCount, 1.188 + uint32_t *aReadCount) 1.189 +{ 1.190 + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); 1.191 + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; 1.192 + nsresult errorCode; 1.193 + if (0 == readCount) { 1.194 + // Fill the unichar buffer 1.195 + int32_t bytesRead = Fill(&errorCode); 1.196 + if (bytesRead <= 0) { 1.197 + *aReadCount = 0; 1.198 + return errorCode; 1.199 + } 1.200 + readCount = bytesRead; 1.201 + } 1.202 + if (readCount > aCount) { 1.203 + readCount = aCount; 1.204 + } 1.205 + memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, 1.206 + readCount * sizeof(char16_t)); 1.207 + mUnicharDataOffset += readCount; 1.208 + *aReadCount = readCount; 1.209 + return NS_OK; 1.210 +} 1.211 + 1.212 +NS_IMETHODIMP 1.213 +UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, 1.214 + void* aClosure, 1.215 + uint32_t aCount, uint32_t *aReadCount) 1.216 +{ 1.217 + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); 1.218 + uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset; 1.219 + nsresult rv = NS_OK; 1.220 + if (0 == bytesToWrite) { 1.221 + // Fill the unichar buffer 1.222 + int32_t bytesRead = Fill(&rv); 1.223 + if (bytesRead <= 0) { 1.224 + *aReadCount = 0; 1.225 + return rv; 1.226 + } 1.227 + bytesToWrite = bytesRead; 1.228 + } 1.229 + 1.230 + if (bytesToWrite > aCount) 1.231 + bytesToWrite = aCount; 1.232 + 1.233 + uint32_t bytesWritten; 1.234 + uint32_t totalBytesWritten = 0; 1.235 + 1.236 + while (bytesToWrite) { 1.237 + rv = aWriter(this, aClosure, 1.238 + mUnicharData.Elements() + mUnicharDataOffset, 1.239 + totalBytesWritten, bytesToWrite, &bytesWritten); 1.240 + 1.241 + if (NS_FAILED(rv)) { 1.242 + // don't propagate errors to the caller 1.243 + break; 1.244 + } 1.245 + 1.246 + bytesToWrite -= bytesWritten; 1.247 + totalBytesWritten += bytesWritten; 1.248 + mUnicharDataOffset += bytesWritten; 1.249 + } 1.250 + 1.251 + *aReadCount = totalBytesWritten; 1.252 + 1.253 + return NS_OK; 1.254 +} 1.255 + 1.256 +NS_IMETHODIMP 1.257 +UTF8InputStream::ReadString(uint32_t aCount, nsAString& aString, 1.258 + uint32_t* aReadCount) 1.259 +{ 1.260 + NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); 1.261 + uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; 1.262 + nsresult errorCode; 1.263 + if (0 == readCount) { 1.264 + // Fill the unichar buffer 1.265 + int32_t bytesRead = Fill(&errorCode); 1.266 + if (bytesRead <= 0) { 1.267 + *aReadCount = 0; 1.268 + return errorCode; 1.269 + } 1.270 + readCount = bytesRead; 1.271 + } 1.272 + if (readCount > aCount) { 1.273 + readCount = aCount; 1.274 + } 1.275 + const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; 1.276 + aString.Assign(buf, readCount); 1.277 + 1.278 + mUnicharDataOffset += readCount; 1.279 + *aReadCount = readCount; 1.280 + return NS_OK; 1.281 +} 1.282 + 1.283 +int32_t UTF8InputStream::Fill(nsresult * aErrorCode) 1.284 +{ 1.285 + if (nullptr == mInput) { 1.286 + // We already closed the stream! 1.287 + *aErrorCode = NS_BASE_STREAM_CLOSED; 1.288 + return -1; 1.289 + } 1.290 + 1.291 + NS_ASSERTION(mByteData.Length() >= mByteDataOffset, "unsigned madness"); 1.292 + uint32_t remainder = mByteData.Length() - mByteDataOffset; 1.293 + mByteDataOffset = remainder; 1.294 + uint32_t nb; 1.295 + *aErrorCode = NS_FillArray(mByteData, mInput, remainder, &nb); 1.296 + if (nb == 0) { 1.297 + // Because we assume a many to one conversion, the lingering data 1.298 + // in the byte buffer must be a partial conversion 1.299 + // fragment. Because we know that we have received no more new 1.300 + // data to add to it, we can't convert it. Therefore, we discard 1.301 + // it. 1.302 + return nb; 1.303 + } 1.304 + NS_ASSERTION(remainder + nb == mByteData.Length(), "bad nb"); 1.305 + 1.306 + // Now convert as much of the byte buffer to unicode as possible 1.307 + uint32_t srcLen, dstLen; 1.308 + CountValidUTF8Bytes(mByteData.Elements(),remainder + nb, srcLen, dstLen); 1.309 + 1.310 + // the number of UCS2 characters should always be <= the number of 1.311 + // UTF8 chars 1.312 + NS_ASSERTION( (remainder+nb >= srcLen), "cannot be longer than out buffer"); 1.313 + NS_ASSERTION(dstLen <= mUnicharData.Capacity(), 1.314 + "Ouch. I would overflow my buffer if I wasn't so careful."); 1.315 + if (dstLen > mUnicharData.Capacity()) return 0; 1.316 + 1.317 + ConvertUTF8toUTF16 converter(mUnicharData.Elements()); 1.318 + 1.319 + nsASingleFragmentCString::const_char_iterator start = mByteData.Elements(); 1.320 + nsASingleFragmentCString::const_char_iterator end = mByteData.Elements() + srcLen; 1.321 + 1.322 + copy_string(start, end, converter); 1.323 + if (converter.Length() != dstLen) { 1.324 + *aErrorCode = NS_BASE_STREAM_BAD_CONVERSION; 1.325 + return -1; 1.326 + } 1.327 + 1.328 + mUnicharDataOffset = 0; 1.329 + mUnicharDataLength = dstLen; 1.330 + mByteDataOffset = srcLen; 1.331 + 1.332 + return dstLen; 1.333 +} 1.334 + 1.335 +void 1.336 +UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, uint32_t aMaxBytes, uint32_t& aValidUTF8bytes, uint32_t& aValidUTF16CodeUnits) 1.337 +{ 1.338 + const char *c = aBuffer; 1.339 + const char *end = aBuffer + aMaxBytes; 1.340 + const char *lastchar = c; // pre-initialize in case of 0-length buffer 1.341 + uint32_t utf16length = 0; 1.342 + while (c < end && *c) { 1.343 + lastchar = c; 1.344 + utf16length++; 1.345 + 1.346 + if (UTF8traits::isASCII(*c)) 1.347 + c++; 1.348 + else if (UTF8traits::is2byte(*c)) 1.349 + c += 2; 1.350 + else if (UTF8traits::is3byte(*c)) 1.351 + c += 3; 1.352 + else if (UTF8traits::is4byte(*c)) { 1.353 + c += 4; 1.354 + utf16length++; // add 1 more because this will be converted to a 1.355 + // surrogate pair. 1.356 + } 1.357 + else if (UTF8traits::is5byte(*c)) 1.358 + c += 5; 1.359 + else if (UTF8traits::is6byte(*c)) 1.360 + c += 6; 1.361 + else { 1.362 + NS_WARNING("Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()"); 1.363 + break; // Otherwise we go into an infinite loop. But what happens now? 1.364 + } 1.365 + } 1.366 + if (c > end) { 1.367 + c = lastchar; 1.368 + utf16length--; 1.369 + } 1.370 + 1.371 + aValidUTF8bytes = c - aBuffer; 1.372 + aValidUTF16CodeUnits = utf16length; 1.373 +} 1.374 + 1.375 +NS_IMPL_QUERY_INTERFACE(nsSimpleUnicharStreamFactory, 1.376 + nsIFactory, 1.377 + nsISimpleUnicharStreamFactory) 1.378 + 1.379 +NS_IMETHODIMP_(MozExternalRefCountType) nsSimpleUnicharStreamFactory::AddRef() { return 2; } 1.380 +NS_IMETHODIMP_(MozExternalRefCountType) nsSimpleUnicharStreamFactory::Release() { return 1; } 1.381 + 1.382 +NS_IMETHODIMP 1.383 +nsSimpleUnicharStreamFactory::CreateInstance(nsISupports* aOuter, REFNSIID aIID, 1.384 + void **aResult) 1.385 +{ 1.386 + return NS_ERROR_NOT_IMPLEMENTED; 1.387 +} 1.388 + 1.389 +NS_IMETHODIMP 1.390 +nsSimpleUnicharStreamFactory::LockFactory(bool aLock) 1.391 +{ 1.392 + return NS_OK; 1.393 +} 1.394 + 1.395 +NS_IMETHODIMP 1.396 +nsSimpleUnicharStreamFactory::CreateInstanceFromString(const nsAString& aString, 1.397 + nsIUnicharInputStream* *aResult) 1.398 +{ 1.399 + StringUnicharInputStream* it = new StringUnicharInputStream(aString); 1.400 + if (!it) { 1.401 + return NS_ERROR_OUT_OF_MEMORY; 1.402 + } 1.403 + 1.404 + NS_ADDREF(*aResult = it); 1.405 + return NS_OK; 1.406 +} 1.407 + 1.408 +NS_IMETHODIMP 1.409 +nsSimpleUnicharStreamFactory::CreateInstanceFromUTF8Stream(nsIInputStream* aStreamToWrap, 1.410 + nsIUnicharInputStream* *aResult) 1.411 +{ 1.412 + *aResult = nullptr; 1.413 + 1.414 + // Create converter input stream 1.415 + nsRefPtr<UTF8InputStream> it = new UTF8InputStream(); 1.416 + if (!it) 1.417 + return NS_ERROR_OUT_OF_MEMORY; 1.418 + 1.419 + nsresult rv = it->Init(aStreamToWrap); 1.420 + if (NS_FAILED(rv)) 1.421 + return rv; 1.422 + 1.423 + NS_ADDREF(*aResult = it); 1.424 + return NS_OK; 1.425 +} 1.426 + 1.427 +nsSimpleUnicharStreamFactory* 1.428 +nsSimpleUnicharStreamFactory::GetInstance() 1.429 +{ 1.430 + static const nsSimpleUnicharStreamFactory kInstance; 1.431 + return const_cast<nsSimpleUnicharStreamFactory*>(&kInstance); 1.432 +}