intl/uconv/src/nsConverterInputStream.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/src/nsConverterInputStream.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,241 @@
     1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "nsConverterInputStream.h"
    1.10 +#include "nsIInputStream.h"
    1.11 +#include "nsICharsetConverterManager.h"
    1.12 +#include "nsReadLine.h"
    1.13 +#include "nsStreamUtils.h"
    1.14 +#include "nsServiceManagerUtils.h"
    1.15 +#include <algorithm>
    1.16 +
    1.17 +#define CONVERTER_BUFFER_SIZE 8192
    1.18 +
    1.19 +NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
    1.20 +                  nsIUnicharInputStream, nsIUnicharLineInputStream)
    1.21 +
    1.22 +
    1.23 +NS_IMETHODIMP
    1.24 +nsConverterInputStream::Init(nsIInputStream* aStream,
    1.25 +                             const char *aCharset,
    1.26 +                             int32_t aBufferSize,
    1.27 +                             char16_t aReplacementChar)
    1.28 +{
    1.29 +    static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
    1.30 +
    1.31 +    if (!aCharset)
    1.32 +        aCharset = "UTF-8";
    1.33 +
    1.34 +    nsresult rv;
    1.35 +
    1.36 +    if (aBufferSize <=0) aBufferSize=CONVERTER_BUFFER_SIZE;
    1.37 +    
    1.38 +    // get the decoder
    1.39 +    nsCOMPtr<nsICharsetConverterManager> ccm =
    1.40 +        do_GetService(kCharsetConverterManagerCID, &rv);
    1.41 +    if (NS_FAILED(rv)) return rv;
    1.42 +
    1.43 +    rv = ccm->GetUnicodeDecoder(aCharset ? aCharset : "ISO-8859-1", getter_AddRefs(mConverter));
    1.44 +    if (NS_FAILED(rv)) return rv;
    1.45 + 
    1.46 +    // set up our buffers
    1.47 +    if (!mByteData.SetCapacity(aBufferSize) ||
    1.48 +        !mUnicharData.SetCapacity(aBufferSize)) {
    1.49 +      return NS_ERROR_OUT_OF_MEMORY;
    1.50 +    }
    1.51 +
    1.52 +    mInput = aStream;
    1.53 +    mReplacementChar = aReplacementChar;
    1.54 +    if (!aReplacementChar ||
    1.55 +        aReplacementChar != mConverter->GetCharacterForUnMapped()) {
    1.56 +        mConverter->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
    1.57 +    }
    1.58 +
    1.59 +    return NS_OK;
    1.60 +}
    1.61 +
    1.62 +NS_IMETHODIMP
    1.63 +nsConverterInputStream::Close()
    1.64 +{
    1.65 +    nsresult rv = mInput ? mInput->Close() : NS_OK;
    1.66 +    mLineBuffer = nullptr;
    1.67 +    mInput = nullptr;
    1.68 +    mConverter = nullptr;
    1.69 +    mByteData.Clear();
    1.70 +    mUnicharData.Clear();
    1.71 +    return rv;
    1.72 +}
    1.73 +
    1.74 +NS_IMETHODIMP
    1.75 +nsConverterInputStream::Read(char16_t* aBuf,
    1.76 +                             uint32_t aCount,
    1.77 +                             uint32_t *aReadCount)
    1.78 +{
    1.79 +  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
    1.80 +  uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
    1.81 +  if (0 == readCount) {
    1.82 +    // Fill the unichar buffer
    1.83 +    readCount = Fill(&mLastErrorCode);
    1.84 +    if (readCount == 0) {
    1.85 +      *aReadCount = 0;
    1.86 +      return mLastErrorCode;
    1.87 +    }
    1.88 +  }
    1.89 +  if (readCount > aCount) {
    1.90 +    readCount = aCount;
    1.91 +  }
    1.92 +  memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
    1.93 +         readCount * sizeof(char16_t));
    1.94 +  mUnicharDataOffset += readCount;
    1.95 +  *aReadCount = readCount;
    1.96 +  return NS_OK;
    1.97 +}
    1.98 +
    1.99 +NS_IMETHODIMP
   1.100 +nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
   1.101 +                                     void* aClosure,
   1.102 +                                     uint32_t aCount, uint32_t *aReadCount)
   1.103 +{
   1.104 +  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
   1.105 +  uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
   1.106 +  nsresult rv;
   1.107 +  if (0 == bytesToWrite) {
   1.108 +    // Fill the unichar buffer
   1.109 +    bytesToWrite = Fill(&rv);
   1.110 +    if (bytesToWrite <= 0) {
   1.111 +      *aReadCount = 0;
   1.112 +      return rv;
   1.113 +    }
   1.114 +  }
   1.115 +  
   1.116 +  if (bytesToWrite > aCount)
   1.117 +    bytesToWrite = aCount;
   1.118 +  
   1.119 +  uint32_t bytesWritten;
   1.120 +  uint32_t totalBytesWritten = 0;
   1.121 +
   1.122 +  while (bytesToWrite) {
   1.123 +    rv = aWriter(this, aClosure,
   1.124 +                 mUnicharData.Elements() + mUnicharDataOffset,
   1.125 +                 totalBytesWritten, bytesToWrite, &bytesWritten);
   1.126 +    if (NS_FAILED(rv)) {
   1.127 +      // don't propagate errors to the caller
   1.128 +      break;
   1.129 +    }
   1.130 +    
   1.131 +    bytesToWrite -= bytesWritten;
   1.132 +    totalBytesWritten += bytesWritten;
   1.133 +    mUnicharDataOffset += bytesWritten;
   1.134 +    
   1.135 +  }
   1.136 +
   1.137 +  *aReadCount = totalBytesWritten;
   1.138 +
   1.139 +  return NS_OK;
   1.140 +}
   1.141 +
   1.142 +NS_IMETHODIMP
   1.143 +nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
   1.144 +                                   uint32_t* aReadCount)
   1.145 +{
   1.146 +  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
   1.147 +  uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
   1.148 +  if (0 == readCount) {
   1.149 +    // Fill the unichar buffer
   1.150 +    readCount = Fill(&mLastErrorCode);
   1.151 +    if (readCount == 0) {
   1.152 +      *aReadCount = 0;
   1.153 +      return mLastErrorCode;
   1.154 +    }
   1.155 +  }
   1.156 +  if (readCount > aCount) {
   1.157 +    readCount = aCount;
   1.158 +  }
   1.159 +  const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
   1.160 +  aString.Assign(buf, readCount);
   1.161 +  mUnicharDataOffset += readCount;
   1.162 +  *aReadCount = readCount;
   1.163 +  return NS_OK;
   1.164 +}
   1.165 +
   1.166 +uint32_t
   1.167 +nsConverterInputStream::Fill(nsresult * aErrorCode)
   1.168 +{
   1.169 +  if (nullptr == mInput) {
   1.170 +    // We already closed the stream!
   1.171 +    *aErrorCode = NS_BASE_STREAM_CLOSED;
   1.172 +    return 0;
   1.173 +  }
   1.174 +
   1.175 +  if (NS_FAILED(mLastErrorCode)) {
   1.176 +    // We failed to completely convert last time, and error-recovery
   1.177 +    // is disabled.  We will fare no better this time, so...
   1.178 +    *aErrorCode = mLastErrorCode;
   1.179 +    return 0;
   1.180 +  }
   1.181 +  
   1.182 +  // We assume a many to one conversion and are using equal sizes for
   1.183 +  // the two buffers.  However if an error happens at the very start
   1.184 +  // of a byte buffer we may end up in a situation where n bytes lead
   1.185 +  // to n+1 unicode chars.  Thus we need to keep track of the leftover
   1.186 +  // bytes as we convert.
   1.187 +  
   1.188 +  uint32_t nb;
   1.189 +  *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
   1.190 +  if (nb == 0 && mLeftOverBytes == 0) {
   1.191 +    // No more data 
   1.192 +    *aErrorCode = NS_OK;
   1.193 +    return 0;
   1.194 +  }
   1.195 +
   1.196 +  NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
   1.197 +               "mByteData is lying to us somewhere");
   1.198 +
   1.199 +  // Now convert as much of the byte buffer to unicode as possible
   1.200 +  mUnicharDataOffset = 0;
   1.201 +  mUnicharDataLength = 0;
   1.202 +  uint32_t srcConsumed = 0;
   1.203 +  do {
   1.204 +    int32_t srcLen = mByteData.Length() - srcConsumed;
   1.205 +    int32_t dstLen = mUnicharData.Capacity() - mUnicharDataLength;
   1.206 +    *aErrorCode = mConverter->Convert(mByteData.Elements()+srcConsumed,
   1.207 +                                      &srcLen,
   1.208 +                                      mUnicharData.Elements()+mUnicharDataLength,
   1.209 +                                      &dstLen);
   1.210 +    mUnicharDataLength += dstLen;
   1.211 +    // XXX if srcLen is negative, we want to drop the _first_ byte in
   1.212 +    // the erroneous byte sequence and try again.  This is not quite
   1.213 +    // possible right now -- see bug 160784
   1.214 +    srcConsumed += srcLen;
   1.215 +    if (NS_FAILED(*aErrorCode) && mReplacementChar) {
   1.216 +      NS_ASSERTION(0 < mUnicharData.Capacity() - mUnicharDataLength,
   1.217 +                   "Decoder returned an error but filled the output buffer! "
   1.218 +                   "Should not happen.");
   1.219 +      mUnicharData.Elements()[mUnicharDataLength++] = mReplacementChar;
   1.220 +      ++srcConsumed;
   1.221 +      // XXX this is needed to make sure we don't underrun our buffer;
   1.222 +      // bug 160784 again
   1.223 +      srcConsumed = std::max<uint32_t>(srcConsumed, 0);
   1.224 +      mConverter->Reset();
   1.225 +    }
   1.226 +    NS_ASSERTION(srcConsumed <= mByteData.Length(),
   1.227 +                 "Whoa.  The converter should have returned NS_OK_UDEC_MOREINPUT before this point!");
   1.228 +  } while (mReplacementChar &&
   1.229 +           NS_FAILED(*aErrorCode) &&
   1.230 +           mUnicharData.Capacity() > mUnicharDataLength);
   1.231 +
   1.232 +  mLeftOverBytes = mByteData.Length() - srcConsumed;
   1.233 +
   1.234 +  return mUnicharDataLength;
   1.235 +}
   1.236 +
   1.237 +NS_IMETHODIMP
   1.238 +nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult)
   1.239 +{
   1.240 +  if (!mLineBuffer) {
   1.241 +    mLineBuffer = new nsLineBuffer<char16_t>;
   1.242 +  }
   1.243 +  return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult);
   1.244 +}

mercurial