intl/uconv/src/nsTextToSubURI.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/src/nsTextToSubURI.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,239 @@
     1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +#include "nsString.h"
     1.9 +#include "nsIUnicodeEncoder.h"
    1.10 +#include "nsICharsetConverterManager.h"
    1.11 +#include "nsITextToSubURI.h"
    1.12 +#include "nsEscape.h"
    1.13 +#include "nsTextToSubURI.h"
    1.14 +#include "nsCRT.h"
    1.15 +#include "nsServiceManagerUtils.h"
    1.16 +
    1.17 +static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
    1.18 +
    1.19 +nsTextToSubURI::nsTextToSubURI()
    1.20 +{
    1.21 +}
    1.22 +nsTextToSubURI::~nsTextToSubURI()
    1.23 +{
    1.24 +}
    1.25 +
    1.26 +NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
    1.27 +
    1.28 +NS_IMETHODIMP  nsTextToSubURI::ConvertAndEscape(
    1.29 +  const char *charset, const char16_t *text, char **_retval) 
    1.30 +{
    1.31 +  if(nullptr == _retval)
    1.32 +    return NS_ERROR_NULL_POINTER;
    1.33 +  *_retval = nullptr;
    1.34 +  nsresult rv = NS_OK;
    1.35 +  
    1.36 +  // Get Charset, get the encoder.
    1.37 +  nsICharsetConverterManager *ccm;
    1.38 +  rv = CallGetService(kCharsetConverterManagerCID, &ccm);
    1.39 +  if(NS_SUCCEEDED(rv)) {
    1.40 +     nsIUnicodeEncoder *encoder;
    1.41 +     rv = ccm->GetUnicodeEncoder(charset, &encoder);
    1.42 +     NS_RELEASE(ccm);
    1.43 +     if (NS_SUCCEEDED(rv)) {
    1.44 +       rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (char16_t)'?');
    1.45 +       if(NS_SUCCEEDED(rv))
    1.46 +       {
    1.47 +          char buf[256];
    1.48 +          char *pBuf = buf;
    1.49 +          int32_t ulen = text ? NS_strlen(text) : 0;
    1.50 +          int32_t outlen = 0;
    1.51 +          if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen))) 
    1.52 +          {
    1.53 +             if(outlen >= 256) {
    1.54 +                pBuf = (char*)NS_Alloc(outlen+1);
    1.55 +             }
    1.56 +             if(nullptr == pBuf) {
    1.57 +                outlen = 255;
    1.58 +                pBuf = buf;
    1.59 +             }
    1.60 +             int32_t bufLen = outlen;
    1.61 +             if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) {
    1.62 +                // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
    1.63 +                int32_t finLen = bufLen - outlen;
    1.64 +                if (finLen > 0) {
    1.65 +                  if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen)))
    1.66 +                    outlen += finLen;
    1.67 +                }
    1.68 +                pBuf[outlen] = '\0';
    1.69 +                *_retval = nsEscape(pBuf, url_XPAlphas);
    1.70 +                if(nullptr == *_retval)
    1.71 +                  rv = NS_ERROR_OUT_OF_MEMORY;
    1.72 +             }
    1.73 +          }
    1.74 +          if(pBuf != buf)
    1.75 +             NS_Free(pBuf);
    1.76 +       }
    1.77 +       NS_RELEASE(encoder);
    1.78 +     }
    1.79 +  }
    1.80 +  
    1.81 +  return rv;
    1.82 +}
    1.83 +
    1.84 +NS_IMETHODIMP  nsTextToSubURI::UnEscapeAndConvert(
    1.85 +  const char *charset, const char *text, char16_t **_retval) 
    1.86 +{
    1.87 +  if(nullptr == _retval)
    1.88 +    return NS_ERROR_NULL_POINTER;
    1.89 +  if(nullptr == text) {
    1.90 +    // set empty string instead of returning error
    1.91 +    // due to compatibility for old version
    1.92 +    text = "";
    1.93 +  }
    1.94 +  *_retval = nullptr;
    1.95 +  nsresult rv = NS_OK;
    1.96 +  
    1.97 +  // unescape the string, unescape changes the input
    1.98 +  char *unescaped = NS_strdup(text);
    1.99 +  if (nullptr == unescaped)
   1.100 +    return NS_ERROR_OUT_OF_MEMORY;
   1.101 +  unescaped = nsUnescape(unescaped);
   1.102 +  NS_ASSERTION(unescaped, "nsUnescape returned null");
   1.103 +
   1.104 +  // Convert from the charset to unicode
   1.105 +  nsCOMPtr<nsICharsetConverterManager> ccm = 
   1.106 +           do_GetService(kCharsetConverterManagerCID, &rv); 
   1.107 +  if (NS_SUCCEEDED(rv)) {
   1.108 +    nsIUnicodeDecoder *decoder;
   1.109 +    rv = ccm->GetUnicodeDecoder(charset, &decoder);
   1.110 +    if (NS_SUCCEEDED(rv)) {
   1.111 +      char16_t *pBuf = nullptr;
   1.112 +      int32_t len = strlen(unescaped);
   1.113 +      int32_t outlen = 0;
   1.114 +      if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) {
   1.115 +        pBuf = (char16_t *) NS_Alloc((outlen+1)*sizeof(char16_t));
   1.116 +        if (nullptr == pBuf)
   1.117 +          rv = NS_ERROR_OUT_OF_MEMORY;
   1.118 +        else {
   1.119 +          if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) {
   1.120 +            pBuf[outlen] = 0;
   1.121 +            *_retval = pBuf;
   1.122 +          }
   1.123 +          else
   1.124 +            NS_Free(pBuf);
   1.125 +        }
   1.126 +      }
   1.127 +      NS_RELEASE(decoder);
   1.128 +    }
   1.129 +  }
   1.130 +  NS_Free(unescaped);
   1.131 +
   1.132 +  return rv;
   1.133 +}
   1.134 +
   1.135 +static bool statefulCharset(const char *charset)
   1.136 +{
   1.137 +  if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
   1.138 +      !nsCRT::strcasecmp(charset, "UTF-7") ||
   1.139 +      !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
   1.140 +    return true;
   1.141 +
   1.142 +  return false;
   1.143 +}
   1.144 +
   1.145 +nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,
   1.146 +                                             const nsAFlatCString &aURI, 
   1.147 +                                             bool aIRI, 
   1.148 +                                             nsAString &_retval)
   1.149 +{
   1.150 +  nsresult rv = NS_OK;
   1.151 +
   1.152 +  // check for 7bit encoding the data may not be ASCII after we decode
   1.153 +  bool isStatefulCharset = statefulCharset(aCharset.get());
   1.154 +
   1.155 +  if (!isStatefulCharset && IsASCII(aURI)) {
   1.156 +    CopyASCIItoUTF16(aURI, _retval);
   1.157 +    return rv;
   1.158 +  }
   1.159 +
   1.160 +  if (!isStatefulCharset && aIRI) {
   1.161 +    if (IsUTF8(aURI)) {
   1.162 +      CopyUTF8toUTF16(aURI, _retval);
   1.163 +      return rv;
   1.164 +    }
   1.165 +  }
   1.166 +
   1.167 +  // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
   1.168 +  NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
   1.169 +
   1.170 +  nsCOMPtr<nsICharsetConverterManager> charsetConverterManager;
   1.171 +
   1.172 +  charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
   1.173 +  NS_ENSURE_SUCCESS(rv, rv);
   1.174 +
   1.175 +  nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
   1.176 +  rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(), 
   1.177 +                                                  getter_AddRefs(unicodeDecoder));
   1.178 +  NS_ENSURE_SUCCESS(rv, rv);
   1.179 +  unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
   1.180 +
   1.181 +  int32_t srcLen = aURI.Length();
   1.182 +  int32_t dstLen;
   1.183 +  rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen);
   1.184 +  NS_ENSURE_SUCCESS(rv, rv);
   1.185 +
   1.186 +  char16_t *ustr = (char16_t *) NS_Alloc(dstLen * sizeof(char16_t));
   1.187 +  NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
   1.188 +
   1.189 +  rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen);
   1.190 +
   1.191 +  if (NS_SUCCEEDED(rv))
   1.192 +    _retval.Assign(ustr, dstLen);
   1.193 +  
   1.194 +  NS_Free(ustr);
   1.195 +
   1.196 +  return rv;
   1.197 +}
   1.198 +
   1.199 +NS_IMETHODIMP  nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset, 
   1.200 +                                                const nsACString &aURIFragment, 
   1.201 +                                                nsAString &_retval)
   1.202 +{
   1.203 +  nsAutoCString unescapedSpec;
   1.204 +  // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
   1.205 +  NS_UnescapeURL(PromiseFlatCString(aURIFragment), 
   1.206 +                 esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
   1.207 +
   1.208 +  // in case of failure, return escaped URI
   1.209 +  // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
   1.210 +  // sequences are also considered failure in this context
   1.211 +  if (convertURItoUnicode(
   1.212 +                PromiseFlatCString(aCharset), unescapedSpec, true, _retval)
   1.213 +      != NS_OK)
   1.214 +    // assume UTF-8 instead of ASCII  because hostname (IDN) may be in UTF-8
   1.215 +    CopyUTF8toUTF16(aURIFragment, _retval); 
   1.216 +  return NS_OK;
   1.217 +}
   1.218 +
   1.219 +NS_IMETHODIMP  nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset, 
   1.220 +                                                   const nsACString & aURIFragment, 
   1.221 +                                                   nsAString &_retval)
   1.222 +{
   1.223 +  nsAutoCString unescapedSpec;
   1.224 +  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
   1.225 +                 esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
   1.226 +  // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
   1.227 +  // superset since converting "http:" with such an encoding is always a bad 
   1.228 +  // idea.
   1.229 +  if (!IsUTF8(unescapedSpec) && 
   1.230 +      (aCharset.LowerCaseEqualsLiteral("utf-16") ||
   1.231 +       aCharset.LowerCaseEqualsLiteral("utf-16be") ||
   1.232 +       aCharset.LowerCaseEqualsLiteral("utf-16le") ||
   1.233 +       aCharset.LowerCaseEqualsLiteral("utf-7") ||
   1.234 +       aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){
   1.235 +    CopyASCIItoUTF16(aURIFragment, _retval);
   1.236 +    return NS_OK;
   1.237 +  }
   1.238 +
   1.239 +  return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, true, _retval);
   1.240 +}
   1.241 +
   1.242 +//----------------------------------------------------------------------

mercurial