intl/uconv/src/nsTextToSubURI.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "nsString.h"
     6 #include "nsIUnicodeEncoder.h"
     7 #include "nsICharsetConverterManager.h"
     8 #include "nsITextToSubURI.h"
     9 #include "nsEscape.h"
    10 #include "nsTextToSubURI.h"
    11 #include "nsCRT.h"
    12 #include "nsServiceManagerUtils.h"
    14 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
    16 nsTextToSubURI::nsTextToSubURI()
    17 {
    18 }
    19 nsTextToSubURI::~nsTextToSubURI()
    20 {
    21 }
    23 NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
    25 NS_IMETHODIMP  nsTextToSubURI::ConvertAndEscape(
    26   const char *charset, const char16_t *text, char **_retval) 
    27 {
    28   if(nullptr == _retval)
    29     return NS_ERROR_NULL_POINTER;
    30   *_retval = nullptr;
    31   nsresult rv = NS_OK;
    33   // Get Charset, get the encoder.
    34   nsICharsetConverterManager *ccm;
    35   rv = CallGetService(kCharsetConverterManagerCID, &ccm);
    36   if(NS_SUCCEEDED(rv)) {
    37      nsIUnicodeEncoder *encoder;
    38      rv = ccm->GetUnicodeEncoder(charset, &encoder);
    39      NS_RELEASE(ccm);
    40      if (NS_SUCCEEDED(rv)) {
    41        rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (char16_t)'?');
    42        if(NS_SUCCEEDED(rv))
    43        {
    44           char buf[256];
    45           char *pBuf = buf;
    46           int32_t ulen = text ? NS_strlen(text) : 0;
    47           int32_t outlen = 0;
    48           if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen))) 
    49           {
    50              if(outlen >= 256) {
    51                 pBuf = (char*)NS_Alloc(outlen+1);
    52              }
    53              if(nullptr == pBuf) {
    54                 outlen = 255;
    55                 pBuf = buf;
    56              }
    57              int32_t bufLen = outlen;
    58              if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) {
    59                 // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
    60                 int32_t finLen = bufLen - outlen;
    61                 if (finLen > 0) {
    62                   if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen)))
    63                     outlen += finLen;
    64                 }
    65                 pBuf[outlen] = '\0';
    66                 *_retval = nsEscape(pBuf, url_XPAlphas);
    67                 if(nullptr == *_retval)
    68                   rv = NS_ERROR_OUT_OF_MEMORY;
    69              }
    70           }
    71           if(pBuf != buf)
    72              NS_Free(pBuf);
    73        }
    74        NS_RELEASE(encoder);
    75      }
    76   }
    78   return rv;
    79 }
    81 NS_IMETHODIMP  nsTextToSubURI::UnEscapeAndConvert(
    82   const char *charset, const char *text, char16_t **_retval) 
    83 {
    84   if(nullptr == _retval)
    85     return NS_ERROR_NULL_POINTER;
    86   if(nullptr == text) {
    87     // set empty string instead of returning error
    88     // due to compatibility for old version
    89     text = "";
    90   }
    91   *_retval = nullptr;
    92   nsresult rv = NS_OK;
    94   // unescape the string, unescape changes the input
    95   char *unescaped = NS_strdup(text);
    96   if (nullptr == unescaped)
    97     return NS_ERROR_OUT_OF_MEMORY;
    98   unescaped = nsUnescape(unescaped);
    99   NS_ASSERTION(unescaped, "nsUnescape returned null");
   101   // Convert from the charset to unicode
   102   nsCOMPtr<nsICharsetConverterManager> ccm = 
   103            do_GetService(kCharsetConverterManagerCID, &rv); 
   104   if (NS_SUCCEEDED(rv)) {
   105     nsIUnicodeDecoder *decoder;
   106     rv = ccm->GetUnicodeDecoder(charset, &decoder);
   107     if (NS_SUCCEEDED(rv)) {
   108       char16_t *pBuf = nullptr;
   109       int32_t len = strlen(unescaped);
   110       int32_t outlen = 0;
   111       if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) {
   112         pBuf = (char16_t *) NS_Alloc((outlen+1)*sizeof(char16_t));
   113         if (nullptr == pBuf)
   114           rv = NS_ERROR_OUT_OF_MEMORY;
   115         else {
   116           if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) {
   117             pBuf[outlen] = 0;
   118             *_retval = pBuf;
   119           }
   120           else
   121             NS_Free(pBuf);
   122         }
   123       }
   124       NS_RELEASE(decoder);
   125     }
   126   }
   127   NS_Free(unescaped);
   129   return rv;
   130 }
   132 static bool statefulCharset(const char *charset)
   133 {
   134   if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
   135       !nsCRT::strcasecmp(charset, "UTF-7") ||
   136       !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
   137     return true;
   139   return false;
   140 }
   142 nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,
   143                                              const nsAFlatCString &aURI, 
   144                                              bool aIRI, 
   145                                              nsAString &_retval)
   146 {
   147   nsresult rv = NS_OK;
   149   // check for 7bit encoding the data may not be ASCII after we decode
   150   bool isStatefulCharset = statefulCharset(aCharset.get());
   152   if (!isStatefulCharset && IsASCII(aURI)) {
   153     CopyASCIItoUTF16(aURI, _retval);
   154     return rv;
   155   }
   157   if (!isStatefulCharset && aIRI) {
   158     if (IsUTF8(aURI)) {
   159       CopyUTF8toUTF16(aURI, _retval);
   160       return rv;
   161     }
   162   }
   164   // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
   165   NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
   167   nsCOMPtr<nsICharsetConverterManager> charsetConverterManager;
   169   charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
   170   NS_ENSURE_SUCCESS(rv, rv);
   172   nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
   173   rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(), 
   174                                                   getter_AddRefs(unicodeDecoder));
   175   NS_ENSURE_SUCCESS(rv, rv);
   176   unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
   178   int32_t srcLen = aURI.Length();
   179   int32_t dstLen;
   180   rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen);
   181   NS_ENSURE_SUCCESS(rv, rv);
   183   char16_t *ustr = (char16_t *) NS_Alloc(dstLen * sizeof(char16_t));
   184   NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
   186   rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen);
   188   if (NS_SUCCEEDED(rv))
   189     _retval.Assign(ustr, dstLen);
   191   NS_Free(ustr);
   193   return rv;
   194 }
   196 NS_IMETHODIMP  nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset, 
   197                                                 const nsACString &aURIFragment, 
   198                                                 nsAString &_retval)
   199 {
   200   nsAutoCString unescapedSpec;
   201   // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
   202   NS_UnescapeURL(PromiseFlatCString(aURIFragment), 
   203                  esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
   205   // in case of failure, return escaped URI
   206   // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
   207   // sequences are also considered failure in this context
   208   if (convertURItoUnicode(
   209                 PromiseFlatCString(aCharset), unescapedSpec, true, _retval)
   210       != NS_OK)
   211     // assume UTF-8 instead of ASCII  because hostname (IDN) may be in UTF-8
   212     CopyUTF8toUTF16(aURIFragment, _retval); 
   213   return NS_OK;
   214 }
   216 NS_IMETHODIMP  nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset, 
   217                                                    const nsACString & aURIFragment, 
   218                                                    nsAString &_retval)
   219 {
   220   nsAutoCString unescapedSpec;
   221   NS_UnescapeURL(PromiseFlatCString(aURIFragment),
   222                  esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
   223   // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
   224   // superset since converting "http:" with such an encoding is always a bad 
   225   // idea.
   226   if (!IsUTF8(unescapedSpec) && 
   227       (aCharset.LowerCaseEqualsLiteral("utf-16") ||
   228        aCharset.LowerCaseEqualsLiteral("utf-16be") ||
   229        aCharset.LowerCaseEqualsLiteral("utf-16le") ||
   230        aCharset.LowerCaseEqualsLiteral("utf-7") ||
   231        aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){
   232     CopyASCIItoUTF16(aURIFragment, _retval);
   233     return NS_OK;
   234   }
   236   return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, true, _retval);
   237 }
   239 //----------------------------------------------------------------------

mercurial