intl/uconv/src/nsUTF8ConverterService.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim:expandtab:shiftwidth=2:tabstop=4: 
     3  */
     4 /* This Source Code Form is subject to the terms of the Mozilla Public
     5  * License, v. 2.0. If a copy of the MPL was not distributed with this
     6  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #include "nsString.h"
     8 #include "nsICharsetConverterManager.h"
     9 #include "nsUTF8ConverterService.h"
    10 #include "nsEscape.h"
    11 #include "nsAutoPtr.h"
    12 #include "nsServiceManagerUtils.h"
    // XPCOM boilerplate macro: names nsUTF8ConverterService as the implementing
    // class and nsIUTF8ConverterService as the interface it exposes. The macro
    // body is defined outside this file (presumably the nsISupports methods).
    14 NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService)
    16 static nsresult 
    17 ToUTF8(const nsACString &aString, const char *aCharset,
    18        bool aAllowSubstitution, nsACString &aResult)
    19 {
    20   nsresult rv;
    21   if (!aCharset || !*aCharset)
    22     return NS_ERROR_INVALID_ARG;
    24   nsCOMPtr<nsICharsetConverterManager> ccm;
    26   ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
    27   NS_ENSURE_SUCCESS(rv, rv);
    29   nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
    30   rv = ccm->GetUnicodeDecoder(aCharset,
    31                               getter_AddRefs(unicodeDecoder));
    32   NS_ENSURE_SUCCESS(rv, rv);
    34   if (!aAllowSubstitution)
    35     unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
    37   int32_t srcLen = aString.Length();
    38   int32_t dstLen;
    39   const nsAFlatCString& inStr = PromiseFlatCString(aString);
    40   rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen);
    41   NS_ENSURE_SUCCESS(rv, rv);
    43   nsAutoArrayPtr<char16_t> ustr(new char16_t[dstLen]);
    44   NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
    46   rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen);
    47   if (NS_SUCCEEDED(rv)){
    48     // Tru64 Cxx needs an explicit get()
    49     CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult);
    50   }
    51   return rv;
    52 }
    54 NS_IMETHODIMP  
    55 nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString, 
    56                                             const char *aCharset, 
    57                                             bool aSkipCheck, 
    58                                             bool aAllowSubstitution,
    59                                             uint8_t aOptionalArgc,
    60                                             nsACString &aUTF8String)
    61 {
    62   bool allowSubstitution = (aOptionalArgc == 1) ? aAllowSubstitution : true;
    64   // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8
    65   // check is requested. It may not be asked for if a caller suspects
    66   // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or 
    67   // it's in a charset other than UTF-8 that can be mistaken for UTF-8.
    68   if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) {
    69     aUTF8String = aString;
    70     return NS_OK;
    71   }
    73   aUTF8String.Truncate();
    75   nsresult rv = ToUTF8(aString, aCharset, allowSubstitution, aUTF8String);
    77   // additional protection for cases where check is skipped and  the input
    78   // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
    79   // was wrong.) We don't check ASCIIness assuming there's no charset
    80   // incompatible with ASCII (we don't support EBCDIC).
    81   if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) {
    82     aUTF8String = aString;
    83     return NS_OK;
    84   }
    86   return rv;
    87 }
    89 NS_IMETHODIMP  
    90 nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec, 
    91                                              const char *aCharset, 
    92                                              nsACString &aUTF8Spec)
    93 {
    94   // assume UTF-8 if the spec contains unescaped non-ASCII characters.
    95   // No valid spec in Mozilla would break this assumption.
    96   if (!IsASCII(aSpec)) {
    97     aUTF8Spec = aSpec;
    98     return NS_OK;
    99   }
   101   aUTF8Spec.Truncate();
   103   nsAutoCString unescapedSpec; 
   104   // NS_UnescapeURL does not fill up unescapedSpec unless there's at least 
   105   // one character to unescape.
   106   bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(), 
   107                                   esc_OnlyNonASCII, unescapedSpec);
   109   if (!written) {
   110     aUTF8Spec = aSpec;
   111     return NS_OK;
   112   }
   113   // return if ASCII only or escaped UTF-8
   114   if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) {
   115     aUTF8Spec = unescapedSpec;
   116     return NS_OK;
   117   }
   119   return ToUTF8(unescapedSpec, aCharset, true, aUTF8Spec);
   120 }

mercurial