intl/uconv/src/nsUTF8ConverterService.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/src/nsUTF8ConverterService.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,121 @@
     1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim:expandtab:shiftwidth=2:tabstop=4: 
     1.6 + */
     1.7 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.8 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.9 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    1.10 +#include "nsString.h"
    1.11 +#include "nsICharsetConverterManager.h"
    1.12 +#include "nsUTF8ConverterService.h"
    1.13 +#include "nsEscape.h"
    1.14 +#include "nsAutoPtr.h"
    1.15 +#include "nsServiceManagerUtils.h"
    1.16 +
    // XPCOM boilerplate: presumably expands to the nsISupports methods for
    // nsUTF8ConverterService, advertising nsIUTF8ConverterService as its
    // interface -- confirm against the NS_IMPL_ISUPPORTS definition.
    1.17 +NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService)
    1.18 +
    1.19 +static nsresult 
    1.20 +ToUTF8(const nsACString &aString, const char *aCharset,
    1.21 +       bool aAllowSubstitution, nsACString &aResult)
    1.22 +{
    1.23 +  nsresult rv;
    1.24 +  if (!aCharset || !*aCharset)
    1.25 +    return NS_ERROR_INVALID_ARG;
    1.26 +
    1.27 +  nsCOMPtr<nsICharsetConverterManager> ccm;
    1.28 +
    1.29 +  ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
    1.30 +  NS_ENSURE_SUCCESS(rv, rv);
    1.31 +
    1.32 +  nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
    1.33 +  rv = ccm->GetUnicodeDecoder(aCharset,
    1.34 +                              getter_AddRefs(unicodeDecoder));
    1.35 +  NS_ENSURE_SUCCESS(rv, rv);
    1.36 +
    1.37 +  if (!aAllowSubstitution)
    1.38 +    unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
    1.39 +
    1.40 +  int32_t srcLen = aString.Length();
    1.41 +  int32_t dstLen;
    1.42 +  const nsAFlatCString& inStr = PromiseFlatCString(aString);
    1.43 +  rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen);
    1.44 +  NS_ENSURE_SUCCESS(rv, rv);
    1.45 +
    1.46 +  nsAutoArrayPtr<char16_t> ustr(new char16_t[dstLen]);
    1.47 +  NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
    1.48 +
    1.49 +  rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen);
    1.50 +  if (NS_SUCCEEDED(rv)){
    1.51 +    // Tru64 Cxx needs an explicit get()
    1.52 +    CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult);
    1.53 +  }
    1.54 +  return rv;
    1.55 +}
    1.56 +
    1.57 +NS_IMETHODIMP  
    1.58 +nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString, 
    1.59 +                                            const char *aCharset, 
    1.60 +                                            bool aSkipCheck, 
    1.61 +                                            bool aAllowSubstitution,
    1.62 +                                            uint8_t aOptionalArgc,
    1.63 +                                            nsACString &aUTF8String)
    1.64 +{
    1.65 +  bool allowSubstitution = (aOptionalArgc == 1) ? aAllowSubstitution : true;
    1.66 +
    1.67 +  // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8
    1.68 +  // check is requested. It may not be asked for if a caller suspects
    1.69 +  // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or 
    1.70 +  // it's in a charset other than UTF-8 that can be mistaken for UTF-8.
    1.71 +  if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) {
    1.72 +    aUTF8String = aString;
    1.73 +    return NS_OK;
    1.74 +  }
    1.75 +
    1.76 +  aUTF8String.Truncate();
    1.77 +
    1.78 +  nsresult rv = ToUTF8(aString, aCharset, allowSubstitution, aUTF8String);
    1.79 +
    1.80 +  // additional protection for cases where check is skipped and  the input
    1.81 +  // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
    1.82 +  // was wrong.) We don't check ASCIIness assuming there's no charset
    1.83 +  // incompatible with ASCII (we don't support EBCDIC).
    1.84 +  if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) {
    1.85 +    aUTF8String = aString;
    1.86 +    return NS_OK;
    1.87 +  }
    1.88 +
    1.89 +  return rv;
    1.90 +}
    1.91 +
    1.92 +NS_IMETHODIMP  
    1.93 +nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec, 
    1.94 +                                             const char *aCharset, 
    1.95 +                                             nsACString &aUTF8Spec)
    1.96 +{
    1.97 +  // assume UTF-8 if the spec contains unescaped non-ASCII characters.
    1.98 +  // No valid spec in Mozilla would break this assumption.
    1.99 +  if (!IsASCII(aSpec)) {
   1.100 +    aUTF8Spec = aSpec;
   1.101 +    return NS_OK;
   1.102 +  }
   1.103 +
   1.104 +  aUTF8Spec.Truncate();
   1.105 +
   1.106 +  nsAutoCString unescapedSpec; 
   1.107 +  // NS_UnescapeURL does not fill up unescapedSpec unless there's at least 
   1.108 +  // one character to unescape.
   1.109 +  bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(), 
   1.110 +                                  esc_OnlyNonASCII, unescapedSpec);
   1.111 +
   1.112 +  if (!written) {
   1.113 +    aUTF8Spec = aSpec;
   1.114 +    return NS_OK;
   1.115 +  }
   1.116 +  // return if ASCII only or escaped UTF-8
   1.117 +  if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) {
   1.118 +    aUTF8Spec = unescapedSpec;
   1.119 +    return NS_OK;
   1.120 +  }
   1.121 +
   1.122 +  return ToUTF8(unescapedSpec, aCharset, true, aUTF8Spec);
   1.123 +}
   1.124 +

mercurial