1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/uconv/src/nsUTF8ConverterService.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,121 @@ 1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim:expandtab:shiftwidth=2:tabstop=4: 1.6 + */ 1.7 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.8 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.9 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.10 +#include "nsString.h" 1.11 +#include "nsICharsetConverterManager.h" 1.12 +#include "nsUTF8ConverterService.h" 1.13 +#include "nsEscape.h" 1.14 +#include "nsAutoPtr.h" 1.15 +#include "nsServiceManagerUtils.h" 1.16 + 1.17 +NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService) 1.18 + 1.19 +static nsresult 1.20 +ToUTF8(const nsACString &aString, const char *aCharset, 1.21 + bool aAllowSubstitution, nsACString &aResult) 1.22 +{ 1.23 + nsresult rv; 1.24 + if (!aCharset || !*aCharset) 1.25 + return NS_ERROR_INVALID_ARG; 1.26 + 1.27 + nsCOMPtr<nsICharsetConverterManager> ccm; 1.28 + 1.29 + ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); 1.30 + NS_ENSURE_SUCCESS(rv, rv); 1.31 + 1.32 + nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder; 1.33 + rv = ccm->GetUnicodeDecoder(aCharset, 1.34 + getter_AddRefs(unicodeDecoder)); 1.35 + NS_ENSURE_SUCCESS(rv, rv); 1.36 + 1.37 + if (!aAllowSubstitution) 1.38 + unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); 1.39 + 1.40 + int32_t srcLen = aString.Length(); 1.41 + int32_t dstLen; 1.42 + const nsAFlatCString& inStr = PromiseFlatCString(aString); 1.43 + rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen); 1.44 + NS_ENSURE_SUCCESS(rv, rv); 1.45 + 1.46 + nsAutoArrayPtr<char16_t> ustr(new char16_t[dstLen]); 1.47 + NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY); 1.48 + 1.49 + rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen); 1.50 + if (NS_SUCCEEDED(rv)){ 1.51 + // Tru64 Cxx needs an explicit get() 1.52 + CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult); 1.53 + } 1.54 + return rv; 1.55 +} 1.56 + 1.57 +NS_IMETHODIMP 1.58 +nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString, 1.59 + const char *aCharset, 1.60 + bool aSkipCheck, 1.61 + bool aAllowSubstitution, 1.62 + uint8_t aOptionalArgc, 1.63 + nsACString &aUTF8String) 1.64 +{ 1.65 + bool allowSubstitution = (aOptionalArgc == 1) ? aAllowSubstitution : true; 1.66 + 1.67 + // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8 1.68 + // check is requested. It may not be asked for if a caller suspects 1.69 + // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or 1.70 + // it's in a charset other than UTF-8 that can be mistaken for UTF-8. 1.71 + if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) { 1.72 + aUTF8String = aString; 1.73 + return NS_OK; 1.74 + } 1.75 + 1.76 + aUTF8String.Truncate(); 1.77 + 1.78 + nsresult rv = ToUTF8(aString, aCharset, allowSubstitution, aUTF8String); 1.79 + 1.80 + // additional protection for cases where check is skipped and the input 1.81 + // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch 1.82 + // was wrong.) We don't check ASCIIness assuming there's no charset 1.83 + // incompatible with ASCII (we don't support EBCDIC). 1.84 + if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) { 1.85 + aUTF8String = aString; 1.86 + return NS_OK; 1.87 + } 1.88 + 1.89 + return rv; 1.90 +} 1.91 + 1.92 +NS_IMETHODIMP 1.93 +nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec, 1.94 + const char *aCharset, 1.95 + nsACString &aUTF8Spec) 1.96 +{ 1.97 + // assume UTF-8 if the spec contains unescaped non-ASCII characters. 1.98 + // No valid spec in Mozilla would break this assumption. 1.99 + if (!IsASCII(aSpec)) { 1.100 + aUTF8Spec = aSpec; 1.101 + return NS_OK; 1.102 + } 1.103 + 1.104 + aUTF8Spec.Truncate(); 1.105 + 1.106 + nsAutoCString unescapedSpec; 1.107 + // NS_UnescapeURL does not fill up unescapedSpec unless there's at least 1.108 + // one character to unescape. 1.109 + bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(), 1.110 + esc_OnlyNonASCII, unescapedSpec); 1.111 + 1.112 + if (!written) { 1.113 + aUTF8Spec = aSpec; 1.114 + return NS_OK; 1.115 + } 1.116 + // return if ASCII only or escaped UTF-8 1.117 + if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) { 1.118 + aUTF8Spec = unescapedSpec; 1.119 + return NS_OK; 1.120 + } 1.121 + 1.122 + return ToUTF8(unescapedSpec, aCharset, true, aUTF8Spec); 1.123 +} 1.124 +