Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* vim:expandtab:shiftwidth=2:tabstop=4: |
michael@0 | 3 | */ |
michael@0 | 4 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 7 | #include "nsString.h" |
michael@0 | 8 | #include "nsICharsetConverterManager.h" |
michael@0 | 9 | #include "nsUTF8ConverterService.h" |
michael@0 | 10 | #include "nsEscape.h" |
michael@0 | 11 | #include "nsAutoPtr.h" |
michael@0 | 12 | #include "nsServiceManagerUtils.h" |
michael@0 | 13 | |
// NOTE(review): NS_IMPL_ISUPPORTS is defined outside this file; presumably it
// expands to the nsISupports boilerplate for nsUTF8ConverterService with
// nsIUTF8ConverterService as the only exposed interface -- confirm against the
// XPCOM headers.
michael@0 | 14 | NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService) |
michael@0 | 15 | |
michael@0 | 16 | static nsresult |
michael@0 | 17 | ToUTF8(const nsACString &aString, const char *aCharset, |
michael@0 | 18 | bool aAllowSubstitution, nsACString &aResult) |
michael@0 | 19 | { |
michael@0 | 20 | nsresult rv; |
michael@0 | 21 | if (!aCharset || !*aCharset) |
michael@0 | 22 | return NS_ERROR_INVALID_ARG; |
michael@0 | 23 | |
michael@0 | 24 | nsCOMPtr<nsICharsetConverterManager> ccm; |
michael@0 | 25 | |
michael@0 | 26 | ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); |
michael@0 | 27 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 28 | |
michael@0 | 29 | nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder; |
michael@0 | 30 | rv = ccm->GetUnicodeDecoder(aCharset, |
michael@0 | 31 | getter_AddRefs(unicodeDecoder)); |
michael@0 | 32 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 33 | |
michael@0 | 34 | if (!aAllowSubstitution) |
michael@0 | 35 | unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); |
michael@0 | 36 | |
michael@0 | 37 | int32_t srcLen = aString.Length(); |
michael@0 | 38 | int32_t dstLen; |
michael@0 | 39 | const nsAFlatCString& inStr = PromiseFlatCString(aString); |
michael@0 | 40 | rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen); |
michael@0 | 41 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 42 | |
michael@0 | 43 | nsAutoArrayPtr<char16_t> ustr(new char16_t[dstLen]); |
michael@0 | 44 | NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY); |
michael@0 | 45 | |
michael@0 | 46 | rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen); |
michael@0 | 47 | if (NS_SUCCEEDED(rv)){ |
michael@0 | 48 | // Tru64 Cxx needs an explicit get() |
michael@0 | 49 | CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult); |
michael@0 | 50 | } |
michael@0 | 51 | return rv; |
michael@0 | 52 | } |
michael@0 | 53 | |
michael@0 | 54 | NS_IMETHODIMP |
michael@0 | 55 | nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString, |
michael@0 | 56 | const char *aCharset, |
michael@0 | 57 | bool aSkipCheck, |
michael@0 | 58 | bool aAllowSubstitution, |
michael@0 | 59 | uint8_t aOptionalArgc, |
michael@0 | 60 | nsACString &aUTF8String) |
michael@0 | 61 | { |
michael@0 | 62 | bool allowSubstitution = (aOptionalArgc == 1) ? aAllowSubstitution : true; |
michael@0 | 63 | |
michael@0 | 64 | // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8 |
michael@0 | 65 | // check is requested. It may not be asked for if a caller suspects |
michael@0 | 66 | // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or |
michael@0 | 67 | // it's in a charset other than UTF-8 that can be mistaken for UTF-8. |
michael@0 | 68 | if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) { |
michael@0 | 69 | aUTF8String = aString; |
michael@0 | 70 | return NS_OK; |
michael@0 | 71 | } |
michael@0 | 72 | |
michael@0 | 73 | aUTF8String.Truncate(); |
michael@0 | 74 | |
michael@0 | 75 | nsresult rv = ToUTF8(aString, aCharset, allowSubstitution, aUTF8String); |
michael@0 | 76 | |
michael@0 | 77 | // additional protection for cases where check is skipped and the input |
michael@0 | 78 | // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch |
michael@0 | 79 | // was wrong.) We don't check ASCIIness assuming there's no charset |
michael@0 | 80 | // incompatible with ASCII (we don't support EBCDIC). |
michael@0 | 81 | if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) { |
michael@0 | 82 | aUTF8String = aString; |
michael@0 | 83 | return NS_OK; |
michael@0 | 84 | } |
michael@0 | 85 | |
michael@0 | 86 | return rv; |
michael@0 | 87 | } |
michael@0 | 88 | |
michael@0 | 89 | NS_IMETHODIMP |
michael@0 | 90 | nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec, |
michael@0 | 91 | const char *aCharset, |
michael@0 | 92 | nsACString &aUTF8Spec) |
michael@0 | 93 | { |
michael@0 | 94 | // assume UTF-8 if the spec contains unescaped non-ASCII characters. |
michael@0 | 95 | // No valid spec in Mozilla would break this assumption. |
michael@0 | 96 | if (!IsASCII(aSpec)) { |
michael@0 | 97 | aUTF8Spec = aSpec; |
michael@0 | 98 | return NS_OK; |
michael@0 | 99 | } |
michael@0 | 100 | |
michael@0 | 101 | aUTF8Spec.Truncate(); |
michael@0 | 102 | |
michael@0 | 103 | nsAutoCString unescapedSpec; |
michael@0 | 104 | // NS_UnescapeURL does not fill up unescapedSpec unless there's at least |
michael@0 | 105 | // one character to unescape. |
michael@0 | 106 | bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(), |
michael@0 | 107 | esc_OnlyNonASCII, unescapedSpec); |
michael@0 | 108 | |
michael@0 | 109 | if (!written) { |
michael@0 | 110 | aUTF8Spec = aSpec; |
michael@0 | 111 | return NS_OK; |
michael@0 | 112 | } |
michael@0 | 113 | // return if ASCII only or escaped UTF-8 |
michael@0 | 114 | if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) { |
michael@0 | 115 | aUTF8Spec = unescapedSpec; |
michael@0 | 116 | return NS_OK; |
michael@0 | 117 | } |
michael@0 | 118 | |
michael@0 | 119 | return ToUTF8(unescapedSpec, aCharset, true, aUTF8Spec); |
michael@0 | 120 | } |
michael@0 | 121 |