| |
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
| |
2 /* vim:expandtab:shiftwidth=2:tabstop=4: |
| |
3 */ |
| |
4 /* This Source Code Form is subject to the terms of the Mozilla Public |
| |
5 * License, v. 2.0. If a copy of the MPL was not distributed with this |
| |
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| |
7 #include "nsString.h" |
| |
8 #include "nsICharsetConverterManager.h" |
| |
9 #include "nsUTF8ConverterService.h" |
| |
10 #include "nsEscape.h" |
| |
11 #include "nsAutoPtr.h" |
| |
12 #include "nsServiceManagerUtils.h" |
| |
13 |
| |
// XPCOM boilerplate: registers nsIUTF8ConverterService as the interface that
// nsUTF8ConverterService implements. NOTE(review): the macro is defined
// outside this file; it is expected to supply the nsISupports methods
// (AddRef / Release / QueryInterface) for the class -- confirm against
// nsISupportsImpl.h.
14 NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService) |
| |
15 |
| |
16 static nsresult |
| |
17 ToUTF8(const nsACString &aString, const char *aCharset, |
| |
18 bool aAllowSubstitution, nsACString &aResult) |
| |
19 { |
| |
20 nsresult rv; |
| |
21 if (!aCharset || !*aCharset) |
| |
22 return NS_ERROR_INVALID_ARG; |
| |
23 |
| |
24 nsCOMPtr<nsICharsetConverterManager> ccm; |
| |
25 |
| |
26 ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); |
| |
27 NS_ENSURE_SUCCESS(rv, rv); |
| |
28 |
| |
29 nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder; |
| |
30 rv = ccm->GetUnicodeDecoder(aCharset, |
| |
31 getter_AddRefs(unicodeDecoder)); |
| |
32 NS_ENSURE_SUCCESS(rv, rv); |
| |
33 |
| |
34 if (!aAllowSubstitution) |
| |
35 unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); |
| |
36 |
| |
37 int32_t srcLen = aString.Length(); |
| |
38 int32_t dstLen; |
| |
39 const nsAFlatCString& inStr = PromiseFlatCString(aString); |
| |
40 rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen); |
| |
41 NS_ENSURE_SUCCESS(rv, rv); |
| |
42 |
| |
43 nsAutoArrayPtr<char16_t> ustr(new char16_t[dstLen]); |
| |
44 NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY); |
| |
45 |
| |
46 rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen); |
| |
47 if (NS_SUCCEEDED(rv)){ |
| |
48 // Tru64 Cxx needs an explicit get() |
| |
49 CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult); |
| |
50 } |
| |
51 return rv; |
| |
52 } |
| |
53 |
| |
54 NS_IMETHODIMP |
| |
55 nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString, |
| |
56 const char *aCharset, |
| |
57 bool aSkipCheck, |
| |
58 bool aAllowSubstitution, |
| |
59 uint8_t aOptionalArgc, |
| |
60 nsACString &aUTF8String) |
| |
61 { |
| |
62 bool allowSubstitution = (aOptionalArgc == 1) ? aAllowSubstitution : true; |
| |
63 |
| |
64 // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8 |
| |
65 // check is requested. It may not be asked for if a caller suspects |
| |
66 // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or |
| |
67 // it's in a charset other than UTF-8 that can be mistaken for UTF-8. |
| |
68 if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) { |
| |
69 aUTF8String = aString; |
| |
70 return NS_OK; |
| |
71 } |
| |
72 |
| |
73 aUTF8String.Truncate(); |
| |
74 |
| |
75 nsresult rv = ToUTF8(aString, aCharset, allowSubstitution, aUTF8String); |
| |
76 |
| |
77 // additional protection for cases where check is skipped and the input |
| |
78 // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch |
| |
79 // was wrong.) We don't check ASCIIness assuming there's no charset |
| |
80 // incompatible with ASCII (we don't support EBCDIC). |
| |
81 if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) { |
| |
82 aUTF8String = aString; |
| |
83 return NS_OK; |
| |
84 } |
| |
85 |
| |
86 return rv; |
| |
87 } |
| |
88 |
| |
89 NS_IMETHODIMP |
| |
90 nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec, |
| |
91 const char *aCharset, |
| |
92 nsACString &aUTF8Spec) |
| |
93 { |
| |
94 // assume UTF-8 if the spec contains unescaped non-ASCII characters. |
| |
95 // No valid spec in Mozilla would break this assumption. |
| |
96 if (!IsASCII(aSpec)) { |
| |
97 aUTF8Spec = aSpec; |
| |
98 return NS_OK; |
| |
99 } |
| |
100 |
| |
101 aUTF8Spec.Truncate(); |
| |
102 |
| |
103 nsAutoCString unescapedSpec; |
| |
104 // NS_UnescapeURL does not fill up unescapedSpec unless there's at least |
| |
105 // one character to unescape. |
| |
106 bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(), |
| |
107 esc_OnlyNonASCII, unescapedSpec); |
| |
108 |
| |
109 if (!written) { |
| |
110 aUTF8Spec = aSpec; |
| |
111 return NS_OK; |
| |
112 } |
| |
113 // return if ASCII only or escaped UTF-8 |
| |
114 if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) { |
| |
115 aUTF8Spec = unescapedSpec; |
| |
116 return NS_OK; |
| |
117 } |
| |
118 |
| |
119 return ToUTF8(unescapedSpec, aCharset, true, aUTF8Spec); |
| |
120 } |
| |
121 |