|
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim:expandtab:shiftwidth=2:tabstop=4: |
|
3 */ |
|
4 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
5 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
7 #include "nsString.h" |
|
8 #include "nsICharsetConverterManager.h" |
|
9 #include "nsUTF8ConverterService.h" |
|
10 #include "nsEscape.h" |
|
11 #include "nsAutoPtr.h" |
|
12 #include "nsServiceManagerUtils.h" |
|
13 |
|
// XPCOM boilerplate for nsUTF8ConverterService. NOTE(review): this macro is
// expected to expand to the nsISupports methods (AddRef/Release/
// QueryInterface), advertising nsIUTF8ConverterService as the one supported
// interface -- confirm against nsISupportsImpl.h.
14 NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService) |
|
15 |
|
16 static nsresult |
|
17 ToUTF8(const nsACString &aString, const char *aCharset, |
|
18 bool aAllowSubstitution, nsACString &aResult) |
|
19 { |
|
20 nsresult rv; |
|
21 if (!aCharset || !*aCharset) |
|
22 return NS_ERROR_INVALID_ARG; |
|
23 |
|
24 nsCOMPtr<nsICharsetConverterManager> ccm; |
|
25 |
|
26 ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); |
|
27 NS_ENSURE_SUCCESS(rv, rv); |
|
28 |
|
29 nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder; |
|
30 rv = ccm->GetUnicodeDecoder(aCharset, |
|
31 getter_AddRefs(unicodeDecoder)); |
|
32 NS_ENSURE_SUCCESS(rv, rv); |
|
33 |
|
34 if (!aAllowSubstitution) |
|
35 unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); |
|
36 |
|
37 int32_t srcLen = aString.Length(); |
|
38 int32_t dstLen; |
|
39 const nsAFlatCString& inStr = PromiseFlatCString(aString); |
|
40 rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen); |
|
41 NS_ENSURE_SUCCESS(rv, rv); |
|
42 |
|
43 nsAutoArrayPtr<char16_t> ustr(new char16_t[dstLen]); |
|
44 NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY); |
|
45 |
|
46 rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen); |
|
47 if (NS_SUCCEEDED(rv)){ |
|
48 // Tru64 Cxx needs an explicit get() |
|
49 CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult); |
|
50 } |
|
51 return rv; |
|
52 } |
|
53 |
|
54 NS_IMETHODIMP |
|
55 nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString, |
|
56 const char *aCharset, |
|
57 bool aSkipCheck, |
|
58 bool aAllowSubstitution, |
|
59 uint8_t aOptionalArgc, |
|
60 nsACString &aUTF8String) |
|
61 { |
|
62 bool allowSubstitution = (aOptionalArgc == 1) ? aAllowSubstitution : true; |
|
63 |
|
64 // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8 |
|
65 // check is requested. It may not be asked for if a caller suspects |
|
66 // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or |
|
67 // it's in a charset other than UTF-8 that can be mistaken for UTF-8. |
|
68 if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) { |
|
69 aUTF8String = aString; |
|
70 return NS_OK; |
|
71 } |
|
72 |
|
73 aUTF8String.Truncate(); |
|
74 |
|
75 nsresult rv = ToUTF8(aString, aCharset, allowSubstitution, aUTF8String); |
|
76 |
|
77 // additional protection for cases where check is skipped and the input |
|
78 // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch |
|
79 // was wrong.) We don't check ASCIIness assuming there's no charset |
|
80 // incompatible with ASCII (we don't support EBCDIC). |
|
81 if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) { |
|
82 aUTF8String = aString; |
|
83 return NS_OK; |
|
84 } |
|
85 |
|
86 return rv; |
|
87 } |
|
88 |
|
89 NS_IMETHODIMP |
|
90 nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec, |
|
91 const char *aCharset, |
|
92 nsACString &aUTF8Spec) |
|
93 { |
|
94 // assume UTF-8 if the spec contains unescaped non-ASCII characters. |
|
95 // No valid spec in Mozilla would break this assumption. |
|
96 if (!IsASCII(aSpec)) { |
|
97 aUTF8Spec = aSpec; |
|
98 return NS_OK; |
|
99 } |
|
100 |
|
101 aUTF8Spec.Truncate(); |
|
102 |
|
103 nsAutoCString unescapedSpec; |
|
104 // NS_UnescapeURL does not fill up unescapedSpec unless there's at least |
|
105 // one character to unescape. |
|
106 bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(), |
|
107 esc_OnlyNonASCII, unescapedSpec); |
|
108 |
|
109 if (!written) { |
|
110 aUTF8Spec = aSpec; |
|
111 return NS_OK; |
|
112 } |
|
113 // return if ASCII only or escaped UTF-8 |
|
114 if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) { |
|
115 aUTF8Spec = unescapedSpec; |
|
116 return NS_OK; |
|
117 } |
|
118 |
|
119 return ToUTF8(unescapedSpec, aCharset, true, aUTF8Spec); |
|
120 } |
|
121 |