michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim:expandtab:shiftwidth=2:tabstop=4: michael@0: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #include "nsString.h" michael@0: #include "nsICharsetConverterManager.h" michael@0: #include "nsUTF8ConverterService.h" michael@0: #include "nsEscape.h" michael@0: #include "nsAutoPtr.h" michael@0: #include "nsServiceManagerUtils.h" michael@0: michael@0: NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService) michael@0: michael@0: static nsresult michael@0: ToUTF8(const nsACString &aString, const char *aCharset, michael@0: bool aAllowSubstitution, nsACString &aResult) michael@0: { michael@0: nsresult rv; michael@0: if (!aCharset || !*aCharset) michael@0: return NS_ERROR_INVALID_ARG; michael@0: michael@0: nsCOMPtr ccm; michael@0: michael@0: ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: nsCOMPtr unicodeDecoder; michael@0: rv = ccm->GetUnicodeDecoder(aCharset, michael@0: getter_AddRefs(unicodeDecoder)); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: if (!aAllowSubstitution) michael@0: unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); michael@0: michael@0: int32_t srcLen = aString.Length(); michael@0: int32_t dstLen; michael@0: const nsAFlatCString& inStr = PromiseFlatCString(aString); michael@0: rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: nsAutoArrayPtr ustr(new char16_t[dstLen]); michael@0: NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY); michael@0: michael@0: rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen); michael@0: if (NS_SUCCEEDED(rv)){ michael@0: // Tru64 Cxx needs an explicit get() michael@0: CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult); michael@0: } michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString, michael@0: const char *aCharset, michael@0: bool aSkipCheck, michael@0: bool aAllowSubstitution, michael@0: uint8_t aOptionalArgc, michael@0: nsACString &aUTF8String) michael@0: { michael@0: bool allowSubstitution = (aOptionalArgc == 1) ? aAllowSubstitution : true; michael@0: michael@0: // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8 michael@0: // check is requested. It may not be asked for if a caller suspects michael@0: // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or michael@0: // it's in a charset other than UTF-8 that can be mistaken for UTF-8. michael@0: if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) { michael@0: aUTF8String = aString; michael@0: return NS_OK; michael@0: } michael@0: michael@0: aUTF8String.Truncate(); michael@0: michael@0: nsresult rv = ToUTF8(aString, aCharset, allowSubstitution, aUTF8String); michael@0: michael@0: // additional protection for cases where check is skipped and the input michael@0: // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch michael@0: // was wrong.) We don't check ASCIIness assuming there's no charset michael@0: // incompatible with ASCII (we don't support EBCDIC). michael@0: if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) { michael@0: aUTF8String = aString; michael@0: return NS_OK; michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec, michael@0: const char *aCharset, michael@0: nsACString &aUTF8Spec) michael@0: { michael@0: // assume UTF-8 if the spec contains unescaped non-ASCII characters. michael@0: // No valid spec in Mozilla would break this assumption. michael@0: if (!IsASCII(aSpec)) { michael@0: aUTF8Spec = aSpec; michael@0: return NS_OK; michael@0: } michael@0: michael@0: aUTF8Spec.Truncate(); michael@0: michael@0: nsAutoCString unescapedSpec; michael@0: // NS_UnescapeURL does not fill up unescapedSpec unless there's at least michael@0: // one character to unescape. michael@0: bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(), michael@0: esc_OnlyNonASCII, unescapedSpec); michael@0: michael@0: if (!written) { michael@0: aUTF8Spec = aSpec; michael@0: return NS_OK; michael@0: } michael@0: // return if ASCII only or escaped UTF-8 michael@0: if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) { michael@0: aUTF8Spec = unescapedSpec; michael@0: return NS_OK; michael@0: } michael@0: michael@0: return ToUTF8(unescapedSpec, aCharset, true, aUTF8Spec); michael@0: } michael@0: