michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* vim:expandtab:shiftwidth=2:tabstop=4: 
michael@0:  */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: #include "nsString.h"
michael@0: #include "nsICharsetConverterManager.h"
michael@0: #include "nsUTF8ConverterService.h"
michael@0: #include "nsEscape.h"
michael@0: #include "nsAutoPtr.h"
michael@0: #include "nsServiceManagerUtils.h"
michael@0: 
// XPCOM boilerplate for nsUTF8ConverterService, naming nsIUTF8ConverterService
// as the one interface it exposes. The macro is defined outside this file;
// presumably it supplies the nsISupports methods (AddRef / Release /
// QueryInterface) -- confirm against the XPCOM headers.
michael@0: NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService)
michael@0: 
michael@0: static nsresult 
michael@0: ToUTF8(const nsACString &aString, const char *aCharset,
michael@0:        bool aAllowSubstitution, nsACString &aResult)
michael@0: {
michael@0:   nsresult rv;
michael@0:   if (!aCharset || !*aCharset)
michael@0:     return NS_ERROR_INVALID_ARG;
michael@0: 
michael@0:   nsCOMPtr<nsICharsetConverterManager> ccm;
michael@0: 
michael@0:   ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
michael@0:   rv = ccm->GetUnicodeDecoder(aCharset,
michael@0:                               getter_AddRefs(unicodeDecoder));
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   if (!aAllowSubstitution)
michael@0:     unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
michael@0: 
michael@0:   int32_t srcLen = aString.Length();
michael@0:   int32_t dstLen;
michael@0:   const nsAFlatCString& inStr = PromiseFlatCString(aString);
michael@0:   rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen);
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   nsAutoArrayPtr<char16_t> ustr(new char16_t[dstLen]);
michael@0:   NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
michael@0: 
michael@0:   rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen);
michael@0:   if (NS_SUCCEEDED(rv)){
michael@0:     // Tru64 Cxx needs an explicit get()
michael@0:     CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult);
michael@0:   }
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP  
michael@0: nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString, 
michael@0:                                             const char *aCharset, 
michael@0:                                             bool aSkipCheck, 
michael@0:                                             bool aAllowSubstitution,
michael@0:                                             uint8_t aOptionalArgc,
michael@0:                                             nsACString &aUTF8String)
michael@0: {
michael@0:   bool allowSubstitution = (aOptionalArgc == 1) ? aAllowSubstitution : true;
michael@0: 
michael@0:   // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8
michael@0:   // check is requested. It may not be asked for if a caller suspects
michael@0:   // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or 
michael@0:   // it's in a charset other than UTF-8 that can be mistaken for UTF-8.
michael@0:   if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) {
michael@0:     aUTF8String = aString;
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   aUTF8String.Truncate();
michael@0: 
michael@0:   nsresult rv = ToUTF8(aString, aCharset, allowSubstitution, aUTF8String);
michael@0: 
michael@0:   // additional protection for cases where check is skipped and  the input
michael@0:   // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
michael@0:   // was wrong.) We don't check ASCIIness assuming there's no charset
michael@0:   // incompatible with ASCII (we don't support EBCDIC).
michael@0:   if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) {
michael@0:     aUTF8String = aString;
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP  
michael@0: nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec, 
michael@0:                                              const char *aCharset, 
michael@0:                                              nsACString &aUTF8Spec)
michael@0: {
michael@0:   // assume UTF-8 if the spec contains unescaped non-ASCII characters.
michael@0:   // No valid spec in Mozilla would break this assumption.
michael@0:   if (!IsASCII(aSpec)) {
michael@0:     aUTF8Spec = aSpec;
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   aUTF8Spec.Truncate();
michael@0: 
michael@0:   nsAutoCString unescapedSpec; 
michael@0:   // NS_UnescapeURL does not fill up unescapedSpec unless there's at least 
michael@0:   // one character to unescape.
michael@0:   bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(), 
michael@0:                                   esc_OnlyNonASCII, unescapedSpec);
michael@0: 
michael@0:   if (!written) {
michael@0:     aUTF8Spec = aSpec;
michael@0:     return NS_OK;
michael@0:   }
michael@0:   // return if ASCII only or escaped UTF-8
michael@0:   if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) {
michael@0:     aUTF8Spec = unescapedSpec;
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   return ToUTF8(unescapedSpec, aCharset, true, aUTF8Spec);
michael@0: }
michael@0: