michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #include "nsString.h" michael@0: #include "nsIUnicodeEncoder.h" michael@0: #include "nsICharsetConverterManager.h" michael@0: #include "nsITextToSubURI.h" michael@0: #include "nsEscape.h" michael@0: #include "nsTextToSubURI.h" michael@0: #include "nsCRT.h" michael@0: #include "nsServiceManagerUtils.h" michael@0: michael@0: static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); michael@0: michael@0: nsTextToSubURI::nsTextToSubURI() michael@0: { michael@0: } michael@0: nsTextToSubURI::~nsTextToSubURI() michael@0: { michael@0: } michael@0: michael@0: NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI) michael@0: michael@0: NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape( michael@0: const char *charset, const char16_t *text, char **_retval) michael@0: { michael@0: if(nullptr == _retval) michael@0: return NS_ERROR_NULL_POINTER; michael@0: *_retval = nullptr; michael@0: nsresult rv = NS_OK; michael@0: michael@0: // Get Charset, get the encoder. michael@0: nsICharsetConverterManager *ccm; michael@0: rv = CallGetService(kCharsetConverterManagerCID, &ccm); michael@0: if(NS_SUCCEEDED(rv)) { michael@0: nsIUnicodeEncoder *encoder; michael@0: rv = ccm->GetUnicodeEncoder(charset, &encoder); michael@0: NS_RELEASE(ccm); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (char16_t)'?'); michael@0: if(NS_SUCCEEDED(rv)) michael@0: { michael@0: char buf[256]; michael@0: char *pBuf = buf; michael@0: int32_t ulen = text ? NS_strlen(text) : 0; michael@0: int32_t outlen = 0; michael@0: if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen))) michael@0: { michael@0: if(outlen >= 256) { michael@0: pBuf = (char*)NS_Alloc(outlen+1); michael@0: } michael@0: if(nullptr == pBuf) { michael@0: outlen = 255; michael@0: pBuf = buf; michael@0: } michael@0: int32_t bufLen = outlen; michael@0: if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) { michael@0: // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary michael@0: int32_t finLen = bufLen - outlen; michael@0: if (finLen > 0) { michael@0: if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen))) michael@0: outlen += finLen; michael@0: } michael@0: pBuf[outlen] = '\0'; michael@0: *_retval = nsEscape(pBuf, url_XPAlphas); michael@0: if(nullptr == *_retval) michael@0: rv = NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: } michael@0: if(pBuf != buf) michael@0: NS_Free(pBuf); michael@0: } michael@0: NS_RELEASE(encoder); michael@0: } michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert( michael@0: const char *charset, const char *text, char16_t **_retval) michael@0: { michael@0: if(nullptr == _retval) michael@0: return NS_ERROR_NULL_POINTER; michael@0: if(nullptr == text) { michael@0: // set empty string instead of returning error michael@0: // due to compatibility for old version michael@0: text = ""; michael@0: } michael@0: *_retval = nullptr; michael@0: nsresult rv = NS_OK; michael@0: michael@0: // unescape the string, unescape changes the input michael@0: char *unescaped = NS_strdup(text); michael@0: if (nullptr == unescaped) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: unescaped = nsUnescape(unescaped); michael@0: NS_ASSERTION(unescaped, "nsUnescape returned null"); michael@0: michael@0: // Convert from the charset to unicode michael@0: nsCOMPtr ccm = michael@0: do_GetService(kCharsetConverterManagerCID, &rv); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: nsIUnicodeDecoder *decoder; michael@0: rv = ccm->GetUnicodeDecoder(charset, &decoder); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: char16_t *pBuf = nullptr; michael@0: int32_t len = strlen(unescaped); michael@0: int32_t outlen = 0; michael@0: if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) { michael@0: pBuf = (char16_t *) NS_Alloc((outlen+1)*sizeof(char16_t)); michael@0: if (nullptr == pBuf) michael@0: rv = NS_ERROR_OUT_OF_MEMORY; michael@0: else { michael@0: if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) { michael@0: pBuf[outlen] = 0; michael@0: *_retval = pBuf; michael@0: } michael@0: else michael@0: NS_Free(pBuf); michael@0: } michael@0: } michael@0: NS_RELEASE(decoder); michael@0: } michael@0: } michael@0: NS_Free(unescaped); michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: static bool statefulCharset(const char *charset) michael@0: { michael@0: if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) || michael@0: !nsCRT::strcasecmp(charset, "UTF-7") || michael@0: !nsCRT::strcasecmp(charset, "HZ-GB-2312")) michael@0: return true; michael@0: michael@0: return false; michael@0: } michael@0: michael@0: nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset, michael@0: const nsAFlatCString &aURI, michael@0: bool aIRI, michael@0: nsAString &_retval) michael@0: { michael@0: nsresult rv = NS_OK; michael@0: michael@0: // check for 7bit encoding the data may not be ASCII after we decode michael@0: bool isStatefulCharset = statefulCharset(aCharset.get()); michael@0: michael@0: if (!isStatefulCharset && IsASCII(aURI)) { michael@0: CopyASCIItoUTF16(aURI, _retval); michael@0: return rv; michael@0: } michael@0: michael@0: if (!isStatefulCharset && aIRI) { michael@0: if (IsUTF8(aURI)) { michael@0: CopyUTF8toUTF16(aURI, _retval); michael@0: return rv; michael@0: } michael@0: } michael@0: michael@0: // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8. michael@0: NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG); michael@0: michael@0: nsCOMPtr charsetConverterManager; michael@0: michael@0: charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: nsCOMPtr unicodeDecoder; michael@0: rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(), michael@0: getter_AddRefs(unicodeDecoder)); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); michael@0: michael@0: int32_t srcLen = aURI.Length(); michael@0: int32_t dstLen; michael@0: rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: char16_t *ustr = (char16_t *) NS_Alloc(dstLen * sizeof(char16_t)); michael@0: NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY); michael@0: michael@0: rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen); michael@0: michael@0: if (NS_SUCCEEDED(rv)) michael@0: _retval.Assign(ustr, dstLen); michael@0: michael@0: NS_Free(ustr); michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset, michael@0: const nsACString &aURIFragment, michael@0: nsAString &_retval) michael@0: { michael@0: nsAutoCString unescapedSpec; michael@0: // skip control octets (0x00 - 0x1f and 0x7f) when unescaping michael@0: NS_UnescapeURL(PromiseFlatCString(aURIFragment), michael@0: esc_SkipControl | esc_AlwaysCopy, unescapedSpec); michael@0: michael@0: // in case of failure, return escaped URI michael@0: // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte michael@0: // sequences are also considered failure in this context michael@0: if (convertURItoUnicode( michael@0: PromiseFlatCString(aCharset), unescapedSpec, true, _retval) michael@0: != NS_OK) michael@0: // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8 michael@0: CopyUTF8toUTF16(aURIFragment, _retval); michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset, michael@0: const nsACString & aURIFragment, michael@0: nsAString &_retval) michael@0: { michael@0: nsAutoCString unescapedSpec; michael@0: NS_UnescapeURL(PromiseFlatCString(aURIFragment), michael@0: esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec); michael@0: // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII michael@0: // superset since converting "http:" with such an encoding is always a bad michael@0: // idea. michael@0: if (!IsUTF8(unescapedSpec) && michael@0: (aCharset.LowerCaseEqualsLiteral("utf-16") || michael@0: aCharset.LowerCaseEqualsLiteral("utf-16be") || michael@0: aCharset.LowerCaseEqualsLiteral("utf-16le") || michael@0: aCharset.LowerCaseEqualsLiteral("utf-7") || michael@0: aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){ michael@0: CopyASCIItoUTF16(aURIFragment, _retval); michael@0: return NS_OK; michael@0: } michael@0: michael@0: return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, true, _retval); michael@0: } michael@0: michael@0: //----------------------------------------------------------------------