/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* vim:expandtab:shiftwidth=4:tabstop=4:
 */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsISupports.idl"

/**
 * Helpers that make sure a byte string or a URI spec is encoded in UTF-8,
 * converting it from a caller-supplied charset when it is not.
 */
[scriptable, uuid(249f52a3-2599-4b00-ba40-0481364831a2)]
interface nsIUTF8ConverterService : nsISupports
{
    /**
     * Ensure that |aString| is encoded in UTF-8. If not,
     * convert to UTF-8 assuming it's encoded in |aCharset|
     * and return the converted string in UTF-8.
     *
     * @param aString a string to ensure its UTF8ness
     * @param aCharset the charset to convert from if |aString| is not in UTF-8
     * @param aSkipCheck determines whether or not to skip the 'ASCIIness'
     *        and 'UTF8ness' check. Set this to true only if you suspect
     *        that aString can be mistaken for ASCII / UTF-8 but is actually
     *        NOT in ASCII / UTF-8, so that aString has to go through the
     *        conversion, skipping the ASCIIness/UTF8ness check.
     *        The most common case is the input is in 7bit non-ASCII charsets
     *        like ISO-2022-JP, HZ or UTF-7 (in its original form or
     *        a modified form used in IMAP folder names).
     * @param aAllowSubstitution when true, allow the decoder to substitute
     *        invalid input sequences by replacement characters (defaults to
     *        true)
     * @return the converted string in UTF-8.
     * @throws NS_ERROR_UCONV_NOCONV when there is no decoder for aCharset
     *         or error code of nsIUnicodeDecoder in case of conversion failure
     */
    [optional_argc]
    AUTF8String convertStringToUTF8(in ACString aString,
                                    in string aCharset,
                                    in boolean aSkipCheck,
                                    [optional] in boolean aAllowSubstitution);

    /* XXX : To-be-added. convertStringFromUTF8 */

    /**
     * Ensure that |aSpec| (after URL-unescaping it) is encoded in UTF-8.
     * If not, convert it to UTF-8, assuming it's encoded in |aCharset|,
     * and return the result.
     *
     * <p>Make sure that all characters outside US-ASCII in your input spec
     * are url-escaped if your spec is not in UTF-8 (before url-escaping)
     * because the presence of non-ASCII characters is blindly
     * regarded as an indication that your input spec is in unescaped UTF-8
     * and it will be returned without further processing. No valid spec
     * going around in Mozilla code would break this assumption.
     *
     * <p>XXX The above may change in the future depending on the usage pattern.
     *
     * @param aSpec an url-escaped URI spec to ensure its UTF8ness
     * @param aCharset the charset to convert from if |aSpec| is not in UTF-8
     * @return the converted spec in UTF-8.
     * @throws NS_ERROR_UCONV_NOCONV when there is no decoder for aCharset
     *         or error code of nsIUnicodeDecoder in case of conversion failure
     */
    AUTF8String convertURISpecToUTF8(in ACString aSpec,
                                     in string aCharset);
};