michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
michael@0: /* vim:expandtab:shiftwidth=4:tabstop=4:
michael@0:  */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: #include "nsISupports.idl"
michael@0: 
michael@0: /**
michael@0:  * Service for making sure that byte strings and URI specs are encoded in
michael@0:  * UTF-8, converting them from a caller-supplied charset when they are not.
michael@0:  */
michael@0: [scriptable, uuid(249f52a3-2599-4b00-ba40-0481364831a2)]
michael@0: interface nsIUTF8ConverterService : nsISupports
michael@0: {
michael@0:   /**
michael@0:    * Ensure that |aString| is encoded in UTF-8.  If it is not,
michael@0:    * convert it to UTF-8, assuming it is encoded in |aCharset|,
michael@0:    * and return the converted string.
michael@0:    *
michael@0:    * @param aString the string whose UTF-8-ness is to be ensured
michael@0:    * @param aCharset the charset to convert from if |aString| is not in UTF-8
michael@0:    * @param aSkipCheck determines whether or not to skip the 'ASCIIness' and
michael@0:    *        'UTF8ness' checks. Set this to true only if you suspect that
michael@0:    *        aString can be mistaken for ASCII / UTF-8 but is actually NOT
michael@0:    *        in ASCII / UTF-8, so that aString has to go through the
michael@0:    *        conversion without the ASCIIness/UTF8ness check being applied.
michael@0:    *        The most common case is input in a 7-bit non-ASCII charset
michael@0:    *        like ISO-2022-JP, HZ or UTF-7 (in its original form or
michael@0:    *        a modified form used in IMAP folder names).
michael@0:    * @param aAllowSubstitution when true, allow the decoder to substitute
michael@0:    *        invalid input sequences by replacement characters (defaults to
michael@0:    *        true)
michael@0:    * @return the converted string in UTF-8.
michael@0:    * @throws NS_ERROR_UCONV_NOCONV when there is no decoder for aCharset,
michael@0:    *         or the error code of nsIUnicodeDecoder if the conversion fails
michael@0:    *
michael@0:    * NOTE(review): [optional_argc] presumably lets the implementation see
michael@0:    * whether aAllowSubstitution was actually passed, so that it can apply
michael@0:    * the documented default of true -- confirm against the implementation.
michael@0:    */
michael@0: 
michael@0:     [optional_argc]
michael@0:     AUTF8String convertStringToUTF8(in ACString aString, 
michael@0:                                     in string   aCharset,
michael@0:                                     in boolean  aSkipCheck,
michael@0:                                     [optional] in boolean aAllowSubstitution);
michael@0: 
michael@0: /* XXX: convertStringFromUTF8 is still to be added. */
michael@0:    
michael@0:   /**
michael@0:    * Ensure that |aSpec| (after URL-unescaping it) is encoded in UTF-8.
michael@0:    * If it is not, convert it to UTF-8, assuming it is encoded in
michael@0:    * |aCharset|, and return the result.
michael@0:    *
michael@0:    * <p>Make sure that all characters outside US-ASCII in your input spec
michael@0:    * are URL-escaped if your spec is not in UTF-8 (before URL-escaping),
michael@0:    * because the presence of non-ASCII characters is <strong>blindly</strong>
michael@0:    * regarded as an indication that your input spec is in unescaped UTF-8,
michael@0:    * and it will be returned without further processing. No valid spec
michael@0:    * going around in Mozilla code would break this assumption.
michael@0:    *
michael@0:    * <p>XXX The above may change in the future depending on the usage pattern.
michael@0:    *
michael@0:    * @param aSpec a URL-escaped URI spec whose UTF-8-ness is to be ensured
michael@0:    * @param aCharset the charset to convert from if |aSpec| is not in UTF-8
michael@0:    * @return the converted spec in UTF-8.
michael@0:    * @throws NS_ERROR_UCONV_NOCONV when there is no decoder for aCharset,
michael@0:    *         or the error code of nsIUnicodeDecoder if the conversion fails
michael@0:    */
michael@0: 
michael@0:     AUTF8String convertURISpecToUTF8(in ACString aSpec, 
michael@0:                                      in string   aCharset);
michael@0: };
michael@0: