Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* -*- Mode: IDL; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
michael@0 | 2 | /* vim:expandtab:shiftwidth=4:tabstop=4: |
michael@0 | 3 | */ |
michael@0 | 4 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 5 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 6 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 7 | |
michael@0 | 8 | /* |
michael@0 | 9 | * This interface allows any module to access the routines
michael@0 | 10 | * for MIME header parameter parsing (RFC 2231/5987).
michael@0 | 11 | */ |
michael@0 | 12 | |
michael@0 | 13 | #include "nsISupports.idl" |
michael@0 | 14 | |
michael@0 | 15 | [scriptable, uuid(9c9252a1-fdaf-40a2-9c2b-a3dc45e28dde)] |
michael@0 | 16 | interface nsIMIMEHeaderParam : nsISupports { |
michael@0 | 17 | |
michael@0 | 18 | /** |
michael@0 | 19 | * Given the value of a single header field (such as |
michael@0 | 20 | * Content-Disposition and Content-Type) and the name of a parameter |
michael@0 | 21 | * (e.g. filename, name, charset), returns the value of the parameter. |
michael@0 | 22 | * The value is obtained by decoding RFC 2231/5987-style encoding, |
michael@0 | 23 | * RFC 2047-style encoding, and converting to UniChar(UTF-16) |
michael@0 | 24 | * from charset specified in RFC 2231/2047 encoding, UTF-8, |
michael@0 | 25 | * <code>aFallbackCharset</code>, the locale charset as fallback if |
michael@0 | 26 | * <code>aTryLocaleCharset</code> is set, and null-padding as last resort |
michael@0 | 27 | * if all else fails. |
michael@0 | 28 | * |
michael@0 | 29 | * <p> |
michael@0 | 30 | * This method internally invokes <code>getParameterInternal</code>. |
michael@0 | 31 | * However, it does not stop at decoding RFC 2231 (the task for |
michael@0 | 32 | * <code>getParameterInternal</code>) but tries to cope |
michael@0 | 33 | * with several non-standard-compliant cases mentioned below. |
michael@0 | 34 | * |
michael@0 | 35 | * <p> |
michael@0 | 36 | * Note that a lot of MUAs emit RFC 2047-encoded parameters. Unfortunately, |
michael@0 | 37 | * this includes Mozilla as of 2003-05-30. Even more standard-ignorant MUAs, |
michael@0 | 38 | * web servers and application servers emit 'raw 8bit characters'. This method will |
michael@0 | 39 | * try to cope with all these cases as gracefully as possible. Additionally, |
michael@0 | 40 | * it returns the language tag if the parameter is encoded per RFC 2231 and |
michael@0 | 41 | * includes lang. |
michael@0 | 42 | * |
michael@0 | 43 | * <p> |
michael@0 | 44 | * Note that <code>getParameterHTTP</code> skips some of the workarounds used for |
michael@0 | 45 | * mail (MIME) header fields, and thus SHOULD be used from non-mail |
michael@0 | 46 | * code. |
michael@0 | 47 | * |
michael@0 | 48 | * |
michael@0 | 49 | * @param aHeaderVal a header string to get the value of a parameter |
michael@0 | 50 | * from. |
michael@0 | 51 | * @param aParamName the name of a MIME header parameter (e.g. |
michael@0 | 52 | * filename, name, charset). If empty, returns |
michael@0 | 53 | * the first (possibly) _unnamed_ 'parameter'. |
michael@0 | 54 | * @param aFallbackCharset fallback charset to try if the string after |
michael@0 | 55 | * RFC 2231/2047 decoding or the raw 8bit |
michael@0 | 56 | * string is not UTF-8 |
michael@0 | 57 | * @param aTryLocaleCharset If set, makes yet another attempt |
michael@0 | 58 | * with the locale charset. |
michael@0 | 59 | * @param aLang If non-null, it gets assigned a new pointer |
michael@0 | 60 | * to a string containing the value of language |
michael@0 | 61 | * obtained from RFC 2231 parsing. Caller has to |
michael@0 | 62 | * nsMemory::Free it. |
michael@0 | 63 | * @return the value of <code>aParamName</code> in UniChar(UTF-16). |
michael@0 | 64 | */ |
michael@0 | 65 | AString getParameter(in ACString aHeaderVal, |
michael@0 | 66 | in string aParamName, |
michael@0 | 67 | in ACString aFallbackCharset, |
michael@0 | 68 | in boolean aTryLocaleCharset, |
michael@0 | 69 | out string aLang); |
michael@0 | 70 | |
michael@0 | 71 | |
michael@0 | 72 | /** |
michael@0 | 73 | * Like getParameter (same signature), but disabling encodings and |
michael@0 | 74 | * workarounds specific to MIME (as opposed to HTTP). |
michael@0 | 75 | */ |
michael@0 | 76 | AString getParameterHTTP(in ACString aHeaderVal, |
michael@0 | 77 | in string aParamName, |
michael@0 | 78 | in ACString aFallbackCharset, |
michael@0 | 79 | in boolean aTryLocaleCharset, |
michael@0 | 80 | out string aLang); |
michael@0 | 81 | |
michael@0 | 82 | /** |
michael@0 | 83 | * Given the value of a header field parameter using the encoding |
michael@0 | 84 | * defined in RFC 5987, decode the value into a Unicode string, and extract |
michael@0 | 85 | * the optional language parameter. |
michael@0 | 86 | * |
michael@0 | 87 | * <p> |
michael@0 | 88 | * This function is purposefully picky; it will abort for all (most?) |
michael@0 | 89 | * invalid inputs. This is by design. In particular, it does not support |
michael@0 | 90 | * any character encodings other than UTF-8, in order not to promote |
michael@0 | 91 | * non-interoperable usage. |
michael@0 | 92 | * |
michael@0 | 93 | * <p> |
michael@0 | 94 | * Code that parses HTTP header fields (as opposed to MIME header fields) |
michael@0 | 95 | * should use this function. |
michael@0 | 96 | * |
michael@0 | 97 | * @param aParamVal a header field parameter to decode. |
michael@0 | 98 | * @param aLang will be set to the language part (possibly |
michael@0 | 99 | * empty). |
michael@0 | 100 | * @return the decoded parameter value. |
michael@0 | 101 | */ |
michael@0 | 102 | AString decodeRFC5987Param(in ACString aParamVal, |
michael@0 | 103 | out ACString aLang); |
michael@0 | 104 | |
michael@0 | 105 | /** |
michael@0 | 106 | * Given the value of a single header field (such as |
michael@0 | 107 | * Content-Disposition and Content-Type) and the name of a parameter |
michael@0 | 108 | * (e.g. filename, name, charset), returns the value of the parameter |
michael@0 | 109 | * after decoding RFC 2231-style encoding. |
michael@0 | 110 | * <p> |
michael@0 | 111 | * For <strong>internal use only</strong>. The only other place where |
michael@0 | 112 | * this needs to be invoked is |MimeHeaders_get_parameter| in |
michael@0 | 113 | * mailnews/mime/src/mimehdrs.cpp defined as |
michael@0 | 114 | * char * MimeHeaders_get_parameter (const char *header_value, |
michael@0 | 115 | * const char *parm_name, |
michael@0 | 116 | * char **charset, char **language) |
michael@0 | 117 | * |
michael@0 | 118 | * Otherwise, this method would have been made static. |
michael@0 | 119 | * |
michael@0 | 120 | * @param aHeaderVal a header string to get the value of a parameter from. |
michael@0 | 121 | * @param aParamName the name of a MIME header parameter (e.g. |
michael@0 | 122 | * filename, name, charset). If empty, returns |
michael@0 | 123 | * the first (possibly) _unnamed_ 'parameter'. |
michael@0 | 124 | * @param aCharset If non-null, it gets assigned a new pointer |
michael@0 | 125 | * to a string containing the value of charset obtained |
michael@0 | 126 | * from RFC 2231 parsing. Caller has to nsMemory::Free it. |
michael@0 | 127 | * @param aLang If non-null, it gets assigned a new pointer |
michael@0 | 128 | * to a string containing the value of language obtained |
michael@0 | 129 | * from RFC 2231 parsing. Caller has to nsMemory::Free it. |
michael@0 | 130 | * @return the value of <code>aParamName</code> after |
michael@0 | 131 | * RFC 2231 decoding but without charset conversion. |
michael@0 | 132 | */ |
michael@0 | 133 | |
michael@0 | 134 | [noscript] |
michael@0 | 135 | string getParameterInternal(in string aHeaderVal, |
michael@0 | 136 | in string aParamName, |
michael@0 | 137 | out string aCharset, |
michael@0 | 138 | out string aLang); |
michael@0 | 139 | |
michael@0 | 140 | |
michael@0 | 141 | /** |
michael@0 | 142 | * Given a header value, decodes RFC 2047-style encoding and |
michael@0 | 143 | * returns the decoded header value in UTF-8 if either it's |
michael@0 | 144 | * RFC-2047-encoded or aDefaultCharset is given. Otherwise, |
michael@0 | 145 | * returns the input header value (in whatever encoding) |
michael@0 | 146 | * as it is except that RFC 822 (using backslash) quotation and |
michael@0 | 147 | * CRLF (if aEatContinuation is set) are stripped away. |
michael@0 | 148 | * <p> |
michael@0 | 149 | * For internal use only. The only other place where this needs to be |
michael@0 | 150 | * invoked is <code>MIME_DecodeMimeHeader</code> in |
michael@0 | 151 | * mailnews/mime/src/mimehdrs.cpp defined as |
michael@0 | 152 | * char * Mime_DecodeMimeHeader(char *header_val, const char *charset, |
michael@0 | 153 | * bool override, bool eatcontinuation) |
michael@0 | 154 | * |
michael@0 | 155 | * @param aHeaderVal a header value to decode |
michael@0 | 156 | * @param aDefaultCharset MIME charset to use in place of MIME charset |
michael@0 | 157 | * specified in RFC 2047 style encoding |
michael@0 | 158 | * when <code>aOverrideCharset</code> is set. |
michael@0 | 159 | * @param aOverrideCharset When set, overrides MIME charset specified |
michael@0 | 160 | * in RFC 2047 style encoding with <code>aDefaultCharset</code>. |
michael@0 | 161 | * @param aEatContinuation When set, removes CR/LF. |
michael@0 | 162 | * @return decoded header value. |
michael@0 | 163 | */ |
michael@0 | 164 | [noscript] |
michael@0 | 165 | ACString decodeRFC2047Header(in string aHeaderVal, |
michael@0 | 166 | in string aDefaultCharset, |
michael@0 | 167 | in boolean aOverrideCharset, |
michael@0 | 168 | in boolean aEatContinuation); |
michael@0 | 169 | |
michael@0 | 170 | |
michael@0 | 171 | /** |
michael@0 | 172 | * Given a header parameter, decodes RFC 2047 style encoding (if it's |
michael@0 | 173 | * not obtained from RFC 2231 encoding), converts it to |
michael@0 | 174 | * UTF-8 and returns the result in UTF-8 if an attempt to extract |
michael@0 | 175 | * charset information from a few different sources succeeds. |
michael@0 | 176 | * Otherwise, returns the input header value (in whatever encoding) |
michael@0 | 177 | * as it is except that RFC 822 (using backslash) quotation is |
michael@0 | 178 | * stripped off. |
michael@0 | 179 | * <p> |
michael@0 | 180 | * For internal use only. The only other place where this needs to be |
michael@0 | 181 | * invoked is <code>mime_decode_filename</code> in |
michael@0 | 182 | * mailnews/mime/src/mimehdrs.cpp defined as |
michael@0 | 183 | * char * mime_decode_filename(char *name, const char *charset, |
michael@0 | 184 | * MimeDisplayOptions *opt) |
michael@0 | 185 | * |
michael@0 | 186 | * @param aParamValue the value of a parameter to decode and convert |
michael@0 | 187 | * @param aCharset charset obtained from RFC 2231 decoding in which |
michael@0 | 188 | * <code>aParamValue</code> is encoded. If null, |
michael@0 | 189 | * indicates that it needs to try RFC 2047, instead. |
michael@0 | 190 | * @param aDefaultCharset MIME charset to use when aCharset is null and |
michael@0 | 191 | * cannot be obtained per RFC 2047 (most likely |
michael@0 | 192 | * because 'bare' string is used.) Besides, it |
michael@0 | 193 | * overrides aCharset/MIME charset obtained from |
michael@0 | 194 | * RFC 2047 if <code>aOverrideCharset</code> is set. |
michael@0 | 195 | * @param aOverrideCharset When set, overrides MIME charset specified |
michael@0 | 196 | * in RFC 2047 style encoding with |
michael@0 | 197 | * <code>aDefaultCharset</code>. |
michael@0 | 198 | * @return decoded parameter. |
michael@0 | 199 | */ |
michael@0 | 200 | |
michael@0 | 201 | [noscript] |
michael@0 | 202 | ACString decodeParameter(in ACString aParamValue, |
michael@0 | 203 | in string aCharset, |
michael@0 | 204 | in string aDefaultCharset, |
michael@0 | 205 | in boolean aOverrideCharset); |
michael@0 | 206 | }; |
michael@0 | 207 |