Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 3 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 4 | |
michael@0 | 5 | #include "xpcom-private.h" |
michael@0 | 6 | |
michael@0 | 7 | //----------------------------------------------------------------------------- |
michael@0 | 8 | // XP_MACOSX or ANDROID |
michael@0 | 9 | //----------------------------------------------------------------------------- |
michael@0 | 10 | #if defined(XP_MACOSX) || defined(ANDROID) |
michael@0 | 11 | |
michael@0 | 12 | #include "nsAString.h" |
michael@0 | 13 | #include "nsReadableUtils.h" |
michael@0 | 14 | #include "nsString.h" |
michael@0 | 15 | |
michael@0 | 16 | nsresult |
michael@0 | 17 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output) |
michael@0 | 18 | { |
michael@0 | 19 | CopyUTF8toUTF16(input, output); |
michael@0 | 20 | return NS_OK; |
michael@0 | 21 | } |
michael@0 | 22 | |
michael@0 | 23 | nsresult |
michael@0 | 24 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output) |
michael@0 | 25 | { |
michael@0 | 26 | CopyUTF16toUTF8(input, output); |
michael@0 | 27 | return NS_OK; |
michael@0 | 28 | } |
michael@0 | 29 | |
// Nothing to set up in the XP_MACOSX / ANDROID build: the conversion
// functions in this branch keep no global state.
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
michael@0 | 34 | |
// Nothing to tear down in the XP_MACOSX / ANDROID build: the conversion
// functions in this branch keep no global state.
void
NS_ShutdownNativeCharsetUtils()
{
    // intentionally empty
}
michael@0 | 39 | |
michael@0 | 40 | |
michael@0 | 41 | //----------------------------------------------------------------------------- |
michael@0 | 42 | // XP_UNIX |
michael@0 | 43 | //----------------------------------------------------------------------------- |
michael@0 | 44 | #elif defined(XP_UNIX) |
michael@0 | 45 | |
michael@0 | 46 | #include <stdlib.h> // mbtowc, wctomb |
michael@0 | 47 | #include <locale.h> // setlocale |
michael@0 | 48 | #include "mozilla/Mutex.h" |
michael@0 | 49 | #include "nscore.h" |
michael@0 | 50 | #include "nsAString.h" |
michael@0 | 51 | #include "nsReadableUtils.h" |
michael@0 | 52 | |
michael@0 | 53 | using namespace mozilla; |
michael@0 | 54 | |
michael@0 | 55 | // |
michael@0 | 56 | // choose a conversion library. we used to use mbrtowc/wcrtomb under Linux, |
michael@0 | 57 | // but that doesn't work for non-BMP characters whether we use '-fshort-wchar' |
michael@0 | 58 | // or not (see bug 206811 and |
michael@0 | 59 | // news://news.mozilla.org:119/bajml3$fvr1@ripley.netscape.com). we now use |
michael@0 | 60 | // iconv for all platforms where nl_types.h and langinfo.h (with CODESET) |
michael@0 | 61 | // are present along with iconv. |
michael@0 | 62 | // |
michael@0 | 63 | #if defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_LANGINFO_CODESET) |
michael@0 | 64 | #define USE_ICONV 1 |
michael@0 | 65 | #else |
michael@0 | 66 | #define USE_STDCONV 1 |
michael@0 | 67 | #endif |
michael@0 | 68 | |
// Widens bytes to UTF-16 code units one-for-one (each byte is read as an
// unsigned value, i.e. zero-extended).  Copies min(*inputLeft, *outputLeft)
// units, then advances both cursors and decrements both counters by the
// number copied.
static void
isolatin1_to_utf16(const char **input, uint32_t *inputLeft, char16_t **output, uint32_t *outputLeft)
{
    const char *src = *input;
    char16_t *dst = *output;
    const uint32_t count = (*inputLeft < *outputLeft) ? *inputLeft : *outputLeft;

    for (uint32_t i = 0; i < count; ++i) {
        dst[i] = (unsigned char) src[i];
    }

    *input = src + count;
    *output = dst + count;
    *inputLeft -= count;
    *outputLeft -= count;
}
michael@0 | 80 | |
// Narrows UTF-16 code units to single bytes one-for-one by keeping only the
// low 8 bits of each unit (lossy for anything above U+00FF).  Copies
// min(*inputLeft, *outputLeft) units, then advances both cursors and
// decrements both counters by the number copied.
static void
utf16_to_isolatin1(const char16_t **input, uint32_t *inputLeft, char **output, uint32_t *outputLeft)
{
    const char16_t *src = *input;
    char *dst = *output;
    const uint32_t count = (*inputLeft < *outputLeft) ? *inputLeft : *outputLeft;

    for (uint32_t i = 0; i < count; ++i) {
        dst[i] = (unsigned char) src[i];
    }

    *input = src + count;
    *output = dst + count;
    *inputLeft -= count;
    *outputLeft -= count;
}
michael@0 | 92 | |
michael@0 | 93 | //----------------------------------------------------------------------------- |
michael@0 | 94 | // conversion using iconv |
michael@0 | 95 | //----------------------------------------------------------------------------- |
michael@0 | 96 | #if defined(USE_ICONV) |
michael@0 | 97 | #include <nl_types.h> // CODESET |
michael@0 | 98 | #include <langinfo.h> // nl_langinfo |
michael@0 | 99 | #include <iconv.h> // iconv_open, iconv, iconv_close |
michael@0 | 100 | #include <errno.h> |
michael@0 | 101 | #include "plstr.h" |
michael@0 | 102 | |
michael@0 | 103 | #if defined(HAVE_ICONV_WITH_CONST_INPUT) |
michael@0 | 104 | #define ICONV_INPUT(x) (x) |
michael@0 | 105 | #else |
michael@0 | 106 | #define ICONV_INPUT(x) ((char **)x) |
michael@0 | 107 | #endif |
michael@0 | 108 | |
michael@0 | 109 | // solaris definitely needs this, but we'll enable it by default |
michael@0 | 110 | // just in case... but we know for sure that iconv(3) in glibc |
michael@0 | 111 | // doesn't need this. |
michael@0 | 112 | #if !defined(__GLIBC__) |
michael@0 | 113 | #define ENABLE_UTF8_FALLBACK_SUPPORT |
michael@0 | 114 | #endif |
michael@0 | 115 | |
michael@0 | 116 | #define INVALID_ICONV_T ((iconv_t) -1) |
michael@0 | 117 | |
michael@0 | 118 | static inline size_t |
michael@0 | 119 | xp_iconv(iconv_t converter, |
michael@0 | 120 | const char **input, |
michael@0 | 121 | size_t *inputLeft, |
michael@0 | 122 | char **output, |
michael@0 | 123 | size_t *outputLeft) |
michael@0 | 124 | { |
michael@0 | 125 | size_t res, outputAvail = outputLeft ? *outputLeft : 0; |
michael@0 | 126 | res = iconv(converter, ICONV_INPUT(input), inputLeft, output, outputLeft); |
michael@0 | 127 | if (res == (size_t) -1) { |
michael@0 | 128 | // on some platforms (e.g., linux) iconv will fail with |
michael@0 | 129 | // E2BIG if it cannot convert _all_ of its input. it'll |
michael@0 | 130 | // still adjust all of the in/out params correctly, so we |
michael@0 | 131 | // can ignore this error. the assumption is that we will |
michael@0 | 132 | // be called again to complete the conversion. |
michael@0 | 133 | if ((errno == E2BIG) && (*outputLeft < outputAvail)) |
michael@0 | 134 | res = 0; |
michael@0 | 135 | } |
michael@0 | 136 | return res; |
michael@0 | 137 | } |
michael@0 | 138 | |
michael@0 | 139 | static inline void |
michael@0 | 140 | xp_iconv_reset(iconv_t converter) |
michael@0 | 141 | { |
michael@0 | 142 | // NOTE: the man pages on Solaris claim that you can pass nullptr |
michael@0 | 143 | // for all parameter to reset the converter, but beware the |
michael@0 | 144 | // evil Solaris crash if you go down this route >:-) |
michael@0 | 145 | |
michael@0 | 146 | const char *zero_char_in_ptr = nullptr; |
michael@0 | 147 | char *zero_char_out_ptr = nullptr; |
michael@0 | 148 | size_t zero_size_in = 0, |
michael@0 | 149 | zero_size_out = 0; |
michael@0 | 150 | |
michael@0 | 151 | xp_iconv(converter, &zero_char_in_ptr, |
michael@0 | 152 | &zero_size_in, |
michael@0 | 153 | &zero_char_out_ptr, |
michael@0 | 154 | &zero_size_out); |
michael@0 | 155 | } |
michael@0 | 156 | |
michael@0 | 157 | static inline iconv_t |
michael@0 | 158 | xp_iconv_open(const char **to_list, const char **from_list) |
michael@0 | 159 | { |
michael@0 | 160 | iconv_t res; |
michael@0 | 161 | const char **from_name; |
michael@0 | 162 | const char **to_name; |
michael@0 | 163 | |
michael@0 | 164 | // try all possible combinations to locate a converter. |
michael@0 | 165 | to_name = to_list; |
michael@0 | 166 | while (*to_name) { |
michael@0 | 167 | if (**to_name) { |
michael@0 | 168 | from_name = from_list; |
michael@0 | 169 | while (*from_name) { |
michael@0 | 170 | if (**from_name) { |
michael@0 | 171 | res = iconv_open(*to_name, *from_name); |
michael@0 | 172 | if (res != INVALID_ICONV_T) |
michael@0 | 173 | return res; |
michael@0 | 174 | } |
michael@0 | 175 | from_name++; |
michael@0 | 176 | } |
michael@0 | 177 | } |
michael@0 | 178 | to_name++; |
michael@0 | 179 | } |
michael@0 | 180 | |
michael@0 | 181 | return INVALID_ICONV_T; |
michael@0 | 182 | } |
michael@0 | 183 | |
michael@0 | 184 | /* |
michael@0 | 185 | * char16_t[] is NOT a UCS-2 array BUT a UTF-16 string. Therefore, we |
michael@0 | 186 | * have to use UTF-16 with iconv(3) on platforms where it's supported. |
michael@0 | 187 | * However, the way UTF-16 and UCS-2 are interpreted varies across platforms |
michael@0 | 188 | * and implementations of iconv(3). On Tru64, it also depends on the environment |
michael@0 | 189 | * variable. To avoid the trouble arising from byte-swapping |
michael@0 | 190 | * (bug 208809), we have to try UTF-16LE/BE and UCS-2LE/BE before falling |
michael@0 | 191 | * back to UTF-16 and UCS-2 and variants. We assume that UTF-16 and UCS-2 |
michael@0 | 192 | * on systems without UTF-16LE/BE and UCS-2LE/BE have the native endianness, |
michael@0 | 193 | * which isn't the case of glibc 2.1.x, for which we use 'UNICODELITTLE' |
michael@0 | 194 | * and 'UNICODEBIG'. It's also not true of Tru64 V4 when the environment |
michael@0 | 195 | * variable ICONV_BYTEORDER is set to 'big-endian', about which not much |
michael@0 | 196 | * can be done other than adding a note in the release notes. (bug 206811) |
michael@0 | 197 | */ |
// Candidate iconv names for UTF-16, in preference order: the explicit
// native-endian names first, then the endianness-agnostic spellings.
// Null-terminated.
static const char *UTF_16_NAMES[] = {
#ifdef IS_LITTLE_ENDIAN
    "UTF-16LE",
#  ifdef __GLIBC__
    "UNICODELITTLE",
#  endif
    "UCS-2LE",
#else
    "UTF-16BE",
#  ifdef __GLIBC__
    "UNICODEBIG",
#  endif
    "UCS-2BE",
#endif
    "UTF-16",
    "UCS-2", "UCS2", "UCS_2",
    "ucs-2", "ucs2", "ucs_2",
    nullptr
};
michael@0 | 221 | |
michael@0 | 222 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT) |
// Candidate iconv names for UTF-8, tried in this order.  Null-terminated.
static const char *UTF_8_NAMES[] = {
    "UTF-8", "UTF8", "UTF_8",
    "utf-8", "utf8", "utf_8",
    nullptr
};
michael@0 | 232 | #endif |
michael@0 | 233 | |
// Candidate iconv names for ISO-8859-1, tried in this order.  The alternate
// spellings are only compiled in when not building against glibc.
// Null-terminated.
static const char *ISO_8859_1_NAMES[] = {
    "ISO-8859-1",
#ifndef __GLIBC__
    "ISO8859-1", "ISO88591", "ISO_8859_1", "ISO8859_1",
    "iso-8859-1",
    "iso8859-1", "iso88591", "iso_8859_1", "iso8859_1",
#endif
    nullptr
};
michael@0 | 249 | |
michael@0 | 250 | class nsNativeCharsetConverter |
michael@0 | 251 | { |
michael@0 | 252 | public: |
michael@0 | 253 | nsNativeCharsetConverter(); |
michael@0 | 254 | ~nsNativeCharsetConverter(); |
michael@0 | 255 | |
michael@0 | 256 | nsresult NativeToUnicode(const char **input , uint32_t *inputLeft, |
michael@0 | 257 | char16_t **output, uint32_t *outputLeft); |
michael@0 | 258 | nsresult UnicodeToNative(const char16_t **input , uint32_t *inputLeft, |
michael@0 | 259 | char **output, uint32_t *outputLeft); |
michael@0 | 260 | |
michael@0 | 261 | static void GlobalInit(); |
michael@0 | 262 | static void GlobalShutdown(); |
michael@0 | 263 | static bool IsNativeUTF8(); |
michael@0 | 264 | |
michael@0 | 265 | private: |
michael@0 | 266 | static iconv_t gNativeToUnicode; |
michael@0 | 267 | static iconv_t gUnicodeToNative; |
michael@0 | 268 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT) |
michael@0 | 269 | static iconv_t gNativeToUTF8; |
michael@0 | 270 | static iconv_t gUTF8ToNative; |
michael@0 | 271 | static iconv_t gUnicodeToUTF8; |
michael@0 | 272 | static iconv_t gUTF8ToUnicode; |
michael@0 | 273 | #endif |
michael@0 | 274 | static Mutex *gLock; |
michael@0 | 275 | static bool gInitialized; |
michael@0 | 276 | static bool gIsNativeUTF8; |
michael@0 | 277 | |
michael@0 | 278 | static void LazyInit(); |
michael@0 | 279 | |
michael@0 | 280 | static void Lock() { if (gLock) gLock->Lock(); } |
michael@0 | 281 | static void Unlock() { if (gLock) gLock->Unlock(); } |
michael@0 | 282 | }; |
michael@0 | 283 | |
michael@0 | 284 | iconv_t nsNativeCharsetConverter::gNativeToUnicode = INVALID_ICONV_T; |
michael@0 | 285 | iconv_t nsNativeCharsetConverter::gUnicodeToNative = INVALID_ICONV_T; |
michael@0 | 286 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT) |
michael@0 | 287 | iconv_t nsNativeCharsetConverter::gNativeToUTF8 = INVALID_ICONV_T; |
michael@0 | 288 | iconv_t nsNativeCharsetConverter::gUTF8ToNative = INVALID_ICONV_T; |
michael@0 | 289 | iconv_t nsNativeCharsetConverter::gUnicodeToUTF8 = INVALID_ICONV_T; |
michael@0 | 290 | iconv_t nsNativeCharsetConverter::gUTF8ToUnicode = INVALID_ICONV_T; |
michael@0 | 291 | #endif |
michael@0 | 292 | Mutex *nsNativeCharsetConverter::gLock = nullptr; |
michael@0 | 293 | bool nsNativeCharsetConverter::gInitialized = false; |
michael@0 | 294 | bool nsNativeCharsetConverter::gIsNativeUTF8 = false; |
michael@0 | 295 | |
michael@0 | 296 | void |
michael@0 | 297 | nsNativeCharsetConverter::LazyInit() |
michael@0 | 298 | { |
michael@0 | 299 | // LazyInit may be called before NS_StartupNativeCharsetUtils, but |
michael@0 | 300 | // the setlocale it does has to be called before nl_langinfo. Like in |
michael@0 | 301 | // NS_StartupNativeCharsetUtils, assume we are called early enough that |
michael@0 | 302 | // we are the first to care about the locale's charset. |
michael@0 | 303 | if (!gLock) |
michael@0 | 304 | setlocale(LC_CTYPE, ""); |
michael@0 | 305 | const char *blank_list[] = { "", nullptr }; |
michael@0 | 306 | const char **native_charset_list = blank_list; |
michael@0 | 307 | const char *native_charset = nl_langinfo(CODESET); |
michael@0 | 308 | if (native_charset == nullptr) { |
michael@0 | 309 | NS_ERROR("native charset is unknown"); |
michael@0 | 310 | // fallback to ISO-8859-1 |
michael@0 | 311 | native_charset_list = ISO_8859_1_NAMES; |
michael@0 | 312 | } |
michael@0 | 313 | else |
michael@0 | 314 | native_charset_list[0] = native_charset; |
michael@0 | 315 | |
michael@0 | 316 | // Most, if not all, Unixen supporting UTF-8 and nl_langinfo(CODESET) |
michael@0 | 317 | // return 'UTF-8' (or 'utf-8') |
michael@0 | 318 | if (!PL_strcasecmp(native_charset, "UTF-8")) |
michael@0 | 319 | gIsNativeUTF8 = true; |
michael@0 | 320 | |
michael@0 | 321 | gNativeToUnicode = xp_iconv_open(UTF_16_NAMES, native_charset_list); |
michael@0 | 322 | gUnicodeToNative = xp_iconv_open(native_charset_list, UTF_16_NAMES); |
michael@0 | 323 | |
michael@0 | 324 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT) |
michael@0 | 325 | if (gNativeToUnicode == INVALID_ICONV_T) { |
michael@0 | 326 | gNativeToUTF8 = xp_iconv_open(UTF_8_NAMES, native_charset_list); |
michael@0 | 327 | gUTF8ToUnicode = xp_iconv_open(UTF_16_NAMES, UTF_8_NAMES); |
michael@0 | 328 | NS_ASSERTION(gNativeToUTF8 != INVALID_ICONV_T, "no native to utf-8 converter"); |
michael@0 | 329 | NS_ASSERTION(gUTF8ToUnicode != INVALID_ICONV_T, "no utf-8 to utf-16 converter"); |
michael@0 | 330 | } |
michael@0 | 331 | if (gUnicodeToNative == INVALID_ICONV_T) { |
michael@0 | 332 | gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UTF_16_NAMES); |
michael@0 | 333 | gUTF8ToNative = xp_iconv_open(native_charset_list, UTF_8_NAMES); |
michael@0 | 334 | NS_ASSERTION(gUnicodeToUTF8 != INVALID_ICONV_T, "no utf-16 to utf-8 converter"); |
michael@0 | 335 | NS_ASSERTION(gUTF8ToNative != INVALID_ICONV_T, "no utf-8 to native converter"); |
michael@0 | 336 | } |
michael@0 | 337 | #else |
michael@0 | 338 | NS_ASSERTION(gNativeToUnicode != INVALID_ICONV_T, "no native to utf-16 converter"); |
michael@0 | 339 | NS_ASSERTION(gUnicodeToNative != INVALID_ICONV_T, "no utf-16 to native converter"); |
michael@0 | 340 | #endif |
michael@0 | 341 | |
michael@0 | 342 | /* |
michael@0 | 343 | * On Solaris 8 (and newer?), the iconv modules converting to UCS-2 |
michael@0 | 344 | * prepend a byte order mark unicode character (BOM, u+FEFF) during |
michael@0 | 345 | * the first use of the iconv converter. The same is the case of |
michael@0 | 346 | * glibc 2.2.9x and Tru64 V5 (see bug 208809) when 'UTF-16' is used. |
michael@0 | 347 | * However, we use 'UTF-16LE/BE' in both cases, instead so that we |
michael@0 | 348 | * should be safe. But just in case... |
michael@0 | 349 | * |
michael@0 | 350 | * This dummy conversion gets rid of the BOMs and fixes bug 153562. |
michael@0 | 351 | */ |
michael@0 | 352 | char dummy_input[1] = { ' ' }; |
michael@0 | 353 | char dummy_output[4]; |
michael@0 | 354 | |
michael@0 | 355 | if (gNativeToUnicode != INVALID_ICONV_T) { |
michael@0 | 356 | const char *input = dummy_input; |
michael@0 | 357 | size_t input_left = sizeof(dummy_input); |
michael@0 | 358 | char *output = dummy_output; |
michael@0 | 359 | size_t output_left = sizeof(dummy_output); |
michael@0 | 360 | |
michael@0 | 361 | xp_iconv(gNativeToUnicode, &input, &input_left, &output, &output_left); |
michael@0 | 362 | } |
michael@0 | 363 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT) |
michael@0 | 364 | if (gUTF8ToUnicode != INVALID_ICONV_T) { |
michael@0 | 365 | const char *input = dummy_input; |
michael@0 | 366 | size_t input_left = sizeof(dummy_input); |
michael@0 | 367 | char *output = dummy_output; |
michael@0 | 368 | size_t output_left = sizeof(dummy_output); |
michael@0 | 369 | |
michael@0 | 370 | xp_iconv(gUTF8ToUnicode, &input, &input_left, &output, &output_left); |
michael@0 | 371 | } |
michael@0 | 372 | #endif |
michael@0 | 373 | |
michael@0 | 374 | gInitialized = true; |
michael@0 | 375 | } |
michael@0 | 376 | |
michael@0 | 377 | void |
michael@0 | 378 | nsNativeCharsetConverter::GlobalInit() |
michael@0 | 379 | { |
michael@0 | 380 | gLock = new Mutex("nsNativeCharsetConverter.gLock"); |
michael@0 | 381 | } |
michael@0 | 382 | |
michael@0 | 383 | void |
michael@0 | 384 | nsNativeCharsetConverter::GlobalShutdown() |
michael@0 | 385 | { |
michael@0 | 386 | if (gLock) { |
michael@0 | 387 | delete gLock; |
michael@0 | 388 | gLock = nullptr; |
michael@0 | 389 | } |
michael@0 | 390 | |
michael@0 | 391 | if (gNativeToUnicode != INVALID_ICONV_T) { |
michael@0 | 392 | iconv_close(gNativeToUnicode); |
michael@0 | 393 | gNativeToUnicode = INVALID_ICONV_T; |
michael@0 | 394 | } |
michael@0 | 395 | |
michael@0 | 396 | if (gUnicodeToNative != INVALID_ICONV_T) { |
michael@0 | 397 | iconv_close(gUnicodeToNative); |
michael@0 | 398 | gUnicodeToNative = INVALID_ICONV_T; |
michael@0 | 399 | } |
michael@0 | 400 | |
michael@0 | 401 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT) |
michael@0 | 402 | if (gNativeToUTF8 != INVALID_ICONV_T) { |
michael@0 | 403 | iconv_close(gNativeToUTF8); |
michael@0 | 404 | gNativeToUTF8 = INVALID_ICONV_T; |
michael@0 | 405 | } |
michael@0 | 406 | if (gUTF8ToNative != INVALID_ICONV_T) { |
michael@0 | 407 | iconv_close(gUTF8ToNative); |
michael@0 | 408 | gUTF8ToNative = INVALID_ICONV_T; |
michael@0 | 409 | } |
michael@0 | 410 | if (gUnicodeToUTF8 != INVALID_ICONV_T) { |
michael@0 | 411 | iconv_close(gUnicodeToUTF8); |
michael@0 | 412 | gUnicodeToUTF8 = INVALID_ICONV_T; |
michael@0 | 413 | } |
michael@0 | 414 | if (gUTF8ToUnicode != INVALID_ICONV_T) { |
michael@0 | 415 | iconv_close(gUTF8ToUnicode); |
michael@0 | 416 | gUTF8ToUnicode = INVALID_ICONV_T; |
michael@0 | 417 | } |
michael@0 | 418 | #endif |
michael@0 | 419 | |
michael@0 | 420 | gInitialized = false; |
michael@0 | 421 | } |
michael@0 | 422 | |
michael@0 | 423 | nsNativeCharsetConverter::nsNativeCharsetConverter() |
michael@0 | 424 | { |
michael@0 | 425 | Lock(); |
michael@0 | 426 | if (!gInitialized) |
michael@0 | 427 | LazyInit(); |
michael@0 | 428 | } |
michael@0 | 429 | |
michael@0 | 430 | nsNativeCharsetConverter::~nsNativeCharsetConverter() |
michael@0 | 431 | { |
michael@0 | 432 | // reset converters for next time |
michael@0 | 433 | if (gNativeToUnicode != INVALID_ICONV_T) |
michael@0 | 434 | xp_iconv_reset(gNativeToUnicode); |
michael@0 | 435 | if (gUnicodeToNative != INVALID_ICONV_T) |
michael@0 | 436 | xp_iconv_reset(gUnicodeToNative); |
michael@0 | 437 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT) |
michael@0 | 438 | if (gNativeToUTF8 != INVALID_ICONV_T) |
michael@0 | 439 | xp_iconv_reset(gNativeToUTF8); |
michael@0 | 440 | if (gUTF8ToNative != INVALID_ICONV_T) |
michael@0 | 441 | xp_iconv_reset(gUTF8ToNative); |
michael@0 | 442 | if (gUnicodeToUTF8 != INVALID_ICONV_T) |
michael@0 | 443 | xp_iconv_reset(gUnicodeToUTF8); |
michael@0 | 444 | if (gUTF8ToUnicode != INVALID_ICONV_T) |
michael@0 | 445 | xp_iconv_reset(gUTF8ToUnicode); |
michael@0 | 446 | #endif |
michael@0 | 447 | Unlock(); |
michael@0 | 448 | } |
michael@0 | 449 | |
michael@0 | 450 | nsresult |
michael@0 | 451 | nsNativeCharsetConverter::NativeToUnicode(const char **input, |
michael@0 | 452 | uint32_t *inputLeft, |
michael@0 | 453 | char16_t **output, |
michael@0 | 454 | uint32_t *outputLeft) |
michael@0 | 455 | { |
michael@0 | 456 | size_t res = 0; |
michael@0 | 457 | size_t inLeft = (size_t) *inputLeft; |
michael@0 | 458 | size_t outLeft = (size_t) *outputLeft * 2; |
michael@0 | 459 | |
michael@0 | 460 | if (gNativeToUnicode != INVALID_ICONV_T) { |
michael@0 | 461 | |
michael@0 | 462 | res = xp_iconv(gNativeToUnicode, input, &inLeft, (char **) output, &outLeft); |
michael@0 | 463 | |
michael@0 | 464 | *inputLeft = inLeft; |
michael@0 | 465 | *outputLeft = outLeft / 2; |
michael@0 | 466 | if (res != (size_t) -1) |
michael@0 | 467 | return NS_OK; |
michael@0 | 468 | |
michael@0 | 469 | NS_WARNING("conversion from native to utf-16 failed"); |
michael@0 | 470 | |
michael@0 | 471 | // reset converter |
michael@0 | 472 | xp_iconv_reset(gNativeToUnicode); |
michael@0 | 473 | } |
michael@0 | 474 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT) |
michael@0 | 475 | else if ((gNativeToUTF8 != INVALID_ICONV_T) && |
michael@0 | 476 | (gUTF8ToUnicode != INVALID_ICONV_T)) { |
michael@0 | 477 | // convert first to UTF8, then from UTF8 to UCS2 |
michael@0 | 478 | const char *in = *input; |
michael@0 | 479 | |
michael@0 | 480 | char ubuf[1024]; |
michael@0 | 481 | |
michael@0 | 482 | // we assume we're always called with enough space in |output|, |
michael@0 | 483 | // so convert many chars at a time... |
michael@0 | 484 | while (inLeft) { |
michael@0 | 485 | char *p = ubuf; |
michael@0 | 486 | size_t n = sizeof(ubuf); |
michael@0 | 487 | res = xp_iconv(gNativeToUTF8, &in, &inLeft, &p, &n); |
michael@0 | 488 | if (res == (size_t) -1) { |
michael@0 | 489 | NS_ERROR("conversion from native to utf-8 failed"); |
michael@0 | 490 | break; |
michael@0 | 491 | } |
michael@0 | 492 | NS_ASSERTION(outLeft > 0, "bad assumption"); |
michael@0 | 493 | p = ubuf; |
michael@0 | 494 | n = sizeof(ubuf) - n; |
michael@0 | 495 | res = xp_iconv(gUTF8ToUnicode, (const char **) &p, &n, (char **) output, &outLeft); |
michael@0 | 496 | if (res == (size_t) -1) { |
michael@0 | 497 | NS_ERROR("conversion from utf-8 to utf-16 failed"); |
michael@0 | 498 | break; |
michael@0 | 499 | } |
michael@0 | 500 | } |
michael@0 | 501 | |
michael@0 | 502 | (*input) += (*inputLeft - inLeft); |
michael@0 | 503 | *inputLeft = inLeft; |
michael@0 | 504 | *outputLeft = outLeft / 2; |
michael@0 | 505 | |
michael@0 | 506 | if (res != (size_t) -1) |
michael@0 | 507 | return NS_OK; |
michael@0 | 508 | |
michael@0 | 509 | // reset converters |
michael@0 | 510 | xp_iconv_reset(gNativeToUTF8); |
michael@0 | 511 | xp_iconv_reset(gUTF8ToUnicode); |
michael@0 | 512 | } |
michael@0 | 513 | #endif |
michael@0 | 514 | |
michael@0 | 515 | // fallback: zero-pad and hope for the best |
michael@0 | 516 | // XXX This is lame and we have to do better. |
michael@0 | 517 | isolatin1_to_utf16(input, inputLeft, output, outputLeft); |
michael@0 | 518 | |
michael@0 | 519 | return NS_OK; |
michael@0 | 520 | } |
michael@0 | 521 | |
michael@0 | 522 | nsresult |
michael@0 | 523 | nsNativeCharsetConverter::UnicodeToNative(const char16_t **input, |
michael@0 | 524 | uint32_t *inputLeft, |
michael@0 | 525 | char **output, |
michael@0 | 526 | uint32_t *outputLeft) |
michael@0 | 527 | { |
michael@0 | 528 | size_t res = 0; |
michael@0 | 529 | size_t inLeft = (size_t) *inputLeft * 2; |
michael@0 | 530 | size_t outLeft = (size_t) *outputLeft; |
michael@0 | 531 | |
michael@0 | 532 | if (gUnicodeToNative != INVALID_ICONV_T) { |
michael@0 | 533 | res = xp_iconv(gUnicodeToNative, (const char **) input, &inLeft, output, &outLeft); |
michael@0 | 534 | |
michael@0 | 535 | *inputLeft = inLeft / 2; |
michael@0 | 536 | *outputLeft = outLeft; |
michael@0 | 537 | if (res != (size_t) -1) { |
michael@0 | 538 | return NS_OK; |
michael@0 | 539 | } |
michael@0 | 540 | |
michael@0 | 541 | NS_ERROR("iconv failed"); |
michael@0 | 542 | |
michael@0 | 543 | // reset converter |
michael@0 | 544 | xp_iconv_reset(gUnicodeToNative); |
michael@0 | 545 | } |
michael@0 | 546 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT) |
michael@0 | 547 | else if ((gUnicodeToUTF8 != INVALID_ICONV_T) && |
michael@0 | 548 | (gUTF8ToNative != INVALID_ICONV_T)) { |
michael@0 | 549 | const char *in = (const char *) *input; |
michael@0 | 550 | |
michael@0 | 551 | char ubuf[6]; // max utf-8 char length (really only needs to be 4 bytes) |
michael@0 | 552 | |
michael@0 | 553 | // convert one uchar at a time... |
michael@0 | 554 | while (inLeft && outLeft) { |
michael@0 | 555 | char *p = ubuf; |
michael@0 | 556 | size_t n = sizeof(ubuf), one_uchar = sizeof(char16_t); |
michael@0 | 557 | res = xp_iconv(gUnicodeToUTF8, &in, &one_uchar, &p, &n); |
michael@0 | 558 | if (res == (size_t) -1) { |
michael@0 | 559 | NS_ERROR("conversion from utf-16 to utf-8 failed"); |
michael@0 | 560 | break; |
michael@0 | 561 | } |
michael@0 | 562 | p = ubuf; |
michael@0 | 563 | n = sizeof(ubuf) - n; |
michael@0 | 564 | res = xp_iconv(gUTF8ToNative, (const char **) &p, &n, output, &outLeft); |
michael@0 | 565 | if (res == (size_t) -1) { |
michael@0 | 566 | if (errno == E2BIG) { |
michael@0 | 567 | // not enough room for last uchar... back up and return. |
michael@0 | 568 | in -= sizeof(char16_t); |
michael@0 | 569 | res = 0; |
michael@0 | 570 | } |
michael@0 | 571 | else |
michael@0 | 572 | NS_ERROR("conversion from utf-8 to native failed"); |
michael@0 | 573 | break; |
michael@0 | 574 | } |
michael@0 | 575 | inLeft -= sizeof(char16_t); |
michael@0 | 576 | } |
michael@0 | 577 | |
michael@0 | 578 | (*input) += (*inputLeft - inLeft / 2); |
michael@0 | 579 | *inputLeft = inLeft / 2; |
michael@0 | 580 | *outputLeft = outLeft; |
michael@0 | 581 | if (res != (size_t) -1) { |
michael@0 | 582 | return NS_OK; |
michael@0 | 583 | } |
michael@0 | 584 | |
michael@0 | 585 | // reset converters |
michael@0 | 586 | xp_iconv_reset(gUnicodeToUTF8); |
michael@0 | 587 | xp_iconv_reset(gUTF8ToNative); |
michael@0 | 588 | } |
michael@0 | 589 | #endif |
michael@0 | 590 | |
michael@0 | 591 | // fallback: truncate and hope for the best |
michael@0 | 592 | // XXX This is lame and we have to do better. |
michael@0 | 593 | utf16_to_isolatin1(input, inputLeft, output, outputLeft); |
michael@0 | 594 | |
michael@0 | 595 | return NS_OK; |
michael@0 | 596 | } |
michael@0 | 597 | |
michael@0 | 598 | bool |
michael@0 | 599 | nsNativeCharsetConverter::IsNativeUTF8() |
michael@0 | 600 | { |
michael@0 | 601 | if (!gInitialized) { |
michael@0 | 602 | Lock(); |
michael@0 | 603 | if (!gInitialized) |
michael@0 | 604 | LazyInit(); |
michael@0 | 605 | Unlock(); |
michael@0 | 606 | } |
michael@0 | 607 | return gIsNativeUTF8; |
michael@0 | 608 | } |
michael@0 | 609 | |
michael@0 | 610 | #endif // USE_ICONV |
michael@0 | 611 | |
michael@0 | 612 | //----------------------------------------------------------------------------- |
michael@0 | 613 | // conversion using mb[r]towc/wc[r]tomb |
michael@0 | 614 | //----------------------------------------------------------------------------- |
michael@0 | 615 | #if defined(USE_STDCONV) |
michael@0 | 616 | #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC) |
michael@0 | 617 | #include <wchar.h> // mbrtowc, wcrtomb |
michael@0 | 618 | #endif |
michael@0 | 619 | |
michael@0 | 620 | class nsNativeCharsetConverter |
michael@0 | 621 | { |
michael@0 | 622 | public: |
michael@0 | 623 | nsNativeCharsetConverter(); |
michael@0 | 624 | |
michael@0 | 625 | nsresult NativeToUnicode(const char **input , uint32_t *inputLeft, |
michael@0 | 626 | char16_t **output, uint32_t *outputLeft); |
michael@0 | 627 | nsresult UnicodeToNative(const char16_t **input , uint32_t *inputLeft, |
michael@0 | 628 | char **output, uint32_t *outputLeft); |
michael@0 | 629 | |
michael@0 | 630 | static void GlobalInit(); |
michael@0 | 631 | static void GlobalShutdown() { } |
michael@0 | 632 | static bool IsNativeUTF8(); |
michael@0 | 633 | |
michael@0 | 634 | private: |
michael@0 | 635 | static bool gWCharIsUnicode; |
michael@0 | 636 | |
michael@0 | 637 | #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC) |
michael@0 | 638 | mbstate_t ps; |
michael@0 | 639 | #endif |
michael@0 | 640 | }; |
michael@0 | 641 | |
michael@0 | 642 | bool nsNativeCharsetConverter::gWCharIsUnicode = false; |
michael@0 | 643 | |
michael@0 | 644 | nsNativeCharsetConverter::nsNativeCharsetConverter() |
michael@0 | 645 | { |
michael@0 | 646 | #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC) |
michael@0 | 647 | memset(&ps, 0, sizeof(ps)); |
michael@0 | 648 | #endif |
michael@0 | 649 | } |
michael@0 | 650 | |
michael@0 | 651 | void |
michael@0 | 652 | nsNativeCharsetConverter::GlobalInit() |
michael@0 | 653 | { |
michael@0 | 654 | // verify that wchar_t for the current locale is actually unicode. |
michael@0 | 655 | // if it is not, then we should avoid calling mbtowc/wctomb and |
michael@0 | 656 | // just fallback on zero-pad/truncation conversion. |
michael@0 | 657 | // |
michael@0 | 658 | // this test cannot be done at build time because the encoding of |
michael@0 | 659 | // wchar_t may depend on the runtime locale. sad, but true!! |
michael@0 | 660 | // |
michael@0 | 661 | // so, if wchar_t is unicode then converting an ASCII character |
michael@0 | 662 | // to wchar_t should not change its numeric value. we'll just |
michael@0 | 663 | // check what happens with the ASCII 'a' character. |
michael@0 | 664 | // |
michael@0 | 665 | // this test is not perfect... obviously, it could yield false |
michael@0 | 666 | // positives, but then at least ASCII text would be converted |
michael@0 | 667 | // properly (or maybe just the 'a' character) -- oh well :( |
michael@0 | 668 | |
michael@0 | 669 | char a = 'a'; |
michael@0 | 670 | unsigned int w = 0; |
michael@0 | 671 | |
michael@0 | 672 | int res = mbtowc((wchar_t *) &w, &a, 1); |
michael@0 | 673 | |
michael@0 | 674 | gWCharIsUnicode = (res != -1 && w == 'a'); |
michael@0 | 675 | |
michael@0 | 676 | #ifdef DEBUG |
michael@0 | 677 | if (!gWCharIsUnicode) |
michael@0 | 678 | NS_WARNING("wchar_t is not unicode (unicode conversion will be lossy)"); |
michael@0 | 679 | #endif |
michael@0 | 680 | } |
michael@0 | 681 | |
michael@0 | 682 | nsresult |
michael@0 | 683 | nsNativeCharsetConverter::NativeToUnicode(const char **input, |
michael@0 | 684 | uint32_t *inputLeft, |
michael@0 | 685 | char16_t **output, |
michael@0 | 686 | uint32_t *outputLeft) |
michael@0 | 687 | { |
michael@0 | 688 | if (gWCharIsUnicode) { |
michael@0 | 689 | int incr; |
michael@0 | 690 | |
michael@0 | 691 | // cannot use wchar_t here since it may have been redefined (e.g., |
michael@0 | 692 | // via -fshort-wchar). hopefully, sizeof(tmp) is sufficient XP. |
michael@0 | 693 | unsigned int tmp = 0; |
michael@0 | 694 | while (*inputLeft && *outputLeft) { |
michael@0 | 695 | #ifdef HAVE_MBRTOWC |
michael@0 | 696 | incr = (int) mbrtowc((wchar_t *) &tmp, *input, *inputLeft, &ps); |
michael@0 | 697 | #else |
michael@0 | 698 | // XXX is this thread-safe? |
michael@0 | 699 | incr = (int) mbtowc((wchar_t *) &tmp, *input, *inputLeft); |
michael@0 | 700 | #endif |
michael@0 | 701 | if (incr < 0) { |
michael@0 | 702 | NS_WARNING("mbtowc failed: possible charset mismatch"); |
michael@0 | 703 | // zero-pad and hope for the best |
michael@0 | 704 | tmp = (unsigned char) **input; |
michael@0 | 705 | incr = 1; |
michael@0 | 706 | } |
michael@0 | 707 | **output = (char16_t) tmp; |
michael@0 | 708 | (*input) += incr; |
michael@0 | 709 | (*inputLeft) -= incr; |
michael@0 | 710 | (*output)++; |
michael@0 | 711 | (*outputLeft)--; |
michael@0 | 712 | } |
michael@0 | 713 | } |
michael@0 | 714 | else { |
michael@0 | 715 | // wchar_t isn't unicode, so the best we can do is treat the |
michael@0 | 716 | // input as if it is isolatin1 :( |
michael@0 | 717 | isolatin1_to_utf16(input, inputLeft, output, outputLeft); |
michael@0 | 718 | } |
michael@0 | 719 | |
michael@0 | 720 | return NS_OK; |
michael@0 | 721 | } |
michael@0 | 722 | |
michael@0 | 723 | nsresult |
michael@0 | 724 | nsNativeCharsetConverter::UnicodeToNative(const char16_t **input, |
michael@0 | 725 | uint32_t *inputLeft, |
michael@0 | 726 | char **output, |
michael@0 | 727 | uint32_t *outputLeft) |
michael@0 | 728 | { |
michael@0 | 729 | if (gWCharIsUnicode) { |
michael@0 | 730 | int incr; |
michael@0 | 731 | |
michael@0 | 732 | while (*inputLeft && *outputLeft >= MB_CUR_MAX) { |
michael@0 | 733 | #ifdef HAVE_WCRTOMB |
michael@0 | 734 | incr = (int) wcrtomb(*output, (wchar_t) **input, &ps); |
michael@0 | 735 | #else |
michael@0 | 736 | // XXX is this thread-safe? |
michael@0 | 737 | incr = (int) wctomb(*output, (wchar_t) **input); |
michael@0 | 738 | #endif |
michael@0 | 739 | if (incr < 0) { |
michael@0 | 740 | NS_WARNING("mbtowc failed: possible charset mismatch"); |
michael@0 | 741 | **output = (unsigned char) **input; // truncate |
michael@0 | 742 | incr = 1; |
michael@0 | 743 | } |
michael@0 | 744 | // most likely we're dead anyways if this assertion should fire |
michael@0 | 745 | NS_ASSERTION(uint32_t(incr) <= *outputLeft, "wrote beyond end of string"); |
michael@0 | 746 | (*output) += incr; |
michael@0 | 747 | (*outputLeft) -= incr; |
michael@0 | 748 | (*input)++; |
michael@0 | 749 | (*inputLeft)--; |
michael@0 | 750 | } |
michael@0 | 751 | } |
michael@0 | 752 | else { |
michael@0 | 753 | // wchar_t isn't unicode, so the best we can do is treat the |
michael@0 | 754 | // input as if it is isolatin1 :( |
michael@0 | 755 | utf16_to_isolatin1(input, inputLeft, output, outputLeft); |
michael@0 | 756 | } |
michael@0 | 757 | |
michael@0 | 758 | return NS_OK; |
michael@0 | 759 | } |
michael@0 | 760 | |
michael@0 | 761 | // XXX : for now, return false |
michael@0 | 762 | bool |
michael@0 | 763 | nsNativeCharsetConverter::IsNativeUTF8() |
michael@0 | 764 | { |
michael@0 | 765 | return false; |
michael@0 | 766 | } |
michael@0 | 767 | |
michael@0 | 768 | #endif // USE_STDCONV |
michael@0 | 769 | |
michael@0 | 770 | //----------------------------------------------------------------------------- |
michael@0 | 771 | // API implementation |
michael@0 | 772 | //----------------------------------------------------------------------------- |
michael@0 | 773 | |
michael@0 | 774 | nsresult |
michael@0 | 775 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output) |
michael@0 | 776 | { |
michael@0 | 777 | output.Truncate(); |
michael@0 | 778 | |
michael@0 | 779 | uint32_t inputLen = input.Length(); |
michael@0 | 780 | |
michael@0 | 781 | nsACString::const_iterator iter; |
michael@0 | 782 | input.BeginReading(iter); |
michael@0 | 783 | |
michael@0 | 784 | // |
michael@0 | 785 | // OPTIMIZATION: preallocate space for largest possible result; convert |
michael@0 | 786 | // directly into the result buffer to avoid intermediate buffer copy. |
michael@0 | 787 | // |
michael@0 | 788 | // this will generally result in a larger allocation, but that seems |
michael@0 | 789 | // better than an extra buffer copy. |
michael@0 | 790 | // |
michael@0 | 791 | if (!output.SetLength(inputLen, fallible_t())) |
michael@0 | 792 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 793 | nsAString::iterator out_iter; |
michael@0 | 794 | output.BeginWriting(out_iter); |
michael@0 | 795 | |
michael@0 | 796 | char16_t *result = out_iter.get(); |
michael@0 | 797 | uint32_t resultLeft = inputLen; |
michael@0 | 798 | |
michael@0 | 799 | const char *buf = iter.get(); |
michael@0 | 800 | uint32_t bufLeft = inputLen; |
michael@0 | 801 | |
michael@0 | 802 | nsNativeCharsetConverter conv; |
michael@0 | 803 | nsresult rv = conv.NativeToUnicode(&buf, &bufLeft, &result, &resultLeft); |
michael@0 | 804 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 805 | NS_ASSERTION(bufLeft == 0, "did not consume entire input buffer"); |
michael@0 | 806 | output.SetLength(inputLen - resultLeft); |
michael@0 | 807 | } |
michael@0 | 808 | return rv; |
michael@0 | 809 | } |
michael@0 | 810 | |
michael@0 | 811 | nsresult |
michael@0 | 812 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output) |
michael@0 | 813 | { |
michael@0 | 814 | output.Truncate(); |
michael@0 | 815 | |
michael@0 | 816 | nsAString::const_iterator iter, end; |
michael@0 | 817 | input.BeginReading(iter); |
michael@0 | 818 | input.EndReading(end); |
michael@0 | 819 | |
michael@0 | 820 | // cannot easily avoid intermediate buffer copy. |
michael@0 | 821 | char temp[4096]; |
michael@0 | 822 | |
michael@0 | 823 | nsNativeCharsetConverter conv; |
michael@0 | 824 | |
michael@0 | 825 | const char16_t *buf = iter.get(); |
michael@0 | 826 | uint32_t bufLeft = Distance(iter, end); |
michael@0 | 827 | while (bufLeft) { |
michael@0 | 828 | char *p = temp; |
michael@0 | 829 | uint32_t tempLeft = sizeof(temp); |
michael@0 | 830 | |
michael@0 | 831 | nsresult rv = conv.UnicodeToNative(&buf, &bufLeft, &p, &tempLeft); |
michael@0 | 832 | if (NS_FAILED(rv)) return rv; |
michael@0 | 833 | |
michael@0 | 834 | if (tempLeft < sizeof(temp)) |
michael@0 | 835 | output.Append(temp, sizeof(temp) - tempLeft); |
michael@0 | 836 | } |
michael@0 | 837 | return NS_OK; |
michael@0 | 838 | } |
michael@0 | 839 | |
michael@0 | 840 | bool |
michael@0 | 841 | NS_IsNativeUTF8() |
michael@0 | 842 | { |
michael@0 | 843 | return nsNativeCharsetConverter::IsNativeUTF8(); |
michael@0 | 844 | } |
michael@0 | 845 | |
michael@0 | 846 | void |
michael@0 | 847 | NS_StartupNativeCharsetUtils() |
michael@0 | 848 | { |
michael@0 | 849 | // |
michael@0 | 850 | // need to initialize the locale or else charset conversion will fail. |
michael@0 | 851 | // better not delay this in case some other component alters the locale |
michael@0 | 852 | // settings. |
michael@0 | 853 | // |
michael@0 | 854 | // XXX we assume that we are called early enough that we should |
michael@0 | 855 | // always be the first to care about the locale's charset. |
michael@0 | 856 | // |
michael@0 | 857 | setlocale(LC_CTYPE, ""); |
michael@0 | 858 | |
michael@0 | 859 | nsNativeCharsetConverter::GlobalInit(); |
michael@0 | 860 | } |
michael@0 | 861 | |
michael@0 | 862 | void |
michael@0 | 863 | NS_ShutdownNativeCharsetUtils() |
michael@0 | 864 | { |
michael@0 | 865 | nsNativeCharsetConverter::GlobalShutdown(); |
michael@0 | 866 | } |
michael@0 | 867 | |
michael@0 | 868 | //----------------------------------------------------------------------------- |
michael@0 | 869 | // XP_WIN |
michael@0 | 870 | //----------------------------------------------------------------------------- |
michael@0 | 871 | #elif defined(XP_WIN) |
michael@0 | 872 | |
michael@0 | 873 | #include <windows.h> |
michael@0 | 874 | #include "nsString.h" |
michael@0 | 875 | #include "nsAString.h" |
michael@0 | 876 | #include "nsReadableUtils.h" |
michael@0 | 877 | |
michael@0 | 878 | using namespace mozilla; |
michael@0 | 879 | |
michael@0 | 880 | nsresult |
michael@0 | 881 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output) |
michael@0 | 882 | { |
michael@0 | 883 | uint32_t inputLen = input.Length(); |
michael@0 | 884 | |
michael@0 | 885 | nsACString::const_iterator iter; |
michael@0 | 886 | input.BeginReading(iter); |
michael@0 | 887 | |
michael@0 | 888 | const char *buf = iter.get(); |
michael@0 | 889 | |
michael@0 | 890 | // determine length of result |
michael@0 | 891 | uint32_t resultLen = 0; |
michael@0 | 892 | int n = ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, nullptr, 0); |
michael@0 | 893 | if (n > 0) |
michael@0 | 894 | resultLen += n; |
michael@0 | 895 | |
michael@0 | 896 | // allocate sufficient space |
michael@0 | 897 | if (!output.SetLength(resultLen, fallible_t())) |
michael@0 | 898 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 899 | if (resultLen > 0) { |
michael@0 | 900 | nsAString::iterator out_iter; |
michael@0 | 901 | output.BeginWriting(out_iter); |
michael@0 | 902 | |
michael@0 | 903 | char16_t *result = out_iter.get(); |
michael@0 | 904 | |
michael@0 | 905 | ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, wwc(result), resultLen); |
michael@0 | 906 | } |
michael@0 | 907 | return NS_OK; |
michael@0 | 908 | } |
michael@0 | 909 | |
michael@0 | 910 | nsresult |
michael@0 | 911 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output) |
michael@0 | 912 | { |
michael@0 | 913 | uint32_t inputLen = input.Length(); |
michael@0 | 914 | |
michael@0 | 915 | nsAString::const_iterator iter; |
michael@0 | 916 | input.BeginReading(iter); |
michael@0 | 917 | |
michael@0 | 918 | char16ptr_t buf = iter.get(); |
michael@0 | 919 | |
michael@0 | 920 | // determine length of result |
michael@0 | 921 | uint32_t resultLen = 0; |
michael@0 | 922 | |
michael@0 | 923 | int n = ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, nullptr, 0, |
michael@0 | 924 | nullptr, nullptr); |
michael@0 | 925 | if (n > 0) |
michael@0 | 926 | resultLen += n; |
michael@0 | 927 | |
michael@0 | 928 | // allocate sufficient space |
michael@0 | 929 | if (!output.SetLength(resultLen, fallible_t())) |
michael@0 | 930 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 931 | if (resultLen > 0) { |
michael@0 | 932 | nsACString::iterator out_iter; |
michael@0 | 933 | output.BeginWriting(out_iter); |
michael@0 | 934 | |
michael@0 | 935 | // default "defaultChar" is '?', which is an illegal character on windows |
michael@0 | 936 | // file system. That will cause file uncreatable. Change it to '_' |
michael@0 | 937 | const char defaultChar = '_'; |
michael@0 | 938 | |
michael@0 | 939 | char *result = out_iter.get(); |
michael@0 | 940 | |
michael@0 | 941 | ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, result, resultLen, |
michael@0 | 942 | &defaultChar, nullptr); |
michael@0 | 943 | } |
michael@0 | 944 | return NS_OK; |
michael@0 | 945 | } |
michael@0 | 946 | |
michael@0 | 947 | // moved from widget/windows/nsToolkit.cpp |
michael@0 | 948 | int32_t |
michael@0 | 949 | NS_ConvertAtoW(const char *aStrInA, int aBufferSize, char16_t *aStrOutW) |
michael@0 | 950 | { |
michael@0 | 951 | return MultiByteToWideChar(CP_ACP, 0, aStrInA, -1, wwc(aStrOutW), aBufferSize); |
michael@0 | 952 | } |
michael@0 | 953 | |
michael@0 | 954 | int32_t |
michael@0 | 955 | NS_ConvertWtoA(const char16_t *aStrInW, int aBufferSizeOut, |
michael@0 | 956 | char *aStrOutA, const char *aDefault) |
michael@0 | 957 | { |
michael@0 | 958 | if ((!aStrInW) || (!aStrOutA) || (aBufferSizeOut <= 0)) |
michael@0 | 959 | return 0; |
michael@0 | 960 | |
michael@0 | 961 | int numCharsConverted = WideCharToMultiByte(CP_ACP, 0, char16ptr_t(aStrInW), -1, |
michael@0 | 962 | aStrOutA, aBufferSizeOut, |
michael@0 | 963 | aDefault, nullptr); |
michael@0 | 964 | |
michael@0 | 965 | if (!numCharsConverted) { |
michael@0 | 966 | if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { |
michael@0 | 967 | // Overflow, add missing null termination but return 0 |
michael@0 | 968 | aStrOutA[aBufferSizeOut-1] = '\0'; |
michael@0 | 969 | } |
michael@0 | 970 | else { |
michael@0 | 971 | // Other error, clear string and return 0 |
michael@0 | 972 | aStrOutA[0] = '\0'; |
michael@0 | 973 | } |
michael@0 | 974 | } |
michael@0 | 975 | else if (numCharsConverted < aBufferSizeOut) { |
michael@0 | 976 | // Add 2nd null (really necessary?) |
michael@0 | 977 | aStrOutA[numCharsConverted] = '\0'; |
michael@0 | 978 | } |
michael@0 | 979 | |
michael@0 | 980 | return numCharsConverted; |
michael@0 | 981 | } |
michael@0 | 982 | |
michael@0 | 983 | #else |
michael@0 | 984 | |
michael@0 | 985 | #include "nsReadableUtils.h" |
michael@0 | 986 | |
michael@0 | 987 | nsresult |
michael@0 | 988 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output) |
michael@0 | 989 | { |
michael@0 | 990 | CopyASCIItoUTF16(input, output); |
michael@0 | 991 | return NS_OK; |
michael@0 | 992 | } |
michael@0 | 993 | |
michael@0 | 994 | nsresult |
michael@0 | 995 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output) |
michael@0 | 996 | { |
michael@0 | 997 | LossyCopyUTF16toASCII(input, output); |
michael@0 | 998 | return NS_OK; |
michael@0 | 999 | } |
michael@0 | 1000 | |
// Fallback platforms need no startup work for charset conversion.
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
michael@0 | 1005 | |
// Fallback platforms have nothing to tear down for charset conversion.
void
NS_ShutdownNativeCharsetUtils()
{
    // intentionally empty
}
michael@0 | 1010 | |
michael@0 | 1011 | #endif |