Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "xpcom-private.h"
7 //-----------------------------------------------------------------------------
8 // XP_MACOSX or ANDROID
9 //-----------------------------------------------------------------------------
10 #if defined(XP_MACOSX) || defined(ANDROID)
12 #include "nsAString.h"
13 #include "nsReadableUtils.h"
14 #include "nsString.h"
16 nsresult
17 NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
18 {
19 CopyUTF8toUTF16(input, output);
20 return NS_OK;
21 }
23 nsresult
24 NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
25 {
26 CopyUTF16toUTF8(input, output);
27 return NS_OK;
28 }
// Nothing to set up on this platform branch: the conversions above keep no
// global state.
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
// Nothing to tear down on this platform branch.
void
NS_ShutdownNativeCharsetUtils()
{
    // intentionally empty
}
41 //-----------------------------------------------------------------------------
42 // XP_UNIX
43 //-----------------------------------------------------------------------------
44 #elif defined(XP_UNIX)
46 #include <stdlib.h> // mbtowc, wctomb
47 #include <locale.h> // setlocale
48 #include "mozilla/Mutex.h"
49 #include "nscore.h"
50 #include "nsAString.h"
51 #include "nsReadableUtils.h"
53 using namespace mozilla;
//
// choose a conversion library.  we used to use mbrtowc/wcrtomb under Linux,
// but that doesn't work for non-BMP characters whether we use '-fshort-wchar'
// or not (see bug 206811 and
// news://news.mozilla.org:119/bajml3$fvr1@ripley.netscape.com).  we now use
// iconv on all platforms where nl_types.h and langinfo.h (with CODESET) are
// present along with iconv.
//
63 #if defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_LANGINFO_CODESET)
64 #define USE_ICONV 1
65 #else
66 #define USE_STDCONV 1
67 #endif
// Widen bytes to UTF-16 code units one-for-one (each byte is zero-extended,
// i.e. interpreted as ISO-8859-1).  Copies until either the input or the
// output is exhausted; all four in/out parameters are advanced/decremented
// to reflect what was consumed and produced.
static void
isolatin1_to_utf16(const char **input, uint32_t *inputLeft, char16_t **output, uint32_t *outputLeft)
{
    while (*inputLeft != 0 && *outputLeft != 0) {
        // go through unsigned char so bytes >= 0x80 are not sign-extended
        const unsigned char byte = (unsigned char) **input;
        **output = byte;

        ++*input;
        --*inputLeft;
        ++*output;
        --*outputLeft;
    }
}
// Narrow UTF-16 code units to bytes one-for-one by keeping only the low
// 8 bits of each unit (lossy for anything outside ISO-8859-1).  Copies until
// either the input or the output is exhausted; all four in/out parameters
// are advanced/decremented to reflect what was consumed and produced.
static void
utf16_to_isolatin1(const char16_t **input, uint32_t *inputLeft, char **output, uint32_t *outputLeft)
{
    while (*inputLeft != 0 && *outputLeft != 0) {
        // truncate the code unit to its low byte
        const unsigned char lowByte = (unsigned char) **input;
        **output = lowByte;

        ++*input;
        --*inputLeft;
        ++*output;
        --*outputLeft;
    }
}
93 //-----------------------------------------------------------------------------
94 // conversion using iconv
95 //-----------------------------------------------------------------------------
96 #if defined(USE_ICONV)
97 #include <nl_types.h> // CODESET
98 #include <langinfo.h> // nl_langinfo
99 #include <iconv.h> // iconv_open, iconv, iconv_close
100 #include <errno.h>
101 #include "plstr.h"
// Adapts the input-buffer argument to the prototype of the platform's
// iconv(): passed through unchanged when the build says iconv takes a const
// input pointer, otherwise cast to plain |char **|.  The argument is
// parenthesized so that the cast applies to the whole expression.
#if defined(HAVE_ICONV_WITH_CONST_INPUT)
#define ICONV_INPUT(x) (x)
#else
#define ICONV_INPUT(x) ((char **)(x))
#endif
109 // solaris definitely needs this, but we'll enable it by default
110 // just in case... but we know for sure that iconv(3) in glibc
111 // doesn't need this.
112 #if !defined(__GLIBC__)
113 #define ENABLE_UTF8_FALLBACK_SUPPORT
114 #endif
116 #define INVALID_ICONV_T ((iconv_t) -1)
118 static inline size_t
119 xp_iconv(iconv_t converter,
120 const char **input,
121 size_t *inputLeft,
122 char **output,
123 size_t *outputLeft)
124 {
125 size_t res, outputAvail = outputLeft ? *outputLeft : 0;
126 res = iconv(converter, ICONV_INPUT(input), inputLeft, output, outputLeft);
127 if (res == (size_t) -1) {
128 // on some platforms (e.g., linux) iconv will fail with
129 // E2BIG if it cannot convert _all_ of its input. it'll
130 // still adjust all of the in/out params correctly, so we
131 // can ignore this error. the assumption is that we will
132 // be called again to complete the conversion.
133 if ((errno == E2BIG) && (*outputLeft < outputAvail))
134 res = 0;
135 }
136 return res;
137 }
139 static inline void
140 xp_iconv_reset(iconv_t converter)
141 {
142 // NOTE: the man pages on Solaris claim that you can pass nullptr
143 // for all parameter to reset the converter, but beware the
144 // evil Solaris crash if you go down this route >:-)
146 const char *zero_char_in_ptr = nullptr;
147 char *zero_char_out_ptr = nullptr;
148 size_t zero_size_in = 0,
149 zero_size_out = 0;
151 xp_iconv(converter, &zero_char_in_ptr,
152 &zero_size_in,
153 &zero_char_out_ptr,
154 &zero_size_out);
155 }
157 static inline iconv_t
158 xp_iconv_open(const char **to_list, const char **from_list)
159 {
160 iconv_t res;
161 const char **from_name;
162 const char **to_name;
164 // try all possible combinations to locate a converter.
165 to_name = to_list;
166 while (*to_name) {
167 if (**to_name) {
168 from_name = from_list;
169 while (*from_name) {
170 if (**from_name) {
171 res = iconv_open(*to_name, *from_name);
172 if (res != INVALID_ICONV_T)
173 return res;
174 }
175 from_name++;
176 }
177 }
178 to_name++;
179 }
181 return INVALID_ICONV_T;
182 }
184 /*
185 * char16_t[] is NOT a UCS-2 array BUT a UTF-16 string. Therefore, we
186 * have to use UTF-16 with iconv(3) on platforms where it's supported.
187 * However, the way UTF-16 and UCS-2 are interpreted varies across platforms
188 * and implementations of iconv(3). On Tru64, it also depends on the environment
189 * variable. To avoid the trouble arising from byte-swapping
190 * (bug 208809), we have to try UTF-16LE/BE and UCS-2LE/BE before falling
191 * back to UTF-16 and UCS-2 and variants. We assume that UTF-16 and UCS-2
192 * on systems without UTF-16LE/BE and UCS-2LE/BE have the native endianness,
193 * which isn't the case of glibc 2.1.x, for which we use 'UNICODELITTLE'
194 * and 'UNICODEBIG'. It's also not true of Tru64 V4 when the environment
195 * variable ICONV_BYTEORDER is set to 'big-endian', about which not much
196 * can be done other than adding a note in the release notes. (bug 206811)
197 */
// Candidate iconv names for the UTF-16 side of a conversion, in preference
// order: explicit native-endian names first, then the endian-less spellings.
// Null-terminated for xp_iconv_open().
static const char *UTF_16_NAMES[] = {
#if defined(IS_LITTLE_ENDIAN)
    "UTF-16LE",
#  if defined(__GLIBC__)
    "UNICODELITTLE",
#  endif
    "UCS-2LE",
#else
    "UTF-16BE",
#  if defined(__GLIBC__)
    "UNICODEBIG",
#  endif
    "UCS-2BE",
#endif
    "UTF-16",
    "UCS-2",
    "UCS2",
    "UCS_2",
    "ucs-2",
    "ucs2",
    "ucs_2",
    nullptr
};
#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
// Candidate iconv names for UTF-8, tried in order; only needed when the
// two-step (via UTF-8) fallback is compiled in.  Null-terminated for
// xp_iconv_open().
static const char *UTF_8_NAMES[] = {
    "UTF-8",
    "UTF8",
    "UTF_8",
    "utf-8",
    "utf8",
    "utf_8",
    nullptr
};
#endif // ENABLE_UTF8_FALLBACK_SUPPORT
// Candidate iconv names for ISO-8859-1, tried in order.  With glibc only the
// canonical spelling is listed; elsewhere a range of variants is attempted.
// Null-terminated for xp_iconv_open().
static const char *ISO_8859_1_NAMES[] = {
    "ISO-8859-1",
#if !defined(__GLIBC__)
    "ISO8859-1",
    "ISO88591",
    "ISO_8859_1",
    "ISO8859_1",
    "iso-8859-1",
    "iso8859-1",
    "iso88591",
    "iso_8859_1",
    "iso8859_1",
#endif // !__GLIBC__
    nullptr
};
250 class nsNativeCharsetConverter
251 {
252 public:
253 nsNativeCharsetConverter();
254 ~nsNativeCharsetConverter();
256 nsresult NativeToUnicode(const char **input , uint32_t *inputLeft,
257 char16_t **output, uint32_t *outputLeft);
258 nsresult UnicodeToNative(const char16_t **input , uint32_t *inputLeft,
259 char **output, uint32_t *outputLeft);
261 static void GlobalInit();
262 static void GlobalShutdown();
263 static bool IsNativeUTF8();
265 private:
266 static iconv_t gNativeToUnicode;
267 static iconv_t gUnicodeToNative;
268 #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
269 static iconv_t gNativeToUTF8;
270 static iconv_t gUTF8ToNative;
271 static iconv_t gUnicodeToUTF8;
272 static iconv_t gUTF8ToUnicode;
273 #endif
274 static Mutex *gLock;
275 static bool gInitialized;
276 static bool gIsNativeUTF8;
278 static void LazyInit();
280 static void Lock() { if (gLock) gLock->Lock(); }
281 static void Unlock() { if (gLock) gLock->Unlock(); }
282 };
284 iconv_t nsNativeCharsetConverter::gNativeToUnicode = INVALID_ICONV_T;
285 iconv_t nsNativeCharsetConverter::gUnicodeToNative = INVALID_ICONV_T;
286 #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
287 iconv_t nsNativeCharsetConverter::gNativeToUTF8 = INVALID_ICONV_T;
288 iconv_t nsNativeCharsetConverter::gUTF8ToNative = INVALID_ICONV_T;
289 iconv_t nsNativeCharsetConverter::gUnicodeToUTF8 = INVALID_ICONV_T;
290 iconv_t nsNativeCharsetConverter::gUTF8ToUnicode = INVALID_ICONV_T;
291 #endif
292 Mutex *nsNativeCharsetConverter::gLock = nullptr;
293 bool nsNativeCharsetConverter::gInitialized = false;
294 bool nsNativeCharsetConverter::gIsNativeUTF8 = false;
296 void
297 nsNativeCharsetConverter::LazyInit()
298 {
299 // LazyInit may be called before NS_StartupNativeCharsetUtils, but
300 // the setlocale it does has to be called before nl_langinfo. Like in
301 // NS_StartupNativeCharsetUtils, assume we are called early enough that
302 // we are the first to care about the locale's charset.
303 if (!gLock)
304 setlocale(LC_CTYPE, "");
305 const char *blank_list[] = { "", nullptr };
306 const char **native_charset_list = blank_list;
307 const char *native_charset = nl_langinfo(CODESET);
308 if (native_charset == nullptr) {
309 NS_ERROR("native charset is unknown");
310 // fallback to ISO-8859-1
311 native_charset_list = ISO_8859_1_NAMES;
312 }
313 else
314 native_charset_list[0] = native_charset;
316 // Most, if not all, Unixen supporting UTF-8 and nl_langinfo(CODESET)
317 // return 'UTF-8' (or 'utf-8')
318 if (!PL_strcasecmp(native_charset, "UTF-8"))
319 gIsNativeUTF8 = true;
321 gNativeToUnicode = xp_iconv_open(UTF_16_NAMES, native_charset_list);
322 gUnicodeToNative = xp_iconv_open(native_charset_list, UTF_16_NAMES);
324 #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
325 if (gNativeToUnicode == INVALID_ICONV_T) {
326 gNativeToUTF8 = xp_iconv_open(UTF_8_NAMES, native_charset_list);
327 gUTF8ToUnicode = xp_iconv_open(UTF_16_NAMES, UTF_8_NAMES);
328 NS_ASSERTION(gNativeToUTF8 != INVALID_ICONV_T, "no native to utf-8 converter");
329 NS_ASSERTION(gUTF8ToUnicode != INVALID_ICONV_T, "no utf-8 to utf-16 converter");
330 }
331 if (gUnicodeToNative == INVALID_ICONV_T) {
332 gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UTF_16_NAMES);
333 gUTF8ToNative = xp_iconv_open(native_charset_list, UTF_8_NAMES);
334 NS_ASSERTION(gUnicodeToUTF8 != INVALID_ICONV_T, "no utf-16 to utf-8 converter");
335 NS_ASSERTION(gUTF8ToNative != INVALID_ICONV_T, "no utf-8 to native converter");
336 }
337 #else
338 NS_ASSERTION(gNativeToUnicode != INVALID_ICONV_T, "no native to utf-16 converter");
339 NS_ASSERTION(gUnicodeToNative != INVALID_ICONV_T, "no utf-16 to native converter");
340 #endif
342 /*
343 * On Solaris 8 (and newer?), the iconv modules converting to UCS-2
344 * prepend a byte order mark unicode character (BOM, u+FEFF) during
345 * the first use of the iconv converter. The same is the case of
346 * glibc 2.2.9x and Tru64 V5 (see bug 208809) when 'UTF-16' is used.
347 * However, we use 'UTF-16LE/BE' in both cases, instead so that we
348 * should be safe. But just in case...
349 *
350 * This dummy conversion gets rid of the BOMs and fixes bug 153562.
351 */
352 char dummy_input[1] = { ' ' };
353 char dummy_output[4];
355 if (gNativeToUnicode != INVALID_ICONV_T) {
356 const char *input = dummy_input;
357 size_t input_left = sizeof(dummy_input);
358 char *output = dummy_output;
359 size_t output_left = sizeof(dummy_output);
361 xp_iconv(gNativeToUnicode, &input, &input_left, &output, &output_left);
362 }
363 #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
364 if (gUTF8ToUnicode != INVALID_ICONV_T) {
365 const char *input = dummy_input;
366 size_t input_left = sizeof(dummy_input);
367 char *output = dummy_output;
368 size_t output_left = sizeof(dummy_output);
370 xp_iconv(gUTF8ToUnicode, &input, &input_left, &output, &output_left);
371 }
372 #endif
374 gInitialized = true;
375 }
377 void
378 nsNativeCharsetConverter::GlobalInit()
379 {
380 gLock = new Mutex("nsNativeCharsetConverter.gLock");
381 }
383 void
384 nsNativeCharsetConverter::GlobalShutdown()
385 {
386 if (gLock) {
387 delete gLock;
388 gLock = nullptr;
389 }
391 if (gNativeToUnicode != INVALID_ICONV_T) {
392 iconv_close(gNativeToUnicode);
393 gNativeToUnicode = INVALID_ICONV_T;
394 }
396 if (gUnicodeToNative != INVALID_ICONV_T) {
397 iconv_close(gUnicodeToNative);
398 gUnicodeToNative = INVALID_ICONV_T;
399 }
401 #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
402 if (gNativeToUTF8 != INVALID_ICONV_T) {
403 iconv_close(gNativeToUTF8);
404 gNativeToUTF8 = INVALID_ICONV_T;
405 }
406 if (gUTF8ToNative != INVALID_ICONV_T) {
407 iconv_close(gUTF8ToNative);
408 gUTF8ToNative = INVALID_ICONV_T;
409 }
410 if (gUnicodeToUTF8 != INVALID_ICONV_T) {
411 iconv_close(gUnicodeToUTF8);
412 gUnicodeToUTF8 = INVALID_ICONV_T;
413 }
414 if (gUTF8ToUnicode != INVALID_ICONV_T) {
415 iconv_close(gUTF8ToUnicode);
416 gUTF8ToUnicode = INVALID_ICONV_T;
417 }
418 #endif
420 gInitialized = false;
421 }
423 nsNativeCharsetConverter::nsNativeCharsetConverter()
424 {
425 Lock();
426 if (!gInitialized)
427 LazyInit();
428 }
430 nsNativeCharsetConverter::~nsNativeCharsetConverter()
431 {
432 // reset converters for next time
433 if (gNativeToUnicode != INVALID_ICONV_T)
434 xp_iconv_reset(gNativeToUnicode);
435 if (gUnicodeToNative != INVALID_ICONV_T)
436 xp_iconv_reset(gUnicodeToNative);
437 #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
438 if (gNativeToUTF8 != INVALID_ICONV_T)
439 xp_iconv_reset(gNativeToUTF8);
440 if (gUTF8ToNative != INVALID_ICONV_T)
441 xp_iconv_reset(gUTF8ToNative);
442 if (gUnicodeToUTF8 != INVALID_ICONV_T)
443 xp_iconv_reset(gUnicodeToUTF8);
444 if (gUTF8ToUnicode != INVALID_ICONV_T)
445 xp_iconv_reset(gUTF8ToUnicode);
446 #endif
447 Unlock();
448 }
450 nsresult
451 nsNativeCharsetConverter::NativeToUnicode(const char **input,
452 uint32_t *inputLeft,
453 char16_t **output,
454 uint32_t *outputLeft)
455 {
456 size_t res = 0;
457 size_t inLeft = (size_t) *inputLeft;
458 size_t outLeft = (size_t) *outputLeft * 2;
460 if (gNativeToUnicode != INVALID_ICONV_T) {
462 res = xp_iconv(gNativeToUnicode, input, &inLeft, (char **) output, &outLeft);
464 *inputLeft = inLeft;
465 *outputLeft = outLeft / 2;
466 if (res != (size_t) -1)
467 return NS_OK;
469 NS_WARNING("conversion from native to utf-16 failed");
471 // reset converter
472 xp_iconv_reset(gNativeToUnicode);
473 }
474 #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
475 else if ((gNativeToUTF8 != INVALID_ICONV_T) &&
476 (gUTF8ToUnicode != INVALID_ICONV_T)) {
477 // convert first to UTF8, then from UTF8 to UCS2
478 const char *in = *input;
480 char ubuf[1024];
482 // we assume we're always called with enough space in |output|,
483 // so convert many chars at a time...
484 while (inLeft) {
485 char *p = ubuf;
486 size_t n = sizeof(ubuf);
487 res = xp_iconv(gNativeToUTF8, &in, &inLeft, &p, &n);
488 if (res == (size_t) -1) {
489 NS_ERROR("conversion from native to utf-8 failed");
490 break;
491 }
492 NS_ASSERTION(outLeft > 0, "bad assumption");
493 p = ubuf;
494 n = sizeof(ubuf) - n;
495 res = xp_iconv(gUTF8ToUnicode, (const char **) &p, &n, (char **) output, &outLeft);
496 if (res == (size_t) -1) {
497 NS_ERROR("conversion from utf-8 to utf-16 failed");
498 break;
499 }
500 }
502 (*input) += (*inputLeft - inLeft);
503 *inputLeft = inLeft;
504 *outputLeft = outLeft / 2;
506 if (res != (size_t) -1)
507 return NS_OK;
509 // reset converters
510 xp_iconv_reset(gNativeToUTF8);
511 xp_iconv_reset(gUTF8ToUnicode);
512 }
513 #endif
515 // fallback: zero-pad and hope for the best
516 // XXX This is lame and we have to do better.
517 isolatin1_to_utf16(input, inputLeft, output, outputLeft);
519 return NS_OK;
520 }
522 nsresult
523 nsNativeCharsetConverter::UnicodeToNative(const char16_t **input,
524 uint32_t *inputLeft,
525 char **output,
526 uint32_t *outputLeft)
527 {
528 size_t res = 0;
529 size_t inLeft = (size_t) *inputLeft * 2;
530 size_t outLeft = (size_t) *outputLeft;
532 if (gUnicodeToNative != INVALID_ICONV_T) {
533 res = xp_iconv(gUnicodeToNative, (const char **) input, &inLeft, output, &outLeft);
535 *inputLeft = inLeft / 2;
536 *outputLeft = outLeft;
537 if (res != (size_t) -1) {
538 return NS_OK;
539 }
541 NS_ERROR("iconv failed");
543 // reset converter
544 xp_iconv_reset(gUnicodeToNative);
545 }
546 #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
547 else if ((gUnicodeToUTF8 != INVALID_ICONV_T) &&
548 (gUTF8ToNative != INVALID_ICONV_T)) {
549 const char *in = (const char *) *input;
551 char ubuf[6]; // max utf-8 char length (really only needs to be 4 bytes)
553 // convert one uchar at a time...
554 while (inLeft && outLeft) {
555 char *p = ubuf;
556 size_t n = sizeof(ubuf), one_uchar = sizeof(char16_t);
557 res = xp_iconv(gUnicodeToUTF8, &in, &one_uchar, &p, &n);
558 if (res == (size_t) -1) {
559 NS_ERROR("conversion from utf-16 to utf-8 failed");
560 break;
561 }
562 p = ubuf;
563 n = sizeof(ubuf) - n;
564 res = xp_iconv(gUTF8ToNative, (const char **) &p, &n, output, &outLeft);
565 if (res == (size_t) -1) {
566 if (errno == E2BIG) {
567 // not enough room for last uchar... back up and return.
568 in -= sizeof(char16_t);
569 res = 0;
570 }
571 else
572 NS_ERROR("conversion from utf-8 to native failed");
573 break;
574 }
575 inLeft -= sizeof(char16_t);
576 }
578 (*input) += (*inputLeft - inLeft / 2);
579 *inputLeft = inLeft / 2;
580 *outputLeft = outLeft;
581 if (res != (size_t) -1) {
582 return NS_OK;
583 }
585 // reset converters
586 xp_iconv_reset(gUnicodeToUTF8);
587 xp_iconv_reset(gUTF8ToNative);
588 }
589 #endif
591 // fallback: truncate and hope for the best
592 // XXX This is lame and we have to do better.
593 utf16_to_isolatin1(input, inputLeft, output, outputLeft);
595 return NS_OK;
596 }
598 bool
599 nsNativeCharsetConverter::IsNativeUTF8()
600 {
601 if (!gInitialized) {
602 Lock();
603 if (!gInitialized)
604 LazyInit();
605 Unlock();
606 }
607 return gIsNativeUTF8;
608 }
610 #endif // USE_ICONV
612 //-----------------------------------------------------------------------------
613 // conversion using mb[r]towc/wc[r]tomb
614 //-----------------------------------------------------------------------------
615 #if defined(USE_STDCONV)
616 #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
617 #include <wchar.h> // mbrtowc, wcrtomb
618 #endif
620 class nsNativeCharsetConverter
621 {
622 public:
623 nsNativeCharsetConverter();
625 nsresult NativeToUnicode(const char **input , uint32_t *inputLeft,
626 char16_t **output, uint32_t *outputLeft);
627 nsresult UnicodeToNative(const char16_t **input , uint32_t *inputLeft,
628 char **output, uint32_t *outputLeft);
630 static void GlobalInit();
631 static void GlobalShutdown() { }
632 static bool IsNativeUTF8();
634 private:
635 static bool gWCharIsUnicode;
637 #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
638 mbstate_t ps;
639 #endif
640 };
642 bool nsNativeCharsetConverter::gWCharIsUnicode = false;
644 nsNativeCharsetConverter::nsNativeCharsetConverter()
645 {
646 #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
647 memset(&ps, 0, sizeof(ps));
648 #endif
649 }
651 void
652 nsNativeCharsetConverter::GlobalInit()
653 {
654 // verify that wchar_t for the current locale is actually unicode.
655 // if it is not, then we should avoid calling mbtowc/wctomb and
656 // just fallback on zero-pad/truncation conversion.
657 //
658 // this test cannot be done at build time because the encoding of
659 // wchar_t may depend on the runtime locale. sad, but true!!
660 //
661 // so, if wchar_t is unicode then converting an ASCII character
662 // to wchar_t should not change its numeric value. we'll just
663 // check what happens with the ASCII 'a' character.
664 //
665 // this test is not perfect... obviously, it could yield false
666 // positives, but then at least ASCII text would be converted
667 // properly (or maybe just the 'a' character) -- oh well :(
669 char a = 'a';
670 unsigned int w = 0;
672 int res = mbtowc((wchar_t *) &w, &a, 1);
674 gWCharIsUnicode = (res != -1 && w == 'a');
676 #ifdef DEBUG
677 if (!gWCharIsUnicode)
678 NS_WARNING("wchar_t is not unicode (unicode conversion will be lossy)");
679 #endif
680 }
682 nsresult
683 nsNativeCharsetConverter::NativeToUnicode(const char **input,
684 uint32_t *inputLeft,
685 char16_t **output,
686 uint32_t *outputLeft)
687 {
688 if (gWCharIsUnicode) {
689 int incr;
691 // cannot use wchar_t here since it may have been redefined (e.g.,
692 // via -fshort-wchar). hopefully, sizeof(tmp) is sufficient XP.
693 unsigned int tmp = 0;
694 while (*inputLeft && *outputLeft) {
695 #ifdef HAVE_MBRTOWC
696 incr = (int) mbrtowc((wchar_t *) &tmp, *input, *inputLeft, &ps);
697 #else
698 // XXX is this thread-safe?
699 incr = (int) mbtowc((wchar_t *) &tmp, *input, *inputLeft);
700 #endif
701 if (incr < 0) {
702 NS_WARNING("mbtowc failed: possible charset mismatch");
703 // zero-pad and hope for the best
704 tmp = (unsigned char) **input;
705 incr = 1;
706 }
707 **output = (char16_t) tmp;
708 (*input) += incr;
709 (*inputLeft) -= incr;
710 (*output)++;
711 (*outputLeft)--;
712 }
713 }
714 else {
715 // wchar_t isn't unicode, so the best we can do is treat the
716 // input as if it is isolatin1 :(
717 isolatin1_to_utf16(input, inputLeft, output, outputLeft);
718 }
720 return NS_OK;
721 }
723 nsresult
724 nsNativeCharsetConverter::UnicodeToNative(const char16_t **input,
725 uint32_t *inputLeft,
726 char **output,
727 uint32_t *outputLeft)
728 {
729 if (gWCharIsUnicode) {
730 int incr;
732 while (*inputLeft && *outputLeft >= MB_CUR_MAX) {
733 #ifdef HAVE_WCRTOMB
734 incr = (int) wcrtomb(*output, (wchar_t) **input, &ps);
735 #else
736 // XXX is this thread-safe?
737 incr = (int) wctomb(*output, (wchar_t) **input);
738 #endif
739 if (incr < 0) {
740 NS_WARNING("mbtowc failed: possible charset mismatch");
741 **output = (unsigned char) **input; // truncate
742 incr = 1;
743 }
744 // most likely we're dead anyways if this assertion should fire
745 NS_ASSERTION(uint32_t(incr) <= *outputLeft, "wrote beyond end of string");
746 (*output) += incr;
747 (*outputLeft) -= incr;
748 (*input)++;
749 (*inputLeft)--;
750 }
751 }
752 else {
753 // wchar_t isn't unicode, so the best we can do is treat the
754 // input as if it is isolatin1 :(
755 utf16_to_isolatin1(input, inputLeft, output, outputLeft);
756 }
758 return NS_OK;
759 }
761 // XXX : for now, return false
762 bool
763 nsNativeCharsetConverter::IsNativeUTF8()
764 {
765 return false;
766 }
768 #endif // USE_STDCONV
770 //-----------------------------------------------------------------------------
771 // API implementation
772 //-----------------------------------------------------------------------------
774 nsresult
775 NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
776 {
777 output.Truncate();
779 uint32_t inputLen = input.Length();
781 nsACString::const_iterator iter;
782 input.BeginReading(iter);
784 //
785 // OPTIMIZATION: preallocate space for largest possible result; convert
786 // directly into the result buffer to avoid intermediate buffer copy.
787 //
788 // this will generally result in a larger allocation, but that seems
789 // better than an extra buffer copy.
790 //
791 if (!output.SetLength(inputLen, fallible_t()))
792 return NS_ERROR_OUT_OF_MEMORY;
793 nsAString::iterator out_iter;
794 output.BeginWriting(out_iter);
796 char16_t *result = out_iter.get();
797 uint32_t resultLeft = inputLen;
799 const char *buf = iter.get();
800 uint32_t bufLeft = inputLen;
802 nsNativeCharsetConverter conv;
803 nsresult rv = conv.NativeToUnicode(&buf, &bufLeft, &result, &resultLeft);
804 if (NS_SUCCEEDED(rv)) {
805 NS_ASSERTION(bufLeft == 0, "did not consume entire input buffer");
806 output.SetLength(inputLen - resultLeft);
807 }
808 return rv;
809 }
811 nsresult
812 NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
813 {
814 output.Truncate();
816 nsAString::const_iterator iter, end;
817 input.BeginReading(iter);
818 input.EndReading(end);
820 // cannot easily avoid intermediate buffer copy.
821 char temp[4096];
823 nsNativeCharsetConverter conv;
825 const char16_t *buf = iter.get();
826 uint32_t bufLeft = Distance(iter, end);
827 while (bufLeft) {
828 char *p = temp;
829 uint32_t tempLeft = sizeof(temp);
831 nsresult rv = conv.UnicodeToNative(&buf, &bufLeft, &p, &tempLeft);
832 if (NS_FAILED(rv)) return rv;
834 if (tempLeft < sizeof(temp))
835 output.Append(temp, sizeof(temp) - tempLeft);
836 }
837 return NS_OK;
838 }
840 bool
841 NS_IsNativeUTF8()
842 {
843 return nsNativeCharsetConverter::IsNativeUTF8();
844 }
846 void
847 NS_StartupNativeCharsetUtils()
848 {
849 //
850 // need to initialize the locale or else charset conversion will fail.
851 // better not delay this in case some other component alters the locale
852 // settings.
853 //
854 // XXX we assume that we are called early enough that we should
855 // always be the first to care about the locale's charset.
856 //
857 setlocale(LC_CTYPE, "");
859 nsNativeCharsetConverter::GlobalInit();
860 }
862 void
863 NS_ShutdownNativeCharsetUtils()
864 {
865 nsNativeCharsetConverter::GlobalShutdown();
866 }
868 //-----------------------------------------------------------------------------
869 // XP_WIN
870 //-----------------------------------------------------------------------------
871 #elif defined(XP_WIN)
873 #include <windows.h>
874 #include "nsString.h"
875 #include "nsAString.h"
876 #include "nsReadableUtils.h"
878 using namespace mozilla;
880 nsresult
881 NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
882 {
883 uint32_t inputLen = input.Length();
885 nsACString::const_iterator iter;
886 input.BeginReading(iter);
888 const char *buf = iter.get();
890 // determine length of result
891 uint32_t resultLen = 0;
892 int n = ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, nullptr, 0);
893 if (n > 0)
894 resultLen += n;
896 // allocate sufficient space
897 if (!output.SetLength(resultLen, fallible_t()))
898 return NS_ERROR_OUT_OF_MEMORY;
899 if (resultLen > 0) {
900 nsAString::iterator out_iter;
901 output.BeginWriting(out_iter);
903 char16_t *result = out_iter.get();
905 ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, wwc(result), resultLen);
906 }
907 return NS_OK;
908 }
910 nsresult
911 NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
912 {
913 uint32_t inputLen = input.Length();
915 nsAString::const_iterator iter;
916 input.BeginReading(iter);
918 char16ptr_t buf = iter.get();
920 // determine length of result
921 uint32_t resultLen = 0;
923 int n = ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, nullptr, 0,
924 nullptr, nullptr);
925 if (n > 0)
926 resultLen += n;
928 // allocate sufficient space
929 if (!output.SetLength(resultLen, fallible_t()))
930 return NS_ERROR_OUT_OF_MEMORY;
931 if (resultLen > 0) {
932 nsACString::iterator out_iter;
933 output.BeginWriting(out_iter);
935 // default "defaultChar" is '?', which is an illegal character on windows
936 // file system. That will cause file uncreatable. Change it to '_'
937 const char defaultChar = '_';
939 char *result = out_iter.get();
941 ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, result, resultLen,
942 &defaultChar, nullptr);
943 }
944 return NS_OK;
945 }
947 // moved from widget/windows/nsToolkit.cpp
948 int32_t
949 NS_ConvertAtoW(const char *aStrInA, int aBufferSize, char16_t *aStrOutW)
950 {
951 return MultiByteToWideChar(CP_ACP, 0, aStrInA, -1, wwc(aStrOutW), aBufferSize);
952 }
954 int32_t
955 NS_ConvertWtoA(const char16_t *aStrInW, int aBufferSizeOut,
956 char *aStrOutA, const char *aDefault)
957 {
958 if ((!aStrInW) || (!aStrOutA) || (aBufferSizeOut <= 0))
959 return 0;
961 int numCharsConverted = WideCharToMultiByte(CP_ACP, 0, char16ptr_t(aStrInW), -1,
962 aStrOutA, aBufferSizeOut,
963 aDefault, nullptr);
965 if (!numCharsConverted) {
966 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
967 // Overflow, add missing null termination but return 0
968 aStrOutA[aBufferSizeOut-1] = '\0';
969 }
970 else {
971 // Other error, clear string and return 0
972 aStrOutA[0] = '\0';
973 }
974 }
975 else if (numCharsConverted < aBufferSizeOut) {
976 // Add 2nd null (really necessary?)
977 aStrOutA[numCharsConverted] = '\0';
978 }
980 return numCharsConverted;
981 }
983 #else
985 #include "nsReadableUtils.h"
987 nsresult
988 NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
989 {
990 CopyASCIItoUTF16(input, output);
991 return NS_OK;
992 }
994 nsresult
995 NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
996 {
997 LossyCopyUTF16toASCII(input, output);
998 return NS_OK;
999 }
// Nothing to set up for the generic fallback.
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
// Nothing to tear down for the generic fallback.
void
NS_ShutdownNativeCharsetUtils()
{
    // intentionally empty
}
1011 #endif