js/public/CharacterEncoding.h

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     2  * vim: set ts=8 sts=4 et sw=4 tw=99:
     3  * This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #ifndef js_CharacterEncoding_h
     8 #define js_CharacterEncoding_h
    10 #include "mozilla/NullPtr.h"
    11 #include "mozilla/Range.h"
    13 #include "js/TypeDecls.h"
    14 #include "js/Utility.h"
    16 namespace js {
    17 struct ThreadSafeContext;
    18 }
    20 namespace JS {
    22 /*
    23  * By default, all C/C++ 1-byte-per-character strings passed into the JSAPI
    24  * are treated as ISO/IEC 8859-1, also known as Latin-1. That is, each
    25  * byte is treated as a 2-byte character, and there is no way to pass in a
    26  * string containing characters beyond U+00FF.
    27  */
    28 class Latin1Chars : public mozilla::Range<unsigned char>
    29 {
    30     typedef mozilla::Range<unsigned char> Base;
    32   public:
    33     Latin1Chars() : Base() {}
    34     Latin1Chars(char *aBytes, size_t aLength) : Base(reinterpret_cast<unsigned char *>(aBytes), aLength) {}
    35     Latin1Chars(const char *aBytes, size_t aLength)
    36       : Base(reinterpret_cast<unsigned char *>(const_cast<char *>(aBytes)), aLength)
    37     {}
    38 };
    40 /*
    41  * A Latin1Chars, but with \0 termination for C compatibility.
    42  */
    43 class Latin1CharsZ : public mozilla::RangedPtr<unsigned char>
    44 {
    45     typedef mozilla::RangedPtr<unsigned char> Base;
    47   public:
    48     Latin1CharsZ() : Base(nullptr, 0) {}
    50     Latin1CharsZ(char *aBytes, size_t aLength)
    51       : Base(reinterpret_cast<unsigned char *>(aBytes), aLength)
    52     {
    53         MOZ_ASSERT(aBytes[aLength] == '\0');
    54     }
    56     Latin1CharsZ(unsigned char *aBytes, size_t aLength)
    57       : Base(aBytes, aLength)
    58     {
    59         MOZ_ASSERT(aBytes[aLength] == '\0');
    60     }
    62     using Base::operator=;
    64     char *c_str() { return reinterpret_cast<char *>(get()); }
    65 };
    67 class UTF8Chars : public mozilla::Range<unsigned char>
    68 {
    69     typedef mozilla::Range<unsigned char> Base;
    71   public:
    72     UTF8Chars() : Base() {}
    73     UTF8Chars(char *aBytes, size_t aLength)
    74       : Base(reinterpret_cast<unsigned char *>(aBytes), aLength)
    75     {}
    76     UTF8Chars(const char *aBytes, size_t aLength)
    77       : Base(reinterpret_cast<unsigned char *>(const_cast<char *>(aBytes)), aLength)
    78     {}
    79 };
    81 /*
    82  * SpiderMonkey also deals directly with UTF-8 encoded text in some places.
    83  */
    84 class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>
    85 {
    86     typedef mozilla::RangedPtr<unsigned char> Base;
    88   public:
    89     UTF8CharsZ() : Base(nullptr, 0) {}
    91     UTF8CharsZ(char *aBytes, size_t aLength)
    92       : Base(reinterpret_cast<unsigned char *>(aBytes), aLength)
    93     {
    94         MOZ_ASSERT(aBytes[aLength] == '\0');
    95     }
    97     UTF8CharsZ(unsigned char *aBytes, size_t aLength)
    98       : Base(aBytes, aLength)
    99     {
   100         MOZ_ASSERT(aBytes[aLength] == '\0');
   101     }
   103     using Base::operator=;
   105     char *c_str() { return reinterpret_cast<char *>(get()); }
   106 };
   108 /*
   109  * SpiderMonkey uses a 2-byte character representation: it is a
   110  * 2-byte-at-a-time view of a UTF-16 byte stream. This is similar to UCS-2,
   111  * but unlike UCS-2, we do not strip UTF-16 extension bytes. This allows a
   112  * sufficiently dedicated JavaScript program to be fully unicode-aware by
   113  * manually interpreting UTF-16 extension characters embedded in the JS
   114  * string.
   115  */
   116 class TwoByteChars : public mozilla::Range<jschar>
   117 {
   118     typedef mozilla::Range<jschar> Base;
   120   public:
   121     TwoByteChars() : Base() {}
   122     TwoByteChars(jschar *aChars, size_t aLength) : Base(aChars, aLength) {}
   123     TwoByteChars(const jschar *aChars, size_t aLength) : Base(const_cast<jschar *>(aChars), aLength) {}
   124 };
   126 /*
   127  * A TwoByteChars, but \0 terminated for compatibility with JSFlatString.
   128  */
   129 class TwoByteCharsZ : public mozilla::RangedPtr<jschar>
   130 {
   131     typedef mozilla::RangedPtr<jschar> Base;
   133   public:
   134     TwoByteCharsZ() : Base(nullptr, 0) {}
   136     TwoByteCharsZ(jschar *chars, size_t length)
   137       : Base(chars, length)
   138     {
   139         MOZ_ASSERT(chars[length] == '\0');
   140     }
   142     using Base::operator=;
   143 };
   145 typedef mozilla::RangedPtr<const jschar> ConstCharPtr;
   147 /*
   148  * Like TwoByteChars, but the chars are const.
   149  */
   150 class ConstTwoByteChars : public mozilla::RangedPtr<const jschar>
   151 {
   152   public:
   153     ConstTwoByteChars(const ConstTwoByteChars &s) : ConstCharPtr(s) {}
   154     ConstTwoByteChars(const mozilla::RangedPtr<const jschar> &s) : ConstCharPtr(s) {}
   155     ConstTwoByteChars(const jschar *s, size_t len) : ConstCharPtr(s, len) {}
   156     ConstTwoByteChars(const jschar *pos, const jschar *start, size_t len)
   157       : ConstCharPtr(pos, start, len)
   158     {}
   160     using ConstCharPtr::operator=;
   161 };
   164 /*
   165  * Convert a 2-byte character sequence to "ISO-Latin-1". This works by
   166  * truncating each 2-byte pair in the sequence to a 1-byte pair. If the source
   167  * contains any UTF-16 extension characters, then this may give invalid Latin1
   168  * output. The returned string is zero terminated. The returned string or the
   169  * returned string's |start()| must be freed with JS_free or js_free,
   170  * respectively. If allocation fails, an OOM error will be set and the method
   171  * will return a nullptr chars (which can be tested for with the ! operator).
   172  * This method cannot trigger GC.
   173  */
   174 extern Latin1CharsZ
   175 LossyTwoByteCharsToNewLatin1CharsZ(js::ThreadSafeContext *cx, TwoByteChars tbchars);
   177 extern UTF8CharsZ
   178 TwoByteCharsToNewUTF8CharsZ(js::ThreadSafeContext *cx, TwoByteChars tbchars);
   /*
    * Declared here, defined elsewhere. Takes a byte buffer plus a length and
    * returns a 32-bit value.
    * NOTE(review): by its name this decodes the |utf8Length|-byte UTF-8 sequence
    * at |utf8Buffer| into one UCS-4 code point; confirm how malformed input is
    * handled against the definition.
    */
   uint32_t
   Utf8ToOneUcs4Char(const uint8_t *utf8Buffer,
                     int utf8Length);
   183 /*
   184  * Inflate bytes in UTF-8 encoding to jschars.
   185  * - On error, returns an empty TwoByteCharsZ.
   186  * - On success, returns a malloc'd TwoByteCharsZ, and updates |outlen| to hold
   187  *   its length;  the length value excludes the trailing null.
   188  */
   189 extern TwoByteCharsZ
   190 UTF8CharsToNewTwoByteCharsZ(JSContext *cx, const UTF8Chars utf8, size_t *outlen);
   192 /*
   193  * The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters
   194  * will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8
   195  * input.
   196  */
   197 extern TwoByteCharsZ
   198 LossyUTF8CharsToNewTwoByteCharsZ(JSContext *cx, const UTF8Chars utf8, size_t *outlen);
   200 } // namespace JS
   202 inline void JS_free(JS::Latin1CharsZ &ptr) { js_free((void*)ptr.get()); }
   203 inline void JS_free(JS::UTF8CharsZ &ptr) { js_free((void*)ptr.get()); }
   205 #endif /* js_CharacterEncoding_h */

mercurial