Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
michael@0 | 2 | * vim: set ts=8 sts=4 et sw=4 tw=99: |
michael@0 | 3 | * This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | #ifndef js_CharacterEncoding_h |
michael@0 | 8 | #define js_CharacterEncoding_h |
michael@0 | 9 | |
michael@0 | 10 | #include "mozilla/NullPtr.h" |
michael@0 | 11 | #include "mozilla/Range.h" |
michael@0 | 12 | |
michael@0 | 13 | #include "js/TypeDecls.h" |
michael@0 | 14 | #include "js/Utility.h" |
michael@0 | 15 | |
michael@0 | 16 | namespace js { |
michael@0 | 17 | struct ThreadSafeContext; |
michael@0 | 18 | } |
michael@0 | 19 | |
michael@0 | 20 | namespace JS { |
michael@0 | 21 | |
michael@0 | 22 | /* |
michael@0 | 23 | * By default, all C/C++ 1-byte-per-character strings passed into the JSAPI |
michael@0 | 24 | * are treated as ISO/IEC 8859-1, also known as Latin-1. That is, each |
michael@0 | 25 | * byte is treated as a 2-byte character, and there is no way to pass in a |
michael@0 | 26 | * string containing characters beyond U+00FF. |
michael@0 | 27 | */ |
michael@0 | 28 | class Latin1Chars : public mozilla::Range<unsigned char> |
michael@0 | 29 | { |
michael@0 | 30 | typedef mozilla::Range<unsigned char> Base; |
michael@0 | 31 | |
michael@0 | 32 | public: |
michael@0 | 33 | Latin1Chars() : Base() {} |
michael@0 | 34 | Latin1Chars(char *aBytes, size_t aLength) : Base(reinterpret_cast<unsigned char *>(aBytes), aLength) {} |
michael@0 | 35 | Latin1Chars(const char *aBytes, size_t aLength) |
michael@0 | 36 | : Base(reinterpret_cast<unsigned char *>(const_cast<char *>(aBytes)), aLength) |
michael@0 | 37 | {} |
michael@0 | 38 | }; |
michael@0 | 39 | |
michael@0 | 40 | /* |
michael@0 | 41 | * A Latin1Chars, but with \0 termination for C compatibility. |
michael@0 | 42 | */ |
michael@0 | 43 | class Latin1CharsZ : public mozilla::RangedPtr<unsigned char> |
michael@0 | 44 | { |
michael@0 | 45 | typedef mozilla::RangedPtr<unsigned char> Base; |
michael@0 | 46 | |
michael@0 | 47 | public: |
michael@0 | 48 | Latin1CharsZ() : Base(nullptr, 0) {} |
michael@0 | 49 | |
michael@0 | 50 | Latin1CharsZ(char *aBytes, size_t aLength) |
michael@0 | 51 | : Base(reinterpret_cast<unsigned char *>(aBytes), aLength) |
michael@0 | 52 | { |
michael@0 | 53 | MOZ_ASSERT(aBytes[aLength] == '\0'); |
michael@0 | 54 | } |
michael@0 | 55 | |
michael@0 | 56 | Latin1CharsZ(unsigned char *aBytes, size_t aLength) |
michael@0 | 57 | : Base(aBytes, aLength) |
michael@0 | 58 | { |
michael@0 | 59 | MOZ_ASSERT(aBytes[aLength] == '\0'); |
michael@0 | 60 | } |
michael@0 | 61 | |
michael@0 | 62 | using Base::operator=; |
michael@0 | 63 | |
michael@0 | 64 | char *c_str() { return reinterpret_cast<char *>(get()); } |
michael@0 | 65 | }; |
michael@0 | 66 | |
michael@0 | 67 | class UTF8Chars : public mozilla::Range<unsigned char> |
michael@0 | 68 | { |
michael@0 | 69 | typedef mozilla::Range<unsigned char> Base; |
michael@0 | 70 | |
michael@0 | 71 | public: |
michael@0 | 72 | UTF8Chars() : Base() {} |
michael@0 | 73 | UTF8Chars(char *aBytes, size_t aLength) |
michael@0 | 74 | : Base(reinterpret_cast<unsigned char *>(aBytes), aLength) |
michael@0 | 75 | {} |
michael@0 | 76 | UTF8Chars(const char *aBytes, size_t aLength) |
michael@0 | 77 | : Base(reinterpret_cast<unsigned char *>(const_cast<char *>(aBytes)), aLength) |
michael@0 | 78 | {} |
michael@0 | 79 | }; |
michael@0 | 80 | |
michael@0 | 81 | /* |
michael@0 | 82 | * SpiderMonkey also deals directly with UTF-8 encoded text in some places. |
michael@0 | 83 | */ |
michael@0 | 84 | class UTF8CharsZ : public mozilla::RangedPtr<unsigned char> |
michael@0 | 85 | { |
michael@0 | 86 | typedef mozilla::RangedPtr<unsigned char> Base; |
michael@0 | 87 | |
michael@0 | 88 | public: |
michael@0 | 89 | UTF8CharsZ() : Base(nullptr, 0) {} |
michael@0 | 90 | |
michael@0 | 91 | UTF8CharsZ(char *aBytes, size_t aLength) |
michael@0 | 92 | : Base(reinterpret_cast<unsigned char *>(aBytes), aLength) |
michael@0 | 93 | { |
michael@0 | 94 | MOZ_ASSERT(aBytes[aLength] == '\0'); |
michael@0 | 95 | } |
michael@0 | 96 | |
michael@0 | 97 | UTF8CharsZ(unsigned char *aBytes, size_t aLength) |
michael@0 | 98 | : Base(aBytes, aLength) |
michael@0 | 99 | { |
michael@0 | 100 | MOZ_ASSERT(aBytes[aLength] == '\0'); |
michael@0 | 101 | } |
michael@0 | 102 | |
michael@0 | 103 | using Base::operator=; |
michael@0 | 104 | |
michael@0 | 105 | char *c_str() { return reinterpret_cast<char *>(get()); } |
michael@0 | 106 | }; |
michael@0 | 107 | |
michael@0 | 108 | /* |
michael@0 | 109 | * SpiderMonkey uses a 2-byte character representation: it is a |
michael@0 | 110 | * 2-byte-at-a-time view of a UTF-16 byte stream. This is similar to UCS-2, |
michael@0 | 111 | * but unlike UCS-2, we do not strip UTF-16 extension bytes. This allows a |
michael@0 | 112 | * sufficiently dedicated JavaScript program to be fully unicode-aware by |
michael@0 | 113 | * manually interpreting UTF-16 extension characters embedded in the JS |
michael@0 | 114 | * string. |
michael@0 | 115 | */ |
michael@0 | 116 | class TwoByteChars : public mozilla::Range<jschar> |
michael@0 | 117 | { |
michael@0 | 118 | typedef mozilla::Range<jschar> Base; |
michael@0 | 119 | |
michael@0 | 120 | public: |
michael@0 | 121 | TwoByteChars() : Base() {} |
michael@0 | 122 | TwoByteChars(jschar *aChars, size_t aLength) : Base(aChars, aLength) {} |
michael@0 | 123 | TwoByteChars(const jschar *aChars, size_t aLength) : Base(const_cast<jschar *>(aChars), aLength) {} |
michael@0 | 124 | }; |
michael@0 | 125 | |
michael@0 | 126 | /* |
michael@0 | 127 | * A TwoByteChars, but \0 terminated for compatibility with JSFlatString. |
michael@0 | 128 | */ |
michael@0 | 129 | class TwoByteCharsZ : public mozilla::RangedPtr<jschar> |
michael@0 | 130 | { |
michael@0 | 131 | typedef mozilla::RangedPtr<jschar> Base; |
michael@0 | 132 | |
michael@0 | 133 | public: |
michael@0 | 134 | TwoByteCharsZ() : Base(nullptr, 0) {} |
michael@0 | 135 | |
michael@0 | 136 | TwoByteCharsZ(jschar *chars, size_t length) |
michael@0 | 137 | : Base(chars, length) |
michael@0 | 138 | { |
michael@0 | 139 | MOZ_ASSERT(chars[length] == '\0'); |
michael@0 | 140 | } |
michael@0 | 141 | |
michael@0 | 142 | using Base::operator=; |
michael@0 | 143 | }; |
michael@0 | 144 | |
michael@0 | 145 | typedef mozilla::RangedPtr<const jschar> ConstCharPtr; |
michael@0 | 146 | |
michael@0 | 147 | /* |
michael@0 | 148 | * Like TwoByteChars, but the chars are const. |
michael@0 | 149 | */ |
michael@0 | 150 | class ConstTwoByteChars : public mozilla::RangedPtr<const jschar> |
michael@0 | 151 | { |
michael@0 | 152 | public: |
michael@0 | 153 | ConstTwoByteChars(const ConstTwoByteChars &s) : ConstCharPtr(s) {} |
michael@0 | 154 | ConstTwoByteChars(const mozilla::RangedPtr<const jschar> &s) : ConstCharPtr(s) {} |
michael@0 | 155 | ConstTwoByteChars(const jschar *s, size_t len) : ConstCharPtr(s, len) {} |
michael@0 | 156 | ConstTwoByteChars(const jschar *pos, const jschar *start, size_t len) |
michael@0 | 157 | : ConstCharPtr(pos, start, len) |
michael@0 | 158 | {} |
michael@0 | 159 | |
michael@0 | 160 | using ConstCharPtr::operator=; |
michael@0 | 161 | }; |
michael@0 | 162 | |
michael@0 | 163 | |
michael@0 | 164 | /* |
michael@0 | 165 | * Convert a 2-byte character sequence to "ISO-Latin-1". This works by |
michael@0 | 166 | * truncating each 2-byte pair in the sequence to a 1-byte pair. If the source |
michael@0 | 167 | * contains any UTF-16 extension characters, then this may give invalid Latin1 |
michael@0 | 168 | * output. The returned string is zero terminated. The returned string or the |
michael@0 | 169 | * returned string's |start()| must be freed with JS_free or js_free, |
michael@0 | 170 | * respectively. If allocation fails, an OOM error will be set and the method |
michael@0 | 171 | * will return a nullptr chars (which can be tested for with the ! operator). |
michael@0 | 172 | * This method cannot trigger GC. |
michael@0 | 173 | */ |
michael@0 | 174 | extern Latin1CharsZ |
michael@0 | 175 | LossyTwoByteCharsToNewLatin1CharsZ(js::ThreadSafeContext *cx, TwoByteChars tbchars); |
michael@0 | 176 | |
michael@0 | 177 | extern UTF8CharsZ |
michael@0 | 178 | TwoByteCharsToNewUTF8CharsZ(js::ThreadSafeContext *cx, TwoByteChars tbchars); |
michael@0 | 179 | |
/*
 * NOTE(review): undocumented in this header and defined elsewhere. Judging by
 * the name and signature, this presumably decodes the |utf8Length|-byte UTF-8
 * sequence at |utf8Buffer| into a single UCS-4 code point -- confirm the
 * handling of malformed input against the definition.
 */
extern uint32_t Utf8ToOneUcs4Char(const uint8_t *utf8Buffer, int utf8Length);
michael@0 | 182 | |
michael@0 | 183 | /* |
michael@0 | 184 | * Inflate bytes in UTF-8 encoding to jschars. |
michael@0 | 185 | * - On error, returns an empty TwoByteCharsZ. |
michael@0 | 186 | * - On success, returns a malloc'd TwoByteCharsZ, and updates |outlen| to hold |
michael@0 | 187 | * its length; the length value excludes the trailing null. |
michael@0 | 188 | */ |
michael@0 | 189 | extern TwoByteCharsZ |
michael@0 | 190 | UTF8CharsToNewTwoByteCharsZ(JSContext *cx, const UTF8Chars utf8, size_t *outlen); |
michael@0 | 191 | |
michael@0 | 192 | /* |
michael@0 | 193 | * The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters |
michael@0 | 194 | * will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8 |
michael@0 | 195 | * input. |
michael@0 | 196 | */ |
michael@0 | 197 | extern TwoByteCharsZ |
michael@0 | 198 | LossyUTF8CharsToNewTwoByteCharsZ(JSContext *cx, const UTF8Chars utf8, size_t *outlen); |
michael@0 | 199 | |
michael@0 | 200 | } // namespace JS |
michael@0 | 201 | |
michael@0 | 202 | inline void JS_free(JS::Latin1CharsZ &ptr) { js_free((void*)ptr.get()); } |
michael@0 | 203 | inline void JS_free(JS::UTF8CharsZ &ptr) { js_free((void*)ptr.get()); } |
michael@0 | 204 | |
michael@0 | 205 | #endif /* js_CharacterEncoding_h */ |