The Tor Browser: js/src/vm/CharacterEncoding.cpp@129ffea94266

Conditionally enable double-key logic depending on private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
One reservation remains: how to convince FindCookie users to test the
condition and pass a nullptr when double-key logic is disabled.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-

     2  * vim: set ts=8 sts=4 et sw=4 tw=99:

     3  * This Source Code Form is subject to the terms of the Mozilla Public

     4  * License, v. 2.0. If a copy of the MPL was not distributed with this

     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     7 #include "js/CharacterEncoding.h"

     9 #include "jscntxt.h"

    10 #include "jsprf.h"

    12 using namespace JS;

    14 Latin1CharsZ

    15 JS::LossyTwoByteCharsToNewLatin1CharsZ(js::ThreadSafeContext *cx, TwoByteChars tbchars)

    16 {

    17     JS_ASSERT(cx);

    18     size_t len = tbchars.length();

    19     unsigned char *latin1 = cx->pod_malloc<unsigned char>(len + 1);

    20     if (!latin1)

    21         return Latin1CharsZ();

    22     for (size_t i = 0; i < len; ++i)

    23         latin1[i] = static_cast<unsigned char>(tbchars[i]);

    24     latin1[len] = '\0';

    25     return Latin1CharsZ(latin1, len);

    26 }

    28 static size_t

    29 GetDeflatedUTF8StringLength(const jschar *chars, size_t nchars)

    30 {

    31     size_t nbytes;

    32     const jschar *end;

    33     unsigned c, c2;

    35     nbytes = nchars;

    36     for (end = chars + nchars; chars != end; chars++) {

    37         c = *chars;

    38         if (c < 0x80)

    39             continue;

    40         if (0xD800 <= c && c <= 0xDFFF) {

    41             /* nbytes sets 1 length since this is surrogate pair. */

    42             if (c >= 0xDC00 || (chars + 1) == end) {

    43                 nbytes += 2; /* Bad Surrogate */

    44                 continue;

    45             }

    46             c2 = chars[1];

    47             if (c2 < 0xDC00 || c2 > 0xDFFF) {

    48                 nbytes += 2; /* Bad Surrogate */

    49                 continue;

    50             }

    51             c = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;

    52             nbytes--;

    53             chars++;

    54         }

    55         c >>= 11;

    56         nbytes++;

    57         while (c) {

    58             c >>= 5;

    59             nbytes++;

    60         }

    61     }

    62     return nbytes;

    63 }

    65 static bool

    66 PutUTF8ReplacementCharacter(char **dst, size_t *dstlenp) {

    67     if (*dstlenp < 3)

    68         return false;

    69     *(*dst)++ = (char) 0xEF;

    70     *(*dst)++ = (char) 0xBF;

    71     *(*dst)++ = (char) 0xBD;

    72     *dstlenp -= 3;

    73     return true;

    74 }

    76 /*

    77  * Write up to |*dstlenp| bytes into |dst|.  Writes the number of bytes used

    78  * into |*dstlenp| on success.  Returns false on failure.

    79  */

    80 static bool

    81 DeflateStringToUTF8Buffer(js::ThreadSafeContext *cx, const jschar *src, size_t srclen,

    82                           char *dst, size_t *dstlenp)

    83 {

    84     size_t dstlen = *dstlenp;

    85     size_t origDstlen = dstlen;

    87     while (srclen) {

    88         uint32_t v;

    89         jschar c = *src++;

    90         srclen--;

    91         if (c >= 0xDC00 && c <= 0xDFFF) {

    92             if (!PutUTF8ReplacementCharacter(&dst, &dstlen))

    93                 goto bufferTooSmall;

    94             continue;

    95         } else if (c < 0xD800 || c > 0xDBFF) {

    96             v = c;

    97         } else {

    98             if (srclen < 1) {

    99                 if (!PutUTF8ReplacementCharacter(&dst, &dstlen))

   100                     goto bufferTooSmall;

   101                 continue;

   102             }

   103             jschar c2 = *src;

   104             if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {

   105                 if (!PutUTF8ReplacementCharacter(&dst, &dstlen))

   106                     goto bufferTooSmall;

   107                 continue;

   108             }

   109             src++;

   110             srclen--;

   111             v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;

   112         }

   113         size_t utf8Len;

   114         if (v < 0x0080) {

   115             /* no encoding necessary - performance hack */

   116             if (dstlen == 0)

   117                 goto bufferTooSmall;

   118             *dst++ = (char) v;

   119             utf8Len = 1;

   120         } else {

   121             uint8_t utf8buf[4];

   122             utf8Len = js_OneUcs4ToUtf8Char(utf8buf, v);

   123             if (utf8Len > dstlen)

   124                 goto bufferTooSmall;

   125             for (size_t i = 0; i < utf8Len; i++)

   126                 *dst++ = (char) utf8buf[i];

   127         }

   128         dstlen -= utf8Len;

   129     }

   130     *dstlenp = (origDstlen - dstlen);

   131     return true;

   133 bufferTooSmall:

   134     *dstlenp = (origDstlen - dstlen);

   135     if (cx->isJSContext())

   136         JS_ReportErrorNumber(cx->asJSContext(), js_GetErrorMessage, nullptr,

   137                              JSMSG_BUFFER_TOO_SMALL);

   138     return false;

   139 }

   142 UTF8CharsZ

   143 JS::TwoByteCharsToNewUTF8CharsZ(js::ThreadSafeContext *cx, TwoByteChars tbchars)

   144 {

   145     JS_ASSERT(cx);

   147     /* Get required buffer size. */

   148     jschar *str = tbchars.start().get();

   149     size_t len = GetDeflatedUTF8StringLength(str, tbchars.length());

   151     /* Allocate buffer. */

   152     unsigned char *utf8 = cx->pod_malloc<unsigned char>(len + 1);

   153     if (!utf8)

   154         return UTF8CharsZ();

   156     /* Encode to UTF8. */

   157     DeflateStringToUTF8Buffer(cx, str, tbchars.length(), (char *)utf8, &len);

   158     utf8[len] = '\0';

   160     return UTF8CharsZ(utf8, len);

   161 }

   163 static const uint32_t INVALID_UTF8 = UINT32_MAX;

   165 /*

   166  * Convert a utf8 character sequence into a UCS-4 character and return that

   167  * character.  It is assumed that the caller already checked that the sequence

   168  * is valid.

   169  */

   170 uint32_t

   171 JS::Utf8ToOneUcs4Char(const uint8_t *utf8Buffer, int utf8Length)

   172 {

   173     JS_ASSERT(1 <= utf8Length && utf8Length <= 4);

   175     if (utf8Length == 1) {

   176         JS_ASSERT(!(*utf8Buffer & 0x80));

   177         return *utf8Buffer;

   178     }

   180     /* from Unicode 3.1, non-shortest form is illegal */

   181     static const uint32_t minucs4Table[] = { 0x80, 0x800, 0x10000 };

   183     JS_ASSERT((*utf8Buffer & (0x100 - (1 << (7 - utf8Length)))) ==

   184               (0x100 - (1 << (8 - utf8Length))));

   185     uint32_t ucs4Char = *utf8Buffer++ & ((1 << (7 - utf8Length)) - 1);

   186     uint32_t minucs4Char = minucs4Table[utf8Length - 2];

   187     while (--utf8Length) {

   188         JS_ASSERT((*utf8Buffer & 0xC0) == 0x80);

   189         ucs4Char = (ucs4Char << 6) | (*utf8Buffer++ & 0x3F);

   190     }

   192     if (MOZ_UNLIKELY(ucs4Char < minucs4Char || (ucs4Char >= 0xD800 && ucs4Char <= 0xDFFF)))

   193         return INVALID_UTF8;

   195     return ucs4Char;

   196 }

   198 static void

   199 ReportInvalidCharacter(JSContext *cx, uint32_t offset)

   200 {

   201     char buffer[10];

   202     JS_snprintf(buffer, 10, "%d", offset);

   203     JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, js_GetErrorMessage, nullptr,

   204                                  JSMSG_MALFORMED_UTF8_CHAR, buffer);

   205 }

   207 static void

   208 ReportBufferTooSmall(JSContext *cx, uint32_t dummy)

   209 {

   210     JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_BUFFER_TOO_SMALL);

   211 }

   213 static void

   214 ReportTooBigCharacter(JSContext *cx, uint32_t v)

   215 {

   216     char buffer[10];

   217     JS_snprintf(buffer, 10, "0x%x", v + 0x10000);

   218     JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, js_GetErrorMessage, nullptr,

   219                                  JSMSG_UTF8_CHAR_TOO_LARGE, buffer);

   220 }

/* Selects what InflateUTF8StringToBuffer does as it walks its input. */
enum InflateUTF8Action {
    /* Count output units only; on invalid input, report an error and fail. */
    CountAndReportInvalids,
    /* Count output units only; invalid input is counted but never reported. */
    CountAndIgnoreInvalids,
    /* Write output units to the destination; invalid input becomes REPLACE_UTF8. */
    Copy
};

/* Code unit stored in place of invalid input when the action is Copy. */
static const uint32_t REPLACE_UTF8 = 0xFFFD;

   230 // If making changes to this algorithm, make sure to also update

   231 // LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp

   232 template <InflateUTF8Action action>

   233 static bool

   234 InflateUTF8StringToBuffer(JSContext *cx, const UTF8Chars src, jschar *dst, size_t *dstlenp,

   235                           bool *isAsciip)

   236 {

   237     *isAsciip = true;

   239     // First, count how many jschars need to be in the inflated string.

   240     // |i| is the index into |src|, and |j| is the the index into |dst|.

   241     size_t srclen = src.length();

   242     uint32_t j = 0;

   243     for (uint32_t i = 0; i < srclen; i++, j++) {

   244         uint32_t v = uint32_t(src[i]);

   245         if (!(v & 0x80)) {

   246             // ASCII code unit.  Simple copy.

   247             if (action == Copy)

   248                 dst[j] = jschar(v);

   250         } else {

   251             // Non-ASCII code unit.  Determine its length in bytes (n).

   252             *isAsciip = false;

   253             uint32_t n = 1;

   254             while (v & (0x80 >> n))

   255                 n++;

   257         #define INVALID(report, arg, n2)                                \

   258             do {                                                        \

   259                 if (action == CountAndReportInvalids) {                 \

   260                     report(cx, arg);                                    \

   261                     return false;                                       \

   262                 } else {                                                \

   263                     if (action == Copy)                                 \

   264                         dst[j] = jschar(REPLACE_UTF8);                  \

   265                     else                                                \

   266                         JS_ASSERT(action == CountAndIgnoreInvalids);    \

   267                     n = n2;                                             \

   268                     goto invalidMultiByteCodeUnit;                      \

   269                 }                                                       \

   270             } while (0)

   272             // Check the leading byte.

   273             if (n < 2 || n > 4)

   274                 INVALID(ReportInvalidCharacter, i, 1);

   276             // Check that |src| is large enough to hold an n-byte code unit.

   277             if (i + n > srclen)

   278                 INVALID(ReportBufferTooSmall, /* dummy = */ 0, 1);

   280             // Check the second byte.  From Unicode Standard v6.2, Table 3-7

   281             // Well-Formed UTF-8 Byte Sequences.

   282             if ((v == 0xE0 && ((uint8_t)src[i + 1] & 0xE0) != 0xA0) ||  // E0 A0~BF

   283                 (v == 0xED && ((uint8_t)src[i + 1] & 0xE0) != 0x80) ||  // ED 80~9F

   284                 (v == 0xF0 && ((uint8_t)src[i + 1] & 0xF0) == 0x80) ||  // F0 90~BF

   285                 (v == 0xF4 && ((uint8_t)src[i + 1] & 0xF0) != 0x80))    // F4 80~8F

   286             {

   287                 INVALID(ReportInvalidCharacter, i, 1);

   288             }

   290             // Check the continuation bytes.

   291             for (uint32_t m = 1; m < n; m++)

   292                 if ((src[i + m] & 0xC0) != 0x80)

   293                     INVALID(ReportInvalidCharacter, i, m);

   295             // Determine the code unit's length in jschars and act accordingly.

   296             v = Utf8ToOneUcs4Char((uint8_t *)&src[i], n);

   297             if (v < 0x10000) {

   298                 // The n-byte UTF8 code unit will fit in a single jschar.

   299                 if (action == Copy)

   300                     dst[j] = jschar(v);

   302             } else {

   303                 v -= 0x10000;

   304                 if (v <= 0xFFFFF) {

   305                     // The n-byte UTF8 code unit will fit in two jschars.

   306                     if (action == Copy)

   307                         dst[j] = jschar((v >> 10) + 0xD800);

   308                     j++;

   309                     if (action == Copy)

   310                         dst[j] = jschar((v & 0x3FF) + 0xDC00);

   312                 } else {

   313                     // The n-byte UTF8 code unit won't fit in two jschars.

   314                     INVALID(ReportTooBigCharacter, v, 1);

   315                 }

   316             }

   318           invalidMultiByteCodeUnit:

   319             // Move i to the last byte of the multi-byte code unit;  the loop

   320             // header will do the final i++ to move to the start of the next

   321             // code unit.

   322             i += n - 1;

   323         }

   324     }

   326     *dstlenp = j;

   328     return true;

   329 }

   331 typedef bool (*CountAction)(JSContext *, const UTF8Chars, jschar *, size_t *, bool *isAsciip);

   333 static TwoByteCharsZ

   334 InflateUTF8StringHelper(JSContext *cx, const UTF8Chars src, CountAction countAction, size_t *outlen)

   335 {

   336     *outlen = 0;

   338     bool isAscii;

   339     if (!countAction(cx, src, /* dst = */ nullptr, outlen, &isAscii))

   340         return TwoByteCharsZ();

   342     jschar *dst = cx->pod_malloc<jschar>(*outlen + 1);  // +1 for NUL

   343     if (!dst)

   344         return TwoByteCharsZ();

   346     if (isAscii) {

   347         size_t srclen = src.length();

   348         JS_ASSERT(*outlen == srclen);

   349         for (uint32_t i = 0; i < srclen; i++)

   350             dst[i] = jschar(src[i]);

   352     } else {

   353         JS_ALWAYS_TRUE(InflateUTF8StringToBuffer<Copy>(cx, src, dst, outlen, &isAscii));

   354     }

   356     dst[*outlen] = 0;    // NUL char

   358     return TwoByteCharsZ(dst, *outlen);

   359 }

   361 TwoByteCharsZ

   362 JS::UTF8CharsToNewTwoByteCharsZ(JSContext *cx, const UTF8Chars utf8, size_t *outlen)

   363 {

   364     return InflateUTF8StringHelper(cx, utf8, InflateUTF8StringToBuffer<CountAndReportInvalids>,

   365                                    outlen);

   366 }

   368 TwoByteCharsZ

   369 JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext *cx, const UTF8Chars utf8, size_t *outlen)

   370 {

   371     return InflateUTF8StringHelper(cx, utf8, InflateUTF8StringToBuffer<CountAndIgnoreInvalids>,

   372                                    outlen);

   373 }

The Tor Browser / file revision

js/src/vm/CharacterEncoding.cpp@129ffea94266

js/src/vm/CharacterEncoding.cpp