xpcom/string/src/nsUTF8UtilsSSE2.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double-key logic depending on private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts. Some reservations
remain about how to convince FindCookie callers to test the condition and
pass a nullptr when double-key logic is disabled.

     1 /* This Source Code Form is subject to the terms of the Mozilla Public
     2  * License, v. 2.0. If a copy of the MPL was not distributed with this
     3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "nscore.h"
     6 #include "nsAlgorithm.h"
     7 #include <emmintrin.h>
     8 #include <nsUTF8Utils.h>
    10 void
    11 LossyConvertEncoding16to8::write_sse2(const char16_t* aSource,
    12                                       uint32_t aSourceLength)
    13 {
    14   char* dest = mDestination;
    16   // Align source to a 16-byte boundary.
    17   uint32_t i = 0;
    18   uint32_t alignLen =
    19     XPCOM_MIN<uint32_t>(aSourceLength, uint32_t(-NS_PTR_TO_INT32(aSource) & 0xf) / sizeof(char16_t));
    20   for (; i < alignLen; i++) {
    21     dest[i] = static_cast<unsigned char>(aSource[i]);
    22   }
    24   // Walk 64 bytes (four XMM registers) at a time.
    25   __m128i vectmask = _mm_set1_epi16(0x00ff);
    26   for (; aSourceLength - i > 31; i += 32) {
    27     __m128i source1 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i));
    28     source1 = _mm_and_si128(source1, vectmask);
    30     __m128i source2 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 8));
    31     source2 = _mm_and_si128(source2, vectmask);
    33     __m128i source3 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 16));
    34     source3 = _mm_and_si128(source3, vectmask);
    36     __m128i source4 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 24));
    37     source4 = _mm_and_si128(source4, vectmask);
    40     // Pack the source data.  SSE2 views this as a saturating uint16_t to
    41     // uint8_t conversion, but since we masked off the high-order byte of every
    42     // uint16_t, we're really just grabbing the low-order bytes of source1 and
    43     // source2.
    44     __m128i packed1 = _mm_packus_epi16(source1, source2);
    45     __m128i packed2 = _mm_packus_epi16(source3, source4);
    47     // This store needs to be unaligned since there's no guarantee that the
    48     // alignment we did above for the source will align the destination.
    49     _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i),      packed1);
    50     _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 16), packed2);
    51   }
    53   // Finish up the rest.
    54   for (; i < aSourceLength; i++) {
    55     dest[i] = static_cast<unsigned char>(aSource[i]);
    56   }
    58   mDestination += i;
    59 }
    61 void
    62 LossyConvertEncoding8to16::write_sse2(const char* aSource,
    63                                       uint32_t aSourceLength)
    64 {
    65   char16_t *dest = mDestination;
    67   // Align source to a 16-byte boundary.  We choose to align source rather than
    68   // dest because we'd rather have our loads than our stores be fast. You have
    69   // to wait for a load to complete, but you can keep on moving after issuing a
    70   // store.
    71   uint32_t i = 0;
    72   uint32_t alignLen = XPCOM_MIN(aSourceLength, uint32_t(-NS_PTR_TO_INT32(aSource) & 0xf));
    73   for (; i < alignLen; i++) {
    74     dest[i] = static_cast<unsigned char>(aSource[i]);
    75   }
    77   // Walk 32 bytes (two XMM registers) at a time.
    78   for (; aSourceLength - i > 31; i += 32) {
    79     __m128i source1 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i));
    80     __m128i source2 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 16));
    82     // Interleave 0s in with the bytes of source to create lo and hi.
    83     __m128i lo1 = _mm_unpacklo_epi8(source1, _mm_setzero_si128());
    84     __m128i hi1 = _mm_unpackhi_epi8(source1, _mm_setzero_si128());
    85     __m128i lo2 = _mm_unpacklo_epi8(source2, _mm_setzero_si128());
    86     __m128i hi2 = _mm_unpackhi_epi8(source2, _mm_setzero_si128());
    88     // store lo and hi into dest.
    89     _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i),      lo1);
    90     _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 8),  hi1);
    91     _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 16), lo2);
    92     _mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 24), hi2);
    93   }
    95   // Finish up whatever's left.
    96   for (; i < aSourceLength; i++) {
    97     dest[i] = static_cast<unsigned char>(aSource[i]);
    98   }
   100   mDestination += i;
   101 }

mercurial