intl/uconv/src/nsUTF8ToUnicodeSSE2.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* This Source Code Form is subject to the terms of the Mozilla Public
     2  * License, v. 2.0. If a copy of the MPL was not distributed with this
     3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 // This file should only be compiled if you're on x86 or x86_64.  Additionally,
     6 // you'll need to compile this file with -msse2 if you're using gcc.
     8 #include <emmintrin.h>
     9 #include "nscore.h"
    11 namespace mozilla {
    12 namespace SSE2 {
    14 void
    15 Convert_ascii_run(const char *&src,
    16                   char16_t  *&dst,
    17                   int32_t      len)
    18 {
    19   if (len > 15) {
    20     __m128i in, out1, out2;
    21     __m128d *outp1, *outp2;
    22     __m128i zeroes;
    23     uint32_t offset;
    25     // align input to 16 bytes
    26     while ((NS_PTR_TO_UINT32(src) & 15) && len > 0) {
    27       if (*src & 0x80U)
    28         return;
    29       *dst++ = (char16_t) *src++;
    30       len--;
    31     }
    33     zeroes = _mm_setzero_si128();
    35     offset = NS_PTR_TO_UINT32(dst) & 15;
    37     // Note: all these inner loops have to break, not return; we need
    38     // to let the single-char loop below catch any leftover
    39     // byte-at-a-time ASCII chars, since this function must consume
    40     // all available ASCII chars before it returns
    42     if (offset == 0) {
    43       while (len > 15) {
    44         in = _mm_load_si128((__m128i *) src);
    45         if (_mm_movemask_epi8(in))
    46           break;
    47         out1 = _mm_unpacklo_epi8(in, zeroes);
    48         out2 = _mm_unpackhi_epi8(in, zeroes);
    49         _mm_stream_si128((__m128i *) dst, out1);
    50         _mm_stream_si128((__m128i *) (dst + 8), out2);
    51         dst += 16;
    52         src += 16;
    53         len -= 16;
    54       }
    55     } else if (offset == 8) {
    56       outp1 = (__m128d *) &out1;
    57       outp2 = (__m128d *) &out2;
    58       while (len > 15) {
    59         in = _mm_load_si128((__m128i *) src);
    60         if (_mm_movemask_epi8(in))
    61           break;
    62         out1 = _mm_unpacklo_epi8(in, zeroes);
    63         out2 = _mm_unpackhi_epi8(in, zeroes);
    64         _mm_storel_epi64((__m128i *) dst, out1);
    65         _mm_storel_epi64((__m128i *) (dst + 8), out2);
    66         _mm_storeh_pd((double *) (dst + 4), *outp1);
    67         _mm_storeh_pd((double *) (dst + 12), *outp2);
    68         src += 16;
    69         dst += 16;
    70         len -= 16;
    71       }
    72     } else {
    73       while (len > 15) {
    74         in = _mm_load_si128((__m128i *) src);
    75         if (_mm_movemask_epi8(in))
    76           break;
    77         out1 = _mm_unpacklo_epi8(in, zeroes);
    78         out2 = _mm_unpackhi_epi8(in, zeroes);
    79         _mm_storeu_si128((__m128i *) dst, out1);
    80         _mm_storeu_si128((__m128i *) (dst + 8), out2);
    81         src += 16;
    82         dst += 16;
    83         len -= 16;
    84       }
    85     }
    86   }
    88   // finish off a byte at a time
    90   while (len-- > 0 && (*src & 0x80U) == 0) {
    91     *dst++ = (char16_t) *src++;
    92   }
    93 }
    95 } // namespace SSE2
    96 } // namespace mozilla

mercurial