intl/uconv/src/nsUTF8ToUnicodeSSE2.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4
michael@0 5 // This file should only be compiled if you're on x86 or x86_64. Additionally,
michael@0 6 // you'll need to compile this file with -msse2 if you're using gcc.
michael@0 7
michael@0 8 #include <emmintrin.h>
michael@0 9 #include "nscore.h"
michael@0 10
michael@0 11 namespace mozilla {
michael@0 12 namespace SSE2 {
michael@0 13
// Returns the offset of |ptr| inside its enclosing 16-byte block, i.e. the
// low four bits of its address.
static inline uint32_t
OffsetIn16ByteBlock(const void *ptr)
{
  return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(ptr) & 15);
}

/**
 * Widens a leading run of ASCII bytes at |src| into 16-bit code units at
 * |dst|.
 *
 * Conversion stops at the first byte whose high bit is set, or after |len|
 * bytes, whichever comes first. Callers rely on this function consuming
 * every ASCII byte available at the front of the input before it returns.
 *
 * @param src  In/out. Advanced past each byte that was converted; on return
 *             it points at the first unconverted byte.
 * @param dst  In/out. Advanced by one code unit per converted byte.
 * @param len  Upper bound on the number of bytes read from |src| (and thus
 *             on the number of code units written to |dst|). Values <= 0
 *             convert nothing.
 */
void
Convert_ascii_run(const char *&src,
                  char16_t *&dst,
                  int32_t len)
{
  // The vector path handles 16 input bytes per iteration, so it is only
  // entered when at least one full block might be available.
  if (len > 15) {
    // Walk |src| forward one byte at a time until it is 16-byte aligned so
    // that the block loops below can use aligned loads.
    while (OffsetIn16ByteBlock(src) != 0 && len > 0) {
      if (*src & 0x80U)
        return;  // Hit a non-ASCII byte: the run is over.
      *dst++ = static_cast<char16_t>(*src++);
      len--;
    }

    const __m128i zeroes = _mm_setzero_si128();
    const uint32_t offset = OffsetIn16ByteBlock(dst);

    // Note: the block loops below have to break, not return. The
    // byte-at-a-time loop at the end must still get a chance to pick up the
    // ASCII bytes that precede a non-ASCII byte inside a rejected block, as
    // well as any tail shorter than 16 bytes.
    //
    // In every loop, _mm_movemask_epi8 collects the high bit of each of the
    // 16 loaded bytes; a non-zero mask means the block contains at least one
    // non-ASCII byte. Interleaving with |zeroes| zero-extends each byte to
    // 16 bits: unpacklo widens bytes 0-7, unpackhi widens bytes 8-15.

    if (offset == 0) {
      // |dst| is 16-byte aligned: write both halves with aligned
      // non-temporal stores.
      // NOTE(review): non-temporal stores are weakly ordered and no
      // _mm_sfence() is issued before returning -- confirm that callers
      // never publish |dst| to another thread through plain stores.
      while (len > 15) {
        const __m128i in =
          _mm_load_si128(reinterpret_cast<const __m128i *>(src));
        if (_mm_movemask_epi8(in))
          break;
        __m128i *out = reinterpret_cast<__m128i *>(dst);
        _mm_stream_si128(out, _mm_unpacklo_epi8(in, zeroes));
        _mm_stream_si128(out + 1, _mm_unpackhi_epi8(in, zeroes));
        dst += 16;
        src += 16;
        len -= 16;
      }
    } else if (offset == 8) {
      // |dst| is 8-byte but not 16-byte aligned: write each widened vector
      // as two separate 64-bit halves, so every store is 8-byte aligned.
      while (len > 15) {
        const __m128i in =
          _mm_load_si128(reinterpret_cast<const __m128i *>(src));
        if (_mm_movemask_epi8(in))
          break;
        const __m128i lo = _mm_unpacklo_epi8(in, zeroes);
        const __m128i hi = _mm_unpackhi_epi8(in, zeroes);
        // Low 64 bits of each vector -> code units 0-3 and 8-11.
        _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), lo);
        _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + 8), hi);
        // High 64 bits of each vector -> code units 4-7 and 12-15. The cast
        // intrinsic only reinterprets the register; no conversion happens.
        _mm_storeh_pd(reinterpret_cast<double *>(dst + 4),
                      _mm_castsi128_pd(lo));
        _mm_storeh_pd(reinterpret_cast<double *>(dst + 12),
                      _mm_castsi128_pd(hi));
        src += 16;
        dst += 16;
        len -= 16;
      }
    } else {
      // Any other alignment of |dst|: fall back to unaligned 16-byte stores.
      while (len > 15) {
        const __m128i in =
          _mm_load_si128(reinterpret_cast<const __m128i *>(src));
        if (_mm_movemask_epi8(in))
          break;
        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst),
                         _mm_unpacklo_epi8(in, zeroes));
        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + 8),
                         _mm_unpackhi_epi8(in, zeroes));
        src += 16;
        dst += 16;
        len -= 16;
      }
    }
  }

  // Finish off a byte at a time: covers short inputs, the sub-16-byte tail,
  // and the ASCII prefix of a block the vector loops refused.
  for (; len > 0 && (*src & 0x80U) == 0; --len) {
    *dst++ = static_cast<char16_t>(*src++);
  }
}
michael@0 94
michael@0 95 } // namespace SSE2
michael@0 96 } // namespace mozilla

mercurial