1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/uconv/src/nsUTF8ToUnicodeSSE2.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,96 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +// This file should only be compiled if you're on x86 or x86_64. Additionally, 1.9 +// you'll need to compile this file with -msse2 if you're using gcc. 1.10 + 1.11 +#include <emmintrin.h> 1.12 +#include "nscore.h" 1.13 + 1.14 +namespace mozilla { 1.15 +namespace SSE2 { 1.16 + 1.17 +void 1.18 +Convert_ascii_run(const char *&src, 1.19 + char16_t *&dst, 1.20 + int32_t len) 1.21 +{ 1.22 + if (len > 15) { 1.23 + __m128i in, out1, out2; 1.24 + __m128d *outp1, *outp2; 1.25 + __m128i zeroes; 1.26 + uint32_t offset; 1.27 + 1.28 + // align input to 16 bytes 1.29 + while ((NS_PTR_TO_UINT32(src) & 15) && len > 0) { 1.30 + if (*src & 0x80U) 1.31 + return; 1.32 + *dst++ = (char16_t) *src++; 1.33 + len--; 1.34 + } 1.35 + 1.36 + zeroes = _mm_setzero_si128(); 1.37 + 1.38 + offset = NS_PTR_TO_UINT32(dst) & 15; 1.39 + 1.40 + // Note: all these inner loops have to break, not return; we need 1.41 + // to let the single-char loop below catch any leftover 1.42 + // byte-at-a-time ASCII chars, since this function must consume 1.43 + // all available ASCII chars before it returns 1.44 + 1.45 + if (offset == 0) { 1.46 + while (len > 15) { 1.47 + in = _mm_load_si128((__m128i *) src); 1.48 + if (_mm_movemask_epi8(in)) 1.49 + break; 1.50 + out1 = _mm_unpacklo_epi8(in, zeroes); 1.51 + out2 = _mm_unpackhi_epi8(in, zeroes); 1.52 + _mm_stream_si128((__m128i *) dst, out1); 1.53 + _mm_stream_si128((__m128i *) (dst + 8), out2); 1.54 + dst += 16; 1.55 + src += 16; 1.56 + len -= 16; 1.57 + } 1.58 + } else if (offset == 8) { 1.59 + outp1 = (__m128d *) &out1; 1.60 + outp2 = (__m128d *) &out2; 1.61 + while (len > 15) { 1.62 + in = _mm_load_si128((__m128i *) src); 1.63 + if (_mm_movemask_epi8(in)) 1.64 + break; 1.65 + out1 = _mm_unpacklo_epi8(in, zeroes); 1.66 + out2 = _mm_unpackhi_epi8(in, zeroes); 1.67 + _mm_storel_epi64((__m128i *) dst, out1); 1.68 + _mm_storel_epi64((__m128i *) (dst + 8), out2); 1.69 + _mm_storeh_pd((double *) (dst + 4), *outp1); 1.70 + _mm_storeh_pd((double *) (dst + 12), *outp2); 1.71 + src += 16; 1.72 + dst += 16; 1.73 + len -= 16; 1.74 + } 1.75 + } else { 1.76 + while (len > 15) { 1.77 + in = _mm_load_si128((__m128i *) src); 1.78 + if (_mm_movemask_epi8(in)) 1.79 + break; 1.80 + out1 = _mm_unpacklo_epi8(in, zeroes); 1.81 + out2 = _mm_unpackhi_epi8(in, zeroes); 1.82 + _mm_storeu_si128((__m128i *) dst, out1); 1.83 + _mm_storeu_si128((__m128i *) (dst + 8), out2); 1.84 + src += 16; 1.85 + dst += 16; 1.86 + len -= 16; 1.87 + } 1.88 + } 1.89 + } 1.90 + 1.91 + // finish off a byte at a time 1.92 + 1.93 + while (len-- > 0 && (*src & 0x80U) == 0) { 1.94 + *dst++ = (char16_t) *src++; 1.95 + } 1.96 +} 1.97 + 1.98 +} // namespace SSE2 1.99 +} // namespace mozilla