intl/uconv/src/nsUTF8ToUnicodeSSE2.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/src/nsUTF8ToUnicodeSSE2.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,96 @@
     1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.7 +
     1.8 +// This file should only be compiled if you're on x86 or x86_64.  Additionally,
     1.9 +// you'll need to compile this file with -msse2 if you're using gcc.
    1.10 +
    1.11 +#include <emmintrin.h>
    1.12 +#include "nscore.h"
    1.13 +
    1.14 +namespace mozilla {
    1.15 +namespace SSE2 {
    1.16 +
    1.17 +void
    1.18 +Convert_ascii_run(const char *&src,
    1.19 +                  char16_t  *&dst,
    1.20 +                  int32_t      len)
    1.21 +{
    1.22 +  if (len > 15) {
    1.23 +    __m128i in, out1, out2;
    1.24 +    __m128d *outp1, *outp2;
    1.25 +    __m128i zeroes;
    1.26 +    uint32_t offset;
    1.27 +
    1.28 +    // align input to 16 bytes
    1.29 +    while ((NS_PTR_TO_UINT32(src) & 15) && len > 0) {
    1.30 +      if (*src & 0x80U)
    1.31 +        return;
    1.32 +      *dst++ = (char16_t) *src++;
    1.33 +      len--;
    1.34 +    }
    1.35 +
    1.36 +    zeroes = _mm_setzero_si128();
    1.37 +
    1.38 +    offset = NS_PTR_TO_UINT32(dst) & 15;
    1.39 +
    1.40 +    // Note: all these inner loops have to break, not return; we need
    1.41 +    // to let the single-char loop below catch any leftover
    1.42 +    // byte-at-a-time ASCII chars, since this function must consume
    1.43 +    // all available ASCII chars before it returns
    1.44 +
    1.45 +    if (offset == 0) {
    1.46 +      while (len > 15) {
    1.47 +        in = _mm_load_si128((__m128i *) src);
    1.48 +        if (_mm_movemask_epi8(in))
    1.49 +          break;
    1.50 +        out1 = _mm_unpacklo_epi8(in, zeroes);
    1.51 +        out2 = _mm_unpackhi_epi8(in, zeroes);
    1.52 +        _mm_stream_si128((__m128i *) dst, out1);
    1.53 +        _mm_stream_si128((__m128i *) (dst + 8), out2);
    1.54 +        dst += 16;
    1.55 +        src += 16;
    1.56 +        len -= 16;
    1.57 +      }
    1.58 +    } else if (offset == 8) {
    1.59 +      outp1 = (__m128d *) &out1;
    1.60 +      outp2 = (__m128d *) &out2;
    1.61 +      while (len > 15) {
    1.62 +        in = _mm_load_si128((__m128i *) src);
    1.63 +        if (_mm_movemask_epi8(in))
    1.64 +          break;
    1.65 +        out1 = _mm_unpacklo_epi8(in, zeroes);
    1.66 +        out2 = _mm_unpackhi_epi8(in, zeroes);
    1.67 +        _mm_storel_epi64((__m128i *) dst, out1);
    1.68 +        _mm_storel_epi64((__m128i *) (dst + 8), out2);
    1.69 +        _mm_storeh_pd((double *) (dst + 4), *outp1);
    1.70 +        _mm_storeh_pd((double *) (dst + 12), *outp2);
    1.71 +        src += 16;
    1.72 +        dst += 16;
    1.73 +        len -= 16;
    1.74 +      }
    1.75 +    } else {
    1.76 +      while (len > 15) {
    1.77 +        in = _mm_load_si128((__m128i *) src);
    1.78 +        if (_mm_movemask_epi8(in))
    1.79 +          break;
    1.80 +        out1 = _mm_unpacklo_epi8(in, zeroes);
    1.81 +        out2 = _mm_unpackhi_epi8(in, zeroes);
    1.82 +        _mm_storeu_si128((__m128i *) dst, out1);
    1.83 +        _mm_storeu_si128((__m128i *) (dst + 8), out2);
    1.84 +        src += 16;
    1.85 +        dst += 16;
    1.86 +        len -= 16;
    1.87 +      }
    1.88 +    }
    1.89 +  }
    1.90 +
    1.91 +  // finish off a byte at a time
    1.92 +
    1.93 +  while (len-- > 0 && (*src & 0x80U) == 0) {
    1.94 +    *dst++ = (char16_t) *src++;
    1.95 +  }
    1.96 +}
    1.97 +
    1.98 +} // namespace SSE2
    1.99 +} // namespace mozilla

mercurial