content/base/src/nsTextFragmentSSE2.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/content/base/src/nsTextFragmentSSE2.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,72 @@
     1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.7 +
     1.8 +// This file should only be compiled if you're on x86 or x86_64.  Additionally,
     1.9 +// you'll need to compile this file with -msse2 if you're using gcc.
    1.10 +
    1.11 +#include <emmintrin.h>
    1.12 +#include "nscore.h"
    1.13 +#include "nsAlgorithm.h"
    1.14 +#include "nsTextFragmentImpl.h"
    1.15 +#include <algorithm>
    1.16 +
    1.17 +namespace mozilla {
    1.18 +namespace SSE2 {
    1.19 +
    // Reports whether all 128 bits of |x| are zero.
    static inline bool
    is_zero (__m128i x)
    {
      // Compare each of the 16 byte lanes against zero; a lane that is zero
      // becomes 0xff, any other lane becomes 0x00.
      const __m128i zeroLanes = _mm_cmpeq_epi8(x, _mm_setzero_si128());

      // Collect the top bit of every lane into the low 16 bits of an int.  All
      // 16 bits are set exactly when every byte of |x| compared equal to zero.
      const int laneBits = _mm_movemask_epi8(zeroLanes);
      return laneBits == 0xffff;
    }
    1.26 +
    1.27 +int32_t
    1.28 +FirstNon8Bit(const char16_t *str, const char16_t *end)
    1.29 +{
    1.30 +  const uint32_t numUnicharsPerVector = 8;
    1.31 +  typedef Non8BitParameters<sizeof(size_t)> p;
    1.32 +  const size_t mask = p::mask();
    1.33 +  const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
    1.34 +  const int32_t len = end - str;
    1.35 +  int32_t i = 0;
    1.36 +
    1.37 +  // Align ourselves to a 16-byte boundary, as required by _mm_load_si128
    1.38 +  // (i.e. MOVDQA).
    1.39 +  int32_t alignLen =
    1.40 +    std::min(len, int32_t(((-NS_PTR_TO_INT32(str)) & 0xf) / sizeof(char16_t)));
    1.41 +  for (; i < alignLen; i++) {
    1.42 +    if (str[i] > 255)
    1.43 +      return i;
    1.44 +  }
    1.45 +
    1.46 +  // Check one XMM register (16 bytes) at a time.
    1.47 +  const int32_t vectWalkEnd = ((len - i) / numUnicharsPerVector) * numUnicharsPerVector;
    1.48 +  const uint16_t shortMask = 0xff00;
    1.49 +  __m128i vectmask = _mm_set1_epi16(static_cast<int16_t>(shortMask));
    1.50 +  for(; i < vectWalkEnd; i += numUnicharsPerVector) {
    1.51 +    const __m128i vect = *reinterpret_cast<const __m128i*>(str + i);
    1.52 +    if (!is_zero(_mm_and_si128(vect, vectmask)))
    1.53 +      return i;
    1.54 +  }
    1.55 +
    1.56 +  // Check one word at a time.
    1.57 +  const int32_t wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
    1.58 +  for(; i < wordWalkEnd; i += numUnicharsPerWord) {
    1.59 +    const size_t word = *reinterpret_cast<const size_t*>(str + i);
    1.60 +    if (word & mask)
    1.61 +      return i;
    1.62 +  }
    1.63 +
    1.64 +  // Take care of the remainder one character at a time.
    1.65 +  for (; i < len; i++) {
    1.66 +    if (str[i] > 255) {
    1.67 +      return i;
    1.68 +    }
    1.69 +  }
    1.70 +
    1.71 +  return -1;
    1.72 +}
    1.73 +
    1.74 +} // namespace SSE2
    1.75 +} // namespace mozilla

mercurial