gfx/ycbcr/yuv_convert_sse2.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/ycbcr/yuv_convert_sse2.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,47 @@
     1.4 +// Copyright (c) 2010 The Chromium Authors. All rights reserved.
     1.5 +// Use of this source code is governed by a BSD-style license that can be
     1.6 +// found in the LICENSE file.
     1.7 +
     1.8 +#include <emmintrin.h>
     1.9 +#include "yuv_row.h"
    1.10 +
    1.11 +namespace mozilla {
    1.12 +namespace gfx {
    1.13 +
    // FilterRows_SSE2 blends two source rows into one destination row using
    // linear interpolation with 8-bit fixed-point weights:
    //
    //   ybuf[i] = (y0_ptr[i] * (256 - source_y_fraction) +
    //              y1_ptr[i] * source_y_fraction) >> 8
    //
    // Parameters:
    //   ybuf              - destination row. It is written with aligned 128-bit
    //                       stores, so it must be 16-byte aligned.
    //   y0_ptr, y1_ptr    - the two source rows; read with unaligned loads.
    //   source_width      - number of bytes requested. Nothing is read or
    //                       written when it is <= 0.
    //   source_y_fraction - weight of the y1 row out of 256; the y0 row gets the
    //                       remainder. The 16-bit lane arithmetic is exact for
    //                       values in [0, 256]; NOTE(review): callers are
    //                       presumably expected to stay in that range - confirm.
    //
    // The row is processed in whole blocks of 16 bytes, so when source_width is
    // not a multiple of 16 up to 15 bytes beyond source_width are read from both
    // sources and written to ybuf. All three buffers must be padded accordingly.
    void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                         int source_width, int source_y_fraction) {
      // An empty (or negative) width means there is nothing to filter. Without
      // this guard a full 16-byte block would be processed unconditionally.
      if (source_width <= 0) {
        return;
      }

      const __m128i zero = _mm_setzero_si128();
      const __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
      const __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);

      const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
      const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
      __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf);
      const __m128i* const end128 =
          reinterpret_cast<const __m128i*>(ybuf + source_width);

      while (dest128 < end128) {
        const __m128i row0 = _mm_loadu_si128(y0_ptr128);
        const __m128i row1 = _mm_loadu_si128(y1_ptr128);

        // Widen the 16 bytes of each row into two vectors of eight 16-bit lanes
        // (low half and high half) so the weighted products do not overflow.
        const __m128i row0_lo = _mm_unpacklo_epi8(row0, zero);
        const __m128i row0_hi = _mm_unpackhi_epi8(row0, zero);
        const __m128i row1_lo = _mm_unpacklo_epi8(row1, zero);
        const __m128i row1_hi = _mm_unpackhi_epi8(row1, zero);

        // Weighted sum of both rows, still scaled by 256.
        __m128i blend_lo = _mm_add_epi16(_mm_mullo_epi16(row0_lo, y0_fraction),
                                         _mm_mullo_epi16(row1_lo, y1_fraction));
        __m128i blend_hi = _mm_add_epi16(_mm_mullo_epi16(row0_hi, y0_fraction),
                                         _mm_mullo_epi16(row1_hi, y1_fraction));

        // Drop the 8 fractional bits (logical shift: lanes are unsigned here).
        blend_lo = _mm_srli_epi16(blend_lo, 8);
        blend_hi = _mm_srli_epi16(blend_hi, 8);

        // Narrow back to 16 bytes and store (aligned store, see ybuf above).
        _mm_store_si128(dest128, _mm_packus_epi16(blend_lo, blend_hi));

        ++dest128;
        ++y0_ptr128;
        ++y1_ptr128;
      }
    }
    1.48 +
    1.49 +}
    1.50 +}

mercurial