1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/ycbcr/yuv_convert_sse2.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,47 @@ 1.4 +// Copyright (c) 2010 The Chromium Authors. All rights reserved. 1.5 +// Use of this source code is governed by a BSD-style license that can be 1.6 +// found in the LICENSE file. 1.7 + 1.8 +#include <emmintrin.h> 1.9 +#include "yuv_row.h" 1.10 + 1.11 +namespace mozilla { 1.12 +namespace gfx { 1.13 + 1.14 +// FilterRows combines two rows of the image using linear interpolation. 1.15 +// SSE2 version does 16 pixels at a time. 1.16 +void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, 1.17 + int source_width, int source_y_fraction) { 1.18 + __m128i zero = _mm_setzero_si128(); 1.19 + __m128i y1_fraction = _mm_set1_epi16(source_y_fraction); 1.20 + __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction); 1.21 + 1.22 + const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr); 1.23 + const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr); 1.24 + __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf); 1.25 + __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width); 1.26 + 1.27 + do { 1.28 + __m128i y0 = _mm_loadu_si128(y0_ptr128); 1.29 + __m128i y1 = _mm_loadu_si128(y1_ptr128); 1.30 + __m128i y2 = _mm_unpackhi_epi8(y0, zero); 1.31 + __m128i y3 = _mm_unpackhi_epi8(y1, zero); 1.32 + y0 = _mm_unpacklo_epi8(y0, zero); 1.33 + y1 = _mm_unpacklo_epi8(y1, zero); 1.34 + y0 = _mm_mullo_epi16(y0, y0_fraction); 1.35 + y1 = _mm_mullo_epi16(y1, y1_fraction); 1.36 + y2 = _mm_mullo_epi16(y2, y0_fraction); 1.37 + y3 = _mm_mullo_epi16(y3, y1_fraction); 1.38 + y0 = _mm_add_epi16(y0, y1); 1.39 + y2 = _mm_add_epi16(y2, y3); 1.40 + y0 = _mm_srli_epi16(y0, 8); 1.41 + y2 = _mm_srli_epi16(y2, 8); 1.42 + y0 = _mm_packus_epi16(y0, y2); 1.43 + *dest128++ = y0; 1.44 + ++y0_ptr128; 1.45 + ++y1_ptr128; 1.46 + } while (dest128 < end128); 1.47 +} 1.48 + 1.49 +} 1.50 +}