1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/ycbcr/yuv_convert_mmx.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,45 @@ 1.4 +// Copyright (c) 2010 The Chromium Authors. All rights reserved. 1.5 +// Use of this source code is governed by a BSD-style license that can be 1.6 +// found in the LICENSE file. 1.7 + 1.8 +#include <mmintrin.h> 1.9 +#include "yuv_row.h" 1.10 + 1.11 +namespace mozilla { 1.12 +namespace gfx { 1.13 + 1.14 +// FilterRows combines two rows of the image using linear interpolation. 1.15 +// MMX version does 8 pixels at a time. 1.16 +void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, 1.17 + int source_width, int source_y_fraction) { 1.18 + __m64 zero = _mm_setzero_si64(); 1.19 + __m64 y1_fraction = _mm_set1_pi16(source_y_fraction); 1.20 + __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction); 1.21 + 1.22 + const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr); 1.23 + const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr); 1.24 + __m64* dest64 = reinterpret_cast<__m64*>(ybuf); 1.25 + __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width); 1.26 + 1.27 + do { 1.28 + __m64 y0 = *y0_ptr64++; 1.29 + __m64 y1 = *y1_ptr64++; 1.30 + __m64 y2 = _mm_unpackhi_pi8(y0, zero); 1.31 + __m64 y3 = _mm_unpackhi_pi8(y1, zero); 1.32 + y0 = _mm_unpacklo_pi8(y0, zero); 1.33 + y1 = _mm_unpacklo_pi8(y1, zero); 1.34 + y0 = _mm_mullo_pi16(y0, y0_fraction); 1.35 + y1 = _mm_mullo_pi16(y1, y1_fraction); 1.36 + y2 = _mm_mullo_pi16(y2, y0_fraction); 1.37 + y3 = _mm_mullo_pi16(y3, y1_fraction); 1.38 + y0 = _mm_add_pi16(y0, y1); 1.39 + y2 = _mm_add_pi16(y2, y3); 1.40 + y0 = _mm_srli_pi16(y0, 8); 1.41 + y2 = _mm_srli_pi16(y2, 8); 1.42 + y0 = _mm_packs_pu16(y0, y2); 1.43 + *dest64++ = y0; 1.44 + } while (dest64 < end64); 1.45 +} 1.46 + 1.47 +} 1.48 +}