Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
michael@0 | 1 | // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
michael@0 | 2 | // Use of this source code is governed by a BSD-style license that can be |
michael@0 | 3 | // found in the LICENSE file. |
michael@0 | 4 | |
michael@0 | 5 | #include <emmintrin.h> |
michael@0 | 6 | #include "yuv_row.h" |
michael@0 | 7 | |
michael@0 | 8 | namespace mozilla { |
michael@0 | 9 | namespace gfx { |
michael@0 | 10 | |
michael@0 | 11 | // FilterRows combines two rows of the image using linear interpolation. |
michael@0 | 12 | // SSE2 version does 16 pixels at a time. |
michael@0 | 13 | void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, |
michael@0 | 14 | int source_width, int source_y_fraction) { |
michael@0 | 15 | __m128i zero = _mm_setzero_si128(); |
michael@0 | 16 | __m128i y1_fraction = _mm_set1_epi16(source_y_fraction); |
michael@0 | 17 | __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction); |
michael@0 | 18 | |
michael@0 | 19 | const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr); |
michael@0 | 20 | const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr); |
michael@0 | 21 | __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf); |
michael@0 | 22 | __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width); |
michael@0 | 23 | |
michael@0 | 24 | do { |
michael@0 | 25 | __m128i y0 = _mm_loadu_si128(y0_ptr128); |
michael@0 | 26 | __m128i y1 = _mm_loadu_si128(y1_ptr128); |
michael@0 | 27 | __m128i y2 = _mm_unpackhi_epi8(y0, zero); |
michael@0 | 28 | __m128i y3 = _mm_unpackhi_epi8(y1, zero); |
michael@0 | 29 | y0 = _mm_unpacklo_epi8(y0, zero); |
michael@0 | 30 | y1 = _mm_unpacklo_epi8(y1, zero); |
michael@0 | 31 | y0 = _mm_mullo_epi16(y0, y0_fraction); |
michael@0 | 32 | y1 = _mm_mullo_epi16(y1, y1_fraction); |
michael@0 | 33 | y2 = _mm_mullo_epi16(y2, y0_fraction); |
michael@0 | 34 | y3 = _mm_mullo_epi16(y3, y1_fraction); |
michael@0 | 35 | y0 = _mm_add_epi16(y0, y1); |
michael@0 | 36 | y2 = _mm_add_epi16(y2, y3); |
michael@0 | 37 | y0 = _mm_srli_epi16(y0, 8); |
michael@0 | 38 | y2 = _mm_srli_epi16(y2, 8); |
michael@0 | 39 | y0 = _mm_packus_epi16(y0, y2); |
michael@0 | 40 | *dest128++ = y0; |
michael@0 | 41 | ++y0_ptr128; |
michael@0 | 42 | ++y1_ptr128; |
michael@0 | 43 | } while (dest128 < end128); |
michael@0 | 44 | } |
michael@0 | 45 | |
michael@0 | 46 | } |
michael@0 | 47 | } |