gfx/ycbcr/yuv_convert_mmx.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
michael@0 2 // Use of this source code is governed by a BSD-style license that can be
michael@0 3 // found in the LICENSE file.
michael@0 4
michael@0 5 #include <mmintrin.h>
michael@0 6 #include "yuv_row.h"
michael@0 7
namespace mozilla {
namespace gfx {

// FilterRows combines two rows of the image using linear interpolation.
// MMX version does 8 pixels at a time.
//
// For every byte i the result is
//   (y0_ptr[i] * (256 - source_y_fraction) + y1_ptr[i] * source_y_fraction) >> 8
//
// Parameters:
//   ybuf              - destination row.
//   y0_ptr, y1_ptr    - the two source rows being blended.
//   source_width      - number of bytes to produce. Work is done in whole
//                       8-byte groups, so the width is effectively rounded up
//                       to a multiple of 8: all three buffers must be valid
//                       for that rounded-up length.
//   source_y_fraction - weight of the y1 row in 1/256 units. The arithmetic is
//                       carried out in 16-bit lanes, so the value is expected
//                       to lie in [0, 256]; anything else wraps.
//
// A non-positive source_width is a no-op (no bytes are read or written).
//
// This function does not issue _mm_empty().
// NOTE(review): callers presumably have to clear the MMX state themselves
// before any x87 floating point code runs - confirm against the call sites.
// NOTE(review): the rows are accessed through __m64 pointers; confirm that
// callers do not depend on stricter alignment checks (e.g. sanitizers).
void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                    int source_width, int source_y_fraction) {
  // The loop below is bottom-tested and would otherwise touch one full
  // 8-byte group even when there is nothing to filter.
  if (source_width <= 0) {
    return;
  }

  const __m64 zero = _mm_setzero_si64();
  const __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
  const __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);

  const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
  const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
  __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
  const __m64* end64 = reinterpret_cast<const __m64*>(ybuf + source_width);

  do {
    __m64 y0 = *y0_ptr64++;
    __m64 y1 = *y1_ptr64++;

    // Widen the 8 bytes of each row into two vectors of four 16-bit lanes.
    __m64 y2 = _mm_unpackhi_pi8(y0, zero);
    __m64 y3 = _mm_unpackhi_pi8(y1, zero);
    y0 = _mm_unpacklo_pi8(y0, zero);
    y1 = _mm_unpacklo_pi8(y1, zero);

    // Weight each row. With the fraction in [0, 256] the sum of the two
    // products is at most 255 * 256 and therefore fits in 16 bits.
    y0 = _mm_mullo_pi16(y0, y0_fraction);
    y1 = _mm_mullo_pi16(y1, y1_fraction);
    y2 = _mm_mullo_pi16(y2, y0_fraction);
    y3 = _mm_mullo_pi16(y3, y1_fraction);
    y0 = _mm_add_pi16(y0, y1);
    y2 = _mm_add_pi16(y2, y3);

    // Drop the 8 fractional bits (logical shift) and narrow back to bytes.
    y0 = _mm_srli_pi16(y0, 8);
    y2 = _mm_srli_pi16(y2, 8);
    y0 = _mm_packs_pu16(y0, y2);
    *dest64++ = y0;
  } while (dest64 < end64);
}

}  // namespace gfx
}  // namespace mozilla

mercurial