|
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
|
2 // Use of this source code is governed by a BSD-style license that can be |
|
3 // found in the LICENSE file. |
|
4 |
|
5 #include <mmintrin.h> |
|
6 #include "yuv_row.h" |
|
7 |
|
8 namespace mozilla { |
|
9 namespace gfx { |
|
10 |
|
11 // FilterRows combines two rows of the image using linear interpolation. |
|
12 // MMX version does 8 pixels at a time. |
|
13 void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, |
|
14 int source_width, int source_y_fraction) { |
|
15 __m64 zero = _mm_setzero_si64(); |
|
16 __m64 y1_fraction = _mm_set1_pi16(source_y_fraction); |
|
17 __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction); |
|
18 |
|
19 const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr); |
|
20 const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr); |
|
21 __m64* dest64 = reinterpret_cast<__m64*>(ybuf); |
|
22 __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width); |
|
23 |
|
24 do { |
|
25 __m64 y0 = *y0_ptr64++; |
|
26 __m64 y1 = *y1_ptr64++; |
|
27 __m64 y2 = _mm_unpackhi_pi8(y0, zero); |
|
28 __m64 y3 = _mm_unpackhi_pi8(y1, zero); |
|
29 y0 = _mm_unpacklo_pi8(y0, zero); |
|
30 y1 = _mm_unpacklo_pi8(y1, zero); |
|
31 y0 = _mm_mullo_pi16(y0, y0_fraction); |
|
32 y1 = _mm_mullo_pi16(y1, y1_fraction); |
|
33 y2 = _mm_mullo_pi16(y2, y0_fraction); |
|
34 y3 = _mm_mullo_pi16(y3, y1_fraction); |
|
35 y0 = _mm_add_pi16(y0, y1); |
|
36 y2 = _mm_add_pi16(y2, y3); |
|
37 y0 = _mm_srli_pi16(y0, 8); |
|
38 y2 = _mm_srli_pi16(y2, 8); |
|
39 y0 = _mm_packs_pu16(y0, y2); |
|
40 *dest64++ = y0; |
|
41 } while (dest64 < end64); |
|
42 } |
|
43 |
|
44 } |
|
45 } |