|
1 /* |
|
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
|
3 * |
|
4 * Use of this source code is governed by a BSD-style license |
|
5 * that can be found in the LICENSE file in the root of the source |
|
6 * tree. An additional intellectual property rights grant can be found |
|
7 * in the file PATENTS. All contributing project authors may |
|
8 * be found in the AUTHORS file in the root of the source tree. |
|
9 */ |
|
10 |
|
11 #include "vpx_config.h" |
|
12 #include "vp8/common/variance.h" |
|
13 #include "vp8/common/pragmas.h" |
|
14 #include "vpx_ports/mem.h" |
|
15 |
|
/* Prototypes for variance kernels whose definitions are not in this file. */

extern unsigned int vp8_get16x16var_sse2(const unsigned char *src_ptr,
                                         int source_stride,
                                         const unsigned char *ref_ptr,
                                         int recon_stride,
                                         unsigned int *SSE,
                                         int *Sum);

/* The three vp8_half_*_variance16x_h_sse2 kernels share one signature.
 * Callers in this file pass the addresses of two locals as sum and
 * sumsquared and read both locals after the call returns.
 */
extern void vp8_half_horiz_vert_variance16x_h_sse2(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared);

extern void vp8_half_horiz_variance16x_h_sse2(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared);

extern void vp8_half_vert_variance16x_h_sse2(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared);

/* Same shape as the half_* kernels, plus the two offsets. */
extern void vp8_filter_block2d_bil_var_ssse3(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int xoffset,
    int yoffset,
    int *sum,
    unsigned int *sumsquared);
|
67 |
|
/* Sub-pixel variance for a 16x16 block.
 *
 * Selects one of four external kernels from (xoffset, yoffset); every
 * kernel is called with a Height of 16:
 *   (4, 4) -> vp8_half_horiz_vert_variance16x_h_sse2
 *   (4, 0) -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance16x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_ssse3, which is also handed the
 *             two offsets
 *
 * The kernel reports a sum and a sum-of-squares through its last two
 * arguments.  The sum-of-squares is stored to *sse, and the return value
 * is that figure minus ((sum * sum) >> 8).
 *
 * NOTE(review): the offset value 4 presumably denotes the half-pixel
 * position -- confirm against the callers.
 */
unsigned int vp8_sub_pixel_variance16x16_ssse3(const unsigned char *src_ptr,
                                               int src_pixels_per_line,
                                               int xoffset,
                                               int yoffset,
                                               const unsigned char *dst_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse)
{
    int sum;
    unsigned int sum_sq;
    unsigned int sum_term;
    const int x_is_4 = (xoffset == 4);
    const int y_is_4 = (yoffset == 4);

    /* The calling function could skip this dispatch by invoking the
     * appropriate kernel itself.
     */
    if (x_is_4 && y_is_4) {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               16, &sum, &sum_sq);
    } else if (x_is_4 && yoffset == 0) {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          16, &sum, &sum_sq);
    } else if (xoffset == 0 && y_is_4) {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &sum, &sum_sq);
    } else {
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, xoffset, yoffset,
                                         &sum, &sum_sq);
    }

    *sse = sum_sq;

    /* The product is evaluated in unsigned arithmetic; >> 8 divides by 256. */
    sum_term = ((unsigned int)sum * sum) >> 8;
    return sum_sq - sum_term;
}
|
118 |
|
/* Sub-pixel variance for a 16x8 block.
 *
 * Selects one of four external kernels from (xoffset, yoffset); every
 * kernel is called with a Height of 8:
 *   (4, 4) -> vp8_half_horiz_vert_variance16x_h_sse2
 *   (4, 0) -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance16x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_ssse3, which is also handed the
 *             two offsets
 *
 * The kernel reports a sum and a sum-of-squares through its last two
 * arguments.  The sum-of-squares is stored to *sse, and the return value
 * is that figure minus ((sum * sum) >> 7).
 *
 * NOTE(review): the offset value 4 presumably denotes the half-pixel
 * position -- confirm against the callers.
 */
unsigned int vp8_sub_pixel_variance16x8_ssse3(const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              int xoffset,
                                              int yoffset,
                                              const unsigned char *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse)
{
    int sum;
    unsigned int sum_sq;
    unsigned int sum_term;
    const int x_is_4 = (xoffset == 4);
    const int y_is_4 = (yoffset == 4);

    if (x_is_4 && y_is_4) {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               8, &sum, &sum_sq);
    } else if (x_is_4 && yoffset == 0) {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          8, &sum, &sum_sq);
    } else if (xoffset == 0 && y_is_4) {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, &sum, &sum_sq);
    } else {
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, xoffset, yoffset,
                                         &sum, &sum_sq);
    }

    *sse = sum_sq;

    /* The product is evaluated in unsigned arithmetic; >> 7 divides by 128. */
    sum_term = ((unsigned int)sum * sum) >> 7;
    return sum_sq - sum_term;
}