|
1 /* |
|
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
|
3 * |
|
4 * Use of this source code is governed by a BSD-style license |
|
5 * that can be found in the LICENSE file in the root of the source |
|
6 * tree. An additional intellectual property rights grant can be found |
|
7 * in the file PATENTS. All contributing project authors may |
|
8 * be found in the AUTHORS file in the root of the source tree. |
|
9 */ |
|
10 |
|
11 |
|
12 #include "variance.h" |
|
13 #include "filter.h" |
|
14 |
|
15 |
|
/*
 * Returns the sum of the squares of the 256 values starting at src_ptr.
 *
 * src_ptr : must reference at least 256 readable shorts.
 *
 * The total is accumulated in an unsigned int.
 */
unsigned int vp8_get_mb_ss_c
(
    const short *src_ptr
)
{
    unsigned int total = 0;
    unsigned int idx;

    for (idx = 0; idx < 256; idx++)
    {
        const short coeff = src_ptr[idx];

        /* Both short operands promote to int before the multiply. */
        total += (coeff * coeff);
    }

    return total;
}
|
32 |
|
33 |
|
/*
 * Accumulates difference statistics between two w x h blocks of bytes.
 *
 * src_ptr / source_stride : first block and the distance between its rows.
 * ref_ptr / recon_stride  : second block and the distance between its rows.
 * w, h                    : number of columns and rows to compare.
 * sse                     : receives the sum of squared differences.
 * sum                     : receives the sum of signed differences (src - ref).
 *
 * Both outputs are reset to zero first, so they are zero when w or h is
 * not positive.
 */
static void variance(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    int w,
    int h,
    unsigned int *sse,
    int *sum)
{
    int rows_left;

    *sum = 0;
    *sse = 0;

    for (rows_left = h; rows_left > 0; rows_left--)
    {
        int col;

        for (col = 0; col < w; col++)
        {
            const int delta = src_ptr[col] - ref_ptr[col];

            *sum += delta;
            *sse += delta * delta;
        }

        /* Step both blocks down one row. */
        src_ptr += source_stride;
        ref_ptr += recon_stride;
    }
}
|
63 |
|
64 |
|
/*
 * Variance-style measure over a 16x16 block.
 *
 * Stores the sum of squared differences between the two blocks in *sse and
 * returns that value minus (sum of differences)^2 / 256, where 256 is the
 * number of pixels in the block (implemented as a right shift by 8).
 */
unsigned int vp8_variance16x16_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq_err, &diff_sum);
    *sse = sq_err;

    /* The cast makes the multiply happen in unsigned arithmetic. */
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 8;

    return (sq_err - mean_term);
}
|
80 |
|
/*
 * Variance-style measure over a block 8 pixels wide and 16 pixels high.
 *
 * Stores the sum of squared differences between the two blocks in *sse and
 * returns that value minus (sum of differences)^2 / 128, where 128 is the
 * number of pixels in the block (implemented as a right shift by 7).
 */
unsigned int vp8_variance8x16_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &sq_err, &diff_sum);
    *sse = sq_err;

    /* The cast makes the multiply happen in unsigned arithmetic. */
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 7;

    return (sq_err - mean_term);
}
|
96 |
|
/*
 * Variance-style measure over a block 16 pixels wide and 8 pixels high.
 *
 * Stores the sum of squared differences between the two blocks in *sse and
 * returns that value minus (sum of differences)^2 / 128, where 128 is the
 * number of pixels in the block (implemented as a right shift by 7).
 */
unsigned int vp8_variance16x8_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &sq_err, &diff_sum);
    *sse = sq_err;

    /* The cast makes the multiply happen in unsigned arithmetic. */
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 7;

    return (sq_err - mean_term);
}
|
112 |
|
113 |
|
/*
 * Variance-style measure over an 8x8 block.
 *
 * Stores the sum of squared differences between the two blocks in *sse and
 * returns that value minus (sum of differences)^2 / 64, where 64 is the
 * number of pixels in the block (implemented as a right shift by 6).
 */
unsigned int vp8_variance8x8_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &sq_err, &diff_sum);
    *sse = sq_err;

    /* The cast makes the multiply happen in unsigned arithmetic. */
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 6;

    return (sq_err - mean_term);
}
|
129 |
|
/*
 * Variance-style measure over a 4x4 block.
 *
 * Stores the sum of squared differences between the two blocks in *sse and
 * returns that value minus (sum of differences)^2 / 16, where 16 is the
 * number of pixels in the block (implemented as a right shift by 4).
 */
unsigned int vp8_variance4x4_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &sq_err, &diff_sum);
    *sse = sq_err;

    /* The cast makes the multiply happen in unsigned arithmetic. */
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 4;

    return (sq_err - mean_term);
}
|
145 |
|
146 |
|
/*
 * Sum of squared differences over a 16x16 block.
 *
 * The value is both written to *sse and returned. The sum of differences
 * that variance() also produces is not used here.
 */
unsigned int vp8_mse16x16_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    int unused_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq_err, &unused_sum);

    *sse = sq_err;
    return sq_err;
}
|
161 |
|
162 |
|
163 /**************************************************************************** |
|
164 * |
|
165 * ROUTINE : filter_block2d_bil_first_pass |
|
166 * |
|
167 * INPUTS : UINT8 *src_ptr : Pointer to source block. |
|
168 * UINT32 src_pixels_per_line : Stride of input block. |
|
169 * UINT32 pixel_step : Offset between filter input samples (see notes). |
|
170 * UINT32 output_height : Input block height. |
|
171 * UINT32 output_width : Input block width. |
|
172 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. |
|
173 * |
|
174 * OUTPUTS : INT32 *output_ptr : Pointer to filtered block. |
|
175 * |
|
176 * RETURNS : void |
|
177 * |
|
178 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in |
|
179 * either horizontal or vertical direction to produce the |
|
180 * filtered output block. Used to implement first-pass |
|
181 * of 2-D separable filter. |
|
182 * |
|
183 * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. |
|
184 * Two filter taps should sum to VP8_FILTER_WEIGHT. |
|
185 * pixel_step defines whether the filter is applied |
|
186 * horizontally (pixel_step=1) or vertically (pixel_step=stride). |
|
187 * It defines the offset required to move from one input |
|
188 * to the next. |
|
189 * |
|
190 ****************************************************************************/ |
|
191 static void var_filter_block2d_bil_first_pass |
|
192 ( |
|
193 const unsigned char *src_ptr, |
|
194 unsigned short *output_ptr, |
|
195 unsigned int src_pixels_per_line, |
|
196 int pixel_step, |
|
197 unsigned int output_height, |
|
198 unsigned int output_width, |
|
199 const short *vp8_filter |
|
200 ) |
|
201 { |
|
202 unsigned int i, j; |
|
203 |
|
204 for (i = 0; i < output_height; i++) |
|
205 { |
|
206 for (j = 0; j < output_width; j++) |
|
207 { |
|
208 /* Apply bilinear filter */ |
|
209 output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + |
|
210 ((int)src_ptr[pixel_step] * vp8_filter[1]) + |
|
211 (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; |
|
212 src_ptr++; |
|
213 } |
|
214 |
|
215 /* Next row... */ |
|
216 src_ptr += src_pixels_per_line - output_width; |
|
217 output_ptr += output_width; |
|
218 } |
|
219 } |
|
220 |
|
221 /**************************************************************************** |
|
222 * |
|
223 * ROUTINE : filter_block2d_bil_second_pass |
|
224 * |
|
225 * INPUTS : INT32 *src_ptr : Pointer to source block. |
|
226 * UINT32 src_pixels_per_line : Stride of input block. |
|
227 * UINT32 pixel_step : Offset between filter input samples (see notes). |
|
228 * UINT32 output_height : Input block height. |
|
229 * UINT32 output_width : Input block width. |
|
230 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. |
|
231 * |
|
232 * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. |
|
233 * |
|
234 * RETURNS : void |
|
235 * |
|
236 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in |
|
237 * either horizontal or vertical direction to produce the |
|
238 * filtered output block. Used to implement second-pass |
|
239 * of 2-D separable filter. |
|
240 * |
|
241 * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. |
|
242 * Two filter taps should sum to VP8_FILTER_WEIGHT. |
|
243 * pixel_step defines whether the filter is applied |
|
244 * horizontally (pixel_step=1) or vertically (pixel_step=stride). |
|
245 * It defines the offset required to move from one input |
|
246 * to the next. |
|
247 * |
|
248 ****************************************************************************/ |
|
249 static void var_filter_block2d_bil_second_pass |
|
250 ( |
|
251 const unsigned short *src_ptr, |
|
252 unsigned char *output_ptr, |
|
253 unsigned int src_pixels_per_line, |
|
254 unsigned int pixel_step, |
|
255 unsigned int output_height, |
|
256 unsigned int output_width, |
|
257 const short *vp8_filter |
|
258 ) |
|
259 { |
|
260 unsigned int i, j; |
|
261 int Temp; |
|
262 |
|
263 for (i = 0; i < output_height; i++) |
|
264 { |
|
265 for (j = 0; j < output_width; j++) |
|
266 { |
|
267 /* Apply filter */ |
|
268 Temp = ((int)src_ptr[0] * vp8_filter[0]) + |
|
269 ((int)src_ptr[pixel_step] * vp8_filter[1]) + |
|
270 (VP8_FILTER_WEIGHT / 2); |
|
271 output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); |
|
272 src_ptr++; |
|
273 } |
|
274 |
|
275 /* Next row... */ |
|
276 src_ptr += src_pixels_per_line - output_width; |
|
277 output_ptr += output_width; |
|
278 } |
|
279 } |
|
280 |
|
281 |
|
282 unsigned int vp8_sub_pixel_variance4x4_c |
|
283 ( |
|
284 const unsigned char *src_ptr, |
|
285 int src_pixels_per_line, |
|
286 int xoffset, |
|
287 int yoffset, |
|
288 const unsigned char *dst_ptr, |
|
289 int dst_pixels_per_line, |
|
290 unsigned int *sse |
|
291 ) |
|
292 { |
|
293 unsigned char temp2[20*16]; |
|
294 const short *HFilter, *VFilter; |
|
295 unsigned short FData3[5*4]; /* Temp data bufffer used in filtering */ |
|
296 |
|
297 HFilter = vp8_bilinear_filters[xoffset]; |
|
298 VFilter = vp8_bilinear_filters[yoffset]; |
|
299 |
|
300 /* First filter 1d Horizontal */ |
|
301 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); |
|
302 |
|
303 /* Now filter Verticaly */ |
|
304 var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); |
|
305 |
|
306 return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); |
|
307 } |
|
308 |
|
309 |
|
310 unsigned int vp8_sub_pixel_variance8x8_c |
|
311 ( |
|
312 const unsigned char *src_ptr, |
|
313 int src_pixels_per_line, |
|
314 int xoffset, |
|
315 int yoffset, |
|
316 const unsigned char *dst_ptr, |
|
317 int dst_pixels_per_line, |
|
318 unsigned int *sse |
|
319 ) |
|
320 { |
|
321 unsigned short FData3[9*8]; /* Temp data bufffer used in filtering */ |
|
322 unsigned char temp2[20*16]; |
|
323 const short *HFilter, *VFilter; |
|
324 |
|
325 HFilter = vp8_bilinear_filters[xoffset]; |
|
326 VFilter = vp8_bilinear_filters[yoffset]; |
|
327 |
|
328 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter); |
|
329 var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter); |
|
330 |
|
331 return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); |
|
332 } |
|
333 |
|
334 unsigned int vp8_sub_pixel_variance16x16_c |
|
335 ( |
|
336 const unsigned char *src_ptr, |
|
337 int src_pixels_per_line, |
|
338 int xoffset, |
|
339 int yoffset, |
|
340 const unsigned char *dst_ptr, |
|
341 int dst_pixels_per_line, |
|
342 unsigned int *sse |
|
343 ) |
|
344 { |
|
345 unsigned short FData3[17*16]; /* Temp data bufffer used in filtering */ |
|
346 unsigned char temp2[20*16]; |
|
347 const short *HFilter, *VFilter; |
|
348 |
|
349 HFilter = vp8_bilinear_filters[xoffset]; |
|
350 VFilter = vp8_bilinear_filters[yoffset]; |
|
351 |
|
352 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter); |
|
353 var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter); |
|
354 |
|
355 return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); |
|
356 } |
|
357 |
|
358 |
|
/*
 * 16x16 sub-pixel variance with a fixed horizontal offset.
 *
 * Forwards to vp8_sub_pixel_variance16x16_c with xoffset = 4 and
 * yoffset = 0 and returns its result.
 * NOTE(review): index 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters - confirm against the table definition.
 */
unsigned int vp8_variance_halfpixvar16x16_h_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    const int x_off = 4;
    const int y_off = 0;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                         ref_ptr, recon_stride, sse);
}
|
369 |
|
370 |
|
/*
 * 16x16 sub-pixel variance with a fixed vertical offset.
 *
 * Forwards to vp8_sub_pixel_variance16x16_c with xoffset = 0 and
 * yoffset = 4 and returns its result.
 * NOTE(review): index 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters - confirm against the table definition.
 */
unsigned int vp8_variance_halfpixvar16x16_v_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    const int x_off = 0;
    const int y_off = 4;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                         ref_ptr, recon_stride, sse);
}
|
381 |
|
382 |
|
/*
 * 16x16 sub-pixel variance with fixed horizontal and vertical offsets.
 *
 * Forwards to vp8_sub_pixel_variance16x16_c with xoffset = 4 and
 * yoffset = 4 and returns its result.
 * NOTE(review): index 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters - confirm against the table definition.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    const int x_off = 4;
    const int y_off = 4;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                         ref_ptr, recon_stride, sse);
}
|
393 |
|
394 |
|
/*
 * 16x16 sub-pixel sum of squared differences.
 *
 * Runs vp8_sub_pixel_variance16x16_c for its side effect of filling *sse,
 * discards the value it returns, and returns *sse instead.
 */
unsigned int vp8_sub_pixel_mse16x16_c
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    (void)vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                        xoffset, yoffset,
                                        dst_ptr, dst_pixels_per_line,
                                        sse);

    return *sse;
}
|
409 |
|
410 unsigned int vp8_sub_pixel_variance16x8_c |
|
411 ( |
|
412 const unsigned char *src_ptr, |
|
413 int src_pixels_per_line, |
|
414 int xoffset, |
|
415 int yoffset, |
|
416 const unsigned char *dst_ptr, |
|
417 int dst_pixels_per_line, |
|
418 unsigned int *sse |
|
419 ) |
|
420 { |
|
421 unsigned short FData3[16*9]; /* Temp data bufffer used in filtering */ |
|
422 unsigned char temp2[20*16]; |
|
423 const short *HFilter, *VFilter; |
|
424 |
|
425 HFilter = vp8_bilinear_filters[xoffset]; |
|
426 VFilter = vp8_bilinear_filters[yoffset]; |
|
427 |
|
428 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter); |
|
429 var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter); |
|
430 |
|
431 return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); |
|
432 } |
|
433 |
|
434 unsigned int vp8_sub_pixel_variance8x16_c |
|
435 ( |
|
436 const unsigned char *src_ptr, |
|
437 int src_pixels_per_line, |
|
438 int xoffset, |
|
439 int yoffset, |
|
440 const unsigned char *dst_ptr, |
|
441 int dst_pixels_per_line, |
|
442 unsigned int *sse |
|
443 ) |
|
444 { |
|
445 unsigned short FData3[9*16]; /* Temp data bufffer used in filtering */ |
|
446 unsigned char temp2[20*16]; |
|
447 const short *HFilter, *VFilter; |
|
448 |
|
449 |
|
450 HFilter = vp8_bilinear_filters[xoffset]; |
|
451 VFilter = vp8_bilinear_filters[yoffset]; |
|
452 |
|
453 |
|
454 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter); |
|
455 var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter); |
|
456 |
|
457 return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); |
|
458 } |