|
1 /* |
|
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
|
3 * |
|
4 * Use of this source code is governed by a BSD-style license |
|
5 * that can be found in the LICENSE file in the root of the source |
|
6 * tree. An additional intellectual property rights grant can be found |
|
7 * in the file PATENTS. All contributing project authors may |
|
8 * be found in the AUTHORS file in the root of the source tree. |
|
9 */ |
|
10 |
|
11 #include <limits.h> |
|
12 #include <math.h> |
|
13 #include <stdio.h> |
|
14 |
|
15 #include "./vpx_config.h" |
|
16 |
|
17 #include "vpx_mem/vpx_mem.h" |
|
18 |
|
19 #include "vp9/common/vp9_findnearmv.h" |
|
20 #include "vp9/common/vp9_common.h" |
|
21 |
|
22 #include "vp9/encoder/vp9_onyx_int.h" |
|
23 #include "vp9/encoder/vp9_mcomp.h" |
|
24 |
|
25 // #define NEW_DIAMOND_SEARCH |
|
26 |
|
27 void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv) { |
|
28 const int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); |
|
29 const int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); |
|
30 const int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; |
|
31 const int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; |
|
32 |
|
33 // Get intersection of UMV window and valid MV window to reduce # of checks |
|
34 // in diamond search. |
|
35 if (x->mv_col_min < col_min) |
|
36 x->mv_col_min = col_min; |
|
37 if (x->mv_col_max > col_max) |
|
38 x->mv_col_max = col_max; |
|
39 if (x->mv_row_min < row_min) |
|
40 x->mv_row_min = row_min; |
|
41 if (x->mv_row_max > row_max) |
|
42 x->mv_row_max = row_max; |
|
43 } |
|
44 |
|
45 int vp9_init_search_range(VP9_COMP *cpi, int size) { |
|
46 int sr = 0; |
|
47 |
|
48 // Minimum search size no matter what the passed in value. |
|
49 size = MAX(16, size); |
|
50 |
|
51 while ((size << sr) < MAX_FULL_PEL_VAL) |
|
52 sr++; |
|
53 |
|
54 if (sr) |
|
55 sr--; |
|
56 |
|
57 sr += cpi->sf.reduce_first_step_size; |
|
58 sr = MIN(sr, (cpi->sf.max_step_search_steps - 2)); |
|
59 return sr; |
|
60 } |
|
61 |
|
62 static INLINE int mv_cost(const MV *mv, |
|
63 const int *joint_cost, int *comp_cost[2]) { |
|
64 return joint_cost[vp9_get_mv_joint(mv)] + |
|
65 comp_cost[0][mv->row] + comp_cost[1][mv->col]; |
|
66 } |
|
67 |
|
68 int vp9_mv_bit_cost(const MV *mv, const MV *ref, |
|
69 const int *mvjcost, int *mvcost[2], int weight) { |
|
70 const MV diff = { mv->row - ref->row, |
|
71 mv->col - ref->col }; |
|
72 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); |
|
73 } |
|
74 |
|
75 static int mv_err_cost(const MV *mv, const MV *ref, |
|
76 const int *mvjcost, int *mvcost[2], |
|
77 int error_per_bit) { |
|
78 if (mvcost) { |
|
79 const MV diff = { mv->row - ref->row, |
|
80 mv->col - ref->col }; |
|
81 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * |
|
82 error_per_bit, 13); |
|
83 } |
|
84 return 0; |
|
85 } |
|
86 |
|
87 static int mvsad_err_cost(const MV *mv, const MV *ref, |
|
88 const int *mvjsadcost, int *mvsadcost[2], |
|
89 int error_per_bit) { |
|
90 if (mvsadcost) { |
|
91 const MV diff = { mv->row - ref->row, |
|
92 mv->col - ref->col }; |
|
93 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * |
|
94 error_per_bit, 8); |
|
95 } |
|
96 return 0; |
|
97 } |
|
98 |
|
99 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { |
|
100 int len; |
|
101 int search_site_count = 0; |
|
102 |
|
103 // Generate offsets for 4 search sites per step. |
|
104 x->ss[search_site_count].mv.col = 0; |
|
105 x->ss[search_site_count].mv.row = 0; |
|
106 x->ss[search_site_count].offset = 0; |
|
107 search_site_count++; |
|
108 |
|
109 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { |
|
110 // Compute offsets for search sites. |
|
111 x->ss[search_site_count].mv.col = 0; |
|
112 x->ss[search_site_count].mv.row = -len; |
|
113 x->ss[search_site_count].offset = -len * stride; |
|
114 search_site_count++; |
|
115 |
|
116 // Compute offsets for search sites. |
|
117 x->ss[search_site_count].mv.col = 0; |
|
118 x->ss[search_site_count].mv.row = len; |
|
119 x->ss[search_site_count].offset = len * stride; |
|
120 search_site_count++; |
|
121 |
|
122 // Compute offsets for search sites. |
|
123 x->ss[search_site_count].mv.col = -len; |
|
124 x->ss[search_site_count].mv.row = 0; |
|
125 x->ss[search_site_count].offset = -len; |
|
126 search_site_count++; |
|
127 |
|
128 // Compute offsets for search sites. |
|
129 x->ss[search_site_count].mv.col = len; |
|
130 x->ss[search_site_count].mv.row = 0; |
|
131 x->ss[search_site_count].offset = len; |
|
132 search_site_count++; |
|
133 } |
|
134 |
|
135 x->ss_count = search_site_count; |
|
136 x->searches_per_step = 4; |
|
137 } |
|
138 |
|
139 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { |
|
140 int len, ss_count = 1; |
|
141 |
|
142 x->ss[0].mv.col = x->ss[0].mv.row = 0; |
|
143 x->ss[0].offset = 0; |
|
144 |
|
145 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { |
|
146 // Generate offsets for 8 search sites per step. |
|
147 const MV ss_mvs[8] = { |
|
148 {-len, 0 }, {len, 0 }, { 0, -len}, {0, len}, |
|
149 {-len, -len}, {-len, len}, {len, -len}, {len, len} |
|
150 }; |
|
151 int i; |
|
152 for (i = 0; i < 8; ++i) { |
|
153 search_site *const ss = &x->ss[ss_count++]; |
|
154 ss->mv = ss_mvs[i]; |
|
155 ss->offset = ss->mv.row * stride + ss->mv.col; |
|
156 } |
|
157 } |
|
158 |
|
159 x->ss_count = ss_count; |
|
160 x->searches_per_step = 8; |
|
161 } |
|
162 |
|
/*
 * To avoid the penalty of reads that cross a cache line, preload the reference
 * area into a small buffer that is aligned so that no read from this buffer
 * crosses a cache line. This reduces the CPU cycles spent on reading reference
 * data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
|
172 |
|
/*
 * Helper macros for the sub-pixel motion search functions below.
 *
 * None of these is self-contained: each one reads and/or writes local
 * variables of the function it is expanded in (rr, rc, mvjcost, mvcost,
 * error_per_bit, minc, maxc, minr, maxr, y, y_stride, offset, z, src_stride,
 * sse, thismse, besterr, br, bc, tr, tc, hstep, whichdir, distortion, sse1,
 * vfp).  Those names must therefore not be changed in the users.
 */

/* Estimated cost of a motion vector (r, c) relative to (rr, rc); evaluates
 * to 0 when mvcost is NULL. */
#define MVC(r, c)                                       \
  (mvcost ?                                             \
   ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +           \
     mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) *   \
    error_per_bit + 4096) >> 13 : 0)


/* Convert a motion vector component to the offset used by the sub-pixel
 * variance functions: the low 3 bits of the component, doubled. */
#define SP(x) (((x) & 7) << 1)

/* Expands to statement(s) s if (r, c) lies inside [minr, maxr] x
 * [minc, maxc], otherwise to e.  r and c are parenthesized so that any
 * expression may be passed safely. */
#define IFMVCV(r, c, s, e)                                      \
  if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) \
    s                                                           \
  else                                                          \
    e;

/* pointer to predictor base of a motionvector */
#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))

/* returns subpixel variance error function */
#define DIST(r, c) \
    vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse)

/* Checks whether (r, c) has a better score than the previous best.  v
 * receives the score (distortion + MVC), or INT_MAX if (r, c) is outside the
 * allowed window.  On improvement besterr, br, bc, *distortion and *sse1 are
 * updated. */
#define CHECK_BETTER(v, r, c)                           \
  IFMVCV(r, c, {                                        \
    thismse = (DIST(r, c));                             \
    if ((v = MVC(r, c) + thismse) < besterr) {          \
      besterr = v;                                      \
      br = (r);                                         \
      bc = (c);                                         \
      *distortion = thismse;                            \
      *sse1 = sse;                                      \
    }                                                   \
  },                                                    \
  v = INT_MAX;)

/* Tests the four axis neighbours of (tr, tc) at distance hstep, records in
 * whichdir which horizontal (bit 0) and vertical (bit 1) neighbour scored
 * lower, then tests the single diagonal selected by whichdir. */
#define FIRST_LEVEL_CHECKS                              \
  {                                                     \
    unsigned int left, right, up, down, diag;           \
    CHECK_BETTER(left, tr, tc - hstep);                 \
    CHECK_BETTER(right, tr, tc + hstep);                \
    CHECK_BETTER(up, tr - hstep, tc);                   \
    CHECK_BETTER(down, tr + hstep, tc);                 \
    whichdir = (left < right ? 0 : 1) +                 \
               (up < down ? 0 : 2);                     \
    switch (whichdir) {                                 \
      case 0:                                           \
        CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
        break;                                          \
      case 1:                                           \
        CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
        break;                                          \
      case 2:                                           \
        CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
        break;                                          \
      case 3:                                           \
        CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
        break;                                          \
      default:                                          \
        break;                                          \
    }                                                   \
  }

/* Follow-up checks after FIRST_LEVEL_CHECKS: depending on whether the best
 * point (br, bc) moved diagonally, horizontally or vertically away from
 * (tr, tc), tests further points beyond it in that direction.  Does nothing
 * if the best point did not move. */
#define SECOND_LEVEL_CHECKS                             \
  {                                                     \
    int kr, kc;                                         \
    unsigned int second;                                \
    if (tr != br && tc != bc) {                         \
      kr = br - tr;                                     \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
    } else if (tr == br && tc != bc) {                  \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 1:                                         \
          CHECK_BETTER(second, tr + hstep, tc + kc);    \
          break;                                        \
        case 2:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr - hstep, tc + kc);    \
          break;                                        \
        default:                                        \
          break;                                        \
      }                                                 \
    } else if (tr != br && tc == bc) {                  \
      kr = br - tr;                                     \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 2:                                         \
          CHECK_BETTER(second, tr + kr, tc + hstep);    \
          break;                                        \
        case 1:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr + kr, tc - hstep);    \
          break;                                        \
        default:                                        \
          break;                                        \
      }                                                 \
    }                                                   \
  }
|
275 |
|
276 int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, |
|
277 MV *bestmv, const MV *ref_mv, |
|
278 int allow_hp, |
|
279 int error_per_bit, |
|
280 const vp9_variance_fn_ptr_t *vfp, |
|
281 int forced_stop, |
|
282 int iters_per_step, |
|
283 int *mvjcost, int *mvcost[2], |
|
284 int *distortion, |
|
285 unsigned int *sse1) { |
|
286 uint8_t *z = x->plane[0].src.buf; |
|
287 int src_stride = x->plane[0].src.stride; |
|
288 MACROBLOCKD *xd = &x->e_mbd; |
|
289 |
|
290 unsigned int besterr = INT_MAX; |
|
291 unsigned int sse; |
|
292 unsigned int whichdir; |
|
293 unsigned int halfiters = iters_per_step; |
|
294 unsigned int quarteriters = iters_per_step; |
|
295 unsigned int eighthiters = iters_per_step; |
|
296 int thismse; |
|
297 |
|
298 const int y_stride = xd->plane[0].pre[0].stride; |
|
299 const int offset = bestmv->row * y_stride + bestmv->col; |
|
300 uint8_t *y = xd->plane[0].pre[0].buf + offset; |
|
301 |
|
302 int rr = ref_mv->row; |
|
303 int rc = ref_mv->col; |
|
304 int br = bestmv->row * 8; |
|
305 int bc = bestmv->col * 8; |
|
306 int hstep = 4; |
|
307 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
|
308 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
|
309 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
|
310 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
|
311 |
|
312 int tr = br; |
|
313 int tc = bc; |
|
314 |
|
315 // central mv |
|
316 bestmv->row <<= 3; |
|
317 bestmv->col <<= 3; |
|
318 |
|
319 // calculate central point error |
|
320 besterr = vfp->vf(y, y_stride, z, src_stride, sse1); |
|
321 *distortion = besterr; |
|
322 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
|
323 |
|
324 // TODO(jbb): Each subsequent iteration checks at least one point in |
|
325 // common with the last iteration could be 2 if diagonal is selected. |
|
326 while (halfiters--) { |
|
327 // 1/2 pel |
|
328 FIRST_LEVEL_CHECKS; |
|
329 // no reason to check the same one again. |
|
330 if (tr == br && tc == bc) |
|
331 break; |
|
332 tr = br; |
|
333 tc = bc; |
|
334 } |
|
335 |
|
336 // TODO(yaowu): Each subsequent iteration checks at least one point in common |
|
337 // with the last iteration could be 2 if diagonal is selected. |
|
338 |
|
339 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only |
|
340 if (forced_stop != 2) { |
|
341 hstep >>= 1; |
|
342 while (quarteriters--) { |
|
343 FIRST_LEVEL_CHECKS; |
|
344 // no reason to check the same one again. |
|
345 if (tr == br && tc == bc) |
|
346 break; |
|
347 tr = br; |
|
348 tc = bc; |
|
349 } |
|
350 } |
|
351 |
|
352 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { |
|
353 hstep >>= 1; |
|
354 while (eighthiters--) { |
|
355 FIRST_LEVEL_CHECKS; |
|
356 // no reason to check the same one again. |
|
357 if (tr == br && tc == bc) |
|
358 break; |
|
359 tr = br; |
|
360 tc = bc; |
|
361 } |
|
362 } |
|
363 |
|
364 bestmv->row = br; |
|
365 bestmv->col = bc; |
|
366 |
|
367 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || |
|
368 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) |
|
369 return INT_MAX; |
|
370 |
|
371 return besterr; |
|
372 } |
|
373 |
|
374 int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, |
|
375 MV *bestmv, const MV *ref_mv, |
|
376 int allow_hp, |
|
377 int error_per_bit, |
|
378 const vp9_variance_fn_ptr_t *vfp, |
|
379 int forced_stop, |
|
380 int iters_per_step, |
|
381 int *mvjcost, int *mvcost[2], |
|
382 int *distortion, |
|
383 unsigned int *sse1) { |
|
384 uint8_t *z = x->plane[0].src.buf; |
|
385 const int src_stride = x->plane[0].src.stride; |
|
386 MACROBLOCKD *xd = &x->e_mbd; |
|
387 unsigned int besterr = INT_MAX; |
|
388 unsigned int sse; |
|
389 unsigned int whichdir; |
|
390 int thismse; |
|
391 unsigned int halfiters = iters_per_step; |
|
392 unsigned int quarteriters = iters_per_step; |
|
393 unsigned int eighthiters = iters_per_step; |
|
394 |
|
395 const int y_stride = xd->plane[0].pre[0].stride; |
|
396 const int offset = bestmv->row * y_stride + bestmv->col; |
|
397 uint8_t *y = xd->plane[0].pre[0].buf + offset; |
|
398 |
|
399 int rr = ref_mv->row; |
|
400 int rc = ref_mv->col; |
|
401 int br = bestmv->row * 8; |
|
402 int bc = bestmv->col * 8; |
|
403 int hstep = 4; |
|
404 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
|
405 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
|
406 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
|
407 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
|
408 |
|
409 int tr = br; |
|
410 int tc = bc; |
|
411 |
|
412 // central mv |
|
413 bestmv->row *= 8; |
|
414 bestmv->col *= 8; |
|
415 |
|
416 // calculate central point error |
|
417 besterr = vfp->vf(y, y_stride, z, src_stride, sse1); |
|
418 *distortion = besterr; |
|
419 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
|
420 |
|
421 // 1/2 pel |
|
422 FIRST_LEVEL_CHECKS; |
|
423 if (halfiters > 1) { |
|
424 SECOND_LEVEL_CHECKS; |
|
425 } |
|
426 tr = br; |
|
427 tc = bc; |
|
428 |
|
429 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only |
|
430 if (forced_stop != 2) { |
|
431 hstep >>= 1; |
|
432 FIRST_LEVEL_CHECKS; |
|
433 if (quarteriters > 1) { |
|
434 SECOND_LEVEL_CHECKS; |
|
435 } |
|
436 tr = br; |
|
437 tc = bc; |
|
438 } |
|
439 |
|
440 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { |
|
441 hstep >>= 1; |
|
442 FIRST_LEVEL_CHECKS; |
|
443 if (eighthiters > 1) { |
|
444 SECOND_LEVEL_CHECKS; |
|
445 } |
|
446 tr = br; |
|
447 tc = bc; |
|
448 } |
|
449 |
|
450 bestmv->row = br; |
|
451 bestmv->col = bc; |
|
452 |
|
453 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || |
|
454 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) |
|
455 return INT_MAX; |
|
456 |
|
457 return besterr; |
|
458 } |
|
459 |
|
#undef DIST
/* Sub-pixel variance of the predictor at (r, c) for the compound-prediction
 * search functions below: same as the previous DIST, but calls vfp->svaf and
 * additionally passes second_pred. */
#define DIST(r, c) \
    vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \
              z, src_stride, &sse, second_pred)
|
465 |
|
466 int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, |
|
467 MV *bestmv, const MV *ref_mv, |
|
468 int allow_hp, |
|
469 int error_per_bit, |
|
470 const vp9_variance_fn_ptr_t *vfp, |
|
471 int forced_stop, |
|
472 int iters_per_step, |
|
473 int *mvjcost, int *mvcost[2], |
|
474 int *distortion, |
|
475 unsigned int *sse1, |
|
476 const uint8_t *second_pred, |
|
477 int w, int h) { |
|
478 uint8_t *const z = x->plane[0].src.buf; |
|
479 const int src_stride = x->plane[0].src.stride; |
|
480 MACROBLOCKD *const xd = &x->e_mbd; |
|
481 |
|
482 unsigned int besterr = INT_MAX; |
|
483 unsigned int sse; |
|
484 unsigned int whichdir; |
|
485 unsigned int halfiters = iters_per_step; |
|
486 unsigned int quarteriters = iters_per_step; |
|
487 unsigned int eighthiters = iters_per_step; |
|
488 int thismse; |
|
489 |
|
490 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); |
|
491 const int y_stride = xd->plane[0].pre[0].stride; |
|
492 const int offset = bestmv->row * y_stride + bestmv->col; |
|
493 uint8_t *const y = xd->plane[0].pre[0].buf + offset; |
|
494 |
|
495 int rr = ref_mv->row; |
|
496 int rc = ref_mv->col; |
|
497 int br = bestmv->row * 8; |
|
498 int bc = bestmv->col * 8; |
|
499 int hstep = 4; |
|
500 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
|
501 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
|
502 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
|
503 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
|
504 |
|
505 int tr = br; |
|
506 int tc = bc; |
|
507 |
|
508 // central mv |
|
509 bestmv->row *= 8; |
|
510 bestmv->col *= 8; |
|
511 |
|
512 // calculate central point error |
|
513 // TODO(yunqingwang): central pointer error was already calculated in full- |
|
514 // pixel search, and can be passed in this function. |
|
515 comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); |
|
516 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); |
|
517 *distortion = besterr; |
|
518 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
|
519 |
|
520 // Each subsequent iteration checks at least one point in |
|
521 // common with the last iteration could be 2 ( if diag selected) |
|
522 while (halfiters--) { |
|
523 // 1/2 pel |
|
524 FIRST_LEVEL_CHECKS; |
|
525 // no reason to check the same one again. |
|
526 if (tr == br && tc == bc) |
|
527 break; |
|
528 tr = br; |
|
529 tc = bc; |
|
530 } |
|
531 |
|
532 // Each subsequent iteration checks at least one point in common with |
|
533 // the last iteration could be 2 ( if diag selected) 1/4 pel |
|
534 |
|
535 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only |
|
536 if (forced_stop != 2) { |
|
537 hstep >>= 1; |
|
538 while (quarteriters--) { |
|
539 FIRST_LEVEL_CHECKS; |
|
540 // no reason to check the same one again. |
|
541 if (tr == br && tc == bc) |
|
542 break; |
|
543 tr = br; |
|
544 tc = bc; |
|
545 } |
|
546 } |
|
547 |
|
548 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { |
|
549 hstep >>= 1; |
|
550 while (eighthiters--) { |
|
551 FIRST_LEVEL_CHECKS; |
|
552 // no reason to check the same one again. |
|
553 if (tr == br && tc == bc) |
|
554 break; |
|
555 tr = br; |
|
556 tc = bc; |
|
557 } |
|
558 } |
|
559 bestmv->row = br; |
|
560 bestmv->col = bc; |
|
561 |
|
562 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || |
|
563 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) |
|
564 return INT_MAX; |
|
565 |
|
566 return besterr; |
|
567 } |
|
568 |
|
569 int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, |
|
570 MV *bestmv, const MV *ref_mv, |
|
571 int allow_hp, |
|
572 int error_per_bit, |
|
573 const vp9_variance_fn_ptr_t *vfp, |
|
574 int forced_stop, |
|
575 int iters_per_step, |
|
576 int *mvjcost, int *mvcost[2], |
|
577 int *distortion, |
|
578 unsigned int *sse1, |
|
579 const uint8_t *second_pred, |
|
580 int w, int h) { |
|
581 uint8_t *z = x->plane[0].src.buf; |
|
582 const int src_stride = x->plane[0].src.stride; |
|
583 MACROBLOCKD *xd = &x->e_mbd; |
|
584 unsigned int besterr = INT_MAX; |
|
585 unsigned int sse; |
|
586 unsigned int whichdir; |
|
587 int thismse; |
|
588 unsigned int halfiters = iters_per_step; |
|
589 unsigned int quarteriters = iters_per_step; |
|
590 unsigned int eighthiters = iters_per_step; |
|
591 |
|
592 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); |
|
593 const int y_stride = xd->plane[0].pre[0].stride; |
|
594 const int offset = bestmv->row * y_stride + bestmv->col; |
|
595 uint8_t *y = xd->plane[0].pre[0].buf + offset; |
|
596 |
|
597 int rr = ref_mv->row; |
|
598 int rc = ref_mv->col; |
|
599 int br = bestmv->row * 8; |
|
600 int bc = bestmv->col * 8; |
|
601 int hstep = 4; |
|
602 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
|
603 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
|
604 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
|
605 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
|
606 |
|
607 int tr = br; |
|
608 int tc = bc; |
|
609 |
|
610 // central mv |
|
611 bestmv->row *= 8; |
|
612 bestmv->col *= 8; |
|
613 |
|
614 // calculate central point error |
|
615 // TODO(yunqingwang): central pointer error was already calculated in full- |
|
616 // pixel search, and can be passed in this function. |
|
617 comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); |
|
618 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); |
|
619 *distortion = besterr; |
|
620 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
|
621 |
|
622 // Each subsequent iteration checks at least one point in |
|
623 // common with the last iteration could be 2 ( if diag selected) |
|
624 // 1/2 pel |
|
625 FIRST_LEVEL_CHECKS; |
|
626 if (halfiters > 1) { |
|
627 SECOND_LEVEL_CHECKS; |
|
628 } |
|
629 tr = br; |
|
630 tc = bc; |
|
631 |
|
632 // Each subsequent iteration checks at least one point in common with |
|
633 // the last iteration could be 2 ( if diag selected) 1/4 pel |
|
634 |
|
635 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only |
|
636 if (forced_stop != 2) { |
|
637 hstep >>= 1; |
|
638 FIRST_LEVEL_CHECKS; |
|
639 if (quarteriters > 1) { |
|
640 SECOND_LEVEL_CHECKS; |
|
641 } |
|
642 tr = br; |
|
643 tc = bc; |
|
644 } |
|
645 |
|
646 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { |
|
647 hstep >>= 1; |
|
648 FIRST_LEVEL_CHECKS; |
|
649 if (eighthiters > 1) { |
|
650 SECOND_LEVEL_CHECKS; |
|
651 } |
|
652 tr = br; |
|
653 tc = bc; |
|
654 } |
|
655 bestmv->row = br; |
|
656 bestmv->col = bc; |
|
657 |
|
658 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || |
|
659 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) |
|
660 return INT_MAX; |
|
661 |
|
662 return besterr; |
|
663 } |
|
664 |
|
#undef MVC
#undef PRE
#undef DIST
#undef IFMVCV
#undef CHECK_BETTER
#undef SP

/*
 * Helper macros for the full-pel pattern search below.  They read and write
 * locals of the expanding function (x, br, bc, all_in, this_mv, thissad,
 * bestsad, best_site, i, use_mvcost, fcenter_mv, mvjsadcost, mvsadcost,
 * sad_per_bit).  CHECK_BOUNDS, CHECK_POINT and CHECK_BETTER expand to a
 * brace-enclosed block and are written WITHOUT a trailing semicolon at their
 * call sites.
 */

/* Sets all_in to 1 if every point within +/- range of (br, bc) lies inside
 * the MV limits stored in x, else to 0. */
#define CHECK_BOUNDS(range) \
  {\
    all_in = 1;\
    all_in &= ((br - (range)) >= x->mv_row_min);\
    all_in &= ((br + (range)) <= x->mv_row_max);\
    all_in &= ((bc - (range)) >= x->mv_col_min);\
    all_in &= ((bc + (range)) <= x->mv_col_max);\
  }

/* Skips the current loop iteration (via continue) if this_mv is outside the
 * MV limits stored in x.  Must be expanded directly inside a loop body. */
#define CHECK_POINT \
  {\
    if (this_mv.col < x->mv_col_min) continue;\
    if (this_mv.col > x->mv_col_max) continue;\
    if (this_mv.row < x->mv_row_min) continue;\
    if (this_mv.row > x->mv_row_max) continue;\
  }

/* If thissad beats bestsad, optionally adds the MV SAD cost and, if it is
 * still better, records it in bestsad and the loop index i in best_site. */
#define CHECK_BETTER \
  {\
    if (thissad < bestsad)\
    {\
      if (use_mvcost) \
        thissad += mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, \
                                  mvjsadcost, mvsadcost, \
                                  sad_per_bit);\
      if (thissad < bestsad)\
      {\
        bestsad = thissad;\
        best_site = i;\
      }\
    }\
  }

/* Stores in list[0..2] the indices i - 1, i and i + 1, wrapping around in a
 * ring of n candidates.  Wrapped in do { } while (0) so that it behaves as a
 * single statement; write it with a trailing semicolon. */
#define get_next_chkpts(list, i, n)                 \
  do {                                              \
    (list)[0] = ((i) == 0 ? (n) - 1 : (i) - 1);     \
    (list)[1] = (i);                                \
    (list)[2] = ((i) == (n) - 1 ? 0 : (i) + 1);     \
  } while (0)

#define MAX_PATTERN_SCALES         11
#define MAX_PATTERN_CANDIDATES      8  // max number of candidates per scale
#define PATTERN_CANDIDATES_REF      3  // number of refinement candidates
|
713 |
|
714 // Generic pattern search function that searches over multiple scales. |
|
715 // Each scale can have a different number of candidates and shape of |
|
716 // candidates as indicated in the num_candidates and candidates arrays |
|
717 // passed into this function |
|
718 static int vp9_pattern_search(MACROBLOCK *x, |
|
719 MV *ref_mv, |
|
720 int search_param, |
|
721 int sad_per_bit, |
|
722 int do_init_search, |
|
723 int do_refine, |
|
724 const vp9_variance_fn_ptr_t *vfp, |
|
725 int use_mvcost, |
|
726 const MV *center_mv, MV *best_mv, |
|
727 const int num_candidates[MAX_PATTERN_SCALES], |
|
728 const MV candidates[MAX_PATTERN_SCALES] |
|
729 [MAX_PATTERN_CANDIDATES]) { |
|
730 const MACROBLOCKD* const xd = &x->e_mbd; |
|
731 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { |
|
732 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
|
733 }; |
|
734 int i, j, s, t; |
|
735 uint8_t *what = x->plane[0].src.buf; |
|
736 int what_stride = x->plane[0].src.stride; |
|
737 int in_what_stride = xd->plane[0].pre[0].stride; |
|
738 int br, bc; |
|
739 MV this_mv; |
|
740 int bestsad = INT_MAX; |
|
741 int thissad; |
|
742 uint8_t *base_offset; |
|
743 uint8_t *this_offset; |
|
744 int k = -1; |
|
745 int all_in; |
|
746 int best_site = -1; |
|
747 int_mv fcenter_mv; |
|
748 int best_init_s = search_param_to_steps[search_param]; |
|
749 int *mvjsadcost = x->nmvjointsadcost; |
|
750 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
|
751 |
|
752 fcenter_mv.as_mv.row = center_mv->row >> 3; |
|
753 fcenter_mv.as_mv.col = center_mv->col >> 3; |
|
754 |
|
755 // adjust ref_mv to make sure it is within MV range |
|
756 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
|
757 br = ref_mv->row; |
|
758 bc = ref_mv->col; |
|
759 |
|
760 // Work out the start point for the search |
|
761 base_offset = (uint8_t *)(xd->plane[0].pre[0].buf); |
|
762 this_offset = base_offset + (br * in_what_stride) + bc; |
|
763 this_mv.row = br; |
|
764 this_mv.col = bc; |
|
765 bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) |
|
766 + mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, |
|
767 mvjsadcost, mvsadcost, sad_per_bit); |
|
768 |
|
769 // Search all possible scales upto the search param around the center point |
|
770 // pick the scale of the point that is best as the starting scale of |
|
771 // further steps around it. |
|
772 if (do_init_search) { |
|
773 s = best_init_s; |
|
774 best_init_s = -1; |
|
775 for (t = 0; t <= s; ++t) { |
|
776 best_site = -1; |
|
777 CHECK_BOUNDS((1 << t)) |
|
778 if (all_in) { |
|
779 for (i = 0; i < num_candidates[t]; i++) { |
|
780 this_mv.row = br + candidates[t][i].row; |
|
781 this_mv.col = bc + candidates[t][i].col; |
|
782 this_offset = base_offset + (this_mv.row * in_what_stride) + |
|
783 this_mv.col; |
|
784 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
|
785 bestsad); |
|
786 CHECK_BETTER |
|
787 } |
|
788 } else { |
|
789 for (i = 0; i < num_candidates[t]; i++) { |
|
790 this_mv.row = br + candidates[t][i].row; |
|
791 this_mv.col = bc + candidates[t][i].col; |
|
792 CHECK_POINT |
|
793 this_offset = base_offset + (this_mv.row * in_what_stride) + |
|
794 this_mv.col; |
|
795 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
|
796 bestsad); |
|
797 CHECK_BETTER |
|
798 } |
|
799 } |
|
800 if (best_site == -1) { |
|
801 continue; |
|
802 } else { |
|
803 best_init_s = t; |
|
804 k = best_site; |
|
805 } |
|
806 } |
|
807 if (best_init_s != -1) { |
|
808 br += candidates[best_init_s][k].row; |
|
809 bc += candidates[best_init_s][k].col; |
|
810 } |
|
811 } |
|
812 |
|
813 // If the center point is still the best, just skip this and move to |
|
814 // the refinement step. |
|
815 if (best_init_s != -1) { |
|
816 s = best_init_s; |
|
817 best_site = -1; |
|
818 do { |
|
819 // No need to search all 6 points the 1st time if initial search was used |
|
820 if (!do_init_search || s != best_init_s) { |
|
821 CHECK_BOUNDS((1 << s)) |
|
822 if (all_in) { |
|
823 for (i = 0; i < num_candidates[s]; i++) { |
|
824 this_mv.row = br + candidates[s][i].row; |
|
825 this_mv.col = bc + candidates[s][i].col; |
|
826 this_offset = base_offset + (this_mv.row * in_what_stride) + |
|
827 this_mv.col; |
|
828 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
|
829 bestsad); |
|
830 CHECK_BETTER |
|
831 } |
|
832 } else { |
|
833 for (i = 0; i < num_candidates[s]; i++) { |
|
834 this_mv.row = br + candidates[s][i].row; |
|
835 this_mv.col = bc + candidates[s][i].col; |
|
836 CHECK_POINT |
|
837 this_offset = base_offset + (this_mv.row * in_what_stride) + |
|
838 this_mv.col; |
|
839 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
|
840 bestsad); |
|
841 CHECK_BETTER |
|
842 } |
|
843 } |
|
844 |
|
845 if (best_site == -1) { |
|
846 continue; |
|
847 } else { |
|
848 br += candidates[s][best_site].row; |
|
849 bc += candidates[s][best_site].col; |
|
850 k = best_site; |
|
851 } |
|
852 } |
|
853 |
|
854 do { |
|
855 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; |
|
856 best_site = -1; |
|
857 CHECK_BOUNDS((1 << s)) |
|
858 |
|
859 get_next_chkpts(next_chkpts_indices, k, num_candidates[s]); |
|
860 if (all_in) { |
|
861 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
|
862 this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; |
|
863 this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; |
|
864 this_offset = base_offset + (this_mv.row * (in_what_stride)) + |
|
865 this_mv.col; |
|
866 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
|
867 bestsad); |
|
868 CHECK_BETTER |
|
869 } |
|
870 } else { |
|
871 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
|
872 this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; |
|
873 this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; |
|
874 CHECK_POINT |
|
875 this_offset = base_offset + (this_mv.row * (in_what_stride)) + |
|
876 this_mv.col; |
|
877 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
|
878 bestsad); |
|
879 CHECK_BETTER |
|
880 } |
|
881 } |
|
882 |
|
883 if (best_site != -1) { |
|
884 k = next_chkpts_indices[best_site]; |
|
885 br += candidates[s][k].row; |
|
886 bc += candidates[s][k].col; |
|
887 } |
|
888 } while (best_site != -1); |
|
889 } while (s--); |
|
890 } |
|
891 |
|
892 // Check 4 1-away neighbors if do_refine is true. |
|
893 // For most well-designed schemes do_refine will not be necessary. |
|
894 if (do_refine) { |
|
895 static const MV neighbors[4] = { |
|
896 {0, -1}, { -1, 0}, {1, 0}, {0, 1}, |
|
897 }; |
|
898 for (j = 0; j < 16; j++) { |
|
899 best_site = -1; |
|
900 CHECK_BOUNDS(1) |
|
901 if (all_in) { |
|
902 for (i = 0; i < 4; i++) { |
|
903 this_mv.row = br + neighbors[i].row; |
|
904 this_mv.col = bc + neighbors[i].col; |
|
905 this_offset = base_offset + (this_mv.row * (in_what_stride)) + |
|
906 this_mv.col; |
|
907 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
|
908 bestsad); |
|
909 CHECK_BETTER |
|
910 } |
|
911 } else { |
|
912 for (i = 0; i < 4; i++) { |
|
913 this_mv.row = br + neighbors[i].row; |
|
914 this_mv.col = bc + neighbors[i].col; |
|
915 CHECK_POINT |
|
916 this_offset = base_offset + (this_mv.row * (in_what_stride)) + |
|
917 this_mv.col; |
|
918 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
|
919 bestsad); |
|
920 CHECK_BETTER |
|
921 } |
|
922 } |
|
923 |
|
924 if (best_site == -1) { |
|
925 break; |
|
926 } else { |
|
927 br += neighbors[best_site].row; |
|
928 bc += neighbors[best_site].col; |
|
929 } |
|
930 } |
|
931 } |
|
932 |
|
933 best_mv->row = br; |
|
934 best_mv->col = bc; |
|
935 |
|
936 this_offset = base_offset + (best_mv->row * in_what_stride) + |
|
937 best_mv->col; |
|
938 this_mv.row = best_mv->row * 8; |
|
939 this_mv.col = best_mv->col * 8; |
|
940 if (bestsad == INT_MAX) |
|
941 return INT_MAX; |
|
942 |
|
943 return vfp->vf(what, what_stride, this_offset, in_what_stride, |
|
944 (unsigned int *)&bestsad) + |
|
945 use_mvcost ? mv_err_cost(&this_mv, center_mv, |
|
946 x->nmvjointcost, x->mvcost, x->errorperbit) |
|
947 : 0; |
|
948 } |
|
949 |
|
950 |
|
951 int vp9_hex_search(MACROBLOCK *x, |
|
952 MV *ref_mv, |
|
953 int search_param, |
|
954 int sad_per_bit, |
|
955 int do_init_search, |
|
956 const vp9_variance_fn_ptr_t *vfp, |
|
957 int use_mvcost, |
|
958 const MV *center_mv, MV *best_mv) { |
|
959 // First scale has 8-closest points, the rest have 6 points in hex shape |
|
960 // at increasing scales |
|
961 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { |
|
962 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 |
|
963 }; |
|
964 // Note that the largest candidate step at each scale is 2^scale |
|
965 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { |
|
966 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}}, |
|
967 {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}}, |
|
968 {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}}, |
|
969 {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}}, |
|
970 {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}}, |
|
971 {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}}, |
|
972 {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}}, |
|
973 {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}}, |
|
974 {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}}, |
|
975 {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}}, |
|
976 {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024}, |
|
977 { -1024, 0}}, |
|
978 }; |
|
979 return |
|
980 vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, |
|
981 do_init_search, 0, vfp, use_mvcost, |
|
982 center_mv, best_mv, |
|
983 hex_num_candidates, hex_candidates); |
|
984 } |
|
985 |
|
986 int vp9_bigdia_search(MACROBLOCK *x, |
|
987 MV *ref_mv, |
|
988 int search_param, |
|
989 int sad_per_bit, |
|
990 int do_init_search, |
|
991 const vp9_variance_fn_ptr_t *vfp, |
|
992 int use_mvcost, |
|
993 const MV *center_mv, |
|
994 MV *best_mv) { |
|
995 // First scale has 4-closest points, the rest have 8 points in diamond |
|
996 // shape at increasing scales |
|
997 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { |
|
998 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
|
999 }; |
|
1000 // Note that the largest candidate step at each scale is 2^scale |
|
1001 static const MV bigdia_candidates[MAX_PATTERN_SCALES] |
|
1002 [MAX_PATTERN_CANDIDATES] = { |
|
1003 {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}}, |
|
1004 {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}}, |
|
1005 {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}}, |
|
1006 {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}}, |
|
1007 {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}}, |
|
1008 {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32}, |
|
1009 {-16, 16}, {-32, 0}}, |
|
1010 {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64}, |
|
1011 {-32, 32}, {-64, 0}}, |
|
1012 {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128}, |
|
1013 {-64, 64}, {-128, 0}}, |
|
1014 {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256}, |
|
1015 {-128, 128}, {-256, 0}}, |
|
1016 {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512}, |
|
1017 {-256, 256}, {-512, 0}}, |
|
1018 {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024}, |
|
1019 {-512, 512}, {-1024, 0}}, |
|
1020 }; |
|
1021 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, |
|
1022 do_init_search, 0, vfp, use_mvcost, |
|
1023 center_mv, best_mv, |
|
1024 bigdia_num_candidates, bigdia_candidates); |
|
1025 } |
|
1026 |
|
1027 int vp9_square_search(MACROBLOCK *x, |
|
1028 MV *ref_mv, |
|
1029 int search_param, |
|
1030 int sad_per_bit, |
|
1031 int do_init_search, |
|
1032 const vp9_variance_fn_ptr_t *vfp, |
|
1033 int use_mvcost, |
|
1034 const MV *center_mv, |
|
1035 MV *best_mv) { |
|
1036 // All scales have 8 closest points in square shape |
|
1037 static const int square_num_candidates[MAX_PATTERN_SCALES] = { |
|
1038 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
|
1039 }; |
|
1040 // Note that the largest candidate step at each scale is 2^scale |
|
1041 static const MV square_candidates[MAX_PATTERN_SCALES] |
|
1042 [MAX_PATTERN_CANDIDATES] = { |
|
1043 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}}, |
|
1044 {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}}, |
|
1045 {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}}, |
|
1046 {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}}, |
|
1047 {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16}, |
|
1048 {-16, 16}, {-16, 0}}, |
|
1049 {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32}, |
|
1050 {-32, 32}, {-32, 0}}, |
|
1051 {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64}, |
|
1052 {-64, 64}, {-64, 0}}, |
|
1053 {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128}, |
|
1054 {-128, 128}, {-128, 0}}, |
|
1055 {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256}, |
|
1056 {-256, 256}, {-256, 0}}, |
|
1057 {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512}, |
|
1058 {-512, 512}, {-512, 0}}, |
|
1059 {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024}, |
|
1060 {0, 1024}, {-1024, 1024}, {-1024, 0}}, |
|
1061 }; |
|
1062 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, |
|
1063 do_init_search, 0, vfp, use_mvcost, |
|
1064 center_mv, best_mv, |
|
1065 square_num_candidates, square_candidates); |
|
1066 }; |
|
1067 |
|
1068 #undef CHECK_BOUNDS |
|
1069 #undef CHECK_POINT |
|
1070 #undef CHECK_BETTER |
|
1071 |
|
1072 int vp9_diamond_search_sad_c(MACROBLOCK *x, |
|
1073 int_mv *ref_mv, int_mv *best_mv, |
|
1074 int search_param, int sad_per_bit, int *num00, |
|
1075 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, |
|
1076 int *mvcost[2], int_mv *center_mv) { |
|
1077 int i, j, step; |
|
1078 |
|
1079 const MACROBLOCKD* const xd = &x->e_mbd; |
|
1080 uint8_t *what = x->plane[0].src.buf; |
|
1081 int what_stride = x->plane[0].src.stride; |
|
1082 uint8_t *in_what; |
|
1083 int in_what_stride = xd->plane[0].pre[0].stride; |
|
1084 uint8_t *best_address; |
|
1085 |
|
1086 int tot_steps; |
|
1087 int_mv this_mv; |
|
1088 |
|
1089 int bestsad = INT_MAX; |
|
1090 int best_site = 0; |
|
1091 int last_site = 0; |
|
1092 |
|
1093 int ref_row, ref_col; |
|
1094 int this_row_offset, this_col_offset; |
|
1095 search_site *ss; |
|
1096 |
|
1097 uint8_t *check_here; |
|
1098 int thissad; |
|
1099 int_mv fcenter_mv; |
|
1100 |
|
1101 int *mvjsadcost = x->nmvjointsadcost; |
|
1102 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
|
1103 |
|
1104 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
|
1105 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
|
1106 |
|
1107 clamp_mv(&ref_mv->as_mv, |
|
1108 x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
|
1109 ref_row = ref_mv->as_mv.row; |
|
1110 ref_col = ref_mv->as_mv.col; |
|
1111 *num00 = 0; |
|
1112 best_mv->as_mv.row = ref_row; |
|
1113 best_mv->as_mv.col = ref_col; |
|
1114 |
|
1115 // Work out the start point for the search |
|
1116 in_what = (uint8_t *)(xd->plane[0].pre[0].buf + |
|
1117 (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); |
|
1118 best_address = in_what; |
|
1119 |
|
1120 // Check the starting position |
|
1121 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) |
|
1122 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, |
|
1123 mvjsadcost, mvsadcost, sad_per_bit); |
|
1124 |
|
1125 // search_param determines the length of the initial step and hence the number |
|
1126 // of iterations |
|
1127 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = |
|
1128 // (MAX_FIRST_STEP/4) pel... etc. |
|
1129 ss = &x->ss[search_param * x->searches_per_step]; |
|
1130 tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
|
1131 |
|
1132 i = 1; |
|
1133 |
|
1134 for (step = 0; step < tot_steps; step++) { |
|
1135 for (j = 0; j < x->searches_per_step; j++) { |
|
1136 // Trap illegal vectors |
|
1137 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; |
|
1138 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; |
|
1139 |
|
1140 if ((this_col_offset > x->mv_col_min) && |
|
1141 (this_col_offset < x->mv_col_max) && |
|
1142 (this_row_offset > x->mv_row_min) && |
|
1143 (this_row_offset < x->mv_row_max)) { |
|
1144 check_here = ss[i].offset + best_address; |
|
1145 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
|
1146 bestsad); |
|
1147 |
|
1148 if (thissad < bestsad) { |
|
1149 this_mv.as_mv.row = this_row_offset; |
|
1150 this_mv.as_mv.col = this_col_offset; |
|
1151 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1152 mvjsadcost, mvsadcost, sad_per_bit); |
|
1153 |
|
1154 if (thissad < bestsad) { |
|
1155 bestsad = thissad; |
|
1156 best_site = i; |
|
1157 } |
|
1158 } |
|
1159 } |
|
1160 |
|
1161 i++; |
|
1162 } |
|
1163 |
|
1164 if (best_site != last_site) { |
|
1165 best_mv->as_mv.row += ss[best_site].mv.row; |
|
1166 best_mv->as_mv.col += ss[best_site].mv.col; |
|
1167 best_address += ss[best_site].offset; |
|
1168 last_site = best_site; |
|
1169 #if defined(NEW_DIAMOND_SEARCH) |
|
1170 while (1) { |
|
1171 this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; |
|
1172 this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; |
|
1173 if ((this_col_offset > x->mv_col_min) && |
|
1174 (this_col_offset < x->mv_col_max) && |
|
1175 (this_row_offset > x->mv_row_min) && |
|
1176 (this_row_offset < x->mv_row_max)) { |
|
1177 check_here = ss[best_site].offset + best_address; |
|
1178 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
|
1179 bestsad); |
|
1180 if (thissad < bestsad) { |
|
1181 this_mv.as_mv.row = this_row_offset; |
|
1182 this_mv.as_mv.col = this_col_offset; |
|
1183 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1184 mvjsadcost, mvsadcost, sad_per_bit); |
|
1185 if (thissad < bestsad) { |
|
1186 bestsad = thissad; |
|
1187 best_mv->as_mv.row += ss[best_site].mv.row; |
|
1188 best_mv->as_mv.col += ss[best_site].mv.col; |
|
1189 best_address += ss[best_site].offset; |
|
1190 continue; |
|
1191 } |
|
1192 } |
|
1193 } |
|
1194 break; |
|
1195 }; |
|
1196 #endif |
|
1197 } else if (best_address == in_what) { |
|
1198 (*num00)++; |
|
1199 } |
|
1200 } |
|
1201 |
|
1202 this_mv.as_mv.row = best_mv->as_mv.row * 8; |
|
1203 this_mv.as_mv.col = best_mv->as_mv.col * 8; |
|
1204 |
|
1205 if (bestsad == INT_MAX) |
|
1206 return INT_MAX; |
|
1207 |
|
1208 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
|
1209 (unsigned int *)(&thissad)) + |
|
1210 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
|
1211 mvjcost, mvcost, x->errorperbit); |
|
1212 } |
|
1213 |
|
1214 int vp9_diamond_search_sadx4(MACROBLOCK *x, |
|
1215 int_mv *ref_mv, int_mv *best_mv, int search_param, |
|
1216 int sad_per_bit, int *num00, |
|
1217 vp9_variance_fn_ptr_t *fn_ptr, |
|
1218 int *mvjcost, int *mvcost[2], int_mv *center_mv) { |
|
1219 int i, j, step; |
|
1220 |
|
1221 const MACROBLOCKD* const xd = &x->e_mbd; |
|
1222 uint8_t *what = x->plane[0].src.buf; |
|
1223 int what_stride = x->plane[0].src.stride; |
|
1224 uint8_t *in_what; |
|
1225 int in_what_stride = xd->plane[0].pre[0].stride; |
|
1226 uint8_t *best_address; |
|
1227 |
|
1228 int tot_steps; |
|
1229 int_mv this_mv; |
|
1230 |
|
1231 unsigned int bestsad = INT_MAX; |
|
1232 int best_site = 0; |
|
1233 int last_site = 0; |
|
1234 |
|
1235 int ref_row; |
|
1236 int ref_col; |
|
1237 int this_row_offset; |
|
1238 int this_col_offset; |
|
1239 search_site *ss; |
|
1240 |
|
1241 uint8_t *check_here; |
|
1242 unsigned int thissad; |
|
1243 int_mv fcenter_mv; |
|
1244 |
|
1245 int *mvjsadcost = x->nmvjointsadcost; |
|
1246 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
|
1247 |
|
1248 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
|
1249 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
|
1250 |
|
1251 clamp_mv(&ref_mv->as_mv, |
|
1252 x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
|
1253 ref_row = ref_mv->as_mv.row; |
|
1254 ref_col = ref_mv->as_mv.col; |
|
1255 *num00 = 0; |
|
1256 best_mv->as_mv.row = ref_row; |
|
1257 best_mv->as_mv.col = ref_col; |
|
1258 |
|
1259 // Work out the start point for the search |
|
1260 in_what = (uint8_t *)(xd->plane[0].pre[0].buf + |
|
1261 (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); |
|
1262 best_address = in_what; |
|
1263 |
|
1264 // Check the starting position |
|
1265 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) |
|
1266 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, |
|
1267 mvjsadcost, mvsadcost, sad_per_bit); |
|
1268 |
|
1269 // search_param determines the length of the initial step and hence the number |
|
1270 // of iterations. |
|
1271 // 0 = initial step (MAX_FIRST_STEP) pel |
|
1272 // 1 = (MAX_FIRST_STEP/2) pel, |
|
1273 // 2 = (MAX_FIRST_STEP/4) pel... |
|
1274 ss = &x->ss[search_param * x->searches_per_step]; |
|
1275 tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
|
1276 |
|
1277 i = 1; |
|
1278 |
|
1279 for (step = 0; step < tot_steps; step++) { |
|
1280 int all_in = 1, t; |
|
1281 |
|
1282 // All_in is true if every one of the points we are checking are within |
|
1283 // the bounds of the image. |
|
1284 all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); |
|
1285 all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); |
|
1286 all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); |
|
1287 all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); |
|
1288 |
|
1289 // If all the pixels are within the bounds we don't check whether the |
|
1290 // search point is valid in this loop, otherwise we check each point |
|
1291 // for validity.. |
|
1292 if (all_in) { |
|
1293 unsigned int sad_array[4]; |
|
1294 |
|
1295 for (j = 0; j < x->searches_per_step; j += 4) { |
|
1296 unsigned char const *block_offset[4]; |
|
1297 |
|
1298 for (t = 0; t < 4; t++) |
|
1299 block_offset[t] = ss[i + t].offset + best_address; |
|
1300 |
|
1301 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
|
1302 sad_array); |
|
1303 |
|
1304 for (t = 0; t < 4; t++, i++) { |
|
1305 if (sad_array[t] < bestsad) { |
|
1306 this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; |
|
1307 this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; |
|
1308 sad_array[t] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1309 mvjsadcost, mvsadcost, sad_per_bit); |
|
1310 |
|
1311 if (sad_array[t] < bestsad) { |
|
1312 bestsad = sad_array[t]; |
|
1313 best_site = i; |
|
1314 } |
|
1315 } |
|
1316 } |
|
1317 } |
|
1318 } else { |
|
1319 for (j = 0; j < x->searches_per_step; j++) { |
|
1320 // Trap illegal vectors |
|
1321 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; |
|
1322 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; |
|
1323 |
|
1324 if ((this_col_offset > x->mv_col_min) && |
|
1325 (this_col_offset < x->mv_col_max) && |
|
1326 (this_row_offset > x->mv_row_min) && |
|
1327 (this_row_offset < x->mv_row_max)) { |
|
1328 check_here = ss[i].offset + best_address; |
|
1329 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
|
1330 bestsad); |
|
1331 |
|
1332 if (thissad < bestsad) { |
|
1333 this_mv.as_mv.row = this_row_offset; |
|
1334 this_mv.as_mv.col = this_col_offset; |
|
1335 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1336 mvjsadcost, mvsadcost, sad_per_bit); |
|
1337 |
|
1338 if (thissad < bestsad) { |
|
1339 bestsad = thissad; |
|
1340 best_site = i; |
|
1341 } |
|
1342 } |
|
1343 } |
|
1344 i++; |
|
1345 } |
|
1346 } |
|
1347 if (best_site != last_site) { |
|
1348 best_mv->as_mv.row += ss[best_site].mv.row; |
|
1349 best_mv->as_mv.col += ss[best_site].mv.col; |
|
1350 best_address += ss[best_site].offset; |
|
1351 last_site = best_site; |
|
1352 #if defined(NEW_DIAMOND_SEARCH) |
|
1353 while (1) { |
|
1354 this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; |
|
1355 this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; |
|
1356 if ((this_col_offset > x->mv_col_min) && |
|
1357 (this_col_offset < x->mv_col_max) && |
|
1358 (this_row_offset > x->mv_row_min) && |
|
1359 (this_row_offset < x->mv_row_max)) { |
|
1360 check_here = ss[best_site].offset + best_address; |
|
1361 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
|
1362 bestsad); |
|
1363 if (thissad < bestsad) { |
|
1364 this_mv.as_mv.row = this_row_offset; |
|
1365 this_mv.as_mv.col = this_col_offset; |
|
1366 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1367 mvjsadcost, mvsadcost, sad_per_bit); |
|
1368 if (thissad < bestsad) { |
|
1369 bestsad = thissad; |
|
1370 best_mv->as_mv.row += ss[best_site].mv.row; |
|
1371 best_mv->as_mv.col += ss[best_site].mv.col; |
|
1372 best_address += ss[best_site].offset; |
|
1373 continue; |
|
1374 } |
|
1375 } |
|
1376 } |
|
1377 break; |
|
1378 }; |
|
1379 #endif |
|
1380 } else if (best_address == in_what) { |
|
1381 (*num00)++; |
|
1382 } |
|
1383 } |
|
1384 |
|
1385 this_mv.as_mv.row = best_mv->as_mv.row * 8; |
|
1386 this_mv.as_mv.col = best_mv->as_mv.col * 8; |
|
1387 |
|
1388 if (bestsad == INT_MAX) |
|
1389 return INT_MAX; |
|
1390 |
|
1391 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
|
1392 (unsigned int *)(&thissad)) + |
|
1393 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
|
1394 mvjcost, mvcost, x->errorperbit); |
|
1395 } |
|
1396 |
|
1397 /* do_refine: If last step (1-away) of n-step search doesn't pick the center |
|
1398 point as the best match, we will do a final 1-away diamond |
|
1399 refining search */ |
|
1400 |
|
1401 int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, |
|
1402 int_mv *mvp_full, int step_param, |
|
1403 int sadpb, int further_steps, |
|
1404 int do_refine, vp9_variance_fn_ptr_t *fn_ptr, |
|
1405 int_mv *ref_mv, int_mv *dst_mv) { |
|
1406 int_mv temp_mv; |
|
1407 int thissme, n, num00; |
|
1408 int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, |
|
1409 step_param, sadpb, &num00, |
|
1410 fn_ptr, x->nmvjointcost, |
|
1411 x->mvcost, ref_mv); |
|
1412 dst_mv->as_int = temp_mv.as_int; |
|
1413 |
|
1414 n = num00; |
|
1415 num00 = 0; |
|
1416 |
|
1417 /* If there won't be more n-step search, check to see if refining search is |
|
1418 * needed. */ |
|
1419 if (n > further_steps) |
|
1420 do_refine = 0; |
|
1421 |
|
1422 while (n < further_steps) { |
|
1423 n++; |
|
1424 |
|
1425 if (num00) { |
|
1426 num00--; |
|
1427 } else { |
|
1428 thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, |
|
1429 step_param + n, sadpb, &num00, |
|
1430 fn_ptr, x->nmvjointcost, x->mvcost, |
|
1431 ref_mv); |
|
1432 |
|
1433 /* check to see if refining search is needed. */ |
|
1434 if (num00 > (further_steps - n)) |
|
1435 do_refine = 0; |
|
1436 |
|
1437 if (thissme < bestsme) { |
|
1438 bestsme = thissme; |
|
1439 dst_mv->as_int = temp_mv.as_int; |
|
1440 } |
|
1441 } |
|
1442 } |
|
1443 |
|
1444 /* final 1-away diamond refining search */ |
|
1445 if (do_refine == 1) { |
|
1446 int search_range = 8; |
|
1447 int_mv best_mv; |
|
1448 best_mv.as_int = dst_mv->as_int; |
|
1449 thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, |
|
1450 fn_ptr, x->nmvjointcost, x->mvcost, |
|
1451 ref_mv); |
|
1452 |
|
1453 if (thissme < bestsme) { |
|
1454 bestsme = thissme; |
|
1455 dst_mv->as_int = best_mv.as_int; |
|
1456 } |
|
1457 } |
|
1458 return bestsme; |
|
1459 } |
|
1460 |
|
1461 int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, |
|
1462 int sad_per_bit, int distance, |
|
1463 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, |
|
1464 int *mvcost[2], |
|
1465 int_mv *center_mv, int n) { |
|
1466 const MACROBLOCKD* const xd = &x->e_mbd; |
|
1467 uint8_t *what = x->plane[0].src.buf; |
|
1468 int what_stride = x->plane[0].src.stride; |
|
1469 uint8_t *in_what; |
|
1470 int in_what_stride = xd->plane[0].pre[0].stride; |
|
1471 int mv_stride = xd->plane[0].pre[0].stride; |
|
1472 uint8_t *bestaddress; |
|
1473 int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; |
|
1474 int_mv this_mv; |
|
1475 int bestsad = INT_MAX; |
|
1476 int r, c; |
|
1477 |
|
1478 uint8_t *check_here; |
|
1479 int thissad; |
|
1480 |
|
1481 int ref_row = ref_mv->as_mv.row; |
|
1482 int ref_col = ref_mv->as_mv.col; |
|
1483 |
|
1484 int row_min = ref_row - distance; |
|
1485 int row_max = ref_row + distance; |
|
1486 int col_min = ref_col - distance; |
|
1487 int col_max = ref_col + distance; |
|
1488 int_mv fcenter_mv; |
|
1489 |
|
1490 int *mvjsadcost = x->nmvjointsadcost; |
|
1491 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
|
1492 |
|
1493 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
|
1494 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
|
1495 |
|
1496 // Work out the mid point for the search |
|
1497 in_what = xd->plane[0].pre[0].buf; |
|
1498 bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; |
|
1499 |
|
1500 best_mv->as_mv.row = ref_row; |
|
1501 best_mv->as_mv.col = ref_col; |
|
1502 |
|
1503 // Baseline value at the centre |
|
1504 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, |
|
1505 in_what_stride, 0x7fffffff) |
|
1506 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, |
|
1507 mvjsadcost, mvsadcost, sad_per_bit); |
|
1508 |
|
1509 // Apply further limits to prevent us looking using vectors that stretch |
|
1510 // beyond the UMV border |
|
1511 col_min = MAX(col_min, x->mv_col_min); |
|
1512 col_max = MIN(col_max, x->mv_col_max); |
|
1513 row_min = MAX(row_min, x->mv_row_min); |
|
1514 row_max = MIN(row_max, x->mv_row_max); |
|
1515 |
|
1516 for (r = row_min; r < row_max; r++) { |
|
1517 this_mv.as_mv.row = r; |
|
1518 check_here = r * mv_stride + in_what + col_min; |
|
1519 |
|
1520 for (c = col_min; c < col_max; c++) { |
|
1521 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
|
1522 bestsad); |
|
1523 |
|
1524 this_mv.as_mv.col = c; |
|
1525 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1526 mvjsadcost, mvsadcost, sad_per_bit); |
|
1527 |
|
1528 if (thissad < bestsad) { |
|
1529 bestsad = thissad; |
|
1530 best_mv->as_mv.row = r; |
|
1531 best_mv->as_mv.col = c; |
|
1532 bestaddress = check_here; |
|
1533 } |
|
1534 |
|
1535 check_here++; |
|
1536 } |
|
1537 } |
|
1538 |
|
1539 this_mv.as_mv.row = best_mv->as_mv.row * 8; |
|
1540 this_mv.as_mv.col = best_mv->as_mv.col * 8; |
|
1541 |
|
1542 if (bestsad < INT_MAX) |
|
1543 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, |
|
1544 (unsigned int *)(&thissad)) + |
|
1545 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
|
1546 mvjcost, mvcost, x->errorperbit); |
|
1547 else |
|
1548 return INT_MAX; |
|
1549 } |
|
1550 |
|
1551 int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, |
|
1552 int sad_per_bit, int distance, |
|
1553 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, |
|
1554 int *mvcost[2], int_mv *center_mv, int n) { |
|
1555 const MACROBLOCKD* const xd = &x->e_mbd; |
|
1556 uint8_t *what = x->plane[0].src.buf; |
|
1557 int what_stride = x->plane[0].src.stride; |
|
1558 uint8_t *in_what; |
|
1559 int in_what_stride = xd->plane[0].pre[0].stride; |
|
1560 int mv_stride = xd->plane[0].pre[0].stride; |
|
1561 uint8_t *bestaddress; |
|
1562 int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; |
|
1563 int_mv this_mv; |
|
1564 unsigned int bestsad = INT_MAX; |
|
1565 int r, c; |
|
1566 |
|
1567 uint8_t *check_here; |
|
1568 unsigned int thissad; |
|
1569 |
|
1570 int ref_row = ref_mv->as_mv.row; |
|
1571 int ref_col = ref_mv->as_mv.col; |
|
1572 |
|
1573 int row_min = ref_row - distance; |
|
1574 int row_max = ref_row + distance; |
|
1575 int col_min = ref_col - distance; |
|
1576 int col_max = ref_col + distance; |
|
1577 |
|
1578 unsigned int sad_array[3]; |
|
1579 int_mv fcenter_mv; |
|
1580 |
|
1581 int *mvjsadcost = x->nmvjointsadcost; |
|
1582 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
|
1583 |
|
1584 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
|
1585 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
|
1586 |
|
1587 // Work out the mid point for the search |
|
1588 in_what = xd->plane[0].pre[0].buf; |
|
1589 bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; |
|
1590 |
|
1591 best_mv->as_mv.row = ref_row; |
|
1592 best_mv->as_mv.col = ref_col; |
|
1593 |
|
1594 // Baseline value at the centre |
|
1595 bestsad = fn_ptr->sdf(what, what_stride, |
|
1596 bestaddress, in_what_stride, 0x7fffffff) |
|
1597 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, |
|
1598 mvjsadcost, mvsadcost, sad_per_bit); |
|
1599 |
|
1600 // Apply further limits to prevent us looking using vectors that stretch |
|
1601 // beyond the UMV border |
|
1602 col_min = MAX(col_min, x->mv_col_min); |
|
1603 col_max = MIN(col_max, x->mv_col_max); |
|
1604 row_min = MAX(row_min, x->mv_row_min); |
|
1605 row_max = MIN(row_max, x->mv_row_max); |
|
1606 |
|
1607 for (r = row_min; r < row_max; r++) { |
|
1608 this_mv.as_mv.row = r; |
|
1609 check_here = r * mv_stride + in_what + col_min; |
|
1610 c = col_min; |
|
1611 |
|
1612 while ((c + 2) < col_max) { |
|
1613 int i; |
|
1614 |
|
1615 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); |
|
1616 |
|
1617 for (i = 0; i < 3; i++) { |
|
1618 thissad = sad_array[i]; |
|
1619 |
|
1620 if (thissad < bestsad) { |
|
1621 this_mv.as_mv.col = c; |
|
1622 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1623 mvjsadcost, mvsadcost, sad_per_bit); |
|
1624 |
|
1625 if (thissad < bestsad) { |
|
1626 bestsad = thissad; |
|
1627 best_mv->as_mv.row = r; |
|
1628 best_mv->as_mv.col = c; |
|
1629 bestaddress = check_here; |
|
1630 } |
|
1631 } |
|
1632 |
|
1633 check_here++; |
|
1634 c++; |
|
1635 } |
|
1636 } |
|
1637 |
|
1638 while (c < col_max) { |
|
1639 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
|
1640 bestsad); |
|
1641 |
|
1642 if (thissad < bestsad) { |
|
1643 this_mv.as_mv.col = c; |
|
1644 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1645 mvjsadcost, mvsadcost, sad_per_bit); |
|
1646 |
|
1647 if (thissad < bestsad) { |
|
1648 bestsad = thissad; |
|
1649 best_mv->as_mv.row = r; |
|
1650 best_mv->as_mv.col = c; |
|
1651 bestaddress = check_here; |
|
1652 } |
|
1653 } |
|
1654 |
|
1655 check_here++; |
|
1656 c++; |
|
1657 } |
|
1658 } |
|
1659 |
|
1660 this_mv.as_mv.row = best_mv->as_mv.row * 8; |
|
1661 this_mv.as_mv.col = best_mv->as_mv.col * 8; |
|
1662 |
|
1663 if (bestsad < INT_MAX) |
|
1664 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, |
|
1665 (unsigned int *)(&thissad)) + |
|
1666 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
|
1667 mvjcost, mvcost, x->errorperbit); |
|
1668 else |
|
1669 return INT_MAX; |
|
1670 } |
|
1671 |
|
1672 int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, |
|
1673 int sad_per_bit, int distance, |
|
1674 vp9_variance_fn_ptr_t *fn_ptr, |
|
1675 int *mvjcost, int *mvcost[2], |
|
1676 int_mv *center_mv, int n) { |
|
1677 const MACROBLOCKD* const xd = &x->e_mbd; |
|
1678 uint8_t *what = x->plane[0].src.buf; |
|
1679 int what_stride = x->plane[0].src.stride; |
|
1680 uint8_t *in_what; |
|
1681 int in_what_stride = xd->plane[0].pre[0].stride; |
|
1682 int mv_stride = xd->plane[0].pre[0].stride; |
|
1683 uint8_t *bestaddress; |
|
1684 int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; |
|
1685 int_mv this_mv; |
|
1686 unsigned int bestsad = INT_MAX; |
|
1687 int r, c; |
|
1688 |
|
1689 uint8_t *check_here; |
|
1690 unsigned int thissad; |
|
1691 |
|
1692 int ref_row = ref_mv->as_mv.row; |
|
1693 int ref_col = ref_mv->as_mv.col; |
|
1694 |
|
1695 int row_min = ref_row - distance; |
|
1696 int row_max = ref_row + distance; |
|
1697 int col_min = ref_col - distance; |
|
1698 int col_max = ref_col + distance; |
|
1699 |
|
1700 DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); |
|
1701 unsigned int sad_array[3]; |
|
1702 int_mv fcenter_mv; |
|
1703 |
|
1704 int *mvjsadcost = x->nmvjointsadcost; |
|
1705 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
|
1706 |
|
1707 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
|
1708 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
|
1709 |
|
1710 // Work out the mid point for the search |
|
1711 in_what = xd->plane[0].pre[0].buf; |
|
1712 bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; |
|
1713 |
|
1714 best_mv->as_mv.row = ref_row; |
|
1715 best_mv->as_mv.col = ref_col; |
|
1716 |
|
1717 // Baseline value at the centre |
|
1718 bestsad = fn_ptr->sdf(what, what_stride, |
|
1719 bestaddress, in_what_stride, 0x7fffffff) |
|
1720 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, |
|
1721 mvjsadcost, mvsadcost, sad_per_bit); |
|
1722 |
|
1723 // Apply further limits to prevent us looking using vectors that stretch |
|
1724 // beyond the UMV border |
|
1725 col_min = MAX(col_min, x->mv_col_min); |
|
1726 col_max = MIN(col_max, x->mv_col_max); |
|
1727 row_min = MAX(row_min, x->mv_row_min); |
|
1728 row_max = MIN(row_max, x->mv_row_max); |
|
1729 |
|
1730 for (r = row_min; r < row_max; r++) { |
|
1731 this_mv.as_mv.row = r; |
|
1732 check_here = r * mv_stride + in_what + col_min; |
|
1733 c = col_min; |
|
1734 |
|
1735 while ((c + 7) < col_max) { |
|
1736 int i; |
|
1737 |
|
1738 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); |
|
1739 |
|
1740 for (i = 0; i < 8; i++) { |
|
1741 thissad = (unsigned int)sad_array8[i]; |
|
1742 |
|
1743 if (thissad < bestsad) { |
|
1744 this_mv.as_mv.col = c; |
|
1745 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1746 mvjsadcost, mvsadcost, sad_per_bit); |
|
1747 |
|
1748 if (thissad < bestsad) { |
|
1749 bestsad = thissad; |
|
1750 best_mv->as_mv.row = r; |
|
1751 best_mv->as_mv.col = c; |
|
1752 bestaddress = check_here; |
|
1753 } |
|
1754 } |
|
1755 |
|
1756 check_here++; |
|
1757 c++; |
|
1758 } |
|
1759 } |
|
1760 |
|
1761 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { |
|
1762 int i; |
|
1763 |
|
1764 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); |
|
1765 |
|
1766 for (i = 0; i < 3; i++) { |
|
1767 thissad = sad_array[i]; |
|
1768 |
|
1769 if (thissad < bestsad) { |
|
1770 this_mv.as_mv.col = c; |
|
1771 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1772 mvjsadcost, mvsadcost, sad_per_bit); |
|
1773 |
|
1774 if (thissad < bestsad) { |
|
1775 bestsad = thissad; |
|
1776 best_mv->as_mv.row = r; |
|
1777 best_mv->as_mv.col = c; |
|
1778 bestaddress = check_here; |
|
1779 } |
|
1780 } |
|
1781 |
|
1782 check_here++; |
|
1783 c++; |
|
1784 } |
|
1785 } |
|
1786 |
|
1787 while (c < col_max) { |
|
1788 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
|
1789 bestsad); |
|
1790 |
|
1791 if (thissad < bestsad) { |
|
1792 this_mv.as_mv.col = c; |
|
1793 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1794 mvjsadcost, mvsadcost, sad_per_bit); |
|
1795 |
|
1796 if (thissad < bestsad) { |
|
1797 bestsad = thissad; |
|
1798 best_mv->as_mv.row = r; |
|
1799 best_mv->as_mv.col = c; |
|
1800 bestaddress = check_here; |
|
1801 } |
|
1802 } |
|
1803 |
|
1804 check_here++; |
|
1805 c++; |
|
1806 } |
|
1807 } |
|
1808 |
|
1809 this_mv.as_mv.row = best_mv->as_mv.row * 8; |
|
1810 this_mv.as_mv.col = best_mv->as_mv.col * 8; |
|
1811 |
|
1812 if (bestsad < INT_MAX) |
|
1813 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, |
|
1814 (unsigned int *)(&thissad)) + |
|
1815 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
|
1816 mvjcost, mvcost, x->errorperbit); |
|
1817 else |
|
1818 return INT_MAX; |
|
1819 } |
|
1820 int vp9_refining_search_sad_c(MACROBLOCK *x, |
|
1821 int_mv *ref_mv, int error_per_bit, |
|
1822 int search_range, vp9_variance_fn_ptr_t *fn_ptr, |
|
1823 int *mvjcost, int *mvcost[2], int_mv *center_mv) { |
|
1824 const MACROBLOCKD* const xd = &x->e_mbd; |
|
1825 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; |
|
1826 int i, j; |
|
1827 int this_row_offset, this_col_offset; |
|
1828 |
|
1829 int what_stride = x->plane[0].src.stride; |
|
1830 int in_what_stride = xd->plane[0].pre[0].stride; |
|
1831 uint8_t *what = x->plane[0].src.buf; |
|
1832 uint8_t *best_address = xd->plane[0].pre[0].buf + |
|
1833 (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + |
|
1834 ref_mv->as_mv.col; |
|
1835 uint8_t *check_here; |
|
1836 unsigned int thissad; |
|
1837 int_mv this_mv; |
|
1838 unsigned int bestsad = INT_MAX; |
|
1839 int_mv fcenter_mv; |
|
1840 |
|
1841 int *mvjsadcost = x->nmvjointsadcost; |
|
1842 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
|
1843 |
|
1844 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
|
1845 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
|
1846 |
|
1847 bestsad = fn_ptr->sdf(what, what_stride, best_address, |
|
1848 in_what_stride, 0x7fffffff) + |
|
1849 mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, |
|
1850 mvjsadcost, mvsadcost, error_per_bit); |
|
1851 |
|
1852 for (i = 0; i < search_range; i++) { |
|
1853 int best_site = -1; |
|
1854 |
|
1855 for (j = 0; j < 4; j++) { |
|
1856 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
|
1857 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
|
1858 |
|
1859 if ((this_col_offset > x->mv_col_min) && |
|
1860 (this_col_offset < x->mv_col_max) && |
|
1861 (this_row_offset > x->mv_row_min) && |
|
1862 (this_row_offset < x->mv_row_max)) { |
|
1863 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + |
|
1864 best_address; |
|
1865 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
|
1866 bestsad); |
|
1867 |
|
1868 if (thissad < bestsad) { |
|
1869 this_mv.as_mv.row = this_row_offset; |
|
1870 this_mv.as_mv.col = this_col_offset; |
|
1871 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1872 mvjsadcost, mvsadcost, error_per_bit); |
|
1873 |
|
1874 if (thissad < bestsad) { |
|
1875 bestsad = thissad; |
|
1876 best_site = j; |
|
1877 } |
|
1878 } |
|
1879 } |
|
1880 } |
|
1881 |
|
1882 if (best_site == -1) { |
|
1883 break; |
|
1884 } else { |
|
1885 ref_mv->as_mv.row += neighbors[best_site].row; |
|
1886 ref_mv->as_mv.col += neighbors[best_site].col; |
|
1887 best_address += (neighbors[best_site].row) * in_what_stride + |
|
1888 neighbors[best_site].col; |
|
1889 } |
|
1890 } |
|
1891 |
|
1892 this_mv.as_mv.row = ref_mv->as_mv.row * 8; |
|
1893 this_mv.as_mv.col = ref_mv->as_mv.col * 8; |
|
1894 |
|
1895 if (bestsad < INT_MAX) |
|
1896 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
|
1897 (unsigned int *)(&thissad)) + |
|
1898 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
|
1899 mvjcost, mvcost, x->errorperbit); |
|
1900 else |
|
1901 return INT_MAX; |
|
1902 } |
|
1903 |
|
1904 int vp9_refining_search_sadx4(MACROBLOCK *x, |
|
1905 int_mv *ref_mv, int error_per_bit, |
|
1906 int search_range, vp9_variance_fn_ptr_t *fn_ptr, |
|
1907 int *mvjcost, int *mvcost[2], int_mv *center_mv) { |
|
1908 const MACROBLOCKD* const xd = &x->e_mbd; |
|
1909 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; |
|
1910 int i, j; |
|
1911 int this_row_offset, this_col_offset; |
|
1912 |
|
1913 int what_stride = x->plane[0].src.stride; |
|
1914 int in_what_stride = xd->plane[0].pre[0].stride; |
|
1915 uint8_t *what = x->plane[0].src.buf; |
|
1916 uint8_t *best_address = xd->plane[0].pre[0].buf + |
|
1917 (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + |
|
1918 ref_mv->as_mv.col; |
|
1919 uint8_t *check_here; |
|
1920 unsigned int thissad; |
|
1921 int_mv this_mv; |
|
1922 unsigned int bestsad = INT_MAX; |
|
1923 int_mv fcenter_mv; |
|
1924 |
|
1925 int *mvjsadcost = x->nmvjointsadcost; |
|
1926 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
|
1927 |
|
1928 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
|
1929 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
|
1930 |
|
1931 bestsad = fn_ptr->sdf(what, what_stride, best_address, |
|
1932 in_what_stride, 0x7fffffff) + |
|
1933 mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, |
|
1934 mvjsadcost, mvsadcost, error_per_bit); |
|
1935 |
|
1936 for (i = 0; i < search_range; i++) { |
|
1937 int best_site = -1; |
|
1938 int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) & |
|
1939 ((ref_mv->as_mv.row + 1) < x->mv_row_max) & |
|
1940 ((ref_mv->as_mv.col - 1) > x->mv_col_min) & |
|
1941 ((ref_mv->as_mv.col + 1) < x->mv_col_max); |
|
1942 |
|
1943 if (all_in) { |
|
1944 unsigned int sad_array[4]; |
|
1945 unsigned char const *block_offset[4]; |
|
1946 block_offset[0] = best_address - in_what_stride; |
|
1947 block_offset[1] = best_address - 1; |
|
1948 block_offset[2] = best_address + 1; |
|
1949 block_offset[3] = best_address + in_what_stride; |
|
1950 |
|
1951 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
|
1952 sad_array); |
|
1953 |
|
1954 for (j = 0; j < 4; j++) { |
|
1955 if (sad_array[j] < bestsad) { |
|
1956 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; |
|
1957 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; |
|
1958 sad_array[j] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1959 mvjsadcost, mvsadcost, error_per_bit); |
|
1960 |
|
1961 if (sad_array[j] < bestsad) { |
|
1962 bestsad = sad_array[j]; |
|
1963 best_site = j; |
|
1964 } |
|
1965 } |
|
1966 } |
|
1967 } else { |
|
1968 for (j = 0; j < 4; j++) { |
|
1969 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
|
1970 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
|
1971 |
|
1972 if ((this_col_offset > x->mv_col_min) && |
|
1973 (this_col_offset < x->mv_col_max) && |
|
1974 (this_row_offset > x->mv_row_min) && |
|
1975 (this_row_offset < x->mv_row_max)) { |
|
1976 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + |
|
1977 best_address; |
|
1978 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
|
1979 bestsad); |
|
1980 |
|
1981 if (thissad < bestsad) { |
|
1982 this_mv.as_mv.row = this_row_offset; |
|
1983 this_mv.as_mv.col = this_col_offset; |
|
1984 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
1985 mvjsadcost, mvsadcost, error_per_bit); |
|
1986 |
|
1987 if (thissad < bestsad) { |
|
1988 bestsad = thissad; |
|
1989 best_site = j; |
|
1990 } |
|
1991 } |
|
1992 } |
|
1993 } |
|
1994 } |
|
1995 |
|
1996 if (best_site == -1) { |
|
1997 break; |
|
1998 } else { |
|
1999 ref_mv->as_mv.row += neighbors[best_site].row; |
|
2000 ref_mv->as_mv.col += neighbors[best_site].col; |
|
2001 best_address += (neighbors[best_site].row) * in_what_stride + |
|
2002 neighbors[best_site].col; |
|
2003 } |
|
2004 } |
|
2005 |
|
2006 this_mv.as_mv.row = ref_mv->as_mv.row * 8; |
|
2007 this_mv.as_mv.col = ref_mv->as_mv.col * 8; |
|
2008 |
|
2009 if (bestsad < INT_MAX) |
|
2010 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
|
2011 (unsigned int *)(&thissad)) + |
|
2012 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
|
2013 mvjcost, mvcost, x->errorperbit); |
|
2014 else |
|
2015 return INT_MAX; |
|
2016 } |
|
2017 |
|
2018 /* This function is called when we do joint motion search in comp_inter_inter |
|
2019 * mode. |
|
2020 */ |
|
2021 int vp9_refining_search_8p_c(MACROBLOCK *x, |
|
2022 int_mv *ref_mv, int error_per_bit, |
|
2023 int search_range, vp9_variance_fn_ptr_t *fn_ptr, |
|
2024 int *mvjcost, int *mvcost[2], int_mv *center_mv, |
|
2025 const uint8_t *second_pred, int w, int h) { |
|
2026 const MACROBLOCKD* const xd = &x->e_mbd; |
|
2027 MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, |
|
2028 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; |
|
2029 int i, j; |
|
2030 int this_row_offset, this_col_offset; |
|
2031 |
|
2032 int what_stride = x->plane[0].src.stride; |
|
2033 int in_what_stride = xd->plane[0].pre[0].stride; |
|
2034 uint8_t *what = x->plane[0].src.buf; |
|
2035 uint8_t *best_address = xd->plane[0].pre[0].buf + |
|
2036 (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + |
|
2037 ref_mv->as_mv.col; |
|
2038 uint8_t *check_here; |
|
2039 unsigned int thissad; |
|
2040 int_mv this_mv; |
|
2041 unsigned int bestsad = INT_MAX; |
|
2042 int_mv fcenter_mv; |
|
2043 |
|
2044 int *mvjsadcost = x->nmvjointsadcost; |
|
2045 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
|
2046 |
|
2047 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
|
2048 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
|
2049 |
|
2050 /* Get compound pred by averaging two pred blocks. */ |
|
2051 bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride, |
|
2052 second_pred, 0x7fffffff) + |
|
2053 mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, |
|
2054 mvjsadcost, mvsadcost, error_per_bit); |
|
2055 |
|
2056 for (i = 0; i < search_range; i++) { |
|
2057 int best_site = -1; |
|
2058 |
|
2059 for (j = 0; j < 8; j++) { |
|
2060 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
|
2061 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
|
2062 |
|
2063 if ((this_col_offset > x->mv_col_min) && |
|
2064 (this_col_offset < x->mv_col_max) && |
|
2065 (this_row_offset > x->mv_row_min) && |
|
2066 (this_row_offset < x->mv_row_max)) { |
|
2067 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + |
|
2068 best_address; |
|
2069 |
|
2070 /* Get compound block and use it to calculate SAD. */ |
|
2071 thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, |
|
2072 second_pred, bestsad); |
|
2073 |
|
2074 if (thissad < bestsad) { |
|
2075 this_mv.as_mv.row = this_row_offset; |
|
2076 this_mv.as_mv.col = this_col_offset; |
|
2077 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
|
2078 mvjsadcost, mvsadcost, error_per_bit); |
|
2079 if (thissad < bestsad) { |
|
2080 bestsad = thissad; |
|
2081 best_site = j; |
|
2082 } |
|
2083 } |
|
2084 } |
|
2085 } |
|
2086 |
|
2087 if (best_site == -1) { |
|
2088 break; |
|
2089 } else { |
|
2090 ref_mv->as_mv.row += neighbors[best_site].row; |
|
2091 ref_mv->as_mv.col += neighbors[best_site].col; |
|
2092 best_address += (neighbors[best_site].row) * in_what_stride + |
|
2093 neighbors[best_site].col; |
|
2094 } |
|
2095 } |
|
2096 |
|
2097 this_mv.as_mv.row = ref_mv->as_mv.row * 8; |
|
2098 this_mv.as_mv.col = ref_mv->as_mv.col * 8; |
|
2099 |
|
2100 if (bestsad < INT_MAX) { |
|
2101 // FIXME(rbultje, yunqing): add full-pixel averaging variance functions |
|
2102 // so we don't have to use the subpixel with xoff=0,yoff=0 here. |
|
2103 return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride, |
|
2104 (unsigned int *)(&thissad), second_pred) + |
|
2105 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
|
2106 mvjcost, mvcost, x->errorperbit); |
|
2107 } else { |
|
2108 return INT_MAX; |
|
2109 } |
|
2110 } |