Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* |
michael@0 | 2 | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
michael@0 | 3 | * |
michael@0 | 4 | * Use of this source code is governed by a BSD-style license |
michael@0 | 5 | * that can be found in the LICENSE file in the root of the source |
michael@0 | 6 | * tree. An additional intellectual property rights grant can be found |
michael@0 | 7 | * in the file PATENTS. All contributing project authors may |
michael@0 | 8 | * be found in the AUTHORS file in the root of the source tree. |
michael@0 | 9 | */ |
michael@0 | 10 | |
michael@0 | 11 | #include <limits.h> |
michael@0 | 12 | #include <math.h> |
michael@0 | 13 | #include <stdio.h> |
michael@0 | 14 | |
michael@0 | 15 | #include "./vpx_config.h" |
michael@0 | 16 | |
michael@0 | 17 | #include "vpx_mem/vpx_mem.h" |
michael@0 | 18 | |
michael@0 | 19 | #include "vp9/common/vp9_findnearmv.h" |
michael@0 | 20 | #include "vp9/common/vp9_common.h" |
michael@0 | 21 | |
michael@0 | 22 | #include "vp9/encoder/vp9_onyx_int.h" |
michael@0 | 23 | #include "vp9/encoder/vp9_mcomp.h" |
michael@0 | 24 | |
michael@0 | 25 | // #define NEW_DIAMOND_SEARCH |
michael@0 | 26 | |
michael@0 | 27 | void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv) { |
michael@0 | 28 | const int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); |
michael@0 | 29 | const int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); |
michael@0 | 30 | const int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; |
michael@0 | 31 | const int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; |
michael@0 | 32 | |
michael@0 | 33 | // Get intersection of UMV window and valid MV window to reduce # of checks |
michael@0 | 34 | // in diamond search. |
michael@0 | 35 | if (x->mv_col_min < col_min) |
michael@0 | 36 | x->mv_col_min = col_min; |
michael@0 | 37 | if (x->mv_col_max > col_max) |
michael@0 | 38 | x->mv_col_max = col_max; |
michael@0 | 39 | if (x->mv_row_min < row_min) |
michael@0 | 40 | x->mv_row_min = row_min; |
michael@0 | 41 | if (x->mv_row_max > row_max) |
michael@0 | 42 | x->mv_row_max = row_max; |
michael@0 | 43 | } |
michael@0 | 44 | |
michael@0 | 45 | int vp9_init_search_range(VP9_COMP *cpi, int size) { |
michael@0 | 46 | int sr = 0; |
michael@0 | 47 | |
michael@0 | 48 | // Minimum search size no matter what the passed in value. |
michael@0 | 49 | size = MAX(16, size); |
michael@0 | 50 | |
michael@0 | 51 | while ((size << sr) < MAX_FULL_PEL_VAL) |
michael@0 | 52 | sr++; |
michael@0 | 53 | |
michael@0 | 54 | if (sr) |
michael@0 | 55 | sr--; |
michael@0 | 56 | |
michael@0 | 57 | sr += cpi->sf.reduce_first_step_size; |
michael@0 | 58 | sr = MIN(sr, (cpi->sf.max_step_search_steps - 2)); |
michael@0 | 59 | return sr; |
michael@0 | 60 | } |
michael@0 | 61 | |
michael@0 | 62 | static INLINE int mv_cost(const MV *mv, |
michael@0 | 63 | const int *joint_cost, int *comp_cost[2]) { |
michael@0 | 64 | return joint_cost[vp9_get_mv_joint(mv)] + |
michael@0 | 65 | comp_cost[0][mv->row] + comp_cost[1][mv->col]; |
michael@0 | 66 | } |
michael@0 | 67 | |
michael@0 | 68 | int vp9_mv_bit_cost(const MV *mv, const MV *ref, |
michael@0 | 69 | const int *mvjcost, int *mvcost[2], int weight) { |
michael@0 | 70 | const MV diff = { mv->row - ref->row, |
michael@0 | 71 | mv->col - ref->col }; |
michael@0 | 72 | return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); |
michael@0 | 73 | } |
michael@0 | 74 | |
michael@0 | 75 | static int mv_err_cost(const MV *mv, const MV *ref, |
michael@0 | 76 | const int *mvjcost, int *mvcost[2], |
michael@0 | 77 | int error_per_bit) { |
michael@0 | 78 | if (mvcost) { |
michael@0 | 79 | const MV diff = { mv->row - ref->row, |
michael@0 | 80 | mv->col - ref->col }; |
michael@0 | 81 | return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * |
michael@0 | 82 | error_per_bit, 13); |
michael@0 | 83 | } |
michael@0 | 84 | return 0; |
michael@0 | 85 | } |
michael@0 | 86 | |
michael@0 | 87 | static int mvsad_err_cost(const MV *mv, const MV *ref, |
michael@0 | 88 | const int *mvjsadcost, int *mvsadcost[2], |
michael@0 | 89 | int error_per_bit) { |
michael@0 | 90 | if (mvsadcost) { |
michael@0 | 91 | const MV diff = { mv->row - ref->row, |
michael@0 | 92 | mv->col - ref->col }; |
michael@0 | 93 | return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * |
michael@0 | 94 | error_per_bit, 8); |
michael@0 | 95 | } |
michael@0 | 96 | return 0; |
michael@0 | 97 | } |
michael@0 | 98 | |
michael@0 | 99 | void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { |
michael@0 | 100 | int len; |
michael@0 | 101 | int search_site_count = 0; |
michael@0 | 102 | |
michael@0 | 103 | // Generate offsets for 4 search sites per step. |
michael@0 | 104 | x->ss[search_site_count].mv.col = 0; |
michael@0 | 105 | x->ss[search_site_count].mv.row = 0; |
michael@0 | 106 | x->ss[search_site_count].offset = 0; |
michael@0 | 107 | search_site_count++; |
michael@0 | 108 | |
michael@0 | 109 | for (len = MAX_FIRST_STEP; len > 0; len /= 2) { |
michael@0 | 110 | // Compute offsets for search sites. |
michael@0 | 111 | x->ss[search_site_count].mv.col = 0; |
michael@0 | 112 | x->ss[search_site_count].mv.row = -len; |
michael@0 | 113 | x->ss[search_site_count].offset = -len * stride; |
michael@0 | 114 | search_site_count++; |
michael@0 | 115 | |
michael@0 | 116 | // Compute offsets for search sites. |
michael@0 | 117 | x->ss[search_site_count].mv.col = 0; |
michael@0 | 118 | x->ss[search_site_count].mv.row = len; |
michael@0 | 119 | x->ss[search_site_count].offset = len * stride; |
michael@0 | 120 | search_site_count++; |
michael@0 | 121 | |
michael@0 | 122 | // Compute offsets for search sites. |
michael@0 | 123 | x->ss[search_site_count].mv.col = -len; |
michael@0 | 124 | x->ss[search_site_count].mv.row = 0; |
michael@0 | 125 | x->ss[search_site_count].offset = -len; |
michael@0 | 126 | search_site_count++; |
michael@0 | 127 | |
michael@0 | 128 | // Compute offsets for search sites. |
michael@0 | 129 | x->ss[search_site_count].mv.col = len; |
michael@0 | 130 | x->ss[search_site_count].mv.row = 0; |
michael@0 | 131 | x->ss[search_site_count].offset = len; |
michael@0 | 132 | search_site_count++; |
michael@0 | 133 | } |
michael@0 | 134 | |
michael@0 | 135 | x->ss_count = search_site_count; |
michael@0 | 136 | x->searches_per_step = 4; |
michael@0 | 137 | } |
michael@0 | 138 | |
michael@0 | 139 | void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { |
michael@0 | 140 | int len, ss_count = 1; |
michael@0 | 141 | |
michael@0 | 142 | x->ss[0].mv.col = x->ss[0].mv.row = 0; |
michael@0 | 143 | x->ss[0].offset = 0; |
michael@0 | 144 | |
michael@0 | 145 | for (len = MAX_FIRST_STEP; len > 0; len /= 2) { |
michael@0 | 146 | // Generate offsets for 8 search sites per step. |
michael@0 | 147 | const MV ss_mvs[8] = { |
michael@0 | 148 | {-len, 0 }, {len, 0 }, { 0, -len}, {0, len}, |
michael@0 | 149 | {-len, -len}, {-len, len}, {len, -len}, {len, len} |
michael@0 | 150 | }; |
michael@0 | 151 | int i; |
michael@0 | 152 | for (i = 0; i < 8; ++i) { |
michael@0 | 153 | search_site *const ss = &x->ss[ss_count++]; |
michael@0 | 154 | ss->mv = ss_mvs[i]; |
michael@0 | 155 | ss->offset = ss->mv.row * stride + ss->mv.col; |
michael@0 | 156 | } |
michael@0 | 157 | } |
michael@0 | 158 | |
michael@0 | 159 | x->ss_count = ss_count; |
michael@0 | 160 | x->searches_per_step = 8; |
michael@0 | 161 | } |
michael@0 | 162 | |
michael@0 | 163 | /* |
michael@0 | 164 | * To avoid the penalty for crossing cache-line read, preload the reference |
michael@0 | 165 | * area in a small buffer, which is aligned to make sure there won't be crossing |
michael@0 | 166 | * cache-line read while reading from this buffer. This reduced the cpu |
michael@0 | 167 | * cycles spent on reading ref data in sub-pixel filter functions. |
michael@0 | 168 | * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x |
michael@0 | 169 | * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we |
michael@0 | 170 | * could reduce the area. |
michael@0 | 171 | */ |
michael@0 | 172 | |
/*
 * Helper macros for the sub-pixel search functions below.  They expand in
 * place and read/write locals of the enclosing function by name (rr, rc,
 * mvcost, mvjcost, error_per_bit, minc/maxc/minr/maxr, y, y_stride, offset,
 * z, src_stride, vfp, sse, thismse, besterr, br, bc, distortion, sse1).
 */

/* Estimated cost of motion vector (r, c) relative to the reference (rr, rc):
 * joint cost plus per-component costs of the difference, scaled by
 * error_per_bit and rounded with a 13-bit shift.  0 when mvcost is NULL. */
#define MVC(r, c)                                       \
  (mvcost ?                                             \
   ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +           \
     mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) *   \
    error_per_bit + 4096) >> 13 : 0)

/* Converts a motion vector component to the offset argument used for the
 * svf call: the low three bits, doubled. */
#define SP(x) (((x) & 7) << 1)

/* Runs statement s when (r, c) lies inside [minr, maxr] x [minc, maxc],
 * otherwise runs e. */
#define IFMVCV(r, c, s, e)                              \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
    s                                                   \
  else                                                  \
    e;

/* Pointer to the predictor base for motion vector (r, c).  Subtracting
 * offset cancels the offset already folded into y. */
#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))

/* Sub-pixel variance error at (r, c); also writes the local sse. */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse)

/* Checks whether (r, c) scores better than the best so far.  v receives the
 * score (distortion plus MVC), or INT_MAX when (r, c) is out of range.  On
 * improvement, updates besterr, br, bc, *distortion and *sse1. */
#define CHECK_BETTER(v, r, c)                           \
  IFMVCV(r, c, {                                        \
    thismse = (DIST(r, c));                             \
    if ((v = MVC(r, c) + thismse) < besterr) {          \
      besterr = v;                                      \
      br = r;                                           \
      bc = c;                                           \
      *distortion = thismse;                            \
      *sse1 = sse;                                      \
    }                                                   \
  },                                                    \
  v = INT_MAX;)
michael@0 | 210 | |
/* Scores the four neighbours of (tr, tc) at distance hstep (left, right, up,
 * down), then the one diagonal neighbour lying between the better horizontal
 * and the better vertical neighbour.  whichdir records that quadrant:
 * bit 0 set = right, bit 1 set = down.  Updates the best point through
 * CHECK_BETTER. */
#define FIRST_LEVEL_CHECKS                              \
  {                                                     \
    unsigned int left, right, up, down, diag;           \
    CHECK_BETTER(left, tr, tc - hstep);                 \
    CHECK_BETTER(right, tr, tc + hstep);                \
    CHECK_BETTER(up, tr - hstep, tc);                   \
    CHECK_BETTER(down, tr + hstep, tc);                 \
    whichdir = (left >= right) + 2 * (up >= down);      \
    switch (whichdir) {                                 \
      case 0:                                           \
        CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
        break;                                          \
      case 1:                                           \
        CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
        break;                                          \
      case 2:                                           \
        CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
        break;                                          \
      case 3:                                           \
        CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
        break;                                          \
    }                                                   \
  }
michael@0 | 235 | |
/* Follow-up to FIRST_LEVEL_CHECKS.  Depending on how the best point
 * (br, bc) moved away from the centre (tr, tc), scores further points on
 * that side:
 *   - moved diagonally: two points one step further along each axis;
 *   - moved only in column: two points at twice the column displacement,
 *     one hstep above and below, plus one more chosen by whichdir;
 *   - moved only in row: the mirrored set of three.
 * Does nothing when the best point did not move. */
#define SECOND_LEVEL_CHECKS                             \
  {                                                     \
    int kr, kc;                                         \
    unsigned int second;                                \
    if (tr != br && tc != bc) {                         \
      kr = br - tr;                                     \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
    } else if (tr == br && tc != bc) {                  \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 1:                                         \
          CHECK_BETTER(second, tr + hstep, tc + kc);    \
          break;                                        \
        case 2:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr - hstep, tc + kc);    \
          break;                                        \
      }                                                 \
    } else if (tr != br && tc == bc) {                  \
      kr = br - tr;                                     \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 2:                                         \
          CHECK_BETTER(second, tr + kr, tc + hstep);    \
          break;                                        \
        case 1:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr + kr, tc - hstep);    \
          break;                                        \
      }                                                 \
    }                                                   \
  }
michael@0 | 275 | |
michael@0 | 276 | int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, |
michael@0 | 277 | MV *bestmv, const MV *ref_mv, |
michael@0 | 278 | int allow_hp, |
michael@0 | 279 | int error_per_bit, |
michael@0 | 280 | const vp9_variance_fn_ptr_t *vfp, |
michael@0 | 281 | int forced_stop, |
michael@0 | 282 | int iters_per_step, |
michael@0 | 283 | int *mvjcost, int *mvcost[2], |
michael@0 | 284 | int *distortion, |
michael@0 | 285 | unsigned int *sse1) { |
michael@0 | 286 | uint8_t *z = x->plane[0].src.buf; |
michael@0 | 287 | int src_stride = x->plane[0].src.stride; |
michael@0 | 288 | MACROBLOCKD *xd = &x->e_mbd; |
michael@0 | 289 | |
michael@0 | 290 | unsigned int besterr = INT_MAX; |
michael@0 | 291 | unsigned int sse; |
michael@0 | 292 | unsigned int whichdir; |
michael@0 | 293 | unsigned int halfiters = iters_per_step; |
michael@0 | 294 | unsigned int quarteriters = iters_per_step; |
michael@0 | 295 | unsigned int eighthiters = iters_per_step; |
michael@0 | 296 | int thismse; |
michael@0 | 297 | |
michael@0 | 298 | const int y_stride = xd->plane[0].pre[0].stride; |
michael@0 | 299 | const int offset = bestmv->row * y_stride + bestmv->col; |
michael@0 | 300 | uint8_t *y = xd->plane[0].pre[0].buf + offset; |
michael@0 | 301 | |
michael@0 | 302 | int rr = ref_mv->row; |
michael@0 | 303 | int rc = ref_mv->col; |
michael@0 | 304 | int br = bestmv->row * 8; |
michael@0 | 305 | int bc = bestmv->col * 8; |
michael@0 | 306 | int hstep = 4; |
michael@0 | 307 | const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
michael@0 | 308 | const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
michael@0 | 309 | const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
michael@0 | 310 | const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
michael@0 | 311 | |
michael@0 | 312 | int tr = br; |
michael@0 | 313 | int tc = bc; |
michael@0 | 314 | |
michael@0 | 315 | // central mv |
michael@0 | 316 | bestmv->row <<= 3; |
michael@0 | 317 | bestmv->col <<= 3; |
michael@0 | 318 | |
michael@0 | 319 | // calculate central point error |
michael@0 | 320 | besterr = vfp->vf(y, y_stride, z, src_stride, sse1); |
michael@0 | 321 | *distortion = besterr; |
michael@0 | 322 | besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
michael@0 | 323 | |
michael@0 | 324 | // TODO(jbb): Each subsequent iteration checks at least one point in |
michael@0 | 325 | // common with the last iteration could be 2 if diagonal is selected. |
michael@0 | 326 | while (halfiters--) { |
michael@0 | 327 | // 1/2 pel |
michael@0 | 328 | FIRST_LEVEL_CHECKS; |
michael@0 | 329 | // no reason to check the same one again. |
michael@0 | 330 | if (tr == br && tc == bc) |
michael@0 | 331 | break; |
michael@0 | 332 | tr = br; |
michael@0 | 333 | tc = bc; |
michael@0 | 334 | } |
michael@0 | 335 | |
michael@0 | 336 | // TODO(yaowu): Each subsequent iteration checks at least one point in common |
michael@0 | 337 | // with the last iteration could be 2 if diagonal is selected. |
michael@0 | 338 | |
michael@0 | 339 | // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only |
michael@0 | 340 | if (forced_stop != 2) { |
michael@0 | 341 | hstep >>= 1; |
michael@0 | 342 | while (quarteriters--) { |
michael@0 | 343 | FIRST_LEVEL_CHECKS; |
michael@0 | 344 | // no reason to check the same one again. |
michael@0 | 345 | if (tr == br && tc == bc) |
michael@0 | 346 | break; |
michael@0 | 347 | tr = br; |
michael@0 | 348 | tc = bc; |
michael@0 | 349 | } |
michael@0 | 350 | } |
michael@0 | 351 | |
michael@0 | 352 | if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { |
michael@0 | 353 | hstep >>= 1; |
michael@0 | 354 | while (eighthiters--) { |
michael@0 | 355 | FIRST_LEVEL_CHECKS; |
michael@0 | 356 | // no reason to check the same one again. |
michael@0 | 357 | if (tr == br && tc == bc) |
michael@0 | 358 | break; |
michael@0 | 359 | tr = br; |
michael@0 | 360 | tc = bc; |
michael@0 | 361 | } |
michael@0 | 362 | } |
michael@0 | 363 | |
michael@0 | 364 | bestmv->row = br; |
michael@0 | 365 | bestmv->col = bc; |
michael@0 | 366 | |
michael@0 | 367 | if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || |
michael@0 | 368 | (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) |
michael@0 | 369 | return INT_MAX; |
michael@0 | 370 | |
michael@0 | 371 | return besterr; |
michael@0 | 372 | } |
michael@0 | 373 | |
michael@0 | 374 | int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, |
michael@0 | 375 | MV *bestmv, const MV *ref_mv, |
michael@0 | 376 | int allow_hp, |
michael@0 | 377 | int error_per_bit, |
michael@0 | 378 | const vp9_variance_fn_ptr_t *vfp, |
michael@0 | 379 | int forced_stop, |
michael@0 | 380 | int iters_per_step, |
michael@0 | 381 | int *mvjcost, int *mvcost[2], |
michael@0 | 382 | int *distortion, |
michael@0 | 383 | unsigned int *sse1) { |
michael@0 | 384 | uint8_t *z = x->plane[0].src.buf; |
michael@0 | 385 | const int src_stride = x->plane[0].src.stride; |
michael@0 | 386 | MACROBLOCKD *xd = &x->e_mbd; |
michael@0 | 387 | unsigned int besterr = INT_MAX; |
michael@0 | 388 | unsigned int sse; |
michael@0 | 389 | unsigned int whichdir; |
michael@0 | 390 | int thismse; |
michael@0 | 391 | unsigned int halfiters = iters_per_step; |
michael@0 | 392 | unsigned int quarteriters = iters_per_step; |
michael@0 | 393 | unsigned int eighthiters = iters_per_step; |
michael@0 | 394 | |
michael@0 | 395 | const int y_stride = xd->plane[0].pre[0].stride; |
michael@0 | 396 | const int offset = bestmv->row * y_stride + bestmv->col; |
michael@0 | 397 | uint8_t *y = xd->plane[0].pre[0].buf + offset; |
michael@0 | 398 | |
michael@0 | 399 | int rr = ref_mv->row; |
michael@0 | 400 | int rc = ref_mv->col; |
michael@0 | 401 | int br = bestmv->row * 8; |
michael@0 | 402 | int bc = bestmv->col * 8; |
michael@0 | 403 | int hstep = 4; |
michael@0 | 404 | const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
michael@0 | 405 | const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
michael@0 | 406 | const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
michael@0 | 407 | const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
michael@0 | 408 | |
michael@0 | 409 | int tr = br; |
michael@0 | 410 | int tc = bc; |
michael@0 | 411 | |
michael@0 | 412 | // central mv |
michael@0 | 413 | bestmv->row *= 8; |
michael@0 | 414 | bestmv->col *= 8; |
michael@0 | 415 | |
michael@0 | 416 | // calculate central point error |
michael@0 | 417 | besterr = vfp->vf(y, y_stride, z, src_stride, sse1); |
michael@0 | 418 | *distortion = besterr; |
michael@0 | 419 | besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
michael@0 | 420 | |
michael@0 | 421 | // 1/2 pel |
michael@0 | 422 | FIRST_LEVEL_CHECKS; |
michael@0 | 423 | if (halfiters > 1) { |
michael@0 | 424 | SECOND_LEVEL_CHECKS; |
michael@0 | 425 | } |
michael@0 | 426 | tr = br; |
michael@0 | 427 | tc = bc; |
michael@0 | 428 | |
michael@0 | 429 | // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only |
michael@0 | 430 | if (forced_stop != 2) { |
michael@0 | 431 | hstep >>= 1; |
michael@0 | 432 | FIRST_LEVEL_CHECKS; |
michael@0 | 433 | if (quarteriters > 1) { |
michael@0 | 434 | SECOND_LEVEL_CHECKS; |
michael@0 | 435 | } |
michael@0 | 436 | tr = br; |
michael@0 | 437 | tc = bc; |
michael@0 | 438 | } |
michael@0 | 439 | |
michael@0 | 440 | if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { |
michael@0 | 441 | hstep >>= 1; |
michael@0 | 442 | FIRST_LEVEL_CHECKS; |
michael@0 | 443 | if (eighthiters > 1) { |
michael@0 | 444 | SECOND_LEVEL_CHECKS; |
michael@0 | 445 | } |
michael@0 | 446 | tr = br; |
michael@0 | 447 | tc = bc; |
michael@0 | 448 | } |
michael@0 | 449 | |
michael@0 | 450 | bestmv->row = br; |
michael@0 | 451 | bestmv->col = bc; |
michael@0 | 452 | |
michael@0 | 453 | if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || |
michael@0 | 454 | (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) |
michael@0 | 455 | return INT_MAX; |
michael@0 | 456 | |
michael@0 | 457 | return besterr; |
michael@0 | 458 | } |
michael@0 | 459 | |
#undef DIST
/* Sub-pixel variance error at (r, c) for the compound searches: same as the
 * previous DIST but calls svaf and passes second_pred as an extra argument. */
#define DIST(r, c)                                  \
  vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r),      \
            z, src_stride, &sse, second_pred)
michael@0 | 465 | |
michael@0 | 466 | int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, |
michael@0 | 467 | MV *bestmv, const MV *ref_mv, |
michael@0 | 468 | int allow_hp, |
michael@0 | 469 | int error_per_bit, |
michael@0 | 470 | const vp9_variance_fn_ptr_t *vfp, |
michael@0 | 471 | int forced_stop, |
michael@0 | 472 | int iters_per_step, |
michael@0 | 473 | int *mvjcost, int *mvcost[2], |
michael@0 | 474 | int *distortion, |
michael@0 | 475 | unsigned int *sse1, |
michael@0 | 476 | const uint8_t *second_pred, |
michael@0 | 477 | int w, int h) { |
michael@0 | 478 | uint8_t *const z = x->plane[0].src.buf; |
michael@0 | 479 | const int src_stride = x->plane[0].src.stride; |
michael@0 | 480 | MACROBLOCKD *const xd = &x->e_mbd; |
michael@0 | 481 | |
michael@0 | 482 | unsigned int besterr = INT_MAX; |
michael@0 | 483 | unsigned int sse; |
michael@0 | 484 | unsigned int whichdir; |
michael@0 | 485 | unsigned int halfiters = iters_per_step; |
michael@0 | 486 | unsigned int quarteriters = iters_per_step; |
michael@0 | 487 | unsigned int eighthiters = iters_per_step; |
michael@0 | 488 | int thismse; |
michael@0 | 489 | |
michael@0 | 490 | DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); |
michael@0 | 491 | const int y_stride = xd->plane[0].pre[0].stride; |
michael@0 | 492 | const int offset = bestmv->row * y_stride + bestmv->col; |
michael@0 | 493 | uint8_t *const y = xd->plane[0].pre[0].buf + offset; |
michael@0 | 494 | |
michael@0 | 495 | int rr = ref_mv->row; |
michael@0 | 496 | int rc = ref_mv->col; |
michael@0 | 497 | int br = bestmv->row * 8; |
michael@0 | 498 | int bc = bestmv->col * 8; |
michael@0 | 499 | int hstep = 4; |
michael@0 | 500 | const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
michael@0 | 501 | const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
michael@0 | 502 | const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
michael@0 | 503 | const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
michael@0 | 504 | |
michael@0 | 505 | int tr = br; |
michael@0 | 506 | int tc = bc; |
michael@0 | 507 | |
michael@0 | 508 | // central mv |
michael@0 | 509 | bestmv->row *= 8; |
michael@0 | 510 | bestmv->col *= 8; |
michael@0 | 511 | |
michael@0 | 512 | // calculate central point error |
michael@0 | 513 | // TODO(yunqingwang): central pointer error was already calculated in full- |
michael@0 | 514 | // pixel search, and can be passed in this function. |
michael@0 | 515 | comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); |
michael@0 | 516 | besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); |
michael@0 | 517 | *distortion = besterr; |
michael@0 | 518 | besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
michael@0 | 519 | |
michael@0 | 520 | // Each subsequent iteration checks at least one point in |
michael@0 | 521 | // common with the last iteration could be 2 ( if diag selected) |
michael@0 | 522 | while (halfiters--) { |
michael@0 | 523 | // 1/2 pel |
michael@0 | 524 | FIRST_LEVEL_CHECKS; |
michael@0 | 525 | // no reason to check the same one again. |
michael@0 | 526 | if (tr == br && tc == bc) |
michael@0 | 527 | break; |
michael@0 | 528 | tr = br; |
michael@0 | 529 | tc = bc; |
michael@0 | 530 | } |
michael@0 | 531 | |
michael@0 | 532 | // Each subsequent iteration checks at least one point in common with |
michael@0 | 533 | // the last iteration could be 2 ( if diag selected) 1/4 pel |
michael@0 | 534 | |
michael@0 | 535 | // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only |
michael@0 | 536 | if (forced_stop != 2) { |
michael@0 | 537 | hstep >>= 1; |
michael@0 | 538 | while (quarteriters--) { |
michael@0 | 539 | FIRST_LEVEL_CHECKS; |
michael@0 | 540 | // no reason to check the same one again. |
michael@0 | 541 | if (tr == br && tc == bc) |
michael@0 | 542 | break; |
michael@0 | 543 | tr = br; |
michael@0 | 544 | tc = bc; |
michael@0 | 545 | } |
michael@0 | 546 | } |
michael@0 | 547 | |
michael@0 | 548 | if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { |
michael@0 | 549 | hstep >>= 1; |
michael@0 | 550 | while (eighthiters--) { |
michael@0 | 551 | FIRST_LEVEL_CHECKS; |
michael@0 | 552 | // no reason to check the same one again. |
michael@0 | 553 | if (tr == br && tc == bc) |
michael@0 | 554 | break; |
michael@0 | 555 | tr = br; |
michael@0 | 556 | tc = bc; |
michael@0 | 557 | } |
michael@0 | 558 | } |
michael@0 | 559 | bestmv->row = br; |
michael@0 | 560 | bestmv->col = bc; |
michael@0 | 561 | |
michael@0 | 562 | if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || |
michael@0 | 563 | (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) |
michael@0 | 564 | return INT_MAX; |
michael@0 | 565 | |
michael@0 | 566 | return besterr; |
michael@0 | 567 | } |
michael@0 | 568 | |
michael@0 | 569 | int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, |
michael@0 | 570 | MV *bestmv, const MV *ref_mv, |
michael@0 | 571 | int allow_hp, |
michael@0 | 572 | int error_per_bit, |
michael@0 | 573 | const vp9_variance_fn_ptr_t *vfp, |
michael@0 | 574 | int forced_stop, |
michael@0 | 575 | int iters_per_step, |
michael@0 | 576 | int *mvjcost, int *mvcost[2], |
michael@0 | 577 | int *distortion, |
michael@0 | 578 | unsigned int *sse1, |
michael@0 | 579 | const uint8_t *second_pred, |
michael@0 | 580 | int w, int h) { |
michael@0 | 581 | uint8_t *z = x->plane[0].src.buf; |
michael@0 | 582 | const int src_stride = x->plane[0].src.stride; |
michael@0 | 583 | MACROBLOCKD *xd = &x->e_mbd; |
michael@0 | 584 | unsigned int besterr = INT_MAX; |
michael@0 | 585 | unsigned int sse; |
michael@0 | 586 | unsigned int whichdir; |
michael@0 | 587 | int thismse; |
michael@0 | 588 | unsigned int halfiters = iters_per_step; |
michael@0 | 589 | unsigned int quarteriters = iters_per_step; |
michael@0 | 590 | unsigned int eighthiters = iters_per_step; |
michael@0 | 591 | |
michael@0 | 592 | DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); |
michael@0 | 593 | const int y_stride = xd->plane[0].pre[0].stride; |
michael@0 | 594 | const int offset = bestmv->row * y_stride + bestmv->col; |
michael@0 | 595 | uint8_t *y = xd->plane[0].pre[0].buf + offset; |
michael@0 | 596 | |
michael@0 | 597 | int rr = ref_mv->row; |
michael@0 | 598 | int rc = ref_mv->col; |
michael@0 | 599 | int br = bestmv->row * 8; |
michael@0 | 600 | int bc = bestmv->col * 8; |
michael@0 | 601 | int hstep = 4; |
michael@0 | 602 | const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); |
michael@0 | 603 | const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); |
michael@0 | 604 | const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); |
michael@0 | 605 | const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); |
michael@0 | 606 | |
michael@0 | 607 | int tr = br; |
michael@0 | 608 | int tc = bc; |
michael@0 | 609 | |
michael@0 | 610 | // central mv |
michael@0 | 611 | bestmv->row *= 8; |
michael@0 | 612 | bestmv->col *= 8; |
michael@0 | 613 | |
michael@0 | 614 | // calculate central point error |
michael@0 | 615 | // TODO(yunqingwang): central pointer error was already calculated in full- |
michael@0 | 616 | // pixel search, and can be passed in this function. |
michael@0 | 617 | comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); |
michael@0 | 618 | besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); |
michael@0 | 619 | *distortion = besterr; |
michael@0 | 620 | besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
michael@0 | 621 | |
michael@0 | 622 | // Each subsequent iteration checks at least one point in |
michael@0 | 623 | // common with the last iteration could be 2 ( if diag selected) |
michael@0 | 624 | // 1/2 pel |
michael@0 | 625 | FIRST_LEVEL_CHECKS; |
michael@0 | 626 | if (halfiters > 1) { |
michael@0 | 627 | SECOND_LEVEL_CHECKS; |
michael@0 | 628 | } |
michael@0 | 629 | tr = br; |
michael@0 | 630 | tc = bc; |
michael@0 | 631 | |
michael@0 | 632 | // Each subsequent iteration checks at least one point in common with |
michael@0 | 633 | // the last iteration could be 2 ( if diag selected) 1/4 pel |
michael@0 | 634 | |
michael@0 | 635 | // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only |
michael@0 | 636 | if (forced_stop != 2) { |
michael@0 | 637 | hstep >>= 1; |
michael@0 | 638 | FIRST_LEVEL_CHECKS; |
michael@0 | 639 | if (quarteriters > 1) { |
michael@0 | 640 | SECOND_LEVEL_CHECKS; |
michael@0 | 641 | } |
michael@0 | 642 | tr = br; |
michael@0 | 643 | tc = bc; |
michael@0 | 644 | } |
michael@0 | 645 | |
michael@0 | 646 | if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { |
michael@0 | 647 | hstep >>= 1; |
michael@0 | 648 | FIRST_LEVEL_CHECKS; |
michael@0 | 649 | if (eighthiters > 1) { |
michael@0 | 650 | SECOND_LEVEL_CHECKS; |
michael@0 | 651 | } |
michael@0 | 652 | tr = br; |
michael@0 | 653 | tc = bc; |
michael@0 | 654 | } |
michael@0 | 655 | bestmv->row = br; |
michael@0 | 656 | bestmv->col = bc; |
michael@0 | 657 | |
michael@0 | 658 | if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || |
michael@0 | 659 | (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) |
michael@0 | 660 | return INT_MAX; |
michael@0 | 661 | |
michael@0 | 662 | return besterr; |
michael@0 | 663 | } |
michael@0 | 664 | |
/* The sub-pixel helper macros end here; CHECK_BETTER is redefined below with
 * a different meaning for the full-pixel pattern search. */
#undef MVC
#undef PRE
#undef DIST
#undef IFMVCV
#undef CHECK_BETTER
#undef SP

/* Sets all_in to 1 when every point within +/-range of (br, bc) lies inside
 * the motion-vector window stored in x, and to 0 otherwise. */
#define CHECK_BOUNDS(range)                             \
  {                                                     \
    all_in = 1;                                         \
    all_in &= ((br-range) >= x->mv_row_min);            \
    all_in &= ((br+range) <= x->mv_row_max);            \
    all_in &= ((bc-range) >= x->mv_col_min);            \
    all_in &= ((bc+range) <= x->mv_col_max);            \
  }

/* Skips to the next iteration of the enclosing loop when this_mv lies outside
 * the motion-vector window stored in x. */
#define CHECK_POINT                                     \
  {                                                     \
    if (this_mv.col < x->mv_col_min) continue;          \
    if (this_mv.col > x->mv_col_max) continue;          \
    if (this_mv.row < x->mv_row_min) continue;          \
    if (this_mv.row > x->mv_row_max) continue;          \
  }

/* If thissad beats bestsad, optionally adds the motion-vector cost (when
 * use_mvcost is set) and, if it still wins, records it as the new best along
 * with the current site index i. */
#define CHECK_BETTER                                            \
  {                                                             \
    if (thissad < bestsad)                                      \
    {                                                           \
      if (use_mvcost)                                           \
        thissad += mvsad_err_cost(&this_mv, &fcenter_mv.as_mv,  \
                                  mvjsadcost, mvsadcost,        \
                                  sad_per_bit);                 \
      if (thissad < bestsad)                                    \
      {                                                         \
        bestsad = thissad;                                      \
        best_site = i;                                          \
      }                                                         \
    }                                                           \
  }

/* Stores in list[0..2] the indices before i, at i, and after i in a circular
 * list of n entries.  Expands to three statements, not one. */
#define get_next_chkpts(list, i, n)             \
  list[0] = ((i) == 0 ? (n) - 1 : (i) - 1);     \
  list[1] = (i);                                \
  list[2] = ((i) == (n) - 1 ? 0 : (i) + 1);

#define MAX_PATTERN_SCALES 11
#define MAX_PATTERN_CANDIDATES 8  // max number of candidates per scale
#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates
michael@0 | 713 | |
michael@0 | 714 | // Generic pattern search function that searches over multiple scales. |
michael@0 | 715 | // Each scale can have a different number of candidates and shape of |
michael@0 | 716 | // candidates as indicated in the num_candidates and candidates arrays |
michael@0 | 717 | // passed into this function |
michael@0 | 718 | static int vp9_pattern_search(MACROBLOCK *x, |
michael@0 | 719 | MV *ref_mv, |
michael@0 | 720 | int search_param, |
michael@0 | 721 | int sad_per_bit, |
michael@0 | 722 | int do_init_search, |
michael@0 | 723 | int do_refine, |
michael@0 | 724 | const vp9_variance_fn_ptr_t *vfp, |
michael@0 | 725 | int use_mvcost, |
michael@0 | 726 | const MV *center_mv, MV *best_mv, |
michael@0 | 727 | const int num_candidates[MAX_PATTERN_SCALES], |
michael@0 | 728 | const MV candidates[MAX_PATTERN_SCALES] |
michael@0 | 729 | [MAX_PATTERN_CANDIDATES]) { |
michael@0 | 730 | const MACROBLOCKD* const xd = &x->e_mbd; |
michael@0 | 731 | static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { |
michael@0 | 732 | 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
michael@0 | 733 | }; |
michael@0 | 734 | int i, j, s, t; |
michael@0 | 735 | uint8_t *what = x->plane[0].src.buf; |
michael@0 | 736 | int what_stride = x->plane[0].src.stride; |
michael@0 | 737 | int in_what_stride = xd->plane[0].pre[0].stride; |
michael@0 | 738 | int br, bc; |
michael@0 | 739 | MV this_mv; |
michael@0 | 740 | int bestsad = INT_MAX; |
michael@0 | 741 | int thissad; |
michael@0 | 742 | uint8_t *base_offset; |
michael@0 | 743 | uint8_t *this_offset; |
michael@0 | 744 | int k = -1; |
michael@0 | 745 | int all_in; |
michael@0 | 746 | int best_site = -1; |
michael@0 | 747 | int_mv fcenter_mv; |
michael@0 | 748 | int best_init_s = search_param_to_steps[search_param]; |
michael@0 | 749 | int *mvjsadcost = x->nmvjointsadcost; |
michael@0 | 750 | int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
michael@0 | 751 | |
michael@0 | 752 | fcenter_mv.as_mv.row = center_mv->row >> 3; |
michael@0 | 753 | fcenter_mv.as_mv.col = center_mv->col >> 3; |
michael@0 | 754 | |
michael@0 | 755 | // adjust ref_mv to make sure it is within MV range |
michael@0 | 756 | clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
michael@0 | 757 | br = ref_mv->row; |
michael@0 | 758 | bc = ref_mv->col; |
michael@0 | 759 | |
michael@0 | 760 | // Work out the start point for the search |
michael@0 | 761 | base_offset = (uint8_t *)(xd->plane[0].pre[0].buf); |
michael@0 | 762 | this_offset = base_offset + (br * in_what_stride) + bc; |
michael@0 | 763 | this_mv.row = br; |
michael@0 | 764 | this_mv.col = bc; |
michael@0 | 765 | bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) |
michael@0 | 766 | + mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, |
michael@0 | 767 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 768 | |
michael@0 | 769 | // Search all possible scales upto the search param around the center point |
michael@0 | 770 | // pick the scale of the point that is best as the starting scale of |
michael@0 | 771 | // further steps around it. |
michael@0 | 772 | if (do_init_search) { |
michael@0 | 773 | s = best_init_s; |
michael@0 | 774 | best_init_s = -1; |
michael@0 | 775 | for (t = 0; t <= s; ++t) { |
michael@0 | 776 | best_site = -1; |
michael@0 | 777 | CHECK_BOUNDS((1 << t)) |
michael@0 | 778 | if (all_in) { |
michael@0 | 779 | for (i = 0; i < num_candidates[t]; i++) { |
michael@0 | 780 | this_mv.row = br + candidates[t][i].row; |
michael@0 | 781 | this_mv.col = bc + candidates[t][i].col; |
michael@0 | 782 | this_offset = base_offset + (this_mv.row * in_what_stride) + |
michael@0 | 783 | this_mv.col; |
michael@0 | 784 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
michael@0 | 785 | bestsad); |
michael@0 | 786 | CHECK_BETTER |
michael@0 | 787 | } |
michael@0 | 788 | } else { |
michael@0 | 789 | for (i = 0; i < num_candidates[t]; i++) { |
michael@0 | 790 | this_mv.row = br + candidates[t][i].row; |
michael@0 | 791 | this_mv.col = bc + candidates[t][i].col; |
michael@0 | 792 | CHECK_POINT |
michael@0 | 793 | this_offset = base_offset + (this_mv.row * in_what_stride) + |
michael@0 | 794 | this_mv.col; |
michael@0 | 795 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
michael@0 | 796 | bestsad); |
michael@0 | 797 | CHECK_BETTER |
michael@0 | 798 | } |
michael@0 | 799 | } |
michael@0 | 800 | if (best_site == -1) { |
michael@0 | 801 | continue; |
michael@0 | 802 | } else { |
michael@0 | 803 | best_init_s = t; |
michael@0 | 804 | k = best_site; |
michael@0 | 805 | } |
michael@0 | 806 | } |
michael@0 | 807 | if (best_init_s != -1) { |
michael@0 | 808 | br += candidates[best_init_s][k].row; |
michael@0 | 809 | bc += candidates[best_init_s][k].col; |
michael@0 | 810 | } |
michael@0 | 811 | } |
michael@0 | 812 | |
michael@0 | 813 | // If the center point is still the best, just skip this and move to |
michael@0 | 814 | // the refinement step. |
michael@0 | 815 | if (best_init_s != -1) { |
michael@0 | 816 | s = best_init_s; |
michael@0 | 817 | best_site = -1; |
michael@0 | 818 | do { |
michael@0 | 819 | // No need to search all 6 points the 1st time if initial search was used |
michael@0 | 820 | if (!do_init_search || s != best_init_s) { |
michael@0 | 821 | CHECK_BOUNDS((1 << s)) |
michael@0 | 822 | if (all_in) { |
michael@0 | 823 | for (i = 0; i < num_candidates[s]; i++) { |
michael@0 | 824 | this_mv.row = br + candidates[s][i].row; |
michael@0 | 825 | this_mv.col = bc + candidates[s][i].col; |
michael@0 | 826 | this_offset = base_offset + (this_mv.row * in_what_stride) + |
michael@0 | 827 | this_mv.col; |
michael@0 | 828 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
michael@0 | 829 | bestsad); |
michael@0 | 830 | CHECK_BETTER |
michael@0 | 831 | } |
michael@0 | 832 | } else { |
michael@0 | 833 | for (i = 0; i < num_candidates[s]; i++) { |
michael@0 | 834 | this_mv.row = br + candidates[s][i].row; |
michael@0 | 835 | this_mv.col = bc + candidates[s][i].col; |
michael@0 | 836 | CHECK_POINT |
michael@0 | 837 | this_offset = base_offset + (this_mv.row * in_what_stride) + |
michael@0 | 838 | this_mv.col; |
michael@0 | 839 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
michael@0 | 840 | bestsad); |
michael@0 | 841 | CHECK_BETTER |
michael@0 | 842 | } |
michael@0 | 843 | } |
michael@0 | 844 | |
michael@0 | 845 | if (best_site == -1) { |
michael@0 | 846 | continue; |
michael@0 | 847 | } else { |
michael@0 | 848 | br += candidates[s][best_site].row; |
michael@0 | 849 | bc += candidates[s][best_site].col; |
michael@0 | 850 | k = best_site; |
michael@0 | 851 | } |
michael@0 | 852 | } |
michael@0 | 853 | |
michael@0 | 854 | do { |
michael@0 | 855 | int next_chkpts_indices[PATTERN_CANDIDATES_REF]; |
michael@0 | 856 | best_site = -1; |
michael@0 | 857 | CHECK_BOUNDS((1 << s)) |
michael@0 | 858 | |
michael@0 | 859 | get_next_chkpts(next_chkpts_indices, k, num_candidates[s]); |
michael@0 | 860 | if (all_in) { |
michael@0 | 861 | for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
michael@0 | 862 | this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; |
michael@0 | 863 | this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; |
michael@0 | 864 | this_offset = base_offset + (this_mv.row * (in_what_stride)) + |
michael@0 | 865 | this_mv.col; |
michael@0 | 866 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
michael@0 | 867 | bestsad); |
michael@0 | 868 | CHECK_BETTER |
michael@0 | 869 | } |
michael@0 | 870 | } else { |
michael@0 | 871 | for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
michael@0 | 872 | this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; |
michael@0 | 873 | this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; |
michael@0 | 874 | CHECK_POINT |
michael@0 | 875 | this_offset = base_offset + (this_mv.row * (in_what_stride)) + |
michael@0 | 876 | this_mv.col; |
michael@0 | 877 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
michael@0 | 878 | bestsad); |
michael@0 | 879 | CHECK_BETTER |
michael@0 | 880 | } |
michael@0 | 881 | } |
michael@0 | 882 | |
michael@0 | 883 | if (best_site != -1) { |
michael@0 | 884 | k = next_chkpts_indices[best_site]; |
michael@0 | 885 | br += candidates[s][k].row; |
michael@0 | 886 | bc += candidates[s][k].col; |
michael@0 | 887 | } |
michael@0 | 888 | } while (best_site != -1); |
michael@0 | 889 | } while (s--); |
michael@0 | 890 | } |
michael@0 | 891 | |
michael@0 | 892 | // Check 4 1-away neighbors if do_refine is true. |
michael@0 | 893 | // For most well-designed schemes do_refine will not be necessary. |
michael@0 | 894 | if (do_refine) { |
michael@0 | 895 | static const MV neighbors[4] = { |
michael@0 | 896 | {0, -1}, { -1, 0}, {1, 0}, {0, 1}, |
michael@0 | 897 | }; |
michael@0 | 898 | for (j = 0; j < 16; j++) { |
michael@0 | 899 | best_site = -1; |
michael@0 | 900 | CHECK_BOUNDS(1) |
michael@0 | 901 | if (all_in) { |
michael@0 | 902 | for (i = 0; i < 4; i++) { |
michael@0 | 903 | this_mv.row = br + neighbors[i].row; |
michael@0 | 904 | this_mv.col = bc + neighbors[i].col; |
michael@0 | 905 | this_offset = base_offset + (this_mv.row * (in_what_stride)) + |
michael@0 | 906 | this_mv.col; |
michael@0 | 907 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
michael@0 | 908 | bestsad); |
michael@0 | 909 | CHECK_BETTER |
michael@0 | 910 | } |
michael@0 | 911 | } else { |
michael@0 | 912 | for (i = 0; i < 4; i++) { |
michael@0 | 913 | this_mv.row = br + neighbors[i].row; |
michael@0 | 914 | this_mv.col = bc + neighbors[i].col; |
michael@0 | 915 | CHECK_POINT |
michael@0 | 916 | this_offset = base_offset + (this_mv.row * (in_what_stride)) + |
michael@0 | 917 | this_mv.col; |
michael@0 | 918 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, |
michael@0 | 919 | bestsad); |
michael@0 | 920 | CHECK_BETTER |
michael@0 | 921 | } |
michael@0 | 922 | } |
michael@0 | 923 | |
michael@0 | 924 | if (best_site == -1) { |
michael@0 | 925 | break; |
michael@0 | 926 | } else { |
michael@0 | 927 | br += neighbors[best_site].row; |
michael@0 | 928 | bc += neighbors[best_site].col; |
michael@0 | 929 | } |
michael@0 | 930 | } |
michael@0 | 931 | } |
michael@0 | 932 | |
michael@0 | 933 | best_mv->row = br; |
michael@0 | 934 | best_mv->col = bc; |
michael@0 | 935 | |
michael@0 | 936 | this_offset = base_offset + (best_mv->row * in_what_stride) + |
michael@0 | 937 | best_mv->col; |
michael@0 | 938 | this_mv.row = best_mv->row * 8; |
michael@0 | 939 | this_mv.col = best_mv->col * 8; |
michael@0 | 940 | if (bestsad == INT_MAX) |
michael@0 | 941 | return INT_MAX; |
michael@0 | 942 | |
michael@0 | 943 | return vfp->vf(what, what_stride, this_offset, in_what_stride, |
michael@0 | 944 | (unsigned int *)&bestsad) + |
michael@0 | 945 | use_mvcost ? mv_err_cost(&this_mv, center_mv, |
michael@0 | 946 | x->nmvjointcost, x->mvcost, x->errorperbit) |
michael@0 | 947 | : 0; |
michael@0 | 948 | } |
michael@0 | 949 | |
michael@0 | 950 | |
michael@0 | 951 | int vp9_hex_search(MACROBLOCK *x, |
michael@0 | 952 | MV *ref_mv, |
michael@0 | 953 | int search_param, |
michael@0 | 954 | int sad_per_bit, |
michael@0 | 955 | int do_init_search, |
michael@0 | 956 | const vp9_variance_fn_ptr_t *vfp, |
michael@0 | 957 | int use_mvcost, |
michael@0 | 958 | const MV *center_mv, MV *best_mv) { |
michael@0 | 959 | // First scale has 8-closest points, the rest have 6 points in hex shape |
michael@0 | 960 | // at increasing scales |
michael@0 | 961 | static const int hex_num_candidates[MAX_PATTERN_SCALES] = { |
michael@0 | 962 | 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 |
michael@0 | 963 | }; |
michael@0 | 964 | // Note that the largest candidate step at each scale is 2^scale |
michael@0 | 965 | static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { |
michael@0 | 966 | {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}}, |
michael@0 | 967 | {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}}, |
michael@0 | 968 | {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}}, |
michael@0 | 969 | {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}}, |
michael@0 | 970 | {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}}, |
michael@0 | 971 | {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}}, |
michael@0 | 972 | {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}}, |
michael@0 | 973 | {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}}, |
michael@0 | 974 | {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}}, |
michael@0 | 975 | {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}}, |
michael@0 | 976 | {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024}, |
michael@0 | 977 | { -1024, 0}}, |
michael@0 | 978 | }; |
michael@0 | 979 | return |
michael@0 | 980 | vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, |
michael@0 | 981 | do_init_search, 0, vfp, use_mvcost, |
michael@0 | 982 | center_mv, best_mv, |
michael@0 | 983 | hex_num_candidates, hex_candidates); |
michael@0 | 984 | } |
michael@0 | 985 | |
michael@0 | 986 | int vp9_bigdia_search(MACROBLOCK *x, |
michael@0 | 987 | MV *ref_mv, |
michael@0 | 988 | int search_param, |
michael@0 | 989 | int sad_per_bit, |
michael@0 | 990 | int do_init_search, |
michael@0 | 991 | const vp9_variance_fn_ptr_t *vfp, |
michael@0 | 992 | int use_mvcost, |
michael@0 | 993 | const MV *center_mv, |
michael@0 | 994 | MV *best_mv) { |
michael@0 | 995 | // First scale has 4-closest points, the rest have 8 points in diamond |
michael@0 | 996 | // shape at increasing scales |
michael@0 | 997 | static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { |
michael@0 | 998 | 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
michael@0 | 999 | }; |
michael@0 | 1000 | // Note that the largest candidate step at each scale is 2^scale |
michael@0 | 1001 | static const MV bigdia_candidates[MAX_PATTERN_SCALES] |
michael@0 | 1002 | [MAX_PATTERN_CANDIDATES] = { |
michael@0 | 1003 | {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}}, |
michael@0 | 1004 | {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}}, |
michael@0 | 1005 | {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}}, |
michael@0 | 1006 | {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}}, |
michael@0 | 1007 | {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}}, |
michael@0 | 1008 | {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32}, |
michael@0 | 1009 | {-16, 16}, {-32, 0}}, |
michael@0 | 1010 | {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64}, |
michael@0 | 1011 | {-32, 32}, {-64, 0}}, |
michael@0 | 1012 | {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128}, |
michael@0 | 1013 | {-64, 64}, {-128, 0}}, |
michael@0 | 1014 | {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256}, |
michael@0 | 1015 | {-128, 128}, {-256, 0}}, |
michael@0 | 1016 | {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512}, |
michael@0 | 1017 | {-256, 256}, {-512, 0}}, |
michael@0 | 1018 | {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024}, |
michael@0 | 1019 | {-512, 512}, {-1024, 0}}, |
michael@0 | 1020 | }; |
michael@0 | 1021 | return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, |
michael@0 | 1022 | do_init_search, 0, vfp, use_mvcost, |
michael@0 | 1023 | center_mv, best_mv, |
michael@0 | 1024 | bigdia_num_candidates, bigdia_candidates); |
michael@0 | 1025 | } |
michael@0 | 1026 | |
michael@0 | 1027 | int vp9_square_search(MACROBLOCK *x, |
michael@0 | 1028 | MV *ref_mv, |
michael@0 | 1029 | int search_param, |
michael@0 | 1030 | int sad_per_bit, |
michael@0 | 1031 | int do_init_search, |
michael@0 | 1032 | const vp9_variance_fn_ptr_t *vfp, |
michael@0 | 1033 | int use_mvcost, |
michael@0 | 1034 | const MV *center_mv, |
michael@0 | 1035 | MV *best_mv) { |
michael@0 | 1036 | // All scales have 8 closest points in square shape |
michael@0 | 1037 | static const int square_num_candidates[MAX_PATTERN_SCALES] = { |
michael@0 | 1038 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
michael@0 | 1039 | }; |
michael@0 | 1040 | // Note that the largest candidate step at each scale is 2^scale |
michael@0 | 1041 | static const MV square_candidates[MAX_PATTERN_SCALES] |
michael@0 | 1042 | [MAX_PATTERN_CANDIDATES] = { |
michael@0 | 1043 | {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}}, |
michael@0 | 1044 | {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}}, |
michael@0 | 1045 | {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}}, |
michael@0 | 1046 | {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}}, |
michael@0 | 1047 | {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16}, |
michael@0 | 1048 | {-16, 16}, {-16, 0}}, |
michael@0 | 1049 | {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32}, |
michael@0 | 1050 | {-32, 32}, {-32, 0}}, |
michael@0 | 1051 | {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64}, |
michael@0 | 1052 | {-64, 64}, {-64, 0}}, |
michael@0 | 1053 | {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128}, |
michael@0 | 1054 | {-128, 128}, {-128, 0}}, |
michael@0 | 1055 | {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256}, |
michael@0 | 1056 | {-256, 256}, {-256, 0}}, |
michael@0 | 1057 | {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512}, |
michael@0 | 1058 | {-512, 512}, {-512, 0}}, |
michael@0 | 1059 | {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024}, |
michael@0 | 1060 | {0, 1024}, {-1024, 1024}, {-1024, 0}}, |
michael@0 | 1061 | }; |
michael@0 | 1062 | return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, |
michael@0 | 1063 | do_init_search, 0, vfp, use_mvcost, |
michael@0 | 1064 | center_mv, best_mv, |
michael@0 | 1065 | square_num_candidates, square_candidates); |
michael@0 | 1066 | }; |
michael@0 | 1067 | |
michael@0 | 1068 | #undef CHECK_BOUNDS |
michael@0 | 1069 | #undef CHECK_POINT |
michael@0 | 1070 | #undef CHECK_BETTER |
michael@0 | 1071 | |
michael@0 | 1072 | int vp9_diamond_search_sad_c(MACROBLOCK *x, |
michael@0 | 1073 | int_mv *ref_mv, int_mv *best_mv, |
michael@0 | 1074 | int search_param, int sad_per_bit, int *num00, |
michael@0 | 1075 | vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, |
michael@0 | 1076 | int *mvcost[2], int_mv *center_mv) { |
michael@0 | 1077 | int i, j, step; |
michael@0 | 1078 | |
michael@0 | 1079 | const MACROBLOCKD* const xd = &x->e_mbd; |
michael@0 | 1080 | uint8_t *what = x->plane[0].src.buf; |
michael@0 | 1081 | int what_stride = x->plane[0].src.stride; |
michael@0 | 1082 | uint8_t *in_what; |
michael@0 | 1083 | int in_what_stride = xd->plane[0].pre[0].stride; |
michael@0 | 1084 | uint8_t *best_address; |
michael@0 | 1085 | |
michael@0 | 1086 | int tot_steps; |
michael@0 | 1087 | int_mv this_mv; |
michael@0 | 1088 | |
michael@0 | 1089 | int bestsad = INT_MAX; |
michael@0 | 1090 | int best_site = 0; |
michael@0 | 1091 | int last_site = 0; |
michael@0 | 1092 | |
michael@0 | 1093 | int ref_row, ref_col; |
michael@0 | 1094 | int this_row_offset, this_col_offset; |
michael@0 | 1095 | search_site *ss; |
michael@0 | 1096 | |
michael@0 | 1097 | uint8_t *check_here; |
michael@0 | 1098 | int thissad; |
michael@0 | 1099 | int_mv fcenter_mv; |
michael@0 | 1100 | |
michael@0 | 1101 | int *mvjsadcost = x->nmvjointsadcost; |
michael@0 | 1102 | int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
michael@0 | 1103 | |
michael@0 | 1104 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
michael@0 | 1105 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
michael@0 | 1106 | |
michael@0 | 1107 | clamp_mv(&ref_mv->as_mv, |
michael@0 | 1108 | x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
michael@0 | 1109 | ref_row = ref_mv->as_mv.row; |
michael@0 | 1110 | ref_col = ref_mv->as_mv.col; |
michael@0 | 1111 | *num00 = 0; |
michael@0 | 1112 | best_mv->as_mv.row = ref_row; |
michael@0 | 1113 | best_mv->as_mv.col = ref_col; |
michael@0 | 1114 | |
michael@0 | 1115 | // Work out the start point for the search |
michael@0 | 1116 | in_what = (uint8_t *)(xd->plane[0].pre[0].buf + |
michael@0 | 1117 | (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); |
michael@0 | 1118 | best_address = in_what; |
michael@0 | 1119 | |
michael@0 | 1120 | // Check the starting position |
michael@0 | 1121 | bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) |
michael@0 | 1122 | + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, |
michael@0 | 1123 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1124 | |
michael@0 | 1125 | // search_param determines the length of the initial step and hence the number |
michael@0 | 1126 | // of iterations |
michael@0 | 1127 | // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = |
michael@0 | 1128 | // (MAX_FIRST_STEP/4) pel... etc. |
michael@0 | 1129 | ss = &x->ss[search_param * x->searches_per_step]; |
michael@0 | 1130 | tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
michael@0 | 1131 | |
michael@0 | 1132 | i = 1; |
michael@0 | 1133 | |
michael@0 | 1134 | for (step = 0; step < tot_steps; step++) { |
michael@0 | 1135 | for (j = 0; j < x->searches_per_step; j++) { |
michael@0 | 1136 | // Trap illegal vectors |
michael@0 | 1137 | this_row_offset = best_mv->as_mv.row + ss[i].mv.row; |
michael@0 | 1138 | this_col_offset = best_mv->as_mv.col + ss[i].mv.col; |
michael@0 | 1139 | |
michael@0 | 1140 | if ((this_col_offset > x->mv_col_min) && |
michael@0 | 1141 | (this_col_offset < x->mv_col_max) && |
michael@0 | 1142 | (this_row_offset > x->mv_row_min) && |
michael@0 | 1143 | (this_row_offset < x->mv_row_max)) { |
michael@0 | 1144 | check_here = ss[i].offset + best_address; |
michael@0 | 1145 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
michael@0 | 1146 | bestsad); |
michael@0 | 1147 | |
michael@0 | 1148 | if (thissad < bestsad) { |
michael@0 | 1149 | this_mv.as_mv.row = this_row_offset; |
michael@0 | 1150 | this_mv.as_mv.col = this_col_offset; |
michael@0 | 1151 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1152 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1153 | |
michael@0 | 1154 | if (thissad < bestsad) { |
michael@0 | 1155 | bestsad = thissad; |
michael@0 | 1156 | best_site = i; |
michael@0 | 1157 | } |
michael@0 | 1158 | } |
michael@0 | 1159 | } |
michael@0 | 1160 | |
michael@0 | 1161 | i++; |
michael@0 | 1162 | } |
michael@0 | 1163 | |
michael@0 | 1164 | if (best_site != last_site) { |
michael@0 | 1165 | best_mv->as_mv.row += ss[best_site].mv.row; |
michael@0 | 1166 | best_mv->as_mv.col += ss[best_site].mv.col; |
michael@0 | 1167 | best_address += ss[best_site].offset; |
michael@0 | 1168 | last_site = best_site; |
michael@0 | 1169 | #if defined(NEW_DIAMOND_SEARCH) |
michael@0 | 1170 | while (1) { |
michael@0 | 1171 | this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; |
michael@0 | 1172 | this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; |
michael@0 | 1173 | if ((this_col_offset > x->mv_col_min) && |
michael@0 | 1174 | (this_col_offset < x->mv_col_max) && |
michael@0 | 1175 | (this_row_offset > x->mv_row_min) && |
michael@0 | 1176 | (this_row_offset < x->mv_row_max)) { |
michael@0 | 1177 | check_here = ss[best_site].offset + best_address; |
michael@0 | 1178 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
michael@0 | 1179 | bestsad); |
michael@0 | 1180 | if (thissad < bestsad) { |
michael@0 | 1181 | this_mv.as_mv.row = this_row_offset; |
michael@0 | 1182 | this_mv.as_mv.col = this_col_offset; |
michael@0 | 1183 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1184 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1185 | if (thissad < bestsad) { |
michael@0 | 1186 | bestsad = thissad; |
michael@0 | 1187 | best_mv->as_mv.row += ss[best_site].mv.row; |
michael@0 | 1188 | best_mv->as_mv.col += ss[best_site].mv.col; |
michael@0 | 1189 | best_address += ss[best_site].offset; |
michael@0 | 1190 | continue; |
michael@0 | 1191 | } |
michael@0 | 1192 | } |
michael@0 | 1193 | } |
michael@0 | 1194 | break; |
michael@0 | 1195 | }; |
michael@0 | 1196 | #endif |
michael@0 | 1197 | } else if (best_address == in_what) { |
michael@0 | 1198 | (*num00)++; |
michael@0 | 1199 | } |
michael@0 | 1200 | } |
michael@0 | 1201 | |
michael@0 | 1202 | this_mv.as_mv.row = best_mv->as_mv.row * 8; |
michael@0 | 1203 | this_mv.as_mv.col = best_mv->as_mv.col * 8; |
michael@0 | 1204 | |
michael@0 | 1205 | if (bestsad == INT_MAX) |
michael@0 | 1206 | return INT_MAX; |
michael@0 | 1207 | |
michael@0 | 1208 | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
michael@0 | 1209 | (unsigned int *)(&thissad)) + |
michael@0 | 1210 | mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
michael@0 | 1211 | mvjcost, mvcost, x->errorperbit); |
michael@0 | 1212 | } |
michael@0 | 1213 | |
michael@0 | 1214 | int vp9_diamond_search_sadx4(MACROBLOCK *x, |
michael@0 | 1215 | int_mv *ref_mv, int_mv *best_mv, int search_param, |
michael@0 | 1216 | int sad_per_bit, int *num00, |
michael@0 | 1217 | vp9_variance_fn_ptr_t *fn_ptr, |
michael@0 | 1218 | int *mvjcost, int *mvcost[2], int_mv *center_mv) { |
michael@0 | 1219 | int i, j, step; |
michael@0 | 1220 | |
michael@0 | 1221 | const MACROBLOCKD* const xd = &x->e_mbd; |
michael@0 | 1222 | uint8_t *what = x->plane[0].src.buf; |
michael@0 | 1223 | int what_stride = x->plane[0].src.stride; |
michael@0 | 1224 | uint8_t *in_what; |
michael@0 | 1225 | int in_what_stride = xd->plane[0].pre[0].stride; |
michael@0 | 1226 | uint8_t *best_address; |
michael@0 | 1227 | |
michael@0 | 1228 | int tot_steps; |
michael@0 | 1229 | int_mv this_mv; |
michael@0 | 1230 | |
michael@0 | 1231 | unsigned int bestsad = INT_MAX; |
michael@0 | 1232 | int best_site = 0; |
michael@0 | 1233 | int last_site = 0; |
michael@0 | 1234 | |
michael@0 | 1235 | int ref_row; |
michael@0 | 1236 | int ref_col; |
michael@0 | 1237 | int this_row_offset; |
michael@0 | 1238 | int this_col_offset; |
michael@0 | 1239 | search_site *ss; |
michael@0 | 1240 | |
michael@0 | 1241 | uint8_t *check_here; |
michael@0 | 1242 | unsigned int thissad; |
michael@0 | 1243 | int_mv fcenter_mv; |
michael@0 | 1244 | |
michael@0 | 1245 | int *mvjsadcost = x->nmvjointsadcost; |
michael@0 | 1246 | int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
michael@0 | 1247 | |
michael@0 | 1248 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
michael@0 | 1249 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
michael@0 | 1250 | |
michael@0 | 1251 | clamp_mv(&ref_mv->as_mv, |
michael@0 | 1252 | x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); |
michael@0 | 1253 | ref_row = ref_mv->as_mv.row; |
michael@0 | 1254 | ref_col = ref_mv->as_mv.col; |
michael@0 | 1255 | *num00 = 0; |
michael@0 | 1256 | best_mv->as_mv.row = ref_row; |
michael@0 | 1257 | best_mv->as_mv.col = ref_col; |
michael@0 | 1258 | |
michael@0 | 1259 | // Work out the start point for the search |
michael@0 | 1260 | in_what = (uint8_t *)(xd->plane[0].pre[0].buf + |
michael@0 | 1261 | (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); |
michael@0 | 1262 | best_address = in_what; |
michael@0 | 1263 | |
michael@0 | 1264 | // Check the starting position |
michael@0 | 1265 | bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) |
michael@0 | 1266 | + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, |
michael@0 | 1267 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1268 | |
michael@0 | 1269 | // search_param determines the length of the initial step and hence the number |
michael@0 | 1270 | // of iterations. |
michael@0 | 1271 | // 0 = initial step (MAX_FIRST_STEP) pel |
michael@0 | 1272 | // 1 = (MAX_FIRST_STEP/2) pel, |
michael@0 | 1273 | // 2 = (MAX_FIRST_STEP/4) pel... |
michael@0 | 1274 | ss = &x->ss[search_param * x->searches_per_step]; |
michael@0 | 1275 | tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
michael@0 | 1276 | |
michael@0 | 1277 | i = 1; |
michael@0 | 1278 | |
michael@0 | 1279 | for (step = 0; step < tot_steps; step++) { |
michael@0 | 1280 | int all_in = 1, t; |
michael@0 | 1281 | |
michael@0 | 1282 | // All_in is true if every one of the points we are checking are within |
michael@0 | 1283 | // the bounds of the image. |
michael@0 | 1284 | all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); |
michael@0 | 1285 | all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); |
michael@0 | 1286 | all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); |
michael@0 | 1287 | all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); |
michael@0 | 1288 | |
michael@0 | 1289 | // If all the pixels are within the bounds we don't check whether the |
michael@0 | 1290 | // search point is valid in this loop, otherwise we check each point |
michael@0 | 1291 | // for validity.. |
michael@0 | 1292 | if (all_in) { |
michael@0 | 1293 | unsigned int sad_array[4]; |
michael@0 | 1294 | |
michael@0 | 1295 | for (j = 0; j < x->searches_per_step; j += 4) { |
michael@0 | 1296 | unsigned char const *block_offset[4]; |
michael@0 | 1297 | |
michael@0 | 1298 | for (t = 0; t < 4; t++) |
michael@0 | 1299 | block_offset[t] = ss[i + t].offset + best_address; |
michael@0 | 1300 | |
michael@0 | 1301 | fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
michael@0 | 1302 | sad_array); |
michael@0 | 1303 | |
michael@0 | 1304 | for (t = 0; t < 4; t++, i++) { |
michael@0 | 1305 | if (sad_array[t] < bestsad) { |
michael@0 | 1306 | this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; |
michael@0 | 1307 | this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; |
michael@0 | 1308 | sad_array[t] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1309 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1310 | |
michael@0 | 1311 | if (sad_array[t] < bestsad) { |
michael@0 | 1312 | bestsad = sad_array[t]; |
michael@0 | 1313 | best_site = i; |
michael@0 | 1314 | } |
michael@0 | 1315 | } |
michael@0 | 1316 | } |
michael@0 | 1317 | } |
michael@0 | 1318 | } else { |
michael@0 | 1319 | for (j = 0; j < x->searches_per_step; j++) { |
michael@0 | 1320 | // Trap illegal vectors |
michael@0 | 1321 | this_row_offset = best_mv->as_mv.row + ss[i].mv.row; |
michael@0 | 1322 | this_col_offset = best_mv->as_mv.col + ss[i].mv.col; |
michael@0 | 1323 | |
michael@0 | 1324 | if ((this_col_offset > x->mv_col_min) && |
michael@0 | 1325 | (this_col_offset < x->mv_col_max) && |
michael@0 | 1326 | (this_row_offset > x->mv_row_min) && |
michael@0 | 1327 | (this_row_offset < x->mv_row_max)) { |
michael@0 | 1328 | check_here = ss[i].offset + best_address; |
michael@0 | 1329 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
michael@0 | 1330 | bestsad); |
michael@0 | 1331 | |
michael@0 | 1332 | if (thissad < bestsad) { |
michael@0 | 1333 | this_mv.as_mv.row = this_row_offset; |
michael@0 | 1334 | this_mv.as_mv.col = this_col_offset; |
michael@0 | 1335 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1336 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1337 | |
michael@0 | 1338 | if (thissad < bestsad) { |
michael@0 | 1339 | bestsad = thissad; |
michael@0 | 1340 | best_site = i; |
michael@0 | 1341 | } |
michael@0 | 1342 | } |
michael@0 | 1343 | } |
michael@0 | 1344 | i++; |
michael@0 | 1345 | } |
michael@0 | 1346 | } |
michael@0 | 1347 | if (best_site != last_site) { |
michael@0 | 1348 | best_mv->as_mv.row += ss[best_site].mv.row; |
michael@0 | 1349 | best_mv->as_mv.col += ss[best_site].mv.col; |
michael@0 | 1350 | best_address += ss[best_site].offset; |
michael@0 | 1351 | last_site = best_site; |
michael@0 | 1352 | #if defined(NEW_DIAMOND_SEARCH) |
michael@0 | 1353 | while (1) { |
michael@0 | 1354 | this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; |
michael@0 | 1355 | this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; |
michael@0 | 1356 | if ((this_col_offset > x->mv_col_min) && |
michael@0 | 1357 | (this_col_offset < x->mv_col_max) && |
michael@0 | 1358 | (this_row_offset > x->mv_row_min) && |
michael@0 | 1359 | (this_row_offset < x->mv_row_max)) { |
michael@0 | 1360 | check_here = ss[best_site].offset + best_address; |
michael@0 | 1361 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
michael@0 | 1362 | bestsad); |
michael@0 | 1363 | if (thissad < bestsad) { |
michael@0 | 1364 | this_mv.as_mv.row = this_row_offset; |
michael@0 | 1365 | this_mv.as_mv.col = this_col_offset; |
michael@0 | 1366 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1367 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1368 | if (thissad < bestsad) { |
michael@0 | 1369 | bestsad = thissad; |
michael@0 | 1370 | best_mv->as_mv.row += ss[best_site].mv.row; |
michael@0 | 1371 | best_mv->as_mv.col += ss[best_site].mv.col; |
michael@0 | 1372 | best_address += ss[best_site].offset; |
michael@0 | 1373 | continue; |
michael@0 | 1374 | } |
michael@0 | 1375 | } |
michael@0 | 1376 | } |
michael@0 | 1377 | break; |
michael@0 | 1378 | }; |
michael@0 | 1379 | #endif |
michael@0 | 1380 | } else if (best_address == in_what) { |
michael@0 | 1381 | (*num00)++; |
michael@0 | 1382 | } |
michael@0 | 1383 | } |
michael@0 | 1384 | |
michael@0 | 1385 | this_mv.as_mv.row = best_mv->as_mv.row * 8; |
michael@0 | 1386 | this_mv.as_mv.col = best_mv->as_mv.col * 8; |
michael@0 | 1387 | |
michael@0 | 1388 | if (bestsad == INT_MAX) |
michael@0 | 1389 | return INT_MAX; |
michael@0 | 1390 | |
michael@0 | 1391 | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
michael@0 | 1392 | (unsigned int *)(&thissad)) + |
michael@0 | 1393 | mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
michael@0 | 1394 | mvjcost, mvcost, x->errorperbit); |
michael@0 | 1395 | } |
michael@0 | 1396 | |
michael@0 | 1397 | /* do_refine: If last step (1-away) of n-step search doesn't pick the center |
michael@0 | 1398 | point as the best match, we will do a final 1-away diamond |
michael@0 | 1399 | refining search */ |
michael@0 | 1400 | |
michael@0 | 1401 | int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, |
michael@0 | 1402 | int_mv *mvp_full, int step_param, |
michael@0 | 1403 | int sadpb, int further_steps, |
michael@0 | 1404 | int do_refine, vp9_variance_fn_ptr_t *fn_ptr, |
michael@0 | 1405 | int_mv *ref_mv, int_mv *dst_mv) { |
michael@0 | 1406 | int_mv temp_mv; |
michael@0 | 1407 | int thissme, n, num00; |
michael@0 | 1408 | int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, |
michael@0 | 1409 | step_param, sadpb, &num00, |
michael@0 | 1410 | fn_ptr, x->nmvjointcost, |
michael@0 | 1411 | x->mvcost, ref_mv); |
michael@0 | 1412 | dst_mv->as_int = temp_mv.as_int; |
michael@0 | 1413 | |
michael@0 | 1414 | n = num00; |
michael@0 | 1415 | num00 = 0; |
michael@0 | 1416 | |
michael@0 | 1417 | /* If there won't be more n-step search, check to see if refining search is |
michael@0 | 1418 | * needed. */ |
michael@0 | 1419 | if (n > further_steps) |
michael@0 | 1420 | do_refine = 0; |
michael@0 | 1421 | |
michael@0 | 1422 | while (n < further_steps) { |
michael@0 | 1423 | n++; |
michael@0 | 1424 | |
michael@0 | 1425 | if (num00) { |
michael@0 | 1426 | num00--; |
michael@0 | 1427 | } else { |
michael@0 | 1428 | thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, |
michael@0 | 1429 | step_param + n, sadpb, &num00, |
michael@0 | 1430 | fn_ptr, x->nmvjointcost, x->mvcost, |
michael@0 | 1431 | ref_mv); |
michael@0 | 1432 | |
michael@0 | 1433 | /* check to see if refining search is needed. */ |
michael@0 | 1434 | if (num00 > (further_steps - n)) |
michael@0 | 1435 | do_refine = 0; |
michael@0 | 1436 | |
michael@0 | 1437 | if (thissme < bestsme) { |
michael@0 | 1438 | bestsme = thissme; |
michael@0 | 1439 | dst_mv->as_int = temp_mv.as_int; |
michael@0 | 1440 | } |
michael@0 | 1441 | } |
michael@0 | 1442 | } |
michael@0 | 1443 | |
michael@0 | 1444 | /* final 1-away diamond refining search */ |
michael@0 | 1445 | if (do_refine == 1) { |
michael@0 | 1446 | int search_range = 8; |
michael@0 | 1447 | int_mv best_mv; |
michael@0 | 1448 | best_mv.as_int = dst_mv->as_int; |
michael@0 | 1449 | thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, |
michael@0 | 1450 | fn_ptr, x->nmvjointcost, x->mvcost, |
michael@0 | 1451 | ref_mv); |
michael@0 | 1452 | |
michael@0 | 1453 | if (thissme < bestsme) { |
michael@0 | 1454 | bestsme = thissme; |
michael@0 | 1455 | dst_mv->as_int = best_mv.as_int; |
michael@0 | 1456 | } |
michael@0 | 1457 | } |
michael@0 | 1458 | return bestsme; |
michael@0 | 1459 | } |
michael@0 | 1460 | |
michael@0 | 1461 | int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, |
michael@0 | 1462 | int sad_per_bit, int distance, |
michael@0 | 1463 | vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, |
michael@0 | 1464 | int *mvcost[2], |
michael@0 | 1465 | int_mv *center_mv, int n) { |
michael@0 | 1466 | const MACROBLOCKD* const xd = &x->e_mbd; |
michael@0 | 1467 | uint8_t *what = x->plane[0].src.buf; |
michael@0 | 1468 | int what_stride = x->plane[0].src.stride; |
michael@0 | 1469 | uint8_t *in_what; |
michael@0 | 1470 | int in_what_stride = xd->plane[0].pre[0].stride; |
michael@0 | 1471 | int mv_stride = xd->plane[0].pre[0].stride; |
michael@0 | 1472 | uint8_t *bestaddress; |
michael@0 | 1473 | int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; |
michael@0 | 1474 | int_mv this_mv; |
michael@0 | 1475 | int bestsad = INT_MAX; |
michael@0 | 1476 | int r, c; |
michael@0 | 1477 | |
michael@0 | 1478 | uint8_t *check_here; |
michael@0 | 1479 | int thissad; |
michael@0 | 1480 | |
michael@0 | 1481 | int ref_row = ref_mv->as_mv.row; |
michael@0 | 1482 | int ref_col = ref_mv->as_mv.col; |
michael@0 | 1483 | |
michael@0 | 1484 | int row_min = ref_row - distance; |
michael@0 | 1485 | int row_max = ref_row + distance; |
michael@0 | 1486 | int col_min = ref_col - distance; |
michael@0 | 1487 | int col_max = ref_col + distance; |
michael@0 | 1488 | int_mv fcenter_mv; |
michael@0 | 1489 | |
michael@0 | 1490 | int *mvjsadcost = x->nmvjointsadcost; |
michael@0 | 1491 | int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
michael@0 | 1492 | |
michael@0 | 1493 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
michael@0 | 1494 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
michael@0 | 1495 | |
michael@0 | 1496 | // Work out the mid point for the search |
michael@0 | 1497 | in_what = xd->plane[0].pre[0].buf; |
michael@0 | 1498 | bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; |
michael@0 | 1499 | |
michael@0 | 1500 | best_mv->as_mv.row = ref_row; |
michael@0 | 1501 | best_mv->as_mv.col = ref_col; |
michael@0 | 1502 | |
michael@0 | 1503 | // Baseline value at the centre |
michael@0 | 1504 | bestsad = fn_ptr->sdf(what, what_stride, bestaddress, |
michael@0 | 1505 | in_what_stride, 0x7fffffff) |
michael@0 | 1506 | + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, |
michael@0 | 1507 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1508 | |
michael@0 | 1509 | // Apply further limits to prevent us looking using vectors that stretch |
michael@0 | 1510 | // beyond the UMV border |
michael@0 | 1511 | col_min = MAX(col_min, x->mv_col_min); |
michael@0 | 1512 | col_max = MIN(col_max, x->mv_col_max); |
michael@0 | 1513 | row_min = MAX(row_min, x->mv_row_min); |
michael@0 | 1514 | row_max = MIN(row_max, x->mv_row_max); |
michael@0 | 1515 | |
michael@0 | 1516 | for (r = row_min; r < row_max; r++) { |
michael@0 | 1517 | this_mv.as_mv.row = r; |
michael@0 | 1518 | check_here = r * mv_stride + in_what + col_min; |
michael@0 | 1519 | |
michael@0 | 1520 | for (c = col_min; c < col_max; c++) { |
michael@0 | 1521 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
michael@0 | 1522 | bestsad); |
michael@0 | 1523 | |
michael@0 | 1524 | this_mv.as_mv.col = c; |
michael@0 | 1525 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1526 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1527 | |
michael@0 | 1528 | if (thissad < bestsad) { |
michael@0 | 1529 | bestsad = thissad; |
michael@0 | 1530 | best_mv->as_mv.row = r; |
michael@0 | 1531 | best_mv->as_mv.col = c; |
michael@0 | 1532 | bestaddress = check_here; |
michael@0 | 1533 | } |
michael@0 | 1534 | |
michael@0 | 1535 | check_here++; |
michael@0 | 1536 | } |
michael@0 | 1537 | } |
michael@0 | 1538 | |
michael@0 | 1539 | this_mv.as_mv.row = best_mv->as_mv.row * 8; |
michael@0 | 1540 | this_mv.as_mv.col = best_mv->as_mv.col * 8; |
michael@0 | 1541 | |
michael@0 | 1542 | if (bestsad < INT_MAX) |
michael@0 | 1543 | return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, |
michael@0 | 1544 | (unsigned int *)(&thissad)) + |
michael@0 | 1545 | mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
michael@0 | 1546 | mvjcost, mvcost, x->errorperbit); |
michael@0 | 1547 | else |
michael@0 | 1548 | return INT_MAX; |
michael@0 | 1549 | } |
michael@0 | 1550 | |
michael@0 | 1551 | int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, |
michael@0 | 1552 | int sad_per_bit, int distance, |
michael@0 | 1553 | vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, |
michael@0 | 1554 | int *mvcost[2], int_mv *center_mv, int n) { |
michael@0 | 1555 | const MACROBLOCKD* const xd = &x->e_mbd; |
michael@0 | 1556 | uint8_t *what = x->plane[0].src.buf; |
michael@0 | 1557 | int what_stride = x->plane[0].src.stride; |
michael@0 | 1558 | uint8_t *in_what; |
michael@0 | 1559 | int in_what_stride = xd->plane[0].pre[0].stride; |
michael@0 | 1560 | int mv_stride = xd->plane[0].pre[0].stride; |
michael@0 | 1561 | uint8_t *bestaddress; |
michael@0 | 1562 | int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; |
michael@0 | 1563 | int_mv this_mv; |
michael@0 | 1564 | unsigned int bestsad = INT_MAX; |
michael@0 | 1565 | int r, c; |
michael@0 | 1566 | |
michael@0 | 1567 | uint8_t *check_here; |
michael@0 | 1568 | unsigned int thissad; |
michael@0 | 1569 | |
michael@0 | 1570 | int ref_row = ref_mv->as_mv.row; |
michael@0 | 1571 | int ref_col = ref_mv->as_mv.col; |
michael@0 | 1572 | |
michael@0 | 1573 | int row_min = ref_row - distance; |
michael@0 | 1574 | int row_max = ref_row + distance; |
michael@0 | 1575 | int col_min = ref_col - distance; |
michael@0 | 1576 | int col_max = ref_col + distance; |
michael@0 | 1577 | |
michael@0 | 1578 | unsigned int sad_array[3]; |
michael@0 | 1579 | int_mv fcenter_mv; |
michael@0 | 1580 | |
michael@0 | 1581 | int *mvjsadcost = x->nmvjointsadcost; |
michael@0 | 1582 | int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
michael@0 | 1583 | |
michael@0 | 1584 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
michael@0 | 1585 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
michael@0 | 1586 | |
michael@0 | 1587 | // Work out the mid point for the search |
michael@0 | 1588 | in_what = xd->plane[0].pre[0].buf; |
michael@0 | 1589 | bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; |
michael@0 | 1590 | |
michael@0 | 1591 | best_mv->as_mv.row = ref_row; |
michael@0 | 1592 | best_mv->as_mv.col = ref_col; |
michael@0 | 1593 | |
michael@0 | 1594 | // Baseline value at the centre |
michael@0 | 1595 | bestsad = fn_ptr->sdf(what, what_stride, |
michael@0 | 1596 | bestaddress, in_what_stride, 0x7fffffff) |
michael@0 | 1597 | + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, |
michael@0 | 1598 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1599 | |
michael@0 | 1600 | // Apply further limits to prevent us looking using vectors that stretch |
michael@0 | 1601 | // beyond the UMV border |
michael@0 | 1602 | col_min = MAX(col_min, x->mv_col_min); |
michael@0 | 1603 | col_max = MIN(col_max, x->mv_col_max); |
michael@0 | 1604 | row_min = MAX(row_min, x->mv_row_min); |
michael@0 | 1605 | row_max = MIN(row_max, x->mv_row_max); |
michael@0 | 1606 | |
michael@0 | 1607 | for (r = row_min; r < row_max; r++) { |
michael@0 | 1608 | this_mv.as_mv.row = r; |
michael@0 | 1609 | check_here = r * mv_stride + in_what + col_min; |
michael@0 | 1610 | c = col_min; |
michael@0 | 1611 | |
michael@0 | 1612 | while ((c + 2) < col_max) { |
michael@0 | 1613 | int i; |
michael@0 | 1614 | |
michael@0 | 1615 | fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); |
michael@0 | 1616 | |
michael@0 | 1617 | for (i = 0; i < 3; i++) { |
michael@0 | 1618 | thissad = sad_array[i]; |
michael@0 | 1619 | |
michael@0 | 1620 | if (thissad < bestsad) { |
michael@0 | 1621 | this_mv.as_mv.col = c; |
michael@0 | 1622 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1623 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1624 | |
michael@0 | 1625 | if (thissad < bestsad) { |
michael@0 | 1626 | bestsad = thissad; |
michael@0 | 1627 | best_mv->as_mv.row = r; |
michael@0 | 1628 | best_mv->as_mv.col = c; |
michael@0 | 1629 | bestaddress = check_here; |
michael@0 | 1630 | } |
michael@0 | 1631 | } |
michael@0 | 1632 | |
michael@0 | 1633 | check_here++; |
michael@0 | 1634 | c++; |
michael@0 | 1635 | } |
michael@0 | 1636 | } |
michael@0 | 1637 | |
michael@0 | 1638 | while (c < col_max) { |
michael@0 | 1639 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
michael@0 | 1640 | bestsad); |
michael@0 | 1641 | |
michael@0 | 1642 | if (thissad < bestsad) { |
michael@0 | 1643 | this_mv.as_mv.col = c; |
michael@0 | 1644 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1645 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1646 | |
michael@0 | 1647 | if (thissad < bestsad) { |
michael@0 | 1648 | bestsad = thissad; |
michael@0 | 1649 | best_mv->as_mv.row = r; |
michael@0 | 1650 | best_mv->as_mv.col = c; |
michael@0 | 1651 | bestaddress = check_here; |
michael@0 | 1652 | } |
michael@0 | 1653 | } |
michael@0 | 1654 | |
michael@0 | 1655 | check_here++; |
michael@0 | 1656 | c++; |
michael@0 | 1657 | } |
michael@0 | 1658 | } |
michael@0 | 1659 | |
michael@0 | 1660 | this_mv.as_mv.row = best_mv->as_mv.row * 8; |
michael@0 | 1661 | this_mv.as_mv.col = best_mv->as_mv.col * 8; |
michael@0 | 1662 | |
michael@0 | 1663 | if (bestsad < INT_MAX) |
michael@0 | 1664 | return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, |
michael@0 | 1665 | (unsigned int *)(&thissad)) + |
michael@0 | 1666 | mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
michael@0 | 1667 | mvjcost, mvcost, x->errorperbit); |
michael@0 | 1668 | else |
michael@0 | 1669 | return INT_MAX; |
michael@0 | 1670 | } |
michael@0 | 1671 | |
michael@0 | 1672 | int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, |
michael@0 | 1673 | int sad_per_bit, int distance, |
michael@0 | 1674 | vp9_variance_fn_ptr_t *fn_ptr, |
michael@0 | 1675 | int *mvjcost, int *mvcost[2], |
michael@0 | 1676 | int_mv *center_mv, int n) { |
michael@0 | 1677 | const MACROBLOCKD* const xd = &x->e_mbd; |
michael@0 | 1678 | uint8_t *what = x->plane[0].src.buf; |
michael@0 | 1679 | int what_stride = x->plane[0].src.stride; |
michael@0 | 1680 | uint8_t *in_what; |
michael@0 | 1681 | int in_what_stride = xd->plane[0].pre[0].stride; |
michael@0 | 1682 | int mv_stride = xd->plane[0].pre[0].stride; |
michael@0 | 1683 | uint8_t *bestaddress; |
michael@0 | 1684 | int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; |
michael@0 | 1685 | int_mv this_mv; |
michael@0 | 1686 | unsigned int bestsad = INT_MAX; |
michael@0 | 1687 | int r, c; |
michael@0 | 1688 | |
michael@0 | 1689 | uint8_t *check_here; |
michael@0 | 1690 | unsigned int thissad; |
michael@0 | 1691 | |
michael@0 | 1692 | int ref_row = ref_mv->as_mv.row; |
michael@0 | 1693 | int ref_col = ref_mv->as_mv.col; |
michael@0 | 1694 | |
michael@0 | 1695 | int row_min = ref_row - distance; |
michael@0 | 1696 | int row_max = ref_row + distance; |
michael@0 | 1697 | int col_min = ref_col - distance; |
michael@0 | 1698 | int col_max = ref_col + distance; |
michael@0 | 1699 | |
michael@0 | 1700 | DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); |
michael@0 | 1701 | unsigned int sad_array[3]; |
michael@0 | 1702 | int_mv fcenter_mv; |
michael@0 | 1703 | |
michael@0 | 1704 | int *mvjsadcost = x->nmvjointsadcost; |
michael@0 | 1705 | int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
michael@0 | 1706 | |
michael@0 | 1707 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
michael@0 | 1708 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
michael@0 | 1709 | |
michael@0 | 1710 | // Work out the mid point for the search |
michael@0 | 1711 | in_what = xd->plane[0].pre[0].buf; |
michael@0 | 1712 | bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; |
michael@0 | 1713 | |
michael@0 | 1714 | best_mv->as_mv.row = ref_row; |
michael@0 | 1715 | best_mv->as_mv.col = ref_col; |
michael@0 | 1716 | |
michael@0 | 1717 | // Baseline value at the centre |
michael@0 | 1718 | bestsad = fn_ptr->sdf(what, what_stride, |
michael@0 | 1719 | bestaddress, in_what_stride, 0x7fffffff) |
michael@0 | 1720 | + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, |
michael@0 | 1721 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1722 | |
michael@0 | 1723 | // Apply further limits to prevent us looking using vectors that stretch |
michael@0 | 1724 | // beyond the UMV border |
michael@0 | 1725 | col_min = MAX(col_min, x->mv_col_min); |
michael@0 | 1726 | col_max = MIN(col_max, x->mv_col_max); |
michael@0 | 1727 | row_min = MAX(row_min, x->mv_row_min); |
michael@0 | 1728 | row_max = MIN(row_max, x->mv_row_max); |
michael@0 | 1729 | |
michael@0 | 1730 | for (r = row_min; r < row_max; r++) { |
michael@0 | 1731 | this_mv.as_mv.row = r; |
michael@0 | 1732 | check_here = r * mv_stride + in_what + col_min; |
michael@0 | 1733 | c = col_min; |
michael@0 | 1734 | |
michael@0 | 1735 | while ((c + 7) < col_max) { |
michael@0 | 1736 | int i; |
michael@0 | 1737 | |
michael@0 | 1738 | fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); |
michael@0 | 1739 | |
michael@0 | 1740 | for (i = 0; i < 8; i++) { |
michael@0 | 1741 | thissad = (unsigned int)sad_array8[i]; |
michael@0 | 1742 | |
michael@0 | 1743 | if (thissad < bestsad) { |
michael@0 | 1744 | this_mv.as_mv.col = c; |
michael@0 | 1745 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1746 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1747 | |
michael@0 | 1748 | if (thissad < bestsad) { |
michael@0 | 1749 | bestsad = thissad; |
michael@0 | 1750 | best_mv->as_mv.row = r; |
michael@0 | 1751 | best_mv->as_mv.col = c; |
michael@0 | 1752 | bestaddress = check_here; |
michael@0 | 1753 | } |
michael@0 | 1754 | } |
michael@0 | 1755 | |
michael@0 | 1756 | check_here++; |
michael@0 | 1757 | c++; |
michael@0 | 1758 | } |
michael@0 | 1759 | } |
michael@0 | 1760 | |
michael@0 | 1761 | while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { |
michael@0 | 1762 | int i; |
michael@0 | 1763 | |
michael@0 | 1764 | fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); |
michael@0 | 1765 | |
michael@0 | 1766 | for (i = 0; i < 3; i++) { |
michael@0 | 1767 | thissad = sad_array[i]; |
michael@0 | 1768 | |
michael@0 | 1769 | if (thissad < bestsad) { |
michael@0 | 1770 | this_mv.as_mv.col = c; |
michael@0 | 1771 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1772 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1773 | |
michael@0 | 1774 | if (thissad < bestsad) { |
michael@0 | 1775 | bestsad = thissad; |
michael@0 | 1776 | best_mv->as_mv.row = r; |
michael@0 | 1777 | best_mv->as_mv.col = c; |
michael@0 | 1778 | bestaddress = check_here; |
michael@0 | 1779 | } |
michael@0 | 1780 | } |
michael@0 | 1781 | |
michael@0 | 1782 | check_here++; |
michael@0 | 1783 | c++; |
michael@0 | 1784 | } |
michael@0 | 1785 | } |
michael@0 | 1786 | |
michael@0 | 1787 | while (c < col_max) { |
michael@0 | 1788 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
michael@0 | 1789 | bestsad); |
michael@0 | 1790 | |
michael@0 | 1791 | if (thissad < bestsad) { |
michael@0 | 1792 | this_mv.as_mv.col = c; |
michael@0 | 1793 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1794 | mvjsadcost, mvsadcost, sad_per_bit); |
michael@0 | 1795 | |
michael@0 | 1796 | if (thissad < bestsad) { |
michael@0 | 1797 | bestsad = thissad; |
michael@0 | 1798 | best_mv->as_mv.row = r; |
michael@0 | 1799 | best_mv->as_mv.col = c; |
michael@0 | 1800 | bestaddress = check_here; |
michael@0 | 1801 | } |
michael@0 | 1802 | } |
michael@0 | 1803 | |
michael@0 | 1804 | check_here++; |
michael@0 | 1805 | c++; |
michael@0 | 1806 | } |
michael@0 | 1807 | } |
michael@0 | 1808 | |
michael@0 | 1809 | this_mv.as_mv.row = best_mv->as_mv.row * 8; |
michael@0 | 1810 | this_mv.as_mv.col = best_mv->as_mv.col * 8; |
michael@0 | 1811 | |
michael@0 | 1812 | if (bestsad < INT_MAX) |
michael@0 | 1813 | return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, |
michael@0 | 1814 | (unsigned int *)(&thissad)) + |
michael@0 | 1815 | mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
michael@0 | 1816 | mvjcost, mvcost, x->errorperbit); |
michael@0 | 1817 | else |
michael@0 | 1818 | return INT_MAX; |
michael@0 | 1819 | } |
michael@0 | 1820 | int vp9_refining_search_sad_c(MACROBLOCK *x, |
michael@0 | 1821 | int_mv *ref_mv, int error_per_bit, |
michael@0 | 1822 | int search_range, vp9_variance_fn_ptr_t *fn_ptr, |
michael@0 | 1823 | int *mvjcost, int *mvcost[2], int_mv *center_mv) { |
michael@0 | 1824 | const MACROBLOCKD* const xd = &x->e_mbd; |
michael@0 | 1825 | MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; |
michael@0 | 1826 | int i, j; |
michael@0 | 1827 | int this_row_offset, this_col_offset; |
michael@0 | 1828 | |
michael@0 | 1829 | int what_stride = x->plane[0].src.stride; |
michael@0 | 1830 | int in_what_stride = xd->plane[0].pre[0].stride; |
michael@0 | 1831 | uint8_t *what = x->plane[0].src.buf; |
michael@0 | 1832 | uint8_t *best_address = xd->plane[0].pre[0].buf + |
michael@0 | 1833 | (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + |
michael@0 | 1834 | ref_mv->as_mv.col; |
michael@0 | 1835 | uint8_t *check_here; |
michael@0 | 1836 | unsigned int thissad; |
michael@0 | 1837 | int_mv this_mv; |
michael@0 | 1838 | unsigned int bestsad = INT_MAX; |
michael@0 | 1839 | int_mv fcenter_mv; |
michael@0 | 1840 | |
michael@0 | 1841 | int *mvjsadcost = x->nmvjointsadcost; |
michael@0 | 1842 | int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
michael@0 | 1843 | |
michael@0 | 1844 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
michael@0 | 1845 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
michael@0 | 1846 | |
michael@0 | 1847 | bestsad = fn_ptr->sdf(what, what_stride, best_address, |
michael@0 | 1848 | in_what_stride, 0x7fffffff) + |
michael@0 | 1849 | mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, |
michael@0 | 1850 | mvjsadcost, mvsadcost, error_per_bit); |
michael@0 | 1851 | |
michael@0 | 1852 | for (i = 0; i < search_range; i++) { |
michael@0 | 1853 | int best_site = -1; |
michael@0 | 1854 | |
michael@0 | 1855 | for (j = 0; j < 4; j++) { |
michael@0 | 1856 | this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
michael@0 | 1857 | this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
michael@0 | 1858 | |
michael@0 | 1859 | if ((this_col_offset > x->mv_col_min) && |
michael@0 | 1860 | (this_col_offset < x->mv_col_max) && |
michael@0 | 1861 | (this_row_offset > x->mv_row_min) && |
michael@0 | 1862 | (this_row_offset < x->mv_row_max)) { |
michael@0 | 1863 | check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + |
michael@0 | 1864 | best_address; |
michael@0 | 1865 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
michael@0 | 1866 | bestsad); |
michael@0 | 1867 | |
michael@0 | 1868 | if (thissad < bestsad) { |
michael@0 | 1869 | this_mv.as_mv.row = this_row_offset; |
michael@0 | 1870 | this_mv.as_mv.col = this_col_offset; |
michael@0 | 1871 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1872 | mvjsadcost, mvsadcost, error_per_bit); |
michael@0 | 1873 | |
michael@0 | 1874 | if (thissad < bestsad) { |
michael@0 | 1875 | bestsad = thissad; |
michael@0 | 1876 | best_site = j; |
michael@0 | 1877 | } |
michael@0 | 1878 | } |
michael@0 | 1879 | } |
michael@0 | 1880 | } |
michael@0 | 1881 | |
michael@0 | 1882 | if (best_site == -1) { |
michael@0 | 1883 | break; |
michael@0 | 1884 | } else { |
michael@0 | 1885 | ref_mv->as_mv.row += neighbors[best_site].row; |
michael@0 | 1886 | ref_mv->as_mv.col += neighbors[best_site].col; |
michael@0 | 1887 | best_address += (neighbors[best_site].row) * in_what_stride + |
michael@0 | 1888 | neighbors[best_site].col; |
michael@0 | 1889 | } |
michael@0 | 1890 | } |
michael@0 | 1891 | |
michael@0 | 1892 | this_mv.as_mv.row = ref_mv->as_mv.row * 8; |
michael@0 | 1893 | this_mv.as_mv.col = ref_mv->as_mv.col * 8; |
michael@0 | 1894 | |
michael@0 | 1895 | if (bestsad < INT_MAX) |
michael@0 | 1896 | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
michael@0 | 1897 | (unsigned int *)(&thissad)) + |
michael@0 | 1898 | mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
michael@0 | 1899 | mvjcost, mvcost, x->errorperbit); |
michael@0 | 1900 | else |
michael@0 | 1901 | return INT_MAX; |
michael@0 | 1902 | } |
michael@0 | 1903 | |
michael@0 | 1904 | int vp9_refining_search_sadx4(MACROBLOCK *x, |
michael@0 | 1905 | int_mv *ref_mv, int error_per_bit, |
michael@0 | 1906 | int search_range, vp9_variance_fn_ptr_t *fn_ptr, |
michael@0 | 1907 | int *mvjcost, int *mvcost[2], int_mv *center_mv) { |
michael@0 | 1908 | const MACROBLOCKD* const xd = &x->e_mbd; |
michael@0 | 1909 | MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; |
michael@0 | 1910 | int i, j; |
michael@0 | 1911 | int this_row_offset, this_col_offset; |
michael@0 | 1912 | |
michael@0 | 1913 | int what_stride = x->plane[0].src.stride; |
michael@0 | 1914 | int in_what_stride = xd->plane[0].pre[0].stride; |
michael@0 | 1915 | uint8_t *what = x->plane[0].src.buf; |
michael@0 | 1916 | uint8_t *best_address = xd->plane[0].pre[0].buf + |
michael@0 | 1917 | (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + |
michael@0 | 1918 | ref_mv->as_mv.col; |
michael@0 | 1919 | uint8_t *check_here; |
michael@0 | 1920 | unsigned int thissad; |
michael@0 | 1921 | int_mv this_mv; |
michael@0 | 1922 | unsigned int bestsad = INT_MAX; |
michael@0 | 1923 | int_mv fcenter_mv; |
michael@0 | 1924 | |
michael@0 | 1925 | int *mvjsadcost = x->nmvjointsadcost; |
michael@0 | 1926 | int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
michael@0 | 1927 | |
michael@0 | 1928 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
michael@0 | 1929 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
michael@0 | 1930 | |
michael@0 | 1931 | bestsad = fn_ptr->sdf(what, what_stride, best_address, |
michael@0 | 1932 | in_what_stride, 0x7fffffff) + |
michael@0 | 1933 | mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, |
michael@0 | 1934 | mvjsadcost, mvsadcost, error_per_bit); |
michael@0 | 1935 | |
michael@0 | 1936 | for (i = 0; i < search_range; i++) { |
michael@0 | 1937 | int best_site = -1; |
michael@0 | 1938 | int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) & |
michael@0 | 1939 | ((ref_mv->as_mv.row + 1) < x->mv_row_max) & |
michael@0 | 1940 | ((ref_mv->as_mv.col - 1) > x->mv_col_min) & |
michael@0 | 1941 | ((ref_mv->as_mv.col + 1) < x->mv_col_max); |
michael@0 | 1942 | |
michael@0 | 1943 | if (all_in) { |
michael@0 | 1944 | unsigned int sad_array[4]; |
michael@0 | 1945 | unsigned char const *block_offset[4]; |
michael@0 | 1946 | block_offset[0] = best_address - in_what_stride; |
michael@0 | 1947 | block_offset[1] = best_address - 1; |
michael@0 | 1948 | block_offset[2] = best_address + 1; |
michael@0 | 1949 | block_offset[3] = best_address + in_what_stride; |
michael@0 | 1950 | |
michael@0 | 1951 | fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
michael@0 | 1952 | sad_array); |
michael@0 | 1953 | |
michael@0 | 1954 | for (j = 0; j < 4; j++) { |
michael@0 | 1955 | if (sad_array[j] < bestsad) { |
michael@0 | 1956 | this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; |
michael@0 | 1957 | this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; |
michael@0 | 1958 | sad_array[j] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1959 | mvjsadcost, mvsadcost, error_per_bit); |
michael@0 | 1960 | |
michael@0 | 1961 | if (sad_array[j] < bestsad) { |
michael@0 | 1962 | bestsad = sad_array[j]; |
michael@0 | 1963 | best_site = j; |
michael@0 | 1964 | } |
michael@0 | 1965 | } |
michael@0 | 1966 | } |
michael@0 | 1967 | } else { |
michael@0 | 1968 | for (j = 0; j < 4; j++) { |
michael@0 | 1969 | this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
michael@0 | 1970 | this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
michael@0 | 1971 | |
michael@0 | 1972 | if ((this_col_offset > x->mv_col_min) && |
michael@0 | 1973 | (this_col_offset < x->mv_col_max) && |
michael@0 | 1974 | (this_row_offset > x->mv_row_min) && |
michael@0 | 1975 | (this_row_offset < x->mv_row_max)) { |
michael@0 | 1976 | check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + |
michael@0 | 1977 | best_address; |
michael@0 | 1978 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, |
michael@0 | 1979 | bestsad); |
michael@0 | 1980 | |
michael@0 | 1981 | if (thissad < bestsad) { |
michael@0 | 1982 | this_mv.as_mv.row = this_row_offset; |
michael@0 | 1983 | this_mv.as_mv.col = this_col_offset; |
michael@0 | 1984 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 1985 | mvjsadcost, mvsadcost, error_per_bit); |
michael@0 | 1986 | |
michael@0 | 1987 | if (thissad < bestsad) { |
michael@0 | 1988 | bestsad = thissad; |
michael@0 | 1989 | best_site = j; |
michael@0 | 1990 | } |
michael@0 | 1991 | } |
michael@0 | 1992 | } |
michael@0 | 1993 | } |
michael@0 | 1994 | } |
michael@0 | 1995 | |
michael@0 | 1996 | if (best_site == -1) { |
michael@0 | 1997 | break; |
michael@0 | 1998 | } else { |
michael@0 | 1999 | ref_mv->as_mv.row += neighbors[best_site].row; |
michael@0 | 2000 | ref_mv->as_mv.col += neighbors[best_site].col; |
michael@0 | 2001 | best_address += (neighbors[best_site].row) * in_what_stride + |
michael@0 | 2002 | neighbors[best_site].col; |
michael@0 | 2003 | } |
michael@0 | 2004 | } |
michael@0 | 2005 | |
michael@0 | 2006 | this_mv.as_mv.row = ref_mv->as_mv.row * 8; |
michael@0 | 2007 | this_mv.as_mv.col = ref_mv->as_mv.col * 8; |
michael@0 | 2008 | |
michael@0 | 2009 | if (bestsad < INT_MAX) |
michael@0 | 2010 | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, |
michael@0 | 2011 | (unsigned int *)(&thissad)) + |
michael@0 | 2012 | mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
michael@0 | 2013 | mvjcost, mvcost, x->errorperbit); |
michael@0 | 2014 | else |
michael@0 | 2015 | return INT_MAX; |
michael@0 | 2016 | } |
michael@0 | 2017 | |
michael@0 | 2018 | /* This function is called when we do joint motion search in comp_inter_inter |
michael@0 | 2019 | * mode. |
michael@0 | 2020 | */ |
michael@0 | 2021 | int vp9_refining_search_8p_c(MACROBLOCK *x, |
michael@0 | 2022 | int_mv *ref_mv, int error_per_bit, |
michael@0 | 2023 | int search_range, vp9_variance_fn_ptr_t *fn_ptr, |
michael@0 | 2024 | int *mvjcost, int *mvcost[2], int_mv *center_mv, |
michael@0 | 2025 | const uint8_t *second_pred, int w, int h) { |
michael@0 | 2026 | const MACROBLOCKD* const xd = &x->e_mbd; |
michael@0 | 2027 | MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, |
michael@0 | 2028 | {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; |
michael@0 | 2029 | int i, j; |
michael@0 | 2030 | int this_row_offset, this_col_offset; |
michael@0 | 2031 | |
michael@0 | 2032 | int what_stride = x->plane[0].src.stride; |
michael@0 | 2033 | int in_what_stride = xd->plane[0].pre[0].stride; |
michael@0 | 2034 | uint8_t *what = x->plane[0].src.buf; |
michael@0 | 2035 | uint8_t *best_address = xd->plane[0].pre[0].buf + |
michael@0 | 2036 | (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + |
michael@0 | 2037 | ref_mv->as_mv.col; |
michael@0 | 2038 | uint8_t *check_here; |
michael@0 | 2039 | unsigned int thissad; |
michael@0 | 2040 | int_mv this_mv; |
michael@0 | 2041 | unsigned int bestsad = INT_MAX; |
michael@0 | 2042 | int_mv fcenter_mv; |
michael@0 | 2043 | |
michael@0 | 2044 | int *mvjsadcost = x->nmvjointsadcost; |
michael@0 | 2045 | int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; |
michael@0 | 2046 | |
michael@0 | 2047 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
michael@0 | 2048 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
michael@0 | 2049 | |
michael@0 | 2050 | /* Get compound pred by averaging two pred blocks. */ |
michael@0 | 2051 | bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride, |
michael@0 | 2052 | second_pred, 0x7fffffff) + |
michael@0 | 2053 | mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, |
michael@0 | 2054 | mvjsadcost, mvsadcost, error_per_bit); |
michael@0 | 2055 | |
michael@0 | 2056 | for (i = 0; i < search_range; i++) { |
michael@0 | 2057 | int best_site = -1; |
michael@0 | 2058 | |
michael@0 | 2059 | for (j = 0; j < 8; j++) { |
michael@0 | 2060 | this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
michael@0 | 2061 | this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
michael@0 | 2062 | |
michael@0 | 2063 | if ((this_col_offset > x->mv_col_min) && |
michael@0 | 2064 | (this_col_offset < x->mv_col_max) && |
michael@0 | 2065 | (this_row_offset > x->mv_row_min) && |
michael@0 | 2066 | (this_row_offset < x->mv_row_max)) { |
michael@0 | 2067 | check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + |
michael@0 | 2068 | best_address; |
michael@0 | 2069 | |
michael@0 | 2070 | /* Get compound block and use it to calculate SAD. */ |
michael@0 | 2071 | thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, |
michael@0 | 2072 | second_pred, bestsad); |
michael@0 | 2073 | |
michael@0 | 2074 | if (thissad < bestsad) { |
michael@0 | 2075 | this_mv.as_mv.row = this_row_offset; |
michael@0 | 2076 | this_mv.as_mv.col = this_col_offset; |
michael@0 | 2077 | thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, |
michael@0 | 2078 | mvjsadcost, mvsadcost, error_per_bit); |
michael@0 | 2079 | if (thissad < bestsad) { |
michael@0 | 2080 | bestsad = thissad; |
michael@0 | 2081 | best_site = j; |
michael@0 | 2082 | } |
michael@0 | 2083 | } |
michael@0 | 2084 | } |
michael@0 | 2085 | } |
michael@0 | 2086 | |
michael@0 | 2087 | if (best_site == -1) { |
michael@0 | 2088 | break; |
michael@0 | 2089 | } else { |
michael@0 | 2090 | ref_mv->as_mv.row += neighbors[best_site].row; |
michael@0 | 2091 | ref_mv->as_mv.col += neighbors[best_site].col; |
michael@0 | 2092 | best_address += (neighbors[best_site].row) * in_what_stride + |
michael@0 | 2093 | neighbors[best_site].col; |
michael@0 | 2094 | } |
michael@0 | 2095 | } |
michael@0 | 2096 | |
michael@0 | 2097 | this_mv.as_mv.row = ref_mv->as_mv.row * 8; |
michael@0 | 2098 | this_mv.as_mv.col = ref_mv->as_mv.col * 8; |
michael@0 | 2099 | |
michael@0 | 2100 | if (bestsad < INT_MAX) { |
michael@0 | 2101 | // FIXME(rbultje, yunqing): add full-pixel averaging variance functions |
michael@0 | 2102 | // so we don't have to use the subpixel with xoff=0,yoff=0 here. |
michael@0 | 2103 | return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride, |
michael@0 | 2104 | (unsigned int *)(&thissad), second_pred) + |
michael@0 | 2105 | mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, |
michael@0 | 2106 | mvjcost, mvcost, x->errorperbit); |
michael@0 | 2107 | } else { |
michael@0 | 2108 | return INT_MAX; |
michael@0 | 2109 | } |
michael@0 | 2110 | } |