--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/media/libvpx/vp8/common/mfqe.c Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,385 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/* MFQE: Multiframe Quality Enhancement
+ * In rate-limited situations, keyframes may cause significant visual
+ * artifacts commonly referred to as "popping."  This file implements a
+ * postprocessing algorithm which blends data from the preceding frame when
+ * there is no motion and the q from the previous frame is lower, which
+ * indicates that it is of higher quality.
+ */
+
+#include "postproc.h"
+#include "variance.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp8_rtcd.h"
+#include "vpx_scale/yv12config.h"
+
+#include <limits.h>
+#include <stdlib.h>
+
+static void filter_by_weight(unsigned char *src, int src_stride,
+                             unsigned char *dst, int dst_stride,
+                             int block_size, int src_weight)
+{
+    int dst_weight = (1 << MFQE_PRECISION) - src_weight;
+    int rounding_bit = 1 << (MFQE_PRECISION - 1);
+    int r, c;
+
+    for (r = 0; r < block_size; r++)
+    {
+        for (c = 0; c < block_size; c++)
+        {
+            dst[c] = (src[c] * src_weight +
+                      dst[c] * dst_weight +
+                      rounding_bit) >> MFQE_PRECISION;
+        }
+        src += src_stride;
+        dst += dst_stride;
+    }
+}
+
+void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
+                                 unsigned char *dst, int dst_stride,
+                                 int src_weight)
+{
+    filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
+}
+
+void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
+                               unsigned char *dst, int dst_stride,
+                               int src_weight)
+{
+    filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
+}
+
+void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
+                               unsigned char *dst, int dst_stride,
+                               int src_weight)
+{
+    filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
+}
+
+static void apply_ifactor(unsigned char *y_src,
+                          int y_src_stride,
+                          unsigned char *y_dst,
+                          int y_dst_stride,
+                          unsigned char *u_src,
+                          unsigned char *v_src,
+                          int uv_src_stride,
+                          unsigned char *u_dst,
+                          unsigned char *v_dst,
+                          int uv_dst_stride,
+                          int block_size,
+                          int src_weight)
+{
+    if (block_size == 16)
+    {
+        vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
+        vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
+        vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
+    }
+    else /* if (block_size == 8) */
+    {
+        vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
+        vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
+        vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
+    }
+}
+
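
As a quick illustration of the weighted blend implemented by filter_by_weight() above, here is a minimal standalone sketch (not part of the patch). It assumes MFQE_PRECISION is 4, the value defined in vp8/common/postproc.h, so src_weight runs from 0 (keep the destination pixel) to 16 (keep the source pixel).

#include <stdio.h>

#define MFQE_PRECISION 4  /* assumed to match vp8/common/postproc.h */

/* One pixel of the blend in filter_by_weight(): dst becomes a weighted
 * average of src and the value already stored in dst. */
static unsigned char blend_pixel(unsigned char src, unsigned char dst,
                                 int src_weight)
{
    int dst_weight = (1 << MFQE_PRECISION) - src_weight;
    int rounding_bit = 1 << (MFQE_PRECISION - 1);
    return (unsigned char)((src * src_weight + dst * dst_weight +
                            rounding_bit) >> MFQE_PRECISION);
}

int main(void)
{
    printf("%d\n", blend_pixel(200, 100, 16));  /* 200: keep src entirely */
    printf("%d\n", blend_pixel(200, 100, 0));   /* 100: keep dst entirely */
    printf("%d\n", blend_pixel(200, 100, 8));   /* 150: even 50/50 mix    */
    return 0;
}
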
+static unsigned int int_sqrt(unsigned int x)
+{
+    unsigned int y = x;
+    unsigned int guess;
+    int p = 1;
+    while (y>>=1) p++;
+    p>>=1;
+
+    guess=0;
+    while (p>=0)
+    {
+        guess |= (1<<p);
+        if (x<guess*guess)
+            guess -= (1<<p);
+        p--;
+    }
+    /* choose between guess or guess+1 */
+    return guess+(guess*guess+guess+1<=x);
+}
+
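
int_sqrt() above computes a bit-by-bit integer square root and then rounds to the nearest integer. Below is a small standalone check of that behaviour (not part of the patch, link against libm); the function body is copied verbatim so the snippet compiles on its own.

#include <math.h>
#include <stdio.h>

/* Copied verbatim from the patch above so this sketch is self-contained. */
static unsigned int int_sqrt(unsigned int x)
{
    unsigned int y = x;
    unsigned int guess;
    int p = 1;
    while (y>>=1) p++;
    p>>=1;

    guess=0;
    while (p>=0)
    {
        guess |= (1<<p);
        if (x<guess*guess)
            guess -= (1<<p);
        p--;
    }
    /* choose between guess or guess+1 */
    return guess+(guess*guess+guess+1<=x);
}

int main(void)
{
    unsigned int x;
    for (x = 0; x < (1u << 20); x++)
    {
        unsigned int ref = (unsigned int)floor(sqrt((double)x) + 0.5);
        if (int_sqrt(x) != ref)
        {
            printf("mismatch at %u: got %u, want %u\n", x, int_sqrt(x), ref);
            return 1;
        }
    }
    printf("int_sqrt() matches nearest-integer sqrt on [0, 2^20)\n");
    return 0;
}
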
+#define USE_SSD
+static void multiframe_quality_enhance_block
+(
+    int blksize, /* Currently only values supported are 16 and 8 */
+    int qcurr,
+    int qprev,
+    unsigned char *y,
+    unsigned char *u,
+    unsigned char *v,
+    int y_stride,
+    int uv_stride,
+    unsigned char *yd,
+    unsigned char *ud,
+    unsigned char *vd,
+    int yd_stride,
+    int uvd_stride
+)
+{
+    static const unsigned char VP8_ZEROS[16]=
+    {
+        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+    };
+    int uvblksize = blksize >> 1;
+    int qdiff = qcurr - qprev;
+
+    int i;
+    unsigned char *up;
+    unsigned char *udp;
+    unsigned char *vp;
+    unsigned char *vdp;
+
+    unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;
+
+    if (blksize == 16)
+    {
+        actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
+        act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
+#ifdef USE_SSD
+        sad = (vp8_variance16x16(y, y_stride, yd, yd_stride, &sse));
+        sad = (sse + 128)>>8;
+        usad = (vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse));
+        usad = (sse + 32)>>6;
+        vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse));
+        vsad = (sse + 32)>>6;
+#else
+        sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8;
+        usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6;
+        vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6;
+#endif
+    }
+    else /* if (blksize == 8) */
+    {
+        actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
+        act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
+#ifdef USE_SSD
+        sad = (vp8_variance8x8(y, y_stride, yd, yd_stride, &sse));
+        sad = (sse + 32)>>6;
+        usad = (vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse));
+        usad = (sse + 8)>>4;
+        vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse));
+        vsad = (sse + 8)>>4;
+#else
+        sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6;
+        usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4;
+        vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4;
+#endif
+    }
+
+    actrisk = (actd > act * 5);
+
+    /* thr = qdiff/16 + log2(actd) + log4(qprev) */
+    thr = (qdiff >> 4);
+    while (actd >>= 1) thr++;
+    while (qprev >>= 2) thr++;
+
+#ifdef USE_SSD
+    thrsq = thr * thr;
+    if (sad < thrsq &&
+        /* additional checks for color mismatch and excessive addition of
+         * high-frequencies */
+        4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
+#else
+    if (sad < thr &&
+        /* additional checks for color mismatch and excessive addition of
+         * high-frequencies */
+        2 * usad < thr && 2 * vsad < thr && !actrisk)
+#endif
+    {
+        int ifactor;
+#ifdef USE_SSD
+        /* TODO: optimize this later to not need square root */
+        sad = int_sqrt(sad);
+#endif
+        ifactor = (sad << MFQE_PRECISION) / thr;
+        ifactor >>= (qdiff >> 5);
+
+        if (ifactor)
+        {
+            apply_ifactor(y, y_stride, yd, yd_stride,
+                          u, v, uv_stride,
+                          ud, vd, uvd_stride,
+                          blksize, ifactor);
+        }
+    }
+    else /* no blend: copy the current frame block as-is */
+    {
+        if (blksize == 16)
+        {
+            vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
+            vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
+            vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
+        }
+        else /* if (blksize == 8) */
+        {
+            vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
+            for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
+                vpx_memcpy(udp, up, uvblksize);
+            for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
+                vpx_memcpy(vdp, vp, uvblksize);
+        }
+    }
+}
+
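
For intuition about how strongly a block gets blended, here is a standalone sketch (not part of the patch) of the thr/ifactor arithmetic from multiframe_quality_enhance_block() on the USE_SSD path. The input values are made up, MFQE_PRECISION is assumed to be 4, sqrt() stands in for int_sqrt(), and the chroma and activity checks are omitted for brevity.

#include <math.h>
#include <stdio.h>

#define MFQE_PRECISION 4   /* assumed to match vp8/common/postproc.h */

int main(void)
{
    /* Made-up inputs: the current frame is coarser (higher q) than the
     * previous one, and the block differs only slightly from the
     * co-located block in the previous output. */
    int qcurr = 60, qprev = 20;
    unsigned int actd = 64;   /* per-pixel variance of the current block */
    unsigned int sad  = 25;   /* per-pixel SSD between the two blocks    */
    int qdiff = qcurr - qprev;

    /* thr = qdiff/16 + log2(actd) + log4(qprev), as in the patch */
    unsigned int thr = (unsigned int)(qdiff >> 4);
    while (actd >>= 1) thr++;
    while (qprev >>= 2) thr++;

    if (sad < thr * thr)   /* luma check only; the patch also checks chroma */
    {
        int ifactor;
        sad = (unsigned int)floor(sqrt((double)sad) + 0.5); /* int_sqrt() stand-in */
        ifactor = (int)((sad << MFQE_PRECISION) / thr);
        ifactor >>= (qdiff >> 5);
        /* ifactor is the weight given to the current frame; the rest of the
         * 1 << MFQE_PRECISION total goes to the previous, higher quality
         * content already sitting in the destination buffer. */
        printf("thr=%u ifactor=%d of %d\n", thr, ifactor, 1 << MFQE_PRECISION);
    }
    else
    {
        printf("difference above threshold: block is not blended\n");
    }
    return 0;
}

With these inputs the sketch prints thr=10 and ifactor=4, i.e. the current block contributes 4/16 of the blended result and the previous, higher quality content contributes the remaining 12/16.
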
+static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map)
+{
+    if (mode_info_context->mbmi.mb_skip_coeff)
+        map[0] = map[1] = map[2] = map[3] = 1;
+    else if (mode_info_context->mbmi.mode==SPLITMV)
+    {
+        static int ndx[4][4] =
+        {
+            {0, 1, 4, 5},
+            {2, 3, 6, 7},
+            {8, 9, 12, 13},
+            {10, 11, 14, 15}
+        };
+        int i, j;
+        for (i=0; i<4; ++i)
+        {
+            map[i] = 1;
+            for (j=0; j<4 && map[j]; ++j)
+                map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
+                           mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
+        }
+    }
+    else
+    {
+        map[0] = map[1] = map[2] = map[3] =
+            (mode_info_context->mbmi.mode > B_PRED &&
+             abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
+             abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
+    }
+    return (map[0]+map[1]+map[2]+map[3]);
+}
+
+void vp8_multiframe_quality_enhance
+(
+    VP8_COMMON *cm
+)
+{
+    YV12_BUFFER_CONFIG *show = cm->frame_to_show;
+    YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
+
+    FRAME_TYPE frame_type = cm->frame_type;
+    /* Point at the base of the MB MODE_INFO list, which has motion vectors etc. */
+    const MODE_INFO *mode_info_context = cm->show_frame_mi;
+    int mb_row;
+    int mb_col;
+    int totmap, map[4];
+    int qcurr = cm->base_qindex;
+    int qprev = cm->postproc_state.last_base_qindex;
+
+    unsigned char *y_ptr, *u_ptr, *v_ptr;
+    unsigned char *yd_ptr, *ud_ptr, *vd_ptr;
+
+    /* Set up the buffer pointers */
+    y_ptr = show->y_buffer;
+    u_ptr = show->u_buffer;
+    v_ptr = show->v_buffer;
+    yd_ptr = dest->y_buffer;
+    ud_ptr = dest->u_buffer;
+    vd_ptr = dest->v_buffer;
+
+    /* postprocess each macroblock */
+    for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+    {
+        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+        {
+            /* if motion is high there will likely be no benefit */
+            if (frame_type == INTER_FRAME) totmap = qualify_inter_mb(mode_info_context, map);
+            else totmap = (frame_type == KEY_FRAME ? 4 : 0);
+            if (totmap)
+            {
+                if (totmap < 4)
+                {
+                    int i, j;
+                    for (i=0; i<2; ++i)
+                        for (j=0; j<2; ++j)
+                        {
+                            if (map[i*2+j])
+                            {
+                                multiframe_quality_enhance_block(8, qcurr, qprev,
+                                                                 y_ptr + 8*(i*show->y_stride+j),
+                                                                 u_ptr + 4*(i*show->uv_stride+j),
+                                                                 v_ptr + 4*(i*show->uv_stride+j),
+                                                                 show->y_stride,
+                                                                 show->uv_stride,
+                                                                 yd_ptr + 8*(i*dest->y_stride+j),
+                                                                 ud_ptr + 4*(i*dest->uv_stride+j),
+                                                                 vd_ptr + 4*(i*dest->uv_stride+j),
+                                                                 dest->y_stride,
+                                                                 dest->uv_stride);
+                            }
+                            else
+                            {
+                                /* copy an 8x8 block */
+                                int k;
+                                unsigned char *up = u_ptr + 4*(i*show->uv_stride+j);
+                                unsigned char *udp = ud_ptr + 4*(i*dest->uv_stride+j);
+                                unsigned char *vp = v_ptr + 4*(i*show->uv_stride+j);
+                                unsigned char *vdp = vd_ptr + 4*(i*dest->uv_stride+j);
+                                vp8_copy_mem8x8(y_ptr + 8*(i*show->y_stride+j), show->y_stride,
+                                                yd_ptr + 8*(i*dest->y_stride+j), dest->y_stride);
+                                for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
+                                                        vp += show->uv_stride, vdp += dest->uv_stride)
+                                {
+                                    vpx_memcpy(udp, up, 4);
+                                    vpx_memcpy(vdp, vp, 4);
+                                }
+                            }
+                        }
+                }
+                else /* totmap == 4 */
+                {
+                    multiframe_quality_enhance_block(16, qcurr, qprev, y_ptr,
+                                                     u_ptr, v_ptr,
+                                                     show->y_stride,
+                                                     show->uv_stride,
+                                                     yd_ptr, ud_ptr, vd_ptr,
+                                                     dest->y_stride,
+                                                     dest->uv_stride);
+                }
+            }
+            else
+            {
+                vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
+                vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
+                vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
+            }
+            y_ptr += 16;
+            u_ptr += 8;
+            v_ptr += 8;
+            yd_ptr += 16;
+            ud_ptr += 8;
+            vd_ptr += 8;
+            mode_info_context++;     /* step to next MB */
+        }
+
+        y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
+        u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
+        v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
+        yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
+        ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
+        vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
+
+        mode_info_context++;         /* Skip border mb */
+    }
+}
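
Finally, a small standalone sketch (not part of the patch) of the pointer arithmetic used to walk macroblocks in vp8_multiframe_quality_enhance(): after stepping right by one block per column, adding stride * B - B * cols moves a pointer from the end of one block row to the start of the next. A 2x3 grid of 4x4 blocks stands in here for real 16x16 macroblocks.

#include <stdio.h>

int main(void)
{
    enum { B = 4, ROWS = 2, COLS = 3, STRIDE = B * COLS };
    unsigned char frame[B * ROWS * STRIDE];
    unsigned char *p = frame;
    int r, c;

    for (r = 0; r < ROWS; r++)
    {
        for (c = 0; c < COLS; c++)
        {
            /* p points at the top-left pixel of block (r, c) */
            printf("block (%d,%d) at offset %ld\n", r, c, (long)(p - frame));
            p += B;                       /* step to the next block */
        }
        p += STRIDE * B - B * COLS;       /* wrap to the start of the next block row */
    }
    return 0;
}

The extra mode_info_context++ at the end of each row in the patch (commented "Skip border mb") accounts for the one-entry border column in the MODE_INFO array and has no counterpart in this simplified sketch.
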