media/libvpx/vp8/common/mfqe.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libvpx/vp8/common/mfqe.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,385 @@
     1.4 +/*
     1.5 + *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
     1.6 + *
     1.7 + *  Use of this source code is governed by a BSD-style license
     1.8 + *  that can be found in the LICENSE file in the root of the source
     1.9 + *  tree. An additional intellectual property rights grant can be found
    1.10 + *  in the file PATENTS.  All contributing project authors may
    1.11 + *  be found in the AUTHORS file in the root of the source tree.
    1.12 + */
    1.13 +
    1.14 +
    1.15 +/* MFQE: Multiframe Quality Enhancement
    1.16 + * In rate limited situations keyframes may cause significant visual artifacts
     1.17 + * commonly referred to as "popping." This file implements a postprocessing
     1.18 + * algorithm which blends data from the preceding frame when there is no
    1.19 + * motion and the q from the previous frame is lower which indicates that it is
    1.20 + * higher quality.
    1.21 + */
    1.22 +
    1.23 +#include "postproc.h"
    1.24 +#include "variance.h"
    1.25 +#include "vpx_mem/vpx_mem.h"
    1.26 +#include "vp8_rtcd.h"
    1.27 +#include "vpx_scale/yv12config.h"
    1.28 +
    1.29 +#include <limits.h>
    1.30 +#include <stdlib.h>
    1.31 +
    1.32 +static void filter_by_weight(unsigned char *src, int src_stride,
    1.33 +                             unsigned char *dst, int dst_stride,
    1.34 +                             int block_size, int src_weight)
    1.35 +{
    1.36 +    int dst_weight = (1 << MFQE_PRECISION) - src_weight;
    1.37 +    int rounding_bit = 1 << (MFQE_PRECISION - 1);
    1.38 +    int r, c;
    1.39 +
    1.40 +    for (r = 0; r < block_size; r++)
    1.41 +    {
    1.42 +        for (c = 0; c < block_size; c++)
    1.43 +        {
    1.44 +            dst[c] = (src[c] * src_weight +
    1.45 +                      dst[c] * dst_weight +
    1.46 +                      rounding_bit) >> MFQE_PRECISION;
    1.47 +        }
    1.48 +        src += src_stride;
    1.49 +        dst += dst_stride;
    1.50 +    }
    1.51 +}
    1.52 +
/* C reference kernel (run-time dispatched via vp8_rtcd.h): weighted 16x16
 * blend of src into dst.  src_weight is a Q(MFQE_PRECISION) fixed-point
 * source weight; dst implicitly receives the complement. */
void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
                                 unsigned char *dst, int dst_stride,
                                 int src_weight)
{
    filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
}
    1.59 +
/* C reference kernel (run-time dispatched via vp8_rtcd.h): weighted 8x8
 * blend of src into dst.  src_weight is a Q(MFQE_PRECISION) fixed-point
 * source weight; dst implicitly receives the complement. */
void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
                               unsigned char *dst, int dst_stride,
                               int src_weight)
{
    filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
}
    1.66 +
/* C reference kernel (run-time dispatched via vp8_rtcd.h): weighted 4x4
 * blend of src into dst.  src_weight is a Q(MFQE_PRECISION) fixed-point
 * source weight; dst implicitly receives the complement. */
void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
                               unsigned char *dst, int dst_stride,
                               int src_weight)
{
    filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
}
    1.73 +
    1.74 +static void apply_ifactor(unsigned char *y_src,
    1.75 +                          int y_src_stride,
    1.76 +                          unsigned char *y_dst,
    1.77 +                          int y_dst_stride,
    1.78 +                          unsigned char *u_src,
    1.79 +                          unsigned char *v_src,
    1.80 +                          int uv_src_stride,
    1.81 +                          unsigned char *u_dst,
    1.82 +                          unsigned char *v_dst,
    1.83 +                          int uv_dst_stride,
    1.84 +                          int block_size,
    1.85 +                          int src_weight)
    1.86 +{
    1.87 +    if (block_size == 16)
    1.88 +    {
    1.89 +        vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
    1.90 +        vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
    1.91 +        vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
    1.92 +    }
    1.93 +    else /* if (block_size == 8) */
    1.94 +    {
    1.95 +        vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
    1.96 +        vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
    1.97 +        vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
    1.98 +    }
    1.99 +}
   1.100 +
   1.101 +static unsigned int int_sqrt(unsigned int x)
   1.102 +{
   1.103 +    unsigned int y = x;
   1.104 +    unsigned int guess;
   1.105 +    int p = 1;
   1.106 +    while (y>>=1) p++;
   1.107 +    p>>=1;
   1.108 +
   1.109 +    guess=0;
   1.110 +    while (p>=0)
   1.111 +    {
   1.112 +        guess |= (1<<p);
   1.113 +        if (x<guess*guess)
   1.114 +            guess -= (1<<p);
   1.115 +        p--;
   1.116 +    }
   1.117 +    /* choose between guess or guess+1 */
   1.118 +    return guess+(guess*guess+guess+1<=x);
   1.119 +}
   1.120 +
   1.121 +#define USE_SSD
/* Core MFQE decision + blend for one block.
 *
 * Compares the incoming block (y/u/v) against the co-located block in the
 * postproc destination (yd/ud/vd).  If the inter-frame error is small
 * relative to a threshold derived from the Q delta and the block activity,
 * the destination is blended with the source via apply_ifactor(); otherwise
 * the source block is copied to the destination outright.
 *
 * blksize: luma block size -- 16 or 8 (any non-16 value takes the 8 path);
 *          4:2:0 chroma uses half that size.
 * qcurr/qprev: base Q index of the current and previous frames.
 */
static void multiframe_quality_enhance_block
(
    int blksize, /* Currently only values supported are 16, 8 */
    int qcurr,
    int qprev,
    unsigned char *y,
    unsigned char *u,
    unsigned char *v,
    int y_stride,
    int uv_stride,
    unsigned char *yd,
    unsigned char *ud,
    unsigned char *vd,
    int yd_stride,
    int uvd_stride
)
{
    static const unsigned char VP8_ZEROS[16]=
    {
         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    };
    int uvblksize = blksize >> 1;
    int qdiff = qcurr - qprev;

    int i;
    unsigned char *up;
    unsigned char *udp;
    unsigned char *vp;
    unsigned char *vdp;

    unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;

    if (blksize == 16)
    {
        /* "Activity": variance of the block measured against an all-zero
         * reference (stride 0); +128 >> 8 rounds down to per-pixel scale. */
        actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
        act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
#ifdef USE_SSD
        /* The variance return value is discarded; only the sse out-param
         * is used, giving a rounded per-pixel squared error. */
        sad = (vp8_variance16x16(y, y_stride, yd, yd_stride, &sse));
        sad = (sse + 128)>>8;
        usad = (vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse));
        usad = (sse + 32)>>6;
        vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse));
        vsad = (sse + 32)>>6;
#else
        /* Per-pixel absolute difference instead of squared error. */
        sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8;
        usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6;
        vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6;
#endif
    }
    else /* if (blksize == 8) */
    {
        /* Same measurements at 8x8 luma / 4x4 chroma scale. */
        actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
        act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
#ifdef USE_SSD
        sad = (vp8_variance8x8(y, y_stride, yd, yd_stride, &sse));
        sad = (sse + 32)>>6;
        usad = (vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse));
        usad = (sse + 8)>>4;
        vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse));
        vsad = (sse + 8)>>4;
#else
        sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6;
        usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4;
        vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4;
#endif
    }

    /* Blending is risky when the destination block is far more "active"
     * (textured) than the source block. */
    actrisk = (actd > act * 5);

    /* thr = qdiff/16 + log2(act) + log4(qprev) */
    /* NOTE(review): despite the comment above, the log2 term is computed
     * from actd (destination activity), not act.  Both actd and qprev are
     * destructively shifted here; neither is read again below. */
    thr = (qdiff >> 4);
    while (actd >>= 1) thr++;
    while (qprev >>= 2) thr++;

#ifdef USE_SSD
    /* Squared-error metrics are compared against the squared threshold. */
    thrsq = thr * thr;
    if (sad < thrsq &&
        /* additional checks for color mismatch and excessive addition of
         * high-frequencies */
        4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
#else
    if (sad < thr &&
        /* additional checks for color mismatch and excessive addition of
         * high-frequencies */
        2 * usad < thr && 2 * vsad < thr && !actrisk)
#endif
    {
        int ifactor;
#ifdef USE_SSD
        /* TODO: optimize this later to not need sqr root */
        sad = int_sqrt(sad);
#endif
        /* Source weight: error as a fraction of the threshold in
         * Q(MFQE_PRECISION) fixed point, halved for every 32 steps of Q
         * increase.  (thr > sad > 0 here, so no division by zero.) */
        ifactor = (sad << MFQE_PRECISION) / thr;
        ifactor >>= (qdiff >> 5);

        if (ifactor)
        {
            apply_ifactor(y, y_stride, yd, yd_stride,
                          u, v, uv_stride,
                          ud, vd, uvd_stride,
                          blksize, ifactor);
        }
    }
    else  /* else implicitly copy from previous frame */
    {
        if (blksize == 16)
        {
            vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
            vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
            vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
        }
        else  /* if (blksize == 8) */
        {
            vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
            /* No 4x4 copy helper exists, so copy the chroma 4x4 blocks one
             * row (uvblksize bytes) at a time. */
            for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
                vpx_memcpy(udp, up, uvblksize);
            for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
                vpx_memcpy(vdp, vp, uvblksize);
        }
    }
}
   1.243 +
   1.244 +static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map)
   1.245 +{
   1.246 +    if (mode_info_context->mbmi.mb_skip_coeff)
   1.247 +        map[0] = map[1] = map[2] = map[3] = 1;
   1.248 +    else if (mode_info_context->mbmi.mode==SPLITMV)
   1.249 +    {
   1.250 +        static int ndx[4][4] =
   1.251 +        {
   1.252 +            {0, 1, 4, 5},
   1.253 +            {2, 3, 6, 7},
   1.254 +            {8, 9, 12, 13},
   1.255 +            {10, 11, 14, 15}
   1.256 +        };
   1.257 +        int i, j;
   1.258 +        for (i=0; i<4; ++i)
   1.259 +        {
   1.260 +            map[i] = 1;
   1.261 +            for (j=0; j<4 && map[j]; ++j)
   1.262 +                map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
   1.263 +                           mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
   1.264 +        }
   1.265 +    }
   1.266 +    else
   1.267 +    {
   1.268 +        map[0] = map[1] = map[2] = map[3] =
   1.269 +            (mode_info_context->mbmi.mode > B_PRED &&
   1.270 +             abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
   1.271 +             abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
   1.272 +    }
   1.273 +    return (map[0]+map[1]+map[2]+map[3]);
   1.274 +}
   1.275 +
/* Apply MFQE to the whole frame: walk every macroblock of
 * cm->frame_to_show and either enhance (blend/copy via
 * multiframe_quality_enhance_block) or plain-copy it into
 * cm->post_proc_buffer.
 *
 * Key frames enhance every MB in full; inter frames enhance per 8x8
 * quadrant according to qualify_inter_mb(); any other frame type is
 * copied through unchanged.
 */
void vp8_multiframe_quality_enhance
(
    VP8_COMMON *cm
)
{
    YV12_BUFFER_CONFIG *show = cm->frame_to_show;
    YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;

    FRAME_TYPE frame_type = cm->frame_type;
    /* Point at base of Mb MODE_INFO list has motion vectors etc */
    const MODE_INFO *mode_info_context = cm->show_frame_mi;
    int mb_row;
    int mb_col;
    int totmap, map[4];  /* qualifying-quadrant count and per-quadrant flags */
    int qcurr = cm->base_qindex;
    int qprev = cm->postproc_state.last_base_qindex;

    unsigned char *y_ptr, *u_ptr, *v_ptr;
    unsigned char *yd_ptr, *ud_ptr, *vd_ptr;

    /* Set up the buffer pointers */
    y_ptr = show->y_buffer;
    u_ptr = show->u_buffer;
    v_ptr = show->v_buffer;
    yd_ptr = dest->y_buffer;
    ud_ptr = dest->u_buffer;
    vd_ptr = dest->v_buffer;

    /* postprocess each macro block */
    for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
    {
        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
        {
            /* if motion is high there will likely be no benefit */
            if (frame_type == INTER_FRAME) totmap = qualify_inter_mb(mode_info_context, map);
            else totmap = (frame_type == KEY_FRAME ? 4 : 0);
            if (totmap)
            {
                if (totmap < 4)
                {
                    /* Mixed MB: handle each 8x8 quadrant (i = quadrant
                     * row, j = quadrant column) independently. */
                    int i, j;
                    for (i=0; i<2; ++i)
                        for (j=0; j<2; ++j)
                        {
                            if (map[i*2+j])
                            {
                                /* Quadrant qualifies: 8x8 luma / 4x4
                                 * chroma enhancement. */
                                multiframe_quality_enhance_block(8, qcurr, qprev,
                                                                 y_ptr + 8*(i*show->y_stride+j),
                                                                 u_ptr + 4*(i*show->uv_stride+j),
                                                                 v_ptr + 4*(i*show->uv_stride+j),
                                                                 show->y_stride,
                                                                 show->uv_stride,
                                                                 yd_ptr + 8*(i*dest->y_stride+j),
                                                                 ud_ptr + 4*(i*dest->uv_stride+j),
                                                                 vd_ptr + 4*(i*dest->uv_stride+j),
                                                                 dest->y_stride,
                                                                 dest->uv_stride);
                            }
                            else
                            {
                                /* copy a 8x8 block */
                                /* Chroma 4x4 blocks are copied row by row
                                 * since there is no 4x4 copy helper. */
                                int k;
                                unsigned char *up = u_ptr + 4*(i*show->uv_stride+j);
                                unsigned char *udp = ud_ptr + 4*(i*dest->uv_stride+j);
                                unsigned char *vp = v_ptr + 4*(i*show->uv_stride+j);
                                unsigned char *vdp = vd_ptr + 4*(i*dest->uv_stride+j);
                                vp8_copy_mem8x8(y_ptr + 8*(i*show->y_stride+j), show->y_stride,
                                                yd_ptr + 8*(i*dest->y_stride+j), dest->y_stride);
                                for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
                                                        vp += show->uv_stride, vdp += dest->uv_stride)
                                {
                                    vpx_memcpy(udp, up, 4);
                                    vpx_memcpy(vdp, vp, 4);
                                }
                            }
                        }
                }
                else /* totmap = 4 */
                {
                    /* Whole MB qualifies: 16x16 luma / 8x8 chroma. */
                    multiframe_quality_enhance_block(16, qcurr, qprev, y_ptr,
                                                     u_ptr, v_ptr,
                                                     show->y_stride,
                                                     show->uv_stride,
                                                     yd_ptr, ud_ptr, vd_ptr,
                                                     dest->y_stride,
                                                     dest->uv_stride);
                }
            }
            else
            {
                /* MB does not qualify at all: copy it through unchanged. */
                vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
                vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
                vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
            }
            /* Advance one macroblock: 16 luma / 8 chroma pixels. */
            y_ptr += 16;
            u_ptr += 8;
            v_ptr += 8;
            yd_ptr += 16;
            ud_ptr += 8;
            vd_ptr += 8;
            mode_info_context++;     /* step to next MB */
        }

        /* Rewind to the start of the row, then step down one MB row
         * (16 luma / 8 chroma lines). */
        y_ptr += show->y_stride  * 16 - 16 * cm->mb_cols;
        u_ptr += show->uv_stride *  8 - 8 * cm->mb_cols;
        v_ptr += show->uv_stride *  8 - 8 * cm->mb_cols;
        yd_ptr += dest->y_stride  * 16 - 16 * cm->mb_cols;
        ud_ptr += dest->uv_stride *  8 - 8 * cm->mb_cols;
        vd_ptr += dest->uv_stride *  8 - 8 * cm->mb_cols;

        /* Mode-info rows appear to carry one extra border entry beyond
         * mb_cols -- TODO(review): confirm against the allocation. */
        mode_info_context++;         /* Skip border mb */
    }
}

mercurial