media/libvpx/vp9/common/vp9_convolve.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libvpx/vp9/common/vp9_convolve.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,305 @@
     1.4 +/*
     1.5 + *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
     1.6 + *
     1.7 + *  Use of this source code is governed by a BSD-style license
     1.8 + *  that can be found in the LICENSE file in the root of the source
     1.9 + *  tree. An additional intellectual property rights grant can be found
    1.10 + *  in the file PATENTS.  All contributing project authors may
    1.11 + *  be found in the AUTHORS file in the root of the source tree.
    1.12 + */
    1.13 +
    1.14 +#include <assert.h>
    1.15 +
    1.16 +#include "./vpx_config.h"
    1.17 +#include "./vp9_rtcd.h"
    1.18 +#include "vp9/common/vp9_common.h"
    1.19 +#include "vp9/common/vp9_convolve.h"
    1.20 +#include "vp9/common/vp9_filter.h"
    1.21 +#include "vpx/vpx_integer.h"
    1.22 +#include "vpx_ports/mem.h"
    1.23 +
    1.24 +static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    1.25 +                             uint8_t *dst, ptrdiff_t dst_stride,
    1.26 +                             const int16_t *filter_x0, int x_step_q4,
    1.27 +                             const int16_t *filter_y, int y_step_q4,
    1.28 +                             int w, int h, int taps) {
    1.29 +  int x, y, k;
    1.30 +
    1.31 +  /* NOTE: This assumes that the filter table is 256-byte aligned. */
    1.32 +  /* TODO(agrange) Modify to make independent of table alignment. */
    1.33 +  const int16_t *const filter_x_base =
    1.34 +      (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
    1.35 +
    1.36 +  /* Adjust base pointer address for this source line */
    1.37 +  src -= taps / 2 - 1;
    1.38 +
    1.39 +  for (y = 0; y < h; ++y) {
    1.40 +    /* Initial phase offset */
    1.41 +    int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
    1.42 +
    1.43 +    for (x = 0; x < w; ++x) {
    1.44 +      /* Per-pixel src offset */
    1.45 +      const int src_x = x_q4 >> SUBPEL_BITS;
    1.46 +      int sum = 0;
    1.47 +
    1.48 +      /* Pointer to filter to use */
    1.49 +      const int16_t *const filter_x = filter_x_base +
    1.50 +          (x_q4 & SUBPEL_MASK) * taps;
    1.51 +
    1.52 +      for (k = 0; k < taps; ++k)
    1.53 +        sum += src[src_x + k] * filter_x[k];
    1.54 +
    1.55 +      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
    1.56 +
    1.57 +      /* Move to the next source pixel */
    1.58 +      x_q4 += x_step_q4;
    1.59 +    }
    1.60 +    src += src_stride;
    1.61 +    dst += dst_stride;
    1.62 +  }
    1.63 +}
    1.64 +
    1.65 +static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
    1.66 +                                 uint8_t *dst, ptrdiff_t dst_stride,
    1.67 +                                 const int16_t *filter_x0, int x_step_q4,
    1.68 +                                 const int16_t *filter_y, int y_step_q4,
    1.69 +                                 int w, int h, int taps) {
    1.70 +  int x, y, k;
    1.71 +
    1.72 +  /* NOTE: This assumes that the filter table is 256-byte aligned. */
    1.73 +  /* TODO(agrange) Modify to make independent of table alignment. */
    1.74 +  const int16_t *const filter_x_base =
    1.75 +      (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
    1.76 +
    1.77 +  /* Adjust base pointer address for this source line */
    1.78 +  src -= taps / 2 - 1;
    1.79 +
    1.80 +  for (y = 0; y < h; ++y) {
    1.81 +    /* Initial phase offset */
    1.82 +    int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
    1.83 +
    1.84 +    for (x = 0; x < w; ++x) {
    1.85 +      /* Per-pixel src offset */
    1.86 +      const int src_x = x_q4 >> SUBPEL_BITS;
    1.87 +      int sum = 0;
    1.88 +
    1.89 +      /* Pointer to filter to use */
    1.90 +      const int16_t *const filter_x = filter_x_base +
    1.91 +          (x_q4 & SUBPEL_MASK) * taps;
    1.92 +
    1.93 +      for (k = 0; k < taps; ++k)
    1.94 +        sum += src[src_x + k] * filter_x[k];
    1.95 +
    1.96 +      dst[x] = ROUND_POWER_OF_TWO(dst[x] +
    1.97 +                   clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
    1.98 +
    1.99 +      /* Move to the next source pixel */
   1.100 +      x_q4 += x_step_q4;
   1.101 +    }
   1.102 +    src += src_stride;
   1.103 +    dst += dst_stride;
   1.104 +  }
   1.105 +}
   1.106 +
   1.107 +static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
   1.108 +                            uint8_t *dst, ptrdiff_t dst_stride,
   1.109 +                            const int16_t *filter_x, int x_step_q4,
   1.110 +                            const int16_t *filter_y0, int y_step_q4,
   1.111 +                            int w, int h, int taps) {
   1.112 +  int x, y, k;
   1.113 +
   1.114 +  /* NOTE: This assumes that the filter table is 256-byte aligned. */
   1.115 +  /* TODO(agrange) Modify to make independent of table alignment. */
   1.116 +  const int16_t *const filter_y_base =
   1.117 +      (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
   1.118 +
   1.119 +  /* Adjust base pointer address for this source column */
   1.120 +  src -= src_stride * (taps / 2 - 1);
   1.121 +
   1.122 +  for (x = 0; x < w; ++x) {
   1.123 +    /* Initial phase offset */
   1.124 +    int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
   1.125 +
   1.126 +    for (y = 0; y < h; ++y) {
   1.127 +      /* Per-pixel src offset */
   1.128 +      const int src_y = y_q4 >> SUBPEL_BITS;
   1.129 +      int sum = 0;
   1.130 +
   1.131 +      /* Pointer to filter to use */
   1.132 +      const int16_t *const filter_y = filter_y_base +
   1.133 +          (y_q4 & SUBPEL_MASK) * taps;
   1.134 +
   1.135 +      for (k = 0; k < taps; ++k)
   1.136 +        sum += src[(src_y + k) * src_stride] * filter_y[k];
   1.137 +
   1.138 +      dst[y * dst_stride] =
   1.139 +          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
   1.140 +
   1.141 +      /* Move to the next source pixel */
   1.142 +      y_q4 += y_step_q4;
   1.143 +    }
   1.144 +    ++src;
   1.145 +    ++dst;
   1.146 +  }
   1.147 +}
   1.148 +
   1.149 +static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
   1.150 +                                uint8_t *dst, ptrdiff_t dst_stride,
   1.151 +                                const int16_t *filter_x, int x_step_q4,
   1.152 +                                const int16_t *filter_y0, int y_step_q4,
   1.153 +                                int w, int h, int taps) {
   1.154 +  int x, y, k;
   1.155 +
   1.156 +  /* NOTE: This assumes that the filter table is 256-byte aligned. */
   1.157 +  /* TODO(agrange) Modify to make independent of table alignment. */
   1.158 +  const int16_t *const filter_y_base =
   1.159 +      (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
   1.160 +
   1.161 +  /* Adjust base pointer address for this source column */
   1.162 +  src -= src_stride * (taps / 2 - 1);
   1.163 +
   1.164 +  for (x = 0; x < w; ++x) {
   1.165 +    /* Initial phase offset */
   1.166 +    int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
   1.167 +
   1.168 +    for (y = 0; y < h; ++y) {
   1.169 +      /* Per-pixel src offset */
   1.170 +      const int src_y = y_q4 >> SUBPEL_BITS;
   1.171 +      int sum = 0;
   1.172 +
   1.173 +      /* Pointer to filter to use */
   1.174 +      const int16_t *const filter_y = filter_y_base +
   1.175 +          (y_q4 & SUBPEL_MASK) * taps;
   1.176 +
   1.177 +      for (k = 0; k < taps; ++k)
   1.178 +        sum += src[(src_y + k) * src_stride] * filter_y[k];
   1.179 +
   1.180 +      dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
   1.181 +           clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
   1.182 +
   1.183 +      /* Move to the next source pixel */
   1.184 +      y_q4 += y_step_q4;
   1.185 +    }
   1.186 +    ++src;
   1.187 +    ++dst;
   1.188 +  }
   1.189 +}
   1.190 +
   1.191 +static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
   1.192 +                       uint8_t *dst, ptrdiff_t dst_stride,
   1.193 +                       const int16_t *filter_x, int x_step_q4,
   1.194 +                       const int16_t *filter_y, int y_step_q4,
   1.195 +                       int w, int h, int taps) {
   1.196 +  /* Fixed size intermediate buffer places limits on parameters.
   1.197 +   * Maximum intermediate_height is 324, for y_step_q4 == 80,
   1.198 +   * h == 64, taps == 8.
   1.199 +   * y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
   1.200 +   */
   1.201 +  uint8_t temp[64 * 324];
   1.202 +  int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps;
   1.203 +
   1.204 +  assert(w <= 64);
   1.205 +  assert(h <= 64);
   1.206 +  assert(taps <= 8);
   1.207 +  assert(y_step_q4 <= 80);
   1.208 +  assert(x_step_q4 <= 80);
   1.209 +
   1.210 +  if (intermediate_height < h)
   1.211 +    intermediate_height = h;
   1.212 +
   1.213 +  convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,
   1.214 +                   filter_x, x_step_q4, filter_y, y_step_q4, w,
   1.215 +                   intermediate_height, taps);
   1.216 +  convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,
   1.217 +                  x_step_q4, filter_y, y_step_q4, w, h, taps);
   1.218 +}
   1.219 +
   1.220 +void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
   1.221 +                           uint8_t *dst, ptrdiff_t dst_stride,
   1.222 +                           const int16_t *filter_x, int x_step_q4,
   1.223 +                           const int16_t *filter_y, int y_step_q4,
   1.224 +                           int w, int h) {
   1.225 +  convolve_horiz_c(src, src_stride, dst, dst_stride,
   1.226 +                   filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
   1.227 +}
   1.228 +
   1.229 +void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
   1.230 +                               uint8_t *dst, ptrdiff_t dst_stride,
   1.231 +                               const int16_t *filter_x, int x_step_q4,
   1.232 +                               const int16_t *filter_y, int y_step_q4,
   1.233 +                               int w, int h) {
   1.234 +  convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
   1.235 +                       filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
   1.236 +}
   1.237 +
   1.238 +void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
   1.239 +                          uint8_t *dst, ptrdiff_t dst_stride,
   1.240 +                          const int16_t *filter_x, int x_step_q4,
   1.241 +                          const int16_t *filter_y, int y_step_q4,
   1.242 +                          int w, int h) {
   1.243 +  convolve_vert_c(src, src_stride, dst, dst_stride,
   1.244 +                  filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
   1.245 +}
   1.246 +
   1.247 +void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
   1.248 +                              uint8_t *dst, ptrdiff_t dst_stride,
   1.249 +                              const int16_t *filter_x, int x_step_q4,
   1.250 +                              const int16_t *filter_y, int y_step_q4,
   1.251 +                              int w, int h) {
   1.252 +  convolve_avg_vert_c(src, src_stride, dst, dst_stride,
   1.253 +                      filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
   1.254 +}
   1.255 +
   1.256 +void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
   1.257 +                     uint8_t *dst, ptrdiff_t dst_stride,
   1.258 +                     const int16_t *filter_x, int x_step_q4,
   1.259 +                     const int16_t *filter_y, int y_step_q4,
   1.260 +                     int w, int h) {
   1.261 +  convolve_c(src, src_stride, dst, dst_stride,
   1.262 +             filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
   1.263 +}
   1.264 +
   1.265 +void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
   1.266 +                         uint8_t *dst, ptrdiff_t dst_stride,
   1.267 +                         const int16_t *filter_x, int x_step_q4,
   1.268 +                         const int16_t *filter_y, int y_step_q4,
   1.269 +                         int w, int h) {
   1.270 +  /* Fixed size intermediate buffer places limits on parameters. */
   1.271 +  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
   1.272 +  assert(w <= 64);
   1.273 +  assert(h <= 64);
   1.274 +
   1.275 +  vp9_convolve8(src, src_stride, temp, 64,
   1.276 +               filter_x, x_step_q4, filter_y, y_step_q4, w, h);
   1.277 +  vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
   1.278 +}
   1.279 +
   1.280 +void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
   1.281 +                         uint8_t *dst, ptrdiff_t dst_stride,
   1.282 +                         const int16_t *filter_x, int filter_x_stride,
   1.283 +                         const int16_t *filter_y, int filter_y_stride,
   1.284 +                         int w, int h) {
   1.285 +  int r;
   1.286 +
   1.287 +  for (r = h; r > 0; --r) {
   1.288 +    vpx_memcpy(dst, src, w);
   1.289 +    src += src_stride;
   1.290 +    dst += dst_stride;
   1.291 +  }
   1.292 +}
   1.293 +
   1.294 +void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
   1.295 +                        uint8_t *dst, ptrdiff_t dst_stride,
   1.296 +                        const int16_t *filter_x, int filter_x_stride,
   1.297 +                        const int16_t *filter_y, int filter_y_stride,
   1.298 +                        int w, int h) {
   1.299 +  int x, y;
   1.300 +
   1.301 +  for (y = 0; y < h; ++y) {
   1.302 +    for (x = 0; x < w; ++x)
   1.303 +      dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
   1.304 +
   1.305 +    src += src_stride;
   1.306 +    dst += dst_stride;
   1.307 +  }
   1.308 +}

mercurial