media/libvpx/vp9/common/vp9_convolve.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned from the upstream tor-browser origin at tag tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 /*
michael@0 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
michael@0 3 *
michael@0 4 * Use of this source code is governed by a BSD-style license
michael@0 5 * that can be found in the LICENSE file in the root of the source
michael@0 6 * tree. An additional intellectual property rights grant can be found
michael@0 7 * in the file PATENTS. All contributing project authors may
michael@0 8 * be found in the AUTHORS file in the root of the source tree.
michael@0 9 */
michael@0 10
michael@0 11 #include <assert.h>
michael@0 12
michael@0 13 #include "./vpx_config.h"
michael@0 14 #include "./vp9_rtcd.h"
michael@0 15 #include "vp9/common/vp9_common.h"
michael@0 16 #include "vp9/common/vp9_convolve.h"
michael@0 17 #include "vp9/common/vp9_filter.h"
michael@0 18 #include "vpx/vpx_integer.h"
michael@0 19 #include "vpx_ports/mem.h"
michael@0 20
michael@0 21 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
michael@0 22 uint8_t *dst, ptrdiff_t dst_stride,
michael@0 23 const int16_t *filter_x0, int x_step_q4,
michael@0 24 const int16_t *filter_y, int y_step_q4,
michael@0 25 int w, int h, int taps) {
michael@0 26 int x, y, k;
michael@0 27
michael@0 28 /* NOTE: This assumes that the filter table is 256-byte aligned. */
michael@0 29 /* TODO(agrange) Modify to make independent of table alignment. */
michael@0 30 const int16_t *const filter_x_base =
michael@0 31 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
michael@0 32
michael@0 33 /* Adjust base pointer address for this source line */
michael@0 34 src -= taps / 2 - 1;
michael@0 35
michael@0 36 for (y = 0; y < h; ++y) {
michael@0 37 /* Initial phase offset */
michael@0 38 int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
michael@0 39
michael@0 40 for (x = 0; x < w; ++x) {
michael@0 41 /* Per-pixel src offset */
michael@0 42 const int src_x = x_q4 >> SUBPEL_BITS;
michael@0 43 int sum = 0;
michael@0 44
michael@0 45 /* Pointer to filter to use */
michael@0 46 const int16_t *const filter_x = filter_x_base +
michael@0 47 (x_q4 & SUBPEL_MASK) * taps;
michael@0 48
michael@0 49 for (k = 0; k < taps; ++k)
michael@0 50 sum += src[src_x + k] * filter_x[k];
michael@0 51
michael@0 52 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
michael@0 53
michael@0 54 /* Move to the next source pixel */
michael@0 55 x_q4 += x_step_q4;
michael@0 56 }
michael@0 57 src += src_stride;
michael@0 58 dst += dst_stride;
michael@0 59 }
michael@0 60 }
michael@0 61
michael@0 62 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
michael@0 63 uint8_t *dst, ptrdiff_t dst_stride,
michael@0 64 const int16_t *filter_x0, int x_step_q4,
michael@0 65 const int16_t *filter_y, int y_step_q4,
michael@0 66 int w, int h, int taps) {
michael@0 67 int x, y, k;
michael@0 68
michael@0 69 /* NOTE: This assumes that the filter table is 256-byte aligned. */
michael@0 70 /* TODO(agrange) Modify to make independent of table alignment. */
michael@0 71 const int16_t *const filter_x_base =
michael@0 72 (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
michael@0 73
michael@0 74 /* Adjust base pointer address for this source line */
michael@0 75 src -= taps / 2 - 1;
michael@0 76
michael@0 77 for (y = 0; y < h; ++y) {
michael@0 78 /* Initial phase offset */
michael@0 79 int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
michael@0 80
michael@0 81 for (x = 0; x < w; ++x) {
michael@0 82 /* Per-pixel src offset */
michael@0 83 const int src_x = x_q4 >> SUBPEL_BITS;
michael@0 84 int sum = 0;
michael@0 85
michael@0 86 /* Pointer to filter to use */
michael@0 87 const int16_t *const filter_x = filter_x_base +
michael@0 88 (x_q4 & SUBPEL_MASK) * taps;
michael@0 89
michael@0 90 for (k = 0; k < taps; ++k)
michael@0 91 sum += src[src_x + k] * filter_x[k];
michael@0 92
michael@0 93 dst[x] = ROUND_POWER_OF_TWO(dst[x] +
michael@0 94 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
michael@0 95
michael@0 96 /* Move to the next source pixel */
michael@0 97 x_q4 += x_step_q4;
michael@0 98 }
michael@0 99 src += src_stride;
michael@0 100 dst += dst_stride;
michael@0 101 }
michael@0 102 }
michael@0 103
michael@0 104 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
michael@0 105 uint8_t *dst, ptrdiff_t dst_stride,
michael@0 106 const int16_t *filter_x, int x_step_q4,
michael@0 107 const int16_t *filter_y0, int y_step_q4,
michael@0 108 int w, int h, int taps) {
michael@0 109 int x, y, k;
michael@0 110
michael@0 111 /* NOTE: This assumes that the filter table is 256-byte aligned. */
michael@0 112 /* TODO(agrange) Modify to make independent of table alignment. */
michael@0 113 const int16_t *const filter_y_base =
michael@0 114 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
michael@0 115
michael@0 116 /* Adjust base pointer address for this source column */
michael@0 117 src -= src_stride * (taps / 2 - 1);
michael@0 118
michael@0 119 for (x = 0; x < w; ++x) {
michael@0 120 /* Initial phase offset */
michael@0 121 int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
michael@0 122
michael@0 123 for (y = 0; y < h; ++y) {
michael@0 124 /* Per-pixel src offset */
michael@0 125 const int src_y = y_q4 >> SUBPEL_BITS;
michael@0 126 int sum = 0;
michael@0 127
michael@0 128 /* Pointer to filter to use */
michael@0 129 const int16_t *const filter_y = filter_y_base +
michael@0 130 (y_q4 & SUBPEL_MASK) * taps;
michael@0 131
michael@0 132 for (k = 0; k < taps; ++k)
michael@0 133 sum += src[(src_y + k) * src_stride] * filter_y[k];
michael@0 134
michael@0 135 dst[y * dst_stride] =
michael@0 136 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
michael@0 137
michael@0 138 /* Move to the next source pixel */
michael@0 139 y_q4 += y_step_q4;
michael@0 140 }
michael@0 141 ++src;
michael@0 142 ++dst;
michael@0 143 }
michael@0 144 }
michael@0 145
michael@0 146 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
michael@0 147 uint8_t *dst, ptrdiff_t dst_stride,
michael@0 148 const int16_t *filter_x, int x_step_q4,
michael@0 149 const int16_t *filter_y0, int y_step_q4,
michael@0 150 int w, int h, int taps) {
michael@0 151 int x, y, k;
michael@0 152
michael@0 153 /* NOTE: This assumes that the filter table is 256-byte aligned. */
michael@0 154 /* TODO(agrange) Modify to make independent of table alignment. */
michael@0 155 const int16_t *const filter_y_base =
michael@0 156 (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
michael@0 157
michael@0 158 /* Adjust base pointer address for this source column */
michael@0 159 src -= src_stride * (taps / 2 - 1);
michael@0 160
michael@0 161 for (x = 0; x < w; ++x) {
michael@0 162 /* Initial phase offset */
michael@0 163 int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
michael@0 164
michael@0 165 for (y = 0; y < h; ++y) {
michael@0 166 /* Per-pixel src offset */
michael@0 167 const int src_y = y_q4 >> SUBPEL_BITS;
michael@0 168 int sum = 0;
michael@0 169
michael@0 170 /* Pointer to filter to use */
michael@0 171 const int16_t *const filter_y = filter_y_base +
michael@0 172 (y_q4 & SUBPEL_MASK) * taps;
michael@0 173
michael@0 174 for (k = 0; k < taps; ++k)
michael@0 175 sum += src[(src_y + k) * src_stride] * filter_y[k];
michael@0 176
michael@0 177 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
michael@0 178 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
michael@0 179
michael@0 180 /* Move to the next source pixel */
michael@0 181 y_q4 += y_step_q4;
michael@0 182 }
michael@0 183 ++src;
michael@0 184 ++dst;
michael@0 185 }
michael@0 186 }
michael@0 187
/* Two-dimensional filter built from a horizontal pass followed by a vertical
 * pass through a stack buffer.
 *
 * The horizontal pass starts taps / 2 - 1 rows above src and produces
 * intermediate_height rows into temp; the vertical pass then reads temp,
 * skipping those lead-in rows, and writes the w x h result to dst.
 */
static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
                       uint8_t *dst, ptrdiff_t dst_stride,
                       const int16_t *filter_x, int x_step_q4,
                       const int16_t *filter_y, int y_step_q4,
                       int w, int h, int taps) {
  /* Fixed size intermediate buffer places limits on parameters.
   * temp holds 324 rows of 64 bytes. With y_step_q4 == 80, h == 64 and
   * taps == 8 the row count computed below is 323, which fits.
   * y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
   */
  uint8_t temp[64 * 324];
  const ptrdiff_t temp_stride = 64;
  const int lead_rows = taps / 2 - 1;
  const int rows_needed = (((h - 1) * y_step_q4 + 15) >> 4) + taps;
  int intermediate_height;

  /* These limits are what keep both passes inside temp; they are enforced by
   * assert() only. */
  assert(w <= 64);
  assert(h <= 64);
  assert(taps <= 8);
  assert(y_step_q4 <= 80);
  assert(x_step_q4 <= 80);

  /* Never filter fewer rows than the output height. */
  intermediate_height = (rows_needed < h) ? h : rows_needed;

  convolve_horiz_c(src - src_stride * lead_rows, src_stride,
                   temp, temp_stride,
                   filter_x, x_step_q4, filter_y, y_step_q4,
                   w, intermediate_height, taps);
  convolve_vert_c(temp + temp_stride * lead_rows, temp_stride,
                  dst, dst_stride,
                  filter_x, x_step_q4, filter_y, y_step_q4,
                  w, h, taps);
}
michael@0 216
/* 8-tap horizontal convolution: forwards all arguments unchanged to
 * convolve_horiz_c() with the tap count fixed at 8. */
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
  const int taps = 8;

  convolve_horiz_c(src, src_stride, dst, dst_stride,
                   filter_x, x_step_q4, filter_y, y_step_q4,
                   w, h, taps);
}
michael@0 225
/* 8-tap horizontal convolution blended into dst: forwards all arguments
 * unchanged to convolve_avg_horiz_c() with the tap count fixed at 8. */
void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
  const int taps = 8;

  convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4,
                       w, h, taps);
}
michael@0 234
/* 8-tap vertical convolution: forwards all arguments unchanged to
 * convolve_vert_c() with the tap count fixed at 8. */
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
  const int taps = 8;

  convolve_vert_c(src, src_stride, dst, dst_stride,
                  filter_x, x_step_q4, filter_y, y_step_q4,
                  w, h, taps);
}
michael@0 243
/* 8-tap vertical convolution blended into dst: forwards all arguments
 * unchanged to convolve_avg_vert_c() with the tap count fixed at 8. */
void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  const int taps = 8;

  convolve_avg_vert_c(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4,
                      w, h, taps);
}
michael@0 252
/* 8-tap two-dimensional convolution: forwards all arguments unchanged to
 * convolve_c() with the tap count fixed at 8. */
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  const int taps = 8;

  convolve_c(src, src_stride, dst, dst_stride,
             filter_x, x_step_q4, filter_y, y_step_q4,
             w, h, taps);
}
michael@0 261
michael@0 262 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
michael@0 263 uint8_t *dst, ptrdiff_t dst_stride,
michael@0 264 const int16_t *filter_x, int x_step_q4,
michael@0 265 const int16_t *filter_y, int y_step_q4,
michael@0 266 int w, int h) {
michael@0 267 /* Fixed size intermediate buffer places limits on parameters. */
michael@0 268 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
michael@0 269 assert(w <= 64);
michael@0 270 assert(h <= 64);
michael@0 271
michael@0 272 vp9_convolve8(src, src_stride, temp, 64,
michael@0 273 filter_x, x_step_q4, filter_y, y_step_q4, w, h);
michael@0 274 vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
michael@0 275 }
michael@0 276
/* Copies a block of h rows, w bytes per row, from src to dst with
 * vpx_memcpy(), advancing each pointer by its own stride after every row.
 * The filter arguments are not used by this function.
 */
void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
                         int w, int h) {
  int row;

  for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride)
    vpx_memcpy(dst, src, w);
}
michael@0 290
/* Combines a w x h block of src into dst in place: every destination pixel
 * becomes ROUND_POWER_OF_TWO(dst + src, 1).
 * The filter arguments are not used by this function.
 */
void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int filter_x_stride,
                        const int16_t *filter_y, int filter_y_stride,
                        int w, int h) {
  int row;

  for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
    int col;

    for (col = 0; col < w; ++col)
      dst[col] = ROUND_POWER_OF_TWO(dst[col] + src[col], 1);
  }
}

mercurial