1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libvpx/vp9/common/arm/neon/vp9_convolve_neon.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,78 @@ 1.4 +/* 1.5 + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include "./vp9_rtcd.h" 1.15 +#include "vp9/common/vp9_common.h" 1.16 +#include "vpx_ports/mem.h" 1.17 + 1.18 +void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, 1.19 + uint8_t *dst, ptrdiff_t dst_stride, 1.20 + const int16_t *filter_x, int x_step_q4, 1.21 + const int16_t *filter_y, int y_step_q4, 1.22 + int w, int h) { 1.23 + /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the 1.24 + * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4). 1.25 + */ 1.26 + DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72); 1.27 + 1.28 + // Account for the vertical phase needing 3 lines prior and 4 lines post 1.29 + int intermediate_height = h + 7; 1.30 + 1.31 + if (x_step_q4 != 16 || y_step_q4 != 16) 1.32 + return vp9_convolve8_c(src, src_stride, 1.33 + dst, dst_stride, 1.34 + filter_x, x_step_q4, 1.35 + filter_y, y_step_q4, 1.36 + w, h); 1.37 + 1.38 + /* Filter starting 3 lines back. The neon implementation will ignore the 1.39 + * given height and filter a multiple of 4 lines. Since this goes in to 1.40 + * the temp buffer which has lots of extra room and is subsequently discarded 1.41 + * this is safe if somewhat less than ideal. 1.42 + */ 1.43 + vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride, 1.44 + temp, 64, 1.45 + filter_x, x_step_q4, filter_y, y_step_q4, 1.46 + w, intermediate_height); 1.47 + 1.48 + /* Step into the temp buffer 3 lines to get the actual frame data */ 1.49 + vp9_convolve8_vert_neon(temp + 64 * 3, 64, 1.50 + dst, dst_stride, 1.51 + filter_x, x_step_q4, filter_y, y_step_q4, 1.52 + w, h); 1.53 +} 1.54 + 1.55 +void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, 1.56 + uint8_t *dst, ptrdiff_t dst_stride, 1.57 + const int16_t *filter_x, int x_step_q4, 1.58 + const int16_t *filter_y, int y_step_q4, 1.59 + int w, int h) { 1.60 + DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72); 1.61 + int intermediate_height = h + 7; 1.62 + 1.63 + if (x_step_q4 != 16 || y_step_q4 != 16) 1.64 + return vp9_convolve8_avg_c(src, src_stride, 1.65 + dst, dst_stride, 1.66 + filter_x, x_step_q4, 1.67 + filter_y, y_step_q4, 1.68 + w, h); 1.69 + 1.70 + /* This implementation has the same issues as above. In addition, we only want 1.71 + * to average the values after both passes. 1.72 + */ 1.73 + vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride, 1.74 + temp, 64, 1.75 + filter_x, x_step_q4, filter_y, y_step_q4, 1.76 + w, intermediate_height); 1.77 + vp9_convolve8_avg_vert_neon(temp + 64 * 3, 1.78 + 64, dst, dst_stride, 1.79 + filter_x, x_step_q4, filter_y, y_step_q4, 1.80 + w, h); 1.81 +}