1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libyuv/source/row_any.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,542 @@ 1.4 +/* 1.5 + * Copyright 2012 The LibYuv Project Authors. All rights reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include "libyuv/row.h" 1.15 + 1.16 +#include "libyuv/basic_types.h" 1.17 + 1.18 +#ifdef __cplusplus 1.19 +namespace libyuv { 1.20 +extern "C" { 1.21 +#endif 1.22 + 1.23 +// TODO(fbarchard): Consider 'any' functions handling any quantity of pixels. 1.24 +// TODO(fbarchard): Consider 'any' functions handling odd alignment. 1.25 +// YUV to RGB does multiple of 8 with SIMD and remainder with C. 1.26 +#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK) \ 1.27 + void NAMEANY(const uint8* y_buf, \ 1.28 + const uint8* u_buf, \ 1.29 + const uint8* v_buf, \ 1.30 + uint8* rgb_buf, \ 1.31 + int width) { \ 1.32 + int n = width & ~MASK; \ 1.33 + I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n); \ 1.34 + I420TORGB_C(y_buf + n, \ 1.35 + u_buf + (n >> UV_SHIFT), \ 1.36 + v_buf + (n >> UV_SHIFT), \ 1.37 + rgb_buf + n * BPP, width & MASK); \ 1.38 + } 1.39 + 1.40 +#ifdef HAS_I422TOARGBROW_SSSE3 1.41 +YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, 1.42 + 0, 4, 7) 1.43 +YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C, 1.44 + 1, 4, 7) 1.45 +YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C, 1.46 + 2, 4, 7) 1.47 +YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1.48 + 1, 4, 7) 1.49 +YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 
1.50 + 1, 4, 7) 1.51 +YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1.52 + 1, 4, 7) 1.53 +// I422ToRGB565Row_SSSE3 is unaligned. 1.54 +YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C, 1.55 + 1, 2, 7) 1.56 +YANY(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, I422ToARGB1555Row_C, 1.57 + 1, 2, 7) 1.58 +YANY(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, I422ToRGB565Row_C, 1.59 + 1, 2, 7) 1.60 +// I422ToRGB24Row_SSSE3 is unaligned. 1.61 +YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7) 1.62 +YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7) 1.63 +YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15) 1.64 +YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15) 1.65 +#endif // HAS_I422TOARGBROW_SSSE3 1.66 +#ifdef HAS_I422TOARGBROW_AVX2 1.67 +YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15) 1.68 +#endif // HAS_I422TOARGBROW_AVX2 1.69 +#ifdef HAS_I422TOARGBROW_NEON 1.70 +YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7) 1.71 +YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7) 1.72 +YANY(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7) 1.73 +YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7) 1.74 +YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7) 1.75 +YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7) 1.76 +YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7) 1.77 +YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7) 1.78 +YANY(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, I422ToARGB4444Row_C, 1.79 + 1, 2, 7) 1.80 +YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C, 1.81 + 1, 2, 7) 1.82 +YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 
7) 1.83 +YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15) 1.84 +YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15) 1.85 +#endif // HAS_I422TOARGBROW_NEON 1.86 +#undef YANY 1.87 + 1.88 +// Wrappers to handle odd width 1.89 +#define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP) \ 1.90 + void NAMEANY(const uint8* y_buf, \ 1.91 + const uint8* uv_buf, \ 1.92 + uint8* rgb_buf, \ 1.93 + int width) { \ 1.94 + int n = width & ~7; \ 1.95 + NV12TORGB_SIMD(y_buf, uv_buf, rgb_buf, n); \ 1.96 + NV12TORGB_C(y_buf + n, \ 1.97 + uv_buf + (n >> UV_SHIFT), \ 1.98 + rgb_buf + n * BPP, width & 7); \ 1.99 + } 1.100 + 1.101 +#ifdef HAS_NV12TOARGBROW_SSSE3 1.102 +NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C, 1.103 + 0, 4) 1.104 +NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C, 1.105 + 0, 4) 1.106 +#endif // HAS_NV12TOARGBROW_SSSE3 1.107 +#ifdef HAS_NV12TOARGBROW_NEON 1.108 +NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4) 1.109 +NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4) 1.110 +#endif // HAS_NV12TOARGBROW_NEON 1.111 +#ifdef HAS_NV12TORGB565ROW_SSSE3 1.112 +NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C, 1.113 + 0, 2) 1.114 +NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C, 1.115 + 0, 2) 1.116 +#endif // HAS_NV12TORGB565ROW_SSSE3 1.117 +#ifdef HAS_NV12TORGB565ROW_NEON 1.118 +NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C, 0, 2) 1.119 +NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2) 1.120 +#endif // HAS_NV12TORGB565ROW_NEON 1.121 +#undef NVANY 1.122 + 1.123 +#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \ 1.124 + void NAMEANY(const uint8* src, \ 1.125 + uint8* dst, \ 1.126 + int width) { \ 1.127 + int n = width & ~MASK; \ 1.128 + ARGBTORGB_SIMD(src, dst, n); \ 1.129 + 
ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK); \ 1.130 + } 1.131 + 1.132 +#if defined(HAS_ARGBTORGB24ROW_SSSE3) 1.133 +RGBANY(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, ARGBToRGB24Row_C, 1.134 + 15, 4, 3) 1.135 +RGBANY(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, ARGBToRAWRow_C, 1.136 + 15, 4, 3) 1.137 +RGBANY(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, ARGBToRGB565Row_C, 1.138 + 3, 4, 2) 1.139 +RGBANY(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, ARGBToARGB1555Row_C, 1.140 + 3, 4, 2) 1.141 +RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C, 1.142 + 3, 4, 2) 1.143 +#endif 1.144 +#if defined(HAS_I400TOARGBROW_SSE2) 1.145 +RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C, 1.146 + 7, 1, 4) 1.147 +#endif 1.148 +#if defined(HAS_YTOARGBROW_SSE2) 1.149 +RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C, 1.150 + 7, 1, 4) 1.151 +RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C, 1.152 + 15, 2, 4) 1.153 +RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C, 1.154 + 15, 2, 4) 1.155 +// These require alignment on ARGB, so C is used for remainder. 
1.156 +RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C, 1.157 + 15, 3, 4) 1.158 +RGBANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, RAWToARGBRow_C, 1.159 + 15, 3, 4) 1.160 +RGBANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C, 1.161 + 7, 2, 4) 1.162 +RGBANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C, 1.163 + 7, 2, 4) 1.164 +RGBANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C, 1.165 + 7, 2, 4) 1.166 +#endif 1.167 +#if defined(HAS_ARGBTORGB24ROW_NEON) 1.168 +RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3) 1.169 +RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3) 1.170 +RGBANY(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, ARGBToRGB565Row_C, 1.171 + 7, 4, 2) 1.172 +RGBANY(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C, 1.173 + 7, 4, 2) 1.174 +RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C, 1.175 + 7, 4, 2) 1.176 +RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C, 1.177 + 7, 1, 4) 1.178 +RGBANY(YToARGBRow_Any_NEON, YToARGBRow_NEON, YToARGBRow_C, 1.179 + 7, 1, 4) 1.180 +RGBANY(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, YUY2ToARGBRow_C, 1.181 + 7, 2, 4) 1.182 +RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C, 1.183 + 7, 2, 4) 1.184 +#endif 1.185 +#undef RGBANY 1.186 + 1.187 +// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst. 
// BAYERANY emits NAMEANY(src, dst, selector, width), a wrapper that accepts
// any width:
//   - BAYER_SIMD converts the leading (width & ~MASK) pixels;
//   - BAYER_C converts the trailing (width & MASK) pixels, starting SBPP * n
//     into src and BPP * n into dst, where n is the SIMD pixel count.
// selector is forwarded unchanged to both kernels. MASK must be 2^k - 1.
// Both kernels are invoked unconditionally, including with a count of 0.
#define BAYERANY(NAMEANY, BAYER_SIMD, BAYER_C, MASK, SBPP, BPP) \
    void NAMEANY(const uint8* src, \
                 uint8* dst, uint32 selector, \
                 int width) { \
      const int simd_count = width & ~(MASK); \
      const int tail_count = width & (MASK); \
      BAYER_SIMD(src, dst, selector, simd_count); \
      BAYER_C(src + simd_count * (SBPP), \
              dst + simd_count * (BPP), \
              selector, \
              tail_count); \
    }

#ifdef HAS_ARGBTOBAYERROW_SSSE3
BAYERANY(ARGBToBayerRow_Any_SSSE3,
         ARGBToBayerRow_SSSE3,
         ARGBToBayerRow_C,
         7, 4, 1)
#endif
#ifdef HAS_ARGBTOBAYERROW_NEON
BAYERANY(ARGBToBayerRow_Any_NEON,
         ARGBToBayerRow_NEON,
         ARGBToBayerRow_C,
         7, 4, 1)
#endif
#ifdef HAS_ARGBTOBAYERGGROW_SSE2
BAYERANY(ARGBToBayerGGRow_Any_SSE2,
         ARGBToBayerGGRow_SSE2,
         ARGBToBayerGGRow_C,
         7, 4, 1)
#endif
#ifdef HAS_ARGBTOBAYERGGROW_NEON
BAYERANY(ARGBToBayerGGRow_Any_NEON,
         ARGBToBayerGGRow_NEON,
         ARGBToBayerGGRow_C,
         7, 4, 1)
#endif

#undef BAYERANY

// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
// "Last N" wrapper: YANY emits NAMEANY(src_argb, dst_y, width), which calls
// the SIMD kernel twice and never falls back to C:
//   1. on the leading (width - NUM) pixels, then
//   2. on exactly NUM pixels starting at pixel (width - NUM).
// SBPP / BPP scale the pixel offset into src_argb / dst_y for the second call.
// NOTE(review): the first call receives a count that is not necessarily a
// multiple of NUM, and a negative count when width < NUM; presumably the
// kernels round up and callers guarantee width >= NUM - confirm.
#define YANY(NAMEANY, ROW_SIMD, SBPP, BPP, NUM) \
    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
      const int lead = width - (NUM); \
      ROW_SIMD(src_argb, dst_y, lead); \
      ROW_SIMD(src_argb + lead * (SBPP), dst_y + lead * (BPP), NUM); \
    }

#if defined(HAS_ARGBTOYROW_AVX2)
YANY(ARGBToYRow_Any_AVX2,  ARGBToYRow_AVX2,  4, 1, 32)
YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 4, 1, 32)
YANY(YUY2ToYRow_Any_AVX2,  YUY2ToYRow_AVX2,  2, 1, 32)
YANY(UYVYToYRow_Any_AVX2,  UYVYToYRow_AVX2,  2, 1, 32)
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
#endif
#if defined(HAS_BGRATOYROW_SSSE3)
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(YUY2ToYRow_Any_SSE2,  YUY2ToYRow_Unaligned_SSE2,  2, 1, 16)
YANY(UYVYToYRow_Any_SSE2,  UYVYToYRow_Unaligned_SSE2,  2, 1, 16)
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
YANY(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_Unaligned_SSSE3, 4, 1, 16)
#endif
#if defined(HAS_ARGBTOYROW_NEON)
YANY(ARGBToYRow_Any_NEON,        ARGBToYRow_NEON,        4, 1, 8)
YANY(ARGBToYJRow_Any_NEON,       ARGBToYJRow_NEON,       4, 1, 8)
YANY(BGRAToYRow_Any_NEON,        BGRAToYRow_NEON,        4, 1, 8)
YANY(ABGRToYRow_Any_NEON,        ABGRToYRow_NEON,        4, 1, 8)
YANY(RGBAToYRow_Any_NEON,        RGBAToYRow_NEON,        4, 1, 8)
YANY(RGB24ToYRow_Any_NEON,       RGB24ToYRow_NEON,       3, 1, 8)
YANY(RAWToYRow_Any_NEON,         RAWToYRow_NEON,         3, 1, 8)
YANY(RGB565ToYRow_Any_NEON,      RGB565ToYRow_NEON,      2, 1, 8)
YANY(ARGB1555ToYRow_Any_NEON,    ARGB1555ToYRow_NEON,    2, 1, 8)
YANY(ARGB4444ToYRow_Any_NEON,    ARGB4444ToYRow_NEON,    2, 1, 8)
YANY(YUY2ToYRow_Any_NEON,        YUY2ToYRow_NEON,        2, 1, 16)
YANY(UYVYToYRow_Any_NEON,        UYVYToYRow_NEON,        2, 1, 16)
YANY(RGB24ToARGBRow_Any_NEON,    RGB24ToARGBRow_NEON,    3, 4, 8)
YANY(RAWToARGBRow_Any_NEON,      RAWToARGBRow_NEON,      3, 4, 8)
YANY(RGB565ToARGBRow_Any_NEON,   RGB565ToARGBRow_NEON,   2, 4, 8)
YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
#endif
#undef YANY

// SIMD-plus-C wrapper: YANY emits NAMEANY(src_argb, dst_y, width), which runs
// ROW_SIMD on the leading (width & ~MASK) pixels and ROW_C on the trailing
// (width & MASK) pixels. SBPP / BPP scale the pixel offset into src_argb /
// dst_y for the C call. MASK must be 2^k - 1.
#define YANY(NAMEANY, ROW_SIMD, ROW_C, SBPP, BPP, MASK) \
    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
      const int simd_count = width & ~(MASK); \
      const int tail_count = width & (MASK); \
      ROW_SIMD(src_argb, dst_y, simd_count); \
      ROW_C(src_argb + simd_count * (SBPP), \
            dst_y + simd_count * (BPP), \
            tail_count); \
    }

// Attenuate is destructive so last16 method can not be used due to overlap.
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
YANY(ARGBAttenuateRow_Any_SSSE3,
     ARGBAttenuateRow_SSSE3,
     ARGBAttenuateRow_C,
     4, 4, 3)
#endif
#if defined(HAS_ARGBATTENUATEROW_SSE2)
YANY(ARGBAttenuateRow_Any_SSE2,
     ARGBAttenuateRow_SSE2,
     ARGBAttenuateRow_C,
     4, 4, 3)
#endif
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
YANY(ARGBUnattenuateRow_Any_SSE2,
     ARGBUnattenuateRow_SSE2,
     ARGBUnattenuateRow_C,
     4, 4, 3)
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
YANY(ARGBAttenuateRow_Any_AVX2,
     ARGBAttenuateRow_AVX2,
     ARGBAttenuateRow_C,
     4, 4, 7)
#endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
YANY(ARGBUnattenuateRow_Any_AVX2,
     ARGBUnattenuateRow_AVX2,
     ARGBUnattenuateRow_C,
     4, 4, 7)
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
YANY(ARGBAttenuateRow_Any_NEON,
     ARGBAttenuateRow_NEON,
     ARGBAttenuateRow_C,
     4, 4, 7)
#endif
#undef YANY

// RGB/YUV to UV does multiple of 16 with SIMD and remainder with C.
// UVANY emits NAMEANY(src_argb, src_stride_argb, dst_u, dst_v, width).
// UV_SIMD handles the leading (width & ~MASK) pixels; UV_C handles the
// trailing (width & MASK) pixels. For the C call the source is advanced by
// BPP per pixel and each destination by half the SIMD pixel count.
// src_stride_argb is forwarded unchanged. MASK must be 2^k - 1.
#define UVANY(NAMEANY, UV_SIMD, UV_C, BPP, MASK) \
    void NAMEANY(const uint8* src_argb, int src_stride_argb, \
                 uint8* dst_u, uint8* dst_v, int width) { \
      const int simd_count = width & ~(MASK); \
      const int tail_count = width & (MASK); \
      const int uv_offset = simd_count >> 1; \
      UV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, simd_count); \
      UV_C(src_argb + simd_count * (BPP), src_stride_argb, \
           dst_u + uv_offset, \
           dst_v + uv_offset, \
           tail_count); \
    }

#if defined(HAS_ARGBTOUVROW_AVX2)
UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, ARGBToUVRow_C, 4, 31)
UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31)
UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
UVANY(ARGBToUVRow_Any_SSSE3,  ARGBToUVRow_Unaligned_SSSE3,  ARGBToUVRow_C,
      4, 15)
UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C,
      4, 15)
UVANY(BGRAToUVRow_Any_SSSE3,  BGRAToUVRow_Unaligned_SSSE3,  BGRAToUVRow_C,
      4, 15)
UVANY(ABGRToUVRow_Any_SSSE3,  ABGRToUVRow_Unaligned_SSSE3,  ABGRToUVRow_C,
      4, 15)
UVANY(RGBAToUVRow_Any_SSSE3,  RGBAToUVRow_Unaligned_SSSE3,  RGBAToUVRow_C,
      4, 15)
UVANY(YUY2ToUVRow_Any_SSE2,   YUY2ToUVRow_Unaligned_SSE2,   YUY2ToUVRow_C,
      2, 15)
UVANY(UYVYToUVRow_Any_SSE2,   UYVYToUVRow_Unaligned_SSE2,   UYVYToUVRow_C,
      2, 15)
#endif
#if defined(HAS_ARGBTOUVROW_NEON)
UVANY(ARGBToUVRow_Any_NEON,     ARGBToUVRow_NEON,     ARGBToUVRow_C,     4, 15)
UVANY(ARGBToUVJRow_Any_NEON,    ARGBToUVJRow_NEON,    ARGBToUVJRow_C,    4, 15)
UVANY(BGRAToUVRow_Any_NEON,     BGRAToUVRow_NEON,     BGRAToUVRow_C,     4, 15)
UVANY(ABGRToUVRow_Any_NEON,     ABGRToUVRow_NEON,     ABGRToUVRow_C,     4, 15)
UVANY(RGBAToUVRow_Any_NEON,     RGBAToUVRow_NEON,     RGBAToUVRow_C,     4, 15)
UVANY(RGB24ToUVRow_Any_NEON,    RGB24ToUVRow_NEON,    RGB24ToUVRow_C,    3, 15)
UVANY(RAWToUVRow_Any_NEON,      RAWToUVRow_NEON,      RAWToUVRow_C,      3, 15)
UVANY(RGB565ToUVRow_Any_NEON,   RGB565ToUVRow_NEON,   RGB565ToUVRow_C,   2, 15)
UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15)
UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15)
UVANY(YUY2ToUVRow_Any_NEON,     YUY2ToUVRow_NEON,     YUY2ToUVRow_C,     2, 15)
UVANY(UYVYToUVRow_Any_NEON,     UYVYToUVRow_NEON,     UYVYToUVRow_C,     2, 15)
#endif
#undef UVANY

// UV422ANY emits NAMEANY(src_uv, dst_u, dst_v, width): the single-row
// counterpart of the wrapper above (no stride argument). For the C call the
// source is advanced by BPP per pixel and each destination by the SIMD pixel
// count shifted right by SHIFT. MASK must be 2^k - 1.
#define UV422ANY(NAMEANY, UV_SIMD, UV_C, BPP, MASK, SHIFT) \
    void NAMEANY(const uint8* src_uv, \
                 uint8* dst_u, uint8* dst_v, int width) { \
      const int simd_count = width & ~(MASK); \
      const int tail_count = width & (MASK); \
      const int uv_offset = simd_count >> (SHIFT); \
      UV_SIMD(src_uv, dst_u, dst_v, simd_count); \
      UV_C(src_uv + simd_count * (BPP), \
           dst_u + uv_offset, \
           dst_v + uv_offset, \
           tail_count); \
    }

#if defined(HAS_ARGBTOUV444ROW_SSSE3)
UV422ANY(ARGBToUV444Row_Any_SSSE3,
         ARGBToUV444Row_Unaligned_SSSE3,
         ARGBToUV444Row_C, 4, 15, 0)
#endif
#if defined(HAS_YUY2TOUV422ROW_AVX2)
UV422ANY(YUY2ToUV422Row_Any_AVX2,
         YUY2ToUV422Row_AVX2,
         YUY2ToUV422Row_C, 2, 31, 1)
UV422ANY(UYVYToUV422Row_Any_AVX2,
         UYVYToUV422Row_AVX2,
         UYVYToUV422Row_C, 2, 31, 1)
#endif
#if defined(HAS_ARGBTOUVROW_SSSE3)
UV422ANY(ARGBToUV422Row_Any_SSSE3,
         ARGBToUV422Row_Unaligned_SSSE3,
         ARGBToUV422Row_C, 4, 15, 1)
UV422ANY(YUY2ToUV422Row_Any_SSE2,
         YUY2ToUV422Row_Unaligned_SSE2,
         YUY2ToUV422Row_C, 2, 15, 1)
UV422ANY(UYVYToUV422Row_Any_SSE2,
         UYVYToUV422Row_Unaligned_SSE2,
         UYVYToUV422Row_C, 2, 15, 1)
#endif
#if defined(HAS_YUY2TOUV422ROW_NEON)
UV422ANY(ARGBToUV444Row_Any_NEON,
         ARGBToUV444Row_NEON,
         ARGBToUV444Row_C, 4, 7, 0)
UV422ANY(ARGBToUV422Row_Any_NEON,
         ARGBToUV422Row_NEON,
         ARGBToUV422Row_C, 4, 15, 1)
UV422ANY(ARGBToUV411Row_Any_NEON,
         ARGBToUV411Row_NEON,
         ARGBToUV411Row_C, 4, 31, 2)
UV422ANY(YUY2ToUV422Row_Any_NEON,
         YUY2ToUV422Row_NEON,
         YUY2ToUV422Row_C, 2, 15, 1)
UV422ANY(UYVYToUV422Row_Any_NEON,
         UYVYToUV422Row_NEON,
         UYVYToUV422Row_C, 2, 15, 1)
#endif
#undef UV422ANY

// SPLITUVROWANY emits NAMEANY(src_uv, dst_u, dst_v, width). SPLIT_SIMD
// handles the leading (width & ~MASK) elements and SPLIT_C the trailing
// (width & MASK). For the C call src_uv is advanced by two per element and
// dst_u / dst_v by one per element. MASK must be 2^k - 1.
#define SPLITUVROWANY(NAMEANY, SPLIT_SIMD, SPLIT_C, MASK) \
    void NAMEANY(const uint8* src_uv, \
                 uint8* dst_u, uint8* dst_v, int width) { \
      const int simd_count = width & ~(MASK); \
      const int tail_count = width & (MASK); \
      SPLIT_SIMD(src_uv, dst_u, dst_v, simd_count); \
      SPLIT_C(src_uv + simd_count * 2, \
              dst_u + simd_count, \
              dst_v + simd_count, \
              tail_count); \
    }

#if defined(HAS_SPLITUVROW_SSE2)
SPLITUVROWANY(SplitUVRow_Any_SSE2,
              SplitUVRow_Unaligned_SSE2,
              SplitUVRow_C, 15)
#endif
#if defined(HAS_SPLITUVROW_AVX2)
SPLITUVROWANY(SplitUVRow_Any_AVX2,
              SplitUVRow_AVX2,
              SplitUVRow_C, 31)
#endif
#if defined(HAS_SPLITUVROW_NEON)
SPLITUVROWANY(SplitUVRow_Any_NEON,
              SplitUVRow_NEON,
              SplitUVRow_C, 15)
#endif
#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2,
              SplitUVRow_Unaligned_MIPS_DSPR2,
              SplitUVRow_C, 15)
#endif
#undef SPLITUVROWANY

// MERGEUVROW_ANY emits NAMEANY(src_u, src_v, dst_uv, width): the inverse
// pointer layout of the split wrapper. For the C call src_u / src_v are
// advanced by one per element and dst_uv by two per element.
// MASK must be 2^k - 1.
#define MERGEUVROW_ANY(NAMEANY, MERGE_SIMD, MERGE_C, MASK) \
    void NAMEANY(const uint8* src_u, const uint8* src_v, \
                 uint8* dst_uv, int width) { \
      const int simd_count = width & ~(MASK); \
      const int tail_count = width & (MASK); \
      MERGE_SIMD(src_u, src_v, dst_uv, simd_count); \
      MERGE_C(src_u + simd_count, \
              src_v + simd_count, \
              dst_uv + simd_count * 2, \
              tail_count); \
    }

#if defined(HAS_MERGEUVROW_SSE2)
MERGEUVROW_ANY(MergeUVRow_Any_SSE2,
               MergeUVRow_Unaligned_SSE2,
               MergeUVRow_C, 15)
#endif
#if defined(HAS_MERGEUVROW_AVX2)
MERGEUVROW_ANY(MergeUVRow_Any_AVX2,
               MergeUVRow_AVX2,
               MergeUVRow_C, 31)
#endif
#if defined(HAS_MERGEUVROW_NEON)
MERGEUVROW_ANY(MergeUVRow_Any_NEON,
               MergeUVRow_NEON,
               MergeUVRow_C, 15)
#endif
#undef MERGEUVROW_ANY

// MATHROW_ANY emits NAMEANY(src_argb0, src_argb1, dst_argb, width) for
// two-source row operations. MATH_SIMD handles the leading (width & ~MASK)
// pixels and MATH_C the trailing (width & MASK); all three pointers are
// advanced by 4 per pixel for the C call. MASK must be 2^k - 1.
#define MATHROW_ANY(NAMEANY, MATH_SIMD, MATH_C, MASK) \
    void NAMEANY(const uint8* src_argb0, const uint8* src_argb1, \
                 uint8* dst_argb, int width) { \
      const int simd_count = width & ~(MASK); \
      const int tail_count = width & (MASK); \
      const int byte_offset = simd_count * 4; \
      MATH_SIMD(src_argb0, src_argb1, dst_argb, simd_count); \
      MATH_C(src_argb0 + byte_offset, \
             src_argb1 + byte_offset, \
             dst_argb + byte_offset, \
             tail_count); \
    }

#if defined(HAS_ARGBMULTIPLYROW_SSE2)
MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2,
            ARGBMultiplyRow_C, 3)
#endif
#if defined(HAS_ARGBADDROW_SSE2)
MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2,
            ARGBAddRow_C, 3)
#endif
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2,
            ARGBSubtractRow_C, 3)
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2,
            ARGBMultiplyRow_C, 7)
#endif
#if defined(HAS_ARGBADDROW_AVX2)
MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2,
            ARGBAddRow_C, 7)
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2,
            ARGBSubtractRow_C, 7)
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON,
            ARGBMultiplyRow_C, 7)
#endif
#if defined(HAS_ARGBADDROW_NEON)
MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON,
            ARGBAddRow_C, 7)
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON,
            ARGBSubtractRow_C, 7)
#endif
#undef MATHROW_ANY

// Shuffle may want to work in place, so last16 method can not be used.
// YANY (shuffle flavour) emits NAMEANY(src_argb, dst_argb, shuffler, width).
// SHUFFLE_SIMD handles the leading (width & ~MASK) pixels and SHUFFLE_C the
// trailing (width & MASK); shuffler is forwarded unchanged to both. For the
// C call src_argb is advanced by SBPP and dst_argb by BPP per pixel already
// handled. MASK must be 2^k - 1.
#define YANY(NAMEANY, SHUFFLE_SIMD, SHUFFLE_C, SBPP, BPP, MASK) \
    void NAMEANY(const uint8* src_argb, uint8* dst_argb, \
                 const uint8* shuffler, int width) { \
      const int simd_count = width & ~(MASK); \
      const int tail_count = width & (MASK); \
      SHUFFLE_SIMD(src_argb, dst_argb, shuffler, simd_count); \
      SHUFFLE_C(src_argb + simd_count * (SBPP), \
                dst_argb + simd_count * (BPP), \
                shuffler, \
                tail_count); \
    }

#if defined(HAS_ARGBSHUFFLEROW_SSE2)
YANY(ARGBShuffleRow_Any_SSE2,
     ARGBShuffleRow_SSE2,
     ARGBShuffleRow_C, 4, 4, 3)
#endif
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
YANY(ARGBShuffleRow_Any_SSSE3,
     ARGBShuffleRow_Unaligned_SSSE3,
     ARGBShuffleRow_C, 4, 4, 7)
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
YANY(ARGBShuffleRow_Any_AVX2,
     ARGBShuffleRow_AVX2,
     ARGBShuffleRow_C, 4, 4, 15)
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
YANY(ARGBShuffleRow_Any_NEON,
     ARGBShuffleRow_NEON,
     ARGBShuffleRow_C, 4, 4, 3)
#endif
#undef YANY

// Interpolate may want to work in place, so last16 method can not be used.
1.508 +#define NANY(NAMEANY, TERP_SIMD, TERP_C, SBPP, BPP, MASK) \ 1.509 + void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \ 1.510 + ptrdiff_t src_stride_ptr, int width, \ 1.511 + int source_y_fraction) { \ 1.512 + int n = width & ~MASK; \ 1.513 + TERP_SIMD(dst_ptr, src_ptr, src_stride_ptr, \ 1.514 + n, source_y_fraction); \ 1.515 + TERP_C(dst_ptr + n * BPP, \ 1.516 + src_ptr + n * SBPP, src_stride_ptr, \ 1.517 + width & MASK, source_y_fraction); \ 1.518 + } 1.519 + 1.520 +#ifdef HAS_INTERPOLATEROW_AVX2 1.521 +NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1.522 + InterpolateRow_C, 1, 1, 32) 1.523 +#endif 1.524 +#ifdef HAS_INTERPOLATEROW_SSSE3 1.525 +NANY(InterpolateRow_Any_SSSE3, InterpolateRow_Unaligned_SSSE3, 1.526 + InterpolateRow_C, 1, 1, 15) 1.527 +#endif 1.528 +#ifdef HAS_INTERPOLATEROW_SSE2 1.529 +NANY(InterpolateRow_Any_SSE2, InterpolateRow_Unaligned_SSE2, 1.530 + InterpolateRow_C, 1, 1, 15) 1.531 +#endif 1.532 +#ifdef HAS_INTERPOLATEROW_NEON 1.533 +NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1.534 + InterpolateRow_C, 1, 1, 15) 1.535 +#endif 1.536 +#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2 1.537 +NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, 1.538 + InterpolateRow_C, 1, 1, 3) 1.539 +#endif 1.540 +#undef NANY 1.541 + 1.542 +#ifdef __cplusplus 1.543 +} // extern "C" 1.544 +} // namespace libyuv 1.545 +#endif