1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libyuv/source/scale_common.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,772 @@ 1.4 +/* 1.5 + * Copyright 2013 The LibYuv Project Authors. All rights reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include "libyuv/scale.h" 1.15 + 1.16 +#include <assert.h> 1.17 +#include <string.h> 1.18 + 1.19 +#include "libyuv/cpu_id.h" 1.20 +#include "libyuv/planar_functions.h" // For CopyARGB 1.21 +#include "libyuv/row.h" 1.22 +#include "libyuv/scale_row.h" 1.23 + 1.24 +#ifdef __cplusplus 1.25 +namespace libyuv { 1.26 +extern "C" { 1.27 +#endif 1.28 + 1.29 +static __inline int Abs(int v) { 1.30 + return v >= 0 ? v : -v; 1.31 +} 1.32 + 1.33 +// CPU agnostic row functions 1.34 +void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.35 + uint8* dst, int dst_width) { 1.36 + int x; 1.37 + for (x = 0; x < dst_width - 1; x += 2) { 1.38 + dst[0] = src_ptr[1]; 1.39 + dst[1] = src_ptr[3]; 1.40 + dst += 2; 1.41 + src_ptr += 4; 1.42 + } 1.43 + if (dst_width & 1) { 1.44 + dst[0] = src_ptr[1]; 1.45 + } 1.46 +} 1.47 + 1.48 +void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.49 + uint8* dst, int dst_width) { 1.50 + const uint8* s = src_ptr; 1.51 + int x; 1.52 + for (x = 0; x < dst_width - 1; x += 2) { 1.53 + dst[0] = (s[0] + s[1] + 1) >> 1; 1.54 + dst[1] = (s[2] + s[3] + 1) >> 1; 1.55 + dst += 2; 1.56 + s += 4; 1.57 + } 1.58 + if (dst_width & 1) { 1.59 + dst[0] = (s[0] + s[1] + 1) >> 1; 1.60 + } 1.61 +} 1.62 + 1.63 +void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.64 + uint8* dst, int dst_width) { 1.65 + const uint8* s = src_ptr; 1.66 + const uint8* t = src_ptr + src_stride; 1.67 + int x; 1.68 + for (x = 0; x < dst_width - 1; x += 2) { 1.69 + dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; 1.70 + dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2; 1.71 + dst += 2; 1.72 + s += 4; 1.73 + t += 4; 1.74 + } 1.75 + if (dst_width & 1) { 1.76 + dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; 1.77 + } 1.78 +} 1.79 + 1.80 +void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.81 + uint8* dst, int dst_width) { 1.82 + int x; 1.83 + for (x = 0; x < dst_width - 1; x += 2) { 1.84 + dst[0] = src_ptr[2]; 1.85 + dst[1] = src_ptr[6]; 1.86 + dst += 2; 1.87 + src_ptr += 8; 1.88 + } 1.89 + if (dst_width & 1) { 1.90 + dst[0] = src_ptr[2]; 1.91 + } 1.92 +} 1.93 + 1.94 +void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.95 + uint8* dst, int dst_width) { 1.96 + intptr_t stride = src_stride; 1.97 + int x; 1.98 + for (x = 0; x < dst_width - 1; x += 2) { 1.99 + dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + 1.100 + src_ptr[stride + 0] + src_ptr[stride + 1] + 1.101 + src_ptr[stride + 2] + src_ptr[stride + 3] + 1.102 + src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + 1.103 + src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + 1.104 + src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + 1.105 + src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + 1.106 + 8) >> 4; 1.107 + dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] + 1.108 + src_ptr[stride + 4] + src_ptr[stride + 5] + 1.109 + src_ptr[stride + 6] + src_ptr[stride + 7] + 1.110 + src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] + 1.111 + src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] + 1.112 + src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] + 1.113 + src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] + 1.114 + 8) >> 4; 1.115 + dst += 2; 1.116 + src_ptr += 8; 1.117 + } 1.118 + if (dst_width & 1) { 1.119 + dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + 1.120 + src_ptr[stride + 0] + src_ptr[stride + 1] + 1.121 + src_ptr[stride + 2] + src_ptr[stride + 3] + 1.122 + src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + 1.123 + src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + 1.124 + src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + 1.125 + src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + 1.126 + 8) >> 4; 1.127 + } 1.128 +} 1.129 + 1.130 +void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.131 + uint8* dst, int dst_width) { 1.132 + int x; 1.133 + assert((dst_width % 3 == 0) && (dst_width > 0)); 1.134 + for (x = 0; x < dst_width; x += 3) { 1.135 + dst[0] = src_ptr[0]; 1.136 + dst[1] = src_ptr[1]; 1.137 + dst[2] = src_ptr[3]; 1.138 + dst += 3; 1.139 + src_ptr += 4; 1.140 + } 1.141 +} 1.142 + 1.143 +// Filter rows 0 and 1 together, 3 : 1 1.144 +void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.145 + uint8* d, int dst_width) { 1.146 + const uint8* s = src_ptr; 1.147 + const uint8* t = src_ptr + src_stride; 1.148 + int x; 1.149 + assert((dst_width % 3 == 0) && (dst_width > 0)); 1.150 + for (x = 0; x < dst_width; x += 3) { 1.151 + uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; 1.152 + uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; 1.153 + uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; 1.154 + uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; 1.155 + uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; 1.156 + uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; 1.157 + d[0] = (a0 * 3 + b0 + 2) >> 2; 1.158 + d[1] = (a1 * 3 + b1 + 2) >> 2; 1.159 + d[2] = (a2 * 3 + b2 + 2) >> 2; 1.160 + d += 3; 1.161 + s += 4; 1.162 + t += 4; 1.163 + } 1.164 +} 1.165 + 1.166 +// Filter rows 1 and 2 together, 1 : 1 1.167 +void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.168 + uint8* d, int dst_width) { 1.169 + const uint8* s = src_ptr; 1.170 + const uint8* t = src_ptr + src_stride; 1.171 + int x; 1.172 + assert((dst_width % 3 == 0) && (dst_width > 0)); 1.173 + for (x = 0; x < dst_width; x += 3) { 1.174 + uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; 1.175 + uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; 1.176 + uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; 1.177 + uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; 1.178 + uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; 1.179 + uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; 1.180 + d[0] = (a0 + b0 + 1) >> 1; 1.181 + d[1] = (a1 + b1 + 1) >> 1; 1.182 + d[2] = (a2 + b2 + 1) >> 1; 1.183 + d += 3; 1.184 + s += 4; 1.185 + t += 4; 1.186 + } 1.187 +} 1.188 + 1.189 +// Scales a single row of pixels using point sampling. 1.190 +void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, 1.191 + int dst_width, int x, int dx) { 1.192 + int j; 1.193 + for (j = 0; j < dst_width - 1; j += 2) { 1.194 + dst_ptr[0] = src_ptr[x >> 16]; 1.195 + x += dx; 1.196 + dst_ptr[1] = src_ptr[x >> 16]; 1.197 + x += dx; 1.198 + dst_ptr += 2; 1.199 + } 1.200 + if (dst_width & 1) { 1.201 + dst_ptr[0] = src_ptr[x >> 16]; 1.202 + } 1.203 +} 1.204 + 1.205 +// Scales a single row of pixels up by 2x using point sampling. 1.206 +void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, 1.207 + int dst_width, int x, int dx) { 1.208 + int j; 1.209 + for (j = 0; j < dst_width - 1; j += 2) { 1.210 + dst_ptr[1] = dst_ptr[0] = src_ptr[0]; 1.211 + src_ptr += 1; 1.212 + dst_ptr += 2; 1.213 + } 1.214 + if (dst_width & 1) { 1.215 + dst_ptr[0] = src_ptr[0]; 1.216 + } 1.217 +} 1.218 + 1.219 +// (1-f)a + fb can be replaced with a + f(b-a) 1.220 +#define BLENDER(a, b, f) (uint8)((int)(a) + \ 1.221 + ((int)(f) * ((int)(b) - (int)(a)) >> 16)) 1.222 + 1.223 +void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, 1.224 + int dst_width, int x, int dx) { 1.225 + int j; 1.226 + for (j = 0; j < dst_width - 1; j += 2) { 1.227 + int xi = x >> 16; 1.228 + int a = src_ptr[xi]; 1.229 + int b = src_ptr[xi + 1]; 1.230 + dst_ptr[0] = BLENDER(a, b, x & 0xffff); 1.231 + x += dx; 1.232 + xi = x >> 16; 1.233 + a = src_ptr[xi]; 1.234 + b = src_ptr[xi + 1]; 1.235 + dst_ptr[1] = BLENDER(a, b, x & 0xffff); 1.236 + x += dx; 1.237 + dst_ptr += 2; 1.238 + } 1.239 + if (dst_width & 1) { 1.240 + int xi = x >> 16; 1.241 + int a = src_ptr[xi]; 1.242 + int b = src_ptr[xi + 1]; 1.243 + dst_ptr[0] = BLENDER(a, b, x & 0xffff); 1.244 + } 1.245 +} 1.246 + 1.247 +void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, 1.248 + int dst_width, int x32, int dx) { 1.249 + int64 x = (int64)(x32); 1.250 + int j; 1.251 + for (j = 0; j < dst_width - 1; j += 2) { 1.252 + int64 xi = x >> 16; 1.253 + int a = src_ptr[xi]; 1.254 + int b = src_ptr[xi + 1]; 1.255 + dst_ptr[0] = BLENDER(a, b, x & 0xffff); 1.256 + x += dx; 1.257 + xi = x >> 16; 1.258 + a = src_ptr[xi]; 1.259 + b = src_ptr[xi + 1]; 1.260 + dst_ptr[1] = BLENDER(a, b, x & 0xffff); 1.261 + x += dx; 1.262 + dst_ptr += 2; 1.263 + } 1.264 + if (dst_width & 1) { 1.265 + int64 xi = x >> 16; 1.266 + int a = src_ptr[xi]; 1.267 + int b = src_ptr[xi + 1]; 1.268 + dst_ptr[0] = BLENDER(a, b, x & 0xffff); 1.269 + } 1.270 +} 1.271 +#undef BLENDER 1.272 + 1.273 +void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.274 + uint8* dst, int dst_width) { 1.275 + int x; 1.276 + assert(dst_width % 3 == 0); 1.277 + for (x = 0; x < dst_width; x += 3) { 1.278 + dst[0] = src_ptr[0]; 1.279 + dst[1] = src_ptr[3]; 1.280 + dst[2] = src_ptr[6]; 1.281 + dst += 3; 1.282 + src_ptr += 8; 1.283 + } 1.284 +} 1.285 + 1.286 +// 8x3 -> 3x1 1.287 +void ScaleRowDown38_3_Box_C(const uint8* src_ptr, 1.288 + ptrdiff_t src_stride, 1.289 + uint8* dst_ptr, int dst_width) { 1.290 + intptr_t stride = src_stride; 1.291 + int i; 1.292 + assert((dst_width % 3 == 0) && (dst_width > 0)); 1.293 + for (i = 0; i < dst_width; i += 3) { 1.294 + dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + 1.295 + src_ptr[stride + 0] + src_ptr[stride + 1] + 1.296 + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + 1.297 + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * 1.298 + (65536 / 9) >> 16; 1.299 + dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + 1.300 + src_ptr[stride + 3] + src_ptr[stride + 4] + 1.301 + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + 1.302 + src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * 1.303 + (65536 / 9) >> 16; 1.304 + dst_ptr[2] = (src_ptr[6] + src_ptr[7] + 1.305 + src_ptr[stride + 6] + src_ptr[stride + 7] + 1.306 + src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * 1.307 + (65536 / 6) >> 16; 1.308 + src_ptr += 8; 1.309 + dst_ptr += 3; 1.310 + } 1.311 +} 1.312 + 1.313 +// 8x2 -> 3x1 1.314 +void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.315 + uint8* dst_ptr, int dst_width) { 1.316 + intptr_t stride = src_stride; 1.317 + int i; 1.318 + assert((dst_width % 3 == 0) && (dst_width > 0)); 1.319 + for (i = 0; i < dst_width; i += 3) { 1.320 + dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + 1.321 + src_ptr[stride + 0] + src_ptr[stride + 1] + 1.322 + src_ptr[stride + 2]) * (65536 / 6) >> 16; 1.323 + dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + 1.324 + src_ptr[stride + 3] + src_ptr[stride + 4] + 1.325 + src_ptr[stride + 5]) * (65536 / 6) >> 16; 1.326 + dst_ptr[2] = (src_ptr[6] + src_ptr[7] + 1.327 + src_ptr[stride + 6] + src_ptr[stride + 7]) * 1.328 + (65536 / 4) >> 16; 1.329 + src_ptr += 8; 1.330 + dst_ptr += 3; 1.331 + } 1.332 +} 1.333 + 1.334 +void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, 1.335 + uint16* dst_ptr, int src_width, int src_height) { 1.336 + int x; 1.337 + assert(src_width > 0); 1.338 + assert(src_height > 0); 1.339 + for (x = 0; x < src_width; ++x) { 1.340 + const uint8* s = src_ptr + x; 1.341 + unsigned int sum = 0u; 1.342 + int y; 1.343 + for (y = 0; y < src_height; ++y) { 1.344 + sum += s[0]; 1.345 + s += src_stride; 1.346 + } 1.347 + // TODO(fbarchard): Consider limitting height to 256 to avoid overflow. 1.348 + dst_ptr[x] = sum < 65535u ? sum : 65535u; 1.349 + } 1.350 +} 1.351 + 1.352 +void ScaleARGBRowDown2_C(const uint8* src_argb, 1.353 + ptrdiff_t src_stride, 1.354 + uint8* dst_argb, int dst_width) { 1.355 + const uint32* src = (const uint32*)(src_argb); 1.356 + uint32* dst = (uint32*)(dst_argb); 1.357 + 1.358 + int x; 1.359 + for (x = 0; x < dst_width - 1; x += 2) { 1.360 + dst[0] = src[1]; 1.361 + dst[1] = src[3]; 1.362 + src += 4; 1.363 + dst += 2; 1.364 + } 1.365 + if (dst_width & 1) { 1.366 + dst[0] = src[1]; 1.367 + } 1.368 +} 1.369 + 1.370 +void ScaleARGBRowDown2Linear_C(const uint8* src_argb, 1.371 + ptrdiff_t src_stride, 1.372 + uint8* dst_argb, int dst_width) { 1.373 + int x; 1.374 + for (x = 0; x < dst_width; ++x) { 1.375 + dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1; 1.376 + dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1; 1.377 + dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1; 1.378 + dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1; 1.379 + src_argb += 8; 1.380 + dst_argb += 4; 1.381 + } 1.382 +} 1.383 + 1.384 +void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride, 1.385 + uint8* dst_argb, int dst_width) { 1.386 + int x; 1.387 + for (x = 0; x < dst_width; ++x) { 1.388 + dst_argb[0] = (src_argb[0] + src_argb[4] + 1.389 + src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2; 1.390 + dst_argb[1] = (src_argb[1] + src_argb[5] + 1.391 + src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2; 1.392 + dst_argb[2] = (src_argb[2] + src_argb[6] + 1.393 + src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2; 1.394 + dst_argb[3] = (src_argb[3] + src_argb[7] + 1.395 + src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2; 1.396 + src_argb += 8; 1.397 + dst_argb += 4; 1.398 + } 1.399 +} 1.400 + 1.401 +void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride, 1.402 + int src_stepx, 1.403 + uint8* dst_argb, int dst_width) { 1.404 + const uint32* src = (const uint32*)(src_argb); 1.405 + uint32* dst = (uint32*)(dst_argb); 1.406 + 1.407 + int x; 1.408 + for (x = 0; x < dst_width - 1; x += 2) { 1.409 + dst[0] = src[0]; 1.410 + dst[1] = src[src_stepx]; 1.411 + src += src_stepx * 2; 1.412 + dst += 2; 1.413 + } 1.414 + if (dst_width & 1) { 1.415 + dst[0] = src[0]; 1.416 + } 1.417 +} 1.418 + 1.419 +void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, 1.420 + ptrdiff_t src_stride, 1.421 + int src_stepx, 1.422 + uint8* dst_argb, int dst_width) { 1.423 + int x; 1.424 + for (x = 0; x < dst_width; ++x) { 1.425 + dst_argb[0] = (src_argb[0] + src_argb[4] + 1.426 + src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2; 1.427 + dst_argb[1] = (src_argb[1] + src_argb[5] + 1.428 + src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2; 1.429 + dst_argb[2] = (src_argb[2] + src_argb[6] + 1.430 + src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2; 1.431 + dst_argb[3] = (src_argb[3] + src_argb[7] + 1.432 + src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2; 1.433 + src_argb += src_stepx * 4; 1.434 + dst_argb += 4; 1.435 + } 1.436 +} 1.437 + 1.438 +// Scales a single row of pixels using point sampling. 1.439 +void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, 1.440 + int dst_width, int x, int dx) { 1.441 + const uint32* src = (const uint32*)(src_argb); 1.442 + uint32* dst = (uint32*)(dst_argb); 1.443 + int j; 1.444 + for (j = 0; j < dst_width - 1; j += 2) { 1.445 + dst[0] = src[x >> 16]; 1.446 + x += dx; 1.447 + dst[1] = src[x >> 16]; 1.448 + x += dx; 1.449 + dst += 2; 1.450 + } 1.451 + if (dst_width & 1) { 1.452 + dst[0] = src[x >> 16]; 1.453 + } 1.454 +} 1.455 + 1.456 +void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb, 1.457 + int dst_width, int x32, int dx) { 1.458 + int64 x = (int64)(x32); 1.459 + const uint32* src = (const uint32*)(src_argb); 1.460 + uint32* dst = (uint32*)(dst_argb); 1.461 + int j; 1.462 + for (j = 0; j < dst_width - 1; j += 2) { 1.463 + dst[0] = src[x >> 16]; 1.464 + x += dx; 1.465 + dst[1] = src[x >> 16]; 1.466 + x += dx; 1.467 + dst += 2; 1.468 + } 1.469 + if (dst_width & 1) { 1.470 + dst[0] = src[x >> 16]; 1.471 + } 1.472 +} 1.473 + 1.474 +// Scales a single row of pixels up by 2x using point sampling. 1.475 +void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, 1.476 + int dst_width, int x, int dx) { 1.477 + const uint32* src = (const uint32*)(src_argb); 1.478 + uint32* dst = (uint32*)(dst_argb); 1.479 + int j; 1.480 + for (j = 0; j < dst_width - 1; j += 2) { 1.481 + dst[1] = dst[0] = src[0]; 1.482 + src += 1; 1.483 + dst += 2; 1.484 + } 1.485 + if (dst_width & 1) { 1.486 + dst[0] = src[0]; 1.487 + } 1.488 +} 1.489 + 1.490 +// Mimics SSSE3 blender 1.491 +#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7 1.492 +#define BLENDERC(a, b, f, s) (uint32)( \ 1.493 + BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) 1.494 +#define BLENDER(a, b, f) \ 1.495 + BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \ 1.496 + BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0) 1.497 + 1.498 +void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, 1.499 + int dst_width, int x, int dx) { 1.500 + const uint32* src = (const uint32*)(src_argb); 1.501 + uint32* dst = (uint32*)(dst_argb); 1.502 + int j; 1.503 + for (j = 0; j < dst_width - 1; j += 2) { 1.504 + int xi = x >> 16; 1.505 + int xf = (x >> 9) & 0x7f; 1.506 + uint32 a = src[xi]; 1.507 + uint32 b = src[xi + 1]; 1.508 + dst[0] = BLENDER(a, b, xf); 1.509 + x += dx; 1.510 + xi = x >> 16; 1.511 + xf = (x >> 9) & 0x7f; 1.512 + a = src[xi]; 1.513 + b = src[xi + 1]; 1.514 + dst[1] = BLENDER(a, b, xf); 1.515 + x += dx; 1.516 + dst += 2; 1.517 + } 1.518 + if (dst_width & 1) { 1.519 + int xi = x >> 16; 1.520 + int xf = (x >> 9) & 0x7f; 1.521 + uint32 a = src[xi]; 1.522 + uint32 b = src[xi + 1]; 1.523 + dst[0] = BLENDER(a, b, xf); 1.524 + } 1.525 +} 1.526 + 1.527 +void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, 1.528 + int dst_width, int x32, int dx) { 1.529 + int64 x = (int64)(x32); 1.530 + const uint32* src = (const uint32*)(src_argb); 1.531 + uint32* dst = (uint32*)(dst_argb); 1.532 + int j; 1.533 + for (j = 0; j < dst_width - 1; j += 2) { 1.534 + int64 xi = x >> 16; 1.535 + int xf = (x >> 9) & 0x7f; 1.536 + uint32 a = src[xi]; 1.537 + uint32 b = src[xi + 1]; 1.538 + dst[0] = BLENDER(a, b, xf); 1.539 + x += dx; 1.540 + xi = x >> 16; 1.541 + xf = (x >> 9) & 0x7f; 1.542 + a = src[xi]; 1.543 + b = src[xi + 1]; 1.544 + dst[1] = BLENDER(a, b, xf); 1.545 + x += dx; 1.546 + dst += 2; 1.547 + } 1.548 + if (dst_width & 1) { 1.549 + int64 xi = x >> 16; 1.550 + int xf = (x >> 9) & 0x7f; 1.551 + uint32 a = src[xi]; 1.552 + uint32 b = src[xi + 1]; 1.553 + dst[0] = BLENDER(a, b, xf); 1.554 + } 1.555 +} 1.556 +#undef BLENDER1 1.557 +#undef BLENDERC 1.558 +#undef BLENDER 1.559 + 1.560 +// Scale plane vertically with bilinear interpolation. 1.561 +void ScalePlaneVertical(int src_height, 1.562 + int dst_width, int dst_height, 1.563 + int src_stride, int dst_stride, 1.564 + const uint8* src_argb, uint8* dst_argb, 1.565 + int x, int y, int dy, 1.566 + int bpp, enum FilterMode filtering) { 1.567 + // TODO(fbarchard): Allow higher bpp. 1.568 + int dst_width_bytes = dst_width * bpp; 1.569 + void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, 1.570 + ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 1.571 + InterpolateRow_C; 1.572 + const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; 1.573 + int j; 1.574 + assert(bpp >= 1 && bpp <= 4); 1.575 + assert(src_height != 0); 1.576 + assert(dst_width > 0); 1.577 + assert(dst_height > 0); 1.578 + src_argb += (x >> 16) * bpp; 1.579 +#if defined(HAS_INTERPOLATEROW_SSE2) 1.580 + if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) { 1.581 + InterpolateRow = InterpolateRow_Any_SSE2; 1.582 + if (IS_ALIGNED(dst_width_bytes, 16)) { 1.583 + InterpolateRow = InterpolateRow_Unaligned_SSE2; 1.584 + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && 1.585 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 1.586 + InterpolateRow = InterpolateRow_SSE2; 1.587 + } 1.588 + } 1.589 + } 1.590 +#endif 1.591 +#if defined(HAS_INTERPOLATEROW_SSSE3) 1.592 + if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) { 1.593 + InterpolateRow = InterpolateRow_Any_SSSE3; 1.594 + if (IS_ALIGNED(dst_width_bytes, 16)) { 1.595 + InterpolateRow = InterpolateRow_Unaligned_SSSE3; 1.596 + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && 1.597 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 1.598 + InterpolateRow = InterpolateRow_SSSE3; 1.599 + } 1.600 + } 1.601 + } 1.602 +#endif 1.603 +#if defined(HAS_INTERPOLATEROW_AVX2) 1.604 + if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) { 1.605 + InterpolateRow = InterpolateRow_Any_AVX2; 1.606 + if (IS_ALIGNED(dst_width_bytes, 32)) { 1.607 + InterpolateRow = InterpolateRow_AVX2; 1.608 + } 1.609 + } 1.610 +#endif 1.611 +#if defined(HAS_INTERPOLATEROW_NEON) 1.612 + if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) { 1.613 + InterpolateRow = InterpolateRow_Any_NEON; 1.614 + if (IS_ALIGNED(dst_width_bytes, 16)) { 1.615 + InterpolateRow = InterpolateRow_NEON; 1.616 + } 1.617 + } 1.618 +#endif 1.619 +#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) 1.620 + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 && 1.621 + IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && 1.622 + IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { 1.623 + InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; 1.624 + if (IS_ALIGNED(dst_width_bytes, 4)) { 1.625 + InterpolateRow = InterpolateRow_MIPS_DSPR2; 1.626 + } 1.627 + } 1.628 +#endif 1.629 + for (j = 0; j < dst_height; ++j) { 1.630 + int yi; 1.631 + int yf; 1.632 + if (y > max_y) { 1.633 + y = max_y; 1.634 + } 1.635 + yi = y >> 16; 1.636 + yf = filtering ? ((y >> 8) & 255) : 0; 1.637 + InterpolateRow(dst_argb, src_argb + yi * src_stride, 1.638 + src_stride, dst_width_bytes, yf); 1.639 + dst_argb += dst_stride; 1.640 + y += dy; 1.641 + } 1.642 +} 1.643 + 1.644 +// Simplify the filtering based on scale factors. 1.645 +enum FilterMode ScaleFilterReduce(int src_width, int src_height, 1.646 + int dst_width, int dst_height, 1.647 + enum FilterMode filtering) { 1.648 + if (src_width < 0) { 1.649 + src_width = -src_width; 1.650 + } 1.651 + if (src_height < 0) { 1.652 + src_height = -src_height; 1.653 + } 1.654 + if (filtering == kFilterBox) { 1.655 + // If scaling both axis to 0.5 or larger, switch from Box to Bilinear. 1.656 + if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) { 1.657 + filtering = kFilterBilinear; 1.658 + } 1.659 + // If scaling to larger, switch from Box to Bilinear. 1.660 + if (dst_width >= src_width || dst_height >= src_height) { 1.661 + filtering = kFilterBilinear; 1.662 + } 1.663 + } 1.664 + if (filtering == kFilterBilinear) { 1.665 + if (src_height == 1) { 1.666 + filtering = kFilterLinear; 1.667 + } 1.668 + // TODO(fbarchard): Detect any odd scale factor and reduce to Linear. 1.669 + if (dst_height == src_height || dst_height * 3 == src_height) { 1.670 + filtering = kFilterLinear; 1.671 + } 1.672 + // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to 1.673 + // avoid reading 2 pixels horizontally that causes memory exception. 1.674 + if (src_width == 1) { 1.675 + filtering = kFilterNone; 1.676 + } 1.677 + } 1.678 + if (filtering == kFilterLinear) { 1.679 + if (src_width == 1) { 1.680 + filtering = kFilterNone; 1.681 + } 1.682 + // TODO(fbarchard): Detect any odd scale factor and reduce to None. 1.683 + if (dst_width == src_width || dst_width * 3 == src_width) { 1.684 + filtering = kFilterNone; 1.685 + } 1.686 + } 1.687 + return filtering; 1.688 +} 1.689 + 1.690 +// Divide num by div and return as 16.16 fixed point result. 1.691 +int FixedDiv_C(int num, int div) { 1.692 + return (int)(((int64)(num) << 16) / div); 1.693 +} 1.694 + 1.695 +// Divide num by div and return as 16.16 fixed point result. 1.696 +int FixedDiv1_C(int num, int div) { 1.697 + return (int)((((int64)(num) << 16) - 0x00010001) / 1.698 + (div - 1)); 1.699 +} 1.700 + 1.701 +#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s) 1.702 + 1.703 +// Compute slope values for stepping. 1.704 +void ScaleSlope(int src_width, int src_height, 1.705 + int dst_width, int dst_height, 1.706 + enum FilterMode filtering, 1.707 + int* x, int* y, int* dx, int* dy) { 1.708 + assert(x != NULL); 1.709 + assert(y != NULL); 1.710 + assert(dx != NULL); 1.711 + assert(dy != NULL); 1.712 + assert(src_width != 0); 1.713 + assert(src_height != 0); 1.714 + assert(dst_width > 0); 1.715 + assert(dst_height > 0); 1.716 + // Check for 1 pixel and avoid FixedDiv overflow. 1.717 + if (dst_width == 1 && src_width >= 32768) { 1.718 + dst_width = src_width; 1.719 + } 1.720 + if (dst_height == 1 && src_height >= 32768) { 1.721 + dst_height = src_height; 1.722 + } 1.723 + if (filtering == kFilterBox) { 1.724 + // Scale step for point sampling duplicates all pixels equally. 1.725 + *dx = FixedDiv(Abs(src_width), dst_width); 1.726 + *dy = FixedDiv(src_height, dst_height); 1.727 + *x = 0; 1.728 + *y = 0; 1.729 + } else if (filtering == kFilterBilinear) { 1.730 + // Scale step for bilinear sampling renders last pixel once for upsample. 1.731 + if (dst_width <= Abs(src_width)) { 1.732 + *dx = FixedDiv(Abs(src_width), dst_width); 1.733 + *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. 1.734 + } else if (dst_width > 1) { 1.735 + *dx = FixedDiv1(Abs(src_width), dst_width); 1.736 + *x = 0; 1.737 + } 1.738 + if (dst_height <= src_height) { 1.739 + *dy = FixedDiv(src_height, dst_height); 1.740 + *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter. 1.741 + } else if (dst_height > 1) { 1.742 + *dy = FixedDiv1(src_height, dst_height); 1.743 + *y = 0; 1.744 + } 1.745 + } else if (filtering == kFilterLinear) { 1.746 + // Scale step for bilinear sampling renders last pixel once for upsample. 1.747 + if (dst_width <= Abs(src_width)) { 1.748 + *dx = FixedDiv(Abs(src_width), dst_width); 1.749 + *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. 1.750 + } else if (dst_width > 1) { 1.751 + *dx = FixedDiv1(Abs(src_width), dst_width); 1.752 + *x = 0; 1.753 + } 1.754 + *dy = FixedDiv(src_height, dst_height); 1.755 + *y = *dy >> 1; 1.756 + } else { 1.757 + // Scale step for point sampling duplicates all pixels equally. 1.758 + *dx = FixedDiv(Abs(src_width), dst_width); 1.759 + *dy = FixedDiv(src_height, dst_height); 1.760 + *x = CENTERSTART(*dx, 0); 1.761 + *y = CENTERSTART(*dy, 0); 1.762 + } 1.763 + // Negative src_width means horizontally mirror. 1.764 + if (src_width < 0) { 1.765 + *x += (dst_width - 1) * *dx; 1.766 + *dx = -*dx; 1.767 + // src_width = -src_width; // Caller must do this. 1.768 + } 1.769 +} 1.770 +#undef CENTERSTART 1.771 + 1.772 +#ifdef __cplusplus 1.773 +} // extern "C" 1.774 +} // namespace libyuv 1.775 +#endif