michael@0: /* michael@0: * Copyright 2013 The LibYuv Project Authors. All rights reserved. michael@0: * michael@0: * Use of this source code is governed by a BSD-style license michael@0: * that can be found in the LICENSE file in the root of the source michael@0: * tree. An additional intellectual property rights grant can be found michael@0: * in the file PATENTS. All contributing project authors may michael@0: * be found in the AUTHORS file in the root of the source tree. michael@0: */ michael@0: michael@0: #include "libyuv/scale.h" michael@0: michael@0: #include michael@0: #include michael@0: michael@0: #include "libyuv/cpu_id.h" michael@0: #include "libyuv/planar_functions.h" // For CopyARGB michael@0: #include "libyuv/row.h" michael@0: #include "libyuv/scale_row.h" michael@0: michael@0: #ifdef __cplusplus michael@0: namespace libyuv { michael@0: extern "C" { michael@0: #endif michael@0: michael@0: static __inline int Abs(int v) { michael@0: return v >= 0 ? v : -v; michael@0: } michael@0: michael@0: // CPU agnostic row functions michael@0: void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint8* dst, int dst_width) { michael@0: int x; michael@0: for (x = 0; x < dst_width - 1; x += 2) { michael@0: dst[0] = src_ptr[1]; michael@0: dst[1] = src_ptr[3]; michael@0: dst += 2; michael@0: src_ptr += 4; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst[0] = src_ptr[1]; michael@0: } michael@0: } michael@0: michael@0: void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint8* dst, int dst_width) { michael@0: const uint8* s = src_ptr; michael@0: int x; michael@0: for (x = 0; x < dst_width - 1; x += 2) { michael@0: dst[0] = (s[0] + s[1] + 1) >> 1; michael@0: dst[1] = (s[2] + s[3] + 1) >> 1; michael@0: dst += 2; michael@0: s += 4; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst[0] = (s[0] + s[1] + 1) >> 1; michael@0: } michael@0: } michael@0: michael@0: void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint8* dst, int dst_width) { michael@0: const uint8* s = src_ptr; michael@0: const uint8* t = src_ptr + src_stride; michael@0: int x; michael@0: for (x = 0; x < dst_width - 1; x += 2) { michael@0: dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; michael@0: dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2; michael@0: dst += 2; michael@0: s += 4; michael@0: t += 4; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; michael@0: } michael@0: } michael@0: michael@0: void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint8* dst, int dst_width) { michael@0: int x; michael@0: for (x = 0; x < dst_width - 1; x += 2) { michael@0: dst[0] = src_ptr[2]; michael@0: dst[1] = src_ptr[6]; michael@0: dst += 2; michael@0: src_ptr += 8; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst[0] = src_ptr[2]; michael@0: } michael@0: } michael@0: michael@0: void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint8* dst, int dst_width) { michael@0: intptr_t stride = src_stride; michael@0: int x; michael@0: for (x = 0; x < dst_width - 1; x += 2) { michael@0: dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + michael@0: src_ptr[stride + 0] + src_ptr[stride + 1] + michael@0: src_ptr[stride + 2] + src_ptr[stride + 3] + michael@0: src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + michael@0: src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + michael@0: src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + michael@0: src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + michael@0: 8) >> 4; michael@0: dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] + michael@0: src_ptr[stride + 4] + src_ptr[stride + 5] + michael@0: src_ptr[stride + 6] + src_ptr[stride + 7] + michael@0: src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] + michael@0: src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] + michael@0: src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] + michael@0: src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] + michael@0: 8) >> 4; michael@0: dst += 2; michael@0: src_ptr += 8; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + michael@0: src_ptr[stride + 0] + src_ptr[stride + 1] + michael@0: src_ptr[stride + 2] + src_ptr[stride + 3] + michael@0: src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + michael@0: src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + michael@0: src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + michael@0: src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + michael@0: 8) >> 4; michael@0: } michael@0: } michael@0: michael@0: void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint8* dst, int dst_width) { michael@0: int x; michael@0: assert((dst_width % 3 == 0) && (dst_width > 0)); michael@0: for (x = 0; x < dst_width; x += 3) { michael@0: dst[0] = src_ptr[0]; michael@0: dst[1] = src_ptr[1]; michael@0: dst[2] = src_ptr[3]; michael@0: dst += 3; michael@0: src_ptr += 4; michael@0: } michael@0: } michael@0: michael@0: // Filter rows 0 and 1 together, 3 : 1 michael@0: void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint8* d, int dst_width) { michael@0: const uint8* s = src_ptr; michael@0: const uint8* t = src_ptr + src_stride; michael@0: int x; michael@0: assert((dst_width % 3 == 0) && (dst_width > 0)); michael@0: for (x = 0; x < dst_width; x += 3) { michael@0: uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; michael@0: uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; michael@0: uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; michael@0: uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; michael@0: uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; michael@0: uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; michael@0: d[0] = (a0 * 3 + b0 + 2) >> 2; michael@0: d[1] = (a1 * 3 + b1 + 2) >> 2; michael@0: d[2] = (a2 * 3 + b2 + 2) >> 2; michael@0: d += 3; michael@0: s += 4; michael@0: t += 4; michael@0: } michael@0: } michael@0: michael@0: // Filter rows 1 and 2 together, 1 : 1 michael@0: void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint8* d, int dst_width) { michael@0: const uint8* s = src_ptr; michael@0: const uint8* t = src_ptr + src_stride; michael@0: int x; michael@0: assert((dst_width % 3 == 0) && (dst_width > 0)); michael@0: for (x = 0; x < dst_width; x += 3) { michael@0: uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; michael@0: uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; michael@0: uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; michael@0: uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; michael@0: uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; michael@0: uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; michael@0: d[0] = (a0 + b0 + 1) >> 1; michael@0: d[1] = (a1 + b1 + 1) >> 1; michael@0: d[2] = (a2 + b2 + 1) >> 1; michael@0: d += 3; michael@0: s += 4; michael@0: t += 4; michael@0: } michael@0: } michael@0: michael@0: // Scales a single row of pixels using point sampling. michael@0: void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, michael@0: int dst_width, int x, int dx) { michael@0: int j; michael@0: for (j = 0; j < dst_width - 1; j += 2) { michael@0: dst_ptr[0] = src_ptr[x >> 16]; michael@0: x += dx; michael@0: dst_ptr[1] = src_ptr[x >> 16]; michael@0: x += dx; michael@0: dst_ptr += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst_ptr[0] = src_ptr[x >> 16]; michael@0: } michael@0: } michael@0: michael@0: // Scales a single row of pixels up by 2x using point sampling. michael@0: void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, michael@0: int dst_width, int x, int dx) { michael@0: int j; michael@0: for (j = 0; j < dst_width - 1; j += 2) { michael@0: dst_ptr[1] = dst_ptr[0] = src_ptr[0]; michael@0: src_ptr += 1; michael@0: dst_ptr += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst_ptr[0] = src_ptr[0]; michael@0: } michael@0: } michael@0: michael@0: // (1-f)a + fb can be replaced with a + f(b-a) michael@0: #define BLENDER(a, b, f) (uint8)((int)(a) + \ michael@0: ((int)(f) * ((int)(b) - (int)(a)) >> 16)) michael@0: michael@0: void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, michael@0: int dst_width, int x, int dx) { michael@0: int j; michael@0: for (j = 0; j < dst_width - 1; j += 2) { michael@0: int xi = x >> 16; michael@0: int a = src_ptr[xi]; michael@0: int b = src_ptr[xi + 1]; michael@0: dst_ptr[0] = BLENDER(a, b, x & 0xffff); michael@0: x += dx; michael@0: xi = x >> 16; michael@0: a = src_ptr[xi]; michael@0: b = src_ptr[xi + 1]; michael@0: dst_ptr[1] = BLENDER(a, b, x & 0xffff); michael@0: x += dx; michael@0: dst_ptr += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: int xi = x >> 16; michael@0: int a = src_ptr[xi]; michael@0: int b = src_ptr[xi + 1]; michael@0: dst_ptr[0] = BLENDER(a, b, x & 0xffff); michael@0: } michael@0: } michael@0: michael@0: void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, michael@0: int dst_width, int x32, int dx) { michael@0: int64 x = (int64)(x32); michael@0: int j; michael@0: for (j = 0; j < dst_width - 1; j += 2) { michael@0: int64 xi = x >> 16; michael@0: int a = src_ptr[xi]; michael@0: int b = src_ptr[xi + 1]; michael@0: dst_ptr[0] = BLENDER(a, b, x & 0xffff); michael@0: x += dx; michael@0: xi = x >> 16; michael@0: a = src_ptr[xi]; michael@0: b = src_ptr[xi + 1]; michael@0: dst_ptr[1] = BLENDER(a, b, x & 0xffff); michael@0: x += dx; michael@0: dst_ptr += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: int64 xi = x >> 16; michael@0: int a = src_ptr[xi]; michael@0: int b = src_ptr[xi + 1]; michael@0: dst_ptr[0] = BLENDER(a, b, x & 0xffff); michael@0: } michael@0: } michael@0: #undef BLENDER michael@0: michael@0: void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint8* dst, int dst_width) { michael@0: int x; michael@0: assert(dst_width % 3 == 0); michael@0: for (x = 0; x < dst_width; x += 3) { michael@0: dst[0] = src_ptr[0]; michael@0: dst[1] = src_ptr[3]; michael@0: dst[2] = src_ptr[6]; michael@0: dst += 3; michael@0: src_ptr += 8; michael@0: } michael@0: } michael@0: michael@0: // 8x3 -> 3x1 michael@0: void ScaleRowDown38_3_Box_C(const uint8* src_ptr, michael@0: ptrdiff_t src_stride, michael@0: uint8* dst_ptr, int dst_width) { michael@0: intptr_t stride = src_stride; michael@0: int i; michael@0: assert((dst_width % 3 == 0) && (dst_width > 0)); michael@0: for (i = 0; i < dst_width; i += 3) { michael@0: dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + michael@0: src_ptr[stride + 0] + src_ptr[stride + 1] + michael@0: src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + michael@0: src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * michael@0: (65536 / 9) >> 16; michael@0: dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + michael@0: src_ptr[stride + 3] + src_ptr[stride + 4] + michael@0: src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + michael@0: src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * michael@0: (65536 / 9) >> 16; michael@0: dst_ptr[2] = (src_ptr[6] + src_ptr[7] + michael@0: src_ptr[stride + 6] + src_ptr[stride + 7] + michael@0: src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * michael@0: (65536 / 6) >> 16; michael@0: src_ptr += 8; michael@0: dst_ptr += 3; michael@0: } michael@0: } michael@0: michael@0: // 8x2 -> 3x1 michael@0: void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint8* dst_ptr, int dst_width) { michael@0: intptr_t stride = src_stride; michael@0: int i; michael@0: assert((dst_width % 3 == 0) && (dst_width > 0)); michael@0: for (i = 0; i < dst_width; i += 3) { michael@0: dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + michael@0: src_ptr[stride + 0] + src_ptr[stride + 1] + michael@0: src_ptr[stride + 2]) * (65536 / 6) >> 16; michael@0: dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + michael@0: src_ptr[stride + 3] + src_ptr[stride + 4] + michael@0: src_ptr[stride + 5]) * (65536 / 6) >> 16; michael@0: dst_ptr[2] = (src_ptr[6] + src_ptr[7] + michael@0: src_ptr[stride + 6] + src_ptr[stride + 7]) * michael@0: (65536 / 4) >> 16; michael@0: src_ptr += 8; michael@0: dst_ptr += 3; michael@0: } michael@0: } michael@0: michael@0: void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, michael@0: uint16* dst_ptr, int src_width, int src_height) { michael@0: int x; michael@0: assert(src_width > 0); michael@0: assert(src_height > 0); michael@0: for (x = 0; x < src_width; ++x) { michael@0: const uint8* s = src_ptr + x; michael@0: unsigned int sum = 0u; michael@0: int y; michael@0: for (y = 0; y < src_height; ++y) { michael@0: sum += s[0]; michael@0: s += src_stride; michael@0: } michael@0: // TODO(fbarchard): Consider limitting height to 256 to avoid overflow. michael@0: dst_ptr[x] = sum < 65535u ? sum : 65535u; michael@0: } michael@0: } michael@0: michael@0: void ScaleARGBRowDown2_C(const uint8* src_argb, michael@0: ptrdiff_t src_stride, michael@0: uint8* dst_argb, int dst_width) { michael@0: const uint32* src = (const uint32*)(src_argb); michael@0: uint32* dst = (uint32*)(dst_argb); michael@0: michael@0: int x; michael@0: for (x = 0; x < dst_width - 1; x += 2) { michael@0: dst[0] = src[1]; michael@0: dst[1] = src[3]; michael@0: src += 4; michael@0: dst += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst[0] = src[1]; michael@0: } michael@0: } michael@0: michael@0: void ScaleARGBRowDown2Linear_C(const uint8* src_argb, michael@0: ptrdiff_t src_stride, michael@0: uint8* dst_argb, int dst_width) { michael@0: int x; michael@0: for (x = 0; x < dst_width; ++x) { michael@0: dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1; michael@0: dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1; michael@0: dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1; michael@0: dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1; michael@0: src_argb += 8; michael@0: dst_argb += 4; michael@0: } michael@0: } michael@0: michael@0: void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride, michael@0: uint8* dst_argb, int dst_width) { michael@0: int x; michael@0: for (x = 0; x < dst_width; ++x) { michael@0: dst_argb[0] = (src_argb[0] + src_argb[4] + michael@0: src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2; michael@0: dst_argb[1] = (src_argb[1] + src_argb[5] + michael@0: src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2; michael@0: dst_argb[2] = (src_argb[2] + src_argb[6] + michael@0: src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2; michael@0: dst_argb[3] = (src_argb[3] + src_argb[7] + michael@0: src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2; michael@0: src_argb += 8; michael@0: dst_argb += 4; michael@0: } michael@0: } michael@0: michael@0: void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride, michael@0: int src_stepx, michael@0: uint8* dst_argb, int dst_width) { michael@0: const uint32* src = (const uint32*)(src_argb); michael@0: uint32* dst = (uint32*)(dst_argb); michael@0: michael@0: int x; michael@0: for (x = 0; x < dst_width - 1; x += 2) { michael@0: dst[0] = src[0]; michael@0: dst[1] = src[src_stepx]; michael@0: src += src_stepx * 2; michael@0: dst += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst[0] = src[0]; michael@0: } michael@0: } michael@0: michael@0: void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, michael@0: ptrdiff_t src_stride, michael@0: int src_stepx, michael@0: uint8* dst_argb, int dst_width) { michael@0: int x; michael@0: for (x = 0; x < dst_width; ++x) { michael@0: dst_argb[0] = (src_argb[0] + src_argb[4] + michael@0: src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2; michael@0: dst_argb[1] = (src_argb[1] + src_argb[5] + michael@0: src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2; michael@0: dst_argb[2] = (src_argb[2] + src_argb[6] + michael@0: src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2; michael@0: dst_argb[3] = (src_argb[3] + src_argb[7] + michael@0: src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2; michael@0: src_argb += src_stepx * 4; michael@0: dst_argb += 4; michael@0: } michael@0: } michael@0: michael@0: // Scales a single row of pixels using point sampling. michael@0: void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, michael@0: int dst_width, int x, int dx) { michael@0: const uint32* src = (const uint32*)(src_argb); michael@0: uint32* dst = (uint32*)(dst_argb); michael@0: int j; michael@0: for (j = 0; j < dst_width - 1; j += 2) { michael@0: dst[0] = src[x >> 16]; michael@0: x += dx; michael@0: dst[1] = src[x >> 16]; michael@0: x += dx; michael@0: dst += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst[0] = src[x >> 16]; michael@0: } michael@0: } michael@0: michael@0: void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb, michael@0: int dst_width, int x32, int dx) { michael@0: int64 x = (int64)(x32); michael@0: const uint32* src = (const uint32*)(src_argb); michael@0: uint32* dst = (uint32*)(dst_argb); michael@0: int j; michael@0: for (j = 0; j < dst_width - 1; j += 2) { michael@0: dst[0] = src[x >> 16]; michael@0: x += dx; michael@0: dst[1] = src[x >> 16]; michael@0: x += dx; michael@0: dst += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst[0] = src[x >> 16]; michael@0: } michael@0: } michael@0: michael@0: // Scales a single row of pixels up by 2x using point sampling. michael@0: void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, michael@0: int dst_width, int x, int dx) { michael@0: const uint32* src = (const uint32*)(src_argb); michael@0: uint32* dst = (uint32*)(dst_argb); michael@0: int j; michael@0: for (j = 0; j < dst_width - 1; j += 2) { michael@0: dst[1] = dst[0] = src[0]; michael@0: src += 1; michael@0: dst += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: dst[0] = src[0]; michael@0: } michael@0: } michael@0: michael@0: // Mimics SSSE3 blender michael@0: #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7 michael@0: #define BLENDERC(a, b, f, s) (uint32)( \ michael@0: BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) michael@0: #define BLENDER(a, b, f) \ michael@0: BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \ michael@0: BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0) michael@0: michael@0: void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, michael@0: int dst_width, int x, int dx) { michael@0: const uint32* src = (const uint32*)(src_argb); michael@0: uint32* dst = (uint32*)(dst_argb); michael@0: int j; michael@0: for (j = 0; j < dst_width - 1; j += 2) { michael@0: int xi = x >> 16; michael@0: int xf = (x >> 9) & 0x7f; michael@0: uint32 a = src[xi]; michael@0: uint32 b = src[xi + 1]; michael@0: dst[0] = BLENDER(a, b, xf); michael@0: x += dx; michael@0: xi = x >> 16; michael@0: xf = (x >> 9) & 0x7f; michael@0: a = src[xi]; michael@0: b = src[xi + 1]; michael@0: dst[1] = BLENDER(a, b, xf); michael@0: x += dx; michael@0: dst += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: int xi = x >> 16; michael@0: int xf = (x >> 9) & 0x7f; michael@0: uint32 a = src[xi]; michael@0: uint32 b = src[xi + 1]; michael@0: dst[0] = BLENDER(a, b, xf); michael@0: } michael@0: } michael@0: michael@0: void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, michael@0: int dst_width, int x32, int dx) { michael@0: int64 x = (int64)(x32); michael@0: const uint32* src = (const uint32*)(src_argb); michael@0: uint32* dst = (uint32*)(dst_argb); michael@0: int j; michael@0: for (j = 0; j < dst_width - 1; j += 2) { michael@0: int64 xi = x >> 16; michael@0: int xf = (x >> 9) & 0x7f; michael@0: uint32 a = src[xi]; michael@0: uint32 b = src[xi + 1]; michael@0: dst[0] = BLENDER(a, b, xf); michael@0: x += dx; michael@0: xi = x >> 16; michael@0: xf = (x >> 9) & 0x7f; michael@0: a = src[xi]; michael@0: b = src[xi + 1]; michael@0: dst[1] = BLENDER(a, b, xf); michael@0: x += dx; michael@0: dst += 2; michael@0: } michael@0: if (dst_width & 1) { michael@0: int64 xi = x >> 16; michael@0: int xf = (x >> 9) & 0x7f; michael@0: uint32 a = src[xi]; michael@0: uint32 b = src[xi + 1]; michael@0: dst[0] = BLENDER(a, b, xf); michael@0: } michael@0: } michael@0: #undef BLENDER1 michael@0: #undef BLENDERC michael@0: #undef BLENDER michael@0: michael@0: // Scale plane vertically with bilinear interpolation. michael@0: void ScalePlaneVertical(int src_height, michael@0: int dst_width, int dst_height, michael@0: int src_stride, int dst_stride, michael@0: const uint8* src_argb, uint8* dst_argb, michael@0: int x, int y, int dy, michael@0: int bpp, enum FilterMode filtering) { michael@0: // TODO(fbarchard): Allow higher bpp. michael@0: int dst_width_bytes = dst_width * bpp; michael@0: void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, michael@0: ptrdiff_t src_stride, int dst_width, int source_y_fraction) = michael@0: InterpolateRow_C; michael@0: const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; michael@0: int j; michael@0: assert(bpp >= 1 && bpp <= 4); michael@0: assert(src_height != 0); michael@0: assert(dst_width > 0); michael@0: assert(dst_height > 0); michael@0: src_argb += (x >> 16) * bpp; michael@0: #if defined(HAS_INTERPOLATEROW_SSE2) michael@0: if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) { michael@0: InterpolateRow = InterpolateRow_Any_SSE2; michael@0: if (IS_ALIGNED(dst_width_bytes, 16)) { michael@0: InterpolateRow = InterpolateRow_Unaligned_SSE2; michael@0: if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && michael@0: IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { michael@0: InterpolateRow = InterpolateRow_SSE2; michael@0: } michael@0: } michael@0: } michael@0: #endif michael@0: #if defined(HAS_INTERPOLATEROW_SSSE3) michael@0: if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) { michael@0: InterpolateRow = InterpolateRow_Any_SSSE3; michael@0: if (IS_ALIGNED(dst_width_bytes, 16)) { michael@0: InterpolateRow = InterpolateRow_Unaligned_SSSE3; michael@0: if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && michael@0: IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { michael@0: InterpolateRow = InterpolateRow_SSSE3; michael@0: } michael@0: } michael@0: } michael@0: #endif michael@0: #if defined(HAS_INTERPOLATEROW_AVX2) michael@0: if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) { michael@0: InterpolateRow = InterpolateRow_Any_AVX2; michael@0: if (IS_ALIGNED(dst_width_bytes, 32)) { michael@0: InterpolateRow = InterpolateRow_AVX2; michael@0: } michael@0: } michael@0: #endif michael@0: #if defined(HAS_INTERPOLATEROW_NEON) michael@0: if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) { michael@0: InterpolateRow = InterpolateRow_Any_NEON; michael@0: if (IS_ALIGNED(dst_width_bytes, 16)) { michael@0: InterpolateRow = InterpolateRow_NEON; michael@0: } michael@0: } michael@0: #endif michael@0: #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) michael@0: if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 && michael@0: IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && michael@0: IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { michael@0: InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; michael@0: if (IS_ALIGNED(dst_width_bytes, 4)) { michael@0: InterpolateRow = InterpolateRow_MIPS_DSPR2; michael@0: } michael@0: } michael@0: #endif michael@0: for (j = 0; j < dst_height; ++j) { michael@0: int yi; michael@0: int yf; michael@0: if (y > max_y) { michael@0: y = max_y; michael@0: } michael@0: yi = y >> 16; michael@0: yf = filtering ? ((y >> 8) & 255) : 0; michael@0: InterpolateRow(dst_argb, src_argb + yi * src_stride, michael@0: src_stride, dst_width_bytes, yf); michael@0: dst_argb += dst_stride; michael@0: y += dy; michael@0: } michael@0: } michael@0: michael@0: // Simplify the filtering based on scale factors. michael@0: enum FilterMode ScaleFilterReduce(int src_width, int src_height, michael@0: int dst_width, int dst_height, michael@0: enum FilterMode filtering) { michael@0: if (src_width < 0) { michael@0: src_width = -src_width; michael@0: } michael@0: if (src_height < 0) { michael@0: src_height = -src_height; michael@0: } michael@0: if (filtering == kFilterBox) { michael@0: // If scaling both axis to 0.5 or larger, switch from Box to Bilinear. michael@0: if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) { michael@0: filtering = kFilterBilinear; michael@0: } michael@0: // If scaling to larger, switch from Box to Bilinear. michael@0: if (dst_width >= src_width || dst_height >= src_height) { michael@0: filtering = kFilterBilinear; michael@0: } michael@0: } michael@0: if (filtering == kFilterBilinear) { michael@0: if (src_height == 1) { michael@0: filtering = kFilterLinear; michael@0: } michael@0: // TODO(fbarchard): Detect any odd scale factor and reduce to Linear. michael@0: if (dst_height == src_height || dst_height * 3 == src_height) { michael@0: filtering = kFilterLinear; michael@0: } michael@0: // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to michael@0: // avoid reading 2 pixels horizontally that causes memory exception. michael@0: if (src_width == 1) { michael@0: filtering = kFilterNone; michael@0: } michael@0: } michael@0: if (filtering == kFilterLinear) { michael@0: if (src_width == 1) { michael@0: filtering = kFilterNone; michael@0: } michael@0: // TODO(fbarchard): Detect any odd scale factor and reduce to None. michael@0: if (dst_width == src_width || dst_width * 3 == src_width) { michael@0: filtering = kFilterNone; michael@0: } michael@0: } michael@0: return filtering; michael@0: } michael@0: michael@0: // Divide num by div and return as 16.16 fixed point result. michael@0: int FixedDiv_C(int num, int div) { michael@0: return (int)(((int64)(num) << 16) / div); michael@0: } michael@0: michael@0: // Divide num by div and return as 16.16 fixed point result. michael@0: int FixedDiv1_C(int num, int div) { michael@0: return (int)((((int64)(num) << 16) - 0x00010001) / michael@0: (div - 1)); michael@0: } michael@0: michael@0: #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s) michael@0: michael@0: // Compute slope values for stepping. michael@0: void ScaleSlope(int src_width, int src_height, michael@0: int dst_width, int dst_height, michael@0: enum FilterMode filtering, michael@0: int* x, int* y, int* dx, int* dy) { michael@0: assert(x != NULL); michael@0: assert(y != NULL); michael@0: assert(dx != NULL); michael@0: assert(dy != NULL); michael@0: assert(src_width != 0); michael@0: assert(src_height != 0); michael@0: assert(dst_width > 0); michael@0: assert(dst_height > 0); michael@0: // Check for 1 pixel and avoid FixedDiv overflow. michael@0: if (dst_width == 1 && src_width >= 32768) { michael@0: dst_width = src_width; michael@0: } michael@0: if (dst_height == 1 && src_height >= 32768) { michael@0: dst_height = src_height; michael@0: } michael@0: if (filtering == kFilterBox) { michael@0: // Scale step for point sampling duplicates all pixels equally. michael@0: *dx = FixedDiv(Abs(src_width), dst_width); michael@0: *dy = FixedDiv(src_height, dst_height); michael@0: *x = 0; michael@0: *y = 0; michael@0: } else if (filtering == kFilterBilinear) { michael@0: // Scale step for bilinear sampling renders last pixel once for upsample. michael@0: if (dst_width <= Abs(src_width)) { michael@0: *dx = FixedDiv(Abs(src_width), dst_width); michael@0: *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. michael@0: } else if (dst_width > 1) { michael@0: *dx = FixedDiv1(Abs(src_width), dst_width); michael@0: *x = 0; michael@0: } michael@0: if (dst_height <= src_height) { michael@0: *dy = FixedDiv(src_height, dst_height); michael@0: *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter. michael@0: } else if (dst_height > 1) { michael@0: *dy = FixedDiv1(src_height, dst_height); michael@0: *y = 0; michael@0: } michael@0: } else if (filtering == kFilterLinear) { michael@0: // Scale step for bilinear sampling renders last pixel once for upsample. michael@0: if (dst_width <= Abs(src_width)) { michael@0: *dx = FixedDiv(Abs(src_width), dst_width); michael@0: *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. michael@0: } else if (dst_width > 1) { michael@0: *dx = FixedDiv1(Abs(src_width), dst_width); michael@0: *x = 0; michael@0: } michael@0: *dy = FixedDiv(src_height, dst_height); michael@0: *y = *dy >> 1; michael@0: } else { michael@0: // Scale step for point sampling duplicates all pixels equally. michael@0: *dx = FixedDiv(Abs(src_width), dst_width); michael@0: *dy = FixedDiv(src_height, dst_height); michael@0: *x = CENTERSTART(*dx, 0); michael@0: *y = CENTERSTART(*dy, 0); michael@0: } michael@0: // Negative src_width means horizontally mirror. michael@0: if (src_width < 0) { michael@0: *x += (dst_width - 1) * *dx; michael@0: *dx = -*dx; michael@0: // src_width = -src_width; // Caller must do this. michael@0: } michael@0: } michael@0: #undef CENTERSTART michael@0: michael@0: #ifdef __cplusplus michael@0: } // extern "C" michael@0: } // namespace libyuv michael@0: #endif