Thu, 22 Jan 2015 13:21:57 +0100
Incorporate changes requested in the Mozilla review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyPlane
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Remove this macro if OVERREAD is safe.
#define AVOID_OVERREAD 1

static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
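
// For instance, SUBSAMPLE is used below to compute chroma plane sizes as
// half the luma size, rounded up, with symmetric handling of negative
// (inverted) dimensions:
//   SUBSAMPLE(5, 1, 1)  -> (5 + 1) >> 1 == 3
//   SUBSAMPLE(-5, 1, 1) -> -((5 + 1) >> 1) == -3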

// Scale plane, 1/2
// This is an optimized version for scaling down a plane to 1/2 of
// its original size.
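
// As a rough illustration of the paths below: with kFilterNone, src_ptr is
// advanced one row and src_stride zeroed, so dst(x, y) is point-sampled from
// source row 2 * y + 1; with kFilterBox, each dst pixel is the average of a
// 2x2 block of source pixels (e.g. a 640x480 plane becomes 320x240).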

static void ScalePlaneDown2(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering == kFilterNone ? ScaleRowDown2_C :
      (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
      ScaleRowDown2Box_C);
  int row_stride = src_stride << 1;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
  }
#elif defined(HAS_SCALEROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
        ScaleRowDown2Box_Unaligned_SSE2);
    if (IS_ALIGNED(src_ptr, 16) &&
        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
          ScaleRowDown2Box_SSE2);
    }
  }
#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  int y;
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}

// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.

static void ScalePlaneDown4(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  int row_stride = src_stride << 2;
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
  }
#elif defined(HAS_SCALEROWDOWN4_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
  }
#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  int y;
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}

// Scale plane down, 3/4
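
// As a sketch of the loop structure below: each pass consumes four source
// rows and writes three destination rows (so e.g. 640x480 -> 480x360); the
// third output row is filtered with a negated stride so the group does not
// read beyond its own four rows.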

static void ScalePlaneDown34(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  assert(dst_width % 3 == 0);
  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  }
#if defined(HAS_SCALEROWDOWN34_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
    }
  }
#endif

  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  int y;
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}

// Scale plane, 3/8
// This is an optimized version for scaling down a plane to 3/8
// of its original size.
//
// Uses box filters arranged like this:
// aaabbbcc -> abc
// aaabbbcc    def
// aaabbbcc    ghi
// dddeeeff
// dddeeeff
// dddeeeff
// ggghhhii
// ggghhhii
// Boxes are 3x3, 2x3, 3x2 and 2x2
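
// For example, an 8x8 source maps to a 3x3 destination: interior output
// pixels average full 3x3 boxes, while the last output column and row only
// have two source pixels left in that direction, giving the 2x3, 3x2 and
// 2x2 boxes noted above.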

static void ScalePlaneDown38(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  assert(dst_width % 3 == 0);
  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  }
#if defined(HAS_SCALEROWDOWN38_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
    }
  }
#endif

  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  int y;
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}

static __inline uint32 SumBox(int iboxwidth, int iboxheight,
                              ptrdiff_t src_stride, const uint8* src_ptr) {
  assert(iboxwidth > 0);
  assert(iboxheight > 0);
  uint32 sum = 0u;
  int y;
  for (y = 0; y < iboxheight; ++y) {
    int x;
    for (x = 0; x < iboxwidth; ++x) {
      sum += src_ptr[x];
    }
    src_ptr += src_stride;
  }
  return sum;
}

static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
                               int x, int dx, ptrdiff_t src_stride,
                               const uint8* src_ptr, uint8* dst_ptr) {
  int i;
  for (i = 0; i < dst_width; ++i) {
    int ix = x >> 16;
    x += dx;
    int boxwidth = (x >> 16) - ix;
    *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
        (boxwidth * boxheight);
  }
}

static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
  assert(iboxwidth > 0);
  uint32 sum = 0u;
  int x;
  for (x = 0; x < iboxwidth; ++x) {
    sum += src_ptr[x];
  }
  return sum;
}

static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
                            const uint16* src_ptr, uint8* dst_ptr) {
  int scaletbl[2];
  int minboxwidth = (dx >> 16);
  scaletbl[0] = 65536 / (minboxwidth * boxheight);
  scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
  int* scaleptr = scaletbl - minboxwidth;
  int i;
  for (i = 0; i < dst_width; ++i) {
    int ix = x >> 16;
    x += dx;
    int boxwidth = (x >> 16) - ix;
    *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
  }
}

static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
                            const uint16* src_ptr, uint8* dst_ptr) {
  int boxwidth = (dx >> 16);
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
    x += boxwidth;
  }
}

// Scale plane down to any dimensions, with interpolation (boxfilter).
//
// Same method as SimpleScale, which is fixed point, outputting
// one pixel of destination using fixed point (16.16) to step
// through the source, sampling a box of pixels with simple
// averaging.
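//
// For example (illustrative numbers): scaling 640 columns down to 240 gives
// dx of roughly (640 << 16) / 240, about 2.67 in 16.16 fixed point, so the
// boxes alternate between 2 and 3 source pixels wide and ScaleAddCols2_C is
// chosen; an exact 1/4 scale gives dx = 4 << 16 with a constant box width,
// handled by ScaleAddCols1_C.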
static void ScalePlaneBox(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  const int max_y = (src_height << 16);
  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    uint8* dst = dst_ptr;
    int j;
    for (j = 0; j < dst_height; ++j) {
      int iy = y >> 16;
      const uint8* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
      int boxheight = (y >> 16) - iy;
      ScalePlaneBoxRow_C(dst_width, boxheight,
                         x, dx, src_stride,
                         src, dst);
      dst += dst_stride;
    }
    return;
  }
  // Allocate a row buffer of uint16.
  align_buffer_64(row16, src_width * 2);

  void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
      const uint16* src_ptr, uint8* dst_ptr) =
      (dx & 0xffff) ? ScaleAddCols2_C : ScaleAddCols1_C;
  void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
      uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
#if defined(HAS_SCALEADDROWS_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
      IS_ALIGNED(src_width, 16) &&
#endif
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    ScaleAddRows = ScaleAddRows_SSE2;
  }
#endif

  int j;
  for (j = 0; j < dst_height; ++j) {
    int iy = y >> 16;
    const uint8* src = src_ptr + iy * src_stride;
    y += dy;
    if (y > (src_height << 16)) {
      y = (src_height << 16);
    }
    int boxheight = (y >> 16) - iy;
    ScaleAddRows(src, src_stride, (uint16*)(row16),
                 src_width, boxheight);
    ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
                 dst_ptr);
    dst_ptr += dst_stride;
  }
  free_aligned_buffer_64(row16);
}

// Scale plane down with bilinear interpolation.
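//
// As a worked example of the fixed point stepping used below: if the 16.16
// source position y is 0x28000 (2.5), then yi = y >> 16 = 2 and
// yf = (y >> 8) & 255 = 128, so the row buffer is filled with a 50/50 blend
// of source rows 2 and 3 before horizontal filtering.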
void ScalePlaneBilinearDown(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif

  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;

#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif

  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  align_buffer_64(row, src_width);

  const int max_y = (src_height - 1) << 16;
  int j;
  for (j = 0; j < dst_height; ++j) {
    if (y > max_y) {
      y = max_y;
    }
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
  }
  free_aligned_buffer_64(row);
}

// Scale plane up with bilinear interpolation.
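//
// A note on the loop below: two horizontally scaled rows are kept in a
// single buffer (kRowSize * 2 bytes) and reused as a ping-pong pair; when
// the source row index advances, the new row is scaled into the slot of the
// older one and rowstride is negated, so InterpolateRow always blends
// rowptr and rowptr + rowstride without rescaling both rows.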
void ScalePlaneBilinearUp(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr,
                          enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif

  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  int yi = y >> 16;
  const uint8* src = src_ptr + yi * src_stride;

  // Allocate 2 row buffers.
  const int kRowSize = (dst_width + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);

  uint8* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  ScaleFilterCols(rowptr, src, dst_width, x, dx);
  if (src_height > 1) {
    src += src_stride;
  }
  ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
  src += src_stride;

  int j;
  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
        src = src_ptr + yi * src_stride;
      }
      if (yi != lasty) {
        ScaleFilterCols(rowptr, src, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src += src_stride;
      }
    }
    if (filtering == kFilterLinear) {
      InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
    }
    dst_ptr += dst_stride;
    y += dy;
  }
  free_aligned_buffer_64(row);
}

// Scale Plane to/from any dimensions, without interpolation.
// Fixed point math is used for performance: the upper 16 bits
// of x and dx are the integer part of the source position and
// the lower 16 bits are the fractional part.
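//
// For example, stepping x by dx = 0x18000 (1.5 in 16.16) selects source
// columns 0, 1, 3, 4, 6, ... for successive destination pixels, since only
// x >> 16 is used as the column index.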

static void ScalePlaneSimple(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) = ScaleCols_C;
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  int i;
  for (i = 0; i < dst_height; ++i) {
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
              dst_width, x, dx);
    dst_ptr += dst_stride;
    y += dy;
  }
}

// Scale a plane.
// This function dispatches to a specialized scaler based on scale factor.
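//
// For example (illustrative values, with src_y/dst_y pointing at tightly
// packed planes), halving a 1280x720 Y plane looks like:
//   ScalePlane(src_y, 1280, 1280, 720,
//              dst_y, 640, 640, 360, kFilterBox);
// which the dispatch below routes to the ScalePlaneDown2 fast path.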
LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
                int src_width, int src_height,
                uint8* dst, int dst_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height,
                       dst_width, dst_height,
                       src_stride, dst_stride, src, dst,
                       0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        filtering != kFilterBilinear) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
                  src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
                   src_stride, dst_stride, src, dst);
}

// Scale an I420 image.
// This function in turn calls a scaling function for each plane.

LIBYUV_API
int I420Scale(const uint8* src_y, int src_stride_y,
              const uint8* src_u, int src_stride_u,
              const uint8* src_v, int src_stride_v,
              int src_width, int src_height,
              uint8* dst_y, int dst_stride_y,
              uint8* dst_u, int dst_stride_u,
              uint8* dst_v, int dst_stride_v,
              int dst_width, int dst_height,
              enum FilterMode filtering) {
  if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
      !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);

  ScalePlane(src_y, src_stride_y, src_width, src_height,
             dst_y, dst_stride_y, dst_width, dst_height,
             filtering);
  ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
             dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
             filtering);
  ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
             dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
             filtering);
  return 0;
}
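
// For instance (assumed tightly packed planes, strides equal to widths), a
// 352x288 I420 frame could be scaled to 176x144 like this:
//   I420Scale(src_y, 352, src_u, 176, src_v, 176, 352, 288,
//             dst_y, 176, dst_u, 88, dst_v, 88, 176, 144,
//             kFilterBilinear);
// The chroma planes are scaled at half the luma dimensions, rounded up via
// SUBSAMPLE above.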

// Deprecated api
LIBYUV_API
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
          int src_stride_y, int src_stride_u, int src_stride_v,
          int src_width, int src_height,
          uint8* dst_y, uint8* dst_u, uint8* dst_v,
          int dst_stride_y, int dst_stride_u, int dst_stride_v,
          int dst_width, int dst_height,
          LIBYUV_BOOL interpolate) {
  return I420Scale(src_y, src_stride_y,
                   src_u, src_stride_u,
                   src_v, src_stride_v,
                   src_width, src_height,
                   dst_y, dst_stride_y,
                   dst_u, dst_stride_u,
                   dst_v, dst_stride_v,
                   dst_width, dst_height,
                   interpolate ? kFilterBox : kFilterNone);
}

// Deprecated api
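// ScaleOffset assumes a single contiguous I420 buffer: Y followed by U and
// V at half width/height. For a 640x480 source, for example, U starts at
// src + 307200 and V at src + 384000. dst_yoffset is rounded down to an
// even value below so the chroma planes stay aligned.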
LIBYUV_API
int ScaleOffset(const uint8* src, int src_width, int src_height,
                uint8* dst, int dst_width, int dst_height, int dst_yoffset,
                LIBYUV_BOOL interpolate) {
  if (!src || src_width <= 0 || src_height <= 0 ||
      !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset < 0 ||
      dst_yoffset >= dst_height) {
    return -1;
  }
  dst_yoffset = dst_yoffset & ~1;  // Chroma requires an even offset.
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  int aheight = dst_height - dst_yoffset * 2;  // actual output height
  const uint8* src_y = src;
  const uint8* src_u = src + src_width * src_height;
  const uint8* src_v = src + src_width * src_height +
      src_halfwidth * src_halfheight;
  uint8* dst_y = dst + dst_yoffset * dst_width;
  uint8* dst_u = dst + dst_width * dst_height +
      (dst_yoffset >> 1) * dst_halfwidth;
  uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
      (dst_yoffset >> 1) * dst_halfwidth;
  return I420Scale(src_y, src_width,
                   src_u, src_halfwidth,
                   src_v, src_halfwidth,
                   src_width, src_height,
                   dst_y, dst_width,
                   dst_u, dst_halfwidth,
                   dst_v, dst_halfwidth,
                   dst_width, aheight,
                   interpolate ? kFilterBox : kFilterNone);
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif