Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
11 #include "libyuv/scale.h"
13 #include <assert.h>
14 #include <string.h>
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
// Returns v unchanged when it is non-negative, otherwise its negation.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
30 // ScaleARGB ARGB, 1/2
31 // This is an optimized version for scaling down a ARGB to 1/2 of
32 // its original size.
33 static void ScaleARGBDown2(int src_width, int src_height,
34 int dst_width, int dst_height,
35 int src_stride, int dst_stride,
36 const uint8* src_argb, uint8* dst_argb,
37 int x, int dx, int y, int dy,
38 enum FilterMode filtering) {
39 int j;
40 int row_stride = src_stride * (dy >> 16);
41 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
42 uint8* dst_argb, int dst_width) =
43 filtering == kFilterNone ? ScaleARGBRowDown2_C :
44 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
45 ScaleARGBRowDown2Box_C);
46 assert(dx == 65536 * 2); // Test scale factor of 2.
47 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
48 // Advance to odd row, even column.
49 if (filtering == kFilterBilinear) {
50 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
51 } else {
52 src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
53 }
55 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
56 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
57 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
58 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
59 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
60 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
61 ScaleARGBRowDown2Box_SSE2);
62 }
63 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
64 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
65 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
66 ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
67 ScaleARGBRowDown2_NEON;
68 }
69 #endif
71 if (filtering == kFilterLinear) {
72 src_stride = 0;
73 }
74 for (j = 0; j < dst_height; ++j) {
75 ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
76 src_argb += row_stride;
77 dst_argb += dst_stride;
78 }
79 }
81 // ScaleARGB ARGB, 1/4
82 // This is an optimized version for scaling down a ARGB to 1/4 of
83 // its original size.
84 static void ScaleARGBDown4Box(int src_width, int src_height,
85 int dst_width, int dst_height,
86 int src_stride, int dst_stride,
87 const uint8* src_argb, uint8* dst_argb,
88 int x, int dx, int y, int dy) {
89 int j;
90 // Allocate 2 rows of ARGB.
91 const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
92 align_buffer_64(row, kRowSize * 2);
93 int row_stride = src_stride * (dy >> 16);
94 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
95 uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
96 // Advance to odd row, even column.
97 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
98 assert(dx == 65536 * 4); // Test scale factor of 4.
99 assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
100 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
101 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
102 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
103 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
104 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
105 }
106 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
107 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
108 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
109 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
110 }
111 #endif
112 for (j = 0; j < dst_height; ++j) {
113 ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
114 ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
115 row + kRowSize, dst_width * 2);
116 ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
117 src_argb += row_stride;
118 dst_argb += dst_stride;
119 }
120 free_aligned_buffer_64(row);
121 }
123 // ScaleARGB ARGB Even
124 // This is an optimized version for scaling down a ARGB to even
125 // multiple of its original size.
126 static void ScaleARGBDownEven(int src_width, int src_height,
127 int dst_width, int dst_height,
128 int src_stride, int dst_stride,
129 const uint8* src_argb, uint8* dst_argb,
130 int x, int dx, int y, int dy,
131 enum FilterMode filtering) {
132 int j;
133 int col_step = dx >> 16;
134 int row_stride = (dy >> 16) * src_stride;
135 void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
136 int src_step, uint8* dst_argb, int dst_width) =
137 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
138 assert(IS_ALIGNED(src_width, 2));
139 assert(IS_ALIGNED(src_height, 2));
140 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
141 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
142 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
143 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
144 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
145 ScaleARGBRowDownEven_SSE2;
146 }
147 #elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
148 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
149 IS_ALIGNED(src_argb, 4)) {
150 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
151 ScaleARGBRowDownEven_NEON;
152 }
153 #endif
155 if (filtering == kFilterLinear) {
156 src_stride = 0;
157 }
158 for (j = 0; j < dst_height; ++j) {
159 ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
160 src_argb += row_stride;
161 dst_argb += dst_stride;
162 }
163 }
165 // Scale ARGB down with bilinear interpolation.
166 static void ScaleARGBBilinearDown(int src_width, int src_height,
167 int dst_width, int dst_height,
168 int src_stride, int dst_stride,
169 const uint8* src_argb, uint8* dst_argb,
170 int x, int dx, int y, int dy,
171 enum FilterMode filtering) {
172 int j;
173 int64 xlast = x + (int64)(dst_width - 1) * dx;
174 int64 xl = (dx >= 0) ? x : xlast;
175 int64 xr = (dx >= 0) ? xlast : x;
176 int clip_src_width;
177 xl = (xl >> 16) & ~3; // Left edge aligned.
178 xr = (xr >> 16) + 1; // Right most pixel used.
179 clip_src_width = (((xr - xl) + 1 + 3) & ~3) * 4; // Width aligned to 4.
180 src_argb += xl * 4;
181 x -= (int)(xl << 16);
182 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
183 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
184 InterpolateRow_C;
185 #if defined(HAS_INTERPOLATEROW_SSE2)
186 if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
187 InterpolateRow = InterpolateRow_Any_SSE2;
188 if (IS_ALIGNED(clip_src_width, 16)) {
189 InterpolateRow = InterpolateRow_Unaligned_SSE2;
190 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
191 InterpolateRow = InterpolateRow_SSE2;
192 }
193 }
194 }
195 #endif
196 #if defined(HAS_INTERPOLATEROW_SSSE3)
197 if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
198 InterpolateRow = InterpolateRow_Any_SSSE3;
199 if (IS_ALIGNED(clip_src_width, 16)) {
200 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
201 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
202 InterpolateRow = InterpolateRow_SSSE3;
203 }
204 }
205 }
206 #endif
207 #if defined(HAS_INTERPOLATEROW_AVX2)
208 if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
209 InterpolateRow = InterpolateRow_Any_AVX2;
210 if (IS_ALIGNED(clip_src_width, 32)) {
211 InterpolateRow = InterpolateRow_AVX2;
212 }
213 }
214 #endif
215 #if defined(HAS_INTERPOLATEROW_NEON)
216 if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
217 InterpolateRow = InterpolateRow_Any_NEON;
218 if (IS_ALIGNED(clip_src_width, 16)) {
219 InterpolateRow = InterpolateRow_NEON;
220 }
221 }
222 #endif
223 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
224 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
225 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
226 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
227 if (IS_ALIGNED(clip_src_width, 4)) {
228 InterpolateRow = InterpolateRow_MIPS_DSPR2;
229 }
230 }
231 #endif
232 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
233 int dst_width, int x, int dx) =
234 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
235 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
236 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
237 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
238 }
239 #endif
240 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
241 // Allocate a row of ARGB.
242 align_buffer_64(row, clip_src_width * 4);
244 const int max_y = (src_height - 1) << 16;
245 for (j = 0; j < dst_height; ++j) {
246 if (y > max_y) {
247 y = max_y;
248 }
249 int yi = y >> 16;
250 const uint8* src = src_argb + yi * src_stride;
251 if (filtering == kFilterLinear) {
252 ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
253 } else {
254 int yf = (y >> 8) & 255;
255 InterpolateRow(row, src, src_stride, clip_src_width, yf);
256 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
257 }
258 dst_argb += dst_stride;
259 y += dy;
260 }
261 free_aligned_buffer_64(row);
262 }
264 // Scale ARGB up with bilinear interpolation.
265 static void ScaleARGBBilinearUp(int src_width, int src_height,
266 int dst_width, int dst_height,
267 int src_stride, int dst_stride,
268 const uint8* src_argb, uint8* dst_argb,
269 int x, int dx, int y, int dy,
270 enum FilterMode filtering) {
271 int j;
272 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
273 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
274 InterpolateRow_C;
275 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
276 int dst_width, int x, int dx) =
277 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
278 #if defined(HAS_INTERPOLATEROW_SSE2)
279 if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
280 InterpolateRow = InterpolateRow_Any_SSE2;
281 if (IS_ALIGNED(dst_width, 4)) {
282 InterpolateRow = InterpolateRow_Unaligned_SSE2;
283 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
284 InterpolateRow = InterpolateRow_SSE2;
285 }
286 }
287 }
288 #endif
289 #if defined(HAS_INTERPOLATEROW_SSSE3)
290 if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
291 InterpolateRow = InterpolateRow_Any_SSSE3;
292 if (IS_ALIGNED(dst_width, 4)) {
293 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
294 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
295 InterpolateRow = InterpolateRow_SSSE3;
296 }
297 }
298 }
299 #endif
300 #if defined(HAS_INTERPOLATEROW_AVX2)
301 if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
302 InterpolateRow = InterpolateRow_Any_AVX2;
303 if (IS_ALIGNED(dst_width, 8)) {
304 InterpolateRow = InterpolateRow_AVX2;
305 }
306 }
307 #endif
308 #if defined(HAS_INTERPOLATEROW_NEON)
309 if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
310 InterpolateRow = InterpolateRow_Any_NEON;
311 if (IS_ALIGNED(dst_width, 4)) {
312 InterpolateRow = InterpolateRow_NEON;
313 }
314 }
315 #endif
316 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
317 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
318 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
319 InterpolateRow = InterpolateRow_MIPS_DSPR2;
320 }
321 #endif
322 if (src_width >= 32768) {
323 ScaleARGBFilterCols = filtering ?
324 ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
325 }
326 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
327 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
328 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
329 }
330 #endif
331 #if defined(HAS_SCALEARGBCOLS_SSE2)
332 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
333 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
334 }
335 #endif
336 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
337 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
338 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
339 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
340 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
341 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
342 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
343 }
344 #endif
345 }
347 const int max_y = (src_height - 1) << 16;
348 if (y > max_y) {
349 y = max_y;
350 }
351 int yi = y >> 16;
352 const uint8* src = src_argb + yi * src_stride;
354 // Allocate 2 rows of ARGB.
355 const int kRowSize = (dst_width * 4 + 15) & ~15;
356 align_buffer_64(row, kRowSize * 2);
358 uint8* rowptr = row;
359 int rowstride = kRowSize;
360 int lasty = yi;
362 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
363 if (src_height > 1) {
364 src += src_stride;
365 }
366 ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
367 src += src_stride;
369 for (j = 0; j < dst_height; ++j) {
370 yi = y >> 16;
371 if (yi != lasty) {
372 if (y > max_y) {
373 y = max_y;
374 yi = y >> 16;
375 src = src_argb + yi * src_stride;
376 }
377 if (yi != lasty) {
378 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
379 rowptr += rowstride;
380 rowstride = -rowstride;
381 lasty = yi;
382 src += src_stride;
383 }
384 }
385 if (filtering == kFilterLinear) {
386 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
387 } else {
388 int yf = (y >> 8) & 255;
389 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
390 }
391 dst_argb += dst_stride;
392 y += dy;
393 }
394 free_aligned_buffer_64(row);
395 }
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
//
// Each needed source row is first converted to ARGB with an I422ToARGBRow
// variant into a one-row scratch buffer, then scaled horizontally into one of
// two cached ARGB rows. Each destination row is a vertical blend of the cached
// pair (or a copy of the current row for kFilterLinear).
//
// The U/V row pointers advance once per two Y rows (kYShift == 1).
//
// Fixes relative to the previous revision:
//  - The first two cached rows are converted from YUV before being scaled;
//    previously the raw Y plane was handed to the ARGB column scaler.
//  - The ColsUp2 SSE2 alignment test referenced src_argb/src_stride/dst_stride,
//    which are not declared in this function; it now tests the scratch
//    buffers the column scaler actually reads and writes.
//  - The conversion buffer is freed under the name it was allocated with.
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
                                     int dst_width, int dst_height,
                                     int src_stride_y,
                                     int src_stride_u,
                                     int src_stride_v,
                                     int dst_stride_argb,
                                     const uint8* src_y,
                                     const uint8* src_u,
                                     const uint8* src_v,
                                     uint8* dst_argb,
                                     int x, int dx, int y, int dy,
                                     enum FilterMode filtering) {
  int j;
  // YUV -> ARGB row converter; SIMD variants are chosen on src_width.
  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        I422ToARGBRow = I422ToARGBRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
#endif

  // Vertical blend of two ARGB rows; SIMD variants are chosen on dst_width.
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    InterpolateRow = InterpolateRow_MIPS_DSPR2;
  }
#endif

  // Horizontal scaler; sources 32768 pixels wide or more use the 64-bit
  // variants.
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  if (src_width >= 32768) {
    ScaleARGBFilterCols = filtering ?
        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif

  // Clamp the starting y to the last source row (16.16 fixed point).
  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8* src_row_y = src_y + yi * src_stride_y;
  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
  const uint8* src_row_v = src_v + uv_yi * src_stride_v;

  // Allocate 2 rows of ARGB.
  const int kRowSize = (dst_width * 4 + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);

  // Allocate 1 row of ARGB for source conversion.
  align_buffer_64(argb_row, src_width * 4);

  // Unfiltered exact 2x widening starting in the first half pixel gets a
  // dedicated column routine. It always reads argb_row and writes into row,
  // so those are the buffers whose alignment is tested. kRowSize is a multiple
  // of 16, so the second cached row shares the alignment of the first.
  // NOTE(review): the SSE2 routine's exact alignment requirements are not
  // visible here; the 16-byte tests mirror the checks used for it elsewhere
  // in this file.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(argb_row, 16) && IS_ALIGNED(row, 16)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  // rowptr is the upper row of the cached pair; rowptr + rowstride is the
  // lower one. rowstride changes sign each time the two slots swap.
  uint8* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  // Prime the first cached row: convert to ARGB, then scale horizontally.
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    // Chroma advances only when leaving an odd Y row.
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  // Prime the second cached row the same way.
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr + rowstride, argb_row, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      if (y > max_y) {
        // Ran past the bottom: pin to the last source row.
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * src_stride_y;
        src_row_u = src_u + uv_yi * src_stride_u;
        src_row_v = src_v + uv_yi * src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        // Overwrite the stale upper slot, then make the other slot current.
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      // Zero stride and zero fraction: emit the current row unblended.
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      // Top 8 bits of the fractional part of y weight the row blend.
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  free_aligned_buffer_64(argb_row);
}
#endif
609 // Scale ARGB to/from any dimensions, without interpolation.
610 // Fixed point math is used for performance: The upper 16 bits
611 // of x and dx is the integer part of the source position and
612 // the lower 16 bits are the fixed decimal part.
614 static void ScaleARGBSimple(int src_width, int src_height,
615 int dst_width, int dst_height,
616 int src_stride, int dst_stride,
617 const uint8* src_argb, uint8* dst_argb,
618 int x, int dx, int y, int dy) {
619 int j;
620 void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
621 int dst_width, int x, int dx) =
622 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
623 #if defined(HAS_SCALEARGBCOLS_SSE2)
624 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
625 ScaleARGBCols = ScaleARGBCols_SSE2;
626 }
627 #endif
628 if (src_width * 2 == dst_width && x < 0x8000) {
629 ScaleARGBCols = ScaleARGBColsUp2_C;
630 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
631 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
632 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
633 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
634 ScaleARGBCols = ScaleARGBColsUp2_SSE2;
635 }
636 #endif
637 }
639 for (j = 0; j < dst_height; ++j) {
640 ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
641 dst_width, x, dx);
642 dst_argb += dst_stride;
643 y += dy;
644 }
645 }
647 // ScaleARGB a ARGB.
648 // This function in turn calls a scaling function
649 // suitable for handling the desired resolutions.
650 static void ScaleARGB(const uint8* src, int src_stride,
651 int src_width, int src_height,
652 uint8* dst, int dst_stride,
653 int dst_width, int dst_height,
654 int clip_x, int clip_y, int clip_width, int clip_height,
655 enum FilterMode filtering) {
656 // Initial source x/y coordinate and step values as 16.16 fixed point.
657 int x = 0;
658 int y = 0;
659 int dx = 0;
660 int dy = 0;
661 // ARGB does not support box filter yet, but allow the user to pass it.
662 // Simplify filtering when possible.
663 filtering = ScaleFilterReduce(src_width, src_height,
664 dst_width, dst_height,
665 filtering);
667 // Negative src_height means invert the image.
668 if (src_height < 0) {
669 src_height = -src_height;
670 src = src + (src_height - 1) * src_stride;
671 src_stride = -src_stride;
672 }
673 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
674 &x, &y, &dx, &dy);
675 src_width = Abs(src_width);
676 if (clip_x) {
677 int64 clipf = (int64)(clip_x) * dx;
678 x += (clipf & 0xffff);
679 src += (clipf >> 16) * 4;
680 dst += clip_x * 4;
681 }
682 if (clip_y) {
683 int64 clipf = (int64)(clip_y) * dy;
684 y += (clipf & 0xffff);
685 src += (clipf >> 16) * src_stride;
686 dst += clip_y * dst_stride;
687 }
689 // Special case for integer step values.
690 if (((dx | dy) & 0xffff) == 0) {
691 if (!dx || !dy) { // 1 pixel wide and/or tall.
692 filtering = kFilterNone;
693 } else {
694 // Optimized even scale down. ie 2, 4, 6, 8, 10x.
695 if (!(dx & 0x10000) && !(dy & 0x10000)) {
696 if (dx == 0x20000) {
697 // Optimized 1/2 downsample.
698 ScaleARGBDown2(src_width, src_height,
699 clip_width, clip_height,
700 src_stride, dst_stride, src, dst,
701 x, dx, y, dy, filtering);
702 return;
703 }
704 if (dx == 0x40000 && filtering == kFilterBox) {
705 // Optimized 1/4 box downsample.
706 ScaleARGBDown4Box(src_width, src_height,
707 clip_width, clip_height,
708 src_stride, dst_stride, src, dst,
709 x, dx, y, dy);
710 return;
711 }
712 ScaleARGBDownEven(src_width, src_height,
713 clip_width, clip_height,
714 src_stride, dst_stride, src, dst,
715 x, dx, y, dy, filtering);
716 return;
717 }
718 // Optimized odd scale down. ie 3, 5, 7, 9x.
719 if ((dx & 0x10000) && (dy & 0x10000)) {
720 filtering = kFilterNone;
721 if (dx == 0x10000 && dy == 0x10000) {
722 // Straight copy.
723 ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
724 dst, dst_stride, clip_width, clip_height);
725 return;
726 }
727 }
728 }
729 }
730 if (dx == 0x10000 && (x & 0xffff) == 0) {
731 // Arbitrary scale vertically, but unscaled vertically.
732 ScalePlaneVertical(src_height,
733 clip_width, clip_height,
734 src_stride, dst_stride, src, dst,
735 x, y, dy, 4, filtering);
736 return;
737 }
738 if (filtering && dy < 65536) {
739 ScaleARGBBilinearUp(src_width, src_height,
740 clip_width, clip_height,
741 src_stride, dst_stride, src, dst,
742 x, dx, y, dy, filtering);
743 return;
744 }
745 if (filtering) {
746 ScaleARGBBilinearDown(src_width, src_height,
747 clip_width, clip_height,
748 src_stride, dst_stride, src, dst,
749 x, dx, y, dy, filtering);
750 return;
751 }
752 ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
753 src_stride, dst_stride, src, dst,
754 x, dx, y, dy);
755 }
757 LIBYUV_API
758 int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
759 int src_width, int src_height,
760 uint8* dst_argb, int dst_stride_argb,
761 int dst_width, int dst_height,
762 int clip_x, int clip_y, int clip_width, int clip_height,
763 enum FilterMode filtering) {
764 if (!src_argb || src_width == 0 || src_height == 0 ||
765 !dst_argb || dst_width <= 0 || dst_height <= 0 ||
766 clip_x < 0 || clip_y < 0 ||
767 (clip_x + clip_width) > dst_width ||
768 (clip_y + clip_height) > dst_height) {
769 return -1;
770 }
771 ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
772 dst_argb, dst_stride_argb, dst_width, dst_height,
773 clip_x, clip_y, clip_width, clip_height, filtering);
774 return 0;
775 }
777 // Scale an ARGB image.
778 LIBYUV_API
779 int ARGBScale(const uint8* src_argb, int src_stride_argb,
780 int src_width, int src_height,
781 uint8* dst_argb, int dst_stride_argb,
782 int dst_width, int dst_height,
783 enum FilterMode filtering) {
784 if (!src_argb || src_width == 0 || src_height == 0 ||
785 !dst_argb || dst_width <= 0 || dst_height <= 0) {
786 return -1;
787 }
788 ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
789 dst_argb, dst_stride_argb, dst_width, dst_height,
790 0, 0, dst_width, dst_height, filtering);
791 return 0;
792 }
794 #ifdef __cplusplus
795 } // extern "C"
796 } // namespace libyuv
797 #endif