Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
11 #include "libyuv/scale.h"
13 #include <assert.h>
14 #include <string.h>
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
// Returns the magnitude of v: negative inputs are negated, others pass through.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
30 // CPU agnostic row functions
31 void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
32 uint8* dst, int dst_width) {
33 int x;
34 for (x = 0; x < dst_width - 1; x += 2) {
35 dst[0] = src_ptr[1];
36 dst[1] = src_ptr[3];
37 dst += 2;
38 src_ptr += 4;
39 }
40 if (dst_width & 1) {
41 dst[0] = src_ptr[1];
42 }
43 }
45 void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
46 uint8* dst, int dst_width) {
47 const uint8* s = src_ptr;
48 int x;
49 for (x = 0; x < dst_width - 1; x += 2) {
50 dst[0] = (s[0] + s[1] + 1) >> 1;
51 dst[1] = (s[2] + s[3] + 1) >> 1;
52 dst += 2;
53 s += 4;
54 }
55 if (dst_width & 1) {
56 dst[0] = (s[0] + s[1] + 1) >> 1;
57 }
58 }
60 void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
61 uint8* dst, int dst_width) {
62 const uint8* s = src_ptr;
63 const uint8* t = src_ptr + src_stride;
64 int x;
65 for (x = 0; x < dst_width - 1; x += 2) {
66 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
67 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
68 dst += 2;
69 s += 4;
70 t += 4;
71 }
72 if (dst_width & 1) {
73 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
74 }
75 }
77 void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
78 uint8* dst, int dst_width) {
79 int x;
80 for (x = 0; x < dst_width - 1; x += 2) {
81 dst[0] = src_ptr[2];
82 dst[1] = src_ptr[6];
83 dst += 2;
84 src_ptr += 8;
85 }
86 if (dst_width & 1) {
87 dst[0] = src_ptr[2];
88 }
89 }
91 void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
92 uint8* dst, int dst_width) {
93 intptr_t stride = src_stride;
94 int x;
95 for (x = 0; x < dst_width - 1; x += 2) {
96 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
97 src_ptr[stride + 0] + src_ptr[stride + 1] +
98 src_ptr[stride + 2] + src_ptr[stride + 3] +
99 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
100 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
101 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
102 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
103 8) >> 4;
104 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
105 src_ptr[stride + 4] + src_ptr[stride + 5] +
106 src_ptr[stride + 6] + src_ptr[stride + 7] +
107 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
108 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
109 src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
110 src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
111 8) >> 4;
112 dst += 2;
113 src_ptr += 8;
114 }
115 if (dst_width & 1) {
116 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
117 src_ptr[stride + 0] + src_ptr[stride + 1] +
118 src_ptr[stride + 2] + src_ptr[stride + 3] +
119 src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
120 src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
121 src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
122 src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
123 8) >> 4;
124 }
125 }
127 void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
128 uint8* dst, int dst_width) {
129 int x;
130 assert((dst_width % 3 == 0) && (dst_width > 0));
131 for (x = 0; x < dst_width; x += 3) {
132 dst[0] = src_ptr[0];
133 dst[1] = src_ptr[1];
134 dst[2] = src_ptr[3];
135 dst += 3;
136 src_ptr += 4;
137 }
138 }
140 // Filter rows 0 and 1 together, 3 : 1
141 void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
142 uint8* d, int dst_width) {
143 const uint8* s = src_ptr;
144 const uint8* t = src_ptr + src_stride;
145 int x;
146 assert((dst_width % 3 == 0) && (dst_width > 0));
147 for (x = 0; x < dst_width; x += 3) {
148 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
149 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
150 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
151 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
152 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
153 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
154 d[0] = (a0 * 3 + b0 + 2) >> 2;
155 d[1] = (a1 * 3 + b1 + 2) >> 2;
156 d[2] = (a2 * 3 + b2 + 2) >> 2;
157 d += 3;
158 s += 4;
159 t += 4;
160 }
161 }
163 // Filter rows 1 and 2 together, 1 : 1
164 void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
165 uint8* d, int dst_width) {
166 const uint8* s = src_ptr;
167 const uint8* t = src_ptr + src_stride;
168 int x;
169 assert((dst_width % 3 == 0) && (dst_width > 0));
170 for (x = 0; x < dst_width; x += 3) {
171 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
172 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
173 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
174 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
175 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
176 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
177 d[0] = (a0 + b0 + 1) >> 1;
178 d[1] = (a1 + b1 + 1) >> 1;
179 d[2] = (a2 + b2 + 1) >> 1;
180 d += 3;
181 s += 4;
182 t += 4;
183 }
184 }
186 // Scales a single row of pixels using point sampling.
187 void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
188 int dst_width, int x, int dx) {
189 int j;
190 for (j = 0; j < dst_width - 1; j += 2) {
191 dst_ptr[0] = src_ptr[x >> 16];
192 x += dx;
193 dst_ptr[1] = src_ptr[x >> 16];
194 x += dx;
195 dst_ptr += 2;
196 }
197 if (dst_width & 1) {
198 dst_ptr[0] = src_ptr[x >> 16];
199 }
200 }
202 // Scales a single row of pixels up by 2x using point sampling.
203 void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
204 int dst_width, int x, int dx) {
205 int j;
206 for (j = 0; j < dst_width - 1; j += 2) {
207 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
208 src_ptr += 1;
209 dst_ptr += 2;
210 }
211 if (dst_width & 1) {
212 dst_ptr[0] = src_ptr[0];
213 }
214 }
216 // (1-f)a + fb can be replaced with a + f(b-a)
217 #define BLENDER(a, b, f) (uint8)((int)(a) + \
218 ((int)(f) * ((int)(b) - (int)(a)) >> 16))
220 void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
221 int dst_width, int x, int dx) {
222 int j;
223 for (j = 0; j < dst_width - 1; j += 2) {
224 int xi = x >> 16;
225 int a = src_ptr[xi];
226 int b = src_ptr[xi + 1];
227 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
228 x += dx;
229 xi = x >> 16;
230 a = src_ptr[xi];
231 b = src_ptr[xi + 1];
232 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
233 x += dx;
234 dst_ptr += 2;
235 }
236 if (dst_width & 1) {
237 int xi = x >> 16;
238 int a = src_ptr[xi];
239 int b = src_ptr[xi + 1];
240 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
241 }
242 }
244 void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
245 int dst_width, int x32, int dx) {
246 int64 x = (int64)(x32);
247 int j;
248 for (j = 0; j < dst_width - 1; j += 2) {
249 int64 xi = x >> 16;
250 int a = src_ptr[xi];
251 int b = src_ptr[xi + 1];
252 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
253 x += dx;
254 xi = x >> 16;
255 a = src_ptr[xi];
256 b = src_ptr[xi + 1];
257 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
258 x += dx;
259 dst_ptr += 2;
260 }
261 if (dst_width & 1) {
262 int64 xi = x >> 16;
263 int a = src_ptr[xi];
264 int b = src_ptr[xi + 1];
265 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
266 }
267 }
268 #undef BLENDER
270 void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
271 uint8* dst, int dst_width) {
272 int x;
273 assert(dst_width % 3 == 0);
274 for (x = 0; x < dst_width; x += 3) {
275 dst[0] = src_ptr[0];
276 dst[1] = src_ptr[3];
277 dst[2] = src_ptr[6];
278 dst += 3;
279 src_ptr += 8;
280 }
281 }
283 // 8x3 -> 3x1
284 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
285 ptrdiff_t src_stride,
286 uint8* dst_ptr, int dst_width) {
287 intptr_t stride = src_stride;
288 int i;
289 assert((dst_width % 3 == 0) && (dst_width > 0));
290 for (i = 0; i < dst_width; i += 3) {
291 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
292 src_ptr[stride + 0] + src_ptr[stride + 1] +
293 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
294 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
295 (65536 / 9) >> 16;
296 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
297 src_ptr[stride + 3] + src_ptr[stride + 4] +
298 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
299 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
300 (65536 / 9) >> 16;
301 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
302 src_ptr[stride + 6] + src_ptr[stride + 7] +
303 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
304 (65536 / 6) >> 16;
305 src_ptr += 8;
306 dst_ptr += 3;
307 }
308 }
310 // 8x2 -> 3x1
311 void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
312 uint8* dst_ptr, int dst_width) {
313 intptr_t stride = src_stride;
314 int i;
315 assert((dst_width % 3 == 0) && (dst_width > 0));
316 for (i = 0; i < dst_width; i += 3) {
317 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
318 src_ptr[stride + 0] + src_ptr[stride + 1] +
319 src_ptr[stride + 2]) * (65536 / 6) >> 16;
320 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
321 src_ptr[stride + 3] + src_ptr[stride + 4] +
322 src_ptr[stride + 5]) * (65536 / 6) >> 16;
323 dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
324 src_ptr[stride + 6] + src_ptr[stride + 7]) *
325 (65536 / 4) >> 16;
326 src_ptr += 8;
327 dst_ptr += 3;
328 }
329 }
331 void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
332 uint16* dst_ptr, int src_width, int src_height) {
333 int x;
334 assert(src_width > 0);
335 assert(src_height > 0);
336 for (x = 0; x < src_width; ++x) {
337 const uint8* s = src_ptr + x;
338 unsigned int sum = 0u;
339 int y;
340 for (y = 0; y < src_height; ++y) {
341 sum += s[0];
342 s += src_stride;
343 }
344 // TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
345 dst_ptr[x] = sum < 65535u ? sum : 65535u;
346 }
347 }
349 void ScaleARGBRowDown2_C(const uint8* src_argb,
350 ptrdiff_t src_stride,
351 uint8* dst_argb, int dst_width) {
352 const uint32* src = (const uint32*)(src_argb);
353 uint32* dst = (uint32*)(dst_argb);
355 int x;
356 for (x = 0; x < dst_width - 1; x += 2) {
357 dst[0] = src[1];
358 dst[1] = src[3];
359 src += 4;
360 dst += 2;
361 }
362 if (dst_width & 1) {
363 dst[0] = src[1];
364 }
365 }
367 void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
368 ptrdiff_t src_stride,
369 uint8* dst_argb, int dst_width) {
370 int x;
371 for (x = 0; x < dst_width; ++x) {
372 dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
373 dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
374 dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
375 dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
376 src_argb += 8;
377 dst_argb += 4;
378 }
379 }
381 void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
382 uint8* dst_argb, int dst_width) {
383 int x;
384 for (x = 0; x < dst_width; ++x) {
385 dst_argb[0] = (src_argb[0] + src_argb[4] +
386 src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
387 dst_argb[1] = (src_argb[1] + src_argb[5] +
388 src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
389 dst_argb[2] = (src_argb[2] + src_argb[6] +
390 src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
391 dst_argb[3] = (src_argb[3] + src_argb[7] +
392 src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
393 src_argb += 8;
394 dst_argb += 4;
395 }
396 }
398 void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
399 int src_stepx,
400 uint8* dst_argb, int dst_width) {
401 const uint32* src = (const uint32*)(src_argb);
402 uint32* dst = (uint32*)(dst_argb);
404 int x;
405 for (x = 0; x < dst_width - 1; x += 2) {
406 dst[0] = src[0];
407 dst[1] = src[src_stepx];
408 src += src_stepx * 2;
409 dst += 2;
410 }
411 if (dst_width & 1) {
412 dst[0] = src[0];
413 }
414 }
416 void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
417 ptrdiff_t src_stride,
418 int src_stepx,
419 uint8* dst_argb, int dst_width) {
420 int x;
421 for (x = 0; x < dst_width; ++x) {
422 dst_argb[0] = (src_argb[0] + src_argb[4] +
423 src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
424 dst_argb[1] = (src_argb[1] + src_argb[5] +
425 src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
426 dst_argb[2] = (src_argb[2] + src_argb[6] +
427 src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
428 dst_argb[3] = (src_argb[3] + src_argb[7] +
429 src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
430 src_argb += src_stepx * 4;
431 dst_argb += 4;
432 }
433 }
435 // Scales a single row of pixels using point sampling.
436 void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
437 int dst_width, int x, int dx) {
438 const uint32* src = (const uint32*)(src_argb);
439 uint32* dst = (uint32*)(dst_argb);
440 int j;
441 for (j = 0; j < dst_width - 1; j += 2) {
442 dst[0] = src[x >> 16];
443 x += dx;
444 dst[1] = src[x >> 16];
445 x += dx;
446 dst += 2;
447 }
448 if (dst_width & 1) {
449 dst[0] = src[x >> 16];
450 }
451 }
453 void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
454 int dst_width, int x32, int dx) {
455 int64 x = (int64)(x32);
456 const uint32* src = (const uint32*)(src_argb);
457 uint32* dst = (uint32*)(dst_argb);
458 int j;
459 for (j = 0; j < dst_width - 1; j += 2) {
460 dst[0] = src[x >> 16];
461 x += dx;
462 dst[1] = src[x >> 16];
463 x += dx;
464 dst += 2;
465 }
466 if (dst_width & 1) {
467 dst[0] = src[x >> 16];
468 }
469 }
471 // Scales a single row of pixels up by 2x using point sampling.
472 void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
473 int dst_width, int x, int dx) {
474 const uint32* src = (const uint32*)(src_argb);
475 uint32* dst = (uint32*)(dst_argb);
476 int j;
477 for (j = 0; j < dst_width - 1; j += 2) {
478 dst[1] = dst[0] = src[0];
479 src += 1;
480 dst += 2;
481 }
482 if (dst_width & 1) {
483 dst[0] = src[0];
484 }
485 }
487 // Mimics SSSE3 blender
488 #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
489 #define BLENDERC(a, b, f, s) (uint32)( \
490 BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
491 #define BLENDER(a, b, f) \
492 BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
493 BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
495 void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
496 int dst_width, int x, int dx) {
497 const uint32* src = (const uint32*)(src_argb);
498 uint32* dst = (uint32*)(dst_argb);
499 int j;
500 for (j = 0; j < dst_width - 1; j += 2) {
501 int xi = x >> 16;
502 int xf = (x >> 9) & 0x7f;
503 uint32 a = src[xi];
504 uint32 b = src[xi + 1];
505 dst[0] = BLENDER(a, b, xf);
506 x += dx;
507 xi = x >> 16;
508 xf = (x >> 9) & 0x7f;
509 a = src[xi];
510 b = src[xi + 1];
511 dst[1] = BLENDER(a, b, xf);
512 x += dx;
513 dst += 2;
514 }
515 if (dst_width & 1) {
516 int xi = x >> 16;
517 int xf = (x >> 9) & 0x7f;
518 uint32 a = src[xi];
519 uint32 b = src[xi + 1];
520 dst[0] = BLENDER(a, b, xf);
521 }
522 }
524 void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
525 int dst_width, int x32, int dx) {
526 int64 x = (int64)(x32);
527 const uint32* src = (const uint32*)(src_argb);
528 uint32* dst = (uint32*)(dst_argb);
529 int j;
530 for (j = 0; j < dst_width - 1; j += 2) {
531 int64 xi = x >> 16;
532 int xf = (x >> 9) & 0x7f;
533 uint32 a = src[xi];
534 uint32 b = src[xi + 1];
535 dst[0] = BLENDER(a, b, xf);
536 x += dx;
537 xi = x >> 16;
538 xf = (x >> 9) & 0x7f;
539 a = src[xi];
540 b = src[xi + 1];
541 dst[1] = BLENDER(a, b, xf);
542 x += dx;
543 dst += 2;
544 }
545 if (dst_width & 1) {
546 int64 xi = x >> 16;
547 int xf = (x >> 9) & 0x7f;
548 uint32 a = src[xi];
549 uint32 b = src[xi + 1];
550 dst[0] = BLENDER(a, b, xf);
551 }
552 }
553 #undef BLENDER1
554 #undef BLENDERC
555 #undef BLENDER
557 // Scale plane vertically with bilinear interpolation.
558 void ScalePlaneVertical(int src_height,
559 int dst_width, int dst_height,
560 int src_stride, int dst_stride,
561 const uint8* src_argb, uint8* dst_argb,
562 int x, int y, int dy,
563 int bpp, enum FilterMode filtering) {
564 // TODO(fbarchard): Allow higher bpp.
565 int dst_width_bytes = dst_width * bpp;
566 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
567 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
568 InterpolateRow_C;
569 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
570 int j;
571 assert(bpp >= 1 && bpp <= 4);
572 assert(src_height != 0);
573 assert(dst_width > 0);
574 assert(dst_height > 0);
575 src_argb += (x >> 16) * bpp;
576 #if defined(HAS_INTERPOLATEROW_SSE2)
577 if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
578 InterpolateRow = InterpolateRow_Any_SSE2;
579 if (IS_ALIGNED(dst_width_bytes, 16)) {
580 InterpolateRow = InterpolateRow_Unaligned_SSE2;
581 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
582 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
583 InterpolateRow = InterpolateRow_SSE2;
584 }
585 }
586 }
587 #endif
588 #if defined(HAS_INTERPOLATEROW_SSSE3)
589 if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
590 InterpolateRow = InterpolateRow_Any_SSSE3;
591 if (IS_ALIGNED(dst_width_bytes, 16)) {
592 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
593 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
594 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
595 InterpolateRow = InterpolateRow_SSSE3;
596 }
597 }
598 }
599 #endif
600 #if defined(HAS_INTERPOLATEROW_AVX2)
601 if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
602 InterpolateRow = InterpolateRow_Any_AVX2;
603 if (IS_ALIGNED(dst_width_bytes, 32)) {
604 InterpolateRow = InterpolateRow_AVX2;
605 }
606 }
607 #endif
608 #if defined(HAS_INTERPOLATEROW_NEON)
609 if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
610 InterpolateRow = InterpolateRow_Any_NEON;
611 if (IS_ALIGNED(dst_width_bytes, 16)) {
612 InterpolateRow = InterpolateRow_NEON;
613 }
614 }
615 #endif
616 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
617 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
618 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
619 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
620 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
621 if (IS_ALIGNED(dst_width_bytes, 4)) {
622 InterpolateRow = InterpolateRow_MIPS_DSPR2;
623 }
624 }
625 #endif
626 for (j = 0; j < dst_height; ++j) {
627 int yi;
628 int yf;
629 if (y > max_y) {
630 y = max_y;
631 }
632 yi = y >> 16;
633 yf = filtering ? ((y >> 8) & 255) : 0;
634 InterpolateRow(dst_argb, src_argb + yi * src_stride,
635 src_stride, dst_width_bytes, yf);
636 dst_argb += dst_stride;
637 y += dy;
638 }
639 }
641 // Simplify the filtering based on scale factors.
642 enum FilterMode ScaleFilterReduce(int src_width, int src_height,
643 int dst_width, int dst_height,
644 enum FilterMode filtering) {
645 if (src_width < 0) {
646 src_width = -src_width;
647 }
648 if (src_height < 0) {
649 src_height = -src_height;
650 }
651 if (filtering == kFilterBox) {
652 // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
653 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
654 filtering = kFilterBilinear;
655 }
656 // If scaling to larger, switch from Box to Bilinear.
657 if (dst_width >= src_width || dst_height >= src_height) {
658 filtering = kFilterBilinear;
659 }
660 }
661 if (filtering == kFilterBilinear) {
662 if (src_height == 1) {
663 filtering = kFilterLinear;
664 }
665 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
666 if (dst_height == src_height || dst_height * 3 == src_height) {
667 filtering = kFilterLinear;
668 }
669 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
670 // avoid reading 2 pixels horizontally that causes memory exception.
671 if (src_width == 1) {
672 filtering = kFilterNone;
673 }
674 }
675 if (filtering == kFilterLinear) {
676 if (src_width == 1) {
677 filtering = kFilterNone;
678 }
679 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
680 if (dst_width == src_width || dst_width * 3 == src_width) {
681 filtering = kFilterNone;
682 }
683 }
684 return filtering;
685 }
687 // Divide num by div and return as 16.16 fixed point result.
688 int FixedDiv_C(int num, int div) {
689 return (int)(((int64)(num) << 16) / div);
690 }
692 // Divide num by div and return as 16.16 fixed point result.
693 int FixedDiv1_C(int num, int div) {
694 return (int)((((int64)(num) << 16) - 0x00010001) /
695 (div - 1));
696 }
698 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
700 // Compute slope values for stepping.
701 void ScaleSlope(int src_width, int src_height,
702 int dst_width, int dst_height,
703 enum FilterMode filtering,
704 int* x, int* y, int* dx, int* dy) {
705 assert(x != NULL);
706 assert(y != NULL);
707 assert(dx != NULL);
708 assert(dy != NULL);
709 assert(src_width != 0);
710 assert(src_height != 0);
711 assert(dst_width > 0);
712 assert(dst_height > 0);
713 // Check for 1 pixel and avoid FixedDiv overflow.
714 if (dst_width == 1 && src_width >= 32768) {
715 dst_width = src_width;
716 }
717 if (dst_height == 1 && src_height >= 32768) {
718 dst_height = src_height;
719 }
720 if (filtering == kFilterBox) {
721 // Scale step for point sampling duplicates all pixels equally.
722 *dx = FixedDiv(Abs(src_width), dst_width);
723 *dy = FixedDiv(src_height, dst_height);
724 *x = 0;
725 *y = 0;
726 } else if (filtering == kFilterBilinear) {
727 // Scale step for bilinear sampling renders last pixel once for upsample.
728 if (dst_width <= Abs(src_width)) {
729 *dx = FixedDiv(Abs(src_width), dst_width);
730 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
731 } else if (dst_width > 1) {
732 *dx = FixedDiv1(Abs(src_width), dst_width);
733 *x = 0;
734 }
735 if (dst_height <= src_height) {
736 *dy = FixedDiv(src_height, dst_height);
737 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
738 } else if (dst_height > 1) {
739 *dy = FixedDiv1(src_height, dst_height);
740 *y = 0;
741 }
742 } else if (filtering == kFilterLinear) {
743 // Scale step for bilinear sampling renders last pixel once for upsample.
744 if (dst_width <= Abs(src_width)) {
745 *dx = FixedDiv(Abs(src_width), dst_width);
746 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
747 } else if (dst_width > 1) {
748 *dx = FixedDiv1(Abs(src_width), dst_width);
749 *x = 0;
750 }
751 *dy = FixedDiv(src_height, dst_height);
752 *y = *dy >> 1;
753 } else {
754 // Scale step for point sampling duplicates all pixels equally.
755 *dx = FixedDiv(Abs(src_width), dst_width);
756 *dy = FixedDiv(src_height, dst_height);
757 *x = CENTERSTART(*dx, 0);
758 *y = CENTERSTART(*dy, 0);
759 }
760 // Negative src_width means horizontally mirror.
761 if (src_width < 0) {
762 *x += (dst_width - 1) * *dx;
763 *dx = -*dx;
764 // src_width = -src_width; // Caller must do this.
765 }
766 }
767 #undef CENTERSTART
769 #ifdef __cplusplus
770 } // extern "C"
771 } // namespace libyuv
772 #endif