|
1 /* |
|
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
|
3 * |
|
4 * Use of this source code is governed by a BSD-style license |
|
5 * that can be found in the LICENSE file in the root of the source |
|
6 * tree. An additional intellectual property rights grant can be found |
|
7 * in the file PATENTS. All contributing project authors may |
|
8 * be found in the AUTHORS file in the root of the source tree. |
|
9 */ |
|
10 |
|
11 #include "libyuv/scale.h" |
|
12 |
|
13 #include <assert.h> |
|
14 #include <string.h> |
|
15 |
|
16 #include "libyuv/cpu_id.h" |
|
17 #include "libyuv/planar_functions.h" // For CopyPlane |
|
18 #include "libyuv/row.h" |
|
19 #include "libyuv/scale_row.h" |
|
20 |
|
21 #ifdef __cplusplus |
|
22 namespace libyuv { |
|
23 extern "C" { |
|
24 #endif |
|
25 |
|
26 // Remove this macro if OVERREAD is safe. |
|
27 #define AVOID_OVERREAD 1 |
|
28 |
|
// Absolute value of a signed int (result is undefined for INT_MIN).
static __inline int Abs(int v) {
  return v < 0 ? -v : v;
}
|
32 |
|
// Rounded-up divide by 2^s that rounds away from zero for negative values.
// Every argument and the whole expansion are parenthesized so expression
// arguments (e.g. SUBSAMPLE(w - x, 1, 1)) expand correctly.
#define SUBSAMPLE(v, a, s) (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
|
34 |
|
// Scale plane, 1/2
// This is an optimized version for scaling down a plane to 1/2 of
// its original size.
// 'filtering' selects the row kernel: kFilterNone subsamples, kFilterLinear
// averages horizontally only, any other mode averages a 2x2 box.
static void ScalePlaneDown2(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // C fallback row kernel, chosen by filter mode.
  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering == kFilterNone ? ScaleRowDown2_C :
      (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
      ScaleRowDown2Box_C);
  // Each output row consumes two source rows.
  int row_stride = src_stride << 1;
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_NEON)
  // NEON kernels require dst_width to be a multiple of 16.
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
  }
#elif defined(HAS_SCALEROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    // Unaligned variants first; upgrade to aligned variants when all
    // pointers and strides are 16-byte aligned.
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
        ScaleRowDown2Box_Unaligned_SSE2);
    if (IS_ALIGNED(src_ptr, 16) &&
        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
          ScaleRowDown2Box_SSE2);
    }
  }
#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    // Linear filtering is horizontal-only; stride 0 makes the vertical
    // taps re-read the same row.
    src_stride = 0;
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  int y;
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
|
92 |
|
// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.
static void ScalePlaneDown4(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // C fallback: plain subsample or 4x4 box average.
  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  // Each output row consumes four source rows.
  int row_stride = src_stride << 2;
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_NEON)
  // NEON kernels require dst_width to be a multiple of 8.
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
  }
#elif defined(HAS_SCALEROWDOWN4_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
  }
#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    // Linear filtering is horizontal-only; stride 0 makes the vertical
    // taps re-read the same row.
    src_stride = 0;
  }
  int y;
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
|
139 |
|
// Scale plane down, 3/4
// Produces 3 output rows for every 4 source rows; dst_width must be a
// multiple of 3.
static void ScalePlaneDown34(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  assert(dst_width % 3 == 0);
  // Two row kernels: _0 and _1 apply different vertical filter weights
  // within each 4-row group.
  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  }
#if defined(HAS_SCALEROWDOWN34_NEON)
  // NEON kernels require dst_width to be a multiple of 24.
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
    }
  }
#endif

  // kFilterLinear is horizontal-only, so the vertical tap stride is 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  int y;
  // Main loop: 4 source rows -> 3 destination rows per iteration.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    // Third output row is filtered upward (negative stride) starting from
    // the row below, mirroring the _0 weights.
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
|
221 |
|
222 |
|
// Scale plane, 3/8
// This is an optimized version for scaling down a plane to 3/8
// of its original size.
//
// Uses box filter arranges like this
// aaabbbcc -> abc
// aaabbbcc    def
// aaabbbcc    ghi
// dddeeeff
// dddeeeff
// dddeeeff
// ggghhhii
// ggghhhii
// Boxes are 3x3, 2x3, 3x2 and 2x2
static void ScalePlaneDown38(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  assert(dst_width % 3 == 0);
  // _3 averages 3 source rows per output row, _2 averages 2 (the short
  // tail of each 8-row group).
  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  }
#if defined(HAS_SCALEROWDOWN38_NEON)
  // NEON kernels require dst_width to be a multiple of 12.
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
    }
  }
#endif

  // kFilterLinear is horizontal-only, so the vertical tap stride is 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  int y;
  // Main loop: 8 source rows -> 3 destination rows (3 + 3 + 2).
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
|
314 |
|
315 static __inline uint32 SumBox(int iboxwidth, int iboxheight, |
|
316 ptrdiff_t src_stride, const uint8* src_ptr) { |
|
317 assert(iboxwidth > 0); |
|
318 assert(iboxheight > 0); |
|
319 uint32 sum = 0u; |
|
320 int y; |
|
321 for (y = 0; y < iboxheight; ++y) { |
|
322 int x; |
|
323 for (x = 0; x < iboxwidth; ++x) { |
|
324 sum += src_ptr[x]; |
|
325 } |
|
326 src_ptr += src_stride; |
|
327 } |
|
328 return sum; |
|
329 } |
|
330 |
|
331 static void ScalePlaneBoxRow_C(int dst_width, int boxheight, |
|
332 int x, int dx, ptrdiff_t src_stride, |
|
333 const uint8* src_ptr, uint8* dst_ptr) { |
|
334 int i; |
|
335 for (i = 0; i < dst_width; ++i) { |
|
336 int ix = x >> 16; |
|
337 x += dx; |
|
338 int boxwidth = (x >> 16) - ix; |
|
339 *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) / |
|
340 (boxwidth * boxheight); |
|
341 } |
|
342 } |
|
343 |
|
344 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { |
|
345 assert(iboxwidth > 0); |
|
346 uint32 sum = 0u; |
|
347 int x; |
|
348 for (x = 0; x < iboxwidth; ++x) { |
|
349 sum += src_ptr[x]; |
|
350 } |
|
351 return sum; |
|
352 } |
|
353 |
|
354 static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, |
|
355 const uint16* src_ptr, uint8* dst_ptr) { |
|
356 int scaletbl[2]; |
|
357 int minboxwidth = (dx >> 16); |
|
358 scaletbl[0] = 65536 / (minboxwidth * boxheight); |
|
359 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); |
|
360 int* scaleptr = scaletbl - minboxwidth; |
|
361 int i; |
|
362 for (i = 0; i < dst_width; ++i) { |
|
363 int ix = x >> 16; |
|
364 x += dx; |
|
365 int boxwidth = (x >> 16) - ix; |
|
366 *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; |
|
367 } |
|
368 } |
|
369 |
|
370 static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, |
|
371 const uint16* src_ptr, uint8* dst_ptr) { |
|
372 int boxwidth = (dx >> 16); |
|
373 int scaleval = 65536 / (boxwidth * boxheight); |
|
374 int i; |
|
375 for (i = 0; i < dst_width; ++i) { |
|
376 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; |
|
377 x += boxwidth; |
|
378 } |
|
379 } |
|
380 |
|
// Scale plane down to any dimensions, with interpolation.
// (boxfilter).
//
// Same method as SimpleScale, which is fixed point, outputting
// one pixel of destination using fixed point (16.16) to step
// through source, sampling a box of pixel with simple
// averaging.
static void ScalePlaneBox(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  // Negative src_width indicates a mirrored source; box math uses the
  // magnitude.
  src_width = Abs(src_width);
  const int max_y = (src_height << 16);
  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
  // Slow path: unaligned widths or less than 2x vertical reduction use
  // the generic per-pixel box sum.
  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    uint8* dst = dst_ptr;
    int j;
    for (j = 0; j < dst_height; ++j) {
      int iy = y >> 16;
      const uint8* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Clamp so the last box stays within the source.
      }
      int boxheight = (y >> 16) - iy;
      ScalePlaneBoxRow_C(dst_width, boxheight,
                         x, dx, src_stride,
                         src, dst);
      dst += dst_stride;
    }
    return;
  }
  // Allocate a row buffer of uint16.
  align_buffer_64(row16, src_width * 2);

  // Integer dx (no fractional part) means every box has the same width.
  void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
      const uint16* src_ptr, uint8* dst_ptr) =
      (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
  void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
      uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
#if defined(HAS_SCALEADDROWS_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
      IS_ALIGNED(src_width, 16) &&
#endif
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    ScaleAddRows = ScaleAddRows_SSE2;
  }
#endif

  int j;
  for (j = 0; j < dst_height; ++j) {
    int iy = y >> 16;
    const uint8* src = src_ptr + iy * src_stride;
    y += dy;
    if (y > (src_height << 16)) {
      y = (src_height << 16);
    }
    int boxheight = (y >> 16) - iy;
    // Sum boxheight source rows into the uint16 row buffer, then average
    // each column group into a destination pixel.
    ScaleAddRows(src, src_stride, (uint16*)(row16),
                 src_width, boxheight);
    ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
                 dst_ptr);
    dst_ptr += dst_stride;
  }
  free_aligned_buffer_64(row16);
}
|
455 |
|
// Scale plane down with bilinear interpolation.
void ScalePlaneBilinearDown(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Vertical blend of two adjacent source rows. SIMD variants are chosen
  // by CPU flags, width multiples, and pointer/stride alignment; "Any"
  // variants handle the ragged tail of unaligned widths.
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif

  // Horizontal filter; the 64-bit variant avoids fixed-point overflow for
  // sources 32768 pixels wide or more.
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;

#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif

  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  align_buffer_64(row, src_width);

  const int max_y = (src_height - 1) << 16;
  int j;
  for (j = 0; j < dst_height; ++j) {
    if (y > max_y) {
      y = max_y;  // Clamp to the last source row.
    }
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Linear: horizontal filter only, straight from the source row.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Blend two source rows into 'row', then filter it horizontally.
      int yf = (y >> 8) & 255;
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
  }
  free_aligned_buffer_64(row);
}
|
555 |
|
// Scale up down with bilinear interpolation.
void ScalePlaneBilinearUp(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr,
                          enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Vertical blend of two pre-scaled rows. SIMD variants are chosen by
  // CPU flags, destination width multiples, and alignment.
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif

  // Horizontal filter: bilinear when filtering, nearest otherwise. The
  // 64-bit variant avoids fixed-point overflow for very wide sources.
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
  // Exact 2x horizontal upsample has a specialized kernel.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  int yi = y >> 16;
  const uint8* src = src_ptr + yi * src_stride;

  // Allocate 2 row buffers.
  // The two buffers are ping-ponged: each holds one horizontally scaled
  // source row, and output rows blend between them vertically.
  const int kRowSize = (dst_width + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);

  uint8* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  // Prime the two buffers with the first two source rows (or the same
  // row twice for a 1-row source).
  ScaleFilterCols(rowptr, src, dst_width, x, dx);
  if (src_height > 1) {
    src += src_stride;
  }
  ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
  src += src_stride;

  int j;
  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      // Crossed into a new source row: clamp, then scale the next source
      // row into the older of the two buffers and flip the ping-pong.
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
        src = src_ptr + yi * src_stride;
      }
      if (yi != lasty) {
        ScaleFilterCols(rowptr, src, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src += src_stride;
      }
    }
    if (filtering == kFilterLinear) {
      // Linear mode: no vertical blend; copy the current row out.
      InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
    }
    dst_ptr += dst_stride;
    y += dy;
  }
  free_aligned_buffer_64(row);
}
|
693 |
|
// Scale Plane to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx is the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
static void ScalePlaneSimple(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Nearest-neighbor column sampler; exact 2x horizontal upsample has a
  // specialized kernel.
  void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) = ScaleCols_C;
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  int i;
  for (i = 0; i < dst_height; ++i) {
    // y >> 16 selects the nearest source row for this destination row.
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
              dst_width, x, dx);
    dst_ptr += dst_stride;
    y += dy;
  }
}
|
733 |
|
// Scale a plane.
// This function dispatches to a specialized scaler based on scale factor.
LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
                int src_width, int src_height,
                uint8* dst, int dst_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled vertically.
    ScalePlaneVertical(src_height,
                       dst_width, dst_height,
                       src_stride, dst_stride, src, dst,
                       0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        filtering != kFilterBilinear) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // Box filter only for reductions of more than 2x; smaller reductions
  // fall through to bilinear below.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
                  src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  // No filtering: nearest neighbor for any remaining scale factor.
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
                   src_stride, dst_stride, src, dst);
}
|
820 |
|
821 // Scale an I420 image. |
|
822 // This function in turn calls a scaling function for each plane. |
|
823 |
|
824 LIBYUV_API |
|
825 int I420Scale(const uint8* src_y, int src_stride_y, |
|
826 const uint8* src_u, int src_stride_u, |
|
827 const uint8* src_v, int src_stride_v, |
|
828 int src_width, int src_height, |
|
829 uint8* dst_y, int dst_stride_y, |
|
830 uint8* dst_u, int dst_stride_u, |
|
831 uint8* dst_v, int dst_stride_v, |
|
832 int dst_width, int dst_height, |
|
833 enum FilterMode filtering) { |
|
834 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || |
|
835 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { |
|
836 return -1; |
|
837 } |
|
838 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
|
839 int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
|
840 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
|
841 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
|
842 |
|
843 ScalePlane(src_y, src_stride_y, src_width, src_height, |
|
844 dst_y, dst_stride_y, dst_width, dst_height, |
|
845 filtering); |
|
846 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, |
|
847 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, |
|
848 filtering); |
|
849 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, |
|
850 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, |
|
851 filtering); |
|
852 return 0; |
|
853 } |
|
854 |
|
855 // Deprecated api |
|
856 LIBYUV_API |
|
857 int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, |
|
858 int src_stride_y, int src_stride_u, int src_stride_v, |
|
859 int src_width, int src_height, |
|
860 uint8* dst_y, uint8* dst_u, uint8* dst_v, |
|
861 int dst_stride_y, int dst_stride_u, int dst_stride_v, |
|
862 int dst_width, int dst_height, |
|
863 LIBYUV_BOOL interpolate) { |
|
864 return I420Scale(src_y, src_stride_y, |
|
865 src_u, src_stride_u, |
|
866 src_v, src_stride_v, |
|
867 src_width, src_height, |
|
868 dst_y, dst_stride_y, |
|
869 dst_u, dst_stride_u, |
|
870 dst_v, dst_stride_v, |
|
871 dst_width, dst_height, |
|
872 interpolate ? kFilterBox : kFilterNone); |
|
873 } |
|
874 |
|
// Deprecated api
// Scale a packed I420 buffer into a packed I420 buffer, writing the output
// vertically centered at dst_yoffset rows from the top. The pointer math
// below assumes both buffers are tightly packed (stride == width): Y plane
// followed by half-resolution U then V planes.
LIBYUV_API
int ScaleOffset(const uint8* src, int src_width, int src_height,
                uint8* dst, int dst_width, int dst_height, int dst_yoffset,
                LIBYUV_BOOL interpolate) {
  if (!src || src_width <= 0 || src_height <= 0 ||
      !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset < 0 ||
      dst_yoffset >= dst_height) {
    return -1;
  }
  dst_yoffset = dst_yoffset & ~1;  // chroma requires offset to multiple of 2.
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  int aheight = dst_height - dst_yoffset * 2;  // actual output height
  // Plane base pointers within the packed source buffer.
  const uint8* src_y = src;
  const uint8* src_u = src + src_width * src_height;
  const uint8* src_v = src + src_width * src_height +
                       src_halfwidth * src_halfheight;
  // Destination planes start dst_yoffset (luma) / dst_yoffset/2 (chroma)
  // rows in, leaving equal margins above and below.
  uint8* dst_y = dst + dst_yoffset * dst_width;
  uint8* dst_u = dst + dst_width * dst_height +
                 (dst_yoffset >> 1) * dst_halfwidth;
  uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
                 (dst_yoffset >> 1) * dst_halfwidth;
  return I420Scale(src_y, src_width,
                   src_u, src_halfwidth,
                   src_v, src_halfwidth,
                   src_width, src_height,
                   dst_y, dst_width,
                   dst_u, dst_halfwidth,
                   dst_v, dst_halfwidth,
                   dst_width, aheight,
                   interpolate ? kFilterBox : kFilterNone);
}
|
910 |
|
911 #ifdef __cplusplus |
|
912 } // extern "C" |
|
913 } // namespace libyuv |
|
914 #endif |