|
1 /* |
|
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
|
3 * |
|
4 * Use of this source code is governed by a BSD-style license |
|
5 * that can be found in the LICENSE file in the root of the source |
|
6 * tree. An additional intellectual property rights grant can be found |
|
7 * in the file PATENTS. All contributing project authors may |
|
8 * be found in the AUTHORS file in the root of the source tree. |
|
9 */ |
|
10 |
|
11 #include "libyuv/scale.h" |
|
12 |
|
13 #include <assert.h> |
|
14 #include <string.h> |
|
15 |
|
16 #include "libyuv/cpu_id.h" |
|
17 #include "libyuv/planar_functions.h" // For CopyARGB |
|
18 #include "libyuv/row.h" |
|
19 #include "libyuv/scale_row.h" |
|
20 |
|
21 #ifdef __cplusplus |
|
22 namespace libyuv { |
|
23 extern "C" { |
|
24 #endif |
|
25 |
|
// Returns the absolute value of v.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
|
29 |
|
30 // ScaleARGB ARGB, 1/2 |
|
31 // This is an optimized version for scaling down a ARGB to 1/2 of |
|
32 // its original size. |
|
33 static void ScaleARGBDown2(int src_width, int src_height, |
|
34 int dst_width, int dst_height, |
|
35 int src_stride, int dst_stride, |
|
36 const uint8* src_argb, uint8* dst_argb, |
|
37 int x, int dx, int y, int dy, |
|
38 enum FilterMode filtering) { |
|
39 int j; |
|
40 int row_stride = src_stride * (dy >> 16); |
|
41 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, |
|
42 uint8* dst_argb, int dst_width) = |
|
43 filtering == kFilterNone ? ScaleARGBRowDown2_C : |
|
44 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C : |
|
45 ScaleARGBRowDown2Box_C); |
|
46 assert(dx == 65536 * 2); // Test scale factor of 2. |
|
47 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. |
|
48 // Advance to odd row, even column. |
|
49 if (filtering == kFilterBilinear) { |
|
50 src_argb += (y >> 16) * src_stride + (x >> 16) * 4; |
|
51 } else { |
|
52 src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4; |
|
53 } |
|
54 |
|
55 #if defined(HAS_SCALEARGBROWDOWN2_SSE2) |
|
56 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && |
|
57 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && |
|
58 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { |
|
59 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 : |
|
60 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 : |
|
61 ScaleARGBRowDown2Box_SSE2); |
|
62 } |
|
63 #elif defined(HAS_SCALEARGBROWDOWN2_NEON) |
|
64 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && |
|
65 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) { |
|
66 ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON : |
|
67 ScaleARGBRowDown2_NEON; |
|
68 } |
|
69 #endif |
|
70 |
|
71 if (filtering == kFilterLinear) { |
|
72 src_stride = 0; |
|
73 } |
|
74 for (j = 0; j < dst_height; ++j) { |
|
75 ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width); |
|
76 src_argb += row_stride; |
|
77 dst_argb += dst_stride; |
|
78 } |
|
79 } |
|
80 |
|
81 // ScaleARGB ARGB, 1/4 |
|
82 // This is an optimized version for scaling down a ARGB to 1/4 of |
|
83 // its original size. |
|
84 static void ScaleARGBDown4Box(int src_width, int src_height, |
|
85 int dst_width, int dst_height, |
|
86 int src_stride, int dst_stride, |
|
87 const uint8* src_argb, uint8* dst_argb, |
|
88 int x, int dx, int y, int dy) { |
|
89 int j; |
|
90 // Allocate 2 rows of ARGB. |
|
91 const int kRowSize = (dst_width * 2 * 4 + 15) & ~15; |
|
92 align_buffer_64(row, kRowSize * 2); |
|
93 int row_stride = src_stride * (dy >> 16); |
|
94 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, |
|
95 uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; |
|
96 // Advance to odd row, even column. |
|
97 src_argb += (y >> 16) * src_stride + (x >> 16) * 4; |
|
98 assert(dx == 65536 * 4); // Test scale factor of 4. |
|
99 assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. |
|
100 #if defined(HAS_SCALEARGBROWDOWN2_SSE2) |
|
101 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && |
|
102 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && |
|
103 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { |
|
104 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; |
|
105 } |
|
106 #elif defined(HAS_SCALEARGBROWDOWN2_NEON) |
|
107 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && |
|
108 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) { |
|
109 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON; |
|
110 } |
|
111 #endif |
|
112 for (j = 0; j < dst_height; ++j) { |
|
113 ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); |
|
114 ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, |
|
115 row + kRowSize, dst_width * 2); |
|
116 ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); |
|
117 src_argb += row_stride; |
|
118 dst_argb += dst_stride; |
|
119 } |
|
120 free_aligned_buffer_64(row); |
|
121 } |
|
122 |
|
123 // ScaleARGB ARGB Even |
|
124 // This is an optimized version for scaling down a ARGB to even |
|
125 // multiple of its original size. |
|
126 static void ScaleARGBDownEven(int src_width, int src_height, |
|
127 int dst_width, int dst_height, |
|
128 int src_stride, int dst_stride, |
|
129 const uint8* src_argb, uint8* dst_argb, |
|
130 int x, int dx, int y, int dy, |
|
131 enum FilterMode filtering) { |
|
132 int j; |
|
133 int col_step = dx >> 16; |
|
134 int row_stride = (dy >> 16) * src_stride; |
|
135 void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride, |
|
136 int src_step, uint8* dst_argb, int dst_width) = |
|
137 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; |
|
138 assert(IS_ALIGNED(src_width, 2)); |
|
139 assert(IS_ALIGNED(src_height, 2)); |
|
140 src_argb += (y >> 16) * src_stride + (x >> 16) * 4; |
|
141 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) |
|
142 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && |
|
143 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { |
|
144 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : |
|
145 ScaleARGBRowDownEven_SSE2; |
|
146 } |
|
147 #elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON) |
|
148 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) && |
|
149 IS_ALIGNED(src_argb, 4)) { |
|
150 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : |
|
151 ScaleARGBRowDownEven_NEON; |
|
152 } |
|
153 #endif |
|
154 |
|
155 if (filtering == kFilterLinear) { |
|
156 src_stride = 0; |
|
157 } |
|
158 for (j = 0; j < dst_height; ++j) { |
|
159 ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width); |
|
160 src_argb += row_stride; |
|
161 dst_argb += dst_stride; |
|
162 } |
|
163 } |
|
164 |
|
165 // Scale ARGB down with bilinear interpolation. |
|
166 static void ScaleARGBBilinearDown(int src_width, int src_height, |
|
167 int dst_width, int dst_height, |
|
168 int src_stride, int dst_stride, |
|
169 const uint8* src_argb, uint8* dst_argb, |
|
170 int x, int dx, int y, int dy, |
|
171 enum FilterMode filtering) { |
|
172 int j; |
|
173 int64 xlast = x + (int64)(dst_width - 1) * dx; |
|
174 int64 xl = (dx >= 0) ? x : xlast; |
|
175 int64 xr = (dx >= 0) ? xlast : x; |
|
176 int clip_src_width; |
|
177 xl = (xl >> 16) & ~3; // Left edge aligned. |
|
178 xr = (xr >> 16) + 1; // Right most pixel used. |
|
179 clip_src_width = (((xr - xl) + 1 + 3) & ~3) * 4; // Width aligned to 4. |
|
180 src_argb += xl * 4; |
|
181 x -= (int)(xl << 16); |
|
182 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, |
|
183 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = |
|
184 InterpolateRow_C; |
|
185 #if defined(HAS_INTERPOLATEROW_SSE2) |
|
186 if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) { |
|
187 InterpolateRow = InterpolateRow_Any_SSE2; |
|
188 if (IS_ALIGNED(clip_src_width, 16)) { |
|
189 InterpolateRow = InterpolateRow_Unaligned_SSE2; |
|
190 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { |
|
191 InterpolateRow = InterpolateRow_SSE2; |
|
192 } |
|
193 } |
|
194 } |
|
195 #endif |
|
196 #if defined(HAS_INTERPOLATEROW_SSSE3) |
|
197 if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) { |
|
198 InterpolateRow = InterpolateRow_Any_SSSE3; |
|
199 if (IS_ALIGNED(clip_src_width, 16)) { |
|
200 InterpolateRow = InterpolateRow_Unaligned_SSSE3; |
|
201 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { |
|
202 InterpolateRow = InterpolateRow_SSSE3; |
|
203 } |
|
204 } |
|
205 } |
|
206 #endif |
|
207 #if defined(HAS_INTERPOLATEROW_AVX2) |
|
208 if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) { |
|
209 InterpolateRow = InterpolateRow_Any_AVX2; |
|
210 if (IS_ALIGNED(clip_src_width, 32)) { |
|
211 InterpolateRow = InterpolateRow_AVX2; |
|
212 } |
|
213 } |
|
214 #endif |
|
215 #if defined(HAS_INTERPOLATEROW_NEON) |
|
216 if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) { |
|
217 InterpolateRow = InterpolateRow_Any_NEON; |
|
218 if (IS_ALIGNED(clip_src_width, 16)) { |
|
219 InterpolateRow = InterpolateRow_NEON; |
|
220 } |
|
221 } |
|
222 #endif |
|
223 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) |
|
224 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 && |
|
225 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) { |
|
226 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; |
|
227 if (IS_ALIGNED(clip_src_width, 4)) { |
|
228 InterpolateRow = InterpolateRow_MIPS_DSPR2; |
|
229 } |
|
230 } |
|
231 #endif |
|
232 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, |
|
233 int dst_width, int x, int dx) = |
|
234 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; |
|
235 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) |
|
236 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { |
|
237 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; |
|
238 } |
|
239 #endif |
|
240 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. |
|
241 // Allocate a row of ARGB. |
|
242 align_buffer_64(row, clip_src_width * 4); |
|
243 |
|
244 const int max_y = (src_height - 1) << 16; |
|
245 for (j = 0; j < dst_height; ++j) { |
|
246 if (y > max_y) { |
|
247 y = max_y; |
|
248 } |
|
249 int yi = y >> 16; |
|
250 const uint8* src = src_argb + yi * src_stride; |
|
251 if (filtering == kFilterLinear) { |
|
252 ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); |
|
253 } else { |
|
254 int yf = (y >> 8) & 255; |
|
255 InterpolateRow(row, src, src_stride, clip_src_width, yf); |
|
256 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); |
|
257 } |
|
258 dst_argb += dst_stride; |
|
259 y += dy; |
|
260 } |
|
261 free_aligned_buffer_64(row); |
|
262 } |
|
263 |
|
264 // Scale ARGB up with bilinear interpolation. |
|
265 static void ScaleARGBBilinearUp(int src_width, int src_height, |
|
266 int dst_width, int dst_height, |
|
267 int src_stride, int dst_stride, |
|
268 const uint8* src_argb, uint8* dst_argb, |
|
269 int x, int dx, int y, int dy, |
|
270 enum FilterMode filtering) { |
|
271 int j; |
|
272 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, |
|
273 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = |
|
274 InterpolateRow_C; |
|
275 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, |
|
276 int dst_width, int x, int dx) = |
|
277 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; |
|
278 #if defined(HAS_INTERPOLATEROW_SSE2) |
|
279 if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) { |
|
280 InterpolateRow = InterpolateRow_Any_SSE2; |
|
281 if (IS_ALIGNED(dst_width, 4)) { |
|
282 InterpolateRow = InterpolateRow_Unaligned_SSE2; |
|
283 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { |
|
284 InterpolateRow = InterpolateRow_SSE2; |
|
285 } |
|
286 } |
|
287 } |
|
288 #endif |
|
289 #if defined(HAS_INTERPOLATEROW_SSSE3) |
|
290 if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { |
|
291 InterpolateRow = InterpolateRow_Any_SSSE3; |
|
292 if (IS_ALIGNED(dst_width, 4)) { |
|
293 InterpolateRow = InterpolateRow_Unaligned_SSSE3; |
|
294 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { |
|
295 InterpolateRow = InterpolateRow_SSSE3; |
|
296 } |
|
297 } |
|
298 } |
|
299 #endif |
|
300 #if defined(HAS_INTERPOLATEROW_AVX2) |
|
301 if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) { |
|
302 InterpolateRow = InterpolateRow_Any_AVX2; |
|
303 if (IS_ALIGNED(dst_width, 8)) { |
|
304 InterpolateRow = InterpolateRow_AVX2; |
|
305 } |
|
306 } |
|
307 #endif |
|
308 #if defined(HAS_INTERPOLATEROW_NEON) |
|
309 if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { |
|
310 InterpolateRow = InterpolateRow_Any_NEON; |
|
311 if (IS_ALIGNED(dst_width, 4)) { |
|
312 InterpolateRow = InterpolateRow_NEON; |
|
313 } |
|
314 } |
|
315 #endif |
|
316 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) |
|
317 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 && |
|
318 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { |
|
319 InterpolateRow = InterpolateRow_MIPS_DSPR2; |
|
320 } |
|
321 #endif |
|
322 if (src_width >= 32768) { |
|
323 ScaleARGBFilterCols = filtering ? |
|
324 ScaleARGBFilterCols64_C : ScaleARGBCols64_C; |
|
325 } |
|
326 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) |
|
327 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { |
|
328 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; |
|
329 } |
|
330 #endif |
|
331 #if defined(HAS_SCALEARGBCOLS_SSE2) |
|
332 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { |
|
333 ScaleARGBFilterCols = ScaleARGBCols_SSE2; |
|
334 } |
|
335 #endif |
|
336 if (!filtering && src_width * 2 == dst_width && x < 0x8000) { |
|
337 ScaleARGBFilterCols = ScaleARGBColsUp2_C; |
|
338 #if defined(HAS_SCALEARGBCOLSUP2_SSE2) |
|
339 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && |
|
340 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && |
|
341 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { |
|
342 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; |
|
343 } |
|
344 #endif |
|
345 } |
|
346 |
|
347 const int max_y = (src_height - 1) << 16; |
|
348 if (y > max_y) { |
|
349 y = max_y; |
|
350 } |
|
351 int yi = y >> 16; |
|
352 const uint8* src = src_argb + yi * src_stride; |
|
353 |
|
354 // Allocate 2 rows of ARGB. |
|
355 const int kRowSize = (dst_width * 4 + 15) & ~15; |
|
356 align_buffer_64(row, kRowSize * 2); |
|
357 |
|
358 uint8* rowptr = row; |
|
359 int rowstride = kRowSize; |
|
360 int lasty = yi; |
|
361 |
|
362 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); |
|
363 if (src_height > 1) { |
|
364 src += src_stride; |
|
365 } |
|
366 ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx); |
|
367 src += src_stride; |
|
368 |
|
369 for (j = 0; j < dst_height; ++j) { |
|
370 yi = y >> 16; |
|
371 if (yi != lasty) { |
|
372 if (y > max_y) { |
|
373 y = max_y; |
|
374 yi = y >> 16; |
|
375 src = src_argb + yi * src_stride; |
|
376 } |
|
377 if (yi != lasty) { |
|
378 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); |
|
379 rowptr += rowstride; |
|
380 rowstride = -rowstride; |
|
381 lasty = yi; |
|
382 src += src_stride; |
|
383 } |
|
384 } |
|
385 if (filtering == kFilterLinear) { |
|
386 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); |
|
387 } else { |
|
388 int yf = (y >> 8) & 255; |
|
389 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); |
|
390 } |
|
391 dst_argb += dst_stride; |
|
392 y += dy; |
|
393 } |
|
394 free_aligned_buffer_64(row); |
|
395 } |
|
396 |
|
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
// Each needed source row is converted to ARGB with I422ToARGBRow into a
// one-row conversion buffer, scaled horizontally into one of two scratch
// rows, and the scratch rows are blended vertically into dst_argb with
// InterpolateRow. x, dx, y and dy are 16.16 fixed point. The U and V row
// index is the Y row index shifted right by kYShift (1).
// Only compiled when YUVSCALEUP is defined.
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
                                     int dst_width, int dst_height,
                                     int src_stride_y,
                                     int src_stride_u,
                                     int src_stride_v,
                                     int dst_stride_argb,
                                     const uint8* src_y,
                                     const uint8* src_u,
                                     const uint8* src_v,
                                     uint8* dst_argb,
                                     int x, int dx, int y, int dy,
                                     enum FilterMode filtering) {
  int j;
  // YUV to ARGB row converter. Later blocks override earlier ones.
  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        I422ToARGBRow = I422ToARGBRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
#endif

  // Vertical row blender.
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    InterpolateRow = InterpolateRow_MIPS_DSPR2;
  }
#endif

  // Horizontal column scaler.
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  if (src_width >= 32768) {
    ScaleARGBFilterCols = filtering ?
        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif

  // Clamp the starting y to the last source row.
  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8* src_row_y = src_y + yi * src_stride_y;
  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
  const uint8* src_row_v = src_v + uv_yi * src_stride_v;

  // Allocate 2 rows of ARGB, each rounded up to a multiple of 16 bytes.
  const int kRowSize = (dst_width * 4 + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);

  // Allocate 1 row of ARGB for source conversion.
  align_buffer_64(argb_row, src_width * 4);

  // Unfiltered exact 2x widening starting in the first half pixel gets the
  // dedicated Up2 column function. The column scaler in this function only
  // reads argb_row and only writes the scratch rows, so those are the
  // buffers whose alignment is tested (kRowSize is a multiple of 16, so the
  // second scratch row is aligned whenever the first one is).
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(argb_row, 16) && IS_ALIGNED(row, 16)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  uint8* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  // Convert the first two source rows to ARGB before scaling them. The
  // column scaler expects 4 bytes per pixel, so it must not be fed the raw
  // Y plane.
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    // Moving from an odd Y row to the next one enters a new chroma row.
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr + rowstride, argb_row, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      // Past the last source row: pin y and re-derive all three pointers.
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * src_stride_y;
        src_row_u = src_u + uv_yi * src_stride_u;
        src_row_v = src_v + uv_yi * src_stride_v;
      }
      // Still a new row after clamping: overwrite the older scratch row and
      // swap the roles of the two rows.
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        // NOTE(review): chroma advances when yi is odd here, while
        // src_row_y appears to point one row past yi at this point -
        // confirm the parity against the priming code above.
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  free_aligned_buffer_64(argb_row);
}
#endif
|
608 |
|
609 // Scale ARGB to/from any dimensions, without interpolation. |
|
610 // Fixed point math is used for performance: The upper 16 bits |
|
611 // of x and dx is the integer part of the source position and |
|
612 // the lower 16 bits are the fixed decimal part. |
|
613 |
|
614 static void ScaleARGBSimple(int src_width, int src_height, |
|
615 int dst_width, int dst_height, |
|
616 int src_stride, int dst_stride, |
|
617 const uint8* src_argb, uint8* dst_argb, |
|
618 int x, int dx, int y, int dy) { |
|
619 int j; |
|
620 void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb, |
|
621 int dst_width, int x, int dx) = |
|
622 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C; |
|
623 #if defined(HAS_SCALEARGBCOLS_SSE2) |
|
624 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { |
|
625 ScaleARGBCols = ScaleARGBCols_SSE2; |
|
626 } |
|
627 #endif |
|
628 if (src_width * 2 == dst_width && x < 0x8000) { |
|
629 ScaleARGBCols = ScaleARGBColsUp2_C; |
|
630 #if defined(HAS_SCALEARGBCOLSUP2_SSE2) |
|
631 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && |
|
632 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && |
|
633 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { |
|
634 ScaleARGBCols = ScaleARGBColsUp2_SSE2; |
|
635 } |
|
636 #endif |
|
637 } |
|
638 |
|
639 for (j = 0; j < dst_height; ++j) { |
|
640 ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, |
|
641 dst_width, x, dx); |
|
642 dst_argb += dst_stride; |
|
643 y += dy; |
|
644 } |
|
645 } |
|
646 |
|
647 // ScaleARGB a ARGB. |
|
648 // This function in turn calls a scaling function |
|
649 // suitable for handling the desired resolutions. |
|
650 static void ScaleARGB(const uint8* src, int src_stride, |
|
651 int src_width, int src_height, |
|
652 uint8* dst, int dst_stride, |
|
653 int dst_width, int dst_height, |
|
654 int clip_x, int clip_y, int clip_width, int clip_height, |
|
655 enum FilterMode filtering) { |
|
656 // Initial source x/y coordinate and step values as 16.16 fixed point. |
|
657 int x = 0; |
|
658 int y = 0; |
|
659 int dx = 0; |
|
660 int dy = 0; |
|
661 // ARGB does not support box filter yet, but allow the user to pass it. |
|
662 // Simplify filtering when possible. |
|
663 filtering = ScaleFilterReduce(src_width, src_height, |
|
664 dst_width, dst_height, |
|
665 filtering); |
|
666 |
|
667 // Negative src_height means invert the image. |
|
668 if (src_height < 0) { |
|
669 src_height = -src_height; |
|
670 src = src + (src_height - 1) * src_stride; |
|
671 src_stride = -src_stride; |
|
672 } |
|
673 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, |
|
674 &x, &y, &dx, &dy); |
|
675 src_width = Abs(src_width); |
|
676 if (clip_x) { |
|
677 int64 clipf = (int64)(clip_x) * dx; |
|
678 x += (clipf & 0xffff); |
|
679 src += (clipf >> 16) * 4; |
|
680 dst += clip_x * 4; |
|
681 } |
|
682 if (clip_y) { |
|
683 int64 clipf = (int64)(clip_y) * dy; |
|
684 y += (clipf & 0xffff); |
|
685 src += (clipf >> 16) * src_stride; |
|
686 dst += clip_y * dst_stride; |
|
687 } |
|
688 |
|
689 // Special case for integer step values. |
|
690 if (((dx | dy) & 0xffff) == 0) { |
|
691 if (!dx || !dy) { // 1 pixel wide and/or tall. |
|
692 filtering = kFilterNone; |
|
693 } else { |
|
694 // Optimized even scale down. ie 2, 4, 6, 8, 10x. |
|
695 if (!(dx & 0x10000) && !(dy & 0x10000)) { |
|
696 if (dx == 0x20000) { |
|
697 // Optimized 1/2 downsample. |
|
698 ScaleARGBDown2(src_width, src_height, |
|
699 clip_width, clip_height, |
|
700 src_stride, dst_stride, src, dst, |
|
701 x, dx, y, dy, filtering); |
|
702 return; |
|
703 } |
|
704 if (dx == 0x40000 && filtering == kFilterBox) { |
|
705 // Optimized 1/4 box downsample. |
|
706 ScaleARGBDown4Box(src_width, src_height, |
|
707 clip_width, clip_height, |
|
708 src_stride, dst_stride, src, dst, |
|
709 x, dx, y, dy); |
|
710 return; |
|
711 } |
|
712 ScaleARGBDownEven(src_width, src_height, |
|
713 clip_width, clip_height, |
|
714 src_stride, dst_stride, src, dst, |
|
715 x, dx, y, dy, filtering); |
|
716 return; |
|
717 } |
|
718 // Optimized odd scale down. ie 3, 5, 7, 9x. |
|
719 if ((dx & 0x10000) && (dy & 0x10000)) { |
|
720 filtering = kFilterNone; |
|
721 if (dx == 0x10000 && dy == 0x10000) { |
|
722 // Straight copy. |
|
723 ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride, |
|
724 dst, dst_stride, clip_width, clip_height); |
|
725 return; |
|
726 } |
|
727 } |
|
728 } |
|
729 } |
|
730 if (dx == 0x10000 && (x & 0xffff) == 0) { |
|
731 // Arbitrary scale vertically, but unscaled vertically. |
|
732 ScalePlaneVertical(src_height, |
|
733 clip_width, clip_height, |
|
734 src_stride, dst_stride, src, dst, |
|
735 x, y, dy, 4, filtering); |
|
736 return; |
|
737 } |
|
738 if (filtering && dy < 65536) { |
|
739 ScaleARGBBilinearUp(src_width, src_height, |
|
740 clip_width, clip_height, |
|
741 src_stride, dst_stride, src, dst, |
|
742 x, dx, y, dy, filtering); |
|
743 return; |
|
744 } |
|
745 if (filtering) { |
|
746 ScaleARGBBilinearDown(src_width, src_height, |
|
747 clip_width, clip_height, |
|
748 src_stride, dst_stride, src, dst, |
|
749 x, dx, y, dy, filtering); |
|
750 return; |
|
751 } |
|
752 ScaleARGBSimple(src_width, src_height, clip_width, clip_height, |
|
753 src_stride, dst_stride, src, dst, |
|
754 x, dx, y, dy); |
|
755 } |
|
756 |
|
757 LIBYUV_API |
|
758 int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, |
|
759 int src_width, int src_height, |
|
760 uint8* dst_argb, int dst_stride_argb, |
|
761 int dst_width, int dst_height, |
|
762 int clip_x, int clip_y, int clip_width, int clip_height, |
|
763 enum FilterMode filtering) { |
|
764 if (!src_argb || src_width == 0 || src_height == 0 || |
|
765 !dst_argb || dst_width <= 0 || dst_height <= 0 || |
|
766 clip_x < 0 || clip_y < 0 || |
|
767 (clip_x + clip_width) > dst_width || |
|
768 (clip_y + clip_height) > dst_height) { |
|
769 return -1; |
|
770 } |
|
771 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, |
|
772 dst_argb, dst_stride_argb, dst_width, dst_height, |
|
773 clip_x, clip_y, clip_width, clip_height, filtering); |
|
774 return 0; |
|
775 } |
|
776 |
|
777 // Scale an ARGB image. |
|
778 LIBYUV_API |
|
779 int ARGBScale(const uint8* src_argb, int src_stride_argb, |
|
780 int src_width, int src_height, |
|
781 uint8* dst_argb, int dst_stride_argb, |
|
782 int dst_width, int dst_height, |
|
783 enum FilterMode filtering) { |
|
784 if (!src_argb || src_width == 0 || src_height == 0 || |
|
785 !dst_argb || dst_width <= 0 || dst_height <= 0) { |
|
786 return -1; |
|
787 } |
|
788 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, |
|
789 dst_argb, dst_stride_argb, dst_width, dst_height, |
|
790 0, 0, dst_width, dst_height, filtering); |
|
791 return 0; |
|
792 } |
|
793 |
|
794 #ifdef __cplusplus |
|
795 } // extern "C" |
|
796 } // namespace libyuv |
|
797 #endif |