|
1 /* |
|
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
|
3 * |
|
4 * Use of this source code is governed by a BSD-style license |
|
5 * that can be found in the LICENSE file in the root of the source |
|
6 * tree. An additional intellectual property rights grant can be found |
|
7 * in the file PATENTS. All contributing project authors may |
|
8 * be found in the AUTHORS file in the root of the source tree. |
|
9 */ |
|
10 |
|
11 #include <stdlib.h> |
|
12 #include <time.h> |
|
13 |
|
14 #include "libyuv/cpu_id.h" |
|
15 #include "libyuv/scale_argb.h" |
|
16 #include "libyuv/row.h" |
|
17 #include "../unit_test/unit_test.h" |
|
18 |
|
19 namespace libyuv { |
|
20 |
|
21 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. |
|
22 static int ARGBTestFilter(int src_width, int src_height, |
|
23 int dst_width, int dst_height, |
|
24 FilterMode f, int benchmark_iterations) { |
|
25 const int b = 128; |
|
26 int i, j; |
|
27 int src_argb_plane_size = (Abs(src_width) + b * 2) * |
|
28 (Abs(src_height) + b * 2) * 4; |
|
29 int src_stride_argb = (b * 2 + Abs(src_width)) * 4; |
|
30 |
|
31 align_buffer_64(src_argb, src_argb_plane_size); |
|
32 srandom(time(NULL)); |
|
33 MemRandomize(src_argb, src_argb_plane_size); |
|
34 |
|
35 int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; |
|
36 int dst_stride_argb = (b * 2 + dst_width) * 4; |
|
37 |
|
38 align_buffer_64(dst_argb_c, dst_argb_plane_size); |
|
39 align_buffer_64(dst_argb_opt, dst_argb_plane_size); |
|
40 memset(dst_argb_c, 2, dst_argb_plane_size); |
|
41 memset(dst_argb_opt, 3, dst_argb_plane_size); |
|
42 |
|
43 // Warm up both versions for consistent benchmarks. |
|
44 MaskCpuFlags(0); // Disable all CPU optimization. |
|
45 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
|
46 src_width, src_height, |
|
47 dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
|
48 dst_width, dst_height, f); |
|
49 MaskCpuFlags(-1); // Enable all CPU optimization. |
|
50 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
|
51 src_width, src_height, |
|
52 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
|
53 dst_width, dst_height, f); |
|
54 |
|
55 MaskCpuFlags(0); // Disable all CPU optimization. |
|
56 double c_time = get_time(); |
|
57 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
|
58 src_width, src_height, |
|
59 dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
|
60 dst_width, dst_height, f); |
|
61 |
|
62 c_time = (get_time() - c_time); |
|
63 |
|
64 MaskCpuFlags(-1); // Enable all CPU optimization. |
|
65 double opt_time = get_time(); |
|
66 for (i = 0; i < benchmark_iterations; ++i) { |
|
67 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
|
68 src_width, src_height, |
|
69 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
|
70 dst_width, dst_height, f); |
|
71 } |
|
72 opt_time = (get_time() - opt_time) / benchmark_iterations; |
|
73 |
|
74 // Report performance of C vs OPT |
|
75 printf("filter %d - %8d us C - %8d us OPT\n", |
|
76 f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); |
|
77 |
|
78 // C version may be a little off from the optimized. Order of |
|
79 // operations may introduce rounding somewhere. So do a difference |
|
80 // of the buffers and look to see that the max difference isn't |
|
81 // over 2. |
|
82 int max_diff = 0; |
|
83 for (i = b; i < (dst_height + b); ++i) { |
|
84 for (j = b * 4; j < (dst_width + b) * 4; ++j) { |
|
85 int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - |
|
86 dst_argb_opt[(i * dst_stride_argb) + j]); |
|
87 if (abs_diff > max_diff) { |
|
88 max_diff = abs_diff; |
|
89 } |
|
90 } |
|
91 } |
|
92 |
|
93 free_aligned_buffer_64(dst_argb_c); |
|
94 free_aligned_buffer_64(dst_argb_opt); |
|
95 free_aligned_buffer_64(src_argb); |
|
96 return max_diff; |
|
97 } |
|
98 |
|
99 static const int kTileX = 8; |
|
100 static const int kTileY = 8; |
|
101 |
|
102 static int TileARGBScale(const uint8* src_argb, int src_stride_argb, |
|
103 int src_width, int src_height, |
|
104 uint8* dst_argb, int dst_stride_argb, |
|
105 int dst_width, int dst_height, |
|
106 FilterMode filtering) { |
|
107 for (int y = 0; y < dst_height; y += kTileY) { |
|
108 for (int x = 0; x < dst_width; x += kTileX) { |
|
109 int clip_width = kTileX; |
|
110 if (x + clip_width > dst_width) { |
|
111 clip_width = dst_width - x; |
|
112 } |
|
113 int clip_height = kTileY; |
|
114 if (y + clip_height > dst_height) { |
|
115 clip_height = dst_height - y; |
|
116 } |
|
117 int r = ARGBScaleClip(src_argb, src_stride_argb, |
|
118 src_width, src_height, |
|
119 dst_argb, dst_stride_argb, |
|
120 dst_width, dst_height, |
|
121 x, y, clip_width, clip_height, filtering); |
|
122 if (r) { |
|
123 return r; |
|
124 } |
|
125 } |
|
126 } |
|
127 return 0; |
|
128 } |
|
129 |
|
130 static int ARGBClipTestFilter(int src_width, int src_height, |
|
131 int dst_width, int dst_height, |
|
132 FilterMode f, int benchmark_iterations) { |
|
133 const int b = 128; |
|
134 int src_argb_plane_size = (Abs(src_width) + b * 2) * |
|
135 (Abs(src_height) + b * 2) * 4; |
|
136 int src_stride_argb = (b * 2 + Abs(src_width)) * 4; |
|
137 |
|
138 align_buffer_64(src_argb, src_argb_plane_size); |
|
139 memset(src_argb, 1, src_argb_plane_size); |
|
140 |
|
141 int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; |
|
142 int dst_stride_argb = (b * 2 + dst_width) * 4; |
|
143 |
|
144 srandom(time(NULL)); |
|
145 |
|
146 int i, j; |
|
147 for (i = b; i < (Abs(src_height) + b); ++i) { |
|
148 for (j = b; j < (Abs(src_width) + b) * 4; ++j) { |
|
149 src_argb[(i * src_stride_argb) + j] = (random() & 0xff); |
|
150 } |
|
151 } |
|
152 |
|
153 align_buffer_64(dst_argb_c, dst_argb_plane_size); |
|
154 align_buffer_64(dst_argb_opt, dst_argb_plane_size); |
|
155 memset(dst_argb_c, 2, dst_argb_plane_size); |
|
156 memset(dst_argb_opt, 3, dst_argb_plane_size); |
|
157 |
|
158 // Do full image, no clipping. |
|
159 double c_time = get_time(); |
|
160 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
|
161 src_width, src_height, |
|
162 dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
|
163 dst_width, dst_height, f); |
|
164 c_time = (get_time() - c_time); |
|
165 |
|
166 // Do tiled image, clipping scale to a tile at a time. |
|
167 double opt_time = get_time(); |
|
168 for (i = 0; i < benchmark_iterations; ++i) { |
|
169 TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
|
170 src_width, src_height, |
|
171 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
|
172 dst_width, dst_height, f); |
|
173 } |
|
174 opt_time = (get_time() - opt_time) / benchmark_iterations; |
|
175 |
|
176 // Report performance of Full vs Tiled. |
|
177 printf("filter %d - %8d us Full - %8d us Tiled\n", |
|
178 f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); |
|
179 |
|
180 // Compare full scaled image vs tiled image. |
|
181 int max_diff = 0; |
|
182 for (i = b; i < (dst_height + b); ++i) { |
|
183 for (j = b * 4; j < (dst_width + b) * 4; ++j) { |
|
184 int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - |
|
185 dst_argb_opt[(i * dst_stride_argb) + j]); |
|
186 if (abs_diff > max_diff) { |
|
187 max_diff = abs_diff; |
|
188 } |
|
189 } |
|
190 } |
|
191 |
|
192 free_aligned_buffer_64(dst_argb_c); |
|
193 free_aligned_buffer_64(dst_argb_opt); |
|
194 free_aligned_buffer_64(src_argb); |
|
195 return max_diff; |
|
196 } |
|
197 |
|
// Defines two tests for one (scale factor, filter) pair:
//   ARGBScaleDownBy<name>_<filter>      - ARGBTestFilter
//   ARGBScaleDownClipBy<name>_<filter>  - ARGBClipTestFilter
// Each scales from the benchmark size to the benchmark size multiplied by
// hfactor / vfactor and expects the reported difference to be <= max_diff.
//
// NOTE: hfactor and vfactor are deliberately expanded WITHOUT parentheses.
// They are passed as integer fractions such as `1 / 2`, so the product
// expands to `Abs(benchmark_width_) * 1 / 2`, which evaluates left to right
// as (width * 1) / 2.  Parenthesizing the factor would turn it into
// width * (1 / 2), i.e. 0 in integer arithmetic.
#define TEST_FACTOR1(name, filter, hfactor, vfactor, max_diff)                 \
    TEST_F(libyuvTest, ARGBScaleDownBy##name##_##filter) {                     \
      const int scaled_width = Abs(benchmark_width_) * hfactor;                \
      const int scaled_height = Abs(benchmark_height_) * vfactor;              \
      const int worst_diff = ARGBTestFilter(benchmark_width_,                  \
                                            benchmark_height_,                 \
                                            scaled_width, scaled_height,       \
                                            kFilter##filter,                   \
                                            benchmark_iterations_);            \
      EXPECT_LE(worst_diff, max_diff);                                         \
    }                                                                          \
    TEST_F(libyuvTest, ARGBScaleDownClipBy##name##_##filter) {                 \
      const int scaled_width = Abs(benchmark_width_) * hfactor;                \
      const int scaled_height = Abs(benchmark_height_) * vfactor;              \
      const int worst_diff = ARGBClipTestFilter(benchmark_width_,              \
                                                benchmark_height_,             \
                                                scaled_width, scaled_height,   \
                                                kFilter##filter,               \
                                                benchmark_iterations_);        \
      EXPECT_LE(worst_diff, max_diff);                                         \
    }
|
213 |
|
214 // Test a scale factor with 2 filters. Expect unfiltered to be exact, but |
|
215 // filtering is different fixed point implementations for SSSE3, Neon and C. |
|
216 #define TEST_FACTOR(name, hfactor, vfactor) \ |
|
217 TEST_FACTOR1(name, None, hfactor, vfactor, 2) \ |
|
218 TEST_FACTOR1(name, Linear, hfactor, vfactor, 2) \ |
|
219 TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2) \ |
|
220 TEST_FACTOR1(name, Box, hfactor, vfactor, 2) |
|
221 |
|
222 TEST_FACTOR(2, 1 / 2, 1 / 2) |
|
223 TEST_FACTOR(4, 1 / 4, 1 / 4) |
|
224 TEST_FACTOR(8, 1 / 8, 1 / 8) |
|
225 TEST_FACTOR(3by4, 3 / 4, 3 / 4) |
|
226 #undef TEST_FACTOR1 |
|
227 #undef TEST_FACTOR |
|
228 |
|
// Defines four tests for one (size, filter) pair.  Each expects the reported
// difference to be <= max_diff:
//   <name>To<W>x<H>_<filter>        - ARGBTestFilter, benchmark size -> WxH
//   <name>From<W>x<H>_<filter>      - ARGBTestFilter, WxH -> |benchmark size|
//   <name>ClipTo<W>x<H>_<filter>    - ARGBClipTestFilter, benchmark -> WxH
//   <name>ClipFrom<W>x<H>_<filter>  - ARGBClipTestFilter, WxH -> |benchmark|
// The "From" variants pass Abs() of the benchmark dimensions as the
// destination size.
#define TEST_SCALETO1(name, width, height, filter, max_diff)                   \
    TEST_F(libyuvTest, name##To##width##x##height##_##filter) {                \
      const int worst_diff = ARGBTestFilter(benchmark_width_,                  \
                                            benchmark_height_,                 \
                                            width, height,                     \
                                            kFilter##filter,                   \
                                            benchmark_iterations_);            \
      EXPECT_LE(worst_diff, max_diff);                                         \
    }                                                                          \
    TEST_F(libyuvTest, name##From##width##x##height##_##filter) {              \
      const int worst_diff = ARGBTestFilter(width, height,                     \
                                            Abs(benchmark_width_),             \
                                            Abs(benchmark_height_),            \
                                            kFilter##filter,                   \
                                            benchmark_iterations_);            \
      EXPECT_LE(worst_diff, max_diff);                                         \
    }                                                                          \
    TEST_F(libyuvTest, name##ClipTo##width##x##height##_##filter) {            \
      const int worst_diff = ARGBClipTestFilter(benchmark_width_,              \
                                                benchmark_height_,             \
                                                width, height,                 \
                                                kFilter##filter,               \
                                                benchmark_iterations_);        \
      EXPECT_LE(worst_diff, max_diff);                                         \
    }                                                                          \
    TEST_F(libyuvTest, name##ClipFrom##width##x##height##_##filter) {          \
      const int worst_diff = ARGBClipTestFilter(width, height,                 \
                                                Abs(benchmark_width_),         \
                                                Abs(benchmark_height_),        \
                                                kFilter##filter,               \
                                                benchmark_iterations_);        \
      EXPECT_LE(worst_diff, max_diff);                                         \
    }
|
254 |
|
255 /// Test scale to a specified size with all 4 filters. |
|
256 #define TEST_SCALETO(name, width, height) \ |
|
257 TEST_SCALETO1(name, width, height, None, 0) \ |
|
258 TEST_SCALETO1(name, width, height, Linear, 3) \ |
|
259 TEST_SCALETO1(name, width, height, Bilinear, 3) \ |
|
260 TEST_SCALETO1(name, width, height, Box, 3) |
|
261 |
|
262 TEST_SCALETO(ARGBScale, 1, 1) |
|
263 TEST_SCALETO(ARGBScale, 320, 240) |
|
264 TEST_SCALETO(ARGBScale, 352, 288) |
|
265 TEST_SCALETO(ARGBScale, 640, 360) |
|
266 TEST_SCALETO(ARGBScale, 1280, 720) |
|
267 #undef TEST_SCALETO1 |
|
268 #undef TEST_SCALETO |
|
269 |
|
270 } // namespace libyuv |