Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
11 #include <stdlib.h>
12 #include <time.h>
14 #include "libyuv/cpu_id.h"
15 #include "libyuv/scale_argb.h"
16 #include "libyuv/row.h"
17 #include "../unit_test/unit_test.h"
19 namespace libyuv {
21 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
22 static int ARGBTestFilter(int src_width, int src_height,
23 int dst_width, int dst_height,
24 FilterMode f, int benchmark_iterations) {
25 const int b = 128;
26 int i, j;
27 int src_argb_plane_size = (Abs(src_width) + b * 2) *
28 (Abs(src_height) + b * 2) * 4;
29 int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
31 align_buffer_64(src_argb, src_argb_plane_size);
32 srandom(time(NULL));
33 MemRandomize(src_argb, src_argb_plane_size);
35 int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4;
36 int dst_stride_argb = (b * 2 + dst_width) * 4;
38 align_buffer_64(dst_argb_c, dst_argb_plane_size);
39 align_buffer_64(dst_argb_opt, dst_argb_plane_size);
40 memset(dst_argb_c, 2, dst_argb_plane_size);
41 memset(dst_argb_opt, 3, dst_argb_plane_size);
43 // Warm up both versions for consistent benchmarks.
44 MaskCpuFlags(0); // Disable all CPU optimization.
45 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
46 src_width, src_height,
47 dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb,
48 dst_width, dst_height, f);
49 MaskCpuFlags(-1); // Enable all CPU optimization.
50 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
51 src_width, src_height,
52 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb,
53 dst_width, dst_height, f);
55 MaskCpuFlags(0); // Disable all CPU optimization.
56 double c_time = get_time();
57 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
58 src_width, src_height,
59 dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb,
60 dst_width, dst_height, f);
62 c_time = (get_time() - c_time);
64 MaskCpuFlags(-1); // Enable all CPU optimization.
65 double opt_time = get_time();
66 for (i = 0; i < benchmark_iterations; ++i) {
67 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
68 src_width, src_height,
69 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb,
70 dst_width, dst_height, f);
71 }
72 opt_time = (get_time() - opt_time) / benchmark_iterations;
74 // Report performance of C vs OPT
75 printf("filter %d - %8d us C - %8d us OPT\n",
76 f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
78 // C version may be a little off from the optimized. Order of
79 // operations may introduce rounding somewhere. So do a difference
80 // of the buffers and look to see that the max difference isn't
81 // over 2.
82 int max_diff = 0;
83 for (i = b; i < (dst_height + b); ++i) {
84 for (j = b * 4; j < (dst_width + b) * 4; ++j) {
85 int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
86 dst_argb_opt[(i * dst_stride_argb) + j]);
87 if (abs_diff > max_diff) {
88 max_diff = abs_diff;
89 }
90 }
91 }
93 free_aligned_buffer_64(dst_argb_c);
94 free_aligned_buffer_64(dst_argb_opt);
95 free_aligned_buffer_64(src_argb);
96 return max_diff;
97 }
99 static const int kTileX = 8;
100 static const int kTileY = 8;
102 static int TileARGBScale(const uint8* src_argb, int src_stride_argb,
103 int src_width, int src_height,
104 uint8* dst_argb, int dst_stride_argb,
105 int dst_width, int dst_height,
106 FilterMode filtering) {
107 for (int y = 0; y < dst_height; y += kTileY) {
108 for (int x = 0; x < dst_width; x += kTileX) {
109 int clip_width = kTileX;
110 if (x + clip_width > dst_width) {
111 clip_width = dst_width - x;
112 }
113 int clip_height = kTileY;
114 if (y + clip_height > dst_height) {
115 clip_height = dst_height - y;
116 }
117 int r = ARGBScaleClip(src_argb, src_stride_argb,
118 src_width, src_height,
119 dst_argb, dst_stride_argb,
120 dst_width, dst_height,
121 x, y, clip_width, clip_height, filtering);
122 if (r) {
123 return r;
124 }
125 }
126 }
127 return 0;
128 }
130 static int ARGBClipTestFilter(int src_width, int src_height,
131 int dst_width, int dst_height,
132 FilterMode f, int benchmark_iterations) {
133 const int b = 128;
134 int src_argb_plane_size = (Abs(src_width) + b * 2) *
135 (Abs(src_height) + b * 2) * 4;
136 int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
138 align_buffer_64(src_argb, src_argb_plane_size);
139 memset(src_argb, 1, src_argb_plane_size);
141 int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4;
142 int dst_stride_argb = (b * 2 + dst_width) * 4;
144 srandom(time(NULL));
146 int i, j;
147 for (i = b; i < (Abs(src_height) + b); ++i) {
148 for (j = b; j < (Abs(src_width) + b) * 4; ++j) {
149 src_argb[(i * src_stride_argb) + j] = (random() & 0xff);
150 }
151 }
153 align_buffer_64(dst_argb_c, dst_argb_plane_size);
154 align_buffer_64(dst_argb_opt, dst_argb_plane_size);
155 memset(dst_argb_c, 2, dst_argb_plane_size);
156 memset(dst_argb_opt, 3, dst_argb_plane_size);
158 // Do full image, no clipping.
159 double c_time = get_time();
160 ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
161 src_width, src_height,
162 dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb,
163 dst_width, dst_height, f);
164 c_time = (get_time() - c_time);
166 // Do tiled image, clipping scale to a tile at a time.
167 double opt_time = get_time();
168 for (i = 0; i < benchmark_iterations; ++i) {
169 TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
170 src_width, src_height,
171 dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb,
172 dst_width, dst_height, f);
173 }
174 opt_time = (get_time() - opt_time) / benchmark_iterations;
176 // Report performance of Full vs Tiled.
177 printf("filter %d - %8d us Full - %8d us Tiled\n",
178 f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
180 // Compare full scaled image vs tiled image.
181 int max_diff = 0;
182 for (i = b; i < (dst_height + b); ++i) {
183 for (j = b * 4; j < (dst_width + b) * 4; ++j) {
184 int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
185 dst_argb_opt[(i * dst_stride_argb) + j]);
186 if (abs_diff > max_diff) {
187 max_diff = abs_diff;
188 }
189 }
190 }
192 free_aligned_buffer_64(dst_argb_c);
193 free_aligned_buffer_64(dst_argb_opt);
194 free_aligned_buffer_64(src_argb);
195 return max_diff;
196 }
198 #define TEST_FACTOR1(name, filter, hfactor, vfactor, max_diff) \
199 TEST_F(libyuvTest, ARGBScaleDownBy##name##_##filter) { \
200 int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \
201 Abs(benchmark_width_) * hfactor, \
202 Abs(benchmark_height_) * vfactor, \
203 kFilter##filter, benchmark_iterations_); \
204 EXPECT_LE(diff, max_diff); \
205 } \
206 TEST_F(libyuvTest, ARGBScaleDownClipBy##name##_##filter) { \
207 int diff = ARGBClipTestFilter(benchmark_width_, benchmark_height_, \
208 Abs(benchmark_width_) * hfactor, \
209 Abs(benchmark_height_) * vfactor, \
210 kFilter##filter, benchmark_iterations_); \
211 EXPECT_LE(diff, max_diff); \
212 }
214 // Test a scale factor with 2 filters. Expect unfiltered to be exact, but
215 // filtering is different fixed point implementations for SSSE3, Neon and C.
216 #define TEST_FACTOR(name, hfactor, vfactor) \
217 TEST_FACTOR1(name, None, hfactor, vfactor, 2) \
218 TEST_FACTOR1(name, Linear, hfactor, vfactor, 2) \
219 TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2) \
220 TEST_FACTOR1(name, Box, hfactor, vfactor, 2)
222 TEST_FACTOR(2, 1 / 2, 1 / 2)
223 TEST_FACTOR(4, 1 / 4, 1 / 4)
224 TEST_FACTOR(8, 1 / 8, 1 / 8)
225 TEST_FACTOR(3by4, 3 / 4, 3 / 4)
226 #undef TEST_FACTOR1
227 #undef TEST_FACTOR
229 #define TEST_SCALETO1(name, width, height, filter, max_diff) \
230 TEST_F(libyuvTest, name##To##width##x##height##_##filter) { \
231 int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \
232 width, height, \
233 kFilter##filter, benchmark_iterations_); \
234 EXPECT_LE(diff, max_diff); \
235 } \
236 TEST_F(libyuvTest, name##From##width##x##height##_##filter) { \
237 int diff = ARGBTestFilter(width, height, \
238 Abs(benchmark_width_), Abs(benchmark_height_), \
239 kFilter##filter, benchmark_iterations_); \
240 EXPECT_LE(diff, max_diff); \
241 } \
242 TEST_F(libyuvTest, name##ClipTo##width##x##height##_##filter) { \
243 int diff = ARGBClipTestFilter(benchmark_width_, benchmark_height_, \
244 width, height, \
245 kFilter##filter, benchmark_iterations_); \
246 EXPECT_LE(diff, max_diff); \
247 } \
248 TEST_F(libyuvTest, name##ClipFrom##width##x##height##_##filter) { \
249 int diff = ARGBClipTestFilter(width, height, \
250 Abs(benchmark_width_), Abs(benchmark_height_), \
251 kFilter##filter, benchmark_iterations_); \
252 EXPECT_LE(diff, max_diff); \
253 }
255 /// Test scale to a specified size with all 4 filters.
256 #define TEST_SCALETO(name, width, height) \
257 TEST_SCALETO1(name, width, height, None, 0) \
258 TEST_SCALETO1(name, width, height, Linear, 3) \
259 TEST_SCALETO1(name, width, height, Bilinear, 3) \
260 TEST_SCALETO1(name, width, height, Box, 3)
262 TEST_SCALETO(ARGBScale, 1, 1)
263 TEST_SCALETO(ARGBScale, 320, 240)
264 TEST_SCALETO(ARGBScale, 352, 288)
265 TEST_SCALETO(ARGBScale, 640, 360)
266 TEST_SCALETO(ARGBScale, 1280, 720)
267 #undef TEST_SCALETO1
268 #undef TEST_SCALETO
270 } // namespace libyuv