Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* |
michael@0 | 2 | * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
michael@0 | 3 | * |
michael@0 | 4 | * Use of this source code is governed by a BSD-style license |
michael@0 | 5 | * that can be found in the LICENSE file in the root of the source |
michael@0 | 6 | * tree. An additional intellectual property rights grant can be found |
michael@0 | 7 | * in the file PATENTS. All contributing project authors may |
michael@0 | 8 | * be found in the AUTHORS file in the root of the source tree. |
michael@0 | 9 | */ |
michael@0 | 10 | |
michael@0 | 11 | #include <stdlib.h> |
michael@0 | 12 | #include <time.h> |
michael@0 | 13 | |
michael@0 | 14 | #include "libyuv/cpu_id.h" |
michael@0 | 15 | #include "libyuv/scale_argb.h" |
michael@0 | 16 | #include "libyuv/row.h" |
michael@0 | 17 | #include "../unit_test/unit_test.h" |
michael@0 | 18 | |
michael@0 | 19 | namespace libyuv { |
michael@0 | 20 | |
michael@0 | 21 | // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. |
michael@0 | 22 | static int ARGBTestFilter(int src_width, int src_height, |
michael@0 | 23 | int dst_width, int dst_height, |
michael@0 | 24 | FilterMode f, int benchmark_iterations) { |
michael@0 | 25 | const int b = 128; |
michael@0 | 26 | int i, j; |
michael@0 | 27 | int src_argb_plane_size = (Abs(src_width) + b * 2) * |
michael@0 | 28 | (Abs(src_height) + b * 2) * 4; |
michael@0 | 29 | int src_stride_argb = (b * 2 + Abs(src_width)) * 4; |
michael@0 | 30 | |
michael@0 | 31 | align_buffer_64(src_argb, src_argb_plane_size); |
michael@0 | 32 | srandom(time(NULL)); |
michael@0 | 33 | MemRandomize(src_argb, src_argb_plane_size); |
michael@0 | 34 | |
michael@0 | 35 | int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; |
michael@0 | 36 | int dst_stride_argb = (b * 2 + dst_width) * 4; |
michael@0 | 37 | |
michael@0 | 38 | align_buffer_64(dst_argb_c, dst_argb_plane_size); |
michael@0 | 39 | align_buffer_64(dst_argb_opt, dst_argb_plane_size); |
michael@0 | 40 | memset(dst_argb_c, 2, dst_argb_plane_size); |
michael@0 | 41 | memset(dst_argb_opt, 3, dst_argb_plane_size); |
michael@0 | 42 | |
michael@0 | 43 | // Warm up both versions for consistent benchmarks. |
michael@0 | 44 | MaskCpuFlags(0); // Disable all CPU optimization. |
michael@0 | 45 | ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
michael@0 | 46 | src_width, src_height, |
michael@0 | 47 | dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
michael@0 | 48 | dst_width, dst_height, f); |
michael@0 | 49 | MaskCpuFlags(-1); // Enable all CPU optimization. |
michael@0 | 50 | ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
michael@0 | 51 | src_width, src_height, |
michael@0 | 52 | dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
michael@0 | 53 | dst_width, dst_height, f); |
michael@0 | 54 | |
michael@0 | 55 | MaskCpuFlags(0); // Disable all CPU optimization. |
michael@0 | 56 | double c_time = get_time(); |
michael@0 | 57 | ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
michael@0 | 58 | src_width, src_height, |
michael@0 | 59 | dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
michael@0 | 60 | dst_width, dst_height, f); |
michael@0 | 61 | |
michael@0 | 62 | c_time = (get_time() - c_time); |
michael@0 | 63 | |
michael@0 | 64 | MaskCpuFlags(-1); // Enable all CPU optimization. |
michael@0 | 65 | double opt_time = get_time(); |
michael@0 | 66 | for (i = 0; i < benchmark_iterations; ++i) { |
michael@0 | 67 | ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
michael@0 | 68 | src_width, src_height, |
michael@0 | 69 | dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
michael@0 | 70 | dst_width, dst_height, f); |
michael@0 | 71 | } |
michael@0 | 72 | opt_time = (get_time() - opt_time) / benchmark_iterations; |
michael@0 | 73 | |
michael@0 | 74 | // Report performance of C vs OPT |
michael@0 | 75 | printf("filter %d - %8d us C - %8d us OPT\n", |
michael@0 | 76 | f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); |
michael@0 | 77 | |
michael@0 | 78 | // C version may be a little off from the optimized. Order of |
michael@0 | 79 | // operations may introduce rounding somewhere. So do a difference |
michael@0 | 80 | // of the buffers and look to see that the max difference isn't |
michael@0 | 81 | // over 2. |
michael@0 | 82 | int max_diff = 0; |
michael@0 | 83 | for (i = b; i < (dst_height + b); ++i) { |
michael@0 | 84 | for (j = b * 4; j < (dst_width + b) * 4; ++j) { |
michael@0 | 85 | int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - |
michael@0 | 86 | dst_argb_opt[(i * dst_stride_argb) + j]); |
michael@0 | 87 | if (abs_diff > max_diff) { |
michael@0 | 88 | max_diff = abs_diff; |
michael@0 | 89 | } |
michael@0 | 90 | } |
michael@0 | 91 | } |
michael@0 | 92 | |
michael@0 | 93 | free_aligned_buffer_64(dst_argb_c); |
michael@0 | 94 | free_aligned_buffer_64(dst_argb_opt); |
michael@0 | 95 | free_aligned_buffer_64(src_argb); |
michael@0 | 96 | return max_diff; |
michael@0 | 97 | } |
michael@0 | 98 | |
michael@0 | 99 | static const int kTileX = 8; |
michael@0 | 100 | static const int kTileY = 8; |
michael@0 | 101 | |
michael@0 | 102 | static int TileARGBScale(const uint8* src_argb, int src_stride_argb, |
michael@0 | 103 | int src_width, int src_height, |
michael@0 | 104 | uint8* dst_argb, int dst_stride_argb, |
michael@0 | 105 | int dst_width, int dst_height, |
michael@0 | 106 | FilterMode filtering) { |
michael@0 | 107 | for (int y = 0; y < dst_height; y += kTileY) { |
michael@0 | 108 | for (int x = 0; x < dst_width; x += kTileX) { |
michael@0 | 109 | int clip_width = kTileX; |
michael@0 | 110 | if (x + clip_width > dst_width) { |
michael@0 | 111 | clip_width = dst_width - x; |
michael@0 | 112 | } |
michael@0 | 113 | int clip_height = kTileY; |
michael@0 | 114 | if (y + clip_height > dst_height) { |
michael@0 | 115 | clip_height = dst_height - y; |
michael@0 | 116 | } |
michael@0 | 117 | int r = ARGBScaleClip(src_argb, src_stride_argb, |
michael@0 | 118 | src_width, src_height, |
michael@0 | 119 | dst_argb, dst_stride_argb, |
michael@0 | 120 | dst_width, dst_height, |
michael@0 | 121 | x, y, clip_width, clip_height, filtering); |
michael@0 | 122 | if (r) { |
michael@0 | 123 | return r; |
michael@0 | 124 | } |
michael@0 | 125 | } |
michael@0 | 126 | } |
michael@0 | 127 | return 0; |
michael@0 | 128 | } |
michael@0 | 129 | |
michael@0 | 130 | static int ARGBClipTestFilter(int src_width, int src_height, |
michael@0 | 131 | int dst_width, int dst_height, |
michael@0 | 132 | FilterMode f, int benchmark_iterations) { |
michael@0 | 133 | const int b = 128; |
michael@0 | 134 | int src_argb_plane_size = (Abs(src_width) + b * 2) * |
michael@0 | 135 | (Abs(src_height) + b * 2) * 4; |
michael@0 | 136 | int src_stride_argb = (b * 2 + Abs(src_width)) * 4; |
michael@0 | 137 | |
michael@0 | 138 | align_buffer_64(src_argb, src_argb_plane_size); |
michael@0 | 139 | memset(src_argb, 1, src_argb_plane_size); |
michael@0 | 140 | |
michael@0 | 141 | int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4; |
michael@0 | 142 | int dst_stride_argb = (b * 2 + dst_width) * 4; |
michael@0 | 143 | |
michael@0 | 144 | srandom(time(NULL)); |
michael@0 | 145 | |
michael@0 | 146 | int i, j; |
michael@0 | 147 | for (i = b; i < (Abs(src_height) + b); ++i) { |
michael@0 | 148 | for (j = b; j < (Abs(src_width) + b) * 4; ++j) { |
michael@0 | 149 | src_argb[(i * src_stride_argb) + j] = (random() & 0xff); |
michael@0 | 150 | } |
michael@0 | 151 | } |
michael@0 | 152 | |
michael@0 | 153 | align_buffer_64(dst_argb_c, dst_argb_plane_size); |
michael@0 | 154 | align_buffer_64(dst_argb_opt, dst_argb_plane_size); |
michael@0 | 155 | memset(dst_argb_c, 2, dst_argb_plane_size); |
michael@0 | 156 | memset(dst_argb_opt, 3, dst_argb_plane_size); |
michael@0 | 157 | |
michael@0 | 158 | // Do full image, no clipping. |
michael@0 | 159 | double c_time = get_time(); |
michael@0 | 160 | ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
michael@0 | 161 | src_width, src_height, |
michael@0 | 162 | dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
michael@0 | 163 | dst_width, dst_height, f); |
michael@0 | 164 | c_time = (get_time() - c_time); |
michael@0 | 165 | |
michael@0 | 166 | // Do tiled image, clipping scale to a tile at a time. |
michael@0 | 167 | double opt_time = get_time(); |
michael@0 | 168 | for (i = 0; i < benchmark_iterations; ++i) { |
michael@0 | 169 | TileARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb, |
michael@0 | 170 | src_width, src_height, |
michael@0 | 171 | dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb, |
michael@0 | 172 | dst_width, dst_height, f); |
michael@0 | 173 | } |
michael@0 | 174 | opt_time = (get_time() - opt_time) / benchmark_iterations; |
michael@0 | 175 | |
michael@0 | 176 | // Report performance of Full vs Tiled. |
michael@0 | 177 | printf("filter %d - %8d us Full - %8d us Tiled\n", |
michael@0 | 178 | f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6)); |
michael@0 | 179 | |
michael@0 | 180 | // Compare full scaled image vs tiled image. |
michael@0 | 181 | int max_diff = 0; |
michael@0 | 182 | for (i = b; i < (dst_height + b); ++i) { |
michael@0 | 183 | for (j = b * 4; j < (dst_width + b) * 4; ++j) { |
michael@0 | 184 | int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - |
michael@0 | 185 | dst_argb_opt[(i * dst_stride_argb) + j]); |
michael@0 | 186 | if (abs_diff > max_diff) { |
michael@0 | 187 | max_diff = abs_diff; |
michael@0 | 188 | } |
michael@0 | 189 | } |
michael@0 | 190 | } |
michael@0 | 191 | |
michael@0 | 192 | free_aligned_buffer_64(dst_argb_c); |
michael@0 | 193 | free_aligned_buffer_64(dst_argb_opt); |
michael@0 | 194 | free_aligned_buffer_64(src_argb); |
michael@0 | 195 | return max_diff; |
michael@0 | 196 | } |
michael@0 | 197 | |
michael@0 | 198 | #define TEST_FACTOR1(name, filter, hfactor, vfactor, max_diff) \ |
michael@0 | 199 | TEST_F(libyuvTest, ARGBScaleDownBy##name##_##filter) { \ |
michael@0 | 200 | int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \ |
michael@0 | 201 | Abs(benchmark_width_) * hfactor, \ |
michael@0 | 202 | Abs(benchmark_height_) * vfactor, \ |
michael@0 | 203 | kFilter##filter, benchmark_iterations_); \ |
michael@0 | 204 | EXPECT_LE(diff, max_diff); \ |
michael@0 | 205 | } \ |
michael@0 | 206 | TEST_F(libyuvTest, ARGBScaleDownClipBy##name##_##filter) { \ |
michael@0 | 207 | int diff = ARGBClipTestFilter(benchmark_width_, benchmark_height_, \ |
michael@0 | 208 | Abs(benchmark_width_) * hfactor, \ |
michael@0 | 209 | Abs(benchmark_height_) * vfactor, \ |
michael@0 | 210 | kFilter##filter, benchmark_iterations_); \ |
michael@0 | 211 | EXPECT_LE(diff, max_diff); \ |
michael@0 | 212 | } |
michael@0 | 213 | |
michael@0 | 214 | // Test a scale factor with 2 filters. Expect unfiltered to be exact, but |
michael@0 | 215 | // filtering is different fixed point implementations for SSSE3, Neon and C. |
michael@0 | 216 | #define TEST_FACTOR(name, hfactor, vfactor) \ |
michael@0 | 217 | TEST_FACTOR1(name, None, hfactor, vfactor, 2) \ |
michael@0 | 218 | TEST_FACTOR1(name, Linear, hfactor, vfactor, 2) \ |
michael@0 | 219 | TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2) \ |
michael@0 | 220 | TEST_FACTOR1(name, Box, hfactor, vfactor, 2) |
michael@0 | 221 | |
michael@0 | 222 | TEST_FACTOR(2, 1 / 2, 1 / 2) |
michael@0 | 223 | TEST_FACTOR(4, 1 / 4, 1 / 4) |
michael@0 | 224 | TEST_FACTOR(8, 1 / 8, 1 / 8) |
michael@0 | 225 | TEST_FACTOR(3by4, 3 / 4, 3 / 4) |
michael@0 | 226 | #undef TEST_FACTOR1 |
michael@0 | 227 | #undef TEST_FACTOR |
michael@0 | 228 | |
michael@0 | 229 | #define TEST_SCALETO1(name, width, height, filter, max_diff) \ |
michael@0 | 230 | TEST_F(libyuvTest, name##To##width##x##height##_##filter) { \ |
michael@0 | 231 | int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \ |
michael@0 | 232 | width, height, \ |
michael@0 | 233 | kFilter##filter, benchmark_iterations_); \ |
michael@0 | 234 | EXPECT_LE(diff, max_diff); \ |
michael@0 | 235 | } \ |
michael@0 | 236 | TEST_F(libyuvTest, name##From##width##x##height##_##filter) { \ |
michael@0 | 237 | int diff = ARGBTestFilter(width, height, \ |
michael@0 | 238 | Abs(benchmark_width_), Abs(benchmark_height_), \ |
michael@0 | 239 | kFilter##filter, benchmark_iterations_); \ |
michael@0 | 240 | EXPECT_LE(diff, max_diff); \ |
michael@0 | 241 | } \ |
michael@0 | 242 | TEST_F(libyuvTest, name##ClipTo##width##x##height##_##filter) { \ |
michael@0 | 243 | int diff = ARGBClipTestFilter(benchmark_width_, benchmark_height_, \ |
michael@0 | 244 | width, height, \ |
michael@0 | 245 | kFilter##filter, benchmark_iterations_); \ |
michael@0 | 246 | EXPECT_LE(diff, max_diff); \ |
michael@0 | 247 | } \ |
michael@0 | 248 | TEST_F(libyuvTest, name##ClipFrom##width##x##height##_##filter) { \ |
michael@0 | 249 | int diff = ARGBClipTestFilter(width, height, \ |
michael@0 | 250 | Abs(benchmark_width_), Abs(benchmark_height_), \ |
michael@0 | 251 | kFilter##filter, benchmark_iterations_); \ |
michael@0 | 252 | EXPECT_LE(diff, max_diff); \ |
michael@0 | 253 | } |
michael@0 | 254 | |
michael@0 | 255 | /// Test scale to a specified size with all 4 filters. |
michael@0 | 256 | #define TEST_SCALETO(name, width, height) \ |
michael@0 | 257 | TEST_SCALETO1(name, width, height, None, 0) \ |
michael@0 | 258 | TEST_SCALETO1(name, width, height, Linear, 3) \ |
michael@0 | 259 | TEST_SCALETO1(name, width, height, Bilinear, 3) \ |
michael@0 | 260 | TEST_SCALETO1(name, width, height, Box, 3) |
michael@0 | 261 | |
michael@0 | 262 | TEST_SCALETO(ARGBScale, 1, 1) |
michael@0 | 263 | TEST_SCALETO(ARGBScale, 320, 240) |
michael@0 | 264 | TEST_SCALETO(ARGBScale, 352, 288) |
michael@0 | 265 | TEST_SCALETO(ARGBScale, 640, 360) |
michael@0 | 266 | TEST_SCALETO(ARGBScale, 1280, 720) |
michael@0 | 267 | #undef TEST_SCALETO1 |
michael@0 | 268 | #undef TEST_SCALETO |
michael@0 | 269 | |
michael@0 | 270 | } // namespace libyuv |