Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* |
michael@0 | 2 | * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
michael@0 | 3 | * |
michael@0 | 4 | * Use of this source code is governed by a BSD-style license |
michael@0 | 5 | * that can be found in the LICENSE file in the root of the source |
michael@0 | 6 | * tree. An additional intellectual property rights grant can be found |
michael@0 | 7 | * in the file PATENTS. All contributing project authors may |
michael@0 | 8 | * be found in the AUTHORS file in the root of the source tree. |
michael@0 | 9 | */ |
michael@0 | 10 | |
michael@0 | 11 | #include <stdlib.h> |
michael@0 | 12 | #include <time.h> |
michael@0 | 13 | |
michael@0 | 14 | #include "libyuv/cpu_id.h" |
michael@0 | 15 | #include "libyuv/scale.h" |
michael@0 | 16 | #include "../unit_test/unit_test.h" |
michael@0 | 17 | |
michael@0 | 18 | namespace libyuv { |
michael@0 | 19 | |
michael@0 | 20 | // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. |
michael@0 | 21 | static int TestFilter(int src_width, int src_height, |
michael@0 | 22 | int dst_width, int dst_height, |
michael@0 | 23 | FilterMode f, int benchmark_iterations) { |
michael@0 | 24 | int i, j; |
michael@0 | 25 | const int b = 128; |
michael@0 | 26 | int src_width_uv = (Abs(src_width) + 1) >> 1; |
michael@0 | 27 | int src_height_uv = (Abs(src_height) + 1) >> 1; |
michael@0 | 28 | |
michael@0 | 29 | int src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2); |
michael@0 | 30 | int src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2); |
michael@0 | 31 | |
michael@0 | 32 | int src_stride_y = b * 2 + Abs(src_width); |
michael@0 | 33 | int src_stride_uv = b * 2 + src_width_uv; |
michael@0 | 34 | |
michael@0 | 35 | align_buffer_page_end(src_y, src_y_plane_size) |
michael@0 | 36 | align_buffer_page_end(src_u, src_uv_plane_size) |
michael@0 | 37 | align_buffer_page_end(src_v, src_uv_plane_size) |
michael@0 | 38 | srandom(time(NULL)); |
michael@0 | 39 | MemRandomize(src_y, src_y_plane_size); |
michael@0 | 40 | MemRandomize(src_u, src_uv_plane_size); |
michael@0 | 41 | MemRandomize(src_v, src_uv_plane_size); |
michael@0 | 42 | |
michael@0 | 43 | int dst_width_uv = (dst_width + 1) >> 1; |
michael@0 | 44 | int dst_height_uv = (dst_height + 1) >> 1; |
michael@0 | 45 | |
michael@0 | 46 | int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2); |
michael@0 | 47 | int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2); |
michael@0 | 48 | |
michael@0 | 49 | int dst_stride_y = b * 2 + dst_width; |
michael@0 | 50 | int dst_stride_uv = b * 2 + dst_width_uv; |
michael@0 | 51 | |
michael@0 | 52 | align_buffer_page_end(dst_y_c, dst_y_plane_size) |
michael@0 | 53 | align_buffer_page_end(dst_u_c, dst_uv_plane_size) |
michael@0 | 54 | align_buffer_page_end(dst_v_c, dst_uv_plane_size) |
michael@0 | 55 | align_buffer_page_end(dst_y_opt, dst_y_plane_size) |
michael@0 | 56 | align_buffer_page_end(dst_u_opt, dst_uv_plane_size) |
michael@0 | 57 | align_buffer_page_end(dst_v_opt, dst_uv_plane_size) |
michael@0 | 58 | |
michael@0 | 59 | |
michael@0 | 60 | MaskCpuFlags(0); // Disable all CPU optimization. |
michael@0 | 61 | double c_time = get_time(); |
michael@0 | 62 | I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, |
michael@0 | 63 | src_u + (src_stride_uv * b) + b, src_stride_uv, |
michael@0 | 64 | src_v + (src_stride_uv * b) + b, src_stride_uv, |
michael@0 | 65 | src_width, src_height, |
michael@0 | 66 | dst_y_c + (dst_stride_y * b) + b, dst_stride_y, |
michael@0 | 67 | dst_u_c + (dst_stride_uv * b) + b, dst_stride_uv, |
michael@0 | 68 | dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv, |
michael@0 | 69 | dst_width, dst_height, f); |
michael@0 | 70 | c_time = (get_time() - c_time); |
michael@0 | 71 | |
michael@0 | 72 | MaskCpuFlags(-1); // Enable all CPU optimization. |
michael@0 | 73 | double opt_time = get_time(); |
michael@0 | 74 | for (i = 0; i < benchmark_iterations; ++i) { |
michael@0 | 75 | I420Scale(src_y + (src_stride_y * b) + b, src_stride_y, |
michael@0 | 76 | src_u + (src_stride_uv * b) + b, src_stride_uv, |
michael@0 | 77 | src_v + (src_stride_uv * b) + b, src_stride_uv, |
michael@0 | 78 | src_width, src_height, |
michael@0 | 79 | dst_y_opt + (dst_stride_y * b) + b, dst_stride_y, |
michael@0 | 80 | dst_u_opt + (dst_stride_uv * b) + b, dst_stride_uv, |
michael@0 | 81 | dst_v_opt + (dst_stride_uv * b) + b, dst_stride_uv, |
michael@0 | 82 | dst_width, dst_height, f); |
michael@0 | 83 | } |
michael@0 | 84 | opt_time = (get_time() - opt_time) / benchmark_iterations; |
michael@0 | 85 | // Report performance of C vs OPT |
michael@0 | 86 | printf("filter %d - %8d us C - %8d us OPT\n", |
michael@0 | 87 | f, |
michael@0 | 88 | static_cast<int>(c_time * 1e6), |
michael@0 | 89 | static_cast<int>(opt_time * 1e6)); |
michael@0 | 90 | |
michael@0 | 91 | // C version may be a little off from the optimized. Order of |
michael@0 | 92 | // operations may introduce rounding somewhere. So do a difference |
michael@0 | 93 | // of the buffers and look to see that the max difference isn't |
michael@0 | 94 | // over 2. |
michael@0 | 95 | int max_diff = 0; |
michael@0 | 96 | for (i = b; i < (dst_height + b); ++i) { |
michael@0 | 97 | for (j = b; j < (dst_width + b); ++j) { |
michael@0 | 98 | int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] - |
michael@0 | 99 | dst_y_opt[(i * dst_stride_y) + j]); |
michael@0 | 100 | if (abs_diff > max_diff) { |
michael@0 | 101 | max_diff = abs_diff; |
michael@0 | 102 | } |
michael@0 | 103 | } |
michael@0 | 104 | } |
michael@0 | 105 | |
michael@0 | 106 | for (i = b; i < (dst_height_uv + b); ++i) { |
michael@0 | 107 | for (j = b; j < (dst_width_uv + b); ++j) { |
michael@0 | 108 | int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] - |
michael@0 | 109 | dst_u_opt[(i * dst_stride_uv) + j]); |
michael@0 | 110 | if (abs_diff > max_diff) { |
michael@0 | 111 | max_diff = abs_diff; |
michael@0 | 112 | } |
michael@0 | 113 | abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] - |
michael@0 | 114 | dst_v_opt[(i * dst_stride_uv) + j]); |
michael@0 | 115 | if (abs_diff > max_diff) { |
michael@0 | 116 | max_diff = abs_diff; |
michael@0 | 117 | } |
michael@0 | 118 | } |
michael@0 | 119 | } |
michael@0 | 120 | |
michael@0 | 121 | free_aligned_buffer_page_end(dst_y_c) |
michael@0 | 122 | free_aligned_buffer_page_end(dst_u_c) |
michael@0 | 123 | free_aligned_buffer_page_end(dst_v_c) |
michael@0 | 124 | free_aligned_buffer_page_end(dst_y_opt) |
michael@0 | 125 | free_aligned_buffer_page_end(dst_u_opt) |
michael@0 | 126 | free_aligned_buffer_page_end(dst_v_opt) |
michael@0 | 127 | |
michael@0 | 128 | free_aligned_buffer_page_end(src_y) |
michael@0 | 129 | free_aligned_buffer_page_end(src_u) |
michael@0 | 130 | free_aligned_buffer_page_end(src_v) |
michael@0 | 131 | |
michael@0 | 132 | return max_diff; |
michael@0 | 133 | } |
michael@0 | 134 | |
michael@0 | 135 | #define TEST_FACTOR1(name, filter, hfactor, vfactor, max_diff) \ |
michael@0 | 136 | TEST_F(libyuvTest, ScaleDownBy##name##_##filter) { \ |
michael@0 | 137 | int diff = TestFilter(benchmark_width_, benchmark_height_, \ |
michael@0 | 138 | Abs(benchmark_width_) * hfactor, \ |
michael@0 | 139 | Abs(benchmark_height_) * vfactor, \ |
michael@0 | 140 | kFilter##filter, benchmark_iterations_); \ |
michael@0 | 141 | EXPECT_LE(diff, max_diff); \ |
michael@0 | 142 | } |
michael@0 | 143 | |
michael@0 | 144 | // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but |
michael@0 | 145 | // filtering is different fixed point implementations for SSSE3, Neon and C. |
michael@0 | 146 | #define TEST_FACTOR(name, hfactor, vfactor) \ |
michael@0 | 147 | TEST_FACTOR1(name, None, hfactor, vfactor, 0) \ |
michael@0 | 148 | TEST_FACTOR1(name, Linear, hfactor, vfactor, 3) \ |
michael@0 | 149 | TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 3) \ |
michael@0 | 150 | TEST_FACTOR1(name, Box, hfactor, vfactor, 3) \ |
michael@0 | 151 | |
michael@0 | 152 | TEST_FACTOR(2, 1 / 2, 1 / 2) |
michael@0 | 153 | TEST_FACTOR(4, 1 / 4, 1 / 4) |
michael@0 | 154 | TEST_FACTOR(8, 1 / 8, 1 / 8) |
michael@0 | 155 | TEST_FACTOR(3by4, 3 / 4, 3 / 4) |
michael@0 | 156 | #undef TEST_FACTOR1 |
michael@0 | 157 | #undef TEST_FACTOR |
michael@0 | 158 | |
michael@0 | 159 | #define TEST_SCALETO1(name, width, height, filter, max_diff) \ |
michael@0 | 160 | TEST_F(libyuvTest, name##To##width##x##height##_##filter) { \ |
michael@0 | 161 | int diff = TestFilter(benchmark_width_, benchmark_height_, \ |
michael@0 | 162 | width, height, \ |
michael@0 | 163 | kFilter##filter, benchmark_iterations_); \ |
michael@0 | 164 | EXPECT_LE(diff, max_diff); \ |
michael@0 | 165 | } \ |
michael@0 | 166 | TEST_F(libyuvTest, name##From##width##x##height##_##filter) { \ |
michael@0 | 167 | int diff = TestFilter(width, height, \ |
michael@0 | 168 | Abs(benchmark_width_), Abs(benchmark_height_), \ |
michael@0 | 169 | kFilter##filter, benchmark_iterations_); \ |
michael@0 | 170 | EXPECT_LE(diff, max_diff); \ |
michael@0 | 171 | } |
michael@0 | 172 | |
michael@0 | 173 | // Test scale to a specified size with all 4 filters. |
michael@0 | 174 | #define TEST_SCALETO(name, width, height) \ |
michael@0 | 175 | TEST_SCALETO1(name, width, height, None, 0) \ |
michael@0 | 176 | TEST_SCALETO1(name, width, height, Linear, 3) \ |
michael@0 | 177 | TEST_SCALETO1(name, width, height, Bilinear, 3) \ |
michael@0 | 178 | TEST_SCALETO1(name, width, height, Box, 3) |
michael@0 | 179 | |
michael@0 | 180 | TEST_SCALETO(Scale, 1, 1) |
michael@0 | 181 | TEST_SCALETO(Scale, 320, 240) |
michael@0 | 182 | TEST_SCALETO(Scale, 352, 288) |
michael@0 | 183 | TEST_SCALETO(Scale, 640, 360) |
michael@0 | 184 | TEST_SCALETO(Scale, 1280, 720) |
michael@0 | 185 | #undef TEST_SCALETO1 |
michael@0 | 186 | #undef TEST_SCALETO |
michael@0 | 187 | |
michael@0 | 188 | } // namespace libyuv |