Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* |
michael@0 | 2 | * Copyright 2009 The Android Open Source Project |
michael@0 | 3 | * |
michael@0 | 4 | * Use of this source code is governed by a BSD-style license that can be |
michael@0 | 5 | * found in the LICENSE file. |
michael@0 | 6 | */ |
michael@0 | 7 | |
michael@0 | 8 | #include "SkBitmapProcState_opts_SSE2.h" |
michael@0 | 9 | #include "SkBitmapProcState_opts_SSSE3.h" |
michael@0 | 10 | #include "SkBitmapFilter_opts_SSE2.h" |
michael@0 | 11 | #include "SkBlitMask.h" |
michael@0 | 12 | #include "SkBlitRow.h" |
michael@0 | 13 | #include "SkBlitRect_opts_SSE2.h" |
michael@0 | 14 | #include "SkBlitRow_opts_SSE2.h" |
michael@0 | 15 | #include "SkBlurImage_opts_SSE2.h" |
michael@0 | 16 | #include "SkUtils_opts_SSE2.h" |
michael@0 | 17 | #include "SkUtils.h" |
michael@0 | 18 | #include "SkMorphology_opts.h" |
michael@0 | 19 | #include "SkMorphology_opts_SSE2.h" |
michael@0 | 20 | |
michael@0 | 21 | #include "SkRTConf.h" |
michael@0 | 22 | |
michael@0 | 23 | #if defined(_MSC_VER) && defined(_WIN64) |
michael@0 | 24 | #include <intrin.h> |
michael@0 | 25 | #endif |
michael@0 | 26 | |
michael@0 | 27 | /* This file must *not* be compiled with -msse or -msse2, otherwise |
michael@0 | 28 | gcc may generate sse2 even for scalar ops (and thus give an invalid |
michael@0 | 29 | instruction on Pentium3 on the code below). Only files named *_SSE2.cpp |
michael@0 | 30 | in this directory should be compiled with -msse2. */ |
michael@0 | 31 | |
michael@0 | 32 | |
/*  getcpuid(info_type, info)

    Executes the x86 CPUID instruction with EAX = info_type and stores the
    resulting EAX, EBX, ECX and EDX values in info[0], info[1], info[2] and
    info[3] respectively.

    Exactly one of the definitions below is compiled:
      - MSVC on Win64:   the __cpuid intrinsic
      - MSVC otherwise:  an __asm block
      - other compilers: GCC-style extended asm, x86_64 or 32-bit flavour
*/
#ifdef _MSC_VER
static inline void getcpuid(int info_type, int info[4]) {
#if defined(_WIN64)
    __cpuid(info, info_type);
#else
    __asm {
        mov    eax, [info_type]
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
#endif
}
#else
#if defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    asm volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // EBX must hold its original value when the asm statement ends so that
    // this stays compatible with -fPIC. The old value is parked in EDI across
    // CPUID and then exchanged back, which leaves CPUID's EBX result in EDI.
    // Pinning the output to EDI ("=D") matters: a generic "=r" operand could
    // be allocated to EBX itself, in which case restoring EBX would destroy
    // the value being returned. Not using push/pop also keeps the stack
    // pointer unchanged inside the asm statement.
    asm volatile (
        "movl %%ebx, %%edi \n\t"
        "cpuid \n\t"
        "xchgl %%edi, %%ebx \n\t"
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#endif
#endif
michael@0 | 72 | |
#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
/* SSE2 is a given here: either the target is 64-bit x86, or the build was
   configured with an SSE level of at least SSE2. No runtime probe is needed. */
static inline bool hasSSE2() {
    return true;
}
#else

/* Runtime probe: query CPUID leaf 1 and test bit 26 of the EDX word, which
   getcpuid() stores in element 3. */
static inline bool hasSSE2() {
    int regs[4] = { 0 };
    getcpuid(1, regs);
    const int kSSE2Mask = 1 << 26;
    return 0 != (regs[3] & kSSE2Mask);
}
#endif
michael@0 | 86 | |
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
/* The build was configured with an SSE level of at least SSSE3, so the
   answer is known at compile time. */
static inline bool hasSSSE3() {
    return true;
}
#else

/* Runtime probe: query CPUID leaf 1 and test bit 9 (mask 0x200) of the ECX
   word, which getcpuid() stores in element 2. */
static inline bool hasSSSE3() {
    int regs[4] = { 0 };
    getcpuid(1, regs);
    const int kSSSE3Mask = 1 << 9;
    return 0 != (regs[2] & kSSSE3Mask);
}
#endif
michael@0 | 100 | |
michael@0 | 101 | static bool cachedHasSSE2() { |
michael@0 | 102 | static bool gHasSSE2 = hasSSE2(); |
michael@0 | 103 | return gHasSSE2; |
michael@0 | 104 | } |
michael@0 | 105 | |
michael@0 | 106 | static bool cachedHasSSSE3() { |
michael@0 | 107 | static bool gHasSSSE3 = hasSSSE3(); |
michael@0 | 108 | return gHasSSSE3; |
michael@0 | 109 | } |
michael@0 | 110 | |
michael@0 | 111 | SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters"); |
michael@0 | 112 | |
michael@0 | 113 | void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { |
michael@0 | 114 | if (cachedHasSSE2()) { |
michael@0 | 115 | procs->fExtraHorizontalReads = 3; |
michael@0 | 116 | procs->fConvolveVertically = &convolveVertically_SSE2; |
michael@0 | 117 | procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; |
michael@0 | 118 | procs->fConvolveHorizontally = &convolveHorizontally_SSE2; |
michael@0 | 119 | procs->fApplySIMDPadding = &applySIMDPadding_SSE2; |
michael@0 | 120 | } |
michael@0 | 121 | } |
michael@0 | 122 | |
michael@0 | 123 | void SkBitmapProcState::platformProcs() { |
michael@0 | 124 | if (cachedHasSSSE3()) { |
michael@0 | 125 | if (fSampleProc32 == S32_opaque_D32_filter_DX) { |
michael@0 | 126 | fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; |
michael@0 | 127 | } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { |
michael@0 | 128 | fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3; |
michael@0 | 129 | } |
michael@0 | 130 | |
michael@0 | 131 | if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { |
michael@0 | 132 | fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3; |
michael@0 | 133 | } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) { |
michael@0 | 134 | fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3; |
michael@0 | 135 | } |
michael@0 | 136 | } else if (cachedHasSSE2()) { |
michael@0 | 137 | if (fSampleProc32 == S32_opaque_D32_filter_DX) { |
michael@0 | 138 | fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; |
michael@0 | 139 | } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { |
michael@0 | 140 | fSampleProc32 = S32_alpha_D32_filter_DX_SSE2; |
michael@0 | 141 | } |
michael@0 | 142 | |
michael@0 | 143 | if (fSampleProc16 == S32_D16_filter_DX) { |
michael@0 | 144 | fSampleProc16 = S32_D16_filter_DX_SSE2; |
michael@0 | 145 | } |
michael@0 | 146 | } |
michael@0 | 147 | |
michael@0 | 148 | if (cachedHasSSSE3() || cachedHasSSE2()) { |
michael@0 | 149 | if (fMatrixProc == ClampX_ClampY_filter_scale) { |
michael@0 | 150 | fMatrixProc = ClampX_ClampY_filter_scale_SSE2; |
michael@0 | 151 | } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) { |
michael@0 | 152 | fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2; |
michael@0 | 153 | } |
michael@0 | 154 | |
michael@0 | 155 | if (fMatrixProc == ClampX_ClampY_filter_affine) { |
michael@0 | 156 | fMatrixProc = ClampX_ClampY_filter_affine_SSE2; |
michael@0 | 157 | } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) { |
michael@0 | 158 | fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2; |
michael@0 | 159 | } |
michael@0 | 160 | if (c_hqfilter_sse) { |
michael@0 | 161 | if (fShaderProc32 == highQualityFilter32) { |
michael@0 | 162 | fShaderProc32 = highQualityFilter_SSE2; |
michael@0 | 163 | } |
michael@0 | 164 | } |
michael@0 | 165 | } |
michael@0 | 166 | } |
michael@0 | 167 | |
michael@0 | 168 | static SkBlitRow::Proc platform_16_procs[] = { |
michael@0 | 169 | S32_D565_Opaque_SSE2, // S32_D565_Opaque |
michael@0 | 170 | NULL, // S32_D565_Blend |
michael@0 | 171 | S32A_D565_Opaque_SSE2, // S32A_D565_Opaque |
michael@0 | 172 | NULL, // S32A_D565_Blend |
michael@0 | 173 | S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither |
michael@0 | 174 | NULL, // S32_D565_Blend_Dither |
michael@0 | 175 | S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither |
michael@0 | 176 | NULL, // S32A_D565_Blend_Dither |
michael@0 | 177 | }; |
michael@0 | 178 | |
michael@0 | 179 | static SkBlitRow::Proc32 platform_32_procs[] = { |
michael@0 | 180 | NULL, // S32_Opaque, |
michael@0 | 181 | S32_Blend_BlitRow32_SSE2, // S32_Blend, |
michael@0 | 182 | S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque |
michael@0 | 183 | S32A_Blend_BlitRow32_SSE2, // S32A_Blend, |
michael@0 | 184 | }; |
michael@0 | 185 | |
michael@0 | 186 | SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { |
michael@0 | 187 | if (cachedHasSSE2()) { |
michael@0 | 188 | return platform_16_procs[flags]; |
michael@0 | 189 | } else { |
michael@0 | 190 | return NULL; |
michael@0 | 191 | } |
michael@0 | 192 | } |
michael@0 | 193 | |
michael@0 | 194 | SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { |
michael@0 | 195 | if (cachedHasSSE2()) { |
michael@0 | 196 | return Color32_SSE2; |
michael@0 | 197 | } else { |
michael@0 | 198 | return NULL; |
michael@0 | 199 | } |
michael@0 | 200 | } |
michael@0 | 201 | |
michael@0 | 202 | SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { |
michael@0 | 203 | if (cachedHasSSE2()) { |
michael@0 | 204 | return platform_32_procs[flags]; |
michael@0 | 205 | } else { |
michael@0 | 206 | return NULL; |
michael@0 | 207 | } |
michael@0 | 208 | } |
michael@0 | 209 | |
michael@0 | 210 | |
michael@0 | 211 | SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, |
michael@0 | 212 | SkMask::Format maskFormat, |
michael@0 | 213 | SkColor color) { |
michael@0 | 214 | if (SkMask::kA8_Format != maskFormat) { |
michael@0 | 215 | return NULL; |
michael@0 | 216 | } |
michael@0 | 217 | |
michael@0 | 218 | ColorProc proc = NULL; |
michael@0 | 219 | if (cachedHasSSE2()) { |
michael@0 | 220 | switch (dstConfig) { |
michael@0 | 221 | case SkBitmap::kARGB_8888_Config: |
michael@0 | 222 | // The SSE2 version is not (yet) faster for black, so we check |
michael@0 | 223 | // for that. |
michael@0 | 224 | if (SK_ColorBLACK != color) { |
michael@0 | 225 | proc = SkARGB32_A8_BlitMask_SSE2; |
michael@0 | 226 | } |
michael@0 | 227 | break; |
michael@0 | 228 | default: |
michael@0 | 229 | break; |
michael@0 | 230 | } |
michael@0 | 231 | } |
michael@0 | 232 | return proc; |
michael@0 | 233 | } |
michael@0 | 234 | |
michael@0 | 235 | SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { |
michael@0 | 236 | if (cachedHasSSE2()) { |
michael@0 | 237 | if (isOpaque) { |
michael@0 | 238 | return SkBlitLCD16OpaqueRow_SSE2; |
michael@0 | 239 | } else { |
michael@0 | 240 | return SkBlitLCD16Row_SSE2; |
michael@0 | 241 | } |
michael@0 | 242 | } else { |
michael@0 | 243 | return NULL; |
michael@0 | 244 | } |
michael@0 | 245 | |
michael@0 | 246 | } |
michael@0 | 247 | SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, |
michael@0 | 248 | SkMask::Format maskFormat, |
michael@0 | 249 | RowFlags flags) { |
michael@0 | 250 | return NULL; |
michael@0 | 251 | } |
michael@0 | 252 | |
michael@0 | 253 | SkMemset16Proc SkMemset16GetPlatformProc() { |
michael@0 | 254 | if (cachedHasSSE2()) { |
michael@0 | 255 | return sk_memset16_SSE2; |
michael@0 | 256 | } else { |
michael@0 | 257 | return NULL; |
michael@0 | 258 | } |
michael@0 | 259 | } |
michael@0 | 260 | |
michael@0 | 261 | SkMemset32Proc SkMemset32GetPlatformProc() { |
michael@0 | 262 | if (cachedHasSSE2()) { |
michael@0 | 263 | return sk_memset32_SSE2; |
michael@0 | 264 | } else { |
michael@0 | 265 | return NULL; |
michael@0 | 266 | } |
michael@0 | 267 | } |
michael@0 | 268 | |
michael@0 | 269 | SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) { |
michael@0 | 270 | if (!cachedHasSSE2()) { |
michael@0 | 271 | return NULL; |
michael@0 | 272 | } |
michael@0 | 273 | switch (type) { |
michael@0 | 274 | case kDilateX_SkMorphologyProcType: |
michael@0 | 275 | return SkDilateX_SSE2; |
michael@0 | 276 | case kDilateY_SkMorphologyProcType: |
michael@0 | 277 | return SkDilateY_SSE2; |
michael@0 | 278 | case kErodeX_SkMorphologyProcType: |
michael@0 | 279 | return SkErodeX_SSE2; |
michael@0 | 280 | case kErodeY_SkMorphologyProcType: |
michael@0 | 281 | return SkErodeY_SSE2; |
michael@0 | 282 | default: |
michael@0 | 283 | return NULL; |
michael@0 | 284 | } |
michael@0 | 285 | } |
michael@0 | 286 | |
michael@0 | 287 | bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX, |
michael@0 | 288 | SkBoxBlurProc* boxBlurY, |
michael@0 | 289 | SkBoxBlurProc* boxBlurXY, |
michael@0 | 290 | SkBoxBlurProc* boxBlurYX) { |
michael@0 | 291 | #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION |
michael@0 | 292 | return false; |
michael@0 | 293 | #else |
michael@0 | 294 | if (!cachedHasSSE2()) { |
michael@0 | 295 | return false; |
michael@0 | 296 | } |
michael@0 | 297 | return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX); |
michael@0 | 298 | #endif |
michael@0 | 299 | } |
michael@0 | 300 | |
michael@0 | 301 | SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning |
michael@0 | 302 | |
michael@0 | 303 | SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { |
michael@0 | 304 | if (cachedHasSSE2()) { |
michael@0 | 305 | return ColorRect32_SSE2; |
michael@0 | 306 | } else { |
michael@0 | 307 | return NULL; |
michael@0 | 308 | } |
michael@0 | 309 | } |