1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/skia/trunk/src/opts/opts_check_SSE2.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,309 @@ 1.4 +/* 1.5 + * Copyright 2009 The Android Open Source Project 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license that can be 1.8 + * found in the LICENSE file. 1.9 + */ 1.10 + 1.11 +#include "SkBitmapProcState_opts_SSE2.h" 1.12 +#include "SkBitmapProcState_opts_SSSE3.h" 1.13 +#include "SkBitmapFilter_opts_SSE2.h" 1.14 +#include "SkBlitMask.h" 1.15 +#include "SkBlitRow.h" 1.16 +#include "SkBlitRect_opts_SSE2.h" 1.17 +#include "SkBlitRow_opts_SSE2.h" 1.18 +#include "SkBlurImage_opts_SSE2.h" 1.19 +#include "SkUtils_opts_SSE2.h" 1.20 +#include "SkUtils.h" 1.21 +#include "SkMorphology_opts.h" 1.22 +#include "SkMorphology_opts_SSE2.h" 1.23 + 1.24 +#include "SkRTConf.h" 1.25 + 1.26 +#if defined(_MSC_VER) && defined(_WIN64) 1.27 +#include <intrin.h> 1.28 +#endif 1.29 + 1.30 +/* This file must *not* be compiled with -msse or -msse2, otherwise 1.31 + gcc may generate sse2 even for scalar ops (and thus give an invalid 1.32 + instruction on Pentium3 on the code below). Only files named *_SSE2.cpp 1.33 + in this directory should be compiled with -msse2. */ 1.34 + 1.35 + 1.36 +#ifdef _MSC_VER 1.37 +static inline void getcpuid(int info_type, int info[4]) { 1.38 +#if defined(_WIN64) 1.39 + __cpuid(info, info_type); 1.40 +#else 1.41 + __asm { 1.42 + mov eax, [info_type] 1.43 + cpuid 1.44 + mov edi, [info] 1.45 + mov [edi], eax 1.46 + mov [edi+4], ebx 1.47 + mov [edi+8], ecx 1.48 + mov [edi+12], edx 1.49 + } 1.50 +#endif 1.51 +} 1.52 +#else 1.53 +#if defined(__x86_64__) 1.54 +static inline void getcpuid(int info_type, int info[4]) { 1.55 + asm volatile ( 1.56 + "cpuid \n\t" 1.57 + : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) 1.58 + : "a"(info_type) 1.59 + ); 1.60 +} 1.61 +#else 1.62 +static inline void getcpuid(int info_type, int info[4]) { 1.63 + // We save and restore ebx, so this code can be compatible with -fPIC 1.64 + asm volatile ( 1.65 + "pushl %%ebx \n\t" 1.66 + "cpuid \n\t" 1.67 + "movl %%ebx, %1 \n\t" 1.68 + "popl %%ebx \n\t" 1.69 + : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) 1.70 + : "a"(info_type) 1.71 + ); 1.72 +} 1.73 +#endif 1.74 +#endif 1.75 + 1.76 +#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 1.77 +/* All x86_64 machines have SSE2, or we know it's supported at compile time, so don't even bother checking. */ 1.78 +static inline bool hasSSE2() { 1.79 + return true; 1.80 +} 1.81 +#else 1.82 + 1.83 +static inline bool hasSSE2() { 1.84 + int cpu_info[4] = { 0 }; 1.85 + getcpuid(1, cpu_info); 1.86 + return (cpu_info[3] & (1<<26)) != 0; 1.87 +} 1.88 +#endif 1.89 + 1.90 +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 1.91 +/* If we know SSSE3 is supported at compile time, don't even bother checking. */ 1.92 +static inline bool hasSSSE3() { 1.93 + return true; 1.94 +} 1.95 +#else 1.96 + 1.97 +static inline bool hasSSSE3() { 1.98 + int cpu_info[4] = { 0 }; 1.99 + getcpuid(1, cpu_info); 1.100 + return (cpu_info[2] & 0x200) != 0; 1.101 +} 1.102 +#endif 1.103 + 1.104 +static bool cachedHasSSE2() { 1.105 + static bool gHasSSE2 = hasSSE2(); 1.106 + return gHasSSE2; 1.107 +} 1.108 + 1.109 +static bool cachedHasSSSE3() { 1.110 + static bool gHasSSSE3 = hasSSSE3(); 1.111 + return gHasSSSE3; 1.112 +} 1.113 + 1.114 +SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters"); 1.115 + 1.116 +void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { 1.117 + if (cachedHasSSE2()) { 1.118 + procs->fExtraHorizontalReads = 3; 1.119 + procs->fConvolveVertically = &convolveVertically_SSE2; 1.120 + procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; 1.121 + procs->fConvolveHorizontally = &convolveHorizontally_SSE2; 1.122 + procs->fApplySIMDPadding = &applySIMDPadding_SSE2; 1.123 + } 1.124 +} 1.125 + 1.126 +void SkBitmapProcState::platformProcs() { 1.127 + if (cachedHasSSSE3()) { 1.128 + if (fSampleProc32 == S32_opaque_D32_filter_DX) { 1.129 + fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; 1.130 + } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 1.131 + fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3; 1.132 + } 1.133 + 1.134 + if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { 1.135 + fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3; 1.136 + } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) { 1.137 + fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3; 1.138 + } 1.139 + } else if (cachedHasSSE2()) { 1.140 + if (fSampleProc32 == S32_opaque_D32_filter_DX) { 1.141 + fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; 1.142 + } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 1.143 + fSampleProc32 = S32_alpha_D32_filter_DX_SSE2; 1.144 + } 1.145 + 1.146 + if (fSampleProc16 == S32_D16_filter_DX) { 1.147 + fSampleProc16 = S32_D16_filter_DX_SSE2; 1.148 + } 1.149 + } 1.150 + 1.151 + if (cachedHasSSSE3() || cachedHasSSE2()) { 1.152 + if (fMatrixProc == ClampX_ClampY_filter_scale) { 1.153 + fMatrixProc = ClampX_ClampY_filter_scale_SSE2; 1.154 + } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) { 1.155 + fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2; 1.156 + } 1.157 + 1.158 + if (fMatrixProc == ClampX_ClampY_filter_affine) { 1.159 + fMatrixProc = ClampX_ClampY_filter_affine_SSE2; 1.160 + } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) { 1.161 + fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2; 1.162 + } 1.163 + if (c_hqfilter_sse) { 1.164 + if (fShaderProc32 == highQualityFilter32) { 1.165 + fShaderProc32 = highQualityFilter_SSE2; 1.166 + } 1.167 + } 1.168 + } 1.169 +} 1.170 + 1.171 +static SkBlitRow::Proc platform_16_procs[] = { 1.172 + S32_D565_Opaque_SSE2, // S32_D565_Opaque 1.173 + NULL, // S32_D565_Blend 1.174 + S32A_D565_Opaque_SSE2, // S32A_D565_Opaque 1.175 + NULL, // S32A_D565_Blend 1.176 + S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither 1.177 + NULL, // S32_D565_Blend_Dither 1.178 + S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither 1.179 + NULL, // S32A_D565_Blend_Dither 1.180 +}; 1.181 + 1.182 +static SkBlitRow::Proc32 platform_32_procs[] = { 1.183 + NULL, // S32_Opaque, 1.184 + S32_Blend_BlitRow32_SSE2, // S32_Blend, 1.185 + S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque 1.186 + S32A_Blend_BlitRow32_SSE2, // S32A_Blend, 1.187 +}; 1.188 + 1.189 +SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { 1.190 + if (cachedHasSSE2()) { 1.191 + return platform_16_procs[flags]; 1.192 + } else { 1.193 + return NULL; 1.194 + } 1.195 +} 1.196 + 1.197 +SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { 1.198 + if (cachedHasSSE2()) { 1.199 + return Color32_SSE2; 1.200 + } else { 1.201 + return NULL; 1.202 + } 1.203 +} 1.204 + 1.205 +SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { 1.206 + if (cachedHasSSE2()) { 1.207 + return platform_32_procs[flags]; 1.208 + } else { 1.209 + return NULL; 1.210 + } 1.211 +} 1.212 + 1.213 + 1.214 +SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, 1.215 + SkMask::Format maskFormat, 1.216 + SkColor color) { 1.217 + if (SkMask::kA8_Format != maskFormat) { 1.218 + return NULL; 1.219 + } 1.220 + 1.221 + ColorProc proc = NULL; 1.222 + if (cachedHasSSE2()) { 1.223 + switch (dstConfig) { 1.224 + case SkBitmap::kARGB_8888_Config: 1.225 + // The SSE2 version is not (yet) faster for black, so we check 1.226 + // for that. 1.227 + if (SK_ColorBLACK != color) { 1.228 + proc = SkARGB32_A8_BlitMask_SSE2; 1.229 + } 1.230 + break; 1.231 + default: 1.232 + break; 1.233 + } 1.234 + } 1.235 + return proc; 1.236 +} 1.237 + 1.238 +SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { 1.239 + if (cachedHasSSE2()) { 1.240 + if (isOpaque) { 1.241 + return SkBlitLCD16OpaqueRow_SSE2; 1.242 + } else { 1.243 + return SkBlitLCD16Row_SSE2; 1.244 + } 1.245 + } else { 1.246 + return NULL; 1.247 + } 1.248 + 1.249 +} 1.250 +SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, 1.251 + SkMask::Format maskFormat, 1.252 + RowFlags flags) { 1.253 + return NULL; 1.254 +} 1.255 + 1.256 +SkMemset16Proc SkMemset16GetPlatformProc() { 1.257 + if (cachedHasSSE2()) { 1.258 + return sk_memset16_SSE2; 1.259 + } else { 1.260 + return NULL; 1.261 + } 1.262 +} 1.263 + 1.264 +SkMemset32Proc SkMemset32GetPlatformProc() { 1.265 + if (cachedHasSSE2()) { 1.266 + return sk_memset32_SSE2; 1.267 + } else { 1.268 + return NULL; 1.269 + } 1.270 +} 1.271 + 1.272 +SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) { 1.273 + if (!cachedHasSSE2()) { 1.274 + return NULL; 1.275 + } 1.276 + switch (type) { 1.277 + case kDilateX_SkMorphologyProcType: 1.278 + return SkDilateX_SSE2; 1.279 + case kDilateY_SkMorphologyProcType: 1.280 + return SkDilateY_SSE2; 1.281 + case kErodeX_SkMorphologyProcType: 1.282 + return SkErodeX_SSE2; 1.283 + case kErodeY_SkMorphologyProcType: 1.284 + return SkErodeY_SSE2; 1.285 + default: 1.286 + return NULL; 1.287 + } 1.288 +} 1.289 + 1.290 +bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX, 1.291 + SkBoxBlurProc* boxBlurY, 1.292 + SkBoxBlurProc* boxBlurXY, 1.293 + SkBoxBlurProc* boxBlurYX) { 1.294 +#ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION 1.295 + return false; 1.296 +#else 1.297 + if (!cachedHasSSE2()) { 1.298 + return false; 1.299 + } 1.300 + return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX); 1.301 +#endif 1.302 +} 1.303 + 1.304 +SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning 1.305 + 1.306 +SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { 1.307 + if (cachedHasSSE2()) { 1.308 + return ColorRect32_SSE2; 1.309 + } else { 1.310 + return NULL; 1.311 + } 1.312 +}