michael@0: /* michael@0: * Copyright 2009 The Android Open Source Project michael@0: * michael@0: * Use of this source code is governed by a BSD-style license that can be michael@0: * found in the LICENSE file. michael@0: */ michael@0: michael@0: #include "SkBitmapProcState_opts_SSE2.h" michael@0: #include "SkBitmapProcState_opts_SSSE3.h" michael@0: #include "SkBitmapFilter_opts_SSE2.h" michael@0: #include "SkBlitMask.h" michael@0: #include "SkBlitRow.h" michael@0: #include "SkBlitRect_opts_SSE2.h" michael@0: #include "SkBlitRow_opts_SSE2.h" michael@0: #include "SkBlurImage_opts_SSE2.h" michael@0: #include "SkUtils_opts_SSE2.h" michael@0: #include "SkUtils.h" michael@0: #include "SkMorphology_opts.h" michael@0: #include "SkMorphology_opts_SSE2.h" michael@0: michael@0: #include "SkRTConf.h" michael@0: michael@0: #if defined(_MSC_VER) && defined(_WIN64) michael@0: #include michael@0: #endif michael@0: michael@0: /* This file must *not* be compiled with -msse or -msse2, otherwise michael@0: gcc may generate sse2 even for scalar ops (and thus give an invalid michael@0: instruction on Pentium3 on the code below). Only files named *_SSE2.cpp michael@0: in this directory should be compiled with -msse2. */ michael@0: michael@0: michael@0: #ifdef _MSC_VER michael@0: static inline void getcpuid(int info_type, int info[4]) { michael@0: #if defined(_WIN64) michael@0: __cpuid(info, info_type); michael@0: #else michael@0: __asm { michael@0: mov eax, [info_type] michael@0: cpuid michael@0: mov edi, [info] michael@0: mov [edi], eax michael@0: mov [edi+4], ebx michael@0: mov [edi+8], ecx michael@0: mov [edi+12], edx michael@0: } michael@0: #endif michael@0: } michael@0: #else michael@0: #if defined(__x86_64__) michael@0: static inline void getcpuid(int info_type, int info[4]) { michael@0: asm volatile ( michael@0: "cpuid \n\t" michael@0: : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) michael@0: : "a"(info_type) michael@0: ); michael@0: } michael@0: #else michael@0: static inline void getcpuid(int info_type, int info[4]) { michael@0: // We save and restore ebx, so this code can be compatible with -fPIC michael@0: asm volatile ( michael@0: "pushl %%ebx \n\t" michael@0: "cpuid \n\t" michael@0: "movl %%ebx, %1 \n\t" michael@0: "popl %%ebx \n\t" michael@0: : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) michael@0: : "a"(info_type) michael@0: ); michael@0: } michael@0: #endif michael@0: #endif michael@0: michael@0: #if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 michael@0: /* All x86_64 machines have SSE2, or we know it's supported at compile time, so don't even bother checking. */ michael@0: static inline bool hasSSE2() { michael@0: return true; michael@0: } michael@0: #else michael@0: michael@0: static inline bool hasSSE2() { michael@0: int cpu_info[4] = { 0 }; michael@0: getcpuid(1, cpu_info); michael@0: return (cpu_info[3] & (1<<26)) != 0; michael@0: } michael@0: #endif michael@0: michael@0: #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 michael@0: /* If we know SSSE3 is supported at compile time, don't even bother checking. */ michael@0: static inline bool hasSSSE3() { michael@0: return true; michael@0: } michael@0: #else michael@0: michael@0: static inline bool hasSSSE3() { michael@0: int cpu_info[4] = { 0 }; michael@0: getcpuid(1, cpu_info); michael@0: return (cpu_info[2] & 0x200) != 0; michael@0: } michael@0: #endif michael@0: michael@0: static bool cachedHasSSE2() { michael@0: static bool gHasSSE2 = hasSSE2(); michael@0: return gHasSSE2; michael@0: } michael@0: michael@0: static bool cachedHasSSSE3() { michael@0: static bool gHasSSSE3 = hasSSSE3(); michael@0: return gHasSSSE3; michael@0: } michael@0: michael@0: SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters"); michael@0: michael@0: void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { michael@0: if (cachedHasSSE2()) { michael@0: procs->fExtraHorizontalReads = 3; michael@0: procs->fConvolveVertically = &convolveVertically_SSE2; michael@0: procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; michael@0: procs->fConvolveHorizontally = &convolveHorizontally_SSE2; michael@0: procs->fApplySIMDPadding = &applySIMDPadding_SSE2; michael@0: } michael@0: } michael@0: michael@0: void SkBitmapProcState::platformProcs() { michael@0: if (cachedHasSSSE3()) { michael@0: if (fSampleProc32 == S32_opaque_D32_filter_DX) { michael@0: fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; michael@0: } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { michael@0: fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3; michael@0: } michael@0: michael@0: if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { michael@0: fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3; michael@0: } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) { michael@0: fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3; michael@0: } michael@0: } else if (cachedHasSSE2()) { michael@0: if (fSampleProc32 == S32_opaque_D32_filter_DX) { michael@0: fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; michael@0: } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { michael@0: fSampleProc32 = S32_alpha_D32_filter_DX_SSE2; michael@0: } michael@0: michael@0: if (fSampleProc16 == S32_D16_filter_DX) { michael@0: fSampleProc16 = S32_D16_filter_DX_SSE2; michael@0: } michael@0: } michael@0: michael@0: if (cachedHasSSSE3() || cachedHasSSE2()) { michael@0: if (fMatrixProc == ClampX_ClampY_filter_scale) { michael@0: fMatrixProc = ClampX_ClampY_filter_scale_SSE2; michael@0: } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) { michael@0: fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2; michael@0: } michael@0: michael@0: if (fMatrixProc == ClampX_ClampY_filter_affine) { michael@0: fMatrixProc = ClampX_ClampY_filter_affine_SSE2; michael@0: } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) { michael@0: fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2; michael@0: } michael@0: if (c_hqfilter_sse) { michael@0: if (fShaderProc32 == highQualityFilter32) { michael@0: fShaderProc32 = highQualityFilter_SSE2; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: static SkBlitRow::Proc platform_16_procs[] = { michael@0: S32_D565_Opaque_SSE2, // S32_D565_Opaque michael@0: NULL, // S32_D565_Blend michael@0: S32A_D565_Opaque_SSE2, // S32A_D565_Opaque michael@0: NULL, // S32A_D565_Blend michael@0: S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither michael@0: NULL, // S32_D565_Blend_Dither michael@0: S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither michael@0: NULL, // S32A_D565_Blend_Dither michael@0: }; michael@0: michael@0: static SkBlitRow::Proc32 platform_32_procs[] = { michael@0: NULL, // S32_Opaque, michael@0: S32_Blend_BlitRow32_SSE2, // S32_Blend, michael@0: S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque michael@0: S32A_Blend_BlitRow32_SSE2, // S32A_Blend, michael@0: }; michael@0: michael@0: SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { michael@0: if (cachedHasSSE2()) { michael@0: return platform_16_procs[flags]; michael@0: } else { michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { michael@0: if (cachedHasSSE2()) { michael@0: return Color32_SSE2; michael@0: } else { michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { michael@0: if (cachedHasSSE2()) { michael@0: return platform_32_procs[flags]; michael@0: } else { michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: michael@0: SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, michael@0: SkMask::Format maskFormat, michael@0: SkColor color) { michael@0: if (SkMask::kA8_Format != maskFormat) { michael@0: return NULL; michael@0: } michael@0: michael@0: ColorProc proc = NULL; michael@0: if (cachedHasSSE2()) { michael@0: switch (dstConfig) { michael@0: case SkBitmap::kARGB_8888_Config: michael@0: // The SSE2 version is not (yet) faster for black, so we check michael@0: // for that. michael@0: if (SK_ColorBLACK != color) { michael@0: proc = SkARGB32_A8_BlitMask_SSE2; michael@0: } michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: } michael@0: return proc; michael@0: } michael@0: michael@0: SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { michael@0: if (cachedHasSSE2()) { michael@0: if (isOpaque) { michael@0: return SkBlitLCD16OpaqueRow_SSE2; michael@0: } else { michael@0: return SkBlitLCD16Row_SSE2; michael@0: } michael@0: } else { michael@0: return NULL; michael@0: } michael@0: michael@0: } michael@0: SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, michael@0: SkMask::Format maskFormat, michael@0: RowFlags flags) { michael@0: return NULL; michael@0: } michael@0: michael@0: SkMemset16Proc SkMemset16GetPlatformProc() { michael@0: if (cachedHasSSE2()) { michael@0: return sk_memset16_SSE2; michael@0: } else { michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: SkMemset32Proc SkMemset32GetPlatformProc() { michael@0: if (cachedHasSSE2()) { michael@0: return sk_memset32_SSE2; michael@0: } else { michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) { michael@0: if (!cachedHasSSE2()) { michael@0: return NULL; michael@0: } michael@0: switch (type) { michael@0: case kDilateX_SkMorphologyProcType: michael@0: return SkDilateX_SSE2; michael@0: case kDilateY_SkMorphologyProcType: michael@0: return SkDilateY_SSE2; michael@0: case kErodeX_SkMorphologyProcType: michael@0: return SkErodeX_SSE2; michael@0: case kErodeY_SkMorphologyProcType: michael@0: return SkErodeY_SSE2; michael@0: default: michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX, michael@0: SkBoxBlurProc* boxBlurY, michael@0: SkBoxBlurProc* boxBlurXY, michael@0: SkBoxBlurProc* boxBlurYX) { michael@0: #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION michael@0: return false; michael@0: #else michael@0: if (!cachedHasSSE2()) { michael@0: return false; michael@0: } michael@0: return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX); michael@0: #endif michael@0: } michael@0: michael@0: SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning michael@0: michael@0: SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { michael@0: if (cachedHasSSE2()) { michael@0: return ColorRect32_SSE2; michael@0: } else { michael@0: return NULL; michael@0: } michael@0: }