gfx/skia/trunk/src/opts/opts_check_SSE2.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/skia/trunk/src/opts/opts_check_SSE2.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,309 @@
     1.4 +/*
     1.5 + * Copyright 2009 The Android Open Source Project
     1.6 + *
     1.7 + * Use of this source code is governed by a BSD-style license that can be
     1.8 + * found in the LICENSE file.
     1.9 + */
    1.10 +
    1.11 +#include "SkBitmapProcState_opts_SSE2.h"
    1.12 +#include "SkBitmapProcState_opts_SSSE3.h"
    1.13 +#include "SkBitmapFilter_opts_SSE2.h"
    1.14 +#include "SkBlitMask.h"
    1.15 +#include "SkBlitRow.h"
    1.16 +#include "SkBlitRect_opts_SSE2.h"
    1.17 +#include "SkBlitRow_opts_SSE2.h"
    1.18 +#include "SkBlurImage_opts_SSE2.h"
    1.19 +#include "SkUtils_opts_SSE2.h"
    1.20 +#include "SkUtils.h"
    1.21 +#include "SkMorphology_opts.h"
    1.22 +#include "SkMorphology_opts_SSE2.h"
    1.23 +
    1.24 +#include "SkRTConf.h"
    1.25 +
    1.26 +#if defined(_MSC_VER) && defined(_WIN64)
    1.27 +#include <intrin.h>
    1.28 +#endif
    1.29 +
    1.30 +/* This file must *not* be compiled with -msse or -msse2, otherwise
    1.31 +   gcc may generate sse2 even for scalar ops (and thus give an invalid
    1.32 +   instruction on Pentium3 on the code below).  Only files named *_SSE2.cpp
    1.33 +   in this directory should be compiled with -msse2. */
    1.34 +
    1.35 +
    1.36 +#ifdef _MSC_VER
    1.37 +static inline void getcpuid(int info_type, int info[4]) {
    1.38 +#if defined(_WIN64)
    1.39 +    __cpuid(info, info_type);
    1.40 +#else
    1.41 +    __asm {
    1.42 +        mov    eax, [info_type]
    1.43 +        cpuid
    1.44 +        mov    edi, [info]
    1.45 +        mov    [edi], eax
    1.46 +        mov    [edi+4], ebx
    1.47 +        mov    [edi+8], ecx
    1.48 +        mov    [edi+12], edx
    1.49 +    }
    1.50 +#endif
    1.51 +}
    1.52 +#else
    1.53 +#if defined(__x86_64__)
    1.54 +static inline void getcpuid(int info_type, int info[4]) {
    1.55 +    asm volatile (
    1.56 +        "cpuid \n\t"
    1.57 +        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
    1.58 +        : "a"(info_type)
    1.59 +    );
    1.60 +}
    1.61 +#else
    1.62 +static inline void getcpuid(int info_type, int info[4]) {
    1.63 +    // We save and restore ebx, so this code can be compatible with -fPIC
    1.64 +    asm volatile (
    1.65 +        "pushl %%ebx      \n\t"
    1.66 +        "cpuid            \n\t"
    1.67 +        "movl %%ebx, %1   \n\t"
    1.68 +        "popl %%ebx       \n\t"
    1.69 +        : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
    1.70 +        : "a"(info_type)
    1.71 +    );
    1.72 +}
    1.73 +#endif
    1.74 +#endif
    1.75 +
    1.76 +#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
    1.77 +/* All x86_64 machines have SSE2, or we know it's supported at compile time,  so don't even bother checking. */
    1.78 +static inline bool hasSSE2() {
    1.79 +    return true;
    1.80 +}
    1.81 +#else
    1.82 +
    1.83 +static inline bool hasSSE2() {
    1.84 +    int cpu_info[4] = { 0 };
    1.85 +    getcpuid(1, cpu_info);
    1.86 +    return (cpu_info[3] & (1<<26)) != 0;
    1.87 +}
    1.88 +#endif
    1.89 +
    1.90 +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
    1.91 +/* If we know SSSE3 is supported at compile time, don't even bother checking. */
    1.92 +static inline bool hasSSSE3() {
    1.93 +    return true;
    1.94 +}
    1.95 +#else
    1.96 +
    1.97 +static inline bool hasSSSE3() {
    1.98 +    int cpu_info[4] = { 0 };
    1.99 +    getcpuid(1, cpu_info);
   1.100 +    return (cpu_info[2] & 0x200) != 0;
   1.101 +}
   1.102 +#endif
   1.103 +
   1.104 +static bool cachedHasSSE2() {
   1.105 +    static bool gHasSSE2 = hasSSE2();
   1.106 +    return gHasSSE2;
   1.107 +}
   1.108 +
   1.109 +static bool cachedHasSSSE3() {
   1.110 +    static bool gHasSSSE3 = hasSSSE3();
   1.111 +    return gHasSSSE3;
   1.112 +}
   1.113 +
   1.114 +SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
   1.115 +
   1.116 +void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
   1.117 +    if (cachedHasSSE2()) {
   1.118 +        procs->fExtraHorizontalReads = 3;
   1.119 +        procs->fConvolveVertically = &convolveVertically_SSE2;
   1.120 +        procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
   1.121 +        procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
   1.122 +        procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
   1.123 +    }
   1.124 +}
   1.125 +
   1.126 +void SkBitmapProcState::platformProcs() {
   1.127 +    if (cachedHasSSSE3()) {
   1.128 +        if (fSampleProc32 == S32_opaque_D32_filter_DX) {
   1.129 +            fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
   1.130 +        } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
   1.131 +            fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
   1.132 +        }
   1.133 +
   1.134 +        if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
   1.135 +            fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
   1.136 +        } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
   1.137 +            fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
   1.138 +        }
   1.139 +    } else if (cachedHasSSE2()) {
   1.140 +        if (fSampleProc32 == S32_opaque_D32_filter_DX) {
   1.141 +            fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
   1.142 +        } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
   1.143 +            fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
   1.144 +        }
   1.145 +
   1.146 +        if (fSampleProc16 == S32_D16_filter_DX) {
   1.147 +            fSampleProc16 = S32_D16_filter_DX_SSE2;
   1.148 +        }
   1.149 +    }
   1.150 +
   1.151 +    if (cachedHasSSSE3() || cachedHasSSE2()) {
   1.152 +        if (fMatrixProc == ClampX_ClampY_filter_scale) {
   1.153 +            fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
   1.154 +        } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
   1.155 +            fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
   1.156 +        }
   1.157 +
   1.158 +        if (fMatrixProc == ClampX_ClampY_filter_affine) {
   1.159 +            fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
   1.160 +        } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
   1.161 +            fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
   1.162 +        }
   1.163 +        if (c_hqfilter_sse) {
   1.164 +            if (fShaderProc32 == highQualityFilter32) {
   1.165 +                fShaderProc32 = highQualityFilter_SSE2;
   1.166 +            }
   1.167 +        }
   1.168 +    }
   1.169 +}
   1.170 +
   1.171 +static SkBlitRow::Proc platform_16_procs[] = {
   1.172 +    S32_D565_Opaque_SSE2,               // S32_D565_Opaque
   1.173 +    NULL,                               // S32_D565_Blend
   1.174 +    S32A_D565_Opaque_SSE2,              // S32A_D565_Opaque
   1.175 +    NULL,                               // S32A_D565_Blend
   1.176 +    S32_D565_Opaque_Dither_SSE2,        // S32_D565_Opaque_Dither
   1.177 +    NULL,                               // S32_D565_Blend_Dither
   1.178 +    S32A_D565_Opaque_Dither_SSE2,       // S32A_D565_Opaque_Dither
   1.179 +    NULL,                               // S32A_D565_Blend_Dither
   1.180 +};
   1.181 +
   1.182 +static SkBlitRow::Proc32 platform_32_procs[] = {
   1.183 +    NULL,                               // S32_Opaque,
   1.184 +    S32_Blend_BlitRow32_SSE2,           // S32_Blend,
   1.185 +    S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
   1.186 +    S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
   1.187 +};
   1.188 +
   1.189 +SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
   1.190 +    if (cachedHasSSE2()) {
   1.191 +        return platform_16_procs[flags];
   1.192 +    } else {
   1.193 +        return NULL;
   1.194 +    }
   1.195 +}
   1.196 +
   1.197 +SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
   1.198 +    if (cachedHasSSE2()) {
   1.199 +        return Color32_SSE2;
   1.200 +    } else {
   1.201 +        return NULL;
   1.202 +    }
   1.203 +}
   1.204 +
   1.205 +SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
   1.206 +    if (cachedHasSSE2()) {
   1.207 +        return platform_32_procs[flags];
   1.208 +    } else {
   1.209 +        return NULL;
   1.210 +    }
   1.211 +}
   1.212 +
   1.213 +
   1.214 +SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
   1.215 +                                                     SkMask::Format maskFormat,
   1.216 +                                                     SkColor color) {
   1.217 +    if (SkMask::kA8_Format != maskFormat) {
   1.218 +        return NULL;
   1.219 +    }
   1.220 +
   1.221 +    ColorProc proc = NULL;
   1.222 +    if (cachedHasSSE2()) {
   1.223 +        switch (dstConfig) {
   1.224 +            case SkBitmap::kARGB_8888_Config:
   1.225 +                // The SSE2 version is not (yet) faster for black, so we check
   1.226 +                // for that.
   1.227 +                if (SK_ColorBLACK != color) {
   1.228 +                    proc = SkARGB32_A8_BlitMask_SSE2;
   1.229 +                }
   1.230 +                break;
   1.231 +            default:
   1.232 +                break;
   1.233 +        }
   1.234 +    }
   1.235 +    return proc;
   1.236 +}
   1.237 +
   1.238 +SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
   1.239 +    if (cachedHasSSE2()) {
   1.240 +        if (isOpaque) {
   1.241 +            return SkBlitLCD16OpaqueRow_SSE2;
   1.242 +        } else {
   1.243 +            return SkBlitLCD16Row_SSE2;
   1.244 +        }
   1.245 +    } else {
   1.246 +        return NULL;
   1.247 +    }
   1.248 +
   1.249 +}
   1.250 +SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
   1.251 +                                                 SkMask::Format maskFormat,
   1.252 +                                                 RowFlags flags) {
   1.253 +    return NULL;
   1.254 +}
   1.255 +
   1.256 +SkMemset16Proc SkMemset16GetPlatformProc() {
   1.257 +    if (cachedHasSSE2()) {
   1.258 +        return sk_memset16_SSE2;
   1.259 +    } else {
   1.260 +        return NULL;
   1.261 +    }
   1.262 +}
   1.263 +
   1.264 +SkMemset32Proc SkMemset32GetPlatformProc() {
   1.265 +    if (cachedHasSSE2()) {
   1.266 +        return sk_memset32_SSE2;
   1.267 +    } else {
   1.268 +        return NULL;
   1.269 +    }
   1.270 +}
   1.271 +
   1.272 +SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
   1.273 +    if (!cachedHasSSE2()) {
   1.274 +        return NULL;
   1.275 +    }
   1.276 +    switch (type) {
   1.277 +        case kDilateX_SkMorphologyProcType:
   1.278 +            return SkDilateX_SSE2;
   1.279 +        case kDilateY_SkMorphologyProcType:
   1.280 +            return SkDilateY_SSE2;
   1.281 +        case kErodeX_SkMorphologyProcType:
   1.282 +            return SkErodeX_SSE2;
   1.283 +        case kErodeY_SkMorphologyProcType:
   1.284 +            return SkErodeY_SSE2;
   1.285 +        default:
   1.286 +            return NULL;
   1.287 +    }
   1.288 +}
   1.289 +
   1.290 +bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX,
   1.291 +                               SkBoxBlurProc* boxBlurY,
   1.292 +                               SkBoxBlurProc* boxBlurXY,
   1.293 +                               SkBoxBlurProc* boxBlurYX) {
   1.294 +#ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
   1.295 +    return false;
   1.296 +#else
   1.297 +    if (!cachedHasSSE2()) {
   1.298 +        return false;
   1.299 +    }
   1.300 +    return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
   1.301 +#endif
   1.302 +}
   1.303 +
   1.304 +SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
   1.305 +
   1.306 +SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
   1.307 +    if (cachedHasSSE2()) {
   1.308 +        return ColorRect32_SSE2;
   1.309 +    } else {
   1.310 +        return NULL;
   1.311 +    }
   1.312 +}

mercurial