gfx/skia/trunk/src/opts/SkBlitMask_opts_arm_neon.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/skia/trunk/src/opts/SkBlitMask_opts_arm_neon.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,254 @@
     1.4 +
     1.5 +#include "SkBlitMask.h"
     1.6 +#include "SkColor_opts_neon.h"
     1.7 +
     1.8 +static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB,
     1.9 +                              const void* SK_RESTRICT maskPtr, size_t maskRB,
    1.10 +                              SkColor, int width, int height) {
    1.11 +    SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
    1.12 +    const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
    1.13 +
    1.14 +    maskRB -= width;
    1.15 +    dstRB -= (width << 2);
    1.16 +    do {
    1.17 +        int w = width;
    1.18 +        while (w >= 8) {
    1.19 +            uint8x8_t vmask = vld1_u8(mask);
    1.20 +            uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask);
    1.21 +            uint8x8x4_t vdevice = vld4_u8((uint8_t*)device);
    1.22 +
    1.23 +            vdevice = SkAlphaMulQ_neon8(vdevice, vscale);
    1.24 +            vdevice.val[NEON_A] += vmask;
    1.25 +
    1.26 +            vst4_u8((uint8_t*)device, vdevice);
    1.27 +
    1.28 +            mask += 8;
    1.29 +            device += 8;
    1.30 +            w -= 8;
    1.31 +        }
    1.32 +        while (w-- > 0) {
    1.33 +            unsigned aa = *mask++;
    1.34 +            *device = (aa << SK_A32_SHIFT)
    1.35 +                        + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
    1.36 +            device += 1;
    1.37 +        };
    1.38 +        device = (uint32_t*)((char*)device + dstRB);
    1.39 +        mask += maskRB;
    1.40 +    } while (--height != 0);
    1.41 +}
    1.42 +
    1.43 +template <bool isColor>
    1.44 +static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
    1.45 +                                     const void* SK_RESTRICT maskPtr, size_t maskRB,
    1.46 +                                     SkColor color, int width, int height) {
    1.47 +    SkPMColor pmc = SkPreMultiplyColor(color);
    1.48 +    SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
    1.49 +    const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
    1.50 +    uint8x8x4_t vpmc;
    1.51 +
    1.52 +    maskRB -= width;
    1.53 +    dstRB -= (width << 2);
    1.54 +
    1.55 +    if (width >= 8) {
    1.56 +        vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc));
    1.57 +        vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc));
    1.58 +        vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc));
    1.59 +        vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc));
    1.60 +    }
    1.61 +    do {
    1.62 +        int w = width;
    1.63 +        while (w >= 8) {
    1.64 +            uint8x8_t vmask = vld1_u8(mask);
    1.65 +            uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask);
    1.66 +            if (isColor) {
    1.67 +                vscale = vsubw_u8(vdupq_n_u16(256),
    1.68 +                            SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256));
    1.69 +            } else {
    1.70 +                vscale = vsubw_u8(vdupq_n_u16(256), vmask);
    1.71 +            }
    1.72 +            uint8x8x4_t vdev = vld4_u8((uint8_t*)device);
    1.73 +
    1.74 +            vdev.val[NEON_A] =   SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)
    1.75 +                               + SkAlphaMul_neon8(vdev.val[NEON_A], vscale);
    1.76 +            vdev.val[NEON_R] =   SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256)
    1.77 +                               + SkAlphaMul_neon8(vdev.val[NEON_R], vscale);
    1.78 +            vdev.val[NEON_G] =   SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256)
    1.79 +                               + SkAlphaMul_neon8(vdev.val[NEON_G], vscale);
    1.80 +            vdev.val[NEON_B] =   SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256)
    1.81 +                               + SkAlphaMul_neon8(vdev.val[NEON_B], vscale);
    1.82 +
    1.83 +            vst4_u8((uint8_t*)device, vdev);
    1.84 +
    1.85 +            mask += 8;
    1.86 +            device += 8;
    1.87 +            w -= 8;
    1.88 +        }
    1.89 +
    1.90 +        while (w--) {
    1.91 +            unsigned aa = *mask++;
    1.92 +            if (isColor) {
    1.93 +                *device = SkBlendARGB32(pmc, *device, aa);
    1.94 +            } else {
    1.95 +                *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa))
    1.96 +                            + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
    1.97 +            }
    1.98 +            device += 1;
    1.99 +        };
   1.100 +
   1.101 +        device = (uint32_t*)((char*)device + dstRB);
   1.102 +        mask += maskRB;
   1.103 +
   1.104 +    } while (--height != 0);
   1.105 +}
   1.106 +
   1.107 +static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB,
   1.108 +                               const void* SK_RESTRICT maskPtr, size_t maskRB,
   1.109 +                               SkColor color, int width, int height) {
   1.110 +    D32_A8_Opaque_Color_neon<false>(dst, dstRB, maskPtr, maskRB, color, width, height);
   1.111 +}
   1.112 +
   1.113 +static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
   1.114 +                              const void* SK_RESTRICT maskPtr, size_t maskRB,
   1.115 +                              SkColor color, int width, int height) {
   1.116 +    D32_A8_Opaque_Color_neon<true>(dst, dstRB, maskPtr, maskRB, color, width, height);
   1.117 +}
   1.118 +
   1.119 +SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) {
   1.120 +    if (SK_ColorBLACK == color) {
   1.121 +        return D32_A8_Black_neon;
   1.122 +    } else if (0xFF == SkColorGetA(color)) {
   1.123 +        return D32_A8_Opaque_neon;
   1.124 +    } else {
   1.125 +        return D32_A8_Color_neon;
   1.126 +    }
   1.127 +}
   1.128 +
   1.129 +////////////////////////////////////////////////////////////////////////////////
   1.130 +
   1.131 +void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[],
   1.132 +                                        SkColor color, int width,
   1.133 +                                        SkPMColor opaqueDst) {
   1.134 +    int colR = SkColorGetR(color);
   1.135 +    int colG = SkColorGetG(color);
   1.136 +    int colB = SkColorGetB(color);
   1.137 +
   1.138 +    uint8x8_t vcolR, vcolG, vcolB;
   1.139 +    uint8x8_t vopqDstA, vopqDstR, vopqDstG, vopqDstB;
   1.140 +
   1.141 +    if (width >= 8) {
   1.142 +        vcolR = vdup_n_u8(colR);
   1.143 +        vcolG = vdup_n_u8(colG);
   1.144 +        vcolB = vdup_n_u8(colB);
   1.145 +        vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst));
   1.146 +        vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst));
   1.147 +        vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst));
   1.148 +        vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst));
   1.149 +    }
   1.150 +
   1.151 +    while (width >= 8) {
   1.152 +        uint8x8x4_t vdst;
   1.153 +        uint16x8_t vmask;
   1.154 +        uint16x8_t vmaskR, vmaskG, vmaskB;
   1.155 +        uint8x8_t vsel_trans, vsel_opq;
   1.156 +
   1.157 +        vdst = vld4_u8((uint8_t*)dst);
   1.158 +        vmask = vld1q_u16(src);
   1.159 +
   1.160 +        // Prepare compare masks
   1.161 +        vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0)));
   1.162 +        vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF)));
   1.163 +
   1.164 +        // Get all the color masks on 5 bits
   1.165 +        vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
   1.166 +        vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
   1.167 +                             SK_B16_BITS + SK_R16_BITS + 1);
   1.168 +        vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
   1.169 +
   1.170 +        // Upscale to 0..32
   1.171 +        vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
   1.172 +        vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
   1.173 +        vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
   1.174 +
   1.175 +        vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF));
   1.176 +        vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]);
   1.177 +
   1.178 +        vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
   1.179 +        vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
   1.180 +        vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
   1.181 +
   1.182 +        vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]);
   1.183 +        vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]);
   1.184 +        vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]);
   1.185 +
   1.186 +        vst4_u8((uint8_t*)dst, vdst);
   1.187 +
   1.188 +        dst += 8;
   1.189 +        src += 8;
   1.190 +        width -= 8;
   1.191 +    }
   1.192 +
   1.193 +    // Leftovers
   1.194 +    for (int i = 0; i < width; i++) {
   1.195 +        dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i],
   1.196 +                                    opaqueDst);
   1.197 +    }
   1.198 +}
   1.199 +
   1.200 +void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[],
   1.201 +                                   SkColor color, int width, SkPMColor) {
   1.202 +    int colA = SkColorGetA(color);
   1.203 +    int colR = SkColorGetR(color);
   1.204 +    int colG = SkColorGetG(color);
   1.205 +    int colB = SkColorGetB(color);
   1.206 +
   1.207 +    colA = SkAlpha255To256(colA);
   1.208 +
   1.209 +    uint8x8_t vcolR, vcolG, vcolB;
   1.210 +    uint16x8_t vcolA;
   1.211 +
   1.212 +    if (width >= 8) {
   1.213 +        vcolA = vdupq_n_u16(colA);
   1.214 +        vcolR = vdup_n_u8(colR);
   1.215 +        vcolG = vdup_n_u8(colG);
   1.216 +        vcolB = vdup_n_u8(colB);
   1.217 +    }
   1.218 +
   1.219 +    while (width >= 8) {
   1.220 +        uint8x8x4_t vdst;
   1.221 +        uint16x8_t vmask;
   1.222 +        uint16x8_t vmaskR, vmaskG, vmaskB;
   1.223 +
   1.224 +        vdst = vld4_u8((uint8_t*)dst);
   1.225 +        vmask = vld1q_u16(src);
   1.226 +
   1.227 +        // Get all the color masks on 5 bits
   1.228 +        vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
   1.229 +        vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
   1.230 +                             SK_B16_BITS + SK_R16_BITS + 1);
   1.231 +        vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
   1.232 +
   1.233 +        // Upscale to 0..32
   1.234 +        vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
   1.235 +        vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
   1.236 +        vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
   1.237 +
   1.238 +        vmaskR = vshrq_n_u16(vmaskR * vcolA, 8);
   1.239 +        vmaskG = vshrq_n_u16(vmaskG * vcolA, 8);
   1.240 +        vmaskB = vshrq_n_u16(vmaskB * vcolA, 8);
   1.241 +
   1.242 +        vdst.val[NEON_A] = vdup_n_u8(0xFF);
   1.243 +        vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
   1.244 +        vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
   1.245 +        vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
   1.246 +
   1.247 +        vst4_u8((uint8_t*)dst, vdst);
   1.248 +
   1.249 +        dst += 8;
   1.250 +        src += 8;
   1.251 +        width -= 8;
   1.252 +    }
   1.253 +
   1.254 +    for (int i = 0; i < width; i++) {
   1.255 +        dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]);
   1.256 +    }
   1.257 +}

mercurial