gfx/skia/trunk/src/opts/SkBlitMask_opts_arm_neon.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double-key logic depending on private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Open question: how to convince FindCookie callers to test the condition
and pass a nullptr when double-key logic is disabled.

michael@0 1
michael@0 2 #include "SkBlitMask.h"
michael@0 3 #include "SkColor_opts_neon.h"
michael@0 4
michael@0 5 static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB,
michael@0 6 const void* SK_RESTRICT maskPtr, size_t maskRB,
michael@0 7 SkColor, int width, int height) {
michael@0 8 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
michael@0 9 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
michael@0 10
michael@0 11 maskRB -= width;
michael@0 12 dstRB -= (width << 2);
michael@0 13 do {
michael@0 14 int w = width;
michael@0 15 while (w >= 8) {
michael@0 16 uint8x8_t vmask = vld1_u8(mask);
michael@0 17 uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask);
michael@0 18 uint8x8x4_t vdevice = vld4_u8((uint8_t*)device);
michael@0 19
michael@0 20 vdevice = SkAlphaMulQ_neon8(vdevice, vscale);
michael@0 21 vdevice.val[NEON_A] += vmask;
michael@0 22
michael@0 23 vst4_u8((uint8_t*)device, vdevice);
michael@0 24
michael@0 25 mask += 8;
michael@0 26 device += 8;
michael@0 27 w -= 8;
michael@0 28 }
michael@0 29 while (w-- > 0) {
michael@0 30 unsigned aa = *mask++;
michael@0 31 *device = (aa << SK_A32_SHIFT)
michael@0 32 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
michael@0 33 device += 1;
michael@0 34 };
michael@0 35 device = (uint32_t*)((char*)device + dstRB);
michael@0 36 mask += maskRB;
michael@0 37 } while (--height != 0);
michael@0 38 }
michael@0 39
michael@0 40 template <bool isColor>
michael@0 41 static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
michael@0 42 const void* SK_RESTRICT maskPtr, size_t maskRB,
michael@0 43 SkColor color, int width, int height) {
michael@0 44 SkPMColor pmc = SkPreMultiplyColor(color);
michael@0 45 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
michael@0 46 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
michael@0 47 uint8x8x4_t vpmc;
michael@0 48
michael@0 49 maskRB -= width;
michael@0 50 dstRB -= (width << 2);
michael@0 51
michael@0 52 if (width >= 8) {
michael@0 53 vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc));
michael@0 54 vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc));
michael@0 55 vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc));
michael@0 56 vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc));
michael@0 57 }
michael@0 58 do {
michael@0 59 int w = width;
michael@0 60 while (w >= 8) {
michael@0 61 uint8x8_t vmask = vld1_u8(mask);
michael@0 62 uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask);
michael@0 63 if (isColor) {
michael@0 64 vscale = vsubw_u8(vdupq_n_u16(256),
michael@0 65 SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256));
michael@0 66 } else {
michael@0 67 vscale = vsubw_u8(vdupq_n_u16(256), vmask);
michael@0 68 }
michael@0 69 uint8x8x4_t vdev = vld4_u8((uint8_t*)device);
michael@0 70
michael@0 71 vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)
michael@0 72 + SkAlphaMul_neon8(vdev.val[NEON_A], vscale);
michael@0 73 vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256)
michael@0 74 + SkAlphaMul_neon8(vdev.val[NEON_R], vscale);
michael@0 75 vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256)
michael@0 76 + SkAlphaMul_neon8(vdev.val[NEON_G], vscale);
michael@0 77 vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256)
michael@0 78 + SkAlphaMul_neon8(vdev.val[NEON_B], vscale);
michael@0 79
michael@0 80 vst4_u8((uint8_t*)device, vdev);
michael@0 81
michael@0 82 mask += 8;
michael@0 83 device += 8;
michael@0 84 w -= 8;
michael@0 85 }
michael@0 86
michael@0 87 while (w--) {
michael@0 88 unsigned aa = *mask++;
michael@0 89 if (isColor) {
michael@0 90 *device = SkBlendARGB32(pmc, *device, aa);
michael@0 91 } else {
michael@0 92 *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa))
michael@0 93 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
michael@0 94 }
michael@0 95 device += 1;
michael@0 96 };
michael@0 97
michael@0 98 device = (uint32_t*)((char*)device + dstRB);
michael@0 99 mask += maskRB;
michael@0 100
michael@0 101 } while (--height != 0);
michael@0 102 }
michael@0 103
michael@0 104 static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB,
michael@0 105 const void* SK_RESTRICT maskPtr, size_t maskRB,
michael@0 106 SkColor color, int width, int height) {
michael@0 107 D32_A8_Opaque_Color_neon<false>(dst, dstRB, maskPtr, maskRB, color, width, height);
michael@0 108 }
michael@0 109
michael@0 110 static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
michael@0 111 const void* SK_RESTRICT maskPtr, size_t maskRB,
michael@0 112 SkColor color, int width, int height) {
michael@0 113 D32_A8_Opaque_Color_neon<true>(dst, dstRB, maskPtr, maskRB, color, width, height);
michael@0 114 }
michael@0 115
michael@0 116 SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) {
michael@0 117 if (SK_ColorBLACK == color) {
michael@0 118 return D32_A8_Black_neon;
michael@0 119 } else if (0xFF == SkColorGetA(color)) {
michael@0 120 return D32_A8_Opaque_neon;
michael@0 121 } else {
michael@0 122 return D32_A8_Color_neon;
michael@0 123 }
michael@0 124 }
michael@0 125
michael@0 126 ////////////////////////////////////////////////////////////////////////////////
michael@0 127
michael@0 128 void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[],
michael@0 129 SkColor color, int width,
michael@0 130 SkPMColor opaqueDst) {
michael@0 131 int colR = SkColorGetR(color);
michael@0 132 int colG = SkColorGetG(color);
michael@0 133 int colB = SkColorGetB(color);
michael@0 134
michael@0 135 uint8x8_t vcolR, vcolG, vcolB;
michael@0 136 uint8x8_t vopqDstA, vopqDstR, vopqDstG, vopqDstB;
michael@0 137
michael@0 138 if (width >= 8) {
michael@0 139 vcolR = vdup_n_u8(colR);
michael@0 140 vcolG = vdup_n_u8(colG);
michael@0 141 vcolB = vdup_n_u8(colB);
michael@0 142 vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst));
michael@0 143 vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst));
michael@0 144 vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst));
michael@0 145 vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst));
michael@0 146 }
michael@0 147
michael@0 148 while (width >= 8) {
michael@0 149 uint8x8x4_t vdst;
michael@0 150 uint16x8_t vmask;
michael@0 151 uint16x8_t vmaskR, vmaskG, vmaskB;
michael@0 152 uint8x8_t vsel_trans, vsel_opq;
michael@0 153
michael@0 154 vdst = vld4_u8((uint8_t*)dst);
michael@0 155 vmask = vld1q_u16(src);
michael@0 156
michael@0 157 // Prepare compare masks
michael@0 158 vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0)));
michael@0 159 vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF)));
michael@0 160
michael@0 161 // Get all the color masks on 5 bits
michael@0 162 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
michael@0 163 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
michael@0 164 SK_B16_BITS + SK_R16_BITS + 1);
michael@0 165 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
michael@0 166
michael@0 167 // Upscale to 0..32
michael@0 168 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
michael@0 169 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
michael@0 170 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
michael@0 171
michael@0 172 vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF));
michael@0 173 vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]);
michael@0 174
michael@0 175 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
michael@0 176 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
michael@0 177 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
michael@0 178
michael@0 179 vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]);
michael@0 180 vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]);
michael@0 181 vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]);
michael@0 182
michael@0 183 vst4_u8((uint8_t*)dst, vdst);
michael@0 184
michael@0 185 dst += 8;
michael@0 186 src += 8;
michael@0 187 width -= 8;
michael@0 188 }
michael@0 189
michael@0 190 // Leftovers
michael@0 191 for (int i = 0; i < width; i++) {
michael@0 192 dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i],
michael@0 193 opaqueDst);
michael@0 194 }
michael@0 195 }
michael@0 196
michael@0 197 void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[],
michael@0 198 SkColor color, int width, SkPMColor) {
michael@0 199 int colA = SkColorGetA(color);
michael@0 200 int colR = SkColorGetR(color);
michael@0 201 int colG = SkColorGetG(color);
michael@0 202 int colB = SkColorGetB(color);
michael@0 203
michael@0 204 colA = SkAlpha255To256(colA);
michael@0 205
michael@0 206 uint8x8_t vcolR, vcolG, vcolB;
michael@0 207 uint16x8_t vcolA;
michael@0 208
michael@0 209 if (width >= 8) {
michael@0 210 vcolA = vdupq_n_u16(colA);
michael@0 211 vcolR = vdup_n_u8(colR);
michael@0 212 vcolG = vdup_n_u8(colG);
michael@0 213 vcolB = vdup_n_u8(colB);
michael@0 214 }
michael@0 215
michael@0 216 while (width >= 8) {
michael@0 217 uint8x8x4_t vdst;
michael@0 218 uint16x8_t vmask;
michael@0 219 uint16x8_t vmaskR, vmaskG, vmaskB;
michael@0 220
michael@0 221 vdst = vld4_u8((uint8_t*)dst);
michael@0 222 vmask = vld1q_u16(src);
michael@0 223
michael@0 224 // Get all the color masks on 5 bits
michael@0 225 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
michael@0 226 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
michael@0 227 SK_B16_BITS + SK_R16_BITS + 1);
michael@0 228 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
michael@0 229
michael@0 230 // Upscale to 0..32
michael@0 231 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
michael@0 232 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
michael@0 233 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
michael@0 234
michael@0 235 vmaskR = vshrq_n_u16(vmaskR * vcolA, 8);
michael@0 236 vmaskG = vshrq_n_u16(vmaskG * vcolA, 8);
michael@0 237 vmaskB = vshrq_n_u16(vmaskB * vcolA, 8);
michael@0 238
michael@0 239 vdst.val[NEON_A] = vdup_n_u8(0xFF);
michael@0 240 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
michael@0 241 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
michael@0 242 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
michael@0 243
michael@0 244 vst4_u8((uint8_t*)dst, vdst);
michael@0 245
michael@0 246 dst += 8;
michael@0 247 src += 8;
michael@0 248 width -= 8;
michael@0 249 }
michael@0 250
michael@0 251 for (int i = 0; i < width; i++) {
michael@0 252 dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]);
michael@0 253 }
michael@0 254 }

mercurial