|
1 |
|
2 #include "SkBlitMask.h" |
|
3 #include "SkColor_opts_neon.h" |
|
4 |
|
5 static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB, |
|
6 const void* SK_RESTRICT maskPtr, size_t maskRB, |
|
7 SkColor, int width, int height) { |
|
8 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
|
9 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
|
10 |
|
11 maskRB -= width; |
|
12 dstRB -= (width << 2); |
|
13 do { |
|
14 int w = width; |
|
15 while (w >= 8) { |
|
16 uint8x8_t vmask = vld1_u8(mask); |
|
17 uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); |
|
18 uint8x8x4_t vdevice = vld4_u8((uint8_t*)device); |
|
19 |
|
20 vdevice = SkAlphaMulQ_neon8(vdevice, vscale); |
|
21 vdevice.val[NEON_A] += vmask; |
|
22 |
|
23 vst4_u8((uint8_t*)device, vdevice); |
|
24 |
|
25 mask += 8; |
|
26 device += 8; |
|
27 w -= 8; |
|
28 } |
|
29 while (w-- > 0) { |
|
30 unsigned aa = *mask++; |
|
31 *device = (aa << SK_A32_SHIFT) |
|
32 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
|
33 device += 1; |
|
34 }; |
|
35 device = (uint32_t*)((char*)device + dstRB); |
|
36 mask += maskRB; |
|
37 } while (--height != 0); |
|
38 } |
|
39 |
|
40 template <bool isColor> |
|
41 static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB, |
|
42 const void* SK_RESTRICT maskPtr, size_t maskRB, |
|
43 SkColor color, int width, int height) { |
|
44 SkPMColor pmc = SkPreMultiplyColor(color); |
|
45 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
|
46 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
|
47 uint8x8x4_t vpmc; |
|
48 |
|
49 maskRB -= width; |
|
50 dstRB -= (width << 2); |
|
51 |
|
52 if (width >= 8) { |
|
53 vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); |
|
54 vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); |
|
55 vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); |
|
56 vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); |
|
57 } |
|
58 do { |
|
59 int w = width; |
|
60 while (w >= 8) { |
|
61 uint8x8_t vmask = vld1_u8(mask); |
|
62 uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask); |
|
63 if (isColor) { |
|
64 vscale = vsubw_u8(vdupq_n_u16(256), |
|
65 SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)); |
|
66 } else { |
|
67 vscale = vsubw_u8(vdupq_n_u16(256), vmask); |
|
68 } |
|
69 uint8x8x4_t vdev = vld4_u8((uint8_t*)device); |
|
70 |
|
71 vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256) |
|
72 + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); |
|
73 vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256) |
|
74 + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); |
|
75 vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256) |
|
76 + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); |
|
77 vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256) |
|
78 + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); |
|
79 |
|
80 vst4_u8((uint8_t*)device, vdev); |
|
81 |
|
82 mask += 8; |
|
83 device += 8; |
|
84 w -= 8; |
|
85 } |
|
86 |
|
87 while (w--) { |
|
88 unsigned aa = *mask++; |
|
89 if (isColor) { |
|
90 *device = SkBlendARGB32(pmc, *device, aa); |
|
91 } else { |
|
92 *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) |
|
93 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
|
94 } |
|
95 device += 1; |
|
96 }; |
|
97 |
|
98 device = (uint32_t*)((char*)device + dstRB); |
|
99 mask += maskRB; |
|
100 |
|
101 } while (--height != 0); |
|
102 } |
|
103 |
|
104 static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB, |
|
105 const void* SK_RESTRICT maskPtr, size_t maskRB, |
|
106 SkColor color, int width, int height) { |
|
107 D32_A8_Opaque_Color_neon<false>(dst, dstRB, maskPtr, maskRB, color, width, height); |
|
108 } |
|
109 |
|
110 static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB, |
|
111 const void* SK_RESTRICT maskPtr, size_t maskRB, |
|
112 SkColor color, int width, int height) { |
|
113 D32_A8_Opaque_Color_neon<true>(dst, dstRB, maskPtr, maskRB, color, width, height); |
|
114 } |
|
115 |
|
116 SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) { |
|
117 if (SK_ColorBLACK == color) { |
|
118 return D32_A8_Black_neon; |
|
119 } else if (0xFF == SkColorGetA(color)) { |
|
120 return D32_A8_Opaque_neon; |
|
121 } else { |
|
122 return D32_A8_Color_neon; |
|
123 } |
|
124 } |
|
125 |
|
126 //////////////////////////////////////////////////////////////////////////////// |
|
127 |
|
128 void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[], |
|
129 SkColor color, int width, |
|
130 SkPMColor opaqueDst) { |
|
131 int colR = SkColorGetR(color); |
|
132 int colG = SkColorGetG(color); |
|
133 int colB = SkColorGetB(color); |
|
134 |
|
135 uint8x8_t vcolR, vcolG, vcolB; |
|
136 uint8x8_t vopqDstA, vopqDstR, vopqDstG, vopqDstB; |
|
137 |
|
138 if (width >= 8) { |
|
139 vcolR = vdup_n_u8(colR); |
|
140 vcolG = vdup_n_u8(colG); |
|
141 vcolB = vdup_n_u8(colB); |
|
142 vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst)); |
|
143 vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst)); |
|
144 vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst)); |
|
145 vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst)); |
|
146 } |
|
147 |
|
148 while (width >= 8) { |
|
149 uint8x8x4_t vdst; |
|
150 uint16x8_t vmask; |
|
151 uint16x8_t vmaskR, vmaskG, vmaskB; |
|
152 uint8x8_t vsel_trans, vsel_opq; |
|
153 |
|
154 vdst = vld4_u8((uint8_t*)dst); |
|
155 vmask = vld1q_u16(src); |
|
156 |
|
157 // Prepare compare masks |
|
158 vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0))); |
|
159 vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF))); |
|
160 |
|
161 // Get all the color masks on 5 bits |
|
162 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT); |
|
163 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS), |
|
164 SK_B16_BITS + SK_R16_BITS + 1); |
|
165 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK); |
|
166 |
|
167 // Upscale to 0..32 |
|
168 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4); |
|
169 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4); |
|
170 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4); |
|
171 |
|
172 vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF)); |
|
173 vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]); |
|
174 |
|
175 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR); |
|
176 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG); |
|
177 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB); |
|
178 |
|
179 vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]); |
|
180 vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]); |
|
181 vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]); |
|
182 |
|
183 vst4_u8((uint8_t*)dst, vdst); |
|
184 |
|
185 dst += 8; |
|
186 src += 8; |
|
187 width -= 8; |
|
188 } |
|
189 |
|
190 // Leftovers |
|
191 for (int i = 0; i < width; i++) { |
|
192 dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i], |
|
193 opaqueDst); |
|
194 } |
|
195 } |
|
196 |
|
197 void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[], |
|
198 SkColor color, int width, SkPMColor) { |
|
199 int colA = SkColorGetA(color); |
|
200 int colR = SkColorGetR(color); |
|
201 int colG = SkColorGetG(color); |
|
202 int colB = SkColorGetB(color); |
|
203 |
|
204 colA = SkAlpha255To256(colA); |
|
205 |
|
206 uint8x8_t vcolR, vcolG, vcolB; |
|
207 uint16x8_t vcolA; |
|
208 |
|
209 if (width >= 8) { |
|
210 vcolA = vdupq_n_u16(colA); |
|
211 vcolR = vdup_n_u8(colR); |
|
212 vcolG = vdup_n_u8(colG); |
|
213 vcolB = vdup_n_u8(colB); |
|
214 } |
|
215 |
|
216 while (width >= 8) { |
|
217 uint8x8x4_t vdst; |
|
218 uint16x8_t vmask; |
|
219 uint16x8_t vmaskR, vmaskG, vmaskB; |
|
220 |
|
221 vdst = vld4_u8((uint8_t*)dst); |
|
222 vmask = vld1q_u16(src); |
|
223 |
|
224 // Get all the color masks on 5 bits |
|
225 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT); |
|
226 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS), |
|
227 SK_B16_BITS + SK_R16_BITS + 1); |
|
228 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK); |
|
229 |
|
230 // Upscale to 0..32 |
|
231 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4); |
|
232 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4); |
|
233 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4); |
|
234 |
|
235 vmaskR = vshrq_n_u16(vmaskR * vcolA, 8); |
|
236 vmaskG = vshrq_n_u16(vmaskG * vcolA, 8); |
|
237 vmaskB = vshrq_n_u16(vmaskB * vcolA, 8); |
|
238 |
|
239 vdst.val[NEON_A] = vdup_n_u8(0xFF); |
|
240 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR); |
|
241 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG); |
|
242 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB); |
|
243 |
|
244 vst4_u8((uint8_t*)dst, vdst); |
|
245 |
|
246 dst += 8; |
|
247 src += 8; |
|
248 width -= 8; |
|
249 } |
|
250 |
|
251 for (int i = 0; i < width; i++) { |
|
252 dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]); |
|
253 } |
|
254 } |