#include "SkXfermode.h"
#include "SkXfermode_proccoeff.h"
#include "SkColorPriv.h"

#include <arm_neon.h>
#include "SkColor_opts_neon.h"
#include "SkXfermode_opts_arm_neon.h"

// Scalar rounded (a * b) / 255 — same contract as the NEON helpers below.
#define SkAlphaMulAlpha(a, b)   SkMulDiv255Round(a, b)


////////////////////////////////////////////////////////////////////////////////
// NEONized skia functions
////////////////////////////////////////////////////////////////////////////////

// (color * alpha) / 255 with rounding, on 8 byte lanes at once.
// Uses the identity round(x / 255) == (x + 128 + ((x + 128) >> 8)) >> 8 on the
// widened 16-bit products, then narrows back to 8 bits.
static inline uint8x8_t SkAlphaMulAlpha_neon8(uint8x8_t color, uint8x8_t alpha) {
    uint16x8_t tmp;
    uint8x8_t ret;

    tmp = vmull_u8(color, alpha);
    tmp = vaddq_u16(tmp, vdupq_n_u16(128));
    tmp = vaddq_u16(tmp, vshrq_n_u16(tmp, 8));

    ret = vshrn_n_u16(tmp, 8);

    return ret;
}

// Same computation as SkAlphaMulAlpha_neon8, but the result stays in 16-bit
// lanes so callers can keep doing wide arithmetic without re-widening.
static inline uint16x8_t SkAlphaMulAlpha_neon8_16(uint8x8_t color, uint8x8_t alpha) {
    uint16x8_t ret;

    ret = vmull_u8(color, alpha);
    ret = vaddq_u16(ret, vdupq_n_u16(128));
    ret = vaddq_u16(ret, vshrq_n_u16(ret, 8));

    ret = vshrq_n_u16(ret, 8);

    return ret;
}

// Rounded divide-by-255 of eight 32-bit values (low four lanes in p1, high
// four in p2), narrowed to eight 8-bit lanes.
// NOTE(review): vmovn_u32 truncates — assumes each input already fits in
// 16 bits (callers clamp to [0, 255*255] first); confirm at call sites.
static inline uint8x8_t SkDiv255Round_neon8_32_8(int32x4_t p1, int32x4_t p2) {
    uint16x8_t tmp;

    tmp = vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(p1)),
                       vmovn_u32(vreinterpretq_u32_s32(p2)));

    tmp += vdupq_n_u16(128);
    tmp += vshrq_n_u16(tmp, 8);

    return vshrn_n_u16(tmp, 8);
}

// Rounded divide-by-255 of eight 16-bit products, result kept in 16 bits.
static inline uint16x8_t SkDiv255Round_neon8_16_16(uint16x8_t prod) {
    prod += vdupq_n_u16(128);
    prod += vshrq_n_u16(prod, 8);

    return vshrq_n_u16(prod, 8);
}

// clamp(val, 0, 255*255) followed by rounded /255, for eight 32-bit values
// split across val1 (low four lanes) and val2 (high four lanes).
// Lanes <= 0 produce 0; lanes >= 255*255 produce 255; others use
// SkDiv255Round_neon8_32_8.
static inline uint8x8_t clamp_div255round_simd8_32(int32x4_t val1,
                                                   int32x4_t val2) {
    uint8x8_t ret;
    uint32x4_t cmp1, cmp2;
    uint16x8_t cmp16;
    uint8x8_t cmp8, cmp8_1;

    // Test if <= 0
    cmp1 = vcleq_s32(val1, vdupq_n_s32(0));
    cmp2 = vcleq_s32(val2, vdupq_n_s32(0));
    cmp16 = vcombine_u16(vmovn_u32(cmp1), vmovn_u32(cmp2));
    cmp8_1 = vmovn_u16(cmp16);

    // Init to zero
    ret = vdup_n_u8(0);

    // Test if >= 255*255
    cmp1 = vcgeq_s32(val1, vdupq_n_s32(255*255));
    cmp2 = vcgeq_s32(val2, vdupq_n_s32(255*255));
    cmp16 = vcombine_u16(vmovn_u32(cmp1), vmovn_u32(cmp2));
    cmp8 = vmovn_u16(cmp16);

    // Insert 255 where true
    ret = vbsl_u8(cmp8, vdup_n_u8(255), ret);

    // Calc SkDiv255Round
    uint8x8_t div = SkDiv255Round_neon8_32_8(val1, val2);

    // Insert where false and previous test false
    cmp8 = cmp8 | cmp8_1;
    ret = vbsl_u8(cmp8, ret, div);

    // Return the final combination
    return ret;
}

////////////////////////////////////////////////////////////////////////////////
// 1 pixel modeprocs
////////////////////////////////////////////////////////////////////////////////

// kSrcATop_Mode, //!< [Da, Sc * Da + (1 - Sa) * Dc]
SkPMColor srcatop_modeproc_neon(SkPMColor src, SkPMColor dst) {
    unsigned sa = SkGetPackedA32(src);
    unsigned da = SkGetPackedA32(dst);
    unsigned isa = 255 - sa;

    uint8x8_t vda, visa, vsrc, vdst;

    vda = vdup_n_u8(da);
    visa = vdup_n_u8(isa);

    // Multiply all four src bytes by Da, all four dst bytes by (1 - Sa).
    uint16x8_t vsrc_wide, vdst_wide;
    vsrc_wide = vmull_u8(vda, vreinterpret_u8_u32(vdup_n_u32(src)));
    vdst_wide = vmull_u8(visa, vreinterpret_u8_u32(vdup_n_u32(dst)));

    // Rounded /255 on both wide products (same trick as SkAlphaMulAlpha_neon8).
    vsrc_wide += vdupq_n_u16(128);
    vsrc_wide += vshrq_n_u16(vsrc_wide, 8);

    vdst_wide += vdupq_n_u16(128);
    vdst_wide += vshrq_n_u16(vdst_wide, 8);

    vsrc = vshrn_n_u16(vsrc_wide, 8);
    vdst = vshrn_n_u16(vdst_wide, 8);

    vsrc += vdst;
    // Result alpha is Da; lane 3 is the alpha byte of the packed pixel
    // (assumes SK_A32_SHIFT == 24 — standard SkPMColor layout).
    vsrc = vset_lane_u8(da, vsrc, 3);

    return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
}

// kDstATop_Mode, //!< [Sa, Sa * Dc + Sc * (1 - Da)]
SkPMColor dstatop_modeproc_neon(SkPMColor src, SkPMColor dst) {
    unsigned sa = SkGetPackedA32(src);
    unsigned da = SkGetPackedA32(dst);
    unsigned ida = 255 - da;

    uint8x8_t vsa, vida, vsrc, vdst;

    vsa = vdup_n_u8(sa);
    vida = vdup_n_u8(ida);

    // src * (1 - Da), dst * Sa, both widened to 16 bits.
    uint16x8_t vsrc_wide, vdst_wide;
    vsrc_wide = vmull_u8(vida, vreinterpret_u8_u32(vdup_n_u32(src)));
    vdst_wide = vmull_u8(vsa, vreinterpret_u8_u32(vdup_n_u32(dst)));

    // Rounded /255.
    vsrc_wide += vdupq_n_u16(128);
    vsrc_wide += vshrq_n_u16(vsrc_wide, 8);

    vdst_wide += vdupq_n_u16(128);
    vdst_wide += vshrq_n_u16(vdst_wide, 8);

    vsrc = vshrn_n_u16(vsrc_wide, 8);
    vdst = vshrn_n_u16(vdst_wide, 8);

    vsrc += vdst;
    // Result alpha is Sa, written into the alpha byte (lane 3).
    vsrc = vset_lane_u8(sa, vsrc, 3);

    return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
}

// kXor_Mode [Sa + Da - 2 * Sa * Da, Sc * (1 - Da) + (1 - Sa) * Dc]
SkPMColor xor_modeproc_neon(SkPMColor src, SkPMColor dst) {
    unsigned sa = SkGetPackedA32(src);
    unsigned da = SkGetPackedA32(dst);
    // Alpha is computed in scalar code; only the color channels go wide.
    unsigned ret_alpha = sa + da - (SkAlphaMulAlpha(sa, da) << 1);
    unsigned isa = 255 - sa;
    unsigned ida = 255 - da;

    uint8x8_t vsrc, vdst, visa, vida;
    uint16x8_t vsrc_wide, vdst_wide;

    visa = vdup_n_u8(isa);
    vida = vdup_n_u8(ida);
    vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
    vdst = vreinterpret_u8_u32(vdup_n_u32(dst));

    vsrc_wide = vmull_u8(vsrc, vida);
    vdst_wide = vmull_u8(vdst, visa);

    // Rounded /255 on both products.
    vsrc_wide += vdupq_n_u16(128);
    vsrc_wide += vshrq_n_u16(vsrc_wide, 8);

    vdst_wide += vdupq_n_u16(128);
    vdst_wide += vshrq_n_u16(vdst_wide, 8);

    vsrc = vshrn_n_u16(vsrc_wide, 8);
    vdst = vshrn_n_u16(vdst_wide, 8);

    vsrc += vdst;

    // Overwrite the alpha byte (lane 3) with the scalar result.
    vsrc = vset_lane_u8(ret_alpha, vsrc, 3);

    return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
}

// kPlus_Mode: per-byte saturating add of src and dst.
SkPMColor plus_modeproc_neon(SkPMColor src, SkPMColor dst) {
    uint8x8_t vsrc, vdst;
    vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
    vdst = vreinterpret_u8_u32(vdup_n_u32(dst));
    vsrc = vqadd_u8(vsrc, vdst);

    return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
}

// kModulate_Mode: per-byte rounded (src * dst) / 255.
SkPMColor modulate_modeproc_neon(SkPMColor src, SkPMColor dst) {
    uint8x8_t vsrc, vdst, vres;
    uint16x8_t vres_wide;

    vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
    vdst = vreinterpret_u8_u32(vdup_n_u32(dst));

    vres_wide = vmull_u8(vsrc, vdst);

    // Rounded /255.
    vres_wide += vdupq_n_u16(128);
    vres_wide += vshrq_n_u16(vres_wide, 8);

    vres = vshrn_n_u16(vres_wide, 8);

    return vget_lane_u32(vreinterpret_u32_u8(vres), 0);
}

////////////////////////////////////////////////////////////////////////////////
// 8 pixels modeprocs
//
// Each proc takes/returns 8 pixels in planar form: val[NEON_A/R/G/B] holds
// the corresponding channel byte of each of the 8 pixels.
////////////////////////////////////////////////////////////////////////////////

// kDstOver_Mode: D + S * (256 - Da), per channel.
uint8x8x4_t dstover_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint16x8_t src_scale;

    src_scale = vsubw_u8(vdupq_n_u16(256), dst.val[NEON_A]);

    ret.val[NEON_A] = dst.val[NEON_A] + SkAlphaMul_neon8(src.val[NEON_A], src_scale);
    ret.val[NEON_R] = dst.val[NEON_R] + SkAlphaMul_neon8(src.val[NEON_R], src_scale);
    ret.val[NEON_G] = dst.val[NEON_G] + SkAlphaMul_neon8(src.val[NEON_G], src_scale);
    ret.val[NEON_B] = dst.val[NEON_B] + SkAlphaMul_neon8(src.val[NEON_B], src_scale);

    return ret;
}

// kSrcIn_Mode: S scaled by Da.
uint8x8x4_t srcin_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint16x8_t scale;

    scale = SkAlpha255To256_neon8(dst.val[NEON_A]);

    ret.val[NEON_A] = SkAlphaMul_neon8(src.val[NEON_A], scale);
    ret.val[NEON_R] = SkAlphaMul_neon8(src.val[NEON_R], scale);
    ret.val[NEON_G] = SkAlphaMul_neon8(src.val[NEON_G], scale);
    ret.val[NEON_B] = SkAlphaMul_neon8(src.val[NEON_B], scale);

    return ret;
}

// kDstIn_Mode: D scaled by Sa.
uint8x8x4_t dstin_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint16x8_t scale;

    scale = SkAlpha255To256_neon8(src.val[NEON_A]);

    ret = SkAlphaMulQ_neon8(dst, scale);

    return ret;
}

// kSrcOut_Mode: S scaled by (256 - Da).
uint8x8x4_t srcout_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint16x8_t scale = vsubw_u8(vdupq_n_u16(256), dst.val[NEON_A]);

    ret = SkAlphaMulQ_neon8(src, scale);

    return ret;
}

// kDstOut_Mode: D scaled by (256 - Sa).
uint8x8x4_t dstout_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint16x8_t scale = vsubw_u8(vdupq_n_u16(256), src.val[NEON_A]);

    ret = SkAlphaMulQ_neon8(dst, scale);

    return ret;
}

// kSrcATop_Mode: [Da, Sc * Da + (1 - Sa) * Dc], 8 pixels at a time.
uint8x8x4_t srcatop_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint8x8_t isa;

    isa = vsub_u8(vdup_n_u8(255), src.val[NEON_A]);

    ret.val[NEON_A] = dst.val[NEON_A];
    ret.val[NEON_R] = SkAlphaMulAlpha_neon8(src.val[NEON_R], dst.val[NEON_A])
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_R], isa);
    ret.val[NEON_G] = SkAlphaMulAlpha_neon8(src.val[NEON_G], dst.val[NEON_A])
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_G], isa);
    ret.val[NEON_B] = SkAlphaMulAlpha_neon8(src.val[NEON_B], dst.val[NEON_A])
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_B], isa);

    return ret;
}

// kDstATop_Mode: [Sa, Sa * Dc + Sc * (1 - Da)], 8 pixels at a time.
uint8x8x4_t dstatop_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint8x8_t ida;

    ida = vsub_u8(vdup_n_u8(255), dst.val[NEON_A]);

    ret.val[NEON_A] = src.val[NEON_A];
    ret.val[NEON_R] = SkAlphaMulAlpha_neon8(src.val[NEON_R], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_R], src.val[NEON_A]);
    ret.val[NEON_G] = SkAlphaMulAlpha_neon8(src.val[NEON_G], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_G], src.val[NEON_A]);
    ret.val[NEON_B] = SkAlphaMulAlpha_neon8(src.val[NEON_B], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_B], src.val[NEON_A]);

    return ret;
}

// kXor_Mode: [Sa + Da - 2 * Sa * Da, Sc * (1 - Da) + (1 - Sa) * Dc].
uint8x8x4_t xor_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint8x8_t isa, ida;
    uint16x8_t tmp_wide, tmp_wide2;

    isa = vsub_u8(vdup_n_u8(255), src.val[NEON_A]);
    ida = vsub_u8(vdup_n_u8(255), dst.val[NEON_A]);

    // First calc alpha: Sa + Da - 2 * round(Sa * Da / 255), in 16 bits to
    // avoid overflow before the final narrowing.
    tmp_wide = vmovl_u8(src.val[NEON_A]);
    tmp_wide = vaddw_u8(tmp_wide, dst.val[NEON_A]);
    tmp_wide2 = vshll_n_u8(SkAlphaMulAlpha_neon8(src.val[NEON_A], dst.val[NEON_A]), 1);
    tmp_wide = vsubq_u16(tmp_wide, tmp_wide2);
    ret.val[NEON_A] = vmovn_u16(tmp_wide);

    // Then colors
    ret.val[NEON_R] = SkAlphaMulAlpha_neon8(src.val[NEON_R], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_R], isa);
    ret.val[NEON_G] = SkAlphaMulAlpha_neon8(src.val[NEON_G], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_G], isa);
    ret.val[NEON_B] = SkAlphaMulAlpha_neon8(src.val[NEON_B], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_B], isa);

    return ret;
}
// kPlus_Mode: per-channel saturating add, 8 pixels at a time.
uint8x8x4_t plus_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = vqadd_u8(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = vqadd_u8(src.val[NEON_R], dst.val[NEON_R]);
    ret.val[NEON_G] = vqadd_u8(src.val[NEON_G], dst.val[NEON_G]);
    ret.val[NEON_B] = vqadd_u8(src.val[NEON_B], dst.val[NEON_B]);

    return ret;
}

// kModulate_Mode: per-channel rounded (S * D) / 255, 8 pixels at a time.
uint8x8x4_t modulate_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = SkAlphaMulAlpha_neon8(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = SkAlphaMulAlpha_neon8(src.val[NEON_R], dst.val[NEON_R]);
    ret.val[NEON_G] = SkAlphaMulAlpha_neon8(src.val[NEON_G], dst.val[NEON_G]);
    ret.val[NEON_B] = SkAlphaMulAlpha_neon8(src.val[NEON_B], dst.val[NEON_B]);

    return ret;
}

// Screen/srcover-style combine on one channel: a + b - round(a * b / 255),
// computed in 16 bits then narrowed.
static inline uint8x8_t srcover_color(uint8x8_t a, uint8x8_t b) {
    uint16x8_t tmp;

    tmp = vaddl_u8(a, b);
    tmp -= SkAlphaMulAlpha_neon8_16(a, b);

    return vmovn_u16(tmp);
}

// kScreen_Mode: S + D - S*D applied to every channel (including alpha).
uint8x8x4_t screen_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = srcover_color(src.val[NEON_R], dst.val[NEON_R]);
    ret.val[NEON_G] = srcover_color(src.val[NEON_G], dst.val[NEON_G]);
    ret.val[NEON_B] = srcover_color(src.val[NEON_B], dst.val[NEON_B]);

    return ret;
}

// Shared channel math for overlay (overlay == true) and hardlight
// (overlay == false).  The two modes differ only in which operand pair is
// doubled and which alpha the comparison is made against.
template <bool overlay>
static inline uint8x8_t overlay_hardlight_color(uint8x8_t sc, uint8x8_t dc,
                                                uint8x8_t sa, uint8x8_t da) {
    /*
     * In the end we're gonna use (rc + tmp) with a different rc
     * coming from an alternative.
     * The whole value (rc + tmp) can always be expressed as
     * VAL = COM - SUB in the if case
     * VAL = COM + SUB - sa*da in the else case
     *
     * with COM = 255 * (sc + dc)
     * and SUB = sc*da + dc*sa - 2*dc*sc
     */

    // Prepare common subexpressions
    uint16x8_t const255 = vdupq_n_u16(255);
    uint16x8_t sc_plus_dc = vaddl_u8(sc, dc);
    uint16x8_t scda = vmull_u8(sc, da);
    uint16x8_t dcsa = vmull_u8(dc, sa);
    uint16x8_t sada = vmull_u8(sa, da);

    // Prepare non common subexpressions: 2*sc*dc in 32 bits, with the
    // doubling applied to dc (overlay) or sc (hardlight).
    uint16x8_t dc2, sc2;
    uint32x4_t scdc2_1, scdc2_2;
    if (overlay) {
        dc2 = vshll_n_u8(dc, 1);
        scdc2_1 = vmull_u16(vget_low_u16(dc2), vget_low_u16(vmovl_u8(sc)));
        scdc2_2 = vmull_u16(vget_high_u16(dc2), vget_high_u16(vmovl_u8(sc)));
    } else {
        sc2 = vshll_n_u8(sc, 1);
        scdc2_1 = vmull_u16(vget_low_u16(sc2), vget_low_u16(vmovl_u8(dc)));
        scdc2_2 = vmull_u16(vget_high_u16(sc2), vget_high_u16(vmovl_u8(dc)));
    }

    // Calc COM
    int32x4_t com1, com2;
    com1 = vreinterpretq_s32_u32(
                vmull_u16(vget_low_u16(const255), vget_low_u16(sc_plus_dc)));
    com2 = vreinterpretq_s32_u32(
                vmull_u16(vget_high_u16(const255), vget_high_u16(sc_plus_dc)));

    // Calc SUB
    int32x4_t sub1, sub2;
    sub1 = vreinterpretq_s32_u32(vaddl_u16(vget_low_u16(scda), vget_low_u16(dcsa)));
    sub2 = vreinterpretq_s32_u32(vaddl_u16(vget_high_u16(scda), vget_high_u16(dcsa)));
    sub1 = vsubq_s32(sub1, vreinterpretq_s32_u32(scdc2_1));
    sub2 = vsubq_s32(sub2, vreinterpretq_s32_u32(scdc2_2));

    // Compare 2*dc <= da (overlay) or 2*sc <= sa (hardlight)
    uint16x8_t cmp;

    if (overlay) {
        cmp = vcleq_u16(dc2, vmovl_u8(da));
    } else {
        cmp = vcleq_u16(sc2, vmovl_u8(sa));
    }

    // Prepare variables
    int32x4_t val1_1, val1_2;
    int32x4_t val2_1, val2_2;
    uint32x4_t cmp1, cmp2;

    // Widen the 16-bit all-ones/all-zeros masks to 32-bit masks by
    // duplicating each half into both halves of the 32-bit lane.
    cmp1 = vmovl_u16(vget_low_u16(cmp));
    cmp1 |= vshlq_n_u32(cmp1, 16);
    cmp2 = vmovl_u16(vget_high_u16(cmp));
    cmp2 |= vshlq_n_u32(cmp2, 16);

    // Calc COM - SUB
    val1_1 = com1 - sub1;
    val1_2 = com2 - sub2;

    // Calc COM + SUB - sa*da
    val2_1 = com1 + sub1;
    val2_2 = com2 + sub2;

    val2_1 = vsubq_s32(val2_1, vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(sada))));
    val2_2 = vsubq_s32(val2_2, vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(sada))));

    // Insert where needed
    val1_1 = vbslq_s32(cmp1, val1_1, val2_1);
    val1_2 = vbslq_s32(cmp2, val1_2, val2_2);

    // Call the clamp_div255round function
    return clamp_div255round_simd8_32(val1_1, val1_2);
}

// Overlay on one channel (see overlay_hardlight_color).
static inline uint8x8_t overlay_color(uint8x8_t sc, uint8x8_t dc,
                                      uint8x8_t sa, uint8x8_t da) {
    return overlay_hardlight_color<true>(sc, dc, sa, da);
}

// kOverlay_Mode: srcover alpha, overlay math on each color channel.
uint8x8x4_t overlay_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = overlay_color(src.val[NEON_R], dst.val[NEON_R],
                                    src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = overlay_color(src.val[NEON_G], dst.val[NEON_G],
                                    src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = overlay_color(src.val[NEON_B], dst.val[NEON_B],
                                    src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

// Shared channel math for lighten (lighten == true) and darken
// (lighten == false): selects between sc + dc - dc*sa/255 and
// sc + dc - sc*da/255 based on comparing sc*da against dc*sa.
template <bool lighten>
static inline uint8x8_t lighten_darken_color(uint8x8_t sc, uint8x8_t dc,
                                             uint8x8_t sa, uint8x8_t da) {
    uint16x8_t sd, ds, cmp, tmp, tmp2;

    // Prepare
    sd = vmull_u8(sc, da);
    ds = vmull_u8(dc, sa);

    // Do test
    if (lighten) {
        cmp = vcgtq_u16(sd, ds);
    } else {
        cmp = vcltq_u16(sd, ds);
    }

    // Assign if
    tmp = vaddl_u8(sc, dc);
    tmp2 = tmp;
    tmp -= SkDiv255Round_neon8_16_16(ds);

    // Calc else
    tmp2 -= SkDiv255Round_neon8_16_16(sd);

    // Insert where needed
    tmp = vbslq_u16(cmp, tmp, tmp2);

    return vmovn_u16(tmp);
}

// Darken on one channel (see lighten_darken_color).
static inline uint8x8_t darken_color(uint8x8_t sc, uint8x8_t dc,
                                     uint8x8_t sa, uint8x8_t da) {
    return lighten_darken_color<false>(sc, dc, sa, da);
}

// kDarken_Mode: srcover alpha, darken math on each color channel.
uint8x8x4_t darken_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = darken_color(src.val[NEON_R], dst.val[NEON_R],
                                   src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = darken_color(src.val[NEON_G], dst.val[NEON_G],
                                   src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = darken_color(src.val[NEON_B], dst.val[NEON_B],
                                   src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

// Lighten on one channel (see lighten_darken_color).
static inline uint8x8_t lighten_color(uint8x8_t sc, uint8x8_t dc,
                                      uint8x8_t sa, uint8x8_t da) {
    return lighten_darken_color<true>(sc, dc, sa, da);
}

// kLighten_Mode: srcover alpha, lighten math on each color channel.
uint8x8x4_t lighten_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = lighten_color(src.val[NEON_R], dst.val[NEON_R],
                                    src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = lighten_color(src.val[NEON_G], dst.val[NEON_G],
                                    src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = lighten_color(src.val[NEON_B], dst.val[NEON_B],
                                    src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

// Hardlight on one channel (see overlay_hardlight_color).
static inline uint8x8_t hardlight_color(uint8x8_t sc, uint8x8_t dc,
                                        uint8x8_t sa, uint8x8_t da) {
    return overlay_hardlight_color<false>(sc, dc, sa, da);
}

// kHardLight_Mode: srcover alpha, hardlight math on each color channel.
uint8x8x4_t hardlight_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = hardlight_color(src.val[NEON_R], dst.val[NEON_R],
                                      src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = hardlight_color(src.val[NEON_G], dst.val[NEON_G],
                                      src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = hardlight_color(src.val[NEON_B], dst.val[NEON_B],
                                      src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

// Difference on one channel: sc + dc - 2 * round(min(sc*da, dc*sa) / 255),
// clamped to [0, 255] in signed 16-bit arithmetic.
static inline uint8x8_t difference_color(uint8x8_t sc, uint8x8_t dc,
                                         uint8x8_t sa, uint8x8_t da) {
    uint16x8_t sd, ds, tmp;
    int16x8_t val;

    sd = vmull_u8(sc, da);
    ds = vmull_u8(dc, sa);

    tmp = vminq_u16(sd, ds);
    tmp = SkDiv255Round_neon8_16_16(tmp);
    tmp = vshlq_n_u16(tmp, 1);

    val = vreinterpretq_s16_u16(vaddl_u8(sc, dc));

    val -= vreinterpretq_s16_u16(tmp);

    val = vmaxq_s16(val, vdupq_n_s16(0));
    val = vminq_s16(val, vdupq_n_s16(255));

    return vmovn_u16(vreinterpretq_u16_s16(val));
}

// kDifference_Mode: srcover alpha, difference math on each color channel.
uint8x8x4_t difference_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = difference_color(src.val[NEON_R], dst.val[NEON_R],
                                       src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = difference_color(src.val[NEON_G], dst.val[NEON_G],
                                       src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = difference_color(src.val[NEON_B], dst.val[NEON_B],
                                       src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

// Exclusion on one channel.
static inline uint8x8_t exclusion_color(uint8x8_t sc, uint8x8_t dc,
                                        uint8x8_t sa, uint8x8_t da) {
    /* The equation can be simplified to 255(sc + dc) - 2 * sc * dc */

    uint16x8_t sc_plus_dc, scdc, const255;
    int32x4_t term1_1, term1_2, term2_1, term2_2;

    /* Calc (sc + dc) and (sc * dc) */
    sc_plus_dc = vaddl_u8(sc, dc);
    scdc = vmull_u8(sc, dc);

    /* Prepare constants */
    const255 = vdupq_n_u16(255);

    /* Calc the first term */
    term1_1 = vreinterpretq_s32_u32(
                vmull_u16(vget_low_u16(const255), vget_low_u16(sc_plus_dc)));
    term1_2 = vreinterpretq_s32_u32(
                vmull_u16(vget_high_u16(const255), vget_high_u16(sc_plus_dc)));

    /* Calc the second term */
    term2_1 = vreinterpretq_s32_u32(vshll_n_u16(vget_low_u16(scdc), 1));
    term2_2 = vreinterpretq_s32_u32(vshll_n_u16(vget_high_u16(scdc), 1));

    return clamp_div255round_simd8_32(term1_1 - term2_1, term1_2 - term2_2);
}

// kExclusion_Mode: srcover alpha, exclusion math on each color channel.
uint8x8x4_t exclusion_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = exclusion_color(src.val[NEON_R], dst.val[NEON_R],
                                      src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = exclusion_color(src.val[NEON_G], dst.val[NEON_G],
                                      src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = exclusion_color(src.val[NEON_B], dst.val[NEON_B],
                                      src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

// Multiply on one channel: sc*(255 - da) + dc*(255 - sa) + sc*dc, summed
// in 32 bits, then clamped and rounded-divided by 255.
static inline uint8x8_t blendfunc_multiply_color(uint8x8_t sc, uint8x8_t dc,
                                                 uint8x8_t sa, uint8x8_t da) {
    uint32x4_t val1, val2;
    uint16x8_t scdc, t1, t2;

    t1 = vmull_u8(sc, vdup_n_u8(255) - da);
    t2 = vmull_u8(dc, vdup_n_u8(255) - sa);
    scdc = vmull_u8(sc, dc);

    val1 = vaddl_u16(vget_low_u16(t1), vget_low_u16(t2));
    val2 = vaddl_u16(vget_high_u16(t1), vget_high_u16(t2));

    val1 = vaddw_u16(val1, vget_low_u16(scdc));
    val2 = vaddw_u16(val2, vget_high_u16(scdc));

    return clamp_div255round_simd8_32(
                vreinterpretq_s32_u32(val1), vreinterpretq_s32_u32(val2));
}

// kMultiply_Mode: srcover alpha, multiply math on each color channel.
uint8x8x4_t multiply_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = blendfunc_multiply_color(src.val[NEON_R], dst.val[NEON_R],
                                               src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = blendfunc_multiply_color(src.val[NEON_G], dst.val[NEON_G],
                                               src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = blendfunc_multiply_color(src.val[NEON_B], dst.val[NEON_B],
                                               src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

////////////////////////////////////////////////////////////////////////////////

typedef uint8x8x4_t (*SkXfermodeProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst);

// Mode-indexed table, defined at the bottom of this file.
extern SkXfermodeProcSIMD gNEONXfermodeProcs[];

// Deserializing constructor: restores base state, then re-binds the SIMD
// proc from the mode (function pointers are not serialized).
SkNEONProcCoeffXfermode::SkNEONProcCoeffXfermode(SkReadBuffer& buffer)
        : INHERITED(buffer) {
    fProcSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[this->getMode()]);
}

// 32-bit blit: processes 8 pixels per iteration with the SIMD proc when
// there is no per-pixel coverage (aa == NULL), falling back to the scalar
// proc for the tail and for the coverage path.
void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
                                     int count, const SkAlpha aa[]) const {
    SkASSERT(dst && src && count >= 0);

    SkXfermodeProc proc = this->getProc();
    SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD);
    SkASSERT(procSIMD != NULL);

    if (NULL == aa) {
        // Unrolled NEON code
        while (count >= 8) {
            uint8x8x4_t vsrc, vdst, vres;

            // De-interleaving vld4 loads; src is post-incremented by the
            // load, dst is advanced manually after the store below.
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
            asm volatile (
                "vld4.u8    %h[vsrc], [%[src]]!  \t\n"
                "vld4.u8    %h[vdst], [%[dst]]   \t\n"
                : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src)
                : [dst] "r" (dst)
                :
            );
#else
            // Older GCC: pin the loads to d0-d7 explicitly since the %h
            // operand modifier is unavailable.
            register uint8x8_t d0 asm("d0");
            register uint8x8_t d1 asm("d1");
            register uint8x8_t d2 asm("d2");
            register uint8x8_t d3 asm("d3");
            register uint8x8_t d4 asm("d4");
            register uint8x8_t d5 asm("d5");
            register uint8x8_t d6 asm("d6");
            register uint8x8_t d7 asm("d7");

            asm volatile (
                "vld4.u8    {d0-d3},[%[src]]!;"
                "vld4.u8    {d4-d7},[%[dst]];"
                : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3),
                  "=w" (d4), "=w" (d5), "=w" (d6), "=w" (d7),
                  [src] "+&r" (src)
                : [dst] "r" (dst)
                :
            );
            vsrc.val[0] = d0; vdst.val[0] = d4;
            vsrc.val[1] = d1; vdst.val[1] = d5;
            vsrc.val[2] = d2; vdst.val[2] = d6;
            vsrc.val[3] = d3; vdst.val[3] = d7;
#endif

            vres = procSIMD(vsrc, vdst);

            vst4_u8((uint8_t*)dst, vres);

            count -= 8;
            dst += 8;
        }
        // Leftovers
        for (int i = 0; i < count; i++) {
            dst[i] = proc(src[i], dst[i]);
        }
    } else {
        // Coverage path: scalar proc, then lerp with dst by coverage.
        for (int i = count - 1; i >= 0; --i) {
            unsigned a = aa[i];
            if (0 != a) {
                SkPMColor dstC = dst[i];
                SkPMColor C = proc(src[i], dstC);
                if (a != 0xFF) {
                    C = SkFourByteInterp_neon(C, dstC, a);
                }
                dst[i] = C;
            }
        }
    }
}

// 565 blit: expands dst to 8888, blends (SIMD for full groups of 8 with no
// coverage), and packs back to 565.
void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
                                     const SkPMColor* SK_RESTRICT src, int count,
                                     const SkAlpha* SK_RESTRICT aa) const {
    SkASSERT(dst && src && count >= 0);

    SkXfermodeProc proc = this->getProc();
    SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD);
    SkASSERT(procSIMD != NULL);

    if (NULL == aa) {
        while(count >= 8) {
            uint16x8_t vdst, vres16;
            uint8x8x4_t vdst32, vsrc, vres;

            vdst = vld1q_u16(dst);

            // De-interleaving load of 8 src pixels; src is post-incremented.
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
            asm volatile (
                "vld4.u8    %h[vsrc], [%[src]]!  \t\n"
                : [vsrc] "=w" (vsrc), [src] "+&r" (src)
                : :
            );
#else
            // Older GCC: pin to d0-d3 (see xfer32 for rationale).
            register uint8x8_t d0 asm("d0");
            register uint8x8_t d1 asm("d1");
            register uint8x8_t d2 asm("d2");
            register uint8x8_t d3 asm("d3");

            asm volatile (
                "vld4.u8    {d0-d3},[%[src]]!;"
                : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3),
                  [src] "+&r" (src)
                : :
            );
            vsrc.val[0] = d0;
            vsrc.val[1] = d1;
            vsrc.val[2] = d2;
            vsrc.val[3] = d3;
#endif

            vdst32 = SkPixel16ToPixel32_neon8(vdst);
            vres = procSIMD(vsrc, vdst32);
            vres16 = SkPixel32ToPixel16_neon8(vres);

            vst1q_u16(dst, vres16);

            count -= 8;
            dst += 8;
        }
        // Scalar tail.
        for (int i = 0; i < count; i++) {
            SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
            dst[i] = SkPixel32ToPixel16_ToU16(proc(src[i], dstC));
        }
    } else {
        // Coverage path: scalar proc plus coverage lerp, as in xfer32.
        for (int i = count - 1; i >= 0; --i) {
            unsigned a = aa[i];
            if (0 != a) {
                SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
                SkPMColor C = proc(src[i], dstC);
                if (0xFF != a) {
                    C = SkFourByteInterp_neon(C, dstC, a);
                }
                dst[i] = SkPixel32ToPixel16_ToU16(C);
            }
        }
    }
}

#ifndef SK_IGNORE_TO_STRING
// No NEON-specific state worth printing; defer to the base class.
void SkNEONProcCoeffXfermode::toString(SkString* str) const {
    this->INHERITED::toString(str);
}
#endif

////////////////////////////////////////////////////////////////////////////////
// 8-pixel SIMD procs indexed by SkXfermode::Mode.  Entry order must match
// the Mode enum exactly (checked by the SK_COMPILE_ASSERT below); a NULL
// entry means no NEON specialization exists for that mode.
SkXfermodeProcSIMD gNEONXfermodeProcs[] = {
    NULL, // kClear_Mode
    NULL, // kSrc_Mode
    NULL, // kDst_Mode
    NULL, // kSrcOver_Mode
    dstover_modeproc_neon8,
    srcin_modeproc_neon8,
    dstin_modeproc_neon8,
    srcout_modeproc_neon8,
    dstout_modeproc_neon8,
    srcatop_modeproc_neon8,
    dstatop_modeproc_neon8,
    xor_modeproc_neon8,
    plus_modeproc_neon8,
    modulate_modeproc_neon8,
    screen_modeproc_neon8,

    overlay_modeproc_neon8,
    darken_modeproc_neon8,
    lighten_modeproc_neon8,
    NULL, // kColorDodge_Mode
    NULL, // kColorBurn_Mode
    hardlight_modeproc_neon8,
    NULL, // kSoftLight_Mode
    difference_modeproc_neon8,
    exclusion_modeproc_neon8,
    multiply_modeproc_neon8,

    NULL, // kHue_Mode
    NULL, // kSaturation_Mode
    NULL, // kColor_Mode
    NULL, // kLuminosity_Mode
};

// Guard: table size must track the Mode enum.
SK_COMPILE_ASSERT(
    SK_ARRAY_COUNT(gNEONXfermodeProcs) == SkXfermode::kLastMode + 1,
    mode_count_arm
);

// Single-pixel NEON procs, same indexing contract as gNEONXfermodeProcs.
// Only the modes with a 1-pixel NEON implementation above are filled in.
SkXfermodeProc gNEONXfermodeProcs1[] = {
    NULL, // kClear_Mode
    NULL, // kSrc_Mode
    NULL, // kDst_Mode
    NULL, // kSrcOver_Mode
    NULL, // kDstOver_Mode
    NULL, // kSrcIn_Mode
    NULL, // kDstIn_Mode
    NULL, // kSrcOut_Mode
    NULL, // kDstOut_Mode
    srcatop_modeproc_neon,
    dstatop_modeproc_neon,
    xor_modeproc_neon,
    plus_modeproc_neon,
    modulate_modeproc_neon,
    NULL, // kScreen_Mode

    NULL, // kOverlay_Mode
    NULL, // kDarken_Mode
    NULL, // kLighten_Mode
    NULL, // kColorDodge_Mode
    NULL, // kColorBurn_Mode
    NULL, // kHardLight_Mode
    NULL, // kSoftLight_Mode
    NULL, // kDifference_Mode
    NULL, // kExclusion_Mode
    NULL, // kMultiply_Mode

    NULL, // kHue_Mode
    NULL, // kSaturation_Mode
    NULL, // kColor_Mode
    NULL, // kLuminosity_Mode
};

// Guard: table size must track the Mode enum.
SK_COMPILE_ASSERT(
    SK_ARRAY_COUNT(gNEONXfermodeProcs1) == SkXfermode::kLastMode + 1,
    mode1_count_arm
);

// Factory hook: returns a NEON-accelerated xfermode for 'mode', or NULL
// when no 8-pixel NEON proc exists (caller falls back to the portable path).
SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec,
                                                         SkXfermode::Mode mode) {

    void* procSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[mode]);

    if (procSIMD != NULL) {
        return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD));
    }
    return NULL;
}

// Returns the 1-pixel NEON proc for 'mode', or NULL when none exists.
SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) {
    return gNEONXfermodeProcs1[mode];
}