gfx/skia/trunk/src/opts/SkXfermode_opts_arm_neon.cpp

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Wed, 31 Dec 2014 06:09:35 +0100
changeset    0:6474c204b198
permissions  -rw-r--r--

Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

#include "SkXfermode.h"
#include "SkXfermode_proccoeff.h"
#include "SkColorPriv.h"

#include <arm_neon.h>
#include "SkColor_opts_neon.h"
#include "SkXfermode_opts_arm_neon.h"

#define SkAlphaMulAlpha(a, b)   SkMulDiv255Round(a, b)


////////////////////////////////////////////////////////////////////////////////
// NEONized skia functions
////////////////////////////////////////////////////////////////////////////////

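// Annotation (not part of the upstream file): the helpers below all use the
// standard fixed-point approximation of a rounded division by 255. For an
// 8x8-bit product prod = color * alpha, the scalar equivalent is roughly
//
//     prod += 128;
//     result = (prod + (prod >> 8)) >> 8;   // ~= SkMulDiv255Round(color, alpha)
//
// The NEON versions apply the same add-128 / shift-add / narrowing-shift
// sequence to eight 16-bit lanes at a time.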
static inline uint8x8_t SkAlphaMulAlpha_neon8(uint8x8_t color, uint8x8_t alpha) {
    uint16x8_t tmp;
    uint8x8_t ret;

    tmp = vmull_u8(color, alpha);
    tmp = vaddq_u16(tmp, vdupq_n_u16(128));
    tmp = vaddq_u16(tmp, vshrq_n_u16(tmp, 8));

    ret = vshrn_n_u16(tmp, 8);

    return ret;
}

static inline uint16x8_t SkAlphaMulAlpha_neon8_16(uint8x8_t color, uint8x8_t alpha) {
    uint16x8_t ret;

    ret = vmull_u8(color, alpha);
    ret = vaddq_u16(ret, vdupq_n_u16(128));
    ret = vaddq_u16(ret, vshrq_n_u16(ret, 8));

    ret = vshrq_n_u16(ret, 8);

    return ret;
}

static inline uint8x8_t SkDiv255Round_neon8_32_8(int32x4_t p1, int32x4_t p2) {
    uint16x8_t tmp;

    tmp = vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(p1)),
                       vmovn_u32(vreinterpretq_u32_s32(p2)));

    tmp += vdupq_n_u16(128);
    tmp += vshrq_n_u16(tmp, 8);

    return vshrn_n_u16(tmp, 8);
}

static inline uint16x8_t SkDiv255Round_neon8_16_16(uint16x8_t prod) {
    prod += vdupq_n_u16(128);
    prod += vshrq_n_u16(prod, 8);

    return vshrq_n_u16(prod, 8);
}

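// Annotation (not part of the upstream file): clamp_div255round_simd8_32 is the
// vector counterpart of Skia's scalar clamp_div255round(prod): lanes <= 0 map to
// 0, lanes >= 255*255 map to 255, and everything in between is divided by 255
// with rounding. The comparisons produce all-ones masks that vbsl_u8 then uses
// to select between the clamped constants and the rounded quotient.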
static inline uint8x8_t clamp_div255round_simd8_32(int32x4_t val1, int32x4_t val2) {
    uint8x8_t ret;
    uint32x4_t cmp1, cmp2;
    uint16x8_t cmp16;
    uint8x8_t cmp8, cmp8_1;

    // Test if <= 0
    cmp1 = vcleq_s32(val1, vdupq_n_s32(0));
    cmp2 = vcleq_s32(val2, vdupq_n_s32(0));
    cmp16 = vcombine_u16(vmovn_u32(cmp1), vmovn_u32(cmp2));
    cmp8_1 = vmovn_u16(cmp16);

    // Init to zero
    ret = vdup_n_u8(0);

    // Test if >= 255*255
    cmp1 = vcgeq_s32(val1, vdupq_n_s32(255*255));
    cmp2 = vcgeq_s32(val2, vdupq_n_s32(255*255));
    cmp16 = vcombine_u16(vmovn_u32(cmp1), vmovn_u32(cmp2));
    cmp8 = vmovn_u16(cmp16);

    // Insert 255 where true
    ret = vbsl_u8(cmp8, vdup_n_u8(255), ret);

    // Calc SkDiv255Round
    uint8x8_t div = SkDiv255Round_neon8_32_8(val1, val2);

    // Insert where false and previous test false
    cmp8 = cmp8 | cmp8_1;
    ret = vbsl_u8(cmp8, ret, div);

    // Return the final combination
    return ret;
}

////////////////////////////////////////////////////////////////////////////////
// 1 pixel modeprocs
////////////////////////////////////////////////////////////////////////////////

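// Annotation (not part of the upstream file): each single-pixel proc below
// broadcasts the packed 32-bit SkPMColor into a uint8x8_t (the four channels
// repeated twice), so one vmull_u8 against a duplicated alpha multiplies all
// four channels at once. Lane 3 is then overwritten with the separately
// computed alpha (assuming the usual SK_A32_SHIFT == 24 layout) and the first
// 32-bit lane is read back as the result pixel.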
// kSrcATop_Mode,  //!< [Da, Sc * Da + (1 - Sa) * Dc]
SkPMColor srcatop_modeproc_neon(SkPMColor src, SkPMColor dst) {
    unsigned sa = SkGetPackedA32(src);
    unsigned da = SkGetPackedA32(dst);
    unsigned isa = 255 - sa;

    uint8x8_t vda, visa, vsrc, vdst;

    vda = vdup_n_u8(da);
    visa = vdup_n_u8(isa);

    uint16x8_t vsrc_wide, vdst_wide;
    vsrc_wide = vmull_u8(vda, vreinterpret_u8_u32(vdup_n_u32(src)));
    vdst_wide = vmull_u8(visa, vreinterpret_u8_u32(vdup_n_u32(dst)));

    vsrc_wide += vdupq_n_u16(128);
    vsrc_wide += vshrq_n_u16(vsrc_wide, 8);

    vdst_wide += vdupq_n_u16(128);
    vdst_wide += vshrq_n_u16(vdst_wide, 8);

    vsrc = vshrn_n_u16(vsrc_wide, 8);
    vdst = vshrn_n_u16(vdst_wide, 8);

    vsrc += vdst;
    vsrc = vset_lane_u8(da, vsrc, 3);

    return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
}

// kDstATop_Mode,  //!< [Sa, Sa * Dc + Sc * (1 - Da)]
SkPMColor dstatop_modeproc_neon(SkPMColor src, SkPMColor dst) {
    unsigned sa = SkGetPackedA32(src);
    unsigned da = SkGetPackedA32(dst);
    unsigned ida = 255 - da;

    uint8x8_t vsa, vida, vsrc, vdst;

    vsa = vdup_n_u8(sa);
    vida = vdup_n_u8(ida);

    uint16x8_t vsrc_wide, vdst_wide;
    vsrc_wide = vmull_u8(vida, vreinterpret_u8_u32(vdup_n_u32(src)));
    vdst_wide = vmull_u8(vsa, vreinterpret_u8_u32(vdup_n_u32(dst)));

    vsrc_wide += vdupq_n_u16(128);
    vsrc_wide += vshrq_n_u16(vsrc_wide, 8);

    vdst_wide += vdupq_n_u16(128);
    vdst_wide += vshrq_n_u16(vdst_wide, 8);

    vsrc = vshrn_n_u16(vsrc_wide, 8);
    vdst = vshrn_n_u16(vdst_wide, 8);

    vsrc += vdst;
    vsrc = vset_lane_u8(sa, vsrc, 3);

    return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
}

// kXor_Mode [Sa + Da - 2 * Sa * Da, Sc * (1 - Da) + (1 - Sa) * Dc]
SkPMColor xor_modeproc_neon(SkPMColor src, SkPMColor dst) {
    unsigned sa = SkGetPackedA32(src);
    unsigned da = SkGetPackedA32(dst);
    unsigned ret_alpha = sa + da - (SkAlphaMulAlpha(sa, da) << 1);
    unsigned isa = 255 - sa;
    unsigned ida = 255 - da;

    uint8x8_t vsrc, vdst, visa, vida;
    uint16x8_t vsrc_wide, vdst_wide;

    visa = vdup_n_u8(isa);
    vida = vdup_n_u8(ida);
    vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
    vdst = vreinterpret_u8_u32(vdup_n_u32(dst));

    vsrc_wide = vmull_u8(vsrc, vida);
    vdst_wide = vmull_u8(vdst, visa);

    vsrc_wide += vdupq_n_u16(128);
    vsrc_wide += vshrq_n_u16(vsrc_wide, 8);

    vdst_wide += vdupq_n_u16(128);
    vdst_wide += vshrq_n_u16(vdst_wide, 8);

    vsrc = vshrn_n_u16(vsrc_wide, 8);
    vdst = vshrn_n_u16(vdst_wide, 8);

    vsrc += vdst;

    vsrc = vset_lane_u8(ret_alpha, vsrc, 3);

    return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
}

// kPlus_Mode
SkPMColor plus_modeproc_neon(SkPMColor src, SkPMColor dst) {
    uint8x8_t vsrc, vdst;
    vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
    vdst = vreinterpret_u8_u32(vdup_n_u32(dst));
    vsrc = vqadd_u8(vsrc, vdst);

    return vget_lane_u32(vreinterpret_u32_u8(vsrc), 0);
}

// kModulate_Mode
SkPMColor modulate_modeproc_neon(SkPMColor src, SkPMColor dst) {
    uint8x8_t vsrc, vdst, vres;
    uint16x8_t vres_wide;

    vsrc = vreinterpret_u8_u32(vdup_n_u32(src));
    vdst = vreinterpret_u8_u32(vdup_n_u32(dst));

    vres_wide = vmull_u8(vsrc, vdst);

    vres_wide += vdupq_n_u16(128);
    vres_wide += vshrq_n_u16(vres_wide, 8);

    vres = vshrn_n_u16(vres_wide, 8);

    return vget_lane_u32(vreinterpret_u32_u8(vres), 0);
}

////////////////////////////////////////////////////////////////////////////////
// 8 pixels modeprocs
////////////////////////////////////////////////////////////////////////////////

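// Annotation (not part of the upstream file): the eight-pixel procs operate on
// planar data as produced by vld4.u8 -- each val[NEON_A/R/G/B] of a uint8x8x4_t
// holds one channel of eight consecutive pixels -- so every per-channel formula
// below processes eight pixels per call.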
uint8x8x4_t dstover_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint16x8_t src_scale;

    src_scale = vsubw_u8(vdupq_n_u16(256), dst.val[NEON_A]);

    ret.val[NEON_A] = dst.val[NEON_A] + SkAlphaMul_neon8(src.val[NEON_A], src_scale);
    ret.val[NEON_R] = dst.val[NEON_R] + SkAlphaMul_neon8(src.val[NEON_R], src_scale);
    ret.val[NEON_G] = dst.val[NEON_G] + SkAlphaMul_neon8(src.val[NEON_G], src_scale);
    ret.val[NEON_B] = dst.val[NEON_B] + SkAlphaMul_neon8(src.val[NEON_B], src_scale);

    return ret;
}

uint8x8x4_t srcin_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint16x8_t scale;

    scale = SkAlpha255To256_neon8(dst.val[NEON_A]);

    ret.val[NEON_A] = SkAlphaMul_neon8(src.val[NEON_A], scale);
    ret.val[NEON_R] = SkAlphaMul_neon8(src.val[NEON_R], scale);
    ret.val[NEON_G] = SkAlphaMul_neon8(src.val[NEON_G], scale);
    ret.val[NEON_B] = SkAlphaMul_neon8(src.val[NEON_B], scale);

    return ret;
}

uint8x8x4_t dstin_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint16x8_t scale;

    scale = SkAlpha255To256_neon8(src.val[NEON_A]);

    ret = SkAlphaMulQ_neon8(dst, scale);

    return ret;
}

uint8x8x4_t srcout_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint16x8_t scale = vsubw_u8(vdupq_n_u16(256), dst.val[NEON_A]);

    ret = SkAlphaMulQ_neon8(src, scale);

    return ret;
}

uint8x8x4_t dstout_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint16x8_t scale = vsubw_u8(vdupq_n_u16(256), src.val[NEON_A]);

    ret = SkAlphaMulQ_neon8(dst, scale);

    return ret;
}

uint8x8x4_t srcatop_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint8x8_t isa;

    isa = vsub_u8(vdup_n_u8(255), src.val[NEON_A]);

    ret.val[NEON_A] = dst.val[NEON_A];
    ret.val[NEON_R] = SkAlphaMulAlpha_neon8(src.val[NEON_R], dst.val[NEON_A])
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_R], isa);
    ret.val[NEON_G] = SkAlphaMulAlpha_neon8(src.val[NEON_G], dst.val[NEON_A])
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_G], isa);
    ret.val[NEON_B] = SkAlphaMulAlpha_neon8(src.val[NEON_B], dst.val[NEON_A])
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_B], isa);

    return ret;
}

uint8x8x4_t dstatop_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint8x8_t ida;

    ida = vsub_u8(vdup_n_u8(255), dst.val[NEON_A]);

    ret.val[NEON_A] = src.val[NEON_A];
    ret.val[NEON_R] = SkAlphaMulAlpha_neon8(src.val[NEON_R], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_R], src.val[NEON_A]);
    ret.val[NEON_G] = SkAlphaMulAlpha_neon8(src.val[NEON_G], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_G], src.val[NEON_A]);
    ret.val[NEON_B] = SkAlphaMulAlpha_neon8(src.val[NEON_B], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_B], src.val[NEON_A]);

    return ret;
}

uint8x8x4_t xor_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;
    uint8x8_t isa, ida;
    uint16x8_t tmp_wide, tmp_wide2;

    isa = vsub_u8(vdup_n_u8(255), src.val[NEON_A]);
    ida = vsub_u8(vdup_n_u8(255), dst.val[NEON_A]);

    // First calc alpha
    tmp_wide = vmovl_u8(src.val[NEON_A]);
    tmp_wide = vaddw_u8(tmp_wide, dst.val[NEON_A]);
    tmp_wide2 = vshll_n_u8(SkAlphaMulAlpha_neon8(src.val[NEON_A], dst.val[NEON_A]), 1);
    tmp_wide = vsubq_u16(tmp_wide, tmp_wide2);
    ret.val[NEON_A] = vmovn_u16(tmp_wide);

    // Then colors
    ret.val[NEON_R] = SkAlphaMulAlpha_neon8(src.val[NEON_R], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_R], isa);
    ret.val[NEON_G] = SkAlphaMulAlpha_neon8(src.val[NEON_G], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_G], isa);
    ret.val[NEON_B] = SkAlphaMulAlpha_neon8(src.val[NEON_B], ida)
                      + SkAlphaMulAlpha_neon8(dst.val[NEON_B], isa);

    return ret;
}

uint8x8x4_t plus_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = vqadd_u8(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = vqadd_u8(src.val[NEON_R], dst.val[NEON_R]);
    ret.val[NEON_G] = vqadd_u8(src.val[NEON_G], dst.val[NEON_G]);
    ret.val[NEON_B] = vqadd_u8(src.val[NEON_B], dst.val[NEON_B]);

    return ret;
}

uint8x8x4_t modulate_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = SkAlphaMulAlpha_neon8(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = SkAlphaMulAlpha_neon8(src.val[NEON_R], dst.val[NEON_R]);
    ret.val[NEON_G] = SkAlphaMulAlpha_neon8(src.val[NEON_G], dst.val[NEON_G]);
    ret.val[NEON_B] = SkAlphaMulAlpha_neon8(src.val[NEON_B], dst.val[NEON_B]);

    return ret;
}

static inline uint8x8_t srcover_color(uint8x8_t a, uint8x8_t b) {
    uint16x8_t tmp;

    tmp = vaddl_u8(a, b);
    tmp -= SkAlphaMulAlpha_neon8_16(a, b);

    return vmovn_u16(tmp);
}

uint8x8x4_t screen_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = srcover_color(src.val[NEON_R], dst.val[NEON_R]);
    ret.val[NEON_G] = srcover_color(src.val[NEON_G], dst.val[NEON_G]);
    ret.val[NEON_B] = srcover_color(src.val[NEON_B], dst.val[NEON_B]);

    return ret;
}

template <bool overlay>
static inline uint8x8_t overlay_hardlight_color(uint8x8_t sc, uint8x8_t dc,
                                                uint8x8_t sa, uint8x8_t da) {
    /*
     * In the end we're gonna use (rc + tmp) with a different rc
     * coming from an alternative.
     * The whole value (rc + tmp) can always be expressed as
     * VAL = COM - SUB in the if case
     * VAL = COM + SUB - sa*da in the else case
     *
     * with COM = 255 * (sc + dc)
     * and SUB = sc*da + dc*sa - 2*dc*sc
     */
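
    // Annotation (not part of the upstream file): in scalar terms the branch
    // evaluated below is roughly
    //     if (2*dc <= da)   rc = 2*sc*dc;                          // overlay
    //     else              rc = sa*da - 2*(da - dc)*(sa - sc);
    // (hardlight tests 2*sc <= sa instead), and the channel result is
    // clamp_div255round(rc + sc*(255 - da) + dc*(255 - sa)), which expands to
    // the COM/SUB decomposition described above.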

    // Prepare common subexpressions
    uint16x8_t const255 = vdupq_n_u16(255);
    uint16x8_t sc_plus_dc = vaddl_u8(sc, dc);
    uint16x8_t scda = vmull_u8(sc, da);
    uint16x8_t dcsa = vmull_u8(dc, sa);
    uint16x8_t sada = vmull_u8(sa, da);

    // Prepare non common subexpressions
    uint16x8_t dc2, sc2;
    uint32x4_t scdc2_1, scdc2_2;
    if (overlay) {
        dc2 = vshll_n_u8(dc, 1);
        scdc2_1 = vmull_u16(vget_low_u16(dc2), vget_low_u16(vmovl_u8(sc)));
        scdc2_2 = vmull_u16(vget_high_u16(dc2), vget_high_u16(vmovl_u8(sc)));
    } else {
        sc2 = vshll_n_u8(sc, 1);
        scdc2_1 = vmull_u16(vget_low_u16(sc2), vget_low_u16(vmovl_u8(dc)));
        scdc2_2 = vmull_u16(vget_high_u16(sc2), vget_high_u16(vmovl_u8(dc)));
    }

    // Calc COM
    int32x4_t com1, com2;
    com1 = vreinterpretq_s32_u32(
                vmull_u16(vget_low_u16(const255), vget_low_u16(sc_plus_dc)));
    com2 = vreinterpretq_s32_u32(
                vmull_u16(vget_high_u16(const255), vget_high_u16(sc_plus_dc)));

    // Calc SUB
    int32x4_t sub1, sub2;
    sub1 = vreinterpretq_s32_u32(vaddl_u16(vget_low_u16(scda), vget_low_u16(dcsa)));
    sub2 = vreinterpretq_s32_u32(vaddl_u16(vget_high_u16(scda), vget_high_u16(dcsa)));
    sub1 = vsubq_s32(sub1, vreinterpretq_s32_u32(scdc2_1));
    sub2 = vsubq_s32(sub2, vreinterpretq_s32_u32(scdc2_2));

    // Compare 2*dc <= da
    uint16x8_t cmp;

    if (overlay) {
        cmp = vcleq_u16(dc2, vmovl_u8(da));
    } else {
        cmp = vcleq_u16(sc2, vmovl_u8(sa));
    }

    // Prepare variables
    int32x4_t val1_1, val1_2;
    int32x4_t val2_1, val2_2;
    uint32x4_t cmp1, cmp2;

    cmp1 = vmovl_u16(vget_low_u16(cmp));
    cmp1 |= vshlq_n_u32(cmp1, 16);
    cmp2 = vmovl_u16(vget_high_u16(cmp));
    cmp2 |= vshlq_n_u32(cmp2, 16);

    // Calc COM - SUB
    val1_1 = com1 - sub1;
    val1_2 = com2 - sub2;

    // Calc COM + SUB - sa*da
    val2_1 = com1 + sub1;
    val2_2 = com2 + sub2;

    val2_1 = vsubq_s32(val2_1, vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(sada))));
    val2_2 = vsubq_s32(val2_2, vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(sada))));

    // Insert where needed
    val1_1 = vbslq_s32(cmp1, val1_1, val2_1);
    val1_2 = vbslq_s32(cmp2, val1_2, val2_2);

    // Call the clamp_div255round function
    return clamp_div255round_simd8_32(val1_1, val1_2);
}

static inline uint8x8_t overlay_color(uint8x8_t sc, uint8x8_t dc,
                                      uint8x8_t sa, uint8x8_t da) {
    return overlay_hardlight_color<true>(sc, dc, sa, da);
}

uint8x8x4_t overlay_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = overlay_color(src.val[NEON_R], dst.val[NEON_R],
                                    src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = overlay_color(src.val[NEON_G], dst.val[NEON_G],
                                    src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = overlay_color(src.val[NEON_B], dst.val[NEON_B],
                                    src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

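// Annotation (not part of the upstream file): the scalar reference for the
// lighten/darken template below is, per channel,
//     sd = sc*da;  ds = dc*sa;
//     darken:  sd < ds ? sc + dc - SkDiv255Round(ds) : dc + sc - SkDiv255Round(sd)
//     lighten: sd > ds ? sc + dc - SkDiv255Round(ds) : dc + sc - SkDiv255Round(sd)
// i.e. the branch picks whichever of src/dst is darker (or lighter) and
// composites the other one over it.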
template <bool lighten>
static inline uint8x8_t lighten_darken_color(uint8x8_t sc, uint8x8_t dc,
                                             uint8x8_t sa, uint8x8_t da) {
    uint16x8_t sd, ds, cmp, tmp, tmp2;

    // Prepare
    sd = vmull_u8(sc, da);
    ds = vmull_u8(dc, sa);

    // Do test
    if (lighten) {
        cmp = vcgtq_u16(sd, ds);
    } else {
        cmp = vcltq_u16(sd, ds);
    }

    // Assign if
    tmp = vaddl_u8(sc, dc);
    tmp2 = tmp;
    tmp -= SkDiv255Round_neon8_16_16(ds);

    // Calc else
    tmp2 -= SkDiv255Round_neon8_16_16(sd);

    // Insert where needed
    tmp = vbslq_u16(cmp, tmp, tmp2);

    return vmovn_u16(tmp);
}

static inline uint8x8_t darken_color(uint8x8_t sc, uint8x8_t dc,
                                     uint8x8_t sa, uint8x8_t da) {
    return lighten_darken_color<false>(sc, dc, sa, da);
}

uint8x8x4_t darken_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = darken_color(src.val[NEON_R], dst.val[NEON_R],
                                   src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = darken_color(src.val[NEON_G], dst.val[NEON_G],
                                   src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = darken_color(src.val[NEON_B], dst.val[NEON_B],
                                   src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

static inline uint8x8_t lighten_color(uint8x8_t sc, uint8x8_t dc,
                                      uint8x8_t sa, uint8x8_t da) {
    return lighten_darken_color<true>(sc, dc, sa, da);
}

uint8x8x4_t lighten_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = lighten_color(src.val[NEON_R], dst.val[NEON_R],
                                    src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = lighten_color(src.val[NEON_G], dst.val[NEON_G],
                                    src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = lighten_color(src.val[NEON_B], dst.val[NEON_B],
                                    src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

static inline uint8x8_t hardlight_color(uint8x8_t sc, uint8x8_t dc,
                                        uint8x8_t sa, uint8x8_t da) {
    return overlay_hardlight_color<false>(sc, dc, sa, da);
}

uint8x8x4_t hardlight_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = hardlight_color(src.val[NEON_R], dst.val[NEON_R],
                                      src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = hardlight_color(src.val[NEON_G], dst.val[NEON_G],
                                      src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = hardlight_color(src.val[NEON_B], dst.val[NEON_B],
                                      src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

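// Annotation (not part of the upstream file): the scalar reference for
// difference is, per channel,
//     clamp_0_255(sc + dc - 2 * SkDiv255Round(min(sc*da, dc*sa)))
// which is what the vminq/vshlq/vmaxq/vminq sequence below computes on eight
// 16-bit lanes at once.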
static inline uint8x8_t difference_color(uint8x8_t sc, uint8x8_t dc,
                                         uint8x8_t sa, uint8x8_t da) {
    uint16x8_t sd, ds, tmp;
    int16x8_t val;

    sd = vmull_u8(sc, da);
    ds = vmull_u8(dc, sa);

    tmp = vminq_u16(sd, ds);
    tmp = SkDiv255Round_neon8_16_16(tmp);
    tmp = vshlq_n_u16(tmp, 1);

    val = vreinterpretq_s16_u16(vaddl_u8(sc, dc));

    val -= vreinterpretq_s16_u16(tmp);

    val = vmaxq_s16(val, vdupq_n_s16(0));
    val = vminq_s16(val, vdupq_n_s16(255));

    return vmovn_u16(vreinterpretq_u16_s16(val));
}

uint8x8x4_t difference_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = difference_color(src.val[NEON_R], dst.val[NEON_R],
                                       src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = difference_color(src.val[NEON_G], dst.val[NEON_G],
                                       src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = difference_color(src.val[NEON_B], dst.val[NEON_B],
                                       src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

static inline uint8x8_t exclusion_color(uint8x8_t sc, uint8x8_t dc,
                                        uint8x8_t sa, uint8x8_t da) {
    /* The equation can be simplified to 255(sc + dc) - 2 * sc * dc */

    uint16x8_t sc_plus_dc, scdc, const255;
    int32x4_t term1_1, term1_2, term2_1, term2_2;

    /* Calc (sc + dc) and (sc * dc) */
    sc_plus_dc = vaddl_u8(sc, dc);
    scdc = vmull_u8(sc, dc);

    /* Prepare constants */
    const255 = vdupq_n_u16(255);

    /* Calc the first term */
    term1_1 = vreinterpretq_s32_u32(
                vmull_u16(vget_low_u16(const255), vget_low_u16(sc_plus_dc)));
    term1_2 = vreinterpretq_s32_u32(
                vmull_u16(vget_high_u16(const255), vget_high_u16(sc_plus_dc)));

    /* Calc the second term */
    term2_1 = vreinterpretq_s32_u32(vshll_n_u16(vget_low_u16(scdc), 1));
    term2_2 = vreinterpretq_s32_u32(vshll_n_u16(vget_high_u16(scdc), 1));

    return clamp_div255round_simd8_32(term1_1 - term2_1, term1_2 - term2_2);
}

uint8x8x4_t exclusion_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = exclusion_color(src.val[NEON_R], dst.val[NEON_R],
                                      src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = exclusion_color(src.val[NEON_G], dst.val[NEON_G],
                                      src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = exclusion_color(src.val[NEON_B], dst.val[NEON_B],
                                      src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

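// Annotation (not part of the upstream file): multiply here is the SVG/PDF
// multiply blend, per channel roughly
//     clamp_div255round(sc*(255 - da) + dc*(255 - sa) + sc*dc)
// The three products are widened to 32 bits before the final rounded divide so
// their sum cannot overflow 16 bits.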
static inline uint8x8_t blendfunc_multiply_color(uint8x8_t sc, uint8x8_t dc,
                                                 uint8x8_t sa, uint8x8_t da) {
    uint32x4_t val1, val2;
    uint16x8_t scdc, t1, t2;

    t1 = vmull_u8(sc, vdup_n_u8(255) - da);
    t2 = vmull_u8(dc, vdup_n_u8(255) - sa);
    scdc = vmull_u8(sc, dc);

    val1 = vaddl_u16(vget_low_u16(t1), vget_low_u16(t2));
    val2 = vaddl_u16(vget_high_u16(t1), vget_high_u16(t2));

    val1 = vaddw_u16(val1, vget_low_u16(scdc));
    val2 = vaddw_u16(val2, vget_high_u16(scdc));

    return clamp_div255round_simd8_32(
                vreinterpretq_s32_u32(val1), vreinterpretq_s32_u32(val2));
}

uint8x8x4_t multiply_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) {
    uint8x8x4_t ret;

    ret.val[NEON_A] = srcover_color(src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_R] = blendfunc_multiply_color(src.val[NEON_R], dst.val[NEON_R],
                                               src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_G] = blendfunc_multiply_color(src.val[NEON_G], dst.val[NEON_G],
                                               src.val[NEON_A], dst.val[NEON_A]);
    ret.val[NEON_B] = blendfunc_multiply_color(src.val[NEON_B], dst.val[NEON_B],
                                               src.val[NEON_A], dst.val[NEON_A]);

    return ret;
}

////////////////////////////////////////////////////////////////////////////////

typedef uint8x8x4_t (*SkXfermodeProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst);

extern SkXfermodeProcSIMD gNEONXfermodeProcs[];

SkNEONProcCoeffXfermode::SkNEONProcCoeffXfermode(SkReadBuffer& buffer)
        : INHERITED(buffer) {
    fProcSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[this->getMode()]);
}

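// Annotation (not part of the upstream file): xfer32 consumes 8 pixels per
// iteration. The inline asm issues vld4.u8, which loads 32 bytes and
// de-interleaves them so that each d-register backing vsrc/vdst holds one
// channel of 8 pixels; the result is stored back interleaved with vst4_u8.
// The #if below appears to select the operand-list syntax ("%h[vsrc]") that
// newer GCC versions accept for a uint8x8x4_t operand, falling back to
// explicitly pinned d0..d7 register variables on older compilers.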
void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
                                     int count, const SkAlpha aa[]) const {
    SkASSERT(dst && src && count >= 0);

    SkXfermodeProc proc = this->getProc();
    SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD);
    SkASSERT(procSIMD != NULL);

    if (NULL == aa) {
        // Unrolled NEON code
        while (count >= 8) {
            uint8x8x4_t vsrc, vdst, vres;

#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
            asm volatile (
                "vld4.u8    %h[vsrc], [%[src]]!  \t\n"
                "vld4.u8    %h[vdst], [%[dst]]   \t\n"
                : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst), [src] "+&r" (src)
                : [dst] "r" (dst)
                :
            );
#else
            register uint8x8_t d0 asm("d0");
            register uint8x8_t d1 asm("d1");
            register uint8x8_t d2 asm("d2");
            register uint8x8_t d3 asm("d3");
            register uint8x8_t d4 asm("d4");
            register uint8x8_t d5 asm("d5");
            register uint8x8_t d6 asm("d6");
            register uint8x8_t d7 asm("d7");

            asm volatile (
                "vld4.u8    {d0-d3},[%[src]]!;"
                "vld4.u8    {d4-d7},[%[dst]];"
                : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3),
                  "=w" (d4), "=w" (d5), "=w" (d6), "=w" (d7),
                  [src] "+&r" (src)
                : [dst] "r" (dst)
                :
            );
            vsrc.val[0] = d0; vdst.val[0] = d4;
            vsrc.val[1] = d1; vdst.val[1] = d5;
            vsrc.val[2] = d2; vdst.val[2] = d6;
            vsrc.val[3] = d3; vdst.val[3] = d7;
#endif

            vres = procSIMD(vsrc, vdst);

            vst4_u8((uint8_t*)dst, vres);

            count -= 8;
            dst += 8;
        }
        // Leftovers
        for (int i = 0; i < count; i++) {
            dst[i] = proc(src[i], dst[i]);
        }
    } else {
        for (int i = count - 1; i >= 0; --i) {
            unsigned a = aa[i];
            if (0 != a) {
                SkPMColor dstC = dst[i];
                SkPMColor C = proc(src[i], dstC);
                if (a != 0xFF) {
                    C = SkFourByteInterp_neon(C, dstC, a);
                }
                dst[i] = C;
            }
        }
    }
}

void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
                                     const SkPMColor* SK_RESTRICT src, int count,
                                     const SkAlpha* SK_RESTRICT aa) const {
    SkASSERT(dst && src && count >= 0);

    SkXfermodeProc proc = this->getProc();
    SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD);
    SkASSERT(procSIMD != NULL);

    if (NULL == aa) {
        while (count >= 8) {
            uint16x8_t vdst, vres16;
            uint8x8x4_t vdst32, vsrc, vres;

            vdst = vld1q_u16(dst);

#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
            asm volatile (
                "vld4.u8    %h[vsrc], [%[src]]!  \t\n"
                : [vsrc] "=w" (vsrc), [src] "+&r" (src)
                : :
            );
#else
            register uint8x8_t d0 asm("d0");
            register uint8x8_t d1 asm("d1");
            register uint8x8_t d2 asm("d2");
            register uint8x8_t d3 asm("d3");

            asm volatile (
                "vld4.u8    {d0-d3},[%[src]]!;"
                : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3),
                  [src] "+&r" (src)
                : :
            );
            vsrc.val[0] = d0;
            vsrc.val[1] = d1;
            vsrc.val[2] = d2;
            vsrc.val[3] = d3;
#endif

            vdst32 = SkPixel16ToPixel32_neon8(vdst);
            vres = procSIMD(vsrc, vdst32);
            vres16 = SkPixel32ToPixel16_neon8(vres);

            vst1q_u16(dst, vres16);

            count -= 8;
            dst += 8;
        }
        for (int i = 0; i < count; i++) {
            SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
            dst[i] = SkPixel32ToPixel16_ToU16(proc(src[i], dstC));
        }
    } else {
        for (int i = count - 1; i >= 0; --i) {
            unsigned a = aa[i];
            if (0 != a) {
                SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
                SkPMColor C = proc(src[i], dstC);
                if (0xFF != a) {
                    C = SkFourByteInterp_neon(C, dstC, a);
                }
                dst[i] = SkPixel32ToPixel16_ToU16(C);
            }
        }
    }
}

#ifndef SK_IGNORE_TO_STRING
void SkNEONProcCoeffXfermode::toString(SkString* str) const {
    this->INHERITED::toString(str);
}
#endif

////////////////////////////////////////////////////////////////////////////////

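// Annotation (not part of the upstream file): both tables below are indexed by
// SkXfermode::Mode. A NULL entry means there is no NEON implementation for that
// mode, in which case SkPlatformXfermodeFactory_impl_neon /
// SkPlatformXfermodeProcFactory_impl_neon return NULL and the caller falls back
// to the portable code path.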
SkXfermodeProcSIMD gNEONXfermodeProcs[] = {
    NULL, // kClear_Mode
    NULL, // kSrc_Mode
    NULL, // kDst_Mode
    NULL, // kSrcOver_Mode
    dstover_modeproc_neon8,
    srcin_modeproc_neon8,
    dstin_modeproc_neon8,
    srcout_modeproc_neon8,
    dstout_modeproc_neon8,
    srcatop_modeproc_neon8,
    dstatop_modeproc_neon8,
    xor_modeproc_neon8,
    plus_modeproc_neon8,
    modulate_modeproc_neon8,
    screen_modeproc_neon8,

    overlay_modeproc_neon8,
    darken_modeproc_neon8,
    lighten_modeproc_neon8,
    NULL, // kColorDodge_Mode
    NULL, // kColorBurn_Mode
    hardlight_modeproc_neon8,
    NULL, // kSoftLight_Mode
    difference_modeproc_neon8,
    exclusion_modeproc_neon8,
    multiply_modeproc_neon8,

    NULL, // kHue_Mode
    NULL, // kSaturation_Mode
    NULL, // kColor_Mode
    NULL, // kLuminosity_Mode
};

SK_COMPILE_ASSERT(
    SK_ARRAY_COUNT(gNEONXfermodeProcs) == SkXfermode::kLastMode + 1,
    mode_count_arm
);

SkXfermodeProc gNEONXfermodeProcs1[] = {
    NULL, // kClear_Mode
    NULL, // kSrc_Mode
    NULL, // kDst_Mode
    NULL, // kSrcOver_Mode
    NULL, // kDstOver_Mode
    NULL, // kSrcIn_Mode
    NULL, // kDstIn_Mode
    NULL, // kSrcOut_Mode
    NULL, // kDstOut_Mode
    srcatop_modeproc_neon,
    dstatop_modeproc_neon,
    xor_modeproc_neon,
    plus_modeproc_neon,
    modulate_modeproc_neon,
    NULL, // kScreen_Mode

    NULL, // kOverlay_Mode
    NULL, // kDarken_Mode
    NULL, // kLighten_Mode
    NULL, // kColorDodge_Mode
    NULL, // kColorBurn_Mode
    NULL, // kHardLight_Mode
    NULL, // kSoftLight_Mode
    NULL, // kDifference_Mode
    NULL, // kExclusion_Mode
    NULL, // kMultiply_Mode

    NULL, // kHue_Mode
    NULL, // kSaturation_Mode
    NULL, // kColor_Mode
    NULL, // kLuminosity_Mode
};

SK_COMPILE_ASSERT(
    SK_ARRAY_COUNT(gNEONXfermodeProcs1) == SkXfermode::kLastMode + 1,
    mode1_count_arm
);

SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec,
                                                         SkXfermode::Mode mode) {

    void* procSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[mode]);

    if (procSIMD != NULL) {
        return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD));
    }
    return NULL;
}

SkXfermodeProc SkPlatformXfermodeProcFactory_impl_neon(SkXfermode::Mode mode) {
    return gNEONXfermodeProcs1[mode];
}
