diff -r 000000000000 -r 6474c204b198 gfx/skia/trunk/src/opts/memset32_neon.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gfx/skia/trunk/src/opts/memset32_neon.S Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,113 @@
+/***************************************************************************
+ * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ ***************************************************************************/
+
+    .code 32                            /* ARM (A32) encoding: every instruction is 4 bytes, which the computed jump at memset32_dropthru relies on */
+    .fpu neon                           /* allow the NEON mnemonics used below (vdup, vmov, vst1) */
+    .align 4                            /* on ARM gas the operand is a power of two, i.e. a 16-byte boundary */
+    .globl memset32_neon                /* make the entry point visible to the linker */
+    .func                               /* NOTE(review): no symbol name is given to .func; confirm the target assembler accepts this */
+    /* memset32_neon: store the 32-bit value in r1 into r2 consecutive words starting at address r0. */
+    /* r0 = buffer, r1 = value, r2 = times to write (a count of 32-bit words, not bytes) */
+memset32_neon:                          /* overwrites r0, r2, r12, q0, q1 and the flags; returns with bx lr */
+    cmp r2, #1                          /* fast path: exactly one word */
+    streq r1, [r0], #4
+    bxeq lr
+    /* Counts above 4 go to memset32_neon_start, 0 returns at once, the rest use the scalar loop. */
+    cmp r2, #4
+    bgt memset32_neon_start             /* signed test: a count with bit 31 set is not taken here and falls through (NOTE(review): confirm callers never pass one) */
+    cmp r2, #0
+    bxeq lr                             /* nothing to write */
+memset32_neon_small:                    /* scalar loop: one word per iteration until r2 reaches 0 */
+    str r1, [r0], #4                    /* store, then advance r0 by 4 bytes */
+    subs r2, r2, #1
+    bne memset32_neon_small
+    bx lr
+memset32_neon_start:                    /* reached only when r2 is greater than 4 */
+    cmp r2, #16
+    blt memset32_dropthru               /* fewer than 16 words: only the scalar store table is needed */
+    vdup.32 q0, r1                      /* q0 = value replicated into all four 32-bit lanes */
+    vmov q1, q0                         /* q1 = same pattern, so one {q0, q1} store covers 32 bytes (8 words) */
+    cmp r2, #32                         /* pick the largest chunk size that fits the count */
+    blt memset32_16
+    cmp r2, #64
+    blt memset32_32
+    cmp r2, #128
+    blt memset32_64
+memset32_128:                           /* r2 is at least 128 here */
+    movs r12, r2, lsr #7                /* r12 = number of whole 128-word chunks (at least 1) */
+memset32_loop128:
+    subs r12, r12, #1                   /* decrement first; the bne after the stores tests these flags */
+    vst1.64 {q0, q1}, [r0]!             /* 16 stores of 32 bytes each = 512 bytes = 128 words; the ! writes the advanced address back to r0 */
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    bne memset32_loop128
+    ands r2, r2, #0x7f                  /* r2 = words left over after the 128-word chunks */
+    bxeq lr                             /* count was a multiple of 128 */
+memset32_64:
+    movs r12, r2, lsr #6                /* used only for its Z flag: set when r2 is below 64 */
+    beq memset32_32
+    vst1.64 {q0, q1}, [r0]!             /* 8 stores = 64 words */
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    ands r2, r2, #0x3f                  /* keep the remainder below 64 */
+    bxeq lr
+memset32_32:
+    movs r12, r2, lsr #5                /* Z set when r2 is below 32 */
+    beq memset32_16
+    vst1.64 {q0, q1}, [r0]!             /* 4 stores = 32 words */
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    vst1.64 {q0, q1}, [r0]!
+    ands r2, r2, #0x1f                  /* keep the remainder below 32 */
+    bxeq lr
+memset32_16:
+    movs r12, r2, lsr #4                /* Z set when r2 is below 16 */
+    beq memset32_dropthru
+    and r2, r2, #0xf                    /* remainder 0..15; no early return here, the table below copes with 0 */
+    vst1.64 {q0, q1}, [r0]!             /* 2 stores = 16 words */
+    vst1.64 {q0, q1}, [r0]!
+memset32_dropthru:                      /* r2 = 0..15 words still to write at r0 */
+    rsb r2, r2, #15                     /* r2 = 15 - r2 = number of table entries to skip */
+    add pc, pc, r2, lsl #2              /* computed jump: in ARM state pc reads as this instruction plus 8, i.e. the first str below; each skipped entry is 4 bytes */
+    nop                                 /* filler so that the pc value read above lands on the first table entry */
+    str r1, [r0, #56]                   /* table of 15 stores, highest offset first; do not add, remove or reorder entries */
+    str r1, [r0, #52]
+    str r1, [r0, #48]
+    str r1, [r0, #44]
+    str r1, [r0, #40]
+    str r1, [r0, #36]
+    str r1, [r0, #32]
+    str r1, [r0, #28]
+    str r1, [r0, #24]
+    str r1, [r0, #20]
+    str r1, [r0, #16]
+    str r1, [r0, #12]
+    str r1, [r0, #8]
+    str r1, [r0, #4]
+    str r1, [r0, #0]
+    bx lr                               /* skipping all 15 entries (remainder 0) lands directly here */
+
+    .endfunc
+    .end