|
1 /*************************************************************************** |
|
2 * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved. |
|
3 * |
|
4 * Use of this source code is governed by a BSD-style license that can be |
|
5 * found in the LICENSE file. |
|
6 ***************************************************************************/ |
|
7 |
|
/* GNU as, ARM state (32-bit instructions), NEON enabled. */
    .code 32
    .fpu neon
    .align 4
    .globl memset32_neon
    /* Mark the symbol as a function (STT_FUNC) so an ELF linker can apply
     * ARM/Thumb interworking when this ARM-state routine is called from
     * Thumb code. */
    .type memset32_neon, %function
    .func memset32_neon

/*
 * memset32_neon - store a 32-bit value into consecutive words of a buffer.
 *
 * In:    r0 = buffer (written with word stores; presumably 4-byte aligned -
 *             confirm with callers)
 *        r1 = value
 *        r2 = times to write (number of 32-bit words)
 * Out:   no defined result; r0 is left wherever the last post-indexed
 *        store put it.
 * Clobb: r0, r2, r12, q0, q1, condition flags
 *
 * Strategy: 1..4 words use a simple loop, 5..15 words use a computed jump
 * into a run of 15 word stores, and 16+ words are written in blocks of
 * 128, 64, 32 and 16 words with NEON before the same computed jump
 * finishes the last 0..15 words.
 *
 * A count that is zero or has its sign bit set writes nothing: such a count
 * cannot describe a buffer that fits in a 32-bit address space.
 */
memset32_neon:
    /* Fast exit for exactly one word. */
    cmp         r2, #1
    streq       r1, [r0], #4
    bxeq        lr

    cmp         r2, #4
    bgt         .Lmemset32_bulk             /* 5 or more words */
    cmp         r2, #0
    bxle        lr                          /* count <= 0: nothing to do */

    /* 2..4 words: one store per iteration. */
.Lmemset32_small:
    str         r1, [r0], #4
    subs        r2, r2, #1
    bne         .Lmemset32_small
    bx          lr

.Lmemset32_bulk:
    cmp         r2, #16
    blt         .Lmemset32_tail             /* 5..15 words: scalar tail only */

    /* q0:q1 = eight copies of value; each vst1 below writes 8 words. */
    vdup.32     q0, r1
    vmov        q1, q0

    cmp         r2, #32
    blt         .Lmemset32_16
    cmp         r2, #64
    blt         .Lmemset32_32
    cmp         r2, #128
    blt         .Lmemset32_64

    /* r2 >= 128 here: r12 = number of whole 128-word blocks (never zero). */
    mov         r12, r2, lsr #7
.Lmemset32_loop128:
    subs        r12, r12, #1                /* NEON stores leave the flags alone */
    .rept       16                          /* 16 x 8 words = 128 words */
    vst1.32     {q0, q1}, [r0]!
    .endr
    bne         .Lmemset32_loop128
    ands        r2, r2, #0x7f               /* words still to write */
    bxeq        lr

    /* r2 is 1..127 here. */
.Lmemset32_64:
    cmp         r2, #64
    blo         .Lmemset32_32
    .rept       8                           /* 8 x 8 words = 64 words */
    vst1.32     {q0, q1}, [r0]!
    .endr
    ands        r2, r2, #0x3f
    bxeq        lr

    /* r2 is 1..63 here. */
.Lmemset32_32:
    cmp         r2, #32
    blo         .Lmemset32_16
    .rept       4                           /* 4 x 8 words = 32 words */
    vst1.32     {q0, q1}, [r0]!
    .endr
    ands        r2, r2, #0x1f
    bxeq        lr

    /* r2 is 1..31 here. */
.Lmemset32_16:
    cmp         r2, #16
    blo         .Lmemset32_tail
    and         r2, r2, #0xf
    .rept       2                           /* 2 x 8 words = 16 words */
    vst1.32     {q0, q1}, [r0]!
    .endr

    /*
     * Tail: r2 = 0..15 words remain at r0.
     * Reading pc in ARM state gives the address of the current instruction
     * plus 8, i.e. the first str below (the nop fills the slot in between).
     * Adding (15 - r2) * 4 skips that many 4-byte stores, so exactly r2
     * stores execute; r2 == 0 lands directly on the bx.
     * The nop and the 15 stores must stay contiguous and in this order.
     */
.Lmemset32_tail:
    rsb         r2, r2, #15
    add         pc, pc, r2, lsl #2
    nop
    str         r1, [r0, #56]
    str         r1, [r0, #52]
    str         r1, [r0, #48]
    str         r1, [r0, #44]
    str         r1, [r0, #40]
    str         r1, [r0, #36]
    str         r1, [r0, #32]
    str         r1, [r0, #28]
    str         r1, [r0, #24]
    str         r1, [r0, #20]
    str         r1, [r0, #16]
    str         r1, [r0, #12]
    str         r1, [r0, #8]
    str         r1, [r0, #4]
    str         r1, [r0, #0]
    bx          lr

    .size memset32_neon, . - memset32_neon
    .endfunc
|
113 .end |