|
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
|
10 |
|
11 EXPORT |vp8_short_walsh4x4_armv6| |
|
12 |
|
13 ARM |
|
14 REQUIRE8 |
|
15 PRESERVE8 |
|
16 |
|
17 AREA |.text|, CODE, READONLY ; name this block of code |
|
18 |
|
19 ;short vp8_short_walsh4x4_armv6(short *input, short *output, int pitch) |
|
20 ; r0 short *input, |
|
21 ; r1 short *output, |
|
22 ; r2 int pitch |
|
23 |vp8_short_walsh4x4_armv6| PROC |
|
24 |
|
25 stmdb sp!, {r4 - r11, lr} |
|
26 |
|
27 ldrd r4, r5, [r0], r2 |
|
28 ldr lr, c00040004 |
|
29 ldrd r6, r7, [r0], r2 |
|
30 |
|
31 ; 0-3 |
|
32 qadd16 r3, r4, r5 ; [d1|a1] [1+3 | 0+2] |
|
33 qsub16 r4, r4, r5 ; [c1|b1] [1-3 | 0-2] |
|
34 |
|
35 ldrd r8, r9, [r0], r2 |
|
36 ; 4-7 |
|
37 qadd16 r5, r6, r7 ; [d1|a1] [5+7 | 4+6] |
|
38 qsub16 r6, r6, r7 ; [c1|b1] [5-7 | 4-6] |
|
39 |
|
40 ldrd r10, r11, [r0] |
|
41 ; 8-11 |
|
42 qadd16 r7, r8, r9 ; [d1|a1] [9+11 | 8+10] |
|
43 qsub16 r8, r8, r9 ; [c1|b1] [9-11 | 8-10] |
|
44 |
|
45 ; 12-15 |
|
46 qadd16 r9, r10, r11 ; [d1|a1] [13+15 | 12+14] |
|
47 qsub16 r10, r10, r11 ; [c1|b1] [13-15 | 12-14] |
|
48 |
|
49 |
|
50 lsls r2, r3, #16 |
|
51 smuad r11, r3, lr ; A0 = a1<<2 + d1<<2 |
|
52 addne r11, r11, #1 ; A0 += (a1!=0) |
|
53 |
|
54 lsls r2, r7, #16 |
|
55 smuad r12, r7, lr ; C0 = a1<<2 + d1<<2 |
|
56 addne r12, r12, #1 ; C0 += (a1!=0) |
|
57 |
|
58 add r0, r11, r12 ; a1_0 = A0 + C0 |
|
59 sub r11, r11, r12 ; b1_0 = A0 - C0 |
|
60 |
|
61 lsls r2, r5, #16 |
|
62 smuad r12, r5, lr ; B0 = a1<<2 + d1<<2 |
|
63 addne r12, r12, #1 ; B0 += (a1!=0) |
|
64 |
|
65 lsls r2, r9, #16 |
|
66 smuad r2, r9, lr ; D0 = a1<<2 + d1<<2 |
|
67 addne r2, r2, #1 ; D0 += (a1!=0) |
|
68 |
|
69 add lr, r12, r2 ; d1_0 = B0 + D0 |
|
70 sub r12, r12, r2 ; c1_0 = B0 - D0 |
|
71 |
|
72 ; op[0,4,8,12] |
|
73 adds r2, r0, lr ; a2 = a1_0 + d1_0 |
|
74 addmi r2, r2, #1 ; += a2 < 0 |
|
75 add r2, r2, #3 ; += 3 |
|
76 subs r0, r0, lr ; d2 = a1_0 - d1_0 |
|
77 mov r2, r2, asr #3 ; >> 3 |
|
78 strh r2, [r1] ; op[0] |
|
79 |
|
80 addmi r0, r0, #1 ; += a2 < 0 |
|
81 add r0, r0, #3 ; += 3 |
|
82 ldr lr, c00040004 |
|
83 mov r0, r0, asr #3 ; >> 3 |
|
84 strh r0, [r1, #24] ; op[12] |
|
85 |
|
86 adds r2, r11, r12 ; b2 = b1_0 + c1_0 |
|
87 addmi r2, r2, #1 ; += a2 < 0 |
|
88 add r2, r2, #3 ; += 3 |
|
89 subs r0, r11, r12 ; c2 = b1_0 - c1_0 |
|
90 mov r2, r2, asr #3 ; >> 3 |
|
91 strh r2, [r1, #8] ; op[4] |
|
92 |
|
93 addmi r0, r0, #1 ; += a2 < 0 |
|
94 add r0, r0, #3 ; += 3 |
|
95 smusd r3, r3, lr ; A3 = a1<<2 - d1<<2 |
|
96 smusd r7, r7, lr ; C3 = a1<<2 - d1<<2 |
|
97 mov r0, r0, asr #3 ; >> 3 |
|
98 strh r0, [r1, #16] ; op[8] |
|
99 |
|
100 |
|
101 ; op[3,7,11,15] |
|
102 add r0, r3, r7 ; a1_3 = A3 + C3 |
|
103 sub r3, r3, r7 ; b1_3 = A3 - C3 |
|
104 |
|
105 smusd r5, r5, lr ; B3 = a1<<2 - d1<<2 |
|
106 smusd r9, r9, lr ; D3 = a1<<2 - d1<<2 |
|
107 add r7, r5, r9 ; d1_3 = B3 + D3 |
|
108 sub r5, r5, r9 ; c1_3 = B3 - D3 |
|
109 |
|
110 adds r2, r0, r7 ; a2 = a1_3 + d1_3 |
|
111 addmi r2, r2, #1 ; += a2 < 0 |
|
112 add r2, r2, #3 ; += 3 |
|
113 adds r9, r3, r5 ; b2 = b1_3 + c1_3 |
|
114 mov r2, r2, asr #3 ; >> 3 |
|
115 strh r2, [r1, #6] ; op[3] |
|
116 |
|
117 addmi r9, r9, #1 ; += a2 < 0 |
|
118 add r9, r9, #3 ; += 3 |
|
119 subs r2, r3, r5 ; c2 = b1_3 - c1_3 |
|
120 mov r9, r9, asr #3 ; >> 3 |
|
121 strh r9, [r1, #14] ; op[7] |
|
122 |
|
123 addmi r2, r2, #1 ; += a2 < 0 |
|
124 add r2, r2, #3 ; += 3 |
|
125 subs r9, r0, r7 ; d2 = a1_3 - d1_3 |
|
126 mov r2, r2, asr #3 ; >> 3 |
|
127 strh r2, [r1, #22] ; op[11] |
|
128 |
|
129 addmi r9, r9, #1 ; += a2 < 0 |
|
130 add r9, r9, #3 ; += 3 |
|
131 smuad r3, r4, lr ; A1 = b1<<2 + c1<<2 |
|
132 smuad r5, r8, lr ; C1 = b1<<2 + c1<<2 |
|
133 mov r9, r9, asr #3 ; >> 3 |
|
134 strh r9, [r1, #30] ; op[15] |
|
135 |
|
136 ; op[1,5,9,13] |
|
137 add r0, r3, r5 ; a1_1 = A1 + C1 |
|
138 sub r3, r3, r5 ; b1_1 = A1 - C1 |
|
139 |
|
140 smuad r7, r6, lr ; B1 = b1<<2 + c1<<2 |
|
141 smuad r9, r10, lr ; D1 = b1<<2 + c1<<2 |
|
142 add r5, r7, r9 ; d1_1 = B1 + D1 |
|
143 sub r7, r7, r9 ; c1_1 = B1 - D1 |
|
144 |
|
145 adds r2, r0, r5 ; a2 = a1_1 + d1_1 |
|
146 addmi r2, r2, #1 ; += a2 < 0 |
|
147 add r2, r2, #3 ; += 3 |
|
148 adds r9, r3, r7 ; b2 = b1_1 + c1_1 |
|
149 mov r2, r2, asr #3 ; >> 3 |
|
150 strh r2, [r1, #2] ; op[1] |
|
151 |
|
152 addmi r9, r9, #1 ; += a2 < 0 |
|
153 add r9, r9, #3 ; += 3 |
|
154 subs r2, r3, r7 ; c2 = b1_1 - c1_1 |
|
155 mov r9, r9, asr #3 ; >> 3 |
|
156 strh r9, [r1, #10] ; op[5] |
|
157 |
|
158 addmi r2, r2, #1 ; += a2 < 0 |
|
159 add r2, r2, #3 ; += 3 |
|
160 subs r9, r0, r5 ; d2 = a1_1 - d1_1 |
|
161 mov r2, r2, asr #3 ; >> 3 |
|
162 strh r2, [r1, #18] ; op[9] |
|
163 |
|
164 addmi r9, r9, #1 ; += a2 < 0 |
|
165 add r9, r9, #3 ; += 3 |
|
166 smusd r4, r4, lr ; A2 = b1<<2 - c1<<2 |
|
167 smusd r8, r8, lr ; C2 = b1<<2 - c1<<2 |
|
168 mov r9, r9, asr #3 ; >> 3 |
|
169 strh r9, [r1, #26] ; op[13] |
|
170 |
|
171 |
|
172 ; op[2,6,10,14] |
|
173 add r11, r4, r8 ; a1_2 = A2 + C2 |
|
174 sub r12, r4, r8 ; b1_2 = A2 - C2 |
|
175 |
|
176 smusd r6, r6, lr ; B2 = b1<<2 - c1<<2 |
|
177 smusd r10, r10, lr ; D2 = b1<<2 - c1<<2 |
|
178 add r4, r6, r10 ; d1_2 = B2 + D2 |
|
179 sub r8, r6, r10 ; c1_2 = B2 - D2 |
|
180 |
|
181 adds r2, r11, r4 ; a2 = a1_2 + d1_2 |
|
182 addmi r2, r2, #1 ; += a2 < 0 |
|
183 add r2, r2, #3 ; += 3 |
|
184 adds r9, r12, r8 ; b2 = b1_2 + c1_2 |
|
185 mov r2, r2, asr #3 ; >> 3 |
|
186 strh r2, [r1, #4] ; op[2] |
|
187 |
|
188 addmi r9, r9, #1 ; += a2 < 0 |
|
189 add r9, r9, #3 ; += 3 |
|
190 subs r2, r12, r8 ; c2 = b1_2 - c1_2 |
|
191 mov r9, r9, asr #3 ; >> 3 |
|
192 strh r9, [r1, #12] ; op[6] |
|
193 |
|
194 addmi r2, r2, #1 ; += a2 < 0 |
|
195 add r2, r2, #3 ; += 3 |
|
196 subs r9, r11, r4 ; d2 = a1_2 - d1_2 |
|
197 mov r2, r2, asr #3 ; >> 3 |
|
198 strh r2, [r1, #20] ; op[10] |
|
199 |
|
200 addmi r9, r9, #1 ; += a2 < 0 |
|
201 add r9, r9, #3 ; += 3 |
|
202 mov r9, r9, asr #3 ; >> 3 |
|
203 strh r9, [r1, #28] ; op[14] |
|
204 |
|
205 |
|
206 ldmia sp!, {r4 - r11, pc} |
|
207 ENDP ; |vp8_short_walsh4x4_armv6| |
|
208 |
|
209 c00040004 |
|
210 DCD 0x00040004 |
|
211 |
|
212 END |