|
;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
|
10 |
|
11 |
|
; Symbols made visible to the linker; each is defined by a PROC in this file.
    EXPORT  |vp8_subtract_mby_armv6|
    EXPORT  |vp8_subtract_mbuv_armv6|
    EXPORT  |vp8_subtract_b_armv6|

; Presumably supplies the vp8_block_* / vp8_blockd_* offset symbols that
; vp8_subtract_b_armv6 references; they are not defined in this file.
    INCLUDE vp8_asm_enc_offsets.asm

; ARM (not Thumb) instruction set; 8-byte stack alignment is both required
; of callers and preserved by the code in this file.
    ARM
    REQUIRE8
    PRESERVE8

; Read-only code section, aligned to 2^2 = 4 bytes.
    AREA    ||.text||, CODE, READONLY, ALIGN=2
|
23 |
|
; vp8_subtract_b_armv6
;
; Writes a 4x4 block of 16-bit differences (source pixel minus predicted
; pixel), one row of four pixels per loop iteration.
;
; In:
;   r0  BLOCK  *be      the base-source pointer, source offset, source stride
;                       and diff pointer are loaded from this structure via
;                       the vp8_block_* offsets
;   r1  BLOCKD *bd      the predictor pointer is loaded from this structure
;                       via vp8_blockd_predictor
;   r2  int    pitch    byte step between predictor rows; the diff pointer
;                       is stepped by 2 * pitch bytes per row
; Out:
;   differences are stored through the pointer read at vp8_block_src_diff
; Clobbers:
;   r0-r3 and the flags; r4-r9 are saved on entry and restored on exit
|vp8_subtract_b_armv6| PROC

    stmfd   sp!, {r4-r9}

    ldr     r4, [r0, #vp8_block_base_src]   ; r4 = address holding the base pointer
    ldr     r5, [r0, #vp8_block_src]        ; r5 = offset added to the base
    ldr     r6, [r0, #vp8_block_src_diff]   ; r6 = diff output pointer

    ldr     r3, [r4]                        ; r3 = *base_src
    ldr     r7, [r0, #vp8_block_src_stride] ; r7 = source row stride
    add     r3, r3, r5                      ; src = *base_src + src
    ldr     r8, [r1, #vp8_blockd_predictor] ; r8 = prediction pointer

    mov     r9, #4                          ; loop count: four rows

loop_block

    ; r0/r1 are free from here on: be and bd are not needed any more.
    ldr     r0, [r3], r7                    ; four src bytes, then src  += stride
    ldr     r1, [r8], r2                    ; four pred bytes, then pred += pitch

    ; Zero-extend the bytes into 16-bit lanes, even and odd bytes separately.
    uxtb16  r4, r0                          ; [s2 | s0]
    uxtb16  r5, r1                          ; [p2 | p0]
    uxtb16  r0, r0, ror #8                  ; [s3 | s1]
    uxtb16  r1, r1, ror #8                  ; [p3 | p1]

    usub16  r4, r4, r5                      ; [d2 | d0]
    usub16  r5, r0, r1                      ; [d3 | d1]

    subs    r9, r9, #1                      ; sets Z for the bne below; nothing
                                            ; in between writes the N/Z/C/V flags

    ; Re-interleave the lanes back into pixel order.
    pkhbt   r0, r4, r5, lsl #16             ; [d1 | d0]
    pkhtb   r1, r5, r4, asr #16             ; [d3 | d2]

    str     r0, [r6, #0]                    ; diff[0], diff[1]
    str     r1, [r6, #4]                    ; diff[2], diff[3]

    add     r6, r6, r2, lsl #1              ; diff += pitch entries (2 bytes each)
    bne     loop_block

    ldmfd   sp!, {r4-r9}
    bx      lr                              ; interworking return, same as the other
                                            ; routines here; on ARMv6 "mov pc, lr"
                                            ; would not switch back to Thumb state

    ENDP
|
70 |
|
71 |
|
; vp8_subtract_mbuv_armv6
;
; Chroma residual: for the U plane and then the V plane, eight rows of eight
; pixels are subtracted (source - prediction) and stored as 16-bit values.
; The U results start 512 bytes into diff; the V results follow directly.
;
; Arguments:
;   r0        short         *diff
;   r1        unsigned char *usrc
;   r2        unsigned char *vsrc
;   r3        int            src_stride
;   [sp, #0]  unsigned char *upred
;   [sp, #4]  unsigned char *vpred
;   [sp, #8]  int            pred_stride
;   (stack offsets as seen on entry; after the eight-register save below the
;    same slots are addressed as #32, #36 and #40)
;
; Modifies r0-r2, r12 and the flags; r4-r11 are saved and restored.
|vp8_subtract_mbuv_armv6| PROC

    stmdb   sp!, {r4-r11}                   ; 8 registers = 32 bytes

    ldr     r5, [sp, #32]                   ; r5  = upred
    ldr     r12, [sp, #40]                  ; r12 = pred_stride
    add     r0, r0, #512                    ; move diff to the U (Cb) area
    mov     r4, #8                          ; rows remaining

    ; ---- U plane: each pass handles pixels 0-3 (A) and 4-7 (B) of a row ----
mbuv_u_row
    ldr     r6, [r1]                        ; A: four usrc bytes
    ldr     r7, [r5]                        ; A: four upred bytes

    uxtb16  r8, r6                          ; A: [s2 | s0]
    uxtb16  r9, r7                          ; A: [p2 | p0]
    uxtb16  r10, r6, ror #8                 ; A: [s3 | s1]
    uxtb16  r11, r7, ror #8                 ; A: [p3 | p1]

    usub16  r6, r8, r9                      ; A: [d2 | d0]
    usub16  r7, r10, r11                    ; A: [d3 | d1]

    ldr     r10, [r1, #4]                   ; B: four usrc bytes (fetched early)
    ldr     r11, [r5, #4]                   ; B: four upred bytes

    pkhbt   r8, r6, r7, lsl #16             ; A: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; A: [d3 | d2]

    str     r8, [r0], #4                    ; A: store d0, d1
    uxtb16  r8, r10                         ; B: [s2 | s0]
    str     r9, [r0], #4                    ; A: store d2, d3

    uxtb16  r9, r11                         ; B: [p2 | p0]
    uxtb16  r10, r10, ror #8                ; B: [s3 | s1]
    uxtb16  r11, r11, ror #8                ; B: [p3 | p1]

    usub16  r6, r8, r9                      ; B: [d2 | d0]
    usub16  r7, r10, r11                    ; B: [d3 | d1]

    add     r1, r1, r3                      ; usrc  += src_stride
    add     r5, r5, r12                     ; upred += pred_stride

    pkhbt   r8, r6, r7, lsl #16             ; B: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; B: [d3 | d2]

    str     r8, [r0], #4                    ; B: store d0, d1
    subs    r4, r4, #1                      ; one row done; flags feed the bne
    str     r9, [r0], #4                    ; B: store d2, d3

    bne     mbuv_u_row

    ldr     r5, [sp, #36]                   ; r5 = vpred
    mov     r4, #8                          ; rows remaining

    ; ---- V plane: same sequence, reading vsrc (r2) and vpred (r5) ----
mbuv_v_row
    ldr     r6, [r2]                        ; A: four vsrc bytes
    ldr     r7, [r5]                        ; A: four vpred bytes

    uxtb16  r8, r6                          ; A: [s2 | s0]
    uxtb16  r9, r7                          ; A: [p2 | p0]
    uxtb16  r10, r6, ror #8                 ; A: [s3 | s1]
    uxtb16  r11, r7, ror #8                 ; A: [p3 | p1]

    usub16  r6, r8, r9                      ; A: [d2 | d0]
    usub16  r7, r10, r11                    ; A: [d3 | d1]

    ldr     r10, [r2, #4]                   ; B: four vsrc bytes (fetched early)
    ldr     r11, [r5, #4]                   ; B: four vpred bytes

    pkhbt   r8, r6, r7, lsl #16             ; A: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; A: [d3 | d2]

    str     r8, [r0], #4                    ; A: store d0, d1
    uxtb16  r8, r10                         ; B: [s2 | s0]
    str     r9, [r0], #4                    ; A: store d2, d3

    uxtb16  r9, r11                         ; B: [p2 | p0]
    uxtb16  r10, r10, ror #8                ; B: [s3 | s1]
    uxtb16  r11, r11, ror #8                ; B: [p3 | p1]

    usub16  r6, r8, r9                      ; B: [d2 | d0]
    usub16  r7, r10, r11                    ; B: [d3 | d1]

    add     r2, r2, r3                      ; vsrc  += src_stride
    add     r5, r5, r12                     ; vpred += pred_stride

    pkhbt   r8, r6, r7, lsl #16             ; B: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; B: [d3 | d2]

    str     r8, [r0], #4                    ; B: store d0, d1
    subs    r4, r4, #1                      ; one row done; flags feed the bne
    str     r9, [r0], #4                    ; B: store d2, d3

    bne     mbuv_v_row

    ldmia   sp!, {r4-r11}
    bx      lr

    ENDP
|
179 |
|
180 |
|
; vp8_subtract_mby_armv6
;
; Luma residual: sixteen rows of sixteen pixels are subtracted
; (source - prediction) and stored contiguously as 16-bit values at diff.
;
; Arguments:
;   r0        short         *diff
;   r1        unsigned char *src
;   r2        int            src_stride
;   r3        unsigned char *pred
;   [sp, #0]  int            pred_stride
;   (stack offset as seen on entry; after the eight-register save below the
;    same slot is addressed as #32)
;
; Modifies r0, r1, r3, r12 and the flags; r4-r11 are saved and restored.
|vp8_subtract_mby_armv6| PROC

    stmdb   sp!, {r4-r11}                   ; 8 registers = 32 bytes
    mov     r4, #16                         ; rows remaining
    ldr     r12, [sp, #32]                  ; r12 = pred_stride

    ; Each pass handles one row as four groups of four pixels: A, B, C, D.
    ; The loads for the next group are issued before the current group is
    ; packed and stored.
mby_row
    ldr     r6, [r1]                        ; A: four src bytes
    ldr     r7, [r3]                        ; A: four pred bytes

    uxtb16  r8, r6                          ; A: [s2 | s0]
    uxtb16  r9, r7                          ; A: [p2 | p0]
    uxtb16  r10, r6, ror #8                 ; A: [s3 | s1]
    uxtb16  r11, r7, ror #8                 ; A: [p3 | p1]

    usub16  r6, r8, r9                      ; A: [d2 | d0]
    usub16  r7, r10, r11                    ; A: [d3 | d1]

    ldr     r10, [r1, #4]                   ; B: four src bytes
    ldr     r11, [r3, #4]                   ; B: four pred bytes

    pkhbt   r8, r6, r7, lsl #16             ; A: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; A: [d3 | d2]

    str     r8, [r0], #4                    ; A: store d0, d1
    uxtb16  r8, r10                         ; B: [s2 | s0]
    str     r9, [r0], #4                    ; A: store d2, d3

    uxtb16  r9, r11                         ; B: [p2 | p0]
    uxtb16  r10, r10, ror #8                ; B: [s3 | s1]
    uxtb16  r11, r11, ror #8                ; B: [p3 | p1]

    usub16  r6, r8, r9                      ; B: [d2 | d0]
    usub16  r7, r10, r11                    ; B: [d3 | d1]

    ldr     r10, [r1, #8]                   ; C: four src bytes
    ldr     r11, [r3, #8]                   ; C: four pred bytes

    pkhbt   r8, r6, r7, lsl #16             ; B: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; B: [d3 | d2]

    str     r8, [r0], #4                    ; B: store d0, d1
    uxtb16  r8, r10                         ; C: [s2 | s0]
    str     r9, [r0], #4                    ; B: store d2, d3

    uxtb16  r9, r11                         ; C: [p2 | p0]
    uxtb16  r10, r10, ror #8                ; C: [s3 | s1]
    uxtb16  r11, r11, ror #8                ; C: [p3 | p1]

    usub16  r6, r8, r9                      ; C: [d2 | d0]
    usub16  r7, r10, r11                    ; C: [d3 | d1]

    ldr     r10, [r1, #12]                  ; D: four src bytes
    ldr     r11, [r3, #12]                  ; D: four pred bytes

    pkhbt   r8, r6, r7, lsl #16             ; C: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; C: [d3 | d2]

    str     r8, [r0], #4                    ; C: store d0, d1
    uxtb16  r8, r10                         ; D: [s2 | s0]
    str     r9, [r0], #4                    ; C: store d2, d3

    uxtb16  r9, r11                         ; D: [p2 | p0]
    uxtb16  r10, r10, ror #8                ; D: [s3 | s1]
    uxtb16  r11, r11, ror #8                ; D: [p3 | p1]

    usub16  r6, r8, r9                      ; D: [d2 | d0]
    usub16  r7, r10, r11                    ; D: [d3 | d1]

    add     r1, r1, r2                      ; src  += src_stride
    add     r3, r3, r12                     ; pred += pred_stride

    pkhbt   r8, r6, r7, lsl #16             ; D: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; D: [d3 | d2]

    str     r8, [r0], #4                    ; D: store d0, d1
    subs    r4, r4, #1                      ; one row done; flags feed the bne
    str     r9, [r0], #4                    ; D: store d2, d3

    bne     mby_row

    ldmia   sp!, {r4-r11}
    bx      lr

    ENDP
|
270 |
|
271 END |
|
272 |