media/libvpx/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm

branch
TOR_BUG_9701
changeset 10
ac0c01689b40
equal deleted inserted replaced
-1:000000000000 0:e0a3ebca8b33
1 ;
2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 EXPORT |vp8_subtract_mby_armv6|
13 EXPORT |vp8_subtract_mbuv_armv6|
14 EXPORT |vp8_subtract_b_armv6|
15
16 INCLUDE vp8_asm_enc_offsets.asm
17
18 ARM
19 REQUIRE8
20 PRESERVE8
21
22 AREA ||.text||, CODE, READONLY, ALIGN=2
23
; vp8_subtract_b_armv6
;
; Computes the 4x4 residual of one block: for each of 4 rows, loads 4 source
; bytes and 4 predictor bytes and stores the four 16-bit differences
; (src - pred) to the block's src_diff buffer.
;
; Arguments:
;   r0  BLOCK *be    - base_src, src, src_diff and src_stride are read from it
;   r1  BLOCKD *bd   - predictor is read from it
;   r2  int pitch    - bytes added to the predictor pointer per row; the diff
;                      pointer advances by 2 * pitch bytes per row
;
; r4-r9 are saved on entry and restored on exit. r0, r1, r3 and the flags
; are overwritten.
|vp8_subtract_b_armv6| PROC

    stmfd   sp!, {r4-r9}

    ldr     r4, [r0, #vp8_block_base_src]
    ldr     r5, [r0, #vp8_block_src]
    ldr     r6, [r0, #vp8_block_src_diff]   ; r6 = diff pointer

    ldr     r3, [r4]                        ; r3 = *base_src
    ldr     r7, [r0, #vp8_block_src_stride] ; r7 = source row step
    add     r3, r3, r5                      ; src = *base_src + src
    ldr     r8, [r1, #vp8_blockd_predictor] ; r8 = predictor pointer

    mov     r9, #4                          ; row counter

loop_block

    ldr     r0, [r3], r7                    ; 4 src bytes, then src += src_stride
    ldr     r1, [r8], r2                    ; 4 pred bytes, then pred += pitch

    ; Widen the bytes to 16 bits, even and odd lanes separately.
    uxtb16  r4, r0                          ; [s2 | s0]
    uxtb16  r5, r1                          ; [p2 | p0]
    uxtb16  r0, r0, ror #8                  ; [s3 | s1]
    uxtb16  r1, r1, ror #8                  ; [p3 | p1]

    usub16  r4, r4, r5                      ; [d2 | d0]
    usub16  r5, r0, r1                      ; [d3 | d1]

    subs    r9, r9, #1                      ; flags stay live until the bne below

    ; Re-interleave the halfwords into their original order.
    pkhbt   r0, r4, r5, lsl #16             ; [d1 | d0]
    pkhtb   r1, r5, r4, asr #16             ; [d3 | d2]

    str     r0, [r6, #0]                    ; diff[0..1]
    str     r1, [r6, #4]                    ; diff[2..3]

    add     r6, r6, r2, lsl #1              ; diff += pitch (16-bit elements)
    bne     loop_block

    ldmfd   sp!, {r4-r9}
    bx      lr                              ; bx rather than "mov pc, lr": also
                                            ; returns correctly to Thumb callers
                                            ; on cores older than ARMv7

    ENDP
70
71
; vp8_subtract_mbuv_armv6
;
; Stores (source - prediction) as 16-bit values for 8 rows of 8 bytes read
; through usrc/upred, followed by 8 rows of 8 bytes read through vsrc/vpred.
; Output begins 512 bytes past diff and is written contiguously (16 bytes
; per row, diff pointer post-incremented).
;
; Arguments:
;   r0          short *diff
;   r1          unsigned char *usrc
;   r2          unsigned char *vsrc
;   r3          int src_stride
;   [sp, #0]    unsigned char *upred
;   [sp, #4]    unsigned char *vpred
;   [sp, #8]    int pred_stride
; Stack offsets are as seen on entry; after the 32-byte register save below
; the same slots are read at #32, #36 and #40.
|vp8_subtract_mbuv_armv6| PROC

    stmdb   sp!, {r4-r11}                   ; save callee-saved regs (32 bytes)

    ldr     r12, [sp, #40]                  ; r12 = pred_stride
    ldr     r5, [sp, #32]                   ; r5  = upred
    mov     r4, #8                          ; r4  = rows remaining
    add     r0, r0, #512                    ; diff += 512 bytes: start of the
                                            ; U (Cb) output

    ; ---- U plane: one 8-byte row per iteration, as halves A and B ----
sub_u_row
    ldr     r7, [r5]                        ; A: upred bytes 0-3
    ldr     r6, [r1]                        ; A: usrc bytes 0-3

    uxtb16  r8, r6                          ; A: [s2 | s0]
    uxtb16  r9, r7                          ; A: [p2 | p0]
    uxtb16  r10, r6, ror #8                 ; A: [s3 | s1]
    uxtb16  r11, r7, ror #8                 ; A: [p3 | p1]

    usub16  r6, r8, r9                      ; A: [d2 | d0]
    usub16  r7, r10, r11                    ; A: [d3 | d1]

    ldr     r10, [r1, #4]                   ; B: usrc bytes 4-7
    ldr     r11, [r5, #4]                   ; B: upred bytes 4-7

    pkhbt   r8, r6, r7, lsl #16             ; A: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; A: [d3 | d2]

    str     r8, [r0], #4                    ; A: write [d1 | d0]
    uxtb16  r8, r10                         ; B: [s2 | s0]
    str     r9, [r0], #4                    ; A: write [d3 | d2]

    uxtb16  r9, r11                         ; B: [p2 | p0]
    uxtb16  r10, r10, ror #8                ; B: [s3 | s1]
    uxtb16  r11, r11, ror #8                ; B: [p3 | p1]

    usub16  r6, r8, r9                      ; B: [d2 | d0]
    usub16  r7, r10, r11                    ; B: [d3 | d1]

    add     r5, r5, r12                     ; upred += pred_stride
    add     r1, r1, r3                      ; usrc  += src_stride

    pkhbt   r8, r6, r7, lsl #16             ; B: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; B: [d3 | d2]

    str     r8, [r0], #4                    ; B: write [d1 | d0]
    subs    r4, r4, #1                      ; one row done; flags feed the bne
    str     r9, [r0], #4                    ; B: write [d3 | d2]

    bne     sub_u_row

    mov     r4, #8                          ; r4 = rows remaining
    ldr     r5, [sp, #36]                   ; r5 = vpred

    ; ---- V plane: same row kernel, reading through r2 / r5 ----
sub_v_row
    ldr     r7, [r5]                        ; A: vpred bytes 0-3
    ldr     r6, [r2]                        ; A: vsrc bytes 0-3

    uxtb16  r8, r6                          ; A: [s2 | s0]
    uxtb16  r9, r7                          ; A: [p2 | p0]
    uxtb16  r10, r6, ror #8                 ; A: [s3 | s1]
    uxtb16  r11, r7, ror #8                 ; A: [p3 | p1]

    usub16  r6, r8, r9                      ; A: [d2 | d0]
    usub16  r7, r10, r11                    ; A: [d3 | d1]

    ldr     r10, [r2, #4]                   ; B: vsrc bytes 4-7
    ldr     r11, [r5, #4]                   ; B: vpred bytes 4-7

    pkhbt   r8, r6, r7, lsl #16             ; A: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; A: [d3 | d2]

    str     r8, [r0], #4                    ; A: write [d1 | d0]
    uxtb16  r8, r10                         ; B: [s2 | s0]
    str     r9, [r0], #4                    ; A: write [d3 | d2]

    uxtb16  r9, r11                         ; B: [p2 | p0]
    uxtb16  r10, r10, ror #8                ; B: [s3 | s1]
    uxtb16  r11, r11, ror #8                ; B: [p3 | p1]

    usub16  r6, r8, r9                      ; B: [d2 | d0]
    usub16  r7, r10, r11                    ; B: [d3 | d1]

    add     r5, r5, r12                     ; vpred += pred_stride
    add     r2, r2, r3                      ; vsrc  += src_stride

    pkhbt   r8, r6, r7, lsl #16             ; B: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; B: [d3 | d2]

    str     r8, [r0], #4                    ; B: write [d1 | d0]
    subs    r4, r4, #1                      ; one row done; flags feed the bne
    str     r9, [r0], #4                    ; B: write [d3 | d2]

    bne     sub_v_row

    ldmia   sp!, {r4-r11}                   ; restore callee-saved regs
    bx      lr

    ENDP
179
180
; vp8_subtract_mby_armv6
;
; Stores (source - prediction) as 16-bit values for 16 rows of 16 bytes.
; Each row is handled as four 4-byte groups (A-D) and produces 32 bytes of
; output; diff is post-incremented, so rows are written contiguously.
;
; Arguments:
;   r0          short *diff
;   r1          unsigned char *src
;   r2          int src_stride
;   r3          unsigned char *pred
;   [sp, #0]    int pred_stride
; The stack offset is as seen on entry; after the 32-byte register save
; below the slot is read at #32.
|vp8_subtract_mby_armv6| PROC

    stmdb   sp!, {r4-r11}                   ; save callee-saved regs (32 bytes)
    mov     r4, #16                         ; r4  = rows remaining
    ldr     r12, [sp, #32]                  ; r12 = pred_stride

sub_y_row
    ldr     r7, [r3]                        ; A: pred bytes 0-3
    ldr     r6, [r1]                        ; A: src bytes 0-3

    uxtb16  r8, r6                          ; A: [s2 | s0]
    uxtb16  r9, r7                          ; A: [p2 | p0]
    uxtb16  r10, r6, ror #8                 ; A: [s3 | s1]
    uxtb16  r11, r7, ror #8                 ; A: [p3 | p1]

    usub16  r6, r8, r9                      ; A: [d2 | d0]
    usub16  r7, r10, r11                    ; A: [d3 | d1]

    ldr     r10, [r1, #4]                   ; B: src bytes 4-7
    ldr     r11, [r3, #4]                   ; B: pred bytes 4-7

    pkhbt   r8, r6, r7, lsl #16             ; A: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; A: [d3 | d2]

    str     r8, [r0], #4                    ; A: write [d1 | d0]
    uxtb16  r8, r10                         ; B: [s2 | s0]
    str     r9, [r0], #4                    ; A: write [d3 | d2]

    uxtb16  r9, r11                         ; B: [p2 | p0]
    uxtb16  r10, r10, ror #8                ; B: [s3 | s1]
    uxtb16  r11, r11, ror #8                ; B: [p3 | p1]

    usub16  r6, r8, r9                      ; B: [d2 | d0]
    usub16  r7, r10, r11                    ; B: [d3 | d1]

    ldr     r10, [r1, #8]                   ; C: src bytes 8-11
    ldr     r11, [r3, #8]                   ; C: pred bytes 8-11

    pkhbt   r8, r6, r7, lsl #16             ; B: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; B: [d3 | d2]

    str     r8, [r0], #4                    ; B: write [d1 | d0]
    uxtb16  r8, r10                         ; C: [s2 | s0]
    str     r9, [r0], #4                    ; B: write [d3 | d2]

    uxtb16  r9, r11                         ; C: [p2 | p0]
    uxtb16  r10, r10, ror #8                ; C: [s3 | s1]
    uxtb16  r11, r11, ror #8                ; C: [p3 | p1]

    usub16  r6, r8, r9                      ; C: [d2 | d0]
    usub16  r7, r10, r11                    ; C: [d3 | d1]

    ldr     r10, [r1, #12]                  ; D: src bytes 12-15
    ldr     r11, [r3, #12]                  ; D: pred bytes 12-15

    pkhbt   r8, r6, r7, lsl #16             ; C: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; C: [d3 | d2]

    str     r8, [r0], #4                    ; C: write [d1 | d0]
    uxtb16  r8, r10                         ; D: [s2 | s0]
    str     r9, [r0], #4                    ; C: write [d3 | d2]

    uxtb16  r9, r11                         ; D: [p2 | p0]
    uxtb16  r10, r10, ror #8                ; D: [s3 | s1]
    uxtb16  r11, r11, ror #8                ; D: [p3 | p1]

    usub16  r6, r8, r9                      ; D: [d2 | d0]
    usub16  r7, r10, r11                    ; D: [d3 | d1]

    add     r3, r3, r12                     ; pred += pred_stride
    add     r1, r1, r2                      ; src  += src_stride

    pkhbt   r8, r6, r7, lsl #16             ; D: [d1 | d0]
    pkhtb   r9, r7, r6, asr #16             ; D: [d3 | d2]

    str     r8, [r0], #4                    ; D: write [d1 | d0]
    subs    r4, r4, #1                      ; one row done; flags feed the bne
    str     r9, [r0], #4                    ; D: write [d3 | d2]

    bne     sub_y_row

    ldmia   sp!, {r4-r11}                   ; restore callee-saved regs
    bx      lr

    ENDP
270
271 END
272

mercurial