|
1 ; |
|
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
|
3 ; |
|
4 ; Use of this source code is governed by a BSD-style license |
|
5 ; that can be found in the LICENSE file in the root of the source |
|
6 ; tree. An additional intellectual property rights grant can be found |
|
7 ; in the file PATENTS. All contributing project authors may |
|
8 ; be found in the AUTHORS file in the root of the source tree. |
|
9 ; |
|
10 |
|
11 |
|
12 EXPORT |vp8cx_pack_mb_row_tokens_armv5| |
|
13 IMPORT |vp8_validate_buffer_arm| |
|
14 |
|
15 INCLUDE vp8_asm_enc_offsets.asm |
|
16 |
|
17 ARM |
|
18 REQUIRE8 |
|
19 PRESERVE8 |
|
20 |
|
21 AREA |.text|, CODE, READONLY |
|
22 |
|
23 |
|
; VALIDATE_POS $start, $pos
;
; Checks a write position against the writer's buffer by calling
; vp8_validate_buffer_arm with:
;     r0 = $start
;     r1 = $pos
;     r2 = word at [r0, #vp8_writer_buffer_end]
;     r3 = word at [r0, #vp8_writer_error]
;
; Caller requirements:
;   - r0 must hold the vp8_writer pointer when the macro is expanded.
;   - $start must not be r1: r1 is overwritten with $pos before $start
;     is read.
;   - Neither operand may be r2 or r3: both are loaded with the call
;     arguments before the operands are read.
;
; r0-r3, r12 and lr are saved around the call and restored afterwards.
; The condition flags are not saved by this macro.
    MACRO
    VALIDATE_POS $start, $pos
    push    {r0-r3, r12, lr}            ; remaining regs are preserved by the callee
    ldr     r3, [r0, #vp8_writer_error] ; 4th argument
    ldr     r2, [r0, #vp8_writer_buffer_end] ; 3rd argument
    mov     r1, $pos                    ; 2nd argument
    mov     r0, $start                  ; 1st argument (r0 no longer the writer)
    bl      vp8_validate_buffer_arm
    pop     {r0-r3, r12, lr}            ; r0 is the vp8_writer pointer again
    MEND
|
37 |
|
;-----------------------------------------------------------------------
; vp8cx_pack_mb_row_tokens_armv5
;
; Walks cpi->tp_list one entry per macroblock row and boolean-encodes
; every token in [start, stop) of each entry into the vp8_writer.
;
; Arguments on entry:
;   r0          VP8_COMP *cpi
;   r1          vp8_writer *w
;   r2          vp8_coef_encodings
;   r3          vp8_extra_bits
;   [sp, #0]    vp8_coef_tree (first stack argument)
;
; Stack frame after the prologue (10 registers pushed = 40 bytes, plus
; 24 bytes of locals, so the stack argument is found at [sp, #64]):
;   [sp, #0]    stop                (end of current row's token list)
;   [sp, #4]    b->tree             (tree for the current extra bits)
;   [sp, #8]    vp8_extra_bits
;   [sp, #12]   mb_rows still to process
;   [sp, #16]   address of the current tokenlist entry
;   [sp, #20]   vp8_coef_encodings
;   [sp, #64]   vp8_coef_tree
;
; Register roles inside the loops:
;   r0   vp8_writer *w              r1   p (current token)
;   r2   lowvalue                   r3   count
;   r5   range                      r8   n (bits left to encode)
;   r9   pp (probability table)     r10  tree (reloaded after temp use)
;   r12  v, left-aligned            lr   i (tree index)
;   r4, r6, r7, r11                 scratch
;
; lowvalue, range and count are read from the writer once at the start
; and written back once at the end; w->pos and w->buffer are accessed
; in memory each time a byte is emitted.
;
; The row loop is bottom-tested, so mb_rows is assumed to be >= 1.
;-----------------------------------------------------------------------

|vp8cx_pack_mb_row_tokens_armv5| PROC
    push    {r4-r12, lr}
    sub     sp, sp, #24                 ; room for the six locals above

    ; Compute address of cpi->common.mb_rows
    ldr     r4, _VP8_COMP_common_
    ldr     r6, _VP8_COMMON_MBrows_
    add     r4, r0, r4

    ldr     r5, [r4, r6]                ; load up mb_rows

    str     r2, [sp, #20]               ; save vp8_coef_encodings
    str     r5, [sp, #12]               ; save mb_rows
    str     r3, [sp, #8]                ; save vp8_extra_bits

    ldr     r4, _VP8_COMP_tplist_
    add     r4, r0, r4
    ldr     r7, [r4, #0]                ; dereference cpi->tp_list

    mov     r0, r1                      ; keep same as other loops

    ldr     r2, [r0, #vp8_writer_lowvalue]
    ldr     r5, [r0, #vp8_writer_range]
    ldr     r3, [r0, #vp8_writer_count]

mb_row_loop

    ldr     r1, [r7, #tokenlist_start]  ; p = start
    ldr     r9, [r7, #tokenlist_stop]
    str     r9, [sp, #0]                ; save stop for later comparison
    str     r7, [sp, #16]               ; tokenlist address for next time

    b       check_p_lt_stop

    ; actual work gets done here!

while_p_lt_stop
    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r4, [sp, #20]               ; vp8_coef_encodings
    mov     lr, #0                      ; i = 0
    add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t (8-byte stride)
    ldr     r9, [r1, #tokenextra_context_tree]  ; pp

    ldrb    r7, [r1, #tokenextra_skip_eob_node]

    ldr     r6, [r4, #vp8_token_value]  ; v
    ldr     r8, [r4, #vp8_token_len]    ; n

    ; vp8 specific skip_eob_node
    cmp     r7, #0
    movne   lr, #2                      ; i = 2
    subne   r8, r8, #1                  ; --n

    rsb     r4, r8, #32                 ; 32-n
    ldr     r10, [sp, #64]              ; vp8_coef_tree

    ; v is kept in r12 during the token pack loop, left-aligned so
    ; that each "lsls #1" moves the next bit to encode into carry
    lsl     r12, r6, r4                 ; r12 = v << (32 - n)

    ; loop start
token_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1

    ; Decisions are made based on the bit value shifted
    ; off of v, so set a flag here based on this.
    ; This value is referred to as "bb". None of the instructions
    ; between here and the subcs below update the flags.
    lsls    r12, r12, #1                ; bb = v >> n
    mul     r6, r4, r7                  ; ((range-1) * pp[i>>1]))

    ; bb can only be 0 or 1.  So only execute this statement
    ; if bb == 1, otherwise it will act like i + 0
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    add     r4, r7, r6, lsr #8          ; split = 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if (bb) range = range-split

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift = leading zeros within the low byte

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set

    ; Carry out of lowvalue: turn trailing 0xff bytes into 0 and
    ; increment the byte before them.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start
token_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ; NOTE(review): this byte load is unconditional. When x < 0 the
    ; ldrge above is skipped and the access goes through whatever r7
    ; last held; the loaded value is then ignored (cmpge is skipped
    ; and the flags from "cmp r4, #0" make the beq fall through).
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     token_zero_while_loop       ; while (x >= 0 && w->buffer[x] == 0xff)

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

    ; r10 is used earlier in the loop, but r10 is used as
    ; temp variable here.  So after r10 is used, reload
    ; vp8_coef_tree from its stack slot into r10
    ldr     r10, [sp, #64]              ; vp8_coef_tree

token_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     token_loop

    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r7, [sp, #8]                ; vp8_extra_bits
    ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    ;  element.  Here vp8_extra_bit_struct == 16
    add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t

    ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    cmp     r4, #0
    beq     skip_extra_bits

    ; if( b->base_val)
    ldr     r8, [r12, #vp8_extra_bit_struct_len] ; L
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    cmp     r8, #0                      ; if( L)
    beq     no_extra_bits

    ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    asr     r7, lr, #1                  ; v=e>>1

    ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    str     r10, [sp, #4]               ; b->tree

    rsb     r4, r8, #32                 ; 32-L
    lsl     r12, r7, r4                 ; left-align the L bits of v

    mov     lr, #0                      ; i = 0

    ; Same bit-by-bit encode as token_loop, walking b->tree with
    ; probabilities from b->prob.
extra_bits_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1
    lsls    r12, r12, #1                ; v >> n
    mul     r6, r4, r7                  ; (range-1) * pp[i>>1]
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    add     r4, r7, r6, lsr #8          ; split = 1 +  (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if (bb) range = range-split

    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     extra_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset= shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     extra_high_bit_not_set

    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       extra_zero_while_start
extra_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
extra_zero_while_start
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ; NOTE(review): unconditional load; with x < 0 it goes through
    ; the previous contents of r7 and its result is ignored.
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     extra_zero_while_loop       ; while (x >= 0 && w->buffer[x] == 0xff)

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
extra_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    ldr     r10, [sp, #4]               ; b->tree (r10 was used as a temp)
extra_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     extra_bits_loop             ; while (n)

    ; Encode the low bit of e with an even split of the range.
no_extra_bits
    ; lr was reused as the tree index above, so reload e.  Only bit 0
    ; is tested; the field is read through its named offset with the
    ; same halfword load used for the first read of p->Extra.
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    add     r4, r5, #1                  ; range + 1
    tst     lr, #1                      ; e & 1
    lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    addne   r2, r2, r4                  ; lowvalue += split
    subne   r4, r5, r4                  ; range = range-split
    tst     r2, #0x80000000             ; lowvalue & 0x80000000
    lsl     r5, r4, #1                  ; range <<= 1
    beq     end_high_bit_not_set

    ldr     r4, [r0, #vp8_writer_pos]
    mov     r7, #0
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       end_zero_while_start
end_zero_while_loop
    strb    r7, [r6, r4]                ; w->buffer[x] = 0
    sub     r4, r4, #1                  ; x--
end_zero_while_start
    cmp     r4, #0
    ldrge   r6, [r0, #vp8_writer_buffer]
    ; NOTE(review): unconditional load; with x < 0 it goes through
    ; the previous contents of r6 and its result is ignored.
    ldrb    r12, [r6, r4]
    cmpge   r12, #0xff
    beq     end_zero_while_loop         ; while (x >= 0 && w->buffer[x] == 0xff)

    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r7, [r6, r4]                ; w->buffer[x]
    add     r7, r7, #1
    strb    r7, [r6, r4]                ; w->buffer[x] + 1
end_high_bit_not_set
    adds    r3, r3, #1                  ; ++count
    lsl     r2, r2, #1                  ; lowvalue  <<= 1 (flags untouched)
    bne     end_count_zero              ; only emit a byte when count reached 0

    ldr     r4, [r0, #vp8_writer_pos]
    mvn     r3, #7                      ; count = -8
    ldr     r7, [r0, #vp8_writer_buffer]
    lsr     r6, r2, #24                 ; lowvalue >> 24
    add     r12, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r12, [r0, #vp8_writer_pos]

    VALIDATE_POS r7, r12                ; validate_buffer at pos

    strb    r6, [r7, r4]                ; w->buffer[w->pos++] = lowvalue >> 24
end_count_zero
skip_extra_bits
    add     r1, r1, #TOKENEXTRA_SZ      ; ++p
check_p_lt_stop
    ldr     r4, [sp, #0]                ; stop
    cmp     r1, r4                      ; while( p < stop)
    bcc     while_p_lt_stop             ; unsigned compare of the two pointers

    ldr     r6, [sp, #12]               ; mb_rows
    ldr     r7, [sp, #16]               ; tokenlist address
    subs    r6, r6, #1
    add     r7, r7, #TOKENLIST_SZ       ; next element in the array
    str     r6, [sp, #12]
    bne     mb_row_loop

    ; Write the cached coder state back to the writer.
    str     r2, [r0, #vp8_writer_lowvalue]
    str     r5, [r0, #vp8_writer_range]
    str     r3, [r0, #vp8_writer_count]
    add     sp, sp, #24
    pop     {r4-r12, pc}
    ENDP

; Offset constants loaded PC-relatively by the prologue above.
_VP8_COMP_common_
    DCD     vp8_comp_common
_VP8_COMMON_MBrows_
    DCD     vp8_common_mb_rows
_VP8_COMP_tplist_
    DCD     vp8_comp_tplist
|
351 |
|
352 END |