;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; File preamble: linkage, shared structure offsets, instruction set and
; stack-alignment attributes, and the code section everything below lives in.
    EXPORT |vp8cx_pack_tokens_into_partitions_armv5|   ; entry point defined below
    IMPORT |vp8_validate_buffer_arm|                   ; called from VALIDATE_POS

    ; Supplies the symbolic offsets/sizes used below (vp8_writer_*,
    ; tokenextra_*, tokenlist_*, vp8_token_*, vp8_extra_bit_struct_*, ...).
    INCLUDE vp8_asm_enc_offsets.asm

    ARM                 ; assemble as ARM (not Thumb) instructions
    REQUIRE8            ; this code requires an 8-byte aligned stack
    PRESERVE8           ; this code keeps the stack 8-byte aligned

    AREA    |.text|, CODE, READONLY

; macro for validating write buffer position
;
; Expands to a call of
;     vp8_validate_buffer_arm($start, $pos, w->buffer_end, w->error)
; where w is the vp8_writer addressed by r0.
;
; Requirements / constraints on the operands:
;   * needs vp8_writer in r0
;   * $start shall not be in r1, r2 or r3: r2/r3 are overwritten with
;     buffer_end/error and r1 with $pos before $start is copied to r0
;   * $pos shall not be in r2 or r3 (overwritten before $pos is read);
;     r0 as $pos would pass the writer pointer itself
;
; r0-r3, r12 and lr are saved and restored around the call, so all
; registers hold their previous values afterwards.  The condition flags
; are NOT saved by the macro and may be changed by the callee.
; Six registers (24 bytes) are pushed, so an 8-byte aligned sp stays
; 8-byte aligned at the call.
    MACRO
    VALIDATE_POS $start, $pos
    push    {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
    ldr     r2, [r0, #vp8_writer_buffer_end]    ; arg 3: end of the output buffer
    ldr     r3, [r0, #vp8_writer_error]         ; arg 4: writer's error field
    mov     r1, $pos                            ; arg 2: position to validate
    mov     r0, $start                          ; arg 1: buffer start (r0 set last,
                                                ; it was still needed as writer ptr)
    bl      vp8_validate_buffer_arm
    pop     {r0-r3, r12, lr}
    MEND

; vp8cx_pack_tokens_into_partitions_armv5
;
; Packs the token lists of every macroblock row into num_part bool-coded
; partitions.  Partition i takes rows i, i + num_part, i + 2*num_part, ...
; and is written with the writer that follows the previous one in memory
; (the first partition uses the writer one vp8_writer_sz past cpi's bc).
; All partitions are emitted back to back starting at cx_data.
;
; Arguments:
;   r0 VP8_COMP *cpi
;   r1 unsigned char *cx_data
;   r2 const unsigned char *cx_data_end
;   r3 int num_part
;   s0 vp8_coef_encodings                (stack, [sp, #80] after the prologue)
;   s1 vp8_extra_bits,                   (stack, [sp, #84] after the prologue)
;   s2 const vp8_tree_index *            (stack, [sp, #88] after the prologue)
;
; Local stack frame (40 bytes, below the 10 saved registers):
;   [sp, #0]   stop       - end of the current row's token list
;   [sp, #4]   b->tree    - tree of the current extra-bits entry
;   [sp, #8]   cx_data_end
;   [sp, #12]  rows left for this partition (mb_rows - i, minus num_part/row)
;   [sp, #16]  address of the tokenlist entry being processed
;   [sp, #20]  num_part
;   [sp, #24]  ptr        - output position for the current partition
;   [sp, #28]  i          - partition index
;   [sp, #32]  tokenlist entry of the first row of the current partition
;   [sp, #36]  mb_rows
;
; Register roles inside the coding loops:
;   r0  vp8_writer *w        r1  p (current TOKENEXTRA)
;   r2  lowvalue             r3  count            r5  range
;   r8  n (bits left)        r9  pp (probabilities)
;   r10 tree in use          r12 v, left-aligned  lr  i (tree index)
;   r4, r6, r7, r11 are scratch.
; lowvalue/range/count live only in registers; only pos, buffer and
; buffer_end of the writer are stored by this routine.
;
; NOTE(review): in every "*_zero_while_start" block the ldrb is executed
; unconditionally, so when x < 0 it reads through a base register that was
; not reloaded (e.g. r7 == 1 on first entry from token_loop).  The loaded
; value is then ignored.  Left as is; confirm x < 0 is unreachable or make
; the load conditional.

|vp8cx_pack_tokens_into_partitions_armv5| PROC
    push    {r4-r12, lr}
    sub     sp, sp, #40

    ; Compute address of cpi->common.mb_rows
    ldr     r4, _VP8_COMP_common_
    ldr     r6, _VP8_COMMON_MBrows_
    add     r4, r0, r4

    ldr     r5, [r4, r6]                ; load up mb_rows

    str     r5, [sp, #36]               ; save mb_rows
    str     r1, [sp, #24]               ; save ptr = cx_data
    str     r3, [sp, #20]               ; save num_part
    str     r2, [sp, #8]                ; save cx_data_end

    ldr     r4, _VP8_COMP_tplist_
    add     r4, r0, r4
    ldr     r7, [r4, #0]                ; dereference cpi->tp_list
    str     r7, [sp, #32]               ; store start of cpi->tp_list

    ldr     r11, _VP8_COMP_bc_          ; load up vp8_writer out of cpi
    add     r0, r0, r11

    mov     r11, #0
    str     r11, [sp, #28]              ; i

    ; On entry to each iteration: r0 = previous writer, r7 = tokenlist
    ; entry of row i, r11 = i.
numparts_loop
    ldr     r2, _vp8_writer_sz_         ; load up sizeof(vp8_writer)
    add     r0, r2                      ; bc[i + 1]

    ldr     r10, [sp, #24]              ; ptr
    ldr     r5,  [sp, #36]              ; move mb_rows to the counting section
    subs    r5, r5, r11                 ; move start point with each partition
                                        ; mb_rows starts at i
                                        ; (flags consumed by "ble" below)
    str     r5,  [sp, #12]

    ; Reset all of the VP8 Writer data for each partition that
    ; is processed.
    ; start_encode

    ldr     r3, [sp, #8]
    str     r3, [r0, #vp8_writer_buffer_end]

    mov     r2, #0                      ; vp8_writer_lowvalue
    mov     r5, #255                    ; vp8_writer_range
    mvn     r3, #23                     ; vp8_writer_count (= -24)

    str     r2,  [r0, #vp8_writer_pos]
    str     r10, [r0, #vp8_writer_buffer]

    ; None of the ldr/str/mov/mvn above alter the flags set by "subs".
    ble     end_partition               ; if (mb_rows <= 0) end partition

mb_row_loop

    ldr     r1, [r7, #tokenlist_start]
    ldr     r9, [r7, #tokenlist_stop]
    str     r9, [sp, #0]                ; save stop for later comparison
    str     r7, [sp, #16]               ; tokenlist address for next time

    b       check_p_lt_stop

    ; actual work gets done here!

while_p_lt_stop
    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r4, [sp, #80]               ; vp8_coef_encodings
    mov     lr, #0
    add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t
                                        ; (8 bytes per entry)
    ldr     r9, [r1, #tokenextra_context_tree]   ; pp

    ldrb    r7, [r1, #tokenextra_skip_eob_node]

    ldr     r6, [r4, #vp8_token_value]  ; v
    ldr     r8, [r4, #vp8_token_len]    ; n

    ; vp8 specific skip_eob_node
    cmp     r7, #0
    movne   lr, #2                      ; i = 2
    subne   r8, r8, #1                  ; --n

    rsb     r4, r8, #32                 ; 32-n
    ldr     r10, [sp, #88]              ; vp8_coef_tree

    ; v is kept in r12 during the token pack loop
    lsl     r12, r6, r4                 ; r12 = v << (32 - n)

; loop start
token_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp [i>>1]
    sub     r7, r5, #1                  ; range-1

    ; Decisions are made based on the bit value shifted
    ; off of v, so set a flag here based on this.
    ; This value is referred to as "bb"
    lsls    r12, r12, #1                ; bb = v >> n (carry flag = bb)
    mul     r6, r4, r7                  ; ((range-1) * pp[i>>1]))

    ; bb can only be 0 or 1.  So only execute this statement
    ; if bb == 1, otherwise it will act like i + 0
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    add     r4, r7, r6, lsr #8          ; 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bb) range = range-split

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set

    ; Carry out of lowvalue: propagate it back through trailing 0xff bytes.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start
token_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff                  ; only compared when x >= 0
    beq     token_zero_while_loop       ; while (x >= 0 && w->buffer[x] == 0xff)

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

    ; r10 is used earlier in the loop, but r10 is used as
    ; temp variable here.  So after r10 is used, reload
    ; vp8_coef_tree into r10
    ldr     r10, [sp, #88]              ; vp8_coef_tree

token_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     token_loop

    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r7, [sp, #84]               ; vp8_extra_bits
    ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    ;  element.  Here sizeof (vp8_extra_bit_struct) == 16
    add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t

    ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    cmp     r4, #0
    beq     skip_extra_bits

;   if( b->base_val)
    ldr     r8, [r12, #vp8_extra_bit_struct_len] ; L
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    cmp     r8, #0                      ; if( L)
    beq     no_extra_bits

    ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    asr     r7, lr, #1                  ; v=e>>1

    ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    str     r10, [sp, #4]               ; b->tree

    rsb     r4, r8, #32
    lsl     r12, r7, r4                 ; left-align v: v << (32 - L)

    mov     lr, #0                      ; i = 0

extra_bits_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1
    lsls    r12, r12, #1                ; v >> n (carry flag = bb)
    mul     r6, r4, r7                  ; (range-1) * pp[i>>1]
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    add     r4, r7, r6, lsr #8          ; split = 1 +  (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bb) range = range-split

    clz     r6, r4
    sub     r6, r6, #24

    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     extra_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset= shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     extra_high_bit_not_set

    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       extra_zero_while_start
extra_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
extra_zero_while_start
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff                  ; only compared when x >= 0
    beq     extra_zero_while_loop       ; while (x >= 0 && w->buffer[x] == 0xff)

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
extra_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    ldr     r10, [sp, #4]               ; b->tree (r10 was used as a temp above)
extra_count_lt_zero
    lsl     r2, r2, r6

    subs    r8, r8, #1                  ; --n
    bne     extra_bits_loop             ; while (n)

    ; Write the low bit of e with an even split of the range.
no_extra_bits
    ; Reload e with the same named-offset halfword load used above (lr was
    ; reused as the tree index in extra_bits_loop); only bit 0 is tested.
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    add     r4, r5, #1                  ; range + 1
    tst     lr, #1
    lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    addne   r2, r2, r4                  ; lowvalue += split
    subne   r4, r5, r4                  ; range = range-split
    tst     r2, #0x80000000             ; lowvalue & 0x80000000
    lsl     r5, r4, #1                  ; range <<= 1
    beq     end_high_bit_not_set

    ldr     r4, [r0, #vp8_writer_pos]
    mov     r7, #0
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       end_zero_while_start
end_zero_while_loop
    strb    r7, [r6, r4]                ; w->buffer[x] = 0
    sub     r4, r4, #1                  ; x--
end_zero_while_start
    cmp     r4, #0
    ldrge   r6, [r0, #vp8_writer_buffer]
    ldrb    r12, [r6, r4]
    cmpge   r12, #0xff                  ; only compared when x >= 0
    beq     end_zero_while_loop         ; while (x >= 0 && w->buffer[x] == 0xff)

    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r7, [r6, r4]
    add     r7, r7, #1
    strb    r7, [r6, r4]                ; w->buffer[x] + 1
end_high_bit_not_set
    adds    r3, r3, #1                  ; ++count
    lsl     r2, r2, #1                  ; lowvalue  <<= 1 (flags untouched)
    bne     end_count_zero              ; skip the byte write unless count == 0

    ldr     r4, [r0, #vp8_writer_pos]
    mvn     r3, #7                      ; count = -8
    ldr     r7, [r0, #vp8_writer_buffer]
    lsr     r6, r2, #24                 ; lowvalue >> 24
    add     r12, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r12, [r0, #vp8_writer_pos]

    VALIDATE_POS r7, r12                ; validate_buffer at pos

    strb    r6, [r7, r4]
end_count_zero
skip_extra_bits
    add     r1, r1, #TOKENEXTRA_SZ      ; ++p
check_p_lt_stop
    ldr     r4, [sp, #0]                ; stop
    cmp     r1, r4                      ; while( p < stop)
    bcc     while_p_lt_stop

    ; Advance to the next row of this partition: num_parts rows further on.
    ldr     r10, [sp, #20]              ; num_parts
    mov     r1, #TOKENLIST_SZ
    mul     r1, r10, r1

    ldr     r6, [sp, #12]               ; mb_rows
    ldr     r7, [sp, #16]               ; tokenlist address
    subs    r6, r6, r10
    add     r7, r7, r1                  ; next element in the array
    str     r6, [sp, #12]
    bgt     mb_row_loop

    ; Flush the writer: encode 32 zero bits with probability 128.
end_partition
    mov     r12, #32

stop_encode_loop
    sub     r7, r5, #1                  ; range-1

    mov     r4, r7, lsl #7              ; ((range-1) * 128)

    mov     r7, #1
    add     r4, r7, r4, lsr #8          ; 1 + (((range-1) * 128) >> 8)

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero_se      ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set_se

    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start_se
token_zero_while_loop_se
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start_se
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff                  ; only compared when x >= 0
    beq     token_zero_while_loop_se    ; while (x >= 0 && w->buffer[x] == 0xff)

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set_se
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

token_count_lt_zero_se
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r12, r12, #1
    bne     stop_encode_loop

    ; The next partition starts right after the bytes just written.
    ldr     r4,  [r0, #vp8_writer_pos]  ; w->pos
    ldr     r12, [sp, #24]              ; ptr
    add     r12, r12, r4                ; ptr += w->pos
    str     r12, [sp, #24]

    ldr     r11, [sp, #28]              ; i
    ldr     r10, [sp, #20]              ; num_parts

    add     r11, r11, #1                ; i++
    str     r11, [sp, #28]

    ldr     r7, [sp, #32]               ; cpi->tp_list[i]
    mov     r1, #TOKENLIST_SZ
    add     r7, r7, r1                  ; next element in cpi->tp_list
    str     r7, [sp, #32]               ; cpi->tp_list[i+1]

    cmp     r10, r11
    bgt     numparts_loop               ; while (i < num_parts)

    add     sp, sp, #40
    pop     {r4-r12, pc}
    ENDP

; Literal pool for vp8cx_pack_tokens_into_partitions_armv5.
; Each word holds a structure offset or size that the routine fetches with
; a PC-relative "ldr rX, <label>" and then adds to a base pointer.
; The vp8_* symbols are not defined in this file; presumably they come
; from the included vp8_asm_enc_offsets.asm - confirm against that file.
_VP8_COMP_common_
    DCD     vp8_comp_common             ; added to cpi to reach "common"
_VP8_COMMON_MBrows_
    DCD     vp8_common_mb_rows          ; offset of mb_rows within common
_VP8_COMP_tplist_
    DCD     vp8_comp_tplist             ; offset of the tp_list pointer in cpi
_VP8_COMP_bc_
    DCD     vp8_comp_bc                 ; offset of the vp8_writer (bc) in cpi
_vp8_writer_sz_
    DCD     vp8_writer_sz               ; sizeof(vp8_writer), step between writers

    END