|
1 ; |
|
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
|
3 ; |
|
4 ; Use of this source code is governed by a BSD-style license |
|
5 ; that can be found in the LICENSE file in the root of the source |
|
6 ; tree. An additional intellectual property rights grant can be found |
|
7 ; in the file PATENTS. All contributing project authors may |
|
8 ; be found in the AUTHORS file in the root of the source tree. |
|
9 ; |
|
10 |
|
11 |
|
; Symbol exported from this file; it is defined as a PROC further down.
12 EXPORT |vp8cx_pack_tokens_armv5| |
|
; External routine called (via bl) from the VALIDATE_POS macro.
13 IMPORT |vp8_validate_buffer_arm| |
|
14 |
|
; Presumably supplies the vp8_writer_*, tokenextra_*, vp8_token_*,
; vp8_extra_bit_struct_* offsets and TOKENEXTRA_SZ used below -- TODO confirm
; against the generated offsets file.
15 INCLUDE vp8_asm_enc_offsets.asm |
|
16 |
|
; Assemble as ARM (not Thumb) code; REQUIRE8/PRESERVE8 declare that this
; code requires and preserves 8-byte stack alignment.
17 ARM |
|
18 REQUIRE8 |
|
19 PRESERVE8 |
|
20 |
|
; Everything below goes into a read-only code area named .text.
21 AREA |.text|, CODE, READONLY |
|
22 |
|
23 |
|
24 ; macro for validating write buffer position |
|
25 ; needs vp8_writer in r0 |
|
26 ; start shall not be in r1 |
|
; Additional constraints that follow from the instruction order below:
;   - $start and $pos must not be r2 or r3: both are overwritten with
;     w->buffer_end / w->error before $pos and $start are read.
;   - $start must not be r1 because r1 is loaded with $pos first.
; Call made:  vp8_validate_buffer_arm(r0 = $start, r1 = $pos,
;                                     r2 = w->buffer_end, r3 = w->error)
; r0-r3, r12 and lr are saved and restored around the call, so the macro
; leaves all of them unchanged. Six registers are pushed (24 bytes).
; NOTE(review): condition flags after the call are whatever the callee
; leaves; do not rely on flags across this macro.
27 MACRO |
|
28 VALIDATE_POS $start, $pos |
|
29 push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call |
|
; r0 still holds the vp8_writer here, so fetch its fields before r0 is
; repurposed as the first call argument.
30 ldr r2, [r0, #vp8_writer_buffer_end] |
|
31 ldr r3, [r0, #vp8_writer_error] |
|
; $pos is moved first; this is why $start may not live in r1.
32 mov r1, $pos |
|
33 mov r0, $start |
|
34 bl vp8_validate_buffer_arm |
|
35 pop {r0-r3, r12, lr} |
|
36 MEND |
|
37 |
|
38 |
|
;-----------------------------------------------------------------------
; vp8cx_pack_tokens_armv5
;
; Packs xcount TOKENEXTRA entries into the boolean-coded bitstream held
; by the vp8_writer. For each token the coefficient-tree bits are coded,
; then (when the token has a base value) its extra bits and sign bit.
;
; In:
;   r0      vp8_writer *w
;   r1      const TOKENEXTRA *p
;   r2      int xcount
;   r3      vp8_coef_encodings
;   s0      vp8_extra_bits      (first stack argument)
;   s1      vp8_coef_tree       (second stack argument)
;
; Stack frame after the prologue (10 registers pushed + 16 bytes locals):
;   [sp, #0]    stop  = p + xcount * 8
;   [sp, #4]    b->tree (saved while r10 is used as scratch)
;   [sp, #8]    vp8_coef_encodings
;   [sp, #12]   unused
;   [sp, #56]   s0: vp8_extra_bits
;   [sp, #60]   s1: vp8_coef_tree
;
; Register roles inside the main loop:
;   r0  w            r1  p           r2  lowvalue    r3  count
;   r5  range        r8  n (bits left)               r9  pp (probabilities)
;   r10 current tree r12 v (bits, MSB-aligned)       lr  i (tree index)
;   r4, r6, r7, r11  scratch
;
; On exit lowvalue, range and count are written back to *w; w->pos and
; w->buffer contents have been updated as bytes were emitted.
; r4-r12 are preserved for the caller (saved/restored by push/pop).
;
; NOTE(review): in the three *_zero_while_start loops the byte load
; "ldrb rX, [base, x]" is not conditional. If x ever became negative it
; would read through a stale base register before the loop exits, and the
; code after the loop would then increment buffer[x] at a negative index.
; This presumably cannot happen for a well-formed stream (a carry never
; propagates past the first byte) -- confirm before relying on it.
;-----------------------------------------------------------------------
|vp8cx_pack_tokens_armv5| PROC
    push    {r4-r12, lr}
    sub     sp, sp, #16

    ; stop = p + xcount * sizeof(TOKENEXTRA); the shift hard-codes
    ; sizeof(TOKENEXTRA) == 8 (the loop increment uses TOKENEXTRA_SZ).
    add     r2, r1, r2, lsl #3          ; stop = p + xcount*sizeof(TOKENEXTRA)
    str     r2, [sp, #0]                ; save stop
    str     r3, [sp, #8]                ; save vp8_coef_encodings
    ldr     r2, [r0, #vp8_writer_lowvalue]
    ldr     r5, [r0, #vp8_writer_range]
    ldr     r3, [r0, #vp8_writer_count]
    b       check_p_lt_stop

while_p_lt_stop
    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r4, [sp, #8]                ; vp8_coef_encodings
    mov     lr, #0                      ; i = 0
    add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t (8-byte entries)
    ldr     r9, [r1, #tokenextra_context_tree]  ; pp

    ldrb    r7, [r1, #tokenextra_skip_eob_node]

    ldr     r6, [r4, #vp8_token_value]  ; v
    ldr     r8, [r4, #vp8_token_len]    ; n

    ; vp8 specific skip_eob_node: start one level down the tree and
    ; code one bit fewer.
    cmp     r7, #0
    movne   lr, #2                      ; i = 2
    subne   r8, r8, #1                  ; --n

    rsb     r4, r8, #32                 ; 32-n
    ldr     r10, [sp, #60]              ; vp8_coef_tree

    ; v is kept in r12 during the token pack loop, aligned so that the
    ; next bit to code is always bit 31.
    lsl     r12, r6, r4                 ; r12 = v << (32 - n)

    ; ---- code n tree bits of the token --------------------------------
token_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1

    ; Decisions are made based on the bit value shifted
    ; off of v, so set a flag here based on this.
    ; This value is referred to as "bb" and lives in the carry flag.
    lsls    r12, r12, #1                ; carry = bb = next bit of v
    mul     r6, r4, r7                  ; (range-1) * pp[i>>1]

    ; bb can only be 0 or 1. So only execute this statement
    ; if bb == 1, otherwise it will act like i + 0
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    add     r4, r7, r6, lsr #8          ; split = 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if (bb) range = range-split

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count. This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift (flags untouched)
    bmi     token_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set

    ; Carry out of lowvalue: zero trailing 0xff bytes, then add one to
    ; the first byte that is not 0xff.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start
token_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     token_zero_while_loop       ; while (x >= 0 && w->buffer[x] == 0xff)

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

    ; r10 holds the tree pointer at the top of the loop but was used as
    ; a temporary above, so reload vp8_coef_tree into r10.
    ldr     r10, [sp, #60]              ; vp8_coef_tree

token_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     token_loop

    ; ---- extra bits for this token ------------------------------------
    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r7, [sp, #56]               ; vp8_extra_bits
    ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    ; element. Here sizeof (vp8_extra_bit_struct) == 16
    add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t

    ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    cmp     r4, #0
    beq     skip_extra_bits

    ; if( b->base_val)
    ldr     r8, [r12, #vp8_extra_bit_struct_len]   ; L
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    cmp     r8, #0                      ; if( L)
    beq     no_extra_bits

    ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    asr     r7, lr, #1                  ; v=e>>1

    ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    str     r10, [sp, #4]               ; b->tree

    rsb     r4, r8, #32
    lsl     r12, r7, r4                 ; r12 = v << (32 - L)

    mov     lr, #0                      ; i = 0

extra_bits_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1
    lsls    r12, r12, #1                ; carry = bb = next bit of v
    mul     r6, r4, r7                  ; (range-1) * pp[i>>1]
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    add     r4, r7, r6, lsr #8          ; split = 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if (bb) range = range-split

    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift (flags untouched)
    bmi     extra_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset= shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     extra_high_bit_not_set

    ; Carry propagation, same scheme as in token_loop.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       extra_zero_while_start
extra_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
extra_zero_while_start
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     extra_zero_while_loop       ; while (x >= 0 && w->buffer[x] == 0xff)

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
extra_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    ldr     r10, [sp, #4]               ; restore b->tree (r10 was scratch)
extra_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     extra_bits_loop             ; while (n)

    ; ---- final bit: e & 1, coded with split = (range + 1) >> 1 --------
no_extra_bits
    ; lr was reused as the tree index above, so reload e. Use the named
    ; offset and a halfword load so only the Extra field is read (the
    ; same access as the earlier load of e).
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    add     r4, r5, #1                  ; range + 1
    tst     lr, #1                      ; e & 1
    lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    addne   r2, r2, r4                  ; lowvalue += split
    subne   r4, r5, r4                  ; range = range-split
    tst     r2, #0x80000000             ; lowvalue & 0x80000000
    lsl     r5, r4, #1                  ; range <<= 1 (flags untouched)
    beq     end_high_bit_not_set

    ; Carry propagation; here r6 is the buffer base and r7 the zero byte.
    ldr     r4, [r0, #vp8_writer_pos]
    mov     r7, #0
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       end_zero_while_start
end_zero_while_loop
    strb    r7, [r6, r4]                ; w->buffer[x] = 0
    sub     r4, r4, #1                  ; x--
end_zero_while_start
    cmp     r4, #0
    ldrge   r6, [r0, #vp8_writer_buffer]
    ldrb    r12, [r6, r4]
    cmpge   r12, #0xff
    beq     end_zero_while_loop         ; while (x >= 0 && w->buffer[x] == 0xff)

    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r7, [r6, r4]                ; w->buffer[x]
    add     r7, r7, #1
    strb    r7, [r6, r4]                ; w->buffer[x] + 1
end_high_bit_not_set
    adds    r3, r3, #1                  ; ++count (sets Z when count == 0)
    lsl     r2, r2, #1                  ; lowvalue <<= 1 (flags untouched)
    bne     end_count_zero              ; skip the byte write unless count == 0

    ldr     r4, [r0, #vp8_writer_pos]
    mvn     r3, #7                      ; count = -8
    ldr     r7, [r0, #vp8_writer_buffer]
    lsr     r6, r2, #24                 ; lowvalue >> 24
    add     r12, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r12, [r0, #vp8_writer_pos]

    VALIDATE_POS r7, r12                ; validate_buffer at pos

    strb    r6, [r7, r4]                ; w->buffer[w->pos++] = lowvalue >> 24
end_count_zero
skip_extra_bits
    add     r1, r1, #TOKENEXTRA_SZ      ; ++p
check_p_lt_stop
    ldr     r4, [sp, #0]                ; stop
    cmp     r1, r4                      ; while( p < stop)
    bcc     while_p_lt_stop

    ; Write the coder state back to the writer and return.
    str     r2, [r0, #vp8_writer_lowvalue]
    str     r5, [r0, #vp8_writer_range]
    str     r3, [r0, #vp8_writer_count]
    add     sp, sp, #16
    pop     {r4-r12, pc}
    ENDP
|
316 |
|
317 END |