;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT |vp8cx_pack_tokens_armv5|
    IMPORT |vp8_validate_buffer_arm|

    INCLUDE vp8_asm_enc_offsets.asm

; Stack frame of vp8cx_pack_tokens_armv5 after its prologue (byte offsets
; from sp).  The prologue pushes r4-r12 and lr (10 registers, 40 bytes) and
; then reserves PACK_LOCALS_SZ bytes of locals, so the first stacked
; argument (s0) is found at sp + 56 and the second (s1) at sp + 60.
; The local word at sp + 12 is not used.
PACK_LOCALS_SZ      EQU     16
; local: stop = p + xcount
PACK_SP_STOP        EQU     0
; local: b->tree for the extra-bits loop
PACK_SP_EXTRA_TREE  EQU     4
; local: vp8_coef_encodings (incoming r3)
PACK_SP_COEF_ENC    EQU     8
; stacked argument s0: vp8_extra_bits
PACK_SP_EXTRA_BITS  EQU     56
; stacked argument s1: vp8_coef_tree
PACK_SP_COEF_TREE   EQU     60

    ARM
    REQUIRE8
    PRESERVE8

    AREA    |.text|, CODE, READONLY


    ; Macro for validating the write buffer position.
    ;
    ; Calls vp8_validate_buffer_arm with
    ;   r0 = $start, r1 = $pos,
    ;   r2 = w->buffer_end, r3 = w->error   (both read through r0)
    ;
    ; Requirements:
    ;   - r0 must hold the vp8_writer pointer on entry.
    ;   - $start must not be r1: r1 is overwritten with $pos before
    ;     $start is copied into r0.
    ;   - Neither $start nor $pos may be r2 or r3: both are loaded with
    ;     the buffer_end / error fields before the operands are read.
    ;
    ; r0-r3, r12 and lr are saved and restored around the call; the
    ; condition flags are not saved by this macro.
    MACRO
    VALIDATE_POS $start, $pos
    push    {r0-r3, r12, lr}            ; rest of regs are preserved by subroutine call
    ldr     r2, [r0, #vp8_writer_buffer_end]
    ldr     r3, [r0, #vp8_writer_error]
    mov     r1, $pos
    mov     r0, $start
    bl      vp8_validate_buffer_arm
    pop     {r0-r3, r12, lr}
    MEND


; Packs the tokens in [p, p + xcount) into the vp8_writer's buffer.
;
; Arguments:
;   r0 vp8_writer *w
;   r1 const TOKENEXTRA *p
;   r2 int xcount
;   r3 vp8_coef_encodings
;   s0 vp8_extra_bits
;   s1 vp8_coef_tree
;
; Register roles inside the main loop:
;   r0   w (never modified)
;   r1   p, advanced by TOKENEXTRA_SZ per token
;   r2   lowvalue   (loaded from / stored back to w->lowvalue)
;   r3   count      (loaded from / stored back to w->count)
;   r5   range      (loaded from / stored back to w->range)
;   r8   n, number of bits still to encode in the current loop
;   r9   pp, probability table for the current loop
;   r10  tree table for the current loop; also reused as a temporary
;        and reloaded from the stack afterwards
;   r12  v, left-aligned so that each "lsls #1" moves the next bit
;        into the carry flag
;   lr   i, index into the tree table
;   r4, r6, r7, r11  scratch
;
; r4-r12 are saved on entry and restored on exit.
|vp8cx_pack_tokens_armv5| PROC
    push    {r4-r12, lr}
    sub     sp, sp, #PACK_LOCALS_SZ

    ; Add size of xcount * sizeof (TOKENEXTRA) to get stop
    ;  sizeof (TOKENEXTRA) is 8; the shift below must stay in sync
    ;  with TOKENEXTRA_SZ, which is used for ++p at the loop tail.
    add     r2, r1, r2, lsl #3          ; stop = p + xcount*sizeof(TOKENEXTRA)
    str     r2, [sp, #PACK_SP_STOP]
    str     r3, [sp, #PACK_SP_COEF_ENC] ; save vp8_coef_encodings
    ldr     r2, [r0, #vp8_writer_lowvalue]
    ldr     r5, [r0, #vp8_writer_range]
    ldr     r3, [r0, #vp8_writer_count]
    b       check_p_lt_stop

while_p_lt_stop
    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r4, [sp, #PACK_SP_COEF_ENC] ; vp8_coef_encodings
    mov     lr, #0                      ; i = 0
    add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t
    ldr     r9, [r1, #tokenextra_context_tree]   ; pp

    ldrb    r7, [r1, #tokenextra_skip_eob_node]

    ldr     r6, [r4, #vp8_token_value]  ; v
    ldr     r8, [r4, #vp8_token_len]    ; n

    ; vp8 specific skip_eob_node
    cmp     r7, #0
    movne   lr, #2                      ; i = 2
    subne   r8, r8, #1                  ; --n

    rsb     r4, r8, #32                 ; 32-n
    ldr     r10, [sp, #PACK_SP_COEF_TREE] ; vp8_coef_tree

    ; v is kept in r12 during the token pack loop
    lsl     r12, r6, r4                 ; r12 = v << 32 - n

; loop start
token_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp [i>>1]
    sub     r7, r5, #1                  ; range-1

    ; Decisions are made based on the bit value shifted
    ; off of v, so set a flag here based on this.
    ; This value is referred to as "bb"
    lsls    r12, r12, #1                ; carry = bb = next bit of v
    mul     r6, r4, r7                  ; ((range-1) * pp[i>>1]))

    ; bb can only be 0 or 1.  So only execute this statement
    ; if bb == 1, otherwise it will act like i + 0
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    add     r4, r7, r6, lsr #8          ; 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bb) range = range-split

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set

    ; Carry propagation: walk back over trailing 0xff bytes, zeroing
    ; them, then increment the first byte that is not 0xff.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start
token_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start
    ; while (x >= 0 && w->buffer[x] == 0xff)
    ; Note: the ldrb below is not conditional.  When x < 0 it still
    ; issues a load from r7 + x, but the loaded value is unused: cmpge
    ; is skipped and Z is clear from "cmp r4, #0", so beq falls through.
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     token_zero_while_loop

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

    ; r10 holds vp8_coef_tree for the token loop but was used as a
    ; temporary (buffer pointer) above, so reload vp8_coef_tree here.
    ldr     r10, [sp, #PACK_SP_COEF_TREE] ; vp8_coef_tree

token_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     token_loop

    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r7, [sp, #PACK_SP_EXTRA_BITS] ; vp8_extra_bits
    ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    ;  element.  Here vp8_extra_bit_struct == 16
    add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t

    ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    cmp     r4, #0
    beq     skip_extra_bits

;   if( b->base_val)
    ldr     r8, [r12, #vp8_extra_bit_struct_len] ; L
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    cmp     r8, #0                      ; if( L)
    beq     no_extra_bits

    ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    asr     r7, lr, #1                  ; v=e>>1

    ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    str     r10, [sp, #PACK_SP_EXTRA_TREE] ; b->tree

    rsb     r4, r8, #32                 ; 32-L
    lsl     r12, r7, r4                 ; r12 = v << (32-L)

    mov     lr, #0                      ; i = 0

extra_bits_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1
    lsls    r12, r12, #1                ; carry = bb = next bit of v
    mul     r6, r4, r7                  ; (range-1) * pp[i>>1]
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    add     r4, r7, r6, lsr #8          ; split = 1 +  (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bb) range = range-split

    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     extra_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset= shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     extra_high_bit_not_set

    ; Carry propagation, same scheme as in the token loop.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       extra_zero_while_start
extra_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
extra_zero_while_start
    ; while (x >= 0 && w->buffer[x] == 0xff)
    ; (ldrb is unconditional; its result is ignored when x < 0)
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     extra_zero_while_loop

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
extra_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    ldr     r10, [sp, #PACK_SP_EXTRA_TREE] ; r10 was a temporary; reload b->tree
extra_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     extra_bits_loop             ; while (n)

no_extra_bits
    ; Encode the low bit of e with split = (range + 1) >> 1.
    ; lr was reused as the tree index above, so p->Extra is reloaded.
    ; It is read as a signed halfword through its symbolic offset, the
    ; same way as the earlier load, so only the Extra field is fetched.
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    add     r4, r5, #1                  ; range + 1
    tst     lr, #1                      ; e & 1
    lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    addne   r2, r2, r4                  ; lowvalue += split
    subne   r4, r5, r4                  ; range = range-split
    tst     r2, #0x80000000             ; lowvalue & 0x80000000
    lsl     r5, r4, #1                  ; range <<= 1
    beq     end_high_bit_not_set

    ; Carry propagation, same scheme as in the token loop.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    mov     r7, #0
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       end_zero_while_start
end_zero_while_loop
    strb    r7, [r6, r4]                ; w->buffer[x] = 0
    sub     r4, r4, #1                  ; x--
end_zero_while_start
    ; while (x >= 0 && w->buffer[x] == 0xff)
    ; (ldrb is unconditional; its result is ignored when x < 0)
    cmp     r4, #0
    ldrge   r6, [r0, #vp8_writer_buffer]
    ldrb    r12, [r6, r4]
    cmpge   r12, #0xff
    beq     end_zero_while_loop

    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r7, [r6, r4]                ; w->buffer[x]
    add     r7, r7, #1
    strb    r7, [r6, r4]                ; w->buffer[x] + 1
end_high_bit_not_set
    adds    r3, r3, #1                  ; ++count
    lsl     r2, r2, #1                  ; lowvalue  <<= 1
    bne     end_count_zero              ; if (count == 0) flush one byte

    ldr     r4, [r0, #vp8_writer_pos]
    mvn     r3, #7                      ; count = -8
    ldr     r7, [r0, #vp8_writer_buffer]
    lsr     r6, r2, #24                 ; lowvalue >> 24
    add     r12, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r12, [r0, #vp8_writer_pos]

    VALIDATE_POS r7, r12                ; validate_buffer at pos

    strb    r6, [r7, r4]                ; w->buffer[w->pos++] = lowvalue >> 24
end_count_zero
skip_extra_bits
    add     r1, r1, #TOKENEXTRA_SZ      ; ++p
check_p_lt_stop
    ldr     r4, [sp, #PACK_SP_STOP]     ; stop
    cmp     r1, r4                      ; while( p < stop)
    bcc     while_p_lt_stop

    ; Write the coder state back to the vp8_writer.
    str     r2, [r0, #vp8_writer_lowvalue]
    str     r5, [r0, #vp8_writer_range]
    str     r3, [r0, #vp8_writer_count]
    add     sp, sp, #PACK_LOCALS_SZ
    pop     {r4-r12, pc}
    ENDP

    END