;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; Source path (from the patch header):
;   media/libvpx/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
;
; Bool-coder write routines for the VP8 encoder.  Every routine takes the
; writer state (BOOL_CODER *) in r0 and accesses its fields through the
; vp8_writer_* offsets pulled in from vp8_asm_enc_offsets.asm.


    EXPORT |vp8_start_encode|
    EXPORT |vp8_encode_bool|
    EXPORT |vp8_stop_encode|
    EXPORT |vp8_encode_value|
    IMPORT |vp8_validate_buffer_arm|

    INCLUDE vp8_asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA    |.text|, CODE, READONLY

    ; macro for validating write buffer position
    ;
    ; Calls vp8_validate_buffer_arm with
    ;   r0 = $start
    ;   r1 = $pos
    ;   r2 = writer->buffer_end
    ;   r3 = writer->error
    ;
    ; needs vp8_writer in r0
    ; start shall not be in r1
    ; r2 and r3 are loaded before $pos and $start are read, so neither
    ; argument may live in r2 or r3 either.
    ;
    ; r0-r3, r12 and lr are saved and restored around the call.  The
    ; condition flags are not saved here, so do not rely on them across
    ; an invocation.
    MACRO
    VALIDATE_POS $start, $pos
    push    {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
    ldr     r2, [r0, #vp8_writer_buffer_end]
    ldr     r3, [r0, #vp8_writer_error]
    mov     r1, $pos
    mov     r0, $start
    bl      vp8_validate_buffer_arm
    pop     {r0-r3, r12, lr}
    MEND

;-----------------------------------------------------------------------
; vp8_start_encode
;
; Resets the writer state:
;   buffer_end = source_end, lowvalue = 0, range = 255,
;   count = -24, pos = 0, buffer = source
;
; r0 BOOL_CODER *br
; r1 unsigned char *source
; r2 unsigned char *source_end
;
; Leaf routine; only r2, r3 and r12 are used as scratch.
;-----------------------------------------------------------------------
|vp8_start_encode| PROC
    str     r2,  [r0, #vp8_writer_buffer_end]
    mov     r12, #0
    mov     r3,  #255
    mvn     r2,  #23                    ; ~23 == -24
    str     r12, [r0, #vp8_writer_lowvalue]
    str     r3,  [r0, #vp8_writer_range]
    str     r2,  [r0, #vp8_writer_count]
    str     r12, [r0, #vp8_writer_pos]
    str     r1,  [r0, #vp8_writer_buffer]
    bx      lr
    ENDP

;-----------------------------------------------------------------------
; vp8_encode_bool
;
; Encodes one bit with the given probability and stores the updated
; lowvalue / range / count back into the writer.  When count becomes
; non-negative, one byte is written to buffer[pos] (after propagating a
; pending carry into the bytes already written) and pos is incremented.
;
; r0 BOOL_CODER *br
; r1 int bit
; r2 int probability
;
; Register roles inside the routine:
;   r2 = lowvalue, r3 = count, r5 = range
;   r4 = split / scratch, r6 = shift / offset
;   r1, r7, r9 = scratch
; r4-r10 are saved on entry and restored on exit.
;-----------------------------------------------------------------------
|vp8_encode_bool| PROC
    push    {r4-r10, lr}

    mov     r4, r2

    ldr     r2, [r0, #vp8_writer_lowvalue]
    ldr     r5, [r0, #vp8_writer_range]
    ldr     r3, [r0, #vp8_writer_count]

    sub     r7, r5, #1                  ; range-1

    ; The flags set here are consumed by the addne/subne below; none of
    ; the instructions in between update the flags.
    cmp     r1, #0
    mul     r6, r4, r7                  ; ((range-1) * probability)

    mov     r7, #1
    add     r4, r7, r6, lsr #8          ; 1 + (((range-1) * probability) >> 8)

    addne   r2, r2, r4                  ; if  (bit) lowvalue += split
    subne   r4, r5, r4                  ; if  (bit) range = range-split

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set

    ; Carry propagation: walk back over trailing 0xff bytes, zeroing
    ; them, then increment the first byte that is not 0xff.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start
token_zero_while_loop
    mov     r9, #0
    strb    r9, [r7, r4]                ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start
    ; NOTE(review): when x < 0 the ldrge is skipped but the ldrb below
    ; still executes with whatever r7 currently holds; its result is then
    ; ignored because cmpge is skipped and the loop exits.
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r1, [r7, r4]
    cmpge   r1, #0xff
    beq     token_zero_while_loop

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r9, [r7, r4]                ; w->buffer[x]
    add     r9, r9, #1
    strb    r9, [r7, r4]                ; w->buffer[x] + 1
token_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r9, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r1, r4, #1                  ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r1, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r9, r1                 ; validate_buffer at pos

    strb    r7, [r9, r4]                ; w->buffer[w->pos++]

token_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    str     r2, [r0, #vp8_writer_lowvalue]
    str     r5, [r0, #vp8_writer_range]
    str     r3, [r0, #vp8_writer_count]
    pop     {r4-r10, pc}
    ENDP

;-----------------------------------------------------------------------
; vp8_stop_encode
;
; Runs the encode step 32 times with the split computed from a fixed
; probability of 128 and with lowvalue never advanced by the split
; (i.e. the "bit == 0" path), then stores lowvalue / range / count back
; into the writer.
;
; r0 BOOL_CODER *br
;
; Register roles inside the routine:
;   r2 = lowvalue, r3 = count, r5 = range, r10 = remaining iterations
;   r4 = split / scratch, r6 = shift / offset
;   r1, r7, r9 = scratch
; r4-r10 are saved on entry and restored on exit.
;-----------------------------------------------------------------------
|vp8_stop_encode| PROC
    push    {r4-r10, lr}

    ldr     r2, [r0, #vp8_writer_lowvalue]
    ldr     r5, [r0, #vp8_writer_range]
    ldr     r3, [r0, #vp8_writer_count]

    mov     r10, #32

stop_encode_loop
    sub     r7, r5, #1                  ; range-1

    mov     r4, r7, lsl #7              ; ((range-1) * 128)

    mov     r7, #1
    add     r4, r7, r4, lsr #8          ; 1 + (((range-1) * 128) >> 8)

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero_se      ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set_se

    ; Carry propagation over already-written bytes (same scheme as in
    ; vp8_encode_bool).
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start_se
token_zero_while_loop_se
    mov     r9, #0
    strb    r9, [r7, r4]                ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start_se
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r1, [r7, r4]
    cmpge   r1, #0xff
    beq     token_zero_while_loop_se

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r9, [r7, r4]                ; w->buffer[x]
    add     r9, r9, #1
    strb    r9, [r7, r4]                ; w->buffer[x] + 1
token_high_bit_not_set_se
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r9, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r1, r4, #1                  ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r1, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r9, r1                 ; validate_buffer at pos

    strb    r7, [r9, r4]                ; w->buffer[w->pos++]

token_count_lt_zero_se
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r10, r10, #1
    bne     stop_encode_loop

    str     r2, [r0, #vp8_writer_lowvalue]
    str     r5, [r0, #vp8_writer_range]
    str     r3, [r0, #vp8_writer_count]
    pop     {r4-r10, pc}

    ENDP

;-----------------------------------------------------------------------
; vp8_encode_value
;
; Writes the low `bits` bits of `data`, most significant of those bits
; first, each one encoded with the split computed from a fixed
; probability of 128.  Returns without touching the writer when
; bits <= 0.
;
; r0 BOOL_CODER *br
; r1 int data
; r2 int bits
;
; Register roles inside the routine:
;   r1 = v (remaining bits, left-aligned), r10 = remaining bit count
;   r2 = lowvalue, r3 = count, r5 = range
;   r4 = split / scratch, r6 = shift / offset
;   r7, r9, r11 = scratch
; r4-r12 are saved on entry and restored on exit.
;-----------------------------------------------------------------------
|vp8_encode_value| PROC
    ; The loop below is a do/while that decrements r10 and exits on zero,
    ; so a count of 0 (or a negative one) would wrap around instead of
    ; terminating.  Bail out before anything is pushed.
    cmp     r2, #0
    bxle    lr

    push    {r4-r12, lr}

    mov     r10, r2

    ldr     r2, [r0, #vp8_writer_lowvalue]
    ldr     r5, [r0, #vp8_writer_range]
    ldr     r3, [r0, #vp8_writer_count]

    rsb     r4, r10, #32                 ; 32-n

    ; v is kept in r1 during the token pack loop
    lsl     r1, r1, r4                  ; r1 = v << 32 - n

encode_value_loop
    sub     r7, r5, #1                  ; range-1

    ; Decisions are made based on the bit value shifted
    ; off of v, so set a flag here based on this.
    ; This value is referred to as "bb"
    lsls    r1, r1, #1                  ; bit = v >> n
    mov     r4, r7, lsl #7              ; ((range-1) * 128)

    mov     r7, #1
    add     r4, r7, r4, lsr #8          ; 1 + (((range-1) * 128) >> 8)

    addcs   r2, r2, r4                  ; if  (bit) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bit) range = range-split

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero_ev      ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set_ev

    ; Carry propagation over already-written bytes (same scheme as in
    ; vp8_encode_bool; r11 is the scratch byte here because r1 holds v).
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start_ev
token_zero_while_loop_ev
    mov     r9, #0
    strb    r9, [r7, r4]                ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start_ev
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     token_zero_while_loop_ev

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r9, [r7, r4]                ; w->buffer[x]
    add     r9, r9, #1
    strb    r9, [r7, r4]                ; w->buffer[x] + 1
token_high_bit_not_set_ev
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r9, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r9, r11                ; validate_buffer at pos

    strb    r7, [r9, r4]                ; w->buffer[w->pos++]

token_count_lt_zero_ev
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r10, r10, #1
    bne     encode_value_loop

    str     r2, [r0, #vp8_writer_lowvalue]
    str     r5, [r0, #vp8_writer_range]
    str     r3, [r0, #vp8_writer_count]
    pop     {r4-r12, pc}
    ENDP

    END