;********************************************************************
;*                                                                  *
;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
;*                                                                  *
;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
;*                                                                  *
;********************************************************************
;
; function:
;   last mod: $Id: armbits.s 17481 2010-10-03 22:49:42Z tterribe $
;
;********************************************************************

        AREA    |.text|, CODE, READONLY

        ; Explicitly specifying alignment here because some versions of
        ; gas don't align code correctly.  See
        ; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html
        ; https://bugzilla.mozilla.org/show_bug.cgi?id=920992
        ALIGN

        EXPORT  oc_pack_read_arm
        EXPORT  oc_pack_read1_arm
        EXPORT  oc_huff_token_decode_arm

; Layout of oc_pack_buf as used by the loads/stores in this file
; (word offsets from _b):
;   +0  stop       one past the last byte of the packet
;   +4  ptr        next byte to load into the window
;   +8  window     bit window; the next unread bit is the MSB
;   +12 available  number of valid bits in the window
;   +16 eof        set to 1 once a read runs past the end of the packet
;
; Throughout the refill code the register that tracks "available" actually
; holds 32-available, i.e. the left-shift needed to place the next byte just
; below the valid bits of the window.

;--------------------------------------------------------------------
; oc_pack_read1_arm: read a single bit.
;   In:  r0 = oc_pack_buf *_b
;   Out: r0 = the bit read (window>>31)
;   Uses r1-r3, r12 as scratch.  Falls into the shared refill code of
;   oc_pack_read_arm (with _bits forced to 1) when the window is empty.
;--------------------------------------------------------------------
oc_pack_read1_arm PROC
        ; r0 = oc_pack_buf *_b
        ADD     r12,r0,#8               ; r12 = &_b->window
        LDMIA   r12,{r2,r3}             ; r2 = window
        ; Stall...                      ; r3 = available
        ; Stall...
        SUBS    r3,r3,#1                ; r3 = available-1, available<1 => LT
        BLT     oc_pack_read1_refill
        MOV     r0,r2,LSR #31           ; r0 = window>>31
        MOV     r2,r2,LSL #1            ; r2 = window<<=1
        STMIA   r12,{r2,r3}             ; window = r2
                                        ; available = r3
        MOV     PC,r14
        ENDP

;--------------------------------------------------------------------
; oc_pack_read_arm: read _bits bits.
;   In:  r0 = oc_pack_buf *_b
;        r1 = int _bits
;   Out: r0 = window>>(32-_bits), i.e. the top _bits bits of the window
;   Uses r2, r3, r12 as scratch; r10, r11, r14 are saved/restored on the
;   refill path only.
;--------------------------------------------------------------------
oc_pack_read_arm PROC
        ; r0 = oc_pack_buf *_b
        ; r1 = int _bits
        ADD     r12,r0,#8               ; r12 = &_b->window
        LDMIA   r12,{r2,r3}             ; r2 = window
        ; Stall...                      ; r3 = available
        ; Stall...
        SUBS    r3,r3,r1                ; r3 = available-_bits, available<_bits => LT
        BLT     oc_pack_read_refill
        RSB     r0,r1,#32               ; r0 = 32-_bits
        MOV     r0,r2,LSR r0            ; r0 = window>>32-_bits
        MOV     r2,r2,LSL r1            ; r2 = window<<=_bits
        STMIA   r12,{r2,r3}             ; window = r2
                                        ; available = r3
        MOV     PC,r14

; We need to refill window.
; Entry from oc_pack_read1_arm: behave exactly like a 1-bit oc_pack_read_arm.
oc_pack_read1_refill
        MOV     r1,#1
oc_pack_read_refill
        STMFD   r13!,{r10,r11,r14}
        LDMIA   r0,{r10,r11}            ; r10 = stop
                                        ; r11 = ptr
        RSB     r0,r1,#32               ; r0 = 32-_bits
        RSB     r3,r3,r0                ; r3 = 32-available
; We can use unsigned compares for both the pointers and for available
;  (allowing us to chain condition codes) because available will never be
;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
;  negative.
; The byte load is unrolled four times.  Once either test fails the flags are
;  no longer HI, so every remaining HI-conditional instruction is skipped.
        CMP     r10,r11                 ; ptr<stop => HI
        CMPHI   r3,#7                   ;   available<=24 => HI
        LDRHIB  r14,[r11],#1            ;     r14 = *ptr++
        SUBHI   r3,#8                   ;     available += 8
        ; (HI) Stall...
        ORRHI   r2,r14,LSL r3           ;     r2 = window|=r14<<32-available
        CMPHI   r10,r11                 ; ptr<stop => HI
        CMPHI   r3,#7                   ;   available<=24 => HI
        LDRHIB  r14,[r11],#1            ;     r14 = *ptr++
        SUBHI   r3,#8                   ;     available += 8
        ; (HI) Stall...
        ORRHI   r2,r14,LSL r3           ;     r2 = window|=r14<<32-available
        CMPHI   r10,r11                 ; ptr<stop => HI
        CMPHI   r3,#7                   ;   available<=24 => HI
        LDRHIB  r14,[r11],#1            ;     r14 = *ptr++
        SUBHI   r3,#8                   ;     available += 8
        ; (HI) Stall...
        ORRHI   r2,r14,LSL r3           ;     r2 = window|=r14<<32-available
        CMPHI   r10,r11                 ; ptr<stop => HI
        CMPHI   r3,#7                   ;   available<=24 => HI
        LDRHIB  r14,[r11],#1            ;     r14 = *ptr++
        SUBHI   r3,#8                   ;     available += 8
        ; (HI) Stall...
        ORRHI   r2,r14,LSL r3           ;     r2 = window|=r14<<32-available
        SUBS    r3,r0,r3                ; r3 = available-=_bits, available<_bits => LT
        BLT     oc_pack_read_refill_last
        MOV     r0,r2,LSR r0            ; r0 = window>>32-_bits
        MOV     r2,r2,LSL r1            ; r2 = window<<=_bits
        STR     r11,[r12,#-4]           ; ptr = r11
        STMIA   r12,{r2,r3}             ; window = r2
                                        ; available = r3
        LDMFD   r13!,{r10,r11,PC}

; Either we wanted to read more than 24 bits and didn't have enough room to
;  stuff the last byte into the window, or we hit the end of the packet.
oc_pack_read_refill_last
        CMP     r11,r10                 ; ptr<stop => LO
; If we didn't hit the end of the packet, then pull enough of the next byte
;  to fill up the window.
        LDRLOB  r14,[r11]               ; (LO) r14 = *ptr
; Otherwise, set the EOF flag and pretend we have lots of available bits.
        MOVHS   r14,#1                  ; (HS) r14 = 1
        ADDLO   r10,r3,r1               ; (LO) r10 = available
        STRHS   r14,[r12,#8]            ; (HS) eof = 1
        ANDLO   r10,r10,#7              ; (LO) r10 = available&7
        MOVHS   r3,#1<<30               ; (HS) available = OC_LOTS_OF_BITS
; In the LO case available is 25..31, so only the top 32-available bits of the
;  byte fit below the valid window bits.  That requires a RIGHT shift by
;  available-24 == available&7; a left shift would OR the byte over valid
;  window bits.  ptr is deliberately not advanced and available is stored
;  negative, so the next refill re-reads this byte and places its unread low
;  bits at the top of the window.
        ORRLO   r2,r14,LSR r10          ; (LO) r2 = window|=*ptr>>(available&7)
        MOV     r0,r2,LSR r0            ; r0 = window>>32-_bits
        MOV     r2,r2,LSL r1            ; r2 = window<<=_bits
        STR     r11,[r12,#-4]           ; ptr = r11
        STMIA   r12,{r2,r3}             ; window = r2
                                        ; available = r3
        LDMFD   r13!,{r10,r11,PC}
        ENDP



;--------------------------------------------------------------------
; oc_huff_token_decode_arm: decode one Huffman token.
;   In:  r0 = oc_pack_buf *_b
;        r1 = const ogg_int16_t *_tree
;   Out: r0 = low 8 bits of the negated leaf entry ((-node)&255)
;
; Tree walk as implemented below: _tree[node] holds n, the number of bits to
;  look up at that node, followed by 1<<n child entries.  A positive entry is
;  the index of another internal node; a non-positive entry is a leaf stored
;  negated, with the number of bits actually consumed in (-node)>>8 and the
;  token in (-node)&255.
; Unlike oc_pack_read_arm, running off the end of the packet here does not
;  touch the eof field; it only sets available to a huge value.
;--------------------------------------------------------------------
oc_huff_token_decode_arm PROC
        ; r0 = oc_pack_buf *_b
        ; r1 = const ogg_int16_t *_tree
        STMFD   r13!,{r4,r5,r10,r14}
        LDRSH   r10,[r1]                ; r10 = n=_tree[0]
        LDMIA   r0,{r2-r5}              ; r2 = stop
        ; Stall...                      ; r3 = ptr
        ; Stall...                      ; r4 = window
                                        ; r5 = available
        CMP     r10,r5                  ; n>available => GT
        BGT     oc_huff_token_decode_refill0
        RSB     r14,r10,#32             ; r14 = 32-n
        MOV     r14,r4,LSR r14          ; r14 = bits=window>>32-n
        ADD     r14,r1,r14,LSL #1       ; r14 = _tree+bits
        LDRSH   r12,[r14,#2]            ; r12 = node=_tree[1+bits]
        ; Stall...
        ; Stall...
        RSBS    r14,r12,#0              ; r14 = -node, node>0 => MI
        BMI     oc_huff_token_decode_continue
        MOV     r10,r14,LSR #8          ; r10 = n=(-node)>>8
        MOV     r4,r4,LSL r10           ; r4 = window<<=n
        SUB     r5,r10                  ; r5 = available-=n
        STMIB   r0,{r3-r5}              ; ptr = r3
                                        ; window = r4
                                        ; available = r5
        AND     r0,r14,#255             ; r0 = (-node)&255
        LDMFD   r13!,{r4,r5,r10,pc}

; The first tree node wasn't enough to reach a leaf, read another
oc_huff_token_decode_continue
        ADD     r12,r1,r12,LSL #1       ; r12 = _tree+node
        MOV     r4,r4,LSL r10           ; r4 = window<<=n
        SUB     r5,r5,r10               ; r5 = available-=n
        LDRSH   r10,[r12],#2            ; r10 = n=_tree[node]
        ; Stall...                      ; r12 = _tree+node+1
        ; Stall...
        CMP     r10,r5                  ; n>available => GT
        BGT     oc_huff_token_decode_refill
        RSB     r14,r10,#32             ; r14 = 32-n
        MOV     r14,r4,LSR r14          ; r14 = bits=window>>32-n
        ADD     r12,r12,r14             ; r12+r14 twice == 2-byte index scaling
        LDRSH   r12,[r12,r14]           ; r12 = node=_tree[node+1+bits]
        ; Stall...
        ; Stall...
        RSBS    r14,r12,#0              ; r14 = -node, node>0 => MI
        BMI     oc_huff_token_decode_continue
        MOV     r10,r14,LSR #8          ; r10 = n=(-node)>>8
        MOV     r4,r4,LSL r10           ; r4 = window<<=n
        SUB     r5,r10                  ; r5 = available-=n
        STMIB   r0,{r3-r5}              ; ptr = r3
                                        ; window = r4
                                        ; available = r5
        AND     r0,r14,#255             ; r0 = (-node)&255
        LDMFD   r13!,{r4,r5,r10,pc}

; Entry used when the very first lookup needs a refill: set r12 as the
;  continue path would have left it (pointing just past the root's n entry).
oc_huff_token_decode_refill0
        ADD     r12,r1,#2               ; r12 = _tree+1
oc_huff_token_decode_refill
; We can't possibly need more than 15 bits, so available must be <= 15.
; Therefore we can load at least two bytes without checking it.
        CMP     r2,r3                   ; ptr<stop => HI
        LDRHIB  r14,[r3],#1             ;   r14 = *ptr++
        RSBHI   r5,r5,#24               ; (HI) available = 32-(available+=8)
        RSBLS   r5,r5,#32               ; (LS) r5 = 32-available
        ORRHI   r4,r14,LSL r5           ;   r4 = window|=r14<<32-available
        CMPHI   r2,r3                   ; ptr<stop => HI
        LDRHIB  r14,[r3],#1             ;   r14 = *ptr++
        SUBHI   r5,#8                   ;   available += 8
        ; (HI) Stall...
        ORRHI   r4,r14,LSL r5           ;   r4 = window|=r14<<32-available
; We can use unsigned compares for both the pointers and for available
;  (allowing us to chain condition codes) because available will never be
;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
;  negative.
        CMPHI   r2,r3                   ; ptr<stop => HI
        CMPHI   r5,#7                   ;   available<=24 => HI
        LDRHIB  r14,[r3],#1             ;     r14 = *ptr++
        SUBHI   r5,#8                   ;     available += 8
        ; (HI) Stall...
        ORRHI   r4,r14,LSL r5           ;     r4 = window|=r14<<32-available
; r5 holds 32-available here, so storing -1<<30 makes the final
;  "available = 32-r5" below come out as OC_LOTS_OF_BITS+32.
        CMP     r2,r3                   ; ptr<stop => HI
        MOVLS   r5,#-1<<30              ; (LS) available = OC_LOTS_OF_BITS+32
        CMPHI   r5,#7                   ; (HI) available<=24 => HI
        LDRHIB  r14,[r3],#1             ; (HI)   r14 = *ptr++
        SUBHI   r5,#8                   ; (HI)   available += 8
        ; (HI) Stall...
        ORRHI   r4,r14,LSL r5           ; (HI)   r4 = window|=r14<<32-available
        RSB     r14,r10,#32             ; r14 = 32-n
        MOV     r14,r4,LSR r14          ; r14 = bits=window>>32-n
        ADD     r12,r12,r14             ; r12+r14 twice == 2-byte index scaling
        LDRSH   r12,[r12,r14]           ; r12 = node=_tree[node+1+bits]
        RSB     r5,r5,#32               ; r5 = available
        ; Stall...
        RSBS    r14,r12,#0              ; r14 = -node, node>0 => MI
        BMI     oc_huff_token_decode_continue
        MOV     r10,r14,LSR #8          ; r10 = n=(-node)>>8
        MOV     r4,r4,LSL r10           ; r4 = window<<=n
        SUB     r5,r10                  ; r5 = available-=n
        STMIB   r0,{r3-r5}              ; ptr = r3
                                        ; window = r4
                                        ; available = r5
        AND     r0,r14,#255             ; r0 = (-node)&255
        LDMFD   r13!,{r4,r5,r10,pc}
        ENDP

        END