media/libtheora/lib/arm/armbits.s

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libtheora/lib/arm/armbits.s	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,236 @@
     1.4 +;********************************************************************
     1.5 +;*                                                                  *
     1.6 +;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
     1.7 +;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
     1.8 +;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
     1.9 +;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
    1.10 +;*                                                                  *
    1.11 +;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
    1.12 +;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
    1.13 +;*                                                                  *
    1.14 +;********************************************************************
    1.15 +;
    1.16 +; function:
    1.17 +;   last mod: $Id: armbits.s 17481 2010-10-03 22:49:42Z tterribe $
    1.18 +;
    1.19 +;********************************************************************
    1.20 +
    1.21 +	AREA	|.text|, CODE, READONLY
    1.22 +
    1.23 +	; Explicitly specifying alignment here because some versions of
    1.24 +	; gas don't align code correctly. See
    1.25 +	; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html
    1.26 +	; https://bugzilla.mozilla.org/show_bug.cgi?id=920992
    1.27 +	ALIGN
    1.28 +
    1.29 +	EXPORT oc_pack_read_arm
    1.30 +	EXPORT oc_pack_read1_arm
    1.31 +	EXPORT oc_huff_token_decode_arm
    1.32 +
    1.33 +oc_pack_read1_arm PROC
    1.34 +	; r0 = oc_pack_buf *_b   (returns the top bit of the window in r0 and consumes it)
    1.35 +	ADD r12,r0,#8          ; r12 = _b+8 = address of the window/available pair
    1.36 +	LDMIA r12,{r2,r3}      ; r2 = window
    1.37 +	; Stall...             ; r3 = available
    1.38 +	; Stall...
    1.39 +	SUBS r3,r3,#1          ; r3 = available-1, available<1 => LT
    1.40 +	BLT oc_pack_read1_refill ; no bit buffered: take the refill path (it sets _bits=1); r0,r2,r3,r12 stay live
    1.41 +	MOV r0,r2,LSR #31      ; r0 = window>>31
    1.42 +	MOV r2,r2,LSL #1       ; r2 = window<<=1
    1.43 +	STMIA r12,{r2,r3}      ; window = r2
    1.44 +	                       ; available = r3
    1.45 +	MOV PC,r14             ; return to the caller, result in r0
    1.46 +	ENDP
    1.47 +
    1.48 +oc_pack_read_arm PROC
    1.49 +	; r0 = oc_pack_buf *_b     (returns the top _bits bits of the window in r0 and consumes them)
    1.50 +	; r1 = int          _bits  (this PROC also holds the refill paths entered from oc_pack_read1_arm)
    1.51 +	ADD r12,r0,#8          ; r12 = _b+8 = address of the window/available pair
    1.52 +	LDMIA r12,{r2,r3}      ; r2 = window
    1.53 +	; Stall...             ; r3 = available
    1.54 +	; Stall...
    1.55 +	SUBS r3,r3,r1          ; r3 = available-_bits, available<_bits => LT
    1.56 +	BLT oc_pack_read_refill ; fewer than _bits bits buffered: refill first
    1.57 +	RSB r0,r1,#32          ; r0 = 32-_bits
    1.58 +	MOV r0,r2,LSR r0       ; r0 = window>>32-_bits
    1.59 +	MOV r2,r2,LSL r1       ; r2 = window<<=_bits
    1.60 +	STMIA r12,{r2,r3}      ; window = r2
    1.61 +	                       ; available = r3
    1.62 +	MOV PC,r14             ; fast path return: nothing was pushed on the stack
    1.63 +
    1.64 +; We need to refill window (entry state: r0 = _b, r2 = window, r3 = available-_bits, r12 = _b+8).
    1.65 +oc_pack_read1_refill
    1.66 +	MOV r1,#1              ; r1 = _bits = 1 for the single-bit reader
    1.67 +oc_pack_read_refill
    1.68 +	STMFD r13!,{r10,r11,r14} ; save r10, r11 and the return address; r14 becomes scratch
    1.69 +	LDMIA r0,{r10,r11}     ; r10 = stop
    1.70 +	                       ; r11 = ptr
    1.71 +	RSB r0,r1,#32          ; r0 = 32-_bits
    1.72 +	RSB r3,r3,r0           ; r3 = 32-available
    1.73 +; We can use unsigned compares for both the pointers and for available
    1.74 +;  (allowing us to chain condition codes) because available will never be
    1.75 +;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
    1.76 +;  negative.
    1.77 +	CMP r10,r11            ; ptr<stop => HI
    1.78 +	CMPHI r3,#7            ;   available<=24 => HI
    1.79 +	LDRHIB r14,[r11],#1    ;     r14 = *ptr++
    1.80 +	SUBHI r3,#8            ;     available += 8
    1.81 +	; (HI) Stall...
    1.82 +	ORRHI r2,r14,LSL r3    ;     r2 = window|=r14<<32-available
    1.83 +	CMPHI r10,r11          ;     ptr<stop => HI
    1.84 +	CMPHI r3,#7            ;       available<=24 => HI
    1.85 +	LDRHIB r14,[r11],#1    ;         r14 = *ptr++
    1.86 +	SUBHI r3,#8            ;         available += 8
    1.87 +	; (HI) Stall...
    1.88 +	ORRHI r2,r14,LSL r3    ;         r2 = window|=r14<<32-available
    1.89 +	CMPHI r10,r11          ;         ptr<stop => HI
    1.90 +	CMPHI r3,#7            ;           available<=24 => HI
    1.91 +	LDRHIB r14,[r11],#1    ;             r14 = *ptr++
    1.92 +	SUBHI r3,#8            ;             available += 8
    1.93 +	; (HI) Stall...
    1.94 +	ORRHI r2,r14,LSL r3    ;             r2 = window|=r14<<32-available
    1.95 +	CMPHI r10,r11          ;             ptr<stop => HI
    1.96 +	CMPHI r3,#7            ;               available<=24 => HI
    1.97 +	LDRHIB r14,[r11],#1    ;                 r14 = *ptr++
    1.98 +	SUBHI r3,#8            ;                 available += 8
    1.99 +	; (HI) Stall...
   1.100 +	ORRHI r2,r14,LSL r3    ;                 r2 = window|=r14<<32-available
   1.101 +	SUBS r3,r0,r3          ; r3 = available-=_bits, available<_bits => LT
   1.102 +	BLT oc_pack_read_refill_last ; still short after loading whole bytes
   1.103 +	MOV r0,r2,LSR r0       ; r0 = window>>32-_bits
   1.104 +	MOV r2,r2,LSL r1       ; r2 = window<<=_bits
   1.105 +	STR r11,[r12,#-4]      ; ptr = r11
   1.106 +	STMIA r12,{r2,r3}      ; window = r2
   1.107 +	                       ; available = r3
   1.108 +	LDMFD r13!,{r10,r11,PC} ; restore r10, r11 and return
   1.109 +
   1.110 +; Either we wanted to read more than 24 bits and didn't have enough room to
   1.111 +;  stuff the last byte into the window, or we hit the end of the packet.
   1.112 +oc_pack_read_refill_last
   1.113 +	CMP r11,r10            ; ptr<stop => LO
   1.114 +; If we didn't hit the end of the packet, then pull enough of the next byte
   1.115 +;  to fill up the window (its top bits are shifted right into the free low bits).
   1.116 +	LDRLOB r14,[r11]       ; (LO) r14 = *ptr (ptr is not advanced)
   1.117 +; Otherwise, set the EOF flag and pretend we have lots of available bits.
   1.118 +	MOVHS r14,#1           ; (HS) r14 = 1
   1.119 +	ADDLO r10,r3,r1        ; (LO) r10 = available
   1.120 +	STRHS r14,[r12,#8]     ; (HS) eof = 1
   1.121 +	ANDLO r10,r10,#7       ; (LO) r10 = available&7
   1.122 +	MOVHS r3,#1<<30        ; (HS) available = OC_LOTS_OF_BITS
   1.123 +	ORRLO r2,r14,LSR r10   ; (LO) r2 = window|=*ptr>>(available&7)
   1.124 +	MOV r0,r2,LSR r0       ; r0 = window>>32-_bits
   1.125 +	MOV r2,r2,LSL r1       ; r2 = window<<=_bits
   1.126 +	STR r11,[r12,#-4]      ; ptr = r11
   1.127 +	STMIA r12,{r2,r3}      ; window = r2
   1.128 +	                       ; available = r3
   1.129 +	LDMFD r13!,{r10,r11,PC} ; restore r10, r11 and return
   1.130 +	ENDP
   1.131 +
   1.132 +
   1.133 +
   1.134 +oc_huff_token_decode_arm PROC
   1.135 +	; r0 = oc_pack_buf       *_b     (ptr, window and available are written back before returning)
   1.136 +	; r1 = const ogg_int16_t *_tree  (returns (-node)&255 of the first non-positive entry reached, in r0)
   1.137 +	STMFD r13!,{r4,r5,r10,r14} ; save r4, r5, r10 and the return address
   1.138 +	LDRSH r10,[r1]         ; r10 = n=_tree[0]
   1.139 +	LDMIA r0,{r2-r5}       ; r2 = stop
   1.140 +	; Stall...             ; r3 = ptr
   1.141 +	; Stall...             ; r4 = window
   1.142 +	                       ; r5 = available
   1.143 +	CMP r10,r5             ; n>available => GT
   1.144 +	BGT oc_huff_token_decode_refill0 ; fewer than n bits buffered: refill before the lookup
   1.145 +	RSB r14,r10,#32        ; r14 = 32-n
   1.146 +	MOV r14,r4,LSR r14     ; r14 = bits=window>>32-n
   1.147 +	ADD r14,r1,r14,LSL #1  ; r14 = _tree+bits
   1.148 +	LDRSH r12,[r14,#2]     ; r12 = node=_tree[1+bits]
   1.149 +	; Stall...
   1.150 +	; Stall...
   1.151 +	RSBS r14,r12,#0        ; r14 = -node, node>0 => MI
   1.152 +	BMI oc_huff_token_decode_continue ; node>0: it indexes another table in _tree
   1.153 +	MOV r10,r14,LSR #8     ; r10 = n=node>>8 (node here is the negated entry held in r14)
   1.154 +	MOV r4,r4,LSL r10      ; r4 = window<<=n
   1.155 +	SUB r5,r10             ; r5 = available-=n
   1.156 +	STMIB r0,{r3-r5}       ; ptr = r3
   1.157 +	                       ; window = r4
   1.158 +	                       ; available = r5
   1.159 +	AND r0,r14,#255        ; r0 = node&255
   1.160 +	LDMFD r13!,{r4,r5,r10,pc} ; restore saved registers and return
   1.161 +
   1.162 +; The first tree node wasn't enough to reach a leaf, read another
   1.163 +oc_huff_token_decode_continue
   1.164 +	ADD r12,r1,r12,LSL #1  ; r12 = _tree+node
   1.165 +	MOV r4,r4,LSL r10      ; r4 = window<<=n
   1.166 +	SUB r5,r5,r10          ; r5 = available-=n
   1.167 +	LDRSH r10,[r12],#2     ; r10 = n=_tree[node]
   1.168 +	; Stall...             ; r12 = _tree+node+1
   1.169 +	; Stall...
   1.170 +	CMP r10,r5             ; n>available => GT
   1.171 +	BGT oc_huff_token_decode_refill ; fewer than n bits buffered: refill before the lookup
   1.172 +	RSB r14,r10,#32        ; r14 = 32-n
   1.173 +	MOV r14,r4,LSR r14     ; r14 = bits=window>>32-n
   1.174 +	ADD r12,r12,r14        ; r12 += bits; the load below adds bits again, giving a 2-byte-scaled index
   1.175 +	LDRSH r12,[r12,r14]    ; r12 = node=_tree[node+1+bits]
   1.176 +	; Stall...
   1.177 +	; Stall...
   1.178 +	RSBS r14,r12,#0        ; r14 = -node, node>0 => MI
   1.179 +	BMI oc_huff_token_decode_continue ; node>0: descend into the next table
   1.180 +	MOV r10,r14,LSR #8     ; r10 = n=node>>8 (node here is the negated entry held in r14)
   1.181 +	MOV r4,r4,LSL r10      ; r4 = window<<=n
   1.182 +	SUB r5,r10             ; r5 = available-=n
   1.183 +	STMIB r0,{r3-r5}       ; ptr = r3
   1.184 +	                       ; window = r4
   1.185 +	                       ; available = r5
   1.186 +	AND r0,r14,#255        ; r0 = node&255
   1.187 +	LDMFD r13!,{r4,r5,r10,pc} ; restore saved registers and return
   1.188 +
   1.189 +oc_huff_token_decode_refill0
   1.190 +	ADD r12,r1,#2          ; r12 = _tree+1
   1.191 +oc_huff_token_decode_refill
   1.192 +; We can't possibly need more than 15 bits, so available must be <= 15.
   1.193 +; Therefore we can load at least two bytes without checking it.
   1.194 +	CMP r2,r3              ; ptr<stop => HI
   1.195 +	LDRHIB r14,[r3],#1     ;   r14 = *ptr++
   1.196 +	RSBHI r5,r5,#24        ; (HI) available = 32-(available+=8)
   1.197 +	RSBLS r5,r5,#32        ; (LS) r5 = 32-available
   1.198 +	ORRHI r4,r14,LSL r5    ;   r4 = window|=r14<<32-available
   1.199 +	CMPHI r2,r3            ;   ptr<stop => HI
   1.200 +	LDRHIB r14,[r3],#1     ;     r14 = *ptr++
   1.201 +	SUBHI r5,#8            ;     available += 8
   1.202 +	; (HI) Stall...
   1.203 +	ORRHI r4,r14,LSL r5    ;     r4 = window|=r14<<32-available
   1.204 +; We can use unsigned compares for both the pointers and for available
   1.205 +;  (allowing us to chain condition codes) because available will never be
   1.206 +;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
   1.207 +;  negative.
   1.208 +	CMPHI r2,r3            ;     ptr<stop => HI
   1.209 +	CMPHI r5,#7            ;       available<=24 => HI
   1.210 +	LDRHIB r14,[r3],#1     ;         r14 = *ptr++
   1.211 +	SUBHI r5,#8            ;         available += 8
   1.212 +	; (HI) Stall...
   1.213 +	ORRHI r4,r14,LSL r5    ;         r4 = window|=r14<<32-available
   1.214 +	CMP r2,r3              ; ptr<stop => HI (unconditional: restarts the condition chain)
   1.215 +	MOVLS r5,#-1<<30       ; (LS) available = OC_LOTS_OF_BITS+32
   1.216 +	CMPHI r5,#7            ; (HI) available<=24 => HI
   1.217 +	LDRHIB r14,[r3],#1     ; (HI)   r14 = *ptr++
   1.218 +	SUBHI r5,#8            ; (HI)   available += 8
   1.219 +	; (HI) Stall...
   1.220 +	ORRHI r4,r14,LSL r5    ; (HI)   r4 = window|=r14<<32-available
   1.221 +	RSB r14,r10,#32        ; r14 = 32-n
   1.222 +	MOV r14,r4,LSR r14     ; r14 = bits=window>>32-n
   1.223 +	ADD r12,r12,r14        ; r12 += bits; the load below adds bits again, giving a 2-byte-scaled index
   1.224 +	LDRSH r12,[r12,r14]    ; r12 = node=_tree[node+1+bits]
   1.225 +	RSB r5,r5,#32          ; r5 = available (r5 held 32-available during the refill)
   1.226 +	; Stall...
   1.227 +	RSBS r14,r12,#0        ; r14 = -node, node>0 => MI
   1.228 +	BMI oc_huff_token_decode_continue ; node>0: descend into the next table
   1.229 +	MOV r10,r14,LSR #8     ; r10 = n=node>>8 (node here is the negated entry held in r14)
   1.230 +	MOV r4,r4,LSL r10      ; r4 = window<<=n
   1.231 +	SUB r5,r10             ; r5 = available-=n
   1.232 +	STMIB r0,{r3-r5}       ; ptr = r3
   1.233 +	                       ; window = r4
   1.234 +	                       ; available = r5
   1.235 +	AND r0,r14,#255        ; r0 = node&255
   1.236 +	LDMFD r13!,{r4,r5,r10,pc} ; restore saved registers and return
   1.237 +	ENDP
   1.238 +
   1.239 +	END

mercurial