Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 ;********************************************************************
2 ;* *
3 ;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 ;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 ;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 ;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 ;* *
8 ;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010 *
9 ;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 ;* *
11 ;********************************************************************
12 ;
13 ; function:
14 ; last mod: $Id: armbits.s 17481 2010-10-03 22:49:42Z tterribe $
15 ;
16 ;********************************************************************
; All routines in this file are placed in a read-only code area.
18 AREA |.text|, CODE, READONLY
20 ; Explicitly specifying alignment here because some versions of
21 ; gas don't align code correctly. See
22 ; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html
23 ; https://bugzilla.mozilla.org/show_bug.cgi?id=920992
24 ALIGN
; Symbols made visible outside this file; each one is the name of a PROC
; defined further down.
26 EXPORT oc_pack_read_arm
27 EXPORT oc_pack_read1_arm
28 EXPORT oc_huff_token_decode_arm
; oc_pack_read1_arm: read one bit from the top of the bit window.
;  In:  r0 = oc_pack_buf *_b
;  Out: r0 = window>>31 (the most significant bit of the window, 0 or 1)
;  Scratch on the fast path: r2, r3, r12 and the flags; no stack is used.
;  The words at _b+8 and _b+12 are treated as window and available.
;  When available<1 the routine branches (does not call) to
;   oc_pack_read1_refill with r0 = _b, r2 = window, r3 = available-1 and
;   r12 = _b+8 still live; that code performs the return to our caller.
30 oc_pack_read1_arm PROC
31 ; r0 = oc_pack_buf *_b
32 ADD r12,r0,#8 ; r12 = address of window (_b+8)
33 LDMIA r12,{r2,r3} ; r2 = window
34 ; Stall... ; r3 = available
35 ; Stall...
36 SUBS r3,r3,#1 ; r3 = available-1, available<1 => LT
37 BLT oc_pack_read1_refill ; not enough bits: take the refill path
38 MOV r0,r2,LSR #31 ; r0 = window>>31
39 MOV r2,r2,LSL #1 ; r2 = window<<=1
40 STMIA r12,{r2,r3} ; window = r2
41 ; available = r3
42 MOV PC,r14 ; return to the address in r14
43 ENDP
; oc_pack_read_arm: read _bits bits from the top of the bit window.
;  In:  r0 = oc_pack_buf *_b
;       r1 = int _bits
;  Out: r0 = window>>(32-_bits)
;  Scratch: r2, r3, r12 and the flags. The refill path additionally saves
;   r10, r11 and r14 on the stack and restores r10/r11 before returning.
;  Fields of *_b as accessed here (byte offsets from _b):
;   +0 stop, +4 ptr, +8 window, +12 available, +16 eof.
;  The refill labels below are also reached by a branch from
;   oc_pack_read1_arm, with the same register state as the BLT in this
;   routine (r0 = _b, r2 = window, r3 = available-_bits, r12 = _b+8);
;   oc_pack_read1_refill first forces r1 (_bits) to 1.
45 oc_pack_read_arm PROC
46 ; r0 = oc_pack_buf *_b
47 ; r1 = int _bits
48 ADD r12,r0,#8 ; r12 = address of window (_b+8)
49 LDMIA r12,{r2,r3} ; r2 = window
50 ; Stall... ; r3 = available
51 ; Stall...
52 SUBS r3,r3,r1 ; r3 = available-_bits, available<_bits => LT
53 BLT oc_pack_read_refill
54 RSB r0,r1,#32 ; r0 = 32-_bits
55 MOV r0,r2,LSR r0 ; r0 = window>>32-_bits
56 MOV r2,r2,LSL r1 ; r2 = window<<=_bits
57 STMIA r12,{r2,r3} ; window = r2
58 ; available = r3
59 MOV PC,r14 ; return to the address in r14
61 ; We need to refill window.
62 oc_pack_read1_refill
63 MOV r1,#1 ; _bits = 1 for the single-bit reader
64 oc_pack_read_refill
65 STMFD r13!,{r10,r11,r14}
66 LDMIA r0,{r10,r11} ; r10 = stop
67 ; r11 = ptr
68 RSB r0,r1,#32 ; r0 = 32-_bits
69 RSB r3,r3,r0 ; r3 = 32-available
70 ; We can use unsigned compares for both the pointers and for available
71 ; (allowing us to chain condition codes) because available will never be
72 ; larger than 32 (or we wouldn't be here), and thus 32-available will never be
73 ; negative.
; The byte load below is unrolled four times. Each copy runs only while the
; previous copy ran, ptr<stop, and at least 8 window bits are still vacant.
74 CMP r10,r11 ; ptr<stop => HI
75 CMPHI r3,#7 ; available<=24 => HI
76 LDRHIB r14,[r11],#1 ; r14 = *ptr++
77 SUBHI r3,#8 ; available += 8
78 ; (HI) Stall...
79 ORRHI r2,r14,LSL r3 ; r2 = window|=r14<<32-available
80 CMPHI r10,r11 ; ptr<stop => HI
81 CMPHI r3,#7 ; available<=24 => HI
82 LDRHIB r14,[r11],#1 ; r14 = *ptr++
83 SUBHI r3,#8 ; available += 8
84 ; (HI) Stall...
85 ORRHI r2,r14,LSL r3 ; r2 = window|=r14<<32-available
86 CMPHI r10,r11 ; ptr<stop => HI
87 CMPHI r3,#7 ; available<=24 => HI
88 LDRHIB r14,[r11],#1 ; r14 = *ptr++
89 SUBHI r3,#8 ; available += 8
90 ; (HI) Stall...
91 ORRHI r2,r14,LSL r3 ; r2 = window|=r14<<32-available
92 CMPHI r10,r11 ; ptr<stop => HI
93 CMPHI r3,#7 ; available<=24 => HI
94 LDRHIB r14,[r11],#1 ; r14 = *ptr++
95 SUBHI r3,#8 ; available += 8
96 ; (HI) Stall...
97 ORRHI r2,r14,LSL r3 ; r2 = window|=r14<<32-available
98 SUBS r3,r0,r3 ; r3 = available-=_bits, available<_bits => LT
99 BLT oc_pack_read_refill_last
100 MOV r0,r2,LSR r0 ; r0 = window>>32-_bits
101 MOV r2,r2,LSL r1 ; r2 = window<<=_bits
102 STR r11,[r12,#-4] ; ptr = r11
103 STMIA r12,{r2,r3} ; window = r2
104 ; available = r3
105 LDMFD r13!,{r10,r11,PC}
107 ; Either we wanted to read more than 24 bits and didn't have enough room to
108 ; stuff the last byte into the window, or we hit the end of the packet.
109 oc_pack_read_refill_last
110 CMP r11,r10 ; ptr<stop => LO
111 ; If we didn't hit the end of the packet, then pull enough of the next byte
112 ; to fill up the window.
113 LDRLOB r14,[r11] ; (LO) r14 = *ptr
114 ; Otherwise, set the EOF flag and pretend we have lots of available bits.
115 MOVHS r14,#1 ; (HS) r14 = 1
116 ADDLO r10,r3,r1 ; (LO) r10 = available
117 STRHS r14,[r12,#8] ; (HS) eof = 1
118 ANDLO r10,r10,#7 ; (LO) r10 = available&7
119 MOVHS r3,#1<<30 ; (HS) available = OC_LOTS_OF_BITS
; On the LO path 24<available<32, so only the low 32-available bits of the
; window are vacant. The byte must be shifted RIGHT by available&7
; (= available-24) so that just its top 32-available bits land there; a left
; shift would OR the byte over window bits that are already valid.
; ptr is deliberately not advanced: the byte is only partially consumed.
120 ORRLO r2,r14,LSR r10 ; (LO) r2 = window|=*ptr>>(available&7)
121 MOV r0,r2,LSR r0 ; r0 = window>>32-_bits
122 MOV r2,r2,LSL r1 ; r2 = window<<=_bits
123 STR r11,[r12,#-4] ; ptr = r11
124 STMIA r12,{r2,r3} ; window = r2
125 ; available = r3
126 LDMFD r13!,{r10,r11,PC}
127 ENDP
; oc_huff_token_decode_arm: decode one token by walking the table at _tree,
;  consuming bits from the top of the bit window.
;  In:  r0 = oc_pack_buf *_b
;       r1 = const ogg_int16_t *_tree
;  Out: r0 = low 8 bits of the negated leaf entry (node&255 below).
;       ptr, window and available are written back to _b+4, _b+8 and _b+12.
;  Saved/restored: r4, r5, r10 (and r14, which is popped straight into pc).
;  Scratch: r2, r3, r12, r14 and the flags.
;  Table layout as used by this code (16-bit signed entries):
;   _tree[node]        = n, the number of window bits to look up at this node
;   _tree[node+1+bits] = next entry, indexed by the top n bits of the window
;   entry > 0:  index of the next node; n bits are consumed and the walk
;               continues.
;   entry <= 0: leaf; with v = -entry, v>>8 bits are consumed and v&255 is
;               returned.
131 oc_huff_token_decode_arm PROC
132 ; r0 = oc_pack_buf *_b
133 ; r1 = const ogg_int16_t *_tree
134 STMFD r13!,{r4,r5,r10,r14}
135 LDRSH r10,[r1] ; r10 = n=_tree[0]
136 LDMIA r0,{r2-r5} ; r2 = stop
137 ; Stall... ; r3 = ptr
138 ; Stall... ; r4 = window
139 ; r5 = available
140 CMP r10,r5 ; n>available => GT
141 BGT oc_huff_token_decode_refill0
142 RSB r14,r10,#32 ; r14 = 32-n
143 MOV r14,r4,LSR r14 ; r14 = bits=window>>32-n
144 ADD r14,r1,r14,LSL #1 ; r14 = _tree+bits
145 LDRSH r12,[r14,#2] ; r12 = node=_tree[1+bits]
146 ; Stall...
147 ; Stall...
148 RSBS r14,r12,#0 ; r14 = -node, node>0 => MI
149 BMI oc_huff_token_decode_continue
; Leaf reached: consume its bit count and return its low byte.
150 MOV r10,r14,LSR #8 ; r10 = n=node>>8
151 MOV r4,r4,LSL r10 ; r4 = window<<=n
152 SUB r5,r10 ; r5 = available-=n
153 STMIB r0,{r3-r5} ; ptr = r3
154 ; window = r4
155 ; available = r5
156 AND r0,r14,#255 ; r0 = node&255
157 LDMFD r13!,{r4,r5,r10,pc}
159 ; The first tree node wasn't enough to reach a leaf, read another
160 oc_huff_token_decode_continue
161 ADD r12,r1,r12,LSL #1 ; r12 = _tree+node
162 MOV r4,r4,LSL r10 ; r4 = window<<=n
163 SUB r5,r5,r10 ; r5 = available-=n
164 LDRSH r10,[r12],#2 ; r10 = n=_tree[node]
165 ; Stall... ; r12 = _tree+node+1
166 ; Stall...
167 CMP r10,r5 ; n>available => GT
168 BGT oc_huff_token_decode_refill
169 RSB r14,r10,#32 ; r14 = 32-n
170 MOV r14,r4,LSR r14 ; r14 = bits=window>>32-n
; bits is added once here and once more as the load offset, giving the
; 2-byte element scaling (address = r12+2*bits).
171 ADD r12,r12,r14 ;
172 LDRSH r12,[r12,r14] ; r12 = node=_tree[node+1+bits]
173 ; Stall...
174 ; Stall...
175 RSBS r14,r12,#0 ; r14 = -node, node>0 => MI
176 BMI oc_huff_token_decode_continue
177 MOV r10,r14,LSR #8 ; r10 = n=node>>8
178 MOV r4,r4,LSL r10 ; r4 = window<<=n
179 SUB r5,r10 ; r5 = available-=n
180 STMIB r0,{r3-r5} ; ptr = r3
181 ; window = r4
182 ; available = r5
183 AND r0,r14,#255 ; r0 = node&255
184 LDMFD r13!,{r4,r5,r10,pc}
; Refill entry for the root node: point r12 at _tree+1 so that the shared
; refill code below can index the root's entries the same way as any other
; node's.
186 oc_huff_token_decode_refill0
187 ADD r12,r1,#2 ; r12 = _tree+1
188 oc_huff_token_decode_refill
189 ; We can't possibly need more than 15 bits, so available must be <= 15.
190 ; Therefore we can load at least two bytes without checking it.
; From here until the final RSB below, r5 holds 32-available rather than
; available, so it can be used directly as the left-shift amount.
191 CMP r2,r3 ; ptr<stop => HI
192 LDRHIB r14,[r3],#1 ; r14 = *ptr++
193 RSBHI r5,r5,#24 ; (HI) available = 32-(available+=8)
194 RSBLS r5,r5,#32 ; (LS) r5 = 32-available
195 ORRHI r4,r14,LSL r5 ; r4 = window|=r14<<32-available
196 CMPHI r2,r3 ; ptr<stop => HI
197 LDRHIB r14,[r3],#1 ; r14 = *ptr++
198 SUBHI r5,#8 ; available += 8
199 ; (HI) Stall...
200 ORRHI r4,r14,LSL r5 ; r4 = window|=r14<<32-available
201 ; We can use unsigned compares for both the pointers and for available
202 ; (allowing us to chain condition codes) because available will never be
203 ; larger than 32 (or we wouldn't be here), and thus 32-available will never be
204 ; negative.
205 CMPHI r2,r3 ; ptr<stop => HI
206 CMPHI r5,#7 ; available<=24 => HI
207 LDRHIB r14,[r3],#1 ; r14 = *ptr++
208 SUBHI r5,#8 ; available += 8
209 ; (HI) Stall...
210 ORRHI r4,r14,LSL r5 ; r4 = window|=r14<<32-available
; This compare is unconditional: if ptr has reached stop, r5 is set so that
; the final RSB below yields a very large available (no eof flag is stored
; by this routine).
211 CMP r2,r3 ; ptr<stop => HI
212 MOVLS r5,#-1<<30 ; (LS) available = OC_LOTS_OF_BITS+32
213 CMPHI r5,#7 ; (HI) available<=24 => HI
214 LDRHIB r14,[r3],#1 ; (HI) r14 = *ptr++
215 SUBHI r5,#8 ; (HI) available += 8
216 ; (HI) Stall...
217 ORRHI r4,r14,LSL r5 ; (HI) r4 = window|=r14<<32-available
218 RSB r14,r10,#32 ; r14 = 32-n
219 MOV r14,r4,LSR r14 ; r14 = bits=window>>32-n
220 ADD r12,r12,r14 ;
221 LDRSH r12,[r12,r14] ; r12 = node=_tree[node+1+bits]
222 RSB r5,r5,#32 ; r5 = available
223 ; Stall...
224 RSBS r14,r12,#0 ; r14 = -node, node>0 => MI
225 BMI oc_huff_token_decode_continue
226 MOV r10,r14,LSR #8 ; r10 = n=node>>8
227 MOV r4,r4,LSL r10 ; r4 = window<<=n
228 SUB r5,r10 ; r5 = available-=n
229 STMIB r0,{r3-r5} ; ptr = r3
230 ; window = r4
231 ; available = r5
232 AND r0,r14,#255 ; r0 = node&255
233 LDMFD r13!,{r4,r5,r10,pc}
234 ENDP
236 END