;********************************************************************
;*                                                                  *
;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
;*                                                                  *
;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
;*                                                                  *
;********************************************************************
;
; function:
; last mod: $Id: armbits.s 17481 2010-10-03 22:49:42Z tterribe $
;
;********************************************************************

18 AREA |.text|, CODE, READONLY |
|
19 |
|
20 ; Explicitly specifying alignment here because some versions of |
|
21 ; gas don't align code correctly. See |
|
22 ; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html |
|
23 ; https://bugzilla.mozilla.org/show_bug.cgi?id=920992 |
|
24 ALIGN |
|
25 |
|
26 EXPORT oc_pack_read_arm |
|
27 EXPORT oc_pack_read1_arm |
|
28 EXPORT oc_huff_token_decode_arm |
|
29 |
|
;-----------------------------------------------------------------------------
; oc_pack_read1_arm
;   Reads a single bit from the top of the bit-packer window.
;
;   In:   r0 = oc_pack_buf *_b
;   Out:  r0 = window>>31 (the top bit of the window, 0 or 1)
;   Uses: r2, r3, r12 on the fast path.
;
;   Fields accessed here: [_b,#8] = window, [_b,#12] = available.
;
;   When available<1 this branches to oc_pack_read1_refill with
;   r0 = _b, r2 = window, r3 = available-1, r12 = _b+8 and r14 = the
;   caller's return address.  That code pushes r14 and returns to the
;   caller itself, so the register state at the BLT must not be changed.
;-----------------------------------------------------------------------------
oc_pack_read1_arm PROC
        ; r0 = oc_pack_buf *_b
        ADD     r12,r0,#8               ; r12 = address of the window field
        LDMIA   r12,{r2,r3}             ; r2 = window
        ; Stall...                      ; r3 = available
        ; Stall...
        SUBS    r3,r3,#1                ; r3 = available-1, available<1 => LT
        BLT     oc_pack_read1_refill
        MOV     r0,r2,LSR #31           ; r0 = window>>31
        MOV     r2,r2,LSL #1            ; r2 = window<<=1
        STMIA   r12,{r2,r3}             ; window = r2
                                        ; available = r3
        MOV     PC,r14                  ; return
        ENDP

;-----------------------------------------------------------------------------
; oc_pack_read_arm
;   Reads _bits bits from the top of the bit-packer window, refilling the
;   window from the packet buffer when it does not hold enough bits.
;
;   In:   r0 = oc_pack_buf *_b
;         r1 = int _bits
;   Out:  r0 = window>>(32-_bits)
;   Uses: r1-r3, r12.  The refill path also uses r10, r11 and r14; it
;         pushes them on entry and pops them (r14 into PC) on return.
;
;   oc_pack_buf fields as accessed here (byte offsets from _b):
;     +0 stop, +4 ptr, +8 window, +12 available, +16 eof
;
;   oc_pack_read1_refill is a secondary entry used by oc_pack_read1_arm.
;   It expects r0 = _b, r2 = window, r3 = available-1, r12 = _b+8 and
;   r14 = return address, and sets _bits (r1) to 1 itself.
;-----------------------------------------------------------------------------
oc_pack_read_arm PROC
        ; r0 = oc_pack_buf *_b
        ; r1 = int _bits
        ADD     r12,r0,#8               ; r12 = address of the window field
        LDMIA   r12,{r2,r3}             ; r2 = window
        ; Stall...                      ; r3 = available
        ; Stall...
        SUBS    r3,r3,r1                ; r3 = available-_bits, available<_bits => LT
        BLT     oc_pack_read_refill
        RSB     r0,r1,#32               ; r0 = 32-_bits
        MOV     r0,r2,LSR r0            ; r0 = window>>32-_bits
        MOV     r2,r2,LSL r1            ; r2 = window<<=_bits
        STMIA   r12,{r2,r3}             ; window = r2
                                        ; available = r3
        MOV     PC,r14                  ; return

; We need to refill window.
oc_pack_read1_refill
        MOV     r1,#1                   ; _bits = 1
oc_pack_read_refill
        STMFD   r13!,{r10,r11,r14}
        LDMIA   r0,{r10,r11}            ; r10 = stop
                                        ; r11 = ptr
        RSB     r0,r1,#32               ; r0 = 32-_bits
        RSB     r3,r3,r0                ; r3 = (32-_bits)-(available-_bits)
                                        ;    = 32-available
; We can use unsigned compares for both the pointers and for available
;  (allowing us to chain condition codes) because available will never be
;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
;  negative.
; The loop below is unrolled four times.  Each step loads one byte only while
;  the HI condition from the preceding compares still holds; once a compare
;  fails, every remaining conditional instruction is skipped.
        CMP     r10,r11                 ; ptr<stop => HI
        CMPHI   r3,#7                   ;   available<=24 => HI
        LDRHIB  r14,[r11],#1            ;     r14 = *ptr++
        SUBHI   r3,r3,#8                ;     available += 8
        ; (HI) Stall...
        ORRHI   r2,r2,r14,LSL r3        ;     r2 = window|=r14<<32-available
        CMPHI   r10,r11                 ; ptr<stop => HI
        CMPHI   r3,#7                   ;   available<=24 => HI
        LDRHIB  r14,[r11],#1            ;     r14 = *ptr++
        SUBHI   r3,r3,#8                ;     available += 8
        ; (HI) Stall...
        ORRHI   r2,r2,r14,LSL r3        ;     r2 = window|=r14<<32-available
        CMPHI   r10,r11                 ; ptr<stop => HI
        CMPHI   r3,#7                   ;   available<=24 => HI
        LDRHIB  r14,[r11],#1            ;     r14 = *ptr++
        SUBHI   r3,r3,#8                ;     available += 8
        ; (HI) Stall...
        ORRHI   r2,r2,r14,LSL r3        ;     r2 = window|=r14<<32-available
        CMPHI   r10,r11                 ; ptr<stop => HI
        CMPHI   r3,#7                   ;   available<=24 => HI
        LDRHIB  r14,[r11],#1            ;     r14 = *ptr++
        SUBHI   r3,r3,#8                ;     available += 8
        ; (HI) Stall...
        ORRHI   r2,r2,r14,LSL r3        ;     r2 = window|=r14<<32-available
        SUBS    r3,r0,r3                ; r3 = available-=_bits, available<_bits => LT
        BLT     oc_pack_read_refill_last
        MOV     r0,r2,LSR r0            ; r0 = window>>32-_bits
        MOV     r2,r2,LSL r1            ; r2 = window<<=_bits
        STR     r11,[r12,#-4]           ; ptr = r11
        STMIA   r12,{r2,r3}             ; window = r2
                                        ; available = r3
        LDMFD   r13!,{r10,r11,PC}

; Either we wanted to read more than 24 bits and didn't have enough room to
;  stuff the last byte into the window, or we hit the end of the packet.
oc_pack_read_refill_last
        CMP     r11,r10                 ; ptr<stop => LO
; If we didn't hit the end of the packet, then pull enough of the next byte
;  to fill up the window.
        LDRLOB  r14,[r11]               ; (LO) r14 = *ptr (ptr is not advanced)
; Otherwise, set the EOF flag and pretend we have lots of available bits.
        MOVHS   r14,#1                  ; (HS) r14 = 1
        ADDLO   r10,r3,r1               ; (LO) r10 = available
                                        ;      (r3 holds available-_bits here)
        STRHS   r14,[r12,#8]            ; (HS) eof = 1
        ANDLO   r10,r10,#7              ; (LO) r10 = available&7
        MOVHS   r3,#1<<30               ; (HS) available = OC_LOTS_OF_BITS
; The window holds `available` valid bits at its top, so only the top
;  32-available bits of the next byte fit below them.  The byte therefore has
;  to be shifted RIGHT by available&7; a left shift would OR it into window
;  bits that are already valid.
        ORRLO   r2,r2,r14,LSR r10       ; (LO) r2 = window|=*ptr>>(available&7)
        MOV     r0,r2,LSR r0            ; r0 = window>>32-_bits
        MOV     r2,r2,LSL r1            ; r2 = window<<=_bits
        STR     r11,[r12,#-4]           ; ptr = r11
        STMIA   r12,{r2,r3}             ; window = r2
                                        ; available = r3
        LDMFD   r13!,{r10,r11,PC}
        ENDP

;-----------------------------------------------------------------------------
; oc_huff_token_decode_arm
;   Walks a table-driven Huffman tree, consuming bits from the bit-packer
;   window, until a leaf entry is reached.
;
;   In:   r0 = oc_pack_buf *_b
;         r1 = const ogg_int16_t *_tree
;   Out:  r0 = (-node)&255, where node is the leaf entry that was reached
;   Uses: r2, r3, r12; r4, r5, r10 and r14 are pushed on entry and popped
;         (r14 into PC) on return.
;
;   Tree layout as read by this code (16-bit signed entries):
;     _tree[node]        = n, the number of window bits used as an index
;     _tree[node+1+bits] = entry selected by the top n bits of the window
;       entry > 0  : index of the next node; n bits are consumed
;       entry <= 0 : leaf; (-entry)>>8 bits are consumed and (-entry)&255
;                    is returned
;
;   On exit ptr, window and available are written back to _b.  This routine
;   does not write the eof field; at the end of the buffer it only sets
;   available to a very large value.
;-----------------------------------------------------------------------------
oc_huff_token_decode_arm PROC
        ; r0 = oc_pack_buf *_b
        ; r1 = const ogg_int16_t *_tree
        STMFD   r13!,{r4,r5,r10,r14}
        LDRSH   r10,[r1]                ; r10 = n=_tree[0]
        LDMIA   r0,{r2-r5}              ; r2 = stop
        ; Stall...                      ; r3 = ptr
        ; Stall...                      ; r4 = window
                                        ; r5 = available
        CMP     r10,r5                  ; n>available => GT
        BGT     oc_huff_token_decode_refill0
        RSB     r14,r10,#32             ; r14 = 32-n
        MOV     r14,r4,LSR r14          ; r14 = bits=window>>32-n
        ADD     r14,r1,r14,LSL #1       ; r14 = _tree+bits
        LDRSH   r12,[r14,#2]            ; r12 = node=_tree[1+bits]
        ; Stall...
        ; Stall...
        RSBS    r14,r12,#0              ; r14 = -node, node>0 => MI
        BMI     oc_huff_token_decode_continue
        ; Leaf reached: r14 = -node = (bits to consume)<<8 | token.
        MOV     r10,r14,LSR #8          ; r10 = n=-node>>8
        MOV     r4,r4,LSL r10           ; r4 = window<<=n
        SUB     r5,r5,r10               ; r5 = available-=n
        STMIB   r0,{r3-r5}              ; ptr = r3
                                        ; window = r4
                                        ; available = r5
        AND     r0,r14,#255             ; r0 = -node&255
        LDMFD   r13!,{r4,r5,r10,pc}

; The first tree node wasn't enough to reach a leaf, read another
oc_huff_token_decode_continue
        ADD     r12,r1,r12,LSL #1       ; r12 = _tree+node
        MOV     r4,r4,LSL r10           ; r4 = window<<=n
        SUB     r5,r5,r10               ; r5 = available-=n
        LDRSH   r10,[r12],#2            ; r10 = n=_tree[node]
        ; Stall...                      ; r12 = _tree+node+1
        ; Stall...
        CMP     r10,r5                  ; n>available => GT
        BGT     oc_huff_token_decode_refill
        RSB     r14,r10,#32             ; r14 = 32-n
        MOV     r14,r4,LSR r14          ; r14 = bits=window>>32-n
        ADD     r12,r12,r14             ; add bits once here and once as the
                                        ;  load offset => 2*bits bytes in total
        LDRSH   r12,[r12,r14]           ; r12 = node=_tree[node+1+bits]
        ; Stall...
        ; Stall...
        RSBS    r14,r12,#0              ; r14 = -node, node>0 => MI
        BMI     oc_huff_token_decode_continue
        ; Leaf reached: r14 = -node = (bits to consume)<<8 | token.
        MOV     r10,r14,LSR #8          ; r10 = n=-node>>8
        MOV     r4,r4,LSL r10           ; r4 = window<<=n
        SUB     r5,r5,r10               ; r5 = available-=n
        STMIB   r0,{r3-r5}              ; ptr = r3
                                        ; window = r4
                                        ; available = r5
        AND     r0,r14,#255             ; r0 = -node&255
        LDMFD   r13!,{r4,r5,r10,pc}

oc_huff_token_decode_refill0
        ADD     r12,r1,#2               ; r12 = _tree+1
oc_huff_token_decode_refill
; Entered with r10 = n, r12 = address of the first entry of the current node.
; From here until the final RSB, r5 holds 32-available rather than available.
; We can't possibly need more than 15 bits, so available must be <= 15.
; Therefore we can load at least two bytes without checking it.
        CMP     r2,r3                   ; ptr<stop => HI
        LDRHIB  r14,[r3],#1             ; (HI) r14 = *ptr++
        RSBHI   r5,r5,#24               ; (HI) r5 = 32-(available+=8)
        RSBLS   r5,r5,#32               ; (LS) r5 = 32-available
        ORRHI   r4,r4,r14,LSL r5        ; (HI) r4 = window|=r14<<32-available
        CMPHI   r2,r3                   ; ptr<stop => HI
        LDRHIB  r14,[r3],#1             ; (HI) r14 = *ptr++
        SUBHI   r5,r5,#8                ; (HI) available += 8
        ; (HI) Stall...
        ORRHI   r4,r4,r14,LSL r5        ; (HI) r4 = window|=r14<<32-available
; We can use unsigned compares for both the pointers and for available
;  (allowing us to chain condition codes) because available will never be
;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
;  negative.
        CMPHI   r2,r3                   ; ptr<stop => HI
        CMPHI   r5,#7                   ;   available<=24 => HI
        LDRHIB  r14,[r3],#1             ;     r14 = *ptr++
        SUBHI   r5,r5,#8                ;     available += 8
        ; (HI) Stall...
        ORRHI   r4,r4,r14,LSL r5        ;     r4 = window|=r14<<32-available
        CMP     r2,r3                   ; ptr<stop => HI
        MOVLS   r5,#-1<<30              ; (LS) available = OC_LOTS_OF_BITS+32
                                        ;      (r5 = 32-available = -(1<<30))
        CMPHI   r5,#7                   ; (HI) available<=24 => HI
        LDRHIB  r14,[r3],#1             ; (HI)   r14 = *ptr++
        SUBHI   r5,r5,#8                ; (HI)   available += 8
        ; (HI) Stall...
        ORRHI   r4,r4,r14,LSL r5        ; (HI)   r4 = window|=r14<<32-available
        RSB     r14,r10,#32             ; r14 = 32-n
        MOV     r14,r4,LSR r14          ; r14 = bits=window>>32-n
        ADD     r12,r12,r14             ; add bits once here and once as the
                                        ;  load offset => 2*bits bytes in total
        LDRSH   r12,[r12,r14]           ; r12 = node=_tree[node+1+bits]
        RSB     r5,r5,#32               ; r5 = available
        ; Stall...
        RSBS    r14,r12,#0              ; r14 = -node, node>0 => MI
        BMI     oc_huff_token_decode_continue
        ; Leaf reached: r14 = -node = (bits to consume)<<8 | token.
        MOV     r10,r14,LSR #8          ; r10 = n=-node>>8
        MOV     r4,r4,LSL r10           ; r4 = window<<=n
        SUB     r5,r5,r10               ; r5 = available-=n
        STMIB   r0,{r3-r5}              ; ptr = r3
                                        ; window = r4
                                        ; available = r5
        AND     r0,r14,#255             ; r0 = -node&255
        LDMFD   r13!,{r4,r5,r10,pc}
        ENDP

236 END |