;********************************************************************
;*                                                                  *
;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
;*                                                                  *
;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
;*                                                                  *
;********************************************************************
;
; function:
;   last mod: $Id: armbits.s 17481 2010-10-03 22:49:42Z tterribe $
;
;********************************************************************
;
; Syntax: ARM assembler (armasm-style directives, pre-UAL mnemonics).
;
; oc_pack_buf field offsets, as accessed by the code in this file:
;   +0  stop       +4  ptr       +8  window      +12 available
;   +16 eof
; The window is 32 bits wide and is consumed from its most significant end.

	AREA	|.text|, CODE, READONLY

	; Explicitly specifying alignment here because some versions of
	; gas don't align code correctly. See
	; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html
	; https://bugzilla.mozilla.org/show_bug.cgi?id=920992
	ALIGN

	EXPORT oc_pack_read_arm
	EXPORT oc_pack_read1_arm
	EXPORT oc_huff_token_decode_arm

;--------------------------------------------------------------------
; oc_pack_read1_arm: read a single bit from the pack buffer.
; In:    r0 = oc_pack_buf *_b
; Out:   r0 = the bit read (top bit of the window)
; Uses:  r1 (refill path only), r2, r3, r12, flags
; When no bit is available this branches into the shared refill code of
;  oc_pack_read_arm with _bits forced to 1.
;--------------------------------------------------------------------
oc_pack_read1_arm PROC
	; r0 = oc_pack_buf *_b
	ADD r12,r0,#8              ; r12 = &_b->window
	LDMIA r12,{r2,r3}          ; r2 = window
	; Stall...                 ; r3 = available
	; Stall...
	SUBS r3,r3,#1              ; r3 = available-1, available<1 => LT
	BLT oc_pack_read1_refill
	MOV r0,r2,LSR #31          ; r0 = window>>31
	MOV r2,r2,LSL #1           ; r2 = window<<=1
	STMIA r12,{r2,r3}          ; window = r2
	                           ; available = r3
	MOV PC,r14
	ENDP

;--------------------------------------------------------------------
; oc_pack_read_arm: read _bits bits from the pack buffer.
; In:    r0 = oc_pack_buf *_b
;        r1 = int _bits
; Out:   r0 = window>>(32-_bits), i.e. the next _bits bits
; Uses:  r2, r3, r12, flags; r10, r11 and r14 are saved and restored on the
;         refill path.
; Side effects: updates window and available; the refill path also updates
;  ptr, and sets eof when the end of the packet is reached with too few bits.
;--------------------------------------------------------------------
oc_pack_read_arm PROC
	; r0 = oc_pack_buf *_b
	; r1 = int          _bits
	ADD r12,r0,#8              ; r12 = &_b->window
	LDMIA r12,{r2,r3}          ; r2 = window
	; Stall...                 ; r3 = available
	; Stall...
	SUBS r3,r3,r1              ; r3 = available-_bits, available<_bits => LT
	BLT oc_pack_read_refill
	RSB r0,r1,#32              ; r0 = 32-_bits
	MOV r0,r2,LSR r0           ; r0 = window>>32-_bits
	MOV r2,r2,LSL r1           ; r2 = window<<=_bits
	STMIA r12,{r2,r3}          ; window = r2
	                           ; available = r3
	MOV PC,r14

; We need to refill window.
; On entry r3 = available-_bits (negative), r2 = window, r12 = &_b->window.
oc_pack_read1_refill
	MOV r1,#1                  ; _bits = 1 for the single-bit reader
oc_pack_read_refill
	STMFD r13!,{r10,r11,r14}
	LDMIA r0,{r10,r11}         ; r10 = stop
	                           ; r11 = ptr
	RSB r0,r1,#32              ; r0 = 32-_bits
	RSB r3,r3,r0               ; r3 = 32-available
; We can use unsigned compares for both the pointers and for available
;  (allowing us to chain condition codes) because available will never be
;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
;  negative.
; The byte load is unrolled four times; each step is skipped once either
;  ptr reaches stop or fewer than 8 free bits remain in the window.
	CMP r10,r11                ; ptr<stop => HI
	CMPHI r3,#7                ;   available<=24 => HI
	LDRHIB r14,[r11],#1        ;     r14 = *ptr++
	SUBHI r3,#8                ;     available += 8
	; (HI) Stall...
	ORRHI r2,r14,LSL r3        ;     r2 = window|=r14<<32-available
	CMPHI r10,r11              ; ptr<stop => HI
	CMPHI r3,#7                ;   available<=24 => HI
	LDRHIB r14,[r11],#1        ;     r14 = *ptr++
	SUBHI r3,#8                ;     available += 8
	; (HI) Stall...
	ORRHI r2,r14,LSL r3        ;     r2 = window|=r14<<32-available
	CMPHI r10,r11              ; ptr<stop => HI
	CMPHI r3,#7                ;   available<=24 => HI
	LDRHIB r14,[r11],#1        ;     r14 = *ptr++
	SUBHI r3,#8                ;     available += 8
	; (HI) Stall...
	ORRHI r2,r14,LSL r3        ;     r2 = window|=r14<<32-available
	CMPHI r10,r11              ; ptr<stop => HI
	CMPHI r3,#7                ;   available<=24 => HI
	LDRHIB r14,[r11],#1        ;     r14 = *ptr++
	SUBHI r3,#8                ;     available += 8
	; (HI) Stall...
	ORRHI r2,r14,LSL r3        ;     r2 = window|=r14<<32-available
	SUBS r3,r0,r3              ; r3 = available-=_bits, available<_bits => LT
	BLT oc_pack_read_refill_last
	MOV r0,r2,LSR r0           ; r0 = window>>32-_bits
	MOV r2,r2,LSL r1           ; r2 = window<<=_bits
	STR r11,[r12,#-4]          ; ptr = r11
	STMIA r12,{r2,r3}          ; window = r2
	                           ; available = r3
	LDMFD r13!,{r10,r11,PC}

; Either we wanted to read more than 24 bits and didn't have enough room to
;  stuff the last byte into the window, or we hit the end of the packet.
oc_pack_read_refill_last
	CMP r11,r10                ; ptr<stop => LO
; If we didn't hit the end of the packet, then pull enough of the next byte
;  to fill up the window.
	LDRLOB r14,[r11]           ; (LO) r14 = *ptr
; Otherwise, set the EOF flag and pretend we have lots of available bits.
	MOVHS r14,#1               ; (HS) r14 = 1
	ADDLO r10,r3,r1            ; (LO) r10 = available
	STRHS r14,[r12,#8]         ; (HS) eof = 1
	ANDLO r10,r10,#7           ; (LO) r10 = available&7
	MOVHS r3,#1<<30            ; (HS) available = OC_LOTS_OF_BITS
; On the LO path available is 25..31, so the free low 32-available bits of
;  the window must receive the TOP bits of *ptr: a right shift by
;  available-24 == available&7. (This was previously LSL, which ORed the
;  byte into bits that already held valid window data.)
; ptr is not advanced here; r3 stays negative to record the borrowed bits.
	ORRLO r2,r14,LSR r10       ; (LO) r2 = window|=*ptr>>(available&7)
	MOV r0,r2,LSR r0           ; r0 = window>>32-_bits
	MOV r2,r2,LSL r1           ; r2 = window<<=_bits
	STR r11,[r12,#-4]          ; ptr = r11
	STMIA r12,{r2,r3}          ; window = r2
	                           ; available = r3
	LDMFD r13!,{r10,r11,PC}
	ENDP



;--------------------------------------------------------------------
; oc_huff_token_decode_arm: decode one token by walking a Huffman tree
;  stored as an array of 16-bit entries.
; In:    r0 = oc_pack_buf *_b
;        r1 = const ogg_int16_t *_tree
; Out:   r0 = (-leaf)&255, where leaf is the non-positive entry reached
; Uses:  r2, r3, r12, flags; r4, r5, r10 and r14 are saved and restored.
; Tree walk, as implemented below: n=_tree[node] is the number of bits to
;  look up, and _tree[node+1+bits] is the next entry. A positive entry is the
;  index of another node; otherwise (-entry)>>8 is the number of bits to
;  consume and (-entry)&255 is the return value.
; Side effects: writes back ptr, window and available. eof is not touched.
;--------------------------------------------------------------------
oc_huff_token_decode_arm PROC
	; r0 = oc_pack_buf       *_b
	; r1 = const ogg_int16_t *_tree
	STMFD r13!,{r4,r5,r10,r14}
	LDRSH r10,[r1]             ; r10 = n=_tree[0]
	LDMIA r0,{r2-r5}           ; r2 = stop
	; Stall...                 ; r3 = ptr
	; Stall...                 ; r4 = window
	                           ; r5 = available
	CMP r10,r5                 ; n>available => GT
	BGT oc_huff_token_decode_refill0
	RSB r14,r10,#32            ; r14 = 32-n
	MOV r14,r4,LSR r14         ; r14 = bits=window>>32-n
	ADD r14,r1,r14,LSL #1      ; r14 = _tree+bits
	LDRSH r12,[r14,#2]         ; r12 = node=_tree[1+bits]
	; Stall...
	; Stall...
	RSBS r14,r12,#0            ; r14 = -node, node>0 => MI
	BMI oc_huff_token_decode_continue
	MOV r10,r14,LSR #8         ; r10 = n=node>>8
	MOV r4,r4,LSL r10          ; r4 = window<<=n
	SUB r5,r10                 ; r5 = available-=n
	STMIB r0,{r3-r5}           ; ptr = r3
	                           ; window = r4
	                           ; available = r5
	AND r0,r14,#255            ; r0 = node&255
	LDMFD r13!,{r4,r5,r10,pc}

; The first tree node wasn't enough to reach a leaf, read another
oc_huff_token_decode_continue
	ADD r12,r1,r12,LSL #1      ; r12 = _tree+node
	MOV r4,r4,LSL r10          ; r4 = window<<=n
	SUB r5,r5,r10              ; r5 = available-=n
	LDRSH r10,[r12],#2         ; r10 = n=_tree[node]
	; Stall...                 ; r12 = _tree+node+1
	; Stall...
	CMP r10,r5                 ; n>available => GT
	BGT oc_huff_token_decode_refill
	RSB r14,r10,#32            ; r14 = 32-n
	MOV r14,r4,LSR r14         ; r14 = bits=window>>32-n
	ADD r12,r12,r14            ; r12 += bits; with the load offset below
	                           ;  this adds 2*bits bytes in total
	LDRSH r12,[r12,r14]        ; r12 = node=_tree[node+1+bits]
	; Stall...
	; Stall...
	RSBS r14,r12,#0            ; r14 = -node, node>0 => MI
	BMI oc_huff_token_decode_continue
	MOV r10,r14,LSR #8         ; r10 = n=node>>8
	MOV r4,r4,LSL r10          ; r4 = window<<=n
	SUB r5,r10                 ; r5 = available-=n
	STMIB r0,{r3-r5}           ; ptr = r3
	                           ; window = r4
	                           ; available = r5
	AND r0,r14,#255            ; r0 = node&255
	LDMFD r13!,{r4,r5,r10,pc}

; Refill entry points. On entry r12 must point at the first child slot of the
;  current node (_tree+node+1) and r10 = n.
oc_huff_token_decode_refill0
	ADD r12,r1,#2              ; r12 = _tree+1
oc_huff_token_decode_refill
; We can't possibly need more than 15 bits, so available must be <= 15.
; Therefore we can load at least two bytes without checking it.
; From here until the final RSB, r5 holds 32-available rather than available.
	CMP r2,r3                  ; ptr<stop => HI
	LDRHIB r14,[r3],#1         ;   r14 = *ptr++
	RSBHI r5,r5,#24            ;   (HI) available = 32-(available+=8)
	RSBLS r5,r5,#32            ;   (LS) r5 = 32-available
	ORRHI r4,r14,LSL r5        ;   r4 = window|=r14<<32-available
	CMPHI r2,r3                ; ptr<stop => HI
	LDRHIB r14,[r3],#1         ;   r14 = *ptr++
	SUBHI r5,#8                ;   available += 8
	; (HI) Stall...
	ORRHI r4,r14,LSL r5        ;   r4 = window|=r14<<32-available
; We can use unsigned compares for both the pointers and for available
;  (allowing us to chain condition codes) because available will never be
;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
;  negative.
	CMPHI r2,r3                ; ptr<stop => HI
	CMPHI r5,#7                ;   available<=24 => HI
	LDRHIB r14,[r3],#1         ;     r14 = *ptr++
	SUBHI r5,#8                ;     available += 8
	; (HI) Stall...
	ORRHI r4,r14,LSL r5        ;     r4 = window|=r14<<32-available
	CMP r2,r3                  ; ptr<stop => HI
	MOVLS r5,#-1<<30           ; (LS) available = OC_LOTS_OF_BITS+32
	CMPHI r5,#7                ; (HI) available<=24 => HI
	LDRHIB r14,[r3],#1         ; (HI)   r14 = *ptr++
	SUBHI r5,#8                ; (HI)   available += 8
	; (HI) Stall...
	ORRHI r4,r14,LSL r5        ; (HI)   r4 = window|=r14<<32-available
	RSB r14,r10,#32            ; r14 = 32-n
	MOV r14,r4,LSR r14         ; r14 = bits=window>>32-n
	ADD r12,r12,r14            ; r12 += bits; with the load offset below
	                           ;  this adds 2*bits bytes in total
	LDRSH r12,[r12,r14]        ; r12 = node=_tree[node+1+bits]
	RSB r5,r5,#32              ; r5 = available
	; Stall...
	RSBS r14,r12,#0            ; r14 = -node, node>0 => MI
	BMI oc_huff_token_decode_continue
	MOV r10,r14,LSR #8         ; r10 = n=node>>8
	MOV r4,r4,LSL r10          ; r4 = window<<=n
	SUB r5,r10                 ; r5 = available-=n
	STMIB r0,{r3-r5}           ; ptr = r3
	                           ; window = r4
	                           ; available = r5
	AND r0,r14,#255            ; r0 = node&255
	LDMFD r13!,{r4,r5,r10,pc}
	ENDP

	END