media/libtheora/lib/arm/armbits.s

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 ;********************************************************************
     2 ;*                                                                  *
     3 ;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
     4 ;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
     5 ;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
     6 ;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
     7 ;*                                                                  *
     8 ;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
     9 ;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
    10 ;*                                                                  *
    11 ;********************************************************************
    12 ;
    13 ; function:
    14 ;   last mod: $Id: armbits.s 17481 2010-10-03 22:49:42Z tterribe $
    15 ;
    16 ;********************************************************************
    18 	AREA	|.text|, CODE, READONLY ; everything below is placed in a read-only code area named .text
    20 	; Explicitly specifying alignment here because some versions of
    21 	; gas don't align code correctly. See
    22 	; http://lists.gnu.org/archive/html/bug-binutils/2011-06/msg00199.html
    23 	; https://bugzilla.mozilla.org/show_bug.cgi?id=920992
    24 	ALIGN
       	; The three routines defined in this file that are made visible to the
       	;  linker. All other labels in the file are internal branch targets.
    26 	EXPORT oc_pack_read_arm
    27 	EXPORT oc_pack_read1_arm
    28 	EXPORT oc_huff_token_decode_arm
    30 oc_pack_read1_arm PROC
       	; Read a single bit from the most significant end of the bit window.
       	;  In:  r0 = oc_pack_buf *_b
       	;  Out: r0 = the bit that was read (window>>31) on the fast path.
       	; Fast path: at least one bit is available, so the bit is taken
       	;  directly from the window and window/available are written back.
       	; Slow path: branches to oc_pack_read1_refill with r0 = _b, r2 = window,
       	;  r3 = available-1 and r12 = _b+8 still live; that code returns to our
       	;  caller itself.
       	; Only r0-r3 and r12 are written on the fast path.
    31 	; r0 = oc_pack_buf *_b
    32 	ADD r12,r0,#8          ; r12 = _b+8, address of the {window,available} pair
    33 	LDMIA r12,{r2,r3}      ; r2 = window
    34 	; Stall...             ; r3 = available
    35 	; Stall...
    36 	SUBS r3,r3,#1          ; r3 = available-1, available<1 => LT
    37 	BLT oc_pack_read1_refill ; not even one bit left: take the refill path
    38 	MOV r0,r2,LSR #31      ; r0 = window>>31
    39 	MOV r2,r2,LSL #1       ; r2 = window<<=1
    40 	STMIA r12,{r2,r3}      ; window = r2
    41 	                       ; available = r3
    42 	MOV PC,r14             ; return to the caller (r14 holds the return address)
    43 	ENDP
    45 oc_pack_read_arm PROC
       	; Read _bits bits from the most significant end of the bit window.
       	;  In:  r0 = oc_pack_buf *_b
       	;       r1 = int          _bits
       	;  Out: r0 = window>>(32-_bits), i.e. the top _bits bits of the window.
       	; Fields of *_b touched here, by byte offset from r0:
       	;   +0 stop, +4 ptr, +8 window, +12 available, +16 eof.
       	; Fast path: available>=_bits, so only window/available are updated.
       	; Otherwise the window is refilled a byte at a time from ptr (up to four
       	;  bytes) before the bits are extracted; r10, r11 and r14 are saved on the
       	;  stack for the duration of the refill.
       	; NOTE(review): the shift amounts assume 0<=_bits<=32 -- confirm with
       	;  the callers.
    46 	; r0 = oc_pack_buf *_b
    47 	; r1 = int          _bits
    48 	ADD r12,r0,#8          ; r12 = _b+8, address of the {window,available} pair
    49 	LDMIA r12,{r2,r3}      ; r2 = window
    50 	; Stall...             ; r3 = available
    51 	; Stall...
    52 	SUBS r3,r3,r1          ; r3 = available-_bits, available<_bits => LT
    53 	BLT oc_pack_read_refill
    54 	RSB r0,r1,#32          ; r0 = 32-_bits
    55 	MOV r0,r2,LSR r0       ; r0 = window>>32-_bits
    56 	MOV r2,r2,LSL r1       ; r2 = window<<=_bits
    57 	STMIA r12,{r2,r3}      ; window = r2
    58 	                       ; available = r3
    59 	MOV PC,r14             ; return to the caller
    61 ; We need to refill window.
       ; Entry contract for both labels below (established by oc_pack_read_arm above
       ;  and by oc_pack_read1_arm): r0 = _b, r2 = window, r3 = available-_bits,
       ;  r12 = _b+8, r14 = return address.
    62 oc_pack_read1_refill
    63 	MOV r1,#1              ; single-bit read: _bits = 1
    64 oc_pack_read_refill
    65 	STMFD r13!,{r10,r11,r14}
    66 	LDMIA r0,{r10,r11}     ; r10 = stop
    67 	                       ; r11 = ptr
    68 	RSB r0,r1,#32          ; r0 = 32-_bits
    69 	RSB r3,r3,r0           ; r3 = 32-available
    70 ; We can use unsigned compares for both the pointers and for available
    71 ;  (allowing us to chain condition codes) because available will never be
    72 ;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
    73 ;  negative.
       ; The four groups below are an unrolled loop: each one loads one more byte
       ;  only if every previous condition held (the HI condition is chained), so
       ;  as soon as ptr reaches stop or the window has no room for a whole byte,
       ;  all remaining conditional instructions are skipped.
    74 	CMP r10,r11            ; ptr<stop => HI
    75 	CMPHI r3,#7            ;   available<=24 => HI
    76 	LDRHIB r14,[r11],#1    ;     r14 = *ptr++
    77 	SUBHI r3,#8            ;     available += 8
    78 	; (HI) Stall...
    79 	ORRHI r2,r14,LSL r3    ;     r2 = window|=r14<<32-available
    80 	CMPHI r10,r11          ;     ptr<stop => HI
    81 	CMPHI r3,#7            ;       available<=24 => HI
    82 	LDRHIB r14,[r11],#1    ;         r14 = *ptr++
    83 	SUBHI r3,#8            ;         available += 8
    84 	; (HI) Stall...
    85 	ORRHI r2,r14,LSL r3    ;         r2 = window|=r14<<32-available
    86 	CMPHI r10,r11          ;         ptr<stop => HI
    87 	CMPHI r3,#7            ;           available<=24 => HI
    88 	LDRHIB r14,[r11],#1    ;             r14 = *ptr++
    89 	SUBHI r3,#8            ;             available += 8
    90 	; (HI) Stall...
    91 	ORRHI r2,r14,LSL r3    ;             r2 = window|=r14<<32-available
    92 	CMPHI r10,r11          ;             ptr<stop => HI
    93 	CMPHI r3,#7            ;               available<=24 => HI
    94 	LDRHIB r14,[r11],#1    ;                 r14 = *ptr++
    95 	SUBHI r3,#8            ;                 available += 8
    96 	; (HI) Stall...
    97 	ORRHI r2,r14,LSL r3    ;                 r2 = window|=r14<<32-available
    98 	SUBS r3,r0,r3          ; r3 = available-=_bits, available<_bits => LT
    99 	BLT oc_pack_read_refill_last
   100 	MOV r0,r2,LSR r0       ; r0 = window>>32-_bits
   101 	MOV r2,r2,LSL r1       ; r2 = window<<=_bits
   102 	STR r11,[r12,#-4]      ; ptr = r11
   103 	STMIA r12,{r2,r3}      ; window = r2
   104 	                       ; available = r3
   105 	LDMFD r13!,{r10,r11,PC} ; restore r10/r11 and return
   107 ; Either we wanted to read more than 24 bits and didn't have enough room to
   108 ;  stuff the last byte into the window, or we hit the end of the packet.
   109 oc_pack_read_refill_last
   110 	CMP r11,r10            ; ptr<stop => LO
   111 ; If we didn't hit the end of the packet, then pull enough of the next byte
   112 ;  to fill up the window.
   113 	LDRLOB r14,[r11]       ; (LO) r14 = *ptr
   114 ; Otherwise, set the EOF flag and pretend we have lots of available bits.
   115 	MOVHS r14,#1           ; (HS) r14 = 1
   116 	ADDLO r10,r3,r1        ; (LO) r10 = available
   117 	STRHS r14,[r12,#8]     ; (HS) eof = 1
   118 	ANDLO r10,r10,#7       ; (LO) r10 = available&7
   119 	MOVHS r3,#1<<30        ; (HS) available = OC_LOTS_OF_BITS
       ; The window has fewer than 8 free bits at its low end, so only the top
       ;  bits of the next byte fit: shift the byte RIGHT by available&7 so that
       ;  they land in the free low bits. ptr is not advanced here; the stored
       ;  available goes negative after _bits is subtracted.
   120 	ORRLO r2,r14,LSR r10   ; (LO) r2 = window|=*ptr>>(available&7)
   121 	MOV r0,r2,LSR r0       ; r0 = window>>32-_bits
   122 	MOV r2,r2,LSL r1       ; r2 = window<<=_bits
   123 	STR r11,[r12,#-4]      ; ptr = r11
   124 	STMIA r12,{r2,r3}      ; window = r2
   125 	                       ; available = r3
   126 	LDMFD r13!,{r10,r11,PC} ; restore r10/r11 and return
   127 	ENDP
   131 oc_huff_token_decode_arm PROC
       	; Decode one value by walking a table-driven Huffman tree using bits
       	;  taken from the most significant end of the bit window.
       	;  In:  r0 = oc_pack_buf       *_b
       	;       r1 = const ogg_int16_t *_tree
       	;  Out: r0 = (-entry)&255 for the leaf entry that was reached.
       	; Tree layout as used by the code below (16-bit signed entries):
       	;   _tree[node]          = n, the number of bits to look up at this node
       	;   _tree[node+1+bits]   = entry selected by the next n bits of window
       	;     entry >  0 : index of the next node; n bits are consumed
       	;     entry <= 0 : leaf; (-entry)>>8 bits are consumed and (-entry)&255
       	;                  is returned
       	; The root node is at index 0. ptr, window and available are written
       	;  back to *_b before returning. r4, r5, r10 and r14 are saved on the
       	;  stack; r2, r3 and r12 are used as scratch.
   132 	; r0 = oc_pack_buf       *_b
   133 	; r1 = const ogg_int16_t *_tree
   134 	STMFD r13!,{r4,r5,r10,r14}
   135 	LDRSH r10,[r1]         ; r10 = n=_tree[0]
   136 	LDMIA r0,{r2-r5}       ; r2 = stop
   137 	; Stall...             ; r3 = ptr
   138 	; Stall...             ; r4 = window
   139 	                       ; r5 = available
   140 	CMP r10,r5             ; n>available => GT
   141 	BGT oc_huff_token_decode_refill0
   142 	RSB r14,r10,#32        ; r14 = 32-n
   143 	MOV r14,r4,LSR r14     ; r14 = bits=window>>32-n
   144 	ADD r14,r1,r14,LSL #1  ; r14 = _tree+bits
   145 	LDRSH r12,[r14,#2]     ; r12 = node=_tree[1+bits]
   146 	; Stall...
   147 	; Stall...
   148 	RSBS r14,r12,#0        ; r14 = -node, node>0 => MI
   149 	BMI oc_huff_token_decode_continue
       	; Leaf reached: r14 = -node >= 0 packs the bit count and the result.
   150 	MOV r10,r14,LSR #8     ; r10 = n=(-node)>>8
   151 	MOV r4,r4,LSL r10      ; r4 = window<<=n
   152 	SUB r5,r10             ; r5 = available-=n
   153 	STMIB r0,{r3-r5}       ; ptr = r3
   154 	                       ; window = r4
   155 	                       ; available = r5
   156 	AND r0,r14,#255        ; r0 = (-node)&255
   157 	LDMFD r13!,{r4,r5,r10,pc}
   159 ; The first tree node wasn't enough to reach a leaf, read another
       ; On entry: r12 = index of the next node, r10 = number of bits the previous
       ;  lookup used (consumed from the window here).
   160 oc_huff_token_decode_continue
   161 	ADD r12,r1,r12,LSL #1  ; r12 = _tree+node
   162 	MOV r4,r4,LSL r10      ; r4 = window<<=n
   163 	SUB r5,r5,r10          ; r5 = available-=n
   164 	LDRSH r10,[r12],#2     ; r10 = n=_tree[node]
   165 	; Stall...             ; r12 = _tree+node+1
   166 	; Stall...
   167 	CMP r10,r5             ; n>available => GT
   168 	BGT oc_huff_token_decode_refill
   169 	RSB r14,r10,#32        ; r14 = 32-n
   170 	MOV r14,r4,LSR r14     ; r14 = bits=window>>32-n
   171 	ADD r12,r12,r14        ; add bits as a byte offset; the load adds it again, giving 2*bits bytes = bits entries
   172 	LDRSH r12,[r12,r14]    ; r12 = node=_tree[node+1+bits]
   173 	; Stall...
   174 	; Stall...
   175 	RSBS r14,r12,#0        ; r14 = -node, node>0 => MI
   176 	BMI oc_huff_token_decode_continue
       	; Leaf reached: r14 = -node >= 0 packs the bit count and the result.
   177 	MOV r10,r14,LSR #8     ; r10 = n=(-node)>>8
   178 	MOV r4,r4,LSL r10      ; r4 = window<<=n
   179 	SUB r5,r10             ; r5 = available-=n
   180 	STMIB r0,{r3-r5}       ; ptr = r3
   181 	                       ; window = r4
   182 	                       ; available = r5
   183 	AND r0,r14,#255        ; r0 = (-node)&255
   184 	LDMFD r13!,{r4,r5,r10,pc}
       ; Refill entered from the root lookup: make r12 point at the root's first
       ;  entry, matching what the continue path has in r12 when it needs a refill.
   186 oc_huff_token_decode_refill0
   187 	ADD r12,r1,#2          ; r12 = _tree+1
   188 oc_huff_token_decode_refill
   189 ; We can't possibly need more than 15 bits, so available must be <= 15.
   190 ; Therefore we can load at least two bytes without checking it.
       ; From here until it is converted back below, r5 holds 32-available (the
       ;  left-shift needed to place the next byte), not available itself.
   191 	CMP r2,r3              ; ptr<stop => HI
   192 	LDRHIB r14,[r3],#1     ;   r14 = *ptr++
   193 	RSBHI r5,r5,#24        ; (HI) available = 32-(available+=8)
   194 	RSBLS r5,r5,#32        ; (LS) r5 = 32-available
   195 	ORRHI r4,r14,LSL r5    ;   r4 = window|=r14<<32-available
   196 	CMPHI r2,r3            ;   ptr<stop => HI
   197 	LDRHIB r14,[r3],#1     ;     r14 = *ptr++
   198 	SUBHI r5,#8            ;     available += 8
   199 	; (HI) Stall...
   200 	ORRHI r4,r14,LSL r5    ;     r4 = window|=r14<<32-available
   201 ; We can use unsigned compares for both the pointers and for available
   202 ;  (allowing us to chain condition codes) because available will never be
   203 ;  larger than 32 (or we wouldn't be here), and thus 32-available will never be
   204 ;  negative.
   205 	CMPHI r2,r3            ;     ptr<stop => HI
   206 	CMPHI r5,#7            ;       available<=24 => HI
   207 	LDRHIB r14,[r3],#1     ;         r14 = *ptr++
   208 	SUBHI r5,#8            ;         available += 8
   209 	; (HI) Stall...
   210 	ORRHI r4,r14,LSL r5    ;         r4 = window|=r14<<32-available
       	; Unconditional re-test: if the input is exhausted at this point, fake
       	;  a huge available count instead of loading a fourth byte.
   211 	CMP r2,r3              ; ptr<stop => HI
   212 	MOVLS r5,#-1<<30       ; (LS) available = OC_LOTS_OF_BITS+32
   213 	CMPHI r5,#7            ; (HI) available<=24 => HI
   214 	LDRHIB r14,[r3],#1     ; (HI)   r14 = *ptr++
   215 	SUBHI r5,#8            ; (HI)   available += 8
   216 	; (HI) Stall...
   217 	ORRHI r4,r14,LSL r5    ; (HI)   r4 = window|=r14<<32-available
   218 	RSB r14,r10,#32        ; r14 = 32-n
   219 	MOV r14,r4,LSR r14     ; r14 = bits=window>>32-n
   220 	ADD r12,r12,r14        ; add bits as a byte offset; the load adds it again, giving 2*bits bytes = bits entries
   221 	LDRSH r12,[r12,r14]    ; r12 = node=_tree[node+1+bits]
   222 	RSB r5,r5,#32          ; r5 = available
   223 	; Stall...
   224 	RSBS r14,r12,#0        ; r14 = -node, node>0 => MI
   225 	BMI oc_huff_token_decode_continue
       	; Leaf reached: r14 = -node >= 0 packs the bit count and the result.
   226 	MOV r10,r14,LSR #8     ; r10 = n=(-node)>>8
   227 	MOV r4,r4,LSL r10      ; r4 = window<<=n
   228 	SUB r5,r10             ; r5 = available-=n
   229 	STMIB r0,{r3-r5}       ; ptr = r3
   230 	                       ; window = r4
   231 	                       ; available = r5
   232 	AND r0,r14,#255        ; r0 = (-node)&255
   233 	LDMFD r13!,{r4,r5,r10,pc}
   234 	ENDP
   236 	END

mercurial