media/libtheora/lib/x86/mmxstate.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /********************************************************************
michael@0 2 * *
michael@0 3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
michael@0 4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
michael@0 5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
michael@0 6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
michael@0 7 * *
michael@0 8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
michael@0 9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
michael@0 10 * *
michael@0 11 ********************************************************************
michael@0 12
michael@0 13 function:
michael@0 14 last mod: $Id: mmxstate.c 17563 2010-10-25 17:40:54Z tterribe $
michael@0 15
michael@0 16 ********************************************************************/
michael@0 17
michael@0 18 /*MMX acceleration of complete fragment reconstruction algorithm.
michael@0 19 Originally written by Rudolf Marek.*/
michael@0 20 #include <string.h>
michael@0 21 #include "x86int.h"
michael@0 22 #include "mmxloop.h"
michael@0 23
michael@0 24 #if defined(OC_X86_ASM)
michael@0 25
/*Reconstructs one fragment into the OC_FRAME_SELF reference buffer: a residual
   block is produced in _dct_coeffs+64 and then handed to one of the
   oc_frag_recon_*_mmx routines chosen from the fragment's refi.
  _state:      Supplies frag_buf_offs, frags, frag_mvs, ref_ystride and
                ref_frame_data.
  _fragi:      Index of the fragment to reconstruct.
  _pli:        Plane index; selects the stride and is passed on to
                oc_state_get_mv_offsets().
  _dct_coeffs: Entry 0 is read (and, on the non-DC path, overwritten with its
                dequantized value); entries 64..127 are what the
                oc_frag_recon_*_mmx routines receive.
  _last_zzi:   Values below 2 select the DC-only shortcut.
  _dc_quant:   Multiplier applied to _dct_coeffs[0].*/
michael@0 26 void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
michael@0 27 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
michael@0 28 unsigned char *dst;
michael@0 29 ptrdiff_t frag_buf_off;
michael@0 30 int ystride;
michael@0 31 int refi;
michael@0 32 /*Apply the inverse transform.*/
michael@0 33 /*Special case only having a DC component.*/
michael@0 34 if(_last_zzi<2){
michael@0 35 /*Note that this value must be unsigned, to keep the __asm__ block from
michael@0 36 sign-extending it when it puts it in a register.*/
michael@0 37 ogg_uint16_t p;
michael@0 38 int i;
michael@0 39 /*We round this dequant product (and not any of the others) because there's
michael@0 40 no iDCT rounding.
   Since + binds tighter than >>, the expression is (product+15)>>5.*/
michael@0 41 p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
michael@0 42 /*Fill _dct_coeffs with p.
   This first statement only loads mm0 and declares no outputs or clobbers;
    the store loop below relies on mm0 still holding the broadcast value.*/
michael@0 43 __asm__ __volatile__(
michael@0 44 /*mm0=0000 0000 0000 AAAA*/
michael@0 45 "movd %[p],%%mm0\n\t"
michael@0 46 /*mm0=0000 0000 AAAA AAAA*/
michael@0 47 "punpcklwd %%mm0,%%mm0\n\t"
michael@0 48 /*mm0=AAAA AAAA AAAA AAAA*/
michael@0 49 "punpckldq %%mm0,%%mm0\n\t"
michael@0 50 :
michael@0 51 :[p]"r"((unsigned)p)
michael@0 52 );
/*Each pass stores four quadwords (offsets 0x00..0x18) into the 16-entry
   window starting at _dct_coeffs+64+16*i, so the four passes cover entries
   64..127.*/
michael@0 53 for(i=0;i<4;i++){
michael@0 54 __asm__ __volatile__(
michael@0 55 "movq %%mm0,"OC_MEM_OFFS(0x00,y)"\n\t"
michael@0 56 "movq %%mm0,"OC_MEM_OFFS(0x08,y)"\n\t"
michael@0 57 "movq %%mm0,"OC_MEM_OFFS(0x10,y)"\n\t"
michael@0 58 "movq %%mm0,"OC_MEM_OFFS(0x18,y)"\n\t"
michael@0 59 :[y]"=m"OC_ARRAY_OPERAND(ogg_int16_t,_dct_coeffs+64+16*i,16)
michael@0 60 );
michael@0 61 }
michael@0 62 }
michael@0 63 else{
michael@0 64 /*Dequantize the DC coefficient.*/
michael@0 65 _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
/*NOTE(review): presumably oc_idct8x8() reads _dct_coeffs and writes its
   result to _dct_coeffs+64, matching the DC-only path above -- confirm
   against its declaration.*/
michael@0 66 oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi);
michael@0 67 }
michael@0 68 /*Fill in the target buffer.*/
michael@0 69 frag_buf_off=_state->frag_buf_offs[_fragi];
michael@0 70 refi=_state->frags[_fragi].refi;
michael@0 71 ystride=_state->ref_ystride[_pli];
michael@0 72 dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
/*A fragment whose reference is the current frame itself takes the intra
   path.*/
michael@0 73 if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
michael@0 74 else{
michael@0 75 const unsigned char *ref;
michael@0 76 int mvoffsets[2];
michael@0 77 ref=_state->ref_frame_data[refi]+frag_buf_off;
/*A return value above 1 means both entries of mvoffsets are used, and the
   two-reference routine is called; otherwise only mvoffsets[0] is used.*/
michael@0 78 if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
michael@0 79 _state->frag_mvs[_fragi])>1){
michael@0 80 oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
michael@0 81 _dct_coeffs+64);
michael@0 82 }
michael@0 83 else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
michael@0 84 }
michael@0 85 }
michael@0 86
michael@0 87 /*We copy this entire function to inline the actual MMX routines so that we
michael@0 88 use only a single indirect call.*/
michael@0 89
/*Initializes the bounding values array for the MMX loop filter.
  Only the first 8 bytes of _bv are written; each is set to _flimit (memset()
   converts the value to unsigned char).
  _bv:     The array to initialize.
  _flimit: The filter limit to replicate.*/
michael@0 90 void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit){
michael@0 91 memset(_bv,_flimit,8);
michael@0 92 }
michael@0 93
michael@0 94 /*Apply the loop filter to a given set of fragment rows in the given plane.
michael@0 95 The filter may be run on the bottom edge, affecting pixels in the next row of
michael@0 96 fragments, so this row also needs to be available.
michael@0 97 _bv: The bounding values array. It is not referenced in this function body; the limit vector ll is rebuilt from _state instead.
michael@0 98 _refi: The index of the frame buffer to filter.
michael@0 99 _pli: The color plane to filter.
michael@0 100 _fragy0: The Y coordinate of the first fragment row to filter.
michael@0 101 _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
michael@0 102 void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
michael@0 103 signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
michael@0 104 OC_ALIGN8(unsigned char ll[8]);
michael@0 105 const oc_fragment_plane *fplane;
michael@0 106 const oc_fragment *frags;
michael@0 107 const ptrdiff_t *frag_buf_offs;
michael@0 108 unsigned char *ref_frame_data;
michael@0 109 ptrdiff_t fragi_top;
michael@0 110 ptrdiff_t fragi_bot;
michael@0 111 ptrdiff_t fragi0;
michael@0 112 ptrdiff_t fragi0_end;
michael@0 113 int ystride;
michael@0 114 int nhfrags;
/*Replicate the loop filter limit for quantizer index qis[0] into all 8 bytes
   of ll, which is what the filter macros below receive.*/
michael@0 115 memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll));
michael@0 116 fplane=_state->fplanes+_pli;
michael@0 117 nhfrags=fplane->nhfrags;
/*fragi_top and fragi_bot delimit the plane's fragment indices; fragi0 and
   fragi0_end delimit the rows [_fragy0,_fragy_end) handled by this call.*/
michael@0 118 fragi_top=fplane->froffset;
michael@0 119 fragi_bot=fragi_top+fplane->nfrags;
michael@0 120 fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
michael@0 121 fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
michael@0 122 ystride=_state->ref_ystride[_pli];
michael@0 123 frags=_state->frags;
michael@0 124 frag_buf_offs=_state->frag_buf_offs;
michael@0 125 ref_frame_data=_state->ref_frame_data[_refi];
michael@0 126 /*The following loops are constructed somewhat non-intuitively on purpose.
michael@0 127 The main idea is: if a block boundary has at least one coded fragment on
michael@0 128 it, the filter is applied to it.
michael@0 129 However, the order that the filters are applied in matters, and VP3 chose
michael@0 130 the somewhat strange ordering used below.*/
michael@0 131 while(fragi0<fragi0_end){
michael@0 132 ptrdiff_t fragi;
michael@0 133 ptrdiff_t fragi_end;
michael@0 134 fragi=fragi0;
michael@0 135 fragi_end=fragi+nhfrags;
michael@0 136 while(fragi<fragi_end){
michael@0 137 if(frags[fragi].coded){
michael@0 138 unsigned char *ref;
michael@0 139 ref=ref_frame_data+frag_buf_offs[fragi];
/*Edge at ref, horizontal pass: skipped for the first fragment of the row.*/
michael@0 140 if(fragi>fragi0){
michael@0 141 OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMX,ref,ystride,ll);
michael@0 142 }
/*Edge at ref, vertical pass: skipped for the first row of the plane.*/
michael@0 143 if(fragi0>fragi_top){
michael@0 144 OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMX,ref,ystride,ll);
michael@0 145 }
/*Edge 8 bytes to the right: handled here only when the next fragment in the
   row is not coded; a coded neighbor filters that edge itself on its own
   turn.*/
michael@0 146 if(fragi+1<fragi_end&&!frags[fragi+1].coded){
michael@0 147 OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMX,ref+8,ystride,ll);
michael@0 148 }
/*Edge 8 rows down: handled here only when the fragment below lies inside
   the plane and is not coded.*/
michael@0 149 if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
michael@0 150 OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMX,ref+(ystride<<3),ystride,ll);
michael@0 151 }
michael@0 152 }
michael@0 153 fragi++;
michael@0 154 }
michael@0 155 fragi0+=nhfrags;
michael@0 156 }
michael@0 157 }
michael@0 158
/*Initializes the bounding values array for the MMXEXT loop filter.
  Only the first 8 bytes of _bv are written; each is set to ~(_flimit<<1)
   (memset() converts the value to unsigned char).
  _bv:     The array to initialize.
  _flimit: The filter limit the stored byte is derived from.*/
michael@0 159 void oc_loop_filter_init_mmxext(signed char _bv[256],int _flimit){
michael@0 160 memset(_bv,~(_flimit<<1),8);
michael@0 161 }
michael@0 162
michael@0 163 /*Apply the loop filter to a given set of fragment rows in the given plane.
michael@0 164 The filter may be run on the bottom edge, affecting pixels in the next row of
michael@0 165 fragments, so this row also needs to be available.
michael@0 166 _bv: The bounding values array; passed unchanged to the filter macros.
michael@0 167 _refi: The index of the frame buffer to filter.
michael@0 168 _pli: The color plane to filter.
michael@0 169 _fragy0: The Y coordinate of the first fragment row to filter.
michael@0 170 _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
michael@0 171 void oc_state_loop_filter_frag_rows_mmxext(const oc_theora_state *_state,
michael@0 172 signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
michael@0 173 const oc_fragment_plane *fplane;
michael@0 174 const oc_fragment *frags;
michael@0 175 const ptrdiff_t *frag_buf_offs;
michael@0 176 unsigned char *ref_frame_data;
michael@0 177 ptrdiff_t fragi_top;
michael@0 178 ptrdiff_t fragi_bot;
michael@0 179 ptrdiff_t fragi0;
michael@0 180 ptrdiff_t fragi0_end;
michael@0 181 int ystride;
michael@0 182 int nhfrags;
michael@0 183 fplane=_state->fplanes+_pli;
michael@0 184 nhfrags=fplane->nhfrags;
/*fragi_top and fragi_bot delimit the plane's fragment indices; fragi0 and
   fragi0_end delimit the rows [_fragy0,_fragy_end) handled by this call.*/
michael@0 185 fragi_top=fplane->froffset;
michael@0 186 fragi_bot=fragi_top+fplane->nfrags;
michael@0 187 fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
michael@0 188 fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
michael@0 189 ystride=_state->ref_ystride[_pli];
michael@0 190 frags=_state->frags;
michael@0 191 frag_buf_offs=_state->frag_buf_offs;
michael@0 192 ref_frame_data=_state->ref_frame_data[_refi];
michael@0 193 /*The following loops are constructed somewhat non-intuitively on purpose.
michael@0 194 The main idea is: if a block boundary has at least one coded fragment on
michael@0 195 it, the filter is applied to it.
michael@0 196 However, the order that the filters are applied in matters, and VP3 chose
michael@0 197 the somewhat strange ordering used below.*/
michael@0 198 while(fragi0<fragi0_end){
michael@0 199 ptrdiff_t fragi;
michael@0 200 ptrdiff_t fragi_end;
michael@0 201 fragi=fragi0;
michael@0 202 fragi_end=fragi+nhfrags;
michael@0 203 while(fragi<fragi_end){
michael@0 204 if(frags[fragi].coded){
michael@0 205 unsigned char *ref;
michael@0 206 ref=ref_frame_data+frag_buf_offs[fragi];
/*Edge at ref, horizontal pass: skipped for the first fragment of the row.*/
michael@0 207 if(fragi>fragi0){
michael@0 208 OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMXEXT,ref,ystride,_bv);
michael@0 209 }
/*Edge at ref, vertical pass: skipped for the first row of the plane.*/
michael@0 210 if(fragi0>fragi_top){
michael@0 211 OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMXEXT,ref,ystride,_bv);
michael@0 212 }
/*Edge 8 bytes to the right: handled here only when the next fragment in the
   row is not coded; a coded neighbor filters that edge itself on its own
   turn.*/
michael@0 213 if(fragi+1<fragi_end&&!frags[fragi+1].coded){
michael@0 214 OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMXEXT,ref+8,ystride,_bv);
michael@0 215 }
/*Edge 8 rows down: handled here only when the fragment below lies inside
   the plane and is not coded.*/
michael@0 216 if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
michael@0 217 OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMXEXT,ref+(ystride<<3),ystride,_bv);
michael@0 218 }
michael@0 219 }
michael@0 220 fragi++;
michael@0 221 }
michael@0 222 fragi0+=nhfrags;
michael@0 223 }
michael@0 224 }
michael@0 225
michael@0 226 #endif

mercurial