media/libtheora/lib/x86_vc/mmxstate.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /********************************************************************
michael@0 2 * *
michael@0 3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
michael@0 4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
michael@0 5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
michael@0 6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
michael@0 7 * *
michael@0 8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
michael@0 9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
michael@0 10 * *
michael@0 11 ********************************************************************
michael@0 12
michael@0 13 function:
michael@0 14 last mod: $Id: mmxstate.c 17563 2010-10-25 17:40:54Z tterribe $
michael@0 15
michael@0 16 ********************************************************************/
michael@0 17
michael@0 18 /*MMX acceleration of complete fragment reconstruction algorithm.
michael@0 19 Originally written by Rudolf Marek.*/
michael@0 20 #include <string.h>
michael@0 21 #include "x86int.h"
michael@0 22 #include "mmxloop.h"
michael@0 23
michael@0 24 #if defined(OC_X86_ASM)
michael@0 25
michael@0 26 void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
michael@0 27 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
michael@0 28 unsigned char *dst;
michael@0 29 ptrdiff_t frag_buf_off;
michael@0 30 int ystride;
michael@0 31 int refi;
michael@0 32 /*Apply the inverse transform.*/
michael@0 33 /*Special case only having a DC component.*/
michael@0 34 if(_last_zzi<2){
michael@0 35 /*Note that this value must be unsigned, to keep the __asm__ block from
michael@0 36 sign-extending it when it puts it in a register.*/
michael@0 37 ogg_uint16_t p;
michael@0 38 /*We round this dequant product (and not any of the others) because there's
michael@0 39 no iDCT rounding.*/
michael@0 40 p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
michael@0 41 /*Fill _dct_coeffs with p.*/
michael@0 42 __asm{
michael@0 43 #define Y eax
michael@0 44 #define P ecx
michael@0 45 mov Y,_dct_coeffs
michael@0 46 movzx P,p
michael@0 47 lea Y,[Y+128]
michael@0 48 /*mm0=0000 0000 0000 AAAA*/
michael@0 49 movd mm0,P
michael@0 50 /*mm0=0000 0000 AAAA AAAA*/
michael@0 51 punpcklwd mm0,mm0
michael@0 52 /*mm0=AAAA AAAA AAAA AAAA*/
michael@0 53 punpckldq mm0,mm0
michael@0 54 movq [Y],mm0
michael@0 55 movq [8+Y],mm0
michael@0 56 movq [16+Y],mm0
michael@0 57 movq [24+Y],mm0
michael@0 58 movq [32+Y],mm0
michael@0 59 movq [40+Y],mm0
michael@0 60 movq [48+Y],mm0
michael@0 61 movq [56+Y],mm0
michael@0 62 movq [64+Y],mm0
michael@0 63 movq [72+Y],mm0
michael@0 64 movq [80+Y],mm0
michael@0 65 movq [88+Y],mm0
michael@0 66 movq [96+Y],mm0
michael@0 67 movq [104+Y],mm0
michael@0 68 movq [112+Y],mm0
michael@0 69 movq [120+Y],mm0
michael@0 70 #undef Y
michael@0 71 #undef P
michael@0 72 }
michael@0 73 }
michael@0 74 else{
michael@0 75 /*Dequantize the DC coefficient.*/
michael@0 76 _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
michael@0 77 oc_idct8x8_mmx(_dct_coeffs+64,_dct_coeffs,_last_zzi);
michael@0 78 }
michael@0 79 /*Fill in the target buffer.*/
michael@0 80 frag_buf_off=_state->frag_buf_offs[_fragi];
michael@0 81 refi=_state->frags[_fragi].refi;
michael@0 82 ystride=_state->ref_ystride[_pli];
michael@0 83 dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
michael@0 84 if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
michael@0 85 else{
michael@0 86 const unsigned char *ref;
michael@0 87 int mvoffsets[2];
michael@0 88 ref=_state->ref_frame_data[refi]+frag_buf_off;
michael@0 89 if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
michael@0 90 _state->frag_mvs[_fragi])>1){
michael@0 91 oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
michael@0 92 _dct_coeffs+64);
michael@0 93 }
michael@0 94 else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
michael@0 95 }
michael@0 96 }
michael@0 97
/*We copy this entire function to inline the actual MMX routines so that we
   use only a single indirect call.*/

/*Initializes the bounding values array for the MMX loop filter.
  Only the first 8 bytes of _bv are written; each one is set to the low byte
   of ~(_flimit<<1).
  The remaining entries are left untouched.
  _bv:     The bounding values array to initialize.
  _flimit: The filter limit the stored byte is derived from.*/
void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit){
  memset(_bv,~(_flimit<<1),8);
}
michael@0 104
michael@0 105 /*Apply the loop filter to a given set of fragment rows in the given plane.
michael@0 106 The filter may be run on the bottom edge, affecting pixels in the next row of
michael@0 107 fragments, so this row also needs to be available.
michael@0 108 _bv: The bounding values array.
michael@0 109 _refi: The index of the frame buffer to filter.
michael@0 110 _pli: The color plane to filter.
michael@0 111 _fragy0: The Y coordinate of the first fragment row to filter.
michael@0 112 _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
michael@0 113 void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
michael@0 114 signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
michael@0 115 const oc_fragment_plane *fplane;
michael@0 116 const oc_fragment *frags;
michael@0 117 const ptrdiff_t *frag_buf_offs;
michael@0 118 unsigned char *ref_frame_data;
michael@0 119 ptrdiff_t fragi_top;
michael@0 120 ptrdiff_t fragi_bot;
michael@0 121 ptrdiff_t fragi0;
michael@0 122 ptrdiff_t fragi0_end;
michael@0 123 int ystride;
michael@0 124 int nhfrags;
michael@0 125 fplane=_state->fplanes+_pli;
michael@0 126 nhfrags=fplane->nhfrags;
michael@0 127 fragi_top=fplane->froffset;
michael@0 128 fragi_bot=fragi_top+fplane->nfrags;
michael@0 129 fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
michael@0 130 fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
michael@0 131 ystride=_state->ref_ystride[_pli];
michael@0 132 frags=_state->frags;
michael@0 133 frag_buf_offs=_state->frag_buf_offs;
michael@0 134 ref_frame_data=_state->ref_frame_data[_refi];
michael@0 135 /*The following loops are constructed somewhat non-intuitively on purpose.
michael@0 136 The main idea is: if a block boundary has at least one coded fragment on
michael@0 137 it, the filter is applied to it.
michael@0 138 However, the order that the filters are applied in matters, and VP3 chose
michael@0 139 the somewhat strange ordering used below.*/
michael@0 140 while(fragi0<fragi0_end){
michael@0 141 ptrdiff_t fragi;
michael@0 142 ptrdiff_t fragi_end;
michael@0 143 fragi=fragi0;
michael@0 144 fragi_end=fragi+nhfrags;
michael@0 145 while(fragi<fragi_end){
michael@0 146 if(frags[fragi].coded){
michael@0 147 unsigned char *ref;
michael@0 148 ref=ref_frame_data+frag_buf_offs[fragi];
michael@0 149 #define PIX eax
michael@0 150 #define YSTRIDE3 edi
michael@0 151 #define YSTRIDE ecx
michael@0 152 #define LL edx
michael@0 153 #define D esi
michael@0 154 #define D_WORD si
michael@0 155 if(fragi>fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,_bv);
michael@0 156 if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,_bv);
michael@0 157 if(fragi+1<fragi_end&&!frags[fragi+1].coded){
michael@0 158 OC_LOOP_FILTER_H_MMX(ref+8,ystride,_bv);
michael@0 159 }
michael@0 160 if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
michael@0 161 OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,_bv);
michael@0 162 }
michael@0 163 #undef PIX
michael@0 164 #undef YSTRIDE3
michael@0 165 #undef YSTRIDE
michael@0 166 #undef LL
michael@0 167 #undef D
michael@0 168 #undef D_WORD
michael@0 169 }
michael@0 170 fragi++;
michael@0 171 }
michael@0 172 fragi0+=nhfrags;
michael@0 173 }
michael@0 174 }
michael@0 175
michael@0 176 #endif

mercurial