media/libtheora/lib/x86_vc/mmxstate.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /********************************************************************
     2  *                                                                  *
     3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
     4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
     5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
     6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
     7  *                                                                  *
     8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
     9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
    10  *                                                                  *
    11  ********************************************************************
    13   function:
    14     last mod: $Id: mmxstate.c 17563 2010-10-25 17:40:54Z tterribe $
    16  ********************************************************************/
    18 /*MMX acceleration of complete fragment reconstruction algorithm.
    19   Originally written by Rudolf Marek.*/
    20 #include <string.h>
    21 #include "x86int.h"
    22 #include "mmxloop.h"
    24 #if defined(OC_X86_ASM)
    26 void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
    27  int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
    28   unsigned char *dst;
    29   ptrdiff_t      frag_buf_off;
    30   int            ystride;
    31   int            refi;
    32   /*Apply the inverse transform.*/
    33   /*Special case only having a DC component.*/
    34   if(_last_zzi<2){
    35     /*Note that this value must be unsigned, to keep the __asm__ block from
    36        sign-extending it when it puts it in a register.*/
    37     ogg_uint16_t p;
    38     /*We round this dequant product (and not any of the others) because there's
    39        no iDCT rounding.*/
    40     p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
    41     /*Fill _dct_coeffs with p.*/
    42     __asm{
    43 #define Y eax
    44 #define P ecx
    45       mov Y,_dct_coeffs
    46       movzx P,p
    47       lea Y,[Y+128]
    48       /*mm0=0000 0000 0000 AAAA*/
    49       movd mm0,P
    50       /*mm0=0000 0000 AAAA AAAA*/
    51       punpcklwd mm0,mm0
    52       /*mm0=AAAA AAAA AAAA AAAA*/
    53       punpckldq mm0,mm0
    54       movq [Y],mm0
    55       movq [8+Y],mm0
    56       movq [16+Y],mm0
    57       movq [24+Y],mm0
    58       movq [32+Y],mm0
    59       movq [40+Y],mm0
    60       movq [48+Y],mm0
    61       movq [56+Y],mm0
    62       movq [64+Y],mm0
    63       movq [72+Y],mm0
    64       movq [80+Y],mm0
    65       movq [88+Y],mm0
    66       movq [96+Y],mm0
    67       movq [104+Y],mm0
    68       movq [112+Y],mm0
    69       movq [120+Y],mm0
    70 #undef Y
    71 #undef P
    72     }
    73   }
    74   else{
    75     /*Dequantize the DC coefficient.*/
    76     _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
    77     oc_idct8x8_mmx(_dct_coeffs+64,_dct_coeffs,_last_zzi);
    78   }
    79   /*Fill in the target buffer.*/
    80   frag_buf_off=_state->frag_buf_offs[_fragi];
    81   refi=_state->frags[_fragi].refi;
    82   ystride=_state->ref_ystride[_pli];
    83   dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
    84   if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
    85   else{
    86     const unsigned char *ref;
    87     int                  mvoffsets[2];
    88     ref=_state->ref_frame_data[refi]+frag_buf_off;
    89     if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
    90      _state->frag_mvs[_fragi])>1){
    91       oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
    92        _dct_coeffs+64);
    93     }
    94     else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
    95   }
    96 }
    98 /*We copy this entire function to inline the actual MMX routines so that we
    99    use only a single indirect call.*/
   101 void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit){
   102   memset(_bv,~(_flimit<<1),8);
   103 }
   105 /*Apply the loop filter to a given set of fragment rows in the given plane.
   106   The filter may be run on the bottom edge, affecting pixels in the next row of
   107    fragments, so this row also needs to be available.
   108   _bv:        The bounding values array.
   109   _refi:      The index of the frame buffer to filter.
   110   _pli:       The color plane to filter.
   111   _fragy0:    The Y coordinate of the first fragment row to filter.
   112   _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
   113 void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
   114  signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
   115   const oc_fragment_plane *fplane;
   116   const oc_fragment       *frags;
   117   const ptrdiff_t         *frag_buf_offs;
   118   unsigned char           *ref_frame_data;
   119   ptrdiff_t                fragi_top;
   120   ptrdiff_t                fragi_bot;
   121   ptrdiff_t                fragi0;
   122   ptrdiff_t                fragi0_end;
   123   int                      ystride;
   124   int                      nhfrags;
   125   fplane=_state->fplanes+_pli;
   126   nhfrags=fplane->nhfrags;
   127   fragi_top=fplane->froffset;
   128   fragi_bot=fragi_top+fplane->nfrags;
   129   fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
   130   fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
   131   ystride=_state->ref_ystride[_pli];
   132   frags=_state->frags;
   133   frag_buf_offs=_state->frag_buf_offs;
   134   ref_frame_data=_state->ref_frame_data[_refi];
   135   /*The following loops are constructed somewhat non-intuitively on purpose.
   136     The main idea is: if a block boundary has at least one coded fragment on
   137      it, the filter is applied to it.
   138     However, the order that the filters are applied in matters, and VP3 chose
   139      the somewhat strange ordering used below.*/
   140   while(fragi0<fragi0_end){
   141     ptrdiff_t fragi;
   142     ptrdiff_t fragi_end;
   143     fragi=fragi0;
   144     fragi_end=fragi+nhfrags;
   145     while(fragi<fragi_end){
   146       if(frags[fragi].coded){
   147         unsigned char *ref;
   148         ref=ref_frame_data+frag_buf_offs[fragi];
   149 #define PIX eax
   150 #define YSTRIDE3 edi
   151 #define YSTRIDE ecx
   152 #define LL edx
   153 #define D esi
   154 #define D_WORD si
   155         if(fragi>fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,_bv);
   156         if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,_bv);
   157         if(fragi+1<fragi_end&&!frags[fragi+1].coded){
   158           OC_LOOP_FILTER_H_MMX(ref+8,ystride,_bv);
   159         }
   160         if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
   161           OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,_bv);
   162         }
   163 #undef PIX
   164 #undef YSTRIDE3
   165 #undef YSTRIDE
   166 #undef LL
   167 #undef D
   168 #undef D_WORD
   169       }
   170       fragi++;
   171     }
   172     fragi0+=nhfrags;
   173   }
   174 }
   176 #endif

mercurial