Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 * *
11 ********************************************************************
13 function:
14 last mod: $Id: mmxstate.c 17563 2010-10-25 17:40:54Z tterribe $
16 ********************************************************************/
18 /*MMX acceleration of complete fragment reconstruction algorithm.
19 Originally written by Rudolf Marek.*/
20 #include <string.h>
21 #include "x86int.h"
22 #include "mmxloop.h"
24 #if defined(OC_X86_ASM)
26 void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
27 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
28 unsigned char *dst;
29 ptrdiff_t frag_buf_off;
30 int ystride;
31 int refi;
32 /*Apply the inverse transform.*/
33 /*Special case only having a DC component.*/
34 if(_last_zzi<2){
35 /*Note that this value must be unsigned, to keep the __asm__ block from
36 sign-extending it when it puts it in a register.*/
37 ogg_uint16_t p;
38 /*We round this dequant product (and not any of the others) because there's
39 no iDCT rounding.*/
40 p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
41 /*Fill _dct_coeffs with p.*/
42 __asm{
43 #define Y eax
44 #define P ecx
45 mov Y,_dct_coeffs
46 movzx P,p
47 lea Y,[Y+128]
48 /*mm0=0000 0000 0000 AAAA*/
49 movd mm0,P
50 /*mm0=0000 0000 AAAA AAAA*/
51 punpcklwd mm0,mm0
52 /*mm0=AAAA AAAA AAAA AAAA*/
53 punpckldq mm0,mm0
54 movq [Y],mm0
55 movq [8+Y],mm0
56 movq [16+Y],mm0
57 movq [24+Y],mm0
58 movq [32+Y],mm0
59 movq [40+Y],mm0
60 movq [48+Y],mm0
61 movq [56+Y],mm0
62 movq [64+Y],mm0
63 movq [72+Y],mm0
64 movq [80+Y],mm0
65 movq [88+Y],mm0
66 movq [96+Y],mm0
67 movq [104+Y],mm0
68 movq [112+Y],mm0
69 movq [120+Y],mm0
70 #undef Y
71 #undef P
72 }
73 }
74 else{
75 /*Dequantize the DC coefficient.*/
76 _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
77 oc_idct8x8_mmx(_dct_coeffs+64,_dct_coeffs,_last_zzi);
78 }
79 /*Fill in the target buffer.*/
80 frag_buf_off=_state->frag_buf_offs[_fragi];
81 refi=_state->frags[_fragi].refi;
82 ystride=_state->ref_ystride[_pli];
83 dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
84 if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
85 else{
86 const unsigned char *ref;
87 int mvoffsets[2];
88 ref=_state->ref_frame_data[refi]+frag_buf_off;
89 if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
90 _state->frag_mvs[_fragi])>1){
91 oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
92 _dct_coeffs+64);
93 }
94 else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
95 }
96 }
98 /*We copy these entire functions to inline the actual MMX routines so that we
99 use only a single indirect call.*/
/*Initializes the bounding values array used by the MMX loop filter.
  Only the first 8 bytes of _bv are written; each receives the low byte of
   ~(_flimit<<1).
  _bv:     The bounding values array to initialize.
  _flimit: The loop filter limit the fill byte is derived from.*/
void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit){
  int fill;
  /*memset() converts its value argument to unsigned char, so only the low 8
     bits of the complemented, doubled limit are stored.*/
  fill=~(_flimit<<1);
  memset(_bv,fill,8);
}
105 /*Apply the loop filter to a given set of fragment rows in the given plane.
106 The filter may be run on the bottom edge, affecting pixels in the next row of
107 fragments, so this row also needs to be available.
108 _bv: The bounding values array.
109 _refi: The index of the frame buffer to filter.
110 _pli: The color plane to filter.
111 _fragy0: The Y coordinate of the first fragment row to filter.
112 _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
113 void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
114 signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
115 const oc_fragment_plane *fplane;
116 const oc_fragment *frags;
117 const ptrdiff_t *frag_buf_offs;
118 unsigned char *ref_frame_data;
119 ptrdiff_t fragi_top;
120 ptrdiff_t fragi_bot;
121 ptrdiff_t fragi0;
122 ptrdiff_t fragi0_end;
123 int ystride;
124 int nhfrags;
125 fplane=_state->fplanes+_pli;
126 nhfrags=fplane->nhfrags;
127 fragi_top=fplane->froffset;
128 fragi_bot=fragi_top+fplane->nfrags;
129 fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
130 fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
131 ystride=_state->ref_ystride[_pli];
132 frags=_state->frags;
133 frag_buf_offs=_state->frag_buf_offs;
134 ref_frame_data=_state->ref_frame_data[_refi];
135 /*The following loops are constructed somewhat non-intuitively on purpose.
136 The main idea is: if a block boundary has at least one coded fragment on
137 it, the filter is applied to it.
138 However, the order that the filters are applied in matters, and VP3 chose
139 the somewhat strange ordering used below.*/
140 while(fragi0<fragi0_end){
141 ptrdiff_t fragi;
142 ptrdiff_t fragi_end;
143 fragi=fragi0;
144 fragi_end=fragi+nhfrags;
145 while(fragi<fragi_end){
146 if(frags[fragi].coded){
147 unsigned char *ref;
148 ref=ref_frame_data+frag_buf_offs[fragi];
149 #define PIX eax
150 #define YSTRIDE3 edi
151 #define YSTRIDE ecx
152 #define LL edx
153 #define D esi
154 #define D_WORD si
155 if(fragi>fragi0)OC_LOOP_FILTER_H_MMX(ref,ystride,_bv);
156 if(fragi0>fragi_top)OC_LOOP_FILTER_V_MMX(ref,ystride,_bv);
157 if(fragi+1<fragi_end&&!frags[fragi+1].coded){
158 OC_LOOP_FILTER_H_MMX(ref+8,ystride,_bv);
159 }
160 if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
161 OC_LOOP_FILTER_V_MMX(ref+(ystride<<3),ystride,_bv);
162 }
163 #undef PIX
164 #undef YSTRIDE3
165 #undef YSTRIDE
166 #undef LL
167 #undef D
168 #undef D_WORD
169 }
170 fragi++;
171 }
172 fragi0+=nhfrags;
173 }
174 }
176 #endif