|
1 /******************************************************************** |
|
2 * * |
|
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
|
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
|
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
|
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
|
7 * * |
|
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
|
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * |
|
10 * * |
|
11 ******************************************************************** |
|
12 |
|
13 function: |
|
14 last mod: $Id: state.c 17576 2010-10-29 01:07:51Z tterribe $ |
|
15 |
|
16 ********************************************************************/ |
|
17 |
|
18 #include <stdlib.h> |
|
19 #include <string.h> |
|
20 #include "state.h" |
|
21 #if defined(OC_DUMP_IMAGES) |
|
22 # include <stdio.h> |
|
23 # include "png.h" |
|
24 #endif |
|
25 |
|
26 /*The function used to fill in the chroma plane motion vectors for a macro |
|
27 block when 4 different motion vectors are specified in the luma plane. |
|
28 This version is for use with chroma decimated in the X and Y directions |
|
29 (4:2:0). |
|
30 _cbmvs: The chroma block-level motion vectors to fill in. |
|
31 _lbmvs: The luma block-level motion vectors.*/ |
|
32 static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ |
|
33 int dx; |
|
34 int dy; |
|
35 dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]) |
|
36 +OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]); |
|
37 dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]) |
|
38 +OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]); |
|
39 _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,2,2),OC_DIV_ROUND_POW2(dy,2,2)); |
|
40 } |
|
41 |
|
42 /*The function used to fill in the chroma plane motion vectors for a macro |
|
43 block when 4 different motion vectors are specified in the luma plane. |
|
44 This version is for use with chroma decimated in the Y direction. |
|
45 _cbmvs: The chroma block-level motion vectors to fill in. |
|
46 _lbmvs: The luma block-level motion vectors.*/ |
|
47 static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ |
|
48 int dx; |
|
49 int dy; |
|
50 dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[2]); |
|
51 dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[2]); |
|
52 _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); |
|
53 dx=OC_MV_X(_lbmvs[1])+OC_MV_X(_lbmvs[3]); |
|
54 dy=OC_MV_Y(_lbmvs[1])+OC_MV_Y(_lbmvs[3]); |
|
55 _cbmvs[1]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); |
|
56 } |
|
57 |
|
58 /*The function used to fill in the chroma plane motion vectors for a macro |
|
59 block when 4 different motion vectors are specified in the luma plane. |
|
60 This version is for use with chroma decimated in the X direction (4:2:2). |
|
61 _cbmvs: The chroma block-level motion vectors to fill in. |
|
62 _lbmvs: The luma block-level motion vectors.*/ |
|
63 static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ |
|
64 int dx; |
|
65 int dy; |
|
66 dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]); |
|
67 dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]); |
|
68 _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); |
|
69 dx=OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]); |
|
70 dy=OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]); |
|
71 _cbmvs[2]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); |
|
72 } |
|
73 |
|
74 /*The function used to fill in the chroma plane motion vectors for a macro |
|
75 block when 4 different motion vectors are specified in the luma plane. |
|
76 This version is for use with no chroma decimation (4:4:4). |
|
77 _cbmvs: The chroma block-level motion vectors to fill in. |
|
78 _lmbmv: The luma macro-block level motion vector to fill in for use in |
|
79 prediction. |
|
80 _lbmvs: The luma block-level motion vectors.*/ |
|
81 static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ |
|
82 _cbmvs[0]=_lbmvs[0]; |
|
83 _cbmvs[1]=_lbmvs[1]; |
|
84 _cbmvs[2]=_lbmvs[2]; |
|
85 _cbmvs[3]=_lbmvs[3]; |
|
86 } |
|
87 |
|
88 /*A table of functions used to fill in the chroma plane motion vectors for a |
|
89 macro block when 4 different motion vectors are specified in the luma |
|
90 plane.*/ |
|
91 const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={ |
|
92 (oc_set_chroma_mvs_func)oc_set_chroma_mvs00, |
|
93 (oc_set_chroma_mvs_func)oc_set_chroma_mvs01, |
|
94 (oc_set_chroma_mvs_func)oc_set_chroma_mvs10, |
|
95 (oc_set_chroma_mvs_func)oc_set_chroma_mvs11 |
|
96 }; |
|
97 |
|
98 |
|
99 |
|
100 /*Returns the fragment index of the top-left block in a macro block. |
|
101 This can be used to test whether or not the whole macro block is valid. |
|
102 _sb_map: The super block map. |
|
103 _quadi: The quadrant number. |
|
104 Return: The index of the fragment of the upper left block in the macro |
|
105 block, or -1 if the block lies outside the coded frame.*/ |
|
106 static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){ |
|
107 /*It so happens that under the Hilbert curve ordering described below, the |
|
108 upper-left block in each macro block is at index 0, except in macro block |
|
109 3, where it is at index 2.*/ |
|
110 return _sb_map[_quadi][_quadi&_quadi<<1]; |
|
111 } |
|
112 |
|
113 /*Fills in the mapping from block positions to fragment numbers for a single |
|
114 color plane. |
|
115 This function also fills in the "valid" flag of each quadrant in the super |
|
116 block flags. |
|
117 _sb_maps: The array of super block maps for the color plane. |
|
118 _sb_flags: The array of super block flags for the color plane. |
|
119 _frag0: The index of the first fragment in the plane. |
|
120 _hfrags: The number of horizontal fragments in a coded frame. |
|
121 _vfrags: The number of vertical fragments in a coded frame.*/ |
|
122 static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[], |
|
123 oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){ |
|
124 /*Contains the (macro_block,block) indices for a 4x4 grid of |
|
125 fragments. |
|
126 The pattern is a 4x4 Hilbert space-filling curve. |
|
127 A Hilbert curve has the nice property that as the curve grows larger, its |
|
128 fractal dimension approaches 2. |
|
129 The intuition is that nearby blocks in the curve are also close spatially, |
|
130 with the previous element always an immediate neighbor, so that runs of |
|
131 blocks should be well correlated.*/ |
|
132 static const int SB_MAP[4][4][2]={ |
|
133 {{0,0},{0,1},{3,2},{3,3}}, |
|
134 {{0,3},{0,2},{3,1},{3,0}}, |
|
135 {{1,0},{1,3},{2,0},{2,3}}, |
|
136 {{1,1},{1,2},{2,1},{2,2}} |
|
137 }; |
|
138 ptrdiff_t yfrag; |
|
139 unsigned sbi; |
|
140 int y; |
|
141 sbi=0; |
|
142 yfrag=_frag0; |
|
143 for(y=0;;y+=4){ |
|
144 int imax; |
|
145 int x; |
|
146 /*Figure out how many columns of blocks in this super block lie within the |
|
147 image.*/ |
|
148 imax=_vfrags-y; |
|
149 if(imax>4)imax=4; |
|
150 else if(imax<=0)break; |
|
151 for(x=0;;x+=4,sbi++){ |
|
152 ptrdiff_t xfrag; |
|
153 int jmax; |
|
154 int quadi; |
|
155 int i; |
|
156 /*Figure out how many rows of blocks in this super block lie within the |
|
157 image.*/ |
|
158 jmax=_hfrags-x; |
|
159 if(jmax>4)jmax=4; |
|
160 else if(jmax<=0)break; |
|
161 /*By default, set all fragment indices to -1.*/ |
|
162 memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi])); |
|
163 /*Fill in the fragment map for this super block.*/ |
|
164 xfrag=yfrag+x; |
|
165 for(i=0;i<imax;i++){ |
|
166 int j; |
|
167 for(j=0;j<jmax;j++){ |
|
168 _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j; |
|
169 } |
|
170 xfrag+=_hfrags; |
|
171 } |
|
172 /*Mark which quadrants of this super block lie within the image.*/ |
|
173 for(quadi=0;quadi<4;quadi++){ |
|
174 _sb_flags[sbi].quad_valid|= |
|
175 (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi; |
|
176 } |
|
177 } |
|
178 yfrag+=_hfrags<<2; |
|
179 } |
|
180 } |
|
181 |
|
182 /*Fills in the Y plane fragment map for a macro block given the fragment |
|
183 coordinates of its upper-left hand corner. |
|
184 _mb_map: The macro block map to fill. |
|
185 _fplane: The description of the Y plane. |
|
186 _xfrag0: The X location of the upper-left hand fragment in the luma plane. |
|
187 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ |
|
188 static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3], |
|
189 const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){ |
|
190 int i; |
|
191 int j; |
|
192 for(i=0;i<2;i++)for(j=0;j<2;j++){ |
|
193 _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j; |
|
194 } |
|
195 } |
|
196 |
|
197 /*Fills in the chroma plane fragment maps for a macro block. |
|
198 This version is for use with chroma decimated in the X and Y directions |
|
199 (4:2:0). |
|
200 _mb_map: The macro block map to fill. |
|
201 _fplanes: The descriptions of the fragment planes. |
|
202 _xfrag0: The X location of the upper-left hand fragment in the luma plane. |
|
203 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ |
|
204 static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3], |
|
205 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ |
|
206 ptrdiff_t fragi; |
|
207 _xfrag0>>=1; |
|
208 _yfrag0>>=1; |
|
209 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; |
|
210 _mb_map[1][0]=fragi+_fplanes[1].froffset; |
|
211 _mb_map[2][0]=fragi+_fplanes[2].froffset; |
|
212 } |
|
213 |
|
214 /*Fills in the chroma plane fragment maps for a macro block. |
|
215 This version is for use with chroma decimated in the Y direction. |
|
216 _mb_map: The macro block map to fill. |
|
217 _fplanes: The descriptions of the fragment planes. |
|
218 _xfrag0: The X location of the upper-left hand fragment in the luma plane. |
|
219 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ |
|
220 static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3], |
|
221 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ |
|
222 ptrdiff_t fragi; |
|
223 int j; |
|
224 _yfrag0>>=1; |
|
225 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; |
|
226 for(j=0;j<2;j++){ |
|
227 _mb_map[1][j]=fragi+_fplanes[1].froffset; |
|
228 _mb_map[2][j]=fragi+_fplanes[2].froffset; |
|
229 fragi++; |
|
230 } |
|
231 } |
|
232 |
|
233 /*Fills in the chroma plane fragment maps for a macro block. |
|
234 This version is for use with chroma decimated in the X direction (4:2:2). |
|
235 _mb_map: The macro block map to fill. |
|
236 _fplanes: The descriptions of the fragment planes. |
|
237 _xfrag0: The X location of the upper-left hand fragment in the luma plane. |
|
238 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ |
|
239 static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3], |
|
240 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ |
|
241 ptrdiff_t fragi; |
|
242 int i; |
|
243 _xfrag0>>=1; |
|
244 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; |
|
245 for(i=0;i<2;i++){ |
|
246 _mb_map[1][i<<1]=fragi+_fplanes[1].froffset; |
|
247 _mb_map[2][i<<1]=fragi+_fplanes[2].froffset; |
|
248 fragi+=_fplanes[1].nhfrags; |
|
249 } |
|
250 } |
|
251 |
|
252 /*Fills in the chroma plane fragment maps for a macro block. |
|
253 This version is for use with no chroma decimation (4:4:4). |
|
254 This uses the already filled-in luma plane values. |
|
255 _mb_map: The macro block map to fill. |
|
256 _fplanes: The descriptions of the fragment planes.*/ |
|
257 static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3], |
|
258 const oc_fragment_plane _fplanes[3]){ |
|
259 int k; |
|
260 for(k=0;k<4;k++){ |
|
261 _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset; |
|
262 _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset; |
|
263 } |
|
264 } |
|
265 |
|
266 /*The function type used to fill in the chroma plane fragment maps for a |
|
267 macro block. |
|
268 _mb_map: The macro block map to fill. |
|
269 _fplanes: The descriptions of the fragment planes. |
|
270 _xfrag0: The X location of the upper-left hand fragment in the luma plane. |
|
271 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ |
|
272 typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3], |
|
273 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0); |
|
274 |
|
275 /*A table of functions used to fill in the chroma plane fragment maps for a |
|
276 macro block for each type of chrominance decimation.*/ |
|
277 static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={ |
|
278 oc_mb_fill_cmapping00, |
|
279 oc_mb_fill_cmapping01, |
|
280 oc_mb_fill_cmapping10, |
|
281 (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11 |
|
282 }; |
|
283 |
|
284 /*Fills in the mapping from macro blocks to their corresponding fragment |
|
285 numbers in each plane. |
|
286 _mb_maps: The list of macro block maps. |
|
287 _mb_modes: The list of macro block modes; macro blocks completely outside |
|
288 the coded region are marked invalid. |
|
289 _fplanes: The descriptions of the fragment planes. |
|
290 _pixel_fmt: The chroma decimation type.*/ |
|
291 static void oc_mb_create_mapping(oc_mb_map _mb_maps[], |
|
292 signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){ |
|
293 oc_mb_fill_cmapping_func mb_fill_cmapping; |
|
294 unsigned sbi; |
|
295 int y; |
|
296 mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt]; |
|
297 /*Loop through the luma plane super blocks.*/ |
|
298 for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){ |
|
299 int x; |
|
300 for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){ |
|
301 int ymb; |
|
302 /*Loop through the macro blocks in each super block in display order.*/ |
|
303 for(ymb=0;ymb<2;ymb++){ |
|
304 int xmb; |
|
305 for(xmb=0;xmb<2;xmb++){ |
|
306 unsigned mbi; |
|
307 int mbx; |
|
308 int mby; |
|
309 mbi=sbi<<2|OC_MB_MAP[ymb][xmb]; |
|
310 mbx=x|xmb<<1; |
|
311 mby=y|ymb<<1; |
|
312 /*Initialize fragment indices to -1.*/ |
|
313 memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi])); |
|
314 /*Make sure this macro block is within the encoded region.*/ |
|
315 if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){ |
|
316 _mb_modes[mbi]=OC_MODE_INVALID; |
|
317 continue; |
|
318 } |
|
319 /*Fill in the fragment indices for the luma plane.*/ |
|
320 oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby); |
|
321 /*Fill in the fragment indices for the chroma planes.*/ |
|
322 (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby); |
|
323 } |
|
324 } |
|
325 } |
|
326 } |
|
327 } |
|
328 |
|
329 /*Marks the fragments which fall all or partially outside the displayable |
|
330 region of the frame. |
|
331 _state: The Theora state containing the fragments to be marked.*/ |
|
332 static void oc_state_border_init(oc_theora_state *_state){ |
|
333 oc_fragment *frag; |
|
334 oc_fragment *yfrag_end; |
|
335 oc_fragment *xfrag_end; |
|
336 oc_fragment_plane *fplane; |
|
337 int crop_x0; |
|
338 int crop_y0; |
|
339 int crop_xf; |
|
340 int crop_yf; |
|
341 int pli; |
|
342 int y; |
|
343 int x; |
|
344 /*The method we use here is slow, but the code is dead simple and handles |
|
345 all the special cases easily. |
|
346 We only ever need to do it once.*/ |
|
347 /*Loop through the fragments, marking those completely outside the |
|
348 displayable region and constructing a border mask for those that straddle |
|
349 the border.*/ |
|
350 _state->nborders=0; |
|
351 yfrag_end=frag=_state->frags; |
|
352 for(pli=0;pli<3;pli++){ |
|
353 fplane=_state->fplanes+pli; |
|
354 /*Set up the cropping rectangle for this plane.*/ |
|
355 crop_x0=_state->info.pic_x; |
|
356 crop_xf=_state->info.pic_x+_state->info.pic_width; |
|
357 crop_y0=_state->info.pic_y; |
|
358 crop_yf=_state->info.pic_y+_state->info.pic_height; |
|
359 if(pli>0){ |
|
360 if(!(_state->info.pixel_fmt&1)){ |
|
361 crop_x0=crop_x0>>1; |
|
362 crop_xf=crop_xf+1>>1; |
|
363 } |
|
364 if(!(_state->info.pixel_fmt&2)){ |
|
365 crop_y0=crop_y0>>1; |
|
366 crop_yf=crop_yf+1>>1; |
|
367 } |
|
368 } |
|
369 y=0; |
|
370 for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){ |
|
371 x=0; |
|
372 for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){ |
|
373 /*First check to see if this fragment is completely outside the |
|
374 displayable region.*/ |
|
375 /*Note the special checks for an empty cropping rectangle. |
|
376 This guarantees that if we count a fragment as straddling the |
|
377 border below, at least one pixel in the fragment will be inside |
|
378 the displayable region.*/ |
|
379 if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y|| |
|
380 crop_x0>=crop_xf||crop_y0>=crop_yf){ |
|
381 frag->invalid=1; |
|
382 } |
|
383 /*Otherwise, check to see if it straddles the border.*/ |
|
384 else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8|| |
|
385 y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){ |
|
386 ogg_int64_t mask; |
|
387 int npixels; |
|
388 int i; |
|
389 mask=npixels=0; |
|
390 for(i=0;i<8;i++){ |
|
391 int j; |
|
392 for(j=0;j<8;j++){ |
|
393 if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){ |
|
394 mask|=(ogg_int64_t)1<<(i<<3|j); |
|
395 npixels++; |
|
396 } |
|
397 } |
|
398 } |
|
399 /*Search the fragment array for border info with the same pattern. |
|
400 In general, there will be at most 8 different patterns (per |
|
401 plane).*/ |
|
402 for(i=0;;i++){ |
|
403 if(i>=_state->nborders){ |
|
404 _state->nborders++; |
|
405 _state->borders[i].mask=mask; |
|
406 _state->borders[i].npixels=npixels; |
|
407 } |
|
408 else if(_state->borders[i].mask!=mask)continue; |
|
409 frag->borderi=i; |
|
410 break; |
|
411 } |
|
412 } |
|
413 else frag->borderi=-1; |
|
414 } |
|
415 } |
|
416 } |
|
417 } |
|
418 |
|
419 static int oc_state_frarray_init(oc_theora_state *_state){ |
|
420 int yhfrags; |
|
421 int yvfrags; |
|
422 int chfrags; |
|
423 int cvfrags; |
|
424 ptrdiff_t yfrags; |
|
425 ptrdiff_t cfrags; |
|
426 ptrdiff_t nfrags; |
|
427 unsigned yhsbs; |
|
428 unsigned yvsbs; |
|
429 unsigned chsbs; |
|
430 unsigned cvsbs; |
|
431 unsigned ysbs; |
|
432 unsigned csbs; |
|
433 unsigned nsbs; |
|
434 size_t nmbs; |
|
435 int hdec; |
|
436 int vdec; |
|
437 int pli; |
|
438 /*Figure out the number of fragments in each plane.*/ |
|
439 /*These parameters have already been validated to be multiples of 16.*/ |
|
440 yhfrags=_state->info.frame_width>>3; |
|
441 yvfrags=_state->info.frame_height>>3; |
|
442 hdec=!(_state->info.pixel_fmt&1); |
|
443 vdec=!(_state->info.pixel_fmt&2); |
|
444 chfrags=yhfrags+hdec>>hdec; |
|
445 cvfrags=yvfrags+vdec>>vdec; |
|
446 yfrags=yhfrags*(ptrdiff_t)yvfrags; |
|
447 cfrags=chfrags*(ptrdiff_t)cvfrags; |
|
448 nfrags=yfrags+2*cfrags; |
|
449 /*Figure out the number of super blocks in each plane.*/ |
|
450 yhsbs=yhfrags+3>>2; |
|
451 yvsbs=yvfrags+3>>2; |
|
452 chsbs=chfrags+3>>2; |
|
453 cvsbs=cvfrags+3>>2; |
|
454 ysbs=yhsbs*yvsbs; |
|
455 csbs=chsbs*cvsbs; |
|
456 nsbs=ysbs+2*csbs; |
|
457 nmbs=(size_t)ysbs<<2; |
|
458 /*Check for overflow. |
|
459 We support the ridiculous upper limits of the specification (1048560 by |
|
460 1048560, or 3 TB frames) if the target architecture has 64-bit pointers, |
|
461 but for those with 32-bit pointers (or smaller!) we have to check. |
|
462 If the caller wants to prevent denial-of-service by imposing a more |
|
463 reasonable upper limit on the size of attempted allocations, they must do |
|
464 so themselves; we have no platform independent way to determine how much |
|
465 system memory there is nor an application-independent way to decide what a |
|
466 "reasonable" allocation is.*/ |
|
467 if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags|| |
|
468 ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){ |
|
469 return TH_EIMPL; |
|
470 } |
|
471 /*Initialize the fragment array.*/ |
|
472 _state->fplanes[0].nhfrags=yhfrags; |
|
473 _state->fplanes[0].nvfrags=yvfrags; |
|
474 _state->fplanes[0].froffset=0; |
|
475 _state->fplanes[0].nfrags=yfrags; |
|
476 _state->fplanes[0].nhsbs=yhsbs; |
|
477 _state->fplanes[0].nvsbs=yvsbs; |
|
478 _state->fplanes[0].sboffset=0; |
|
479 _state->fplanes[0].nsbs=ysbs; |
|
480 _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags; |
|
481 _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags; |
|
482 _state->fplanes[1].froffset=yfrags; |
|
483 _state->fplanes[2].froffset=yfrags+cfrags; |
|
484 _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags; |
|
485 _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs; |
|
486 _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs; |
|
487 _state->fplanes[1].sboffset=ysbs; |
|
488 _state->fplanes[2].sboffset=ysbs+csbs; |
|
489 _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs; |
|
490 _state->nfrags=nfrags; |
|
491 _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags)); |
|
492 _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs)); |
|
493 _state->nsbs=nsbs; |
|
494 _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps)); |
|
495 _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags)); |
|
496 _state->nhmbs=yhsbs<<1; |
|
497 _state->nvmbs=yvsbs<<1; |
|
498 _state->nmbs=nmbs; |
|
499 _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps)); |
|
500 _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes)); |
|
501 _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis)); |
|
502 if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL|| |
|
503 _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL|| |
|
504 _state->coded_fragis==NULL){ |
|
505 return TH_EFAULT; |
|
506 } |
|
507 /*Create the mapping from super blocks to fragments.*/ |
|
508 for(pli=0;pli<3;pli++){ |
|
509 oc_fragment_plane *fplane; |
|
510 fplane=_state->fplanes+pli; |
|
511 oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset, |
|
512 _state->sb_flags+fplane->sboffset,fplane->froffset, |
|
513 fplane->nhfrags,fplane->nvfrags); |
|
514 } |
|
515 /*Create the mapping from macro blocks to fragments.*/ |
|
516 oc_mb_create_mapping(_state->mb_maps,_state->mb_modes, |
|
517 _state->fplanes,_state->info.pixel_fmt); |
|
518 /*Initialize the invalid and borderi fields of each fragment.*/ |
|
519 oc_state_border_init(_state); |
|
520 return 0; |
|
521 } |
|
522 |
|
523 static void oc_state_frarray_clear(oc_theora_state *_state){ |
|
524 _ogg_free(_state->coded_fragis); |
|
525 _ogg_free(_state->mb_modes); |
|
526 _ogg_free(_state->mb_maps); |
|
527 _ogg_free(_state->sb_flags); |
|
528 _ogg_free(_state->sb_maps); |
|
529 _ogg_free(_state->frag_mvs); |
|
530 _ogg_free(_state->frags); |
|
531 } |
|
532 |
|
533 |
|
534 /*Initializes the buffers used for reconstructed frames. |
|
535 These buffers are padded with 16 extra pixels on each side, to allow |
|
536 unrestricted motion vectors without special casing the boundary. |
|
537 If chroma is decimated in either direction, the padding is reduced by a |
|
538 factor of 2 on the appropriate sides. |
|
539 _nrefs: The number of reference buffers to init; must be in the range 3...6.*/ |
|
540 static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){ |
|
541 th_info *info; |
|
542 unsigned char *ref_frame_data; |
|
543 size_t ref_frame_data_sz; |
|
544 size_t ref_frame_sz; |
|
545 size_t yplane_sz; |
|
546 size_t cplane_sz; |
|
547 int yhstride; |
|
548 int yheight; |
|
549 int chstride; |
|
550 int cheight; |
|
551 ptrdiff_t align; |
|
552 ptrdiff_t yoffset; |
|
553 ptrdiff_t coffset; |
|
554 ptrdiff_t *frag_buf_offs; |
|
555 ptrdiff_t fragi; |
|
556 int hdec; |
|
557 int vdec; |
|
558 int rfi; |
|
559 int pli; |
|
560 if(_nrefs<3||_nrefs>6)return TH_EINVAL; |
|
561 info=&_state->info; |
|
562 /*Compute the image buffer parameters for each plane.*/ |
|
563 hdec=!(info->pixel_fmt&1); |
|
564 vdec=!(info->pixel_fmt&2); |
|
565 yhstride=info->frame_width+2*OC_UMV_PADDING; |
|
566 yheight=info->frame_height+2*OC_UMV_PADDING; |
|
567 /*Require 16-byte aligned rows in the chroma planes.*/ |
|
568 chstride=(yhstride>>hdec)+15&~15; |
|
569 cheight=yheight>>vdec; |
|
570 yplane_sz=yhstride*(size_t)yheight; |
|
571 cplane_sz=chstride*(size_t)cheight; |
|
572 yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride; |
|
573 coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride; |
|
574 /*Although we guarantee the rows of the chroma planes are a multiple of 16 |
|
575 bytes, the initial padding on the first row may only be 8 bytes. |
|
576 Compute the offset needed to the actual image data to a multiple of 16.*/ |
|
577 align=-coffset&15; |
|
578 ref_frame_sz=yplane_sz+2*cplane_sz+16; |
|
579 ref_frame_data_sz=_nrefs*ref_frame_sz; |
|
580 /*Check for overflow. |
|
581 The same caveats apply as for oc_state_frarray_init().*/ |
|
582 if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16<cplane_sz|| |
|
583 ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){ |
|
584 return TH_EIMPL; |
|
585 } |
|
586 ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16); |
|
587 frag_buf_offs=_state->frag_buf_offs= |
|
588 _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs)); |
|
589 if(ref_frame_data==NULL||frag_buf_offs==NULL){ |
|
590 _ogg_free(frag_buf_offs); |
|
591 oc_aligned_free(ref_frame_data); |
|
592 return TH_EFAULT; |
|
593 } |
|
594 /*Set up the width, height and stride for the image buffers.*/ |
|
595 _state->ref_frame_bufs[0][0].width=info->frame_width; |
|
596 _state->ref_frame_bufs[0][0].height=info->frame_height; |
|
597 _state->ref_frame_bufs[0][0].stride=yhstride; |
|
598 _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width= |
|
599 info->frame_width>>hdec; |
|
600 _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height= |
|
601 info->frame_height>>vdec; |
|
602 _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride= |
|
603 chstride; |
|
604 for(rfi=1;rfi<_nrefs;rfi++){ |
|
605 memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0], |
|
606 sizeof(_state->ref_frame_bufs[0])); |
|
607 } |
|
608 _state->ref_frame_handle=ref_frame_data; |
|
609 /*Set up the data pointers for the image buffers.*/ |
|
610 for(rfi=0;rfi<_nrefs;rfi++){ |
|
611 _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset; |
|
612 ref_frame_data+=yplane_sz+align; |
|
613 _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset; |
|
614 ref_frame_data+=cplane_sz; |
|
615 _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset; |
|
616 ref_frame_data+=cplane_sz+(16-align); |
|
617 /*Flip the buffer upside down. |
|
618 This allows us to decode Theora's bottom-up frames in their natural |
|
619 order, yet return a top-down buffer with a positive stride to the user.*/ |
|
620 oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi], |
|
621 _state->ref_frame_bufs[rfi]); |
|
622 } |
|
623 _state->ref_ystride[0]=-yhstride; |
|
624 _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride; |
|
625 /*Initialize the fragment buffer offsets.*/ |
|
626 ref_frame_data=_state->ref_frame_bufs[0][0].data; |
|
627 fragi=0; |
|
628 for(pli=0;pli<3;pli++){ |
|
629 th_img_plane *iplane; |
|
630 oc_fragment_plane *fplane; |
|
631 unsigned char *vpix; |
|
632 ptrdiff_t stride; |
|
633 ptrdiff_t vfragi_end; |
|
634 int nhfrags; |
|
635 iplane=_state->ref_frame_bufs[0]+pli; |
|
636 fplane=_state->fplanes+pli; |
|
637 vpix=iplane->data; |
|
638 vfragi_end=fplane->froffset+fplane->nfrags; |
|
639 nhfrags=fplane->nhfrags; |
|
640 stride=iplane->stride; |
|
641 while(fragi<vfragi_end){ |
|
642 ptrdiff_t hfragi_end; |
|
643 unsigned char *hpix; |
|
644 hpix=vpix; |
|
645 for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){ |
|
646 frag_buf_offs[fragi]=hpix-ref_frame_data; |
|
647 hpix+=8; |
|
648 } |
|
649 vpix+=stride<<3; |
|
650 } |
|
651 } |
|
652 /*Initialize the reference frame pointers and indices.*/ |
|
653 _state->ref_frame_idx[OC_FRAME_GOLD]= |
|
654 _state->ref_frame_idx[OC_FRAME_PREV]= |
|
655 _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]= |
|
656 _state->ref_frame_idx[OC_FRAME_PREV_ORIG]= |
|
657 _state->ref_frame_idx[OC_FRAME_SELF]= |
|
658 _state->ref_frame_idx[OC_FRAME_IO]=-1; |
|
659 _state->ref_frame_data[OC_FRAME_GOLD]= |
|
660 _state->ref_frame_data[OC_FRAME_PREV]= |
|
661 _state->ref_frame_data[OC_FRAME_GOLD_ORIG]= |
|
662 _state->ref_frame_data[OC_FRAME_PREV_ORIG]= |
|
663 _state->ref_frame_data[OC_FRAME_SELF]= |
|
664 _state->ref_frame_data[OC_FRAME_IO]=NULL; |
|
665 return 0; |
|
666 } |
|
667 |
|
668 static void oc_state_ref_bufs_clear(oc_theora_state *_state){ |
|
669 _ogg_free(_state->frag_buf_offs); |
|
670 oc_aligned_free(_state->ref_frame_handle); |
|
671 } |
|
672 |
|
673 |
|
674 void oc_state_accel_init_c(oc_theora_state *_state){ |
|
675 _state->cpu_flags=0; |
|
676 #if defined(OC_STATE_USE_VTABLE) |
|
677 _state->opt_vtable.frag_copy=oc_frag_copy_c; |
|
678 _state->opt_vtable.frag_copy_list=oc_frag_copy_list_c; |
|
679 _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c; |
|
680 _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c; |
|
681 _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c; |
|
682 _state->opt_vtable.idct8x8=oc_idct8x8_c; |
|
683 _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c; |
|
684 _state->opt_vtable.loop_filter_init=oc_loop_filter_init_c; |
|
685 _state->opt_vtable.state_loop_filter_frag_rows= |
|
686 oc_state_loop_filter_frag_rows_c; |
|
687 _state->opt_vtable.restore_fpu=oc_restore_fpu_c; |
|
688 #endif |
|
689 _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG; |
|
690 } |
|
691 |
|
692 |
|
693 int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){ |
|
694 int ret; |
|
695 /*First validate the parameters.*/ |
|
696 if(_info==NULL)return TH_EFAULT; |
|
697 /*The width and height of the encoded frame must be multiples of 16. |
|
698 They must also, when divided by 16, fit into a 16-bit unsigned integer. |
|
699 The displayable frame offset coordinates must fit into an 8-bit unsigned |
|
700 integer. |
|
701 Note that the offset Y in the API is specified on the opposite side from |
|
702 how it is specified in the bitstream, because the Y axis is flipped in |
|
703 the bitstream. |
|
704 The displayable frame must fit inside the encoded frame. |
|
705 The color space must be one known by the encoder.*/ |
|
706 if((_info->frame_width&0xF)||(_info->frame_height&0xF)|| |
|
707 _info->frame_width<=0||_info->frame_width>=0x100000|| |
|
708 _info->frame_height<=0||_info->frame_height>=0x100000|| |
|
709 _info->pic_x+_info->pic_width>_info->frame_width|| |
|
710 _info->pic_y+_info->pic_height>_info->frame_height|| |
|
711 _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255|| |
|
712 /*Note: the following <0 comparisons may generate spurious warnings on |
|
713 platforms where enums are unsigned. |
|
714 We could cast them to unsigned and just use the following >= comparison, |
|
715 but there are a number of compilers which will mis-optimize this. |
|
716 It's better to live with the spurious warnings.*/ |
|
717 _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES|| |
|
718 _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){ |
|
719 return TH_EINVAL; |
|
720 } |
|
721 memset(_state,0,sizeof(*_state)); |
|
722 memcpy(&_state->info,_info,sizeof(*_info)); |
|
723 /*Invert the sense of pic_y to match Theora's right-handed coordinate |
|
724 system.*/ |
|
725 _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y; |
|
726 _state->frame_type=OC_UNKWN_FRAME; |
|
727 oc_state_accel_init(_state); |
|
728 ret=oc_state_frarray_init(_state); |
|
729 if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs); |
|
730 if(ret<0){ |
|
731 oc_state_frarray_clear(_state); |
|
732 return ret; |
|
733 } |
|
734 /*If the keyframe_granule_shift is out of range, use the maximum allowable |
|
735 value.*/ |
|
736 if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){ |
|
737 _state->info.keyframe_granule_shift=31; |
|
738 } |
|
739 _state->keyframe_num=0; |
|
740 _state->curframe_num=-1; |
|
741 /*3.2.0 streams mark the frame index instead of the frame count. |
|
742 This was changed with stream version 3.2.1 to conform to other Ogg |
|
743 codecs. |
|
744 We add an extra bias when computing granule positions for new streams.*/ |
|
745 _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1); |
|
746 return 0; |
|
747 } |
|
748 |
|
749 void oc_state_clear(oc_theora_state *_state){ |
|
750 oc_state_ref_bufs_clear(_state); |
|
751 oc_state_frarray_clear(_state); |
|
752 } |
|
753 |
|
754 |
|
755 /*Duplicates the pixels on the border of the image plane out into the |
|
756 surrounding padding for use by unrestricted motion vectors. |
|
757 This function only adds the left and right borders, and only for the fragment |
|
758 rows specified. |
|
759 _refi: The index of the reference buffer to pad. |
|
760 _pli: The color plane. |
|
761 _y0: The Y coordinate of the first row to pad. |
|
762 _yend: The Y coordinate of the row to stop padding at.*/ |
|
763 void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, |
|
764 int _y0,int _yend){ |
|
765 th_img_plane *iplane; |
|
766 unsigned char *apix; |
|
767 unsigned char *bpix; |
|
768 unsigned char *epix; |
|
769 int stride; |
|
770 int hpadding; |
|
771 hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); |
|
772 iplane=_state->ref_frame_bufs[_refi]+_pli; |
|
773 stride=iplane->stride; |
|
774 apix=iplane->data+_y0*(ptrdiff_t)stride; |
|
775 bpix=apix+iplane->width-1; |
|
776 epix=iplane->data+_yend*(ptrdiff_t)stride; |
|
777 /*Note the use of != instead of <, which allows the stride to be negative.*/ |
|
778 while(apix!=epix){ |
|
779 memset(apix-hpadding,apix[0],hpadding); |
|
780 memset(bpix+1,bpix[0],hpadding); |
|
781 apix+=stride; |
|
782 bpix+=stride; |
|
783 } |
|
784 } |
|
785 |
|
786 /*Duplicates the pixels on the border of the image plane out into the |
|
787 surrounding padding for use by unrestricted motion vectors. |
|
788 This function only adds the top and bottom borders, and must be called after |
|
789 the left and right borders are added. |
|
790 _refi: The index of the reference buffer to pad. |
|
791 _pli: The color plane.*/ |
|
792 void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){ |
|
793 th_img_plane *iplane; |
|
794 unsigned char *apix; |
|
795 unsigned char *bpix; |
|
796 unsigned char *epix; |
|
797 int stride; |
|
798 int hpadding; |
|
799 int vpadding; |
|
800 int fullw; |
|
801 hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); |
|
802 vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2)); |
|
803 iplane=_state->ref_frame_bufs[_refi]+_pli; |
|
804 stride=iplane->stride; |
|
805 fullw=iplane->width+(hpadding<<1); |
|
806 apix=iplane->data-hpadding; |
|
807 bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding; |
|
808 epix=apix-stride*(ptrdiff_t)vpadding; |
|
809 while(apix!=epix){ |
|
810 memcpy(apix-stride,apix,fullw); |
|
811 memcpy(bpix+stride,bpix,fullw); |
|
812 apix-=stride; |
|
813 bpix+=stride; |
|
814 } |
|
815 } |
|
816 |
|
817 /*Duplicates the pixels on the border of the given reference image out into |
|
818 the surrounding padding for use by unrestricted motion vectors. |
|
819 _state: The context containing the reference buffers. |
|
820 _refi: The index of the reference buffer to pad.*/ |
|
821 void oc_state_borders_fill(oc_theora_state *_state,int _refi){ |
|
822 int pli; |
|
823 for(pli=0;pli<3;pli++){ |
|
824 oc_state_borders_fill_rows(_state,_refi,pli,0, |
|
825 _state->ref_frame_bufs[_refi][pli].height); |
|
826 oc_state_borders_fill_caps(_state,_refi,pli); |
|
827 } |
|
828 } |
|
829 |
|
830 /*Determines the offsets in an image buffer to use for motion compensation. |
|
831 _state: The Theora state the offsets are to be computed with. |
|
832 _offsets: Returns the offset for the buffer(s). |
|
833 _offsets[0] is always set. |
|
834 _offsets[1] is set if the motion vector has non-zero fractional |
|
835 components. |
|
836 _pli: The color plane index. |
|
837 _mv: The motion vector. |
|
838 Return: The number of offsets returned: 1 or 2.*/ |
|
839 int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], |
|
840 int _pli,oc_mv _mv){ |
|
841 /*Here is a brief description of how Theora handles motion vectors: |
|
842 Motion vector components are specified to half-pixel accuracy in |
|
843 undecimated directions of each plane, and quarter-pixel accuracy in |
|
844 decimated directions. |
|
845 Integer parts are extracted by dividing (not shifting) by the |
|
846 appropriate amount, with truncation towards zero. |
|
847 These integer values are used to calculate the first offset. |
|
848 |
|
849 If either of the fractional parts are non-zero, then a second offset is |
|
850 computed. |
|
851 No third or fourth offsets are computed, even if both components have |
|
852 non-zero fractional parts. |
|
853 The second offset is computed by dividing (not shifting) by the |
|
854 appropriate amount, always truncating _away_ from zero.*/ |
|
855 #if 0 |
|
856 /*This version of the code doesn't use any tables, but is slower.*/ |
|
857 int ystride; |
|
858 int xprec; |
|
859 int yprec; |
|
860 int xfrac; |
|
861 int yfrac; |
|
862 int offs; |
|
863 int dx; |
|
864 int dy; |
|
865 ystride=_state->ref_ystride[_pli]; |
|
866 /*These two variables decide whether we are in half- or quarter-pixel |
|
867 precision in each component.*/ |
|
868 xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1)); |
|
869 yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2)); |
|
870 dx=OC_MV_X(_mv); |
|
871 dy=OC_MV_Y(_mv); |
|
872 /*These two variables are either 0 if all the fractional bits are zero or -1 |
|
873 if any of them are non-zero.*/ |
|
874 xfrac=OC_SIGNMASK(-(dx&(xprec|1))); |
|
875 yfrac=OC_SIGNMASK(-(dy&(yprec|1))); |
|
876 offs=(dx>>xprec)+(dy>>yprec)*ystride; |
|
877 if(xfrac||yfrac){ |
|
878 int xmask; |
|
879 int ymask; |
|
880 xmask=OC_SIGNMASK(dx); |
|
881 ymask=OC_SIGNMASK(dy); |
|
882 yfrac&=ystride; |
|
883 _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask); |
|
884 _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask); |
|
885 return 2; |
|
886 } |
|
887 else{ |
|
888 _offsets[0]=offs; |
|
889 return 1; |
|
890 } |
|
891 #else |
|
892 /*Using tables simplifies the code, and there's enough arithmetic to hide the |
|
893 latencies of the memory references.*/ |
|
894 static const signed char OC_MVMAP[2][64]={ |
|
895 { |
|
896 -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8, |
|
897 -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, |
|
898 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, |
|
899 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 |
|
900 }, |
|
901 { |
|
902 -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4, |
|
903 -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0, |
|
904 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, |
|
905 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7 |
|
906 } |
|
907 }; |
|
908 static const signed char OC_MVMAP2[2][64]={ |
|
909 { |
|
910 -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, |
|
911 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, |
|
912 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, |
|
913 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 |
|
914 }, |
|
915 { |
|
916 -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, |
|
917 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, |
|
918 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, |
|
919 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 |
|
920 } |
|
921 }; |
|
922 int ystride; |
|
923 int qpx; |
|
924 int qpy; |
|
925 int mx; |
|
926 int my; |
|
927 int mx2; |
|
928 int my2; |
|
929 int offs; |
|
930 int dx; |
|
931 int dy; |
|
932 ystride=_state->ref_ystride[_pli]; |
|
933 qpy=_pli!=0&&!(_state->info.pixel_fmt&2); |
|
934 dx=OC_MV_X(_mv); |
|
935 dy=OC_MV_Y(_mv); |
|
936 my=OC_MVMAP[qpy][dy+31]; |
|
937 my2=OC_MVMAP2[qpy][dy+31]; |
|
938 qpx=_pli!=0&&!(_state->info.pixel_fmt&1); |
|
939 mx=OC_MVMAP[qpx][dx+31]; |
|
940 mx2=OC_MVMAP2[qpx][dx+31]; |
|
941 offs=my*ystride+mx; |
|
942 if(mx2||my2){ |
|
943 _offsets[1]=offs+my2*ystride+mx2; |
|
944 _offsets[0]=offs; |
|
945 return 2; |
|
946 } |
|
947 _offsets[0]=offs; |
|
948 return 1; |
|
949 #endif |
|
950 } |
|
951 |
|
952 void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, |
|
953 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ |
|
954 unsigned char *dst; |
|
955 ptrdiff_t frag_buf_off; |
|
956 int ystride; |
|
957 int refi; |
|
958 /*Apply the inverse transform.*/ |
|
959 /*Special case only having a DC component.*/ |
|
960 if(_last_zzi<2){ |
|
961 ogg_int16_t p; |
|
962 int ci; |
|
963 /*We round this dequant product (and not any of the others) because there's |
|
964 no iDCT rounding.*/ |
|
965 p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); |
|
966 /*LOOP VECTORIZES.*/ |
|
967 for(ci=0;ci<64;ci++)_dct_coeffs[64+ci]=p; |
|
968 } |
|
969 else{ |
|
970 /*First, dequantize the DC coefficient.*/ |
|
971 _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); |
|
972 oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi); |
|
973 } |
|
974 /*Fill in the target buffer.*/ |
|
975 frag_buf_off=_state->frag_buf_offs[_fragi]; |
|
976 refi=_state->frags[_fragi].refi; |
|
977 ystride=_state->ref_ystride[_pli]; |
|
978 dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; |
|
979 if(refi==OC_FRAME_SELF)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs+64); |
|
980 else{ |
|
981 const unsigned char *ref; |
|
982 int mvoffsets[2]; |
|
983 ref=_state->ref_frame_data[refi]+frag_buf_off; |
|
984 if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, |
|
985 _state->frag_mvs[_fragi])>1){ |
|
986 oc_frag_recon_inter2(_state, |
|
987 dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs+64); |
|
988 } |
|
989 else{ |
|
990 oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); |
|
991 } |
|
992 } |
|
993 } |
|
994 |
|
/*Filters across a vertical block edge, over 8 rows.
  _pix:     The first pixel to the right of the edge in the first row.
  _ystride: The offset between consecutive rows.
  _bv:      The bounding values table, pointing at the entry for 0.*/
static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *row;
  int            y;
  /*Start two pixels to the left of the edge.*/
  row=_pix-2;
  for(y=0;y<8;y++,row+=_ystride){
    int adj;
    /*The _bv array is used to compute the function
      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    adj=_bv[(row[0]-row[3]+3*(row[2]-row[1])+4)>>3];
    row[1]=OC_CLAMP255(row[1]+adj);
    row[2]=OC_CLAMP255(row[2]-adj);
  }
}
|
1010 |
|
/*Filters across a horizontal block edge, over 8 columns.
  _pix:     The first pixel below the edge in the first column.
  _ystride: The offset between consecutive rows.
  _bv:      The bounding values table, pointing at the entry for 0.*/
static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *outer0;
  unsigned char *inner0;
  unsigned char *inner1;
  unsigned char *outer1;
  int            x;
  /*The four rows involved: two above the edge and two below it.*/
  outer0=_pix-_ystride*2;
  inner0=outer0+_ystride;
  inner1=outer0+_ystride*2;
  outer1=outer0+_ystride*3;
  for(x=0;x<8;x++){
    int adj;
    /*The _bv array is used to compute the function
      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    adj=_bv[(outer0[x]-outer1[x]+3*(inner1[x]-inner0[x])+4)>>3];
    inner0[x]=OC_CLAMP255(inner0[x]+adj);
    inner1[x]=OC_CLAMP255(inner1[x]-adj);
  }
}
|
1025 |
|
/*Initialize the bounding values array used by the loop filter.
  Entry 127+x of the table holds the filter response for an input of x: x
   itself when |x|<_flimit, a value that ramps back down to zero when
   _flimit<=|x|<2*_flimit, and zero elsewhere.
  A _flimit that is zero or negative yields an all-zero table.
  Only the 256 entries of the table are ever written, even if _flimit is
   larger than the table can represent; in that case the entries that would
   fall outside of the table are simply dropped.
  _bv:     Storage for the array.
  _flimit: The filter limit as defined in Section 7.10 of the spec.*/
void oc_loop_filter_init_c(signed char _bv[256],int _flimit){
  int nsteps;
  int i;
  memset(_bv,0,sizeof(_bv[0])*256);
  /*Never take more than 128 steps: that keeps 127-i and 127+i inside the
     table and keeps i representable in a signed char.
    For any _flimit up to 128 this is exactly the original loop.*/
  nsteps=_flimit<128?_flimit:128;
  for(i=0;i<nsteps;i++){
    /*The bounds checks are written without adding to _flimit, so they cannot
       overflow no matter how large it is.*/
    if(_flimit<=127-i)_bv[127-i-_flimit]=(signed char)(i-_flimit);
    _bv[127-i]=(signed char)(-i);
    _bv[127+i]=(signed char)(i);
    if(_flimit<=128-i)_bv[127+i+_flimit]=(signed char)(_flimit-i);
  }
}
|
1039 |
|
1040 /*Apply the loop filter to a given set of fragment rows in the given plane. |
|
1041 The filter may be run on the bottom edge, affecting pixels in the next row of |
|
1042 fragments, so this row also needs to be available. |
|
1043 _bv: The bounding values array. |
|
1044 _refi: The index of the frame buffer to filter. |
|
1045 _pli: The color plane to filter. |
|
1046 _fragy0: The Y coordinate of the first fragment row to filter. |
|
1047 _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ |
|
1048 void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state, |
|
1049 signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){ |
|
1050 const oc_fragment_plane *fplane; |
|
1051 const oc_fragment *frags; |
|
1052 const ptrdiff_t *frag_buf_offs; |
|
1053 unsigned char *ref_frame_data; |
|
1054 ptrdiff_t fragi_top; |
|
1055 ptrdiff_t fragi_bot; |
|
1056 ptrdiff_t fragi0; |
|
1057 ptrdiff_t fragi0_end; |
|
1058 int ystride; |
|
1059 int nhfrags; |
|
1060 _bv+=127; |
|
1061 fplane=_state->fplanes+_pli; |
|
1062 nhfrags=fplane->nhfrags; |
|
1063 fragi_top=fplane->froffset; |
|
1064 fragi_bot=fragi_top+fplane->nfrags; |
|
1065 fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; |
|
1066 fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags; |
|
1067 ystride=_state->ref_ystride[_pli]; |
|
1068 frags=_state->frags; |
|
1069 frag_buf_offs=_state->frag_buf_offs; |
|
1070 ref_frame_data=_state->ref_frame_data[_refi]; |
|
1071 /*The following loops are constructed somewhat non-intuitively on purpose. |
|
1072 The main idea is: if a block boundary has at least one coded fragment on |
|
1073 it, the filter is applied to it. |
|
1074 However, the order that the filters are applied in matters, and VP3 chose |
|
1075 the somewhat strange ordering used below.*/ |
|
1076 while(fragi0<fragi0_end){ |
|
1077 ptrdiff_t fragi; |
|
1078 ptrdiff_t fragi_end; |
|
1079 fragi=fragi0; |
|
1080 fragi_end=fragi+nhfrags; |
|
1081 while(fragi<fragi_end){ |
|
1082 if(frags[fragi].coded){ |
|
1083 unsigned char *ref; |
|
1084 ref=ref_frame_data+frag_buf_offs[fragi]; |
|
1085 if(fragi>fragi0)loop_filter_h(ref,ystride,_bv); |
|
1086 if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv); |
|
1087 if(fragi+1<fragi_end&&!frags[fragi+1].coded){ |
|
1088 loop_filter_h(ref+8,ystride,_bv); |
|
1089 } |
|
1090 if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){ |
|
1091 loop_filter_v(ref+(ystride<<3),ystride,_bv); |
|
1092 } |
|
1093 } |
|
1094 fragi++; |
|
1095 } |
|
1096 fragi0+=nhfrags; |
|
1097 } |
|
1098 } |
|
1099 |
|
#if defined(OC_DUMP_IMAGES)
/*Dumps one of the reference frames to a 16-bit RGB PNG file, for debugging.
  The file name is the frame number derived from the state's granule position
   (zero-padded to at least 8 digits), followed by _suf and ".png".
  _state: The context containing the reference buffers.
  _frame: The index into _state->ref_frame_idx of the frame to dump.
  _suf:   The suffix to append to the frame number in the file name.
  Return: 0 on success, TH_EINVAL if the resulting file name is too long, or
           TH_EFAULT if the file could not be opened, memory could not be
           allocated, or libpng reported an error.*/
int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
 const char *_suf){
  /*Dump a PNG of the reconstructed image.*/
  png_structp    png;
  png_infop      info;
  png_bytep     *image;
  FILE          *fp;
  char           fname[256];
  unsigned char *y_row;
  unsigned char *u_row;
  unsigned char *v_row;
  unsigned char *y;
  unsigned char *u;
  unsigned char *v;
  ogg_int64_t    iframe;
  ogg_int64_t    pframe;
  int            fname_len;
  int            y_stride;
  int            u_stride;
  int            v_stride;
  int            framei;
  int            width;
  int            height;
  int            imgi;
  int            imgj;
  width=_state->info.frame_width;
  height=_state->info.frame_height;
  iframe=_state->granpos>>_state->info.keyframe_granule_shift;
  pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
  /*Build the name with a bounded write: the suffix is supplied by the caller
     and the frame number may need more than 8 characters, so an unbounded
     sprintf() could run past the end of the buffer.*/
  fname_len=snprintf(fname,sizeof(fname),"%08i%s.png",
   (int)(iframe+pframe),_suf);
  if(fname_len<0||(size_t)fname_len>=sizeof(fname))return TH_EINVAL;
  fp=fopen(fname,"wb");
  if(fp==NULL)return TH_EFAULT;
  /*Each pixel takes 6 bytes: three 16-bit samples.*/
  image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
  if(image==NULL){
    fclose(fp);
    return TH_EFAULT;
  }
  png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
  if(png==NULL){
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  info=png_create_info_struct(png);
  if(info==NULL){
    png_destroy_write_struct(&png,NULL);
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  /*libpng reports errors by jumping back to this point.*/
  if(setjmp(png_jmpbuf(png))){
    png_destroy_write_struct(&png,&info);
    oc_free_2d(image);
    fclose(fp);
    return TH_EFAULT;
  }
  framei=_state->ref_frame_idx[_frame];
  y_row=_state->ref_frame_bufs[framei][0].data;
  u_row=_state->ref_frame_bufs[framei][1].data;
  v_row=_state->ref_frame_bufs[framei][2].data;
  y_stride=_state->ref_frame_bufs[framei][0].stride;
  u_stride=_state->ref_frame_bufs[framei][1].stride;
  v_stride=_state->ref_frame_bufs[framei][2].stride;
  /*Chroma up-sampling is just done with a box filter.
    This is very likely what will actually be used in practice on a real
     display, and also removes one more layer to search in for the source of
     artifacts.
    As an added bonus, it's dead simple.*/
  /*The output rows are filled from the last to the first while the buffer
     rows are walked forwards.*/
  for(imgi=height;imgi-->0;){
    int dc;
    y=y_row;
    u=u_row;
    v=v_row;
    for(imgj=0;imgj<6*width;){
      float    yval;
      float    uval;
      float    vval;
      unsigned rval;
      unsigned gval;
      unsigned bval;
      /*This is intentionally slow and very accurate.*/
      yval=(*y-16)*(1.0F/219);
      uval=(*u-128)*(2*(1-0.114F)/224);
      vval=(*v-128)*(2*(1-0.299F)/224);
      rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
      gval=OC_CLAMPI(0,(int)(65535*(
       yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
      bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
      /*Samples are stored most significant byte first.*/
      image[imgi][imgj++]=(unsigned char)(rval>>8);
      image[imgi][imgj++]=(unsigned char)(rval&0xFF);
      image[imgi][imgj++]=(unsigned char)(gval>>8);
      image[imgi][imgj++]=(unsigned char)(gval&0xFF);
      image[imgi][imgj++]=(unsigned char)(bval>>8);
      image[imgi][imgj++]=(unsigned char)(bval&0xFF);
      /*Advance the chroma pointers after every odd luma column, or after
         every column if bit 0 of the pixel format is set.*/
      dc=(y-y_row&1)|(_state->info.pixel_fmt&1);
      y++;
      u+=dc;
      v+=dc;
    }
    /*Advance the chroma rows after every odd luma row, or after every row if
       bit 1 of the pixel format is set.
      dc is an all-ones mask when the rows should advance.*/
    dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1);
    y_row+=y_stride;
    u_row+=dc&u_stride;
    v_row+=dc&v_stride;
  }
  png_init_io(png,fp);
  png_set_compression_level(png,Z_BEST_COMPRESSION);
  png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
   PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
  switch(_state->info.colorspace){
    case TH_CS_ITU_REC_470M:{
      png_set_gAMA(png,info,2.2);
      png_set_cHRM_fixed(png,info,31006,31616,
       67000,32000,21000,71000,14000,8000);
    }break;
    case TH_CS_ITU_REC_470BG:{
      png_set_gAMA(png,info,2.67);
      png_set_cHRM_fixed(png,info,31271,32902,
       64000,33000,29000,60000,15000,6000);
    }break;
    default:break;
  }
  png_set_pHYs(png,info,_state->info.aspect_numerator,
   _state->info.aspect_denominator,0);
  png_set_rows(png,info,image);
  png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
  png_write_end(png,info);
  png_destroy_write_struct(&png,&info);
  oc_free_2d(image);
  fclose(fp);
  return 0;
}
#endif
|
1232 |
|
1233 |
|
1234 |
|
1235 ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){ |
|
1236 oc_theora_state *state; |
|
1237 state=(oc_theora_state *)_encdec; |
|
1238 if(_granpos>=0){ |
|
1239 ogg_int64_t iframe; |
|
1240 ogg_int64_t pframe; |
|
1241 iframe=_granpos>>state->info.keyframe_granule_shift; |
|
1242 pframe=_granpos-(iframe<<state->info.keyframe_granule_shift); |
|
1243 /*3.2.0 streams store the frame index in the granule position. |
|
1244 3.2.1 and later store the frame count. |
|
1245 We return the index, so adjust the value if we have a 3.2.1 or later |
|
1246 stream.*/ |
|
1247 return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1); |
|
1248 } |
|
1249 return -1; |
|
1250 } |
|
1251 |
|
1252 double th_granule_time(void *_encdec,ogg_int64_t _granpos){ |
|
1253 oc_theora_state *state; |
|
1254 state=(oc_theora_state *)_encdec; |
|
1255 if(_granpos>=0){ |
|
1256 return (th_granule_frame(_encdec, _granpos)+1)*( |
|
1257 (double)state->info.fps_denominator/state->info.fps_numerator); |
|
1258 } |
|
1259 return -1; |
|
1260 } |