1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libtheora/lib/state.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1260 @@ 1.4 +/******************************************************************** 1.5 + * * 1.6 + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * 1.7 + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * 1.8 + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * 1.9 + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * 1.10 + * * 1.11 + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * 1.12 + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * 1.13 + * * 1.14 + ******************************************************************** 1.15 + 1.16 + function: 1.17 + last mod: $Id: state.c 17576 2010-10-29 01:07:51Z tterribe $ 1.18 + 1.19 + ********************************************************************/ 1.20 + 1.21 +#include <stdlib.h> 1.22 +#include <string.h> 1.23 +#include "state.h" 1.24 +#if defined(OC_DUMP_IMAGES) 1.25 +# include <stdio.h> 1.26 +# include "png.h" 1.27 +#endif 1.28 + 1.29 +/*The function used to fill in the chroma plane motion vectors for a macro 1.30 + block when 4 different motion vectors are specified in the luma plane. 1.31 + This version is for use with chroma decimated in the X and Y directions 1.32 + (4:2:0). 1.33 + _cbmvs: The chroma block-level motion vectors to fill in. 1.34 + _lbmvs: The luma block-level motion vectors.*/ 1.35 +static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ 1.36 + int dx; 1.37 + int dy; 1.38 + dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]) 1.39 + +OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]); 1.40 + dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]) 1.41 + +OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]); 1.42 + _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,2,2),OC_DIV_ROUND_POW2(dy,2,2)); 1.43 +} 1.44 + 1.45 +/*The function used to fill in the chroma plane motion vectors for a macro 1.46 + block when 4 different motion vectors are specified in the luma plane. 1.47 + This version is for use with chroma decimated in the Y direction. 1.48 + _cbmvs: The chroma block-level motion vectors to fill in. 1.49 + _lbmvs: The luma block-level motion vectors.*/ 1.50 +static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ 1.51 + int dx; 1.52 + int dy; 1.53 + dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[2]); 1.54 + dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[2]); 1.55 + _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); 1.56 + dx=OC_MV_X(_lbmvs[1])+OC_MV_X(_lbmvs[3]); 1.57 + dy=OC_MV_Y(_lbmvs[1])+OC_MV_Y(_lbmvs[3]); 1.58 + _cbmvs[1]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); 1.59 +} 1.60 + 1.61 +/*The function used to fill in the chroma plane motion vectors for a macro 1.62 + block when 4 different motion vectors are specified in the luma plane. 1.63 + This version is for use with chroma decimated in the X direction (4:2:2). 1.64 + _cbmvs: The chroma block-level motion vectors to fill in. 1.65 + _lbmvs: The luma block-level motion vectors.*/ 1.66 +static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ 1.67 + int dx; 1.68 + int dy; 1.69 + dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]); 1.70 + dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]); 1.71 + _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); 1.72 + dx=OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]); 1.73 + dy=OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]); 1.74 + _cbmvs[2]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); 1.75 +} 1.76 + 1.77 +/*The function used to fill in the chroma plane motion vectors for a macro 1.78 + block when 4 different motion vectors are specified in the luma plane. 1.79 + This version is for use with no chroma decimation (4:4:4). 1.80 + _cbmvs: The chroma block-level motion vectors to fill in. 1.81 + _lmbmv: The luma macro-block level motion vector to fill in for use in 1.82 + prediction. 1.83 + _lbmvs: The luma block-level motion vectors.*/ 1.84 +static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ 1.85 + _cbmvs[0]=_lbmvs[0]; 1.86 + _cbmvs[1]=_lbmvs[1]; 1.87 + _cbmvs[2]=_lbmvs[2]; 1.88 + _cbmvs[3]=_lbmvs[3]; 1.89 +} 1.90 + 1.91 +/*A table of functions used to fill in the chroma plane motion vectors for a 1.92 + macro block when 4 different motion vectors are specified in the luma 1.93 + plane.*/ 1.94 +const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={ 1.95 + (oc_set_chroma_mvs_func)oc_set_chroma_mvs00, 1.96 + (oc_set_chroma_mvs_func)oc_set_chroma_mvs01, 1.97 + (oc_set_chroma_mvs_func)oc_set_chroma_mvs10, 1.98 + (oc_set_chroma_mvs_func)oc_set_chroma_mvs11 1.99 +}; 1.100 + 1.101 + 1.102 + 1.103 +/*Returns the fragment index of the top-left block in a macro block. 1.104 + This can be used to test whether or not the whole macro block is valid. 1.105 + _sb_map: The super block map. 1.106 + _quadi: The quadrant number. 1.107 + Return: The index of the fragment of the upper left block in the macro 1.108 + block, or -1 if the block lies outside the coded frame.*/ 1.109 +static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){ 1.110 + /*It so happens that under the Hilbert curve ordering described below, the 1.111 + upper-left block in each macro block is at index 0, except in macro block 1.112 + 3, where it is at index 2.*/ 1.113 + return _sb_map[_quadi][_quadi&_quadi<<1]; 1.114 +} 1.115 + 1.116 +/*Fills in the mapping from block positions to fragment numbers for a single 1.117 + color plane. 1.118 + This function also fills in the "valid" flag of each quadrant in the super 1.119 + block flags. 1.120 + _sb_maps: The array of super block maps for the color plane. 1.121 + _sb_flags: The array of super block flags for the color plane. 1.122 + _frag0: The index of the first fragment in the plane. 1.123 + _hfrags: The number of horizontal fragments in a coded frame. 1.124 + _vfrags: The number of vertical fragments in a coded frame.*/ 1.125 +static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[], 1.126 + oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){ 1.127 + /*Contains the (macro_block,block) indices for a 4x4 grid of 1.128 + fragments. 1.129 + The pattern is a 4x4 Hilbert space-filling curve. 1.130 + A Hilbert curve has the nice property that as the curve grows larger, its 1.131 + fractal dimension approaches 2. 1.132 + The intuition is that nearby blocks in the curve are also close spatially, 1.133 + with the previous element always an immediate neighbor, so that runs of 1.134 + blocks should be well correlated.*/ 1.135 + static const int SB_MAP[4][4][2]={ 1.136 + {{0,0},{0,1},{3,2},{3,3}}, 1.137 + {{0,3},{0,2},{3,1},{3,0}}, 1.138 + {{1,0},{1,3},{2,0},{2,3}}, 1.139 + {{1,1},{1,2},{2,1},{2,2}} 1.140 + }; 1.141 + ptrdiff_t yfrag; 1.142 + unsigned sbi; 1.143 + int y; 1.144 + sbi=0; 1.145 + yfrag=_frag0; 1.146 + for(y=0;;y+=4){ 1.147 + int imax; 1.148 + int x; 1.149 + /*Figure out how many columns of blocks in this super block lie within the 1.150 + image.*/ 1.151 + imax=_vfrags-y; 1.152 + if(imax>4)imax=4; 1.153 + else if(imax<=0)break; 1.154 + for(x=0;;x+=4,sbi++){ 1.155 + ptrdiff_t xfrag; 1.156 + int jmax; 1.157 + int quadi; 1.158 + int i; 1.159 + /*Figure out how many rows of blocks in this super block lie within the 1.160 + image.*/ 1.161 + jmax=_hfrags-x; 1.162 + if(jmax>4)jmax=4; 1.163 + else if(jmax<=0)break; 1.164 + /*By default, set all fragment indices to -1.*/ 1.165 + memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi])); 1.166 + /*Fill in the fragment map for this super block.*/ 1.167 + xfrag=yfrag+x; 1.168 + for(i=0;i<imax;i++){ 1.169 + int j; 1.170 + for(j=0;j<jmax;j++){ 1.171 + _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j; 1.172 + } 1.173 + xfrag+=_hfrags; 1.174 + } 1.175 + /*Mark which quadrants of this super block lie within the image.*/ 1.176 + for(quadi=0;quadi<4;quadi++){ 1.177 + _sb_flags[sbi].quad_valid|= 1.178 + (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi; 1.179 + } 1.180 + } 1.181 + yfrag+=_hfrags<<2; 1.182 + } 1.183 +} 1.184 + 1.185 +/*Fills in the Y plane fragment map for a macro block given the fragment 1.186 + coordinates of its upper-left hand corner. 1.187 + _mb_map: The macro block map to fill. 1.188 + _fplane: The description of the Y plane. 1.189 + _xfrag0: The X location of the upper-left hand fragment in the luma plane. 1.190 + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ 1.191 +static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3], 1.192 + const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){ 1.193 + int i; 1.194 + int j; 1.195 + for(i=0;i<2;i++)for(j=0;j<2;j++){ 1.196 + _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j; 1.197 + } 1.198 +} 1.199 + 1.200 +/*Fills in the chroma plane fragment maps for a macro block. 1.201 + This version is for use with chroma decimated in the X and Y directions 1.202 + (4:2:0). 1.203 + _mb_map: The macro block map to fill. 1.204 + _fplanes: The descriptions of the fragment planes. 1.205 + _xfrag0: The X location of the upper-left hand fragment in the luma plane. 1.206 + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ 1.207 +static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3], 1.208 + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ 1.209 + ptrdiff_t fragi; 1.210 + _xfrag0>>=1; 1.211 + _yfrag0>>=1; 1.212 + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; 1.213 + _mb_map[1][0]=fragi+_fplanes[1].froffset; 1.214 + _mb_map[2][0]=fragi+_fplanes[2].froffset; 1.215 +} 1.216 + 1.217 +/*Fills in the chroma plane fragment maps for a macro block. 1.218 + This version is for use with chroma decimated in the Y direction. 1.219 + _mb_map: The macro block map to fill. 1.220 + _fplanes: The descriptions of the fragment planes. 1.221 + _xfrag0: The X location of the upper-left hand fragment in the luma plane. 1.222 + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ 1.223 +static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3], 1.224 + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ 1.225 + ptrdiff_t fragi; 1.226 + int j; 1.227 + _yfrag0>>=1; 1.228 + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; 1.229 + for(j=0;j<2;j++){ 1.230 + _mb_map[1][j]=fragi+_fplanes[1].froffset; 1.231 + _mb_map[2][j]=fragi+_fplanes[2].froffset; 1.232 + fragi++; 1.233 + } 1.234 +} 1.235 + 1.236 +/*Fills in the chroma plane fragment maps for a macro block. 1.237 + This version is for use with chroma decimated in the X direction (4:2:2). 1.238 + _mb_map: The macro block map to fill. 1.239 + _fplanes: The descriptions of the fragment planes. 1.240 + _xfrag0: The X location of the upper-left hand fragment in the luma plane. 1.241 + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ 1.242 +static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3], 1.243 + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ 1.244 + ptrdiff_t fragi; 1.245 + int i; 1.246 + _xfrag0>>=1; 1.247 + fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; 1.248 + for(i=0;i<2;i++){ 1.249 + _mb_map[1][i<<1]=fragi+_fplanes[1].froffset; 1.250 + _mb_map[2][i<<1]=fragi+_fplanes[2].froffset; 1.251 + fragi+=_fplanes[1].nhfrags; 1.252 + } 1.253 +} 1.254 + 1.255 +/*Fills in the chroma plane fragment maps for a macro block. 1.256 + This version is for use with no chroma decimation (4:4:4). 1.257 + This uses the already filled-in luma plane values. 1.258 + _mb_map: The macro block map to fill. 1.259 + _fplanes: The descriptions of the fragment planes.*/ 1.260 +static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3], 1.261 + const oc_fragment_plane _fplanes[3]){ 1.262 + int k; 1.263 + for(k=0;k<4;k++){ 1.264 + _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset; 1.265 + _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset; 1.266 + } 1.267 +} 1.268 + 1.269 +/*The function type used to fill in the chroma plane fragment maps for a 1.270 + macro block. 1.271 + _mb_map: The macro block map to fill. 1.272 + _fplanes: The descriptions of the fragment planes. 1.273 + _xfrag0: The X location of the upper-left hand fragment in the luma plane. 1.274 + _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ 1.275 +typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3], 1.276 + const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0); 1.277 + 1.278 +/*A table of functions used to fill in the chroma plane fragment maps for a 1.279 + macro block for each type of chrominance decimation.*/ 1.280 +static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={ 1.281 + oc_mb_fill_cmapping00, 1.282 + oc_mb_fill_cmapping01, 1.283 + oc_mb_fill_cmapping10, 1.284 + (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11 1.285 +}; 1.286 + 1.287 +/*Fills in the mapping from macro blocks to their corresponding fragment 1.288 + numbers in each plane. 1.289 + _mb_maps: The list of macro block maps. 1.290 + _mb_modes: The list of macro block modes; macro blocks completely outside 1.291 + the coded region are marked invalid. 1.292 + _fplanes: The descriptions of the fragment planes. 1.293 + _pixel_fmt: The chroma decimation type.*/ 1.294 +static void oc_mb_create_mapping(oc_mb_map _mb_maps[], 1.295 + signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){ 1.296 + oc_mb_fill_cmapping_func mb_fill_cmapping; 1.297 + unsigned sbi; 1.298 + int y; 1.299 + mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt]; 1.300 + /*Loop through the luma plane super blocks.*/ 1.301 + for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){ 1.302 + int x; 1.303 + for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){ 1.304 + int ymb; 1.305 + /*Loop through the macro blocks in each super block in display order.*/ 1.306 + for(ymb=0;ymb<2;ymb++){ 1.307 + int xmb; 1.308 + for(xmb=0;xmb<2;xmb++){ 1.309 + unsigned mbi; 1.310 + int mbx; 1.311 + int mby; 1.312 + mbi=sbi<<2|OC_MB_MAP[ymb][xmb]; 1.313 + mbx=x|xmb<<1; 1.314 + mby=y|ymb<<1; 1.315 + /*Initialize fragment indices to -1.*/ 1.316 + memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi])); 1.317 + /*Make sure this macro block is within the encoded region.*/ 1.318 + if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){ 1.319 + _mb_modes[mbi]=OC_MODE_INVALID; 1.320 + continue; 1.321 + } 1.322 + /*Fill in the fragment indices for the luma plane.*/ 1.323 + oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby); 1.324 + /*Fill in the fragment indices for the chroma planes.*/ 1.325 + (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby); 1.326 + } 1.327 + } 1.328 + } 1.329 + } 1.330 +} 1.331 + 1.332 +/*Marks the fragments which fall all or partially outside the displayable 1.333 + region of the frame. 1.334 + _state: The Theora state containing the fragments to be marked.*/ 1.335 +static void oc_state_border_init(oc_theora_state *_state){ 1.336 + oc_fragment *frag; 1.337 + oc_fragment *yfrag_end; 1.338 + oc_fragment *xfrag_end; 1.339 + oc_fragment_plane *fplane; 1.340 + int crop_x0; 1.341 + int crop_y0; 1.342 + int crop_xf; 1.343 + int crop_yf; 1.344 + int pli; 1.345 + int y; 1.346 + int x; 1.347 + /*The method we use here is slow, but the code is dead simple and handles 1.348 + all the special cases easily. 1.349 + We only ever need to do it once.*/ 1.350 + /*Loop through the fragments, marking those completely outside the 1.351 + displayable region and constructing a border mask for those that straddle 1.352 + the border.*/ 1.353 + _state->nborders=0; 1.354 + yfrag_end=frag=_state->frags; 1.355 + for(pli=0;pli<3;pli++){ 1.356 + fplane=_state->fplanes+pli; 1.357 + /*Set up the cropping rectangle for this plane.*/ 1.358 + crop_x0=_state->info.pic_x; 1.359 + crop_xf=_state->info.pic_x+_state->info.pic_width; 1.360 + crop_y0=_state->info.pic_y; 1.361 + crop_yf=_state->info.pic_y+_state->info.pic_height; 1.362 + if(pli>0){ 1.363 + if(!(_state->info.pixel_fmt&1)){ 1.364 + crop_x0=crop_x0>>1; 1.365 + crop_xf=crop_xf+1>>1; 1.366 + } 1.367 + if(!(_state->info.pixel_fmt&2)){ 1.368 + crop_y0=crop_y0>>1; 1.369 + crop_yf=crop_yf+1>>1; 1.370 + } 1.371 + } 1.372 + y=0; 1.373 + for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){ 1.374 + x=0; 1.375 + for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){ 1.376 + /*First check to see if this fragment is completely outside the 1.377 + displayable region.*/ 1.378 + /*Note the special checks for an empty cropping rectangle. 1.379 + This guarantees that if we count a fragment as straddling the 1.380 + border below, at least one pixel in the fragment will be inside 1.381 + the displayable region.*/ 1.382 + if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y|| 1.383 + crop_x0>=crop_xf||crop_y0>=crop_yf){ 1.384 + frag->invalid=1; 1.385 + } 1.386 + /*Otherwise, check to see if it straddles the border.*/ 1.387 + else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8|| 1.388 + y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){ 1.389 + ogg_int64_t mask; 1.390 + int npixels; 1.391 + int i; 1.392 + mask=npixels=0; 1.393 + for(i=0;i<8;i++){ 1.394 + int j; 1.395 + for(j=0;j<8;j++){ 1.396 + if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){ 1.397 + mask|=(ogg_int64_t)1<<(i<<3|j); 1.398 + npixels++; 1.399 + } 1.400 + } 1.401 + } 1.402 + /*Search the fragment array for border info with the same pattern. 1.403 + In general, there will be at most 8 different patterns (per 1.404 + plane).*/ 1.405 + for(i=0;;i++){ 1.406 + if(i>=_state->nborders){ 1.407 + _state->nborders++; 1.408 + _state->borders[i].mask=mask; 1.409 + _state->borders[i].npixels=npixels; 1.410 + } 1.411 + else if(_state->borders[i].mask!=mask)continue; 1.412 + frag->borderi=i; 1.413 + break; 1.414 + } 1.415 + } 1.416 + else frag->borderi=-1; 1.417 + } 1.418 + } 1.419 + } 1.420 +} 1.421 + 1.422 +static int oc_state_frarray_init(oc_theora_state *_state){ 1.423 + int yhfrags; 1.424 + int yvfrags; 1.425 + int chfrags; 1.426 + int cvfrags; 1.427 + ptrdiff_t yfrags; 1.428 + ptrdiff_t cfrags; 1.429 + ptrdiff_t nfrags; 1.430 + unsigned yhsbs; 1.431 + unsigned yvsbs; 1.432 + unsigned chsbs; 1.433 + unsigned cvsbs; 1.434 + unsigned ysbs; 1.435 + unsigned csbs; 1.436 + unsigned nsbs; 1.437 + size_t nmbs; 1.438 + int hdec; 1.439 + int vdec; 1.440 + int pli; 1.441 + /*Figure out the number of fragments in each plane.*/ 1.442 + /*These parameters have already been validated to be multiples of 16.*/ 1.443 + yhfrags=_state->info.frame_width>>3; 1.444 + yvfrags=_state->info.frame_height>>3; 1.445 + hdec=!(_state->info.pixel_fmt&1); 1.446 + vdec=!(_state->info.pixel_fmt&2); 1.447 + chfrags=yhfrags+hdec>>hdec; 1.448 + cvfrags=yvfrags+vdec>>vdec; 1.449 + yfrags=yhfrags*(ptrdiff_t)yvfrags; 1.450 + cfrags=chfrags*(ptrdiff_t)cvfrags; 1.451 + nfrags=yfrags+2*cfrags; 1.452 + /*Figure out the number of super blocks in each plane.*/ 1.453 + yhsbs=yhfrags+3>>2; 1.454 + yvsbs=yvfrags+3>>2; 1.455 + chsbs=chfrags+3>>2; 1.456 + cvsbs=cvfrags+3>>2; 1.457 + ysbs=yhsbs*yvsbs; 1.458 + csbs=chsbs*cvsbs; 1.459 + nsbs=ysbs+2*csbs; 1.460 + nmbs=(size_t)ysbs<<2; 1.461 + /*Check for overflow. 1.462 + We support the ridiculous upper limits of the specification (1048560 by 1.463 + 1048560, or 3 TB frames) if the target architecture has 64-bit pointers, 1.464 + but for those with 32-bit pointers (or smaller!) we have to check. 1.465 + If the caller wants to prevent denial-of-service by imposing a more 1.466 + reasonable upper limit on the size of attempted allocations, they must do 1.467 + so themselves; we have no platform independent way to determine how much 1.468 + system memory there is nor an application-independent way to decide what a 1.469 + "reasonable" allocation is.*/ 1.470 + if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags|| 1.471 + ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){ 1.472 + return TH_EIMPL; 1.473 + } 1.474 + /*Initialize the fragment array.*/ 1.475 + _state->fplanes[0].nhfrags=yhfrags; 1.476 + _state->fplanes[0].nvfrags=yvfrags; 1.477 + _state->fplanes[0].froffset=0; 1.478 + _state->fplanes[0].nfrags=yfrags; 1.479 + _state->fplanes[0].nhsbs=yhsbs; 1.480 + _state->fplanes[0].nvsbs=yvsbs; 1.481 + _state->fplanes[0].sboffset=0; 1.482 + _state->fplanes[0].nsbs=ysbs; 1.483 + _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags; 1.484 + _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags; 1.485 + _state->fplanes[1].froffset=yfrags; 1.486 + _state->fplanes[2].froffset=yfrags+cfrags; 1.487 + _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags; 1.488 + _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs; 1.489 + _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs; 1.490 + _state->fplanes[1].sboffset=ysbs; 1.491 + _state->fplanes[2].sboffset=ysbs+csbs; 1.492 + _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs; 1.493 + _state->nfrags=nfrags; 1.494 + _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags)); 1.495 + _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs)); 1.496 + _state->nsbs=nsbs; 1.497 + _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps)); 1.498 + _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags)); 1.499 + _state->nhmbs=yhsbs<<1; 1.500 + _state->nvmbs=yvsbs<<1; 1.501 + _state->nmbs=nmbs; 1.502 + _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps)); 1.503 + _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes)); 1.504 + _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis)); 1.505 + if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL|| 1.506 + _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL|| 1.507 + _state->coded_fragis==NULL){ 1.508 + return TH_EFAULT; 1.509 + } 1.510 + /*Create the mapping from super blocks to fragments.*/ 1.511 + for(pli=0;pli<3;pli++){ 1.512 + oc_fragment_plane *fplane; 1.513 + fplane=_state->fplanes+pli; 1.514 + oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset, 1.515 + _state->sb_flags+fplane->sboffset,fplane->froffset, 1.516 + fplane->nhfrags,fplane->nvfrags); 1.517 + } 1.518 + /*Create the mapping from macro blocks to fragments.*/ 1.519 + oc_mb_create_mapping(_state->mb_maps,_state->mb_modes, 1.520 + _state->fplanes,_state->info.pixel_fmt); 1.521 + /*Initialize the invalid and borderi fields of each fragment.*/ 1.522 + oc_state_border_init(_state); 1.523 + return 0; 1.524 +} 1.525 + 1.526 +static void oc_state_frarray_clear(oc_theora_state *_state){ 1.527 + _ogg_free(_state->coded_fragis); 1.528 + _ogg_free(_state->mb_modes); 1.529 + _ogg_free(_state->mb_maps); 1.530 + _ogg_free(_state->sb_flags); 1.531 + _ogg_free(_state->sb_maps); 1.532 + _ogg_free(_state->frag_mvs); 1.533 + _ogg_free(_state->frags); 1.534 +} 1.535 + 1.536 + 1.537 +/*Initializes the buffers used for reconstructed frames. 1.538 + These buffers are padded with 16 extra pixels on each side, to allow 1.539 + unrestricted motion vectors without special casing the boundary. 1.540 + If chroma is decimated in either direction, the padding is reduced by a 1.541 + factor of 2 on the appropriate sides. 1.542 + _nrefs: The number of reference buffers to init; must be in the range 3...6.*/ 1.543 +static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){ 1.544 + th_info *info; 1.545 + unsigned char *ref_frame_data; 1.546 + size_t ref_frame_data_sz; 1.547 + size_t ref_frame_sz; 1.548 + size_t yplane_sz; 1.549 + size_t cplane_sz; 1.550 + int yhstride; 1.551 + int yheight; 1.552 + int chstride; 1.553 + int cheight; 1.554 + ptrdiff_t align; 1.555 + ptrdiff_t yoffset; 1.556 + ptrdiff_t coffset; 1.557 + ptrdiff_t *frag_buf_offs; 1.558 + ptrdiff_t fragi; 1.559 + int hdec; 1.560 + int vdec; 1.561 + int rfi; 1.562 + int pli; 1.563 + if(_nrefs<3||_nrefs>6)return TH_EINVAL; 1.564 + info=&_state->info; 1.565 + /*Compute the image buffer parameters for each plane.*/ 1.566 + hdec=!(info->pixel_fmt&1); 1.567 + vdec=!(info->pixel_fmt&2); 1.568 + yhstride=info->frame_width+2*OC_UMV_PADDING; 1.569 + yheight=info->frame_height+2*OC_UMV_PADDING; 1.570 + /*Require 16-byte aligned rows in the chroma planes.*/ 1.571 + chstride=(yhstride>>hdec)+15&~15; 1.572 + cheight=yheight>>vdec; 1.573 + yplane_sz=yhstride*(size_t)yheight; 1.574 + cplane_sz=chstride*(size_t)cheight; 1.575 + yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride; 1.576 + coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride; 1.577 + /*Although we guarantee the rows of the chroma planes are a multiple of 16 1.578 + bytes, the initial padding on the first row may only be 8 bytes. 1.579 + Compute the offset needed to the actual image data to a multiple of 16.*/ 1.580 + align=-coffset&15; 1.581 + ref_frame_sz=yplane_sz+2*cplane_sz+16; 1.582 + ref_frame_data_sz=_nrefs*ref_frame_sz; 1.583 + /*Check for overflow. 1.584 + The same caveats apply as for oc_state_frarray_init().*/ 1.585 + if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16<cplane_sz|| 1.586 + ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){ 1.587 + return TH_EIMPL; 1.588 + } 1.589 + ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16); 1.590 + frag_buf_offs=_state->frag_buf_offs= 1.591 + _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs)); 1.592 + if(ref_frame_data==NULL||frag_buf_offs==NULL){ 1.593 + _ogg_free(frag_buf_offs); 1.594 + oc_aligned_free(ref_frame_data); 1.595 + return TH_EFAULT; 1.596 + } 1.597 + /*Set up the width, height and stride for the image buffers.*/ 1.598 + _state->ref_frame_bufs[0][0].width=info->frame_width; 1.599 + _state->ref_frame_bufs[0][0].height=info->frame_height; 1.600 + _state->ref_frame_bufs[0][0].stride=yhstride; 1.601 + _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width= 1.602 + info->frame_width>>hdec; 1.603 + _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height= 1.604 + info->frame_height>>vdec; 1.605 + _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride= 1.606 + chstride; 1.607 + for(rfi=1;rfi<_nrefs;rfi++){ 1.608 + memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0], 1.609 + sizeof(_state->ref_frame_bufs[0])); 1.610 + } 1.611 + _state->ref_frame_handle=ref_frame_data; 1.612 + /*Set up the data pointers for the image buffers.*/ 1.613 + for(rfi=0;rfi<_nrefs;rfi++){ 1.614 + _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset; 1.615 + ref_frame_data+=yplane_sz+align; 1.616 + _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset; 1.617 + ref_frame_data+=cplane_sz; 1.618 + _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset; 1.619 + ref_frame_data+=cplane_sz+(16-align); 1.620 + /*Flip the buffer upside down. 1.621 + This allows us to decode Theora's bottom-up frames in their natural 1.622 + order, yet return a top-down buffer with a positive stride to the user.*/ 1.623 + oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi], 1.624 + _state->ref_frame_bufs[rfi]); 1.625 + } 1.626 + _state->ref_ystride[0]=-yhstride; 1.627 + _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride; 1.628 + /*Initialize the fragment buffer offsets.*/ 1.629 + ref_frame_data=_state->ref_frame_bufs[0][0].data; 1.630 + fragi=0; 1.631 + for(pli=0;pli<3;pli++){ 1.632 + th_img_plane *iplane; 1.633 + oc_fragment_plane *fplane; 1.634 + unsigned char *vpix; 1.635 + ptrdiff_t stride; 1.636 + ptrdiff_t vfragi_end; 1.637 + int nhfrags; 1.638 + iplane=_state->ref_frame_bufs[0]+pli; 1.639 + fplane=_state->fplanes+pli; 1.640 + vpix=iplane->data; 1.641 + vfragi_end=fplane->froffset+fplane->nfrags; 1.642 + nhfrags=fplane->nhfrags; 1.643 + stride=iplane->stride; 1.644 + while(fragi<vfragi_end){ 1.645 + ptrdiff_t hfragi_end; 1.646 + unsigned char *hpix; 1.647 + hpix=vpix; 1.648 + for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){ 1.649 + frag_buf_offs[fragi]=hpix-ref_frame_data; 1.650 + hpix+=8; 1.651 + } 1.652 + vpix+=stride<<3; 1.653 + } 1.654 + } 1.655 + /*Initialize the reference frame pointers and indices.*/ 1.656 + _state->ref_frame_idx[OC_FRAME_GOLD]= 1.657 + _state->ref_frame_idx[OC_FRAME_PREV]= 1.658 + _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]= 1.659 + _state->ref_frame_idx[OC_FRAME_PREV_ORIG]= 1.660 + _state->ref_frame_idx[OC_FRAME_SELF]= 1.661 + _state->ref_frame_idx[OC_FRAME_IO]=-1; 1.662 + _state->ref_frame_data[OC_FRAME_GOLD]= 1.663 + _state->ref_frame_data[OC_FRAME_PREV]= 1.664 + _state->ref_frame_data[OC_FRAME_GOLD_ORIG]= 1.665 + _state->ref_frame_data[OC_FRAME_PREV_ORIG]= 1.666 + _state->ref_frame_data[OC_FRAME_SELF]= 1.667 + _state->ref_frame_data[OC_FRAME_IO]=NULL; 1.668 + return 0; 1.669 +} 1.670 + 1.671 +static void oc_state_ref_bufs_clear(oc_theora_state *_state){ 1.672 + _ogg_free(_state->frag_buf_offs); 1.673 + oc_aligned_free(_state->ref_frame_handle); 1.674 +} 1.675 + 1.676 + 1.677 +void oc_state_accel_init_c(oc_theora_state *_state){ 1.678 + _state->cpu_flags=0; 1.679 +#if defined(OC_STATE_USE_VTABLE) 1.680 + _state->opt_vtable.frag_copy=oc_frag_copy_c; 1.681 + _state->opt_vtable.frag_copy_list=oc_frag_copy_list_c; 1.682 + _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c; 1.683 + _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c; 1.684 + _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c; 1.685 + _state->opt_vtable.idct8x8=oc_idct8x8_c; 1.686 + _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c; 1.687 + _state->opt_vtable.loop_filter_init=oc_loop_filter_init_c; 1.688 + _state->opt_vtable.state_loop_filter_frag_rows= 1.689 + oc_state_loop_filter_frag_rows_c; 1.690 + _state->opt_vtable.restore_fpu=oc_restore_fpu_c; 1.691 +#endif 1.692 + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG; 1.693 +} 1.694 + 1.695 + 1.696 +int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){ 1.697 + int ret; 1.698 + /*First validate the parameters.*/ 1.699 + if(_info==NULL)return TH_EFAULT; 1.700 + /*The width and height of the encoded frame must be multiples of 16. 1.701 + They must also, when divided by 16, fit into a 16-bit unsigned integer. 1.702 + The displayable frame offset coordinates must fit into an 8-bit unsigned 1.703 + integer. 1.704 + Note that the offset Y in the API is specified on the opposite side from 1.705 + how it is specified in the bitstream, because the Y axis is flipped in 1.706 + the bitstream. 1.707 + The displayable frame must fit inside the encoded frame. 1.708 + The color space must be one known by the encoder.*/ 1.709 + if((_info->frame_width&0xF)||(_info->frame_height&0xF)|| 1.710 + _info->frame_width<=0||_info->frame_width>=0x100000|| 1.711 + _info->frame_height<=0||_info->frame_height>=0x100000|| 1.712 + _info->pic_x+_info->pic_width>_info->frame_width|| 1.713 + _info->pic_y+_info->pic_height>_info->frame_height|| 1.714 + _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255|| 1.715 + /*Note: the following <0 comparisons may generate spurious warnings on 1.716 + platforms where enums are unsigned. 1.717 + We could cast them to unsigned and just use the following >= comparison, 1.718 + but there are a number of compilers which will mis-optimize this. 1.719 + It's better to live with the spurious warnings.*/ 1.720 + _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES|| 1.721 + _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){ 1.722 + return TH_EINVAL; 1.723 + } 1.724 + memset(_state,0,sizeof(*_state)); 1.725 + memcpy(&_state->info,_info,sizeof(*_info)); 1.726 + /*Invert the sense of pic_y to match Theora's right-handed coordinate 1.727 + system.*/ 1.728 + _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y; 1.729 + _state->frame_type=OC_UNKWN_FRAME; 1.730 + oc_state_accel_init(_state); 1.731 + ret=oc_state_frarray_init(_state); 1.732 + if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs); 1.733 + if(ret<0){ 1.734 + oc_state_frarray_clear(_state); 1.735 + return ret; 1.736 + } 1.737 + /*If the keyframe_granule_shift is out of range, use the maximum allowable 1.738 + value.*/ 1.739 + if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){ 1.740 + _state->info.keyframe_granule_shift=31; 1.741 + } 1.742 + _state->keyframe_num=0; 1.743 + _state->curframe_num=-1; 1.744 + /*3.2.0 streams mark the frame index instead of the frame count. 1.745 + This was changed with stream version 3.2.1 to conform to other Ogg 1.746 + codecs. 1.747 + We add an extra bias when computing granule positions for new streams.*/ 1.748 + _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1); 1.749 + return 0; 1.750 +} 1.751 + 1.752 +void oc_state_clear(oc_theora_state *_state){ 1.753 + oc_state_ref_bufs_clear(_state); 1.754 + oc_state_frarray_clear(_state); 1.755 +} 1.756 + 1.757 + 1.758 +/*Duplicates the pixels on the border of the image plane out into the 1.759 + surrounding padding for use by unrestricted motion vectors. 1.760 + This function only adds the left and right borders, and only for the fragment 1.761 + rows specified. 1.762 + _refi: The index of the reference buffer to pad. 1.763 + _pli: The color plane. 1.764 + _y0: The Y coordinate of the first row to pad. 1.765 + _yend: The Y coordinate of the row to stop padding at.*/ 1.766 +void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, 1.767 + int _y0,int _yend){ 1.768 + th_img_plane *iplane; 1.769 + unsigned char *apix; 1.770 + unsigned char *bpix; 1.771 + unsigned char *epix; 1.772 + int stride; 1.773 + int hpadding; 1.774 + hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); 1.775 + iplane=_state->ref_frame_bufs[_refi]+_pli; 1.776 + stride=iplane->stride; 1.777 + apix=iplane->data+_y0*(ptrdiff_t)stride; 1.778 + bpix=apix+iplane->width-1; 1.779 + epix=iplane->data+_yend*(ptrdiff_t)stride; 1.780 + /*Note the use of != instead of <, which allows the stride to be negative.*/ 1.781 + while(apix!=epix){ 1.782 + memset(apix-hpadding,apix[0],hpadding); 1.783 + memset(bpix+1,bpix[0],hpadding); 1.784 + apix+=stride; 1.785 + bpix+=stride; 1.786 + } 1.787 +} 1.788 + 1.789 +/*Duplicates the pixels on the border of the image plane out into the 1.790 + surrounding padding for use by unrestricted motion vectors. 1.791 + This function only adds the top and bottom borders, and must be called after 1.792 + the left and right borders are added. 1.793 + _refi: The index of the reference buffer to pad. 1.794 + _pli: The color plane.*/ 1.795 +void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){ 1.796 + th_img_plane *iplane; 1.797 + unsigned char *apix; 1.798 + unsigned char *bpix; 1.799 + unsigned char *epix; 1.800 + int stride; 1.801 + int hpadding; 1.802 + int vpadding; 1.803 + int fullw; 1.804 + hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); 1.805 + vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2)); 1.806 + iplane=_state->ref_frame_bufs[_refi]+_pli; 1.807 + stride=iplane->stride; 1.808 + fullw=iplane->width+(hpadding<<1); 1.809 + apix=iplane->data-hpadding; 1.810 + bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding; 1.811 + epix=apix-stride*(ptrdiff_t)vpadding; 1.812 + while(apix!=epix){ 1.813 + memcpy(apix-stride,apix,fullw); 1.814 + memcpy(bpix+stride,bpix,fullw); 1.815 + apix-=stride; 1.816 + bpix+=stride; 1.817 + } 1.818 +} 1.819 + 1.820 +/*Duplicates the pixels on the border of the given reference image out into 1.821 + the surrounding padding for use by unrestricted motion vectors. 1.822 + _state: The context containing the reference buffers. 1.823 + _refi: The index of the reference buffer to pad.*/ 1.824 +void oc_state_borders_fill(oc_theora_state *_state,int _refi){ 1.825 + int pli; 1.826 + for(pli=0;pli<3;pli++){ 1.827 + oc_state_borders_fill_rows(_state,_refi,pli,0, 1.828 + _state->ref_frame_bufs[_refi][pli].height); 1.829 + oc_state_borders_fill_caps(_state,_refi,pli); 1.830 + } 1.831 +} 1.832 + 1.833 +/*Determines the offsets in an image buffer to use for motion compensation. 1.834 + _state: The Theora state the offsets are to be computed with. 1.835 + _offsets: Returns the offset for the buffer(s). 1.836 + _offsets[0] is always set. 1.837 + _offsets[1] is set if the motion vector has non-zero fractional 1.838 + components. 1.839 + _pli: The color plane index. 1.840 + _mv: The motion vector. 1.841 + Return: The number of offsets returned: 1 or 2.*/ 1.842 +int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], 1.843 + int _pli,oc_mv _mv){ 1.844 + /*Here is a brief description of how Theora handles motion vectors: 1.845 + Motion vector components are specified to half-pixel accuracy in 1.846 + undecimated directions of each plane, and quarter-pixel accuracy in 1.847 + decimated directions. 1.848 + Integer parts are extracted by dividing (not shifting) by the 1.849 + appropriate amount, with truncation towards zero. 1.850 + These integer values are used to calculate the first offset. 1.851 + 1.852 + If either of the fractional parts are non-zero, then a second offset is 1.853 + computed. 1.854 + No third or fourth offsets are computed, even if both components have 1.855 + non-zero fractional parts. 1.856 + The second offset is computed by dividing (not shifting) by the 1.857 + appropriate amount, always truncating _away_ from zero.*/ 1.858 +#if 0 1.859 + /*This version of the code doesn't use any tables, but is slower.*/ 1.860 + int ystride; 1.861 + int xprec; 1.862 + int yprec; 1.863 + int xfrac; 1.864 + int yfrac; 1.865 + int offs; 1.866 + int dx; 1.867 + int dy; 1.868 + ystride=_state->ref_ystride[_pli]; 1.869 + /*These two variables decide whether we are in half- or quarter-pixel 1.870 + precision in each component.*/ 1.871 + xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1)); 1.872 + yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2)); 1.873 + dx=OC_MV_X(_mv); 1.874 + dy=OC_MV_Y(_mv); 1.875 + /*These two variables are either 0 if all the fractional bits are zero or -1 1.876 + if any of them are non-zero.*/ 1.877 + xfrac=OC_SIGNMASK(-(dx&(xprec|1))); 1.878 + yfrac=OC_SIGNMASK(-(dy&(yprec|1))); 1.879 + offs=(dx>>xprec)+(dy>>yprec)*ystride; 1.880 + if(xfrac||yfrac){ 1.881 + int xmask; 1.882 + int ymask; 1.883 + xmask=OC_SIGNMASK(dx); 1.884 + ymask=OC_SIGNMASK(dy); 1.885 + yfrac&=ystride; 1.886 + _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask); 1.887 + _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask); 1.888 + return 2; 1.889 + } 1.890 + else{ 1.891 + _offsets[0]=offs; 1.892 + return 1; 1.893 + } 1.894 +#else 1.895 + /*Using tables simplifies the code, and there's enough arithmetic to hide the 1.896 + latencies of the memory references.*/ 1.897 + static const signed char OC_MVMAP[2][64]={ 1.898 + { 1.899 + -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8, 1.900 + -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, 1.901 + 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 1.902 + 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 1.903 + }, 1.904 + { 1.905 + -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4, 1.906 + -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0, 1.907 + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1.908 + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7 1.909 + } 1.910 + }; 1.911 + static const signed char OC_MVMAP2[2][64]={ 1.912 + { 1.913 + -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 1.914 + 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 1.915 + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1.916 + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 1.917 + }, 1.918 + { 1.919 + -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 1.920 + 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 1.921 + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1.922 + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 1.923 + } 1.924 + }; 1.925 + int ystride; 1.926 + int qpx; 1.927 + int qpy; 1.928 + int mx; 1.929 + int my; 1.930 + int mx2; 1.931 + int my2; 1.932 + int offs; 1.933 + int dx; 1.934 + int dy; 1.935 + ystride=_state->ref_ystride[_pli]; 1.936 + qpy=_pli!=0&&!(_state->info.pixel_fmt&2); 1.937 + dx=OC_MV_X(_mv); 1.938 + dy=OC_MV_Y(_mv); 1.939 + my=OC_MVMAP[qpy][dy+31]; 1.940 + my2=OC_MVMAP2[qpy][dy+31]; 1.941 + qpx=_pli!=0&&!(_state->info.pixel_fmt&1); 1.942 + mx=OC_MVMAP[qpx][dx+31]; 1.943 + mx2=OC_MVMAP2[qpx][dx+31]; 1.944 + offs=my*ystride+mx; 1.945 + if(mx2||my2){ 1.946 + _offsets[1]=offs+my2*ystride+mx2; 1.947 + _offsets[0]=offs; 1.948 + return 2; 1.949 + } 1.950 + _offsets[0]=offs; 1.951 + return 1; 1.952 +#endif 1.953 +} 1.954 + 1.955 +void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, 1.956 + int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ 1.957 + unsigned char *dst; 1.958 + ptrdiff_t frag_buf_off; 1.959 + int ystride; 1.960 + int refi; 1.961 + /*Apply the inverse transform.*/ 1.962 + /*Special case only having a DC component.*/ 1.963 + if(_last_zzi<2){ 1.964 + ogg_int16_t p; 1.965 + int ci; 1.966 + /*We round this dequant product (and not any of the others) because there's 1.967 + no iDCT rounding.*/ 1.968 + p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); 1.969 + /*LOOP VECTORIZES.*/ 1.970 + for(ci=0;ci<64;ci++)_dct_coeffs[64+ci]=p; 1.971 + } 1.972 + else{ 1.973 + /*First, dequantize the DC coefficient.*/ 1.974 + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); 1.975 + oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi); 1.976 + } 1.977 + /*Fill in the target buffer.*/ 1.978 + frag_buf_off=_state->frag_buf_offs[_fragi]; 1.979 + refi=_state->frags[_fragi].refi; 1.980 + ystride=_state->ref_ystride[_pli]; 1.981 + dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; 1.982 + if(refi==OC_FRAME_SELF)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs+64); 1.983 + else{ 1.984 + const unsigned char *ref; 1.985 + int mvoffsets[2]; 1.986 + ref=_state->ref_frame_data[refi]+frag_buf_off; 1.987 + if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, 1.988 + _state->frag_mvs[_fragi])>1){ 1.989 + oc_frag_recon_inter2(_state, 1.990 + dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs+64); 1.991 + } 1.992 + else{ 1.993 + oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); 1.994 + } 1.995 + } 1.996 +} 1.997 + 1.998 +static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){ 1.999 + int y; 1.1000 + _pix-=2; 1.1001 + for(y=0;y<8;y++){ 1.1002 + int f; 1.1003 + f=_pix[0]-_pix[3]+3*(_pix[2]-_pix[1]); 1.1004 + /*The _bv array is used to compute the function 1.1005 + f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); 1.1006 + where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ 1.1007 + f=*(_bv+(f+4>>3)); 1.1008 + _pix[1]=OC_CLAMP255(_pix[1]+f); 1.1009 + _pix[2]=OC_CLAMP255(_pix[2]-f); 1.1010 + _pix+=_ystride; 1.1011 + } 1.1012 +} 1.1013 + 1.1014 +static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){ 1.1015 + int x; 1.1016 + _pix-=_ystride*2; 1.1017 + for(x=0;x<8;x++){ 1.1018 + int f; 1.1019 + f=_pix[x]-_pix[_ystride*3+x]+3*(_pix[_ystride*2+x]-_pix[_ystride+x]); 1.1020 + /*The _bv array is used to compute the function 1.1021 + f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); 1.1022 + where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ 1.1023 + f=*(_bv+(f+4>>3)); 1.1024 + _pix[_ystride+x]=OC_CLAMP255(_pix[_ystride+x]+f); 1.1025 + _pix[_ystride*2+x]=OC_CLAMP255(_pix[_ystride*2+x]-f); 1.1026 + } 1.1027 +} 1.1028 + 1.1029 +/*Initialize the bounding values array used by the loop filter. 1.1030 + _bv: Storage for the array. 1.1031 + _flimit: The filter limit as defined in Section 7.10 of the spec.*/ 1.1032 +void oc_loop_filter_init_c(signed char _bv[256],int _flimit){ 1.1033 + int i; 1.1034 + memset(_bv,0,sizeof(_bv[0])*256); 1.1035 + for(i=0;i<_flimit;i++){ 1.1036 + if(127-i-_flimit>=0)_bv[127-i-_flimit]=(signed char)(i-_flimit); 1.1037 + _bv[127-i]=(signed char)(-i); 1.1038 + _bv[127+i]=(signed char)(i); 1.1039 + if(127+i+_flimit<256)_bv[127+i+_flimit]=(signed char)(_flimit-i); 1.1040 + } 1.1041 +} 1.1042 + 1.1043 +/*Apply the loop filter to a given set of fragment rows in the given plane. 1.1044 + The filter may be run on the bottom edge, affecting pixels in the next row of 1.1045 + fragments, so this row also needs to be available. 1.1046 + _bv: The bounding values array. 1.1047 + _refi: The index of the frame buffer to filter. 1.1048 + _pli: The color plane to filter. 1.1049 + _fragy0: The Y coordinate of the first fragment row to filter. 1.1050 + _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ 1.1051 +void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state, 1.1052 + signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){ 1.1053 + const oc_fragment_plane *fplane; 1.1054 + const oc_fragment *frags; 1.1055 + const ptrdiff_t *frag_buf_offs; 1.1056 + unsigned char *ref_frame_data; 1.1057 + ptrdiff_t fragi_top; 1.1058 + ptrdiff_t fragi_bot; 1.1059 + ptrdiff_t fragi0; 1.1060 + ptrdiff_t fragi0_end; 1.1061 + int ystride; 1.1062 + int nhfrags; 1.1063 + _bv+=127; 1.1064 + fplane=_state->fplanes+_pli; 1.1065 + nhfrags=fplane->nhfrags; 1.1066 + fragi_top=fplane->froffset; 1.1067 + fragi_bot=fragi_top+fplane->nfrags; 1.1068 + fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; 1.1069 + fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags; 1.1070 + ystride=_state->ref_ystride[_pli]; 1.1071 + frags=_state->frags; 1.1072 + frag_buf_offs=_state->frag_buf_offs; 1.1073 + ref_frame_data=_state->ref_frame_data[_refi]; 1.1074 + /*The following loops are constructed somewhat non-intuitively on purpose. 1.1075 + The main idea is: if a block boundary has at least one coded fragment on 1.1076 + it, the filter is applied to it. 1.1077 + However, the order that the filters are applied in matters, and VP3 chose 1.1078 + the somewhat strange ordering used below.*/ 1.1079 + while(fragi0<fragi0_end){ 1.1080 + ptrdiff_t fragi; 1.1081 + ptrdiff_t fragi_end; 1.1082 + fragi=fragi0; 1.1083 + fragi_end=fragi+nhfrags; 1.1084 + while(fragi<fragi_end){ 1.1085 + if(frags[fragi].coded){ 1.1086 + unsigned char *ref; 1.1087 + ref=ref_frame_data+frag_buf_offs[fragi]; 1.1088 + if(fragi>fragi0)loop_filter_h(ref,ystride,_bv); 1.1089 + if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv); 1.1090 + if(fragi+1<fragi_end&&!frags[fragi+1].coded){ 1.1091 + loop_filter_h(ref+8,ystride,_bv); 1.1092 + } 1.1093 + if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){ 1.1094 + loop_filter_v(ref+(ystride<<3),ystride,_bv); 1.1095 + } 1.1096 + } 1.1097 + fragi++; 1.1098 + } 1.1099 + fragi0+=nhfrags; 1.1100 + } 1.1101 +} 1.1102 + 1.1103 +#if defined(OC_DUMP_IMAGES) 1.1104 +int oc_state_dump_frame(const oc_theora_state *_state,int _frame, 1.1105 + const char *_suf){ 1.1106 + /*Dump a PNG of the reconstructed image.*/ 1.1107 + png_structp png; 1.1108 + png_infop info; 1.1109 + png_bytep *image; 1.1110 + FILE *fp; 1.1111 + char fname[16]; 1.1112 + unsigned char *y_row; 1.1113 + unsigned char *u_row; 1.1114 + unsigned char *v_row; 1.1115 + unsigned char *y; 1.1116 + unsigned char *u; 1.1117 + unsigned char *v; 1.1118 + ogg_int64_t iframe; 1.1119 + ogg_int64_t pframe; 1.1120 + int y_stride; 1.1121 + int u_stride; 1.1122 + int v_stride; 1.1123 + int framei; 1.1124 + int width; 1.1125 + int height; 1.1126 + int imgi; 1.1127 + int imgj; 1.1128 + width=_state->info.frame_width; 1.1129 + height=_state->info.frame_height; 1.1130 + iframe=_state->granpos>>_state->info.keyframe_granule_shift; 1.1131 + pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift); 1.1132 + sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf); 1.1133 + fp=fopen(fname,"wb"); 1.1134 + if(fp==NULL)return TH_EFAULT; 1.1135 + image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image)); 1.1136 + if(image==NULL){ 1.1137 + fclose(fp); 1.1138 + return TH_EFAULT; 1.1139 + } 1.1140 + png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL); 1.1141 + if(png==NULL){ 1.1142 + oc_free_2d(image); 1.1143 + fclose(fp); 1.1144 + return TH_EFAULT; 1.1145 + } 1.1146 + info=png_create_info_struct(png); 1.1147 + if(info==NULL){ 1.1148 + png_destroy_write_struct(&png,NULL); 1.1149 + oc_free_2d(image); 1.1150 + fclose(fp); 1.1151 + return TH_EFAULT; 1.1152 + } 1.1153 + if(setjmp(png_jmpbuf(png))){ 1.1154 + png_destroy_write_struct(&png,&info); 1.1155 + oc_free_2d(image); 1.1156 + fclose(fp); 1.1157 + return TH_EFAULT; 1.1158 + } 1.1159 + framei=_state->ref_frame_idx[_frame]; 1.1160 + y_row=_state->ref_frame_bufs[framei][0].data; 1.1161 + u_row=_state->ref_frame_bufs[framei][1].data; 1.1162 + v_row=_state->ref_frame_bufs[framei][2].data; 1.1163 + y_stride=_state->ref_frame_bufs[framei][0].stride; 1.1164 + u_stride=_state->ref_frame_bufs[framei][1].stride; 1.1165 + v_stride=_state->ref_frame_bufs[framei][2].stride; 1.1166 + /*Chroma up-sampling is just done with a box filter. 1.1167 + This is very likely what will actually be used in practice on a real 1.1168 + display, and also removes one more layer to search in for the source of 1.1169 + artifacts. 1.1170 + As an added bonus, it's dead simple.*/ 1.1171 + for(imgi=height;imgi-->0;){ 1.1172 + int dc; 1.1173 + y=y_row; 1.1174 + u=u_row; 1.1175 + v=v_row; 1.1176 + for(imgj=0;imgj<6*width;){ 1.1177 + float yval; 1.1178 + float uval; 1.1179 + float vval; 1.1180 + unsigned rval; 1.1181 + unsigned gval; 1.1182 + unsigned bval; 1.1183 + /*This is intentionally slow and very accurate.*/ 1.1184 + yval=(*y-16)*(1.0F/219); 1.1185 + uval=(*u-128)*(2*(1-0.114F)/224); 1.1186 + vval=(*v-128)*(2*(1-0.299F)/224); 1.1187 + rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535); 1.1188 + gval=OC_CLAMPI(0,(int)(65535*( 1.1189 + yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535); 1.1190 + bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535); 1.1191 + image[imgi][imgj++]=(unsigned char)(rval>>8); 1.1192 + image[imgi][imgj++]=(unsigned char)(rval&0xFF); 1.1193 + image[imgi][imgj++]=(unsigned char)(gval>>8); 1.1194 + image[imgi][imgj++]=(unsigned char)(gval&0xFF); 1.1195 + image[imgi][imgj++]=(unsigned char)(bval>>8); 1.1196 + image[imgi][imgj++]=(unsigned char)(bval&0xFF); 1.1197 + dc=(y-y_row&1)|(_state->info.pixel_fmt&1); 1.1198 + y++; 1.1199 + u+=dc; 1.1200 + v+=dc; 1.1201 + } 1.1202 + dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1); 1.1203 + y_row+=y_stride; 1.1204 + u_row+=dc&u_stride; 1.1205 + v_row+=dc&v_stride; 1.1206 + } 1.1207 + png_init_io(png,fp); 1.1208 + png_set_compression_level(png,Z_BEST_COMPRESSION); 1.1209 + png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB, 1.1210 + PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT); 1.1211 + switch(_state->info.colorspace){ 1.1212 + case TH_CS_ITU_REC_470M:{ 1.1213 + png_set_gAMA(png,info,2.2); 1.1214 + png_set_cHRM_fixed(png,info,31006,31616, 1.1215 + 67000,32000,21000,71000,14000,8000); 1.1216 + }break; 1.1217 + case TH_CS_ITU_REC_470BG:{ 1.1218 + png_set_gAMA(png,info,2.67); 1.1219 + png_set_cHRM_fixed(png,info,31271,32902, 1.1220 + 64000,33000,29000,60000,15000,6000); 1.1221 + }break; 1.1222 + default:break; 1.1223 + } 1.1224 + png_set_pHYs(png,info,_state->info.aspect_numerator, 1.1225 + _state->info.aspect_denominator,0); 1.1226 + png_set_rows(png,info,image); 1.1227 + png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL); 1.1228 + png_write_end(png,info); 1.1229 + png_destroy_write_struct(&png,&info); 1.1230 + oc_free_2d(image); 1.1231 + fclose(fp); 1.1232 + return 0; 1.1233 +} 1.1234 +#endif 1.1235 + 1.1236 + 1.1237 + 1.1238 +ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){ 1.1239 + oc_theora_state *state; 1.1240 + state=(oc_theora_state *)_encdec; 1.1241 + if(_granpos>=0){ 1.1242 + ogg_int64_t iframe; 1.1243 + ogg_int64_t pframe; 1.1244 + iframe=_granpos>>state->info.keyframe_granule_shift; 1.1245 + pframe=_granpos-(iframe<<state->info.keyframe_granule_shift); 1.1246 + /*3.2.0 streams store the frame index in the granule position. 1.1247 + 3.2.1 and later store the frame count. 1.1248 + We return the index, so adjust the value if we have a 3.2.1 or later 1.1249 + stream.*/ 1.1250 + return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1); 1.1251 + } 1.1252 + return -1; 1.1253 +} 1.1254 + 1.1255 +double th_granule_time(void *_encdec,ogg_int64_t _granpos){ 1.1256 + oc_theora_state *state; 1.1257 + state=(oc_theora_state *)_encdec; 1.1258 + if(_granpos>=0){ 1.1259 + return (th_granule_frame(_encdec, _granpos)+1)*( 1.1260 + (double)state->info.fps_denominator/state->info.fps_numerator); 1.1261 + } 1.1262 + return -1; 1.1263 +}