/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
    last mod: $Id: state.c 17576 2010-10-29 01:07:51Z tterribe $

 ********************************************************************/

#include <stdlib.h>
#include <string.h>
#include "state.h"
#if defined(OC_DUMP_IMAGES)
# include <stdio.h>
# include "png.h"
#endif

/*The function used to fill in the chroma plane motion vectors for a macro
   block when 4 different motion vectors are specified in the luma plane.
  This version is for use with chroma decimated in the X and Y directions
   (4:2:0).
  _cbmvs: The chroma block-level motion vectors to fill in.
michael@0: _lbmvs: The luma block-level motion vectors.*/ michael@0: static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ michael@0: int dx; michael@0: int dy; michael@0: dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]) michael@0: +OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]); michael@0: dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]) michael@0: +OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]); michael@0: _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,2,2),OC_DIV_ROUND_POW2(dy,2,2)); michael@0: } michael@0: michael@0: /*The function used to fill in the chroma plane motion vectors for a macro michael@0: block when 4 different motion vectors are specified in the luma plane. michael@0: This version is for use with chroma decimated in the Y direction. michael@0: _cbmvs: The chroma block-level motion vectors to fill in. michael@0: _lbmvs: The luma block-level motion vectors.*/ michael@0: static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ michael@0: int dx; michael@0: int dy; michael@0: dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[2]); michael@0: dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[2]); michael@0: _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); michael@0: dx=OC_MV_X(_lbmvs[1])+OC_MV_X(_lbmvs[3]); michael@0: dy=OC_MV_Y(_lbmvs[1])+OC_MV_Y(_lbmvs[3]); michael@0: _cbmvs[1]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); michael@0: } michael@0: michael@0: /*The function used to fill in the chroma plane motion vectors for a macro michael@0: block when 4 different motion vectors are specified in the luma plane. michael@0: This version is for use with chroma decimated in the X direction (4:2:2). michael@0: _cbmvs: The chroma block-level motion vectors to fill in. 
michael@0: _lbmvs: The luma block-level motion vectors.*/ michael@0: static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ michael@0: int dx; michael@0: int dy; michael@0: dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]); michael@0: dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]); michael@0: _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); michael@0: dx=OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]); michael@0: dy=OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]); michael@0: _cbmvs[2]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); michael@0: } michael@0: michael@0: /*The function used to fill in the chroma plane motion vectors for a macro michael@0: block when 4 different motion vectors are specified in the luma plane. michael@0: This version is for use with no chroma decimation (4:4:4). michael@0: _cbmvs: The chroma block-level motion vectors to fill in. michael@0: _lmbmv: The luma macro-block level motion vector to fill in for use in michael@0: prediction. michael@0: _lbmvs: The luma block-level motion vectors.*/ michael@0: static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ michael@0: _cbmvs[0]=_lbmvs[0]; michael@0: _cbmvs[1]=_lbmvs[1]; michael@0: _cbmvs[2]=_lbmvs[2]; michael@0: _cbmvs[3]=_lbmvs[3]; michael@0: } michael@0: michael@0: /*A table of functions used to fill in the chroma plane motion vectors for a michael@0: macro block when 4 different motion vectors are specified in the luma michael@0: plane.*/ michael@0: const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={ michael@0: (oc_set_chroma_mvs_func)oc_set_chroma_mvs00, michael@0: (oc_set_chroma_mvs_func)oc_set_chroma_mvs01, michael@0: (oc_set_chroma_mvs_func)oc_set_chroma_mvs10, michael@0: (oc_set_chroma_mvs_func)oc_set_chroma_mvs11 michael@0: }; michael@0: michael@0: michael@0: michael@0: /*Returns the fragment index of the top-left block in a macro block. michael@0: This can be used to test whether or not the whole macro block is valid. 
michael@0: _sb_map: The super block map. michael@0: _quadi: The quadrant number. michael@0: Return: The index of the fragment of the upper left block in the macro michael@0: block, or -1 if the block lies outside the coded frame.*/ michael@0: static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){ michael@0: /*It so happens that under the Hilbert curve ordering described below, the michael@0: upper-left block in each macro block is at index 0, except in macro block michael@0: 3, where it is at index 2.*/ michael@0: return _sb_map[_quadi][_quadi&_quadi<<1]; michael@0: } michael@0: michael@0: /*Fills in the mapping from block positions to fragment numbers for a single michael@0: color plane. michael@0: This function also fills in the "valid" flag of each quadrant in the super michael@0: block flags. michael@0: _sb_maps: The array of super block maps for the color plane. michael@0: _sb_flags: The array of super block flags for the color plane. michael@0: _frag0: The index of the first fragment in the plane. michael@0: _hfrags: The number of horizontal fragments in a coded frame. michael@0: _vfrags: The number of vertical fragments in a coded frame.*/ michael@0: static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[], michael@0: oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){ michael@0: /*Contains the (macro_block,block) indices for a 4x4 grid of michael@0: fragments. michael@0: The pattern is a 4x4 Hilbert space-filling curve. michael@0: A Hilbert curve has the nice property that as the curve grows larger, its michael@0: fractal dimension approaches 2. 
michael@0: The intuition is that nearby blocks in the curve are also close spatially, michael@0: with the previous element always an immediate neighbor, so that runs of michael@0: blocks should be well correlated.*/ michael@0: static const int SB_MAP[4][4][2]={ michael@0: {{0,0},{0,1},{3,2},{3,3}}, michael@0: {{0,3},{0,2},{3,1},{3,0}}, michael@0: {{1,0},{1,3},{2,0},{2,3}}, michael@0: {{1,1},{1,2},{2,1},{2,2}} michael@0: }; michael@0: ptrdiff_t yfrag; michael@0: unsigned sbi; michael@0: int y; michael@0: sbi=0; michael@0: yfrag=_frag0; michael@0: for(y=0;;y+=4){ michael@0: int imax; michael@0: int x; michael@0: /*Figure out how many columns of blocks in this super block lie within the michael@0: image.*/ michael@0: imax=_vfrags-y; michael@0: if(imax>4)imax=4; michael@0: else if(imax<=0)break; michael@0: for(x=0;;x+=4,sbi++){ michael@0: ptrdiff_t xfrag; michael@0: int jmax; michael@0: int quadi; michael@0: int i; michael@0: /*Figure out how many rows of blocks in this super block lie within the michael@0: image.*/ michael@0: jmax=_hfrags-x; michael@0: if(jmax>4)jmax=4; michael@0: else if(jmax<=0)break; michael@0: /*By default, set all fragment indices to -1.*/ michael@0: memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi])); michael@0: /*Fill in the fragment map for this super block.*/ michael@0: xfrag=yfrag+x; michael@0: for(i=0;i=0)<nhfrags+_xfrag0+j; michael@0: } michael@0: } michael@0: michael@0: /*Fills in the chroma plane fragment maps for a macro block. michael@0: This version is for use with chroma decimated in the X and Y directions michael@0: (4:2:0). michael@0: _mb_map: The macro block map to fill. michael@0: _fplanes: The descriptions of the fragment planes. michael@0: _xfrag0: The X location of the upper-left hand fragment in the luma plane. 
michael@0: _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ michael@0: static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3], michael@0: const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ michael@0: ptrdiff_t fragi; michael@0: _xfrag0>>=1; michael@0: _yfrag0>>=1; michael@0: fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; michael@0: _mb_map[1][0]=fragi+_fplanes[1].froffset; michael@0: _mb_map[2][0]=fragi+_fplanes[2].froffset; michael@0: } michael@0: michael@0: /*Fills in the chroma plane fragment maps for a macro block. michael@0: This version is for use with chroma decimated in the Y direction. michael@0: _mb_map: The macro block map to fill. michael@0: _fplanes: The descriptions of the fragment planes. michael@0: _xfrag0: The X location of the upper-left hand fragment in the luma plane. michael@0: _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ michael@0: static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3], michael@0: const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ michael@0: ptrdiff_t fragi; michael@0: int j; michael@0: _yfrag0>>=1; michael@0: fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; michael@0: for(j=0;j<2;j++){ michael@0: _mb_map[1][j]=fragi+_fplanes[1].froffset; michael@0: _mb_map[2][j]=fragi+_fplanes[2].froffset; michael@0: fragi++; michael@0: } michael@0: } michael@0: michael@0: /*Fills in the chroma plane fragment maps for a macro block. michael@0: This version is for use with chroma decimated in the X direction (4:2:2). michael@0: _mb_map: The macro block map to fill. michael@0: _fplanes: The descriptions of the fragment planes. michael@0: _xfrag0: The X location of the upper-left hand fragment in the luma plane. 
michael@0: _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ michael@0: static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3], michael@0: const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ michael@0: ptrdiff_t fragi; michael@0: int i; michael@0: _xfrag0>>=1; michael@0: fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; michael@0: for(i=0;i<2;i++){ michael@0: _mb_map[1][i<<1]=fragi+_fplanes[1].froffset; michael@0: _mb_map[2][i<<1]=fragi+_fplanes[2].froffset; michael@0: fragi+=_fplanes[1].nhfrags; michael@0: } michael@0: } michael@0: michael@0: /*Fills in the chroma plane fragment maps for a macro block. michael@0: This version is for use with no chroma decimation (4:4:4). michael@0: This uses the already filled-in luma plane values. michael@0: _mb_map: The macro block map to fill. michael@0: _fplanes: The descriptions of the fragment planes.*/ michael@0: static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3], michael@0: const oc_fragment_plane _fplanes[3]){ michael@0: int k; michael@0: for(k=0;k<4;k++){ michael@0: _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset; michael@0: _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset; michael@0: } michael@0: } michael@0: michael@0: /*The function type used to fill in the chroma plane fragment maps for a michael@0: macro block. michael@0: _mb_map: The macro block map to fill. michael@0: _fplanes: The descriptions of the fragment planes. michael@0: _xfrag0: The X location of the upper-left hand fragment in the luma plane. 
michael@0: _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ michael@0: typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3], michael@0: const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0); michael@0: michael@0: /*A table of functions used to fill in the chroma plane fragment maps for a michael@0: macro block for each type of chrominance decimation.*/ michael@0: static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={ michael@0: oc_mb_fill_cmapping00, michael@0: oc_mb_fill_cmapping01, michael@0: oc_mb_fill_cmapping10, michael@0: (oc_mb_fill_cmapping_func)oc_mb_fill_cmapping11 michael@0: }; michael@0: michael@0: /*Fills in the mapping from macro blocks to their corresponding fragment michael@0: numbers in each plane. michael@0: _mb_maps: The list of macro block maps. michael@0: _mb_modes: The list of macro block modes; macro blocks completely outside michael@0: the coded region are marked invalid. michael@0: _fplanes: The descriptions of the fragment planes. 
michael@0: _pixel_fmt: The chroma decimation type.*/ michael@0: static void oc_mb_create_mapping(oc_mb_map _mb_maps[], michael@0: signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){ michael@0: oc_mb_fill_cmapping_func mb_fill_cmapping; michael@0: unsigned sbi; michael@0: int y; michael@0: mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt]; michael@0: /*Loop through the luma plane super blocks.*/ michael@0: for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){ michael@0: int x; michael@0: for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){ michael@0: int ymb; michael@0: /*Loop through the macro blocks in each super block in display order.*/ michael@0: for(ymb=0;ymb<2;ymb++){ michael@0: int xmb; michael@0: for(xmb=0;xmb<2;xmb++){ michael@0: unsigned mbi; michael@0: int mbx; michael@0: int mby; michael@0: mbi=sbi<<2|OC_MB_MAP[ymb][xmb]; michael@0: mbx=x|xmb<<1; michael@0: mby=y|ymb<<1; michael@0: /*Initialize fragment indices to -1.*/ michael@0: memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi])); michael@0: /*Make sure this macro block is within the encoded region.*/ michael@0: if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){ michael@0: _mb_modes[mbi]=OC_MODE_INVALID; michael@0: continue; michael@0: } michael@0: /*Fill in the fragment indices for the luma plane.*/ michael@0: oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby); michael@0: /*Fill in the fragment indices for the chroma planes.*/ michael@0: (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: /*Marks the fragments which fall all or partially outside the displayable michael@0: region of the frame. 
michael@0: _state: The Theora state containing the fragments to be marked.*/ michael@0: static void oc_state_border_init(oc_theora_state *_state){ michael@0: oc_fragment *frag; michael@0: oc_fragment *yfrag_end; michael@0: oc_fragment *xfrag_end; michael@0: oc_fragment_plane *fplane; michael@0: int crop_x0; michael@0: int crop_y0; michael@0: int crop_xf; michael@0: int crop_yf; michael@0: int pli; michael@0: int y; michael@0: int x; michael@0: /*The method we use here is slow, but the code is dead simple and handles michael@0: all the special cases easily. michael@0: We only ever need to do it once.*/ michael@0: /*Loop through the fragments, marking those completely outside the michael@0: displayable region and constructing a border mask for those that straddle michael@0: the border.*/ michael@0: _state->nborders=0; michael@0: yfrag_end=frag=_state->frags; michael@0: for(pli=0;pli<3;pli++){ michael@0: fplane=_state->fplanes+pli; michael@0: /*Set up the cropping rectangle for this plane.*/ michael@0: crop_x0=_state->info.pic_x; michael@0: crop_xf=_state->info.pic_x+_state->info.pic_width; michael@0: crop_y0=_state->info.pic_y; michael@0: crop_yf=_state->info.pic_y+_state->info.pic_height; michael@0: if(pli>0){ michael@0: if(!(_state->info.pixel_fmt&1)){ michael@0: crop_x0=crop_x0>>1; michael@0: crop_xf=crop_xf+1>>1; michael@0: } michael@0: if(!(_state->info.pixel_fmt&2)){ michael@0: crop_y0=crop_y0>>1; michael@0: crop_yf=crop_yf+1>>1; michael@0: } michael@0: } michael@0: y=0; michael@0: for(yfrag_end+=fplane->nfrags;fragnhfrags;frag=crop_xf||crop_y0>=crop_yf){ michael@0: frag->invalid=1; michael@0: } michael@0: /*Otherwise, check to see if it straddles the border.*/ michael@0: else if(x=crop_x0&&x+j=crop_y0&&y+i=_state->nborders){ michael@0: _state->nborders++; michael@0: _state->borders[i].mask=mask; michael@0: _state->borders[i].npixels=npixels; michael@0: } michael@0: else if(_state->borders[i].mask!=mask)continue; michael@0: frag->borderi=i; michael@0: break; 
michael@0: } michael@0: } michael@0: else frag->borderi=-1; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: static int oc_state_frarray_init(oc_theora_state *_state){ michael@0: int yhfrags; michael@0: int yvfrags; michael@0: int chfrags; michael@0: int cvfrags; michael@0: ptrdiff_t yfrags; michael@0: ptrdiff_t cfrags; michael@0: ptrdiff_t nfrags; michael@0: unsigned yhsbs; michael@0: unsigned yvsbs; michael@0: unsigned chsbs; michael@0: unsigned cvsbs; michael@0: unsigned ysbs; michael@0: unsigned csbs; michael@0: unsigned nsbs; michael@0: size_t nmbs; michael@0: int hdec; michael@0: int vdec; michael@0: int pli; michael@0: /*Figure out the number of fragments in each plane.*/ michael@0: /*These parameters have already been validated to be multiples of 16.*/ michael@0: yhfrags=_state->info.frame_width>>3; michael@0: yvfrags=_state->info.frame_height>>3; michael@0: hdec=!(_state->info.pixel_fmt&1); michael@0: vdec=!(_state->info.pixel_fmt&2); michael@0: chfrags=yhfrags+hdec>>hdec; michael@0: cvfrags=yvfrags+vdec>>vdec; michael@0: yfrags=yhfrags*(ptrdiff_t)yvfrags; michael@0: cfrags=chfrags*(ptrdiff_t)cvfrags; michael@0: nfrags=yfrags+2*cfrags; michael@0: /*Figure out the number of super blocks in each plane.*/ michael@0: yhsbs=yhfrags+3>>2; michael@0: yvsbs=yvfrags+3>>2; michael@0: chsbs=chfrags+3>>2; michael@0: cvsbs=cvfrags+3>>2; michael@0: ysbs=yhsbs*yvsbs; michael@0: csbs=chsbs*cvsbs; michael@0: nsbs=ysbs+2*csbs; michael@0: nmbs=(size_t)ysbs<<2; michael@0: /*Check for overflow. michael@0: We support the ridiculous upper limits of the specification (1048560 by michael@0: 1048560, or 3 TB frames) if the target architecture has 64-bit pointers, michael@0: but for those with 32-bit pointers (or smaller!) we have to check. 
michael@0: If the caller wants to prevent denial-of-service by imposing a more michael@0: reasonable upper limit on the size of attempted allocations, they must do michael@0: so themselves; we have no platform independent way to determine how much michael@0: system memory there is nor an application-independent way to decide what a michael@0: "reasonable" allocation is.*/ michael@0: if(yfrags/yhfrags!=yvfrags||2*cfrags>2!=ysbs){ michael@0: return TH_EIMPL; michael@0: } michael@0: /*Initialize the fragment array.*/ michael@0: _state->fplanes[0].nhfrags=yhfrags; michael@0: _state->fplanes[0].nvfrags=yvfrags; michael@0: _state->fplanes[0].froffset=0; michael@0: _state->fplanes[0].nfrags=yfrags; michael@0: _state->fplanes[0].nhsbs=yhsbs; michael@0: _state->fplanes[0].nvsbs=yvsbs; michael@0: _state->fplanes[0].sboffset=0; michael@0: _state->fplanes[0].nsbs=ysbs; michael@0: _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags; michael@0: _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags; michael@0: _state->fplanes[1].froffset=yfrags; michael@0: _state->fplanes[2].froffset=yfrags+cfrags; michael@0: _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags; michael@0: _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs; michael@0: _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs; michael@0: _state->fplanes[1].sboffset=ysbs; michael@0: _state->fplanes[2].sboffset=ysbs+csbs; michael@0: _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs; michael@0: _state->nfrags=nfrags; michael@0: _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags)); michael@0: _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs)); michael@0: _state->nsbs=nsbs; michael@0: _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps)); michael@0: _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags)); michael@0: _state->nhmbs=yhsbs<<1; michael@0: _state->nvmbs=yvsbs<<1; michael@0: _state->nmbs=nmbs; michael@0: 
_state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps)); michael@0: _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes)); michael@0: _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis)); michael@0: if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL|| michael@0: _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL|| michael@0: _state->coded_fragis==NULL){ michael@0: return TH_EFAULT; michael@0: } michael@0: /*Create the mapping from super blocks to fragments.*/ michael@0: for(pli=0;pli<3;pli++){ michael@0: oc_fragment_plane *fplane; michael@0: fplane=_state->fplanes+pli; michael@0: oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset, michael@0: _state->sb_flags+fplane->sboffset,fplane->froffset, michael@0: fplane->nhfrags,fplane->nvfrags); michael@0: } michael@0: /*Create the mapping from macro blocks to fragments.*/ michael@0: oc_mb_create_mapping(_state->mb_maps,_state->mb_modes, michael@0: _state->fplanes,_state->info.pixel_fmt); michael@0: /*Initialize the invalid and borderi fields of each fragment.*/ michael@0: oc_state_border_init(_state); michael@0: return 0; michael@0: } michael@0: michael@0: static void oc_state_frarray_clear(oc_theora_state *_state){ michael@0: _ogg_free(_state->coded_fragis); michael@0: _ogg_free(_state->mb_modes); michael@0: _ogg_free(_state->mb_maps); michael@0: _ogg_free(_state->sb_flags); michael@0: _ogg_free(_state->sb_maps); michael@0: _ogg_free(_state->frag_mvs); michael@0: _ogg_free(_state->frags); michael@0: } michael@0: michael@0: michael@0: /*Initializes the buffers used for reconstructed frames. michael@0: These buffers are padded with 16 extra pixels on each side, to allow michael@0: unrestricted motion vectors without special casing the boundary. michael@0: If chroma is decimated in either direction, the padding is reduced by a michael@0: factor of 2 on the appropriate sides. 
michael@0: _nrefs: The number of reference buffers to init; must be in the range 3...6.*/ michael@0: static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){ michael@0: th_info *info; michael@0: unsigned char *ref_frame_data; michael@0: size_t ref_frame_data_sz; michael@0: size_t ref_frame_sz; michael@0: size_t yplane_sz; michael@0: size_t cplane_sz; michael@0: int yhstride; michael@0: int yheight; michael@0: int chstride; michael@0: int cheight; michael@0: ptrdiff_t align; michael@0: ptrdiff_t yoffset; michael@0: ptrdiff_t coffset; michael@0: ptrdiff_t *frag_buf_offs; michael@0: ptrdiff_t fragi; michael@0: int hdec; michael@0: int vdec; michael@0: int rfi; michael@0: int pli; michael@0: if(_nrefs<3||_nrefs>6)return TH_EINVAL; michael@0: info=&_state->info; michael@0: /*Compute the image buffer parameters for each plane.*/ michael@0: hdec=!(info->pixel_fmt&1); michael@0: vdec=!(info->pixel_fmt&2); michael@0: yhstride=info->frame_width+2*OC_UMV_PADDING; michael@0: yheight=info->frame_height+2*OC_UMV_PADDING; michael@0: /*Require 16-byte aligned rows in the chroma planes.*/ michael@0: chstride=(yhstride>>hdec)+15&~15; michael@0: cheight=yheight>>vdec; michael@0: yplane_sz=yhstride*(size_t)yheight; michael@0: cplane_sz=chstride*(size_t)cheight; michael@0: yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride; michael@0: coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride; michael@0: /*Although we guarantee the rows of the chroma planes are a multiple of 16 michael@0: bytes, the initial padding on the first row may only be 8 bytes. michael@0: Compute the offset needed to the actual image data to a multiple of 16.*/ michael@0: align=-coffset&15; michael@0: ref_frame_sz=yplane_sz+2*cplane_sz+16; michael@0: ref_frame_data_sz=_nrefs*ref_frame_sz; michael@0: /*Check for overflow. 
michael@0: The same caveats apply as for oc_state_frarray_init().*/ michael@0: if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16frag_buf_offs= michael@0: _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs)); michael@0: if(ref_frame_data==NULL||frag_buf_offs==NULL){ michael@0: _ogg_free(frag_buf_offs); michael@0: oc_aligned_free(ref_frame_data); michael@0: return TH_EFAULT; michael@0: } michael@0: /*Set up the width, height and stride for the image buffers.*/ michael@0: _state->ref_frame_bufs[0][0].width=info->frame_width; michael@0: _state->ref_frame_bufs[0][0].height=info->frame_height; michael@0: _state->ref_frame_bufs[0][0].stride=yhstride; michael@0: _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width= michael@0: info->frame_width>>hdec; michael@0: _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height= michael@0: info->frame_height>>vdec; michael@0: _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride= michael@0: chstride; michael@0: for(rfi=1;rfi<_nrefs;rfi++){ michael@0: memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0], michael@0: sizeof(_state->ref_frame_bufs[0])); michael@0: } michael@0: _state->ref_frame_handle=ref_frame_data; michael@0: /*Set up the data pointers for the image buffers.*/ michael@0: for(rfi=0;rfi<_nrefs;rfi++){ michael@0: _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset; michael@0: ref_frame_data+=yplane_sz+align; michael@0: _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset; michael@0: ref_frame_data+=cplane_sz; michael@0: _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset; michael@0: ref_frame_data+=cplane_sz+(16-align); michael@0: /*Flip the buffer upside down. 
michael@0: This allows us to decode Theora's bottom-up frames in their natural michael@0: order, yet return a top-down buffer with a positive stride to the user.*/ michael@0: oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi], michael@0: _state->ref_frame_bufs[rfi]); michael@0: } michael@0: _state->ref_ystride[0]=-yhstride; michael@0: _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride; michael@0: /*Initialize the fragment buffer offsets.*/ michael@0: ref_frame_data=_state->ref_frame_bufs[0][0].data; michael@0: fragi=0; michael@0: for(pli=0;pli<3;pli++){ michael@0: th_img_plane *iplane; michael@0: oc_fragment_plane *fplane; michael@0: unsigned char *vpix; michael@0: ptrdiff_t stride; michael@0: ptrdiff_t vfragi_end; michael@0: int nhfrags; michael@0: iplane=_state->ref_frame_bufs[0]+pli; michael@0: fplane=_state->fplanes+pli; michael@0: vpix=iplane->data; michael@0: vfragi_end=fplane->froffset+fplane->nfrags; michael@0: nhfrags=fplane->nhfrags; michael@0: stride=iplane->stride; michael@0: while(fragiref_frame_idx[OC_FRAME_GOLD]= michael@0: _state->ref_frame_idx[OC_FRAME_PREV]= michael@0: _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]= michael@0: _state->ref_frame_idx[OC_FRAME_PREV_ORIG]= michael@0: _state->ref_frame_idx[OC_FRAME_SELF]= michael@0: _state->ref_frame_idx[OC_FRAME_IO]=-1; michael@0: _state->ref_frame_data[OC_FRAME_GOLD]= michael@0: _state->ref_frame_data[OC_FRAME_PREV]= michael@0: _state->ref_frame_data[OC_FRAME_GOLD_ORIG]= michael@0: _state->ref_frame_data[OC_FRAME_PREV_ORIG]= michael@0: _state->ref_frame_data[OC_FRAME_SELF]= michael@0: _state->ref_frame_data[OC_FRAME_IO]=NULL; michael@0: return 0; michael@0: } michael@0: michael@0: static void oc_state_ref_bufs_clear(oc_theora_state *_state){ michael@0: _ogg_free(_state->frag_buf_offs); michael@0: oc_aligned_free(_state->ref_frame_handle); michael@0: } michael@0: michael@0: michael@0: void oc_state_accel_init_c(oc_theora_state *_state){ michael@0: _state->cpu_flags=0; michael@0: #if 
defined(OC_STATE_USE_VTABLE) michael@0: _state->opt_vtable.frag_copy=oc_frag_copy_c; michael@0: _state->opt_vtable.frag_copy_list=oc_frag_copy_list_c; michael@0: _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c; michael@0: _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c; michael@0: _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c; michael@0: _state->opt_vtable.idct8x8=oc_idct8x8_c; michael@0: _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c; michael@0: _state->opt_vtable.loop_filter_init=oc_loop_filter_init_c; michael@0: _state->opt_vtable.state_loop_filter_frag_rows= michael@0: oc_state_loop_filter_frag_rows_c; michael@0: _state->opt_vtable.restore_fpu=oc_restore_fpu_c; michael@0: #endif michael@0: _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG; michael@0: } michael@0: michael@0: michael@0: int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){ michael@0: int ret; michael@0: /*First validate the parameters.*/ michael@0: if(_info==NULL)return TH_EFAULT; michael@0: /*The width and height of the encoded frame must be multiples of 16. michael@0: They must also, when divided by 16, fit into a 16-bit unsigned integer. michael@0: The displayable frame offset coordinates must fit into an 8-bit unsigned michael@0: integer. michael@0: Note that the offset Y in the API is specified on the opposite side from michael@0: how it is specified in the bitstream, because the Y axis is flipped in michael@0: the bitstream. michael@0: The displayable frame must fit inside the encoded frame. 
michael@0: The color space must be one known by the encoder.*/ michael@0: if((_info->frame_width&0xF)||(_info->frame_height&0xF)|| michael@0: _info->frame_width<=0||_info->frame_width>=0x100000|| michael@0: _info->frame_height<=0||_info->frame_height>=0x100000|| michael@0: _info->pic_x+_info->pic_width>_info->frame_width|| michael@0: _info->pic_y+_info->pic_height>_info->frame_height|| michael@0: _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255|| michael@0: /*Note: the following <0 comparisons may generate spurious warnings on michael@0: platforms where enums are unsigned. michael@0: We could cast them to unsigned and just use the following >= comparison, michael@0: but there are a number of compilers which will mis-optimize this. michael@0: It's better to live with the spurious warnings.*/ michael@0: _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES|| michael@0: _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS){ michael@0: return TH_EINVAL; michael@0: } michael@0: memset(_state,0,sizeof(*_state)); michael@0: memcpy(&_state->info,_info,sizeof(*_info)); michael@0: /*Invert the sense of pic_y to match Theora's right-handed coordinate michael@0: system.*/ michael@0: _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y; michael@0: _state->frame_type=OC_UNKWN_FRAME; michael@0: oc_state_accel_init(_state); michael@0: ret=oc_state_frarray_init(_state); michael@0: if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs); michael@0: if(ret<0){ michael@0: oc_state_frarray_clear(_state); michael@0: return ret; michael@0: } michael@0: /*If the keyframe_granule_shift is out of range, use the maximum allowable michael@0: value.*/ michael@0: if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){ michael@0: _state->info.keyframe_granule_shift=31; michael@0: } michael@0: _state->keyframe_num=0; michael@0: _state->curframe_num=-1; michael@0: /*3.2.0 streams mark the frame index instead of the frame count. 
michael@0: This was changed with stream version 3.2.1 to conform to other Ogg michael@0: codecs. michael@0: We add an extra bias when computing granule positions for new streams.*/ michael@0: _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1); michael@0: return 0; michael@0: } michael@0: michael@0: void oc_state_clear(oc_theora_state *_state){ michael@0: oc_state_ref_bufs_clear(_state); michael@0: oc_state_frarray_clear(_state); michael@0: } michael@0: michael@0: michael@0: /*Duplicates the pixels on the border of the image plane out into the michael@0: surrounding padding for use by unrestricted motion vectors. michael@0: This function only adds the left and right borders, and only for the fragment michael@0: rows specified. michael@0: _refi: The index of the reference buffer to pad. michael@0: _pli: The color plane. michael@0: _y0: The Y coordinate of the first row to pad. michael@0: _yend: The Y coordinate of the row to stop padding at.*/ michael@0: void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, michael@0: int _y0,int _yend){ michael@0: th_img_plane *iplane; michael@0: unsigned char *apix; michael@0: unsigned char *bpix; michael@0: unsigned char *epix; michael@0: int stride; michael@0: int hpadding; michael@0: hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); michael@0: iplane=_state->ref_frame_bufs[_refi]+_pli; michael@0: stride=iplane->stride; michael@0: apix=iplane->data+_y0*(ptrdiff_t)stride; michael@0: bpix=apix+iplane->width-1; michael@0: epix=iplane->data+_yend*(ptrdiff_t)stride; michael@0: /*Note the use of != instead of <, which allows the stride to be negative.*/ michael@0: while(apix!=epix){ michael@0: memset(apix-hpadding,apix[0],hpadding); michael@0: memset(bpix+1,bpix[0],hpadding); michael@0: apix+=stride; michael@0: bpix+=stride; michael@0: } michael@0: } michael@0: michael@0: /*Duplicates the pixels on the border of the image plane out into the michael@0: surrounding padding for use by unrestricted 
motion vectors. michael@0: This function only adds the top and bottom borders, and must be called after michael@0: the left and right borders are added. michael@0: _refi: The index of the reference buffer to pad. michael@0: _pli: The color plane.*/ michael@0: void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){ michael@0: th_img_plane *iplane; michael@0: unsigned char *apix; michael@0: unsigned char *bpix; michael@0: unsigned char *epix; michael@0: int stride; michael@0: int hpadding; michael@0: int vpadding; michael@0: int fullw; michael@0: hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); michael@0: vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2)); michael@0: iplane=_state->ref_frame_bufs[_refi]+_pli; michael@0: stride=iplane->stride; michael@0: fullw=iplane->width+(hpadding<<1); michael@0: apix=iplane->data-hpadding; michael@0: bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding; michael@0: epix=apix-stride*(ptrdiff_t)vpadding; michael@0: while(apix!=epix){ michael@0: memcpy(apix-stride,apix,fullw); michael@0: memcpy(bpix+stride,bpix,fullw); michael@0: apix-=stride; michael@0: bpix+=stride; michael@0: } michael@0: } michael@0: michael@0: /*Duplicates the pixels on the border of the given reference image out into michael@0: the surrounding padding for use by unrestricted motion vectors. michael@0: _state: The context containing the reference buffers. michael@0: _refi: The index of the reference buffer to pad.*/ michael@0: void oc_state_borders_fill(oc_theora_state *_state,int _refi){ michael@0: int pli; michael@0: for(pli=0;pli<3;pli++){ michael@0: oc_state_borders_fill_rows(_state,_refi,pli,0, michael@0: _state->ref_frame_bufs[_refi][pli].height); michael@0: oc_state_borders_fill_caps(_state,_refi,pli); michael@0: } michael@0: } michael@0: michael@0: /*Determines the offsets in an image buffer to use for motion compensation. michael@0: _state: The Theora state the offsets are to be computed with. 
michael@0: _offsets: Returns the offset for the buffer(s). michael@0: _offsets[0] is always set. michael@0: _offsets[1] is set if the motion vector has non-zero fractional michael@0: components. michael@0: _pli: The color plane index. michael@0: _mv: The motion vector. michael@0: Return: The number of offsets returned: 1 or 2.*/ michael@0: int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2], michael@0: int _pli,oc_mv _mv){ michael@0: /*Here is a brief description of how Theora handles motion vectors: michael@0: Motion vector components are specified to half-pixel accuracy in michael@0: undecimated directions of each plane, and quarter-pixel accuracy in michael@0: decimated directions. michael@0: Integer parts are extracted by dividing (not shifting) by the michael@0: appropriate amount, with truncation towards zero. michael@0: These integer values are used to calculate the first offset. michael@0: michael@0: If either of the fractional parts are non-zero, then a second offset is michael@0: computed. michael@0: No third or fourth offsets are computed, even if both components have michael@0: non-zero fractional parts. 
michael@0: The second offset is computed by dividing (not shifting) by the michael@0: appropriate amount, always truncating _away_ from zero.*/ michael@0: #if 0 michael@0: /*This version of the code doesn't use any tables, but is slower.*/ michael@0: int ystride; michael@0: int xprec; michael@0: int yprec; michael@0: int xfrac; michael@0: int yfrac; michael@0: int offs; michael@0: int dx; michael@0: int dy; michael@0: ystride=_state->ref_ystride[_pli]; michael@0: /*These two variables decide whether we are in half- or quarter-pixel michael@0: precision in each component.*/ michael@0: xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1)); michael@0: yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2)); michael@0: dx=OC_MV_X(_mv); michael@0: dy=OC_MV_Y(_mv); michael@0: /*These two variables are either 0 if all the fractional bits are zero or -1 michael@0: if any of them are non-zero.*/ michael@0: xfrac=OC_SIGNMASK(-(dx&(xprec|1))); michael@0: yfrac=OC_SIGNMASK(-(dy&(yprec|1))); michael@0: offs=(dx>>xprec)+(dy>>yprec)*ystride; michael@0: if(xfrac||yfrac){ michael@0: int xmask; michael@0: int ymask; michael@0: xmask=OC_SIGNMASK(dx); michael@0: ymask=OC_SIGNMASK(dy); michael@0: yfrac&=ystride; michael@0: _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask); michael@0: _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask); michael@0: return 2; michael@0: } michael@0: else{ michael@0: _offsets[0]=offs; michael@0: return 1; michael@0: } michael@0: #else michael@0: /*Using tables simplifies the code, and there's enough arithmetic to hide the michael@0: latencies of the memory references.*/ michael@0: static const signed char OC_MVMAP[2][64]={ michael@0: { michael@0: -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8, michael@0: -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0, michael@0: 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, michael@0: 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 michael@0: }, michael@0: { michael@0: -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, 
-5, -5, -4, -4, -4, michael@0: -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0, michael@0: 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, michael@0: 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7 michael@0: } michael@0: }; michael@0: static const signed char OC_MVMAP2[2][64]={ michael@0: { michael@0: -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, michael@0: 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, michael@0: 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, michael@0: 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 michael@0: }, michael@0: { michael@0: -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, michael@0: 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, michael@0: 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, michael@0: 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 michael@0: } michael@0: }; michael@0: int ystride; michael@0: int qpx; michael@0: int qpy; michael@0: int mx; michael@0: int my; michael@0: int mx2; michael@0: int my2; michael@0: int offs; michael@0: int dx; michael@0: int dy; michael@0: ystride=_state->ref_ystride[_pli]; michael@0: qpy=_pli!=0&&!(_state->info.pixel_fmt&2); michael@0: dx=OC_MV_X(_mv); michael@0: dy=OC_MV_Y(_mv); michael@0: my=OC_MVMAP[qpy][dy+31]; michael@0: my2=OC_MVMAP2[qpy][dy+31]; michael@0: qpx=_pli!=0&&!(_state->info.pixel_fmt&1); michael@0: mx=OC_MVMAP[qpx][dx+31]; michael@0: mx2=OC_MVMAP2[qpx][dx+31]; michael@0: offs=my*ystride+mx; michael@0: if(mx2||my2){ michael@0: _offsets[1]=offs+my2*ystride+mx2; michael@0: _offsets[0]=offs; michael@0: return 2; michael@0: } michael@0: _offsets[0]=offs; michael@0: return 1; michael@0: #endif michael@0: } michael@0: michael@0: void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, michael@0: int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ michael@0: unsigned char *dst; michael@0: ptrdiff_t frag_buf_off; michael@0: int ystride; michael@0: int refi; michael@0: /*Apply the inverse transform.*/ michael@0: /*Special case only 
having a DC component.*/ michael@0: if(_last_zzi<2){ michael@0: ogg_int16_t p; michael@0: int ci; michael@0: /*We round this dequant product (and not any of the others) because there's michael@0: no iDCT rounding.*/ michael@0: p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); michael@0: /*LOOP VECTORIZES.*/ michael@0: for(ci=0;ci<64;ci++)_dct_coeffs[64+ci]=p; michael@0: } michael@0: else{ michael@0: /*First, dequantize the DC coefficient.*/ michael@0: _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); michael@0: oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi); michael@0: } michael@0: /*Fill in the target buffer.*/ michael@0: frag_buf_off=_state->frag_buf_offs[_fragi]; michael@0: refi=_state->frags[_fragi].refi; michael@0: ystride=_state->ref_ystride[_pli]; michael@0: dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; michael@0: if(refi==OC_FRAME_SELF)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs+64); michael@0: else{ michael@0: const unsigned char *ref; michael@0: int mvoffsets[2]; michael@0: ref=_state->ref_frame_data[refi]+frag_buf_off; michael@0: if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, michael@0: _state->frag_mvs[_fragi])>1){ michael@0: oc_frag_recon_inter2(_state, michael@0: dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs+64); michael@0: } michael@0: else{ michael@0: oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); michael@0: } michael@0: } michael@0: } michael@0: michael@0: static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){ michael@0: int y; michael@0: _pix-=2; michael@0: for(y=0;y<8;y++){ michael@0: int f; michael@0: f=_pix[0]-_pix[3]+3*(_pix[2]-_pix[1]); michael@0: /*The _bv array is used to compute the function michael@0: f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); michael@0: where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ michael@0: f=*(_bv+(f+4>>3)); michael@0: _pix[1]=OC_CLAMP255(_pix[1]+f); michael@0: 
_pix[2]=OC_CLAMP255(_pix[2]-f); michael@0: _pix+=_ystride; michael@0: } michael@0: } michael@0: michael@0: static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){ michael@0: int x; michael@0: _pix-=_ystride*2; michael@0: for(x=0;x<8;x++){ michael@0: int f; michael@0: f=_pix[x]-_pix[_ystride*3+x]+3*(_pix[_ystride*2+x]-_pix[_ystride+x]); michael@0: /*The _bv array is used to compute the function michael@0: f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0)); michael@0: where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/ michael@0: f=*(_bv+(f+4>>3)); michael@0: _pix[_ystride+x]=OC_CLAMP255(_pix[_ystride+x]+f); michael@0: _pix[_ystride*2+x]=OC_CLAMP255(_pix[_ystride*2+x]-f); michael@0: } michael@0: } michael@0: michael@0: /*Initialize the bounding values array used by the loop filter. michael@0: _bv: Storage for the array. michael@0: _flimit: The filter limit as defined in Section 7.10 of the spec.*/ michael@0: void oc_loop_filter_init_c(signed char _bv[256],int _flimit){ michael@0: int i; michael@0: memset(_bv,0,sizeof(_bv[0])*256); michael@0: for(i=0;i<_flimit;i++){ michael@0: if(127-i-_flimit>=0)_bv[127-i-_flimit]=(signed char)(i-_flimit); michael@0: _bv[127-i]=(signed char)(-i); michael@0: _bv[127+i]=(signed char)(i); michael@0: if(127+i+_flimit<256)_bv[127+i+_flimit]=(signed char)(_flimit-i); michael@0: } michael@0: } michael@0: michael@0: /*Apply the loop filter to a given set of fragment rows in the given plane. michael@0: The filter may be run on the bottom edge, affecting pixels in the next row of michael@0: fragments, so this row also needs to be available. michael@0: _bv: The bounding values array. michael@0: _refi: The index of the frame buffer to filter. michael@0: _pli: The color plane to filter. michael@0: _fragy0: The Y coordinate of the first fragment row to filter. 
michael@0: _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ michael@0: void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state, michael@0: signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){ michael@0: const oc_fragment_plane *fplane; michael@0: const oc_fragment *frags; michael@0: const ptrdiff_t *frag_buf_offs; michael@0: unsigned char *ref_frame_data; michael@0: ptrdiff_t fragi_top; michael@0: ptrdiff_t fragi_bot; michael@0: ptrdiff_t fragi0; michael@0: ptrdiff_t fragi0_end; michael@0: int ystride; michael@0: int nhfrags; michael@0: _bv+=127; michael@0: fplane=_state->fplanes+_pli; michael@0: nhfrags=fplane->nhfrags; michael@0: fragi_top=fplane->froffset; michael@0: fragi_bot=fragi_top+fplane->nfrags; michael@0: fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; michael@0: fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags; michael@0: ystride=_state->ref_ystride[_pli]; michael@0: frags=_state->frags; michael@0: frag_buf_offs=_state->frag_buf_offs; michael@0: ref_frame_data=_state->ref_frame_data[_refi]; michael@0: /*The following loops are constructed somewhat non-intuitively on purpose. michael@0: The main idea is: if a block boundary has at least one coded fragment on michael@0: it, the filter is applied to it. 
michael@0: However, the order that the filters are applied in matters, and VP3 chose michael@0: the somewhat strange ordering used below.*/ michael@0: while(fragi0fragi0)loop_filter_h(ref,ystride,_bv); michael@0: if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv); michael@0: if(fragi+1info.frame_width; michael@0: height=_state->info.frame_height; michael@0: iframe=_state->granpos>>_state->info.keyframe_granule_shift; michael@0: pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift); michael@0: sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf); michael@0: fp=fopen(fname,"wb"); michael@0: if(fp==NULL)return TH_EFAULT; michael@0: image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image)); michael@0: if(image==NULL){ michael@0: fclose(fp); michael@0: return TH_EFAULT; michael@0: } michael@0: png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL); michael@0: if(png==NULL){ michael@0: oc_free_2d(image); michael@0: fclose(fp); michael@0: return TH_EFAULT; michael@0: } michael@0: info=png_create_info_struct(png); michael@0: if(info==NULL){ michael@0: png_destroy_write_struct(&png,NULL); michael@0: oc_free_2d(image); michael@0: fclose(fp); michael@0: return TH_EFAULT; michael@0: } michael@0: if(setjmp(png_jmpbuf(png))){ michael@0: png_destroy_write_struct(&png,&info); michael@0: oc_free_2d(image); michael@0: fclose(fp); michael@0: return TH_EFAULT; michael@0: } michael@0: framei=_state->ref_frame_idx[_frame]; michael@0: y_row=_state->ref_frame_bufs[framei][0].data; michael@0: u_row=_state->ref_frame_bufs[framei][1].data; michael@0: v_row=_state->ref_frame_bufs[framei][2].data; michael@0: y_stride=_state->ref_frame_bufs[framei][0].stride; michael@0: u_stride=_state->ref_frame_bufs[framei][1].stride; michael@0: v_stride=_state->ref_frame_bufs[framei][2].stride; michael@0: /*Chroma up-sampling is just done with a box filter. 
michael@0: This is very likely what will actually be used in practice on a real michael@0: display, and also removes one more layer to search in for the source of michael@0: artifacts. michael@0: As an added bonus, it's dead simple.*/ michael@0: for(imgi=height;imgi-->0;){ michael@0: int dc; michael@0: y=y_row; michael@0: u=u_row; michael@0: v=v_row; michael@0: for(imgj=0;imgj<6*width;){ michael@0: float yval; michael@0: float uval; michael@0: float vval; michael@0: unsigned rval; michael@0: unsigned gval; michael@0: unsigned bval; michael@0: /*This is intentionally slow and very accurate.*/ michael@0: yval=(*y-16)*(1.0F/219); michael@0: uval=(*u-128)*(2*(1-0.114F)/224); michael@0: vval=(*v-128)*(2*(1-0.299F)/224); michael@0: rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535); michael@0: gval=OC_CLAMPI(0,(int)(65535*( michael@0: yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535); michael@0: bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535); michael@0: image[imgi][imgj++]=(unsigned char)(rval>>8); michael@0: image[imgi][imgj++]=(unsigned char)(rval&0xFF); michael@0: image[imgi][imgj++]=(unsigned char)(gval>>8); michael@0: image[imgi][imgj++]=(unsigned char)(gval&0xFF); michael@0: image[imgi][imgj++]=(unsigned char)(bval>>8); michael@0: image[imgi][imgj++]=(unsigned char)(bval&0xFF); michael@0: dc=(y-y_row&1)|(_state->info.pixel_fmt&1); michael@0: y++; michael@0: u+=dc; michael@0: v+=dc; michael@0: } michael@0: dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1); michael@0: y_row+=y_stride; michael@0: u_row+=dc&u_stride; michael@0: v_row+=dc&v_stride; michael@0: } michael@0: png_init_io(png,fp); michael@0: png_set_compression_level(png,Z_BEST_COMPRESSION); michael@0: png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB, michael@0: PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT); michael@0: switch(_state->info.colorspace){ michael@0: case TH_CS_ITU_REC_470M:{ michael@0: png_set_gAMA(png,info,2.2); michael@0: 
png_set_cHRM_fixed(png,info,31006,31616, michael@0: 67000,32000,21000,71000,14000,8000); michael@0: }break; michael@0: case TH_CS_ITU_REC_470BG:{ michael@0: png_set_gAMA(png,info,2.67); michael@0: png_set_cHRM_fixed(png,info,31271,32902, michael@0: 64000,33000,29000,60000,15000,6000); michael@0: }break; michael@0: default:break; michael@0: } michael@0: png_set_pHYs(png,info,_state->info.aspect_numerator, michael@0: _state->info.aspect_denominator,0); michael@0: png_set_rows(png,info,image); michael@0: png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL); michael@0: png_write_end(png,info); michael@0: png_destroy_write_struct(&png,&info); michael@0: oc_free_2d(image); michael@0: fclose(fp); michael@0: return 0; michael@0: } michael@0: #endif michael@0: michael@0: michael@0: michael@0: ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){ michael@0: oc_theora_state *state; michael@0: state=(oc_theora_state *)_encdec; michael@0: if(_granpos>=0){ michael@0: ogg_int64_t iframe; michael@0: ogg_int64_t pframe; michael@0: iframe=_granpos>>state->info.keyframe_granule_shift; michael@0: pframe=_granpos-(iframe<info.keyframe_granule_shift); michael@0: /*3.2.0 streams store the frame index in the granule position. michael@0: 3.2.1 and later store the frame count. michael@0: We return the index, so adjust the value if we have a 3.2.1 or later michael@0: stream.*/ michael@0: return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1); michael@0: } michael@0: return -1; michael@0: } michael@0: michael@0: double th_granule_time(void *_encdec,ogg_int64_t _granpos){ michael@0: oc_theora_state *state; michael@0: state=(oc_theora_state *)_encdec; michael@0: if(_granpos>=0){ michael@0: return (th_granule_frame(_encdec, _granpos)+1)*( michael@0: (double)state->info.fps_denominator/state->info.fps_numerator); michael@0: } michael@0: return -1; michael@0: }