1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libtheora/lib/decode.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2963 @@ 1.4 +/******************************************************************** 1.5 + * * 1.6 + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * 1.7 + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * 1.8 + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * 1.9 + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * 1.10 + * * 1.11 + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * 1.12 + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * 1.13 + * * 1.14 + ******************************************************************** 1.15 + 1.16 + function: 1.17 + last mod: $Id: decode.c 17576 2010-10-29 01:07:51Z tterribe $ 1.18 + 1.19 + ********************************************************************/ 1.20 + 1.21 +#include <stdlib.h> 1.22 +#include <string.h> 1.23 +#include <ogg/ogg.h> 1.24 +#include "decint.h" 1.25 +#if defined(OC_DUMP_IMAGES) 1.26 +# include <stdio.h> 1.27 +# include "png.h" 1.28 +#endif 1.29 +#if defined(HAVE_CAIRO) 1.30 +# include <cairo.h> 1.31 +#endif 1.32 + 1.33 + 1.34 +/*No post-processing.*/ 1.35 +#define OC_PP_LEVEL_DISABLED (0) 1.36 +/*Keep track of DC qi for each block only.*/ 1.37 +#define OC_PP_LEVEL_TRACKDCQI (1) 1.38 +/*Deblock the luma plane.*/ 1.39 +#define OC_PP_LEVEL_DEBLOCKY (2) 1.40 +/*Dering the luma plane.*/ 1.41 +#define OC_PP_LEVEL_DERINGY (3) 1.42 +/*Stronger luma plane deringing.*/ 1.43 +#define OC_PP_LEVEL_SDERINGY (4) 1.44 +/*Deblock the chroma planes.*/ 1.45 +#define OC_PP_LEVEL_DEBLOCKC (5) 1.46 +/*Dering the chroma planes.*/ 1.47 +#define OC_PP_LEVEL_DERINGC (6) 1.48 +/*Stronger chroma plane deringing.*/ 1.49 +#define OC_PP_LEVEL_SDERINGC (7) 1.50 +/*Maximum valid post-processing level.*/ 1.51 +#define OC_PP_LEVEL_MAX (7) 1.52 + 1.53 + 1.54 + 1.55 +/*The mode alphabets for the various mode coding 
schemes.
  Scheme 0 uses a custom alphabet, which is not stored in this table.
  Row i therefore holds the alphabet for mode coding scheme i+1 (the mode
   unpacker indexes this table with mode_scheme-1).
  Each row is indexed by the mode index decoded from the bitstream and yields
   the corresponding macro block mode.
  The last row (the default ordering) also supplies the order in which the
   modes of a custom scheme 0 alphabet are assigned their positions.*/
static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
  /*Last MV dominates */
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
  },
  /*No MV dominates.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  },
  /*Default ordering.*/
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
    OC_MODE_INTER_MV_FOUR
  }
};


/*The original DCT tokens are extended and reordered during the construction of
   the Huffman tables.
The extension means more bits can be read with fewer calls to the bitpacker
   during the Huffman decoding process (at the cost of larger Huffman tables),
   and fewer tokens require additional extra bits (reducing the average storage
   per decoded token).
  The revised ordering reveals essential information in the token value
   itself; specifically, whether or not there are additional extra bits to read
   and the parameter to which those extra bits are applied.
  The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
  The extra bits are added into the code word at the bit position inferred from
   the token value, giving the final code word from which all required
   parameters are derived.
  The number of EOBs and the leading zero run length can be extracted directly.
  The coefficient magnitude is optionally negated before extraction, according
   to a 'flip' bit.*/

/*The number of additional extra bits that are decoded with each of the
   internal DCT tokens.
  Only the first 15 internal tokens have an entry; every token with a larger
   value needs no additional extra bits (see OC_DCT_TOKEN_NEEDS_MORE).*/
static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
};

/*Whether or not an internal token needs any additional extra bits.
  This is true exactly when the token is less than the number of entries in
   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
  The argument is not parenthesized in the expansion, so it should be a simple
   expression.*/
#define OC_DCT_TOKEN_NEEDS_MORE(token) \
 (token<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))

/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
#define OC_DCT_TOKEN_FAT_EOB (0)

/*The number of EOBs to use for an end-of-frame token.
  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
   is not yet available everywhere; this should be equivalent.
  The value is the largest size_t value shifted right by one bit.*/
#define OC_DCT_EOB_FINISH (~(size_t)0>>1)

/*The location of the (6) run length bits in the code word.
These are placed at index 0 and given 8 bits (even though 6 would suffice)
   because it may be faster to extract the lower byte on some platforms.*/
#define OC_DCT_CW_RLEN_SHIFT (0)
/*The location of the (12) EOB bits in the code word.*/
#define OC_DCT_CW_EOB_SHIFT (8)
/*The location of the (1) flip bit in the code word.
  This must be right under the magnitude bits.*/
#define OC_DCT_CW_FLIP_BIT (20)
/*The location of the (11) token magnitude bits in the code word.
  These must be last, and rely on a sign-extending right shift.*/
#define OC_DCT_CW_MAG_SHIFT (21)

/*Pack the given fields into a code word.
  Subtraction binds tighter than the shift operator, so the value placed in the
   magnitude field is (_mag)-(_flip), not just (_mag).
  The arguments are expanded more than once (_flip) and are combined with
   operators of different precedence, so they should be simple constants.*/
#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
 ((_eobs)<<OC_DCT_CW_EOB_SHIFT| \
 (_rlen)<<OC_DCT_CW_RLEN_SHIFT| \
 (_flip)<<OC_DCT_CW_FLIP_BIT| \
 (_mag)-(_flip)<<OC_DCT_CW_MAG_SHIFT)

/*A special code word value that signals the end of the frame (a long EOB run
   of zero).*/
#define OC_DCT_CW_FINISH (0)

/*The position at which to insert the extra bits in the code word.
  We use this formulation because Intel has no useful cmov.
  A real architecture would probably do better with two of those.
  This translates to 11 instructions(!), and is _still_ faster than either a
   table lookup (just barely) or the naive double-ternary implementation (which
   gcc translates to a jump and a cmov).
  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
   you want to make one of the other shifts zero.
  The result is OC_DCT_CW_EOB_SHIFT for tokens 0 and 1, OC_DCT_CW_MAG_SHIFT for
   tokens 2 through 11, and 0 (the run length position) for all larger tokens:
   each comparison yields 0 or 1, and negating it gives an all-zeros or
   all-ones mask for the corresponding term.*/
#define OC_DCT_TOKEN_EB_POS(_token) \
 ((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT&-((_token)<2)) \
 +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))

/*The code words for each internal token.
See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
   order.
  The table is indexed by the internal token value.
  Each entry is built with OC_DCT_CW_PACK(eobs,run length,magnitude,flip),
   except for the first one, which is the special OC_DCT_CW_FINISH value.*/
static const ogg_int32_t OC_DCT_CODE_WORD[92]={
  /*These tokens require additional extra bits for the EOB count.*/
  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
  OC_DCT_CW_FINISH,
  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
  OC_DCT_CW_PACK(16, 0, 0,0),
  /*These tokens require additional extra bits for the magnitude.*/
  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 13,0),
  OC_DCT_CW_PACK( 0, 0, 13,1),
  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 21,0),
  OC_DCT_CW_PACK( 0, 0, 21,1),
  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0, 0, 37,0),
  OC_DCT_CW_PACK( 0, 0, 37,1),
  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, 69,0),
  OC_DCT_CW_PACK( 0, 0,325,0),
  OC_DCT_CW_PACK( 0, 0, 69,1),
  OC_DCT_CW_PACK( 0, 0,325,1),
  /*These tokens require additional extra bits for the run length.*/
  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
  OC_DCT_CW_PACK( 0,10, +1,0),
  OC_DCT_CW_PACK( 0,10, -1,0),
  /*OC_DCT_ZRL_TOKEN (6 extra bits)
    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0, 0,1),
  /*The remaining tokens require no additional extra bits.*/
  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 1, 0, 0,0),
  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 2, 0, 0,0),
  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 3, 0, 0,0),
  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
  OC_DCT_CW_PACK( 0, 1, +1,0),
  OC_DCT_CW_PACK( 0, 1, -1,0),
  OC_DCT_CW_PACK( 0, 2, +1,0),
  OC_DCT_CW_PACK( 0, 2, -1,0),
  OC_DCT_CW_PACK( 0, 3, +1,0),
  OC_DCT_CW_PACK( 0, 3, -1,0),
  OC_DCT_CW_PACK( 0, 4, +1,0),
  OC_DCT_CW_PACK( 0, 4, -1,0),
  OC_DCT_CW_PACK( 0, 5, +1,0),
  OC_DCT_CW_PACK( 0, 5, -1,0),
  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 1, +2,0),
  OC_DCT_CW_PACK( 0, 1, +3,0),
  OC_DCT_CW_PACK( 0, 1, -2,0),
  OC_DCT_CW_PACK( 0, 1, -3,0),
  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 6, +1,0),
  OC_DCT_CW_PACK( 0, 7, +1,0),
  OC_DCT_CW_PACK( 0, 8, +1,0),
  OC_DCT_CW_PACK( 0, 9, +1,0),
  OC_DCT_CW_PACK( 0, 6, -1,0),
  OC_DCT_CW_PACK( 0, 7, -1,0),
  OC_DCT_CW_PACK( 0, 8, -1,0),
  OC_DCT_CW_PACK( 0, 9, -1,0),
  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 2, +2,0),
  OC_DCT_CW_PACK( 0, 3, +2,0),
  OC_DCT_CW_PACK( 0, 2, +3,0),
  OC_DCT_CW_PACK( 0, 3, +3,0),
  OC_DCT_CW_PACK( 0, 2, -2,0),
  OC_DCT_CW_PACK( 0, 3, -2,0),
  OC_DCT_CW_PACK( 0, 2, -3,0),
  OC_DCT_CW_PACK( 0, 3, -3,0),
  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
  OC_DCT_CW_PACK( 0, 0, 0,1),
  OC_DCT_CW_PACK( 0, 1, 0,0),
  OC_DCT_CW_PACK( 0, 2, 0,0),
  OC_DCT_CW_PACK( 0, 3, 0,0),
  OC_DCT_CW_PACK( 0, 4, 0,0),
  OC_DCT_CW_PACK( 0, 5, 0,0),
  OC_DCT_CW_PACK( 0, 6, 0,0),
  OC_DCT_CW_PACK( 0, 7, 0,0),
  /*OC_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +1,0),
  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -1,0),
  /*OC_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, +2,0),
  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
  OC_DCT_CW_PACK( 0, 0, -2,0),
  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
  OC_DCT_CW_PACK( 0, 0, +3,0),
  OC_DCT_CW_PACK( 0, 0, -3,0),
  OC_DCT_CW_PACK( 0, 0, +4,0),
  OC_DCT_CW_PACK( 0, 0, -4,0),
  OC_DCT_CW_PACK( 0, 0, +5,0),
  OC_DCT_CW_PACK( 0, 0, -5,0),
  OC_DCT_CW_PACK( 0, 0, +6,0),
  OC_DCT_CW_PACK( 0, 0, -6,0),
  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 0, 0, +7,0),
  OC_DCT_CW_PACK( 0, 0, +8,0),
  OC_DCT_CW_PACK( 0, 0, -7,0),
  OC_DCT_CW_PACK( 0, 0, -8,0),
  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 0, 0, +9,0),
  OC_DCT_CW_PACK( 0, 0,+10,0),
  OC_DCT_CW_PACK( 0, 0,+11,0),
  OC_DCT_CW_PACK( 0, 0,+12,0),
  OC_DCT_CW_PACK( 0, 0, -9,0),
  OC_DCT_CW_PACK( 0, 0,-10,0),
  OC_DCT_CW_PACK( 0, 0,-11,0),
  OC_DCT_CW_PACK( 0, 0,-12,0),
  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
  OC_DCT_CW_PACK( 8, 0, 0,0),
  OC_DCT_CW_PACK( 9, 0, 0,0),
  OC_DCT_CW_PACK(10, 0, 0,0),
  OC_DCT_CW_PACK(11, 0, 0,0),
  OC_DCT_CW_PACK(12, 0, 0,0),
  OC_DCT_CW_PACK(13, 0, 0,0),
  OC_DCT_CW_PACK(14, 0, 0,0),
  OC_DCT_CW_PACK(15, 0, 0,0),
  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
  OC_DCT_CW_PACK( 4, 0, 0,0),
  OC_DCT_CW_PACK( 5, 0, 0,0),
  OC_DCT_CW_PACK( 6, 0, 0,0),
  OC_DCT_CW_PACK( 7, 0, 0,0),
};



/*Decodes a run length using the long-run coding scheme listed below.
  This is the scheme used for the super block flags and for the qi index runs.
  _opb: The pack buffer to read from.
  Return: The decoded run length.*/
static int oc_sb_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
       Codeword            Run Length
     0                       1
     10x                     2-3
     110x                    4-5
     1110xx                  6-9
     11110xxx                10-17
     111110xxxx              18-33
     111111xxxxxxxxxxxx      34-4129*/
  static const ogg_int16_t OC_SB_RUN_TREE[22]={
    4,
    -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
    -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
    -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
    -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
    2,
    -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
  };
  int ret;
  ret=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
  /*Token values below 0x10 are the run length itself.
    Larger values pack two fields: the low 5 bits hold the base run length
     minus 6, and what remains, shifted right by 4, is the number of extra bits
     that must be read and added on top.*/
  if(ret>=0x10){
    int offs;
    offs=ret&0x1F;
    ret=6+offs+(int)oc_pack_read(_opb,ret-offs>>4);
  }
  return ret;
}

/*Decodes a run length using the short-run coding scheme listed below.
  Every run length has its own leaf in the tree, so no extra bits are read.
  _opb: The pack buffer to read from.
  Return: The decoded run length.*/
static int oc_block_run_unpack(oc_pack_buf *_opb){
  /*Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                 11-14
     11111xxxx               15-30*/
  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
    5,
    -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
    -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
    -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
    -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
    -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
    -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
    -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
    33, 36, 39, 44,
    1,-(1<<8|7),-(1<<8|8),
    1,-(1<<8|9),-(1<<8|10),
    2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
    4,
    -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
    -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
    -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
    -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
  };
  return oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
}



/*Installs the plain C implementations of the accelerated decoder routines.
  This only has an effect when OC_DEC_USE_VTABLE is defined, in which case the
   dc_unpredict_mcu_plane entry of the vtable is set.*/
void oc_dec_accel_init_c(oc_dec_ctx *_dec){
# if defined(OC_DEC_USE_VTABLE)
  _dec->opt_vtable.dc_unpredict_mcu_plane=
   oc_dec_dc_unpredict_mcu_plane_c;
# endif
}

/*Initializes a decoder context from the stream info and setup headers.
  _dec:   The decoder context to initialize.
  _info:  The stream info, passed on to oc_state_init().
  _setup: The setup info supplying the Huffman tables, the quantization
           parameters, and the loop filter limits.
  Return: 0 on success, the negative value returned by oc_state_init() or
           oc_huff_trees_copy() if either of them fails, or TH_EFAULT if the
           token buffer cannot be allocated.
          On failure, the state and Huffman trees that were already set up are
           cleared again before returning.*/
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
 const th_setup_info *_setup){
  int qti;
  int pli;
  int qi;
  int ret;
  ret=oc_state_init(&_dec->state,_info,3);
  if(ret<0)return ret;
  ret=oc_huff_trees_copy(_dec->huff_tables,
   (const ogg_int16_t *const *)_setup->huff_tables);
  if(ret<0){
    oc_state_clear(&_dec->state);
    return ret;
  }
  /*For each fragment, allocate one byte for every DCT coefficient token, plus
     one byte for extra-bits for each token, plus one more byte for the long
     EOB run, just in case it's the very last token and has a run length of
     one.*/
  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
  if(_dec->dct_tokens==NULL){
    oc_huff_trees_clear(_dec->huff_tables);
    oc_state_clear(&_dec->state);
    return TH_EFAULT;
  }
  /*Point every dequantization table at its backing storage before the tables
     are filled in.*/
  for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
    _dec->state.dequant_tables[qi][pli][qti]=
     _dec->state.dequant_table_data[qi][pli][qti];
  }
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
   &_setup->qinfo);
  /*Derive the post-processing sharpness modifier for each qi from four
     dequantizer entries (indices 12, 17, 18, and 24) of all six tables.*/
  for(qi=0;qi<64;qi++){
    int qsum;
    qsum=0;
    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
      /*Addition binds tighter than the shift, so the whole four-term sum is
         doubled for plane 0 and left as-is for the other two planes.*/
      qsum+=_dec->state.dequant_tables[qi][pli][qti][12]+
       _dec->state.dequant_tables[qi][pli][qti][17]+
       _dec->state.dequant_tables[qi][pli][qti][18]+
       _dec->state.dequant_tables[qi][pli][qti][24]<<(pli==0);
    }
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
  }
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
   sizeof(_dec->state.loop_filter_limits));
  oc_dec_accel_init(_dec);
  /*Post-processing starts out disabled, with none of its buffers allocated and
     no stripe callback installed.*/
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
  _dec->dc_qis=NULL;
  _dec->variances=NULL;
  _dec->pp_frame_data=NULL;
  _dec->stripe_cb.ctx=NULL;
  _dec->stripe_cb.stripe_decoded=NULL;
#if defined(HAVE_CAIRO)
  _dec->telemetry=0;
  _dec->telemetry_bits=0;
  _dec->telemetry_qi=0;
  _dec->telemetry_mbmode=0;
  _dec->telemetry_mv=0;
  _dec->telemetry_frame_data=NULL;
#endif
  return 0;
}

/*Releases everything owned by a decoder context: the optional telemetry and
   post-processing buffers, the token buffer, the Huffman trees, and finally
   the shared codec state.
  The context structure itself is not freed.*/
static void oc_dec_clear(oc_dec_ctx *_dec){
#if defined(HAVE_CAIRO)
  _ogg_free(_dec->telemetry_frame_data);
#endif
  _ogg_free(_dec->pp_frame_data);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->dc_qis);
  _ogg_free(_dec->dct_tokens);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}


/*Unpacks the frame header of a data packet into the decoder state.
  This reads the packet type bit, the frame type bit, and a list of one to
   three 6-bit qi values; the first two values are each followed by a bit that
   says whether another one follows.
  Return: 0 on success, TH_EBADPACKET if the first bit is set (this is not a
           data packet), or TH_EIMPL if the three spare bits of an intra frame
           are not all zero.*/
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  long val;
  /*Check to make sure this is a data packet.*/
  val=oc_pack_read1(&_dec->opb);
  if(val!=0)return TH_EBADPACKET;
  /*Read in the frame type (I or P).*/
  val=oc_pack_read1(&_dec->opb);
  _dec->state.frame_type=(int)val;
  /*Read in the qi list.*/
  val=oc_pack_read(&_dec->opb,6);
  _dec->state.qis[0]=(unsigned char)val;
  val=oc_pack_read1(&_dec->opb);
  if(!val)_dec->state.nqis=1;
  else{
    val=oc_pack_read(&_dec->opb,6);
    _dec->state.qis[1]=(unsigned char)val;
    val=oc_pack_read1(&_dec->opb);
    if(!val)_dec->state.nqis=2;
    else{
      val=oc_pack_read(&_dec->opb,6);
      _dec->state.qis[2]=(unsigned char)val;
      _dec->state.nqis=3;
    }
  }
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
      Most of the other unused bits in the VP3 headers were eliminated.
      I don't know why these remain.*/
    /*I wanted to eliminate wasted bits, but not all config wiggle room
       --Monty.*/
    val=oc_pack_read(&_dec->opb,3);
    if(val!=0)return TH_EIMPL;
  }
  return 0;
}

/*Mark all fragments as coded and in OC_MODE_INTRA.
  This also builds up the coded fragment list (in coded order), and clears the
   uncoded fragment list.
1.485 + It does not update the coded macro block list nor the super block flags, as 1.486 + those are not used when decoding INTRA frames.*/ 1.487 +static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){ 1.488 + const oc_sb_map *sb_maps; 1.489 + const oc_sb_flags *sb_flags; 1.490 + oc_fragment *frags; 1.491 + ptrdiff_t *coded_fragis; 1.492 + ptrdiff_t ncoded_fragis; 1.493 + ptrdiff_t prev_ncoded_fragis; 1.494 + unsigned nsbs; 1.495 + unsigned sbi; 1.496 + int pli; 1.497 + coded_fragis=_dec->state.coded_fragis; 1.498 + prev_ncoded_fragis=ncoded_fragis=0; 1.499 + sb_maps=(const oc_sb_map *)_dec->state.sb_maps; 1.500 + sb_flags=_dec->state.sb_flags; 1.501 + frags=_dec->state.frags; 1.502 + sbi=nsbs=0; 1.503 + for(pli=0;pli<3;pli++){ 1.504 + nsbs+=_dec->state.fplanes[pli].nsbs; 1.505 + for(;sbi<nsbs;sbi++){ 1.506 + int quadi; 1.507 + for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){ 1.508 + int bi; 1.509 + for(bi=0;bi<4;bi++){ 1.510 + ptrdiff_t fragi; 1.511 + fragi=sb_maps[sbi][quadi][bi]; 1.512 + if(fragi>=0){ 1.513 + frags[fragi].coded=1; 1.514 + frags[fragi].refi=OC_FRAME_SELF; 1.515 + frags[fragi].mb_mode=OC_MODE_INTRA; 1.516 + coded_fragis[ncoded_fragis++]=fragi; 1.517 + } 1.518 + } 1.519 + } 1.520 + } 1.521 + _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis; 1.522 + prev_ncoded_fragis=ncoded_fragis; 1.523 + } 1.524 + _dec->state.ntotal_coded_fragis=ncoded_fragis; 1.525 +} 1.526 + 1.527 +/*Decodes the bit flags indicating whether each super block is partially coded 1.528 + or not. 
1.529 + Return: The number of partially coded super blocks.*/ 1.530 +static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){ 1.531 + oc_sb_flags *sb_flags; 1.532 + unsigned nsbs; 1.533 + unsigned sbi; 1.534 + unsigned npartial; 1.535 + unsigned run_count; 1.536 + long val; 1.537 + int flag; 1.538 + val=oc_pack_read1(&_dec->opb); 1.539 + flag=(int)val; 1.540 + sb_flags=_dec->state.sb_flags; 1.541 + nsbs=_dec->state.nsbs; 1.542 + sbi=npartial=0; 1.543 + while(sbi<nsbs){ 1.544 + int full_run; 1.545 + run_count=oc_sb_run_unpack(&_dec->opb); 1.546 + full_run=run_count>=4129; 1.547 + do{ 1.548 + sb_flags[sbi].coded_partially=flag; 1.549 + sb_flags[sbi].coded_fully=0; 1.550 + npartial+=flag; 1.551 + sbi++; 1.552 + } 1.553 + while(--run_count>0&&sbi<nsbs); 1.554 + if(full_run&&sbi<nsbs){ 1.555 + val=oc_pack_read1(&_dec->opb); 1.556 + flag=(int)val; 1.557 + } 1.558 + else flag=!flag; 1.559 + } 1.560 + /*TODO: run_count should be 0 here. 1.561 + If it's not, we should issue a warning of some kind.*/ 1.562 + return npartial; 1.563 +} 1.564 + 1.565 +/*Decodes the bit flags for whether or not each non-partially-coded super 1.566 + block is fully coded or not. 1.567 + This function should only be called if there is at least one 1.568 + non-partially-coded super block. 
The decoded flag is stored in the coded_fully field of each
   non-partially-coded super block; partially coded super blocks are skipped
   and nothing is returned.*/
static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
  oc_sb_flags *sb_flags;
  unsigned nsbs;
  unsigned sbi;
  unsigned run_count;
  long val;
  int flag;
  sb_flags=_dec->state.sb_flags;
  nsbs=_dec->state.nsbs;
  /*Skip partially coded super blocks.
    This scan has no bound of its own; it relies on the caller's guarantee
     that at least one super block is not partially coded.*/
  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
  val=oc_pack_read1(&_dec->opb);
  flag=(int)val;
  do{
    int full_run;
    run_count=oc_sb_run_unpack(&_dec->opb);
    full_run=run_count>=4129;
    /*Only super blocks that are not partially coded consume the run.*/
    for(;sbi<nsbs;sbi++){
      if(sb_flags[sbi].coded_partially)continue;
      if(run_count--<=0)break;
      sb_flags[sbi].coded_fully=flag;
    }
    /*A maximum-length run is followed by an explicit flag bit; after any
       other run the flag simply toggles.*/
    if(full_run&&sbi<nsbs){
      val=oc_pack_read1(&_dec->opb);
      flag=(int)val;
    }
    else flag=!flag;
  }
  while(sbi<nsbs);
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}

/*Unpacks the coded flag of every fragment and builds the coded and uncoded
   fragment lists for an INTER frame.
  The super block flags are decoded first; fragments of fully coded super
   blocks are coded, fragments of super blocks that are neither fully nor
   partially coded are not coded, and fragments of partially coded super
   blocks get their flag from a second run-length coded bit string.
  The coded fragment list is filled from the front of coded_fragis, and the
   uncoded fragment list grows downwards from the end of the same array.
  For the first plane, mb_modes is also set to a non-zero value for every
   macro block that has at least one coded block.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  const oc_sb_map *sb_maps;
  const oc_sb_flags *sb_flags;
  signed char *mb_modes;
  oc_fragment *frags;
  unsigned nsbs;
  unsigned sbi;
  unsigned npartial;
  long val;
  int pli;
  int flag;
  int run_count;
  ptrdiff_t *coded_fragis;
  ptrdiff_t *uncoded_fragis;
  ptrdiff_t ncoded_fragis;
  ptrdiff_t nuncoded_fragis;
  ptrdiff_t prev_ncoded_fragis;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  if(npartial>0){
    /*The flag is stored inverted because it is toggled again when the first
       run is read below.*/
    val=oc_pack_read1(&_dec->opb);
    flag=!(int)val;
  }
  else flag=0;
  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
  sb_flags=_dec->state.sb_flags;
  mb_modes=_dec->state.mb_modes;
  frags=_dec->state.frags;
  sbi=nsbs=run_count=0;
  coded_fragis=_dec->state.coded_fragis;
  uncoded_fragis=coded_fragis+_dec->state.nfrags;
  prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0;
  for(pli=0;pli<3;pli++){
    nsbs+=_dec->state.fplanes[pli].nsbs;
    for(;sbi<nsbs;sbi++){
      int quadi;
      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
        int quad_coded;
        int bi;
        quad_coded=0;
        for(bi=0;bi<4;bi++){
          ptrdiff_t fragi;
          fragi=sb_maps[sbi][quadi][bi];
          if(fragi>=0){
            int coded;
            if(sb_flags[sbi].coded_fully)coded=1;
            else if(!sb_flags[sbi].coded_partially)coded=0;
            else{
              /*Start a new run, with the opposite flag value, whenever the
                 current one is used up.*/
              if(run_count<=0){
                run_count=oc_block_run_unpack(&_dec->opb);
                flag=!flag;
              }
              run_count--;
              coded=flag;
            }
            if(coded)coded_fragis[ncoded_fragis++]=fragi;
            else *(uncoded_fragis-++nuncoded_fragis)=fragi;
            quad_coded|=coded;
            frags[fragi].coded=coded;
            frags[fragi].refi=OC_FRAME_NONE;
          }
        }
        /*Remember if there's a coded luma block in this macro block.*/
        if(!pli)mb_modes[sbi<<2|quadi]=quad_coded;
      }
    }
    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
    prev_ncoded_fragis=ncoded_fragis;
  }
  _dec->state.ntotal_coded_fragis=ncoded_fragis;
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
}


/*Coding scheme:
   Codeword            Mode Index
   0                       0
   10                      1
   110                     2
   1110                    3
   11110                   4
   111110                  5
   1111110                 6
   1111111                 7*/
static const ogg_int16_t OC_VLC_MODE_TREE[26]={
  4,
  -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
  -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
  -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
  -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
  3,
  -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
  -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
};

/*The fixed-length alternative to OC_VLC_MODE_TREE: every mode index is coded
   with exactly 3 bits.
  It is selected when the mode scheme is 7.*/
static const ogg_int16_t OC_CLC_MODE_TREE[9]={
  3,
  -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
  -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
};

/*Unpacks the list of macro block modes for INTER frames.
  A 3-bit mode scheme is read first.
  Scheme 0 is followed by a custom alphabet, schemes 1 through 7 use the
   corresponding row of OC_MODE_ALPHABETS, and scheme 7 additionally switches
   from the variable-length to the fixed-length mode tree.
  A mode is then decoded for every macro block whose mb_modes entry is
   currently positive, and written back to that entry.*/
static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
  signed char *mb_modes;
  const unsigned char *alphabet;
  unsigned char scheme0_alphabet[8];
  const ogg_int16_t *mode_tree;
  size_t nmbs;
  size_t mbi;
  long val;
  int mode_scheme;
  val=oc_pack_read(&_dec->opb,3);
  mode_scheme=(int)val;
  if(mode_scheme==0){
    int mi;
    /*Just in case, initialize the modes to something.
      If the bitstream doesn't contain each index exactly once, it's likely
       corrupt and the rest of the packet is garbage anyway, but this way we
       won't crash, and we'll decode SOMETHING.*/
    /*LOOP VECTORIZES*/
    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
    /*For each mode, taken in the default order, the bitstream supplies the
       position it occupies in the custom alphabet.*/
    for(mi=0;mi<OC_NMODES;mi++){
      val=oc_pack_read(&_dec->opb,3);
      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
    }
    alphabet=scheme0_alphabet;
  }
  else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
  mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
  mb_modes=_dec->state.mb_modes;
  nmbs=_dec->state.nmbs;
  for(mbi=0;mbi<nmbs;mbi++){
    if(mb_modes[mbi]>0){
      /*We have a coded luma block; decode a mode.*/
      mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
    }
    /*For other valid macro blocks, INTER_NOMV is forced, but we rely on the
       fact that OC_MODE_INTER_NOMV is already 0.*/
  }
}



/*The variable-length code for a single motion vector component.
  Each leaf stores the component value plus 32; oc_mv_unpack() removes the
   bias again after decoding.*/
static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
  5,
  -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
  -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
  -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
  -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
  -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
  33, 36, 39, 42,
  45, 50, 55, 60,
  65, 74, 83, 92,
  1,-(1<<8|32+4),-(1<<8|32-4),
  1,-(1<<8|32+5),-(1<<8|32-5),
  1,-(1<<8|32+6),-(1<<8|32-6),
  1,-(1<<8|32+7),-(1<<8|32-7),
  2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
  2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
  2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
  2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
  3,
  -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
  -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
  3,
  -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
  -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
  3,
  -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
  -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
  3,
  -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
  -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
};

/*The fixed-length code for a single motion vector component: 6 bits, made up
   of a 5-bit magnitude followed by a sign bit.
  As in OC_VLC_MV_COMP_TREE, each leaf stores the component value plus 32.*/
static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
  6,
  -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
  -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
  -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
  -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
  -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
  -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
  -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
  -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
  -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
  -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
  -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
  -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
  -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
  -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
  -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
  -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
};


/*Decodes one motion vector: the x component first, then the y component.
  _opb:  The pack buffer to read from.
  _tree: The motion vector component tree to decode with.
  Return: The two components combined with OC_MV().*/
static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
  int dx;
  int dy;
  dx=oc_huff_token_decode(_opb,_tree)-32;
  dy=oc_huff_token_decode(_opb,_tree)-32;
  return OC_MV(dx,dy);
}

/*Unpacks the list of motion vectors for
INTER frames, and propagates the macro
   block modes and motion vectors to the individual fragments.
  A single bit at the start selects the fixed-length or the variable-length
   motion vector component code for the whole frame.
  Only coded fragments have their reference frame, mode, and motion vector
   updated.*/
static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
  const oc_mb_map *mb_maps;
  const signed char *mb_modes;
  oc_set_chroma_mvs_func set_chroma_mvs;
  const ogg_int16_t *mv_comp_tree;
  oc_fragment *frags;
  oc_mv *frag_mvs;
  const unsigned char *map_idxs;
  int map_nidxs;
  oc_mv last_mv;
  oc_mv prior_mv;
  oc_mv cbmvs[4];
  size_t nmbs;
  size_t mbi;
  long val;
  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
  val=oc_pack_read1(&_dec->opb);
  mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
  prior_mv=last_mv=0;
  frags=_dec->state.frags;
  frag_mvs=_dec->state.frag_mvs;
  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
  mb_modes=_dec->state.mb_modes;
  nmbs=_dec->state.nmbs;
  for(mbi=0;mbi<nmbs;mbi++){
    int mb_mode;
    mb_mode=mb_modes[mbi];
    if(mb_mode!=OC_MODE_INVALID){
      oc_mv mbmv;
      ptrdiff_t fragi;
      int mapi;
      int mapii;
      int refi;
      if(mb_mode==OC_MODE_INTER_MV_FOUR){
        oc_mv lbmvs[4];
        int bi;
        /*One motion vector is read for each coded block of the first plane;
           uncoded blocks get a zero vector.
          last_mv ends up as the last vector that was actually read.*/
        prior_mv=last_mv;
        for(bi=0;bi<4;bi++){
          fragi=mb_maps[mbi][0][bi];
          if(frags[fragi].coded){
            frags[fragi].refi=OC_FRAME_PREV;
            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
            lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
            frag_mvs[fragi]=lbmvs[bi];
          }
          else lbmvs[bi]=0;
        }
        /*Derive the vectors for the remaining planes from the four vectors
           above, then hand them to the blocks listed after the first four map
           indices.*/
        (*set_chroma_mvs)(cbmvs,lbmvs);
        for(mapii=4;mapii<map_nidxs;mapii++){
          mapi=map_idxs[mapii];
          bi=mapi&3;
          fragi=mb_maps[mbi][mapi>>2][bi];
          if(frags[fragi].coded){
            frags[fragi].refi=OC_FRAME_PREV;
            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
            frag_mvs[fragi]=cbmvs[bi];
          }
        }
      }
      else{
        switch(mb_mode){
          case OC_MODE_INTER_MV:{
            /*A newly coded vector becomes last_mv and pushes the old one down
               to prior_mv.*/
            prior_mv=last_mv;
            last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
          }break;
          case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
          case OC_MODE_INTER_MV_LAST2:{
            /*Reusing prior_mv swaps it with last_mv.*/
            mbmv=prior_mv;
            prior_mv=last_mv;
            last_mv=mbmv;
          }break;
          case OC_MODE_GOLDEN_MV:{
            /*This vector is read from the stream but does not update last_mv
               or prior_mv.*/
            mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
          }break;
          default:mbmv=0;break;
        }
        /*Fill in the MVs for the fragments.*/
        refi=OC_FRAME_FOR_MODE(mb_mode);
        mapii=0;
        do{
          mapi=map_idxs[mapii];
          fragi=mb_maps[mbi][mapi>>2][mapi&3];
          if(frags[fragi].coded){
            frags[fragi].refi=refi;
            frags[fragi].mb_mode=mb_mode;
            frag_mvs[fragi]=mbmv;
          }
        }
        while(++mapii<map_nidxs);
      }
    }
  }
}

static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  oc_fragment *frags;
  const ptrdiff_t *coded_fragis;
  ptrdiff_t ncoded_fragis;
  ptrdiff_t fragii;
  ptrdiff_t fragi;
  ncoded_fragis=_dec->state.ntotal_coded_fragis;
  if(ncoded_fragis<=0)return;
  frags=_dec->state.frags;
  coded_fragis=_dec->state.coded_fragis;
  if(_dec->state.nqis==1){
    /*If this frame has only a single qi value, then just use it for all coded
       fragments.*/
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      frags[coded_fragis[fragii]].qii=0;
    }
  }
  else{
    long val;
    int flag;
    int nqi1;
    int run_count;
    /*Otherwise, we decode a qi index for each fragment, using two passes of
       the same binary RLE scheme used for super-block coded bits.
1.930 + The first pass marks each fragment as having a qii of 0 or greater than 1.931 + 0, and the second pass (if necessary), distinguishes between a qii of 1.932 + 1 and 2. 1.933 + At first we just store the qii in the fragment. 1.934 + After all the qii's are decoded, we make a final pass to replace them 1.935 + with the corresponding qi's for this frame.*/ 1.936 + val=oc_pack_read1(&_dec->opb); 1.937 + flag=(int)val; 1.938 + nqi1=0; 1.939 + fragii=0; 1.940 + while(fragii<ncoded_fragis){ 1.941 + int full_run; 1.942 + run_count=oc_sb_run_unpack(&_dec->opb); 1.943 + full_run=run_count>=4129; 1.944 + do{ 1.945 + frags[coded_fragis[fragii++]].qii=flag; 1.946 + nqi1+=flag; 1.947 + } 1.948 + while(--run_count>0&&fragii<ncoded_fragis); 1.949 + if(full_run&&fragii<ncoded_fragis){ 1.950 + val=oc_pack_read1(&_dec->opb); 1.951 + flag=(int)val; 1.952 + } 1.953 + else flag=!flag; 1.954 + } 1.955 + /*TODO: run_count should be 0 here. 1.956 + If it's not, we should issue a warning of some kind.*/ 1.957 + /*If we have 3 different qi's for this frame, and there was at least one 1.958 + fragment with a non-zero qi, make the second pass.*/ 1.959 + if(_dec->state.nqis==3&&nqi1>0){ 1.960 + /*Skip qii==0 fragments.*/ 1.961 + for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++); 1.962 + val=oc_pack_read1(&_dec->opb); 1.963 + flag=(int)val; 1.964 + do{ 1.965 + int full_run; 1.966 + run_count=oc_sb_run_unpack(&_dec->opb); 1.967 + full_run=run_count>=4129; 1.968 + for(;fragii<ncoded_fragis;fragii++){ 1.969 + fragi=coded_fragis[fragii]; 1.970 + if(frags[fragi].qii==0)continue; 1.971 + if(run_count--<=0)break; 1.972 + frags[fragi].qii+=flag; 1.973 + } 1.974 + if(full_run&&fragii<ncoded_fragis){ 1.975 + val=oc_pack_read1(&_dec->opb); 1.976 + flag=(int)val; 1.977 + } 1.978 + else flag=!flag; 1.979 + } 1.980 + while(fragii<ncoded_fragis); 1.981 + /*TODO: run_count should be 0 here. 
1.982 + If it's not, we should issue a warning of some kind.*/ 1.983 + } 1.984 + } 1.985 +} 1.986 + 1.987 + 1.988 + 1.989 +/*Unpacks the DC coefficient tokens. 1.990 + Unlike when unpacking the AC coefficient tokens, we actually need to decode 1.991 + the DC coefficient values now so that we can do DC prediction. 1.992 + _huff_idx: The index of the Huffman table to use for each color plane. 1.993 + _ntoks_left: The number of tokens left to be decoded in each color plane for 1.994 + each coefficient. 1.995 + This is updated as EOB tokens and zero run tokens are decoded. 1.996 + Return: The length of any outstanding EOB run.*/ 1.997 +static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2], 1.998 + ptrdiff_t _ntoks_left[3][64]){ 1.999 + unsigned char *dct_tokens; 1.1000 + oc_fragment *frags; 1.1001 + const ptrdiff_t *coded_fragis; 1.1002 + ptrdiff_t ncoded_fragis; 1.1003 + ptrdiff_t fragii; 1.1004 + ptrdiff_t eobs; 1.1005 + ptrdiff_t ti; 1.1006 + int pli; 1.1007 + dct_tokens=_dec->dct_tokens; 1.1008 + frags=_dec->state.frags; 1.1009 + coded_fragis=_dec->state.coded_fragis; 1.1010 + ncoded_fragis=fragii=eobs=ti=0; 1.1011 + for(pli=0;pli<3;pli++){ 1.1012 + ptrdiff_t run_counts[64]; 1.1013 + ptrdiff_t eob_count; 1.1014 + ptrdiff_t eobi; 1.1015 + int rli; 1.1016 + ncoded_fragis+=_dec->state.ncoded_fragis[pli]; 1.1017 + memset(run_counts,0,sizeof(run_counts)); 1.1018 + _dec->eob_runs[pli][0]=eobs; 1.1019 + _dec->ti0[pli][0]=ti; 1.1020 + /*Continue any previous EOB run, if there was one.*/ 1.1021 + eobi=eobs; 1.1022 + if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii; 1.1023 + eob_count=eobi; 1.1024 + eobs-=eobi; 1.1025 + while(eobi-->0)frags[coded_fragis[fragii++]].dc=0; 1.1026 + while(fragii<ncoded_fragis){ 1.1027 + int token; 1.1028 + int cw; 1.1029 + int eb; 1.1030 + int skip; 1.1031 + token=oc_huff_token_decode(&_dec->opb, 1.1032 + _dec->huff_tables[_huff_idxs[pli+1>>1]]); 1.1033 + dct_tokens[ti++]=(unsigned char)token; 1.1034 + 
if(OC_DCT_TOKEN_NEEDS_MORE(token)){ 1.1035 + eb=(int)oc_pack_read(&_dec->opb, 1.1036 + OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]); 1.1037 + dct_tokens[ti++]=(unsigned char)eb; 1.1038 + if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8); 1.1039 + eb<<=OC_DCT_TOKEN_EB_POS(token); 1.1040 + } 1.1041 + else eb=0; 1.1042 + cw=OC_DCT_CODE_WORD[token]+eb; 1.1043 + eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; 1.1044 + if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH; 1.1045 + if(eobs){ 1.1046 + eobi=OC_MINI(eobs,ncoded_fragis-fragii); 1.1047 + eob_count+=eobi; 1.1048 + eobs-=eobi; 1.1049 + while(eobi-->0)frags[coded_fragis[fragii++]].dc=0; 1.1050 + } 1.1051 + else{ 1.1052 + int coeff; 1.1053 + skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); 1.1054 + cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT); 1.1055 + coeff=cw>>OC_DCT_CW_MAG_SHIFT; 1.1056 + if(skip)coeff=0; 1.1057 + run_counts[skip]++; 1.1058 + frags[coded_fragis[fragii++]].dc=coeff; 1.1059 + } 1.1060 + } 1.1061 + /*Add the total EOB count to the longest run length.*/ 1.1062 + run_counts[63]+=eob_count; 1.1063 + /*And convert the run_counts array to a moment table.*/ 1.1064 + for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; 1.1065 + /*Finally, subtract off the number of coefficients that have been 1.1066 + accounted for by runs started in this coefficient.*/ 1.1067 + for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli]; 1.1068 + } 1.1069 + _dec->dct_tokens_count=ti; 1.1070 + return eobs; 1.1071 +} 1.1072 + 1.1073 +/*Unpacks the AC coefficient tokens. 1.1074 + This can completely discard coefficient values while unpacking, and so is 1.1075 + somewhat simpler than unpacking the DC coefficient tokens. 1.1076 + _huff_idx: The index of the Huffman table to use for each color plane. 1.1077 + _ntoks_left: The number of tokens left to be decoded in each color plane for 1.1078 + each coefficient. 1.1079 + This is updated as EOB tokens and zero run tokens are decoded. 
1.1080 + _eobs: The length of any outstanding EOB run from previous 1.1081 + coefficients. 1.1082 + Return: The length of any outstanding EOB run.*/ 1.1083 +static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2], 1.1084 + ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){ 1.1085 + unsigned char *dct_tokens; 1.1086 + ptrdiff_t ti; 1.1087 + int pli; 1.1088 + dct_tokens=_dec->dct_tokens; 1.1089 + ti=_dec->dct_tokens_count; 1.1090 + for(pli=0;pli<3;pli++){ 1.1091 + ptrdiff_t run_counts[64]; 1.1092 + ptrdiff_t eob_count; 1.1093 + size_t ntoks_left; 1.1094 + size_t ntoks; 1.1095 + int rli; 1.1096 + _dec->eob_runs[pli][_zzi]=_eobs; 1.1097 + _dec->ti0[pli][_zzi]=ti; 1.1098 + ntoks_left=_ntoks_left[pli][_zzi]; 1.1099 + memset(run_counts,0,sizeof(run_counts)); 1.1100 + eob_count=0; 1.1101 + ntoks=0; 1.1102 + while(ntoks+_eobs<ntoks_left){ 1.1103 + int token; 1.1104 + int cw; 1.1105 + int eb; 1.1106 + int skip; 1.1107 + ntoks+=_eobs; 1.1108 + eob_count+=_eobs; 1.1109 + token=oc_huff_token_decode(&_dec->opb, 1.1110 + _dec->huff_tables[_huff_idxs[pli+1>>1]]); 1.1111 + dct_tokens[ti++]=(unsigned char)token; 1.1112 + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ 1.1113 + eb=(int)oc_pack_read(&_dec->opb, 1.1114 + OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]); 1.1115 + dct_tokens[ti++]=(unsigned char)eb; 1.1116 + if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8); 1.1117 + eb<<=OC_DCT_TOKEN_EB_POS(token); 1.1118 + } 1.1119 + else eb=0; 1.1120 + cw=OC_DCT_CODE_WORD[token]+eb; 1.1121 + skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); 1.1122 + _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; 1.1123 + if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH; 1.1124 + if(_eobs==0){ 1.1125 + run_counts[skip]++; 1.1126 + ntoks++; 1.1127 + } 1.1128 + } 1.1129 + /*Add the portion of the last EOB run actually used by this coefficient.*/ 1.1130 + eob_count+=ntoks_left-ntoks; 1.1131 + /*And remove it from the remaining EOB count.*/ 1.1132 + _eobs-=ntoks_left-ntoks; 1.1133 + /*Add the total 
EOB count to the longest run length.*/ 1.1134 + run_counts[63]+=eob_count; 1.1135 + /*And convert the run_counts array to a moment table.*/ 1.1136 + for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1]; 1.1137 + /*Finally, subtract off the number of coefficients that have been 1.1138 + accounted for by runs started in this coefficient.*/ 1.1139 + for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli]; 1.1140 + } 1.1141 + _dec->dct_tokens_count=ti; 1.1142 + return _eobs; 1.1143 +} 1.1144 + 1.1145 +/*Tokens describing the DCT coefficients that belong to each fragment are 1.1146 + stored in the bitstream grouped by coefficient, not by fragment. 1.1147 + 1.1148 + This means that we either decode all the tokens in order, building up a 1.1149 + separate coefficient list for each fragment as we go, and then go back and 1.1150 + do the iDCT on each fragment, or we have to create separate lists of tokens 1.1151 + for each coefficient, so that we can pull the next token required off the 1.1152 + head of the appropriate list when decoding a specific fragment. 1.1153 + 1.1154 + The former was VP3's choice, and it meant 2*w*h extra storage for all the 1.1155 + decoded coefficient values. 1.1156 + 1.1157 + We take the second option, which lets us store just one to three bytes per 1.1158 + token (generally far fewer than the number of coefficients, due to EOB 1.1159 + tokens and zero runs), and which requires us to only maintain a counter for 1.1160 + each of the 64 coefficients, instead of a counter for every fragment to 1.1161 + determine where the next token goes. 1.1162 + 1.1163 + We actually use 3 counters per coefficient, one for each color plane, so we 1.1164 + can decode all color planes simultaneously. 
This lets color conversion, etc., be done as soon as a full MCU (one or
   two super block rows) is decoded, while the image data is still in cache.*/

/*Unpacks all of the DCT tokens for the frame: the DC tokens first, and then
   the AC tokens for coefficients 1 through 63, one coefficient at a time.
  Two 4-bit Huffman table indices are read before the DC tokens, and two more
   before the AC tokens.
  The AC indices are advanced by 16 at the start of each of the four
   coefficient groups delimited by OC_HUFF_LIST_MAX.*/
static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
  ptrdiff_t ntoks_left[3][64];
  int huff_idxs[2];
  ptrdiff_t eobs;
  long val;
  int pli;
  int zzi;
  int hgi;
  /*Initially, every coded fragment in a plane needs a token for every
     coefficient.*/
  for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){
    ntoks_left[pli][zzi]=_dec->state.ncoded_fragis[pli];
  }
  val=oc_pack_read(&_dec->opb,4);
  huff_idxs[0]=(int)val;
  val=oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)val;
  _dec->eob_runs[0][0]=0;
  eobs=oc_dec_dc_coeff_unpack(_dec,huff_idxs,ntoks_left);
#if defined(HAVE_CAIRO)
  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
  val=oc_pack_read(&_dec->opb,4);
  huff_idxs[0]=(int)val;
  val=oc_pack_read(&_dec->opb,4);
  huff_idxs[1]=(int)val;
  zzi=1;
  for(hgi=1;hgi<5;hgi++){
    huff_idxs[0]+=16;
    huff_idxs[1]+=16;
    for(;zzi<OC_HUFF_LIST_MAX[hgi];zzi++){
      /*The outstanding EOB run is threaded from one coefficient to the
         next.*/
      eobs=oc_dec_ac_coeff_unpack(_dec,zzi,huff_idxs,ntoks_left,eobs);
    }
  }
  /*TODO: eobs should be exactly zero, or 4096 or greater.
    The second case occurs when an EOB run of size zero is encountered, which
     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
    If neither of these conditions holds, then a warning should be issued.*/
}


/*Allocates, frees, and points the post-processing buffers according to
   _dec->pp_level.
  Return: 0 if the post-processing frame buffers are set up for this frame, or
           1 if post-processing should not be run on it (it is disabled, only
           DC quantization indices are being tracked, tracking could not be
           started on this frame, or an allocation failed).*/
static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
  /*pp_level 0: disabled; free any memory used and return*/
  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
    if(_dec->dc_qis!=NULL){
      _ogg_free(_dec->dc_qis);
      _dec->dc_qis=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->dc_qis==NULL){
    /*If we haven't been tracking DC quantization indices, there's no point in
       starting now.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
    _dec->dc_qis=(unsigned char *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
    if(_dec->dc_qis==NULL)return 1;
    /*Every fragment starts out with the first qi of this frame.*/
    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
  }
  else{
    unsigned char *dc_qis;
    const ptrdiff_t *coded_fragis;
    ptrdiff_t ncoded_fragis;
    ptrdiff_t fragii;
    unsigned char qi0;
    /*Update the DC quantization index of each coded block.*/
    dc_qis=_dec->dc_qis;
    coded_fragis=_dec->state.coded_fragis;
    ncoded_fragis=_dec->state.ncoded_fragis[0]+
     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
    qi0=(unsigned char)_dec->state.qis[0];
    for(fragii=0;fragii<ncoded_fragis;fragii++){
      dc_qis[coded_fragis[fragii]]=qi0;
    }
  }
  /*pp_level 1: Stop after updating DC quantization indices.*/
  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
    if(_dec->variances!=NULL){
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
    }
    return 1;
  }
  if(_dec->variances==NULL){
    size_t frame_sz;
    size_t c_sz;
    int c_w;
    int c_h;
    frame_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
    /*The chroma width is halved when bit 0 of pixel_fmt is clear, and the
       chroma height is halved when bit 1 is clear.*/
    c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
    c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
    c_sz=c_w*(size_t)c_h;
    /*Allocate space for the chroma planes, even if we're not going to use
       them; this simplifies allocation state management, though it may waste
       memory on the few systems that don't overcommit pages.*/
    frame_sz+=c_sz<<1;
    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
     frame_sz*sizeof(_dec->pp_frame_data[0]));
    _dec->variances=(int *)_ogg_malloc(
     _dec->state.nfrags*sizeof(_dec->variances[0]));
    /*If either allocation failed, release both so that the next call starts
       from a clean state.*/
    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
      _ogg_free(_dec->pp_frame_data);
      _dec->pp_frame_data=NULL;
      _ogg_free(_dec->variances);
      _dec->variances=NULL;
      return 1;
    }
    /*Force an update of the PP buffer pointers.*/
    _dec->pp_frame_state=0;
  }
  /*Update the PP buffer pointers if necessary.
    pp_frame_state is 1 when only plane 0 points into pp_frame_data, and 2
     when all three planes do.*/
  if(_dec->pp_frame_state!=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC)){
    if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
      /*If chroma processing is disabled, just use the PP luma plane.*/
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      /*The stride is negative, so data points at the last row of the
         allocation.*/
      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
    }
    else{
      size_t y_sz;
      size_t c_sz;
      int c_w;
      int c_h;
      /*Otherwise, set up pointers to all three PP planes.*/
      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
      c_sz=c_w*(size_t)c_h;
      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
      _dec->pp_frame_buf[1].width=c_w;
      _dec->pp_frame_buf[1].height=c_h;
      _dec->pp_frame_buf[1].stride=_dec->pp_frame_buf[1].width;
      _dec->pp_frame_buf[1].data=_dec->pp_frame_buf[0].data+y_sz;
      _dec->pp_frame_buf[2].width=c_w;
      _dec->pp_frame_buf[2].height=c_h;
      _dec->pp_frame_buf[2].stride=_dec->pp_frame_buf[2].width;
      _dec->pp_frame_buf[2].data=_dec->pp_frame_buf[1].data+c_sz;
      /*NOTE(review): presumably this flips all three planes in place so that
         they match the negative-stride layout used above; confirm against
         oc_ycbcr_buffer_flip().*/
      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
    }
    _dec->pp_frame_state=1+(_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC);
  }
  /*If we're not processing chroma, copy the reference frame's chroma planes.*/
  if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
    /*Only the two plane descriptors are copied, not any pixel data.*/
    memcpy(_dec->pp_frame_buf+1,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
     sizeof(_dec->pp_frame_buf[1])*2);
  }
  return 0;
}


/*Initialize the main decoding pipeline.
  _pipe: The pipeline state to fill in from the frame-level state in _dec.*/
static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe){
  const ptrdiff_t *coded_fragis;
  const ptrdiff_t *uncoded_fragis;
  int flimit;
  int pli;
  int qii;
  int qti;
  int zzi;
  /*If chroma is sub-sampled in the vertical direction, we have to decode two
     super block rows of Y' for each super block row of Cb and Cr.*/
  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
  /*Initialize the token and extra bits indices for each plane and
     coefficient.*/
  memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
  /*Also copy over the initial EOB run counts.*/
  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
  /*Set up per-plane pointers to the coded and uncoded fragments lists.
    The coded list is walked forwards from the start of coded_fragis, while
     the uncoded pointers start nfrags entries later and move backwards by the
     number of uncoded fragments in each plane.*/
  coded_fragis=_dec->state.coded_fragis;
  uncoded_fragis=coded_fragis+_dec->state.nfrags;
  for(pli=0;pli<3;pli++){
    ptrdiff_t ncoded_fragis;
    _pipe->coded_fragis[pli]=coded_fragis;
    _pipe->uncoded_fragis[pli]=uncoded_fragis;
    ncoded_fragis=_dec->state.ncoded_fragis[pli];
    coded_fragis+=ncoded_fragis;
    uncoded_fragis+=ncoded_fragis-_dec->state.fplanes[pli].nfrags;
  }
  /*Set up condensed quantizer tables.
    These are indexed by qii rather than by qi, so only the entries for the
     qi's used by this frame are filled in.*/
  for(pli=0;pli<3;pli++){
    for(qii=0;qii<_dec->state.nqis;qii++){
      for(qti=0;qti<2;qti++){
        _pipe->dequant[pli][qii][qti]=
         _dec->state.dequant_tables[_dec->state.qis[qii]][pli][qti];
      }
    }
  }
  /*Set the previous DC predictor to 0 for all color planes and frame types.*/
  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
  /*Initialize the bounding value array for the loop filter.
    A limit of zero disables the loop filter for this frame.*/
  flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
  _pipe->loop_filter=flimit!=0;
  if(flimit!=0)oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
  /*Initialize any buffers needed for post-processing.
    We also save the current post-processing level, to guard against the user
     changing it from a callback.*/
  if(!oc_dec_postprocess_init(_dec))_pipe->pp_level=_dec->pp_level;
  /*If we don't have enough information to post-process, disable it, regardless
     of the user-requested level.*/
  else{
    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
    /*Point the output planes straight at the current reference frame.*/
    memcpy(_dec->pp_frame_buf,
     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
     sizeof(_dec->pp_frame_buf[0])*3);
  }
  /*Clear down the DCT coefficient buffer for the first block.*/
  for(zzi=0;zzi<64;zzi++)_pipe->dct_coeffs[zzi]=0;
}

/*Undo the DC prediction in a single plane of an MCU (one or two super block
   rows).
  As a side effect, the number of coded and uncoded fragments in this plane of
   the MCU is also computed.
  _pipe: The pipeline state; fragy0[_pli] and fragy_end[_pli] give the range of
          fragment rows to process, and pred_last[_pli] holds the last DC value
          seen for each reference frame.
  _pli:  The color plane to process.*/
void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  const oc_fragment_plane *fplane;
  oc_fragment *frags;
  int *pred_last;
  ptrdiff_t ncoded_fragis;
  ptrdiff_t fragi;
  int fragx;
  int fragy;
  int fragy0;
  int fragy_end;
  int nhfrags;
  /*Compute the first and last fragment row of the current MCU for this
     plane.*/
  fplane=_dec->state.fplanes+_pli;
  fragy0=_pipe->fragy0[_pli];
  fragy_end=_pipe->fragy_end[_pli];
  nhfrags=fplane->nhfrags;
  pred_last=_pipe->pred_last[_pli];
  frags=_dec->state.frags;
  ncoded_fragis=0;
  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags;
  for(fragy=fragy0;fragy<fragy_end;fragy++){
    if(fragy==0){
      /*For the first row, all of the cases reduce to just using the previous
         predictor for the same reference frame.*/
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        if(frags[fragi].coded){
          int refi;
          refi=frags[fragi].refi;
          pred_last[refi]=frags[fragi].dc+=pred_last[refi];
          ncoded_fragis++;
        }
      }
    }
    else{
      oc_fragment *u_frags;
      int l_ref;
      int ul_ref;
      int u_ref;
      /*u_frags[fragi] is the fragment directly above frags[fragi].*/
      u_frags=frags-nhfrags;
      /*A reference of -1 never matches, and marks a neighbor that is outside
         the plane or (for the left neighbor) not coded.
        NOTE(review): the refi of the neighbors above is read without checking
         their coded flag; presumably uncoded fragments carry a refi that
         never matches a coded one.
        Confirm where refi is initialized.*/
      l_ref=-1;
      ul_ref=-1;
      u_ref=u_frags[fragi].refi;
      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
        int ur_ref;
        if(fragx+1>=nhfrags)ur_ref=-1;
        else ur_ref=u_frags[fragi+1].refi;
        if(frags[fragi].coded){
          int pred;
          int refi;
          refi=frags[fragi].refi;
          /*We break out a separate case based on which of our neighbors use
             the same reference frames.
            This is somewhat faster than trying to make a generic case which
             handles all of them, since it reduces lots of poorly predicted
             jumps to one switch statement, and also lets a number of the
             multiplications be optimized out by strength reduction.
            The switch value has bit 0 set for the left neighbor, bit 1 for
             the upper-left, bit 2 for the upper, and bit 3 for the
             upper-right.*/
          switch((l_ref==refi)|(ul_ref==refi)<<1|
           (u_ref==refi)<<2|(ur_ref==refi)<<3){
            default:pred=pred_last[refi];break;
            case 1:
            case 3:pred=frags[fragi-1].dc;break;
            case 2:pred=u_frags[fragi-1].dc;break;
            case 4:
            case 6:
            case 12:pred=u_frags[fragi].dc;break;
            case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
            case 8:pred=u_frags[fragi+1].dc;break;
            case 9:
            case 11:
            case 13:{
              /*The TI compiler mis-compiles this line.*/
              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
            }break;
            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
            case 14:{
              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
               +10*u_frags[fragi].dc)/16;
            }break;
            case 7:
            case 15:{
              int p0;
              int p1;
              int p2;
              p0=frags[fragi-1].dc;
              p1=u_frags[fragi-1].dc;
              p2=u_frags[fragi].dc;
              pred=(29*(p0+p2)-26*p1)/32;
              /*Fall back to a single neighbor if the weighted prediction
                 strays more than 128 from it.*/
              if(abs(pred-p2)>128)pred=p2;
              else if(abs(pred-p0)>128)pred=p0;
              else if(abs(pred-p1)>128)pred=p1;
            }break;
          }
          pred_last[refi]=frags[fragi].dc+=pred;
          ncoded_fragis++;
          l_ref=refi;
        }
        else l_ref=-1;
        ul_ref=u_ref;
        u_ref=ur_ref;
      }
    }
  }
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
  /*Also save the number of uncoded fragments so we know how many to copy.*/
  _pipe->nuncoded_fragis[_pli]=
   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
}

/*Reconstructs all coded fragments in a single MCU (one or two super block
   rows).
  This requires that each coded fragment have a proper macro block mode and
   motion vector (if not in INTRA mode), and have its DC value decoded, with
   the DC prediction process reversed, and the number of coded and uncoded
   fragments in this plane of the MCU be counted.
1.1509 + The token lists for each color plane and coefficient should also be filled 1.1510 + in, along with initial token offsets, extra bits offsets, and EOB run 1.1511 + counts.*/ 1.1512 +static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec, 1.1513 + oc_dec_pipeline_state *_pipe,int _pli){ 1.1514 + unsigned char *dct_tokens; 1.1515 + const unsigned char *dct_fzig_zag; 1.1516 + ogg_uint16_t dc_quant[2]; 1.1517 + const oc_fragment *frags; 1.1518 + const ptrdiff_t *coded_fragis; 1.1519 + ptrdiff_t ncoded_fragis; 1.1520 + ptrdiff_t fragii; 1.1521 + ptrdiff_t *ti; 1.1522 + ptrdiff_t *eob_runs; 1.1523 + int qti; 1.1524 + dct_tokens=_dec->dct_tokens; 1.1525 + dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag; 1.1526 + frags=_dec->state.frags; 1.1527 + coded_fragis=_pipe->coded_fragis[_pli]; 1.1528 + ncoded_fragis=_pipe->ncoded_fragis[_pli]; 1.1529 + ti=_pipe->ti[_pli]; 1.1530 + eob_runs=_pipe->eob_runs[_pli]; 1.1531 + for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0]; 1.1532 + for(fragii=0;fragii<ncoded_fragis;fragii++){ 1.1533 + const ogg_uint16_t *ac_quant; 1.1534 + ptrdiff_t fragi; 1.1535 + int last_zzi; 1.1536 + int zzi; 1.1537 + fragi=coded_fragis[fragii]; 1.1538 + qti=frags[fragi].mb_mode!=OC_MODE_INTRA; 1.1539 + ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti]; 1.1540 + /*Decode the AC coefficients.*/ 1.1541 + for(zzi=0;zzi<64;){ 1.1542 + int token; 1.1543 + last_zzi=zzi; 1.1544 + if(eob_runs[zzi]){ 1.1545 + eob_runs[zzi]--; 1.1546 + break; 1.1547 + } 1.1548 + else{ 1.1549 + ptrdiff_t eob; 1.1550 + int cw; 1.1551 + int rlen; 1.1552 + int coeff; 1.1553 + int lti; 1.1554 + lti=ti[zzi]; 1.1555 + token=dct_tokens[lti++]; 1.1556 + cw=OC_DCT_CODE_WORD[token]; 1.1557 + /*These parts could be done branchless, but the branches are fairly 1.1558 + predictable and the C code translates into more than a few 1.1559 + instructions, so it's worth it to avoid them.*/ 1.1560 + if(OC_DCT_TOKEN_NEEDS_MORE(token)){ 1.1561 + 
cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token); 1.1562 + } 1.1563 + eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF; 1.1564 + if(token==OC_DCT_TOKEN_FAT_EOB){ 1.1565 + eob+=dct_tokens[lti++]<<8; 1.1566 + if(eob==0)eob=OC_DCT_EOB_FINISH; 1.1567 + } 1.1568 + rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT); 1.1569 + cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT); 1.1570 + coeff=cw>>OC_DCT_CW_MAG_SHIFT; 1.1571 + eob_runs[zzi]=eob; 1.1572 + ti[zzi]=lti; 1.1573 + zzi+=rlen; 1.1574 + _pipe->dct_coeffs[dct_fzig_zag[zzi]]= 1.1575 + (ogg_int16_t)(coeff*(int)ac_quant[zzi]); 1.1576 + zzi+=!eob; 1.1577 + } 1.1578 + } 1.1579 + /*TODO: zzi should be exactly 64 here. 1.1580 + If it's not, we should report some kind of warning.*/ 1.1581 + zzi=OC_MINI(zzi,64); 1.1582 + _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc; 1.1583 + /*last_zzi is always initialized. 1.1584 + If your compiler thinks otherwise, it is dumb.*/ 1.1585 + oc_state_frag_recon(&_dec->state,fragi,_pli, 1.1586 + _pipe->dct_coeffs,last_zzi,dc_quant[qti]); 1.1587 + } 1.1588 + _pipe->coded_fragis[_pli]+=ncoded_fragis; 1.1589 + /*Right now the reconstructed MCU has only the coded blocks in it.*/ 1.1590 + /*TODO: We make the decision here to always copy the uncoded blocks into it 1.1591 + from the reference frame. 1.1592 + We could also copy the coded blocks back over the reference frame, if we 1.1593 + wait for an additional MCU to be decoded, which might be faster if only a 1.1594 + small number of blocks are coded. 1.1595 + However, this introduces more latency, creating a larger cache footprint. 
1.1596 + It's unknown which decision is better, but this one results in simpler 1.1597 + code, and the hard case (high bitrate, high resolution) is handled 1.1598 + correctly.*/ 1.1599 + /*Copy the uncoded blocks from the previous reference frame.*/ 1.1600 + if(_pipe->nuncoded_fragis[_pli]>0){ 1.1601 + _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli]; 1.1602 + oc_frag_copy_list(&_dec->state, 1.1603 + _dec->state.ref_frame_data[OC_FRAME_SELF], 1.1604 + _dec->state.ref_frame_data[OC_FRAME_PREV], 1.1605 + _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli], 1.1606 + _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs); 1.1607 + } 1.1608 +} 1.1609 + 1.1610 +/*Filter a horizontal block edge.*/ 1.1611 +static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride, 1.1612 + const unsigned char *_src,int _src_ystride,int _qstep,int _flimit, 1.1613 + int *_variance0,int *_variance1){ 1.1614 + unsigned char *rdst; 1.1615 + const unsigned char *rsrc; 1.1616 + unsigned char *cdst; 1.1617 + const unsigned char *csrc; 1.1618 + int r[10]; 1.1619 + int sum0; 1.1620 + int sum1; 1.1621 + int bx; 1.1622 + int by; 1.1623 + rdst=_dst; 1.1624 + rsrc=_src; 1.1625 + for(bx=0;bx<8;bx++){ 1.1626 + cdst=rdst; 1.1627 + csrc=rsrc; 1.1628 + for(by=0;by<10;by++){ 1.1629 + r[by]=*csrc; 1.1630 + csrc+=_src_ystride; 1.1631 + } 1.1632 + sum0=sum1=0; 1.1633 + for(by=0;by<4;by++){ 1.1634 + sum0+=abs(r[by+1]-r[by]); 1.1635 + sum1+=abs(r[by+5]-r[by+6]); 1.1636 + } 1.1637 + *_variance0+=OC_MINI(255,sum0); 1.1638 + *_variance1+=OC_MINI(255,sum1); 1.1639 + if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ 1.1640 + *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); 1.1641 + cdst+=_dst_ystride; 1.1642 + *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); 1.1643 + cdst+=_dst_ystride; 1.1644 + for(by=0;by<4;by++){ 1.1645 + *cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+ 1.1646 + r[by+4]+r[by+5]+r[by+6]+4>>3); 1.1647 + cdst+=_dst_ystride; 1.1648 + 
} 1.1649 + *cdst=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); 1.1650 + cdst+=_dst_ystride; 1.1651 + *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); 1.1652 + } 1.1653 + else{ 1.1654 + for(by=1;by<=8;by++){ 1.1655 + *cdst=(unsigned char)r[by]; 1.1656 + cdst+=_dst_ystride; 1.1657 + } 1.1658 + } 1.1659 + rdst++; 1.1660 + rsrc++; 1.1661 + } 1.1662 +} 1.1663 + 1.1664 +/*Filter a vertical block edge.*/ 1.1665 +static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride, 1.1666 + int _qstep,int _flimit,int *_variances){ 1.1667 + unsigned char *rdst; 1.1668 + const unsigned char *rsrc; 1.1669 + unsigned char *cdst; 1.1670 + int r[10]; 1.1671 + int sum0; 1.1672 + int sum1; 1.1673 + int bx; 1.1674 + int by; 1.1675 + cdst=_dst; 1.1676 + for(by=0;by<8;by++){ 1.1677 + rsrc=cdst-1; 1.1678 + rdst=cdst; 1.1679 + for(bx=0;bx<10;bx++)r[bx]=*rsrc++; 1.1680 + sum0=sum1=0; 1.1681 + for(bx=0;bx<4;bx++){ 1.1682 + sum0+=abs(r[bx+1]-r[bx]); 1.1683 + sum1+=abs(r[bx+5]-r[bx+6]); 1.1684 + } 1.1685 + _variances[0]+=OC_MINI(255,sum0); 1.1686 + _variances[1]+=OC_MINI(255,sum1); 1.1687 + if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ 1.1688 + *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3); 1.1689 + *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3); 1.1690 + for(bx=0;bx<4;bx++){ 1.1691 + *rdst++=(unsigned char)(r[bx]+r[bx+1]+r[bx+2]+r[bx+3]*2+ 1.1692 + r[bx+4]+r[bx+5]+r[bx+6]+4>>3); 1.1693 + } 1.1694 + *rdst++=(unsigned char)(r[4]+r[5]+r[6]+r[7]*2+r[8]+r[9]*2+4>>3); 1.1695 + *rdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3); 1.1696 + } 1.1697 + cdst+=_dst_ystride; 1.1698 + } 1.1699 +} 1.1700 + 1.1701 +static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec, 1.1702 + th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0, 1.1703 + int _fragy_end){ 1.1704 + oc_fragment_plane *fplane; 1.1705 + int *variance; 1.1706 + unsigned char *dc_qi; 1.1707 + unsigned char *dst; 1.1708 + const unsigned char *src; 1.1709 + ptrdiff_t 
/*Run the out-of-loop deblocking filter over a range of fragment rows of one
   plane, reading from _src and writing to _dst.
  _dec:       The decoder context; supplies the fragment plane layout, the
               per-fragment variance accumulators (_dec->variances), the
               per-fragment DC quantizer indices (_dec->dc_qis) and the
               pp_dc_scale lookup table.
  _dst:       Base of the destination plane array; plane _pli is used.
  _src:       Base of the source plane array; plane _pli is used.
  _pli:       The plane index.
  _fragy0:    The first fragment row of the range.
  _fragy_end: One past the last fragment row of the range.
  Horizontal edges are handled by oc_filter_hedge() and vertical edges by
   oc_filter_vedge(); both also accumulate into the variance array.*/
static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
 th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
 int _fragy_end){
  oc_fragment_plane *fplane;
  int *variance;
  unsigned char *dc_qi;
  unsigned char *dst;
  const unsigned char *src;
  ptrdiff_t froffset;
  int dst_ystride;
  int src_ystride;
  int nhfrags;
  int width;
  int notstart;
  int notdone;
  int flimit;
  int qstep;
  int y_end;
  int y;
  int x;
  /*Select the requested plane in both buffers.*/
  _dst+=_pli;
  _src+=_pli;
  fplane=_dec->state.fplanes+_pli;
  nhfrags=fplane->nhfrags;
  /*Index of the first fragment of row _fragy0 within this plane.*/
  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
  variance=_dec->variances+froffset;
  dc_qi=_dec->dc_qis+froffset;
  /*1 when the range does not begin at the first fragment row, else 0.*/
  notstart=_fragy0>0;
  /*1 when the range stops before the last fragment row, else 0.*/
  notdone=_fragy_end<fplane->nvfrags;
  /*We want to clear an extra row of variances, except at the end.*/
  /*nhfrags&-notstart is nhfrags when notstart is 1 and 0 otherwise, so the
     first row of the range is left alone unless it is the first row of the
     plane.*/
  memset(variance+(nhfrags&-notstart),0,
   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
  /*Except for the first time, we want to point to the middle of the row.*/
  y=(_fragy0<<3)+(notstart<<2);
  dst_ystride=_dst->stride;
  src_ystride=_src->stride;
  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
  src=_src->data+y*(ptrdiff_t)src_ystride;
  width=_dst->width;
  /*Only runs when the range starts at row 0: the top four pixel rows have no
     horizontal edge above them and are copied through unfiltered.*/
  for(;y<4;y++){
    memcpy(dst,src,width*sizeof(dst[0]));
    dst+=dst_ystride;
    src+=src_ystride;
  }
  /*We also want to skip the last row in the frame for this loop.*/
  y_end=_fragy_end-!notdone<<3;
  for(;y<y_end;y+=8){
    /*First fragment in the row: horizontal edge only, there is no vertical
       edge to its left.*/
    qstep=_dec->pp_dc_scale[*dc_qi];
    flimit=(qstep*3)>>2;
    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
     qstep,flimit,variance,variance+nhfrags);
    variance++;
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi];
      flimit=(qstep*3)>>2;
      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
       qstep,flimit,variance,variance+nhfrags);
      /*The vertical edge filtered here lies four pixel rows above dst, in
         destination rows this function has already written.*/
      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
       qstep,flimit,variance-1);
      variance++;
      dc_qi++;
    }
    dst+=dst_ystride<<3;
    src+=src_ystride<<3;
  }
  /*And finally, handle the last row in the frame, if it's in the range.*/
  if(!notdone){
    int height;
    height=_dst->height;
    /*Copy the remaining pixel rows of the plane through unfiltered.*/
    for(;y<height;y++){
      memcpy(dst,src,width*sizeof(dst[0]));
      dst+=dst_ystride;
      src+=src_ystride;
    }
    /*Filter the last row of vertical block edges.*/
    /*Skip the first fragment's entry; it has no vertical edge to its left.*/
    dc_qi++;
    for(x=8;x<width;x+=8){
      qstep=_dec->pp_dc_scale[*dc_qi++];
      flimit=(qstep*3)>>2;
      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
       qstep,flimit,variance++);
    }
  }
}
/*De-ring a single 8x8 block in place.
  _idata:     Pointer to the top-left pixel of the block.
  _ystride:   Offset between consecutive pixel rows.
  _b:         Border flags: bit 0 (1) suppresses access to the column left of
               the block, bit 1 (2) the column to its right, bit 2 (4) the row
               above and bit 3 (8) the row below.
              When a flag is set, the block's own edge pixels are used in
               place of the missing neighbors.
  _dc_scale:  Added to the base modifier of 32; three times this value is also
               an upper bound on every modifier.
  _sharp_mod: The modifier used when the computed value drops below -64.
  _strong:    0 or 1; indexes OC_MOD_MAX and OC_MOD_SHIFT below.
  The source and destination are the same buffer, so rows and pixels that
   have already been written are what later taps read.*/
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
 int _dc_scale,int _sharp_mod,int _strong){
  /*Upper bound on the modifier for _strong==0 and _strong==1.*/
  static const unsigned char OC_MOD_MAX[2]={24,32};
  /*Left shift applied to the absolute pixel difference for each _strong.*/
  static const unsigned char OC_MOD_SHIFT[2]={1,0};
  const unsigned char *psrc;
  const unsigned char *src;
  const unsigned char *nsrc;
  unsigned char *dst;
  /*vmod[(row<<3)+col]: modifier between pixel (row,col) and the one above it;
     9 rows of 8 entries.*/
  int vmod[72];
  /*hmod[(col<<3)+row]: modifier between pixel (row,col) and the one to its
     left; 9 columns of 8 entries.*/
  int hmod[72];
  int mod_hi;
  int by;
  int bx;
  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
  dst=_idata;
  src=dst;
  /*The row above the block, or the first row itself when bit 2 is set.*/
  psrc=src-(_ystride&-!(_b&4));
  for(by=0;by<9;by++){
    for(bx=0;bx<8;bx++){
      int mod;
      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OC_MOD_SHIFT[_strong]);
      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
    }
    psrc=src;
    /*Advance one row, except past the last block row when bit 3 is set.*/
    src+=_ystride&-(!(_b&8)|by<7);
  }
  nsrc=dst;
  /*The column left of the block, or the first column itself when bit 0 is
     set.*/
  psrc=dst-!(_b&1);
  for(bx=0;bx<9;bx++){
    src=nsrc;
    for(by=0;by<8;by++){
      int mod;
      mod=32+_dc_scale-(abs(*src-*psrc)<<OC_MOD_SHIFT[_strong]);
      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
      psrc+=_ystride;
      src+=_ystride;
    }
    psrc=nsrc;
    /*Advance one column, except past the last block column when bit 1 is
       set.*/
    nsrc+=!(_b&2)|bx<7;
  }
  src=dst;
  psrc=src-(_ystride&-!(_b&4));
  nsrc=src+_ystride;
  /*Each output pixel is a weighted average of itself and its four neighbors.
    The neighbor weights are the modifiers computed above, the center weight a
     is 128 minus their sum, and b accumulates the weighted neighbors on top of
     a rounding offset of 64 before the final shift by 7.*/
  for(by=0;by<8;by++){
    int a;
    int b;
    int w;
    /*Leftmost pixel of the row; its left neighbor depends on bit 0.*/
    a=128;
    b=64;
    w=hmod[by];
    a-=w;
    b+=w**(src-!(_b&1));
    w=vmod[by<<3];
    a-=w;
    b+=w*psrc[0];
    w=vmod[by+1<<3];
    a-=w;
    b+=w*nsrc[0];
    w=hmod[(1<<3)+by];
    a-=w;
    b+=w*src[1];
    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
    /*Interior pixels of the row.*/
    for(bx=1;bx<7;bx++){
      a=128;
      b=64;
      w=hmod[(bx<<3)+by];
      a-=w;
      b+=w*src[bx-1];
      w=vmod[(by<<3)+bx];
      a-=w;
      b+=w*psrc[bx];
      w=vmod[(by+1<<3)+bx];
      a-=w;
      b+=w*nsrc[bx];
      w=hmod[(bx+1<<3)+by];
      a-=w;
      b+=w*src[bx+1];
      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
    }
    /*Rightmost pixel of the row; its right neighbor depends on bit 1.*/
    a=128;
    b=64;
    w=hmod[(7<<3)+by];
    a-=w;
    b+=w*src[6];
    w=vmod[(by<<3)+7];
    a-=w;
    b+=w*psrc[7];
    w=vmod[(by+1<<3)+7];
    a-=w;
    b+=w*nsrc[7];
    w=hmod[(8<<3)+by];
    a-=w;
    b+=w*src[7+!(_b&2)];
    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
    dst+=_ystride;
    psrc=src;
    src=nsrc;
    /*Advance the "next row" pointer, except past the last block row when
       bit 3 is set.*/
    nsrc+=_ystride&-(!(_b&8)|by<6);
  }
}

/*Accumulated-variance thresholds used to select the de-ringing strength for a
   block.*/
#define OC_DERING_THRESH1 (384)
#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1.1905 + int y_end; 1.1906 + int width; 1.1907 + int height; 1.1908 + int y; 1.1909 + int x; 1.1910 + iplane=_img+_pli; 1.1911 + fplane=_dec->state.fplanes+_pli; 1.1912 + nhfrags=fplane->nhfrags; 1.1913 + froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags; 1.1914 + variance=_dec->variances+froffset; 1.1915 + frag=_dec->state.frags+froffset; 1.1916 + strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY); 1.1917 + sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3; 1.1918 + y=_fragy0<<3; 1.1919 + ystride=iplane->stride; 1.1920 + idata=iplane->data+y*(ptrdiff_t)ystride; 1.1921 + y_end=_fragy_end<<3; 1.1922 + width=iplane->width; 1.1923 + height=iplane->height; 1.1924 + for(;y<y_end;y+=8){ 1.1925 + for(x=0;x<width;x+=8){ 1.1926 + int b; 1.1927 + int qi; 1.1928 + int var; 1.1929 + qi=_dec->state.qis[frag->qii]; 1.1930 + var=*variance; 1.1931 + b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3; 1.1932 + if(strong&&var>sthresh){ 1.1933 + oc_dering_block(idata+x,ystride,b, 1.1934 + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); 1.1935 + if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4|| 1.1936 + !(b&2)&&variance[1]>OC_DERING_THRESH4|| 1.1937 + !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4|| 1.1938 + !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){ 1.1939 + oc_dering_block(idata+x,ystride,b, 1.1940 + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); 1.1941 + oc_dering_block(idata+x,ystride,b, 1.1942 + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); 1.1943 + } 1.1944 + } 1.1945 + else if(var>OC_DERING_THRESH2){ 1.1946 + oc_dering_block(idata+x,ystride,b, 1.1947 + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); 1.1948 + } 1.1949 + else if(var>OC_DERING_THRESH1){ 1.1950 + oc_dering_block(idata+x,ystride,b, 1.1951 + _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0); 1.1952 + } 1.1953 + frag++; 1.1954 + variance++; 1.1955 + } 1.1956 + idata+=ystride<<3; 1.1957 + } 1.1958 +} 1.1959 + 1.1960 + 1.1961 + 1.1962 +th_dec_ctx *th_decode_alloc(const th_info 
*_info,const th_setup_info *_setup){ 1.1963 + oc_dec_ctx *dec; 1.1964 + if(_info==NULL||_setup==NULL)return NULL; 1.1965 + dec=oc_aligned_malloc(sizeof(*dec),16); 1.1966 + if(dec==NULL||oc_dec_init(dec,_info,_setup)<0){ 1.1967 + oc_aligned_free(dec); 1.1968 + return NULL; 1.1969 + } 1.1970 + dec->state.curframe_num=0; 1.1971 + return dec; 1.1972 +} 1.1973 + 1.1974 +void th_decode_free(th_dec_ctx *_dec){ 1.1975 + if(_dec!=NULL){ 1.1976 + oc_dec_clear(_dec); 1.1977 + oc_aligned_free(_dec); 1.1978 + } 1.1979 +} 1.1980 + 1.1981 +int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf, 1.1982 + size_t _buf_sz){ 1.1983 + switch(_req){ 1.1984 + case TH_DECCTL_GET_PPLEVEL_MAX:{ 1.1985 + if(_dec==NULL||_buf==NULL)return TH_EFAULT; 1.1986 + if(_buf_sz!=sizeof(int))return TH_EINVAL; 1.1987 + (*(int *)_buf)=OC_PP_LEVEL_MAX; 1.1988 + return 0; 1.1989 + }break; 1.1990 + case TH_DECCTL_SET_PPLEVEL:{ 1.1991 + int pp_level; 1.1992 + if(_dec==NULL||_buf==NULL)return TH_EFAULT; 1.1993 + if(_buf_sz!=sizeof(int))return TH_EINVAL; 1.1994 + pp_level=*(int *)_buf; 1.1995 + if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL; 1.1996 + _dec->pp_level=pp_level; 1.1997 + return 0; 1.1998 + }break; 1.1999 + case TH_DECCTL_SET_GRANPOS:{ 1.2000 + ogg_int64_t granpos; 1.2001 + if(_dec==NULL||_buf==NULL)return TH_EFAULT; 1.2002 + if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL; 1.2003 + granpos=*(ogg_int64_t *)_buf; 1.2004 + if(granpos<0)return TH_EINVAL; 1.2005 + _dec->state.granpos=granpos; 1.2006 + _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift) 1.2007 + -_dec->state.granpos_bias; 1.2008 + _dec->state.curframe_num=_dec->state.keyframe_num 1.2009 + +(granpos&(1<<_dec->state.info.keyframe_granule_shift)-1); 1.2010 + return 0; 1.2011 + }break; 1.2012 + case TH_DECCTL_SET_STRIPE_CB:{ 1.2013 + th_stripe_callback *cb; 1.2014 + if(_dec==NULL||_buf==NULL)return TH_EFAULT; 1.2015 + if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL; 1.2016 + cb=(th_stripe_callback 
*)_buf; 1.2017 + _dec->stripe_cb.ctx=cb->ctx; 1.2018 + _dec->stripe_cb.stripe_decoded=cb->stripe_decoded; 1.2019 + return 0; 1.2020 + }break; 1.2021 +#ifdef HAVE_CAIRO 1.2022 + case TH_DECCTL_SET_TELEMETRY_MBMODE:{ 1.2023 + if(_dec==NULL||_buf==NULL)return TH_EFAULT; 1.2024 + if(_buf_sz!=sizeof(int))return TH_EINVAL; 1.2025 + _dec->telemetry=1; 1.2026 + _dec->telemetry_mbmode=*(int *)_buf; 1.2027 + return 0; 1.2028 + }break; 1.2029 + case TH_DECCTL_SET_TELEMETRY_MV:{ 1.2030 + if(_dec==NULL||_buf==NULL)return TH_EFAULT; 1.2031 + if(_buf_sz!=sizeof(int))return TH_EINVAL; 1.2032 + _dec->telemetry=1; 1.2033 + _dec->telemetry_mv=*(int *)_buf; 1.2034 + return 0; 1.2035 + }break; 1.2036 + case TH_DECCTL_SET_TELEMETRY_QI:{ 1.2037 + if(_dec==NULL||_buf==NULL)return TH_EFAULT; 1.2038 + if(_buf_sz!=sizeof(int))return TH_EINVAL; 1.2039 + _dec->telemetry=1; 1.2040 + _dec->telemetry_qi=*(int *)_buf; 1.2041 + return 0; 1.2042 + }break; 1.2043 + case TH_DECCTL_SET_TELEMETRY_BITS:{ 1.2044 + if(_dec==NULL||_buf==NULL)return TH_EFAULT; 1.2045 + if(_buf_sz!=sizeof(int))return TH_EINVAL; 1.2046 + _dec->telemetry=1; 1.2047 + _dec->telemetry_bits=*(int *)_buf; 1.2048 + return 0; 1.2049 + }break; 1.2050 +#endif 1.2051 + default:return TH_EIMPL; 1.2052 + } 1.2053 +} 1.2054 + 1.2055 +/*We're decoding an INTER frame, but have no initialized reference 1.2056 + buffers (i.e., decoding did not start on a key frame). 
1.2057 + We initialize them to a solid gray here.*/ 1.2058 +static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){ 1.2059 + th_info *info; 1.2060 + size_t yplane_sz; 1.2061 + size_t cplane_sz; 1.2062 + ptrdiff_t yoffset; 1.2063 + int yhstride; 1.2064 + int yheight; 1.2065 + int chstride; 1.2066 + int cheight; 1.2067 + _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0; 1.2068 + _dec->state.ref_frame_idx[OC_FRAME_PREV]=0; 1.2069 + _dec->state.ref_frame_idx[OC_FRAME_SELF]=0; 1.2070 + _dec->state.ref_frame_data[OC_FRAME_GOLD]= 1.2071 + _dec->state.ref_frame_data[OC_FRAME_PREV]= 1.2072 + _dec->state.ref_frame_data[OC_FRAME_SELF]= 1.2073 + _dec->state.ref_frame_bufs[0][0].data; 1.2074 + memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0], 1.2075 + sizeof(_dec->pp_frame_buf[0])*3); 1.2076 + info=&_dec->state.info; 1.2077 + yhstride=abs(_dec->state.ref_ystride[0]); 1.2078 + yheight=info->frame_height+2*OC_UMV_PADDING; 1.2079 + chstride=abs(_dec->state.ref_ystride[1]); 1.2080 + cheight=yheight>>!(info->pixel_fmt&2); 1.2081 + yplane_sz=yhstride*(size_t)yheight+16; 1.2082 + cplane_sz=chstride*(size_t)cheight; 1.2083 + yoffset=yhstride*(ptrdiff_t)(yheight-OC_UMV_PADDING-1)+OC_UMV_PADDING; 1.2084 + memset(_dec->state.ref_frame_data[0]-yoffset,0x80,yplane_sz+2*cplane_sz); 1.2085 +} 1.2086 + 1.2087 +int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, 1.2088 + ogg_int64_t *_granpos){ 1.2089 + int ret; 1.2090 + if(_dec==NULL||_op==NULL)return TH_EFAULT; 1.2091 + /*A completely empty packet indicates a dropped frame and is treated exactly 1.2092 + like an inter frame with no coded blocks.*/ 1.2093 + if(_op->bytes==0){ 1.2094 + _dec->state.frame_type=OC_INTER_FRAME; 1.2095 + _dec->state.ntotal_coded_fragis=0; 1.2096 + } 1.2097 + else{ 1.2098 + oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes); 1.2099 + ret=oc_dec_frame_header_unpack(_dec); 1.2100 + if(ret<0)return ret; 1.2101 + if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec); 1.2102 + else 
/*Decode one video data packet.
  _dec:     The decoder context.
  _op:      The packet to decode.
  _granpos: If not NULL, receives the granule position of the decoded frame.
  Return: 0 when a frame was decoded.
          TH_DUPFRAME when the packet was empty or coded no blocks; only the
           granule position is advanced.
          TH_EFAULT if _dec or _op is NULL.
          The negative value returned by oc_dec_frame_header_unpack() if the
           frame header could not be parsed.*/
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
 ogg_int64_t *_granpos){
  int ret;
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
  /*A completely empty packet indicates a dropped frame and is treated exactly
     like an inter frame with no coded blocks.*/
  if(_op->bytes==0){
    _dec->state.frame_type=OC_INTER_FRAME;
    _dec->state.ntotal_coded_fragis=0;
  }
  else{
    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
    ret=oc_dec_frame_header_unpack(_dec);
    if(ret<0)return ret;
    if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
    else oc_dec_coded_flags_unpack(_dec);
  }
  /*If there have been no reference frames, and we need one, initialize one.*/
  if(_dec->state.frame_type!=OC_INTRA_FRAME&&
   (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
   _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
    oc_dec_init_dummy_frame(_dec);
  }
  /*If this was an inter frame with no coded blocks...*/
  if(_dec->state.ntotal_coded_fragis<=0){
    /*Just update the granule position and return.*/
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    return TH_DUPFRAME;
  }
  else{
    th_ycbcr_buffer stripe_buf;
    int stripe_fragy;
    int refi;
    int pli;
    int notstart;
    int notdone;
    /*Select a free buffer to use for the reconstructed version of this frame.*/
    /*I.e., the lowest index that is neither the golden nor the previous
       reference.*/
    for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
     refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
    _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
    _dec->state.ref_frame_data[OC_FRAME_SELF]=
     _dec->state.ref_frame_bufs[refi][0].data;
#if defined(HAVE_CAIRO)
    _dec->telemetry_frame_bytes=_op->bytes;
#endif
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      _dec->state.keyframe_num=_dec->state.curframe_num;
#if defined(HAVE_CAIRO)
      _dec->telemetry_coding_bytes=
       _dec->telemetry_mode_bytes=
       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    else{
#if defined(HAVE_CAIRO)
      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mb_modes_unpack(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
#if defined(HAVE_CAIRO)
      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    }
    oc_dec_block_qis_unpack(_dec);
#if defined(HAVE_CAIRO)
    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
#endif
    oc_dec_residual_tokens_unpack(_dec);
    /*Update granule position.
      This must be done before the striped decode callbacks so that the
       application knows what to do with the frame data.*/
    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
     _dec->state.info.keyframe_granule_shift)
     +(_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    /*All of the rest of the operations -- DC prediction reversal,
       reconstructing coded fragments, copying uncoded fragments, loop
       filtering, extending borders, and out-of-loop post-processing -- should
       be pipelined.
      I.e., DC prediction reversal, reconstruction, and uncoded fragment
       copying are done for one or two super block rows, then loop filtering is
       run as far as it can, then bordering copying, then post-processing.
      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
       block rows, and one chroma.
      Otherwise, an MCU consists of one super block row from each plane.
      Inside each MCU, we perform all of the steps on one color plane before
       moving on to the next.
      After reconstruction, the additional filtering stages introduce a delay
       since they need some pixels from the next fragment row.
      Thus the actual number of decoded rows available is slightly smaller for
       the first MCU, and slightly larger for the last.

      This entire process allows us to operate on the data while it is still in
       cache, resulting in big performance improvements.
      An application callback allows further application processing (blitting
       to video memory, color conversion, etc.) to also use the data while it's
       in cache.*/
    oc_dec_pipeline_init(_dec,&_dec->pipe);
    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
    /*notstart is 0 for the first MCU and 1 afterwards; notdone is 1 until the
       last MCU.
      They are added to sdelay and edelay once per enabled filter stage
       below.*/
    notstart=0;
    notdone=1;
    for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
      int avail_fragy0;
      int avail_fragy_end;
      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
      notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
      for(pli=0;pli<3;pli++){
        oc_fragment_plane *fplane;
        int frag_shift;
        int pp_offset;
        int sdelay;
        int edelay;
        fplane=_dec->state.fplanes+pli;
        /*Compute the first and last fragment row of the current MCU for this
           plane.*/
        /*frag_shift is 1 for a chroma plane when bit 1 of pixel_fmt is clear,
           else 0.*/
        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
        _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
        _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
         _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
        oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
        oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
        /*sdelay and edelay count how many fragment rows the start and end of
           the finished region lag behind the rows just reconstructed.*/
        sdelay=edelay=0;
        if(_dec->pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
          oc_state_loop_filter_frag_rows(&_dec->state,
           _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
        }
        /*To fill the borders, we have an additional two pixel delay, since a
           fragment in the next row could filter its top edge, using two pixels
           from a fragment in this row.
          But there's no reason to delay a full fragment between the two.*/
        oc_state_borders_fill_rows(&_dec->state,refi,pli,
         (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
         (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
        /*Out-of-loop post-processing.*/
        /*The chroma levels sit three above the matching luma levels.*/
        pp_offset=3*(pli!=0);
        if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
          /*Perform de-blocking in one plane.*/
          sdelay+=notstart;
          edelay+=notdone;
          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
           _dec->state.ref_frame_bufs[refi],pli,
           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
          if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
            /*Perform de-ringing in one plane.*/
            sdelay+=notstart;
            edelay+=notdone;
            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
             _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
          }
        }
        /*If no post-processing is done, we still need to delay a row for the
           loop filter, thanks to the strange filtering order VP3 chose.*/
        else if(_dec->pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
        }
        /*Compute the intersection of the available rows in all planes.
          If chroma is sub-sampled, the effect of each of its delays is
           doubled, but luma might have more post-processing filters enabled
           than chroma, so we don't know up front which one is the limiting
           factor.*/
        avail_fragy0=OC_MINI(avail_fragy0,
         _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
        avail_fragy_end=OC_MINI(avail_fragy_end,
         _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
      }
      if(_dec->stripe_cb.stripe_decoded!=NULL){
        /*The callback might want to use the FPU, so let's make sure they can.
          We violate all kinds of ABI restrictions by not doing this until
           now, but none of them actually matter since we don't use floating
           point ourselves.*/
        oc_restore_fpu(&_dec->state);
        /*Make the callback, ensuring we flip the sense of the "start" and
           "end" of the available region upside down.*/
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
         _dec->state.fplanes[0].nvfrags-avail_fragy0);
      }
      notstart=1;
    }
    /*Finish filling in the reference frame borders.*/
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
    /*Update the reference frame indices.*/
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      /*The new frame becomes both the previous and gold reference frames.*/
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
      _dec->state.ref_frame_data[OC_FRAME_GOLD]=
       _dec->state.ref_frame_data[OC_FRAME_PREV]=
       _dec->state.ref_frame_data[OC_FRAME_SELF];
    }
    else{
      /*Otherwise, just replace the previous reference frame.*/
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
      _dec->state.ref_frame_data[OC_FRAME_PREV]=
       _dec->state.ref_frame_data[OC_FRAME_SELF];
    }
    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
       gamma values, if nothing else).*/
    oc_restore_fpu(&_dec->state);
#if defined(OC_DUMP_IMAGES)
    /*We only dump images if there were some coded blocks.*/
    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
#endif
    return 0;
  }
}
if(_dec==NULL||_ycbcr==NULL)return TH_EFAULT; 1.2310 + oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf); 1.2311 +#if defined(HAVE_CAIRO) 1.2312 + /*If telemetry ioctls are active, we need to draw to the output buffer. 1.2313 + Stuff the plane into cairo.*/ 1.2314 + if(_dec->telemetry){ 1.2315 + cairo_surface_t *cs; 1.2316 + unsigned char *data; 1.2317 + unsigned char *y_row; 1.2318 + unsigned char *u_row; 1.2319 + unsigned char *v_row; 1.2320 + unsigned char *rgb_row; 1.2321 + int cstride; 1.2322 + int w; 1.2323 + int h; 1.2324 + int x; 1.2325 + int y; 1.2326 + int hdec; 1.2327 + int vdec; 1.2328 + w=_ycbcr[0].width; 1.2329 + h=_ycbcr[0].height; 1.2330 + hdec=!(_dec->state.info.pixel_fmt&1); 1.2331 + vdec=!(_dec->state.info.pixel_fmt&2); 1.2332 + /*Lazy data buffer init. 1.2333 + We could try to re-use the post-processing buffer, which would save 1.2334 + memory, but complicate the allocation logic there. 1.2335 + I don't think anyone cares about memory usage when using telemetry; it is 1.2336 + not meant for embedded devices.*/ 1.2337 + if(_dec->telemetry_frame_data==NULL){ 1.2338 + _dec->telemetry_frame_data=_ogg_malloc( 1.2339 + (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data)); 1.2340 + if(_dec->telemetry_frame_data==NULL)return 0; 1.2341 + } 1.2342 + cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h); 1.2343 + /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/ 1.2344 + data=cairo_image_surface_get_data(cs); 1.2345 + if(data==NULL){ 1.2346 + cairo_surface_destroy(cs); 1.2347 + return 0; 1.2348 + } 1.2349 + cstride=cairo_image_surface_get_stride(cs); 1.2350 + y_row=_ycbcr[0].data; 1.2351 + u_row=_ycbcr[1].data; 1.2352 + v_row=_ycbcr[2].data; 1.2353 + rgb_row=data; 1.2354 + for(y=0;y<h;y++){ 1.2355 + for(x=0;x<w;x++){ 1.2356 + int r; 1.2357 + int g; 1.2358 + int b; 1.2359 + r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200; 1.2360 + g=(3827562*y_row[x]-1287801*u_row[x>>hdec] 1.2361 + 
-2672387*v_row[x>>hdec]+447306710)/3287200; 1.2362 + b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600; 1.2363 + rgb_row[4*x+0]=OC_CLAMP255(b); 1.2364 + rgb_row[4*x+1]=OC_CLAMP255(g); 1.2365 + rgb_row[4*x+2]=OC_CLAMP255(r); 1.2366 + } 1.2367 + y_row+=_ycbcr[0].stride; 1.2368 + u_row+=_ycbcr[1].stride&-((y&1)|!vdec); 1.2369 + v_row+=_ycbcr[2].stride&-((y&1)|!vdec); 1.2370 + rgb_row+=cstride; 1.2371 + } 1.2372 + /*Draw coded identifier for each macroblock (stored in Hilbert order).*/ 1.2373 + { 1.2374 + cairo_t *c; 1.2375 + const oc_fragment *frags; 1.2376 + oc_mv *frag_mvs; 1.2377 + const signed char *mb_modes; 1.2378 + oc_mb_map *mb_maps; 1.2379 + size_t nmbs; 1.2380 + size_t mbi; 1.2381 + int row2; 1.2382 + int col2; 1.2383 + int qim[3]={0,0,0}; 1.2384 + if(_dec->state.nqis==2){ 1.2385 + int bqi; 1.2386 + bqi=_dec->state.qis[0]; 1.2387 + if(_dec->state.qis[1]>bqi)qim[1]=1; 1.2388 + if(_dec->state.qis[1]<bqi)qim[1]=-1; 1.2389 + } 1.2390 + if(_dec->state.nqis==3){ 1.2391 + int bqi; 1.2392 + int cqi; 1.2393 + int dqi; 1.2394 + bqi=_dec->state.qis[0]; 1.2395 + cqi=_dec->state.qis[1]; 1.2396 + dqi=_dec->state.qis[2]; 1.2397 + if(cqi>bqi&&dqi>bqi){ 1.2398 + if(dqi>cqi){ 1.2399 + qim[1]=1; 1.2400 + qim[2]=2; 1.2401 + } 1.2402 + else{ 1.2403 + qim[1]=2; 1.2404 + qim[2]=1; 1.2405 + } 1.2406 + } 1.2407 + else if(cqi<bqi&&dqi<bqi){ 1.2408 + if(dqi<cqi){ 1.2409 + qim[1]=-1; 1.2410 + qim[2]=-2; 1.2411 + } 1.2412 + else{ 1.2413 + qim[1]=-2; 1.2414 + qim[2]=-1; 1.2415 + } 1.2416 + } 1.2417 + else{ 1.2418 + if(cqi<bqi)qim[1]=-1; 1.2419 + else qim[1]=1; 1.2420 + if(dqi<bqi)qim[2]=-1; 1.2421 + else qim[2]=1; 1.2422 + } 1.2423 + } 1.2424 + c=cairo_create(cs); 1.2425 + frags=_dec->state.frags; 1.2426 + frag_mvs=_dec->state.frag_mvs; 1.2427 + mb_modes=_dec->state.mb_modes; 1.2428 + mb_maps=_dec->state.mb_maps; 1.2429 + nmbs=_dec->state.nmbs; 1.2430 + row2=0; 1.2431 + col2=0; 1.2432 + for(mbi=0;mbi<nmbs;mbi++){ 1.2433 + float x; 1.2434 + float y; 1.2435 + int bi; 1.2436 + 
y=h-(row2+((col2+1>>1)&1))*16-16; 1.2437 + x=(col2>>1)*16; 1.2438 + cairo_set_line_width(c,1.); 1.2439 + /*Keyframe (all intra) red box.*/ 1.2440 + if(_dec->state.frame_type==OC_INTRA_FRAME){ 1.2441 + if(_dec->telemetry_mbmode&0x02){ 1.2442 + cairo_set_source_rgba(c,1.,0,0,.5); 1.2443 + cairo_rectangle(c,x+2.5,y+2.5,11,11); 1.2444 + cairo_stroke_preserve(c); 1.2445 + cairo_set_source_rgba(c,1.,0,0,.25); 1.2446 + cairo_fill(c); 1.2447 + } 1.2448 + } 1.2449 + else{ 1.2450 + ptrdiff_t fragi; 1.2451 + int frag_mvx; 1.2452 + int frag_mvy; 1.2453 + for(bi=0;bi<4;bi++){ 1.2454 + fragi=mb_maps[mbi][0][bi]; 1.2455 + if(fragi>=0&&frags[fragi].coded){ 1.2456 + frag_mvx=OC_MV_X(frag_mvs[fragi]); 1.2457 + frag_mvy=OC_MV_Y(frag_mvs[fragi]); 1.2458 + break; 1.2459 + } 1.2460 + } 1.2461 + if(bi<4){ 1.2462 + switch(mb_modes[mbi]){ 1.2463 + case OC_MODE_INTRA:{ 1.2464 + if(_dec->telemetry_mbmode&0x02){ 1.2465 + cairo_set_source_rgba(c,1.,0,0,.5); 1.2466 + cairo_rectangle(c,x+2.5,y+2.5,11,11); 1.2467 + cairo_stroke_preserve(c); 1.2468 + cairo_set_source_rgba(c,1.,0,0,.25); 1.2469 + cairo_fill(c); 1.2470 + } 1.2471 + }break; 1.2472 + case OC_MODE_INTER_NOMV:{ 1.2473 + if(_dec->telemetry_mbmode&0x01){ 1.2474 + cairo_set_source_rgba(c,0,0,1.,.5); 1.2475 + cairo_rectangle(c,x+2.5,y+2.5,11,11); 1.2476 + cairo_stroke_preserve(c); 1.2477 + cairo_set_source_rgba(c,0,0,1.,.25); 1.2478 + cairo_fill(c); 1.2479 + } 1.2480 + }break; 1.2481 + case OC_MODE_INTER_MV:{ 1.2482 + if(_dec->telemetry_mbmode&0x04){ 1.2483 + cairo_rectangle(c,x+2.5,y+2.5,11,11); 1.2484 + cairo_set_source_rgba(c,0,1.,0,.5); 1.2485 + cairo_stroke(c); 1.2486 + } 1.2487 + if(_dec->telemetry_mv&0x04){ 1.2488 + cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy); 1.2489 + cairo_set_source_rgba(c,1.,1.,1.,.9); 1.2490 + cairo_set_line_width(c,3.); 1.2491 + cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66); 1.2492 + cairo_stroke_preserve(c); 1.2493 + cairo_set_line_width(c,2.); 1.2494 + cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33); 
1.2495 + cairo_stroke_preserve(c); 1.2496 + cairo_set_line_width(c,1.); 1.2497 + cairo_line_to(c,x+8,y+8); 1.2498 + cairo_stroke(c); 1.2499 + } 1.2500 + }break; 1.2501 + case OC_MODE_INTER_MV_LAST:{ 1.2502 + if(_dec->telemetry_mbmode&0x08){ 1.2503 + cairo_rectangle(c,x+2.5,y+2.5,11,11); 1.2504 + cairo_set_source_rgba(c,0,1.,0,.5); 1.2505 + cairo_move_to(c,x+13.5,y+2.5); 1.2506 + cairo_line_to(c,x+2.5,y+8); 1.2507 + cairo_line_to(c,x+13.5,y+13.5); 1.2508 + cairo_stroke(c); 1.2509 + } 1.2510 + if(_dec->telemetry_mv&0x08){ 1.2511 + cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy); 1.2512 + cairo_set_source_rgba(c,1.,1.,1.,.9); 1.2513 + cairo_set_line_width(c,3.); 1.2514 + cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66); 1.2515 + cairo_stroke_preserve(c); 1.2516 + cairo_set_line_width(c,2.); 1.2517 + cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33); 1.2518 + cairo_stroke_preserve(c); 1.2519 + cairo_set_line_width(c,1.); 1.2520 + cairo_line_to(c,x+8,y+8); 1.2521 + cairo_stroke(c); 1.2522 + } 1.2523 + }break; 1.2524 + case OC_MODE_INTER_MV_LAST2:{ 1.2525 + if(_dec->telemetry_mbmode&0x10){ 1.2526 + cairo_rectangle(c,x+2.5,y+2.5,11,11); 1.2527 + cairo_set_source_rgba(c,0,1.,0,.5); 1.2528 + cairo_move_to(c,x+8,y+2.5); 1.2529 + cairo_line_to(c,x+2.5,y+8); 1.2530 + cairo_line_to(c,x+8,y+13.5); 1.2531 + cairo_move_to(c,x+13.5,y+2.5); 1.2532 + cairo_line_to(c,x+8,y+8); 1.2533 + cairo_line_to(c,x+13.5,y+13.5); 1.2534 + cairo_stroke(c); 1.2535 + } 1.2536 + if(_dec->telemetry_mv&0x10){ 1.2537 + cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy); 1.2538 + cairo_set_source_rgba(c,1.,1.,1.,.9); 1.2539 + cairo_set_line_width(c,3.); 1.2540 + cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66); 1.2541 + cairo_stroke_preserve(c); 1.2542 + cairo_set_line_width(c,2.); 1.2543 + cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33); 1.2544 + cairo_stroke_preserve(c); 1.2545 + cairo_set_line_width(c,1.); 1.2546 + cairo_line_to(c,x+8,y+8); 1.2547 + cairo_stroke(c); 1.2548 + } 1.2549 + }break; 1.2550 + case 
OC_MODE_GOLDEN_NOMV:{ 1.2551 + if(_dec->telemetry_mbmode&0x20){ 1.2552 + cairo_set_source_rgba(c,1.,1.,0,.5); 1.2553 + cairo_rectangle(c,x+2.5,y+2.5,11,11); 1.2554 + cairo_stroke_preserve(c); 1.2555 + cairo_set_source_rgba(c,1.,1.,0,.25); 1.2556 + cairo_fill(c); 1.2557 + } 1.2558 + }break; 1.2559 + case OC_MODE_GOLDEN_MV:{ 1.2560 + if(_dec->telemetry_mbmode&0x40){ 1.2561 + cairo_rectangle(c,x+2.5,y+2.5,11,11); 1.2562 + cairo_set_source_rgba(c,1.,1.,0,.5); 1.2563 + cairo_stroke(c); 1.2564 + } 1.2565 + if(_dec->telemetry_mv&0x40){ 1.2566 + cairo_move_to(c,x+8+frag_mvx,y+8-frag_mvy); 1.2567 + cairo_set_source_rgba(c,1.,1.,1.,.9); 1.2568 + cairo_set_line_width(c,3.); 1.2569 + cairo_line_to(c,x+8+frag_mvx*.66,y+8-frag_mvy*.66); 1.2570 + cairo_stroke_preserve(c); 1.2571 + cairo_set_line_width(c,2.); 1.2572 + cairo_line_to(c,x+8+frag_mvx*.33,y+8-frag_mvy*.33); 1.2573 + cairo_stroke_preserve(c); 1.2574 + cairo_set_line_width(c,1.); 1.2575 + cairo_line_to(c,x+8,y+8); 1.2576 + cairo_stroke(c); 1.2577 + } 1.2578 + }break; 1.2579 + case OC_MODE_INTER_MV_FOUR:{ 1.2580 + if(_dec->telemetry_mbmode&0x80){ 1.2581 + cairo_rectangle(c,x+2.5,y+2.5,4,4); 1.2582 + cairo_rectangle(c,x+9.5,y+2.5,4,4); 1.2583 + cairo_rectangle(c,x+2.5,y+9.5,4,4); 1.2584 + cairo_rectangle(c,x+9.5,y+9.5,4,4); 1.2585 + cairo_set_source_rgba(c,0,1.,0,.5); 1.2586 + cairo_stroke(c); 1.2587 + } 1.2588 + /*4mv is odd, coded in raster order.*/ 1.2589 + fragi=mb_maps[mbi][0][0]; 1.2590 + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ 1.2591 + frag_mvx=OC_MV_X(frag_mvs[fragi]); 1.2592 + frag_mvx=OC_MV_Y(frag_mvs[fragi]); 1.2593 + cairo_move_to(c,x+4+frag_mvx,y+12-frag_mvy); 1.2594 + cairo_set_source_rgba(c,1.,1.,1.,.9); 1.2595 + cairo_set_line_width(c,3.); 1.2596 + cairo_line_to(c,x+4+frag_mvx*.66,y+12-frag_mvy*.66); 1.2597 + cairo_stroke_preserve(c); 1.2598 + cairo_set_line_width(c,2.); 1.2599 + cairo_line_to(c,x+4+frag_mvx*.33,y+12-frag_mvy*.33); 1.2600 + cairo_stroke_preserve(c); 1.2601 + 
cairo_set_line_width(c,1.); 1.2602 + cairo_line_to(c,x+4,y+12); 1.2603 + cairo_stroke(c); 1.2604 + } 1.2605 + fragi=mb_maps[mbi][0][1]; 1.2606 + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ 1.2607 + frag_mvx=OC_MV_X(frag_mvs[fragi]); 1.2608 + frag_mvx=OC_MV_Y(frag_mvs[fragi]); 1.2609 + cairo_move_to(c,x+12+frag_mvx,y+12-frag_mvy); 1.2610 + cairo_set_source_rgba(c,1.,1.,1.,.9); 1.2611 + cairo_set_line_width(c,3.); 1.2612 + cairo_line_to(c,x+12+frag_mvx*.66,y+12-frag_mvy*.66); 1.2613 + cairo_stroke_preserve(c); 1.2614 + cairo_set_line_width(c,2.); 1.2615 + cairo_line_to(c,x+12+frag_mvx*.33,y+12-frag_mvy*.33); 1.2616 + cairo_stroke_preserve(c); 1.2617 + cairo_set_line_width(c,1.); 1.2618 + cairo_line_to(c,x+12,y+12); 1.2619 + cairo_stroke(c); 1.2620 + } 1.2621 + fragi=mb_maps[mbi][0][2]; 1.2622 + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ 1.2623 + frag_mvx=OC_MV_X(frag_mvs[fragi]); 1.2624 + frag_mvx=OC_MV_Y(frag_mvs[fragi]); 1.2625 + cairo_move_to(c,x+4+frag_mvx,y+4-frag_mvy); 1.2626 + cairo_set_source_rgba(c,1.,1.,1.,.9); 1.2627 + cairo_set_line_width(c,3.); 1.2628 + cairo_line_to(c,x+4+frag_mvx*.66,y+4-frag_mvy*.66); 1.2629 + cairo_stroke_preserve(c); 1.2630 + cairo_set_line_width(c,2.); 1.2631 + cairo_line_to(c,x+4+frag_mvx*.33,y+4-frag_mvy*.33); 1.2632 + cairo_stroke_preserve(c); 1.2633 + cairo_set_line_width(c,1.); 1.2634 + cairo_line_to(c,x+4,y+4); 1.2635 + cairo_stroke(c); 1.2636 + } 1.2637 + fragi=mb_maps[mbi][0][3]; 1.2638 + if(frags[fragi].coded&&_dec->telemetry_mv&0x80){ 1.2639 + frag_mvx=OC_MV_X(frag_mvs[fragi]); 1.2640 + frag_mvx=OC_MV_Y(frag_mvs[fragi]); 1.2641 + cairo_move_to(c,x+12+frag_mvx,y+4-frag_mvy); 1.2642 + cairo_set_source_rgba(c,1.,1.,1.,.9); 1.2643 + cairo_set_line_width(c,3.); 1.2644 + cairo_line_to(c,x+12+frag_mvx*.66,y+4-frag_mvy*.66); 1.2645 + cairo_stroke_preserve(c); 1.2646 + cairo_set_line_width(c,2.); 1.2647 + cairo_line_to(c,x+12+frag_mvx*.33,y+4-frag_mvy*.33); 1.2648 + cairo_stroke_preserve(c); 1.2649 + 
cairo_set_line_width(c,1.); 1.2650 + cairo_line_to(c,x+12,y+4); 1.2651 + cairo_stroke(c); 1.2652 + } 1.2653 + }break; 1.2654 + } 1.2655 + } 1.2656 + } 1.2657 + /*qii illustration.*/ 1.2658 + if(_dec->telemetry_qi&0x2){ 1.2659 + cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE); 1.2660 + for(bi=0;bi<4;bi++){ 1.2661 + ptrdiff_t fragi; 1.2662 + int qiv; 1.2663 + int xp; 1.2664 + int yp; 1.2665 + xp=x+(bi&1)*8; 1.2666 + yp=y+8-(bi&2)*4; 1.2667 + fragi=mb_maps[mbi][0][bi]; 1.2668 + if(fragi>=0&&frags[fragi].coded){ 1.2669 + qiv=qim[frags[fragi].qii]; 1.2670 + cairo_set_line_width(c,3.); 1.2671 + cairo_set_source_rgba(c,0.,0.,0.,.5); 1.2672 + switch(qiv){ 1.2673 + /*Double plus:*/ 1.2674 + case 2:{ 1.2675 + if((bi&1)^((bi&2)>>1)){ 1.2676 + cairo_move_to(c,xp+2.5,yp+1.5); 1.2677 + cairo_line_to(c,xp+2.5,yp+3.5); 1.2678 + cairo_move_to(c,xp+1.5,yp+2.5); 1.2679 + cairo_line_to(c,xp+3.5,yp+2.5); 1.2680 + cairo_move_to(c,xp+5.5,yp+4.5); 1.2681 + cairo_line_to(c,xp+5.5,yp+6.5); 1.2682 + cairo_move_to(c,xp+4.5,yp+5.5); 1.2683 + cairo_line_to(c,xp+6.5,yp+5.5); 1.2684 + cairo_stroke_preserve(c); 1.2685 + cairo_set_source_rgba(c,0.,1.,1.,1.); 1.2686 + } 1.2687 + else{ 1.2688 + cairo_move_to(c,xp+5.5,yp+1.5); 1.2689 + cairo_line_to(c,xp+5.5,yp+3.5); 1.2690 + cairo_move_to(c,xp+4.5,yp+2.5); 1.2691 + cairo_line_to(c,xp+6.5,yp+2.5); 1.2692 + cairo_move_to(c,xp+2.5,yp+4.5); 1.2693 + cairo_line_to(c,xp+2.5,yp+6.5); 1.2694 + cairo_move_to(c,xp+1.5,yp+5.5); 1.2695 + cairo_line_to(c,xp+3.5,yp+5.5); 1.2696 + cairo_stroke_preserve(c); 1.2697 + cairo_set_source_rgba(c,0.,1.,1.,1.); 1.2698 + } 1.2699 + }break; 1.2700 + /*Double minus:*/ 1.2701 + case -2:{ 1.2702 + cairo_move_to(c,xp+2.5,yp+2.5); 1.2703 + cairo_line_to(c,xp+5.5,yp+2.5); 1.2704 + cairo_move_to(c,xp+2.5,yp+5.5); 1.2705 + cairo_line_to(c,xp+5.5,yp+5.5); 1.2706 + cairo_stroke_preserve(c); 1.2707 + cairo_set_source_rgba(c,1.,1.,1.,1.); 1.2708 + }break; 1.2709 + /*Plus:*/ 1.2710 + case 1:{ 1.2711 + if(bi&2==0)yp-=2; 1.2712 + 
if(bi&1==0)xp-=2; 1.2713 + cairo_move_to(c,xp+4.5,yp+2.5); 1.2714 + cairo_line_to(c,xp+4.5,yp+6.5); 1.2715 + cairo_move_to(c,xp+2.5,yp+4.5); 1.2716 + cairo_line_to(c,xp+6.5,yp+4.5); 1.2717 + cairo_stroke_preserve(c); 1.2718 + cairo_set_source_rgba(c,.1,1.,.3,1.); 1.2719 + break; 1.2720 + } 1.2721 + /*Fall through.*/ 1.2722 + /*Minus:*/ 1.2723 + case -1:{ 1.2724 + cairo_move_to(c,xp+2.5,yp+4.5); 1.2725 + cairo_line_to(c,xp+6.5,yp+4.5); 1.2726 + cairo_stroke_preserve(c); 1.2727 + cairo_set_source_rgba(c,1.,.3,.1,1.); 1.2728 + }break; 1.2729 + default:continue; 1.2730 + } 1.2731 + cairo_set_line_width(c,1.); 1.2732 + cairo_stroke(c); 1.2733 + } 1.2734 + } 1.2735 + } 1.2736 + col2++; 1.2737 + if((col2>>1)>=_dec->state.nhmbs){ 1.2738 + col2=0; 1.2739 + row2+=2; 1.2740 + } 1.2741 + } 1.2742 + /*Bit usage indicator[s]:*/ 1.2743 + if(_dec->telemetry_bits){ 1.2744 + int widths[6]; 1.2745 + int fpsn; 1.2746 + int fpsd; 1.2747 + int mult; 1.2748 + int fullw; 1.2749 + int padw; 1.2750 + int i; 1.2751 + fpsn=_dec->state.info.fps_numerator; 1.2752 + fpsd=_dec->state.info.fps_denominator; 1.2753 + mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits); 1.2754 + fullw=250.f*h*fpsd*mult/fpsn; 1.2755 + padw=w-24; 1.2756 + /*Header and coded block bits.*/ 1.2757 + if(_dec->telemetry_frame_bytes<0|| 1.2758 + _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){ 1.2759 + _dec->telemetry_frame_bytes=0; 1.2760 + } 1.2761 + if(_dec->telemetry_coding_bytes<0|| 1.2762 + _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){ 1.2763 + _dec->telemetry_coding_bytes=0; 1.2764 + } 1.2765 + if(_dec->telemetry_mode_bytes<0|| 1.2766 + _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){ 1.2767 + _dec->telemetry_mode_bytes=0; 1.2768 + } 1.2769 + if(_dec->telemetry_mv_bytes<0|| 1.2770 + _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){ 1.2771 + _dec->telemetry_mv_bytes=0; 1.2772 + } 1.2773 + if(_dec->telemetry_qi_bytes<0|| 1.2774 + _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){ 1.2775 
+ _dec->telemetry_qi_bytes=0; 1.2776 + } 1.2777 + if(_dec->telemetry_dc_bytes<0|| 1.2778 + _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){ 1.2779 + _dec->telemetry_dc_bytes=0; 1.2780 + } 1.2781 + widths[0]=padw*(_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw; 1.2782 + widths[1]=padw*(_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw; 1.2783 + widths[2]=padw*(_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw; 1.2784 + widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw; 1.2785 + widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw; 1.2786 + widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw; 1.2787 + for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w; 1.2788 + cairo_set_source_rgba(c,.0,.0,.0,.6); 1.2789 + cairo_rectangle(c,10,h-33,widths[0]+1,5); 1.2790 + cairo_rectangle(c,10,h-29,widths[1]+1,5); 1.2791 + cairo_rectangle(c,10,h-25,widths[2]+1,5); 1.2792 + cairo_rectangle(c,10,h-21,widths[3]+1,5); 1.2793 + cairo_rectangle(c,10,h-17,widths[4]+1,5); 1.2794 + cairo_rectangle(c,10,h-13,widths[5]+1,5); 1.2795 + cairo_fill(c); 1.2796 + cairo_set_source_rgb(c,1,0,0); 1.2797 + cairo_rectangle(c,10.5,h-32.5,widths[0],4); 1.2798 + cairo_fill(c); 1.2799 + cairo_set_source_rgb(c,0,1,0); 1.2800 + cairo_rectangle(c,10.5,h-28.5,widths[1],4); 1.2801 + cairo_fill(c); 1.2802 + cairo_set_source_rgb(c,0,0,1); 1.2803 + cairo_rectangle(c,10.5,h-24.5,widths[2],4); 1.2804 + cairo_fill(c); 1.2805 + cairo_set_source_rgb(c,.6,.4,.0); 1.2806 + cairo_rectangle(c,10.5,h-20.5,widths[3],4); 1.2807 + cairo_fill(c); 1.2808 + cairo_set_source_rgb(c,.3,.3,.3); 1.2809 + cairo_rectangle(c,10.5,h-16.5,widths[4],4); 1.2810 + cairo_fill(c); 1.2811 + cairo_set_source_rgb(c,.5,.5,.8); 1.2812 + cairo_rectangle(c,10.5,h-12.5,widths[5],4); 1.2813 + cairo_fill(c); 1.2814 + } 1.2815 + /*Master qi indicator[s]:*/ 1.2816 + if(_dec->telemetry_qi&0x1){ 1.2817 + cairo_text_extents_t extents; 1.2818 + char buffer[10]; 1.2819 + int p; 1.2820 + 
int y; 1.2821 + p=0; 1.2822 + y=h-7.5; 1.2823 + if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10; 1.2824 + buffer[p++]=48+_dec->state.qis[0]%10; 1.2825 + if(_dec->state.nqis>=2){ 1.2826 + buffer[p++]=' '; 1.2827 + if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10; 1.2828 + buffer[p++]=48+_dec->state.qis[1]%10; 1.2829 + } 1.2830 + if(_dec->state.nqis==3){ 1.2831 + buffer[p++]=' '; 1.2832 + if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10; 1.2833 + buffer[p++]=48+_dec->state.qis[2]%10; 1.2834 + } 1.2835 + buffer[p++]='\0'; 1.2836 + cairo_select_font_face(c,"sans", 1.2837 + CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD); 1.2838 + cairo_set_font_size(c,18); 1.2839 + cairo_text_extents(c,buffer,&extents); 1.2840 + cairo_set_source_rgb(c,1,1,1); 1.2841 + cairo_move_to(c,w-extents.x_advance-10,y); 1.2842 + cairo_show_text(c,buffer); 1.2843 + cairo_set_source_rgb(c,0,0,0); 1.2844 + cairo_move_to(c,w-extents.x_advance-10,y); 1.2845 + cairo_text_path(c,buffer); 1.2846 + cairo_set_line_width(c,.8); 1.2847 + cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND); 1.2848 + cairo_stroke(c); 1.2849 + } 1.2850 + cairo_destroy(c); 1.2851 + } 1.2852 + /*Out of the Cairo plane into the telemetry YUV buffer.*/ 1.2853 + _ycbcr[0].data=_dec->telemetry_frame_data; 1.2854 + _ycbcr[0].stride=_ycbcr[0].width; 1.2855 + _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride; 1.2856 + _ycbcr[1].stride=_ycbcr[1].width; 1.2857 + _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride; 1.2858 + _ycbcr[2].stride=_ycbcr[2].width; 1.2859 + y_row=_ycbcr[0].data; 1.2860 + u_row=_ycbcr[1].data; 1.2861 + v_row=_ycbcr[2].data; 1.2862 + rgb_row=data; 1.2863 + /*This is one of the few places it's worth handling chroma on a 1.2864 + case-by-case basis.*/ 1.2865 + switch(_dec->state.info.pixel_fmt){ 1.2866 + case TH_PF_420:{ 1.2867 + for(y=0;y<h;y+=2){ 1.2868 + unsigned char *y_row2; 1.2869 + unsigned char *rgb_row2; 1.2870 + y_row2=y_row+_ycbcr[0].stride; 1.2871 + 
rgb_row2=rgb_row+cstride; 1.2872 + for(x=0;x<w;x+=2){ 1.2873 + int y; 1.2874 + int u; 1.2875 + int v; 1.2876 + y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1] 1.2877 + +24966*rgb_row[4*x+0]+4207500)/255000; 1.2878 + y_row[x]=OC_CLAMP255(y); 1.2879 + y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5] 1.2880 + +24966*rgb_row[4*x+4]+4207500)/255000; 1.2881 + y_row[x+1]=OC_CLAMP255(y); 1.2882 + y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1] 1.2883 + +24966*rgb_row2[4*x+0]+4207500)/255000; 1.2884 + y_row2[x]=OC_CLAMP255(y); 1.2885 + y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5] 1.2886 + +24966*rgb_row2[4*x+4]+4207500)/255000; 1.2887 + y_row2[x+1]=OC_CLAMP255(y); 1.2888 + u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6] 1.2889 + +rgb_row2[4*x+2]+rgb_row2[4*x+6]) 1.2890 + -16436*(rgb_row[4*x+1]+rgb_row[4*x+5] 1.2891 + +rgb_row2[4*x+1]+rgb_row2[4*x+5]) 1.2892 + +24808*(rgb_row[4*x+0]+rgb_row[4*x+4] 1.2893 + +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930; 1.2894 + v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6] 1.2895 + +rgb_row2[4*x+2]+rgb_row2[4*x+6]) 1.2896 + -32872*(rgb_row[4*x+1]+rgb_row[4*x+5] 1.2897 + +rgb_row2[4*x+1]+rgb_row2[4*x+5]) 1.2898 + -6384*(rgb_row[4*x+0]+rgb_row[4*x+4] 1.2899 + +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510; 1.2900 + u_row[x>>1]=OC_CLAMP255(u); 1.2901 + v_row[x>>1]=OC_CLAMP255(v); 1.2902 + } 1.2903 + y_row+=_ycbcr[0].stride<<1; 1.2904 + u_row+=_ycbcr[1].stride; 1.2905 + v_row+=_ycbcr[2].stride; 1.2906 + rgb_row+=cstride<<1; 1.2907 + } 1.2908 + }break; 1.2909 + case TH_PF_422:{ 1.2910 + for(y=0;y<h;y++){ 1.2911 + for(x=0;x<w;x+=2){ 1.2912 + int y; 1.2913 + int u; 1.2914 + int v; 1.2915 + y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1] 1.2916 + +24966*rgb_row[4*x+0]+4207500)/255000; 1.2917 + y_row[x]=OC_CLAMP255(y); 1.2918 + y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5] 1.2919 + +24966*rgb_row[4*x+4]+4207500)/255000; 1.2920 + y_row[x+1]=OC_CLAMP255(y); 1.2921 + u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6]) 1.2922 + -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]) 1.2923 
+ +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930; 1.2924 + v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6]) 1.2925 + -65744*(rgb_row[4*x+1]+rgb_row[4*x+5]) 1.2926 + -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510; 1.2927 + u_row[x>>1]=OC_CLAMP255(u); 1.2928 + v_row[x>>1]=OC_CLAMP255(v); 1.2929 + } 1.2930 + y_row+=_ycbcr[0].stride; 1.2931 + u_row+=_ycbcr[1].stride; 1.2932 + v_row+=_ycbcr[2].stride; 1.2933 + rgb_row+=cstride; 1.2934 + } 1.2935 + }break; 1.2936 + /*case TH_PF_444:*/ 1.2937 + default:{ 1.2938 + for(y=0;y<h;y++){ 1.2939 + for(x=0;x<w;x++){ 1.2940 + int y; 1.2941 + int u; 1.2942 + int v; 1.2943 + y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1] 1.2944 + +24966*rgb_row[4*x+0]+4207500)/255000; 1.2945 + u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1] 1.2946 + +99232*rgb_row[4*x+0]+29032005)/225930; 1.2947 + v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1] 1.2948 + -25536*rgb_row[4*x+0]+45940035)/357510; 1.2949 + y_row[x]=OC_CLAMP255(y); 1.2950 + u_row[x]=OC_CLAMP255(u); 1.2951 + v_row[x]=OC_CLAMP255(v); 1.2952 + } 1.2953 + y_row+=_ycbcr[0].stride; 1.2954 + u_row+=_ycbcr[1].stride; 1.2955 + v_row+=_ycbcr[2].stride; 1.2956 + rgb_row+=cstride; 1.2957 + } 1.2958 + }break; 1.2959 + } 1.2960 + /*Finished. 1.2961 + Destroy the surface.*/ 1.2962 + cairo_surface_destroy(cs); 1.2963 + } 1.2964 +#endif 1.2965 + return 0; 1.2966 +}