michael@0: /******************************************************************** michael@0: * * michael@0: * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * michael@0: * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * michael@0: * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * michael@0: * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * michael@0: * * michael@0: * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * michael@0: * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * michael@0: * * michael@0: ******************************************************************** michael@0: michael@0: function: michael@0: last mod: $Id: huffdec.c 17577 2010-10-29 04:00:07Z tterribe $ michael@0: michael@0: ********************************************************************/ michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include "huffdec.h" michael@0: #include "decint.h" michael@0: michael@0: michael@0: michael@0: /*Instead of storing every branching in the tree, subtrees can be collapsed michael@0: into one node, with a table of size 1<>8). michael@0: michael@0: @ARTICLE{Hash95, michael@0: author="Reza Hashemian", michael@0: title="Memory Efficient and High-Speed Search {Huffman} Coding", michael@0: journal="{IEEE} Transactions on Communications", michael@0: volume=43, michael@0: number=10, michael@0: pages="2576--2581", michael@0: month=Oct, michael@0: year=1995 michael@0: }*/ michael@0: michael@0: michael@0: michael@0: /*The map from external spec-defined tokens to internal tokens. michael@0: This is constructed so that any extra bits read with the original token value michael@0: can be masked off the least significant bits of its internal token index. michael@0: In addition, all of the tokens which require additional extra bits are placed michael@0: at the start of the list, and grouped by type. michael@0: OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so michael@0: giving it index 0 may simplify comparisons on some architectures. michael@0: These requirements require some substantial reordering.*/ michael@0: static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={ michael@0: /*OC_DCT_EOB1_TOKEN (0 extra bits)*/ michael@0: 15, michael@0: /*OC_DCT_EOB2_TOKEN (0 extra bits)*/ michael@0: 16, michael@0: /*OC_DCT_EOB3_TOKEN (0 extra bits)*/ michael@0: 17, michael@0: /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/ michael@0: 88, michael@0: /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/ michael@0: 80, michael@0: /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/ michael@0: 1, michael@0: /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/ michael@0: 0, michael@0: /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/ michael@0: 48, michael@0: /*OC_DCT_ZRL_TOKEN (6 extra bits)*/ michael@0: 14, michael@0: /*OC_ONE_TOKEN (0 extra bits)*/ michael@0: 56, michael@0: /*OC_MINUS_ONE_TOKEN (0 extra bits)*/ michael@0: 57, michael@0: /*OC_TWO_TOKEN (0 extra bits)*/ michael@0: 58, michael@0: /*OC_MINUS_TWO_TOKEN (0 extra bits)*/ michael@0: 59, michael@0: /*OC_DCT_VAL_CAT2 (1 extra bit)*/ michael@0: 60, michael@0: 62, michael@0: 64, michael@0: 66, michael@0: /*OC_DCT_VAL_CAT3 (2 extra bits)*/ michael@0: 68, michael@0: /*OC_DCT_VAL_CAT4 (3 extra bits)*/ michael@0: 72, michael@0: /*OC_DCT_VAL_CAT5 (4 extra bits)*/ michael@0: 2, michael@0: /*OC_DCT_VAL_CAT6 (5 extra bits)*/ michael@0: 4, michael@0: /*OC_DCT_VAL_CAT7 (6 extra bits)*/ michael@0: 6, michael@0: /*OC_DCT_VAL_CAT8 (10 extra bits)*/ michael@0: 8, michael@0: /*OC_DCT_RUN_CAT1A (1 extra bit)*/ michael@0: 18, michael@0: 20, michael@0: 22, michael@0: 24, michael@0: 26, michael@0: /*OC_DCT_RUN_CAT1B (3 extra bits)*/ michael@0: 32, michael@0: /*OC_DCT_RUN_CAT1C (4 extra bits)*/ michael@0: 12, michael@0: /*OC_DCT_RUN_CAT2A (2 extra bits)*/ michael@0: 28, michael@0: /*OC_DCT_RUN_CAT2B (3 extra bits)*/ michael@0: 40 michael@0: }; michael@0: michael@0: /*The log base 2 of number of internal tokens associated with each of the spec michael@0: tokens (i.e., how many of the extra bits are folded into the token value). michael@0: Increasing the maximum value beyond 3 will enlarge the amount of stack michael@0: required for tree construction.*/ michael@0: static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={ michael@0: 0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3 michael@0: }; michael@0: michael@0: michael@0: /*The size a lookup table is allowed to grow to relative to the number of michael@0: unique nodes it contains. michael@0: E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is michael@0: wasted (1/4 of the space must be used). michael@0: Larger numbers can decode tokens with fewer read operations, while smaller michael@0: numbers may save more space. michael@0: With a sample file: michael@0: 32233473 read calls are required when no tree collapsing is done (100.0%). michael@0: 19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%). michael@0: 11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%). michael@0: 10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%). michael@0: 10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%). michael@0: Since a value of 2 gets us the vast majority of the speed-up with only a michael@0: small amount of wasted memory, this is what we use. michael@0: This value must be less than 128, or you could create a tree with more than michael@0: 32767 entries, which would overflow the 16-bit words used to index it.*/ michael@0: #define OC_HUFF_SLUSH (2) michael@0: /*The root of the tree is on the fast path, and a larger value here is more michael@0: beneficial than elsewhere in the tree. michael@0: 7 appears to give the best performance, trading off between increased use of michael@0: the single-read fast path and cache footprint for the tables, though michael@0: obviously this will depend on your cache size. michael@0: Using 7 here, the VP3 tables are about twice as large compared to using 2.*/ michael@0: #define OC_ROOT_HUFF_SLUSH (7) michael@0: michael@0: michael@0: michael@0: /*Unpacks a Huffman codebook. michael@0: _opb: The buffer to unpack from. michael@0: _tokens: Stores a list of internal tokens, in the order they were found in michael@0: the codebook, and the lengths of their corresponding codewords. michael@0: This is enough to completely define the codebook, while minimizing michael@0: stack usage and avoiding temporary allocations (for platforms michael@0: where free() is a no-op). michael@0: Return: The number of internal tokens in the codebook, or a negative value michael@0: on error.*/ michael@0: int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){ michael@0: ogg_uint32_t code; michael@0: int len; michael@0: int ntokens; michael@0: int nleaves; michael@0: code=0; michael@0: len=ntokens=nleaves=0; michael@0: for(;;){ michael@0: long bits; michael@0: bits=oc_pack_read1(_opb); michael@0: /*Only process nodes so long as there's more bits in the buffer.*/ michael@0: if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; michael@0: /*Read an internal node:*/ michael@0: if(!bits){ michael@0: len++; michael@0: /*Don't allow codewords longer than 32 bits.*/ michael@0: if(len>32)return TH_EBADHEADER; michael@0: } michael@0: /*Read a leaf node:*/ michael@0: else{ michael@0: ogg_uint32_t code_bit; michael@0: int neb; michael@0: int nentries; michael@0: int token; michael@0: /*Don't allow more than 32 spec-tokens per codebook.*/ michael@0: if(++nleaves>32)return TH_EBADHEADER; michael@0: bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS); michael@0: neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits]; michael@0: token=OC_DCT_TOKEN_MAP[bits]; michael@0: nentries=1<0){ michael@0: _tokens[ntokens][0]=(unsigned char)token++; michael@0: _tokens[ntokens][1]=(unsigned char)(len+neb); michael@0: ntokens++; michael@0: } michael@0: code_bit=0x80000000U>>len-1; michael@0: while(len>0&&(code&code_bit)){ michael@0: code^=code_bit; michael@0: code_bit<<=1; michael@0: len--; michael@0: } michael@0: if(len<=0)break; michael@0: code|=code_bit; michael@0: } michael@0: } michael@0: return ntokens; michael@0: } michael@0: michael@0: /*Count how many tokens would be required to fill a subtree at depth _depth. michael@0: _tokens: A list of internal tokens, in the order they are found in the michael@0: codebook, and the lengths of their corresponding codewords. michael@0: _depth: The depth of the desired node in the corresponding tree structure. michael@0: Return: The number of tokens that belong to that subtree.*/ michael@0: static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){ michael@0: ogg_uint32_t code; michael@0: int ti; michael@0: code=0; michael@0: ti=0; michael@0: do{ michael@0: if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth; michael@0: else{ michael@0: /*Because of the expanded internal tokens, we can have codewords as long michael@0: as 35 bits. michael@0: A single recursion here is enough to advance past them.*/ michael@0: code++; michael@0: ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31); michael@0: } michael@0: } michael@0: while(code<0x80000000U); michael@0: return ti; michael@0: } michael@0: michael@0: /*Compute the number of bits to use for a collapsed tree node at the given michael@0: depth. michael@0: _tokens: A list of internal tokens, in the order they are found in the michael@0: codebook, and the lengths of their corresponding codewords. michael@0: _ntokens: The number of tokens corresponding to this tree node. michael@0: _depth: The depth of this tree node. michael@0: Return: The number of bits to use for a collapsed tree node rooted here. michael@0: This is always at least one, even if this was a leaf node.*/ michael@0: static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2], michael@0: int _ntokens,int _depth){ michael@0: int got_leaves; michael@0: int loccupancy; michael@0: int occupancy; michael@0: int slush; michael@0: int nbits; michael@0: int best_nbits; michael@0: slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH; michael@0: /*It's legal to have a tree with just a single node, which requires no bits michael@0: to decode and always returns the same token. michael@0: However, no encoder actually does this (yet). michael@0: To avoid a special case in oc_huff_token_decode(), we force the number of michael@0: lookahead bits to be at least one. michael@0: This will produce a tree that looks ahead one bit and then advances the michael@0: stream zero bits.*/ michael@0: nbits=1; michael@0: occupancy=2; michael@0: got_leaves=1; michael@0: do{ michael@0: int ti; michael@0: if(got_leaves)best_nbits=nbits; michael@0: nbits++; michael@0: got_leaves=0; michael@0: loccupancy=occupancy; michael@0: for(occupancy=ti=0;ti<_ntokens;occupancy++){ michael@0: if(_tokens[ti][1]<_depth+nbits)ti++; michael@0: else if(_tokens[ti][1]==_depth+nbits){ michael@0: got_leaves=1; michael@0: ti++; michael@0: } michael@0: else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits); michael@0: } michael@0: } michael@0: while(occupancy>loccupancy&&occupancy*slush>=1<= 1700 michael@0: #pragma optimize( "", off ) michael@0: #endif michael@0: static size_t oc_huff_tree_collapse(ogg_int16_t *_tree, michael@0: unsigned char _tokens[][2],int _ntokens){ michael@0: ogg_int16_t node[34]; michael@0: unsigned char depth[34]; michael@0: unsigned char last[34]; michael@0: size_t ntree; michael@0: int ti; michael@0: int l; michael@0: depth[0]=0; michael@0: last[0]=(unsigned char)(_ntokens-1); michael@0: ntree=0; michael@0: ti=0; michael@0: l=0; michael@0: do{ michael@0: int nbits; michael@0: nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]); michael@0: node[l]=(ogg_int16_t)ntree; michael@0: ntree+=oc_huff_node_size(nbits); michael@0: if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits; michael@0: do{ michael@0: while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){ michael@0: if(_tree!=NULL){ michael@0: ogg_int16_t leaf; michael@0: int nentries; michael@0: nentries=1<0)_tree[node[l]++]=leaf; michael@0: } michael@0: ti++; michael@0: } michael@0: if(ti<=last[l]){ michael@0: /*We need to recurse*/ michael@0: depth[l+1]=(unsigned char)(depth[l]+nbits); michael@0: if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree; michael@0: l++; michael@0: last[l]= michael@0: (unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1); michael@0: break; michael@0: } michael@0: /*Pop back up a level of recursion.*/ michael@0: else if(l-->0)nbits=depth[l+1]-depth[l]; michael@0: } michael@0: while(l>=0); michael@0: } michael@0: while(l>=0); michael@0: return ntree; michael@0: } michael@0: #if defined(_MSC_VER) && _MSC_VER >= 1700 michael@0: #pragma optimize( "", on ) michael@0: #endif michael@0: michael@0: /*Unpacks a set of Huffman trees, and reduces them to a collapsed michael@0: representation. michael@0: _opb: The buffer to unpack the trees from. michael@0: _nodes: The table to fill with the Huffman trees. michael@0: Return: 0 on success, or a negative value on error. michael@0: The caller is responsible for cleaning up any partially initialized michael@0: _nodes on failure.*/ michael@0: int oc_huff_trees_unpack(oc_pack_buf *_opb, michael@0: ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){ michael@0: int i; michael@0: for(i=0;i32767)return TH_EIMPL; michael@0: tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree)); michael@0: if(tree==NULL)return TH_EFAULT; michael@0: /*Construct the collapsed the tree.*/ michael@0: oc_huff_tree_collapse(tree,tokens,ntokens); michael@0: _nodes[i]=tree; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: /*Determines the size in words of a Huffman subtree. michael@0: _tree: The complete Huffman tree. michael@0: _node: The index of the root of the desired subtree. michael@0: Return: The number of words required to store the tree.*/ michael@0: static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){ michael@0: size_t size; michael@0: int nchildren; michael@0: int n; michael@0: int i; michael@0: n=_tree[_node]; michael@0: size=oc_huff_node_size(n); michael@0: nchildren=1<>8); michael@0: else{ michael@0: size+=oc_huff_tree_size(_tree,child); michael@0: i++; michael@0: } michael@0: } michael@0: while(i0)_ogg_free(_dst[i]); michael@0: return TH_EFAULT; michael@0: } michael@0: memcpy(_dst[i],_src[i],size*sizeof(*_dst[i])); michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: /*Frees the memory used by a set of Huffman trees. michael@0: _nodes: The array of trees to free.*/ michael@0: void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){ michael@0: int i; michael@0: for(i=0;iptr; michael@0: window=_opb->window; michael@0: stop=_opb->stop; michael@0: available=_opb->bits; michael@0: node=0; michael@0: for(;;){ michael@0: n=_tree[node]; michael@0: if(n>available){ michael@0: unsigned shift; michael@0: shift=OC_PB_WINDOW_SIZE-available; michael@0: do{ michael@0: /*We don't bother setting eof because we won't check for it after we've michael@0: started decoding DCT tokens.*/ michael@0: if(ptr>=stop){ michael@0: shift=(unsigned)-OC_LOTS_OF_BITS; michael@0: break; michael@0: } michael@0: shift-=8; michael@0: window|=(oc_pb_window)*ptr++<=8); michael@0: /*Note: We never request more than 24 bits, so there's no need to fill in michael@0: the last partial byte here.*/ michael@0: available=OC_PB_WINDOW_SIZE-shift; michael@0: } michael@0: bits=window>>OC_PB_WINDOW_SIZE-n; michael@0: node=_tree[node+1+bits]; michael@0: if(node<=0)break; michael@0: window<<=n; michael@0: available-=n; michael@0: } michael@0: node=-node; michael@0: n=node>>8; michael@0: window<<=n; michael@0: available-=n; michael@0: _opb->ptr=ptr; michael@0: _opb->window=window; michael@0: _opb->bits=available; michael@0: return node&255; michael@0: }