Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 * *
11 ********************************************************************
13 function:
14 last mod: $Id: huffdec.c 17577 2010-10-29 04:00:07Z tterribe $
16 ********************************************************************/
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ogg/ogg.h>
21 #include "huffdec.h"
22 #include "decint.h"
26 /*Instead of storing every branching in the tree, subtrees can be collapsed
27 into one node, with a table of size 1<<nbits pointing directly to its
28 descedents nbits levels down.
29 This allows more than one bit to be read at a time, and avoids following all
30 the intermediate branches with next to no increased code complexity once
31 the collapsed tree has been built.
32 We do _not_ require that a subtree be complete to be collapsed, but instead
33 store duplicate pointers in the table, and record the actual depth of the
34 node below its parent.
35 This tells us the number of bits to advance the stream after reaching it.
37 This turns out to be equivalent to the method described in \cite{Hash95},
38 without the requirement that codewords be sorted by length.
39 If the codewords were sorted by length (so-called ``canonical-codes''), they
40 could be decoded much faster via either Lindell and Moffat's approach or
41 Hashemian's Condensed Huffman Code approach, the latter of which has an
42 extremely small memory footprint.
43 We can't use Choueka et al.'s finite state machine approach, which is
44 extremely fast, because we can't allow multiple symbols to be output at a
45 time; the codebook can and does change between symbols.
46 It also has very large memory requirements, which impairs cache coherency.
48 We store the tree packed in an array of 16-bit integers (words).
49 Each node consists of a single word, followed consecutively by two or more
50 indices of its children.
51 Let n be the value of this first word.
52 This is the number of bits that need to be read to traverse the node, and
53 must be positive.
54 1<<n entries follow in the array, each an index to a child node.
55 If the child is positive, then it is the index of another internal node in
56 the table.
57 If the child is negative or zero, then it is a leaf node.
58 These are stored directly in the child pointer to save space, since they only
59 require a single word.
60 If a leaf node would have been encountered before reading n bits, then it is
61 duplicated the necessary number of times in this table.
62 Leaf nodes pack both a token value and their actual depth in the tree.
63 The token in the leaf node is (-leaf&255).
64 The number of bits that need to be consumed to reach the leaf, starting from
65 the current node, is (-leaf>>8).
67 @ARTICLE{Hash95,
68 author="Reza Hashemian",
69 title="Memory Efficient and High-Speed Search {Huffman} Coding",
70 journal="{IEEE} Transactions on Communications",
71 volume=43,
72 number=10,
73 pages="2576--2581",
74 month=Oct,
75 year=1995
76 }*/
80 /*The map from external spec-defined tokens to internal tokens.
81 This is constructed so that any extra bits read with the original token value
82 can be masked off the least significant bits of its internal token index.
83 In addition, all of the tokens which require additional extra bits are placed
84 at the start of the list, and grouped by type.
85 OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
86 giving it index 0 may simplify comparisons on some architectures.
87 These requirements require some substantial reordering.*/
88 static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
89 /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
90 15,
91 /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
92 16,
93 /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
94 17,
95 /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
96 88,
97 /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
98 80,
99 /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
100 1,
101 /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
102 0,
103 /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
104 48,
105 /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
106 14,
107 /*OC_ONE_TOKEN (0 extra bits)*/
108 56,
109 /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
110 57,
111 /*OC_TWO_TOKEN (0 extra bits)*/
112 58,
113 /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
114 59,
115 /*OC_DCT_VAL_CAT2 (1 extra bit)*/
116 60,
117 62,
118 64,
119 66,
120 /*OC_DCT_VAL_CAT3 (2 extra bits)*/
121 68,
122 /*OC_DCT_VAL_CAT4 (3 extra bits)*/
123 72,
124 /*OC_DCT_VAL_CAT5 (4 extra bits)*/
125 2,
126 /*OC_DCT_VAL_CAT6 (5 extra bits)*/
127 4,
128 /*OC_DCT_VAL_CAT7 (6 extra bits)*/
129 6,
130 /*OC_DCT_VAL_CAT8 (10 extra bits)*/
131 8,
132 /*OC_DCT_RUN_CAT1A (1 extra bit)*/
133 18,
134 20,
135 22,
136 24,
137 26,
138 /*OC_DCT_RUN_CAT1B (3 extra bits)*/
139 32,
140 /*OC_DCT_RUN_CAT1C (4 extra bits)*/
141 12,
142 /*OC_DCT_RUN_CAT2A (2 extra bits)*/
143 28,
144 /*OC_DCT_RUN_CAT2B (3 extra bits)*/
145 40
146 };
148 /*The log base 2 of number of internal tokens associated with each of the spec
149 tokens (i.e., how many of the extra bits are folded into the token value).
150 Increasing the maximum value beyond 3 will enlarge the amount of stack
151 required for tree construction.*/
152 static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={
153 0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3
154 };
157 /*The size a lookup table is allowed to grow to relative to the number of
158 unique nodes it contains.
159 E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is
160 wasted (1/4 of the space must be used).
161 Larger numbers can decode tokens with fewer read operations, while smaller
162 numbers may save more space.
163 With a sample file:
164 32233473 read calls are required when no tree collapsing is done (100.0%).
165 19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%).
166 11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%).
167 10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%).
168 10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%).
169 Since a value of 2 gets us the vast majority of the speed-up with only a
170 small amount of wasted memory, this is what we use.
171 This value must be less than 128, or you could create a tree with more than
172 32767 entries, which would overflow the 16-bit words used to index it.*/
173 #define OC_HUFF_SLUSH (2)
174 /*The root of the tree is on the fast path, and a larger value here is more
175 beneficial than elsewhere in the tree.
176 7 appears to give the best performance, trading off between increased use of
177 the single-read fast path and cache footprint for the tables, though
178 obviously this will depend on your cache size.
179 Using 7 here, the VP3 tables are about twice as large compared to using 2.*/
180 #define OC_ROOT_HUFF_SLUSH (7)
184 /*Unpacks a Huffman codebook.
185 _opb: The buffer to unpack from.
186 _tokens: Stores a list of internal tokens, in the order they were found in
187 the codebook, and the lengths of their corresponding codewords.
188 This is enough to completely define the codebook, while minimizing
189 stack usage and avoiding temporary allocations (for platforms
190 where free() is a no-op).
191 Return: The number of internal tokens in the codebook, or a negative value
192 on error.*/
193 int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){
194 ogg_uint32_t code;
195 int len;
196 int ntokens;
197 int nleaves;
198 code=0;
199 len=ntokens=nleaves=0;
200 for(;;){
201 long bits;
202 bits=oc_pack_read1(_opb);
203 /*Only process nodes so long as there's more bits in the buffer.*/
204 if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
205 /*Read an internal node:*/
206 if(!bits){
207 len++;
208 /*Don't allow codewords longer than 32 bits.*/
209 if(len>32)return TH_EBADHEADER;
210 }
211 /*Read a leaf node:*/
212 else{
213 ogg_uint32_t code_bit;
214 int neb;
215 int nentries;
216 int token;
217 /*Don't allow more than 32 spec-tokens per codebook.*/
218 if(++nleaves>32)return TH_EBADHEADER;
219 bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
220 neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits];
221 token=OC_DCT_TOKEN_MAP[bits];
222 nentries=1<<neb;
223 while(nentries-->0){
224 _tokens[ntokens][0]=(unsigned char)token++;
225 _tokens[ntokens][1]=(unsigned char)(len+neb);
226 ntokens++;
227 }
228 code_bit=0x80000000U>>len-1;
229 while(len>0&&(code&code_bit)){
230 code^=code_bit;
231 code_bit<<=1;
232 len--;
233 }
234 if(len<=0)break;
235 code|=code_bit;
236 }
237 }
238 return ntokens;
239 }
241 /*Count how many tokens would be required to fill a subtree at depth _depth.
242 _tokens: A list of internal tokens, in the order they are found in the
243 codebook, and the lengths of their corresponding codewords.
244 _depth: The depth of the desired node in the corresponding tree structure.
245 Return: The number of tokens that belong to that subtree.*/
246 static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){
247 ogg_uint32_t code;
248 int ti;
249 code=0;
250 ti=0;
251 do{
252 if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth;
253 else{
254 /*Because of the expanded internal tokens, we can have codewords as long
255 as 35 bits.
256 A single recursion here is enough to advance past them.*/
257 code++;
258 ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31);
259 }
260 }
261 while(code<0x80000000U);
262 return ti;
263 }
265 /*Compute the number of bits to use for a collapsed tree node at the given
266 depth.
267 _tokens: A list of internal tokens, in the order they are found in the
268 codebook, and the lengths of their corresponding codewords.
269 _ntokens: The number of tokens corresponding to this tree node.
270 _depth: The depth of this tree node.
271 Return: The number of bits to use for a collapsed tree node rooted here.
272 This is always at least one, even if this was a leaf node.*/
273 static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2],
274 int _ntokens,int _depth){
275 int got_leaves;
276 int loccupancy;
277 int occupancy;
278 int slush;
279 int nbits;
280 int best_nbits;
281 slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
282 /*It's legal to have a tree with just a single node, which requires no bits
283 to decode and always returns the same token.
284 However, no encoder actually does this (yet).
285 To avoid a special case in oc_huff_token_decode(), we force the number of
286 lookahead bits to be at least one.
287 This will produce a tree that looks ahead one bit and then advances the
288 stream zero bits.*/
289 nbits=1;
290 occupancy=2;
291 got_leaves=1;
292 do{
293 int ti;
294 if(got_leaves)best_nbits=nbits;
295 nbits++;
296 got_leaves=0;
297 loccupancy=occupancy;
298 for(occupancy=ti=0;ti<_ntokens;occupancy++){
299 if(_tokens[ti][1]<_depth+nbits)ti++;
300 else if(_tokens[ti][1]==_depth+nbits){
301 got_leaves=1;
302 ti++;
303 }
304 else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits);
305 }
306 }
307 while(occupancy>loccupancy&&occupancy*slush>=1<<nbits);
308 return best_nbits;
309 }
311 /*Determines the size in words of a Huffman tree node that represents a
312 subtree of depth _nbits.
313 _nbits: The depth of the subtree.
314 This must be greater than zero.
315 Return: The number of words required to store the node.*/
316 static size_t oc_huff_node_size(int _nbits){
317 return 1+(1<<_nbits);
318 }
320 /*Produces a collapsed-tree representation of the given token list.
321 _tree: The storage for the collapsed Huffman tree.
322 This may be NULL to compute the required storage size instead of
323 constructing the tree.
324 _tokens: A list of internal tokens, in the order they are found in the
325 codebook, and the lengths of their corresponding codewords.
326 _ntokens: The number of tokens corresponding to this tree node.
327 Return: The number of words required to store the tree.*/
328 #if defined(_MSC_VER) && _MSC_VER >= 1700
329 #pragma optimize( "", off )
330 #endif
331 static size_t oc_huff_tree_collapse(ogg_int16_t *_tree,
332 unsigned char _tokens[][2],int _ntokens){
333 ogg_int16_t node[34];
334 unsigned char depth[34];
335 unsigned char last[34];
336 size_t ntree;
337 int ti;
338 int l;
339 depth[0]=0;
340 last[0]=(unsigned char)(_ntokens-1);
341 ntree=0;
342 ti=0;
343 l=0;
344 do{
345 int nbits;
346 nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]);
347 node[l]=(ogg_int16_t)ntree;
348 ntree+=oc_huff_node_size(nbits);
349 if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits;
350 do{
351 while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){
352 if(_tree!=NULL){
353 ogg_int16_t leaf;
354 int nentries;
355 nentries=1<<depth[l]+nbits-_tokens[ti][1];
356 leaf=(ogg_int16_t)-(_tokens[ti][1]-depth[l]<<8|_tokens[ti][0]);
357 while(nentries-->0)_tree[node[l]++]=leaf;
358 }
359 ti++;
360 }
361 if(ti<=last[l]){
362 /*We need to recurse*/
363 depth[l+1]=(unsigned char)(depth[l]+nbits);
364 if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree;
365 l++;
366 last[l]=
367 (unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1);
368 break;
369 }
370 /*Pop back up a level of recursion.*/
371 else if(l-->0)nbits=depth[l+1]-depth[l];
372 }
373 while(l>=0);
374 }
375 while(l>=0);
376 return ntree;
377 }
378 #if defined(_MSC_VER) && _MSC_VER >= 1700
379 #pragma optimize( "", on )
380 #endif
382 /*Unpacks a set of Huffman trees, and reduces them to a collapsed
383 representation.
384 _opb: The buffer to unpack the trees from.
385 _nodes: The table to fill with the Huffman trees.
386 Return: 0 on success, or a negative value on error.
387 The caller is responsible for cleaning up any partially initialized
388 _nodes on failure.*/
389 int oc_huff_trees_unpack(oc_pack_buf *_opb,
390 ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
391 int i;
392 for(i=0;i<TH_NHUFFMAN_TABLES;i++){
393 unsigned char tokens[256][2];
394 int ntokens;
395 ogg_int16_t *tree;
396 size_t size;
397 /*Unpack the full tree into a temporary buffer.*/
398 ntokens=oc_huff_tree_unpack(_opb,tokens);
399 if(ntokens<0)return ntokens;
400 /*Figure out how big the collapsed tree will be and allocate space for it.*/
401 size=oc_huff_tree_collapse(NULL,tokens,ntokens);
402 /*This should never happen; if it does it means you set OC_HUFF_SLUSH or
403 OC_ROOT_HUFF_SLUSH too large.*/
404 if(size>32767)return TH_EIMPL;
405 tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
406 if(tree==NULL)return TH_EFAULT;
407 /*Construct the collapsed the tree.*/
408 oc_huff_tree_collapse(tree,tokens,ntokens);
409 _nodes[i]=tree;
410 }
411 return 0;
412 }
414 /*Determines the size in words of a Huffman subtree.
415 _tree: The complete Huffman tree.
416 _node: The index of the root of the desired subtree.
417 Return: The number of words required to store the tree.*/
418 static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
419 size_t size;
420 int nchildren;
421 int n;
422 int i;
423 n=_tree[_node];
424 size=oc_huff_node_size(n);
425 nchildren=1<<n;
426 i=0;
427 do{
428 int child;
429 child=_tree[_node+i+1];
430 if(child<=0)i+=1<<n-(-child>>8);
431 else{
432 size+=oc_huff_tree_size(_tree,child);
433 i++;
434 }
435 }
436 while(i<nchildren);
437 return size;
438 }
440 /*Makes a copy of the given set of Huffman trees.
441 _dst: The array to store the copy in.
442 _src: The array of trees to copy.*/
443 int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
444 const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
445 int total;
446 int i;
447 total=0;
448 for(i=0;i<TH_NHUFFMAN_TABLES;i++){
449 size_t size;
450 size=oc_huff_tree_size(_src[i],0);
451 total+=size;
452 _dst[i]=(ogg_int16_t *)_ogg_malloc(size*sizeof(*_dst[i]));
453 if(_dst[i]==NULL){
454 while(i-->0)_ogg_free(_dst[i]);
455 return TH_EFAULT;
456 }
457 memcpy(_dst[i],_src[i],size*sizeof(*_dst[i]));
458 }
459 return 0;
460 }
462 /*Frees the memory used by a set of Huffman trees.
463 _nodes: The array of trees to free.*/
464 void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
465 int i;
466 for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
467 }
470 /*Unpacks a single token using the given Huffman tree.
471 _opb: The buffer to unpack the token from.
472 _node: The tree to unpack the token with.
473 Return: The token value.*/
474 int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree){
475 const unsigned char *ptr;
476 const unsigned char *stop;
477 oc_pb_window window;
478 int available;
479 long bits;
480 int node;
481 int n;
482 ptr=_opb->ptr;
483 window=_opb->window;
484 stop=_opb->stop;
485 available=_opb->bits;
486 node=0;
487 for(;;){
488 n=_tree[node];
489 if(n>available){
490 unsigned shift;
491 shift=OC_PB_WINDOW_SIZE-available;
492 do{
493 /*We don't bother setting eof because we won't check for it after we've
494 started decoding DCT tokens.*/
495 if(ptr>=stop){
496 shift=(unsigned)-OC_LOTS_OF_BITS;
497 break;
498 }
499 shift-=8;
500 window|=(oc_pb_window)*ptr++<<shift;
501 }
502 while(shift>=8);
503 /*Note: We never request more than 24 bits, so there's no need to fill in
504 the last partial byte here.*/
505 available=OC_PB_WINDOW_SIZE-shift;
506 }
507 bits=window>>OC_PB_WINDOW_SIZE-n;
508 node=_tree[node+1+bits];
509 if(node<=0)break;
510 window<<=n;
511 available-=n;
512 }
513 node=-node;
514 n=node>>8;
515 window<<=n;
516 available-=n;
517 _opb->ptr=ptr;
518 _opb->window=window;
519 _opb->bits=available;
520 return node&255;
521 }