media/libtheora/lib/huffdec.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /********************************************************************
     2  *                                                                  *
     3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
     4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
     5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
     6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
     7  *                                                                  *
     8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
     9  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
    10  *                                                                  *
    11  ********************************************************************
    13   function:
    14     last mod: $Id: huffdec.c 17577 2010-10-29 04:00:07Z tterribe $
    16  ********************************************************************/
    18 #include <stdlib.h>
    19 #include <string.h>
    20 #include <ogg/ogg.h>
    21 #include "huffdec.h"
    22 #include "decint.h"
    26 /*Instead of storing every branching in the tree, subtrees can be collapsed
    27    into one node, with a table of size 1<<nbits pointing directly to its
    28    descedents nbits levels down.
    29   This allows more than one bit to be read at a time, and avoids following all
    30    the intermediate branches with next to no increased code complexity once
    31    the collapsed tree has been built.
    32   We do _not_ require that a subtree be complete to be collapsed, but instead
    33    store duplicate pointers in the table, and record the actual depth of the
    34    node below its parent.
    35   This tells us the number of bits to advance the stream after reaching it.
    37   This turns out to be equivalent to the method described in \cite{Hash95},
    38    without the requirement that codewords be sorted by length.
    39   If the codewords were sorted by length (so-called ``canonical-codes''), they
    40    could be decoded much faster via either Lindell and Moffat's approach or
    41    Hashemian's Condensed Huffman Code approach, the latter of which has an
    42    extremely small memory footprint.
    43   We can't use Choueka et al.'s finite state machine approach, which is
    44    extremely fast, because we can't allow multiple symbols to be output at a
    45    time; the codebook can and does change between symbols.
    46   It also has very large memory requirements, which impairs cache coherency.
    48   We store the tree packed in an array of 16-bit integers (words).
    49   Each node consists of a single word, followed consecutively by two or more
    50    indices of its children.
    51   Let n be the value of this first word.
    52   This is the number of bits that need to be read to traverse the node, and
    53    must be positive.
    54   1<<n entries follow in the array, each an index to a child node.
    55   If the child is positive, then it is the index of another internal node in
    56    the table.
    57   If the child is negative or zero, then it is a leaf node.
    58   These are stored directly in the child pointer to save space, since they only
    59    require a single word.
    60   If a leaf node would have been encountered before reading n bits, then it is
    61    duplicated the necessary number of times in this table.
    62   Leaf nodes pack both a token value and their actual depth in the tree.
    63   The token in the leaf node is (-leaf&255).
    64   The number of bits that need to be consumed to reach the leaf, starting from
    65    the current node, is (-leaf>>8).
    67   @ARTICLE{Hash95,
    68     author="Reza Hashemian",
    69     title="Memory Efficient and High-Speed Search {Huffman} Coding",
    70     journal="{IEEE} Transactions on Communications",
    71     volume=43,
    72     number=10,
    73     pages="2576--2581",
    74     month=Oct,
    75     year=1995
    76   }*/
    80 /*The map from external spec-defined tokens to internal tokens.
    81   This is constructed so that any extra bits read with the original token value
    82    can be masked off the least significant bits of its internal token index.
    83   In addition, all of the tokens which require additional extra bits are placed
    84    at the start of the list, and grouped by type.
    85   OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
    86    giving it index 0 may simplify comparisons on some architectures.
    87   These requirements require some substantial reordering.*/
    88 static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
    89   /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
    90   15,
    91   /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
    92   16,
    93   /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
    94   17,
    95   /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
    96   88,
    97   /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
    98   80,
    99   /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
   100    1,
   101   /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
   102    0,
   103   /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
   104   48,
   105   /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
   106   14,
   107   /*OC_ONE_TOKEN (0 extra bits)*/
   108   56,
   109   /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
   110   57,
   111   /*OC_TWO_TOKEN (0 extra bits)*/
   112   58,
   113   /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
   114   59,
   115   /*OC_DCT_VAL_CAT2 (1 extra bit)*/
   116   60,
   117   62,
   118   64,
   119   66,
   120   /*OC_DCT_VAL_CAT3 (2 extra bits)*/
   121   68,
   122   /*OC_DCT_VAL_CAT4 (3 extra bits)*/
   123   72,
   124   /*OC_DCT_VAL_CAT5 (4 extra bits)*/
   125    2,
   126   /*OC_DCT_VAL_CAT6 (5 extra bits)*/
   127    4,
   128   /*OC_DCT_VAL_CAT7 (6 extra bits)*/
   129    6,
   130   /*OC_DCT_VAL_CAT8 (10 extra bits)*/
   131    8,
   132   /*OC_DCT_RUN_CAT1A (1 extra bit)*/
   133   18,
   134   20,
   135   22,
   136   24,
   137   26,
   138   /*OC_DCT_RUN_CAT1B (3 extra bits)*/
   139   32,
   140   /*OC_DCT_RUN_CAT1C (4 extra bits)*/
   141   12,
   142   /*OC_DCT_RUN_CAT2A (2 extra bits)*/
   143   28,
   144   /*OC_DCT_RUN_CAT2B (3 extra bits)*/
   145   40
   146 };
   148 /*The log base 2 of number of internal tokens associated with each of the spec
   149    tokens (i.e., how many of the extra bits are folded into the token value).
   150   Increasing the maximum value beyond 3 will enlarge the amount of stack
   151    required for tree construction.*/
   152 static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={
   153   0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3
   154 };
   157 /*The size a lookup table is allowed to grow to relative to the number of
   158    unique nodes it contains.
   159   E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is
   160    wasted (1/4 of the space must be used).
   161   Larger numbers can decode tokens with fewer read operations, while smaller
   162    numbers may save more space.
   163   With a sample file:
   164   32233473 read calls are required when no tree collapsing is done (100.0%).
   165   19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%).
   166   11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%).
   167   10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%).
   168   10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%).
   169   Since a value of 2 gets us the vast majority of the speed-up with only a
   170    small amount of wasted memory, this is what we use.
   171   This value must be less than 128, or you could create a tree with more than
   172    32767 entries, which would overflow the 16-bit words used to index it.*/
   173 #define OC_HUFF_SLUSH (2)
   174 /*The root of the tree is on the fast path, and a larger value here is more
   175    beneficial than elsewhere in the tree.
   176   7 appears to give the best performance, trading off between increased use of
   177    the single-read fast path and cache footprint for the tables, though
   178    obviously this will depend on your cache size.
   179   Using 7 here, the VP3 tables are about twice as large compared to using 2.*/
   180 #define OC_ROOT_HUFF_SLUSH (7)
   184 /*Unpacks a Huffman codebook.
   185   _opb:    The buffer to unpack from.
   186   _tokens: Stores a list of internal tokens, in the order they were found in
   187             the codebook, and the lengths of their corresponding codewords.
   188            This is enough to completely define the codebook, while minimizing
   189             stack usage and avoiding temporary allocations (for platforms
   190             where free() is a no-op).
   191   Return: The number of internal tokens in the codebook, or a negative value
   192    on error.*/
   193 int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){
   194   ogg_uint32_t code;
   195   int          len;
   196   int          ntokens;
   197   int          nleaves;
   198   code=0;
   199   len=ntokens=nleaves=0;
   200   for(;;){
   201     long bits;
   202     bits=oc_pack_read1(_opb);
   203     /*Only process nodes so long as there's more bits in the buffer.*/
   204     if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
   205     /*Read an internal node:*/
   206     if(!bits){
   207       len++;
   208       /*Don't allow codewords longer than 32 bits.*/
   209       if(len>32)return TH_EBADHEADER;
   210     }
   211     /*Read a leaf node:*/
   212     else{
   213       ogg_uint32_t code_bit;
   214       int          neb;
   215       int          nentries;
   216       int          token;
   217       /*Don't allow more than 32 spec-tokens per codebook.*/
   218       if(++nleaves>32)return TH_EBADHEADER;
   219       bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
   220       neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits];
   221       token=OC_DCT_TOKEN_MAP[bits];
   222       nentries=1<<neb;
   223       while(nentries-->0){
   224         _tokens[ntokens][0]=(unsigned char)token++;
   225         _tokens[ntokens][1]=(unsigned char)(len+neb);
   226         ntokens++;
   227       }
   228       code_bit=0x80000000U>>len-1;
   229       while(len>0&&(code&code_bit)){
   230         code^=code_bit;
   231         code_bit<<=1;
   232         len--;
   233       }
   234       if(len<=0)break;
   235       code|=code_bit;
   236     }
   237   }
   238   return ntokens;
   239 }
   241 /*Count how many tokens would be required to fill a subtree at depth _depth.
   242   _tokens: A list of internal tokens, in the order they are found in the
   243             codebook, and the lengths of their corresponding codewords.
   244   _depth:  The depth of the desired node in the corresponding tree structure.
   245   Return: The number of tokens that belong to that subtree.*/
   246 static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){
   247   ogg_uint32_t code;
   248   int          ti;
   249   code=0;
   250   ti=0;
   251   do{
   252     if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth;
   253     else{
   254       /*Because of the expanded internal tokens, we can have codewords as long
   255          as 35 bits.
   256         A single recursion here is enough to advance past them.*/
   257       code++;
   258       ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31);
   259     }
   260   }
   261   while(code<0x80000000U);
   262   return ti;
   263 }
   265 /*Compute the number of bits to use for a collapsed tree node at the given
   266    depth.
   267   _tokens:  A list of internal tokens, in the order they are found in the
   268              codebook, and the lengths of their corresponding codewords.
   269   _ntokens: The number of tokens corresponding to this tree node.
   270   _depth:   The depth of this tree node.
   271   Return: The number of bits to use for a collapsed tree node rooted here.
   272           This is always at least one, even if this was a leaf node.*/
   273 static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2],
   274  int _ntokens,int _depth){
   275   int got_leaves;
   276   int loccupancy;
   277   int occupancy;
   278   int slush;
   279   int nbits;
   280   int best_nbits;
   281   slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
   282   /*It's legal to have a tree with just a single node, which requires no bits
   283      to decode and always returns the same token.
   284     However, no encoder actually does this (yet).
   285     To avoid a special case in oc_huff_token_decode(), we force the number of
   286      lookahead bits to be at least one.
   287     This will produce a tree that looks ahead one bit and then advances the
   288      stream zero bits.*/
   289   nbits=1;
   290   occupancy=2;
   291   got_leaves=1;
   292   do{
   293     int ti;
   294     if(got_leaves)best_nbits=nbits;
   295     nbits++;
   296     got_leaves=0;
   297     loccupancy=occupancy;
   298     for(occupancy=ti=0;ti<_ntokens;occupancy++){
   299       if(_tokens[ti][1]<_depth+nbits)ti++;
   300       else if(_tokens[ti][1]==_depth+nbits){
   301         got_leaves=1;
   302         ti++;
   303       }
   304       else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits);
   305     }
   306   }
   307   while(occupancy>loccupancy&&occupancy*slush>=1<<nbits);
   308   return best_nbits;
   309 }
   311 /*Determines the size in words of a Huffman tree node that represents a
   312    subtree of depth _nbits.
   313   _nbits: The depth of the subtree.
   314           This must be greater than zero.
   315   Return: The number of words required to store the node.*/
   316 static size_t oc_huff_node_size(int _nbits){
   317   return 1+(1<<_nbits);
   318 }
   320 /*Produces a collapsed-tree representation of the given token list.
   321   _tree: The storage for the collapsed Huffman tree.
   322          This may be NULL to compute the required storage size instead of
   323           constructing the tree.
   324   _tokens:  A list of internal tokens, in the order they are found in the
   325              codebook, and the lengths of their corresponding codewords.
   326   _ntokens: The number of tokens corresponding to this tree node.
   327   Return: The number of words required to store the tree.*/
   328 #if defined(_MSC_VER) && _MSC_VER >= 1700
   329 #pragma optimize( "", off )
   330 #endif
   331 static size_t oc_huff_tree_collapse(ogg_int16_t *_tree,
   332  unsigned char _tokens[][2],int _ntokens){
   333   ogg_int16_t   node[34];
   334   unsigned char depth[34];
   335   unsigned char last[34];
   336   size_t        ntree;
   337   int           ti;
   338   int           l;
   339   depth[0]=0;
   340   last[0]=(unsigned char)(_ntokens-1);
   341   ntree=0;
   342   ti=0;
   343   l=0;
   344   do{
   345     int nbits;
   346     nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]);
   347     node[l]=(ogg_int16_t)ntree;
   348     ntree+=oc_huff_node_size(nbits);
   349     if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits;
   350     do{
   351       while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){
   352         if(_tree!=NULL){
   353           ogg_int16_t leaf;
   354           int         nentries;
   355           nentries=1<<depth[l]+nbits-_tokens[ti][1];
   356           leaf=(ogg_int16_t)-(_tokens[ti][1]-depth[l]<<8|_tokens[ti][0]);
   357           while(nentries-->0)_tree[node[l]++]=leaf;
   358         }
   359         ti++;
   360       }
   361       if(ti<=last[l]){
   362         /*We need to recurse*/
   363         depth[l+1]=(unsigned char)(depth[l]+nbits);
   364         if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree;
   365         l++;
   366         last[l]=
   367          (unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1);
   368         break;
   369       }
   370       /*Pop back up a level of recursion.*/
   371       else if(l-->0)nbits=depth[l+1]-depth[l];
   372     }
   373     while(l>=0);
   374   }
   375   while(l>=0);
   376   return ntree;
   377 }
   378 #if defined(_MSC_VER) && _MSC_VER >= 1700
   379 #pragma optimize( "", on )
   380 #endif
   382 /*Unpacks a set of Huffman trees, and reduces them to a collapsed
   383    representation.
   384   _opb:   The buffer to unpack the trees from.
   385   _nodes: The table to fill with the Huffman trees.
   386   Return: 0 on success, or a negative value on error.
   387           The caller is responsible for cleaning up any partially initialized
   388            _nodes on failure.*/
   389 int oc_huff_trees_unpack(oc_pack_buf *_opb,
   390  ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
   391   int i;
   392   for(i=0;i<TH_NHUFFMAN_TABLES;i++){
   393     unsigned char  tokens[256][2];
   394     int            ntokens;
   395     ogg_int16_t   *tree;
   396     size_t         size;
   397     /*Unpack the full tree into a temporary buffer.*/
   398     ntokens=oc_huff_tree_unpack(_opb,tokens);
   399     if(ntokens<0)return ntokens;
   400     /*Figure out how big the collapsed tree will be and allocate space for it.*/
   401     size=oc_huff_tree_collapse(NULL,tokens,ntokens);
   402     /*This should never happen; if it does it means you set OC_HUFF_SLUSH or
   403        OC_ROOT_HUFF_SLUSH too large.*/
   404     if(size>32767)return TH_EIMPL;
   405     tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
   406     if(tree==NULL)return TH_EFAULT;
   407     /*Construct the collapsed the tree.*/
   408     oc_huff_tree_collapse(tree,tokens,ntokens);
   409     _nodes[i]=tree;
   410   }
   411   return 0;
   412 }
   414 /*Determines the size in words of a Huffman subtree.
   415   _tree: The complete Huffman tree.
   416   _node: The index of the root of the desired subtree.
   417   Return: The number of words required to store the tree.*/
   418 static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
   419   size_t size;
   420   int    nchildren;
   421   int    n;
   422   int    i;
   423   n=_tree[_node];
   424   size=oc_huff_node_size(n);
   425   nchildren=1<<n;
   426   i=0;
   427   do{
   428     int child;
   429     child=_tree[_node+i+1];
   430     if(child<=0)i+=1<<n-(-child>>8);
   431     else{
   432       size+=oc_huff_tree_size(_tree,child);
   433       i++;
   434     }
   435   }
   436   while(i<nchildren);
   437   return size;
   438 }
   440 /*Makes a copy of the given set of Huffman trees.
   441   _dst: The array to store the copy in.
   442   _src: The array of trees to copy.*/
   443 int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
   444  const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
   445   int total;
   446   int i;
   447   total=0;
   448   for(i=0;i<TH_NHUFFMAN_TABLES;i++){
   449     size_t size;
   450     size=oc_huff_tree_size(_src[i],0);
   451     total+=size;
   452     _dst[i]=(ogg_int16_t *)_ogg_malloc(size*sizeof(*_dst[i]));
   453     if(_dst[i]==NULL){
   454       while(i-->0)_ogg_free(_dst[i]);
   455       return TH_EFAULT;
   456     }
   457     memcpy(_dst[i],_src[i],size*sizeof(*_dst[i]));
   458   }
   459   return 0;
   460 }
   462 /*Frees the memory used by a set of Huffman trees.
   463   _nodes: The array of trees to free.*/
   464 void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
   465   int i;
   466   for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
   467 }
   470 /*Unpacks a single token using the given Huffman tree.
   471   _opb:  The buffer to unpack the token from.
   472   _node: The tree to unpack the token with.
   473   Return: The token value.*/
   474 int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree){
   475   const unsigned char *ptr;
   476   const unsigned char *stop;
   477   oc_pb_window         window;
   478   int                  available;
   479   long                 bits;
   480   int                  node;
   481   int                  n;
   482   ptr=_opb->ptr;
   483   window=_opb->window;
   484   stop=_opb->stop;
   485   available=_opb->bits;
   486   node=0;
   487   for(;;){
   488     n=_tree[node];
   489     if(n>available){
   490       unsigned shift;
   491       shift=OC_PB_WINDOW_SIZE-available;
   492       do{
   493         /*We don't bother setting eof because we won't check for it after we've
   494            started decoding DCT tokens.*/
   495         if(ptr>=stop){
   496           shift=(unsigned)-OC_LOTS_OF_BITS;
   497           break;
   498         }
   499         shift-=8;
   500         window|=(oc_pb_window)*ptr++<<shift;
   501       }
   502       while(shift>=8);
   503       /*Note: We never request more than 24 bits, so there's no need to fill in
   504          the last partial byte here.*/
   505       available=OC_PB_WINDOW_SIZE-shift;
   506     }
   507     bits=window>>OC_PB_WINDOW_SIZE-n;
   508     node=_tree[node+1+bits];
   509     if(node<=0)break;
   510     window<<=n;
   511     available-=n;
   512   }
   513   node=-node;
   514   n=node>>8;
   515   window<<=n;
   516   available-=n;
   517   _opb->ptr=ptr;
   518   _opb->window=window;
   519   _opb->bits=available;
   520   return node&255;
   521 }

mercurial