media/libvpx/vp9/encoder/vp9_encodemb.c

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /*
     2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3  *
     4  *  Use of this source code is governed by a BSD-style license
     5  *  that can be found in the LICENSE file in the root of the source
     6  *  tree. An additional intellectual property rights grant can be found
     7  *  in the file PATENTS.  All contributing project authors may
     8  *  be found in the AUTHORS file in the root of the source tree.
     9  */
    12 #include "./vp9_rtcd.h"
    13 #include "./vpx_config.h"
    15 #include "vpx_mem/vpx_mem.h"
    17 #include "vp9/common/vp9_idct.h"
    18 #include "vp9/common/vp9_reconinter.h"
    19 #include "vp9/common/vp9_reconintra.h"
    20 #include "vp9/common/vp9_systemdependent.h"
    22 #include "vp9/encoder/vp9_dct.h"
    23 #include "vp9/encoder/vp9_encodemb.h"
    24 #include "vp9/encoder/vp9_quantize.h"
    25 #include "vp9/encoder/vp9_rdopt.h"
    26 #include "vp9/encoder/vp9_tokenize.h"
    28 void vp9_subtract_block_c(int rows, int cols,
    29                           int16_t *diff_ptr, ptrdiff_t diff_stride,
    30                           const uint8_t *src_ptr, ptrdiff_t src_stride,
    31                           const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
    32   int r, c;
    34   for (r = 0; r < rows; r++) {
    35     for (c = 0; c < cols; c++)
    36       diff_ptr[c] = src_ptr[c] - pred_ptr[c];
    38     diff_ptr += diff_stride;
    39     pred_ptr += pred_stride;
    40     src_ptr  += src_stride;
    41   }
    42 }
    44 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
    45   struct macroblock_plane *const p = &x->plane[plane];
    46   const MACROBLOCKD *const xd = &x->e_mbd;
    47   const struct macroblockd_plane *const pd = &xd->plane[plane];
    48   const int bw = plane_block_width(bsize, pd);
    49   const int bh = plane_block_height(bsize, pd);
    51   vp9_subtract_block(bh, bw, p->src_diff, bw,
    52                      p->src.buf, p->src.stride,
    53                      pd->dst.buf, pd->dst.stride);
    54 }
    56 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
    57   subtract_plane(x, bsize, 0);
    58 }
    60 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) {
    61   int i;
    63   for (i = 1; i < MAX_MB_PLANE; i++)
    64     subtract_plane(x, bsize, i);
    65 }
    67 void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
    68   vp9_subtract_sby(x, bsize);
    69   vp9_subtract_sbuv(x, bsize);
    70 }
    72 #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
    73 typedef struct vp9_token_state vp9_token_state;
    75 struct vp9_token_state {
    76   int           rate;
    77   int           error;
    78   int           next;
    79   signed char   token;
    80   short         qc;
    81 };
    83 // TODO(jimbankoski): experiment to find optimal RD numbers.
    84 #define Y1_RD_MULT 4
    85 #define UV_RD_MULT 2
    87 static const int plane_rd_mult[4] = {
    88   Y1_RD_MULT,
    89   UV_RD_MULT,
    90 };
    92 #define UPDATE_RD_COST()\
    93 {\
    94   rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
    95   rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
    96   if (rd_cost0 == rd_cost1) {\
    97     rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    98     rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
    99   }\
   100 }
   102 // This function is a place holder for now but may ultimately need
   103 // to scan previous tokens to work out the correct context.
   104 static int trellis_get_coeff_context(const int16_t *scan,
   105                                      const int16_t *nb,
   106                                      int idx, int token,
   107                                      uint8_t *token_cache) {
   108   int bak = token_cache[scan[idx]], pt;
   109   token_cache[scan[idx]] = vp9_pt_energy_class[token];
   110   pt = get_coef_context(nb, token_cache, idx + 1);
   111   token_cache[scan[idx]] = bak;
   112   return pt;
   113 }
   115 static void optimize_b(MACROBLOCK *mb,
   116                        int plane, int block, BLOCK_SIZE plane_bsize,
   117                        ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
   118                        TX_SIZE tx_size) {
   119   MACROBLOCKD *const xd = &mb->e_mbd;
   120   struct macroblockd_plane *pd = &xd->plane[plane];
   121   const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi);
   122   vp9_token_state tokens[1025][2];
   123   unsigned best_index[1025][2];
   124   const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block);
   125   int16_t *qcoeff_ptr;
   126   int16_t *dqcoeff_ptr;
   127   int eob = pd->eobs[block], final_eob, sz = 0;
   128   const int i0 = 0;
   129   int rc, x, next, i;
   130   int64_t rdmult, rddiv, rd_cost0, rd_cost1;
   131   int rate0, rate1, error0, error1, t0, t1;
   132   int best, band, pt;
   133   PLANE_TYPE type = pd->plane_type;
   134   int err_mult = plane_rd_mult[type];
   135   const int default_eob = 16 << (tx_size << 1);
   136   const int16_t *scan, *nb;
   137   const int mul = 1 + (tx_size == TX_32X32);
   138   uint8_t token_cache[1024];
   139   const int16_t *dequant_ptr = pd->dequant;
   140   const uint8_t *const band_translate = get_band_translate(tx_size);
   142   assert((!type && !plane) || (type && plane));
   143   dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
   144   qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
   145   get_scan(xd, tx_size, type, block, &scan, &nb);
   146   assert(eob <= default_eob);
   148   /* Now set up a Viterbi trellis to evaluate alternative roundings. */
   149   rdmult = mb->rdmult * err_mult;
   150   if (mb->e_mbd.mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME)
   151     rdmult = (rdmult * 9) >> 4;
   152   rddiv = mb->rddiv;
   153   /* Initialize the sentinel node of the trellis. */
   154   tokens[eob][0].rate = 0;
   155   tokens[eob][0].error = 0;
   156   tokens[eob][0].next = default_eob;
   157   tokens[eob][0].token = DCT_EOB_TOKEN;
   158   tokens[eob][0].qc = 0;
   159   *(tokens[eob] + 1) = *(tokens[eob] + 0);
   160   next = eob;
   161   for (i = 0; i < eob; i++)
   162     token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
   163         qcoeff_ptr[scan[i]]].token];
   165   for (i = eob; i-- > i0;) {
   166     int base_bits, d2, dx;
   168     rc = scan[i];
   169     x = qcoeff_ptr[rc];
   170     /* Only add a trellis state for non-zero coefficients. */
   171     if (x) {
   172       int shortcut = 0;
   173       error0 = tokens[next][0].error;
   174       error1 = tokens[next][1].error;
   175       /* Evaluate the first possibility for this state. */
   176       rate0 = tokens[next][0].rate;
   177       rate1 = tokens[next][1].rate;
   178       t0 = (vp9_dct_value_tokens_ptr + x)->token;
   179       /* Consider both possible successor states. */
   180       if (next < default_eob) {
   181         band = band_translate[i + 1];
   182         pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
   183         rate0 +=
   184           mb->token_costs[tx_size][type][ref][band][0][pt]
   185                          [tokens[next][0].token];
   186         rate1 +=
   187           mb->token_costs[tx_size][type][ref][band][0][pt]
   188                          [tokens[next][1].token];
   189       }
   190       UPDATE_RD_COST();
   191       /* And pick the best. */
   192       best = rd_cost1 < rd_cost0;
   193       base_bits = *(vp9_dct_value_cost_ptr + x);
   194       dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
   195       d2 = dx * dx;
   196       tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
   197       tokens[i][0].error = d2 + (best ? error1 : error0);
   198       tokens[i][0].next = next;
   199       tokens[i][0].token = t0;
   200       tokens[i][0].qc = x;
   201       best_index[i][0] = best;
   203       /* Evaluate the second possibility for this state. */
   204       rate0 = tokens[next][0].rate;
   205       rate1 = tokens[next][1].rate;
   207       if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
   208           (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
   209                                          dequant_ptr[rc != 0]))
   210         shortcut = 1;
   211       else
   212         shortcut = 0;
   214       if (shortcut) {
   215         sz = -(x < 0);
   216         x -= 2 * sz + 1;
   217       }
   219       /* Consider both possible successor states. */
   220       if (!x) {
   221         /* If we reduced this coefficient to zero, check to see if
   222          *  we need to move the EOB back here.
   223          */
   224         t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
   225              DCT_EOB_TOKEN : ZERO_TOKEN;
   226         t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
   227              DCT_EOB_TOKEN : ZERO_TOKEN;
   228       } else {
   229         t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
   230       }
   231       if (next < default_eob) {
   232         band = band_translate[i + 1];
   233         if (t0 != DCT_EOB_TOKEN) {
   234           pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
   235           rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
   236                                   [tokens[next][0].token];
   237         }
   238         if (t1 != DCT_EOB_TOKEN) {
   239           pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
   240           rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
   241                                   [tokens[next][1].token];
   242         }
   243       }
   245       UPDATE_RD_COST();
   246       /* And pick the best. */
   247       best = rd_cost1 < rd_cost0;
   248       base_bits = *(vp9_dct_value_cost_ptr + x);
   250       if (shortcut) {
   251         dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
   252         d2 = dx * dx;
   253       }
   254       tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
   255       tokens[i][1].error = d2 + (best ? error1 : error0);
   256       tokens[i][1].next = next;
   257       tokens[i][1].token = best ? t1 : t0;
   258       tokens[i][1].qc = x;
   259       best_index[i][1] = best;
   260       /* Finally, make this the new head of the trellis. */
   261       next = i;
   262     } else {
   263       /* There's no choice to make for a zero coefficient, so we don't
   264        *  add a new trellis node, but we do need to update the costs.
   265        */
   266       band = band_translate[i + 1];
   267       t0 = tokens[next][0].token;
   268       t1 = tokens[next][1].token;
   269       /* Update the cost of each path if we're past the EOB token. */
   270       if (t0 != DCT_EOB_TOKEN) {
   271         tokens[next][0].rate +=
   272             mb->token_costs[tx_size][type][ref][band][1][0][t0];
   273         tokens[next][0].token = ZERO_TOKEN;
   274       }
   275       if (t1 != DCT_EOB_TOKEN) {
   276         tokens[next][1].rate +=
   277             mb->token_costs[tx_size][type][ref][band][1][0][t1];
   278         tokens[next][1].token = ZERO_TOKEN;
   279       }
   280       best_index[i][0] = best_index[i][1] = 0;
   281       /* Don't update next, because we didn't add a new node. */
   282     }
   283   }
   285   /* Now pick the best path through the whole trellis. */
   286   band = band_translate[i + 1];
   287   pt = combine_entropy_contexts(*a, *l);
   288   rate0 = tokens[next][0].rate;
   289   rate1 = tokens[next][1].rate;
   290   error0 = tokens[next][0].error;
   291   error1 = tokens[next][1].error;
   292   t0 = tokens[next][0].token;
   293   t1 = tokens[next][1].token;
   294   rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0];
   295   rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1];
   296   UPDATE_RD_COST();
   297   best = rd_cost1 < rd_cost0;
   298   final_eob = i0 - 1;
   299   vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2)));
   300   vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2)));
   301   for (i = next; i < eob; i = next) {
   302     x = tokens[i][best].qc;
   303     if (x) {
   304       final_eob = i;
   305     }
   306     rc = scan[i];
   307     qcoeff_ptr[rc] = x;
   308     dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
   310     next = tokens[i][best].next;
   311     best = best_index[i][best];
   312   }
   313   final_eob++;
   315   xd->plane[plane].eobs[block] = final_eob;
   316   *a = *l = (final_eob > 0);
   317 }
   319 void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
   320                     TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) {
   321   int x, y;
   322   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
   323   optimize_b(mb, plane, block, plane_bsize,
   324              &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size);
   325 }
   327 static void optimize_init_b(int plane, BLOCK_SIZE bsize,
   328                             struct encode_b_args *args) {
   329   const MACROBLOCKD *xd = &args->x->e_mbd;
   330   const struct macroblockd_plane* const pd = &xd->plane[plane];
   331   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
   332   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
   333   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
   334   const MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
   335   const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
   337   vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane],
   338                            pd->above_context, pd->left_context,
   339                            num_4x4_w, num_4x4_h);
   340 }
   342 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
   343                      TX_SIZE tx_size, void *arg) {
   344   struct encode_b_args* const args = arg;
   345   MACROBLOCK* const x = args->x;
   346   MACROBLOCKD* const xd = &x->e_mbd;
   347   struct macroblock_plane *const p = &x->plane[plane];
   348   struct macroblockd_plane *const pd = &xd->plane[plane];
   349   int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
   350   int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block);
   351   int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   352   const int16_t *scan, *iscan;
   353   uint16_t *eob = &pd->eobs[block];
   354   const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl;
   355   const int twl = bwl - tx_size, twmask = (1 << twl) - 1;
   356   int xoff, yoff;
   357   int16_t *src_diff;
   359   switch (tx_size) {
   360     case TX_32X32:
   361       scan = vp9_default_scan_32x32;
   362       iscan = vp9_default_iscan_32x32;
   363       block >>= 6;
   364       xoff = 32 * (block & twmask);
   365       yoff = 32 * (block >> twl);
   366       src_diff = p->src_diff + 4 * bw * yoff + xoff;
   367       if (x->use_lp32x32fdct)
   368         vp9_fdct32x32_rd(src_diff, coeff, bw * 4);
   369       else
   370         vp9_fdct32x32(src_diff, coeff, bw * 4);
   371       vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
   372                            p->quant, p->quant_shift, qcoeff, dqcoeff,
   373                            pd->dequant, p->zbin_extra, eob, scan, iscan);
   374       break;
   375     case TX_16X16:
   376       scan = vp9_default_scan_16x16;
   377       iscan = vp9_default_iscan_16x16;
   378       block >>= 4;
   379       xoff = 16 * (block & twmask);
   380       yoff = 16 * (block >> twl);
   381       src_diff = p->src_diff + 4 * bw * yoff + xoff;
   382       vp9_fdct16x16(src_diff, coeff, bw * 4);
   383       vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
   384                      p->quant, p->quant_shift, qcoeff, dqcoeff,
   385                      pd->dequant, p->zbin_extra, eob, scan, iscan);
   386       break;
   387     case TX_8X8:
   388       scan = vp9_default_scan_8x8;
   389       iscan = vp9_default_iscan_8x8;
   390       block >>= 2;
   391       xoff = 8 * (block & twmask);
   392       yoff = 8 * (block >> twl);
   393       src_diff = p->src_diff + 4 * bw * yoff + xoff;
   394       vp9_fdct8x8(src_diff, coeff, bw * 4);
   395       vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
   396                      p->quant, p->quant_shift, qcoeff, dqcoeff,
   397                      pd->dequant, p->zbin_extra, eob, scan, iscan);
   398       break;
   399     case TX_4X4:
   400       scan = vp9_default_scan_4x4;
   401       iscan = vp9_default_iscan_4x4;
   402       xoff = 4 * (block & twmask);
   403       yoff = 4 * (block >> twl);
   404       src_diff = p->src_diff + 4 * bw * yoff + xoff;
   405       x->fwd_txm4x4(src_diff, coeff, bw * 4);
   406       vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
   407                      p->quant, p->quant_shift, qcoeff, dqcoeff,
   408                      pd->dequant, p->zbin_extra, eob, scan, iscan);
   409       break;
   410     default:
   411       assert(0);
   412   }
   413 }
   415 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
   416                          TX_SIZE tx_size, void *arg) {
   417   struct encode_b_args *const args = arg;
   418   MACROBLOCK *const x = args->x;
   419   MACROBLOCKD *const xd = &x->e_mbd;
   420   struct optimize_ctx *const ctx = args->ctx;
   421   struct macroblockd_plane *const pd = &xd->plane[plane];
   422   int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   423   int i, j;
   424   uint8_t *dst;
   425   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
   426   dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
   428   // TODO(jingning): per transformed block zero forcing only enabled for
   429   // luma component. will integrate chroma components as well.
   430   if (x->zcoeff_blk[tx_size][block] && plane == 0) {
   431     pd->eobs[block] = 0;
   432     ctx->ta[plane][i] = 0;
   433     ctx->tl[plane][j] = 0;
   434     return;
   435   }
   437   if (!x->skip_recode)
   438     vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
   440   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
   441     vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
   442   } else {
   443     ctx->ta[plane][i] = pd->eobs[block] > 0;
   444     ctx->tl[plane][j] = pd->eobs[block] > 0;
   445   }
   447   if (x->skip_encode || pd->eobs[block] == 0)
   448     return;
   450   switch (tx_size) {
   451     case TX_32X32:
   452       vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
   453       break;
   454     case TX_16X16:
   455       vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
   456       break;
   457     case TX_8X8:
   458       vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
   459       break;
   460     case TX_4X4:
   461       // this is like vp9_short_idct4x4 but has a special case around eob<=1
   462       // which is significant (not just an optimization) for the lossless
   463       // case.
   464       xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
   465       break;
   466     default:
   467       assert(!"Invalid transform size");
   468   }
   469 }
   471 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
   472                                TX_SIZE tx_size, void *arg) {
   473   struct encode_b_args *const args = arg;
   474   MACROBLOCK *const x = args->x;
   475   MACROBLOCKD *const xd = &x->e_mbd;
   476   struct macroblockd_plane *const pd = &xd->plane[plane];
   477   const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size,
   478                                                        block);
   480   int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   481   uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
   482                                                  pd->dst.buf, pd->dst.stride);
   484   vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
   486   if (pd->eobs[block] == 0)
   487     return;
   489   xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
   490 }
   492 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
   493   MACROBLOCKD *const xd = &x->e_mbd;
   494   struct optimize_ctx ctx;
   495   struct encode_b_args arg = {x, &ctx};
   497   vp9_subtract_sby(x, bsize);
   498   if (x->optimize)
   499     optimize_init_b(0, bsize, &arg);
   501   foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1, &arg);
   502 }
   504 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
   505   MACROBLOCKD *const xd = &x->e_mbd;
   506   struct optimize_ctx ctx;
   507   struct encode_b_args arg = {x, &ctx};
   509   if (!x->skip_recode)
   510     vp9_subtract_sb(x, bsize);
   512   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
   513     int i;
   514     for (i = 0; i < MAX_MB_PLANE; ++i)
   515       optimize_init_b(i, bsize, &arg);
   516   }
   518   foreach_transformed_block(xd, bsize, encode_block, &arg);
   519 }
   521 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
   522                             TX_SIZE tx_size, void *arg) {
   523   struct encode_b_args* const args = arg;
   524   MACROBLOCK *const x = args->x;
   525   MACROBLOCKD *const xd = &x->e_mbd;
   526   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
   527   struct macroblock_plane *const p = &x->plane[plane];
   528   struct macroblockd_plane *const pd = &xd->plane[plane];
   529   int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
   530   int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block);
   531   int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   532   const int16_t *scan, *iscan;
   533   TX_TYPE tx_type;
   534   MB_PREDICTION_MODE mode;
   535   const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl;
   536   const int twl = bwl - tx_size, twmask = (1 << twl) - 1;
   537   int xoff, yoff;
   538   uint8_t *src, *dst;
   539   int16_t *src_diff;
   540   uint16_t *eob = &pd->eobs[block];
   542   if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0)
   543     extend_for_intra(xd, plane_bsize, plane, block, tx_size);
   545   // if (x->optimize)
   546   // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
   548   switch (tx_size) {
   549     case TX_32X32:
   550       scan = vp9_default_scan_32x32;
   551       iscan = vp9_default_iscan_32x32;
   552       mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
   553       block >>= 6;
   554       xoff = 32 * (block & twmask);
   555       yoff = 32 * (block >> twl);
   556       dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
   557       vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode,
   558                               dst, pd->dst.stride, dst, pd->dst.stride);
   560       if (!x->skip_recode) {
   561         src = p->src.buf + yoff * p->src.stride + xoff;
   562         src_diff = p->src_diff + 4 * bw * yoff + xoff;
   563         vp9_subtract_block(32, 32, src_diff, bw * 4,
   564                            src, p->src.stride, dst, pd->dst.stride);
   565         if (x->use_lp32x32fdct)
   566           vp9_fdct32x32_rd(src_diff, coeff, bw * 4);
   567         else
   568           vp9_fdct32x32(src_diff, coeff, bw * 4);
   569         vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
   570                              p->quant, p->quant_shift, qcoeff, dqcoeff,
   571                              pd->dequant, p->zbin_extra, eob, scan, iscan);
   572       }
   573       if (!x->skip_encode && *eob)
   574         vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob);
   575       break;
   576     case TX_16X16:
   577       tx_type = get_tx_type_16x16(pd->plane_type, xd);
   578       scan = get_scan_16x16(tx_type);
   579       iscan = get_iscan_16x16(tx_type);
   580       mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
   581       block >>= 4;
   582       xoff = 16 * (block & twmask);
   583       yoff = 16 * (block >> twl);
   584       dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
   585       vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode,
   586                               dst, pd->dst.stride, dst, pd->dst.stride);
   587       if (!x->skip_recode) {
   588         src = p->src.buf + yoff * p->src.stride + xoff;
   589         src_diff = p->src_diff + 4 * bw * yoff + xoff;
   590         vp9_subtract_block(16, 16, src_diff, bw * 4,
   591                            src, p->src.stride, dst, pd->dst.stride);
   592         vp9_fht16x16(tx_type, src_diff, coeff, bw * 4);
   593         vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
   594                        p->quant, p->quant_shift, qcoeff, dqcoeff,
   595                        pd->dequant, p->zbin_extra, eob, scan, iscan);
   596       }
   597       if (!x->skip_encode && *eob)
   598         vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
   599       break;
   600     case TX_8X8:
   601       tx_type = get_tx_type_8x8(pd->plane_type, xd);
   602       scan = get_scan_8x8(tx_type);
   603       iscan = get_iscan_8x8(tx_type);
   604       mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
   605       block >>= 2;
   606       xoff = 8 * (block & twmask);
   607       yoff = 8 * (block >> twl);
   608       dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
   609       vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode,
   610                               dst, pd->dst.stride, dst, pd->dst.stride);
   611       if (!x->skip_recode) {
   612         src = p->src.buf + yoff * p->src.stride + xoff;
   613         src_diff = p->src_diff + 4 * bw * yoff + xoff;
   614         vp9_subtract_block(8, 8, src_diff, bw * 4,
   615                            src, p->src.stride, dst, pd->dst.stride);
   616         vp9_fht8x8(tx_type, src_diff, coeff, bw * 4);
   617         vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
   618                        p->quant_shift, qcoeff, dqcoeff,
   619                        pd->dequant, p->zbin_extra, eob, scan, iscan);
   620       }
   621       if (!x->skip_encode && *eob)
   622         vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
   623       break;
   624     case TX_4X4:
   625       tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
   626       scan = get_scan_4x4(tx_type);
   627       iscan = get_iscan_4x4(tx_type);
   628       if (mbmi->sb_type < BLOCK_8X8 && plane == 0)
   629         mode = xd->mi_8x8[0]->bmi[block].as_mode;
   630       else
   631         mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
   633       xoff = 4 * (block & twmask);
   634       yoff = 4 * (block >> twl);
   635       dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
   636       vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
   637                               dst, pd->dst.stride, dst, pd->dst.stride);
   639       if (!x->skip_recode) {
   640         src = p->src.buf + yoff * p->src.stride + xoff;
   641         src_diff = p->src_diff + 4 * bw * yoff + xoff;
   642         vp9_subtract_block(4, 4, src_diff, bw * 4,
   643                            src, p->src.stride, dst, pd->dst.stride);
   644         if (tx_type != DCT_DCT)
   645           vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
   646         else
   647           x->fwd_txm4x4(src_diff, coeff, bw * 4);
   648         vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
   649                        p->quant_shift, qcoeff, dqcoeff,
   650                        pd->dequant, p->zbin_extra, eob, scan, iscan);
   651       }
   653       if (!x->skip_encode && *eob) {
   654         if (tx_type == DCT_DCT)
   655           // this is like vp9_short_idct4x4 but has a special case around eob<=1
   656           // which is significant (not just an optimization) for the lossless
   657           // case.
   658           xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob);
   659         else
   660           vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type);
   661       }
   662       break;
   663     default:
   664       assert(0);
   665   }
   666 }
   668 void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize) {
   669   MACROBLOCKD* const xd = &x->e_mbd;
   670   struct optimize_ctx ctx;
   671   struct encode_b_args arg = {x, &ctx};
   673   foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra,
   674                                      &arg);
   675 }
   676 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) {
   677   MACROBLOCKD* const xd = &x->e_mbd;
   678   struct optimize_ctx ctx;
   679   struct encode_b_args arg = {x, &ctx};
   680   foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg);
   681 }

mercurial