shithub: libvpx

Download patch

ref: 47b6030dda54dd6422b5e01774608581b515575a
parent: a2a5c6f821483891f3488b3f14e2c72a81ffe8f6
author: Dmitry Kovalev <[email protected]>
date: Thu Oct 31 09:52:08 EDT 2013

Reducing the number of foreach_transformed_block() calls.

The change doesn't affect the bitstream. It changes the order of function
calls and affects how we reconstruct intra- and inter-blocks. The speedup is
about 1-1.5%.

For intra-blocks:
  Before:
    for each transform block read tokens
    for each transform block do prediction
    for each transform block do inverse transform
  Now:
    for each transform block
      read tokens
      do prediction
      do inverse transform

For inter-blocks:
  Before:
    for each transform block read tokens
    for each transform block do inverse transform
  Now:
    for each transform block
      read tokens
      do inverse transform

Change-Id: I12a79bf1aa5a18c351b8010369bd3ff1deae1570

--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -244,9 +244,8 @@
                               aligned_mi_cols));
 }
 
-static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize,
-                         TX_SIZE tx_size, void *arg) {
-  MACROBLOCKD* const xd = arg;
+static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
+                                    BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   int16_t* const qcoeff = BLOCK_OFFSET(pd->qcoeff, block);
   const int stride = pd->dst.stride;
@@ -292,9 +291,19 @@
   }
 }
 
-static void decode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
-                               TX_SIZE tx_size, void *arg) {
-  MACROBLOCKD* const xd = arg;
+struct intra_args {
+  VP9_COMMON *cm;
+  MACROBLOCKD *xd;
+  vp9_reader *r;
+};
+
+static void predict_and_reconstruct_intra_block(int plane, int block,
+                                                BLOCK_SIZE plane_bsize,
+                                                TX_SIZE tx_size, void *arg) {
+  struct intra_args *const args = arg;
+  VP9_COMMON *const cm = args->cm;
+  MACROBLOCKD *const xd = args->xd;
+
   struct macroblockd_plane *const pd = &xd->plane[plane];
   MODE_INFO *const mi = xd->mi_8x8[0];
   const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size,
@@ -313,25 +322,30 @@
                           b_width_log2(plane_bsize), tx_size, mode,
                           dst, pd->dst.stride, dst, pd->dst.stride);
 
-  if (!mi->mbmi.skip_coeff)
-    decode_block(plane, block, plane_bsize, tx_size, arg);
+  if (!mi->mbmi.skip_coeff) {
+    vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, tx_size,
+                            args->r);
+    inverse_transform_block(xd, plane, block, plane_bsize, tx_size);
+  }
 }
 
-static int decode_tokens(VP9_COMMON *const cm, MACROBLOCKD *const xd,
-                         BLOCK_SIZE bsize, vp9_reader *r) {
-  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
+struct inter_args {
+  VP9_COMMON *cm;
+  MACROBLOCKD *xd;
+  vp9_reader *r;
+  int *eobtotal;
+};
 
-  if (mbmi->skip_coeff) {
-    reset_skip_context(xd, bsize);
-    return -1;
-  } else {
-    if (cm->seg.enabled)
-      setup_plane_dequants(cm, xd, vp9_get_qindex(&cm->seg, mbmi->segment_id,
-                                                  cm->base_qindex));
+static void reconstruct_inter_block(int plane, int block,
+                                    BLOCK_SIZE plane_bsize,
+                                    TX_SIZE tx_size, void *arg) {
+  struct inter_args *args = arg;
+  VP9_COMMON *const cm = args->cm;
+  MACROBLOCKD *const xd = args->xd;
 
-    // TODO(dkovalev) if (!vp9_reader_has_error(r))
-    return vp9_decode_tokens(cm, xd, &cm->seg, r, bsize);
-  }
+  *args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block,
+                                             plane_bsize, tx_size, args->r);
+  inverse_transform_block(xd, plane, block, plane_bsize, tx_size);
 }
 
 static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
@@ -385,7 +399,6 @@
                            vp9_reader *r, BLOCK_SIZE bsize) {
   const int less8x8 = bsize < BLOCK_8X8;
   MB_MODE_INFO *mbmi;
-  int eobtotal;
 
   set_offsets(cm, xd, tile, bsize, mi_row, mi_col);
   vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r);
@@ -395,21 +408,21 @@
 
   // Has to be called after set_offsets
   mbmi = &xd->mi_8x8[0]->mbmi;
-  eobtotal = decode_tokens(cm, xd, bsize, r);
 
-  if (!is_inter_block(mbmi)) {
-    // Intra reconstruction
-    foreach_transformed_block(xd, bsize, decode_block_intra, xd);
+  if (mbmi->skip_coeff) {
+    reset_skip_context(xd, bsize);
   } else {
-    // Inter reconstruction
-    const int decode_blocks = (eobtotal > 0);
+    if (cm->seg.enabled)
+      setup_plane_dequants(cm, xd, vp9_get_qindex(&cm->seg, mbmi->segment_id,
+                                                  cm->base_qindex));
+  }
 
-    if (!less8x8) {
-      assert(mbmi->sb_type == bsize);
-      if (eobtotal == 0)
-        mbmi->skip_coeff = 1;  // skip loopfilter
-    }
-
+  if (!is_inter_block(mbmi)) {
+    struct intra_args arg = { cm, xd, r };
+    foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block,
+                              &arg);
+  } else {
+    // Setup
     set_ref(cm, xd, 0, mi_row, mi_col);
     if (has_second_ref(mbmi))
       set_ref(cm, xd, 1, mi_row, mi_col);
@@ -416,11 +429,20 @@
 
     xd->subpix.filter_x = xd->subpix.filter_y =
         vp9_get_filter_kernel(mbmi->interp_filter);
+
+    // Prediction
     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
 
-    if (decode_blocks)
-      foreach_transformed_block(xd, bsize, decode_block, xd);
+    // Reconstruction
+    if (!mbmi->skip_coeff) {
+      int eobtotal = 0;
+      struct inter_args arg = { cm, xd, r, &eobtotal };
+      foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
+      if (!less8x8 && eobtotal == 0)
+        mbmi->skip_coeff = 1;  // skip loopfilter
+    }
   }
+
   xd->corrupted |= vp9_reader_has_error(r);
 }
 
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -210,31 +210,18 @@
   return c;
 }
 
-struct decode_block_args {
-  VP9_COMMON *cm;
-  MACROBLOCKD *xd;
-  struct segmentation *seg;
-  vp9_reader *r;
-  int *eobtotal;
-};
-
-static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize,
-                         TX_SIZE tx_size, void *argv) {
-  const struct decode_block_args* const arg = argv;
-
-  // find the maximum eob for this transform size, adjusted by segment
-  MACROBLOCKD *xd = arg->xd;
-  const struct segmentation *seg = arg->seg;
-  struct macroblockd_plane* pd = &xd->plane[plane];
-  const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
-  const int seg_eob = get_tx_eob(seg, segment_id, tx_size);
+int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
+                            int plane, int block, BLOCK_SIZE plane_bsize,
+                            TX_SIZE tx_size, vp9_reader *r) {
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id,
+                                 tx_size);
   int aoff, loff, eob, pt;
-
   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff);
   pt = get_entropy_context(tx_size, pd->above_context + aoff,
                                     pd->left_context + loff);
 
-  eob = decode_coefs(arg->cm, xd, arg->r, block,
+  eob = decode_coefs(cm, xd, r, block,
                      pd->plane_type, seg_eob, BLOCK_OFFSET(pd->qcoeff, block),
                      tx_size, pd->dequant, pt);
 
@@ -241,14 +228,7 @@
   set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff);
 
   pd->eobs[block] = eob;
-  *arg->eobtotal += eob;
+  return eob;
 }
 
-int vp9_decode_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
-                      struct segmentation *seg,
-                      vp9_reader *r, BLOCK_SIZE bsize) {
-  int eobtotal = 0;
-  struct decode_block_args args = {cm, xd, seg, r, &eobtotal};
-  foreach_transformed_block(xd, bsize, decode_block, &args);
-  return eobtotal;
-}
+
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -15,8 +15,8 @@
 #include "vp9/decoder/vp9_onyxd_int.h"
 #include "vp9/decoder/vp9_dboolhuff.h"
 
-int vp9_decode_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
-                      struct segmentation *seg,
-                      vp9_reader *r, BLOCK_SIZE bsize);
+int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
+                            int plane, int block, BLOCK_SIZE plane_bsize,
+                            TX_SIZE tx_size, vp9_reader *r);
 
 #endif  // VP9_DECODER_VP9_DETOKENIZE_H_