shithub: libvpx

Download patch

ref: 9c2552a1c149cbc7ee407c514e0cf78e6f45bcec
parent: 83ba1880bf6596c922e67f99290a30b758b01379
author: Yunqing Wang <[email protected]>
date: Thu Mar 16 11:45:07 EDT 2017

Record the sum of tx block eobs in the partition block

The sum of tx block eobs is needed in the machine learning based partition
early termination. The eobs are first accumulated during tx search, and
then the value associated with the best tx_size is copied to ctx for later
use.

Now that the sum of eobs is calculated correctly, re-enable the
ml_partition_search_early_termination speed feature.

Re-did the quality/speed test to check the impact of the fix.

1. Borg test BDRATE result:
4k set:     PSNR: +0.183%; SSIM: +0.100%;
hdres set:  PSNR: +0.168%; SSIM: +0.256%;
midres set: PSNR: +0.186%; SSIM: +0.326%;

2. Average speed gain result:
4k clips: 21%;
hd clips: 26%;
midres clips: 15%.

The result is in line with the original result.

Change-Id: I4209a95c89be03b4cbfb6a95b16885f89feddbda

--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -128,6 +128,9 @@
   // Set during mode selection. Read during block encoding.
   uint8_t zcoeff_blk[TX_SIZES][256];
 
+  // Accumulate the tx block eobs in a partition block.
+  int32_t sum_y_eobs[TX_SIZES];
+
   int skip;
 
   int encode_breakout;
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -73,7 +73,7 @@
   INTERP_FILTER pred_interp_filter;
 
   // Used for the machine learning-based early termination
-  int sum_eobs;
+  int32_t sum_y_eobs;
 } PICK_MODE_CONTEXT;
 
 typedef struct PC_TREE {
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2711,18 +2711,6 @@
 }
 #endif
 
-// Accumulate all tx blocks' eobs results got from the partition evaluation.
-static void accumulate_eobs(int plane, int block, int row, int col,
-                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
-                            void *arg) {
-  PICK_MODE_CONTEXT *ctx = (PICK_MODE_CONTEXT *)arg;
-  (void)row;
-  (void)col;
-  (void)plane_bsize;
-  (void)tx_size;
-  ctx->sum_eobs += ctx->eobs_pbuf[plane][1][block];
-}
-
 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
 // unlikely to be selected depending on previous rate-distortion optimization
 // results, for encoding speed-up.
@@ -2899,6 +2887,8 @@
       }
 
       if (this_rdc.rdcost < best_rdc.rdcost) {
+        MODE_INFO *mi = xd->mi[0];
+
         best_rdc = this_rdc;
         if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
 
@@ -2917,7 +2907,9 @@
           // Currently, the machine-learning based partition search early
           // termination is only used while bsize is 16x16, 32x32 or 64x64,
           // VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
-          if (ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
+          if (!x->e_mbd.lossless &&
+              !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) &&
+              ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
             const double *clf;
             const double *mean;
             const double *sd;
@@ -2936,10 +2928,6 @@
 
             assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
 
-            ctx->sum_eobs = 0;
-            vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
-                                                   accumulate_eobs, ctx);
-
             if (above_in_image) {
               context_size = xd->above_mi->sb_type;
               if (context_size < bsize)
@@ -2980,7 +2968,7 @@
                     clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) +
                     clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) *
                               sd[3]) +
-                    clf[4] * (((double)ctx->sum_eobs - mean[4]) / sd[4]) +
+                    clf[4] * (((double)ctx->sum_y_eobs - mean[4]) / sd[4]) +
                     clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) +
                     clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7];
             if (score < 0) {
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -742,9 +742,11 @@
 
   // TODO(jingning): temporarily enabled only for luma component
   rd = VPXMIN(rd1, rd2);
-  if (plane == 0)
+  if (plane == 0) {
     x->zcoeff_blk[tx_size][block] =
         !x->plane[plane].eobs[block] || (rd1 > rd2 && !xd->lossless);
+    x->sum_y_eobs[tx_size] += x->plane[plane].eobs[block];
+  }
 
   args->this_rate += rate;
   args->this_dist += dist;
@@ -3190,6 +3192,8 @@
     ref_frame = vp9_mode_order[mode_index].ref_frame[0];
     second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
 
+    vp9_zero(x->sum_y_eobs);
+
     // Look at the reference frame of the best mode so far and set the
     // skip mask to look at a subset of the remaining modes.
     if (midx == mode_skip_start && best_mode_index >= 0) {
@@ -3469,6 +3473,7 @@
         if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
         memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size],
                sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+        ctx->sum_y_eobs = x->sum_y_eobs[mi->tx_size];
 
         // TODO(debargha): enhance this test with a better distortion prediction
         // based on qp, activity mask and history
@@ -3699,6 +3704,8 @@
   mi->mv[0].as_int = 0;
   x->skip = 1;
 
+  ctx->sum_y_eobs = 0;
+
   if (cm->interp_filter != BILINEAR) {
     best_filter = EIGHTTAP;
     if (cm->interp_filter == SWITCHABLE &&
@@ -3853,6 +3860,8 @@
     ref_frame = vp9_ref_order[ref_index].ref_frame[0];
     second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
 
+    vp9_zero(x->sum_y_eobs);
+
 #if CONFIG_BETTER_HW_COMPATIBILITY
     // forbid 8X4 and 4X8 partitions if any reference frame is scaled.
     if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) {
@@ -4069,6 +4078,7 @@
               for (i = 0; i < 4; i++) {
                 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
                 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
+                x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i];
               }
               pred_exists = 1;
               if (switchable_filter_index == 0 && sf->use_rd_breakout &&
@@ -4233,6 +4243,7 @@
         if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
         memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
                sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+        ctx->sum_y_eobs = x->sum_y_eobs[TX_4X4];
 
         for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i];
 
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -73,10 +73,9 @@
 
   // Currently, the machine-learning based partition search early termination
   // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
-  // TODO(yunqingwang): Re-enable when test failures are fixed.
-  // if (VPXMIN(cm->width, cm->height) >= 480) {
-  //   sf->ml_partition_search_early_termination = 1;
-  // }
+  if (VPXMIN(cm->width, cm->height) >= 480) {
+    sf->ml_partition_search_early_termination = 1;
+  }
 
   if (speed >= 1) {
     sf->ml_partition_search_early_termination = 0;