shithub: libvpx

Download patch

ref: 7bea8c59f934aa8bfab43935b2355b88adaa12f0
parent: 9349a28e80b568495d56d6364d3c7ed1abb51188
author: Jingning Han <[email protected]>
date: Wed Oct 29 12:37:16 EDT 2014

Rework pred pixel buffer system in non-RD coding mode

This commit makes the inter prediction buffer system support
hybrid partition search. It reduces the runtime of speed -5 by
about 3%. No compression performance change.

vidyo1 720p 1000 kbps
11831 ms -> 11497 ms

nik 720p 1000 kbps
10919 ms -> 10645 ms

Change-Id: I5b2da747c6395c253cd074d3907f5402e1840c36

--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2678,6 +2678,22 @@
   }
 }
 
+// Reset pred_pixel_ready for every context in pc_tree and its split subtrees.
+static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
+  pc_tree->none.pred_pixel_ready = 0;
+  pc_tree->horizontal[0].pred_pixel_ready = 0;
+  pc_tree->horizontal[1].pred_pixel_ready = 0;
+  pc_tree->vertical[0].pred_pixel_ready = 0;
+  pc_tree->vertical[1].pred_pixel_ready = 0;
+
+  if (bsize > BLOCK_8X8) {  // above 8x8: also reset the 4 split subtrees
+    BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+    int i;
+    for (i = 0; i < 4; ++i)
+      pred_pixel_ready_reset(pc_tree->split[i], subsize);
+  }
+}
+
 static void nonrd_pick_partition(VP9_COMP *cpi,
                                  TileDataEnc *tile_data,
                                  TOKENEXTRA **tp, int mi_row,
@@ -2736,6 +2752,10 @@
     partition_vert_allowed &= force_vert_split;
   }
 
+  ctx->pred_pixel_ready = !(partition_vert_allowed ||
+                            partition_horz_allowed ||
+                            do_split);
+
   // PARTITION_NONE
   if (partition_none_allowed) {
     nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col,
@@ -2743,7 +2763,6 @@
     ctx->mic.mbmi = xd->mi[0].src_mi->mbmi;
     ctx->skip_txfm[0] = x->skip_txfm[0];
     ctx->skip = x->skip;
-    ctx->pred_pixel_ready = 0;
 
     if (this_rdc.rate != INT_MAX) {
       int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -2819,7 +2838,7 @@
     subsize = get_subsize(bsize, PARTITION_HORZ);
     if (sf->adaptive_motion_search)
       load_pred_mv(x, ctx);
-
+    pc_tree->horizontal[0].pred_pixel_ready = 1;
     nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize,
                         &pc_tree->horizontal[0]);
 
@@ -2826,10 +2845,10 @@
     pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
     pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
     pc_tree->horizontal[0].skip = x->skip;
-    pc_tree->horizontal[0].pred_pixel_ready = 0;
 
     if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
       load_pred_mv(x, ctx);
+      pc_tree->horizontal[1].pred_pixel_ready = 1;
       nonrd_pick_sb_modes(cpi, tile_data, mi_row + ms, mi_col,
                           &this_rdc, subsize,
                           &pc_tree->horizontal[1]);
@@ -2837,7 +2856,6 @@
       pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
       pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->horizontal[1].skip = x->skip;
-      pc_tree->horizontal[1].pred_pixel_ready = 0;
 
       if (this_rdc.rate == INT_MAX) {
         vp9_rd_cost_reset(&sum_rdc);
@@ -2854,6 +2872,8 @@
     if (sum_rdc.rdcost < best_rdc.rdcost) {
       best_rdc = sum_rdc;
       pc_tree->partitioning = PARTITION_HORZ;
+    } else {
+      pred_pixel_ready_reset(pc_tree, bsize);
     }
   }
 
@@ -2860,19 +2880,18 @@
   // PARTITION_VERT
   if (partition_vert_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_VERT);
-
     if (sf->adaptive_motion_search)
       load_pred_mv(x, ctx);
-
+    pc_tree->vertical[0].pred_pixel_ready = 1;
     nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize,
                         &pc_tree->vertical[0]);
     pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
     pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
     pc_tree->vertical[0].skip = x->skip;
-    pc_tree->vertical[0].pred_pixel_ready = 0;
 
     if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
       load_pred_mv(x, ctx);
+      pc_tree->vertical[1].pred_pixel_ready = 1;
       nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + ms,
                           &this_rdc, subsize,
                           &pc_tree->vertical[1]);
@@ -2879,7 +2898,6 @@
       pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
       pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->vertical[1].skip = x->skip;
-      pc_tree->vertical[1].pred_pixel_ready = 0;
 
       if (this_rdc.rate == INT_MAX) {
         vp9_rd_cost_reset(&sum_rdc);
@@ -2896,6 +2914,8 @@
     if (sum_rdc.rdcost < best_rdc.rdcost) {
       best_rdc = sum_rdc;
       pc_tree->partitioning = PARTITION_VERT;
+    } else {
+      pred_pixel_ready_reset(pc_tree, bsize);
     }
   }
 
@@ -2977,27 +2997,27 @@
   } else {
     switch (partition) {
       case PARTITION_NONE:
+        pc_tree->none.pred_pixel_ready = 1;
         nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
                             subsize, &pc_tree->none);
         pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi;
         pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
         pc_tree->none.skip = x->skip;
-        pc_tree->none.pred_pixel_ready = 1;
         break;
       case PARTITION_VERT:
+        pc_tree->vertical[0].pred_pixel_ready = 1;
         nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
                             subsize, &pc_tree->vertical[0]);
         pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
         pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
         pc_tree->vertical[0].skip = x->skip;
-        pc_tree->vertical[0].pred_pixel_ready = 1;
         if (mi_col + hbs < cm->mi_cols) {
+          pc_tree->vertical[1].pred_pixel_ready = 1;
           nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs,
                               &this_rdc, subsize, &pc_tree->vertical[1]);
           pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
           pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
           pc_tree->vertical[1].skip = x->skip;
-          pc_tree->vertical[1].pred_pixel_ready = 1;
           if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
               rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
             rd_cost->rate += this_rdc.rate;
@@ -3006,19 +3026,19 @@
         }
         break;
       case PARTITION_HORZ:
+        pc_tree->horizontal[0].pred_pixel_ready = 1;
         nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
                             subsize, &pc_tree->horizontal[0]);
         pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
         pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
         pc_tree->horizontal[0].skip = x->skip;
-        pc_tree->horizontal[0].pred_pixel_ready = 1;
         if (mi_row + hbs < cm->mi_rows) {
+          pc_tree->horizontal[1].pred_pixel_ready = 1;
           nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col,
                               &this_rdc, subsize, &pc_tree->horizontal[0]);
           pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
           pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
           pc_tree->horizontal[1].skip = x->skip;
-          pc_tree->horizontal[1].pred_pixel_ready = 1;
           if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
               rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
             rd_cost->rate += this_rdc.rate;
@@ -3096,6 +3116,7 @@
 
   switch (partition) {
     case PARTITION_NONE:
+      pc_tree->none.pred_pixel_ready = 1;
       nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
                           subsize, &pc_tree->none);
       pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi;
@@ -3103,6 +3124,7 @@
       pc_tree->none.skip = x->skip;
       break;
     case PARTITION_VERT:
+      pc_tree->vertical[0].pred_pixel_ready = 1;
       nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
                           subsize, &pc_tree->vertical[0]);
       pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
@@ -3109,6 +3131,7 @@
       pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->vertical[0].skip = x->skip;
       if (mi_col + hbs < cm->mi_cols) {
+        pc_tree->vertical[1].pred_pixel_ready = 1;
         nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs,
                             &this_rdc, subsize, &pc_tree->vertical[1]);
         pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
@@ -3122,6 +3145,7 @@
       }
       break;
     case PARTITION_HORZ:
+      pc_tree->horizontal[0].pred_pixel_ready = 1;
       nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
                           subsize, &pc_tree->horizontal[0]);
       pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
@@ -3128,6 +3152,7 @@
       pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->horizontal[0].skip = x->skip;
       if (mi_row + hbs < cm->mi_rows) {
+        pc_tree->horizontal[1].pred_pixel_ready = 1;
         nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col,
                             &this_rdc, subsize, &pc_tree->horizontal[0]);
         pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -515,8 +515,9 @@
   PRED_BUFFER *best_pred = NULL;
   PRED_BUFFER *this_mode_pred = NULL;
   const int pixels_in_block = bh * bw;
+  int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready;
 
-  if (cpi->sf.reuse_inter_pred_sby) {
+  if (reuse_inter_pred) {
     int i;
     for (i = 0; i < 3; i++) {
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -639,7 +640,7 @@
       // Search for the best prediction filter type, when the resulting
       // motion vector is at sub-pixel accuracy level for luma component, i.e.,
       // the last three bits are all zeros.
-      if (cpi->sf.reuse_inter_pred_sby) {
+      if (reuse_inter_pred) {
         if (!this_mode_pred) {
           this_mode_pred = &tmp[3];
         } else {
@@ -677,7 +678,7 @@
             best_cost = cost;
             skip_txfm = x->skip_txfm[0];
 
-            if (cpi->sf.reuse_inter_pred_sby) {
+            if (reuse_inter_pred) {
               if (this_mode_pred != current_pred) {
                 free_pred_buffer(this_mode_pred);
                 this_mode_pred = current_pred;
@@ -692,7 +693,7 @@
           }
         }
 
-        if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred)
+        if (reuse_inter_pred && this_mode_pred != current_pred)
           free_pred_buffer(current_pred);
 
         mbmi->interp_filter = best_filter;
@@ -744,13 +745,12 @@
         best_ref_frame = ref_frame;
         skip_txfm = x->skip_txfm[0];
 
-        if (cpi->sf.reuse_inter_pred_sby) {
+        if (reuse_inter_pred) {
           free_pred_buffer(best_pred);
-
           best_pred = this_mode_pred;
         }
       } else {
-        if (cpi->sf.reuse_inter_pred_sby)
+        if (reuse_inter_pred)
           free_pred_buffer(this_mode_pred);
       }
 
@@ -764,7 +764,7 @@
 
   // If best prediction is not in dst buf, then copy the prediction block from
   // temp buf to dst buf.
-  if (best_pred != NULL && cpi->sf.reuse_inter_pred_sby &&
+  if (best_pred != NULL && reuse_inter_pred &&
       best_pred->data != orig_dst.buf) {
     pd->dst = orig_dst;
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -799,7 +799,7 @@
         MIN(max_txsize_lookup[bsize],
             tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
 
-    if (cpi->sf.reuse_inter_pred_sby) {
+    if (reuse_inter_pred) {
       pd->dst.buf = tmp[0].data;
       pd->dst.stride = bw;
     }
@@ -831,7 +831,7 @@
         x->skip_txfm[0] = skip_txfm;
       }
     }
-    if (cpi->sf.reuse_inter_pred_sby)
+    if (reuse_inter_pred)
       pd->dst = orig_dst;
   }