shithub: libvpx

Download patch

ref: c4826c59415f2fe8b8d9ab1a66f50f7e30d64f1d
parent: 5d8642354e4e3715a314fd5f6bf4a0d495b4503b
author: Jingning Han <[email protected]>
date: Thu Sep 12 06:06:47 EDT 2013

Adaptive motion search control

This commit enables an adaptive constraint on the motion search range
for smaller partitions, using the motion vector of the collocated larger
partition as a candidate initial search point.

At speed 0, the runtime for bus at CIF and 2000 kbps goes from
167s down to 162s (3% speed-up), with a 0.01dB performance gain. At
speed 1, the runtime goes from 33687 ms to 32142 ms (4.5% speed-up),
with a 0.03dB performance gain.

In terms of compression performance, the gains at speed 1 are:
derf  0.118%
yt    0.237%
hd    0.203%
stdhd 0.438%

Change-Id: Ic8b34c67810d9504a9579bef2825d3fa54b69454

--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -50,6 +50,10 @@
   int64_t tx_rd_diff[TX_MODES];
   int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
 
+  // motion vector cache for adaptive motion search control in partition
+  // search loop
+  int_mv pred_mv[MAX_REF_FRAMES];
+
   // Bit flag for each mode whether it has high error in comparison to others.
   unsigned int modes_with_high_error;
 
@@ -149,7 +153,7 @@
 
   // Used to store sub partition's choices.
   int fast_ms;
-  int_mv pred_mv;
+  int_mv pred_mv[MAX_REF_FRAMES];
   int subblock_ref;
 
   // TODO(jingning): Need to refactor the structure arrays that buffers the
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1316,7 +1316,6 @@
   save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
 
   x->fast_ms = 0;
-  x->pred_mv.as_int = 0;
   x->subblock_ref = 0;
 
   if (cpi->sf.adjust_partitioning_from_last_frame) {
@@ -1710,10 +1709,6 @@
         // Set fast motion search level.
         x->fast_ms = 1;
 
-        // Calculate prediction MV.
-        x->pred_mv.as_mv.row = (mvr0 + mvr1 + mvr2 + mvr3) >> 2;
-        x->pred_mv.as_mv.col = (mvc0 + mvc1 + mvc2 + mvc3) >> 2;
-
         if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
             d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
           // Set fast motion search level.
@@ -1729,6 +1724,14 @@
   }
 }
 
+static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
+  vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
+}
+
+static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
+  vpx_memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
+}
+
 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
 // unlikely to be selected depending on previous rate-distortion optimization
 // results, for encoding speed-up.
@@ -1837,6 +1840,10 @@
     restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
   }
 
+  // store estimated motion vector
+  if (cpi->sf.adaptive_motion_search)
+    store_pred_mv(x, get_block_context(x, bsize));
+
   // PARTITION_SPLIT
   sum_rd = 0;
   // TODO(jingning): use the motion vectors given by the above search as
@@ -1851,7 +1858,8 @@
         continue;
 
       *get_sb_index(xd, subsize) = i;
-
+      if (cpi->sf.adaptive_motion_search)
+        load_pred_mv(x, get_block_context(x, bsize));
       rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize,
                         &this_rate, &this_dist, i != 3, best_rd - sum_rd);
 
@@ -1885,7 +1893,6 @@
   }
 
   x->fast_ms = 0;
-  x->pred_mv.as_int = 0;
   x->subblock_ref = 0;
 
   if (partition_split_done &&
@@ -1897,6 +1904,8 @@
   if (partition_horz_allowed && do_rect) {
     subsize = get_subsize(bsize, PARTITION_HORZ);
     *get_sb_index(xd, subsize) = 0;
+    if (cpi->sf.adaptive_motion_search)
+      load_pred_mv(x, get_block_context(x, bsize));
     pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
                   get_block_context(x, subsize), best_rd);
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
@@ -1906,6 +1915,8 @@
       encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
 
       *get_sb_index(xd, subsize) = 1;
+      if (cpi->sf.adaptive_motion_search)
+        load_pred_mv(x, get_block_context(x, bsize));
       pick_sb_modes(cpi, mi_row + ms, mi_col, &this_rate,
                     &this_dist, subsize, get_block_context(x, subsize),
                     best_rd - sum_rd);
@@ -1937,6 +1948,8 @@
     subsize = get_subsize(bsize, PARTITION_VERT);
 
     *get_sb_index(xd, subsize) = 0;
+    if (cpi->sf.adaptive_motion_search)
+      load_pred_mv(x, get_block_context(x, bsize));
     pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
                   get_block_context(x, subsize), best_rd);
     sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
@@ -1945,6 +1958,8 @@
       encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
 
       *get_sb_index(xd, subsize) = 1;
+      if (cpi->sf.adaptive_motion_search)
+        load_pred_mv(x, get_block_context(x, bsize));
       pick_sb_modes(cpi, mi_row, mi_col + ms, &this_rate,
                     &this_dist, subsize, get_block_context(x, subsize),
                     best_rd - sum_rd);
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -715,6 +715,7 @@
   sf->use_lastframe_partitioning = 0;
   sf->tx_size_search_method = USE_FULL_RD;
   sf->use_lp32x32fdct = 0;
+  sf->adaptive_motion_search = 0;
   sf->use_avoid_tested_higherror = 0;
   sf->reference_masking = 0;
   sf->skip_lots_of_modes = 0;
@@ -743,7 +744,6 @@
   sf->using_small_partition_info = 0;
   sf->mode_skip_start = MAX_MODES;  // Mode index at which mode skip mask set
 
-
 #if CONFIG_MULTIPLE_ARF
   // Switch segmentation off.
   sf->static_segmentation = 0;
@@ -787,6 +787,7 @@
         sf->use_rd_breakout = 1;
         sf->skip_encode_sb = 1;
         sf->use_lp32x32fdct = 1;
+        sf->adaptive_motion_search = 1;
         sf->auto_mv_step_size = 1;
 
         sf->auto_min_max_partition_size = 1;
@@ -826,6 +827,7 @@
         sf->use_rd_breakout = 1;
         sf->skip_encode_sb = 1;
         sf->use_lp32x32fdct = 1;
+        sf->adaptive_motion_search = 1;
         sf->using_small_partition_info = 0;
         sf->disable_splitmv =
             (MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -285,6 +285,8 @@
   int last_partitioning_redo_frequency;
   int disable_splitmv;
   int using_small_partition_info;
+  // TODO(jingning): combine the related motion search speed features
+  int adaptive_motion_search;
 
   // Implements various heuristics to skip searching modes
   // The heuristics selected are based on  flags
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1771,6 +1771,7 @@
             max_mv = x->max_mv_context[mbmi->ref_frame[0]];
           else
             max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
+
           if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
             // Take wtd average of the step_params based on the last frame's
             // max mv magnitude and the best ref mvs of the current block for
@@ -1781,11 +1782,16 @@
             step_param = cpi->mv_step_param;
           }
 
-          further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
-
           mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
           mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
 
+          if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) {
+            mvp_full.as_mv.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
+            mvp_full.as_mv.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
+            step_param = MAX(step_param, 8);
+          }
+
+          further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
           // adjust src pointer for this block
           mi_buf_shift(x, i);
           if (cpi->sf.search_method == HEX) {
@@ -1839,10 +1845,13 @@
                                          x->nmvjointcost, x->mvcost,
                                          &distortion, &sse);
 
-            // safe motion search result for use in compound prediction
+            // save motion search result for use in compound prediction
             seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
           }
 
+          if (cpi->sf.adaptive_motion_search)
+            x->pred_mv[mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
+
           // restore src pointers
           mi_buf_restore(x, orig_src, orig_pre);
         }
@@ -2085,10 +2094,14 @@
   uint8_t *src_y_ptr = x->plane[0].src.buf;
   uint8_t *ref_y_ptr;
   int row_offset, col_offset;
+  int num_mv_refs = MAX_MV_REF_CANDIDATES +
+                    (cpi->sf.adaptive_motion_search &&
+                     cpi->common.show_frame && block_size < BLOCK_64X64);
 
   // Get the sad for each candidate reference mv
-  for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
-    this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
+  for (i = 0; i < num_mv_refs; i++) {
+    this_mv.as_int = (i < MAX_MV_REF_CANDIDATES) ?
+        mbmi->ref_mvs[ref_frame][i].as_int : x->pred_mv[ref_frame].as_int;
 
     max_mv = MAX(max_mv,
                  MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
@@ -2349,7 +2362,7 @@
       step_param = 8;
 
     // Get prediction MV.
-    mvp_full.as_int = x->pred_mv.as_int;
+    mvp_full.as_int = x->pred_mv[ref].as_int;
 
     // Adjust MV sign if needed.
     if (cm->ref_frame_sign_bias[ref]) {
@@ -2368,11 +2381,19 @@
     } else {
       step_param = cpi->mv_step_param;
     }
-    // mvp_full.as_int = ref_mv[0].as_int;
-    mvp_full.as_int =
-        mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
   }
 
+  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
+      cpi->common.show_frame) {
+    int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
+                                                       b_width_log2(bsize)));
+    step_param = MAX(step_param, boffset);
+  }
+
+  mvp_full.as_int = x->mv_best_ref_index[ref] < MAX_MV_REF_CANDIDATES ?
+      mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int :
+      x->pred_mv[ref].as_int;
+
   mvp_full.as_mv.col >>= 3;
   mvp_full.as_mv.row >>= 3;
 
@@ -2422,6 +2443,10 @@
   *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
                              x->nmvjointcost, x->mvcost,
                              96);
+
+  if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
+    x->pred_mv[ref].as_int = tmp_mv->as_int;
+
   if (scaled_ref_frame) {
     int i;
     for (i = 0; i < MAX_MB_PLANE; i++)