shithub: libvpx

Download patch

ref: bca4564683a35c795973ce2a318cc4f1391f9bbb
parent: 30ef50b522c7b568921ca3e37bcc71ca7cd52972
author: Yunqing Wang <[email protected]>
date: Wed Apr 19 13:00:08 EDT 2017

Make allow_exhaustive_searches feature no longer adaptive

A previous patch turned on allow_exhaustive_searches feature only for
FC_GRAPHICS_ANIMATION content. This patch further modified the feature
by removing the exhaustive search limit, and made it no longer adaptive.
As a result, the 2 counts that recorded the number of motion searches
were removed, which helped achieve determinism in row-based
multi-threaded encoding. Tests showed that this patch did not make
the encoder much slower.

Used exhaustive_searches_thresh for this speed feature, and removed
allow_exhaustive_searches. Also, refactored the speed feature code
to follow the general speed feature setting style.

Change-Id: Ib96b182c4c8dfff4c1ab91d2497cc42bb9e5a4aa

--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -93,11 +93,6 @@
   int rddiv;
   int rdmult;
   int mb_energy;
-  int *m_search_count_ptr;
-  int *ex_search_count_ptr;
-#if CONFIG_MULTITHREAD
-  pthread_mutex_t *search_count_mutex;
-#endif
 
   // These are set to their default values at the beginning, and then adjusted
   // further in the encoding process.
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -4341,7 +4341,6 @@
           }
         }
 #if CONFIG_MULTITHREAD
-        tile_data->search_count_mutex = NULL;
         tile_data->enc_row_mt_mutex = NULL;
         tile_data->row_base_thresh_freq_fact = NULL;
 #endif
@@ -4361,10 +4360,6 @@
       cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
       tplist = cpi->tplist[tile_row][tile_col];
       tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
-
-      // Set up pointers to per thread motion search counters.
-      this_tile->m_search_count = 0;   // Count of motion search hits.
-      this_tile->ex_search_count = 0;  // Exhaustive mesh search hits.
     }
   }
 }
@@ -4408,13 +4403,6 @@
   const int mi_row_start = tile_info->mi_row_start;
   const int mi_row_end = tile_info->mi_row_end;
   int mi_row;
-
-  // Set up pointers to per thread motion search counters.
-  td->mb.m_search_count_ptr = &this_tile->m_search_count;
-  td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
-#if CONFIG_MULTITHREAD
-  td->mb.search_count_mutex = this_tile->search_count_mutex;
-#endif
 
   for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
     vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -281,8 +281,6 @@
   TileInfo tile_info;
   int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
   int mode_map[BLOCK_SIZES][MAX_MODES];
-  int m_search_count;
-  int ex_search_count;
   FIRSTPASS_DATA fp_data;
   VP9RowMTSync row_mt_sync;
 
@@ -289,7 +287,6 @@
   // Used for adaptive_rd_thresh with row multithreading
   int *row_base_thresh_freq_fact;
 #if CONFIG_MULTITHREAD
-  pthread_mutex_t *search_count_mutex;
   pthread_mutex_t *enc_row_mt_mutex;
 #endif
 } TileDataEnc;
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -552,7 +552,6 @@
   const VP9_COMMON *const cm = &cpi->common;
   const int tile_cols = 1 << cm->log2_tile_cols;
   int tile_row, tile_col;
-  TileDataEnc *this_tile;
   int end_of_frame;
   int thread_id = thread_data->thread_id;
   int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
@@ -573,13 +572,6 @@
       tile_col = proc_job->tile_col_id;
       tile_row = proc_job->tile_row_id;
       mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE;
-
-      this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
-      thread_data->td->mb.m_search_count_ptr = &this_tile->m_search_count;
-      thread_data->td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
-#if CONFIG_MULTITHREAD
-      thread_data->td->mb.search_count_mutex = this_tile->search_count_mutex;
-#endif
 
       vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row);
     }
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1998,18 +1998,6 @@
   int range = sf->mesh_patterns[0].range;
   int baseline_interval_divisor;
 
-#if CONFIG_MULTITHREAD
-  if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
-#endif
-
-  // Keep track of number of exhaustive calls (this frame in this thread).
-  ++(*x->ex_search_count_ptr);
-
-#if CONFIG_MULTITHREAD
-  if (NULL != x->search_count_mutex)
-    pthread_mutex_unlock(x->search_count_mutex);
-#endif
-
   // Trap illegal values for interval and range for this function.
   if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
       (interval > range))
@@ -2367,32 +2355,6 @@
   return best_sad;
 }
 
-#define MIN_EX_SEARCH_LIMIT 128
-static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  int is_exhaustive_allowed;
-  int max_ex;
-
-#if CONFIG_MULTITHREAD
-  if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
-#endif
-
-  max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT,
-                  (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
-
-  is_exhaustive_allowed = sf->allow_exhaustive_searches &&
-                          (sf->exhaustive_searches_thresh < INT_MAX) &&
-                          (*x->ex_search_count_ptr <= max_ex) &&
-                          !cpi->rc.is_src_frame_alt_ref;
-
-#if CONFIG_MULTITHREAD
-  if (NULL != x->search_count_mutex)
-    pthread_mutex_unlock(x->search_count_mutex);
-#endif
-
-  return is_exhaustive_allowed;
-}
-
 int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                           MV *mvp_full, int step_param, int search_method,
                           int error_per_bit, int *cost_list, const MV *ref_mv,
@@ -2435,21 +2397,9 @@
                                MAX_MVSEARCH_STEPS - 1 - step_param, 1,
                                cost_list, fn_ptr, ref_mv, tmp_mv);
 
-#if CONFIG_MULTITHREAD
-      if (NULL != x->search_count_mutex)
-        pthread_mutex_lock(x->search_count_mutex);
-#endif
-
-      // Keep track of number of searches (this frame in this thread).
-      ++(*x->m_search_count_ptr);
-
-#if CONFIG_MULTITHREAD
-      if (NULL != x->search_count_mutex)
-        pthread_mutex_unlock(x->search_count_mutex);
-#endif
-
       // Should we allow a follow on exhaustive search?
-      if (is_exhaustive_allowed(cpi, x)) {
+      if ((sf->exhaustive_searches_thresh < INT_MAX) &&
+          !cpi->rc.is_src_frame_alt_ref) {
         int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
         exhuastive_thr >>=
             8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
--- a/vp9/encoder/vp9_multi_thread.c
+++ b/vp9/encoder/vp9_multi_thread.c
@@ -116,11 +116,6 @@
     for (tile_col = 0; tile_col < tile_cols; tile_col++) {
       TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
 
-      CHECK_MEM_ERROR(cm, this_tile->search_count_mutex,
-                      vpx_malloc(sizeof(*this_tile->search_count_mutex)));
-
-      pthread_mutex_init(this_tile->search_count_mutex, NULL);
-
       CHECK_MEM_ERROR(cm, this_tile->enc_row_mt_mutex,
                       vpx_malloc(sizeof(*this_tile->enc_row_mt_mutex)));
 
@@ -170,9 +165,6 @@
           this_tile->row_base_thresh_freq_fact = NULL;
         }
       }
-      pthread_mutex_destroy(this_tile->search_count_mutex);
-      vpx_free(this_tile->search_count_mutex);
-      this_tile->search_count_mutex = NULL;
       pthread_mutex_destroy(this_tile->enc_row_mt_mutex);
       vpx_free(this_tile->enc_row_mt_mutex);
       this_tile->enc_row_mt_mutex = NULL;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -20,19 +20,14 @@
   { 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 }
 };
 
-#define MAX_MESH_SPEED 5  // Max speed setting for mesh motion method
+// Define 3 mesh density levels to control the number of searches.
+#define MESH_DENSITY_LEVELS 3
 static MESH_PATTERN
-    good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
+    good_quality_mesh_patterns[MESH_DENSITY_LEVELS][MAX_MESH_STEP] = {
       { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
-      { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
       { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
       { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
-      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
-      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
     };
-static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
-  50, 25, 15, 5, 1, 1
-};
 
 // Intra only frames, golden frames (except alt ref overlays) and
 // alt ref frames tend to be coded at a higher than ambient quality
@@ -163,6 +158,7 @@
                                                          SPEED_FEATURES *sf,
                                                          int speed) {
   const int boosted = frame_is_boosted(cpi);
+  int i;
 
   sf->tx_size_search_breakout = 1;
   sf->adaptive_rd_thresh = 1;
@@ -171,6 +167,19 @@
   sf->use_square_partition_only = !frame_is_boosted(cpi);
   sf->use_square_only_threshold = BLOCK_16X16;
 
+  if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+    sf->exhaustive_searches_thresh = (1 << 22);
+    for (i = 0; i < MAX_MESH_STEP; ++i) {
+      int mesh_density_level = 0;
+      sf->mesh_patterns[i].range =
+          good_quality_mesh_patterns[mesh_density_level][i].range;
+      sf->mesh_patterns[i].interval =
+          good_quality_mesh_patterns[mesh_density_level][i].interval;
+    }
+  } else {
+    sf->exhaustive_searches_thresh = INT_MAX;
+  }
+
   if (speed >= 1) {
     if (cpi->oxcf.pass == 2) {
       TWO_PASS *const twopass = &cpi->twopass;
@@ -208,6 +217,10 @@
 
     sf->recode_tolerance_low = 15;
     sf->recode_tolerance_high = 30;
+
+    sf->exhaustive_searches_thresh =
+        (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
+                                                                : INT_MAX;
   }
 
   if (speed >= 2) {
@@ -229,6 +242,16 @@
     sf->allow_partition_search_skip = 1;
     sf->recode_tolerance_low = 15;
     sf->recode_tolerance_high = 45;
+
+    if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+      for (i = 0; i < MAX_MESH_STEP; ++i) {
+        int mesh_density_level = 1;
+        sf->mesh_patterns[i].range =
+            good_quality_mesh_patterns[mesh_density_level][i].range;
+        sf->mesh_patterns[i].interval =
+            good_quality_mesh_patterns[mesh_density_level][i].interval;
+      }
+    }
   }
 
   if (speed >= 3) {
@@ -247,6 +270,16 @@
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
     sf->adaptive_interp_filter_search = 1;
+
+    if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+      for (i = 0; i < MAX_MESH_STEP; ++i) {
+        int mesh_density_level = 2;
+        sf->mesh_patterns[i].range =
+            good_quality_mesh_patterns[mesh_density_level][i].range;
+        sf->mesh_patterns[i].interval =
+            good_quality_mesh_patterns[mesh_density_level][i].interval;
+      }
+    }
   }
 
   if (speed >= 4) {
@@ -325,7 +358,6 @@
   sf->adaptive_rd_thresh = 1;
   sf->adaptive_rd_thresh_row_mt = 0;
   sf->use_fast_coef_costing = 1;
-  sf->allow_exhaustive_searches = 0;
   sf->exhaustive_searches_thresh = INT_MAX;
   sf->allow_acl = 0;
   sf->copy_partition_flag = 0;
@@ -609,7 +641,6 @@
   // and multiple threads match
   if (cpi->oxcf.row_mt_bit_exact) {
     sf->adaptive_rd_thresh = 0;
-    sf->allow_exhaustive_searches = 0;
     sf->adaptive_pred_interp_filter = 0;
   }
 
@@ -711,6 +742,16 @@
   sf->adaptive_rd_thresh = 1;
   sf->tx_size_search_breakout = 1;
 
+  sf->exhaustive_searches_thresh =
+      (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
+                                                              : INT_MAX;
+  if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+    for (i = 0; i < MAX_MESH_STEP; ++i) {
+      sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
+      sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
+    }
+  }
+
   if (oxcf->mode == REALTIME)
     set_rt_speed_feature_framesize_independent(cpi, sf, oxcf->speed,
                                                oxcf->content);
@@ -720,32 +761,6 @@
   cpi->full_search_sad = vp9_full_search_sad;
   cpi->diamond_search_sad = vp9_diamond_search_sad;
 
-  if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
-    sf->allow_exhaustive_searches = 1;
-    if (oxcf->mode == BEST) {
-      sf->exhaustive_searches_thresh = (1 << 20);
-      sf->max_exaustive_pct = 100;
-      for (i = 0; i < MAX_MESH_STEP; ++i) {
-        sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
-        sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
-      }
-    } else {
-      int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
-      sf->exhaustive_searches_thresh = (1 << 22);
-      sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
-      if (speed > 0)
-        sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
-
-      for (i = 0; i < MAX_MESH_STEP; ++i) {
-        sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
-        sf->mesh_patterns[i].interval =
-            good_quality_mesh_patterns[speed][i].interval;
-      }
-    }
-  } else {
-    sf->allow_exhaustive_searches = 0;
-  }
-
   // Slow quant, dct and trellis not worthwhile for first pass
   // so make sure they are always turned off.
   if (oxcf->pass == 1) sf->optimize_coefficients = 0;
@@ -783,7 +798,6 @@
   // and multiple threads match
   if (cpi->oxcf.row_mt_bit_exact) {
     sf->adaptive_rd_thresh = 0;
-    sf->allow_exhaustive_searches = 0;
     sf->adaptive_pred_interp_filter = 0;
   }
 
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -325,14 +325,8 @@
   // point for this motion search and limits the search range around it.
   int adaptive_motion_search;
 
-  // Flag for allowing some use of exhaustive searches;
-  int allow_exhaustive_searches;
-
   // Threshold for allowing exhaistive motion search.
   int exhaustive_searches_thresh;
-
-  // Maximum number of exhaustive searches for a frame.
-  int max_exaustive_pct;
 
   // Pattern to be used for any exhaustive mesh searches.
   MESH_PATTERN mesh_patterns[MAX_MESH_STEP];