shithub: libvpx

Download patch

ref: 453f18040f62f4f1699c0e7c5e1ee288e571d9d2
parent: 07c48ccfe090625e409be50ff82b6d8e6fc93578
author: Vignesh Venkatasubramanian <[email protected]>
date: Mon Feb 13 06:36:02 EST 2017

vp9,realtime: Enable row multithreading for non-rd

Enable row-level multithreading for realtime encodes where the non-rd
path is used (speed >= 5).

Change-Id: I5439cb49a02171166d8e1de06c7d5e6f8e819a41

--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -274,16 +274,15 @@
         encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
         encoder->Control(VP8E_SET_ARNR_TYPE, 3);
         encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0);
-
-        encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_);
-        // While row_mt = 1/0(with/without row-based multi-threading), several
-        // speed features that would adaptively adjust encoding parameters have
-        // to be disabled to guarantee the bit match of the resulted bitstream.
-        encoder->Control(VP9E_ENABLE_ROW_MT_BIT_EXACT, bit_exact_mode_);
       } else {
         encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
         encoder->Control(VP9E_SET_AQ_MODE, 3);
       }
+      encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_);
+      // While row_mt = 1, several speed features that would adaptively adjust
+      // encoding parameters have to be disabled to guarantee the bit exactness
+      // of the resulting bitstream.
+      encoder->Control(VP9E_ENABLE_ROW_MT_BIT_EXACT, bit_exact_mode_);
       encoder_initialized_ = true;
     }
   }
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3907,6 +3907,10 @@
   const int mi_col_start = tile_info->mi_col_start;
   const int mi_col_end = tile_info->mi_col_end;
   int mi_col;
+  const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
+  const int num_sb_cols =
+      get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
+  int sb_col_in_tile;
 
   // Initialize the left context for the new SB row
   memset(&xd->left_context, 0, sizeof(xd->left_context));
@@ -3913,7 +3917,8 @@
   memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
 
   // Code each SB in the row
-  for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
+  for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
+       mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) {
     const struct segmentation *const seg = &cm->seg;
     RD_COST dummy_rdc;
     const int idx_str = cm->mi_stride * mi_row + mi_col;
@@ -3921,6 +3926,10 @@
     PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
     BLOCK_SIZE bsize = BLOCK_64X64;
     int seg_skip = 0;
+
+    (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
+                                   sb_col_in_tile - 1);
+
     x->source_variance = UINT_MAX;
     vp9_zero(x->pred_mv);
     vp9_rd_cost_init(&dummy_rdc);
@@ -3996,6 +4005,9 @@
         break;
       default: assert(0); break;
     }
+
+    (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
+                                    sb_col_in_tile, num_sb_cols);
   }
 }
 // end RTC play code
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5235,4 +5235,11 @@
       (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt &&
       !cpi->use_svc)
     cpi->row_mt = 1;
+
+  // In realtime mode, enable row-based multi-threading for all the speed levels
+  // where the non-rd path is used.
+  if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt &&
+      !cpi->use_svc) {
+    cpi->row_mt = 1;
+  }
 }
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -625,6 +625,23 @@
       memcpy(thread_data->td->counts, &cpi->common.counts,
              sizeof(cpi->common.counts));
     }
+
+    // Handle use_nonrd_pick_mode case.
+    if (cpi->sf.use_nonrd_pick_mode) {
+      MACROBLOCK *const x = &thread_data->td->mb;
+      MACROBLOCKD *const xd = &x->e_mbd;
+      struct macroblock_plane *const p = x->plane;
+      struct macroblockd_plane *const pd = xd->plane;
+      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
+      int j;
+
+      for (j = 0; j < MAX_MB_PLANE; ++j) {
+        p[j].coeff = ctx->coeff_pbuf[j][0];
+        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
+        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
+        p[j].eobs = ctx->eobs_pbuf[j][0];
+      }
+    }
   }
 
   launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1666,11 +1666,14 @@
         cpi->rc.frames_since_golden > 4)
       mode_rd_thresh = mode_rd_thresh << 3;
 
-    if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
+    if (rd_less_than_thresh(
+            best_rdc.rdcost, mode_rd_thresh,
 #if CONFIG_MULTITHREAD
-                            tile_data->enc_row_mt_mutex,
+            // Synchronization of this function is only necessary when
+            // adaptive_rd_thresh is > 0.
+            cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL,
 #endif
-                            &rd_thresh_freq_fact[mode_index]))
+            &rd_thresh_freq_fact[mode_index]))
       continue;
 
     if (this_mode == NEWMV) {
@@ -2030,11 +2033,14 @@
       if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize]))
         continue;
 
-      if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
+      if (rd_less_than_thresh(
+              best_rdc.rdcost, mode_rd_thresh,
 #if CONFIG_MULTITHREAD
-                              tile_data->enc_row_mt_mutex,
+              // Synchronization of this function is only necessary when
+              // adaptive_rd_thresh is > 0.
+              cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL,
 #endif
-                              &rd_thresh_freq_fact[mode_index]))
+              &rd_thresh_freq_fact[mode_index]))
         continue;
 
       mi->mode = this_mode;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -558,6 +558,12 @@
     sf->limit_newmv_early_exit = 0;
     sf->use_simple_block_yrd = 0;
   }
+  // Turn off adaptive_rd_thresh if row_mt is on for all the non-rd paths.
+  // Leaving it on causes too many locks in realtime mode on certain platforms
+  // (Android ARM, Mac).
+  if (speed >= 5 && cpi->row_mt && cpi->num_workers > 1) {
+    sf->adaptive_rd_thresh = 0;
+  }
 }
 
 void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {