shithub: libvpx

ref: f2c1aea1180b43a1ab7642d504a84fabf1b147b8
parent: 615566aa81327767f89543927048dbbab1156e6d
parent: 71061e9332c05324007e7f6c900285273793366d
author: Yunqing Wang <[email protected]>
date: Tue Feb 14 19:54:10 EST 2017

Merge "Row based multi-threading of encoding stage"

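Note: this merge adds row based multi-threading of the encoding stage. Worker threads encode superblock rows of a tile in parallel, and a per-row synchronization object enforces the wavefront dependency: a superblock may only be encoded once the row above it has advanced far enough past its column. A minimal sketch of that dependency, assuming one mutex/condition pair and one progress counter per superblock row (illustrative names, not the libvpx structures):

    /* Minimal sketch of the row wavefront dependency (illustrative names,
     * not the libvpx structures). A superblock at column c in row r may
     * start only after row r-1 has completed column c + sync_range. */
    #include <pthread.h>

    typedef struct {
      pthread_mutex_t *mutex; /* one mutex per superblock row */
      pthread_cond_t *cond;   /* one condition variable per superblock row */
      int *cur_col;           /* last column completed in each row */
      int sync_range;         /* how far a row must lead the row below it */
    } RowSync;

    /* Block until the dependency for (r, c) is satisfied. */
    static void row_sync_read(RowSync *s, int r, int c) {
      if (r == 0) return; /* the first row of a tile has no dependency */
      pthread_mutex_lock(&s->mutex[r - 1]);
      while (s->cur_col[r - 1] < c + s->sync_range)
        pthread_cond_wait(&s->cond[r - 1], &s->mutex[r - 1]);
      pthread_mutex_unlock(&s->mutex[r - 1]);
    }

    /* Publish completion of column c in row r and wake any waiter below. */
    static void row_sync_write(RowSync *s, int r, int c) {
      pthread_mutex_lock(&s->mutex[r]);
      s->cur_col[r] = c;
      pthread_cond_signal(&s->cond[r]);
      pthread_mutex_unlock(&s->mutex[r]);
    }
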
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -921,10 +921,11 @@
 
 static int encode_tile_worker(VP9_COMP *cpi, VP9BitstreamWorkerData *data) {
   MACROBLOCKD *const xd = &data->xd;
+  const int tile_row = 0;
   vpx_start_encode(&data->bit_writer, data->dest);
   write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info,
-              &data->bit_writer, 0, data->tile_idx, &data->max_mv_magnitude,
-              data->interp_filter_selected);
+              &data->bit_writer, tile_row, data->tile_idx,
+              &data->max_mv_magnitude, data->interp_filter_selected);
   vpx_stop_encode(&data->bit_writer);
   return 1;
 }
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -11,6 +11,8 @@
 #ifndef VP9_ENCODER_VP9_BLOCK_H_
 #define VP9_ENCODER_VP9_BLOCK_H_
 
+#include "vpx_util/vpx_thread.h"
+
 #include "vp9/common/vp9_entropymv.h"
 #include "vp9/common/vp9_entropy.h"
 
@@ -88,6 +90,9 @@
   int mb_energy;
   int *m_search_count_ptr;
   int *ex_search_count_ptr;
+#if CONFIG_MULTITHREAD
+  pthread_mutex_t *search_count_mutex;
+#endif
 
   // These are set to their default values at the beginning, and then adjusted
   // further in the encoding process.
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3095,6 +3095,10 @@
   const int mi_col_start = tile_info->mi_col_start;
   const int mi_col_end = tile_info->mi_col_end;
   int mi_col;
+  const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
+  const int num_sb_cols =
+      get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
+  int sb_col_in_tile;
 
   // Initialize the left context for the new SB row
   memset(&xd->left_context, 0, sizeof(xd->left_context));
@@ -3101,7 +3105,8 @@
   memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
 
   // Code each SB in the row
-  for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
+  for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
+       mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) {
     const struct segmentation *const seg = &cm->seg;
     int dummy_rate;
     int64_t dummy_dist;
@@ -3112,6 +3117,9 @@
     const int idx_str = cm->mi_stride * mi_row + mi_col;
     MODE_INFO **mi = cm->mi_grid_visible + idx_str;
 
+    (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
+                                   sb_col_in_tile - 1);
+
     if (sf->adaptive_pred_interp_filter) {
       for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
 
@@ -3163,6 +3171,8 @@
       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
                         &dummy_rdc, INT64_MAX, td->pc_root);
     }
+    (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
+                                    sb_col_in_tile, num_sb_cols);
   }
 }
 
@@ -4109,13 +4119,17 @@
             tile_data->mode_map[i][j] = j;
           }
         }
+#if CONFIG_MULTITHREAD
+        tile_data->search_count_mutex = NULL;
+        tile_data->enc_row_mt_mutex = NULL;
+#endif
       }
   }
 
   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-      TileInfo *tile_info =
-          &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
+      TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+      TileInfo *tile_info = &this_tile->tile_info;
       vp9_tile_init(tile_info, cm, tile_row, tile_col);
 
       cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
@@ -4125,6 +4139,10 @@
       cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
       tplist = cpi->tplist[tile_row][tile_col];
       tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
+
+      // Set up pointers to per thread motion search counters.
+      this_tile->m_search_count = 0;   // Count of motion search hits.
+      this_tile->ex_search_count = 0;  // Exhaustive mesh search hits.
     }
   }
 }
@@ -4170,10 +4188,11 @@
   int mi_row;
 
   // Set up pointers to per thread motion search counters.
-  this_tile->m_search_count = 0;   // Count of motion search hits.
-  this_tile->ex_search_count = 0;  // Exhaustive mesh search hits.
   td->mb.m_search_count_ptr = &this_tile->m_search_count;
   td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
+#if CONFIG_MULTITHREAD
+  td->mb.search_count_mutex = this_tile->search_count_mutex;
+#endif
 
   for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
     vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
@@ -4289,11 +4308,20 @@
     }
 #endif
 
-    // If allowed, encoding tiles in parallel with one thread handling one tile.
-    if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
-      vp9_encode_tiles_mt(cpi);
-    else
-      encode_tiles(cpi);
+    if (!cpi->new_mt) {
+      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;
+      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;
+      // If allowed, encoding tiles in parallel with one thread handling one
+      // tile when row based multi-threading is disabled.
+      if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
+        vp9_encode_tiles_mt(cpi);
+      else
+        encode_tiles(cpi);
+    } else {
+      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
+      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
+      vp9_encode_tiles_row_mt(cpi);
+    }
 
     vpx_usec_timer_mark(&emr_timer);
     cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
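
Note: the sync hooks are reached through function pointers so vp9_encode_sb_row() is identical on both paths. The non-row-MT path installs vp9_row_mt_sync_read_dummy()/vp9_row_mt_sync_write_dummy(), the row-MT path installs vp9_row_mt_sync_read()/vp9_row_mt_sync_write(). A sketch of what the dummy hooks amount to (illustrative; the real ones live in vp9_ethread.c, and the signatures below only mirror the calls in vp9_encode_sb_row):

    #include "vp9/encoder/vp9_ethread.h" /* for VP9RowMTSync */

    /* No-op hooks for the non-row-MT path: nothing to wait for and nothing
     * to publish, so the single-threaded loop pays no synchronization cost. */
    static void row_mt_sync_read_noop(VP9RowMTSync *sync, int r, int c) {
      (void)sync;
      (void)r;
      (void)c;
    }

    static void row_mt_sync_write_noop(VP9RowMTSync *sync, int r, int c,
                                       int cols) {
      (void)sync;
      (void)r;
      (void)c;
      (void)cols;
    }
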
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -39,6 +39,9 @@
 void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row,
                      int tile_col);
 
+void vp9_encode_sb_row(struct VP9_COMP *cpi, struct ThreadData *td,
+                       int tile_row, int tile_col, int mi_row);
+
 void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q);
 
 #ifdef __cplusplus
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1575,17 +1575,7 @@
   highbd_set_var_fns(cpi);
 #endif
 
-  // Enable multi-threading for first pass.
-  cpi->new_mt = 0;
-  if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
-       cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
-      cpi->oxcf.new_mt && !cpi->use_svc)
-    cpi->new_mt = 1;
-
-  if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
-      (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.new_mt &&
-      !cpi->use_svc)
-    cpi->new_mt = 1;
+  vp9_set_new_mt(cpi);
 }
 
 #ifndef M_LOG2_E
@@ -5212,4 +5202,18 @@
   if (flags & VP8_EFLAG_NO_UPD_ENTROPY) {
     vp9_update_entropy(cpi, 0);
   }
+}
+
+void vp9_set_new_mt(VP9_COMP *cpi) {
+  // Enable row based multi-threading for supported modes of encoding
+  cpi->new_mt = 0;
+  if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
+       cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
+      cpi->oxcf.new_mt && !cpi->use_svc)
+    cpi->new_mt = 1;
+
+  if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
+      (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.new_mt &&
+      !cpi->use_svc)
+    cpi->new_mt = 1;
 }
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -276,6 +276,10 @@
   int ex_search_count;
   FIRSTPASS_DATA fp_data;
   VP9RowMTSync row_mt_sync;
+#if CONFIG_MULTITHREAD
+  pthread_mutex_t *search_count_mutex;
+  pthread_mutex_t *enc_row_mt_mutex;
+#endif
 } TileDataEnc;
 
 typedef struct RowMTInfo {
@@ -896,6 +900,8 @@
 VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
 
 void vp9_new_framerate(VP9_COMP *cpi, double framerate);
+
+void vp9_set_new_mt(VP9_COMP *cpi);
 
 #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
 
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -341,7 +341,7 @@
 #if CONFIG_MULTITHREAD
   const int nsync = row_mt_sync->sync_range;
   int cur;
-  // Only signal when there are enough filtered SB for next row to run.
+  // Only signal when there are enough encoded blocks for next row to run.
   int sig = 1;
 
   if (c < cols - 1) {
@@ -541,4 +541,101 @@
 
   launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook,
                      multi_thread_ctxt, num_workers);
+}
+
+static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
+                                  MultiThreadHandle *multi_thread_ctxt) {
+  VP9_COMP *const cpi = thread_data->cpi;
+  const VP9_COMMON *const cm = &cpi->common;
+  const int tile_cols = 1 << cm->log2_tile_cols;
+  int tile_row, tile_col;
+  TileDataEnc *this_tile;
+  int end_of_frame;
+  int thread_id = thread_data->thread_id;
+  int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
+  JobNode *proc_job = NULL;
+  int mi_row;
+
+  end_of_frame = 0;
+  while (0 == end_of_frame) {
+    // Get the next job in the queue
+    proc_job =
+        (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
+    if (NULL == proc_job) {
+      // Query for the status of other tiles
+      end_of_frame = vp9_get_tiles_proc_status(
+          multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
+          tile_cols);
+    } else {
+      tile_col = proc_job->tile_col_id;
+      tile_row = proc_job->tile_row_id;
+      mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE;
+
+      this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+      thread_data->td->mb.m_search_count_ptr = &this_tile->m_search_count;
+      thread_data->td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
+#if CONFIG_MULTITHREAD
+      thread_data->td->mb.search_count_mutex = this_tile->search_count_mutex;
+#endif
+
+      vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row);
+    }
+  }
+  return 0;
+}
+
+void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int tile_cols = 1 << cm->log2_tile_cols;
+  const int tile_rows = 1 << cm->log2_tile_rows;
+  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
+  int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
+  int i;
+
+  if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
+      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
+      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
+    vp9_row_mt_mem_dealloc(cpi);
+    vp9_init_tile_data(cpi);
+    vp9_row_mt_mem_alloc(cpi);
+  } else {
+    vp9_init_tile_data(cpi);
+  }
+
+  create_enc_workers(cpi, num_workers);
+
+  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);
+
+  vp9_prepare_job_queue(cpi, ENCODE_JOB);
+
+  vp9_multi_thread_tile_init(cpi);
+
+  for (i = 0; i < num_workers; i++) {
+    EncWorkerData *thread_data;
+    thread_data = &cpi->tile_thr_data[i];
+
+    // Before encoding a frame, copy the thread data from cpi.
+    if (thread_data->td != &cpi->td) {
+      thread_data->td->mb = cpi->td.mb;
+      thread_data->td->rd_counts = cpi->td.rd_counts;
+    }
+    if (thread_data->td->counts != &cpi->common.counts) {
+      memcpy(thread_data->td->counts, &cpi->common.counts,
+             sizeof(cpi->common.counts));
+    }
+  }
+
+  launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
+                     multi_thread_ctxt, num_workers);
+
+  for (i = 0; i < num_workers; i++) {
+    VPxWorker *const worker = &cpi->workers[i];
+    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
+
+    // Accumulate counters.
+    if (i < cpi->num_workers - 1) {
+      vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
+      accumulate_rd_opt(&cpi->td, thread_data->td);
+    }
+  }
 }
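
Note: enc_row_mt_worker_hook() drains a per-tile job queue. Each job names one superblock row of one tile; when a tile runs out of jobs the worker asks vp9_get_tiles_proc_status() whether to switch to another tile or stop. A minimal sketch of such a queue, assuming a linked list guarded by one mutex (the real queue lives in MultiThreadHandle and is filled by vp9_prepare_job_queue()):

    #include <pthread.h>
    #include <stddef.h>

    /* One job = one superblock row of one tile (illustrative layout). */
    typedef struct Job {
      int tile_row, tile_col;
      int sb_row; /* vertical unit row inside the tile */
      struct Job *next;
    } Job;

    typedef struct {
      Job *head;
      pthread_mutex_t lock;
    } JobQueue;

    /* Pop the next row job, or return NULL when this tile's queue is empty,
     * which is the worker's cue to look at the other tiles. */
    static Job *job_queue_pop(JobQueue *q) {
      Job *job;
      pthread_mutex_lock(&q->lock);
      job = q->head;
      if (job != NULL) q->head = job->next;
      pthread_mutex_unlock(&q->lock);
      return job;
    }
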
--- a/vp9/encoder/vp9_ethread.h
+++ b/vp9/encoder/vp9_ethread.h
@@ -44,6 +44,8 @@
 
 void vp9_encode_tiles_mt(struct VP9_COMP *cpi);
 
+void vp9_encode_tiles_row_mt(struct VP9_COMP *cpi);
+
 void vp9_encode_fp_row_mt(struct VP9_COMP *cpi);
 
 void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c);
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1992,9 +1992,18 @@
   int range = sf->mesh_patterns[0].range;
   int baseline_interval_divisor;
 
+#if CONFIG_MULTITHREAD
+  if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
+#endif
+
   // Keep track of number of exhaustive calls (this frame in this thread).
   ++(*x->ex_search_count_ptr);
 
+#if CONFIG_MULTITHREAD
+  if (NULL != x->search_count_mutex)
+    pthread_mutex_unlock(x->search_count_mutex);
+#endif
+
   // Trap illegal values for interval and range for this function.
   if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
       (interval > range))
@@ -2355,13 +2364,27 @@
 #define MIN_EX_SEARCH_LIMIT 128
 static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
   const SPEED_FEATURES *const sf = &cpi->sf;
-  const int max_ex =
-      VPXMAX(MIN_EX_SEARCH_LIMIT,
-             (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
+  int is_exhaustive_allowed;
+  int max_ex;
 
-  return sf->allow_exhaustive_searches &&
-         (sf->exhaustive_searches_thresh < INT_MAX) &&
-         (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
+#if CONFIG_MULTITHREAD
+  if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
+#endif
+
+  max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT,
+                  (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
+
+  is_exhaustive_allowed = sf->allow_exhaustive_searches &&
+                          (sf->exhaustive_searches_thresh < INT_MAX) &&
+                          (*x->ex_search_count_ptr <= max_ex) &&
+                          !cpi->rc.is_src_frame_alt_ref;
+
+#if CONFIG_MULTITHREAD
+  if (NULL != x->search_count_mutex)
+    pthread_mutex_unlock(x->search_count_mutex);
+#endif
+
+  return is_exhaustive_allowed;
 }
 
 int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
@@ -2406,8 +2429,18 @@
                                MAX_MVSEARCH_STEPS - 1 - step_param, 1,
                                cost_list, fn_ptr, ref_mv, tmp_mv);
 
+#if CONFIG_MULTITHREAD
+      if (NULL != x->search_count_mutex)
+        pthread_mutex_lock(x->search_count_mutex);
+#endif
+
       // Keep track of number of searches (this frame in this thread).
       ++(*x->m_search_count_ptr);
+
+#if CONFIG_MULTITHREAD
+      if (NULL != x->search_count_mutex)
+        pthread_mutex_unlock(x->search_count_mutex);
+#endif
 
       // Should we allow a follow on exhaustive search?
       if (is_exhaustive_allowed(cpi, x)) {
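
Note: the per-tile motion search counters (m_search_count, ex_search_count) are now shared by every row thread working on the tile, so both the increments and the ratio test in is_exhaustive_allowed() are bracketed by search_count_mutex; a NULL mutex keeps the single-threaded and tile-threaded paths lock-free. The pattern reduced to a sketch (illustrative helper, not a libvpx function):

    #include <pthread.h>

    /* Guarded increment of a counter shared across row threads. Lock only
     * when a mutex was installed (row-MT active); otherwise fall through.
     * CONFIG_MULTITHREAD is the libvpx build flag. */
    static void bump_shared_counter(int *counter, pthread_mutex_t *mutex) {
    #if CONFIG_MULTITHREAD
      if (mutex != NULL) pthread_mutex_lock(mutex);
    #endif
      ++*counter;
    #if CONFIG_MULTITHREAD
      if (mutex != NULL) pthread_mutex_unlock(mutex);
    #endif
    }
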
--- a/vp9/encoder/vp9_multi_thread.c
+++ b/vp9/encoder/vp9_multi_thread.c
@@ -100,11 +100,32 @@
     multi_thread_ctxt->num_tile_vert_sbs[tile_row] =
         get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
   }
+
+#if CONFIG_MULTITHREAD
+  for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+    for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+      TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+
+      CHECK_MEM_ERROR(cm, this_tile->search_count_mutex,
+                      vpx_malloc(sizeof(*this_tile->search_count_mutex)));
+
+      pthread_mutex_init(this_tile->search_count_mutex, NULL);
+
+      CHECK_MEM_ERROR(cm, this_tile->enc_row_mt_mutex,
+                      vpx_malloc(sizeof(*this_tile->enc_row_mt_mutex)));
+
+      pthread_mutex_init(this_tile->enc_row_mt_mutex, NULL);
+    }
+  }
+#endif
 }
 
 void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
   MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
   int tile_col;
+#if CONFIG_MULTITHREAD
+  int tile_row;
+#endif
 
   // Deallocate memory for job queue
   if (multi_thread_ctxt->job_queue) vpx_free(multi_thread_ctxt->job_queue);
@@ -124,6 +145,25 @@
     TileDataEnc *this_tile = &cpi->tile_data[tile_col];
     vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
   }
+
+#if CONFIG_MULTITHREAD
+  for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows;
+       tile_row++) {
+    for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
+         tile_col++) {
+      TileDataEnc *this_tile =
+          &cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols +
+                          tile_col];
+      pthread_mutex_destroy(this_tile->search_count_mutex);
+      vpx_free(this_tile->search_count_mutex);
+      this_tile->search_count_mutex = NULL;
+
+      pthread_mutex_destroy(this_tile->enc_row_mt_mutex);
+      vpx_free(this_tile->enc_row_mt_mutex);
+      this_tile->enc_row_mt_mutex = NULL;
+    }
+  }
+#endif
 }
 
 void vp9_multi_thread_tile_init(VP9_COMP *cpi) {
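
Note: the two per-tile mutexes follow a strict lifecycle: vp9_init_tile_data() starts them out NULL, vp9_row_mt_mem_alloc() allocates and initializes them only when row-MT memory is set up, and vp9_row_mt_mem_dealloc() destroys, frees, and re-NULLs them so a stale handle can never be locked. Paired helpers showing the same lifecycle in isolation (illustrative, not libvpx functions; libvpx itself allocates through CHECK_MEM_ERROR):

    #include <pthread.h>
    #include <stdlib.h>

    /* Allocate and initialize one mutex, or return NULL on failure. */
    static pthread_mutex_t *row_mt_mutex_create(void) {
      pthread_mutex_t *m = malloc(sizeof(*m));
      if (m != NULL) pthread_mutex_init(m, NULL);
      return m;
    }

    /* Destroy, free and clear the handle so it cannot be reused by mistake. */
    static void row_mt_mutex_destroy(pthread_mutex_t **m) {
      if (*m != NULL) {
        pthread_mutex_destroy(*m);
        free(*m);
        *m = NULL;
      }
    }
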
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1657,7 +1657,10 @@
       mode_rd_thresh = mode_rd_thresh << 3;
 
     if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
-                            rd_thresh_freq_fact[mode_index]))
+#if CONFIG_MULTITHREAD
+                            tile_data->enc_row_mt_mutex,
+#endif
+                            &rd_thresh_freq_fact[mode_index]))
       continue;
 
     if (this_mode == NEWMV) {
@@ -2018,7 +2021,10 @@
         continue;
 
       if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
-                              rd_thresh_freq_fact[mode_index]))
+#if CONFIG_MULTITHREAD
+                              tile_data->enc_row_mt_mutex,
+#endif
+                              &rd_thresh_freq_fact[mode_index]))
         continue;
 
       mi->mode = this_mode;
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -610,7 +610,15 @@
 }
 
 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
-                               int bsize, int best_mode_index) {
+                               int bsize,
+#if CONFIG_MULTITHREAD
+                               pthread_mutex_t *enc_row_mt_mutex,
+#endif
+                               int best_mode_index) {
+#if CONFIG_MULTITHREAD
+  if (NULL != enc_row_mt_mutex) pthread_mutex_lock(enc_row_mt_mutex);
+#endif
+
   if (rd_thresh > 0) {
     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
     int mode;
@@ -628,6 +636,10 @@
       }
     }
   }
+
+#if CONFIG_MULTITHREAD
+  if (NULL != enc_row_mt_mutex) pthread_mutex_unlock(enc_row_mt_mutex);
+#endif
 }
 
 int vp9_get_intra_cost_penalty(int qindex, int qdelta,
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -164,11 +164,32 @@
 void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
 
 void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize,
+#if CONFIG_MULTITHREAD
+                               pthread_mutex_t *enc_row_mt_mutex,
+#endif
                                int best_mode_index);
 
 static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
-                                      int thresh_fact) {
-  return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
+#if CONFIG_MULTITHREAD
+                                      pthread_mutex_t *enc_row_mt_mutex,
+#endif
+                                      const int *const thresh_fact) {
+  int is_rd_less_than_thresh;
+
+#if CONFIG_MULTITHREAD
+  // Synchronize to ensure data coherency as thresh_freq_fact is maintained at
+  // tile level and not thread-safe with row based multi-threading
+  if (NULL != enc_row_mt_mutex) pthread_mutex_lock(enc_row_mt_mutex);
+#endif
+
+  is_rd_less_than_thresh =
+      best_rd < ((int64_t)thresh * (*thresh_fact) >> 5) || thresh == INT_MAX;
+
+#if CONFIG_MULTITHREAD
+  if (NULL != enc_row_mt_mutex) pthread_mutex_unlock(enc_row_mt_mutex);
+#endif
+
+  return is_rd_less_than_thresh;
 }
 
 static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3043,7 +3043,10 @@
   const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
   const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
   int64_t mode_threshold[MAX_MODES];
-  int *mode_map = tile_data->mode_map[bsize];
+  int *tile_mode_map = tile_data->mode_map[bsize];
+  int mode_map[MAX_MODES];  // Maintain mode_map information locally to avoid
+                            // lock mechanism involved with reads from
+                            // tile_mode_map
   const int mode_search_skip_flags = sf->mode_search_skip_flags;
   int64_t mask_filter = 0;
   int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
@@ -3155,10 +3158,19 @@
       ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
 
   for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
+
+#if CONFIG_MULTITHREAD
+  if (NULL != tile_data->enc_row_mt_mutex)
+    pthread_mutex_lock(tile_data->enc_row_mt_mutex);
+#endif
+
   for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
     mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
 
   midx = sf->schedule_mode_search ? mode_skip_start : 0;
+
+  memcpy(mode_map, tile_mode_map, sizeof(mode_map));
+
   while (midx > 4) {
     uint8_t end_pos = 0;
     for (i = 5; i < midx; ++i) {
@@ -3172,6 +3184,13 @@
     midx = end_pos;
   }
 
+  memcpy(tile_mode_map, mode_map, sizeof(mode_map));
+
+#if CONFIG_MULTITHREAD
+  if (NULL != tile_data->enc_row_mt_mutex)
+    pthread_mutex_unlock(tile_data->enc_row_mt_mutex);
+#endif
+
   for (midx = 0; midx < MAX_MODES; ++midx) {
     int mode_index = mode_map[midx];
     int mode_excluded = 0;
@@ -3573,6 +3592,9 @@
   }
 
   if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
+    // If adaptive interp filter is enabled, then the current leaf node of 8x8
+    // data is needed for sub8x8. Hence preserve the context.
+    if (cpi->new_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
     rd_cost->rate = INT_MAX;
     rd_cost->rdcost = INT64_MAX;
     return;
@@ -3599,7 +3621,11 @@
 
   if (!cpi->rc.is_src_frame_alt_ref)
     vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
-                              sf->adaptive_rd_thresh, bsize, best_mode_index);
+                              sf->adaptive_rd_thresh, bsize,
+#if CONFIG_MULTITHREAD
+                              tile_data->enc_row_mt_mutex,
+#endif
+                              best_mode_index);
 
   // macroblock modes
   *mi = best_mbmode;
@@ -3737,7 +3763,11 @@
          (cm->interp_filter == mi->interp_filter));
 
   vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
-                            cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
+                            cpi->sf.adaptive_rd_thresh, bsize,
+#if CONFIG_MULTITHREAD
+                            tile_data->enc_row_mt_mutex,
+#endif
+                            THR_ZEROMV);
 
   vp9_zero(best_pred_diff);
   vp9_zero(best_filter_diff);
@@ -3789,6 +3819,7 @@
   int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
   int internal_active_edge =
       vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi);
+  const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
 
   x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
   memset(x->zcoeff_blk[TX_4X4], 0, 4);
@@ -3880,7 +3911,10 @@
     if (!internal_active_edge &&
         rd_less_than_thresh(best_rd,
                             rd_opt->threshes[segment_id][bsize][ref_index],
-                            tile_data->thresh_freq_fact[bsize][ref_index]))
+#if CONFIG_MULTITHREAD
+                            tile_data->enc_row_mt_mutex,
+#endif
+                            &rd_thresh_freq_fact[ref_index]))
       continue;
 
     comp_pred = second_ref_frame > INTRA_FRAME;
@@ -4324,7 +4358,11 @@
          !is_inter_block(&best_mbmode));
 
   vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh,
-                            bsize, best_ref_index);
+                            bsize,
+#if CONFIG_MULTITHREAD
+                            tile_data->enc_row_mt_mutex,
+#endif
+                            best_ref_index);
 
   // macroblock modes
   *mi = best_mbmode;
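
Note: in vp9_rd_pick_inter_mode_sb() the mode ordering is now handled as a snapshot: tile_data->mode_map is copied into a local array under enc_row_mt_mutex, the reordering loop works on the copy, and the result is written back before the lock is released, so the long mode-evaluation loop never touches the shared per-tile array. The copy-in/copy-out pattern in isolation (illustrative helper, not a libvpx function):

    #include <pthread.h>
    #include <string.h>

    /* Snapshot the shared per-tile ordering, reorder the local copy, and
     * publish it back, all inside one critical section. The caller then
     * iterates over local_map only. A NULL mutex means row-MT is off. */
    static void snapshot_and_reorder(int *shared_map, int *local_map,
                                     int num_modes, pthread_mutex_t *mutex) {
      if (mutex != NULL) pthread_mutex_lock(mutex);
      memcpy(local_map, shared_map, num_modes * sizeof(*local_map));
      /* ...move recently successful modes toward the front of local_map... */
      memcpy(shared_map, local_map, num_modes * sizeof(*local_map));
      if (mutex != NULL) pthread_mutex_unlock(mutex);
    }
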
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -585,6 +585,15 @@
       rd->thresh_mult_sub8x8[i] = INT_MAX;
     }
   }
+
+  // With row based multi-threading, the following speed features
+  // have to be disabled to guarantee that bitstreams encoded with single thread
+  // and multiple threads match
+  if (cpi->oxcf.ethread_bit_match) {
+    sf->adaptive_rd_thresh = 0;
+    sf->allow_exhaustive_searches = 0;
+    sf->adaptive_pred_interp_filter = 0;
+  }
 }
 
 void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
@@ -746,5 +755,14 @@
 
   if (!cpi->oxcf.frame_periodic_boost) {
     sf->max_delta_qindex = 0;
+  }
+
+  // With row based multi-threading, the following speed features
+  // have to be disabled to guarantee that bitstreams encoded with single thread
+  // and multiple threads match
+  if (cpi->oxcf.ethread_bit_match) {
+    sf->adaptive_rd_thresh = 0;
+    sf->allow_exhaustive_searches = 0;
+    sf->adaptive_pred_interp_filter = 0;
   }
 }
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -1459,6 +1459,9 @@
       cfg->ss_number_layers > 1 && cfg->ts_number_layers > 1) {
     return VPX_CODEC_INVALID_PARAM;
   }
+
+  vp9_set_new_mt(ctx->cpi);
+
   return VPX_CODEC_OK;
 }