ref: 453f18040f62f4f1699c0e7c5e1ee288e571d9d2
parent: 07c48ccfe090625e409be50ff82b6d8e6fc93578
author: Vignesh Venkatasubramanian <[email protected]>
date: Mon Feb 13 06:36:02 EST 2017
vp9,realtime: Enable row multithreading for non-rd. Enable row-level multithreading for realtime encodes where the non-rd path is used (speed >= 5). Change-Id: I5439cb49a02171166d8e1de06c7d5e6f8e819a41
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -274,16 +274,15 @@
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
encoder->Control(VP8E_SET_ARNR_TYPE, 3);
encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0);
-
- encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_);
- // While row_mt = 1/0(with/without row-based multi-threading), several
- // speed features that would adaptively adjust encoding parameters have
- // to be disabled to guarantee the bit match of the resulted bitstream.
- encoder->Control(VP9E_ENABLE_ROW_MT_BIT_EXACT, bit_exact_mode_);
} else {
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
encoder->Control(VP9E_SET_AQ_MODE, 3);
}
+ encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_);
+ // While row_mt = 1, several speed features that would adaptively adjust
+ // encoding parameters have to be disabled to guarantee the bit exactness
+ // of the resulting bitstream.
+ encoder->Control(VP9E_ENABLE_ROW_MT_BIT_EXACT, bit_exact_mode_);
encoder_initialized_ = true;
}
}
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3907,6 +3907,10 @@
const int mi_col_start = tile_info->mi_col_start;
const int mi_col_end = tile_info->mi_col_end;
int mi_col;
+ const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
+ const int num_sb_cols =
+ get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
+ int sb_col_in_tile;
// Initialize the left context for the new SB row
memset(&xd->left_context, 0, sizeof(xd->left_context));
@@ -3913,7 +3917,8 @@
memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
// Code each SB in the row
- for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
+ for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
+ mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) {
const struct segmentation *const seg = &cm->seg;
RD_COST dummy_rdc;
const int idx_str = cm->mi_stride * mi_row + mi_col;
@@ -3921,6 +3926,10 @@
PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
BLOCK_SIZE bsize = BLOCK_64X64;
int seg_skip = 0;
+
+ (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
+ sb_col_in_tile - 1);
+
x->source_variance = UINT_MAX;
vp9_zero(x->pred_mv);
vp9_rd_cost_init(&dummy_rdc);
@@ -3996,6 +4005,9 @@
break;
default: assert(0); break;
}
+
+ (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
+ sb_col_in_tile, num_sb_cols);
}
}
// end RTC play code
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5235,4 +5235,11 @@
(cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt &&
!cpi->use_svc)
cpi->row_mt = 1;
+
+ // In realtime mode, enable row-based multi-threading for all the speed
+ // levels where the non-rd path is used.
+ if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt &&
+ !cpi->use_svc) {
+ cpi->row_mt = 1;
+ }
}
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -625,6 +625,23 @@
memcpy(thread_data->td->counts, &cpi->common.counts,
sizeof(cpi->common.counts));
}
+
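+ // When use_nonrd_pick_mode is set, point each plane's coeff, qcoeff,
+ // dqcoeff and eobs buffers at those of the pc_root->none context.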
+ if (cpi->sf.use_nonrd_pick_mode) {
+ MACROBLOCK *const x = &thread_data->td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
+ int j;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ p[j].coeff = ctx->coeff_pbuf[j][0];
+ p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
+ pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
+ p[j].eobs = ctx->eobs_pbuf[j][0];
+ }
+ }
}
launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1666,11 +1666,14 @@
cpi->rc.frames_since_golden > 4)
mode_rd_thresh = mode_rd_thresh << 3;
- if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
+ if (rd_less_than_thresh(
+ best_rdc.rdcost, mode_rd_thresh,
#if CONFIG_MULTITHREAD
- tile_data->enc_row_mt_mutex,
+ // Synchronization of this function is only necessary when
+ // adaptive_rd_thresh is > 0.
+ cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL,
#endif
- &rd_thresh_freq_fact[mode_index]))
+ &rd_thresh_freq_fact[mode_index]))
continue;
if (this_mode == NEWMV) {
@@ -2030,11 +2033,14 @@
if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize]))
continue;
- if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
+ if (rd_less_than_thresh(
+ best_rdc.rdcost, mode_rd_thresh,
#if CONFIG_MULTITHREAD
- tile_data->enc_row_mt_mutex,
+ // Synchronization of this function is only necessary when
+ // adaptive_rd_thresh is > 0.
+ cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL,
#endif
- &rd_thresh_freq_fact[mode_index]))
+ &rd_thresh_freq_fact[mode_index]))
continue;
mi->mode = this_mode;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -558,6 +558,12 @@
sf->limit_newmv_early_exit = 0;
sf->use_simple_block_yrd = 0;
}
+ // Turn off adaptive_rd_thresh for all the non-rd paths when row_mt is on.
+ // Leaving it enabled causes too much locking in realtime mode on certain
+ // platforms (Android ARM, Mac).
+ if (speed >= 5 && cpi->row_mt && cpi->num_workers > 1) {
+ sf->adaptive_rd_thresh = 0;
+ }
}
void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {