ref: bacc67f4a808e488d24fda2e93cfd5fbe0b806a4
parent: 204809bfb3dbb1ace2068a2219f5dcbb79d610fd
author: jackychen <[email protected]>
date: Fri May 20 09:45:46 EDT 2016
vp9: Skip some modes when variance is low for big blocks, for 1 pass real-time. Skip intra-mode and some inter-modes (newmv, nearmv, nearestmv) for golden frame if the variance obtained from choose_partitioning is very low. Only for 1 pass real-time CBR mode and bsize >= 32x32; it gives a ~2.5% speed-up with less than 0.1% PSNR drop on the rtc test set. No visual regression observed. Change-Id: I70efbc95a1007231ae36f02c5b2fbf6cd35077ad
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -145,6 +145,11 @@
uint8_t sb_is_skin;
+ // Used to save the status of whether a block has a low variance in
+ // choose_partitioning. 0 for 64x64, 1 2 for 64x32, 3 4 for 32x64, 5~8 for
+ // 32x32.
+ uint8_t variance_low[9];
+
void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
#if CONFIG_VP9_HIGHBITDEPTH
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -747,6 +747,8 @@
const uint8_t *d;
int sp;
int dp;
+ // Ref frame used in partitioning.
+ MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
int pixels_wide = 64, pixels_high = 64;
int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]};
@@ -771,6 +773,10 @@
}
}
+ for (i = 0; i < 9; i++) {
+ x->variance_low[i] = 0;
+ }
+
if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3);
if (xd->mb_to_bottom_edge < 0)
@@ -831,8 +837,10 @@
mi->ref_frame[0] = GOLDEN_FRAME;
mi->mv[0].as_int = 0;
y_sad = y_sad_g;
+ ref_frame_partition = GOLDEN_FRAME;
} else {
x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
+ ref_frame_partition = LAST_FRAME;
}
set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
@@ -1017,6 +1025,31 @@
if (!is_key_frame &&
vt.part_variances.none.variance > (5 * avg_32x32) >> 4)
force_split[0] = 1;
+ }
+
+ if (cpi->sf.short_circuit_low_temp_var) {
+ // Set low variance flag, only for blocks >= 32x32 and if LAST_FRAME was
+ // selected.
+ if (ref_frame_partition == LAST_FRAME) {
+ // 64x64
+ if (vt.part_variances.none.variance < (thresholds[0] >> 1))
+ x->variance_low[0] = 1;
+ // 64x32
+ if (vt.part_variances.horz[0].variance < (thresholds[0] >> 2))
+ x->variance_low[1] = 1;
+ if (vt.part_variances.horz[1].variance < (thresholds[0] >> 2))
+ x->variance_low[2] = 1;
+ // 32x64
+ if (vt.part_variances.vert[0].variance < (thresholds[0] >> 2))
+ x->variance_low[3] = 1;
+ if (vt.part_variances.vert[1].variance < (thresholds[0] >> 2))
+ x->variance_low[4] = 1;
+ // 32x32
+ for (i = 0; i < 4; i++) {
+ if (vt.split[i].part_variances.none.variance < (thresholds[1] >> 1))
+ x->variance_low[i + 5] = 1;
+ }
+ }
}
// Now go through the entire structure, splitting every block size until
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1126,34 +1126,38 @@
TileDataEnc *tile_data,
int mi_row, int mi_col,
struct buf_2d yv12_mb[4][MAX_MB_PLANE],
- BLOCK_SIZE bsize) {
+ BLOCK_SIZE bsize,
+ int force_skip_low_temp_var) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
TileInfo *const tile_info = &tile_data->tile_info;
-// TODO(jingning) placeholder for inter-frame non-RD mode decision.
+ // TODO(jingning) placeholder for inter-frame non-RD mode decision.
x->pred_mv_sad[ref_frame] = INT_MAX;
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
-// this needs various further optimizations. to be continued..
+ // this needs various further optimizations. to be continued..
if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
sf, sf);
- if (cm->use_prev_frame_mvs)
+ if (cm->use_prev_frame_mvs) {
vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
candidates, mi_row, mi_col,
x->mbmi_ext->mode_context);
- else
- const_motion[ref_frame] =
- mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame,
- candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col,
- (int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id));
+ } else {
+ const_motion[ref_frame] =
+ mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame,
+ candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col,
+ (int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id));
+ }
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
&frame_mv[NEARESTMV][ref_frame],
&frame_mv[NEARMV][ref_frame]);
- if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8) {
+ // Early exit for golden frame if force_skip_low_temp_var is set.
+ if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8 &&
+ !(force_skip_low_temp_var && ref_frame == GOLDEN_FRAME)) {
vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
ref_frame, bsize);
}
@@ -1266,6 +1270,39 @@
}
#endif // CONFIG_VP9_TEMPORAL_DENOISING
+static INLINE int set_force_skip_low_temp_var(uint8_t *variance_low,  // 9 low-variance flags: [0] 64x64, [1..2] 64x32, [3..4] 32x64, [5..8] 32x32
+ int mi_row, int mi_col,  // block position; only the low 3 bits of each are examined
+ BLOCK_SIZE bsize) {  // returns the selected variance_low[] entry, or 0 when no branch matches
+ int force_skip_low_temp_var = 0;  // default: kept for any bsize / offset combination not handled below
+ // Select the variance_low[] entry from the block size and from whether the low 3 bits of mi_row / mi_col are zero (presumably the offset inside a 64x64 superblock -- confirm).
+ if (bsize == BLOCK_64X64) {
+ force_skip_low_temp_var = variance_low[0];  // single 64x64 entry, no offset check
+ } else if (bsize == BLOCK_64X32) {
+ if (!(mi_col & 0x7) && !(mi_row & 0x7)) {  // zero column and row offset
+ force_skip_low_temp_var = variance_low[1];  // first 64x32 entry
+ } else if (!(mi_col & 0x7) && (mi_row & 0x7)) {  // zero column offset, non-zero row offset
+ force_skip_low_temp_var = variance_low[2];  // second 64x32 entry
+ }
+ } else if (bsize == BLOCK_32X64) {
+ if (!(mi_col & 0x7) && !(mi_row & 0x7)) {  // zero column and row offset
+ force_skip_low_temp_var = variance_low[3];  // first 32x64 entry
+ } else if ((mi_col & 0x7) && !(mi_row & 0x7)) {  // non-zero column offset, zero row offset
+ force_skip_low_temp_var = variance_low[4];  // second 32x64 entry
+ }
+ } else if (bsize == BLOCK_32X32) {
+ if (!(mi_col & 0x7) && !(mi_row & 0x7)) {  // zero column and row offset
+ force_skip_low_temp_var = variance_low[5];  // first 32x32 entry
+ } else if ((mi_col & 0x7) && !(mi_row & 0x7)) {  // non-zero column offset only
+ force_skip_low_temp_var = variance_low[6];  // second 32x32 entry
+ } else if (!(mi_col & 0x7) && (mi_row & 0x7)) {  // non-zero row offset only
+ force_skip_low_temp_var = variance_low[7];  // third 32x32 entry
+ } else if ((mi_col & 0x7) && (mi_row & 0x7)) {  // non-zero column and row offset
+ force_skip_low_temp_var = variance_low[8];  // fourth 32x32 entry
+ }
+ }
+ return force_skip_low_temp_var;
+}
+
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
TileDataEnc *tile_data,
int mi_row, int mi_col, RD_COST *rd_cost,
@@ -1324,6 +1361,7 @@
int svc_force_zero_mode[3] = {0};
int perform_intra_pred = 1;
int use_golden_nonzeromv = 1;
+ int force_skip_low_temp_var = 0;
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_PICKMODE_CTX_DEN ctx_den;
int64_t zero_last_cost_orig = INT64_MAX;
@@ -1410,14 +1448,19 @@
}
}
+ if (cpi->sf.short_circuit_low_temp_var) {
+ force_skip_low_temp_var =
+ set_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
+ }
+
if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
- !svc_force_zero_mode[GOLDEN_FRAME - 1]))
+ !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var))
use_golden_nonzeromv = 0;
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
&ref_frame_skip_mask, flag_list, tile_data, mi_row, mi_col,
- yv12_mb, bsize);
+ yv12_mb, bsize, force_skip_low_temp_var);
}
for (idx = 0; idx < RT_INTER_MODES; ++idx) {
@@ -1429,6 +1472,7 @@
int is_skippable;
int this_early_term = 0;
PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
+
if (cpi->use_svc)
this_mode = ref_mode_set_svc[idx].pred_mode;
@@ -1447,9 +1491,18 @@
if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
continue;
+
if (const_motion[ref_frame] && this_mode == NEARMV)
continue;
+ // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
+ // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
+ // later.
+ if (force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
+ frame_mv[this_mode][ref_frame].as_int != 0) {
+ continue;
+ }
+
if (cpi->use_svc) {
if (svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0)
@@ -1456,8 +1509,9 @@
continue;
}
- if (!(frame_mv[this_mode][ref_frame].as_int == 0 &&
- ref_frame == LAST_FRAME)) {
+ if (!force_skip_low_temp_var &&
+ !(frame_mv[this_mode][ref_frame].as_int == 0 &&
+ ref_frame == LAST_FRAME)) {
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
@@ -1548,8 +1602,10 @@
}
}
- if (use_golden_nonzeromv &&
- this_mode == NEWMV && ref_frame == LAST_FRAME &&
+ // If use_golden_nonzeromv is false, NEWMV mode is skipped for golden, no
+ // need to compute best_pred_sad which is only used to skip golden NEWMV.
+ if (use_golden_nonzeromv && this_mode == NEWMV &&
+ ref_frame == LAST_FRAME &&
frame_mv[NEWMV][LAST_FRAME].as_int != INVALID_MV) {
const int pre_stride = xd->plane[0].pre[0].stride;
const uint8_t * const pre_buf = xd->plane[0].pre[0].buf +
@@ -1786,11 +1842,11 @@
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
// Perform intra prediction search, if the best SAD is above a certain
- // threshold.
- if (perform_intra_pred &&
- ((best_rdc.rdcost == INT64_MAX ||
- (!x->skip && best_rdc.rdcost > inter_mode_thresh &&
- bsize <= cpi->sf.max_intra_bsize)))) {
+ // threshold. Skip intra prediction if force_skip_low_temp_var is set.
+ if (!force_skip_low_temp_var && perform_intra_pred &&
+ (best_rdc.rdcost == INT64_MAX ||
+ (!x->skip && best_rdc.rdcost > inter_mode_thresh &&
+ bsize <= cpi->sf.max_intra_bsize))) {
struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0, 0 };
int i;
TX_SIZE best_intra_tx_size = TX_SIZES;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -429,6 +429,11 @@
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
+ if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.pass == 0 &&
+ content != VP9E_CONTENT_SCREEN) {
+ // Enable short circuit when temporal variance is very low.
+ sf->short_circuit_low_temp_var = 1;
+ }
}
if (speed >= 7) {
@@ -554,6 +559,7 @@
sf->default_interp_filter = SWITCHABLE;
sf->simple_model_rd_from_var = 0;
sf->short_circuit_flat_blocks = 0;
+ sf->short_circuit_low_temp_var = 0;
// Some speed-up features even for best quality as minimal impact on quality.
sf->adaptive_rd_thresh = 1;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -446,6 +446,10 @@
// Skip a number of expensive mode evaluations for blocks with zero source
// variance.
int short_circuit_flat_blocks;
+
+ // Skip a number of expensive mode evaluations for blocks with very low
+ // temporal variance.
+ int short_circuit_low_temp_var;
} SPEED_FEATURES;
struct VP9_COMP;