ref: f9c0587200b56285e6847ec88c8ea876b422acc1
parent: 181988d37273bd31708718a51e727ea1048d7c98
author: JackyChen <[email protected]>
date: Mon Jun 6 12:30:14 EDT 2016

vp9: Encoding cycle reduction for speed 8.

1. Skip golden non-zeromv and newmv-last for bsize >= 16x16 if the temporal
   variance obtained from choose_partitioning is very low.
2. Skip horz and vert INTRA modes for speed 8.

This change works best on clips with little noise and some motion (e.g.
gips_motion, which shows > 5% speedup). The PSNR drop is 1.78% on the RTC
test set, with no obvious visual quality regression found.

Change-Id: Ib43b5b20e67809d03c5a6890818ddff59e1fc94a
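
For reference, here is a small standalone sketch (not part of the patch) of how
the enlarged x->variance_low[] array is indexed. The table values are copied
from the pos_shift_16x16 table added to vp9_pickmode.c; the helper
variance_low_idx_16x16() is illustrative only and re-derives the
(i << 2) + j + 9 indexing used in choose_partitioning(), assuming the raster
ordering (top-left, top-right, bottom-left, bottom-right) of the variance-tree
sub-blocks, which matches the idx[] offsets added in vp9_encodeframe.c.

#include <assert.h>
#include <stdio.h>

/* Copy of the lookup table added to vp9_pickmode.c. */
static const int pos_shift_16x16[4][4] = {
  {9, 10, 13, 14},
  {11, 12, 15, 16},
  {17, 18, 21, 22},
  {19, 20, 23, 24}
};

/* variance_low[] index of the 16x16 block at 16x16-unit offset (row, col)
 * inside its 64x64 superblock, mirroring the
 * x->variance_low[(i << 2) + j + 9] assignment in choose_partitioning(). */
static int variance_low_idx_16x16(int row, int col) {
  const int idx_32x32 = ((row >> 1) << 1) + (col >> 1);  /* 32x32 quadrant */
  const int idx_16x16 = ((row & 1) << 1) + (col & 1);    /* 16x16 inside it */
  return (idx_32x32 << 2) + idx_16x16 + 9;
}

int main(void) {
  int row, col;
  /* Layout of x->variance_low[25]:
   * [0] 64x64, [1..2] 64x32 halves, [3..4] 32x64 halves,
   * [5..8] 32x32 blocks, [9..24] 16x16 blocks (see the table above). */
  for (row = 0; row < 4; ++row) {
    for (col = 0; col < 4; ++col) {
      const int idx = variance_low_idx_16x16(row, col);
      assert(idx == pos_shift_16x16[row][col]);
      printf("16x16 at (%d,%d) -> variance_low[%d]\n", row, col, idx);
    }
  }
  return 0;
}
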
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -146,9 +146,9 @@
uint8_t sb_is_skin;
// Used to save the status of whether a block has a low variance in
- // choose_partitioning. 0 for 64x64, 1 2 for 64x32, 3 4 for 32x64, 5~8 for
- // 32x32.
- uint8_t variance_low[9];
+ // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for
+ // 32x32, 9~24 for 16x16.
+ uint8_t variance_low[25];
void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -773,7 +773,7 @@
}
}
- for (i = 0; i < 9; i++) {
+ for (i = 0; i < 25; i++) {
x->variance_low[i] = 0;
}
@@ -1083,28 +1083,53 @@
}
if (cpi->sf.short_circuit_low_temp_var) {
- // Set low variance flag, only for blocks >= 32x32 and if LAST_FRAME was
- // selected.
- if (ref_frame_partition == LAST_FRAME) {
+ int mv_thr = cm->width > 640 ? 8 : 4;
+ // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected
+ // and, for short_circuit_low_temp_var == 2, the int_pro motion vector is
+ // small. If the temporal variance is small, set the variance_low flag for
+ // the block. The variance threshold can be adjusted; the higher it is, the
+ // more aggressive the skipping.
+ if (ref_frame_partition == LAST_FRAME &&
+ (cpi->sf.short_circuit_low_temp_var == 1 ||
+ (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
+ xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
+ xd->mi[0]->mv[0].as_mv.row < mv_thr &&
+ xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
if (xd->mi[0]->sb_type == BLOCK_64X64 &&
vt.part_variances.none.variance < (thresholds[0] >> 1)) {
x->variance_low[0] = 1;
} else if (xd->mi[0]->sb_type == BLOCK_64X32) {
- if (vt.part_variances.horz[0].variance < (thresholds[0] >> 2))
- x->variance_low[1] = 1;
- if (vt.part_variances.horz[1].variance < (thresholds[0] >> 2))
- x->variance_low[2] = 1;
+ for (j = 0; j < 2; j++) {
+ if (vt.part_variances.horz[j].variance < (thresholds[0] >> 2))
+ x->variance_low[j + 1] = 1;
+ }
} else if (xd->mi[0]->sb_type == BLOCK_32X64) {
- if (vt.part_variances.vert[0].variance < (thresholds[0] >> 2))
- x->variance_low[3] = 1;
- if (vt.part_variances.vert[1].variance < (thresholds[0] >> 2))
- x->variance_low[4] = 1;
+ for (j = 0; j < 2; j++) {
+ if (vt.part_variances.vert[j].variance < (thresholds[0] >> 2))
+ x->variance_low[j + 3] = 1;
+ }
} else {
- // 32x32
for (i = 0; i < 4; i++) {
- if (!force_split[i + 1] &&
- vt.split[i].part_variances.none.variance < (thresholds[1] >> 1))
- x->variance_low[i + 5] = 1;
+ if (!force_split[i + 1]) {
+ // 32x32
+ if (vt.split[i].part_variances.none.variance <
+ (thresholds[1] >> 1))
+ x->variance_low[i + 5] = 1;
+ } else if (cpi->sf.short_circuit_low_temp_var == 2) {
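+ // mi-grid offsets of the top-left mi of each 32x32 quadrant (top-left,
+ // top-right, bottom-left, bottom-right) relative to the 64x64 superblock.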
+ int idx[4] = {0, 4, xd->mi_stride << 2, (xd->mi_stride << 2) + 4};
+ const int idx_str = cm->mi_stride * mi_row + mi_col + idx[i];
+ MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;
+ // For 16x16, 32x16 and 16x32 partitions, the low-variance flag is set on
+ // each 16x16 block inside.
+ if ((*this_mi)->sb_type == BLOCK_16X16 ||
+ (*this_mi)->sb_type == BLOCK_32X16 ||
+ (*this_mi)->sb_type == BLOCK_16X32) {
+ for (j = 0; j < 4; j++) {
+ if (vt.split[i].split[j].part_variances.none.variance <
+ (thresholds[2] >> 8))
+ x->variance_low[(i << 2) + j + 9] = 1;
+ }
+ }
+ }
}
}
}
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -40,6 +40,14 @@
int in_use;
} PRED_BUFFER;
+
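+// Maps the 16x16-unit (row, col) position of a 16x16 block within its 64x64
+// superblock to the corresponding x->variance_low[] entry (indices 9..24),
+// which are set in choose_partitioning().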
+static const int pos_shift_16x16[4][4] = {
+ {9, 10, 13, 14},
+ {11, 12, 15, 16},
+ {17, 18, 21, 22},
+ {19, 20, 23, 24}
+};
+
static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm,
const MACROBLOCK *x,
const MACROBLOCKD *xd,
@@ -1274,6 +1282,8 @@
int mi_row, int mi_col,
BLOCK_SIZE bsize) {
int force_skip_low_temp_var = 0;
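+ // Row and column offsets of this block within the 64x64 superblock, in
+ // 16x16 units; used to index pos_shift_16x16.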
+ int i = (mi_row & 0x7) >> 1;
+ int j = (mi_col & 0x7) >> 1;
// Set force_skip_low_temp_var based on the block size and block offset.
if (bsize == BLOCK_64X64) {
force_skip_low_temp_var = variance_low[0];
@@ -1299,6 +1309,19 @@
} else if ((mi_col & 0x7) && (mi_row & 0x7)) {
force_skip_low_temp_var = variance_low[8];
}
+ } else if (bsize == BLOCK_16X16) {
+ force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
+ } else if (bsize == BLOCK_32X16) {
+ // The col shift index for the second 16x16 block.
+ int j2 = ((mi_col + 2) & 0x7) >> 1;
+ // Set the flag only if each 16x16 block inside has low temporal variance.
+ force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
+ variance_low[pos_shift_16x16[i][j2]];
+ } else if (bsize == BLOCK_16X32) {
+ // The row shift index for the second 16x16 block.
+ int i2 = ((mi_row + 2) & 0x7) >> 1;
+ force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
+ variance_low[pos_shift_16x16[i2][j]];
}
return force_skip_low_temp_var;
}
@@ -1503,6 +1526,12 @@
continue;
}
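+ // Skip NEWMV for the LAST_FRAME reference when the block has very low
+ // temporal variance (only with the more aggressive level-2 setting).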
+ if (cpi->sf.short_circuit_low_temp_var == 2 &&
+ force_skip_low_temp_var && ref_frame == LAST_FRAME &&
+ this_mode == NEWMV) {
+ continue;
+ }
+
if (cpi->use_svc) {
if (svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0)
@@ -1842,8 +1871,9 @@
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
// Perform intra prediction search, if the best SAD is above a certain
- // threshold. Skip intra prediction if force_skip_low_temp_var is set.
- if (!force_skip_low_temp_var && perform_intra_pred &&
+ // threshold. Skip intra prediction for bsize >= 32x32 when
+ // force_skip_low_temp_var is set.
+ if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) &&
+ perform_intra_pred &&
(best_rdc.rdcost == INT64_MAX ||
(!x->skip && best_rdc.rdcost > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize))) {
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -429,7 +429,7 @@
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
- if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.pass == 0 &&
+ if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
content != VP9E_CONTENT_SCREEN) {
// Enable short circuit for low temporal variance.
sf->short_circuit_low_temp_var = 1;
@@ -450,6 +450,17 @@
sf->adaptive_rd_thresh = 4;
sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2;
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
+ // Only keep INTRA_DC mode for speed 8.
+ if (!is_keyframe) {
+ int i = 0;
+ for (i = 0; i < BLOCK_SIZES; ++i)
+ sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
+ }
+ if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
+ content != VP9E_CONTENT_SCREEN) {
+ // More aggressive short circuit for speed 8.
+ sf->short_circuit_low_temp_var = 2;
+ }
}
}
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -449,6 +449,10 @@
// Skip a number of expensive mode evaluations for blocks with very low
// temporal variance.
+ // 1: Skip golden non-zeromv and ALL INTRA for bsize >= 32x32.
+ // 2: Skip golden non-zeromv and newmv-last for bsize >= 16x16; skip ALL
+ // INTRA for bsize >= 32x32, and vert/horz INTRA for bsize 16x16, 16x32
+ // and 32x16.
int short_circuit_low_temp_var;
} SPEED_FEATURES;
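
For reference, a minimal setup sketch (not part of the patch) of the encoder
configuration under which the level-2 short circuit is selected: 1-pass CBR,
non-screen content, no SVC, cpu-used (speed) 8. Frame encoding is omitted; the
encode loop would call vpx_codec_encode() with the VPX_DL_REALTIME deadline so
that the real-time speed features (including speed 8) take effect.

#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

int main(void) {
  vpx_codec_ctx_t codec;
  vpx_codec_enc_cfg_t cfg;

  if (vpx_codec_enc_config_default(vpx_codec_vp9_cx(), &cfg, 0)) return 1;
  cfg.g_w = 640;
  cfg.g_h = 480;
  cfg.g_pass = VPX_RC_ONE_PASS;   /* 1-pass */
  cfg.g_lag_in_frames = 0;        /* real-time style, no lookahead */
  cfg.rc_end_usage = VPX_CBR;     /* CBR is required for the short circuit */
  cfg.rc_target_bitrate = 600;    /* kbps, arbitrary for this sketch */

  if (vpx_codec_enc_init(&codec, vpx_codec_vp9_cx(), &cfg, 0)) return 1;
  /* Speed 8 turns on the DC-only intra mask and raises
   * short_circuit_low_temp_var to 2 for non-SVC, non-screen CBR. */
  vpx_codec_control(&codec, VP8E_SET_CPUUSED, 8);

  /* ... feed frames here with vpx_codec_encode(..., VPX_DL_REALTIME) ... */

  vpx_codec_destroy(&codec);
  return 0;
}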