ref: ed995afba18ec356fa72772d20d3e2f93635b1e3
parent: d9b62160a0782ce99a62ede946390557bf22ddce
author: Ronald S. Bultje <[email protected]>
date: Mon Jul 8 10:49:33 EDT 2013
Make frame-wide filter-type decision fully RD-based. Overall, on all test sets, this gains about +0.2% on all metrics. City is a clip where this really hurts (-1.0% on all metrics); I'm not quite sure why yet. This may be worth looking into in the future. Change-Id: I6f0eecb20e72f0194633270d30bf00d76d9eae78
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -51,6 +51,7 @@
int comp_pred_diff;
int single_pred_diff;
int64_t txfm_rd_diff[NB_TXFM_MODES];
+ int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
// Bit flag for each mode whether it has high error in comparison to others.
unsigned int modes_with_high_error;
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -43,8 +43,6 @@
int enc_debug = 0;
#endif
-void vp9_select_interp_filter_type(VP9_COMP *cpi);
-
static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize);
@@ -345,6 +343,8 @@
}
}
}
+ // FIXME(rbultje) I'm pretty sure this should go to the end of this block
+ // (i.e. after the output_enabled)
if (bsize < BLOCK_SIZE_SB32X32) {
if (bsize < BLOCK_SIZE_MB16X16)
ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8];
@@ -430,6 +430,10 @@
cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff;
cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;
+
+ for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+ cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
+ }
}
}
@@ -1795,7 +1799,6 @@
cpi->inter_zz_count = 0;
vp9_zero(cm->fc.switchable_interp_count);
- vp9_zero(cpi->best_switchable_interp_count);
vp9_zero(cpi->txfm_stepdown_count);
xd->mode_info_context = cm->mi;
@@ -1827,6 +1830,7 @@
init_encode_frame_mb_context(cpi);
vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff));
+ vp9_zero(cpi->rd_filter_diff);
vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff));
vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes));
@@ -2063,6 +2067,7 @@
if (cpi->sf.RD) {
int i, pred_type;
+ INTERPOLATIONFILTERTYPE filter_type;
/*
* This code does a single RD pass over the whole frame assuming
* either compound, single or hybrid prediction as per whatever has
@@ -2089,6 +2094,30 @@
else
pred_type = HYBRID_PREDICTION;
+ /* filter type selection */
+ // FIXME(rbultje) for some odd reason, we often select smooth_filter
+ // as default filter for ARF overlay frames. This is a REALLY BAD
+ // IDEA so we explicitly disable it here.
+ if (frame_type != 3 &&
+ cpi->rd_filter_threshes[frame_type][1] >
+ cpi->rd_filter_threshes[frame_type][0] &&
+ cpi->rd_filter_threshes[frame_type][1] >
+ cpi->rd_filter_threshes[frame_type][2] &&
+ cpi->rd_filter_threshes[frame_type][1] >
+ cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {
+ filter_type = vp9_switchable_interp[1];
+ } else if (cpi->rd_filter_threshes[frame_type][2] >
+ cpi->rd_filter_threshes[frame_type][0] &&
+ cpi->rd_filter_threshes[frame_type][2] >
+ cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {
+ filter_type = vp9_switchable_interp[2];
+ } else if (cpi->rd_filter_threshes[frame_type][0] >
+ cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {
+ filter_type = vp9_switchable_interp[0];
+ } else {
+ filter_type = SWITCHABLE;
+ }
+
/* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */
cpi->mb.e_mbd.lossless = 0;
@@ -2098,6 +2127,7 @@
select_txfm_mode(cpi);
cpi->common.comp_pred_mode = pred_type;
+ cpi->common.mcomp_filter_type = filter_type;
encode_frame_internal(cpi);
for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
@@ -2106,6 +2136,12 @@
cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
}
+ for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+ const int64_t diff = cpi->rd_filter_diff[i] / cpi->common.MBs;
+ cpi->rd_filter_threshes[frame_type][i] =
+ (cpi->rd_filter_threshes[frame_type][i] + diff) / 2;
+ }
+
for (i = 0; i < NB_TXFM_MODES; ++i) {
int64_t pd = cpi->rd_tx_select_diff[i];
int diff;
@@ -2180,10 +2216,6 @@
reset_skip_txfm_size(cpi, TX_16X16);
}
}
-
- // Update interpolation filter strategy for next frame.
- if ((cpi->common.frame_type != KEY_FRAME) && (cpi->sf.search_best_filter))
- vp9_select_interp_filter_type(cpi);
} else {
encode_frame_internal(cpi);
}
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -2395,45 +2395,6 @@
}
-void vp9_select_interp_filter_type(VP9_COMP *cpi) {
- int i;
- int high_filter_index = 0;
- unsigned int thresh;
- unsigned int high_count = 0;
- unsigned int count_sum = 0;
- unsigned int *hist = cpi->best_switchable_interp_count;
-
- if (DEFAULT_INTERP_FILTER != SWITCHABLE) {
- cpi->common.mcomp_filter_type = DEFAULT_INTERP_FILTER;
- return;
- }
-
- // TODO(agrange): Look at using RD criteria to select the interpolation
- // filter to use for the next frame rather than this simpler counting scheme.
-
- // Select the interpolation filter mode for the next frame
- // based on the selection frequency seen in the current frame.
- for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
- unsigned int count = hist[i];
- count_sum += count;
- if (count > high_count) {
- high_count = count;
- high_filter_index = i;
- }
- }
-
- thresh = (unsigned int)(0.80 * count_sum);
-
- if (high_count > thresh) {
- // One filter accounts for 80+% of cases so force the next
- // frame to use this filter exclusively using frame-level flag.
- cpi->common.mcomp_filter_type = vp9_switchable_interp[high_filter_index];
- } else {
- // Use a MB-level switchable filter selection strategy.
- cpi->common.mcomp_filter_type = SWITCHABLE;
- }
-}
-
static void scale_references(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
int i;
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -380,6 +380,7 @@
int rd_thresh_freq_fact[BLOCK_SIZE_TYPES][MAX_MODES];
int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES];
+ // FIXME(rbultje) int64_t?
int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];
unsigned int intra_inter_count[INTRA_INTER_CONTEXTS][2];
unsigned int comp_inter_count[COMP_INTER_CONTEXTS][2];
@@ -386,11 +387,14 @@
unsigned int single_ref_count[REF_CONTEXTS][2][2];
unsigned int comp_ref_count[REF_CONTEXTS][2];
- // FIXME contextualize
-
int64_t rd_tx_select_diff[NB_TXFM_MODES];
+ // FIXME(rbultje) can this overflow?
int rd_tx_select_threshes[4][NB_TXFM_MODES];
+ int64_t rd_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
+ int64_t rd_filter_threshes[4][VP9_SWITCHABLE_FILTERS + 1];
+ int64_t rd_filter_cache[VP9_SWITCHABLE_FILTERS + 1];
+
int RDMULT;
int RDDIV;
@@ -629,7 +633,6 @@
unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1]
[VP9_SWITCHABLE_FILTERS];
- unsigned int best_switchable_interp_count[VP9_SWITCHABLE_FILTERS];
unsigned int txfm_stepdown_count[TX_SIZE_MAX_SB];
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2177,12 +2177,13 @@
}
static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
- int mode_index,
- PARTITION_INFO *partition,
- int_mv *ref_mv,
- int_mv *second_ref_mv,
- int64_t comp_pred_diff[NB_PREDICTION_TYPES],
- int64_t txfm_size_diff[NB_TXFM_MODES]) {
+ int mode_index,
+ PARTITION_INFO *partition,
+ int_mv *ref_mv,
+ int_mv *second_ref_mv,
+ int64_t comp_pred_diff[NB_PREDICTION_TYPES],
+ int64_t txfm_size_diff[NB_TXFM_MODES],
+ int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) {
MACROBLOCKD *const xd = &x->e_mbd;
// Take a snapshot of the coding context so it can be
@@ -2201,7 +2202,11 @@
ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
+ // FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
+ // doesn't actually work this way
memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));
+ memcpy(ctx->best_filter_diff, best_filter_diff,
+ sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1));
}
static void setup_pred_block(const MACROBLOCKD *xd,
@@ -2644,29 +2649,46 @@
// pred error irrespective of whether the filter will be used
if (cpi->sf.use_8tap_always) {
*best_filter = EIGHTTAP;
+ vp9_zero(cpi->rd_filter_cache);
} else {
int i, newbest;
int tmp_rate_sum = 0;
int64_t tmp_dist_sum = 0;
+
+ cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
- int rs = 0;
+ int rs;
+ int64_t rs_rd;
const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
const int is_intpel_interp = intpel_mv &&
vp9_is_interpolating_filter[filter];
mbmi->interp_filter = filter;
vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+ rs = get_switchable_rate(cm, x);
+ rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
- if (cm->mcomp_filter_type == SWITCHABLE)
- rs = get_switchable_rate(cm, x);
-
if (interpolating_intpel_seen && is_intpel_interp) {
- rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);
+ cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
+ tmp_rate_sum, tmp_dist_sum);
+ cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
+ MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+ cpi->rd_filter_cache[i] + rs_rd);
+ rd = cpi->rd_filter_cache[i];
+ if (cm->mcomp_filter_type == SWITCHABLE)
+ rd += rs_rd;
} else {
int rate_sum = 0;
int64_t dist_sum = 0;
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
- rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);
+ cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
+ rate_sum, dist_sum);
+ cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
+ MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+ cpi->rd_filter_cache[i] + rs_rd);
+ rd = cpi->rd_filter_cache[i];
+ if (cm->mcomp_filter_type == SWITCHABLE)
+ rd += rs_rd;
if (!interpolating_intpel_seen && is_intpel_interp) {
tmp_rate_sum = rate_sum;
tmp_dist_sum = dist_sum;
@@ -2891,7 +2913,6 @@
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
MB_PREDICTION_MODE this_mode;
- MB_PREDICTION_MODE best_mode = DC_PRED;
MV_REFERENCE_FRAME ref_frame;
unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
int comp_pred, i;
@@ -2909,18 +2930,18 @@
int64_t best_txfm_diff[NB_TXFM_MODES];
int64_t best_pred_diff[NB_PREDICTION_TYPES];
int64_t best_pred_rd[NB_PREDICTION_TYPES];
+ int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1];
+ int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];
MB_MODE_INFO best_mbmode;
int j;
int mode_index, best_mode_index = 0;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
vp9_prob comp_mode_p;
- int64_t best_overall_rd = INT64_MAX;
int64_t best_intra_rd = INT64_MAX;
int64_t best_inter_rd = INT64_MAX;
MB_PREDICTION_MODE best_intra_mode = DC_PRED;
// MB_PREDICTION_MODE best_inter_mode = ZEROMV;
MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
- INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];
int64_t dist_uv[TX_SIZE_MAX_SB];
@@ -2962,6 +2983,8 @@
best_pred_rd[i] = INT64_MAX;
for (i = 0; i < NB_TXFM_MODES; i++)
best_txfm_rd[i] = INT64_MAX;
+ for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
+ best_filter_rd[i] = INT64_MAX;
// Create a mask set to 1 for each frame used by a smaller resolution.
if (cpi->sf.use_avoid_tested_higherror) {
@@ -3291,10 +3314,12 @@
cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+ cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
for (switchable_filter_index = 0;
switchable_filter_index < VP9_SWITCHABLE_FILTERS;
++switchable_filter_index) {
- int newbest;
+ int newbest, rs;
+ int64_t rs_rd;
mbmi->interp_filter =
vp9_switchable_interp[switchable_filter_index];
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
@@ -3306,10 +3331,13 @@
&skippable,
(int)this_rd_thresh, seg_mvs,
mi_row, mi_col);
- if (cpi->common.mcomp_filter_type == SWITCHABLE) {
- const int rs = get_switchable_rate(cm, x);
- tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
- }
+ cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
+ rs = get_switchable_rate(cm, x);
+ rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
+ cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
+ MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd);
+ if (cm->mcomp_filter_type == SWITCHABLE)
+ tmp_rd += rs_rd;
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
tmp_best_filter = mbmi->interp_filter;
@@ -3454,6 +3482,7 @@
PRED_MBSKIP), 0);
rate2 += prob_skip_cost;
} else {
+ // FIXME(rbultje) make this work for splitmv also
int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,
PRED_MBSKIP), 1);
rate2 += prob_skip_cost;
@@ -3492,14 +3521,11 @@
// best_inter_mode = xd->mode_info_context->mbmi.mode;
}
- if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME)
+ if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME) {
for (i = 0; i < NB_PREDICTION_TYPES; ++i)
best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
-
- if (this_rd < best_overall_rd) {
- best_overall_rd = this_rd;
- best_filter = tmp_best_filter;
- best_mode = this_mode;
+ for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)
+ best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
}
if (this_mode != I4X4_PRED && this_mode != SPLITMV) {
@@ -3595,6 +3621,26 @@
best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
}
+ /* keep record of best filter type */
+ if (!mode_excluded && !disable_skip && mbmi->ref_frame[0] != INTRA_FRAME &&
+ cm->mcomp_filter_type != BILINEAR) {
+ int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
+ VP9_SWITCHABLE_FILTERS :
+ vp9_switchable_interp_map[cm->mcomp_filter_type]];
+ for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+ int64_t adj_rd;
+ // In cases of poor prediction, filter_cache[] can contain really big
+ // values, which actually are bigger than this_rd itself. This can
+ // cause negative best_filter_rd[] values, which is obviously silly.
+ // Therefore, if filter_cache < ref, we do an adjusted calculation.
+ if (cpi->rd_filter_cache[i] >= ref)
+ adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
+ else // FIXME(rbultje) do this for comppred also
+ adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;
+ best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
+ }
+ }
+
/* keep record of best txfm size */
if (bsize < BLOCK_SIZE_SB32X32) {
if (bsize < BLOCK_SIZE_MB16X16) {
@@ -3666,11 +3712,6 @@
(cm->mcomp_filter_type == best_mbmode.interp_filter) ||
(best_mbmode.ref_frame[0] == INTRA_FRAME));
- // Accumulate filter usage stats
- // TODO(agrange): Use RD criteria to select interpolation filter mode.
- if (is_inter_mode(best_mode))
- ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]];
-
// Updating rd_thresh_freq_fact[] here means that the differnt
// partition/block sizes are handled independently based on the best
// choice for the current partition. It may well be better to keep a scaled
@@ -3731,6 +3772,7 @@
vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
+ vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));
goto end;
}
@@ -3768,6 +3810,19 @@
}
if (!x->skip) {
+ for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {
+ if (best_filter_rd[i] == INT64_MAX)
+ best_filter_diff[i] = 0;
+ else
+ best_filter_diff[i] = best_rd - best_filter_rd[i];
+ }
+ if (cm->mcomp_filter_type == SWITCHABLE)
+ assert(best_filter_diff[VP9_SWITCHABLE_FILTERS] == 0);
+ } else {
+ vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));
+ }
+
+ if (!x->skip) {
for (i = 0; i < NB_TXFM_MODES; i++) {
if (best_txfm_rd[i] == INT64_MAX)
best_txfm_diff[i] = 0;
@@ -3786,7 +3841,7 @@
&mbmi->ref_mvs[mbmi->ref_frame[0]][0],
&mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
mbmi->ref_frame[1]][0],
- best_pred_diff, best_txfm_diff);
+ best_pred_diff, best_txfm_diff, best_filter_diff);
return best_rd;
}