ref: 7494bba66bb88f3aacdcd403fd13004e6492c669
parent: 140447db5a7bf8a8c490ca13ee405fbbd4f39658
parent: 53ff43adc341068945f0857bcf28846080e8f368
author: Deb Mukherjee <[email protected]>
date: Wed Jul 10 11:37:11 EDT 2013
Merge "Prunes out full-rd computation based on modeled rd"
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -468,8 +468,8 @@
}
#if CONFIG_ALPHA
// TODO(jkoleszar): Using the Y w/h for now
- mb->plane[3].subsampling_x = 0;
- mb->plane[3].subsampling_y = 0;
+ xd->plane[3].subsampling_x = 0;
+ xd->plane[3].subsampling_y = 0;
#endif
}
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -720,6 +720,7 @@
sf->disable_splitmv = 0;
sf->mode_search_skip_flags = 0;
sf->last_chroma_intra_mode = TM_PRED;
+ sf->use_rd_breakout = 0;
// Skip any mode not chosen at size < X for all sizes > X
// Hence BLOCK_SIZE_SB64X64 (skip is off)
@@ -767,6 +768,7 @@
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA;
sf->last_chroma_intra_mode = H_PRED;
+ sf->use_rd_breakout = 1;
}
if (speed == 2) {
sf->adjust_thresholds_by_speed = 1;
@@ -790,6 +792,7 @@
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_COMP_REFMISMATCH;
sf->last_chroma_intra_mode = DC_PRED;
+ sf->use_rd_breakout = 1;
}
if (speed == 3) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -804,6 +807,7 @@
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_COMP_REFMISMATCH;
+ sf->use_rd_breakout = 1;
}
if (speed == 4) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -818,6 +822,7 @@
FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA |
FLAG_SKIP_COMP_REFMISMATCH;
+ sf->use_rd_breakout = 1;
}
/*
if (speed == 2) {
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -275,6 +275,7 @@
// defined in the MODE_SEARCH_SKIP_HEURISTICS enum
unsigned int mode_search_skip_flags;
MB_PREDICTION_MODE last_chroma_intra_mode;
+ int use_rd_breakout;
} SPEED_FEATURES;
enum BlockSize {
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -473,6 +473,31 @@
*out_dist_sum = dist_sum << 4;
}
+static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
+ MACROBLOCK *x, MACROBLOCKD *xd,
+ int *out_rate_sum, int64_t *out_dist_sum) {
+ // Models plane 0 (Y) rate and distortion from the src-vs-dst SSE.
+ // Transform coeffs are 8 times an orthogonal transform, so the quantizer
+ // step is 8 times too; dequant is divided by 8 (>> 3) before modeling.
+ struct macroblock_plane *const p = &x->plane[0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
+
+ // TODO(dkovalev): the same code exists in get_plane_block_size
+ const int bw = plane_block_width(bsize, pd);
+ const int bh = plane_block_height(bsize, pd);
+ const enum BlockSize bs = get_block_size(bw, bh);
+ unsigned int sse;
+ int rate;
+ int64_t dist;
+ (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride, &sse);
+ // Pass sse rather than the (discarded) variance: no dc prediction is used
+ model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
+
+ *out_rate_sum = rate;
+ *out_dist_sum = dist << 4;
+}
+
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
TX_SIZE tx_size,
MACROBLOCK *x, MACROBLOCKD *xd,
@@ -1643,8 +1668,9 @@
return cost;
}
-static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
+static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
MACROBLOCK *x,
+ int64_t best_yrd,
int i,
int *labelyrate,
int64_t *distortion,
@@ -1651,6 +1677,7 @@
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl) {
int k;
+ VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
const int bw = plane_block_width(bsize, &xd->plane[0]);
@@ -1672,9 +1699,6 @@
int64_t thisdistortion = 0;
int thisrate = 0;
- *labelyrate = 0;
- *distortion = 0;
-
vp9_build_inter_predictor(pre,
xd->plane[0].pre[0].stride,
dst,
@@ -1684,9 +1708,6 @@
bw, bh, 0 /* no avg */, &xd->subpix,
MV_PRECISION_Q3);
- // TODO(debargha): Make this work properly with the
- // implicit-compoundinter-weight experiment when implicit
- // weighting for splitmv modes is turned on.
if (xd->mode_info_context->mbmi.ref_frame[1] > 0) {
uint8_t* const second_pre =
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
@@ -1699,10 +1720,28 @@
&xd->subpix, MV_PRECISION_Q3);
}
+ // Turning this section off for now since it hurts quality and does not
+ // improve speed much
+ /*
+ if (cpi->sf.use_rd_breakout &&
+ best_yrd < INT64_MAX) {
+ int64_t thisrd;
+ model_rd_for_sb_y(cpi, bsize, x, xd, &thisrate, &thisdistortion);
+ thisrd = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion);
+ if (thisrd / 2 > best_yrd) {
+ *distortion = thisdistortion;
+ *labelyrate = thisrate;
+ return thisrd;
+ }
+ }
+ */
+
vp9_subtract_block(bh, bw, src_diff, 8,
src, src_stride,
dst, xd->plane[0].dst.stride);
+ *labelyrate = 0;
+ *distortion = 0;
k = i;
for (idy = 0; idy < bh / 4; ++idy) {
for (idx = 0; idx < bw / 4; ++idx) {
@@ -1788,7 +1827,7 @@
MB_PREDICTION_MODE this_mode;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
const int label_count = 4;
- int64_t this_segment_rd = 0, other_segment_rd;
+ int64_t this_segment_rd = 0;
int label_mv_thresh;
int segmentyrate = 0;
int best_eobs[4] = { 0 };
@@ -1811,8 +1850,6 @@
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
- other_segment_rd = this_segment_rd;
-
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
// TODO(jingning,rbultje): rewrite the rate-distortion optimization
@@ -1819,7 +1856,7 @@
// loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
- int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
+ int64_t best_label_rd = INT64_MAX;
MB_PREDICTION_MODE mode_selected = ZEROMV;
int bestlabelyrate = 0;
i = idy * 2 + idx;
@@ -1960,8 +1997,9 @@
mv_check_bounds(x, &second_mode_mv[this_mode]))
continue;
- this_rd = encode_inter_mb_segment(&cpi->common,
- x, i, &labelyrate,
+ this_rd = encode_inter_mb_segment(cpi, x,
+ bsi->segment_rd - this_segment_rd,
+ i, &labelyrate,
&distortion, t_above_s, t_left_s);
this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
rate += labelyrate;
@@ -1990,8 +2028,12 @@
bd += sbd;
segmentyrate += bestlabelyrate;
this_segment_rd += best_label_rd;
- other_segment_rd += best_other_rd;
+ if (this_segment_rd > bsi->segment_rd) {
+ bsi->segment_rd = INT64_MAX;
+ return;
+ }
+
for (j = 1; j < bh; ++j)
vpx_memcpy(&x->partition_info->bmi[i + j * 2],
&x->partition_info->bmi[i],
@@ -2003,33 +2045,31 @@
}
} /* for each label */
- if (this_segment_rd < bsi->segment_rd) {
- bsi->r = br;
- bsi->d = bd;
- bsi->segment_yrate = segmentyrate;
- bsi->segment_rd = this_segment_rd;
+ bsi->r = br;
+ bsi->d = bd;
+ bsi->segment_yrate = segmentyrate;
+ bsi->segment_rd = this_segment_rd;
- // store everything needed to come back to this!!
- for (i = 0; i < 4; i++) {
- bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
- if (mbmi->ref_frame[1] > 0)
- bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
- bsi->modes[i] = x->partition_info->bmi[i].mode;
- bsi->eobs[i] = best_eobs[i];
- }
+ // store everything needed to come back to this!!
+ for (i = 0; i < 4; i++) {
+ bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
+ if (mbmi->ref_frame[1] > 0)
+ bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
+ bsi->modes[i] = x->partition_info->bmi[i].mode;
+ bsi->eobs[i] = best_eobs[i];
}
}
-static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
- int_mv *best_ref_mv,
- int_mv *second_best_ref_mv,
- int64_t best_rd,
- int *returntotrate,
- int *returnyrate,
- int64_t *returndistortion,
- int *skippable, int mvthresh,
- int_mv seg_mvs[4][MAX_REF_FRAMES],
- int mi_row, int mi_col) {
+static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
+ int_mv *best_ref_mv,
+ int_mv *second_best_ref_mv,
+ int64_t best_rd,
+ int *returntotrate,
+ int *returnyrate,
+ int64_t *returndistortion,
+ int *skippable, int mvthresh,
+ int_mv seg_mvs[4][MAX_REF_FRAMES],
+ int mi_row, int mi_col) {
int i;
BEST_SEG_INFO bsi;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
@@ -2078,7 +2118,7 @@
*skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
mbmi->mode = bsi.modes[3];
- return (int)(bsi.segment_rd);
+ return bsi.segment_rd;
}
static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
@@ -2585,6 +2625,7 @@
int best_needs_copy = 0;
uint8_t *orig_dst[MAX_MB_PLANE];
int orig_dst_stride[MAX_MB_PLANE];
+ int rs = 0;
switch (this_mode) {
int rate_mv;
@@ -2658,6 +2699,14 @@
*rate2 += cost_mv_ref(cpi, this_mode,
mbmi->mb_mode_context[mbmi->ref_frame[0]]);
+ if (!(*mode_excluded)) {
+ if (is_comp_pred) {
+ *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
+ } else {
+ *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
+ }
+ }
+
pred_exists = 0;
interpolating_intpel_seen = 0;
// Are all MVs integer pel for Y and UV
@@ -2668,6 +2717,7 @@
(mbmi->mv[1].as_mv.col & 15) == 0;
// Search for best switchable filter by checking the variance of
// pred error irrespective of whether the filter will be used
+ *best_filter = EIGHTTAP;
if (cpi->sf.use_8tap_always) {
*best_filter = EIGHTTAP;
vp9_zero(cpi->rd_filter_cache);
@@ -2678,7 +2728,7 @@
cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
- int rs, j;
+ int j;
int64_t rs_rd;
const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
const int is_intpel_interp = intpel_mv &&
@@ -2730,6 +2780,15 @@
tmp_dist_sum = dist_sum;
}
}
+ if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
+ if (rd / 2 > ref_best_rd) {
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = orig_dst[i];
+ xd->plane[i].dst.stride = orig_dst_stride[i];
+ }
+ return INT64_MAX;
+ }
+ }
newbest = i == 0 || rd < best_rd;
if (newbest) {
@@ -2753,11 +2812,11 @@
xd->plane[i].dst.stride = orig_dst_stride[i];
}
}
-
// Set the appripriate filter
mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
cm->mcomp_filter_type : *best_filter;
vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+ rs = (cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(cm, x) : 0);
if (pred_exists) {
if (best_needs_copy) {
@@ -2773,6 +2832,23 @@
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
}
+
+ if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
+ int tmp_rate;
+ int64_t tmp_dist;
+ model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
+ rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
+ // if current pred_error modeled rd is substantially more than the best
+ // so far, do not bother doing full rd
+ if (rd / 2 > ref_best_rd) {
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = orig_dst[i];
+ xd->plane[i].dst.stride = orig_dst_stride[i];
+ }
+ return INT64_MAX;
+ }
+ }
+
if (cpi->common.mcomp_filter_type == SWITCHABLE)
*rate2 += get_switchable_rate(cm, x);
@@ -2817,7 +2893,7 @@
*distortion = sse + sse2;
*rate2 = 500;
- // for best_yrd calculation
+ // for best yrd calculation
*rate_uv = 0;
*distortion_uv = sse2;
@@ -2858,14 +2934,6 @@
*skippable = skippable_y && skippable_uv;
}
- if (!(*mode_excluded)) {
- if (is_comp_pred) {
- *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
- } else {
- *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
- }
- }
-
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = orig_dst[i];
xd->plane[i].dst.stride = orig_dst_stride[i];
@@ -2962,6 +3030,7 @@
cpi->gld_fb_idx,
cpi->alt_fb_idx};
int64_t best_rd = INT64_MAX;
+ int64_t best_yrd = INT64_MAX;
int64_t best_txfm_rd[NB_TXFM_MODES];
int64_t best_txfm_diff[NB_TXFM_MODES];
int64_t best_pred_diff[NB_PREDICTION_TYPES];
@@ -3357,16 +3426,20 @@
int newbest, rs;
int64_t rs_rd;
mbmi->interp_filter =
- vp9_switchable_interp[switchable_filter_index];
+ vp9_switchable_interp[switchable_filter_index];
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
&mbmi->ref_mvs[mbmi->ref_frame[0]][0],
- second_ref, INT64_MAX,
+ second_ref,
+ best_yrd,
&rate, &rate_y, &distortion,
&skippable,
(int)this_rd_thresh, seg_mvs,
mi_row, mi_col);
+ if (tmp_rd == INT64_MAX) {
+ continue;
+ }
cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
rs = get_switchable_rate(cm, x);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
@@ -3374,6 +3447,7 @@
MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd);
if (cm->mcomp_filter_type == SWITCHABLE)
tmp_rd += rs_rd;
+
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
tmp_best_filter = mbmi->interp_filter;
@@ -3392,8 +3466,21 @@
for (i = 0; i < 4; i++)
tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
pred_exists = 1;
+ if (switchable_filter_index == 0 &&
+ cpi->sf.use_rd_breakout &&
+ best_rd < INT64_MAX) {
+ if (tmp_best_rdu / 2 > best_rd) {
+ // skip searching the other filters if the first is
+ // already substantially larger than the best so far
+ tmp_best_filter = mbmi->interp_filter;
+ tmp_best_rdu = INT64_MAX;
+ break;
+ }
+ }
}
} // switchable_filter_index loop
+ if (tmp_best_rdu == INT64_MAX)
+ continue;
mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
tmp_best_filter : cm->mcomp_filter_type);
@@ -3403,11 +3490,14 @@
// switchable list (bilinear, 6-tap) is indicated at the frame level
tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
&mbmi->ref_mvs[mbmi->ref_frame[0]][0],
- second_ref, INT64_MAX,
+ second_ref,
+ best_yrd,
&rate, &rate_y, &distortion,
&skippable,
(int)this_rd_thresh, seg_mvs,
mi_row, mi_col);
+ if (tmp_rd == INT64_MAX)
+ continue;
} else {
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
int rs = get_switchable_rate(cm, x);
@@ -3430,21 +3520,6 @@
if (cpi->common.mcomp_filter_type == SWITCHABLE)
rate2 += get_switchable_rate(cm, x);
- // If even the 'Y' rd value of split is higher than best so far
- // then dont bother looking at UV
- vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
- BLOCK_SIZE_SB8X8);
- vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
- super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
- &uv_skippable, NULL, BLOCK_SIZE_SB8X8, TX_4X4);
- rate2 += rate_uv;
- distortion2 += distortion_uv;
- skippable = skippable && uv_skippable;
-
- txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- for (i = 0; i < NB_TXFM_MODES; ++i)
- txfm_cache[i] = txfm_cache[ONLY_4X4];
-
if (!mode_excluded) {
if (is_comp_pred)
mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
@@ -3451,8 +3526,26 @@
else
mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
}
-
compmode_cost = vp9_cost_bit(comp_mode_p, is_comp_pred);
+
+ if (RDCOST(x->rdmult, x->rddiv, rate2, distortion2) <
+ best_rd) {
+ // If even the 'Y' rd value of split is higher than best so far
+ // then don't bother looking at UV
+ vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
+ BLOCK_SIZE_SB8X8);
+ vp9_subtract_sbuv(x, BLOCK_SIZE_SB8X8);
+ super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
+ &uv_skippable, NULL,
+ BLOCK_SIZE_SB8X8, TX_4X4);
+ rate2 += rate_uv;
+ distortion2 += distortion_uv;
+ skippable = skippable && uv_skippable;
+
+ txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ for (i = 0; i < NB_TXFM_MODES; ++i)
+ txfm_cache[i] = txfm_cache[ONLY_4X4];
+ }
} else {
compmode_cost = vp9_cost_bit(comp_mode_p,
mbmi->ref_frame[1] > INTRA_FRAME);
@@ -3494,7 +3587,7 @@
if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
- // for best_yrd calculation
+ // for best yrd calculation
rate_uv = 0;
if (mb_skip_allowed) {
@@ -3592,6 +3685,8 @@
*returnrate = rate2;
*returndistortion = distortion2;
best_rd = this_rd;
+ best_yrd = best_rd -
+ RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
best_mbmode = *mbmi;
best_skip2 = this_skip2;
best_partition = *x->partition_info;