ref: 1bb29e24553bc03c8cb2b3bb11daff8c109cc462
parent: 24b16ce7c9da42b23484174a41d1c5ff2ea4929a
parent: 3df55cebb25d18f75f7711f84d9786ac2ce33b72
author: Jingning Han <[email protected]>
date: Mon Jul 2 13:01:45 EDT 2018
Merge "Exploit the spatial variance in temporal dependency model"
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -92,6 +92,7 @@
int sadperbit4;
int rddiv;
int rdmult;
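+ // Per-64x64-block rdmult adapted by the temporal dependency model.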
+ int cb_rdmult;
int mb_energy;
// These are set to their default values at the beginning, and then adjusted
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1960,6 +1960,8 @@
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
}
+ if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
+
// Find best coding mode & reconstruct the MB so it is available
// as a predictor for MBs that follow in the SB
if (frame_is_intra_only(cm)) {
@@ -1986,8 +1988,6 @@
vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
}
- x->rdmult = orig_rdmult;
-
// TODO(jingning) The rate-distortion optimization flow needs to be
// refactored to provide proper exit/return handling.
if (rd_cost->rate == INT_MAX)
@@ -1995,6 +1995,8 @@
else
rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+ x->rdmult = orig_rdmult;
+
ctx->rate = rd_cost->rate;
ctx->dist = rd_cost->dist;
}
@@ -2120,6 +2122,7 @@
PICK_MODE_CONTEXT *ctx) {
MACROBLOCK *const x = &td->mb;
set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
+ if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
@@ -3611,6 +3614,47 @@
#undef Q_CTX
#undef RESOLUTION_CTX
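+// Compute a per-64x64-block rdmult from the temporal dependency model.
+// rk is the ratio of the accumulated intra cost to the accumulated total
+// (intra + motion-compensated dependency) cost over the block, and
+// beta = r0 / rk compares it against the frame-level baseline: blocks
+// whose reconstruction propagates strongly into future frames (small rk)
+// receive a smaller multiplier, i.e. more bits, clamped to [3/4, 5/4] of
+// the frame-level rdmult.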
+int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int orig_rdmult) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+ int tpl_stride = tpl_frame->stride;
+ int64_t intra_cost = 0;
+ int64_t mc_dep_cost = 0;
+ int mi_wide = num_8x8_blocks_wide_lookup[bsize];
+ int mi_high = num_8x8_blocks_high_lookup[bsize];
+ int row, col;
+
+ int dr = 0;
+ double r0, rk, beta;
+
+ r0 = cpi->rd.r0;
+
+ for (row = mi_row; row < mi_row + mi_high; ++row) {
+ for (col = mi_col; col < mi_col + mi_wide; ++col) {
+ TplDepStats *this_stats;
+
+ if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue;
+ this_stats = &tpl_stats[row * tpl_stride + col];
+
+ intra_cost += this_stats->intra_cost;
+ mc_dep_cost += this_stats->mc_dep_cost;
+ }
+ }
+
+ // Bail out to the frame-level rdmult when the block carries no tpl
+ // statistics; otherwise rk below would divide by zero.
+ if (intra_cost + mc_dep_cost == 0) return orig_rdmult;
+
+ rk = (double)intra_cost / (intra_cost + mc_dep_cost);
+ beta = r0 / rk;
+ dr = vp9_get_adaptive_rdmult(cpi, beta);
+
+ dr = VPXMIN(dr, orig_rdmult * 5 / 4);
+ dr = VPXMAX(dr, orig_rdmult * 3 / 4);
+
+ dr = VPXMAX(1, dr);
+ return dr;
+}
+
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
@@ -3660,7 +3704,7 @@
int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
int must_split = 0;
- int partition_mul = cpi->rd.RDMULT;
+ int partition_mul = cpi->sf.enable_tpl_model ? x->cb_rdmult : cpi->rd.RDMULT;
(void)*tp_orig;
@@ -4176,6 +4220,14 @@
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, td->pc_root);
} else {
+ int orig_rdmult = cpi->rd.RDMULT;
+ x->cb_rdmult = orig_rdmult;
+ if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) {
+ int dr =
+ get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult);
+ x->cb_rdmult = dr;
+ }
+
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
@@ -5386,6 +5438,24 @@
if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
source_var_based_partition_search_method(cpi);
+ } else if (cpi->twopass.gf_group.index) {
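+ // Accumulate frame-level tpl stats once per frame; r0 is the baseline
+ // intra_cost / (intra_cost + mc_dep_cost) ratio that get_rdmult_delta()
+ // measures each 64x64 block against.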
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+
+ int tpl_stride = tpl_frame->stride;
+ int64_t intra_cost_base = 0;
+ int64_t mc_dep_cost_base = 0;
+ int row, col;
+
+ for (row = 0; row < cm->mi_rows; ++row) {
+ for (col = 0; col < cm->mi_cols; ++col) {
+ TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+ intra_cost_base += this_stats->intra_cost;
+ mc_dep_cost_base += this_stats->mc_dep_cost;
+ }
+ }
+
+ cpi->rd.r0 = (double)intra_cost_base / (intra_cost_base + mc_dep_cost_base);
}
{
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5431,6 +5431,16 @@
return overlap_area = width * height;
}
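+// Floor division of a pixel position by MI_SIZE: rounds toward negative
+// infinity so that negative motion-compensated positions map onto the
+// correct grid block, e.g. with MI_SIZE = 8, round_floor(-3) is -1 while
+// plain integer division -3 / 8 truncates to 0.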
+int round_floor(int ref_pos) {
+ int round;
+ if (ref_pos < 0)
+ round = -(1 + (-ref_pos - 1) / MI_SIZE);
+ else
+ round = ref_pos / MI_SIZE;
+
+ return round;
+}
+
void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
int mi_row, int mi_col) {
TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
@@ -5443,8 +5453,8 @@
int ref_pos_col = mi_col * MI_SIZE + mv_col;
// top-left location of the grid block
- int grid_pos_row_base = (ref_pos_row >> MI_SIZE_LOG2) << MI_SIZE_LOG2;
- int grid_pos_col_base = (ref_pos_col >> MI_SIZE_LOG2) << MI_SIZE_LOG2;
+ int grid_pos_row_base = round_floor(ref_pos_row) * MI_SIZE;
+ int grid_pos_col_base = round_floor(ref_pos_col) * MI_SIZE;
int block;
for (block = 0; block < 4; ++block) {
@@ -5455,8 +5465,8 @@
grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
int overlap_area = get_overlap_area(grid_pos_row, grid_pos_col,
ref_pos_row, ref_pos_col, block);
- int ref_mi_row = grid_pos_row >> MI_SIZE_LOG2;
- int ref_mi_col = grid_pos_col >> MI_SIZE_LOG2;
+ int ref_mi_row = round_floor(grid_pos_row);
+ int ref_mi_col = round_floor(grid_pos_col);
int64_t mc_flow = tpl_stats->mc_dep_cost -
(tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -299,7 +299,7 @@
int mi_cols;
} TplDepFrame;
-#define TPL_DEP_COST_SCALE_LOG2 16
+#define TPL_DEP_COST_SCALE_LOG2 4
// TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
typedef struct TileDataEnc {
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -200,6 +200,38 @@
return (int)rdmult;
}
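+// Like vp9_compute_rd_mult(), but scales the multiplier by 1 / beta,
+// where beta = r0 / rk is the frame-to-block ratio of temporal
+// dependency strength computed in get_rdmult_delta().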
+int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
+ const VP9_COMMON *cm = &cpi->common;
+ int64_t q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ int64_t rdmult = 0;
+ switch (cm->bit_depth) {
+ case VPX_BITS_8: rdmult = (int64_t)((88 * q * q / beta) / 24); break;
+ case VPX_BITS_10:
+ rdmult = ROUND_POWER_OF_TWO((int64_t)((88 * q * q / beta) / 24), 4);
+ break;
+ default:
+ assert(cm->bit_depth == VPX_BITS_12);
+ rdmult = ROUND_POWER_OF_TWO((int64_t)((88 * q * q / beta) / 24), 8);
+ break;
+ }
+#else
+ int64_t rdmult = (int64_t)((88 * q * q / beta) / 24);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
+ const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
+
+ rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
+ rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
+ }
+ if (rdmult < 1) rdmult = 1;
+ return (int)rdmult;
+}
+
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
double q;
#if CONFIG_VP9_HIGHBITDEPTH
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -115,6 +115,7 @@
#endif
int RDMULT;
int RDDIV;
+ double r0;
} RD_OPT;
typedef struct RD_COST {
@@ -137,6 +138,8 @@
int qindex);
int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex);
+
+int vp9_get_adaptive_rdmult(const struct VP9_COMP *cpi, double beta);
void vp9_initialize_rd_consts(struct VP9_COMP *cpi);