ref: 8991d1c18da94bb5aff2afd1d0940fff90dd271b
parent: 260965646d27eb962f3c51c2c00b0da2b7d19392
parent: b54cdcc3de62390dc438a36425665a89958aefea
author: Hui Su <[email protected]>
date: Mon Jul 23 22:41:25 EDT 2018
Merge "Add prune_ref_frame_for_rect_partitions feature"
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -75,6 +75,8 @@
// Used for the machine learning-based early termination
int32_t sum_y_eobs;
+ // Skip certain ref frames during RD search of rectangular partitions.
+ uint8_t skip_ref_frame_mask;
} PICK_MODE_CONTEXT;
typedef struct PC_TREE {
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2633,6 +2633,7 @@
ctx, INT64_MAX);
break;
case PARTITION_HORZ:
+ pc_tree->horizontal[0].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
subsize, &pc_tree->horizontal[0], INT64_MAX);
if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
@@ -2642,6 +2643,7 @@
vp9_rd_cost_init(&tmp_rdc);
update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+ pc_tree->horizontal[1].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col,
&tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
@@ -2654,6 +2656,7 @@
}
break;
case PARTITION_VERT:
+ pc_tree->vertical[0].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
subsize, &pc_tree->vertical[0], INT64_MAX);
if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
@@ -2663,6 +2666,7 @@
vp9_rd_cost_init(&tmp_rdc);
update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+ pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1),
&tmp_rdc, subsize,
&pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
@@ -3712,10 +3716,12 @@
int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist;
int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
int must_split = 0;
-
int partition_mul = cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ
? x->cb_rdmult
: cpi->rd.RDMULT;
+ // Ref frames picked in the [i_th] quarter subblock during square partition
+ // RD search. It may be used to prune ref frame selection of rect partitions.
+ uint8_t ref_frames_used[4] = { 0, 0, 0, 0 };
(void)*tp_orig;
@@ -3846,6 +3852,14 @@
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
best_rdc.rdcost);
if (this_rdc.rate != INT_MAX) {
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ const int ref1 = ctx->mic.ref_frame[0];
+ const int ref2 = ctx->mic.ref_frame[1];
+ for (i = 0; i < 4; ++i) {
+ ref_frames_used[i] |= (1 << ref1);
+ if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+ }
+ }
if (bsize >= BLOCK_8X8) {
this_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
cpi->partition_cost[pl][PARTITION_NONE], 0);
@@ -3970,8 +3984,18 @@
pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
pc_tree->leaf_split[0], best_rdc.rdcost);
-
- if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX;
+ if (sum_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+ } else {
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0];
+ const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1];
+ for (i = 0; i < 4; ++i) {
+ ref_frames_used[i] |= (1 << ref1);
+ if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+ }
+ }
+ }
} else {
for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split);
++i) {
@@ -3999,6 +4023,13 @@
sum_rdc.rdcost = INT64_MAX;
break;
} else {
+ if (cpi->sf.prune_ref_frame_for_rect_partitions &&
+ pc_tree->split[i]->none.rate != INT_MAX) {
+ const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0];
+ const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1];
+ ref_frames_used[i] |= (1 << ref1);
+ if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+ }
sum_rdc.rate += this_rdc.rate;
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
@@ -4034,6 +4065,22 @@
do_rect &= !partition_none_allowed;
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ }
+
+ pc_tree->horizontal[0].skip_ref_frame_mask = 0;
+ pc_tree->horizontal[1].skip_ref_frame_mask = 0;
+ pc_tree->vertical[0].skip_ref_frame_mask = 0;
+ pc_tree->vertical[1].skip_ref_frame_mask = 0;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ uint8_t used_frames;
+ used_frames = ref_frames_used[0] | ref_frames_used[1];
+ if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[2] | ref_frames_used[3];
+ if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[0] | ref_frames_used[2];
+ if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[1] | ref_frames_used[3];
+ if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
}
// PARTITION_HORZ
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3073,6 +3073,8 @@
// lock mechanism involved with reads from
// tile_mode_map
const int mode_search_skip_flags = sf->mode_search_skip_flags;
+ const int is_rect_partition =
+ num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize];
int64_t mask_filter = 0;
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
@@ -3223,6 +3225,13 @@
second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
vp9_zero(x->sum_y_eobs);
+
+ if (is_rect_partition) {
+ if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue;
+ if (second_ref_frame > 0 &&
+ (ctx->skip_ref_frame_mask & (1 << second_ref_frame)))
+ continue;
+ }
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -70,11 +70,14 @@
// speed 0 features
sf->partition_search_breakout_thr.dist = (1 << 20);
sf->partition_search_breakout_thr.rate = 80;
+ sf->use_square_only_threshold = BLOCK_SIZES;
- // Currently, the machine-learning based partition search early termination
- // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
if (is_480p_or_larger) {
+ // Currently, the machine-learning based partition search early termination
+ // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
sf->ml_partition_search_early_termination = 1;
+ } else {
+ sf->use_square_only_threshold = BLOCK_32X32;
}
if (!is_1080p_or_larger) {
@@ -92,6 +95,7 @@
if (speed >= 1) {
sf->ml_partition_search_early_termination = 0;
+ sf->use_square_only_threshold = BLOCK_4X4;
if (is_720p_or_larger) {
sf->disable_split_mask =
@@ -193,7 +197,7 @@
sf->allow_skip_recode = 1;
sf->less_rectangular_check = 1;
sf->use_square_partition_only = !frame_is_boosted(cpi);
- sf->use_square_only_threshold = BLOCK_16X16;
+ sf->prune_ref_frame_for_rect_partitions = 1;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
sf->exhaustive_searches_thresh = (1 << 22);
@@ -210,6 +214,7 @@
if (speed >= 1) {
sf->enable_tpl_model = 0;
+ sf->prune_ref_frame_for_rect_partitions = 0;
if (oxcf->pass == 2) {
TWO_PASS *const twopass = &cpi->twopass;
if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) ||
@@ -226,10 +231,7 @@
sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5];
sf->allow_quant_coeff_opt = sf->optimize_coefficients;
sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? speed : 5];
-
- sf->use_square_only_threshold = BLOCK_4X4;
sf->less_rectangular_check = 1;
-
sf->use_rd_breakout = 1;
sf->adaptive_motion_search = 1;
sf->mv.auto_mv_step_size = 1;
@@ -848,6 +850,7 @@
#else
sf->enable_tpl_model = 1;
#endif
+ sf->prune_ref_frame_for_rect_partitions = 0;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -319,6 +319,9 @@
int use_square_partition_only;
BLOCK_SIZE use_square_only_threshold;
+ // Prune reference frames for rectangular partitions.
+ int prune_ref_frame_for_rect_partitions;
+
// Sets min and max partition sizes for this 64x64 region based on the
// same 64x64 in last encoded frame, and the left and above neighbor.
AUTO_MIN_MAX_MODE auto_min_max_partition_size;