ref: 6b6d3886fcd34ceeae44bc326e178643dee6fe70
parent: 5950a69213b8add4a9687268683bc70e42aa2e86
author: Jingning Han <[email protected]>
date: Fri Mar 21 07:05:39 EDT 2014
Enable recursive partition selection for non-RD coding mode. This commit enables a recursive partition type search for non-RD mode decisions. It allows the encoder to choose variable block sizes in a 64x64 block based on rate-distortion modeling. It improves the speed -6 coding efficiency for the rtc set by 2.4%. Most of the gains come from the 32-40dB range, where many sequences gain about 5% to 20%. Local tests suggest there is no speed change. Change-Id: I06300016e500a21652812b7b3b081db39a783d66
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -276,7 +276,7 @@
}
}
-static void duplicate_modeinfo_in_sb(VP9_COMMON * const cm,
+static void duplicate_mode_info_in_sb(VP9_COMMON * const cm,
MACROBLOCKD *const xd,
int mi_row,
int mi_col,
@@ -300,7 +300,7 @@
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
set_modeinfo_offsets(&cpi->common, xd, mi_row, mi_col);
xd->mi_8x8[0]->mbmi.sb_type = bsize;
- duplicate_modeinfo_in_sb(&cpi->common, xd, mi_row, mi_col, bsize);
+ duplicate_mode_info_in_sb(&cpi->common, xd, mi_row, mi_col, bsize);
}
}
@@ -2690,9 +2690,342 @@
MB_PREDICTION_MODE intramode = DC_PRED;
set_mode_info(&xd->mi_8x8[0]->mbmi, bsize, intramode);
}
- duplicate_modeinfo_in_sb(cm, xd, mi_row, mi_col, bsize);
+ duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
}
+// Writes the mode info chosen for the block at (mi_row, mi_col) back into the
+// frame's mode info array, following the stored partitioning recursively.
+//
+// cm       - common state; supplies the frame limits mi_rows / mi_cols.
+// x        - macroblock state holding the per-block-size contexts, sb indices
+//            and stored partitionings that are read here.
+// mi_row,
+// mi_col   - position of the block, in the units of cm->mi_rows / mi_cols.
+// bsize    - size of the block being filled; must be at least BLOCK_8X8.
+// subsize  - partitioning stored for this block; together with bsize it
+//            selects the partition type through partition_lookup.
+//
+// Blocks that start outside the frame are ignored: the split search in
+// nonrd_pick_partition() skips such sub-blocks, so no partitioning or mode
+// info was stored for them.
+static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
+                              int mi_row, int mi_col, int bsize, int subsize) {
+  MACROBLOCKD *xd = &x->e_mbd;
+  // hbs is the offset from the block origin to its second half.
+  int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+  PARTITION_TYPE partition = partition_lookup[bsl][subsize];
+  int i;
+
+  assert(bsize >= BLOCK_8X8);
+
+  // Nothing was searched (or stored) for blocks beyond the frame edge.
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+    return;
+
+  switch (partition) {
+    case PARTITION_NONE:
+      set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+      *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic;
+      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+      break;
+    case PARTITION_VERT:
+      // Left half.
+      *get_sb_index(x, subsize) = 0;
+      set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+      *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic;
+      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+
+      // Right half, only when it starts inside the frame.
+      if (mi_col + hbs < cm->mi_cols) {
+        *get_sb_index(x, subsize) = 1;
+        set_modeinfo_offsets(cm, xd, mi_row, mi_col + hbs);
+        *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic;
+        duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, bsize);
+      }
+      break;
+    case PARTITION_HORZ:
+      // Top half.
+      *get_sb_index(x, subsize) = 0;
+      set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+      *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic;
+      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+
+      // Bottom half, only when it starts inside the frame.
+      if (mi_row + hbs < cm->mi_rows) {
+        *get_sb_index(x, subsize) = 1;
+        set_modeinfo_offsets(cm, xd, mi_row + hbs, mi_col);
+        *(xd->mi_8x8[0]) = (get_block_context(x, subsize))->mic;
+        duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, bsize);
+      }
+      break;
+    case PARTITION_SPLIT:
+      // Visit the four quadrants in raster order. The sb index has to be set
+      // before the stored partitioning of the quadrant is looked up.
+      for (i = 0; i < 4; ++i) {
+        *get_sb_index(x, subsize) = i;
+        fill_mode_info_sb(cm, x, mi_row + (i >> 1) * hbs,
+                          mi_col + (i & 1) * hbs, subsize,
+                          *(get_sb_partitioning(x, subsize)));
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+// Recursively searches the partitioning of the square block at
+// (mi_row, mi_col) for the non-RD mode decision path.
+//
+// PARTITION_NONE, PARTITION_SPLIT, PARTITION_HORZ and PARTITION_VERT are
+// evaluated in that order, subject to the frame edges and the speed features.
+// A candidate is accepted when its RDCOST is below the best cost so far. The
+// accepted partitioning is recorded through get_sb_partitioning() and written
+// to the mode info array by fill_mode_info_sb().
+//
+// cpi       - encoder instance.
+// tile      - tile info forwarded to nonrd_pick_sb_modes() and encode_sb().
+// tp        - token pointer forwarded to the recursive calls and encode_sb().
+// mi_row,
+// mi_col    - position of the block, in the units of cm->mi_rows / mi_cols.
+// bsize     - square block size to partition.
+// rate,
+// dist      - outputs: rate and distortion of the best partitioning. Both are
+//             0 for the non-first sub-8x8 blocks, and INT_MAX / INT64_MAX
+//             when no candidate was accepted.
+// do_recon  - when non-zero and a candidate was accepted, the block is
+//             encoded with encode_sb().
+// best_rd   - RD cost a candidate has to beat in order to be accepted.
+static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
+                                 TOKENEXTRA **tp, int mi_row,
+                                 int mi_col, BLOCK_SIZE bsize, int *rate,
+                                 int64_t *dist, int do_recon, int64_t best_rd) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  // Offset from the block origin to its second half.
+  const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
+  TOKENEXTRA *tp_orig = *tp;
+  PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
+  int i;
+  BLOCK_SIZE subsize;
+  int this_rate, sum_rate = 0, best_rate = INT_MAX;
+  int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
+  int64_t sum_rd = 0;
+  int do_split = bsize >= BLOCK_8X8;
+  int do_rect = 1;
+  // Override skipping rectangular partition operations for edge blocks
+  const int force_horz_split = (mi_row + ms >= cm->mi_rows);
+  const int force_vert_split = (mi_col + ms >= cm->mi_cols);
+  const int xss = x->e_mbd.plane[1].subsampling_x;
+  const int yss = x->e_mbd.plane[1].subsampling_y;
+
+  int partition_none_allowed = !force_horz_split && !force_vert_split;
+  int partition_horz_allowed = !force_vert_split && yss <= xss &&
+                               bsize >= BLOCK_8X8;
+  int partition_vert_allowed = !force_horz_split && xss <= yss &&
+                               bsize >= BLOCK_8X8;
+  // tp_orig is only read by the asserts at the end of the function; mark it
+  // as used without dereferencing it.
+  (void) tp_orig;
+
+  if (bsize < BLOCK_8X8) {
+    // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
+    // there is nothing to be done.
+    if (x->ab_index != 0) {
+      *rate = 0;
+      *dist = 0;
+      return;
+    }
+  }
+
+  assert(num_8x8_blocks_wide_lookup[bsize] ==
+         num_8x8_blocks_high_lookup[bsize]);
+
+  // Determine partition types in search according to the speed features.
+  // The threshold set here has to be of square block size.
+  if (cpi->sf.auto_min_max_partition_size) {
+    partition_none_allowed &= (bsize <= cpi->sf.max_partition_size &&
+                               bsize >= cpi->sf.min_partition_size);
+    partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size &&
+                                bsize > cpi->sf.min_partition_size) ||
+                               force_horz_split);
+    partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size &&
+                                bsize > cpi->sf.min_partition_size) ||
+                               force_vert_split);
+    do_split &= bsize > cpi->sf.min_partition_size;
+  }
+  if (cpi->sf.use_square_partition_only) {
+    partition_horz_allowed &= force_horz_split;
+    partition_vert_allowed &= force_vert_split;
+  }
+
+  if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed))
+    do_split = 0;
+
+  // PARTITION_NONE
+  if (partition_none_allowed) {
+    nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
+                        &this_rate, &this_dist, bsize);
+    // Keep the chosen mode so fill_mode_info_sb() can restore it later.
+    ctx->mic.mbmi = xd->mi_8x8[0]->mbmi;
+
+    if (this_rate != INT_MAX) {
+      int pl = partition_plane_context(xd->above_seg_context,
+                                       xd->left_seg_context,
+                                       mi_row, mi_col, bsize);
+      this_rate += x->partition_cost[pl][PARTITION_NONE];
+      sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
+      if (sum_rd < best_rd) {
+        int64_t stop_thresh = 4096;
+        int64_t stop_thresh_rd;
+
+        best_rate = this_rate;
+        best_dist = this_dist;
+        best_rd = sum_rd;
+        if (bsize >= BLOCK_8X8)
+          *(get_sb_partitioning(x, bsize)) = bsize;
+
+        // Adjust threshold according to partition size.
+        stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
+                             b_height_log2_lookup[bsize]);
+
+        stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
+        // If obtained distortion is very small, choose current partition
+        // and stop splitting.
+        if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
+          do_split = 0;
+          do_rect = 0;
+        }
+      }
+    }
+    if (!x->in_active_map) {
+      do_split = 0;
+      do_rect = 0;
+    }
+  }
+
+  // store estimated motion vector
+  if (cpi->sf.adaptive_motion_search)
+    store_pred_mv(x, ctx);
+
+  // PARTITION_SPLIT
+  sum_rd = 0;
+  if (do_split) {
+    int pl = partition_plane_context(xd->above_seg_context,
+                                     xd->left_seg_context,
+                                     mi_row, mi_col, bsize);
+    sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
+    subsize = get_subsize(bsize, PARTITION_SPLIT);
+    for (i = 0; i < 4; ++i) {
+      const int x_idx = (i & 1) * ms;
+      const int y_idx = (i >> 1) * ms;
+
+      // Quadrants that start outside the frame are not searched.
+      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
+        continue;
+
+      *get_sb_index(x, subsize) = i;
+      if (cpi->sf.adaptive_motion_search)
+        load_pred_mv(x, ctx);
+
+      nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
+                           subsize, &this_rate, &this_dist, 0, INT64_MAX);
+
+      // NOTE(review): a later quadrant with a valid rate overwrites an
+      // INT64_MAX stored here, because the loop keeps going - confirm whether
+      // that is intended.
+      if (this_rate == INT_MAX) {
+        sum_rd = INT64_MAX;
+      } else {
+        sum_rate += this_rate;
+        sum_dist += this_dist;
+        sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+      }
+    }
+
+    if (sum_rd < best_rd) {
+      best_rate = sum_rate;
+      best_dist = sum_dist;
+      best_rd = sum_rd;
+      *(get_sb_partitioning(x, bsize)) = subsize;
+    } else {
+      // skip rectangular partition test when larger block size
+      // gives better rd cost
+      if (cpi->sf.less_rectangular_check)
+        do_rect &= !partition_none_allowed;
+    }
+  }
+
+  // PARTITION_HORZ
+  if (partition_horz_allowed && do_rect) {
+    subsize = get_subsize(bsize, PARTITION_HORZ);
+    *get_sb_index(x, subsize) = 0;
+    if (cpi->sf.adaptive_motion_search)
+      load_pred_mv(x, ctx);
+
+    // The top half goes straight into sum_rate / sum_dist, so the cost
+    // compared against best_rd belongs to this partition and not to whatever
+    // the PARTITION_SPLIT search accumulated.
+    nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
+                        &sum_rate, &sum_dist, subsize);
+
+    (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
+
+    sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+
+    // Bottom half: only when the top half is still competitive and the
+    // bottom half starts inside the frame.
+    if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
+      *get_sb_index(x, subsize) = 1;
+
+      if (cpi->sf.adaptive_motion_search)
+        load_pred_mv(x, ctx);
+
+      nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col,
+                          &this_rate, &this_dist, subsize);
+
+      (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
+
+      if (this_rate == INT_MAX) {
+        sum_rd = INT64_MAX;
+      } else {
+        int pl = partition_plane_context(xd->above_seg_context,
+                                         xd->left_seg_context,
+                                         mi_row, mi_col, bsize);
+        this_rate += x->partition_cost[pl][PARTITION_HORZ];
+        sum_rate += this_rate;
+        sum_dist += this_dist;
+        sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+      }
+    }
+    if (sum_rd < best_rd) {
+      best_rd = sum_rd;
+      best_rate = sum_rate;
+      best_dist = sum_dist;
+      *(get_sb_partitioning(x, bsize)) = subsize;
+    }
+  }
+
+  // PARTITION_VERT
+  if (partition_vert_allowed && do_rect) {
+    subsize = get_subsize(bsize, PARTITION_VERT);
+
+    *get_sb_index(x, subsize) = 0;
+    if (cpi->sf.adaptive_motion_search)
+      load_pred_mv(x, ctx);
+
+    // The left half goes straight into sum_rate / sum_dist, replacing the
+    // totals left behind by the previous candidates.
+    nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
+                        &sum_rate, &sum_dist, subsize);
+    (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
+    sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+
+    // Right half: only when the left half is still competitive and the
+    // right half starts inside the frame.
+    if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
+      *get_sb_index(x, subsize) = 1;
+
+      if (cpi->sf.adaptive_motion_search)
+        load_pred_mv(x, ctx);
+
+      nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms,
+                          &this_rate, &this_dist, subsize);
+
+      (get_block_context(x, subsize))->mic.mbmi = xd->mi_8x8[0]->mbmi;
+
+      if (this_rate == INT_MAX) {
+        sum_rd = INT64_MAX;
+      } else {
+        int pl = partition_plane_context(xd->above_seg_context,
+                                         xd->left_seg_context,
+                                         mi_row, mi_col, bsize);
+        this_rate += x->partition_cost[pl][PARTITION_VERT];
+        sum_rate += this_rate;
+        sum_dist += this_dist;
+        sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+      }
+    }
+    if (sum_rd < best_rd) {
+      best_rate = sum_rate;
+      best_dist = sum_dist;
+      best_rd = sum_rd;
+      *(get_sb_partitioning(x, bsize)) = subsize;
+    }
+  }
+
+  (void) best_rd;
+  *rate = best_rate;
+  *dist = best_dist;
+
+  // update mode info array
+  fill_mode_info_sb(cm, x, mi_row, mi_col, bsize,
+                    *(get_sb_partitioning(x, bsize)));
+
+  if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) {
+    int output_enabled = (bsize == BLOCK_64X64);
+
+    // Check the projected output rate for this SB against its target
+    // and if necessary apply a Q delta using segmentation to get
+    // closer to the target.
+    if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
+      select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, best_rate);
+    }
+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+      cpi->cyclic_refresh.projected_rate_sb = best_rate;
+      cpi->cyclic_refresh.projected_dist_sb = best_dist;
+    }
+
+    encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
+  }
+
+  // Tokens are expected to be emitted for the 64x64 block only.
+  if (bsize == BLOCK_64X64) {
+    assert(tp_orig < *tp);
+    assert(best_rate < INT_MAX);
+    assert(best_dist < INT64_MAX);
+  } else {
+    assert(tp_orig == *tp);
+  }
+}
+
static void nonrd_use_partition(VP9_COMP *cpi,
const TileInfo *const tile,
MODE_INFO **mi_8x8,
@@ -2838,47 +3171,18 @@
break;
case REFERENCE_PARTITION:
if (cpi->sf.partition_check) {
- MACROBLOCK *x = &cpi->mb;
- int rate1 = 0, rate2 = 0, rate3 = 0;
- int64_t dist1 = 0, dist2 = 0, dist3 = 0;
- set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_8X8);
- nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
- BLOCK_64X64, 0, &rate1, &dist1);
- set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
- BLOCK_16X16);
- nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
- BLOCK_64X64, 0, &rate2, &dist2);
- set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
- BLOCK_32X32);
- nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
- BLOCK_64X64, 0, &rate3, &dist3);
-
- if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) <
- RDCOST(x->rdmult, x->rddiv, rate2, dist2)) {
- if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) <
- RDCOST(x->rdmult, x->rddiv, rate3, dist3))
- set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
- BLOCK_8X8);
- else
- set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
- BLOCK_32X32);
- } else {
- if (RDCOST(x->rdmult, x->rddiv, rate2, dist2) <
- RDCOST(x->rdmult, x->rddiv, rate3, dist3))
- set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
- BLOCK_16X16);
- else
- set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
- BLOCK_32X32);
- }
+ vp9_zero(cpi->mb.pred_mv);
+ nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
+ &dummy_rate, &dummy_dist, 1, INT64_MAX);
} else {
if (!sb_has_motion(cm, prev_mi_8x8))
copy_partitioning(cm, mi_8x8, prev_mi_8x8);
else
set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
+
+ nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+ BLOCK_64X64, 1, &dummy_rate, &dummy_dist);
}
- nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
- 1, &dummy_rate, &dummy_dist);
break;
default:
assert(0);
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -868,6 +868,8 @@
sf->max_intra_bsize = BLOCK_32X32;
}
if (speed >= 6) {
+ sf->max_partition_size = BLOCK_32X32;
+ sf->min_partition_size = BLOCK_8X8;
sf->partition_check =
(cm->current_video_frame % sf->last_partitioning_redo_frequency == 1);
sf->partition_search_type = REFERENCE_PARTITION;
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -172,6 +172,8 @@
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[0] = backup_yv12[i];
}
+
+ x->pred_mv[ref].as_mv = *tmp_mv;
}
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,