ref: c4826c59415f2fe8b8d9ab1a66f50f7e30d64f1d
parent: 5d8642354e4e3715a314fd5f6bf4a0d495b4503b
author: Jingning Han <[email protected]>
date: Thu Sep 12 06:06:47 EDT 2013
Adaptive motion search control. This commit enables an adaptive constraint on the motion search range for smaller partitions, given the motion vectors of the collocated larger partition as a candidate initial search point. At speed 0, the runtime for bus at CIF and 2000 kbps goes from 167s down to 162s (3% speed-up), with a 0.01dB performance gain. At speed 1, the runtime goes from 33687 ms to 32142 ms (4.5% speed-up), with a 0.03dB performance gain. In terms of compression performance, the gains at speed 1 are: derf 0.118% yt 0.237% hd 0.203% stdhd 0.438% Change-Id: Ic8b34c67810d9504a9579bef2825d3fa54b69454
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -50,6 +50,10 @@
int64_t tx_rd_diff[TX_MODES];
int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
+ // motion vector cache for adaptive motion search control in partition
+ // search loop
+ int_mv pred_mv[MAX_REF_FRAMES];
+
// Bit flag for each mode whether it has high error in comparison to others.
unsigned int modes_with_high_error;
@@ -149,7 +153,7 @@
// Used to store sub partition's choices.
int fast_ms;
- int_mv pred_mv;
+ int_mv pred_mv[MAX_REF_FRAMES];
int subblock_ref;
// TODO(jingning): Need to refactor the structure arrays that buffers the
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1316,7 +1316,6 @@
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
x->fast_ms = 0;
- x->pred_mv.as_int = 0;
x->subblock_ref = 0;
if (cpi->sf.adjust_partitioning_from_last_frame) {
@@ -1710,10 +1709,6 @@
// Set fast motion search level.
x->fast_ms = 1;
- // Calculate prediction MV.
- x->pred_mv.as_mv.row = (mvr0 + mvr1 + mvr2 + mvr3) >> 2;
- x->pred_mv.as_mv.col = (mvc0 + mvc1 + mvc2 + mvc3) >> 2;
-
if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
// Set fast motion search level.
@@ -1729,6 +1724,14 @@
}
}
+static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
+ vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
+}
+
+static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
+ vpx_memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
+}
+
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
@@ -1837,6 +1840,10 @@
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
+ // store estimated motion vector
+ if (cpi->sf.adaptive_motion_search)
+ store_pred_mv(x, get_block_context(x, bsize));
+
// PARTITION_SPLIT
sum_rd = 0;
// TODO(jingning): use the motion vectors given by the above search as
@@ -1851,7 +1858,8 @@
continue;
*get_sb_index(xd, subsize) = i;
-
+ if (cpi->sf.adaptive_motion_search)
+ load_pred_mv(x, get_block_context(x, bsize));
rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize,
&this_rate, &this_dist, i != 3, best_rd - sum_rd);
@@ -1885,7 +1893,6 @@
}
x->fast_ms = 0;
- x->pred_mv.as_int = 0;
x->subblock_ref = 0;
if (partition_split_done &&
@@ -1897,6 +1904,8 @@
if (partition_horz_allowed && do_rect) {
subsize = get_subsize(bsize, PARTITION_HORZ);
*get_sb_index(xd, subsize) = 0;
+ if (cpi->sf.adaptive_motion_search)
+ load_pred_mv(x, get_block_context(x, bsize));
pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
get_block_context(x, subsize), best_rd);
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
@@ -1906,6 +1915,8 @@
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*get_sb_index(xd, subsize) = 1;
+ if (cpi->sf.adaptive_motion_search)
+ load_pred_mv(x, get_block_context(x, bsize));
pick_sb_modes(cpi, mi_row + ms, mi_col, &this_rate,
&this_dist, subsize, get_block_context(x, subsize),
best_rd - sum_rd);
@@ -1937,6 +1948,8 @@
subsize = get_subsize(bsize, PARTITION_VERT);
*get_sb_index(xd, subsize) = 0;
+ if (cpi->sf.adaptive_motion_search)
+ load_pred_mv(x, get_block_context(x, bsize));
pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
get_block_context(x, subsize), best_rd);
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
@@ -1945,6 +1958,8 @@
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*get_sb_index(xd, subsize) = 1;
+ if (cpi->sf.adaptive_motion_search)
+ load_pred_mv(x, get_block_context(x, bsize));
pick_sb_modes(cpi, mi_row, mi_col + ms, &this_rate,
&this_dist, subsize, get_block_context(x, subsize),
best_rd - sum_rd);
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -715,6 +715,7 @@
sf->use_lastframe_partitioning = 0;
sf->tx_size_search_method = USE_FULL_RD;
sf->use_lp32x32fdct = 0;
+ sf->adaptive_motion_search = 0;
sf->use_avoid_tested_higherror = 0;
sf->reference_masking = 0;
sf->skip_lots_of_modes = 0;
@@ -743,7 +744,6 @@
sf->using_small_partition_info = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
-
#if CONFIG_MULTIPLE_ARF
// Switch segmentation off.
sf->static_segmentation = 0;
@@ -787,6 +787,7 @@
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->use_lp32x32fdct = 1;
+ sf->adaptive_motion_search = 1;
sf->auto_mv_step_size = 1;
sf->auto_min_max_partition_size = 1;
@@ -826,6 +827,7 @@
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->use_lp32x32fdct = 1;
+ sf->adaptive_motion_search = 1;
sf->using_small_partition_info = 0;
sf->disable_splitmv =
(MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -285,6 +285,8 @@
int last_partitioning_redo_frequency;
int disable_splitmv;
int using_small_partition_info;
+ // TODO(jingning): combine the related motion search speed features
+ int adaptive_motion_search;
// Implements various heuristics to skip searching modes
// The heuristics selected are based on flags
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1771,6 +1771,7 @@
max_mv = x->max_mv_context[mbmi->ref_frame[0]];
else
max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
+
if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
// Take wtd average of the step_params based on the last frame's
// max mv magnitude and the best ref mvs of the current block for
@@ -1781,11 +1782,16 @@
step_param = cpi->mv_step_param;
}
- further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
-
mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
+ if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) {
+ mvp_full.as_mv.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
+ mvp_full.as_mv.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
+ step_param = MAX(step_param, 8);
+ }
+
+ further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
// adjust src pointer for this block
mi_buf_shift(x, i);
if (cpi->sf.search_method == HEX) {
@@ -1839,10 +1845,13 @@
x->nmvjointcost, x->mvcost,
&distortion, &sse);
- // safe motion search result for use in compound prediction
+ // save motion search result for use in compound prediction
seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
}
+ if (cpi->sf.adaptive_motion_search)
+ x->pred_mv[mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
+
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
}
@@ -2085,10 +2094,14 @@
uint8_t *src_y_ptr = x->plane[0].src.buf;
uint8_t *ref_y_ptr;
int row_offset, col_offset;
+ int num_mv_refs = MAX_MV_REF_CANDIDATES +
+ (cpi->sf.adaptive_motion_search &&
+ cpi->common.show_frame && block_size < BLOCK_64X64);
// Get the sad for each candidate reference mv
- for (i = 0; i < MAX_MV_REF_CANDIDATES; i++) {
- this_mv.as_int = mbmi->ref_mvs[ref_frame][i].as_int;
+ for (i = 0; i < num_mv_refs; i++) {
+ this_mv.as_int = (i < MAX_MV_REF_CANDIDATES) ?
+ mbmi->ref_mvs[ref_frame][i].as_int : x->pred_mv[ref_frame].as_int;
max_mv = MAX(max_mv,
MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
@@ -2349,7 +2362,7 @@
step_param = 8;
// Get prediction MV.
- mvp_full.as_int = x->pred_mv.as_int;
+ mvp_full.as_int = x->pred_mv[ref].as_int;
// Adjust MV sign if needed.
if (cm->ref_frame_sign_bias[ref]) {
@@ -2368,11 +2381,19 @@
} else {
step_param = cpi->mv_step_param;
}
- // mvp_full.as_int = ref_mv[0].as_int;
- mvp_full.as_int =
- mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
}
+ if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
+ cpi->common.show_frame) {
+ int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
+ b_width_log2(bsize)));
+ step_param = MAX(step_param, boffset);
+ }
+
+ mvp_full.as_int = x->mv_best_ref_index[ref] < MAX_MV_REF_CANDIDATES ?
+ mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int :
+ x->pred_mv[ref].as_int;
+
mvp_full.as_mv.col >>= 3;
mvp_full.as_mv.row >>= 3;
@@ -2422,6 +2443,10 @@
*rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
x->nmvjointcost, x->mvcost,
96);
+
+ if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
+ x->pred_mv[ref].as_int = tmp_mv->as_int;
+
if (scaled_ref_frame) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++)