ref: df90d58f4fd13a486b6f3af6e8ce7669779b1e00
parent: a33086f9253aa34f80f16849210d5da24563a24c
author: Yunqing Wang <[email protected]>
date: Wed Jul 3 10:43:23 EDT 2013
Speed up motion estimation using small partitions' result (experiment) Current partition checking starts from small sizes, and then goes up to large sizes. This experiment uses the small partitions' motion estimation result, which is already available, to speed up the large partition's motion estimation. We can decide to skip some partition checks if they are unlikely choices. We could use the motion vector (MV) result as the current partition's prediction MV, and limit the search range and reference frame. Current result at speed 1: psnr loss: 1.19% for stdhd, 0.287% for derf. speed gain: 14% for sunflower(hd), 11% for akiyo. Further improvement will be done later. Change-Id: I5abfd070e9cace2e91e2a0247d1325df313887ab
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -143,6 +143,11 @@
int rd_search;
int skip_encode;
+ // Used to store sub partition's choices.
+ int fast_ms;
+ int_mv pred_mv;
+ int subblock_ref;
+
// TODO(jingning): Need to refactor the structure arrays that buffers the
// coding mode decisions of each partition type.
PICK_MODE_CONTEXT ab4x4_context[4][4][4];
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1466,6 +1466,138 @@
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
}
+
+ x->fast_ms = 0;
+ x->pred_mv.as_int = 0;
+ x->subblock_ref = 0;
+
+ // Use 4 subblocks' motion estimation results to speed up current
+ // partition's checking.
+ if (cpi->sf.using_small_partition_info) {
+ // Only use 8x8 result for non HD videos.
+ // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0;
+ int use_8x8 = 1;
+
+ if (cm->frame_type && !cpi->is_src_frame_alt_ref &&
+ ((use_8x8 && bsize == BLOCK_SIZE_MB16X16) ||
+ bsize == BLOCK_SIZE_SB32X32 || bsize == BLOCK_SIZE_SB64X64)) {
+ int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0;
+
+ if (bsize == BLOCK_SIZE_MB16X16) {
+ ref0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.
+ ref_frame[0];
+ ref1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.
+ ref_frame[0];
+ ref2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.
+ ref_frame[0];
+ ref3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.
+ ref_frame[0];
+ } else if (bsize == BLOCK_SIZE_SB32X32) {
+ ref0 = x->mb_context[xd->sb_index][0].mic.mbmi.ref_frame[0];
+ ref1 = x->mb_context[xd->sb_index][1].mic.mbmi.ref_frame[0];
+ ref2 = x->mb_context[xd->sb_index][2].mic.mbmi.ref_frame[0];
+ ref3 = x->mb_context[xd->sb_index][3].mic.mbmi.ref_frame[0];
+ } else if (bsize == BLOCK_SIZE_SB64X64) {
+ ref0 = x->sb32_context[0].mic.mbmi.ref_frame[0];
+ ref1 = x->sb32_context[1].mic.mbmi.ref_frame[0];
+ ref2 = x->sb32_context[2].mic.mbmi.ref_frame[0];
+ ref3 = x->sb32_context[3].mic.mbmi.ref_frame[0];
+ }
+
+ // Currently, only consider 4 inter ref frames.
+ if (ref0 && ref1 && ref2 && ref3) {
+ int16_t mvr0 = 0, mvc0 = 0, mvr1 = 0, mvc1 = 0, mvr2 = 0, mvc2 = 0,
+ mvr3 = 0, mvc3 = 0;
+ int d01, d23, d02, d13; // motion vector distance between 2 blocks
+
+ // Get each subblock's motion vectors.
+ if (bsize == BLOCK_SIZE_MB16X16) {
+ mvr0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
+ as_mv.row;
+ mvc0 = x->sb8x8_context[xd->sb_index][xd->mb_index][0].mic.mbmi.mv[0].
+ as_mv.col;
+ mvr1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
+ as_mv.row;
+ mvc1 = x->sb8x8_context[xd->sb_index][xd->mb_index][1].mic.mbmi.mv[0].
+ as_mv.col;
+ mvr2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
+ as_mv.row;
+ mvc2 = x->sb8x8_context[xd->sb_index][xd->mb_index][2].mic.mbmi.mv[0].
+ as_mv.col;
+ mvr3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
+ as_mv.row;
+ mvc3 = x->sb8x8_context[xd->sb_index][xd->mb_index][3].mic.mbmi.mv[0].
+ as_mv.col;
+ } else if (bsize == BLOCK_SIZE_SB32X32) {
+ mvr0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.row;
+ mvc0 = x->mb_context[xd->sb_index][0].mic.mbmi.mv[0].as_mv.col;
+ mvr1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.row;
+ mvc1 = x->mb_context[xd->sb_index][1].mic.mbmi.mv[0].as_mv.col;
+ mvr2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.row;
+ mvc2 = x->mb_context[xd->sb_index][2].mic.mbmi.mv[0].as_mv.col;
+ mvr3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.row;
+ mvc3 = x->mb_context[xd->sb_index][3].mic.mbmi.mv[0].as_mv.col;
+ } else if (bsize == BLOCK_SIZE_SB64X64) {
+ mvr0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.row;
+ mvc0 = x->sb32_context[0].mic.mbmi.mv[0].as_mv.col;
+ mvr1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.row;
+ mvc1 = x->sb32_context[1].mic.mbmi.mv[0].as_mv.col;
+ mvr2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.row;
+ mvc2 = x->sb32_context[2].mic.mbmi.mv[0].as_mv.col;
+ mvr3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.row;
+ mvc3 = x->sb32_context[3].mic.mbmi.mv[0].as_mv.col;
+ }
+
+ // Adjust sign if ref is alt_ref
+ if (cm->ref_frame_sign_bias[ref0]) {
+ mvr0 *= -1;
+ mvc0 *= -1;
+ }
+
+ if (cm->ref_frame_sign_bias[ref1]) {
+ mvr1 *= -1;
+ mvc1 *= -1;
+ }
+
+ if (cm->ref_frame_sign_bias[ref2]) {
+ mvr2 *= -1;
+ mvc2 *= -1;
+ }
+
+ if (cm->ref_frame_sign_bias[ref3]) {
+ mvr3 *= -1;
+ mvc3 *= -1;
+ }
+
+ // Calculate mv distances.
+ d01 = MAX(abs(mvr0 - mvr1), abs(mvc0 - mvc1));
+ d23 = MAX(abs(mvr2 - mvr3), abs(mvc2 - mvc3));
+ d02 = MAX(abs(mvr0 - mvr2), abs(mvc0 - mvc2));
+ d13 = MAX(abs(mvr1 - mvr3), abs(mvc1 - mvc3));
+
+ if (d01 < 24 && d23 < 24 && d02 < 24 && d13 < 24) {
+ // Set fast motion search level.
+ x->fast_ms = 1;
+
+ // Calculate prediction MV
+ x->pred_mv.as_mv.row = (mvr0 + mvr1 + mvr2 + mvr3) >> 2;
+ x->pred_mv.as_mv.col = (mvc0 + mvc1 + mvc2 + mvc3) >> 2;
+
+ if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
+ d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
+ // Set fast motion search level.
+ x->fast_ms = 2;
+
+ if (!d01 && !d23 && !d02 && !d13) {
+ x->fast_ms = 3;
+ x->subblock_ref = ref0;
+ }
+ }
+ }
+ }
+ }
+ }
+
if (!cpi->sf.use_partitions_less_than
|| (cpi->sf.use_partitions_less_than
&& bsize <= cpi->sf.less_than_block_size)) {
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -723,7 +723,7 @@
sf->use_rd_breakout = 0;
sf->skip_encode_sb = 0;
sf->use_uv_intra_rd_estimate = 0;
-
+ sf->using_small_partition_info = 0;
// Skip any mode not chosen at size < X for all sizes > X
// Hence BLOCK_SIZE_SB64X64 (skip is off)
sf->unused_mode_skip_lvl = BLOCK_SIZE_SB64X64;
@@ -795,6 +795,7 @@
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->use_uv_intra_rd_estimate = 1;
+ sf->using_small_partition_info = 1;
}
if (speed == 3) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -268,6 +268,7 @@
int adjust_partitioning_from_last_frame;
int last_partitioning_redo_frequency;
int disable_splitmv;
+ int using_small_partition_info;
// Implements various heuristics to skip searching modes
// The heuristics selected are based on flags
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2334,6 +2334,7 @@
int mi_row, int mi_col,
int_mv *tmp_mv, int *rate_mv) {
MACROBLOCKD *xd = &x->e_mbd;
+ VP9_COMMON *cm = &cpi->common;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
int bestsme = INT_MAX;
@@ -2364,19 +2365,38 @@
vp9_clamp_mv_min_max(x, &ref_mv);
- // Work out the size of the first step in the mv step search.
- // 0 here is maximum length first step. 1 is MAX >> 1 etc.
- if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
- step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
+ // Adjust search parameters based on small partitions' result.
+ if (x->fast_ms) {
+ // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
+ // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
+ // adjust search range
+ step_param = 6;
+ if (x->fast_ms > 1)
+ step_param = 8;
+
+ // Get prediction MV.
+ mvp_full.as_int = x->pred_mv.as_int;
+
+ // Adjust MV sign if needed.
+ if (cm->ref_frame_sign_bias[ref]) {
+ mvp_full.as_mv.col *= -1;
+ mvp_full.as_mv.row *= -1;
+ }
} else {
- step_param = vp9_init_search_range(
- cpi, MIN(cpi->common.width, cpi->common.height));
+ // Work out the size of the first step in the mv step search.
+ // 0 here is maximum length first step. 1 is MAX >> 1 etc.
+ if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
+ step_param = vp9_init_search_range(cpi, cpi->max_mv_magnitude);
+ } else {
+ step_param = vp9_init_search_range(
+ cpi, MIN(cpi->common.width, cpi->common.height));
+ }
+
+ // mvp_full.as_int = ref_mv[0].as_int;
+ mvp_full.as_int =
+ mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
}
- // mvp_full.as_int = ref_mv[0].as_int;
- mvp_full.as_int =
- mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int;
-
mvp_full.as_mv.col >>= 3;
mvp_full.as_mv.row >>= 3;
@@ -3113,9 +3133,9 @@
}
// If intra is not masked off then get uv intra mode rd.
- if (!cpi->sf.use_avoid_tested_higherror
+ if (x->fast_ms < 2 && (!cpi->sf.use_avoid_tested_higherror
|| (cpi->sf.use_avoid_tested_higherror
- && (ref_frame_mask & (1 << INTRA_FRAME)))) {
+ && (ref_frame_mask & (1 << INTRA_FRAME))))) {
// Note that the enumerator TXFM_MODE "matches" TX_SIZE.
// Eg. ONLY_4X4 = TX_4X4, ALLOW_8X8 = TX_8X8 etc such that the MIN
// operation below correctly constrains max_uvtxfm_size.
@@ -3193,6 +3213,12 @@
continue;
x->skip = 0;
+
+ // Skip some checking based on small partitions' result.
+ if (x->fast_ms > 1 && !ref_frame)
+ continue;
+ if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
+ continue;
if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_SIZE_SB8X8) {
if (!(ref_frame_mask & (1 << ref_frame))) {