ref: d8aa40634a522a10635c5982cfd52bbfba15591b
parent: b8c2a4eb0c47b633096f5c428b70607e7bf8d570
author: Scott LaVarnway <[email protected]>
date: Tue Jan 12 00:09:06 EST 2016
VP9: Eliminate unnecessary nearest/near searches Prior to this patch, read_inter_block_mode_info() would find the nearmv and nearestmv for all modes. Now it does not search for ZEROMV modes and breaks out early for NEARESTMV and NEWMV modes. Change-Id: Ifa7b1eaf58bb03b9c7792ea5012fef477527d0fd
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -11,7 +11,7 @@
#include "vp9/common/vp9_mvref_common.h"
-// This function searches the neighbourhood of a given MB/SB
+// This function searches the neighborhood of a given MB/SB
// to try and find candidate reference vectors.
static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
@@ -24,7 +24,7 @@
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
int different_ref_found = 0;
int context_counter = 0;
- const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
+ const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
const TileInfo *const tile = &xd->tile;
@@ -59,8 +59,8 @@
for (; i < MVREF_NEIGHBOURS; ++i) {
const POSITION *const mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
- const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
- xd->mi_stride]->mbmi;
+ const MB_MODE_INFO *const candidate =
+ &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
different_ref_found = 1;
if (candidate->ref_frame[0] == ref_frame)
@@ -71,7 +71,7 @@
}
// TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
- // on windows platform. The sync here is unncessary if use_perv_frame_mvs
+ // on windows platform. The sync here is unnecessary if use_prev_frame_mvs
// is 0. But after removing it, there will be hang in the unit test on windows
// due to several threads waiting for a thread's signal.
#if defined(_WIN32) && !HAVE_PTHREAD_H
@@ -101,8 +101,8 @@
for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
const POSITION *mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
- const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
- * xd->mi_stride]->mbmi;
+ const MB_MODE_INFO *const candidate =
+ &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
// If the candidate is INTRA we don't want to consider its mv.
IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
@@ -154,16 +154,6 @@
uint8_t *mode_context) {
find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1,
mi_row, mi_col, sync, data, mode_context);
-}
-
-static void lower_mv_precision(MV *mv, int allow_hp) {
- const int use_hp = allow_hp && vp9_use_mv_hp(mv);
- if (!use_hp) {
- if (mv->row & 1)
- mv->row += (mv->row > 0 ? -1 : 1);
- if (mv->col & 1)
- mv->col += (mv->col > 0 ? -1 : 1);
- }
}
void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -157,7 +157,7 @@
// This macro is used to add a motion vector mv_ref list if it isn't
// already in the list. If it's the second motion vector it will also
-// skip all additional processing and jump to done!
+// skip all additional processing and jump to Done!
#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \
do { \
if (refmv_count) { \
@@ -205,6 +205,16 @@
xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
xd->mb_to_top_edge - LEFT_TOP_MARGIN,
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
+}
+
+// Reduces *mv to even-valued components when high precision is not in
+// effect for it, i.e. when allow_hp is 0 or vp9_use_mv_hp(mv) returns 0.
+// An odd row/col is moved one unit toward zero (positive values are
+// decremented, negative values incremented). Otherwise *mv is unchanged.
+static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
+ const int use_hp = allow_hp && vp9_use_mv_hp(mv);
+ if (!use_hp) {
+ if (mv->row & 1)
+ mv->row += (mv->row > 0 ? -1 : 1);
+ if (mv->col & 1)
+ mv->col += (mv->col > 0 ? -1 : 1);
+ }
}
typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -714,6 +714,18 @@
const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
const int is_compound = has_second_ref(&mi->mbmi);
+ int ref;
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const MV_REFERENCE_FRAME frame = mi->mbmi.ref_frame[ref];
+ RefBuffer *ref_buf = &pbi->common.frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+ if (!vp9_is_valid_scale(&ref_buf->sf))
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, &ref_buf->sf);
+ }
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
struct macroblockd_plane *const pd = &xd->plane[plane];
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -284,12 +284,19 @@
return sign ? -mag : mag;
}
+// TODO(slavarnway): move to vp9_entropymv.h and replace vp9_use_mv_hp
+// Exclusive upper bound applied to (abs(component) >> 3) in use_mv_hp().
+#define COMPANDED_MVREF_THRESH 8
+// Returns nonzero when both components of *ref satisfy
+// (abs(component) >> 3) < COMPANDED_MVREF_THRESH, which is equivalent to
+// abs(row) < 64 and abs(col) < 64; returns 0 otherwise.
+// read_mv() combines this result with allow_hp to form its use_hp flag.
+static int use_mv_hp(const MV *ref) {
+ return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH &&
+ (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH;
+}
+
static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref,
const nmv_context *ctx,
nmv_context_counts *counts, int allow_hp) {
const MV_JOINT_TYPE joint_type =
(MV_JOINT_TYPE)vpx_read_tree(r, vp9_mv_joint_tree, ctx->joints);
- const int use_hp = allow_hp && vp9_use_mv_hp(ref);
+ const int use_hp = allow_hp && use_mv_hp(ref);
MV diff = {0, 0};
if (mv_joint_vertical(joint_type))
@@ -476,6 +483,20 @@
}
}
+// Post-processes a candidate list and splits it into nearest/near outputs.
+// xd:          passed through to clamp_mv2().
+// allow_hp:    passed through to lower_mv_precision().
+// mvlist:      candidate list; its first refmv_count entries are modified
+//              in place.
+// nearest_mv:  receives mvlist[0].
+// near_mv:     receives mvlist[1].
+// refmv_count: number of leading entries of mvlist to adjust.
+// NOTE: mvlist[0] and mvlist[1] are both read regardless of refmv_count, so
+// the caller must supply a list whose first two entries are initialized.
+static void dec_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, int_mv *mvlist,
+ int_mv *nearest_mv, int_mv *near_mv,
+ int refmv_count) {
+ int i;
+
+ // Lower the precision of, then clamp, only the first refmv_count candidates;
+ // entries at index >= refmv_count are copied out below untouched.
+ for (i = 0; i < refmv_count; ++i) {
+ lower_mv_precision(&mvlist[i].as_mv, allow_hp);
+ clamp_mv2(&mvlist[i].as_mv, xd);
+ }
+ *nearest_mv = mvlist[0];
+ *near_mv = mvlist[1];
+}
+
static void fpm_sync(void *const data, int mi_row) {
VP9Decoder *const pbi = (VP9Decoder *)data;
vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame,
@@ -482,6 +503,183 @@
mi_row << MI_BLOCK_SIZE_LOG2);
}
+// This macro is used to add a motion vector to the mv_ref list if it isn't
+// already in the list.
+// - List empty: the mv is stored as entry 0; if early_break is nonzero,
+//   control then jumps to Done.
+// - List has an entry: the mv is stored only if it differs from entry 0, and
+//   in that case control jumps to Done (a second mv has been found).
+// NOTE: early_break is not a macro parameter; a variable with that name must
+// be in scope wherever this macro is expanded.
+#define ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done) \
+ do { \
+ if (refmv_count) { \
+ if ((mv).as_int != (mv_ref_list)[0].as_int) { \
+ (mv_ref_list)[(refmv_count)] = (mv); \
+ refmv_count++; \
+ goto Done; \
+ } \
+ } else { \
+ (mv_ref_list)[(refmv_count)++] = (mv); \
+ if (early_break) \
+ goto Done; \
+ } \
+ } while (0)
+
+// For a candidate mbmi that is an inter block:
+// - if its first reference frame differs from ref_frame, add
+//   scale_mv(mbmi, 0, ref_frame, ref_sign_bias) to the list;
+// - if it has a second reference frame that differs from ref_frame and whose
+//   mv differs from the first mv, add
+//   scale_mv(mbmi, 1, ref_frame, ref_sign_bias) to the list.
+// Candidates that are not inter blocks contribute nothing. Additions go
+// through ADD_MV_REF_LIST_EB, so this macro may jump to Done and needs
+// early_break in scope at the expansion site.
+#define IF_DIFF_REF_FRAME_ADD_MV_EB(mbmi, ref_frame, ref_sign_bias, \
+ refmv_count, mv_ref_list, Done) \
+ do { \
+ if (is_inter_block(mbmi)) { \
+ if ((mbmi)->ref_frame[0] != ref_frame) \
+ ADD_MV_REF_LIST_EB(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \
+ refmv_count, mv_ref_list, Done); \
+ if (has_second_ref(mbmi) && \
+ (mbmi)->ref_frame[1] != ref_frame && \
+ (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
+ ADD_MV_REF_LIST_EB(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \
+ refmv_count, mv_ref_list, Done); \
+ } \
+ } while (0)
+
+// This function searches the neighborhood of a given MB/SB
+// to try and find candidate reference vectors.
+//
+// Candidates are gathered into mv_ref_list in this order:
+//   1. neighbours listed in mv_ref_search whose mv uses ref_frame,
+//   2. the previous frame's mv at this position, if it uses ref_frame,
+//   3. neighbours using a different reference frame (via scale_mv()),
+//   4. the previous frame's mvs using a different reference frame, negated
+//      when the sign bias differs from that of ref_frame.
+// The search jumps to Done as soon as a second distinct mv is stored, or,
+// when mi->mbmi.mode is NEARESTMV or NEWMV, as soon as the first is stored.
+//
+// mv_ref_list: output; MAX_MV_REF_CANDIDATES entries are zeroed on entry.
+// sync, data:  if sync is non-NULL and cm->frame_parallel_decode is set,
+//              sync(data, mi_row) is called before the previous frame's mvs
+//              are read.
+// Returns the number of leading entries of mv_ref_list that were clamped
+// with clamp_mv_ref(). If the search ran to completion without jumping to
+// Done, this is MAX_MV_REF_CANDIDATES for NEARMV and 1 for any other mode.
+static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+ const POSITION *const mv_ref_search,
+ int_mv *mv_ref_list,
+ int mi_row, int mi_col,
+ find_mv_refs_sync sync, void *const data) {
+ const int *ref_sign_bias = cm->ref_frame_sign_bias;
+ int i, refmv_count = 0;
+ int different_ref_found = 0;
+ const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
+ cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
+ const TileInfo *const tile = &xd->tile;
+ // If mode is nearestmv or newmv (uses nearestmv as a reference) then stop
+ // searching after the first mv is found.
+ const int early_break = (mi->mbmi.mode == NEARESTMV) ||
+ (mi->mbmi.mode == NEWMV);
+
+ // Blank the reference vector list
+ memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
+
+ // Scan all MVREF_NEIGHBOURS positions for candidates that use ref_frame.
+ // No sub block or mode count bookkeeping is done here; the mode context
+ // is computed separately by get_mode_context().
+ for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MB_MODE_INFO *const candidate =
+ &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
+ // Set as soon as any neighbour passes is_inside(); it gates the
+ // different-reference pass further down.
+ different_ref_found = 1;
+
+ if (candidate->ref_frame[0] == ref_frame)
+ ADD_MV_REF_LIST_EB(candidate->mv[0], refmv_count, mv_ref_list, Done);
+ else if (candidate->ref_frame[1] == ref_frame)
+ ADD_MV_REF_LIST_EB(candidate->mv[1], refmv_count, mv_ref_list, Done);
+ }
+ }
+
+ // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
+ // on windows platform. The sync here is unnecessary if use_prev_frame_mvs
+ // is 0. But after removing it, there will be hang in the unit test on windows
+ // due to several threads waiting for a thread's signal.
+#if defined(_WIN32) && !HAVE_PTHREAD_H
+ if (cm->frame_parallel_decode && sync != NULL) {
+ sync(data, mi_row);
+ }
+#endif
+
+ // Check the last frame's mode and mv info.
+ if (prev_frame_mvs) {
+ // Synchronize here for frame parallel decode if sync function is provided.
+ if (cm->frame_parallel_decode && sync != NULL) {
+ sync(data, mi_row);
+ }
+
+ if (prev_frame_mvs->ref_frame[0] == ref_frame) {
+ ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
+ } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
+ ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done);
+ }
+ }
+
+ // Since we couldn't find 2 mvs from the same reference frame
+ // go back through the neighbors and find motion vectors from
+ // different reference frames.
+ if (different_ref_found) {
+ for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
+ const POSITION *mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MB_MODE_INFO *const candidate =
+ &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
+
+ // If the candidate is INTRA we don't want to consider its mv.
+ IF_DIFF_REF_FRAME_ADD_MV_EB(candidate, ref_frame, ref_sign_bias,
+ refmv_count, mv_ref_list, Done);
+ }
+ }
+ }
+
+ // Since we still don't have a candidate we'll try the last frame.
+ if (prev_frame_mvs) {
+ if (prev_frame_mvs->ref_frame[0] != ref_frame &&
+ prev_frame_mvs->ref_frame[0] > INTRA_FRAME) {
+ int_mv mv = prev_frame_mvs->mv[0];
+ // Negate the mv when its reference frame's sign bias differs from
+ // that of ref_frame.
+ if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] !=
+ ref_sign_bias[ref_frame]) {
+ mv.as_mv.row *= -1;
+ mv.as_mv.col *= -1;
+ }
+ ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done);
+ }
+
+ if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
+ prev_frame_mvs->ref_frame[1] != ref_frame &&
+ prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) {
+ int_mv mv = prev_frame_mvs->mv[1];
+ if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
+ ref_sign_bias[ref_frame]) {
+ mv.as_mv.row *= -1;
+ mv.as_mv.col *= -1;
+ }
+ ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done);
+ }
+ }
+
+ // Reached only when no addition above jumped to Done. Entries that were
+ // never filled are still zero from the memset, so the count is simply set
+ // to the number of entries the mode needs.
+ if (mi->mbmi.mode == NEARMV)
+ refmv_count = MAX_MV_REF_CANDIDATES;
+ else
+ // we only care about the nearestmv for the remaining modes
+ refmv_count = 1;
+
+ Done:
+ // Clamp vectors
+ for (i = 0; i < refmv_count; ++i)
+ clamp_mv_ref(&mv_ref_list[i].as_mv, xd);
+
+ return refmv_count;
+}
+
+// Computes the inter mode context for the block at (mi_row, mi_col).
+// Only the first two positions of mv_ref_search are examined; for each one
+// that passes is_inside(), mode_2_counter[] of that neighbour's mode is
+// added to a running sum. Returns counter_to_context[] of that sum (the sum
+// is 0 if neither position passes).
+static uint8_t get_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ const POSITION *const mv_ref_search,
+ int mi_row, int mi_col) {
+ int i;
+ int context_counter = 0;
+ const TileInfo *const tile = &xd->tile;
+
+ // Get mode count from nearest 2 blocks
+ for (i = 0; i < 2; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
+ xd->mi_stride];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ // Keep counts for entropy encoding.
+ context_counter += mode_2_counter[candidate->mode];
+ }
+ }
+
+ return counter_to_context[context_counter];
+}
+
static void read_inter_block_mode_info(VP9Decoder *const pbi,
MACROBLOCKD *const xd,
MODE_INFO *const mi,
@@ -491,27 +689,14 @@
const BLOCK_SIZE bsize = mbmi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
int_mv nearestmv[2], nearmv[2];
- int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
int ref, is_compound;
- uint8_t inter_mode_ctx[MAX_REF_FRAMES];
+ uint8_t inter_mode_ctx;
+ const POSITION *const mv_ref_search = mv_ref_blocks[bsize];
read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
is_compound = has_second_ref(mbmi);
+ inter_mode_ctx = get_mode_context(cm, xd, mv_ref_search, mi_row, mi_col);
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
- RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!vp9_is_valid_scale(&ref_buf->sf)))
- vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col,
- &ref_buf->sf);
- vp9_find_mv_refs(cm, xd, mi, frame, ref_mvs[frame],
- mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
- }
-
if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
mbmi->mode = ZEROMV;
if (bsize < BLOCK_8X8) {
@@ -521,14 +706,27 @@
}
} else {
if (bsize >= BLOCK_8X8)
- mbmi->mode = read_inter_mode(cm, xd, r,
- inter_mode_ctx[mbmi->ref_frame[0]]);
- }
+ mbmi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
+ else
+ // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV.
+ // Setting mode to NEARESTMV forces the search to stop after the nearestmv
+ // has been found. After b_modes have been read, mode will be overwritten
+ // by the last b_mode.
+ mbmi->mode = NEARESTMV;
- if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- vp9_find_best_ref_mvs(xd, allow_hp, ref_mvs[mbmi->ref_frame[ref]],
- &nearestmv[ref], &nearmv[ref]);
+ if (mbmi->mode != ZEROMV) {
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ int_mv ref_mvs[MAX_MV_REF_CANDIDATES];
+ const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+ int refmv_count;
+
+ refmv_count = dec_find_mv_refs(cm, xd, mi, frame, mv_ref_search,
+ ref_mvs, mi_row, mi_col, fpm_sync,
+ (void *)pbi);
+
+ dec_find_best_ref_mvs(xd, allow_hp, ref_mvs, &nearestmv[ref],
+ &nearmv[ref], refmv_count);
+ }
}
}
@@ -546,7 +744,7 @@
for (idx = 0; idx < 2; idx += num_4x4_w) {
int_mv block[2];
const int j = idy * 2 + idx;
- b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx[mbmi->ref_frame[0]]);
+ b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
if (b_mode == NEARESTMV || b_mode == NEARMV) {
uint8_t dummy_mode_ctx[MAX_REF_FRAMES];