ref: 706f1f10e016f30f6f68afd938e25df9765ffb87
parent: ee554c8cebcb0e3a7a549b98a8b3d398fc88fd07
author: Angie Chiang <[email protected]>
date: Wed Jul 17 08:36:14 EDT 2019
Make vp9_prepare_nb_full_mvs only return valid mvs. In this case, vp9_nb_mvs_inconsistency doesn't need to check whether each neighbor mv is valid or not. This reduces non_greedy_mv encoding time by 1.5%. Change-Id: I3216c98481e777d5e0b917ea20ee39b7ca9c9d23
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5912,6 +5912,7 @@
// TODO(angiebird): Figure out lambda's proper value.
const int lambda = cpi->tpl_stats[frame_idx].lambda;
int_mv nb_full_mvs[NB_MVS_NUM];
+ int nb_full_mv_num;
#endif
MV best_ref_mv1 = { 0, 0 };
@@ -5934,10 +5935,11 @@
#if CONFIG_NON_GREEDY_MV
(void)search_method;
(void)sadpb;
- vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx,
- bsize, nb_full_mvs);
+ nb_full_mv_num = vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row,
+ mi_col, rf_idx, bsize, nb_full_mvs);
vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1,
- &cpi->fn_ptr[bsize], nb_full_mvs, NB_MVS_NUM, mv);
+ &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num,
+ mv);
#else
(void)frame_idx;
(void)mi_row;
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1899,7 +1899,7 @@
}
}
-int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
+int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs,
int mv_num) {
// The bahavior of this function is to compute log2 of mv difference,
// i.e. min log2(1 + row_diff * row_diff + col_diff * col_diff)
@@ -1910,15 +1910,15 @@
int i;
int64_t min_abs_diff = INT64_MAX;
int cnt = 0;
+ assert(mv_num <= NB_MVS_NUM);
for (i = 0; i < mv_num; ++i) {
- if (nb_mvs[i].as_int != INVALID_MV) {
- MV nb_mv = nb_mvs[i].as_mv;
- const int64_t row_diff = abs(mv->row - nb_mv.row);
- const int64_t col_diff = abs(mv->col - nb_mv.col);
- const int64_t abs_diff = row_diff * row_diff + col_diff * col_diff;
- min_abs_diff = VPXMIN(abs_diff, min_abs_diff);
- ++cnt;
- }
+ MV nb_mv = nb_full_mvs[i].as_mv;
+ const int64_t row_diff = abs(mv->row - nb_mv.row);
+ const int64_t col_diff = abs(mv->col - nb_mv.col);
+ const int64_t abs_diff = row_diff * row_diff + col_diff * col_diff;
+ assert(nb_full_mvs[i].as_int != INVALID_MV);
+ min_abs_diff = VPXMIN(abs_diff, min_abs_diff);
+ ++cnt;
}
if (cnt) {
return log2_approximation(1 + min_abs_diff);
@@ -2251,12 +2251,13 @@
return bestsad;
}
-void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
- int mi_col, int rf_idx, BLOCK_SIZE bsize,
- int_mv *nb_full_mvs) {
+int vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
+ int mi_col, int rf_idx, BLOCK_SIZE bsize,
+ int_mv *nb_full_mvs) {
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+ int nb_full_mv_num = 0;
int i;
for (i = 0; i < NB_MVS_NUM; ++i) {
int r = dirs[i][0] * mi_height;
@@ -2266,17 +2267,15 @@
const TplDepStats *tpl_ptr =
&tpl_frame
->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];
- int_mv *mv =
- get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);
if (tpl_ptr->ready[rf_idx]) {
- nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv);
- } else {
- nb_full_mvs[i].as_int = INVALID_MV;
+ int_mv *mv =
+ get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);
+ nb_full_mvs[nb_full_mv_num].as_mv = get_full_mv(&mv->as_mv);
+ ++nb_full_mv_num;
}
- } else {
- nb_full_mvs[i].as_int = INVALID_MV;
}
}
+ return nb_full_mv_num;
}
#endif // CONFIG_NON_GREEDY_MV
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -149,9 +149,9 @@
return out_mv;
}
struct TplDepFrame;
-void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,
- int mi_col, int rf_idx, BLOCK_SIZE bsize,
- int_mv *nb_full_mvs);
+int vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,
+ int mi_col, int rf_idx, BLOCK_SIZE bsize,
+ int_mv *nb_full_mvs);
static INLINE BLOCK_SIZE get_square_block_size(BLOCK_SIZE bsize) {
BLOCK_SIZE square_bsize;
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2496,15 +2496,15 @@
int bestsme = INT_MAX;
#if CONFIG_NON_GREEDY_MV
- int_mv nb_full_mvs[NB_MVS_NUM];
- const int nb_full_mv_num = NB_MVS_NUM;
int gf_group_idx = cpi->twopass.gf_group.index;
int gf_rf_idx = ref_frame_to_gf_rf_idx(ref);
BLOCK_SIZE square_bsize = get_square_block_size(bsize);
+ int_mv nb_full_mvs[NB_MVS_NUM];
+ const int nb_full_mv_num =
+ vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col,
+ gf_rf_idx, square_bsize, nb_full_mvs);
const int lambda = (pw * ph) / 4;
assert(pw * ph == lambda << 2);
- vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col,
- gf_rf_idx, square_bsize, nb_full_mvs);
#else // CONFIG_NON_GREEDY_MV
int sadpb = x->sadperbit16;
#endif // CONFIG_NON_GREEDY_MV