shithub: libvpx

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -5912,6 +5912,7 @@

   // TODO(angiebird): Figure out lambda's proper value.

   const int lambda = cpi->tpl_stats[frame_idx].lambda;

   int_mv nb_full_mvs[NB_MVS_NUM];

+  int nb_full_mv_num;

 #endif

   MV best_ref_mv1 = { 0, 0 };

@@ -5934,10 +5935,11 @@

 #if CONFIG_NON_GREEDY_MV

   (void)search_method;

   (void)sadpb;

-  vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx,

-                          bsize, nb_full_mvs);

+  nb_full_mv_num = vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row,

+                                           mi_col, rf_idx, bsize, nb_full_mvs);

   vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1,

-                             &cpi->fn_ptr[bsize], nb_full_mvs, NB_MVS_NUM, mv);

+                             &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num,

+                             mv);

 #else

   (void)frame_idx;

   (void)mi_row;

--- a/vp9/encoder/vp9_mcomp.c

+++ b/vp9/encoder/vp9_mcomp.c

@@ -1899,28 +1899,32 @@

-int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,

+int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs,

                                  int mv_num) {

+  // The behavior of this function is to compute log2 of mv difference,

+  // i.e. min log2(1 + row_diff * row_diff + col_diff * col_diff)

+  // against available neighbor mvs.

+  // Since the log2 is monotonically increasing, we can compute

+  // min row_diff * row_diff + col_diff * col_diff first

+  // then apply log2 in the end

   int i;

-  int update = 0;

-  int64_t best_cost = 0;

-  vpx_clear_system_state();

+  int64_t min_abs_diff = INT64_MAX;

+  int cnt = 0;

+  assert(mv_num <= NB_MVS_NUM);

   for (i = 0; i < mv_num; ++i) {

-    if (nb_mvs[i].as_int != INVALID_MV) {

-      MV nb_mv = nb_mvs[i].as_mv;

-      const int64_t row_diff = abs(mv->row - nb_mv.row);

-      const int64_t col_diff = abs(mv->col - nb_mv.col);

-      const int64_t cost =

-          log2_approximation(1 + row_diff * row_diff + col_diff * col_diff);

-      if (update == 0) {

-        best_cost = cost;

-        update = 1;

-      } else {

-        best_cost = cost < best_cost ? cost : best_cost;

-      }

-    }

+    MV nb_mv = nb_full_mvs[i].as_mv;

+    const int64_t row_diff = abs(mv->row - nb_mv.row);

+    const int64_t col_diff = abs(mv->col - nb_mv.col);

+    const int64_t abs_diff = row_diff * row_diff + col_diff * col_diff;

+    assert(nb_full_mvs[i].as_int != INVALID_MV);

+    min_abs_diff = VPXMIN(abs_diff, min_abs_diff);

+    ++cnt;

-  return best_cost;

+  if (cnt) {

+    return log2_approximation(1 + min_abs_diff);

+  } else {

+    return 0;

+  }

 static int64_t exhaustive_mesh_search_multi_step(

@@ -2247,12 +2251,13 @@

   return bestsad;

-void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,

-                             int mi_col, int rf_idx, BLOCK_SIZE bsize,

-                             int_mv *nb_full_mvs) {

+int vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,

+                            int mi_col, int rf_idx, BLOCK_SIZE bsize,

+                            int_mv *nb_full_mvs) {

   const int mi_width = num_8x8_blocks_wide_lookup[bsize];

   const int mi_height = num_8x8_blocks_high_lookup[bsize];

   const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };

+  int nb_full_mv_num = 0;

   int i;

   for (i = 0; i < NB_MVS_NUM; ++i) {

     int r = dirs[i][0] * mi_height;

@@ -2262,17 +2267,15 @@

       const TplDepStats *tpl_ptr =

           &tpl_frame

                ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];

-      int_mv *mv =

-          get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);

       if (tpl_ptr->ready[rf_idx]) {

-        nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv);

-      } else {

-        nb_full_mvs[i].as_int = INVALID_MV;

+        int_mv *mv =

+            get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);

+        nb_full_mvs[nb_full_mv_num].as_mv = get_full_mv(&mv->as_mv);

+        ++nb_full_mv_num;

-    } else {

-      nb_full_mvs[i].as_int = INVALID_MV;

+  return nb_full_mv_num;

 #endif  // CONFIG_NON_GREEDY_MV

--- a/vp9/encoder/vp9_mcomp.h

+++ b/vp9/encoder/vp9_mcomp.h

@@ -149,9 +149,9 @@

   return out_mv;

 struct TplDepFrame;

-void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,

-                             int mi_col, int rf_idx, BLOCK_SIZE bsize,

-                             int_mv *nb_full_mvs);

+int vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,

+                            int mi_col, int rf_idx, BLOCK_SIZE bsize,

+                            int_mv *nb_full_mvs);

 static INLINE BLOCK_SIZE get_square_block_size(BLOCK_SIZE bsize) {

   BLOCK_SIZE square_bsize;

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -2496,15 +2496,15 @@

   int bestsme = INT_MAX;

 #if CONFIG_NON_GREEDY_MV

-  int_mv nb_full_mvs[NB_MVS_NUM];

-  const int nb_full_mv_num = NB_MVS_NUM;

   int gf_group_idx = cpi->twopass.gf_group.index;

   int gf_rf_idx = ref_frame_to_gf_rf_idx(ref);

   BLOCK_SIZE square_bsize = get_square_block_size(bsize);

+  int_mv nb_full_mvs[NB_MVS_NUM];

+  const int nb_full_mv_num =

+      vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col,

+                              gf_rf_idx, square_bsize, nb_full_mvs);

   const int lambda = (pw * ph) / 4;

   assert(pw * ph == lambda << 2);

-  vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col,

-                          gf_rf_idx, square_bsize, nb_full_mvs);

 #else   // CONFIG_NON_GREEDY_MV

   int sadpb = x->sadperbit16;

 #endif  // CONFIG_NON_GREEDY_MV