shithub: libvpx

Download patch

ref: 7fd6a8f18699ff2da66315615f4ea15b722c8074
parent: 9e2229ec08940e3ec3668e4b535ea0be27ccaf97
parent: 0ad301e5b04951ecda115a2869475756760503bb
author: Dan Zhu <[email protected]>
date: Fri Aug 23 15:00:32 EDT 2019

Merge changes I13f59f52,I7441e041,I7441e041

* changes:
  add unit test for local structure computation
  add unit test for smooth motion field
  modify smooth model (float type mv + normalization)

--- a/test/non_greedy_mv_test.cc
+++ b/test/non_greedy_mv_test.cc
@@ -8,9 +8,193 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <math.h>
 #include "third_party/googletest/src/include/gtest/gtest.h"
-// #include "vp9/encoder/vp9_non_greedy_mv.h"
+#include "vp9/encoder/vp9_non_greedy_mv.h"
+#include "./vpx_dsp_rtcd.h"
 
 namespace {
-TEST(non_greedy_mv, non_greedy_mv) { printf("Let's test non_greedy_mv"); }
+
+static void read_in_mf(const char *filename, int *rows_ptr, int *cols_ptr,
+                       MV **buffer_ptr) {
+  FILE *input = fopen(filename, "rb");
+  int row, col;
+  int idx;
+
+  ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl;
+
+  fscanf(input, "%d,%d\n", rows_ptr, cols_ptr);
+
+  *buffer_ptr = (MV *)malloc((*rows_ptr) * (*cols_ptr) * sizeof(MV));
+
+  for (idx = 0; idx < (*rows_ptr) * (*cols_ptr); ++idx) {
+    fscanf(input, "%d,%d;", &row, &col);
+    (*buffer_ptr)[idx].row = row;
+    (*buffer_ptr)[idx].col = col;
+  }
+  fclose(input);
+}
+
+static void read_in_local_var(const char *filename, int *rows_ptr,
+                              int *cols_ptr,
+                              int (**M_ptr)[MF_LOCAL_STRUCTURE_SIZE]) {
+  FILE *input = fopen(filename, "rb");
+  int M00, M01, M10, M11;
+  int idx;
+  int int_type;
+
+  ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl;
+
+  fscanf(input, "%d,%d\n", rows_ptr, cols_ptr);
+
+  *M_ptr = (int(*)[MF_LOCAL_STRUCTURE_SIZE])malloc(
+      (*rows_ptr) * (*cols_ptr) * MF_LOCAL_STRUCTURE_SIZE * sizeof(int_type));
+
+  for (idx = 0; idx < (*rows_ptr) * (*cols_ptr); ++idx) {
+    fscanf(input, "%d,%d,%d,%d;", &M00, &M01, &M10, &M11);
+    (*M_ptr)[idx][0] = M00;
+    (*M_ptr)[idx][1] = M01;
+    (*M_ptr)[idx][2] = M10;
+    (*M_ptr)[idx][3] = M11;
+  }
+  fclose(input);
+}
+
+static void compare_mf(const MV *mf1, const MV *mf2, int rows, int cols,
+                       float *mean_ptr, float *std_ptr) {
+  float float_type;
+  float *diffs = (float *)malloc(rows * cols * sizeof(float_type));
+  int idx;
+  float accu = 0.0f;
+  for (idx = 0; idx < rows * cols; ++idx) {
+    MV mv1 = mf1[idx];
+    MV mv2 = mf2[idx];
+    float row_diff2 = (float)((mv1.row - mv2.row) * (mv1.row - mv2.row));
+    float col_diff2 = (float)((mv1.col - mv2.col) * (mv1.col - mv2.col));
+    diffs[idx] = sqrt(row_diff2 + col_diff2);
+    accu += diffs[idx];
+  }
+  *mean_ptr = accu / rows / cols;
+  *std_ptr = 0;
+  for (idx = 0; idx < rows * cols; ++idx) {
+    *std_ptr += (diffs[idx] - (*mean_ptr)) * (diffs[idx] - (*mean_ptr));
+  }
+  *std_ptr = sqrt(*std_ptr / rows / cols);
+  free(diffs);
+}
+
+static void load_frame_info(const char *filename,
+                            YV12_BUFFER_CONFIG *ref_frame_ptr) {
+  FILE *input = fopen(filename, "rb");
+  int idx;
+  uint8_t data_type;
+
+  ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl;
+
+  fscanf(input, "%d,%d\n", &(ref_frame_ptr->y_height),
+         &(ref_frame_ptr->y_width));
+
+  ref_frame_ptr->y_buffer = (uint8_t *)malloc(
+      (ref_frame_ptr->y_width) * (ref_frame_ptr->y_height) * sizeof(data_type));
+
+  for (idx = 0; idx < (ref_frame_ptr->y_width) * (ref_frame_ptr->y_height);
+       ++idx) {
+    int value;
+    fscanf(input, "%d,", &value);
+    ref_frame_ptr->y_buffer[idx] = (uint8_t)value;
+  }
+
+  ref_frame_ptr->y_stride = ref_frame_ptr->y_width;
+  fclose(input);
+}
+
+static int compare_local_var(const int (*local_var1)[MF_LOCAL_STRUCTURE_SIZE],
+                             const int (*local_var2)[MF_LOCAL_STRUCTURE_SIZE],
+                             int rows, int cols) {
+  int diff = 0;
+  int outter_idx, inner_idx;
+  for (outter_idx = 0; outter_idx < rows * cols; ++outter_idx) {
+    for (inner_idx = 0; inner_idx < MF_LOCAL_STRUCTURE_SIZE; ++inner_idx) {
+      diff += abs(local_var1[outter_idx][inner_idx] -
+                  local_var2[outter_idx][inner_idx]);
+    }
+  }
+  return diff / rows / cols;
+}
+
+TEST(non_greedy_mv, DISABLED_smooth_mf) {
+  const char *search_mf_file = "non_greedy_mv_test_files/exhaust_32x32.txt";
+  const char *local_var_file = "non_greedy_mv_test_files/localVar_32x32.txt";
+  const char *estimation_file = "non_greedy_mv_test_files/estimation_32x32.txt";
+  const char *ground_truth_file =
+      "non_greedy_mv_test_files/ground_truth_32x32.txt";
+  BLOCK_SIZE bsize = BLOCK_32X32;
+  MV *search_mf = NULL;
+  MV *smooth_mf = NULL;
+  MV *estimation = NULL;
+  MV *ground_truth = NULL;
+  int(*local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL;
+  int rows = 0, cols = 0;
+
+  int alpha = 100, max_iter = 100;
+
+  read_in_mf(search_mf_file, &rows, &cols, &search_mf);
+  read_in_local_var(local_var_file, &rows, &cols, &local_var);
+  read_in_mf(estimation_file, &rows, &cols, &estimation);
+  read_in_mf(ground_truth_file, &rows, &cols, &ground_truth);
+
+  float sm_mean, sm_std;
+  float est_mean, est_std;
+
+  smooth_mf = (MV *)malloc(rows * cols * sizeof(MV));
+  vp9_get_smooth_motion_field(search_mf, local_var, rows, cols, bsize, alpha,
+                              max_iter, smooth_mf);
+
+  compare_mf(smooth_mf, ground_truth, rows, cols, &sm_mean, &sm_std);
+  compare_mf(smooth_mf, estimation, rows, cols, &est_mean, &est_std);
+
+  EXPECT_LE(sm_mean, 3);
+  EXPECT_LE(est_mean, 2);
+
+  free(search_mf);
+  free(local_var);
+  free(estimation);
+  free(ground_truth);
+  free(smooth_mf);
+}
+
+TEST(non_greedy_mv, DISABLED_local_var) {
+  const char *ref_frame_file = "non_greedy_mv_test_files/ref_frame_32x32.txt";
+  const char *cur_frame_file = "non_greedy_mv_test_files/cur_frame_32x32.txt";
+  const char *gt_local_var_file = "non_greedy_mv_test_files/localVar_32x32.txt";
+  const char *search_mf_file = "non_greedy_mv_test_files/exhaust_32x32.txt";
+  BLOCK_SIZE bsize = BLOCK_32X32;
+  int(*gt_local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL;
+  int(*est_local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL;
+  YV12_BUFFER_CONFIG ref_frame, cur_frame;
+  int rows, cols;
+  MV *search_mf;
+  int int_type;
+  int local_var_diff;
+  vp9_variance_fn_ptr_t fn;
+
+  load_frame_info(ref_frame_file, &ref_frame);
+  load_frame_info(cur_frame_file, &cur_frame);
+  read_in_mf(search_mf_file, &rows, &cols, &search_mf);
+
+  fn.sdf = vpx_sad32x32;
+  est_local_var = (int(*)[MF_LOCAL_STRUCTURE_SIZE])malloc(
+      rows * cols * MF_LOCAL_STRUCTURE_SIZE * sizeof(int_type));
+  vp9_get_local_structure(&cur_frame, &ref_frame, search_mf, &fn, rows, cols,
+                          bsize, est_local_var);
+  read_in_local_var(gt_local_var_file, &rows, &cols, &gt_local_var);
+
+  local_var_diff = compare_local_var(est_local_var, gt_local_var, rows, cols);
+
+  EXPECT_LE(local_var_diff, 1);
+
+  free(gt_local_var);
+  free(est_local_var);
+  free(ref_frame.y_buffer);
+}
 }  // namespace
--- a/vp9/encoder/vp9_non_greedy_mv.c
+++ b/vp9/encoder/vp9_non_greedy_mv.c
@@ -271,41 +271,38 @@
   return 0;
 }
 
-static MV get_smooth_motion_vector(const MV search_mv, const MV *tmp_mf,
-                                   const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
-                                   int rows, int cols, int row, int col,
-                                   float alpha) {
-  const MV tmp_mv = tmp_mf[row * cols + col];
+static FloatMV get_smooth_motion_vector(const FloatMV scaled_search_mv,
+                                        const FloatMV *tmp_mf,
+                                        const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
+                                        int rows, int cols, int row, int col,
+                                        float alpha) {
+  const FloatMV tmp_mv = tmp_mf[row * cols + col];
   int idx_row, idx_col;
-  float avg_nb_mv[2] = { 0.0f, 0.0f };
-  MV mv = { 0, 0 };
+  FloatMV avg_nb_mv = { 0.0f, 0.0f };
+  FloatMV mv = { 0.0f, 0.0f };
   float filter[3][3] = { { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f },
                          { 1.0f / 6.0f, 0.0f, 1.0f / 6.0f },
                          { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f } };
-  int ref_row = row + search_mv.row;
-  int ref_col = col + search_mv.col;
-  ref_row = ref_row < 0 ? 0 : (ref_row >= rows ? rows - 1 : ref_row);
-  ref_col = ref_col < 0 ? 0 : (ref_col >= cols ? cols - 1 : ref_col);
   for (idx_row = 0; idx_row < 3; ++idx_row) {
     int nb_row = row + idx_row - 1;
     for (idx_col = 0; idx_col < 3; ++idx_col) {
       int nb_col = col + idx_col - 1;
       if (nb_row < 0 || nb_col < 0 || nb_row >= rows || nb_col >= cols) {
-        avg_nb_mv[0] += (tmp_mv.row) * filter[idx_row][idx_col];
-        avg_nb_mv[1] += (tmp_mv.col) * filter[idx_row][idx_col];
+        avg_nb_mv.row += (tmp_mv.row) * filter[idx_row][idx_col];
+        avg_nb_mv.col += (tmp_mv.col) * filter[idx_row][idx_col];
       } else {
-        const MV nb_mv = tmp_mf[nb_row * cols + nb_col];
-        avg_nb_mv[0] += (nb_mv.row) * filter[idx_row][idx_col];
-        avg_nb_mv[1] += (nb_mv.col) * filter[idx_row][idx_col];
+        const FloatMV nb_mv = tmp_mf[nb_row * cols + nb_col];
+        avg_nb_mv.row += (nb_mv.row) * filter[idx_row][idx_col];
+        avg_nb_mv.col += (nb_mv.col) * filter[idx_row][idx_col];
       }
     }
   }
   {
     // M is the local variance of reference frame
-    float M00 = M[ref_row * cols + ref_col][0];
-    float M01 = M[ref_row * cols + ref_col][1];
-    float M10 = M[ref_row * cols + ref_col][2];
-    float M11 = M[ref_row * cols + ref_col][3];
+    float M00 = M[row * cols + col][0];
+    float M01 = M[row * cols + col][1];
+    float M10 = M[row * cols + col][2];
+    float M11 = M[row * cols + col][3];
 
     float det = (M00 + alpha) * (M11 + alpha) - M01 * M10;
 
@@ -319,36 +316,42 @@
     float inv_MM10 = inv_M10 * M00 + inv_M11 * M10;
     float inv_MM11 = inv_M10 * M01 + inv_M11 * M11;
 
-    mv.row = (int)(inv_M00 * avg_nb_mv[0] + inv_M01 * avg_nb_mv[1] +
-                   inv_MM00 * search_mv.row + inv_MM01 * search_mv.col);
-    mv.col = (int)(inv_M10 * avg_nb_mv[0] + inv_M11 * avg_nb_mv[1] +
-                   inv_MM10 * search_mv.row + inv_MM11 * search_mv.col);
+    mv.row = inv_M00 * avg_nb_mv.row * alpha + inv_M01 * avg_nb_mv.col * alpha +
+             inv_MM00 * scaled_search_mv.row + inv_MM01 * scaled_search_mv.col;
+    mv.col = inv_M10 * avg_nb_mv.row * alpha + inv_M11 * avg_nb_mv.col * alpha +
+             inv_MM10 * scaled_search_mv.row + inv_MM11 * scaled_search_mv.col;
   }
   return mv;
 }
 
-void vp9_get_smooth_motion_field(const MV *scaled_search_mf,
+void vp9_get_smooth_motion_field(const MV *search_mf,
                                  const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
-                                 int rows, int cols, float alpha, int num_iters,
-                                 MV *smooth_mf) {
-  // note: the scaled_search_mf and smooth_mf are all scaled by macroblock size
+                                 int rows, int cols, BLOCK_SIZE bsize,
+                                 float alpha, int num_iters, MV *smooth_mf) {
   // M is the local variation of reference frame
   // build two buffers
-  MV *input = (MV *)malloc(rows * cols * sizeof(MV));
-  MV *output = (MV *)malloc(rows * cols * sizeof(MV));
+  FloatMV *input = (FloatMV *)malloc(rows * cols * sizeof(FloatMV));
+  FloatMV *output = (FloatMV *)malloc(rows * cols * sizeof(FloatMV));
   int idx;
   int row, col;
+  int bw = 4 << b_width_log2_lookup[bsize];
+  int bh = 4 << b_height_log2_lookup[bsize];
   // copy search results to input buffer
   for (idx = 0; idx < rows * cols; ++idx) {
-    input[idx] = scaled_search_mf[idx];
+    input[idx].row = (float)search_mf[idx].row / bh;
+    input[idx].col = (float)search_mf[idx].col / bw;
   }
   for (idx = 0; idx < num_iters; ++idx) {
-    MV *tmp;
+    FloatMV *tmp;
     for (row = 0; row < rows; ++row) {
       for (col = 0; col < cols; ++col) {
-        output[row * cols + col] =
-            get_smooth_motion_vector(scaled_search_mf[row * cols + col], input,
-                                     M, rows, cols, row, col, alpha);
+        // note: scaled_search_mv and the input/output buffers are scaled by
+        // the block dimensions (bh, bw)
+        const MV search_mv = search_mf[row * cols + col];
+        FloatMV scaled_search_mv = { (float)search_mv.row / bh,
+                                     (float)search_mv.col / bw };
+        output[row * cols + col] = get_smooth_motion_vector(
+            scaled_search_mv, input, M, rows, cols, row, col, alpha);
       }
     }
     // swap buffers
@@ -358,55 +361,83 @@
   }
   // copy smoothed results to output
   for (idx = 0; idx < rows * cols; ++idx) {
-    smooth_mf[idx] = input[idx];
+    smooth_mf[idx].row = (int)(input[idx].row * bh);
+    smooth_mf[idx].col = (int)(input[idx].col * bw);
   }
   free(input);
   free(output);
 }
 
-void vp9_get_local_structure(const YV12_BUFFER_CONFIG *ref_frame,
-                             const vp9_variance_fn_ptr_t *fn_ptr, int mi_rows,
-                             int mi_cols, BLOCK_SIZE bsize,
+void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame,
+                             const YV12_BUFFER_CONFIG *ref_frame,
+                             const MV *search_mf,
+                             const vp9_variance_fn_ptr_t *fn_ptr, int rows,
+                             int cols, BLOCK_SIZE bsize,
                              int (*M)[MF_LOCAL_STRUCTURE_SIZE]) {
-  int stride = ref_frame->y_stride;
-  const int mi_height = num_8x8_blocks_high_lookup[bsize];
-  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
-  int cols = mi_size_to_block_size(mi_width, mi_cols);
-  int mi_row, mi_col;
-  for (mi_row = 0; mi_row < mi_rows; mi_row += mi_height) {
-    for (mi_col = 0; mi_col < mi_cols; mi_col += mi_width) {
-      const int mb_y_offset = mi_row * MI_SIZE * stride + mi_col * MI_SIZE;
-      int row = mi_row / mi_height;
-      int col = mi_col / mi_width;
-      uint8_t *center = ref_frame->y_buffer + mb_y_offset;
+  const int bw = 4 << b_width_log2_lookup[bsize];
+  const int bh = 4 << b_height_log2_lookup[bsize];
+  const int cur_stride = cur_frame->y_stride;
+  const int ref_stride = ref_frame->y_stride;
+  const int width = ref_frame->y_width;
+  const int height = ref_frame->y_height;
+  int row, col;
+  for (row = 0; row < rows; ++row) {
+    for (col = 0; col < cols; ++col) {
+      int cur_offset = row * bh * cur_stride + col * bw;
+      uint8_t *center = cur_frame->y_buffer + cur_offset;
+      int ref_h = row * bh + search_mf[row * cols + col].row;
+      int ref_w = col * bw + search_mf[row * cols + col].col;
+      int ref_offset;
+      uint8_t *target;
       uint8_t *nb;
+      int search_dist;
+      int nb_dist;
       int I_row = 0, I_col = 0;
+      // TODO(Dan): handle the case where the reference frame block is beyond
+      // the boundary
+      ref_h = ref_h < 0 ? 0 : (ref_h >= height - bh ? height - bh - 1 : ref_h);
+      ref_w = ref_w < 0 ? 0 : (ref_w >= width - bw ? width - bw - 1 : ref_w);
+      // compute search results distortion
+      // TODO(Dan): may need to use a vp9 function to find the reference block;
+      // for now, to compare with the results of my python code, I compute the
+      // reference block my own way
+      ref_offset = ref_h * ref_stride + ref_w;
+      target = ref_frame->y_buffer + ref_offset;
+      search_dist = fn_ptr->sdf(center, cur_stride, target, ref_stride);
+      // compute target's neighbors' distortions
+      // TODO(Dan): if using padding, the boundary condition may vary
       // up
-      if (mi_row > 0) {
-        nb = center - MI_SIZE * stride * mi_height;
-        I_row += fn_ptr->sdf(center, stride, nb, stride);
+      if (ref_h - bh >= 0) {
+        nb = target - ref_stride * bh;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_row += nb_dist - search_dist;
       }
       // down
-      if (mi_row < mi_rows - 1) {
-        nb = center + MI_SIZE * stride * mi_height;
-        I_row += fn_ptr->sdf(center, stride, nb, stride);
+      if (ref_h + bh < height - bh) {
+        nb = target + ref_stride * bh;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_row += nb_dist - search_dist;
       }
-      if (mi_row > 0 && mi_row < mi_rows - 1) {
+      if (ref_h - bh >= 0 && ref_h + bh < height - bh) {
         I_row /= 2;
       }
+      I_row /= (bw * bh);
       // left
-      if (mi_col > 0) {
-        nb = center - MI_SIZE * mi_width;
-        I_col += fn_ptr->sdf(center, stride, nb, stride);
+      if (ref_w - bw >= 0) {
+        nb = target - bw;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_col += nb_dist - search_dist;
       }
-      // right
-      if (mi_col < mi_cols - 1) {
-        nb = center + MI_SIZE * mi_width;
-        I_col += fn_ptr->sdf(center, stride, nb, stride);
+      // right
+      if (ref_w + bw < width - bw) {
+        nb = target + bw;
+        nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+        I_col += nb_dist - search_dist;
       }
-      if (mi_col > 0 && mi_col < mi_cols - 1) {
+      if (ref_w - bw >= 0 && ref_w + bw < width - bw) {
         I_col /= 2;
       }
+      I_col /= (bw * bh);
       M[row * cols + col][0] = I_row * I_row;
       M[row * cols + col][1] = I_row * I_col;
       M[row * cols + col][2] = I_col * I_row;
--- a/vp9/encoder/vp9_non_greedy_mv.h
+++ b/vp9/encoder/vp9_non_greedy_mv.h
@@ -39,6 +39,10 @@
   MotionField (*motion_field_array)[3][SQUARE_BLOCK_SIZES];
 } MotionFieldInfo;
 
+typedef struct {
+  float row, col;
+} FloatMV;
+
 static INLINE int get_square_block_idx(BLOCK_SIZE bsize) {
   if (bsize == BLOCK_4X4) {
     return 0;
@@ -88,14 +92,15 @@
 
 void vp9_get_smooth_motion_field(const MV *search_mf,
                                  const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
-                                 int rows, int cols, float alpha, int num_iters,
-                                 MV *smooth_mf);
+                                 int rows, int cols, BLOCK_SIZE bize,
+                                 float alpha, int num_iters, MV *smooth_mf);
 
-void vp9_get_local_structure(const YV12_BUFFER_CONFIG *ref_frame,
-                             const vp9_variance_fn_ptr_t *fn_ptr, int mi_rows,
-                             int mi_cols, BLOCK_SIZE bsize,
+void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame,
+                             const YV12_BUFFER_CONFIG *ref_frame,
+                             const MV *search_mf,
+                             const vp9_variance_fn_ptr_t *fn_ptr, int rows,
+                             int cols, BLOCK_SIZE bsize,
                              int (*M)[MF_LOCAL_STRUCTURE_SIZE]);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif