ref: 7fd6a8f18699ff2da66315615f4ea15b722c8074
parent: 9e2229ec08940e3ec3668e4b535ea0be27ccaf97
parent: 0ad301e5b04951ecda115a2869475756760503bb
author: Dan Zhu <[email protected]>
date: Fri Aug 23 15:00:32 EDT 2019
Merge changes I13f59f52,I7441e041,I7441e041 * changes: add unit test for local structure computation; add unit test for smooth motion field; modify smooth model (float type mv + normalization)
--- a/test/non_greedy_mv_test.cc
+++ b/test/non_greedy_mv_test.cc
@@ -8,9 +8,193 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
-// #include "vp9/encoder/vp9_non_greedy_mv.h"
+#include "vp9/encoder/vp9_non_greedy_mv.h"
+#include "./vpx_dsp_rtcd.h"
namespace {
-TEST(non_greedy_mv, non_greedy_mv) { printf("Let's test non_greedy_mv"); }
+
+static void read_in_mf(const char *filename, int *rows_ptr, int *cols_ptr,
+ MV **buffer_ptr) {
+ FILE *input = fopen(filename, "rb");
+ int row, col;
+ int idx;
+
+ ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl;
+
+ fscanf(input, "%d,%d\n", rows_ptr, cols_ptr);
+
+ *buffer_ptr = (MV *)malloc((*rows_ptr) * (*cols_ptr) * sizeof(MV));
+
+ for (idx = 0; idx < (*rows_ptr) * (*cols_ptr); ++idx) {
+ fscanf(input, "%d,%d;", &row, &col);
+ (*buffer_ptr)[idx].row = row;
+ (*buffer_ptr)[idx].col = col;
+ }
+ fclose(input);
+}
+
+static void read_in_local_var(const char *filename, int *rows_ptr,
+ int *cols_ptr,
+ int (**M_ptr)[MF_LOCAL_STRUCTURE_SIZE]) {
+ FILE *input = fopen(filename, "rb");
+ int M00, M01, M10, M11;
+ int idx;
+ int int_type;
+
+ ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl;
+
+ fscanf(input, "%d,%d\n", rows_ptr, cols_ptr);
+
+ *M_ptr = (int(*)[MF_LOCAL_STRUCTURE_SIZE])malloc(
+ (*rows_ptr) * (*cols_ptr) * MF_LOCAL_STRUCTURE_SIZE * sizeof(int_type));
+
+ for (idx = 0; idx < (*rows_ptr) * (*cols_ptr); ++idx) {
+ fscanf(input, "%d,%d,%d,%d;", &M00, &M01, &M10, &M11);
+ (*M_ptr)[idx][0] = M00;
+ (*M_ptr)[idx][1] = M01;
+ (*M_ptr)[idx][2] = M10;
+ (*M_ptr)[idx][3] = M11;
+ }
+ fclose(input);
+}
+
+static void compare_mf(const MV *mf1, const MV *mf2, int rows, int cols,
+ float *mean_ptr, float *std_ptr) {
+ float float_type;
+ float *diffs = (float *)malloc(rows * cols * sizeof(float_type));
+ int idx;
+ float accu = 0.0f;
+ for (idx = 0; idx < rows * cols; ++idx) {
+ MV mv1 = mf1[idx];
+ MV mv2 = mf2[idx];
+ float row_diff2 = (float)((mv1.row - mv2.row) * (mv1.row - mv2.row));
+ float col_diff2 = (float)((mv1.col - mv2.col) * (mv1.col - mv2.col));
+ diffs[idx] = sqrt(row_diff2 + col_diff2);
+ accu += diffs[idx];
+ }
+ *mean_ptr = accu / rows / cols;
+ *std_ptr = 0;
+ for (idx = 0; idx < rows * cols; ++idx) {
+ *std_ptr += (diffs[idx] - (*mean_ptr)) * (diffs[idx] - (*mean_ptr));
+ }
+ *std_ptr = sqrt(*std_ptr / rows / cols);
+ free(diffs);
+}
+
+static void load_frame_info(const char *filename,
+ YV12_BUFFER_CONFIG *ref_frame_ptr) {
+ FILE *input = fopen(filename, "rb");
+ int idx;
+ uint8_t data_type;
+
+ ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl;
+
+ fscanf(input, "%d,%d\n", &(ref_frame_ptr->y_height),
+ &(ref_frame_ptr->y_width));
+
+ ref_frame_ptr->y_buffer = (uint8_t *)malloc(
+ (ref_frame_ptr->y_width) * (ref_frame_ptr->y_height) * sizeof(data_type));
+
+ for (idx = 0; idx < (ref_frame_ptr->y_width) * (ref_frame_ptr->y_height);
+ ++idx) {
+ int value;
+ fscanf(input, "%d,", &value);
+ ref_frame_ptr->y_buffer[idx] = (uint8_t)value;
+ }
+
+ ref_frame_ptr->y_stride = ref_frame_ptr->y_width;
+ fclose(input);
+}
+
+static int compare_local_var(const int (*local_var1)[MF_LOCAL_STRUCTURE_SIZE],
+ const int (*local_var2)[MF_LOCAL_STRUCTURE_SIZE],
+ int rows, int cols) {
+ int diff = 0;
+ int outter_idx, inner_idx;
+ for (outter_idx = 0; outter_idx < rows * cols; ++outter_idx) {
+ for (inner_idx = 0; inner_idx < MF_LOCAL_STRUCTURE_SIZE; ++inner_idx) {
+ diff += abs(local_var1[outter_idx][inner_idx] -
+ local_var2[outter_idx][inner_idx]);
+ }
+ }
+ return diff / rows / cols;
+}
+
+TEST(non_greedy_mv, DISABLED_smooth_mf) {
+ const char *search_mf_file = "non_greedy_mv_test_files/exhaust_32x32.txt";
+ const char *local_var_file = "non_greedy_mv_test_files/localVar_32x32.txt";
+ const char *estimation_file = "non_greedy_mv_test_files/estimation_32x32.txt";
+ const char *ground_truth_file =
+ "non_greedy_mv_test_files/ground_truth_32x32.txt";
+ BLOCK_SIZE bsize = BLOCK_32X32;
+ MV *search_mf = NULL;
+ MV *smooth_mf = NULL;
+ MV *estimation = NULL;
+ MV *ground_truth = NULL;
+ int(*local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL;
+ int rows = 0, cols = 0;
+
+ int alpha = 100, max_iter = 100;
+
+ read_in_mf(search_mf_file, &rows, &cols, &search_mf);
+ read_in_local_var(local_var_file, &rows, &cols, &local_var);
+ read_in_mf(estimation_file, &rows, &cols, &estimation);
+ read_in_mf(ground_truth_file, &rows, &cols, &ground_truth);
+
+ float sm_mean, sm_std;
+ float est_mean, est_std;
+
+ smooth_mf = (MV *)malloc(rows * cols * sizeof(MV));
+ vp9_get_smooth_motion_field(search_mf, local_var, rows, cols, bsize, alpha,
+ max_iter, smooth_mf);
+
+ compare_mf(smooth_mf, ground_truth, rows, cols, &sm_mean, &sm_std);
+ compare_mf(smooth_mf, estimation, rows, cols, &est_mean, &est_std);
+
+ EXPECT_LE(sm_mean, 3);
+ EXPECT_LE(est_mean, 2);
+
+ free(search_mf);
+ free(local_var);
+ free(estimation);
+ free(ground_truth);
+ free(smooth_mf);
+}
+
+TEST(non_greedy_mv, DISABLED_local_var) {
+ const char *ref_frame_file = "non_greedy_mv_test_files/ref_frame_32x32.txt";
+ const char *cur_frame_file = "non_greedy_mv_test_files/cur_frame_32x32.txt";
+ const char *gt_local_var_file = "non_greedy_mv_test_files/localVar_32x32.txt";
+ const char *search_mf_file = "non_greedy_mv_test_files/exhaust_32x32.txt";
+ BLOCK_SIZE bsize = BLOCK_32X32;
+ int(*gt_local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL;
+ int(*est_local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL;
+ YV12_BUFFER_CONFIG ref_frame, cur_frame;
+ int rows, cols;
+ MV *search_mf;
+ int int_type;
+ int local_var_diff;
+ vp9_variance_fn_ptr_t fn;
+
+ load_frame_info(ref_frame_file, &ref_frame);
+ load_frame_info(cur_frame_file, &cur_frame);
+ read_in_mf(search_mf_file, &rows, &cols, &search_mf);
+
+ fn.sdf = vpx_sad32x32;
+ est_local_var = (int(*)[MF_LOCAL_STRUCTURE_SIZE])malloc(
+ rows * cols * MF_LOCAL_STRUCTURE_SIZE * sizeof(int_type));
+ vp9_get_local_structure(&cur_frame, &ref_frame, search_mf, &fn, rows, cols,
+ bsize, est_local_var);
+ read_in_local_var(gt_local_var_file, &rows, &cols, &gt_local_var);
+
+ local_var_diff = compare_local_var(est_local_var, gt_local_var, rows, cols);
+
+ EXPECT_LE(local_var_diff, 1);
+
+ free(gt_local_var);
+ free(est_local_var);
+ free(ref_frame.y_buffer);
+}
} // namespace
--- a/vp9/encoder/vp9_non_greedy_mv.c
+++ b/vp9/encoder/vp9_non_greedy_mv.c
@@ -271,41 +271,38 @@
return 0;
}
-static MV get_smooth_motion_vector(const MV search_mv, const MV *tmp_mf,
- const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
- int rows, int cols, int row, int col,
- float alpha) {
- const MV tmp_mv = tmp_mf[row * cols + col];
+static FloatMV get_smooth_motion_vector(const FloatMV scaled_search_mv,
+ const FloatMV *tmp_mf,
+ const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
+ int rows, int cols, int row, int col,
+ float alpha) {
+ const FloatMV tmp_mv = tmp_mf[row * cols + col];
int idx_row, idx_col;
- float avg_nb_mv[2] = { 0.0f, 0.0f };
- MV mv = { 0, 0 };
+ FloatMV avg_nb_mv = { 0.0f, 0.0f };
+ FloatMV mv = { 0.0f, 0.0f };
float filter[3][3] = { { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f },
{ 1.0f / 6.0f, 0.0f, 1.0f / 6.0f },
{ 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f } };
- int ref_row = row + search_mv.row;
- int ref_col = col + search_mv.col;
- ref_row = ref_row < 0 ? 0 : (ref_row >= rows ? rows - 1 : ref_row);
- ref_col = ref_col < 0 ? 0 : (ref_col >= cols ? cols - 1 : ref_col);
for (idx_row = 0; idx_row < 3; ++idx_row) {
int nb_row = row + idx_row - 1;
for (idx_col = 0; idx_col < 3; ++idx_col) {
int nb_col = col + idx_col - 1;
if (nb_row < 0 || nb_col < 0 || nb_row >= rows || nb_col >= cols) {
- avg_nb_mv[0] += (tmp_mv.row) * filter[idx_row][idx_col];
- avg_nb_mv[1] += (tmp_mv.col) * filter[idx_row][idx_col];
+ avg_nb_mv.row += (tmp_mv.row) * filter[idx_row][idx_col];
+ avg_nb_mv.col += (tmp_mv.col) * filter[idx_row][idx_col];
} else {
- const MV nb_mv = tmp_mf[nb_row * cols + nb_col];
- avg_nb_mv[0] += (nb_mv.row) * filter[idx_row][idx_col];
- avg_nb_mv[1] += (nb_mv.col) * filter[idx_row][idx_col];
+ const FloatMV nb_mv = tmp_mf[nb_row * cols + nb_col];
+ avg_nb_mv.row += (nb_mv.row) * filter[idx_row][idx_col];
+ avg_nb_mv.col += (nb_mv.col) * filter[idx_row][idx_col];
}
}
}
{
// M is the local variance of reference frame
- float M00 = M[ref_row * cols + ref_col][0];
- float M01 = M[ref_row * cols + ref_col][1];
- float M10 = M[ref_row * cols + ref_col][2];
- float M11 = M[ref_row * cols + ref_col][3];
+ float M00 = M[row * cols + col][0];
+ float M01 = M[row * cols + col][1];
+ float M10 = M[row * cols + col][2];
+ float M11 = M[row * cols + col][3];
float det = (M00 + alpha) * (M11 + alpha) - M01 * M10;
@@ -319,36 +316,42 @@
float inv_MM10 = inv_M10 * M00 + inv_M11 * M10;
float inv_MM11 = inv_M10 * M01 + inv_M11 * M11;
- mv.row = (int)(inv_M00 * avg_nb_mv[0] + inv_M01 * avg_nb_mv[1] +
- inv_MM00 * search_mv.row + inv_MM01 * search_mv.col);
- mv.col = (int)(inv_M10 * avg_nb_mv[0] + inv_M11 * avg_nb_mv[1] +
- inv_MM10 * search_mv.row + inv_MM11 * search_mv.col);
+ mv.row = inv_M00 * avg_nb_mv.row * alpha + inv_M01 * avg_nb_mv.col * alpha +
+ inv_MM00 * scaled_search_mv.row + inv_MM01 * scaled_search_mv.col;
+ mv.col = inv_M10 * avg_nb_mv.row * alpha + inv_M11 * avg_nb_mv.col * alpha +
+ inv_MM10 * scaled_search_mv.row + inv_MM11 * scaled_search_mv.col;
}
return mv;
}
-void vp9_get_smooth_motion_field(const MV *scaled_search_mf,
+void vp9_get_smooth_motion_field(const MV *search_mf,
const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
- int rows, int cols, float alpha, int num_iters,
- MV *smooth_mf) {
- // note: the scaled_search_mf and smooth_mf are all scaled by macroblock size
+ int rows, int cols, BLOCK_SIZE bsize,
+ float alpha, int num_iters, MV *smooth_mf) {
// M is the local variation of reference frame
// build two buffers
- MV *input = (MV *)malloc(rows * cols * sizeof(MV));
- MV *output = (MV *)malloc(rows * cols * sizeof(MV));
+ FloatMV *input = (FloatMV *)malloc(rows * cols * sizeof(FloatMV));
+ FloatMV *output = (FloatMV *)malloc(rows * cols * sizeof(FloatMV));
int idx;
int row, col;
+ int bw = 4 << b_width_log2_lookup[bsize];
+ int bh = 4 << b_height_log2_lookup[bsize];
// copy search results to input buffer
for (idx = 0; idx < rows * cols; ++idx) {
- input[idx] = scaled_search_mf[idx];
+ input[idx].row = (float)search_mf[idx].row / bh;
+ input[idx].col = (float)search_mf[idx].col / bw;
}
for (idx = 0; idx < num_iters; ++idx) {
- MV *tmp;
+ FloatMV *tmp;
for (row = 0; row < rows; ++row) {
for (col = 0; col < cols; ++col) {
- output[row * cols + col] =
- get_smooth_motion_vector(scaled_search_mf[row * cols + col], input,
- M, rows, cols, row, col, alpha);
+ // note: scaled_search_mv is the search mv divided by the block size
+ // (bh, bw), the same scaling that was applied to the input buffer
+ const MV search_mv = search_mf[row * cols + col];
+ FloatMV scaled_search_mv = { (float)search_mv.row / bh,
+ (float)search_mv.col / bw };
+ output[row * cols + col] = get_smooth_motion_vector(
+ scaled_search_mv, input, M, rows, cols, row, col, alpha);
}
}
// swap buffers
@@ -358,55 +361,83 @@
}
// copy smoothed results to output
for (idx = 0; idx < rows * cols; ++idx) {
- smooth_mf[idx] = input[idx];
+ smooth_mf[idx].row = (int)(input[idx].row * bh);
+ smooth_mf[idx].col = (int)(input[idx].col * bw);
}
free(input);
free(output);
}
-void vp9_get_local_structure(const YV12_BUFFER_CONFIG *ref_frame,
- const vp9_variance_fn_ptr_t *fn_ptr, int mi_rows,
- int mi_cols, BLOCK_SIZE bsize,
+void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame,
+ const YV12_BUFFER_CONFIG *ref_frame,
+ const MV *search_mf,
+ const vp9_variance_fn_ptr_t *fn_ptr, int rows,
+ int cols, BLOCK_SIZE bsize,
int (*M)[MF_LOCAL_STRUCTURE_SIZE]) {
- int stride = ref_frame->y_stride;
- const int mi_height = num_8x8_blocks_high_lookup[bsize];
- const int mi_width = num_8x8_blocks_wide_lookup[bsize];
- int cols = mi_size_to_block_size(mi_width, mi_cols);
- int mi_row, mi_col;
- for (mi_row = 0; mi_row < mi_rows; mi_row += mi_height) {
- for (mi_col = 0; mi_col < mi_cols; mi_col += mi_width) {
- const int mb_y_offset = mi_row * MI_SIZE * stride + mi_col * MI_SIZE;
- int row = mi_row / mi_height;
- int col = mi_col / mi_width;
- uint8_t *center = ref_frame->y_buffer + mb_y_offset;
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ const int cur_stride = cur_frame->y_stride;
+ const int ref_stride = ref_frame->y_stride;
+ const int width = ref_frame->y_width;
+ const int height = ref_frame->y_height;
+ int row, col;
+ for (row = 0; row < rows; ++row) {
+ for (col = 0; col < cols; ++col) {
+ int cur_offset = row * bh * cur_stride + col * bw;
+ uint8_t *center = cur_frame->y_buffer + cur_offset;
+ int ref_h = row * bh + search_mf[row * cols + col].row;
+ int ref_w = col * bw + search_mf[row * cols + col].col;
+ int ref_offset;
+ uint8_t *target;
uint8_t *nb;
+ int search_dist;
+ int nb_dist;
int I_row = 0, I_col = 0;
+ // TODO(Dan): handle the case where the reference frame block lies beyond
+ // the frame boundary
+ ref_h = ref_h < 0 ? 0 : (ref_h >= height - bh ? height - bh - 1 : ref_h);
+ ref_w = ref_w < 0 ? 0 : (ref_w >= width - bw ? width - bw - 1 : ref_w);
+ // compute search results distortion
+ // TODO(Dan): may need a vp9 function to find the reference block; for
+ // now the reference block is located my own way so that the results can
+ // be compared with those of my python code
+ ref_offset = ref_h * ref_stride + ref_w;
+ target = ref_frame->y_buffer + ref_offset;
+ search_dist = fn_ptr->sdf(center, cur_stride, target, ref_stride);
+ // compute target's neighbors' distortions
+ // TODO(Dan): if using padding, the boundary condition may vary
// up
- if (mi_row > 0) {
- nb = center - MI_SIZE * stride * mi_height;
- I_row += fn_ptr->sdf(center, stride, nb, stride);
+ if (ref_h - bh >= 0) {
+ nb = target - ref_stride * bh;
+ nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+ I_row += nb_dist - search_dist;
}
// down
- if (mi_row < mi_rows - 1) {
- nb = center + MI_SIZE * stride * mi_height;
- I_row += fn_ptr->sdf(center, stride, nb, stride);
+ if (ref_h + bh < height - bh) {
+ nb = target + ref_stride * bh;
+ nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+ I_row += nb_dist - search_dist;
}
- if (mi_row > 0 && mi_row < mi_rows - 1) {
+ if (ref_h - bh >= 0 && ref_h + bh < height - bh) {
I_row /= 2;
}
+ I_row /= (bw * bh);
// left
- if (mi_col > 0) {
- nb = center - MI_SIZE * mi_width;
- I_col += fn_ptr->sdf(center, stride, nb, stride);
+ if (ref_w - bw >= 0) {
+ nb = target - bw;
+ nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+ I_col += nb_dist - search_dist;
}
- // right
- if (mi_col < mi_cols - 1) {
- nb = center + MI_SIZE * mi_width;
- I_col += fn_ptr->sdf(center, stride, nb, stride);
+ // right
+ if (ref_w + bw < width - bw) {
+ nb = target + bw;
+ nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride);
+ I_col += nb_dist - search_dist;
}
- if (mi_col > 0 && mi_col < mi_cols - 1) {
+ if (ref_w - bw >= 0 && ref_w + bw < width - bw) {
I_col /= 2;
}
+ I_col /= (bw * bh);
M[row * cols + col][0] = I_row * I_row;
M[row * cols + col][1] = I_row * I_col;
M[row * cols + col][2] = I_col * I_row;
--- a/vp9/encoder/vp9_non_greedy_mv.h
+++ b/vp9/encoder/vp9_non_greedy_mv.h
@@ -39,6 +39,10 @@
MotionField (*motion_field_array)[3][SQUARE_BLOCK_SIZES];
} MotionFieldInfo;
+typedef struct {
+ float row, col;
+} FloatMV;
+
static INLINE int get_square_block_idx(BLOCK_SIZE bsize) {
if (bsize == BLOCK_4X4) {
return 0;
@@ -88,14 +92,15 @@
void vp9_get_smooth_motion_field(const MV *search_mf,
const int (*M)[MF_LOCAL_STRUCTURE_SIZE],
- int rows, int cols, float alpha, int num_iters,
- MV *smooth_mf);
+ int rows, int cols, BLOCK_SIZE bize,
+ float alpha, int num_iters, MV *smooth_mf);
-void vp9_get_local_structure(const YV12_BUFFER_CONFIG *ref_frame,
- const vp9_variance_fn_ptr_t *fn_ptr, int mi_rows,
- int mi_cols, BLOCK_SIZE bsize,
+void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame,
+ const YV12_BUFFER_CONFIG *ref_frame,
+ const MV *search_mf,
+ const vp9_variance_fn_ptr_t *fn_ptr, int rows,
+ int cols, BLOCK_SIZE bsize,
int (*M)[MF_LOCAL_STRUCTURE_SIZE]);
-
#ifdef __cplusplus
} // extern "C"
#endif