ref: 2d86965fee8636f30bd5575f2cf8f6b4f5bfd475
parent: 4788d620661a42ff99f63c53fbf9c8b95ba301ca
parent: a7aca1b5affedad769df671bff63f0999cda0e62
author: Angie Chiang <[email protected]>
date: Wed Sep 26 15:06:29 EDT 2018
Merge changes If96a8a1c,Iaf535fde,Icbde9880 * changes: Add vp9_full_pixel_diamond_new Add vp9_refining_search_sad_new Add vp9_diamond_search_sad_new
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1577,6 +1577,151 @@
return best_sad;
}
+#if CONFIG_NON_GREEDY_MV
+static double nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
+ double lambda) {
+ int i;
+ double best_cost = -1;
+ vpx_clear_system_state();
+ for (i = 0; i < NB_MVS_NUM; ++i) {
+ if (nb_mvs[i].as_int != INVALID_MV) {
+ MV nb_mv = nb_mvs[i].as_mv;
+ const double row_diff = mv->row - nb_mv.row;
+ const double col_diff = mv->col - nb_mv.col;
+ double cost = row_diff * row_diff + col_diff * col_diff;
+ cost = log2(1 + cost);
+ if (best_cost < 0) {
+ best_cost = cost;
+ } else {
+ best_cost = cost < best_cost ? cost : best_cost;
+ }
+ }
+ }
+ if (best_cost < 0) {
+ return 0;
+ } else {
+ return best_cost * lambda;
+ }
+}
+
+double vp9_diamond_search_sad_new(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ const MV *init_full_mv, MV *best_full_mv,
+ int search_param, double lambda, int *num00,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs) {
+ int i, j, step;
+
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ uint8_t *what = x->plane[0].src.buf;
+ const int what_stride = x->plane[0].src.stride;
+ const uint8_t *in_what;
+ const int in_what_stride = xd->plane[0].pre[0].stride;
+ const uint8_t *best_address;
+
+ double bestsad;
+ int best_site = -1;
+ int last_site = -1;
+
+ // search_param determines the length of the initial step and hence the number
+ // of iterations.
+ // 0 = initial step (MAX_FIRST_STEP) pel
+ // 1 = (MAX_FIRST_STEP/2) pel,
+ // 2 = (MAX_FIRST_STEP/4) pel...
+ // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
+ const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
+ const int tot_steps = cfg->total_steps - search_param;
+ vpx_clear_system_state();
+
+ *best_full_mv = *init_full_mv;
+ clamp_mv(best_full_mv, x->mv_limits.col_min, x->mv_limits.col_max,
+ x->mv_limits.row_min, x->mv_limits.row_max);
+ *num00 = 0;
+
+ // Work out the start point for the search
+ in_what = xd->plane[0].pre[0].buf + best_full_mv->row * in_what_stride +
+ best_full_mv->col;
+ best_address = in_what;
+
+ // Check the starting position
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
+ nb_mvs_inconsistency(best_full_mv, nb_full_mvs, lambda);
+
+ i = 0;
+
+ for (step = 0; step < tot_steps; step++) {
+ int all_in = 1, t;
+
+ // All_in is true if every one of the points we are checking are within
+ // the bounds of the image.
+ all_in &= ((best_full_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
+ all_in &= ((best_full_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
+ all_in &= ((best_full_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
+ all_in &= ((best_full_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
+
+ // If all the pixels are within the bounds we don't check whether the
+ // search point is valid in this loop, otherwise we check each point
+ // for validity..
+ if (all_in) {
+ unsigned int sad_array[4];
+
+ for (j = 0; j < cfg->searches_per_step; j += 4) {
+ unsigned char const *block_offset[4];
+
+ for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
+
+ fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
+ sad_array);
+
+ for (t = 0; t < 4; t++, i++) {
+ if (sad_array[t] < bestsad) {
+ const MV this_mv = { best_full_mv->row + ss_mv[i].row,
+ best_full_mv->col + ss_mv[i].col };
+ double thissad = sad_array[t] + nb_mvs_inconsistency(
+ &this_mv, nb_full_mvs, lambda);
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_site = i;
+ }
+ }
+ }
+ }
+ } else {
+ for (j = 0; j < cfg->searches_per_step; j++) {
+ // Trap illegal vectors
+ const MV this_mv = { best_full_mv->row + ss_mv[i].row,
+ best_full_mv->col + ss_mv[i].col };
+
+ if (is_mv_in(&x->mv_limits, &this_mv)) {
+ const uint8_t *const check_here = ss_os[i] + best_address;
+ double thissad =
+ fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
+
+ if (thissad < bestsad) {
+ thissad += nb_mvs_inconsistency(&this_mv, nb_full_mvs, lambda);
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_site = i;
+ }
+ }
+ }
+ i++;
+ }
+ }
+ if (best_site != last_site) {
+ best_full_mv->row += ss_mv[best_site].row;
+ best_full_mv->col += ss_mv[best_site].col;
+ best_address += ss_os[best_site];
+ last_site = best_site;
+ } else if (best_address == in_what) {
+ (*num00)++;
+ }
+ }
+ return bestsad;
+}
+#endif // CONFIG_NON_GREEDY_MV
+
int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
MV *ref_mv, MV *best_mv, int search_param,
int sad_per_bit, int *num00,
@@ -1931,10 +2076,65 @@
return best_sad;
}
+#if CONFIG_NON_GREEDY_MV
// Runs sequence of diamond searches in smaller steps for RD.
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
point as the best match, we will do a final 1-away diamond
refining search */
+double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
+ MV *mvp_full, int step_param, double lambda,
+ int further_steps, int do_refine,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, MV *dst_mv) {
+ MV temp_mv;
+ int n, num00 = 0;
+ double thissme;
+ double bestsme =
+ vp9_diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
+ step_param, lambda, &n, fn_ptr, nb_full_mvs);
+ *dst_mv = temp_mv;
+
+ // If there won't be more n-step search, check to see if refining search is
+ // needed.
+ if (n > further_steps) do_refine = 0;
+
+ while (n < further_steps) {
+ ++n;
+ if (num00) {
+ num00--;
+ } else {
+ thissme = vp9_diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
+ step_param + n, lambda, &num00,
+ fn_ptr, nb_full_mvs);
+ // check to see if refining search is needed.
+ if (num00 > further_steps - n) do_refine = 0;
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ *dst_mv = temp_mv;
+ }
+ }
+ }
+
+ // final 1-away diamond refining search
+ if (do_refine) {
+ const int search_range = 8;
+ MV best_mv = *dst_mv;
+ thissme = vp9_refining_search_sad_new(x, &best_mv, lambda, search_range,
+ fn_ptr, nb_full_mvs);
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ *dst_mv = best_mv;
+ }
+ }
+ return bestsme;
+}
+#endif // CONFIG_NON_GREEDY_MV
+
+// Runs sequence of diamond searches in smaller steps for RD.
+/* do_refine: If last step (1-away) of n-step search doesn't pick the center
+ point as the best match, we will do a final 1-away diamond
+ refining search */
static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
int step_param, int sadpb, int further_steps,
int do_refine, int *cost_list,
@@ -2053,6 +2253,83 @@
}
return bestsme;
}
+
+#if CONFIG_NON_GREEDY_MV
+double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
+ double lambda, int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv);
+ double best_sad =
+ fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
+ nb_mvs_inconsistency(best_full_mv, nb_full_mvs, lambda);
+ int i, j;
+ vpx_clear_system_state();
+
+ for (i = 0; i < search_range; i++) {
+ int best_site = -1;
+ const int all_in = ((best_full_mv->row - 1) > x->mv_limits.row_min) &
+ ((best_full_mv->row + 1) < x->mv_limits.row_max) &
+ ((best_full_mv->col - 1) > x->mv_limits.col_min) &
+ ((best_full_mv->col + 1) < x->mv_limits.col_max);
+
+ if (all_in) {
+ unsigned int sads[4];
+ const uint8_t *const positions[4] = { best_address - in_what->stride,
+ best_address - 1, best_address + 1,
+ best_address + in_what->stride };
+
+ fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
+
+ for (j = 0; j < 4; ++j) {
+ double thissad = sads[j];
+ if (sads[j] < best_sad) {
+ const MV mv = { best_full_mv->row + neighbors[j].row,
+ best_full_mv->col + neighbors[j].col };
+
+ thissad += nb_mvs_inconsistency(&mv, nb_full_mvs, lambda);
+ if (thissad < best_sad) {
+ best_sad = thissad;
+ best_site = j;
+ }
+ }
+ }
+ } else {
+ for (j = 0; j < 4; ++j) {
+ const MV mv = { best_full_mv->row + neighbors[j].row,
+ best_full_mv->col + neighbors[j].col };
+
+ if (is_mv_in(&x->mv_limits, &mv)) {
+ double thissad =
+ fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride);
+ if (thissad < best_sad) {
+ thissad += nb_mvs_inconsistency(&mv, nb_full_mvs, lambda);
+ if (thissad < best_sad) {
+ best_sad = thissad;
+ best_site = j;
+ }
+ }
+ }
+ }
+ }
+
+ if (best_site == -1) {
+ break;
+ } else {
+ best_full_mv->row += neighbors[best_site].row;
+ best_full_mv->col += neighbors[best_site].col;
+ best_address = get_buf_from_mv(in_what, best_full_mv);
+ }
+ }
+
+ return best_sad;
+}
+#endif // CONFIG_NON_GREEDY_MV
int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
int search_range,
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -119,6 +119,19 @@
const MvLimits *umv_window_limits,
const MV *ref_mv);
+#if CONFIG_NON_GREEDY_MV
+#define NB_MVS_NUM 4
+double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
+ double lambda, int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs);
+
+double vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
+ MV *mvp_full, int step_param, double lambda,
+ int further_steps, int do_refine,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, MV *dst_mv);
+#endif // CONFIG_NON_GREEDY_MV
#ifdef __cplusplus
} // extern "C"
#endif