ref: 4e9c0d2ad4f97c096e69bbafb70dcb578b97e494
parent: 9ed23de13f0ddfebea2a7ff6caa370126dd84979
author: Deb Mukherjee <[email protected]>
date: Wed Sep 24 09:25:34 EDT 2014
Adds two new subpel search methods. One is a more aggressive version of the pruned subpel tree search, where only a single half-pel candidate, chosen from a surface-fit result, is searched. The other is a method that obtains the subpel position in one shot from the same surface fit. Neither method has been deployed in any speed setting yet.

Change-Id: I34fef3f2e34f11396c9d1ba97f4be8c4ffca62d3
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -286,7 +286,71 @@
bestmv->row *= 8; \
bestmv->col *= 8;
-int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
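+// Computes the error at the center (starting) subpel position and
+// initializes besterr with the motion vector cost added in. When a second
+// predictor is supplied, the error is measured against the compound
+// average prediction.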
+#if CONFIG_VP9_HIGHBITDEPTH
+#define SETUP_CENTER_ERROR \
+ if (second_pred != NULL) { \
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { \
+ DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64); \
+ vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, \
+ y_stride); \
+ besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride, \
+ sse1); \
+ } else { \
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
+ vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
+ besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
+ } \
+ } else { \
+ besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
+ } \
+ *distortion = besterr; \
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+#else
+
+#define SETUP_CENTER_ERROR \
+ if (second_pred != NULL) { \
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
+ vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
+ besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
+ } else { \
+ besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
+ } \
+ *distortion = besterr; \
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
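+// Rounds n/d to the nearest integer, with ties rounded away from zero.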
+static INLINE int divide_and_round(const int n, const int d) {
+ return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
+}
+
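+// The surface fit in get_cost_surf_min() below is only valid when the
+// center of the 5-point SAD list is a local minimum, i.e. the cost
+// surface is locally convex.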
+static INLINE int is_sad_list_wellbehaved(int *sad_list) {
+  return sad_list[0] < sad_list[1] &&
+         sad_list[0] < sad_list[2] &&
+         sad_list[0] < sad_list[3] &&
+         sad_list[0] < sad_list[4];
+}
+
+// Returns a surface minimum estimate at the given precision, in units of
+// 1/2^bits pel.
+// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C.
+// For a given set of costs S0, S1, S2, S3, S4 at points
+// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
+// the location of the minimum (x0, y0) is given by:
+// x0 = 1/2 (S1 - S3) / (S1 + S3 - 2*S0),
+// y0 = 1/2 (S4 - S2) / (S4 + S2 - 2*S0).
+// The code below is an integerized version of that.
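+// For illustration (hypothetical values): with SADs (S0..S4) =
+// (100, 120, 112, 116, 118) and bits = 3 (1/8-pel units), this yields
+// ic = round(16/36) = 0 and ir = round(24/30) = 1, i.e. an estimated
+// minimum at a (row, col) offset of (1, 0) in 1/8-pel units.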
+static void get_cost_surf_min(int *sad_list, int *ir, int *ic,
+ int bits) {
+ *ic = divide_and_round((sad_list[1] - sad_list[3]) * (1 << (bits - 1)),
+ (sad_list[1] - 2 * sad_list[0] + sad_list[3]));
+ *ir = divide_and_round((sad_list[4] - sad_list[2]) * (1 << (bits - 1)),
+ (sad_list[4] - 2 * sad_list[0] + sad_list[2]));
+}
+
+int vp9_find_best_sub_pixel_surface_fit(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
int error_per_bit,
@@ -300,30 +364,127 @@
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
- if (second_pred != NULL) {
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
- vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
- y_stride);
- besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
- sse1);
- } else {
- DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
- vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
- besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
+ SETUP_CENTER_ERROR;
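+  // The one-shot surface fit does not iterate, so the iterative-search
+  // parameters are unused; the casts below only silence compiler warnings.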
+ (void) halfiters;
+ (void) quarteriters;
+ (void) eighthiters;
+ (void) whichdir;
+ (void) allow_hp;
+ (void) forced_stop;
+ (void) hstep;
+
+ if (sad_list &&
+ sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
+ sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
+ sad_list[4] != INT_MAX &&
+ is_sad_list_wellbehaved(sad_list)) {
+ int ir, ic;
+ unsigned int minpt;
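+    // bits = 3: estimate the minimum location directly in 1/8-pel units.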
+ get_cost_surf_min(sad_list, &ir, &ic, 3);
+ if (ir != 0 || ic != 0) {
+ CHECK_BETTER(minpt, tr + ir, tc + ic);
}
-#else
- DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
- vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
- besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
-#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+
+ bestmv->row = br;
+ bestmv->col = bc;
+
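+  // Reject the result if it strays beyond the valid full-pel range around
+  // the reference MV (MV coordinates are in 1/8-pel units).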
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
+int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
+ MV *bestmv, const MV *ref_mv,
+ int allow_hp,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *sad_list,
+ int *mvjcost, int *mvcost[2],
+ int *distortion,
+ unsigned int *sse1,
+ const uint8_t *second_pred,
+ int w, int h) {
+ SETUP_SUBPEL_SEARCH;
+ SETUP_CENTER_ERROR;
+ if (sad_list &&
+ sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
+ sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
+ sad_list[4] != INT_MAX &&
+ is_sad_list_wellbehaved(sad_list)) {
+ unsigned int minpt;
+ int ir, ic;
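+    // bits = 1: estimate the minimum in 1/2-pel units, so only the single
+    // half-pel candidate (tr + ir * hstep, tc + ic * hstep) is checked.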
+ get_cost_surf_min(sad_list, &ir, &ic, 1);
+ if (ir != 0 || ic != 0) {
+ CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
+ }
} else {
- besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
+ FIRST_LEVEL_CHECKS;
+ if (halfiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
}
- *distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+ tr = br;
+ tc = bc;
+
+  // Each subsequent iteration checks at least one point in common with
+  // the last iteration (two, if the diagonal point was selected).
+
+  // Note forced_stop: 0 - search down to 1/8 pel,
+  // 1 - stop at 1/4 pel, 2 - stop at 1/2 pel.
+ if (forced_stop != 2) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (quarteriters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+ }
+
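+  // 1/8-pel search, performed only when high-precision MVs are allowed
+  // for this reference MV and no forced stop is in effect.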
+ if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (eighthiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+ }
+  // These lines ensure static analysis doesn't warn that
+  // tr and tc aren't used after this point.
+ (void) tr;
+ (void) tc;
+
+ bestmv->row = br;
+ bestmv->col = bc;
+
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
+int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
+ MV *bestmv, const MV *ref_mv,
+ int allow_hp,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *sad_list,
+ int *mvjcost, int *mvcost[2],
+ int *distortion,
+ unsigned int *sse1,
+ const uint8_t *second_pred,
+ int w, int h) {
+ SETUP_SUBPEL_SEARCH;
+ SETUP_CENTER_ERROR;
if (sad_list &&
sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
@@ -415,29 +576,7 @@
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
- if (second_pred != NULL) {
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
- vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
- y_stride);
- besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
- sse1);
- } else {
- DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
- vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
- besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
- }
-#else
- DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
- vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
- besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
-#endif // CONFIG_VP9_HIGHBITDEPTH
- } else {
- besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
- }
- *distortion = besterr;
- besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+ SETUP_CENTER_ERROR;
(void) sad_list; // to silence compiler warning
// Each subsequent iteration checks at least one point in
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -108,6 +108,8 @@
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more;
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_surface_fit;
typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
const MV *ref_mv, int sad_per_bit,
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -421,6 +421,10 @@
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned;
+ } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
+ cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_more;
+ } else if (sf->mv.subpel_search_method == SUBPEL_SURFACE_FIT) {
+ cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_surface_fit;
}
cpi->mb.optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -79,6 +79,8 @@
typedef enum {
SUBPEL_TREE = 0,
SUBPEL_TREE_PRUNED = 1,
+ SUBPEL_TREE_PRUNED_MORE = 2,
+ SUBPEL_SURFACE_FIT = 3,
// Other methods to come
} SUBPEL_SEARCH_METHODS;