ref: 2158909fc3a7fa5f03eeaac0790235ce94fc9fa3
parent: 9e3bcdd135da34000697dfc175aab6e3c5862a2a
parent: 1ba91a84ad1e1d2486e2aa68822ed147084cd4c8
author: Deb Mukherjee <[email protected]>
date: Thu Aug 8 08:26:55 EDT 2013
Merge "Adds a new subpel motion function"
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -245,6 +245,71 @@
}, \
v = INT_MAX;)
+#define FIRST_LEVEL_CHECKS \
+ { \
+ unsigned int left, right, up, down, diag; \
+ CHECK_BETTER(left, tr, tc - hstep); \
+ CHECK_BETTER(right, tr, tc + hstep); \
+ CHECK_BETTER(up, tr - hstep, tc); \
+ CHECK_BETTER(down, tr + hstep, tc); \
+ whichdir = (left < right ? 0 : 1) + \
+ (up < down ? 0 : 2); \
+ switch (whichdir) { \
+ case 0: \
+ CHECK_BETTER(diag, tr - hstep, tc - hstep); \
+ break; \
+ case 1: \
+ CHECK_BETTER(diag, tr - hstep, tc + hstep); \
+ break; \
+ case 2: \
+ CHECK_BETTER(diag, tr + hstep, tc - hstep); \
+ break; \
+ case 3: \
+ CHECK_BETTER(diag, tr + hstep, tc + hstep); \
+ break; \
+ } \
+ }
+
+#define SECOND_LEVEL_CHECKS \
+ { \
+ int kr, kc; \
+ unsigned int second; \
+ if (tr != br && tc != bc) { \
+ kr = br - tr; \
+ kc = bc - tc; \
+ CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
+ CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
+ } else if (tr == br && tc != bc) { \
+ kc = bc - tc; \
+ CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
+ CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
+ switch (whichdir) { \
+ case 0: \
+ case 1: \
+ CHECK_BETTER(second, tr + hstep, tc + kc); \
+ break; \
+ case 2: \
+ case 3: \
+ CHECK_BETTER(second, tr - hstep, tc + kc); \
+ break; \
+ } \
+ } else if (tr != br && tc == bc) { \
+ kr = br - tr; \
+ CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
+ CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
+ switch (whichdir) { \
+ case 0: \
+ case 2: \
+ CHECK_BETTER(second, tr + kr, tc + hstep); \
+ break; \
+ case 1: \
+ case 3: \
+ CHECK_BETTER(second, tr + kr, tc - hstep); \
+ break; \
+ } \
+ } \
+ }
+
int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
@@ -261,7 +326,6 @@
int rr, rc, br, bc, hstep;
int tr, tc;
unsigned int besterr = INT_MAX;
- unsigned int left, right, up, down, diag;
unsigned int sse;
unsigned int whichdir;
unsigned int halfiters = iters_per_step;
@@ -306,32 +370,10 @@
// common with the last iteration could be 2 ( if diag selected)
while (halfiters--) {
// 1/2 pel
- CHECK_BETTER(left, tr, tc - hstep);
- CHECK_BETTER(right, tr, tc + hstep);
- CHECK_BETTER(up, tr - hstep, tc);
- CHECK_BETTER(down, tr + hstep, tc);
-
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
- switch (whichdir) {
- case 0:
- CHECK_BETTER(diag, tr - hstep, tc - hstep);
- break;
- case 1:
- CHECK_BETTER(diag, tr - hstep, tc + hstep);
- break;
- case 2:
- CHECK_BETTER(diag, tr + hstep, tc - hstep);
- break;
- case 3:
- CHECK_BETTER(diag, tr + hstep, tc + hstep);
- break;
- }
-
+ FIRST_LEVEL_CHECKS;
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
-
tr = br;
tc = bc;
}
@@ -343,32 +385,10 @@
if (forced_stop != 2) {
hstep >>= 1;
while (quarteriters--) {
- CHECK_BETTER(left, tr, tc - hstep);
- CHECK_BETTER(right, tr, tc + hstep);
- CHECK_BETTER(up, tr - hstep, tc);
- CHECK_BETTER(down, tr + hstep, tc);
-
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
- switch (whichdir) {
- case 0:
- CHECK_BETTER(diag, tr - hstep, tc - hstep);
- break;
- case 1:
- CHECK_BETTER(diag, tr - hstep, tc + hstep);
- break;
- case 2:
- CHECK_BETTER(diag, tr + hstep, tc - hstep);
- break;
- case 3:
- CHECK_BETTER(diag, tr + hstep, tc + hstep);
- break;
- }
-
+ FIRST_LEVEL_CHECKS;
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
-
tr = br;
tc = bc;
}
@@ -378,32 +398,10 @@
forced_stop == 0) {
hstep >>= 1;
while (eighthiters--) {
- CHECK_BETTER(left, tr, tc - hstep);
- CHECK_BETTER(right, tr, tc + hstep);
- CHECK_BETTER(up, tr - hstep, tc);
- CHECK_BETTER(down, tr + hstep, tc);
-
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
- switch (whichdir) {
- case 0:
- CHECK_BETTER(diag, tr - hstep, tc - hstep);
- break;
- case 1:
- CHECK_BETTER(diag, tr - hstep, tc + hstep);
- break;
- case 2:
- CHECK_BETTER(diag, tr + hstep, tc - hstep);
- break;
- case 3:
- CHECK_BETTER(diag, tr + hstep, tc + hstep);
- break;
- }
-
+ FIRST_LEVEL_CHECKS;
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
-
tr = br;
tc = bc;
}
@@ -419,6 +417,105 @@
return besterr;
}
+int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
+ int_mv *bestmv, int_mv *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *mvjcost, int *mvcost[2],
+ int *distortion,
+ unsigned int *sse1) {
+ uint8_t *z = x->plane[0].src.buf;
+ int src_stride = x->plane[0].src.stride;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int rr, rc, br, bc, hstep;
+ int tr, tc;
+ unsigned int besterr = INT_MAX;
+ unsigned int sse;
+ unsigned int whichdir;
+ int thismse;
+ int maxc, minc, maxr, minr;
+ int y_stride;
+ int offset;
+ unsigned int halfiters = iters_per_step;
+ unsigned int quarteriters = iters_per_step;
+ unsigned int eighthiters = iters_per_step;
+
+ uint8_t *y = xd->plane[0].pre[0].buf +
+ (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
+ bestmv->as_mv.col;
+
+ y_stride = xd->plane[0].pre[0].stride;
+
+ rr = ref_mv->as_mv.row;
+ rc = ref_mv->as_mv.col;
+ br = bestmv->as_mv.row << 3;
+ bc = bestmv->as_mv.col << 3;
+ hstep = 4;
+ minc = MAX(x->mv_col_min << 3,
+ (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
+ maxc = MIN(x->mv_col_max << 3,
+ (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
+ minr = MAX(x->mv_row_min << 3,
+ (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
+ maxr = MIN(x->mv_row_max << 3,
+ (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));
+
+ tr = br;
+ tc = bc;
+
+ offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
+
+ // central mv
+ bestmv->as_mv.row <<= 3;
+ bestmv->as_mv.col <<= 3;
+
+ // calculate central point error
+ besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+ // 1/2 pel
+ FIRST_LEVEL_CHECKS;
+ if (halfiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+
+ // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+ if (forced_stop != 2) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (quarteriters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+ }
+
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ forced_stop == 0) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (eighthiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+ }
+
+ bestmv->as_mv.row = br;
+ bestmv->as_mv.col = bc;
+
+ if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
#undef DIST
/* returns subpixel variance error function */
#define DIST(r, c) \
@@ -443,7 +540,6 @@
int rr, rc, br, bc, hstep;
int tr, tc;
unsigned int besterr = INT_MAX;
- unsigned int left, right, up, down, diag;
unsigned int sse;
unsigned int whichdir;
unsigned int halfiters = iters_per_step;
@@ -478,7 +574,6 @@
tr = br;
tc = bc;
-
offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
// central mv
@@ -497,32 +592,10 @@
// common with the last iteration could be 2 ( if diag selected)
while (halfiters--) {
// 1/2 pel
- CHECK_BETTER(left, tr, tc - hstep);
- CHECK_BETTER(right, tr, tc + hstep);
- CHECK_BETTER(up, tr - hstep, tc);
- CHECK_BETTER(down, tr + hstep, tc);
-
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
- switch (whichdir) {
- case 0:
- CHECK_BETTER(diag, tr - hstep, tc - hstep);
- break;
- case 1:
- CHECK_BETTER(diag, tr - hstep, tc + hstep);
- break;
- case 2:
- CHECK_BETTER(diag, tr + hstep, tc - hstep);
- break;
- case 3:
- CHECK_BETTER(diag, tr + hstep, tc + hstep);
- break;
- }
-
+ FIRST_LEVEL_CHECKS;
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
-
tr = br;
tc = bc;
}
@@ -534,32 +607,10 @@
if (forced_stop != 2) {
hstep >>= 1;
while (quarteriters--) {
- CHECK_BETTER(left, tr, tc - hstep);
- CHECK_BETTER(right, tr, tc + hstep);
- CHECK_BETTER(up, tr - hstep, tc);
- CHECK_BETTER(down, tr + hstep, tc);
-
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
- switch (whichdir) {
- case 0:
- CHECK_BETTER(diag, tr - hstep, tc - hstep);
- break;
- case 1:
- CHECK_BETTER(diag, tr - hstep, tc + hstep);
- break;
- case 2:
- CHECK_BETTER(diag, tr + hstep, tc - hstep);
- break;
- case 3:
- CHECK_BETTER(diag, tr + hstep, tc + hstep);
- break;
- }
-
+ FIRST_LEVEL_CHECKS;
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
-
tr = br;
tc = bc;
}
@@ -569,35 +620,123 @@
forced_stop == 0) {
hstep >>= 1;
while (eighthiters--) {
- CHECK_BETTER(left, tr, tc - hstep);
- CHECK_BETTER(right, tr, tc + hstep);
- CHECK_BETTER(up, tr - hstep, tc);
- CHECK_BETTER(down, tr + hstep, tc);
-
- whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
- switch (whichdir) {
- case 0:
- CHECK_BETTER(diag, tr - hstep, tc - hstep);
- break;
- case 1:
- CHECK_BETTER(diag, tr - hstep, tc + hstep);
- break;
- case 2:
- CHECK_BETTER(diag, tr + hstep, tc - hstep);
- break;
- case 3:
- CHECK_BETTER(diag, tr + hstep, tc + hstep);
- break;
- }
-
+ FIRST_LEVEL_CHECKS;
// no reason to check the same one again.
if (tr == br && tc == bc)
break;
-
tr = br;
tc = bc;
}
+ }
+ bestmv->as_mv.row = br;
+ bestmv->as_mv.col = bc;
+
+ if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
+int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
+ int_mv *bestmv, int_mv *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *mvjcost, int *mvcost[2],
+ int *distortion,
+ unsigned int *sse1,
+ const uint8_t *second_pred,
+ int w, int h) {
+ uint8_t *z = x->plane[0].src.buf;
+ int src_stride = x->plane[0].src.stride;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int rr, rc, br, bc, hstep;
+ int tr, tc;
+ unsigned int besterr = INT_MAX;
+ unsigned int sse;
+ unsigned int whichdir;
+ int thismse;
+ int maxc, minc, maxr, minr;
+ int y_stride;
+ int offset;
+ unsigned int halfiters = iters_per_step;
+ unsigned int quarteriters = iters_per_step;
+ unsigned int eighthiters = iters_per_step;
+
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
+ uint8_t *y = xd->plane[0].pre[0].buf +
+ (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
+ bestmv->as_mv.col;
+
+ y_stride = xd->plane[0].pre[0].stride;
+
+ rr = ref_mv->as_mv.row;
+ rc = ref_mv->as_mv.col;
+ br = bestmv->as_mv.row << 3;
+ bc = bestmv->as_mv.col << 3;
+ hstep = 4;
+ minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) -
+ ((1 << MV_MAX_BITS) - 1));
+ maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) +
+ ((1 << MV_MAX_BITS) - 1));
+ minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) -
+ ((1 << MV_MAX_BITS) - 1));
+ maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) +
+ ((1 << MV_MAX_BITS) - 1));
+
+ tr = br;
+ tc = bc;
+
+
+ offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
+
+ // central mv
+ bestmv->as_mv.row <<= 3;
+ bestmv->as_mv.col <<= 3;
+
+ // calculate central point error
+ // TODO(yunqingwang): central pointer error was already calculated in full-
+ // pixel search, and can be passed in this function.
+ comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
+ besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+ // Each subsequent iteration checks at least one point in
+ // common with the last iteration could be 2 ( if diag selected)
+ // 1/2 pel
+ FIRST_LEVEL_CHECKS;
+ if (halfiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+
+ // Each subsequent iteration checks at least one point in common with
+ // the last iteration could be 2 ( if diag selected) 1/4 pel
+
+ // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+ if (forced_stop != 2) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (quarteriters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+ }
+
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ forced_stop == 0) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (eighthiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
}
bestmv->as_mv.row = br;
bestmv->as_mv.col = bc;
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -80,7 +80,22 @@
int *distortion,
unsigned int *sse);
extern fractional_mv_step_fp vp9_find_best_sub_pixel_iterative;
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
+typedef int (fractional_mv_step_comp_fp) (
+ MACROBLOCK *x,
+ int_mv *bestmv, int_mv *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
+ int iters_per_step,
+ int *mvjcost, int *mvcost[2],
+ int *distortion, unsigned int *sse1,
+ const uint8_t *second_pred,
+ int w, int h);
+extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_iterative;
+extern fractional_mv_step_comp_fp vp9_find_best_sub_pixel_comp_tree;
+
typedef int (*vp9_full_search_fn_t)(MACROBLOCK *x,
int_mv *ref_mv, int sad_per_bit,
int distance, vp9_variance_fn_ptr_t *fn_ptr,
@@ -101,18 +116,6 @@
vp9_variance_fn_ptr_t *fn_ptr,
int *mvjcost, int *mvcost[2],
int_mv *center_mv);
-
-int vp9_find_best_sub_pixel_comp_iterative(
- MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
- int iters_per_step,
- int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
- const uint8_t *second_pred,
- int w, int h);
int vp9_refining_search_8p_c(MACROBLOCK *x,
int_mv *ref_mv, int error_per_bit,
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -709,8 +709,8 @@
sf->search_method = NSTEP;
sf->auto_filter = 1;
sf->recode_loop = 1;
- sf->subpel_search_method = SUBPEL_ITERATIVE;
- sf->subpel_iters_per_step = 3;
+ sf->subpel_search_method = SUBPEL_TREE;
+ sf->subpel_iters_per_step = 2;
sf->optimize_coefficients = !cpi->oxcf.lossless;
sf->reduce_first_step_size = 0;
sf->auto_mv_step_size = 0;
@@ -829,7 +829,7 @@
(MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
sf->auto_mv_step_size = 1;
sf->search_method = SQUARE;
- sf->subpel_iters_per_step = 2;
+ sf->subpel_iters_per_step = 1;
}
if (speed == 3) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZE_TYPES;
@@ -923,9 +923,10 @@
if (cpi->sf.subpel_search_method == SUBPEL_ITERATIVE) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_iterative;
- } else {
- // TODO(debargha): Other methods to come
- assert(0);
+ cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_iterative;
+ } else if (cpi->sf.subpel_search_method == SUBPEL_TREE) {
+ cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
+ cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree;
}
cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1;
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -234,6 +234,7 @@
typedef enum {
SUBPEL_ITERATIVE = 0,
+ SUBPEL_TREE = 1,
// Other methods to come
} SUBPEL_SEARCH_METHODS;
@@ -534,6 +535,7 @@
unsigned int active_map_enabled;
fractional_mv_step_fp *find_fractional_mv_step;
+ fractional_mv_step_comp_fp *find_fractional_mv_step_comp;
vp9_full_search_fn_t full_search_sad;
vp9_refining_search_fn_t refining_search_sad;
vp9_diamond_search_fn_t diamond_search_sad;
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2663,7 +2663,7 @@
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
- bestsme = vp9_find_best_sub_pixel_comp_iterative(
+ bestsme = cpi->find_fractional_mv_step_comp(
x, &tmp_mv,
&ref_mv[id],
x->errorperbit,