shithub: libvpx

Download patch

ref: cde6b50cb15947be71014c011824f962e30bb6f6
parent: 1d34991139e66f25a648e53fccef0df9b563f86b
parent: 360c80cfb95559b3a49fc4624a24dee01e1abb6a
author: Yunqing Wang <[email protected]>
date: Thu Feb 20 07:07:10 EST 2014

Merge "Use fast HEX search in real time mode"

--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -855,6 +855,206 @@
                             square_num_candidates, square_candidates);
 };
 
+// Number of candidates in first hex search
+#define FIRST_HEX_CANDIDATES 6
+// Number of possible previous best-match directions (rows of next_chkpts)
+#define PRE_BEST_CANDIDATE 6
+// Number of candidates in following hex search
+#define NEXT_HEX_CANDIDATES 3
+// Number of candidates in refining search
+#define REFINE_CANDIDATES 4
+
+// Fast hexagon-pattern motion search over integer positions.
+//
+// The search starts at ref_mv, which is clamped in place to the MV limits
+// stored in x. It first tests the six points in hex[] around the start. While
+// a better point is found, it recenters there and tests only the three points
+// that next_chkpts[] lists for the direction just taken (bounded by
+// max_hex_search). Finally it refines the result by repeatedly testing the
+// four 1-away neighbors (bounded by max_dia_search).
+//
+// ref_mv       in/out: start position; modified by the clamp.
+// search_param not referenced in this function.
+// sad_per_bit  weight passed to mvsad_err_cost() for the start point.
+// vfp          supplies the SAD function (vfp->sdf).
+// use_mvcost   not referenced directly here; presumably read by CHECK_BETTER,
+//              which is defined outside this hunk -- TODO confirm.
+// center_mv    each component is shifted right by 3 to form the center used
+//              for the MV cost.
+// best_mv      out: receives the best position found.
+//
+// Returns the lowest cost found (bestsad).
+int vp9_fast_hex_search(const MACROBLOCK *x,
+                        MV *ref_mv,
+                        int search_param,
+                        int sad_per_bit,
+                        const vp9_variance_fn_ptr_t *vfp,
+                        int use_mvcost,
+                        const MV *center_mv,
+                        MV *best_mv) {
+  const MACROBLOCKD* const xd = &x->e_mbd;
+  static const MV hex[FIRST_HEX_CANDIDATES] = {
+    { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}
+  };
+  static const MV next_chkpts[PRE_BEST_CANDIDATE][NEXT_HEX_CANDIDATES] = {
+    {{ -2, 0}, { -1, -2}, {1, -2}},
+    {{ -1, -2}, {1, -2}, {2, 0}},
+    {{1, -2}, {2, 0}, {1, 2}},
+    {{2, 0}, {1, 2}, { -1, 2}},
+    {{1, 2}, { -1, 2}, { -2, 0}},
+    {{ -1, 2}, { -2, 0}, { -1, -2}}
+  };
+  static const MV neighbors[REFINE_CANDIDATES] = {
+      {0, -1}, { -1, 0}, {1, 0}, {0, 1}
+  };
+  int i, j;
+
+  const uint8_t *what = x->plane[0].src.buf;
+  const int what_stride = x->plane[0].src.stride;
+  const int in_what_stride = xd->plane[0].pre[0].stride;
+  int br, bc;
+  MV this_mv;
+  unsigned int bestsad = 0x7fffffff;
+  unsigned int thissad;
+  const uint8_t *base_offset;
+  const uint8_t *this_offset;
+  int k = -1;          // hex[] index of the direction last moved in
+  int best_site = -1;  // index of the improving candidate, -1 if none
+  const int max_hex_search = 512;
+  const int max_dia_search = 32;
+
+  const int *mvjsadcost = x->nmvjointsadcost;
+  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
+
+  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+
+  // Adjust ref_mv to make sure it is within MV range
+  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+  br = ref_mv->row;
+  bc = ref_mv->col;
+
+  // Check the start point
+  base_offset = xd->plane[0].pre[0].buf;
+  this_offset = base_offset + (br * in_what_stride) + bc;
+  this_mv.row = br;
+  this_mv.col = bc;
+  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff)
+            + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
+                             sad_per_bit);
+
+  // Initial 6-point hex search; skip per-point range checks when
+  // check_bounds() accepts the whole 2-away neighborhood
+  if (check_bounds(x, br, bc, 2)) {
+    for (i = 0; i < FIRST_HEX_CANDIDATES; i++) {
+      this_mv.row = br + hex[i].row;
+      this_mv.col = bc + hex[i].col;
+      this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col;
+      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                         bestsad);
+      CHECK_BETTER
+    }
+  } else {
+    for (i = 0; i < FIRST_HEX_CANDIDATES; i++) {
+      this_mv.row = br + hex[i].row;
+      this_mv.col = bc + hex[i].col;
+      if (!is_mv_in(x, &this_mv))
+        continue;
+      this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col;
+      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                         bestsad);
+      CHECK_BETTER
+    }
+  }
+
+  // Continue hex search if we find a better match in first round
+  if (best_site != -1) {
+    br += hex[best_site].row;
+    bc += hex[best_site].col;
+    k = best_site;
+
+    // Allow search covering maximum MV range
+    for (j = 1; j < max_hex_search; j++) {
+      best_site = -1;
+
+      if (check_bounds(x, br, bc, 2)) {
+        for (i = 0; i < NEXT_HEX_CANDIDATES; i++) {
+          this_mv.row = br + next_chkpts[k][i].row;
+          this_mv.col = bc + next_chkpts[k][i].col;
+          this_offset = base_offset + (this_mv.row * in_what_stride) +
+              this_mv.col;
+          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                             bestsad);
+          CHECK_BETTER
+        }
+      } else {
+        for (i = 0; i < NEXT_HEX_CANDIDATES; i++) {
+          this_mv.row = br + next_chkpts[k][i].row;
+          this_mv.col = bc + next_chkpts[k][i].col;
+          if (!is_mv_in(x, &this_mv))
+            continue;
+          this_offset = base_offset + (this_mv.row * in_what_stride) +
+              this_mv.col;
+          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                             bestsad);
+          CHECK_BETTER
+        }
+      }
+
+      if (best_site == -1) {
+        break;
+      } else {
+        br += next_chkpts[k][best_site].row;
+        bc += next_chkpts[k][best_site].col;
+        // Rotate k to the hex[] index of the direction just taken
+        // (equivalent to (k + 5 + best_site) % 6 for k in 0..5).
+        k += 5 + best_site;
+        if (k >= 12) k -= 12;
+        else if (k >= 6) k -= 6;
+      }
+    }
+  }
+
+  // Refine: check 4 1-away neighbors until none of them is better
+  for (j = 0; j < max_dia_search; j++) {
+    best_site = -1;
+
+    if (check_bounds(x, br, bc, 1)) {
+      for (i = 0; i < REFINE_CANDIDATES; i++) {
+        this_mv.row = br + neighbors[i].row;
+        this_mv.col = bc + neighbors[i].col;
+        this_offset = base_offset + (this_mv.row * in_what_stride) +
+            this_mv.col;
+        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                           bestsad);
+        CHECK_BETTER
+      }
+    } else {
+      for (i = 0; i < REFINE_CANDIDATES; i++) {
+        this_mv.row = br + neighbors[i].row;
+        this_mv.col = bc + neighbors[i].col;
+        if (!is_mv_in(x, &this_mv))
+          continue;
+        this_offset = base_offset + (this_mv.row * in_what_stride) +
+            this_mv.col;
+        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
+                           bestsad);
+        CHECK_BETTER
+      }
+    }
+
+    if (best_site == -1) {
+      break;
+    } else {
+      br += neighbors[best_site].row;
+      bc += neighbors[best_site].col;
+    }
+  }
+
+  best_mv->row = br;
+  best_mv->col = bc;
+
+  return bestsad;
+}
+
 #undef CHECK_BETTER
 
 int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -75,6 +75,14 @@
                       int use_mvcost,
                       const MV *center_mv,
                       MV *best_mv);
+int vp9_fast_hex_search(const MACROBLOCK *x,
+                        MV *ref_mv,        // in/out: start MV, clamped in place
+                        int search_param,  // not referenced by the definition
+                        int sad_per_bit,
+                        const vp9_variance_fn_ptr_t *vfp,
+                        int use_mvcost,
+                        const MV *center_mv,
+                        MV *best_mv);      // out: best position found
 
 typedef int (fractional_mv_step_fp) (
     const MACROBLOCK *x,
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -857,6 +857,8 @@
     }
     sf->frame_parameter_update = 0;
     sf->encode_breakout_thresh = 1000;
+
+    sf->search_method = FAST_HEX;
   }
   if (speed >= 6) {
     sf->always_this_block_size = BLOCK_16X16;
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -136,7 +136,8 @@
   NSTEP = 1,
   HEX = 2,
   BIGDIA = 3,
-  SQUARE = 4
+  SQUARE = 4,
+  FAST_HEX = 5
 } SEARCH_METHODS;
 
 typedef enum {
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2448,7 +2448,11 @@
   // Further step/diamond searches as necessary
   further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
 
-  if (cpi->sf.search_method == HEX) {
+  if (cpi->sf.search_method == FAST_HEX) {
+    bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb,
+                                  &cpi->fn_ptr[bsize], 1,
+                                  &ref_mv.as_mv, &tmp_mv->as_mv);
+  } else if (cpi->sf.search_method == HEX) {
     bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
                              &cpi->fn_ptr[bsize], 1,
                              &ref_mv.as_mv, &tmp_mv->as_mv);