shithub: libvpx

Download patch

ref: 954dbb282b5533ad03df24f504bcecb323a0a1a8
parent: 9d8703d5f98eaa16fe7601ab57e3e566ea9c6b2b
parent: e39ecfaa9843735df3a415cfb9ea6685ea4e1187
author: Deb Mukherjee <[email protected]>
date: Thu Mar 13 00:09:13 EDT 2014

Merge "Preliminary code for variance based paritioning"

--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -150,6 +150,446 @@
     return BLOCK_16X16;
 }
 
+// Lighter version of set_offsets that only sets the mode info
+// pointers.
+static inline void set_modeinfo_offsets(VP9_COMMON *const cm,
+                                        MACROBLOCKD *const xd,
+                                        int mi_row,
+                                        int mi_col) {
+  const int idx_str = xd->mode_info_stride * mi_row + mi_col;
+  xd->mi_8x8 = cm->mi_grid_visible + idx_str;
+  xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
+  // xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL;
+  xd->mi_8x8[0] = cm->mi + idx_str;
+}
+
+static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col,
+                              BLOCK_SIZE bsize) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int mb_rows = cm->mb_rows;
+  const int mb_cols = cm->mb_cols;
+  const int mb_row = mi_row >> 1;
+  const int mb_col = mi_col >> 1;
+  const int mb_width = num_8x8_blocks_wide_lookup[bsize] >> 1;
+  const int mb_height = num_8x8_blocks_high_lookup[bsize] >> 1;
+  int r, c;
+  if (bsize <= BLOCK_16X16) {
+    return cpi->active_map[mb_row * mb_cols + mb_col];
+  }
+  for (r = 0; r < mb_height; ++r) {
+    for (c = 0; c < mb_width; ++c) {
+      int row = mb_row + r;
+      int col = mb_col + c;
+      if (row >= mb_rows || col >= mb_cols)
+        continue;
+      if (cpi->active_map[row * mb_cols + col])
+        return 1;
+    }
+  }
+  return 0;
+}
+
+static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
+                        int mi_row, int mi_col, BLOCK_SIZE bsize) {
+  MACROBLOCK *const x = &cpi->mb;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi;
+  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+  const int mi_height = num_8x8_blocks_high_lookup[bsize];
+  const int mb_row = mi_row >> 1;
+  const int mb_col = mi_col >> 1;
+  const int idx_map = mb_row * cm->mb_cols + mb_col;
+  const struct segmentation *const seg = &cm->seg;
+
+  set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col);
+
+  // Activity map pointer
+  x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
+
+  if (cpi->active_map_enabled && !x->e_mbd.lossless) {
+    x->in_active_map = is_block_in_mb_map(cpi, mi_row, mi_col, bsize);
+  } else {
+    x->in_active_map = 1;
+  }
+
+  set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+
+  mbmi = &xd->mi_8x8[0]->mbmi;
+
+  // Set up destination pointers.
+  vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col);
+
+  // Set up limit values for MV components.
+  // Mv beyond the range do not produce new/different prediction block.
+  x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
+  x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
+  x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
+  x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
+
+  // Set up distance of MB to edge of frame in 1/8th pel units.
+  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
+  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
+                 cm->mi_rows, cm->mi_cols);
+
+  // Set up source buffers.
+  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+
+  // R/D setup.
+  x->rddiv = cpi->RDDIV;
+  x->rdmult = cpi->RDMULT;
+
+  // Setup segment ID.
+  if (seg->enabled) {
+    if (cpi->oxcf.aq_mode != VARIANCE_AQ) {
+      const uint8_t *const map = seg->update_map ? cpi->segmentation_map
+                                                 : cm->last_frame_seg_map;
+      mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
+    }
+    vp9_init_plane_quantizers(cpi, x);
+
+    if (seg->enabled && cpi->seg0_cnt > 0 &&
+        !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) &&
+        vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) {
+      cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
+    } else {
+      const int y = mb_row & ~3;
+      const int x = mb_col & ~3;
+      const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
+      const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
+      const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1;
+      const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1;
+
+      cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress)
+          << 16) / cm->MBs;
+    }
+
+    x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
+  } else {
+    mbmi->segment_id = 0;
+    x->encode_breakout = cpi->encode_breakout;
+  }
+}
+
+static void duplicate_modeinfo_in_sb(VP9_COMMON * const cm,
+                                     MACROBLOCKD *const xd,
+                                     int mi_row,
+                                     int mi_col,
+                                     BLOCK_SIZE bsize) {
+  const int block_width = num_8x8_blocks_wide_lookup[bsize];
+  const int block_height = num_8x8_blocks_high_lookup[bsize];
+  const int mis = xd->mode_info_stride;
+  int i, j;
+  for (j = 0; j < block_height; ++j)
+    for (i = 0; i < block_width; ++i) {
+      if (mi_row + j < cm->mi_rows && mi_col + i < cm->mi_cols)
+        xd->mi_8x8[j * mis + i] = xd->mi_8x8[0];
+    }
+}
+
+static void set_block_size(VP9_COMP * const cpi,
+                           const TileInfo *const tile,
+                           int mi_row, int mi_col,
+                           BLOCK_SIZE bsize) {
+  if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
+    MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+    set_modeinfo_offsets(&cpi->common, xd, mi_row, mi_col);
+    xd->mi_8x8[0]->mbmi.sb_type = bsize;
+    duplicate_modeinfo_in_sb(&cpi->common, xd, mi_row, mi_col, bsize);
+  }
+}
+
+typedef struct {
+  int64_t sum_square_error;
+  int64_t sum_error;
+  int count;
+  int variance;
+} var;
+
+typedef struct {
+  var none;
+  var horz[2];
+  var vert[2];
+} partition_variance;
+
+typedef struct {
+  partition_variance part_variances;
+  var split[4];
+} v8x8;
+
+typedef struct {
+  partition_variance part_variances;
+  v8x8 split[4];
+} v16x16;
+
+typedef struct {
+  partition_variance part_variances;
+  v16x16 split[4];
+} v32x32;
+
+typedef struct {
+  partition_variance part_variances;
+  v32x32 split[4];
+} v64x64;
+
+typedef struct {
+  partition_variance *part_variances;
+  var *split[4];
+} variance_node;
+
+typedef enum {
+  V16X16,
+  V32X32,
+  V64X64,
+} TREE_LEVEL;
+
+static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
+  int i;
+  switch (bsize) {
+    case BLOCK_64X64: {
+      v64x64 *vt = (v64x64 *) data;
+      node->part_variances = &vt->part_variances;
+      for (i = 0; i < 4; i++)
+        node->split[i] = &vt->split[i].part_variances.none;
+      break;
+    }
+    case BLOCK_32X32: {
+      v32x32 *vt = (v32x32 *) data;
+      node->part_variances = &vt->part_variances;
+      for (i = 0; i < 4; i++)
+        node->split[i] = &vt->split[i].part_variances.none;
+      break;
+    }
+    case BLOCK_16X16: {
+      v16x16 *vt = (v16x16 *) data;
+      node->part_variances = &vt->part_variances;
+      for (i = 0; i < 4; i++)
+        node->split[i] = &vt->split[i].part_variances.none;
+      break;
+    }
+    case BLOCK_8X8: {
+      v8x8 *vt = (v8x8 *) data;
+      node->part_variances = &vt->part_variances;
+      for (i = 0; i < 4; i++)
+        node->split[i] = &vt->split[i];
+      break;
+    }
+    default: {
+      assert(0);
+    }
+  }
+}
+
+// Set variance values given sum square error, sum error, count.
+static void fill_variance(int64_t s2, int64_t s, int c, var *v) {
+  v->sum_square_error = s2;
+  v->sum_error = s;
+  v->count = c;
+  if (c > 0)
+    v->variance = (int)(256 *
+                        (v->sum_square_error - v->sum_error * v->sum_error /
+                         v->count) / v->count);
+  else
+    v->variance = 0;
+}
+
+void sum_2_variances(const var *a, const var *b, var *r) {
+  fill_variance(a->sum_square_error + b->sum_square_error,
+                a->sum_error + b->sum_error, a->count + b->count, r);
+}
+
+static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
+  variance_node node;
+  tree_to_node(data, bsize, &node);
+  sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
+  sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
+  sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
+  sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
+  sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
+                  &node.part_variances->none);
+}
+
+static int set_vt_partitioning(VP9_COMP *cpi,
+                               void *data,
+                               const TileInfo *const tile,
+                               BLOCK_SIZE bsize,
+                               int mi_row,
+                               int mi_col,
+                               int mi_size) {
+  VP9_COMMON * const cm = &cpi->common;
+  variance_node vt;
+  const int block_width = num_8x8_blocks_wide_lookup[bsize];
+  const int block_height = num_8x8_blocks_high_lookup[bsize];
+  // TODO(debargha): Choose this more intelligently.
+  const int64_t threshold_multiplier = 25;
+  int64_t threshold = threshold_multiplier * cpi->common.base_qindex;
+  assert(block_height == block_width);
+
+  tree_to_node(data, bsize, &vt);
+
+  // Split none is available only if we have more than half a block size
+  // in width and height inside the visible image.
+  if (mi_col + block_width / 2 < cm->mi_cols &&
+      mi_row + block_height / 2 < cm->mi_rows &&
+      vt.part_variances->none.variance < threshold) {
+    set_block_size(cpi, tile, mi_row, mi_col, bsize);
+    return 1;
+  }
+
+  // Vertical split is available on all but the bottom border.
+  if (mi_row + block_height / 2 < cm->mi_rows &&
+      vt.part_variances->vert[0].variance < threshold &&
+      vt.part_variances->vert[1].variance < threshold) {
+    BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
+    set_block_size(cpi, tile, mi_row, mi_col, subsize);
+    set_block_size(cpi, tile, mi_row, mi_col + block_width / 2, subsize);
+    return 1;
+  }
+
+  // Horizontal split is available on all but the right border.
+  if (mi_col + block_width / 2 < cm->mi_cols &&
+      vt.part_variances->horz[0].variance < threshold &&
+      vt.part_variances->horz[1].variance < threshold) {
+    BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
+    set_block_size(cpi, tile, mi_row, mi_col, subsize);
+    set_block_size(cpi, tile, mi_row + block_height / 2, mi_col, subsize);
+    return 1;
+  }
+  return 0;
+}
+
+// TODO(debargha): Fix this function and make it work as expected.
+static void choose_partitioning(VP9_COMP *cpi,
+                                const TileInfo *const tile,
+                                int mi_row, int mi_col) {
+  VP9_COMMON * const cm = &cpi->common;
+  MACROBLOCK *x = &cpi->mb;
+  MACROBLOCKD *xd = &cpi->mb.e_mbd;
+
+  int i, j, k;
+  v64x64 vt;
+  uint8_t *s;
+  const uint8_t *d;
+  int sp;
+  int dp;
+  int pixels_wide = 64, pixels_high = 64;
+  int_mv nearest_mv, near_mv;
+  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+  const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
+
+  vp9_zero(vt);
+  set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+
+  if (xd->mb_to_right_edge < 0)
+    pixels_wide += (xd->mb_to_right_edge >> 3);
+  if (xd->mb_to_bottom_edge < 0)
+    pixels_high += (xd->mb_to_bottom_edge >> 3);
+
+  s = x->plane[0].src.buf;
+  sp = x->plane[0].src.stride;
+
+  if (cm->frame_type != KEY_FRAME) {
+    vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, sf);
+
+    xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME;
+    xd->mi_8x8[0]->mbmi.sb_type = BLOCK_64X64;
+    vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv,
+                          xd->mi_8x8[0]->mbmi.ref_mvs[LAST_FRAME],
+                          &nearest_mv, &near_mv);
+
+    xd->mi_8x8[0]->mbmi.mv[0] = nearest_mv;
+    vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64);
+
+    d = xd->plane[0].dst.buf;
+    dp = xd->plane[0].dst.stride;
+  } else {
+    d = VP9_VAR_OFFS;
+    dp = 0;
+  }
+
+  // Fill in the entire tree of 8x8 variances for splits.
+  for (i = 0; i < 4; i++) {
+    const int x32_idx = ((i & 1) << 5);
+    const int y32_idx = ((i >> 1) << 5);
+    for (j = 0; j < 4; j++) {
+      const int x16_idx = x32_idx + ((j & 1) << 4);
+      const int y16_idx = y32_idx + ((j >> 1) << 4);
+      v16x16 *vst = &vt.split[i].split[j];
+      for (k = 0; k < 4; k++) {
+        int x_idx = x16_idx + ((k & 1) << 3);
+        int y_idx = y16_idx + ((k >> 1) << 3);
+        unsigned int sse = 0;
+        int sum = 0;
+        if (x_idx < pixels_wide && y_idx < pixels_high)
+          vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
+                              d + y_idx * dp + x_idx, dp, &sse, &sum);
+        fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
+      }
+    }
+  }
+  // Fill the rest of the variance tree by summing split partition values.
+  for (i = 0; i < 4; i++) {
+    for (j = 0; j < 4; j++) {
+      fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
+    }
+    fill_variance_tree(&vt.split[i], BLOCK_32X32);
+  }
+  fill_variance_tree(&vt, BLOCK_64X64);
+
+  // Now go through the entire structure,  splitting every block size until
+  // we get to one that's got a variance lower than our threshold,  or we
+  // hit 8x8.
+  if (!set_vt_partitioning(cpi, &vt, tile, BLOCK_64X64,
+                           mi_row, mi_col, 8)) {
+    for (i = 0; i < 4; ++i) {
+      const int x32_idx = ((i & 1) << 2);
+      const int y32_idx = ((i >> 1) << 2);
+      if (!set_vt_partitioning(cpi, &vt.split[i], tile, BLOCK_32X32,
+                               (mi_row + y32_idx), (mi_col + x32_idx), 4)) {
+        for (j = 0; j < 4; ++j) {
+          const int x16_idx = ((j & 1) << 1);
+          const int y16_idx = ((j >> 1) << 1);
+          // NOTE: This is a temporary hack to disable 8x8 partitions,
+          // since it works really bad - possibly due to a bug
+#define DISABLE_8X8_VAR_BASED_PARTITION
+#ifdef DISABLE_8X8_VAR_BASED_PARTITION
+          if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows &&
+              mi_row + x32_idx + x16_idx + 1 < cm->mi_cols) {
+            set_block_size(cpi, tile,
+                           (mi_row + y32_idx + y16_idx),
+                           (mi_col + x32_idx + x16_idx),
+                           BLOCK_16X16);
+          } else {
+            for (k = 0; k < 4; ++k) {
+              const int x8_idx = (k & 1);
+              const int y8_idx = (k >> 1);
+              set_block_size(cpi, tile,
+                             (mi_row + y32_idx + y16_idx + y8_idx),
+                             (mi_col + x32_idx + x16_idx + x8_idx),
+                             BLOCK_8X8);
+            }
+          }
+#else
+          if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile,
+                                   BLOCK_16X16,
+                                   (mi_row + y32_idx + y16_idx),
+                                   (mi_col + x32_idx + x16_idx), 2)) {
+            for (k = 0; k < 4; ++k) {
+              const int x8_idx = (k & 1);
+              const int y8_idx = (k >> 1);
+              set_block_size(cpi, tile,
+                             (mi_row + y32_idx + y16_idx + y8_idx),
+                             (mi_col + x32_idx + x16_idx + x8_idx),
+                             BLOCK_8X8);
+            }
+          }
+#endif
+        }
+      }
+    }
+  }
+}
+
 // Original activity measure from Tim T's code.
 static unsigned int tt_activity_measure(MACROBLOCK *x) {
   unsigned int sse;
@@ -571,117 +1011,6 @@
                      x->e_mbd.plane[i].subsampling_y);
 }
 
-static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col,
-                              BLOCK_SIZE bsize) {
-  VP9_COMMON *const cm = &cpi->common;
-  const int mb_rows = cm->mb_rows;
-  const int mb_cols = cm->mb_cols;
-  const int mb_row = mi_row >> 1;
-  const int mb_col = mi_col >> 1;
-  const int mb_width = num_8x8_blocks_wide_lookup[bsize] >> 1;
-  const int mb_height = num_8x8_blocks_high_lookup[bsize] >> 1;
-  int r, c;
-  if (bsize <= BLOCK_16X16) {
-    return cpi->active_map[mb_row * mb_cols + mb_col];
-  }
-  for (r = 0; r < mb_height; ++r) {
-    for (c = 0; c < mb_width; ++c) {
-      int row = mb_row + r;
-      int col = mb_col + c;
-      if (row >= mb_rows || col >= mb_cols)
-        continue;
-      if (cpi->active_map[row * mb_cols + col])
-        return 1;
-    }
-  }
-  return 0;
-}
-
-static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
-                        int mi_row, int mi_col, BLOCK_SIZE bsize) {
-  MACROBLOCK *const x = &cpi->mb;
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi;
-  const int idx_str = xd->mode_info_stride * mi_row + mi_col;
-  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
-  const int mi_height = num_8x8_blocks_high_lookup[bsize];
-  const int mb_row = mi_row >> 1;
-  const int mb_col = mi_col >> 1;
-  const int idx_map = mb_row * cm->mb_cols + mb_col;
-  const struct segmentation *const seg = &cm->seg;
-
-  set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col);
-
-  // Activity map pointer
-  x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
-
-  if (cpi->active_map_enabled && !x->e_mbd.lossless) {
-    x->in_active_map = is_block_in_mb_map(cpi, mi_row, mi_col, bsize);
-  } else {
-    x->in_active_map = 1;
-  }
-
-  xd->mi_8x8 = cm->mi_grid_visible + idx_str;
-  xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
-  xd->mi_8x8[0] = cm->mi + idx_str;
-
-  mbmi = &xd->mi_8x8[0]->mbmi;
-
-  // Set up destination pointers
-  vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col);
-
-  // Set up limit values for MV components
-  // mv beyond the range do not produce new/different prediction block
-  x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
-  x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
-  x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
-  x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
-
-  // Set up distance of MB to edge of frame in 1/8th pel units
-  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
-  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
-                 cm->mi_rows, cm->mi_cols);
-
-  /* set up source buffers */
-  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
-
-  /* R/D setup */
-  x->rddiv = cpi->RDDIV;
-  x->rdmult = cpi->RDMULT;
-
-  /* segment ID */
-  if (seg->enabled) {
-    if (cpi->oxcf.aq_mode != VARIANCE_AQ) {
-      const uint8_t *const map = seg->update_map ? cpi->segmentation_map
-                                                 : cm->last_frame_seg_map;
-      mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
-    }
-    vp9_init_plane_quantizers(cpi, x);
-
-    if (seg->enabled && cpi->seg0_cnt > 0 &&
-        !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) &&
-        vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) {
-      cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
-    } else {
-      const int y = mb_row & ~3;
-      const int x = mb_col & ~3;
-      const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
-      const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
-      const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1;
-      const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1;
-
-      cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress)
-          << 16) / cm->MBs;
-    }
-
-    x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
-  } else {
-    mbmi->segment_id = 0;
-    x->encode_breakout = cpi->encode_breakout;
-  }
-}
-
 static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
                              int mi_row, int mi_col,
                              int *totalrate, int64_t *totaldist,
@@ -1032,9 +1361,9 @@
 // However, at the bottom and right borders of the image the requested size
 // may not be allowed in which case this code attempts to choose the largest
 // allowable partition.
-static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
-                             MODE_INFO **mi_8x8, int mi_row, int mi_col,
-                             BLOCK_SIZE bsize) {
+static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
+                                   MODE_INFO **mi_8x8, int mi_row, int mi_col,
+                                   BLOCK_SIZE bsize) {
   VP9_COMMON *const cm = &cpi->common;
   const int mis = cm->mode_info_stride;
   int row8x8_remaining = tile->mi_row_end - mi_row;
@@ -2041,21 +2370,21 @@
       cpi->mb.source_variance = UINT_MAX;
       if (cpi->sf.partition_search_type == FIXED_PARTITION) {
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
-        set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
-                         cpi->sf.always_this_block_size);
+        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+                               cpi->sf.always_this_block_size);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                          &dummy_rate, &dummy_dist, 1);
-      } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION ||
-                 cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
-        // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
-        // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
-        // map to the same thing.
+      } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
         BLOCK_SIZE bsize;
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
         bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
-        set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
+        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
         rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                          &dummy_rate, &dummy_dist, 1);
+      } else if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
+        choose_partitioning(cpi, tile, mi_row, mi_col);
+        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+                         &dummy_rate, &dummy_dist, 1);
       } else {
         if ((cm->current_video_frame
             % cpi->sf.last_partitioning_redo_frequency) == 0
@@ -2330,14 +2659,40 @@
   mbmi->segment_id = 0;
 }
 
-static void nonrd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
-                                TOKENEXTRA **tp, int mi_row, int mi_col,
-                                BLOCK_SIZE bsize, int *rate, int64_t *dist) {
+static INLINE int get_block_row(int b32i, int b16i, int b8i) {
+  return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1);
+}
+
+static INLINE int get_block_col(int b32i, int b16i, int b8i) {
+  return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1);
+}
+
+static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
+                                int mi_row, int mi_col,
+                                int *rate, int64_t *dist,
+                                BLOCK_SIZE bsize) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
-  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  set_offsets(cpi, tile, mi_row, mi_col, bsize);
+  xd->mi_8x8[0]->mbmi.sb_type = bsize;
+  if (!frame_is_intra_only(cm)) {
+    vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col,
+                        rate, dist, bsize);
+  } else {
+    MB_PREDICTION_MODE intramode = DC_PRED;
+    set_mode_info(&xd->mi_8x8[0]->mbmi, bsize, intramode);
+  }
+  duplicate_modeinfo_in_sb(cm, xd, mi_row, mi_col, bsize);
+}
+
+static void nonrd_use_fixed_partition(VP9_COMP *cpi,
+                                      const TileInfo *const tile,
+                                      TOKENEXTRA **tp,
+                                      int mi_row, int mi_col,
+                                      BLOCK_SIZE bsize,
+                                      int *rate, int64_t *dist) {
   int br, bc;
-  MB_PREDICTION_MODE mode = DC_PRED;
   int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row);
   int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col);
 
@@ -2352,29 +2707,148 @@
   // find prediction mode for each 8x8 block
   for (br = 0; br < rows; br += bh) {
     for (bc = 0; bc < cols; bc += bw) {
-      const int row = mi_row + br;
-      const int col = mi_col + bc;
-      const BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc,
-                                                &bh, &bw);
-      int i, j;
+      int row = mi_row + br;
+      int col = mi_col + bc;
 
-      set_offsets(cpi, tile, row, col, bs);
+      BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc,
+                                          &bh, &bw);
+      nonrd_pick_sb_modes(cpi, tile, row, col, &brate, &bdist, bs);
 
-      if (cm->frame_type != KEY_FRAME)
-        vp9_pick_inter_mode(cpi, x, tile, row, col, &brate, &bdist, bs);
-      else
-        set_mode_info(&xd->mi_8x8[0]->mbmi, bs, mode);
-
       *rate += brate;
       *dist += bdist;
+    }
+  }
+  encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
+}
 
-      for (j = 0; j < bh; ++j)
-        for (i = 0; i < bw; ++i)
-          xd->mi_8x8[j * cm->mode_info_stride + i] = xd->mi_8x8[0];
+static void nonrd_pick_fixed_partition(VP9_COMP *cpi,
+                                      const TileInfo *const tile,
+                                      int mi_row, int mi_col,
+                                      BLOCK_SIZE bsize) {
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  int br, bc;
+  int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row);
+  int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col);
+
+  int bw = num_8x8_blocks_wide_lookup[bsize];
+  int bh = num_8x8_blocks_high_lookup[bsize];
+
+  // Find prediction mode for each 8x8 block.
+  for (br = 0; br < rows; br += bh) {
+    for (bc = 0; bc < cols; bc += bw) {
+      int row = mi_row + br;
+      int col = mi_col + bc;
+
+      BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc,
+                                          &bh, &bw);
+      set_offsets(cpi, tile, row, col, bs);
+      xd->mi_8x8[0]->mbmi.sb_type = bs;
+      duplicate_modeinfo_in_sb(&cpi->common, xd, row, col, bs);
     }
   }
 }
 
+static void nonrd_use_partition(VP9_COMP *cpi,
+                                const TileInfo *const tile,
+                                MODE_INFO **mi_8x8,
+                                TOKENEXTRA **tp,
+                                int mi_row, int mi_col,
+                                BLOCK_SIZE bsize,
+                                int *totrate, int64_t *totdist) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
+  const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+  const int mis = cm->mode_info_stride;
+  PARTITION_TYPE partition;
+  BLOCK_SIZE subsize;
+  int rate;
+  int64_t dist;
+
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+    return;
+
+  if (bsize >= BLOCK_8X8) {
+    subsize = mi_8x8[0]->mbmi.sb_type;
+  } else {
+    subsize = BLOCK_4X4;
+  }
+
+  partition = partition_lookup[bsl][subsize];
+
+  switch (partition) {
+    case PARTITION_NONE:
+      nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
+      break;
+    case PARTITION_VERT:
+      *get_sb_index(x, subsize) = 0;
+      nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
+      if (mi_col + hbs < cm->mi_cols) {
+        *get_sb_index(x, subsize) = 1;
+        nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
+                            &rate, &dist, subsize);
+        if (rate != INT_MAX && dist != INT64_MAX &&
+            *totrate != INT_MAX && *totdist != INT64_MAX) {
+          *totrate += rate;
+          *totdist += dist;
+        }
+      }
+      break;
+    case PARTITION_HORZ:
+      *get_sb_index(x, subsize) = 0;
+      nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
+      if (mi_row + hbs < cm->mi_rows) {
+        *get_sb_index(x, subsize) = 1;
+        nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
+                            &rate, &dist, subsize);
+        if (rate != INT_MAX && dist != INT64_MAX &&
+            *totrate != INT_MAX && *totdist != INT64_MAX) {
+          *totrate += rate;
+          *totdist += dist;
+        }
+      }
+      break;
+    case PARTITION_SPLIT:
+      subsize = get_subsize(bsize, PARTITION_SPLIT);
+
+      *get_sb_index(x, subsize) = 0;
+      nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+                          subsize, totrate, totdist);
+      *get_sb_index(x, subsize) = 1;
+      nonrd_use_partition(cpi, tile, mi_8x8 + hbs, tp,
+                          mi_row, mi_col + hbs, subsize,
+                          &rate, &dist);
+      if (rate != INT_MAX && dist != INT64_MAX &&
+          *totrate != INT_MAX && *totdist != INT64_MAX) {
+        *totrate += rate;
+        *totdist += dist;
+      }
+      *get_sb_index(x, subsize) = 2;
+      nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis, tp,
+                          mi_row + hbs, mi_col, subsize,
+                          &rate, &dist);
+      if (rate != INT_MAX && dist != INT64_MAX &&
+          *totrate != INT_MAX && *totdist != INT64_MAX) {
+        *totrate += rate;
+        *totdist += dist;
+      }
+      *get_sb_index(x, subsize) = 3;
+      nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis + hbs, tp,
+                          mi_row + hbs, mi_col + hbs, subsize,
+                          &rate, &dist);
+      if (rate != INT_MAX && dist != INT64_MAX &&
+          *totrate != INT_MAX && *totdist != INT64_MAX) {
+        *totrate += rate;
+        *totdist += dist;
+      }
+      break;
+    default:
+      assert("Invalid partition type.");
+  }
+
+  if (bsize == BLOCK_64X64)
+    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize);
+}
+
 static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                 int mi_row, TOKENEXTRA **tp) {
   int mi_col;
@@ -2392,21 +2866,23 @@
     cpi->mb.source_variance = UINT_MAX;
 
     if (cpi->sf.partition_search_type == FIXED_PARTITION) {
-      nonrd_use_partition(cpi, tile, tp, mi_row, mi_col,
-                          cpi->sf.always_this_block_size,
-                          &dummy_rate, &dummy_dist);
-      encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
-    } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION ||
-               cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
-      // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
-      // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
-      // map to the same thing.
+      nonrd_use_fixed_partition(cpi, tile, tp, mi_row, mi_col,
+                                cpi->sf.always_this_block_size,
+                                &dummy_rate, &dummy_dist);
+    } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
       BLOCK_SIZE bsize = get_nonrd_var_based_fixed_partition(cpi,
                                                              mi_row,
                                                              mi_col);
-      nonrd_use_partition(cpi, tile, tp, mi_row, mi_col,
-                          bsize, &dummy_rate, &dummy_dist);
-      encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
+      nonrd_use_fixed_partition(cpi, tile, tp, mi_row, mi_col,
+                                bsize, &dummy_rate, &dummy_dist);
+    } else if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
+      const int idx_str = cpi->common.mode_info_stride * mi_row + mi_col;
+      MODE_INFO **mi_8x8 = cpi->common.mi_grid_visible + idx_str;
+      int dummy_rate;
+      int64_t dummy_dist;
+      choose_partitioning(cpi, tile, mi_row, mi_col);
+      nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+                          &dummy_rate, &dummy_dist);
     } else {
       assert(0);
     }
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -866,7 +866,7 @@
                             do_init_search, 0, vfp, use_mvcost,
                             center_mv, best_mv,
                             square_num_candidates, square_candidates);
-};
+}
 
 int vp9_fast_hex_search(const MACROBLOCK *x,
                         MV *ref_mv,
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -875,7 +875,7 @@
   if (speed >= 8) {
     int i;
     for (i = 0; i < BLOCK_SIZES; ++i)
-      sf->disable_inter_mode_mask[i] = 14;   // only search NEARESTMV (0)
+      sf->disable_inter_mode_mask[i] = 14;    // only search NEARESTMV (0)
   }
 }
 
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -250,7 +250,6 @@
   x->skip = 0;
   if (!x->in_active_map)
     x->skip = 1;
-
   // initialize mode decisions
   *returnrate = INT_MAX;
   *returndistortion = INT64_MAX;