shithub: libvpx

Download patch

ref: e8103f36765c3e01fdca7ef2a37ceea6ed8ffe5d
parent: 74f869b96215af8d61393f8f61f711207aa130a1
parent: 13a4f14710baf3d6f649978f61ba3c40812704aa
author: Scott LaVarnway <[email protected]>
date: Thu Jul 9 13:16:21 EDT 2015

Merge "Eliminate num_8x8 and num_4x4 width/height lookups"

--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -128,6 +128,11 @@
   ENTROPY_CONTEXT *left_context;
   int16_t seg_dequant[MAX_SEGMENTS][2];
 
+  // number of 4x4s in current block
+  uint16_t n4_w, n4_h;
+  // log2 of n4_w, n4_h
+  uint8_t n4_wl, n4_hl;
+
   // encoder
   const int16_t *dequant;
 };
@@ -144,6 +149,8 @@
 
 typedef struct macroblockd {
   struct macroblockd_plane plane[MAX_MB_PLANE];
+  uint8_t bmode_blocks_wl;
+  uint8_t bmode_blocks_hl;
 
   FRAME_COUNTS *counts;
   TileInfo tile;
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -361,6 +361,16 @@
   }
 }
 
+static INLINE void dec_txfrm_block_to_raster_xy(int bwl,
+                                                TX_SIZE tx_size, int block,
+                                                int *x, int *y) {
+  const int tx_cols_log2 = bwl - tx_size;
+  const int tx_cols = 1 << tx_cols_log2;
+  const int raster_mb = block >> (tx_size << 1);
+  *x = (raster_mb & (tx_cols - 1)) << tx_size;
+  *y = (raster_mb >> tx_cols_log2) << tx_size;
+}
+
 struct intra_args {
   MACROBLOCKD *xd;
   vp9_reader *r;
@@ -368,7 +378,6 @@
 };
 
 static void predict_and_reconstruct_intra_block(int plane, int block,
-                                                BLOCK_SIZE plane_bsize,
                                                 TX_SIZE tx_size, void *arg) {
   struct intra_args *const args = (struct intra_args *)arg;
   MACROBLOCKD *const xd = args->xd;
@@ -378,11 +387,12 @@
                                             : mi->mbmi.uv_mode;
   int x, y;
   uint8_t *dst;
-  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
+  const int bwl = pd->n4_wl;
+  dec_txfrm_block_to_raster_xy(bwl, tx_size, block, &x, &y);
   dst = &pd->dst.buf[4 * y * pd->dst.stride + 4 * x];
 
   vp9_predict_intra_block(xd, block >> (tx_size << 1),
-                          b_width_log2_lookup[plane_bsize], tx_size, mode,
+                          bwl, tx_size, mode,
                           dst, pd->dst.stride, dst, pd->dst.stride,
                           x, y, plane);
 
@@ -391,8 +401,7 @@
         DCT_DCT : intra_mode_to_tx_type_lookup[mode];
     const scan_order *sc = (plane || xd->lossless) ?
         &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type];
-    const int eob = vp9_decode_block_tokens(xd, plane, sc,
-                                            plane_bsize, x, y, tx_size,
+    const int eob = vp9_decode_block_tokens(xd, plane, sc, x, y, tx_size,
                                             args->r, args->seg_id);
     inverse_transform_block_intra(xd, plane, tx_type, tx_size,
                                   dst, pd->dst.stride, eob);
@@ -399,28 +408,18 @@
   }
 }
 
-struct inter_args {
-  MACROBLOCKD *xd;
-  vp9_reader *r;
-  int *eobtotal;
-  int seg_id;
-};
-
-static void reconstruct_inter_block(int plane, int row, int col,
-                                    BLOCK_SIZE plane_bsize,
-                                    TX_SIZE tx_size, struct inter_args *arg) {
-  struct inter_args *args = (struct inter_args *)arg;
-  MACROBLOCKD *const xd = args->xd;
+static int reconstruct_inter_block(MACROBLOCKD *const xd, vp9_reader *r,
+                                   MB_MODE_INFO *const mbmi, int plane,
+                                   int row, int col, TX_SIZE tx_size) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  int eob;
   const scan_order *sc = &vp9_default_scan_orders[tx_size];
-  eob = vp9_decode_block_tokens(xd, plane, sc, plane_bsize,
-                                col, row, tx_size, args->r, args->seg_id);
+  const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, r,
+                                          mbmi->segment_id);
 
   inverse_transform_block_inter(xd, plane, tx_size,
                             &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
                             pd->dst.stride, eob);
-  *args->eobtotal += eob;
+  return eob;
 }
 
 static void build_mc_border(const uint8_t *src, int src_stride,
@@ -721,8 +720,7 @@
 
 static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
                                           MACROBLOCKD *xd,
-                                          int mi_row, int mi_col,
-                                          BLOCK_SIZE bsize) {
+                                          int mi_row, int mi_col) {
   int plane;
   const int mi_x = mi_col * MI_SIZE;
   const int mi_y = mi_row * MI_SIZE;
@@ -732,15 +730,13 @@
   const int is_compound = has_second_ref(&mi->mbmi);
 
   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
-                                                        &xd->plane[plane]);
     struct macroblockd_plane *const pd = &xd->plane[plane];
     struct buf_2d *const dst_buf = &pd->dst;
-    const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
-    const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+    const int num_4x4_w = pd->n4_w;
+    const int num_4x4_h = pd->n4_h;
 
-    const int bw = 4 * num_4x4_w;
-    const int bh = 4 * num_4x4_h;
+    const int n4w_x4 = 4 * num_4x4_w;
+    const int n4h_x4 = 4 * num_4x4_h;
     int ref;
 
     for (ref = 0; ref < 1 + is_compound; ++ref) {
@@ -753,11 +749,10 @@
 
       if (sb_type < BLOCK_8X8) {
         int i = 0, x, y;
-        assert(bsize == BLOCK_8X8);
         for (y = 0; y < num_4x4_h; ++y) {
           for (x = 0; x < num_4x4_w; ++x) {
             const MV mv = average_split_mvs(pd, mi, ref, i++);
-            dec_build_inter_predictors(pbi, xd, plane, bw, bh,
+            dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
                                        4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel,
                                        sf, pre_buf, dst_buf, &mv,
                                        ref_frame_buf, is_scaled, ref);
@@ -765,8 +760,8 @@
         }
       } else {
         const MV mv = mi->mbmi.mv[ref].as_mv;
-        dec_build_inter_predictors(pbi, xd, plane, bw, bh,
-                                   0, 0, bw, bh, mi_x, mi_y, kernel,
+        dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
+                                   0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel,
                                    sf, pre_buf, dst_buf, &mv, ref_frame_buf,
                                    is_scaled, ref);
       }
@@ -774,12 +769,89 @@
   }
 }
 
+static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi,
+                                         int n4_wl, int n4_hl) {
+  // get minimum log2 num4x4s dimension
+  const int x = MIN(n4_wl, n4_hl);
+  return MIN(mbmi->tx_size,  x);
+}
+
+// TODO(slavarnway): Eliminate the foreach_ functions in future commits.
+// NOTE: Jingning removed the foreach_ for recon inter in a previous commit.
+
+typedef void (*dec_foreach_transformed_block_visitor)(int plane, int block,
+                                                      TX_SIZE tx_size,
+                                                      void *arg);
+
+static void dec_foreach_transformed_block_in_plane(
+    const MACROBLOCKD *const xd,
+    int plane,
+    dec_foreach_transformed_block_visitor visit, void *arg) {
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi;
+  // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
+  // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
+  // transform size varies per plane, look it up in a common way.
+  const TX_SIZE tx_size =
+      plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
+              : mbmi->tx_size;
+  const int num_4x4_w = pd->n4_w;
+  const int num_4x4_h = pd->n4_h;
+  const int step = 1 << (tx_size << 1);
+  int i = 0, r, c;
+
+  // If mb_to_right_edge is < 0 we are in a situation in which
+  // the current block size extends into the UMV and we won't
+  // visit the sub blocks that are wholly within the UMV.
+  const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
+      xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+  const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
+      xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+
+  // Keep track of the row and column of the blocks we use so that we know
+  // if we are in the unrestricted motion border.
+  for (r = 0; r < max_blocks_high; r += (1 << tx_size)) {
+    for (c = 0; c < num_4x4_w; c += (1 << tx_size)) {
+      // Skip visiting the sub blocks that are wholly within the UMV.
+      if (c < max_blocks_wide)
+        visit(plane, i, tx_size, arg);
+      i += step;
+    }
+  }
+}
+
+static void dec_foreach_transformed_block(const MACROBLOCKD* const xd,
+    dec_foreach_transformed_block_visitor visit, void *arg) {
+  int plane;
+
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+    dec_foreach_transformed_block_in_plane(xd, plane, visit, arg);
+}
+
+static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) {
+  int i;
+  for (i = 0; i < MAX_MB_PLANE; i++) {
+    struct macroblockd_plane *const pd = &xd->plane[i];
+    memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_w);
+    memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_h);
+  }
+}
+
+static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl,
+                         int bhl) {
+  int i;
+  for (i = 0; i < MAX_MB_PLANE; i++) {
+    xd->plane[i].n4_w = (bw << 1) >> xd->plane[i].subsampling_x;
+    xd->plane[i].n4_h = (bh << 1) >> xd->plane[i].subsampling_y;
+    xd->plane[i].n4_wl = bwl - xd->plane[i].subsampling_x;
+    xd->plane[i].n4_hl = bhl - xd->plane[i].subsampling_y;
+  }
+}
+
 static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
-                                 BLOCK_SIZE bsize, int mi_row, int mi_col) {
-  const int bw = num_8x8_blocks_wide_lookup[bsize];
-  const int bh = num_8x8_blocks_high_lookup[bsize];
-  const int x_mis = MIN(bw, cm->mi_cols - mi_col);
-  const int y_mis = MIN(bh, cm->mi_rows - mi_row);
+                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                 int bw, int bh, int x_mis, int y_mis,
+                                 int bwl, int bhl) {
   const int offset = mi_row * cm->mi_stride + mi_col;
   int x, y;
   const TileInfo *const tile = &xd->tile;
@@ -786,6 +858,8 @@
 
   xd->mi = cm->mi_grid_visible + offset;
   xd->mi[0] = &cm->mi[offset];
+  // TODO(slavarnway): Generate sb_type based on bwl and bhl, instead of
+  // passing bsize from decode_partition().
   xd->mi[0]->mbmi.sb_type = bsize;
   for (y = 0; y < y_mis; ++y)
     for (x = !y; x < x_mis; ++x) {
@@ -792,6 +866,8 @@
       xd->mi[y * cm->mi_stride + x] = xd->mi[0];
     }
 
+  set_plane_n4(xd, bw, bh, bwl, bhl);
+
   set_skip_context(xd, mi_row, mi_col);
 
   // Distance of Mb to the various image edges. These are specified to 8th pel
@@ -804,11 +880,18 @@
 
 static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
                          int mi_row, int mi_col,
-                         vp9_reader *r, BLOCK_SIZE bsize) {
+                         vp9_reader *r, BLOCK_SIZE bsize,
+                         int bwl, int bhl) {
   VP9_COMMON *const cm = &pbi->common;
   const int less8x8 = bsize < BLOCK_8X8;
-  MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col);
+  const int bw = 1 << (bwl - 1);
+  const int bh = 1 << (bhl - 1);
+  const int x_mis = MIN(bw, cm->mi_cols - mi_col);
+  const int y_mis = MIN(bh, cm->mi_rows - mi_row);
 
+  MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col,
+                                   bw, bh, x_mis, y_mis, bwl, bhl);
+
   if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
     const BLOCK_SIZE uv_subsize =
         ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y];
@@ -817,46 +900,43 @@
                          VPX_CODEC_CORRUPT_FRAME, "Invalid block size.");
   }
 
-  vp9_read_mode_info(pbi, xd, mi_row, mi_col, r);
+  vp9_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
 
-  if (less8x8)
-    bsize = BLOCK_8X8;
-
   if (mbmi->skip) {
-    reset_skip_context(xd, bsize);
+    dec_reset_skip_context(xd);
   }
 
   if (!is_inter_block(mbmi)) {
     struct intra_args arg = {xd, r, mbmi->segment_id};
-    vp9_foreach_transformed_block(xd, bsize,
+    dec_foreach_transformed_block(xd,
                                   predict_and_reconstruct_intra_block, &arg);
   } else {
     // Prediction
-    dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col, bsize);
+    dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
 
     // Reconstruction
     if (!mbmi->skip) {
       int eobtotal = 0;
-      struct inter_args arg = {xd, r, &eobtotal, mbmi->segment_id};
       int plane;
 
       for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
         const struct macroblockd_plane *const pd = &xd->plane[plane];
-        const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd)
-                                      : mbmi->tx_size;
-        const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-        const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
-        const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+        const TX_SIZE tx_size =
+            plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
+                    : mbmi->tx_size;
+        const int num_4x4_w = pd->n4_w;
+        const int num_4x4_h = pd->n4_h;
         const int step = (1 << tx_size);
-        int r, c;
+        int row, col;
         const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ?
             0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
         const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
             0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
 
-        for (r = 0; r < max_blocks_high; r += step)
-          for (c = 0; c < max_blocks_wide; c += step)
-            reconstruct_inter_block(plane, r, c, plane_bsize, tx_size, &arg);
+        for (row = 0; row < max_blocks_high; row += step)
+          for (col = 0; col < max_blocks_wide; col += step)
+            eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col,
+                                                tx_size);
       }
 
       if (!less8x8 && eobtotal == 0)
@@ -867,10 +947,36 @@
   xd->corrupted |= vp9_reader_has_error(r);
 }
 
+static INLINE int dec_partition_plane_context(const MACROBLOCKD *xd,
+                                              int mi_row, int mi_col,
+                                              int bsl) {
+  const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col;
+  const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK);
+  int above = (*above_ctx >> bsl) & 1 , left = (*left_ctx >> bsl) & 1;
+
+//  assert(bsl >= 0);
+
+  return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
+}
+
+static INLINE void dec_update_partition_context(MACROBLOCKD *xd,
+                                                int mi_row, int mi_col,
+                                                BLOCK_SIZE subsize,
+                                                int bw) {
+  PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col;
+  PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK);
+
+  // update the partition context at the end notes. set partition bits
+  // of block sizes larger than the current one to be one, and partition
+  // bits of smaller block sizes to be zero.
+  memset(above_ctx, partition_context_lookup[subsize].above, bw);
+  memset(left_ctx, partition_context_lookup[subsize].left, bw);
+}
+
 static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col,
-                                     BLOCK_SIZE bsize, vp9_reader *r,
-                                     int has_rows, int has_cols) {
-  const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
+                                     vp9_reader *r,
+                                     int has_rows, int has_cols, int bsl) {
+  const int ctx = dec_partition_plane_context(xd, mi_row, mi_col, bsl);
   const vp9_prob *const probs = get_partition_probs(xd, ctx);
   FRAME_COUNTS *counts = xd->counts;
   PARTITION_TYPE p;
@@ -890,11 +996,14 @@
   return p;
 }
 
+// TODO(slavarnway): eliminate bsize and subsize in future commits
 static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd,
                              int mi_row, int mi_col,
-                             vp9_reader* r, BLOCK_SIZE bsize) {
+                             vp9_reader* r, BLOCK_SIZE bsize, int n4x4_l2) {
   VP9_COMMON *const cm = &pbi->common;
-  const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
+  const int n8x8_l2 = n4x4_l2 - 1;
+  const int num_8x8_wh = 1 << n8x8_l2;
+  const int hbs = num_8x8_wh >> 1;
   PARTITION_TYPE partition;
   BLOCK_SIZE subsize;
   const int has_rows = (mi_row + hbs) < cm->mi_rows;
@@ -903,30 +1012,37 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  partition = read_partition(xd, mi_row, mi_col, bsize, r, has_rows, has_cols);
-  subsize = get_subsize(bsize, partition);
-  if (bsize == BLOCK_8X8) {
-    decode_block(pbi, xd, mi_row, mi_col, r, subsize);
+  partition = read_partition(xd, mi_row, mi_col, r, has_rows, has_cols,
+                             n8x8_l2);
+  subsize = subsize_lookup[partition][bsize];  // get_subsize(bsize, partition);
+  if (!hbs) {
+    // calculate bmode block dimensions (log 2)
+    xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT);
+    xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ);
+    decode_block(pbi, xd, mi_row, mi_col, r, subsize, 1, 1);
   } else {
     switch (partition) {
       case PARTITION_NONE:
-        decode_block(pbi, xd, mi_row, mi_col, r, subsize);
+        decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n4x4_l2);
         break;
       case PARTITION_HORZ:
-        decode_block(pbi, xd, mi_row, mi_col, r, subsize);
+        decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n8x8_l2);
         if (has_rows)
-          decode_block(pbi, xd, mi_row + hbs, mi_col, r, subsize);
+          decode_block(pbi, xd, mi_row + hbs, mi_col, r, subsize, n4x4_l2,
+                       n8x8_l2);
         break;
       case PARTITION_VERT:
-        decode_block(pbi, xd, mi_row, mi_col, r, subsize);
+        decode_block(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2, n4x4_l2);
         if (has_cols)
-          decode_block(pbi, xd, mi_row, mi_col + hbs, r, subsize);
+          decode_block(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2,
+                       n4x4_l2);
         break;
       case PARTITION_SPLIT:
-        decode_partition(pbi, xd, mi_row, mi_col, r, subsize);
-        decode_partition(pbi, xd, mi_row, mi_col + hbs, r, subsize);
-        decode_partition(pbi, xd, mi_row + hbs, mi_col, r, subsize);
-        decode_partition(pbi, xd, mi_row + hbs, mi_col + hbs, r, subsize);
+        decode_partition(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2);
+        decode_partition(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2);
+        decode_partition(pbi, xd, mi_row + hbs, mi_col, r, subsize, n8x8_l2);
+        decode_partition(pbi, xd, mi_row + hbs, mi_col + hbs, r, subsize,
+                         n8x8_l2);
         break;
       default:
         assert(0 && "Invalid partition type");
@@ -936,7 +1052,7 @@
   // update partition context
   if (bsize >= BLOCK_8X8 &&
       (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
-    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+    dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh);
 }
 
 static void setup_token_decoder(const uint8_t *data,
@@ -1440,8 +1556,8 @@
         vp9_zero(tile_data->xd.left_seg_context);
         for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
              mi_col += MI_BLOCK_SIZE) {
-          decode_partition(pbi, &tile_data->xd, mi_row, mi_col,
-                           &tile_data->bit_reader, BLOCK_64X64);
+          decode_partition(pbi, &tile_data->xd, mi_row,
+                           mi_col, &tile_data->bit_reader, BLOCK_64X64, 4);
         }
         pbi->mb.corrupted |= tile_data->xd.corrupted;
         if (pbi->mb.corrupted)
@@ -1515,7 +1631,7 @@
          mi_col += MI_BLOCK_SIZE) {
       decode_partition(tile_data->pbi, &tile_data->xd,
                        mi_row, mi_col, &tile_data->bit_reader,
-                       BLOCK_64X64);
+                       BLOCK_64X64, 4);
     }
   }
   return !tile_data->xd.corrupted;
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -91,19 +91,27 @@
     return MIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
 }
 
-static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize,
-                           int mi_row, int mi_col, int segment_id) {
-  const int mi_offset = mi_row * cm->mi_cols + mi_col;
-  const int bw = num_8x8_blocks_wide_lookup[bsize];
-  const int bh = num_8x8_blocks_high_lookup[bsize];
-  const int xmis = MIN(cm->mi_cols - mi_col, bw);
-  const int ymis = MIN(cm->mi_rows - mi_row, bh);
+static int dec_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids,
+                              int mi_offset, int x_mis, int y_mis) {
+  int x, y, segment_id = INT_MAX;
+
+  for (y = 0; y < y_mis; y++)
+    for (x = 0; x < x_mis; x++)
+      segment_id = MIN(segment_id,
+                       segment_ids[mi_offset + y * cm->mi_cols + x]);
+
+  assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
+  return segment_id;
+}
+
+static void set_segment_id(VP9_COMMON *cm, int mi_offset,
+                           int x_mis, int y_mis, int segment_id) {
   int x, y;
 
   assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
 
-  for (y = 0; y < ymis; y++)
-    for (x = 0; x < xmis; x++)
+  for (y = 0; y < y_mis; y++)
+    for (x = 0; x < x_mis; x++)
       cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id;
 }
 
@@ -110,22 +118,17 @@
 static void copy_segment_id(const VP9_COMMON *cm,
                            const uint8_t *last_segment_ids,
                            uint8_t *current_segment_ids,
-                           BLOCK_SIZE bsize, int mi_row, int mi_col) {
-  const int mi_offset = mi_row * cm->mi_cols + mi_col;
-  const int bw = num_8x8_blocks_wide_lookup[bsize];
-  const int bh = num_8x8_blocks_high_lookup[bsize];
-  const int xmis = MIN(cm->mi_cols - mi_col, bw);
-  const int ymis = MIN(cm->mi_rows - mi_row, bh);
+                           int mi_offset, int x_mis, int y_mis) {
   int x, y;
 
-  for (y = 0; y < ymis; y++)
-    for (x = 0; x < xmis; x++)
+  for (y = 0; y < y_mis; y++)
+    for (x = 0; x < x_mis; x++)
       current_segment_ids[mi_offset + y * cm->mi_cols + x] =  last_segment_ids ?
           last_segment_ids[mi_offset + y * cm->mi_cols + x] : 0;
 }
 
-static int read_intra_segment_id(VP9_COMMON *const cm, BLOCK_SIZE bsize,
-                                 int mi_row, int mi_col,
+static int read_intra_segment_id(VP9_COMMON *const cm, int mi_offset,
+                                 int x_mis, int y_mis,
                                  vp9_reader *r) {
   struct segmentation *const seg = &cm->seg;
   int segment_id;
@@ -135,12 +138,12 @@
 
   if (!seg->update_map) {
     copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map,
-                    bsize, mi_row, mi_col);
+                    mi_offset, x_mis, y_mis);
     return 0;
   }
 
   segment_id = read_segment_id(r, seg);
-  set_segment_id(cm, bsize, mi_row, mi_col, segment_id);
+  set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
   return segment_id;
 }
 
@@ -148,18 +151,25 @@
                                  int mi_row, int mi_col, vp9_reader *r) {
   struct segmentation *const seg = &cm->seg;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-  const BLOCK_SIZE bsize = mbmi->sb_type;
   int predicted_segment_id, segment_id;
+  const int mi_offset = mi_row * cm->mi_cols + mi_col;
+  const int bw = xd->plane[0].n4_w >> 1;
+  const int bh = xd->plane[0].n4_h >> 1;
 
+  // TODO(slavarnway): move x_mis, y_mis into xd ?????
+  const int x_mis = MIN(cm->mi_cols - mi_col, bw);
+  const int y_mis = MIN(cm->mi_rows - mi_row, bh);
+
   if (!seg->enabled)
     return 0;  // Default for disabled segmentation
 
   predicted_segment_id = cm->last_frame_seg_map ?
-      get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col) : 0;
+      dec_get_segment_id(cm, cm->last_frame_seg_map, mi_offset, x_mis, y_mis) :
+      0;
 
   if (!seg->update_map) {
     copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map,
-                    bsize, mi_row, mi_col);
+                    mi_offset, x_mis, y_mis);
     return predicted_segment_id;
   }
 
@@ -171,7 +181,7 @@
   } else {
     segment_id = read_segment_id(r, seg);
   }
-  set_segment_id(cm, bsize, mi_row, mi_col, segment_id);
+  set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
   return segment_id;
 }
 
@@ -198,8 +208,15 @@
   const MODE_INFO *left_mi  = xd->left_mi;
   const BLOCK_SIZE bsize = mbmi->sb_type;
   int i;
+  const int mi_offset = mi_row * cm->mi_cols + mi_col;
+  const int bw = xd->plane[0].n4_w >> 1;
+  const int bh = xd->plane[0].n4_h >> 1;
 
-  mbmi->segment_id = read_intra_segment_id(cm, bsize, mi_row, mi_col, r);
+  // TODO(slavarnway): move x_mis, y_mis into xd ?????
+  const int x_mis = MIN(cm->mi_cols - mi_col, bw);
+  const int y_mis = MIN(cm->mi_rows - mi_row, bh);
+
+  mbmi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r);
   mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
   mbmi->tx_size = read_tx_size(cm, xd, 1, r);
   mbmi->ref_frame[0] = INTRA_FRAME;
@@ -519,8 +536,8 @@
                       : cm->interp_filter;
 
   if (bsize < BLOCK_8X8) {
-    const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];  // 1 or 2
-    const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];  // 1 or 2
+    const int num_4x4_w = 1 << xd->bmode_blocks_wl;
+    const int num_4x4_h = 1 << xd->bmode_blocks_hl;
     int idx, idy;
     PREDICTION_MODE b_mode;
     int_mv nearest_sub8x8[2], near_sub8x8[2];
@@ -589,13 +606,10 @@
 }
 
 void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
-                        int mi_row, int mi_col, vp9_reader *r) {
+                        int mi_row, int mi_col, vp9_reader *r,
+                        int x_mis, int y_mis) {
   VP9_COMMON *const cm = &pbi->common;
   MODE_INFO *const mi = xd->mi[0];
-  const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
-  const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
-  const int x_mis = MIN(bw, cm->mi_cols - mi_col);
-  const int y_mis = MIN(bh, cm->mi_rows - mi_row);
   MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
   int w, h;
 
--- a/vp9/decoder/vp9_decodemv.h
+++ b/vp9/decoder/vp9_decodemv.h
@@ -19,7 +19,8 @@
 #endif
 
 void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
-                        int mi_row, int mi_col, vp9_reader *r);
+                        int mi_row, int mi_col, vp9_reader *r,
+                        int x_mis, int y_mis);
 
 #ifdef __cplusplus
 }  // extern "C"
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -205,9 +205,54 @@
   return c;
 }
 
+// TODO(slavarnway): Decode version of vp9_set_context.  Modify vp9_set_context
+// after testing is complete, then delete this version.
+static
+void dec_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
+                      TX_SIZE tx_size, int has_eob,
+                      int aoff, int loff) {
+  ENTROPY_CONTEXT *const a = pd->above_context + aoff;
+  ENTROPY_CONTEXT *const l = pd->left_context + loff;
+  const int tx_size_in_blocks = 1 << tx_size;
+
+  // above
+  if (has_eob && xd->mb_to_right_edge < 0) {
+    int i;
+    const int blocks_wide = pd->n4_w +
+                            (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+    int above_contexts = tx_size_in_blocks;
+    if (above_contexts + aoff > blocks_wide)
+      above_contexts = blocks_wide - aoff;
+
+    for (i = 0; i < above_contexts; ++i)
+      a[i] = has_eob;
+    for (i = above_contexts; i < tx_size_in_blocks; ++i)
+      a[i] = 0;
+  } else {
+    memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+  }
+
+  // left
+  if (has_eob && xd->mb_to_bottom_edge < 0) {
+    int i;
+    const int blocks_high = pd->n4_h +
+                            (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+    int left_contexts = tx_size_in_blocks;
+    if (left_contexts + loff > blocks_high)
+      left_contexts = blocks_high - loff;
+
+    for (i = 0; i < left_contexts; ++i)
+      l[i] = has_eob;
+    for (i = left_contexts; i < tx_size_in_blocks; ++i)
+      l[i] = 0;
+  } else {
+    memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+  }
+}
+
 int vp9_decode_block_tokens(MACROBLOCKD *xd,
                             int plane, const scan_order *sc,
-                            BLOCK_SIZE plane_bsize, int x, int y,
+                            int x, int y,
                             TX_SIZE tx_size, vp9_reader *r,
                             int seg_id) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -217,7 +262,7 @@
   const int eob = decode_coefs(xd, pd->plane_type,
                                pd->dqcoeff, tx_size,
                                dequant, ctx, sc->scan, sc->neighbors, r);
-  vp9_set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y);
+  dec_set_contexts(xd, pd, tx_size, eob > 0, x, y);
   return eob;
 }
 
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -22,7 +22,7 @@
 
 int vp9_decode_block_tokens(MACROBLOCKD *xd,
                             int plane, const scan_order *sc,
-                            BLOCK_SIZE plane_bsize, int x, int y,
+                            int x, int y,
                             TX_SIZE tx_size, vp9_reader *r,
                             int seg_id);