shithub: libvpx

Download patch

ref: 92c4d8149ad0034a6b41a6573d3afec8dd7c33ac
parent: 1a645957804a8d090981d13e99897501409bda8f
parent: c7dc1d78bf26a2e883db485c5bba990e19db5f25
author: Ronald S. Bultje <[email protected]>
date: Tue Oct 20 11:57:05 EDT 2015

Merge "vp10: add extended-intra prediction edges experiment."

--- a/configure
+++ b/configure
@@ -266,6 +266,7 @@
     emulate_hardware
     misc_fixes
     universal_hp
+    ext_ipred_bltr
 "
 CONFIG_LIST="
     dependency_tracking
--- a/vp10/common/common_data.h
+++ b/vp10/common/common_data.h
@@ -31,6 +31,8 @@
 // Log 2 conversion lookup tables for modeinfo width and height
 static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] =
   {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3};
+static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] =
+  {0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3};
 static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] =
   {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8};
 static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] =
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -21,13 +21,35 @@
 #include "vp10/common/reconintra.h"
 #include "vp10/common/onyxc_int.h"
 
+#if CONFIG_EXT_IPRED_BLTR
 enum {
   NEED_LEFT = 1 << 1,
   NEED_ABOVE = 1 << 2,
   NEED_ABOVERIGHT = 1 << 3,
+  NEED_ABOVELEFT = 1 << 4,
+  NEED_BOTTOMLEFT = 1 << 5,
 };
 
 static const uint8_t extend_modes[INTRA_MODES] = {
+  NEED_ABOVE | NEED_LEFT,                   // DC
+  NEED_ABOVE,                               // V
+  NEED_LEFT,                                // H
+  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D117
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D153
+  NEED_LEFT | NEED_BOTTOMLEFT,              // D207
+  NEED_ABOVE | NEED_ABOVERIGHT,             // D63
+  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // TM
+};
+#else
+enum {
+  NEED_LEFT = 1 << 1,
+  NEED_ABOVE = 1 << 2,
+  NEED_ABOVERIGHT = 1 << 3,
+};
+
+static const uint8_t extend_modes[INTRA_MODES] = {
   NEED_ABOVE | NEED_LEFT,       // DC
   NEED_ABOVE,                   // V
   NEED_LEFT,                    // H
@@ -39,7 +61,135 @@
   NEED_ABOVERIGHT,              // D63
   NEED_LEFT | NEED_ABOVE,       // TM
 };
+#endif
 
+#if CONFIG_EXT_IPRED_BLTR
+static const uint8_t orders_64x64[1] = { 0 };
+static const uint8_t orders_64x32[2] = { 0, 1 };
+static const uint8_t orders_32x64[2] = { 0, 1 };
+static const uint8_t orders_32x32[4] = {
+  0, 1,
+  2, 3,
+};
+static const uint8_t orders_32x16[8] = {
+  0, 2,
+  1, 3,
+  4, 6,
+  5, 7,
+};
+static const uint8_t orders_16x32[8] = {
+  0, 1, 2, 3,
+  4, 5, 6, 7,
+};
+static const uint8_t orders_16x16[16] = {
+  0,   1,  4,  5,
+  2,   3,  6,  7,
+  8,   9, 12, 13,
+  10, 11, 14, 15,
+};
+static const uint8_t orders_16x8[32] = {
+  0,   2,  8, 10,
+  1,   3,  9, 11,
+  4,   6, 12, 14,
+  5,   7, 13, 15,
+  16, 18, 24, 26,
+  17, 19, 25, 27,
+  20, 22, 28, 30,
+  21, 23, 29, 31,
+};
+static const uint8_t orders_8x16[32] = {
+  0,   1,  2,  3,  8,  9, 10, 11,
+  4,   5,  6,  7, 12, 13, 14, 15,
+  16, 17, 18, 19, 24, 25, 26, 27,
+  20, 21, 22, 23, 28, 29, 30, 31,
+};
+static const uint8_t orders_8x8[64] = {
+  0,   1,  4,  5, 16, 17, 20, 21,
+  2,   3,  6,  7, 18, 19, 22, 23,
+  8,   9, 12, 13, 24, 25, 28, 29,
+  10, 11, 14, 15, 26, 27, 30, 31,
+  32, 33, 36, 37, 48, 49, 52, 53,
+  34, 35, 38, 39, 50, 51, 54, 55,
+  40, 41, 44, 45, 56, 57, 60, 61,
+  42, 43, 46, 47, 58, 59, 62, 63,
+};
+static const uint8_t *const orders[BLOCK_SIZES] = {
+  orders_8x8, orders_8x8, orders_8x8, orders_8x8,
+  orders_8x16, orders_16x8, orders_16x16,
+  orders_16x32, orders_32x16, orders_32x32,
+  orders_32x64, orders_64x32, orders_64x64,
+};
+
+static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
+                          int right_available,
+                          TX_SIZE txsz, int y, int x, int ss_x) {
+  if (y == 0) {  // top transform row: above-right may lie outside the block
+    int wl = mi_width_log2_lookup[bsize];
+    int hl = mi_height_log2_lookup[bsize];
+    int w = 1 << (wl + 1 - ss_x);  // block width, same units as x and step
+    int step = 1 << txsz;          // transform width in those units
+    const uint8_t *order = orders[bsize];  // per-size table (coding order?)
+    int my_order, tr_order;
+
+    if (x + step < w)  // above-right columns still belong to this block's span
+      return 1;
+
+    mi_row = (mi_row & 7) >> hl;  // row modulo 8, rescaled to block rows
+    mi_col = (mi_col & 7) >> wl;  // column modulo 8, rescaled to block columns
+
+    if (mi_row == 0)  // first block row of the 8-row group: use caller's flag
+      return right_available;
+
+    if (((mi_col + 1) << wl) >= 8)  // last block column of the 8-column group
+      return 0;
+
+    my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0];  // this block
+    tr_order = order[((mi_row - 1) << (3 - wl)) + mi_col + 1];  // up-right one
+
+    return my_order > tr_order && right_available;  // up-right ranks earlier
+  } else {
+    int wl = mi_width_log2_lookup[bsize];
+    int w = 1 << (wl + 1 - ss_x);  // block width, same units as x and step
+    int step = 1 << txsz;
+
+    return x + step < w;  // lower rows: only columns inside the block count
+  }
+}
+
+static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col,
+                           int bottom_available, TX_SIZE txsz,
+                           int y, int x, int ss_y) {
+  if (x == 0) {  // any other x falls through to the unconditional 0 below
+    int wl = mi_width_log2_lookup[bsize];
+    int hl = mi_height_log2_lookup[bsize];
+    int h = 1 << (hl + 1 - ss_y);  // block height, same units as y and step
+    int step = 1 << txsz;          // transform height in those units
+    const uint8_t *order = orders[bsize];  // per-size table (coding order?)
+    int my_order, bl_order;
+
+    mi_row = (mi_row & 7) >> hl;  // row modulo 8, rescaled to block rows
+    mi_col = (mi_col & 7) >> wl;  // column modulo 8, rescaled to block columns
+
+    if (mi_col == 0)  // first block column: bounded by the 8-row group's end
+      return bottom_available &&
+             (mi_row << (hl + !ss_y)) + y + step < (8 << !ss_y);
+
+    if (((mi_row + 1) << hl) >= 8)  // last block row of the 8-row group
+      return 0;
+
+    if (y + step < h)  // rows below the transform are still inside this block
+      return 1;
+
+    my_order = order[((mi_row + 0) << (3 - wl)) + mi_col + 0];  // this block
+    bl_order = order[((mi_row + 1) << (3 - wl)) + mi_col - 1];  // down-left one
+
+    return bl_order < my_order && bottom_available;  // down-left ranks earlier
+  } else {
+    return 0;  // x != 0: bottom-left is never reported as available
+  }
+}
+#endif
+
 typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                               const uint8_t *above, const uint8_t *left);
 
@@ -66,6 +216,11 @@
 
   INIT_ALL_SIZES(pred[V_PRED], v);
   INIT_ALL_SIZES(pred[H_PRED], h);
+#if CONFIG_EXT_IPRED_BLTR
+  INIT_ALL_SIZES(pred[D207_PRED], d207e);
+  INIT_ALL_SIZES(pred[D45_PRED], d45e);
+  INIT_ALL_SIZES(pred[D63_PRED], d63e);
+#else
   INIT_ALL_SIZES(pred[D207_PRED], d207);
 #if CONFIG_MISC_FIXES
   pred[D45_PRED][TX_4X4] = vpx_d45e_predictor_4x4;
@@ -74,6 +229,7 @@
   INIT_ALL_SIZES(pred[D45_PRED], d45);
 #endif
   INIT_ALL_SIZES(pred[D63_PRED], d63);
+#endif
   INIT_ALL_SIZES(pred[D117_PRED], d117);
   INIT_ALL_SIZES(pred[D135_PRED], d135);
   INIT_ALL_SIZES(pred[D153_PRED], d153);
@@ -87,6 +243,11 @@
 #if CONFIG_VP9_HIGHBITDEPTH
   INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
   INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
+#if CONFIG_EXT_IPRED_BLTR
+  INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207e);
+  INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45e);
+  INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63);
+#else
   INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207);
 #if CONFIG_MISC_FIXES
   pred_high[D45_PRED][TX_4X4] = vpx_highbd_d45e_predictor_4x4;
@@ -95,6 +256,7 @@
   INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45);
 #endif
   INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63);
+#endif
   INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117);
   INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135);
   INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153);
@@ -109,6 +271,13 @@
 #undef intra_pred_allsizes
 }
 
+#if CONFIG_EXT_IPRED_BLTR
+static inline void memset16(uint16_t *dst, int val, int n) {  // 16-bit fill
+  while (n--)  // n stores; no guard for n < 0 (callers here pass n >= 0)
+    *dst++ = val;  // val is converted to uint16_t by the assignment
+}
+#endif
+
 #if CONFIG_VP9_HIGHBITDEPTH
 static void build_intra_predictors_high(const MACROBLOCKD *xd,
                                         const uint8_t *ref8,
@@ -117,22 +286,35 @@
                                         int dst_stride,
                                         PREDICTION_MODE mode,
                                         TX_SIZE tx_size,
+#if CONFIG_EXT_IPRED_BLTR
+                                        int n_top_px, int n_topright_px,
+                                        int n_left_px, int n_bottomleft_px,
+#else
                                         int up_available,
                                         int left_available,
                                         int right_available,
+#endif
                                         int x, int y,
                                         int plane, int bd) {
   int i;
   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+#if CONFIG_EXT_IPRED_BLTR
+  DECLARE_ALIGNED(16, uint16_t, left_col[64]);  // holds bs << need_bottom
+#else
   DECLARE_ALIGNED(16, uint16_t, left_col[32]);
+#endif
   DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]);
   uint16_t *above_row = above_data + 16;
   const uint16_t *const_above_row = above_row;
   const int bs = 4 << tx_size;
+#if CONFIG_EXT_IPRED_BLTR
+  const uint16_t *above_ref = ref - ref_stride;
+#else
   int frame_width, frame_height;
   int x0, y0;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
+#endif
   int base = 128 << (bd - 8);
   // 127 127 127 .. 127 127 127 127 127 127
   // 129  A   B  ..  Y   Z
@@ -140,6 +322,53 @@
   // 129  E   F  ..  U   V
   // 129  G   H  ..  S   T   T   T   T   T
 
+#if CONFIG_EXT_IPRED_BLTR
+  (void) x;
+  (void) y;
+  (void) plane;
+
+  // NEED_LEFT
+  if (extend_modes[mode] & NEED_LEFT) {
+    const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+    i = 0;
+    if (n_left_px > 0) {
+      for (; i < n_left_px; i++)
+        left_col[i] = ref[i * ref_stride - 1];
+      if (need_bottom && n_bottomleft_px > 0) {
+        assert(i == bs);
+        for (; i < bs + n_bottomleft_px; i++)
+          left_col[i] = ref[i * ref_stride - 1];
+      }
+      if (i < (bs << need_bottom))
+        memset16(&left_col[i], left_col[i - 1], (bs << need_bottom) - i);
+    } else {
+      memset16(left_col, base + 1, bs << need_bottom);
+    }
+  }
+
+  // NEED_ABOVE
+  if (extend_modes[mode] & NEED_ABOVE) {
+    const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+    if (n_top_px > 0) {
+      memcpy(above_row, above_ref, n_top_px * 2);
+      i = n_top_px;
+      if (need_right && n_topright_px > 0) {
+        assert(n_top_px == bs);
+        memcpy(above_row + bs, above_ref + bs, n_topright_px * 2);
+        i += n_topright_px;
+      }
+      if (i < (bs << need_right))
+        memset16(&above_row[i], above_row[i - 1], (bs << need_right) - i);
+    } else {
+      memset16(above_row, base - 1, bs << need_right);
+    }
+  }
+
+  if (extend_modes[mode] & NEED_ABOVELEFT) {
+    above_row[-1] = n_top_px > 0 ?
+        (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
+  }
+#else
   // Get current frame pointer, width and height.
   if (plane == 0) {
     frame_width = xd->cur_buf->y_width;
@@ -227,12 +456,19 @@
     // TODO(Peter): this value should probably change for high bitdepth
     above_row[-1] = base - 1;
   }
+#endif
 
   // predict
   if (mode == DC_PRED) {
+#if CONFIG_EXT_IPRED_BLTR
+    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride,
+                                                       const_above_row,
+                                                       left_col, xd->bd);
+#else
     dc_pred_high[left_available][up_available][tx_size](dst, dst_stride,
                                                         const_above_row,
                                                         left_col, xd->bd);
+#endif
   } else {
     pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col,
                              xd->bd);
@@ -243,18 +479,28 @@
 static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
                                    int ref_stride, uint8_t *dst, int dst_stride,
                                    PREDICTION_MODE mode, TX_SIZE tx_size,
+#if CONFIG_EXT_IPRED_BLTR
+                                   int n_top_px, int n_topright_px,
+                                   int n_left_px, int n_bottomleft_px,
+#else
                                    int up_available, int left_available,
-                                   int right_available, int x, int y,
-                                   int plane) {
+                                   int right_available,
+#endif
+                                   int x, int y, int plane) {
   int i;
+#if CONFIG_EXT_IPRED_BLTR
+  DECLARE_ALIGNED(16, uint8_t, left_col[64]);
+  const uint8_t *above_ref = ref - ref_stride;
+#else
   DECLARE_ALIGNED(16, uint8_t, left_col[32]);
+  int frame_width, frame_height;
+  int x0, y0;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+#endif
   DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]);
   uint8_t *above_row = above_data + 16;
   const uint8_t *const_above_row = above_row;
   const int bs = 4 << tx_size;
-  int frame_width, frame_height;
-  int x0, y0;
-  const struct macroblockd_plane *const pd = &xd->plane[plane];
 
   // 127 127 127 .. 127 127 127 127 127 127
   // 129  A   B  ..  Y   Z
@@ -263,6 +509,16 @@
   // 129  G   H  ..  S   T   T   T   T   T
   // ..
 
+#if CONFIG_EXT_IPRED_BLTR
+  (void) xd;
+  (void) x;
+  (void) y;
+  (void) plane;
+  assert(n_top_px >= 0);
+  assert(n_topright_px >= 0);
+  assert(n_left_px >= 0);
+  assert(n_bottomleft_px >= 0);
+#else
   // Get current frame pointer, width and height.
   if (plane == 0) {
     frame_width = xd->cur_buf->y_width;
@@ -275,9 +531,27 @@
   // Get block position in current frame.
   x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
   y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
+#endif
 
   // NEED_LEFT
   if (extend_modes[mode] & NEED_LEFT) {
+#if CONFIG_EXT_IPRED_BLTR
+    const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+    i = 0;
+    if (n_left_px > 0) {
+      for (; i < n_left_px; i++)
+        left_col[i] = ref[i * ref_stride - 1];
+      if (need_bottom && n_bottomleft_px > 0) {
+        assert(i == bs);
+        for (; i < bs + n_bottomleft_px; i++)
+          left_col[i] = ref[i * ref_stride - 1];
+      }
+      if (i < (bs << need_bottom))
+        memset(&left_col[i], left_col[i - 1], (bs << need_bottom) - i);
+    } else {
+      memset(left_col, 129, bs << need_bottom);
+    }
+#else
     if (left_available) {
       if (xd->mb_to_bottom_edge < 0) {
         /* slower path if the block needs border extension */
@@ -299,10 +573,27 @@
     } else {
       memset(left_col, 129, bs);
     }
+#endif
   }
 
   // NEED_ABOVE
   if (extend_modes[mode] & NEED_ABOVE) {
+#if CONFIG_EXT_IPRED_BLTR
+    const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+    if (n_top_px > 0) {
+      memcpy(above_row, above_ref, n_top_px);
+      i = n_top_px;
+      if (need_right && n_topright_px > 0) {
+        assert(n_top_px == bs);
+        memcpy(above_row + bs, above_ref + bs, n_topright_px);
+        i += n_topright_px;
+      }
+      if (i < (bs << need_right))
+        memset(&above_row[i], above_row[i - 1], (bs << need_right) - i);
+    } else {
+      memset(above_row, 127, bs << need_right);
+    }
+#else
     if (up_available) {
       const uint8_t *above_ref = ref - ref_stride;
       if (xd->mb_to_right_edge < 0) {
@@ -327,8 +618,14 @@
       memset(above_row, 127, bs);
       above_row[-1] = 127;
     }
+#endif
   }
 
+#if CONFIG_EXT_IPRED_BLTR
+  if (extend_modes[mode] & NEED_ABOVELEFT) {
+    above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
+  }
+#else
   // NEED_ABOVERIGHT
   if (extend_modes[mode] & NEED_ABOVERIGHT) {
     if (up_available) {
@@ -374,29 +671,59 @@
       above_row[-1] = 127;
     }
   }
+#endif
 
   // predict
   if (mode == DC_PRED) {
+#if CONFIG_EXT_IPRED_BLTR
+    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride,
+                                                  const_above_row, left_col);
+#else
     dc_pred[left_available][up_available][tx_size](dst, dst_stride,
                                                    const_above_row, left_col);
+#endif
   } else {
     pred[mode][tx_size](dst, dst_stride, const_above_row, left_col);
   }
 }
 
-void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in,
+void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
                              TX_SIZE tx_size, PREDICTION_MODE mode,
                              const uint8_t *ref, int ref_stride,
                              uint8_t *dst, int dst_stride,
                              int aoff, int loff, int plane) {
-  const int bw = (1 << bwl_in);
   const int txw = (1 << tx_size);
   const int have_top = loff || xd->up_available;
   const int have_left = aoff || xd->left_available;
+#if !CONFIG_EXT_IPRED_BLTR
+  const int bw = (1 << bwl_in);
   const int have_right = (aoff + txw) < bw;
+#endif
   const int x = aoff * 4;
   const int y = loff * 4;
+#if CONFIG_EXT_IPRED_BLTR
+  const int bw = VPXMAX(2, 1 << bwl_in);
+  const int bh = VPXMAX(2, 1 << bhl_in);
+  const int mi_row = -xd->mb_to_top_edge >> 6;
+  const int mi_col = -xd->mb_to_left_edge >> 6;
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int have_right = vp10_has_right(bsize, mi_row, mi_col,
+                                        xd->mb_to_right_edge > 0,
+                                        tx_size, loff, aoff,
+                                        pd->subsampling_x);
+  const int have_bottom = vp10_has_bottom(bsize, mi_row, mi_col,
+                                          xd->mb_to_bottom_edge > 0,
+                                          tx_size, loff, aoff,
+                                          pd->subsampling_y);
+  const int wpx = 4 * bw;
+  const int hpx = 4 * bh;
+  const int txpx = 4 * txw;
 
+  int xr = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + (wpx - x - txpx);
+  int yd =
+      (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + (hpx - y - txpx);
+
   if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
     const int bs = 4 * (1 << tx_size);
     const int stride = 4 * (1 << bwl_in);
@@ -434,6 +761,27 @@
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
+                                tx_size,
+                                have_top ? VPXMIN(txpx, xr + txpx) : 0,
+                                have_top && have_right ? VPXMIN(txpx, xr) : 0,
+                                have_left ? VPXMIN(txpx, yd + txpx) : 0,
+                                have_bottom && have_left ? VPXMIN(txpx, yd) : 0,
+                                x, y, plane, xd->bd);
+    return;
+  }
+#endif
+  build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
+                         tx_size,
+                         have_top ? VPXMIN(txpx, xr + txpx) : 0,
+                         have_top && have_right ? VPXMIN(txpx, xr) : 0,
+                         have_left ? VPXMIN(txpx, yd + txpx) : 0,
+                         have_bottom && have_left ? VPXMIN(txpx, yd) : 0,
+                         x, y, plane);
+#else
+  (void) bhl_in;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
                                 tx_size, have_top, have_left, have_right,
                                 x, y, plane, xd->bd);
     return;
@@ -441,6 +789,7 @@
 #endif
   build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
                          have_top, have_left, have_right, x, y, plane);
+#endif
 }
 
 void vp10_init_intra_predictors(void) {
--- a/vp10/common/reconintra.h
+++ b/vp10/common/reconintra.h
@@ -20,7 +20,7 @@
 
 void vp10_init_intra_predictors(void);
 
-void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in,
+void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
                              TX_SIZE tx_size, PREDICTION_MODE mode,
                              const uint8_t *ref, int ref_stride,
                              uint8_t *dst, int dst_stride,
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -360,7 +360,7 @@
     if (plane == 0)
       mode = xd->mi[0]->bmi[(row << 1) + col].as_mode;
 
-  vp10_predict_intra_block(xd, pd->n4_wl, tx_size, mode,
+  vp10_predict_intra_block(xd, pd->n4_wl, pd->n4_hl, tx_size, mode,
                           dst, pd->dst.stride, dst, pd->dst.stride,
                           col, row, plane);
 
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -963,6 +963,7 @@
   const scan_order *const scan_order = get_scan(tx_size, tx_type);
   PREDICTION_MODE mode;
   const int bwl = b_width_log2_lookup[plane_bsize];
+  const int bhl = b_height_log2_lookup[plane_bsize];
   const int diff_stride = 4 * (1 << bwl);
   uint8_t *src, *dst;
   int16_t *src_diff;
@@ -976,7 +977,7 @@
   src_diff = &p->src_diff[4 * (j * diff_stride + i)];
 
   mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
-  vp10_predict_intra_block(xd, bwl, tx_size, mode, dst, dst_stride,
+  vp10_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride,
                           dst, dst_stride, i, j, plane);
 
 #if CONFIG_VP9_HIGHBITDEPTH
--- a/vp10/encoder/mbgraph.c
+++ b/vp10/encoder/mbgraph.c
@@ -146,7 +146,7 @@
     unsigned int err;
 
     xd->mi[0]->mbmi.mode = mode;
-    vp10_predict_intra_block(xd, 2, TX_16X16, mode,
+    vp10_predict_intra_block(xd, 2, 2, TX_16X16, mode,
                             x->plane[0].src.buf, x->plane[0].src.stride,
                             xd->plane[0].dst.buf, xd->plane[0].dst.stride,
                             0, 0, 0);
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -958,7 +958,7 @@
                                                                   p->src_diff);
           tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
           xd->mi[0]->bmi[block].as_mode = mode;
-          vp10_predict_intra_block(xd, 1, TX_4X4, mode, dst, dst_stride,
+          vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
                                   dst, dst_stride,
                                   col + idx, row + idy, 0);
           vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
@@ -1058,7 +1058,7 @@
             vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
         tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
         xd->mi[0]->bmi[block].as_mode = mode;
-        vp10_predict_intra_block(xd, 1, TX_4X4, mode, dst, dst_stride,
+        vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
                                 dst, dst_stride, col + idx, row + idy, 0);
         vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
 
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -29,7 +29,7 @@
     pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
     pred[B_RD_PRED] = vpx_d135_predictor_4x4;
     pred[B_VR_PRED] = vpx_d117_predictor_4x4;
-    pred[B_VL_PRED] = vpx_d63e_predictor_4x4;
+    pred[B_VL_PRED] = vpx_d63f_predictor_4x4;
     pred[B_HD_PRED] = vpx_d153_predictor_4x4;
     pred[B_HU_PRED] = vpx_d207_predictor_4x4;
 }
--- a/vpx_dsp/intrapred.c
+++ b/vpx_dsp/intrapred.c
@@ -44,6 +44,21 @@
       dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
 }
 
+static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                   const uint8_t *above, const uint8_t *left) {
+  int r, c;
+  (void) above;  // only the left column is read by this predictor
+
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {  // top index read: ((bs - 1) >> 1) + bs + 1
+      dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1],
+                            left[(c >> 1) + r + 2])  // odd columns: 3 samples
+          : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]);  // even: 2
+    }
+    dst += stride;  // advance to the next output row
+  }
+}
+
 static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                  const uint8_t *above, const uint8_t *left) {
   int r, c;
@@ -61,6 +76,20 @@
   }
 }
 
+static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  int r, c;
+  (void) left;  // only the above row is read by this predictor
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {  // top index read: ((bs - 1) >> 1) + bs + 1
+      dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1],
+                            above[(r >> 1) + c + 2])  // odd rows: 3 samples
+          : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]);  // even: 2
+    }
+    dst += stride;  // advance to the next output row
+  }
+}
+
 static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                  const uint8_t *above, const uint8_t *left) {
   const uint8_t above_right = above[bs - 1];
@@ -80,6 +109,19 @@
   }
 }
 
+static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+                                  const uint8_t *above, const uint8_t *left) {
+  int r, c;
+  (void) left;  // only the above row is read by this predictor
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      dst[c] = AVG3(above[r + c], above[r + c + 1],  // samples along r + c
+                    above[r + c + 1 + (r + c + 2 < bs * 2)]);  // max 2*bs - 1
+    }
+    dst += stride;  // advance to the next output row
+  }
+}
+
 static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                   const uint8_t *above, const uint8_t *left) {
   int r, c;
@@ -319,7 +361,7 @@
               DST(3, 3) = AVG3(E, F, G);  // differs from vp8
 }
 
-void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
                               const uint8_t *above, const uint8_t *left) {
   const int A = above[0];
   const int B = above[1];
@@ -486,6 +528,23 @@
   }
 }
 
+static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride,
+                                          int bs, const uint16_t *above,
+                                          const uint16_t *left, int bd) {
+  int r, c;
+  (void) above;  // only the left column is read by this predictor
+  (void) bd;     // bit depth is not needed by the averaging below
+
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {  // top index read: ((bs - 1) >> 1) + bs + 1
+      dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1],
+                            left[(c >> 1) + r + 2])  // odd columns: 3 samples
+          : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]);  // even: 2
+    }
+    dst += stride;  // advance to the next output row
+  }
+}
+
 static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride,
                                         int bs, const uint16_t *above,
                                         const uint16_t *left, int bd) {
@@ -502,6 +561,8 @@
   }
 }
 
+#define highbd_d63e_predictor highbd_d63_predictor
+
 static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
                                         const uint16_t *above,
                                         const uint16_t *left, int bd) {
@@ -527,7 +588,7 @@
   for (r = 0; r < bs; ++r) {
     for (c = 0; c < bs; ++c) {
       dst[c] = AVG3(above[r + c], above[r + c + 1],
-                    above[r + c + 1 + (r + c + 2 < 8)]);
+                    above[r + c + 1 + (r + c + 2 < bs * 2)]);
     }
     dst += stride;
   }
@@ -771,6 +832,11 @@
 intra_pred_no_4x4(d207)
 intra_pred_no_4x4(d63)
 intra_pred_no_4x4(d45)
+#if CONFIG_EXT_IPRED_BLTR
+intra_pred_allsizes(d207e)
+intra_pred_allsizes(d63e)
+intra_pred_no_4x4(d45e)
+#endif
 intra_pred_no_4x4(d117)
 intra_pred_no_4x4(d135)
 intra_pred_no_4x4(d153)
@@ -781,7 +847,7 @@
 intra_pred_allsizes(dc_left)
 intra_pred_allsizes(dc_top)
 intra_pred_allsizes(dc)
-#if CONFIG_VP9_HIGHBITDEPTH && CONFIG_MISC_FIXES
+#if CONFIG_VP9_HIGHBITDEPTH && CONFIG_MISC_FIXES && !CONFIG_EXT_IPRED_BLTR
 intra_pred_highbd_sized(d45e, 4)
 #endif
 #undef intra_pred_allsizes
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -57,6 +57,9 @@
 add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d207e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_4x4/;
+
 add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc";
 
@@ -69,6 +72,9 @@
 add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d63e_predictor_4x4/;
 
+add_proto qw/void vpx_d63f_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63f_predictor_4x4/;
+
 add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
 
@@ -108,12 +114,21 @@
 add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d207e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_8x8/;
+
 add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d45e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_8x8/;
+
 add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d63e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_8x8/;
+
 add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
 
@@ -147,12 +162,21 @@
 add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d207e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_16x16/;
+
 add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d45e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_16x16/;
+
 add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d63e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_16x16/;
+
 add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
 
@@ -186,12 +210,21 @@
 add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d207e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_32x32/;
+
 add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d45e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_32x32/;
+
 add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc";
 
+add_proto qw/void vpx_d63e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_32x32/;
+
 add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
 
@@ -227,6 +260,9 @@
   add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_4x4/;
 
+  add_proto qw/void vpx_highbd_d207e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d207e_predictor_4x4/;
+
   add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_4x4/;
 
@@ -236,6 +272,9 @@
   add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_4x4/;
 
+  add_proto qw/void vpx_highbd_d63e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d63e_predictor_4x4/;
+
   add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_4x4/;
 
@@ -269,12 +308,21 @@
   add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_8x8/;
 
+  add_proto qw/void vpx_highbd_d207e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d207e_predictor_8x8/;
+
   add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_8x8/;
 
+  add_proto qw/void vpx_highbd_d45e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d45e_predictor_8x8/;
+
   add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_8x8/;
 
+  add_proto qw/void vpx_highbd_d63e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d63e_predictor_8x8/;
+
   add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_8x8/;
 
@@ -308,12 +356,21 @@
   add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_16x16/;
 
+  add_proto qw/void vpx_highbd_d207e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d207e_predictor_16x16/;
+
   add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_16x16/;
 
+  add_proto qw/void vpx_highbd_d45e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d45e_predictor_16x16/;
+
   add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_16x16/;
 
+  add_proto qw/void vpx_highbd_d63e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d63e_predictor_16x16/;
+
   add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_16x16/;
 
@@ -347,11 +404,20 @@
   add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d207_predictor_32x32/;
 
+  add_proto qw/void vpx_highbd_d207e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d207e_predictor_32x32/;
+
   add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d45_predictor_32x32/;
 
+  add_proto qw/void vpx_highbd_d45e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d45e_predictor_32x32/;
+
   add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_d63_predictor_32x32/;
+
+  add_proto qw/void vpx_highbd_d63e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+  specialize qw/vpx_highbd_d63e_predictor_32x32/;
 
   add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
   specialize qw/vpx_highbd_h_predictor_32x32/;