shithub: libvpx

Download patch

ref: 138ec38cab4e8093d459600cd744720b036f2d50
parent: cbd1315ac433e4e81e70fcee0eaad249148dc44e
author: John Koleszar <[email protected]>
date: Tue Apr 23 05:51:09 EDT 2013

Convert coeff to per-plane MACROBLOCK data

This commit moves the coeff storage out of the MACROBLOCK struct and into
its per-plane member (struct macroblock_plane), so that each plane owns its
own 64*64 coeff buffer. The next commit will remove the coeff member from
the BLOCK structure so that coeff is consistently accessed per-plane.

Also refactors vp9_sb_block_error_c and vp9_sb_uv_block_error_c into
block_error plus the block_error_sby and block_error_sbuv wrappers, making
the error computation aware of variable subsampling.

Change-Id: I18c30f87f27c3a012119b6c1970d5fa499804455

--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -84,12 +84,12 @@
 
 struct macroblock_plane {
   DECLARE_ALIGNED(16, int16_t, src_diff[64*64]);
+  DECLARE_ALIGNED(16, int16_t, coeff[64*64]);
 };
 
 typedef struct macroblock MACROBLOCK;
 struct macroblock {
   struct macroblock_plane plane[MAX_MB_PLANE];
-  DECLARE_ALIGNED(16, int16_t, coeff[64*64+32*32*2]);
   // 16 Y blocks, 4 U blocks, 4 V blocks,
   BLOCK block[24];
 
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1736,8 +1736,12 @@
 void vp9_setup_block_ptrs(MACROBLOCK *x) {
   int i;
 
-  for (i = 0; i < 24; i++)
-    x->block[i].coeff = x->coeff + i * 16;
+  for (i = 0; i < 16; i++)
+    x->block[i].coeff = x->plane[0].coeff + i * 16;
+  for (i = 16; i < 20; i++)
+    x->block[i].coeff = x->plane[1].coeff + (i - 16) * 16;
+  for (i = 20; i < 24; i++)
+    x->block[i].coeff = x->plane[2].coeff + (i - 20) * 16;
 }
 
 void vp9_build_block_offsets(MACROBLOCK *x) {
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -78,7 +78,7 @@
     const int x_idx = n & (bw - 1), y_idx = n >> bwl;
 
     vp9_short_fdct32x32(x->plane[0].src_diff + y_idx * stride * 32 + x_idx * 32,
-                        x->coeff + n * 1024, stride * 2);
+                        x->plane[0].coeff + n * 1024, stride * 2);
   }
 }
 
@@ -97,10 +97,10 @@
     if (tx_type != DCT_DCT) {
       vp9_short_fht16x16(x->plane[0].src_diff +
                              y_idx * stride * 16 + x_idx * 16,
-                         x->coeff + n * 256, stride, tx_type);
+                         x->plane[0].coeff + n * 256, stride, tx_type);
     } else {
       x->fwd_txm16x16(x->plane[0].src_diff + y_idx * stride * 16 + x_idx * 16,
-                      x->coeff + n * 256, stride * 2);
+                      x->plane[0].coeff + n * 256, stride * 2);
     }
   }
 }
@@ -118,10 +118,10 @@
 
     if (tx_type != DCT_DCT) {
       vp9_short_fht8x8(x->plane[0].src_diff + y_idx * stride * 8 + x_idx * 8,
-                       x->coeff + n * 64, stride, tx_type);
+                       x->plane[0].coeff + n * 64, stride, tx_type);
     } else {
       x->fwd_txm8x8(x->plane[0].src_diff + y_idx * stride * 8 + x_idx * 8,
-                    x->coeff + n * 64, stride * 2);
+                    x->plane[0].coeff + n * 64, stride * 2);
     }
   }
 }
@@ -139,10 +139,10 @@
 
     if (tx_type != DCT_DCT) {
       vp9_short_fht4x4(x->plane[0].src_diff + y_idx * stride * 4 + x_idx * 4,
-                       x->coeff + n * 16, stride, tx_type);
+                       x->plane[0].coeff + n * 16, stride, tx_type);
     } else {
       x->fwd_txm4x4(x->plane[0].src_diff + y_idx * stride * 4 + x_idx * 4,
-                    x->coeff + n * 16, stride * 2);
+                    x->plane[0].coeff + n * 16, stride * 2);
     }
   }
 }
@@ -150,15 +150,12 @@
 void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
   assert(bsize == BLOCK_SIZE_SB64X64);
   vp9_clear_system_state();
-  vp9_short_fdct32x32(x->plane[1].src_diff,
-                      x->coeff + 4096, 64);
-  vp9_short_fdct32x32(x->plane[2].src_diff,
-                      x->coeff + 4096 + 1024, 64);
+  vp9_short_fdct32x32(x->plane[1].src_diff, x->plane[1].coeff, 64);
+  vp9_short_fdct32x32(x->plane[2].src_diff, x->plane[2].coeff, 64);
 }
 
 void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
   const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
-  const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2;
   const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
   const int stride = 16 << (bwl - 1);
   int n;
@@ -168,15 +165,14 @@
     const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
 
     x->fwd_txm16x16(x->plane[1].src_diff + y_idx * stride * 16 + x_idx * 16,
-                    x->coeff + uoff + n * 256, stride * 2);
+                    x->plane[1].coeff + n * 256, stride * 2);
     x->fwd_txm16x16(x->plane[2].src_diff + y_idx * stride * 16 + x_idx * 16,
-                    x->coeff + voff + n * 256, stride * 2);
+                    x->plane[2].coeff + n * 256, stride * 2);
   }
 }
 
 void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
   const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1;
-  const int uoff = (8 * 8) << (bwl + bhl), voff = (uoff * 5) >> 2;
   const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
   const int stride = 8 << (bwl - 1);
   int n;
@@ -186,15 +182,14 @@
     const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
 
     x->fwd_txm8x8(x->plane[1].src_diff + y_idx * stride * 8 + x_idx * 8,
-                  x->coeff + uoff + n * 64, stride * 2);
+                  x->plane[1].coeff + n * 64, stride * 2);
     x->fwd_txm8x8(x->plane[2].src_diff + y_idx * stride * 8 + x_idx * 8,
-                  x->coeff + voff + n * 64, stride * 2);
+                  x->plane[2].coeff + n * 64, stride * 2);
   }
 }
 
 void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
   const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2;
-  const int uoff = (4 * 4) << (bwl + bhl), voff = (uoff * 5) >> 2;
   const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
   const int stride = 4 << (bwl - 1);
   int n;
@@ -204,9 +199,9 @@
     const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
 
     x->fwd_txm4x4(x->plane[1].src_diff + y_idx * stride * 4 + x_idx * 4,
-                  x->coeff + uoff + n * 16, stride * 2);
+                  x->plane[1].coeff + n * 16, stride * 2);
     x->fwd_txm4x4(x->plane[2].src_diff + y_idx * stride * 4 + x_idx * 4,
-                  x->coeff + voff + n * 16, stride * 2);
+                  x->plane[2].coeff + n * 16, stride * 2);
   }
 }
 
@@ -265,7 +260,8 @@
   vp9_token_state tokens[1025][2];
   unsigned best_index[1025][2];
   const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
-  const int16_t *coeff_ptr = mb->coeff + ib * 16;
+  const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
+                                          pb_idx.block, 16);
   int16_t *qcoeff_ptr;
   int16_t *dqcoeff_ptr;
   int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block], final_eob, sz = 0;
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -33,7 +33,7 @@
   int i, rc, eob;
   int zbin;
   int x, y, z, sz;
-  int16_t *coeff_ptr       = mb->coeff + b_idx * 16;
+  int16_t *coeff_ptr       = BLOCK_OFFSET(mb->plane[0].coeff, b_idx, 16);
   // ht is luma-only
   int16_t *qcoeff_ptr      = BLOCK_OFFSET(xd->plane[0].qcoeff, b_idx, 16);
   int16_t *dqcoeff_ptr     = BLOCK_OFFSET(xd->plane[0].dqcoeff, b_idx, 16);
@@ -102,7 +102,8 @@
   int i, rc, eob;
   int zbin;
   int x, y, z, sz;
-  int16_t *coeff_ptr       = mb->coeff + b_idx * 16;
+  int16_t *coeff_ptr       = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
+                                          pb_idx.block, 16);
   int16_t *qcoeff_ptr      = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
                                           pb_idx.block, 16);
   int16_t *dqcoeff_ptr     = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff,
@@ -163,6 +164,8 @@
                                      pb_idx.block, 16);
   int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff,
                                       pb_idx.block, 16);
+  int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
+                                    pb_idx.block, 16);
   BLOCK *const b = &mb->block[c_idx];
   BLOCKD *const d = &xd->block[c_idx];
   const int *pt_scan;
@@ -191,7 +194,6 @@
     int x, y, z, sz;
     int zero_run;
     int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
-    int16_t *coeff_ptr  = mb->coeff + 16 * b_idx;
     int16_t *zbin_ptr   = b->zbin;
     int16_t *round_ptr  = b->round;
     int16_t *quant_ptr  = b->quant;
@@ -331,7 +333,7 @@
   if (c_idx == 16) assert(pb_idx.plane == 1);
   if (c_idx == 20) assert(pb_idx.plane == 2);
   quantize(b->zrun_zbin_boost,
-           mb->coeff + 16 * b_idx,
+           BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
            256, b->skip_block,
            b->zbin, b->round, b->quant, b->quant_shift,
            BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
@@ -353,7 +355,7 @@
   if (c_idx == 16) assert(pb_idx.plane == 1);
   if (c_idx == 20) assert(pb_idx.plane == 2);
   quantize(b->zrun_zbin_boost,
-           mb->coeff + b_idx * 16,
+           BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
            1024, b->skip_block,
            b->zbin,
            b->round, b->quant, b->quant_shift,
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -648,13 +648,13 @@
                                  rd[TX_4X4][1] : rd[TX_8X8][1];
 }
 
-static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
-                                int block_size, int shift) {
+static int block_error(int16_t *coeff, int16_t *dqcoeff,
+                       int block_size, int shift) {
   int i;
   int64_t error = 0;
 
   for (i = 0; i < block_size; i++) {
-    unsigned int this_diff = coeff[i] - dqcoeff[i];
+    int this_diff = coeff[i] - dqcoeff[i];
     error += this_diff * this_diff;
   }
   error >>= shift;
@@ -662,24 +662,24 @@
   return error > INT_MAX ? INT_MAX : (int)error;
 }
 
-static int vp9_sb_uv_block_error_c(int16_t *coeff,
-                                   int16_t *dqcoeff0, int16_t *dqcoeff1,
-                                   int block_size, int shift) {
-  int i;
-  int64_t error = 0;
+static int block_error_sby(MACROBLOCK *x, int block_size, int shift) {
+  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
+                     block_size, shift);
+}
 
-  for (i = 0; i < block_size / 2; i++) {
-    unsigned int this_diff = coeff[i] - dqcoeff0[i];
-    error += this_diff * this_diff;
-  }
-  coeff += block_size / 2;
-  for (i = 0; i < block_size / 2; i++) {
-    unsigned int this_diff = coeff[i] - dqcoeff1[i];
-    error += this_diff * this_diff;
-  }
-  error >>= shift;
+static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
+  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
+  int64_t sum = 0;
+  int plane;
 
-  return error > INT_MAX ? INT_MAX : (int)error;
+  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
+    const int subsampling = x->e_mbd.plane[plane].subsampling_x +
+                            x->e_mbd.plane[plane].subsampling_y;
+    sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
+                       16 << (bwl + bhl - subsampling), 0);
+  }
+  sum >>= shift;
+  return sum > INT_MAX ? INT_MAX : (int)sum;
 }
 
 static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
@@ -716,8 +716,7 @@
   vp9_transform_sby_4x4(x, bsize);
   vp9_quantize_sby_4x4(x, bsize);
 
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
-                                     16 << (bwl + bhl), 2);
+  *distortion = block_error_sby(x, 16 << (bwl + bhl), 2);
   *rate       = rdcost_sby_4x4(cm, x, bsize);
   *skippable  = vp9_sby_is_skippable(xd, bsize);
 }
@@ -749,7 +748,7 @@
 static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                                 int *rate, int *distortion, int *skippable,
                                 BLOCK_SIZE_TYPE bsize) {
-  const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1;
+  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
   MACROBLOCKD *const xd = &x->e_mbd;
 
   xd->mode_info_context->mbmi.txfm_size = TX_8X8;
@@ -756,8 +755,7 @@
   vp9_transform_sby_8x8(x, bsize);
   vp9_quantize_sby_8x8(x, bsize);
 
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
-                                     64 << (bhl + bwl), 2);
+  *distortion = block_error_sby(x, 16 << (bhl + bwl), 2);
   *rate       = rdcost_sby_8x8(cm, x, bsize);
   *skippable  = vp9_sby_is_skippable(xd, bsize);
 }
@@ -787,7 +785,7 @@
 static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                                   int *rate, int *distortion, int *skippable,
                                   BLOCK_SIZE_TYPE bsize) {
-  const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
+  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
   MACROBLOCKD *const xd = &x->e_mbd;
 
   xd->mode_info_context->mbmi.txfm_size = TX_16X16;
@@ -794,8 +792,7 @@
   vp9_transform_sby_16x16(x, bsize);
   vp9_quantize_sby_16x16(x, bsize);
 
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
-                                     256 << (bwl + bhl), 2);
+  *distortion = block_error_sby(x, 16 << (bwl + bhl), 2);
   *rate       = rdcost_sby_16x16(cm, x, bsize);
   *skippable  = vp9_sby_is_skippable(xd, bsize);
 }
@@ -827,7 +824,7 @@
 static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                                   int *rate, int *distortion, int *skippable,
                                   BLOCK_SIZE_TYPE bsize) {
-  const int bwl = mb_width_log2(bsize) - 1, bhl = mb_height_log2(bsize) - 1;
+  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
   MACROBLOCKD *const xd = &x->e_mbd;
 
   xd->mode_info_context->mbmi.txfm_size = TX_32X32;
@@ -834,8 +831,7 @@
   vp9_transform_sby_32x32(x, bsize);
   vp9_quantize_sby_32x32(x, bsize);
 
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
-                                     1024 << (bwl + bhl), 0);
+  *distortion = block_error_sby(x, 16 << (bwl + bhl), 0);
   *rate       = rdcost_sby_32x32(cm, x, bsize);
   *skippable  = vp9_sby_is_skippable(xd, bsize);
 }
@@ -1385,7 +1381,6 @@
 static void super_block_uvrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skip,
                                  BLOCK_SIZE_TYPE bsize) {
-  const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2;
   MACROBLOCKD *const xd = &x->e_mbd;
 
   vp9_transform_sbuv_4x4(x, bsize);
@@ -1392,10 +1387,7 @@
   vp9_quantize_sbuv_4x4(x, bsize);
 
   *rate       = rd_cost_sbuv_4x4(cm, x, bsize);
-  *distortion = vp9_sb_uv_block_error_c(x->coeff + (16 << (bwl + bhl)),
-                                        xd->plane[1].dqcoeff,
-                                        xd->plane[2].dqcoeff,
-                                        32 << (bwl + bhl - 2), 2);
+  *distortion = block_error_sbuv(x, bsize, 2);
   *skip       = vp9_sbuv_is_skippable(xd, bsize);
 }
 
@@ -1430,7 +1422,6 @@
 static void super_block_uvrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skip,
                                  BLOCK_SIZE_TYPE bsize) {
-  const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1;
   MACROBLOCKD *const xd = &x->e_mbd;
 
   vp9_transform_sbuv_8x8(x, bsize);
@@ -1437,10 +1428,7 @@
   vp9_quantize_sbuv_8x8(x, bsize);
 
   *rate       = rd_cost_sbuv_8x8(cm, x, bsize);
-  *distortion = vp9_sb_uv_block_error_c(x->coeff + (64 << (bwl + bhl)),
-                                        xd->plane[1].dqcoeff,
-                                        xd->plane[2].dqcoeff,
-                                        128 << (bwl + bhl - 2), 2);
+  *distortion = block_error_sbuv(x, bsize, 2);
   *skip       = vp9_sbuv_is_skippable(xd, bsize);
 }
 
@@ -1475,7 +1463,6 @@
 static void super_block_uvrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                                    int *rate, int *distortion, int *skip,
                                    BLOCK_SIZE_TYPE bsize) {
-  const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
   MACROBLOCKD *const xd = &x->e_mbd;
 
   vp9_transform_sbuv_16x16(x, bsize);
@@ -1482,10 +1469,7 @@
   vp9_quantize_sbuv_16x16(x, bsize);
 
   *rate       = rd_cost_sbuv_16x16(cm, x, bsize);
-  *distortion = vp9_sb_uv_block_error_c(x->coeff + (256 << (bwl + bhl)),
-                                        xd->plane[1].dqcoeff,
-                                        xd->plane[2].dqcoeff,
-                                        512 << (bwl + bhl - 2), 2);
+  *distortion = block_error_sbuv(x, bsize, 2);
   *skip       = vp9_sbuv_is_skippable(xd, bsize);
 }
 
@@ -1521,7 +1505,6 @@
 static void super_block_uvrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                                    int *rate, int *distortion, int *skip,
                                    BLOCK_SIZE_TYPE bsize) {
-  const int bwl = mb_width_log2(bsize) - 1, bhl = mb_height_log2(bsize) - 1;
   MACROBLOCKD *const xd = &x->e_mbd;
 
   vp9_transform_sbuv_32x32(x, bsize);
@@ -1528,10 +1511,7 @@
   vp9_quantize_sbuv_32x32(x, bsize);
 
   *rate       = rd_cost_sbuv_32x32(cm, x, bsize);
-  *distortion = vp9_sb_uv_block_error_c(x->coeff + (1024 << (bwl + bhl)),
-                                        xd->plane[1].dqcoeff,
-                                        xd->plane[2].dqcoeff,
-                                        2048 << (bwl + bhl - 2), 0);
+  *distortion = block_error_sbuv(x, bsize, 0);
   *skip       = vp9_sbuv_is_skippable(xd, bsize);
 }