shithub: libvpx

Download patch

ref: a672351af986725df4cbeae546c15fc85fb2e38c
parent: e40a769036a97403718244caa81b8680a4c9c7eb
author: John Koleszar <[email protected]>
date: Thu Apr 25 10:44:44 EDT 2013

quantize: make 4x4, 8x8 common with larger transforms

There were 4 variants of the quantize loop in vp9_quantize.c; now
there is 1.

Change-Id: Ic853393411214b32d46a6ba53769413bd14e1cac

--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -155,7 +155,8 @@
   void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
   void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
   void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
-  void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, int y_blocks);
+  void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
+                         int y_blocks);
   void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2,
                               int y_blocks);
   void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -70,12 +70,12 @@
   tx_type = get_tx_type_4x4(&x->e_mbd, ib);
   if (tx_type != DCT_DCT) {
     vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
-    vp9_ht_quantize_b_4x4(x, ib, tx_type);
+    x->quantize_b_4x4(x, ib, tx_type, 16);
     vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
                      diff, 16, tx_type);
   } else {
     x->fwd_txm4x4(src_diff, coeff, 32);
-    x->quantize_b_4x4(x, ib, 16);
+    x->quantize_b_4x4(x, ib, tx_type, 16);
     vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[0].eobs[ib],
                                 BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
                                 diff, 32);
@@ -209,7 +209,7 @@
       tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
       if (tx_type != DCT_DCT) {
         vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
-        vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
+        x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
         vp9_short_iht4x4(dqcoeff, diff, 16, tx_type);
       } else if (!(i & 1) &&
                  get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
@@ -222,7 +222,7 @@
         i++;
       } else {
         x->fwd_txm4x4(src_diff, coeff, 32);
-        x->quantize_b_4x4(x, ib + iblock[i], 16);
+        x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
         vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]],
                                     dqcoeff, diff, 32);
       }
@@ -275,7 +275,7 @@
                      *(b->base_dst) + b->dst, b->dst_stride);
 
   x->fwd_txm4x4(src_diff, coeff, 16);
-  x->quantize_b_4x4(x, ib, 16);
+  x->quantize_b_4x4(x, ib, DCT_DCT, 16);
   vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[plane].eobs[block],
                               dqcoeff, diff, 16);
 
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -26,210 +26,7 @@
          plane == 1 ? 16 : 20;
 }
 
-void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
-  MACROBLOCKD *const xd = &mb->e_mbd;
-  int i, rc, eob;
-  int zbin;
-  int x, y, z, sz;
-  int16_t *coeff_ptr       = BLOCK_OFFSET(mb->plane[0].coeff, b_idx, 16);
-  // ht is luma-only
-  int16_t *qcoeff_ptr      = BLOCK_OFFSET(xd->plane[0].qcoeff, b_idx, 16);
-  int16_t *dqcoeff_ptr     = BLOCK_OFFSET(xd->plane[0].dqcoeff, b_idx, 16);
-  int16_t *zbin_boost_ptr  = mb->plane[0].zrun_zbin_boost;
-  int16_t *zbin_ptr        = mb->plane[0].zbin;
-  int16_t *round_ptr       = mb->plane[0].round;
-  int16_t *quant_ptr       = mb->plane[0].quant;
-  uint8_t *quant_shift_ptr = mb->plane[0].quant_shift;
-  int16_t *dequant_ptr     = xd->plane[0].dequant;
-  int zbin_oq_value        = mb->plane[0].zbin_extra;
-  const int *pt_scan = get_scan_4x4(tx_type);
-
-  vpx_memset(qcoeff_ptr, 0, 32);
-  vpx_memset(dqcoeff_ptr, 0, 32);
-
-  eob = -1;
-
-  if (!mb->skip_block) {
-    for (i = 0; i < 16; i++) {
-      rc   = pt_scan[i];
-      z    = coeff_ptr[rc];
-
-      zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
-      zbin_boost_ptr++;
-
-      sz = (z >> 31);                                 // sign of z
-      x  = (z ^ sz) - sz;                             // x = abs(z)
-
-      if (x >= zbin) {
-        x += round_ptr[rc];
-        y  = (((x * quant_ptr[rc]) >> 16) + x)
-             >> quant_shift_ptr[rc];                // quantize (x)
-        x  = (y ^ sz) - sz;                         // get the sign back
-        qcoeff_ptr[rc]  = x;                        // write to destination
-        dqcoeff_ptr[rc] = x * dequant_ptr[rc];      // dequantized value
-
-        if (y) {
-          eob = i;                                // last nonzero coeffs
-          zbin_boost_ptr = mb->plane[0].zrun_zbin_boost;  // reset zero run len
-        }
-      }
-    }
-  }
-
-  xd->plane[0].eobs[b_idx] = eob + 1;
-}
-
-void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks) {
-  MACROBLOCKD *const xd = &mb->e_mbd;
-  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
-  const int c_idx = plane_idx(pb_idx.plane);
-  int i, rc, eob;
-  int zbin;
-  int x, y, z, sz;
-  int16_t *coeff_ptr       = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
-                                          pb_idx.block, 16);
-  int16_t *qcoeff_ptr      = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
-                                          pb_idx.block, 16);
-  int16_t *dqcoeff_ptr     = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff,
-                                          pb_idx.block, 16);
-  int16_t *zbin_boost_ptr  = mb->plane[pb_idx.plane].zrun_zbin_boost;
-  int16_t *zbin_ptr        = mb->plane[pb_idx.plane].zbin;
-  int16_t *round_ptr       = mb->plane[pb_idx.plane].round;
-  int16_t *quant_ptr       = mb->plane[pb_idx.plane].quant;
-  uint8_t *quant_shift_ptr = mb->plane[pb_idx.plane].quant_shift;
-  int16_t *dequant_ptr     = xd->plane[0].dequant;
-  int zbin_oq_value        = mb->plane[pb_idx.plane].zbin_extra;
-
-  if (c_idx == 0) assert(pb_idx.plane == 0);
-  if (c_idx == 16) assert(pb_idx.plane == 1);
-  if (c_idx == 20) assert(pb_idx.plane == 2);
-  vpx_memset(qcoeff_ptr, 0, 32);
-  vpx_memset(dqcoeff_ptr, 0, 32);
-
-  eob = -1;
-
-  if (!mb->skip_block) {
-    for (i = 0; i < 16; i++) {
-      rc   = vp9_default_zig_zag1d_4x4[i];
-      z    = coeff_ptr[rc];
-
-      zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
-      zbin_boost_ptr++;
-
-      sz = (z >> 31);                                 // sign of z
-      x  = (z ^ sz) - sz;                             // x = abs(z)
-
-      if (x >= zbin) {
-        x += round_ptr[rc];
-
-        y  = (((x * quant_ptr[rc]) >> 16) + x)
-             >> quant_shift_ptr[rc];                // quantize (x)
-        x  = (y ^ sz) - sz;                         // get the sign back
-        qcoeff_ptr[rc]  = x;                        // write to destination
-        dqcoeff_ptr[rc] = x * dequant_ptr[rc];      // dequantized value
-
-        if (y) {
-          eob = i;                                // last nonzero coeffs
-          zbin_boost_ptr = mb->plane[pb_idx.plane].zrun_zbin_boost;
-        }
-      }
-    }
-  }
-
-  xd->plane[pb_idx.plane].eobs[pb_idx.block] = eob + 1;
-}
-
-void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
-                                int y_blocks) {
-  MACROBLOCKD *const xd = &mb->e_mbd;
-  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
-  const int c_idx = plane_idx(pb_idx.plane);
-  int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
-                                     pb_idx.block, 16);
-  int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff,
-                                      pb_idx.block, 16);
-  int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
-                                    pb_idx.block, 16);
-  const int *pt_scan = get_scan_8x8(tx_type);
-
-  if (c_idx == 0) assert(pb_idx.plane == 0);
-  if (c_idx == 16) assert(pb_idx.plane == 1);
-  if (c_idx == 20) assert(pb_idx.plane == 2);
-  vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t));
-  vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t));
-
-  if (!mb->skip_block) {
-    int i, rc, eob;
-    int zbin;
-    int x, y, z, sz;
-    int zero_run;
-    int16_t *zbin_boost_ptr = mb->plane[pb_idx.plane].zrun_zbin_boost;
-    int16_t *zbin_ptr   = mb->plane[pb_idx.plane].zbin;
-    int16_t *round_ptr  = mb->plane[pb_idx.plane].round;
-    int16_t *quant_ptr  = mb->plane[pb_idx.plane].quant;
-    uint8_t *quant_shift_ptr = mb->plane[pb_idx.plane].quant_shift;
-    int16_t *dequant_ptr = xd->plane[pb_idx.plane].dequant;
-    int zbin_oq_value = mb->plane[pb_idx.plane].zbin_extra;
-
-    eob = -1;
-
-    // Special case for DC as it is the one triggering access in various
-    // tables: {zbin, quant, quant_shift, dequant}_ptr[rc != 0]
-    {
-      z    = coeff_ptr[0];
-      zbin = (zbin_ptr[0] + zbin_boost_ptr[0] + zbin_oq_value);
-      zero_run = 1;
-
-      sz = (z >> 31);                                // sign of z
-      x  = (z ^ sz) - sz;                            // x = abs(z)
-
-      if (x >= zbin) {
-        x += (round_ptr[0]);
-        y  = ((int)(((int)(x * quant_ptr[0]) >> 16) + x))
-             >> quant_shift_ptr[0];                  // quantize (x)
-        x  = (y ^ sz) - sz;                          // get the sign back
-        qcoeff_ptr[0]  = x;                          // write to destination
-        dqcoeff_ptr[0] = x * dequant_ptr[0];         // dequantized value
-
-        if (y) {
-          eob = 0;                                   // last nonzero coeffs
-          zero_run = 0;
-        }
-      }
-    }
-    for (i = 1; i < 64; i++) {
-      rc   = pt_scan[i];
-      z    = coeff_ptr[rc];
-      zbin = (zbin_ptr[1] + zbin_boost_ptr[zero_run] + zbin_oq_value);
-      // The original code was incrementing zero_run while keeping it at
-      // maximum 15 by adding "(zero_run < 15)". The same is achieved by
-      // removing the opposite of the sign mask of "(zero_run - 15)".
-      zero_run -= (zero_run - 15) >> 31;
-
-      sz = (z >> 31);                                // sign of z
-      x  = (z ^ sz) - sz;                            // x = abs(z)
-
-      if (x >= zbin) {
-        x += (round_ptr[rc != 0]);
-        y  = ((int)(((int)(x * quant_ptr[1]) >> 16) + x))
-             >> quant_shift_ptr[1];                  // quantize (x)
-        x  = (y ^ sz) - sz;                          // get the sign back
-        qcoeff_ptr[rc]  = x;                         // write to destination
-        dqcoeff_ptr[rc] = x * dequant_ptr[1];        // dequantized value
-
-        if (y) {
-          eob = i;                                   // last nonzero coeffs
-          zero_run = 0;
-        }
-      }
-    }
-    xd->plane[pb_idx.plane].eobs[pb_idx.block] = eob + 1;
-  } else {
-    xd->plane[pb_idx.plane].eobs[pb_idx.block] = 0;
-  }
-}
-
-static void quantize(int16_t *zbin_boost_orig_ptr,
+static void quantize(int16_t *zbin_boost_orig_ptr,
                      int16_t *coeff_ptr, int n_coeffs, int skip_block,
                      int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr,
                      uint8_t *quant_shift_ptr,
@@ -278,16 +75,54 @@
   *eob_ptr = eob + 1;
 }
 
+void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
+                                int y_blocks) {
+  MACROBLOCKD *const xd = &mb->e_mbd;
+  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
+  const int *pt_scan = get_scan_4x4(tx_type);
+
+  quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
+           BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
+           16, mb->skip_block,
+           mb->plane[pb_idx.plane].zbin,
+           mb->plane[pb_idx.plane].round,
+           mb->plane[pb_idx.plane].quant,
+           mb->plane[pb_idx.plane].quant_shift,
+           BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
+           BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
+           xd->plane[pb_idx.plane].dequant,
+           mb->plane[pb_idx.plane].zbin_extra,
+           &xd->plane[pb_idx.plane].eobs[pb_idx.block],
+           pt_scan, 1);
+}
+
+void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
+                                int y_blocks) {
+  MACROBLOCKD *const xd = &mb->e_mbd;
+  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
+  const int *pt_scan = get_scan_8x8(tx_type);
+
+  quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
+           BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
+           64, mb->skip_block,
+           mb->plane[pb_idx.plane].zbin,
+           mb->plane[pb_idx.plane].round,
+           mb->plane[pb_idx.plane].quant,
+           mb->plane[pb_idx.plane].quant_shift,
+           BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
+           BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
+           xd->plane[pb_idx.plane].dequant,
+           mb->plane[pb_idx.plane].zbin_extra,
+           &xd->plane[pb_idx.plane].eobs[pb_idx.block],
+           pt_scan, 1);
+}
+
 void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
                                   int y_blocks) {
   MACROBLOCKD *const xd = &mb->e_mbd;
   const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
-  const int c_idx = plane_idx(pb_idx.plane);
   const int *pt_scan = get_scan_16x16(tx_type);
 
-  if (c_idx == 0) assert(pb_idx.plane == 0);
-  if (c_idx == 16) assert(pb_idx.plane == 1);
-  if (c_idx == 20) assert(pb_idx.plane == 2);
   quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
            BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
            256, mb->skip_block,
@@ -306,11 +141,7 @@
 void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) {
   MACROBLOCKD *const xd = &mb->e_mbd;
   const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
-  const int c_idx = plane_idx(pb_idx.plane);
 
-  if (c_idx == 0) assert(pb_idx.plane == 0);
-  if (c_idx == 16) assert(pb_idx.plane == 1);
-  if (c_idx == 20) assert(pb_idx.plane == 2);
   quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
            BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
            1024, mb->skip_block,
@@ -371,11 +202,7 @@
 
   for (n = 0; n < bw * bh; n++) {
     const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
-    if (tx_type != DCT_DCT) {
-      vp9_ht_quantize_b_4x4(x, n, tx_type);
-    } else {
-      x->quantize_b_4x4(x, n, bw * bh);
-    }
+    x->quantize_b_4x4(x, n, tx_type, bw * bh);
   }
 }
 
@@ -412,7 +239,7 @@
   int i;
 
   for (i = uoff; i < ((uoff * 3) >> 1); i++)
-    x->quantize_b_4x4(x, i, uoff);
+    x->quantize_b_4x4(x, i, DCT_DCT, uoff);
 }
 
 /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
@@ -421,8 +248,8 @@
  * of blocks. */
 void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2,
                                      int y_blocks) {
-  vp9_regular_quantize_b_4x4(x, b_idx1, y_blocks);
-  vp9_regular_quantize_b_4x4(x, b_idx2, y_blocks);
+  vp9_regular_quantize_b_4x4(x, b_idx1, DCT_DCT, y_blocks);
+  vp9_regular_quantize_b_4x4(x, b_idx2, DCT_DCT, y_blocks);
 }
 
 static void invert_quant(int16_t *quant, uint8_t *shift, int d) {
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -26,10 +26,10 @@
 #include "x86/vp9_quantize_x86.h"
 #endif
 
-void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_ix, TX_TYPE type);
-void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks);
 void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2,
                                      int y_blocks);
+void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
+                                int y_blocks);
 void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
                                 int y_blocks);
 void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -909,10 +909,10 @@
     tx_type = get_tx_type_4x4(xd, ib);
     if (tx_type != DCT_DCT) {
       vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
-      vp9_ht_quantize_b_4x4(x, ib, tx_type);
+      x->quantize_b_4x4(x, ib, tx_type, 16);
     } else {
       x->fwd_txm4x4(src_diff, coeff, 32);
-      x->quantize_b_4x4(x, ib, 16);
+      x->quantize_b_4x4(x, ib, tx_type, 16);
     }
 
     tempa = ta;
@@ -1167,7 +1167,7 @@
         tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
         if (tx_type != DCT_DCT) {
           vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
-          vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
+          x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
         } else if (!(i & 1) &&
                    get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
           x->fwd_txm8x4(src_diff, coeff, 32);
@@ -1175,7 +1175,7 @@
           do_two = 1;
         } else {
           x->fwd_txm4x4(src_diff, coeff, 32);
-          x->quantize_b_4x4(x, ib + iblock[i], 16);
+          x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
         }
         distortion += vp9_block_error_c(coeff,
             BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[i], 16),
@@ -1756,7 +1756,7 @@
                          src, src_stride,
                          *(bd->base_dst) + bd->dst, bd->dst_stride);
       x->fwd_txm4x4(src_diff, coeff, 32);
-      x->quantize_b_4x4(x, i, 16);
+      x->quantize_b_4x4(x, i, DCT_DCT, 16);
       thisdistortion = vp9_block_error(coeff,
           BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16);
       *distortion += thisdistortion;