shithub: libvpx

Download patch

ref: 38fa4871647667b5c36c2482993887ad479f7c6d
parent: 325e0aa6505eb480f5a55e072e195cbc3db0aacf
author: Jingning Han <[email protected]>
date: Fri Jul 26 13:01:51 EDT 2013

Shortcut 8x8/16x16 inverse 2D-DCT

This commit brings back the shortcut implementations of the 8x8 and
16x16 inverse 2D-DCT. When eob <= 10, the first pass skips the inverse
transform of rows 4:7 (8x8) or rows 4:15 (16x16). For bus_cif at 1000
kbps, this provides about a 2% speed-up at speed 0.

Change-Id: I453e2d72956467d75be4ad8c04b4482ab889d572

--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -95,6 +95,9 @@
       // DC only DCT coefficient
       vp9_short_idct8x8_1_add(input, dest, stride);
       input[0] = 0;
+    } else if (eob <= 10) {
+      vp9_short_idct10_8x8_add(input, dest, stride);
+      vpx_memset(input, 0, 128);
     } else {
       vp9_short_idct8x8_add(input, dest, stride);
       vpx_memset(input, 0, 128);
@@ -128,6 +131,9 @@
       input[0] = 0;
 
       vp9_add_constant_residual_16x16(out, dest, stride);
+    } else if (eob <= 10) {
+      vp9_short_idct10_16x16_add(input, dest, stride);
+      vpx_memset(input, 0, 512);
     } else {
       vp9_short_idct16x16_add(input, dest, stride);
       vpx_memset(input, 0, 512);
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -52,10 +52,21 @@
                                         int stride) {
   if (eob <= 1)
     vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
+  else if (eob <= 10)
+    vp9_short_idct10_8x8_add(dqcoeff, dest, stride);
   else
     vp9_short_idct8x8_add(dqcoeff, dest, stride);
 }
 
+static void inverse_transform_b_16x16_add(MACROBLOCKD *xd, int eob,
+                                          int16_t *dqcoeff, uint8_t *dest,
+                                          int stride) {
+  if (eob <= 10)
+    vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
+  else
+    vp9_short_idct16x16_add(dqcoeff, dest, stride);
+}
+
 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) {
   struct macroblock_plane *const p = &x->plane[plane];
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -538,7 +549,8 @@
       vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
       break;
     case TX_16X16:
-      vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
+      inverse_transform_b_16x16_add(xd, pd->eobs[block], dqcoeff,
+                                    dst, pd->dst.stride);
       break;
     case TX_8X8:
       inverse_transform_b_8x8_add(xd, pd->eobs[block], dqcoeff,
@@ -691,7 +703,7 @@
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
       if (!x->skip_encode && *eob) {
         if (tx_type == DCT_DCT)
-          vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
+          inverse_transform_b_16x16_add(xd, *eob, dqcoeff, dst, pd->dst.stride);
         else
           vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type);
       }