ref: 38fa4871647667b5c36c2482993887ad479f7c6d
parent: 325e0aa6505eb480f5a55e072e195cbc3db0aacf
author: Jingning Han <[email protected]>
date: Fri Jul 26 13:01:51 EDT 2013
Shortcut 8x8/16x16 inverse 2D-DCT This commit brings back the shortcut implementation of the 8x8/16x16 inverse 2D-DCT. When eob <= 10, it skips the inverse transform operations on rows 4:7 (8x8) or rows 4:15 (16x16) in the first round. For bus_cif at 1000 kbps, this provides about a 2% speed-up at speed 0. Change-Id: I453e2d72956467d75be4ad8c04b4482ab889d572
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -95,6 +95,9 @@
// DC only DCT coefficient
vp9_short_idct8x8_1_add(input, dest, stride);
input[0] = 0;
+ } else if (eob <= 10) {
+ vp9_short_idct10_8x8_add(input, dest, stride);
+ vpx_memset(input, 0, 128);
} else {
vp9_short_idct8x8_add(input, dest, stride);
vpx_memset(input, 0, 128);
@@ -128,6 +131,9 @@
input[0] = 0;
vp9_add_constant_residual_16x16(out, dest, stride);
+ } else if (eob <= 10) {
+ vp9_short_idct10_16x16_add(input, dest, stride);
+ vpx_memset(input, 0, 512);
} else {
vp9_short_idct16x16_add(input, dest, stride);
vpx_memset(input, 0, 512);
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -52,10 +52,21 @@
int stride) {
if (eob <= 1)
vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
+ else if (eob <= 10)
+ vp9_short_idct10_8x8_add(dqcoeff, dest, stride);
else
vp9_short_idct8x8_add(dqcoeff, dest, stride);
}
+static void inverse_transform_b_16x16_add(MACROBLOCKD *xd, int eob,
+ int16_t *dqcoeff, uint8_t *dest,
+ int stride) {
+ if (eob <= 10)
+ vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
+ else
+ vp9_short_idct16x16_add(dqcoeff, dest, stride);
+}
+
static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) {
struct macroblock_plane *const p = &x->plane[plane];
const MACROBLOCKD *const xd = &x->e_mbd;
@@ -538,7 +549,8 @@
vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
break;
case TX_16X16:
- vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
+ inverse_transform_b_16x16_add(xd, pd->eobs[block], dqcoeff,
+ dst, pd->dst.stride);
break;
case TX_8X8:
inverse_transform_b_8x8_add(xd, pd->eobs[block], dqcoeff,
@@ -691,7 +703,7 @@
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
- vp9_short_idct16x16_add(dqcoeff, dst, pd->dst.stride);
+ inverse_transform_b_16x16_add(xd, *eob, dqcoeff, dst, pd->dst.stride);
else
vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type);
}