ref: cccad1c5de41e29d6cc64c6e11a1d91c8486d090
parent: 0ede9f52b796b6d8e02046b24f68a3db8b9f5920
author: Jingning Han <[email protected]>
date: Tue Jul 7 07:36:05 EDT 2015
Reduce dqcoeff array size in decoder The decoding process handles detokenization and reconstruction per transform block sequentially. There is no need to offset the dqcoeff buffer according to the transform block index. This allows to reduce the memory spill and improve cache performance. Change-Id: Ibb8bfe532a7a08fcabaf6d42cbec1e986901d32d
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -188,8 +188,11 @@
#endif
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
+#if CONFIG_VP9_ENCODER
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[64 * 64]);
-
+#else
+ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
+#endif
int lossless;
int corrupted;
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -188,7 +188,7 @@
struct macroblockd_plane *const pd = &xd->plane[plane];
if (eob > 0) {
TX_TYPE tx_type = DCT_DCT;
- tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ tran_low_t *const dqcoeff = pd->dqcoeff;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
if (xd->lossless) {
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -217,7 +217,7 @@
pd->left_context + y);
const scan_order *so = get_scan(xd, tx_size, pd->plane_type, block);
const int eob = decode_coefs(xd, pd->plane_type,
- BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
+ pd->dqcoeff, tx_size,
dequant, ctx, so->scan, so->neighbors, r);
vp9_set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y);
return eob;