ref: a64a192c90e45aafe77f02c84930a9495ac04c3b
parent: 5576a4e1cbef87394d1251eda9520619051c0a1e
parent: 70ffd5d0558d69f89d92168fcab337ed252df028
author: Jim Bankoski <[email protected]>
date: Fri Nov 22 03:16:17 EST 2013
Merge changes Id1698a35,Idcabd0b9. Changes: "detokenization speedups"; "Don't write 0's to token_cache".
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -241,7 +241,8 @@
}
static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
- TX_SIZE tx_size, uint8_t *dst, int stride) {
+ TX_SIZE tx_size, uint8_t *dst, int stride,
+ uint8_t *token_cache) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const int eob = pd->eobs[block];
if (eob > 0) {
@@ -274,13 +275,20 @@
if (eob == 1) {
vpx_memset(dqcoeff, 0, 2 * sizeof(dqcoeff[0]));
+ vpx_memset(token_cache, 0, 2 * sizeof(token_cache[0]));
} else {
- if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
+ if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) {
vpx_memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
- else if (tx_size == TX_32X32 && eob <= 34)
+ vpx_memset(token_cache, 0,
+ 4 * (4 << tx_size) * sizeof(token_cache[0]));
+ } else if (tx_size == TX_32X32 && eob <= 34) {
vpx_memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
- else
+ vpx_memset(token_cache, 0, 256 * sizeof(token_cache[0]));
+ } else {
vpx_memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
+ vpx_memset(token_cache, 0,
+ (16 << (tx_size << 1)) * sizeof(token_cache[0]));
+ }
}
}
}
@@ -319,7 +327,8 @@
if (!mi->mbmi.skip_coeff) {
vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, x, y, tx_size,
args->r, args->token_cache);
- inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride);
+ inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride,
+ args->token_cache);
}
}
@@ -345,7 +354,7 @@
args->r, args->token_cache);
inverse_transform_block(xd, plane, block, tx_size,
&pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
- pd->dst.stride);
+ pd->dst.stride, args->token_cache);
}
static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
@@ -946,6 +955,7 @@
pd[i].dqcoeff = tile_data->dqcoeff[i];
pd[i].eobs = tile_data->eobs[i];
vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t));
+ vpx_memset(tile_data->token_cache, 0, sizeof(tile_data->token_cache));
}
}
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -81,6 +81,7 @@
INCREMENT_COUNT(token); \
token_cache[scan[c]] = vp9_pt_energy_class[token]; \
++c; \
+ pt = get_coef_context(nb, token_cache, c); \
dqv = dq[1]; \
continue; \
}
@@ -118,8 +119,6 @@
while (c < seg_eob) {
int val;
- if (c)
- pt = get_coef_context(nb, token_cache, c);
band = *band_translate++;
prob = coef_probs[band][pt];
if (!cm->frame_parallel_decoding_mode)
@@ -126,23 +125,18 @@
++eob_branch_count[band][pt];
if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
break;
- goto DECODE_ZERO;
- SKIP_START:
- if (c >= seg_eob)
- break;
- if (c)
- pt = get_coef_context(nb, token_cache, c);
- band = *band_translate++;
- prob = coef_probs[band][pt];
-
DECODE_ZERO:
if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
INCREMENT_COUNT(ZERO_TOKEN);
- token_cache[scan[c]] = vp9_pt_energy_class[ZERO_TOKEN];
- dqv = dq[1]; \
+ dqv = dq[1];
++c;
- goto SKIP_START;
+ if (c >= seg_eob)
+ break;
+ pt = get_coef_context(nb, token_cache, c);
+ band = *band_translate++;
+ prob = coef_probs[band][pt];
+ goto DECODE_ZERO;
}
// ONE_CONTEXT_NODE_0_