ref: a33a84b11a483adb2fa8bd60946c364d0b397ce2
parent: e72d49a97a1e4292ce8bfbc98b0d8855b40accf2
parent: 5ade423774690e2bf877559dc1a1f9547db5dac1
author: Deb Mukherjee <[email protected]>
date: Tue Nov 12 06:22:21 EST 2013
Merge "Removes conditional statements from band getting"
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -127,12 +127,6 @@
extern const uint8_t vp9_coefband_trans_8x8plus[MAXBAND_INDEX + 1];
extern const uint8_t vp9_coefband_trans_4x4[MAXBAND_INDEX + 1];
-
-static int get_coef_band(const uint8_t * band_translate, int coef_index) {
- return (coef_index > MAXBAND_INDEX)
- ? (COEF_BANDS-1) : band_translate[coef_index];
-}
-
// 128 lists of probabilities are stored for the following ONE node probs:
// 1, 3, 5, 7, ..., 253, 255
// In between probabilities are interpolated linearly
@@ -179,11 +173,6 @@
}
return combine_entropy_contexts(above_ec, left_ec);
-}
-
-static const uint8_t *get_band_translate(TX_SIZE tx_size) {
- return tx_size == TX_4X4 ? vp9_coefband_trans_4x4
- : vp9_coefband_trans_8x8plus;
}
static void get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -45,6 +45,7 @@
DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]);
DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]);
+ const uint8_t *band_translate[2];
} TileWorkerData;
static int read_be32(const uint8_t *p) {
@@ -294,7 +295,8 @@
VP9_COMMON *cm;
MACROBLOCKD *xd;
vp9_reader *r;
- unsigned char* token_cache;
+ uint8_t *token_cache;
+ const uint8_t *band_translate[2];
};
static void predict_and_reconstruct_intra_block(int plane, int block,
@@ -303,6 +305,9 @@
struct intra_args *const args = arg;
VP9_COMMON *const cm = args->cm;
MACROBLOCKD *const xd = args->xd;
+ const uint8_t *band_translate[2] = {
+ args->band_translate[0], args->band_translate[1]
+ };
struct macroblockd_plane *const pd = &xd->plane[plane];
MODE_INFO *const mi = xd->mi_8x8[0];
@@ -324,7 +329,7 @@
if (!mi->mbmi.skip_coeff) {
vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, tx_size,
- args->r, args->token_cache);
+ args->r, args->token_cache, band_translate);
inverse_transform_block(xd, plane, block, plane_bsize, tx_size);
}
}
@@ -334,7 +339,8 @@
MACROBLOCKD *xd;
vp9_reader *r;
int *eobtotal;
- unsigned char* token_cache;
+ uint8_t *token_cache;
+ const uint8_t *band_translate[2];
};
static void reconstruct_inter_block(int plane, int block,
@@ -343,10 +349,14 @@
struct inter_args *args = arg;
VP9_COMMON *const cm = args->cm;
MACROBLOCKD *const xd = args->xd;
+ const uint8_t *band_translate[2] = {
+ args->band_translate[0], args->band_translate[1]
+ };
*args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block,
plane_bsize, tx_size,
- args->r, args->token_cache);
+ args->r, args->token_cache,
+ band_translate);
inverse_transform_block(xd, plane, block, plane_bsize, tx_size);
}
@@ -398,7 +408,8 @@
const TileInfo *const tile,
int mi_row, int mi_col,
vp9_reader *r, BLOCK_SIZE bsize,
- unsigned char *token_cache) {
+ uint8_t *token_cache,
+ const uint8_t *band_translate[2]) {
const int less8x8 = bsize < BLOCK_8X8;
MB_MODE_INFO *mbmi;
@@ -420,7 +431,9 @@
}
if (!is_inter_block(mbmi)) {
- struct intra_args arg = { cm, xd, r, token_cache };
+ struct intra_args arg = {
+ cm, xd, r, token_cache, {band_translate[0], band_translate[1]}
+ };
foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block,
&arg);
} else {
@@ -438,7 +451,10 @@
// Reconstruction
if (!mbmi->skip_coeff) {
int eobtotal = 0;
- struct inter_args arg = { cm, xd, r, &eobtotal, token_cache };
+ struct inter_args arg = {
+ cm, xd, r, &eobtotal, token_cache,
+ {band_translate[0], band_translate[1]}
+ };
foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
if (!less8x8 && eobtotal == 0)
mbmi->skip_coeff = 1; // skip loopfilter
@@ -478,7 +494,8 @@
const TileInfo *const tile,
int mi_row, int mi_col,
vp9_reader* r, BLOCK_SIZE bsize,
- unsigned char *token_cache) {
+ uint8_t *token_cache,
+ const uint8_t *band_translate[2]) {
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
@@ -489,33 +506,37 @@
partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
subsize = get_subsize(bsize, partition);
if (subsize < BLOCK_8X8) {
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache,
+ band_translate);
} else {
switch (partition) {
case PARTITION_NONE:
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache,
+ band_translate);
break;
case PARTITION_HORZ:
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache,
+ band_translate);
if (mi_row + hbs < cm->mi_rows)
decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize,
- token_cache);
+ token_cache, band_translate);
break;
case PARTITION_VERT:
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache,
+ band_translate);
if (mi_col + hbs < cm->mi_cols)
decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize,
- token_cache);
+ token_cache, band_translate);
break;
case PARTITION_SPLIT:
decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize,
- token_cache);
+ token_cache, band_translate);
decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize,
- token_cache);
+ token_cache, band_translate);
decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize,
- token_cache);
+ token_cache, band_translate);
decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize,
- token_cache);
+ token_cache, band_translate);
break;
default:
assert(!"Invalid partition type");
@@ -798,9 +819,13 @@
vp9_zero(xd->left_context);
vp9_zero(xd->left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
- mi_col += MI_BLOCK_SIZE)
+ mi_col += MI_BLOCK_SIZE) {
+ const uint8_t *band_translate[2] = {
+ vp9_coefband_trans_4x4, pbi->coefband_trans_8x8plus
+ };
decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64,
- pbi->token_cache);
+ pbi->token_cache, band_translate);
+ }
if (pbi->do_loopfilter_inline) {
const int lf_start = mi_row - MI_BLOCK_SIZE;
@@ -948,7 +973,7 @@
}
static int tile_worker_hook(void *arg1, void *arg2) {
- TileWorkerData *tile_data = (TileWorkerData*)arg1;
+ TileWorkerData *const tile_data = (TileWorkerData*)arg1;
const TileInfo *const tile = (TileInfo*)arg2;
int mi_row, mi_col;
@@ -960,7 +985,8 @@
mi_col += MI_BLOCK_SIZE) {
decode_modes_sb(tile_data->cm, &tile_data->xd, tile,
mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64,
- tile_data->token_cache);
+ tile_data->token_cache,
+ tile_data->band_translate);
}
}
return !tile_data->xd.corrupted;
@@ -1019,6 +1045,8 @@
tile_data->cm = cm;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
+ tile_data->band_translate[0] = vp9_coefband_trans_4x4;
+ tile_data->band_translate[1] = pbi->coefband_trans_8x8plus;
vp9_tile_init(tile, tile_data->cm, 0, tile_col);
setup_token_decoder(data, data_end, size, &cm->error,
@@ -1298,6 +1326,13 @@
const int tile_rows = 1 << cm->log2_tile_rows;
const int tile_cols = 1 << cm->log2_tile_cols;
YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
+
+ vpx_memset(pbi->coefband_trans_8x8plus,
+ (COEF_BANDS - 1),
+ sizeof(pbi->coefband_trans_8x8plus));
+ vpx_memcpy(pbi->coefband_trans_8x8plus,
+ vp9_coefband_trans_8x8plus,
+ sizeof(vp9_coefband_trans_8x8plus));
if (!first_partition_size) {
// showing a frame directly
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -93,7 +93,8 @@
vp9_reader *r, int block_idx,
PLANE_TYPE type, int seg_eob, int16_t *dqcoeff_ptr,
TX_SIZE tx_size, const int16_t *dq, int pt,
- uint8_t *token_cache) {
+ uint8_t *token_cache,
+ const uint8_t *band_translate) {
const FRAME_CONTEXT *const fc = &cm->fc;
FRAME_COUNTS *const counts = &cm->counts;
const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi);
@@ -108,22 +109,20 @@
unsigned int (*eob_branch_count)[PREV_COEF_CONTEXTS] =
counts->eob_branch[tx_size][type][ref];
const int16_t *scan, *nb;
- const uint8_t *const band_translate = get_band_translate(tx_size);
+ const uint8_t *cat6;
get_scan(xd, tx_size, type, block_idx, &scan, &nb);
- while (1) {
+ while (c < seg_eob) {
int val;
- const uint8_t *cat6 = cat6_prob;
- if (c >= seg_eob)
- break;
if (c)
pt = get_coef_context(nb, token_cache, c);
- band = get_coef_band(band_translate, c);
+ band = *band_translate++;
prob = coef_probs[band][pt];
if (!cm->frame_parallel_decoding_mode)
++eob_branch_count[band][pt];
if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
break;
+ goto DECODE_ZERO;
SKIP_START:
if (c >= seg_eob)
@@ -130,9 +129,10 @@
break;
if (c)
pt = get_coef_context(nb, token_cache, c);
- band = get_coef_band(band_translate, c);
+ band = *band_translate++;
prob = coef_probs[band][pt];
+ DECODE_ZERO:
if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
INCREMENT_COUNT(ZERO_TOKEN);
token_cache[scan[c]] = vp9_pt_energy_class[ZERO_TOKEN];
@@ -200,6 +200,7 @@
WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY5);
}
val = 0;
+ cat6 = cat6_prob;
while (*cat6) {
val = (val << 1) | vp9_read(r, *cat6++);
}
@@ -218,7 +219,8 @@
int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, vp9_reader *r,
- uint8_t *token_cache) {
+ uint8_t *token_cache,
+ const uint8_t *band_translate[2]) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id,
tx_size);
@@ -229,7 +231,8 @@
eob = decode_coefs(cm, xd, r, block,
pd->plane_type, seg_eob, BLOCK_OFFSET(pd->dqcoeff, block),
- tx_size, pd->dequant, pt, token_cache);
+ tx_size, pd->dequant, pt, token_cache,
+ band_translate[tx_size != TX_4X4]);
set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff);
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -18,6 +18,7 @@
int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, vp9_reader *r,
- uint8_t *token_cache);
+ uint8_t *token_cache,
+ const uint8_t *band_translate[2]);
#endif // VP9_DECODER_VP9_DETOKENIZE_H_
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -54,7 +54,8 @@
ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
PARTITION_CONTEXT *above_seg_context;
- DECLARE_ALIGNED(16, unsigned char, token_cache[1024]);
+ DECLARE_ALIGNED(16, uint8_t, token_cache[1024]);
+ DECLARE_ALIGNED(16, uint8_t, coefband_trans_8x8plus[1024]);
} VP9D_COMP;
#endif // VP9_DECODER_VP9_ONYXD_INT_H_
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -184,6 +184,9 @@
BLOCK_SIZE sb64_partitioning;
void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride);
+
+ // Band lookup for non-4x4 transforms; tail is padded with COEF_BANDS - 1.
+ DECLARE_ALIGNED(16, uint8_t, coefband_trans_8x8plus[1024]);
};
// TODO(jingning): the variables used here are little complicated. need further
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -138,7 +138,9 @@
uint8_t token_cache[1024];
const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block);
const int16_t *dequant_ptr = pd->dequant;
- const uint8_t *const band_translate = get_band_translate(tx_size);
+ const uint8_t *const band_translate = (tx_size == TX_4X4 ?
+ vp9_coefband_trans_4x4 :
+ mb->coefband_trans_8x8plus);
assert((!type && !plane) || (type && plane));
dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -179,7 +181,7 @@
t0 = (vp9_dct_value_tokens_ptr + x)->token;
/* Consider both possible successor states. */
if (next < default_eob) {
- band = get_coef_band(band_translate, i + 1);
+ band = band_translate[i + 1];
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 +=
mb->token_costs[tx_size][type][ref][band][0][pt]
@@ -230,7 +232,7 @@
t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
}
if (next < default_eob) {
- band = get_coef_band(band_translate, i + 1);
+ band = band_translate[i + 1];
if (t0 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
@@ -264,7 +266,7 @@
/* There's no choice to make for a zero coefficient, so we don't
* add a new trellis node, but we do need to update the costs.
*/
- band = get_coef_band(band_translate, i + 1);
+ band = band_translate[i + 1];
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
/* Update the cost of each path if we're past the EOB token. */
@@ -284,7 +286,7 @@
}
/* Now pick the best path through the whole trellis. */
- band = get_coef_band(band_translate, i + 1);
+ band = band_translate[i + 1];
pt = combine_entropy_contexts(*a, *l);
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1223,6 +1223,13 @@
cpi->fixed_divide[0] = 0;
for (i = 1; i < 512; i++)
cpi->fixed_divide[i] = 0x80000 / i;
+
+ vpx_memset(cpi->mb.coefband_trans_8x8plus,
+ (COEF_BANDS-1),
+ sizeof(cpi->mb.coefband_trans_8x8plus));
+ vpx_memcpy(cpi->mb.coefband_trans_8x8plus,
+ vp9_coefband_trans_8x8plus,
+ sizeof(vp9_coefband_trans_8x8plus));
}
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -115,7 +115,9 @@
vp9_coeff_count *const counts = cpi->coef_counts[tx_size];
vp9_coeff_probs_model *const coef_probs = cpi->common.fc.coef_probs[tx_size];
const int ref = is_inter_block(mbmi);
- const uint8_t *const band_translate = get_band_translate(tx_size);
+ const uint8_t *const band_translate = (tx_size == TX_4X4 ?
+ vp9_coefband_trans_4x4 :
+ cpi->mb.coefband_trans_8x8plus);
const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
int aoff, loff;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff);
@@ -127,7 +129,7 @@
get_scan(xd, tx_size, type, block, &scan, &nb);
c = 0;
do {
- const int band = get_coef_band(band_translate, c);
+ const int band = band_translate[c];
int token;
int v = 0;
rc = scan[c];