ref: 5322a31b189178ad714aa03e42760e4cacd45a9e
parent: 66a96fd3de6426f8a7ec5293a858d97009ae00c4
author: Urvang Joshi <[email protected]>
date: Tue Jul 11 09:05:29 EDT 2017
Remove the token state array from greedy optimize_b. Reduces memory usage, and speeds up encoding for some difficult clips. No impact on output or metrics. Ported from aomedia patch: https://aomedia-review.googlesource.com/c/14501 Change-Id: I26ec69af8336f9e80da486a1cfbfc89a3596954d
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -57,12 +57,6 @@
#if USE_GREEDY_OPTIMIZE_B
-typedef struct {
- int16_t token;
- tran_low_t qc;
- tran_low_t dqc;
-} vp9_token_state;
-
// Sign-preserving right shift: 'num' may be negative, 'shift' must not be.
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
(((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
@@ -73,7 +67,6 @@
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
const int ref = is_inter_block(xd->mi[0]);
- vp9_token_state tokens[1025][2];
uint8_t token_cache[1024];
const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
@@ -110,22 +103,16 @@
int64_t accu_error = ((int64_t)1) << 50;
int64_t best_block_rd_cost = INT64_MAX;
int x_prev = 1;
+ tran_low_t before_best_eob_qc = 0;
+ tran_low_t before_best_eob_dqc = 0;
+
assert((!plane_type && !plane) || (plane_type && plane));
assert(eob <= default_eob);
for (i = 0; i < eob; i++) {
const int rc = scan[i];
- int x = qcoeff[rc];
- t0 = vp9_get_token(x);
- tokens[i][0].qc = x;
- tokens[i][0].token = t0;
- tokens[i][0].dqc = dqcoeff[rc];
- token_cache[rc] = vp9_pt_energy_class[t0];
+ token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
}
- tokens[eob][0].token = EOB_TOKEN;
- tokens[eob][0].qc = 0;
- tokens[eob][0].dqc = 0;
- tokens[eob][1] = tokens[eob][0];
final_eob = 0;
// Initial RD cost.
@@ -144,8 +131,8 @@
const int token_tree_sel_cur = (x_prev == 0);
token_costs_cur = token_costs + band_cur;
if (x == 0) { // No need to search
- rate0 =
- (*token_costs_cur)[token_tree_sel_cur][ctx_cur][tokens[i][0].token];
+ const int token = vp9_get_token(x);
+ rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
accu_rate += rate0;
x_prev = 0;
// Note: accu_error does not change.
@@ -205,6 +192,7 @@
int rdcost_better_for_x1, eob_rdcost_better_for_x1;
int dqc0, dqc1;
int64_t best_eob_cost_cur;
+ int use_x1;
// Calculate RD Cost effect on the next coeff for the two candidates.
int64_t next_bits0 = 0;
@@ -214,6 +202,8 @@
if (i < default_eob - 1) {
int ctx_next, token_tree_sel_next;
const int band_next = band_translate[i + 1];
+ const int token_next =
+ (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
unsigned int(
*const token_costs_next)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
token_costs + band_next;
@@ -220,15 +210,15 @@
token_cache[rc] = vp9_pt_energy_class[t0];
ctx_next = get_coef_context(nb, token_cache, i + 1);
token_tree_sel_next = (x == 0);
- next_bits0 = (*token_costs_next)[token_tree_sel_next][ctx_next]
- [tokens[i + 1][0].token];
+ next_bits0 =
+ (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
next_eob_bits0 =
(*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
token_cache[rc] = vp9_pt_energy_class[t1];
ctx_next = get_coef_context(nb, token_cache, i + 1);
token_tree_sel_next = (x1 == 0);
- next_bits1 = (*token_costs_next)[token_tree_sel_next][ctx_next]
- [tokens[i + 1][0].token];
+ next_bits1 =
+ (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
if (x1 != 0) {
next_eob_bits1 =
(*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
@@ -279,21 +269,20 @@
assert(accu_error >= 0);
x_prev = qcoeff[rc]; // Update based on selected quantized value.
- best_eob_cost_cur = eob_cost0;
- tokens[i][1].token = t0;
- tokens[i][1].qc = x;
- tokens[i][1].dqc = dqc0;
- if ((x1 != 0) && eob_rdcost_better_for_x1) {
- best_eob_cost_cur = eob_cost1;
- tokens[i][1].token = t1;
- tokens[i][1].qc = x1;
- tokens[i][1].dqc = dqc1;
- }
+ use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
+ best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;
// Determine whether to move the eob position to i+1
if (best_eob_cost_cur < best_block_rd_cost) {
best_block_rd_cost = best_eob_cost_cur;
final_eob = i + 1;
+ if (use_x1) {
+ before_best_eob_qc = x1;
+ before_best_eob_dqc = dqc1;
+ } else {
+ before_best_eob_qc = x;
+ before_best_eob_dqc = dqc0;
+ }
}
}
}
@@ -301,11 +290,11 @@
assert(final_eob <= eob);
if (final_eob > 0) {
int rc;
- assert(tokens[final_eob - 1][1].qc != 0);
+ assert(before_best_eob_qc != 0);
i = final_eob - 1;
rc = scan[i];
- qcoeff[rc] = tokens[i][1].qc;
- dqcoeff[rc] = tokens[i][1].dqc;
+ qcoeff[rc] = before_best_eob_qc;
+ dqcoeff[rc] = before_best_eob_dqc;
}
for (i = final_eob; i < eob; i++) {
int rc = scan[i];