ref: e02dc84c1aae3a0c22c8c225c8e7b34335d1379b
parent: 4505e8accbe2b88a029f20fc0cc0cddc966e42cd
author: Deb Mukherjee <[email protected]>
date: Tue Aug 27 11:07:50 EDT 2013
Adds a speed feature for fast 1-loop forward updates. Incorporates a speed feature for fast forward updates of coefficients. This feature takes 3 values: 0 - use standard 2-loop version; 1 - use a 1-loop version; 2 - use a 1-loop version with reduced updates. Results (derfraw300): +0.007% (on speed 0) at feature value = 1; -0.160% (on speed 0) at feature value = 2. There is substantial speed-up at speeds 2 and above for low-resolution sequences where the entropy updates are a big part of the overall computations. Change-Id: Ie96fc50777088a5bd441288bca6111e43d03bcae
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -783,94 +783,170 @@
vp9_coeff_probs_model *old_frame_coef_probs =
cpi->common.fc.coef_probs[tx_size];
vp9_coeff_stats *frame_branch_ct = cpi->frame_branch_ct[tx_size];
+ const vp9_prob upd = VP9_COEF_UPDATE_PROB;
+ const int entropy_nodes_update = UNCONSTRAINED_NODES;
int i, j, k, l, t;
- int update[2] = {0, 0};
- int savings;
+ switch (cpi->sf.use_fast_coef_updates) {
+ case 0: {
+ /* dry run to see if there is any udpate at all needed */
+ int savings = 0;
+ int update[2] = {0, 0};
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+ for (t = 0; t < entropy_nodes_update; ++t) {
+ vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
+ const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
+ int s;
+ int u = 0;
- const int entropy_nodes_update = UNCONSTRAINED_NODES;
+ if (l >= 3 && k == 0)
+ continue;
+ if (t == PIVOT_NODE)
+ s = vp9_prob_diff_update_savings_search_model(
+ frame_branch_ct[i][j][k][l][0],
+ old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
+ else
+ s = vp9_prob_diff_update_savings_search(
+ frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
+ if (s > 0 && newp != oldp)
+ u = 1;
+ if (u)
+ savings += s - (int)(vp9_cost_zero(upd));
+ else
+ savings -= (int)(vp9_cost_zero(upd));
+ update[u]++;
+ }
+ }
+ }
+ }
+ }
- const int tstart = 0;
- /* dry run to see if there is any udpate at all needed */
- savings = 0;
- for (i = 0; i < BLOCK_TYPES; ++i) {
- for (j = 0; j < REF_TYPES; ++j) {
- for (k = 0; k < COEF_BANDS; ++k) {
- // int prev_coef_savings[ENTROPY_NODES] = {0};
- for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
- for (t = tstart; t < entropy_nodes_update; ++t) {
- vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
- const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
- const vp9_prob upd = VP9_COEF_UPDATE_PROB;
- int s;
- int u = 0;
-
- if (l >= 3 && k == 0)
- continue;
- if (t == PIVOT_NODE)
- s = vp9_prob_diff_update_savings_search_model(
- frame_branch_ct[i][j][k][l][0],
- old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
- else
- s = vp9_prob_diff_update_savings_search(
- frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
- if (s > 0 && newp != oldp)
- u = 1;
- if (u)
- savings += s - (int)(vp9_cost_zero(upd));
- else
- savings -= (int)(vp9_cost_zero(upd));
- update[u]++;
+ // printf("Update %d %d, savings %d\n", update[0], update[1], savings);
+ /* Is coef updated at all */
+ if (update[1] == 0 || savings < 0) {
+ vp9_write_bit(bc, 0);
+ return;
+ }
+ vp9_write_bit(bc, 1);
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+ // calc probs and branch cts for this frame only
+ for (t = 0; t < entropy_nodes_update; ++t) {
+ vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
+ vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
+ const vp9_prob upd = VP9_COEF_UPDATE_PROB;
+ int s;
+ int u = 0;
+ if (l >= 3 && k == 0)
+ continue;
+ if (t == PIVOT_NODE)
+ s = vp9_prob_diff_update_savings_search_model(
+ frame_branch_ct[i][j][k][l][0],
+ old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
+ else
+ s = vp9_prob_diff_update_savings_search(
+ frame_branch_ct[i][j][k][l][t],
+ *oldp, &newp, upd);
+ if (s > 0 && newp != *oldp)
+ u = 1;
+ vp9_write(bc, u, upd);
+#ifdef ENTROPY_STATS
+ if (!cpi->dummy_packing)
+ ++tree_update_hist[tx_size][i][j][k][l][t][u];
+#endif
+ if (u) {
+ /* send/use new probability */
+ vp9_write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
+ }
+ }
}
}
}
+ return;
}
- }
- // printf("Update %d %d, savings %d\n", update[0], update[1], savings);
- /* Is coef updated at all */
- if (update[1] == 0 || savings < 0) {
- vp9_write_bit(bc, 0);
- return;
- }
- vp9_write_bit(bc, 1);
- for (i = 0; i < BLOCK_TYPES; ++i) {
- for (j = 0; j < REF_TYPES; ++j) {
- for (k = 0; k < COEF_BANDS; ++k) {
- // int prev_coef_savings[ENTROPY_NODES] = {0};
- for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
- // calc probs and branch cts for this frame only
- for (t = tstart; t < entropy_nodes_update; ++t) {
- vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
- vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
- const vp9_prob upd = VP9_COEF_UPDATE_PROB;
- int s;
- int u = 0;
- if (l >= 3 && k == 0)
- continue;
- if (t == PIVOT_NODE)
- s = vp9_prob_diff_update_savings_search_model(
- frame_branch_ct[i][j][k][l][0],
- old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
- else
- s = vp9_prob_diff_update_savings_search(
- frame_branch_ct[i][j][k][l][t],
- *oldp, &newp, upd);
- if (s > 0 && newp != *oldp)
- u = 1;
- vp9_write(bc, u, upd);
+ case 1:
+ case 2: {
+ const int prev_coef_contexts_to_update =
+ (cpi->sf.use_fast_coef_updates == 2 ?
+ PREV_COEF_CONTEXTS >> 1 : PREV_COEF_CONTEXTS);
+ const int coef_band_to_update =
+ (cpi->sf.use_fast_coef_updates == 2 ?
+ COEF_BANDS >> 1 : COEF_BANDS);
+ int updates = 0;
+ int noupdates_before_first = 0;
+ for (i = 0; i < BLOCK_TYPES; ++i) {
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < PREV_COEF_CONTEXTS; ++l) {
+ // calc probs and branch cts for this frame only
+ for (t = 0; t < entropy_nodes_update; ++t) {
+ vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
+ vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
+ int s;
+ int u = 0;
+ if (l >= 3 && k == 0)
+ continue;
+ if (l >= prev_coef_contexts_to_update ||
+ k >= coef_band_to_update) {
+ u = 0;
+ } else {
+ if (t == PIVOT_NODE)
+ s = vp9_prob_diff_update_savings_search_model(
+ frame_branch_ct[i][j][k][l][0],
+ old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
+ else
+ s = vp9_prob_diff_update_savings_search(
+ frame_branch_ct[i][j][k][l][t],
+ *oldp, &newp, upd);
+ if (s > 0 && newp != *oldp)
+ u = 1;
+ }
+ updates += u;
+ if (u == 0 && updates == 0) {
+ noupdates_before_first++;
+#ifdef ENTROPY_STATS
+ if (!cpi->dummy_packing)
+ ++tree_update_hist[tx_size][i][j][k][l][t][u];
+#endif
+ continue;
+ }
+ if (u == 1 && updates == 1) {
+ int v;
+ // first update
+ vp9_write_bit(bc, 1);
+ for (v = 0; v < noupdates_before_first; ++v)
+ vp9_write(bc, 0, upd);
+ }
+ vp9_write(bc, u, upd);
#ifdef ENTROPY_STATS
- if (!cpi->dummy_packing)
- ++tree_update_hist[tx_size][i][j][k][l][t][u];
+ if (!cpi->dummy_packing)
+ ++tree_update_hist[tx_size][i][j][k][l][t][u];
#endif
- if (u) {
- /* send/use new probability */
- vp9_write_prob_diff_update(bc, newp, *oldp);
- *oldp = newp;
+ if (u) {
+ /* send/use new probability */
+ vp9_write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
+ }
}
}
}
}
+ if (updates == 0) {
+ vp9_write_bit(bc, 0); // no updates
+ }
+ return;
}
+
+ default:
+ assert(0);
}
}
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -741,6 +741,7 @@
sf->skip_encode_sb = 0;
sf->use_uv_intra_rd_estimate = 0;
sf->use_fast_lpf_pick = 0;
+ sf->use_fast_coef_updates = 0;
sf->using_small_partition_info = 0;
// Skip any mode not chosen at size < X for all sizes > X
// Hence BLOCK_64X64 (skip is off)
@@ -802,6 +803,7 @@
sf->intra_y_mode_mask = INTRA_DC_TM_H_V;
sf->intra_uv_mode_mask = INTRA_DC_TM_H_V;
+ sf->use_fast_coef_updates = 1;
}
if (speed == 2) {
sf->adjust_thresholds_by_speed = 1;
@@ -840,6 +842,7 @@
sf->auto_min_max_partition_interval = 2;
sf->disable_split_var_thresh = 32;
sf->disable_filter_search_var_thresh = 32;
+ sf->use_fast_coef_updates = 2;
}
if (speed == 3) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -866,6 +869,7 @@
sf->disable_filter_search_var_thresh = 64;
sf->intra_y_mode_mask = INTRA_DC_ONLY;
sf->intra_uv_mode_mask = INTRA_DC_ONLY;
+ sf->use_fast_coef_updates = 2;
}
if (speed == 4) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -894,6 +898,7 @@
sf->subpel_iters_per_step = 1;
sf->disable_split_var_thresh = 64;
sf->disable_filter_search_var_thresh = 96;
+ sf->use_fast_coef_updates = 2;
}
/*
if (speed == 2) {
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -298,6 +298,7 @@
int use_rd_breakout;
int use_uv_intra_rd_estimate;
int use_fast_lpf_pick;
+ int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
} SPEED_FEATURES;
typedef struct VP9_COMP {