ref: 70eb870cfed0e16b9f32e426ef05f0b3faba3bc5
parent: 57cae22c1eb2289182bea4df2abc7885de7ddca3
author: Alex Converse <[email protected]>
date: Mon Nov 2 11:28:10 EST 2015
Expand unconstrained nodes in pack_mb_tokens and loop on zeros.

Reduces Linux perf estimated cycle count for pack_mb_tokens on a
lossless encode on my desktop from 61858501855 to 48154040219, or from
26% of the overall profile to 21%.

Change-Id: I9ca3426d7e3272bc7f7030abda4f0d0cec87fb4a
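For orientation, here is a minimal, self-contained sketch of the control flow the
rewrite introduces, with hypothetical BoolWriter/put_bit/Token/pack_tokens names
standing in for vpx_writer/vpx_write/TOKENEXTRA (a sketch of the idea, not the
libvpx code): node 0 of the coefficient tree (EOB vs. coefficient) and node 1
(zero vs. nonzero) are written as explicit binary decisions, runs of zero tokens
are consumed in an inner loop, and only the remaining constrained nodes would
still go through a treed write.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for vpx_writer/vpx_write; a real boolean coder
 * would range-code the bit against the probability instead of counting. */
typedef struct { unsigned bits; } BoolWriter;
static void put_bit(BoolWriter *w, int bit, uint8_t prob) {
  (void)bit;
  (void)prob;
  ++w->bits;
}

/* Toy token alphabet standing in for the vp9 coefficient tokens. */
enum { TOK_EOB, TOK_ZERO, TOK_ONE, TOK_BIG, TOK_EOSB };

typedef struct {
  int token;
  const uint8_t *probs;  /* per-context node probabilities, like context_tree */
} Token;

/* Expanded writer in the spirit of the new pack_mb_tokens loop: node 0
 * (EOB vs. coefficient) and node 1 (zero vs. nonzero) are written directly,
 * and consecutive zero tokens are handled without leaving the inner loop. */
static const Token *pack_tokens(BoolWriter *w, const Token *p,
                                const Token *stop) {
  for (; p < stop && p->token != TOK_EOSB; ++p) {
    if (p->token == TOK_EOB) {
      put_bit(w, 0, p->probs[0]);
      continue;
    }
    put_bit(w, 1, p->probs[0]);
    while (p->token == TOK_ZERO) {
      put_bit(w, 0, p->probs[1]);
      if (++p == stop || p->token == TOK_EOSB) return p;
    }
    put_bit(w, 1, p->probs[1]);
    /* Node 2 and below: the real code still uses a treed write over the
     * constrained (Pareto) part of the tree for tokens above ONE. */
    put_bit(w, p->token != TOK_ONE, p->probs[2]);
  }
  return p;
}

int main(void) {
  static const uint8_t probs[3] = { 128, 128, 128 };
  const Token tokens[] = {
    { TOK_ZERO, probs }, { TOK_ZERO, probs }, { TOK_ONE, probs },
    { TOK_BIG, probs },  { TOK_EOB, probs },  { TOK_EOSB, probs },
  };
  BoolWriter w = { 0 };
  pack_tokens(&w, tokens, tokens + sizeof(tokens) / sizeof(tokens[0]));
  printf("node decisions written: %u\n", w.bits);
  return 0;
}

Compared with driving every token through a table-walking vp9_write_tree() call,
the expanded form avoids repeated lookups and compares and gives the branch
predictor straight-line code to work with, which is what the TODO removed below
anticipated and what the cycle-count reduction above reflects.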
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -123,72 +123,66 @@
static void pack_mb_tokens(vpx_writer *w,
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth) {
- TOKENEXTRA *p = *tp;
-
- while (p < stop && p->token != EOSB_TOKEN) {
- const int t = p->token;
- const struct vp9_token *const a = &vp9_coef_encodings[t];
- int i = 0;
- int v = a->value;
- int n = a->len;
+ const TOKENEXTRA *p;
+ const vp9_extra_bit *const extra_bits =
#if CONFIG_VP9_HIGHBITDEPTH
- const vp9_extra_bit *b;
- if (bit_depth == VPX_BITS_12)
- b = &vp9_extra_bits_high12[t];
- else if (bit_depth == VPX_BITS_10)
- b = &vp9_extra_bits_high10[t];
- else
- b = &vp9_extra_bits[t];
+ (bit_depth == VPX_BITS_12) ? vp9_extra_bits_high12 :
+ (bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 :
+ vp9_extra_bits;
#else
- const vp9_extra_bit *const b = &vp9_extra_bits[t];
+ vp9_extra_bits;
(void) bit_depth;
#endif // CONFIG_VP9_HIGHBITDEPTH
- /* skip one or two nodes */
- if (p->skip_eob_node) {
- n -= p->skip_eob_node;
- i = 2 * p->skip_eob_node;
+ for (p = *tp; p < stop && p->token != EOSB_TOKEN; ++p) {
+ if (p->token == EOB_TOKEN) {
+ vpx_write(w, 0, p->context_tree[0]);
+ continue;
}
-
- // TODO(jbb): expanding this can lead to big gains. It allows
- // much better branch prediction and would enable us to avoid numerous
- // lookups and compares.
-
- // If we have a token that's in the constrained set, the coefficient tree
- // is split into two treed writes. The first treed write takes care of the
- // unconstrained nodes. The second treed write takes care of the
- // constrained nodes.
- if (t >= TWO_TOKEN && t < EOB_TOKEN) {
- int len = UNCONSTRAINED_NODES - p->skip_eob_node;
- int bits = v >> (n - len);
- vp9_write_tree(w, vp9_coef_tree, p->context_tree, bits, len, i);
- vp9_write_tree(w, vp9_coef_con_tree,
- vp9_pareto8_full[p->context_tree[PIVOT_NODE] - 1],
- v, n - len, 0);
- } else {
- vp9_write_tree(w, vp9_coef_tree, p->context_tree, v, n, i);
+ vpx_write(w, 1, p->context_tree[0]);
+ while (p->token == ZERO_TOKEN) {
+ vpx_write(w, 0, p->context_tree[1]);
+ ++p;
+ if (p == stop || p->token == EOSB_TOKEN) {
+ *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
+ return;
+ }
}
- if (b->base_val) {
- const int e = p->extra, l = b->len;
-
- if (l) {
- const unsigned char *pb = b->prob;
- int v = e >> 1;
- int n = l; /* number of bits in v, assumed nonzero */
-
- do {
- const int bb = (v >> --n) & 1;
- vpx_write(w, bb, *pb++);
- } while (n);
+ {
+ const int t = p->token;
+ const vpx_prob *const context_tree = p->context_tree;
+ assert(t != ZERO_TOKEN);
+ assert(t != EOB_TOKEN);
+ assert(t != EOSB_TOKEN);
+ vpx_write(w, 1, context_tree[1]);
+ if (t == ONE_TOKEN) {
+ vpx_write(w, 0, context_tree[2]);
+ vpx_write_bit(w, p->extra & 1);
+ } else { // t >= TWO_TOKEN && t < EOB_TOKEN
+ const struct vp9_token *const a = &vp9_coef_encodings[t];
+ const int v = a->value;
+ const int n = a->len;
+ const int e = p->extra;
+ vpx_write(w, 1, context_tree[2]);
+ vp9_write_tree(w, vp9_coef_con_tree,
+ vp9_pareto8_full[context_tree[PIVOT_NODE] - 1], v,
+ n - UNCONSTRAINED_NODES, 0);
+ if (t >= CATEGORY1_TOKEN) {
+ const vp9_extra_bit *const b = &extra_bits[t];
+ const unsigned char *pb = b->prob;
+ int v = e >> 1;
+ int n = b->len; // number of bits in v, assumed nonzero
+ do {
+ const int bb = (v >> --n) & 1;
+ vpx_write(w, bb, *pb++);
+ } while (n);
+ }
+ vpx_write_bit(w, e & 1);
}
-
- vpx_write_bit(w, e & 1);
}
- ++p;
}
-
- *tp = p + (p->token == EOSB_TOKEN);
+ *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
}
static void write_segment_id(vpx_writer *w, const struct segmentation *seg,