shithub: libvpx

Download patch

ref: 70eb870cfed0e16b9f32e426ef05f0b3faba3bc5
parent: 57cae22c1eb2289182bea4df2abc7885de7ddca3
author: Alex Converse <[email protected]>
date: Mon Nov 2 11:28:10 EST 2015

Expand unconstrained nodes in pack_mb_tokens and loop on zeros.

Reduces Linux perf estimated cycle count for pack_mb_tokens on a
lossless encode on my desktop from 61858501855 to 48154040219 or from
26% of the overall profile to 21%.

Change-Id: I9ca3426d7e3272bc7f7030abda4f0d0cec87fb4a

--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -123,72 +123,66 @@
 static void pack_mb_tokens(vpx_writer *w,
                            TOKENEXTRA **tp, const TOKENEXTRA *const stop,
                            vpx_bit_depth_t bit_depth) {
-  TOKENEXTRA *p = *tp;
-
-  while (p < stop && p->token != EOSB_TOKEN) {
-    const int t = p->token;
-    const struct vp9_token *const a = &vp9_coef_encodings[t];
-    int i = 0;
-    int v = a->value;
-    int n = a->len;
+  const TOKENEXTRA *p;
+  const vp9_extra_bit *const extra_bits =
 #if CONFIG_VP9_HIGHBITDEPTH
-    const vp9_extra_bit *b;
-    if (bit_depth == VPX_BITS_12)
-      b = &vp9_extra_bits_high12[t];
-    else if (bit_depth == VPX_BITS_10)
-      b = &vp9_extra_bits_high10[t];
-    else
-      b = &vp9_extra_bits[t];
+    (bit_depth == VPX_BITS_12) ? vp9_extra_bits_high12 :
+    (bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 :
+    vp9_extra_bits;
 #else
-    const vp9_extra_bit *const b = &vp9_extra_bits[t];
+    vp9_extra_bits;
     (void) bit_depth;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-    /* skip one or two nodes */
-    if (p->skip_eob_node) {
-      n -= p->skip_eob_node;
-      i = 2 * p->skip_eob_node;
+  for (p = *tp; p < stop && p->token != EOSB_TOKEN; ++p) {
+    if (p->token == EOB_TOKEN) {
+      vpx_write(w, 0, p->context_tree[0]);
+      continue;
     }
-
-    // TODO(jbb): expanding this can lead to big gains.  It allows
-    // much better branch prediction and would enable us to avoid numerous
-    // lookups and compares.
-
-    // If we have a token that's in the constrained set, the coefficient tree
-    // is split into two treed writes.  The first treed write takes care of the
-    // unconstrained nodes.  The second treed write takes care of the
-    // constrained nodes.
-    if (t >= TWO_TOKEN && t < EOB_TOKEN) {
-      int len = UNCONSTRAINED_NODES - p->skip_eob_node;
-      int bits = v >> (n - len);
-      vp9_write_tree(w, vp9_coef_tree, p->context_tree, bits, len, i);
-      vp9_write_tree(w, vp9_coef_con_tree,
-                     vp9_pareto8_full[p->context_tree[PIVOT_NODE] - 1],
-                     v, n - len, 0);
-    } else {
-      vp9_write_tree(w, vp9_coef_tree, p->context_tree, v, n, i);
+    vpx_write(w, 1, p->context_tree[0]);
+    while (p->token == ZERO_TOKEN) {
+      vpx_write(w, 0, p->context_tree[1]);
+      ++p;
+      if (p == stop || p->token == EOSB_TOKEN) {
+        *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
+        return;
+      }
     }
 
-    if (b->base_val) {
-      const int e = p->extra, l = b->len;
-
-      if (l) {
-        const unsigned char *pb = b->prob;
-        int v = e >> 1;
-        int n = l;              /* number of bits in v, assumed nonzero */
-
-        do {
-          const int bb = (v >> --n) & 1;
-          vpx_write(w, bb, *pb++);
-        } while (n);
+    {
+      const int t = p->token;
+      const vpx_prob *const context_tree = p->context_tree;
+      assert(t != ZERO_TOKEN);
+      assert(t != EOB_TOKEN);
+      assert(t != EOSB_TOKEN);
+      vpx_write(w, 1, context_tree[1]);
+      if (t == ONE_TOKEN) {
+        vpx_write(w, 0, context_tree[2]);
+        vpx_write_bit(w, p->extra & 1);
+      } else {  // t >= TWO_TOKEN && t < EOB_TOKEN
+        const struct vp9_token *const a = &vp9_coef_encodings[t];
+        const int v = a->value;
+        const int n = a->len;
+        const int e = p->extra;
+        vpx_write(w, 1, context_tree[2]);
+        vp9_write_tree(w, vp9_coef_con_tree,
+                       vp9_pareto8_full[context_tree[PIVOT_NODE] - 1], v,
+                       n - UNCONSTRAINED_NODES, 0);
+        if (t >= CATEGORY1_TOKEN) {
+          const vp9_extra_bit *const b = &extra_bits[t];
+          const unsigned char *pb = b->prob;
+          int v = e >> 1;
+          int n = b->len;  // number of bits in v, assumed nonzero
+          do {
+            const int bb = (v >> --n) & 1;
+            vpx_write(w, bb, *pb++);
+          } while (n);
+        }
+        vpx_write_bit(w, e & 1);
       }
-
-      vpx_write_bit(w, e & 1);
     }
-    ++p;
   }
-
-  *tp = p + (p->token == EOSB_TOKEN);
+  *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
 }
 
 static void write_segment_id(vpx_writer *w, const struct segmentation *seg,