shithub: libvpx

Download patch

ref: 7c4992c4667f4f3392b911e3a8b17a776a216c5f
parent: d86d834fcb153281c6c470700011385c1849927a
author: Yunqing Wang <[email protected]>
date: Thu Oct 16 08:29:48 EDT 2014

Remove the dependency in token storing locations

Currently, the tokens for a tile are stored immediately after its
preceding tile, which causes a dependency. This is unnecessary
since we always allocate enough memory for tokens. Removing
the dependency allows token writing done in parallel. This patch
doesn't change encoding result.

Change-Id: I7365a6e5e2c2833eb14377c37e1503c9d0f26543

--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -935,26 +935,27 @@
   size_t total_size = 0;
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
+  TileInfo tile[4][1 << 6];
+  TOKENEXTRA *pre_tok = cpi->tok;
+  int tile_tok = 0;
 
   vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) *
              mi_cols_aligned_to_sb(cm->mi_cols));
 
-  tok[0][0] = cpi->tok;
-  for (tile_row = 0; tile_row < tile_rows; tile_row++) {
-    if (tile_row)
-      tok[tile_row][0] = tok[tile_row - 1][tile_cols - 1] +
-                         cpi->tok_count[tile_row - 1][tile_cols - 1];
+  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+      vp9_tile_init(&tile[tile_row][tile_col], cm, tile_row, tile_col);
 
-    for (tile_col = 1; tile_col < tile_cols; tile_col++)
-      tok[tile_row][tile_col] = tok[tile_row][tile_col - 1] +
-                                cpi->tok_count[tile_row][tile_col - 1];
+      tok[tile_row][tile_col] = pre_tok + tile_tok;
+      pre_tok = tok[tile_row][tile_col];
+      tile_tok = allocated_tokens(tile[tile_row][tile_col]);
+    }
   }
 
   for (tile_row = 0; tile_row < tile_rows; tile_row++) {
     for (tile_col = 0; tile_col < tile_cols; tile_col++) {
-      TileInfo tile;
+      const TileInfo * const ptile = &tile[tile_row][tile_col];
 
-      vp9_tile_init(&tile, cm, tile_row, tile_col);
       tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col];
 
       if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
@@ -962,7 +963,7 @@
       else
         vp9_start_encode(&residual_bc, data_ptr + total_size);
 
-      write_modes(cpi, &tile, &residual_bc, &tok[tile_row][tile_col], tok_end);
+      write_modes(cpi, ptile, &residual_bc, &tok[tile_row][tile_col], tok_end);
       assert(tok[tile_row][tile_col] == tok_end);
       vp9_stop_encode(&residual_bc);
       if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3340,25 +3340,39 @@
   const VP9_COMMON *const cm = &cpi->common;
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
+
   int tile_col, tile_row;
-  TOKENEXTRA *tok = cpi->tok;
+  TileInfo tile[4][1 << 6];
+  TOKENEXTRA *tok[4][1 << 6];
+  TOKENEXTRA *pre_tok = cpi->tok;
+  int tile_tok = 0;
 
   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-      TileInfo tile;
-      TOKENEXTRA *old_tok = tok;
+      vp9_tile_init(&tile[tile_row][tile_col], cm, tile_row, tile_col);
+
+      tok[tile_row][tile_col] = pre_tok + tile_tok;
+      pre_tok = tok[tile_row][tile_col];
+      tile_tok = allocated_tokens(tile[tile_row][tile_col]);
+    }
+  }
+
+  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+      const TileInfo * const ptile = &tile[tile_row][tile_col];
+      TOKENEXTRA * const old_tok = tok[tile_row][tile_col];
       int mi_row;
 
-      vp9_tile_init(&tile, cm, tile_row, tile_col);
-      for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end;
+      for (mi_row = ptile->mi_row_start; mi_row < ptile->mi_row_end;
            mi_row += MI_BLOCK_SIZE) {
         if (cpi->sf.use_nonrd_pick_mode && !frame_is_intra_only(cm))
-          encode_nonrd_sb_row(cpi, &tile, mi_row, &tok);
+          encode_nonrd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
         else
-          encode_rd_sb_row(cpi, &tile, mi_row, &tok);
+          encode_rd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
       }
-      cpi->tok_count[tile_row][tile_col] = (unsigned int)(tok - old_tok);
-      assert(tok - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
+      cpi->tok_count[tile_row][tile_col] =
+          (unsigned int)(tok[tile_row][tile_col] - old_tok);
+      assert(tok[tile_row][tile_col] - old_tok <= allocated_tokens(*ptile));
     }
   }
 }
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -481,6 +481,15 @@
   return mb_rows * mb_cols * (16 * 16 * 3 + 4);
 }
 
+// Get the allocated token size for a tile. It does the same calculation as in
+// the frame token allocation.
+static INLINE int allocated_tokens(TileInfo tile) {
+  int tile_mb_rows = (tile.mi_row_end - tile.mi_row_start + 1) >> 1;
+  int tile_mb_cols = (tile.mi_col_end - tile.mi_col_start + 1) >> 1;
+
+  return get_token_alloc(tile_mb_rows, tile_mb_cols);
+}
+
 int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
 #if CONFIG_VP9_HIGHBITDEPTH
 int vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,