ref: 7c4992c4667f4f3392b911e3a8b17a776a216c5f
parent: d86d834fcb153281c6c470700011385c1849927a
author: Yunqing Wang <[email protected]>
date: Thu Oct 16 08:29:48 EDT 2014
Remove the dependency in token storing locations. Currently, the tokens for a tile are stored immediately after those of its preceding tile, which causes a dependency. This is unnecessary since we always allocate enough memory for tokens. Removing the dependency allows token writing to be done in parallel. This patch doesn't change the encoding result. Change-Id: I7365a6e5e2c2833eb14377c37e1503c9d0f26543
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -935,26 +935,27 @@
size_t total_size = 0;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
+ TileInfo tile[4][1 << 6];
+ TOKENEXTRA *pre_tok = cpi->tok;
+ int tile_tok = 0;
vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) *
mi_cols_aligned_to_sb(cm->mi_cols));
- tok[0][0] = cpi->tok;
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- if (tile_row)
- tok[tile_row][0] = tok[tile_row - 1][tile_cols - 1] +
- cpi->tok_count[tile_row - 1][tile_cols - 1];
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ vp9_tile_init(&tile[tile_row][tile_col], cm, tile_row, tile_col);
- for (tile_col = 1; tile_col < tile_cols; tile_col++)
- tok[tile_row][tile_col] = tok[tile_row][tile_col - 1] +
- cpi->tok_count[tile_row][tile_col - 1];
+ tok[tile_row][tile_col] = pre_tok + tile_tok;
+ pre_tok = tok[tile_row][tile_col];
+ tile_tok = allocated_tokens(tile[tile_row][tile_col]);
+ }
}
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- TileInfo tile;
+ const TileInfo * const ptile = &tile[tile_row][tile_col];
- vp9_tile_init(&tile, cm, tile_row, tile_col);
tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col];
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
@@ -962,7 +963,7 @@
else
vp9_start_encode(&residual_bc, data_ptr + total_size);
- write_modes(cpi, &tile, &residual_bc, &tok[tile_row][tile_col], tok_end);
+ write_modes(cpi, ptile, &residual_bc, &tok[tile_row][tile_col], tok_end);
assert(tok[tile_row][tile_col] == tok_end);
vp9_stop_encode(&residual_bc);
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3340,25 +3340,39 @@
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
+
int tile_col, tile_row;
- TOKENEXTRA *tok = cpi->tok;
+ TileInfo tile[4][1 << 6];
+ TOKENEXTRA *tok[4][1 << 6];
+ TOKENEXTRA *pre_tok = cpi->tok;
+ int tile_tok = 0;
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- TileInfo tile;
- TOKENEXTRA *old_tok = tok;
+ vp9_tile_init(&tile[tile_row][tile_col], cm, tile_row, tile_col);
+
+ tok[tile_row][tile_col] = pre_tok + tile_tok;
+ pre_tok = tok[tile_row][tile_col];
+ tile_tok = allocated_tokens(tile[tile_row][tile_col]);
+ }
+ }
+
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ const TileInfo * const ptile = &tile[tile_row][tile_col];
+ TOKENEXTRA * const old_tok = tok[tile_row][tile_col];
int mi_row;
- vp9_tile_init(&tile, cm, tile_row, tile_col);
- for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end;
+ for (mi_row = ptile->mi_row_start; mi_row < ptile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
if (cpi->sf.use_nonrd_pick_mode && !frame_is_intra_only(cm))
- encode_nonrd_sb_row(cpi, &tile, mi_row, &tok);
+ encode_nonrd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
else
- encode_rd_sb_row(cpi, &tile, mi_row, &tok);
+ encode_rd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
}
- cpi->tok_count[tile_row][tile_col] = (unsigned int)(tok - old_tok);
- assert(tok - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
+ cpi->tok_count[tile_row][tile_col] =
+ (unsigned int)(tok[tile_row][tile_col] - old_tok);
+ assert(tok[tile_row][tile_col] - old_tok <= allocated_tokens(*ptile));
}
}
}
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -481,6 +481,15 @@
return mb_rows * mb_cols * (16 * 16 * 3 + 4);
}
+// Returns the token allocation for one tile: the tile's mi row/column spans are
+// halved (rounding up) into MB counts and passed to get_token_alloc().
+static INLINE int allocated_tokens(TileInfo tile) {
+ int tile_mb_rows = (tile.mi_row_end - tile.mi_row_start + 1) >> 1;
+ int tile_mb_cols = (tile.mi_col_end - tile.mi_col_start + 1) >> 1;
+
+ return get_token_alloc(tile_mb_rows, tile_mb_cols);
+}
+
int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
#if CONFIG_VP9_HIGHBITDEPTH
int vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,