ref: 469986f96399cbd2cf929e7e6c418196184e7ffa
parent: c2044fda1d00421ddab27c9fd80383546a8ba844
parent: e3c8f2f1526946ebbd5b90a7e4baf2fda09ba897
author: James Zern <[email protected]>
date: Fri Jun 30 15:02:05 EDT 2017
Merge changes from topic 'rm-dec-frame-parallel'

* changes:
  vp9_dx,vpx_codec_alg_priv: rm *worker_id*
  vp9_dx,vpx_codec_alg_priv: rm *cache*
  vp9_dx,vpx_codec_alg_priv: rm frame_parallel_decode
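For context, a minimal caller-side sketch (illustrative only; the helper name
open_vp9_decoder is hypothetical and not part of this change). The public
decoder API is untouched by this series: cfg.threads is still honored, but it
now only feeds tile/loop-filter threading inside the single FrameWorker, since
the frame-parallel decode path is removed.

  #include "vpx/vp8dx.h"        /* vpx_codec_vp9_dx() */
  #include "vpx/vpx_decoder.h"  /* vpx_codec_dec_init(), vpx_codec_dec_cfg_t */

  /* Hypothetical helper: decoder setup as seen from the application side.
   * cfg.threads now only controls tile/loop-filter worker threads within the
   * one FrameWorker; there is no frame-parallel decoding mode to request. */
  static vpx_codec_err_t open_vp9_decoder(vpx_codec_ctx_t *codec,
                                          unsigned int threads) {
    vpx_codec_dec_cfg_t cfg = { 0, 0, 0 }; /* threads, w, h */
    cfg.threads = threads;
    return vpx_codec_dec_init(codec, vpx_codec_vp9_dx(), &cfg, 0 /* flags */);
  }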
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -47,9 +47,6 @@
ctx->priv->init_flags = ctx->init_flags;
priv->si.sz = sizeof(priv->si);
priv->flushed = 0;
- // TODO(jzern): remnants of frame-level parallel decoding should be
- // removed. cf., https://bugs.chromium.org/p/webm/issues/detail?id=1395
- priv->frame_parallel_decode = 0;
if (ctx->config.dec) {
priv->cfg = *ctx->config.dec;
ctx->config.dec = &priv->cfg;
@@ -279,25 +276,7 @@
frame_worker_data->pbi, frame_worker_data->data_size, &data);
frame_worker_data->data_end = data;
- if (frame_worker_data->pbi->frame_parallel_decode) {
- // In frame parallel decoding, a worker thread must successfully decode all
- // the compressed data.
- if (frame_worker_data->result != 0 ||
- frame_worker_data->data + frame_worker_data->data_size - 1 > data) {
- VPxWorker *const worker = frame_worker_data->pbi->frame_worker_owner;
- BufferPool *const pool = frame_worker_data->pbi->common.buffer_pool;
- // Signal all the other threads that are waiting for this frame.
- vp9_frameworker_lock_stats(worker);
- frame_worker_data->frame_context_ready = 1;
- lock_buffer_pool(pool);
- frame_worker_data->pbi->cur_buf->buf.corrupted = 1;
- unlock_buffer_pool(pool);
- frame_worker_data->pbi->need_resync = 1;
- vp9_frameworker_signal_stats(worker);
- vp9_frameworker_unlock_stats(worker);
- return 0;
- }
- } else if (frame_worker_data->result != 0) {
+ if (frame_worker_data->result != 0) {
// Check decode result in serial decode.
frame_worker_data->pbi->cur_buf->buf.corrupted = 1;
frame_worker_data->pbi->need_resync = 1;
@@ -310,18 +289,8 @@
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
ctx->last_show_frame = -1;
- ctx->next_submit_worker_id = 0;
- ctx->last_submit_worker_id = 0;
- ctx->next_output_worker_id = 0;
- ctx->frame_cache_read = 0;
- ctx->frame_cache_write = 0;
- ctx->num_cache_frames = 0;
ctx->need_resync = 1;
- ctx->num_frame_workers =
- (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads : 1;
- if (ctx->num_frame_workers > MAX_DECODE_THREADS)
- ctx->num_frame_workers = MAX_DECODE_THREADS;
- ctx->available_threads = ctx->num_frame_workers;
+ ctx->num_frame_workers = 1;
ctx->flushed = 0;
ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool));
@@ -375,13 +344,11 @@
#endif
// If decoding in serial mode, FrameWorker thread could create tile worker
// thread or loopfilter thread.
- frame_worker_data->pbi->max_threads =
- (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0;
+ frame_worker_data->pbi->max_threads = ctx->cfg.threads;
frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
- frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode;
- frame_worker_data->pbi->common.frame_parallel_decode =
- ctx->frame_parallel_decode;
+ frame_worker_data->pbi->frame_parallel_decode = 0;
+ frame_worker_data->pbi->common.frame_parallel_decode = 0;
worker->hook = (VPxWorkerHook)frame_worker_hook;
if (!winterface->reset(worker)) {
set_error_detail(ctx, "Frame Worker thread creation failed");
@@ -426,7 +393,7 @@
if (!ctx->si.is_kf && !is_intra_only) return VPX_CODEC_ERROR;
}
- if (!ctx->frame_parallel_decode) {
+ {
VPxWorker *const worker = ctx->frame_workers;
FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
frame_worker_data->data = *data;
@@ -449,80 +416,11 @@
return update_error_state(ctx, &frame_worker_data->pbi->common.error);
check_resync(ctx, frame_worker_data->pbi);
- } else {
- VPxWorker *const worker = &ctx->frame_workers[ctx->next_submit_worker_id];
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- // Copy context from last worker thread to next worker thread.
- if (ctx->next_submit_worker_id != ctx->last_submit_worker_id)
- vp9_frameworker_copy_context(
- &ctx->frame_workers[ctx->next_submit_worker_id],
- &ctx->frame_workers[ctx->last_submit_worker_id]);
-
- frame_worker_data->pbi->ready_for_new_data = 0;
- // Copy the compressed data into worker's internal buffer.
- // TODO(hkuang): Will all the workers allocate the same size
- // as the size of the first intra frame be better? This will
- // avoid too many deallocate and allocate.
- if (frame_worker_data->scratch_buffer_size < data_sz) {
- vpx_free(frame_worker_data->scratch_buffer);
- frame_worker_data->scratch_buffer = (uint8_t *)vpx_malloc(data_sz);
- if (frame_worker_data->scratch_buffer == NULL) {
- set_error_detail(ctx, "Failed to reallocate scratch buffer");
- return VPX_CODEC_MEM_ERROR;
- }
- frame_worker_data->scratch_buffer_size = data_sz;
- }
- frame_worker_data->data_size = data_sz;
- memcpy(frame_worker_data->scratch_buffer, *data, data_sz);
-
- frame_worker_data->frame_decoded = 0;
- frame_worker_data->frame_context_ready = 0;
- frame_worker_data->received_frame = 1;
- frame_worker_data->data = frame_worker_data->scratch_buffer;
- frame_worker_data->user_priv = user_priv;
-
- if (ctx->next_submit_worker_id != ctx->last_submit_worker_id)
- ctx->last_submit_worker_id =
- (ctx->last_submit_worker_id + 1) % ctx->num_frame_workers;
-
- ctx->next_submit_worker_id =
- (ctx->next_submit_worker_id + 1) % ctx->num_frame_workers;
- --ctx->available_threads;
- worker->had_error = 0;
- winterface->launch(worker);
}
return VPX_CODEC_OK;
}
-static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) {
- YV12_BUFFER_CONFIG sd;
- vp9_ppflags_t flags = { 0, 0, 0 };
- const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
- VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
- FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
- ctx->next_output_worker_id =
- (ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
- // TODO(hkuang): Add worker error handling here.
- winterface->sync(worker);
- frame_worker_data->received_frame = 0;
- ++ctx->available_threads;
-
- check_resync(ctx, frame_worker_data->pbi);
-
- if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
- VP9_COMMON *const cm = &frame_worker_data->pbi->common;
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
- ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx;
- yuvconfig2image(&ctx->frame_cache[ctx->frame_cache_write].img, &sd,
- frame_worker_data->user_priv);
- ctx->frame_cache[ctx->frame_cache_write].img.fb_priv =
- frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
- ctx->frame_cache_write = (ctx->frame_cache_write + 1) % FRAME_CACHE_SIZE;
- ++ctx->num_cache_frames;
- }
-}
-
static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx,
const uint8_t *data, unsigned int data_sz,
void *user_priv, long deadline) {
@@ -553,91 +451,37 @@
if (ctx->svc_decoding && ctx->svc_spatial_layer < frame_count - 1)
frame_count = ctx->svc_spatial_layer + 1;
- if (ctx->frame_parallel_decode) {
- // Decode in frame parallel mode. When decoding in this mode, the frame
- // passed to the decoder must be either a normal frame or a superframe with
- // superframe index so the decoder could get each frame's start position
- // in the superframe.
- if (frame_count > 0) {
- int i;
+ // Decode in serial mode.
+ if (frame_count > 0) {
+ int i;
- for (i = 0; i < frame_count; ++i) {
- const uint8_t *data_start_copy = data_start;
- const uint32_t frame_size = frame_sizes[i];
- if (data_start < data ||
- frame_size > (uint32_t)(data_end - data_start)) {
- set_error_detail(ctx, "Invalid frame size in index");
- return VPX_CODEC_CORRUPT_FRAME;
- }
-
- if (ctx->available_threads == 0) {
- // No more threads for decoding. Wait until the next output worker
- // finishes decoding. Then copy the decoded frame into cache.
- if (ctx->num_cache_frames < FRAME_CACHE_SIZE) {
- wait_worker_and_cache_frame(ctx);
- } else {
- // TODO(hkuang): Add unit test to test this path.
- set_error_detail(ctx, "Frame output cache is full.");
- return VPX_CODEC_ERROR;
- }
- }
-
- res =
- decode_one(ctx, &data_start_copy, frame_size, user_priv, deadline);
- if (res != VPX_CODEC_OK) return res;
- data_start += frame_size;
+ for (i = 0; i < frame_count; ++i) {
+ const uint8_t *data_start_copy = data_start;
+ const uint32_t frame_size = frame_sizes[i];
+ vpx_codec_err_t res;
+ if (data_start < data || frame_size > (uint32_t)(data_end - data_start)) {
+ set_error_detail(ctx, "Invalid frame size in index");
+ return VPX_CODEC_CORRUPT_FRAME;
}
- } else {
- if (ctx->available_threads == 0) {
- // No more threads for decoding. Wait until the next output worker
- // finishes decoding. Then copy the decoded frame into cache.
- if (ctx->num_cache_frames < FRAME_CACHE_SIZE) {
- wait_worker_and_cache_frame(ctx);
- } else {
- // TODO(hkuang): Add unit test to test this path.
- set_error_detail(ctx, "Frame output cache is full.");
- return VPX_CODEC_ERROR;
- }
- }
- res = decode_one(ctx, &data, data_sz, user_priv, deadline);
+ res = decode_one(ctx, &data_start_copy, frame_size, user_priv, deadline);
if (res != VPX_CODEC_OK) return res;
+
+ data_start += frame_size;
}
} else {
- // Decode in serial mode.
- if (frame_count > 0) {
- int i;
+ while (data_start < data_end) {
+ const uint32_t frame_size = (uint32_t)(data_end - data_start);
+ const vpx_codec_err_t res =
+ decode_one(ctx, &data_start, frame_size, user_priv, deadline);
+ if (res != VPX_CODEC_OK) return res;
- for (i = 0; i < frame_count; ++i) {
- const uint8_t *data_start_copy = data_start;
- const uint32_t frame_size = frame_sizes[i];
- vpx_codec_err_t res;
- if (data_start < data ||
- frame_size > (uint32_t)(data_end - data_start)) {
- set_error_detail(ctx, "Invalid frame size in index");
- return VPX_CODEC_CORRUPT_FRAME;
- }
-
- res =
- decode_one(ctx, &data_start_copy, frame_size, user_priv, deadline);
- if (res != VPX_CODEC_OK) return res;
-
- data_start += frame_size;
- }
- } else {
+ // Account for suboptimal termination by the encoder.
while (data_start < data_end) {
- const uint32_t frame_size = (uint32_t)(data_end - data_start);
- const vpx_codec_err_t res =
- decode_one(ctx, &data_start, frame_size, user_priv, deadline);
- if (res != VPX_CODEC_OK) return res;
-
- // Account for suboptimal termination by the encoder.
- while (data_start < data_end) {
- const uint8_t marker =
- read_marker(ctx->decrypt_cb, ctx->decrypt_state, data_start);
- if (marker) break;
- ++data_start;
- }
+ const uint8_t marker =
+ read_marker(ctx->decrypt_cb, ctx->decrypt_state, data_start);
+ if (marker) break;
+ ++data_start;
}
}
}
@@ -645,80 +489,42 @@
return res;
}
-static void release_last_output_frame(vpx_codec_alg_priv_t *ctx) {
- RefCntBuffer *const frame_bufs = ctx->buffer_pool->frame_bufs;
- // Decrease reference count of last output frame in frame parallel mode.
- if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) {
- BufferPool *const pool = ctx->buffer_pool;
- lock_buffer_pool(pool);
- decrease_ref_count(ctx->last_show_frame, frame_bufs, pool);
- unlock_buffer_pool(pool);
- }
-}
-
static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
vpx_codec_iter_t *iter) {
vpx_image_t *img = NULL;
- // Only return frame when all the cpu are busy or
- // application fluhsed the decoder in frame parallel decode.
- if (ctx->frame_parallel_decode && ctx->available_threads > 0 &&
- !ctx->flushed) {
- return NULL;
- }
-
- // Output the frames in the cache first.
- if (ctx->num_cache_frames > 0) {
- release_last_output_frame(ctx);
- ctx->last_show_frame = ctx->frame_cache[ctx->frame_cache_read].fb_idx;
- if (ctx->need_resync) return NULL;
- img = &ctx->frame_cache[ctx->frame_cache_read].img;
- ctx->frame_cache_read = (ctx->frame_cache_read + 1) % FRAME_CACHE_SIZE;
- --ctx->num_cache_frames;
- return img;
- }
-
// iter acts as a flip flop, so an image is only returned on the first
// call to get_frame.
if (*iter == NULL && ctx->frame_workers != NULL) {
- do {
- YV12_BUFFER_CONFIG sd;
- vp9_ppflags_t flags = { 0, 0, 0 };
- const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
- VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
- FrameWorkerData *const frame_worker_data =
- (FrameWorkerData *)worker->data1;
- ctx->next_output_worker_id =
- (ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
- if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
- set_ppflags(ctx, &flags);
- // Wait for the frame from worker thread.
- if (winterface->sync(worker)) {
- // Check if worker has received any frames.
- if (frame_worker_data->received_frame == 1) {
- ++ctx->available_threads;
- frame_worker_data->received_frame = 0;
- check_resync(ctx, frame_worker_data->pbi);
- }
- if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
- VP9_COMMON *const cm = &frame_worker_data->pbi->common;
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
- release_last_output_frame(ctx);
- ctx->last_show_frame = frame_worker_data->pbi->common.new_fb_idx;
- if (ctx->need_resync) return NULL;
- yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv);
- ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
- img = &ctx->img;
- return img;
- }
- } else {
- // Decoding failed. Release the worker thread.
+ YV12_BUFFER_CONFIG sd;
+ vp9_ppflags_t flags = { 0, 0, 0 };
+ const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+ VPxWorker *const worker = &ctx->frame_workers[0];
+ FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+ if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) set_ppflags(ctx, &flags);
+ // Wait for the frame from worker thread.
+ if (winterface->sync(worker)) {
+ // Check if worker has received any frames.
+ if (frame_worker_data->received_frame == 1) {
frame_worker_data->received_frame = 0;
- ++ctx->available_threads;
- ctx->need_resync = 1;
- if (ctx->flushed != 1) return NULL;
+ check_resync(ctx, frame_worker_data->pbi);
}
- } while (ctx->next_output_worker_id != ctx->next_submit_worker_id);
+ if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
+ VP9_COMMON *const cm = &frame_worker_data->pbi->common;
+ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+ ctx->last_show_frame = frame_worker_data->pbi->common.new_fb_idx;
+ if (ctx->need_resync) return NULL;
+ yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv);
+ ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
+ img = &ctx->img;
+ return img;
+ }
+ } else {
+ // Decoding failed. Release the worker thread.
+ frame_worker_data->received_frame = 0;
+ ctx->need_resync = 1;
+ if (ctx->flushed != 1) return NULL;
+ }
}
return NULL;
}
@@ -744,12 +550,6 @@
va_list args) {
vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *);
- // Only support this function in serial decode.
- if (ctx->frame_parallel_decode) {
- set_error_detail(ctx, "Not supported in frame parallel decode");
- return VPX_CODEC_INCAPABLE;
- }
-
if (data) {
vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data;
YV12_BUFFER_CONFIG sd;
@@ -768,12 +568,6 @@
va_list args) {
vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
- // Only support this function in serial decode.
- if (ctx->frame_parallel_decode) {
- set_error_detail(ctx, "Not supported in frame parallel decode");
- return VPX_CODEC_INCAPABLE;
- }
-
if (data) {
vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data;
YV12_BUFFER_CONFIG sd;
@@ -791,12 +585,6 @@
va_list args) {
vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *);
- // Only support this function in serial decode.
- if (ctx->frame_parallel_decode) {
- set_error_detail(ctx, "Not supported in frame parallel decode");
- return VPX_CODEC_INCAPABLE;
- }
-
if (data) {
YV12_BUFFER_CONFIG *fb;
VPxWorker *const worker = ctx->frame_workers;
@@ -842,12 +630,6 @@
va_list args) {
int *const update_info = va_arg(args, int *);
- // Only support this function in serial decode.
- if (ctx->frame_parallel_decode) {
- set_error_detail(ctx, "Not supported in frame parallel decode");
- return VPX_CODEC_INCAPABLE;
- }
-
if (update_info) {
if (ctx->frame_workers) {
VPxWorker *const worker = ctx->frame_workers;
@@ -891,12 +673,6 @@
va_list args) {
int *const frame_size = va_arg(args, int *);
- // Only support this function in serial decode.
- if (ctx->frame_parallel_decode) {
- set_error_detail(ctx, "Not supported in frame parallel decode");
- return VPX_CODEC_INCAPABLE;
- }
-
if (frame_size) {
if (ctx->frame_workers) {
VPxWorker *const worker = ctx->frame_workers;
@@ -918,12 +694,6 @@
va_list args) {
int *const render_size = va_arg(args, int *);
- // Only support this function in serial decode.
- if (ctx->frame_parallel_decode) {
- set_error_detail(ctx, "Not supported in frame parallel decode");
- return VPX_CODEC_INCAPABLE;
- }
-
if (render_size) {
if (ctx->frame_workers) {
VPxWorker *const worker = ctx->frame_workers;
@@ -944,7 +714,7 @@
static vpx_codec_err_t ctrl_get_bit_depth(vpx_codec_alg_priv_t *ctx,
va_list args) {
unsigned int *const bit_depth = va_arg(args, unsigned int *);
- VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
+ VPxWorker *const worker = &ctx->frame_workers[0];
if (bit_depth) {
if (worker) {
--- a/vp9/vp9_dx_iface.h
+++ b/vp9/vp9_dx_iface.h
@@ -15,15 +15,6 @@
typedef vpx_codec_stream_info_t vp9_stream_info_t;
-// This limit is due to framebuffer numbers.
-// TODO(hkuang): Remove this limit after implementing ondemand framebuffers.
-#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames.
-
-typedef struct cache_frame {
- int fb_idx;
- vpx_image_t img;
-} cache_frame;
-
struct vpx_codec_alg_priv {
vpx_codec_priv_t base;
vpx_codec_dec_cfg_t cfg;
@@ -41,17 +32,8 @@
int skip_loop_filter;
// Frame parallel related.
- int frame_parallel_decode; // frame-based threading.
VPxWorker *frame_workers;
int num_frame_workers;
- int next_submit_worker_id;
- int last_submit_worker_id;
- int next_output_worker_id;
- int available_threads;
- cache_frame frame_cache[FRAME_CACHE_SIZE];
- int frame_cache_write;
- int frame_cache_read;
- int num_cache_frames;
int need_resync; // wait for key/intra-only frame
// BufferPool that holds all reference frames. Shared by all the FrameWorkers.
BufferPool *buffer_pool;