ref: c98273c9e7a0d06572a11ad754b7f33b0666921c
parent: a4bb5f2a29fc925f0fd033490c1c8ecb54e502c3
author: Yunqing Wang <[email protected]>
date: Fri Jun 12 13:10:30 EDT 2015
vp9_ethread: create enough threads while using SVC. This patch modifies the thread-creation code: when use_svc is true, the number of threads created is determined by the highest resolution. This resolves WebM issue 1018. Change-Id: I367227b14d1f8b08bbdad3635b232a3a37bbba26
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -630,7 +630,8 @@
if (svc_ctx.speed != -1)
vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
- vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, 0);
+ if (svc_ctx.threads)
+ vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
// Encode frames
while (!end_of_stream) {
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -54,6 +54,18 @@
return 0;
}
+static int get_max_tile_cols(VP9_COMP *cpi) {  // Returns the tile-column count (1 << clamped log2) derived from cpi->oxcf.width.
+ const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2);
+ int mi_cols = aligned_width >> MI_SIZE_LOG2;  // Aligned width expressed in units of 1 << MI_SIZE_LOG2 (presumably mode-info columns).
+ int min_log2_tile_cols, max_log2_tile_cols;
+ int log2_tile_cols;
+ // Fetch the min/max log2 tile-column bounds for mi_cols, then clamp the configured oxcf.tile_columns into that range.
+ vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+ log2_tile_cols = clamp(cpi->oxcf.tile_columns,
+ min_log2_tile_cols, max_log2_tile_cols);
+ return (1 << log2_tile_cols);  // Convert the clamped log2 value into an actual column count.
+}
+
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -65,13 +77,23 @@
// Only run once to create threads and allocate thread data.
if (cpi->num_workers == 0) {
+ int allocated_workers = num_workers;
+
+ // While using SVC, we need to allocate threads according to the highest
+ // resolution.
+ if (cpi->use_svc) {
+ int max_tile_cols = get_max_tile_cols(cpi);
+ allocated_workers = MIN(cpi->oxcf.max_threads, max_tile_cols);
+ }
+
CHECK_MEM_ERROR(cm, cpi->workers,
- vpx_malloc(num_workers * sizeof(*cpi->workers)));
+ vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
- vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
+ vpx_calloc(allocated_workers,
+ sizeof(*cpi->tile_thr_data)));
- for (i = 0; i < num_workers; i++) {
+ for (i = 0; i < allocated_workers; i++) {
VP9Worker *const worker = &cpi->workers[i];
EncWorkerData *thread_data = &cpi->tile_thr_data[i];
@@ -78,7 +100,7 @@
++cpi->num_workers;
winterface->init(worker);
- if (i < num_workers - 1) {
+ if (i < allocated_workers - 1) {
thread_data->cpi = cpi;
// Allocate thread data.
@@ -154,7 +176,7 @@
// Set the starting tile for each thread.
thread_data->start = i;
- if (i == num_workers - 1)
+ if (i == cpi->num_workers - 1)
winterface->execute(worker);
else
winterface->launch(worker);
@@ -171,7 +193,7 @@
EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
// Accumulate counters.
- if (i < num_workers - 1) {
+ if (i < cpi->num_workers - 1) {
vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
accumulate_rd_opt(&cpi->td, thread_data->td);
}