shithub: libvpx

Download patch

ref: e820ca6973f53815608d44854eebf380cc17f2f6
parent: a6d126709a178142259b41aea8c3d841161ea166
parent: c98273c9e7a0d06572a11ad754b7f33b0666921c
author: Yunqing Wang <[email protected]>
date: Mon Jun 15 19:03:31 EDT 2015

Merge "vp9_ethread: create enough threads while using SVC"

--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -630,7 +630,8 @@
 
   if (svc_ctx.speed != -1)
     vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
-  vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, 0);
+  if (svc_ctx.threads)
+    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
 
   // Encode frames
   while (!end_of_stream) {
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -54,6 +54,18 @@
   return 0;
 }
 
+static int get_max_tile_cols(VP9_COMP *cpi) {  // Tile columns for oxcf.width.
+  const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2);
+  int mi_cols = aligned_width >> MI_SIZE_LOG2;  // Width in MI units.
+  int min_log2_tile_cols, max_log2_tile_cols;  // Filled in below.
+  int log2_tile_cols;
+
+  vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+  log2_tile_cols = clamp(cpi->oxcf.tile_columns,  // Requested log2 value.
+                   min_log2_tile_cols, max_log2_tile_cols);
+  return (1 << log2_tile_cols);  // Convert log2 to a column count.
+}
+
 void vp9_encode_tiles_mt(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   const int tile_cols = 1 << cm->log2_tile_cols;
@@ -65,13 +77,23 @@
 
   // Only run once to create threads and allocate thread data.
   if (cpi->num_workers == 0) {
+    int allocated_workers = num_workers;
+
+    // While using SVC, we need to allocate threads according to the highest
+    // resolution.
+    if (cpi->use_svc) {
+      int max_tile_cols = get_max_tile_cols(cpi);
+      allocated_workers = MIN(cpi->oxcf.max_threads, max_tile_cols);
+    }
+
     CHECK_MEM_ERROR(cm, cpi->workers,
-                    vpx_malloc(num_workers * sizeof(*cpi->workers)));
+                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
 
     CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
-                    vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
+                    vpx_calloc(allocated_workers,
+                    sizeof(*cpi->tile_thr_data)));
 
-    for (i = 0; i < num_workers; i++) {
+    for (i = 0; i < allocated_workers; i++) {
       VP9Worker *const worker = &cpi->workers[i];
       EncWorkerData *thread_data = &cpi->tile_thr_data[i];
 
@@ -78,7 +100,7 @@
       ++cpi->num_workers;
       winterface->init(worker);
 
-      if (i < num_workers - 1) {
+      if (i < allocated_workers - 1) {
         thread_data->cpi = cpi;
 
         // Allocate thread data.
@@ -154,7 +176,7 @@
     // Set the starting tile for each thread.
     thread_data->start = i;
 
-    if (i == num_workers - 1)
+    if (i == cpi->num_workers - 1)
       winterface->execute(worker);
     else
       winterface->launch(worker);
@@ -171,7 +193,7 @@
     EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
 
     // Accumulate counters.
-    if (i < num_workers - 1) {
+    if (i < cpi->num_workers - 1) {
       vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
       accumulate_rd_opt(&cpi->td, thread_data->td);
     }