shithub: libvpx

Download patch

ref: 3448987ab20aa05716ffc4aedf6d02e23f75920b
parent: d99b6af68ff6b95f577ec9a0899e54ab653064c8
author: James Zern <[email protected]>
date: Sat Sep 22 08:19:16 EDT 2018

Revert "Revert "Revert "Loopfilter MultiThread Optimization"""

This reverts commit bf6299010e815e111d7326530c249e9d99611f34.

segfaults, causes an assertion failure with corrupt input:
get_uv_tx_size: Assertion `mi->sb_type < BLOCK_8X8 ||
ss_size_lookup[mi->sb_type][pd->subsampling_x][pd->subsampling_y] !=
BLOCK_INVALID

BUG=webm:1562

Change-Id: I05a711cad3d8e7f1a8e64422b4356bdf4edb3d12

--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -204,7 +204,6 @@
   { 2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf" },
   { 4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf" },
   { 2, "invalid-crbug-629481.webm" },
-  { 3, "invalid-crbug-1558.ivf" },
 };
 
 INSTANTIATE_TEST_CASE_P(
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -787,8 +787,6 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-3.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-629481.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-629481.webm.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-1558.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-1558.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-667044.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-667044.webm.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += crbug-1539.rawfile
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -859,5 +859,3 @@
 a0fbbbc5dd50fd452096f4455a58c1a8c9f66697 *invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf
 a61774cf03fc584bd9f0904fc145253bb8ea6c4c *invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf.res
 894fae3afee0290546590823974203ab4b8abd95 *crbug-1539.rawfile
-f1026c03efd5da21b381c8eb21f0d64e6d7e4ba3 *invalid-crbug-1558.ivf
-eb198c25f861c3fe2cbd310de11eb96843019345 *invalid-crbug-1558.ivf.res
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -259,8 +259,6 @@
   PARTITION_CONTEXT *above_seg_context;
   ENTROPY_CONTEXT *above_context;
   int above_context_alloc_cols;
-
-  int lf_row;
 } VP9_COMMON;
 
 static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) {
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -229,26 +229,6 @@
                       workers, num_workers, lf_sync);
 }
 
-void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level,
-                     int num_workers) {
-  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
-
-  if (!frame_filter_level) return;
-
-  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
-      num_workers > lf_sync->num_workers) {
-    vp9_loop_filter_dealloc(lf_sync);
-    vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
-  }
-
-  // Initialize cur_sb_col to -1 for all SB rows.
-  memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
-
-  memset(lf_sync->num_tiles_done, 0,
-         sizeof(*lf_sync->num_tiles_done) * sb_rows);
-  cm->lf_row = 0;
-}
-
 // Set up nsync by width.
 static INLINE int get_sync_range(int width) {
   // nsync numbers are picked by testing. For example, for 4k
@@ -286,25 +266,6 @@
         pthread_cond_init(&lf_sync->cond[i], NULL);
       }
     }
-    pthread_mutex_init(&lf_sync->lf_mutex, NULL);
-
-    CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex,
-                    vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows));
-    if (lf_sync->recon_done_mutex) {
-      int i;
-      for (i = 0; i < rows; ++i) {
-        pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL);
-      }
-    }
-
-    CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond,
-                    vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows));
-    if (lf_sync->recon_done_cond) {
-      int i;
-      for (i = 0; i < rows; ++i) {
-        pthread_cond_init(&lf_sync->recon_done_cond[i], NULL);
-      }
-    }
   }
 #endif  // CONFIG_MULTITHREAD
 
@@ -315,11 +276,6 @@
   CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
                   vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
 
-  CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done,
-                  vpx_malloc(sizeof(*lf_sync->num_tiles_done) *
-                                 mi_cols_aligned_to_sb(cm->mi_rows) >>
-                             MI_BLOCK_SIZE_LOG2));
-
   // Set up nsync.
   lf_sync->sync_range = get_sync_range(width);
 }
@@ -342,116 +298,13 @@
       }
       vpx_free(lf_sync->cond);
     }
-    if (lf_sync->recon_done_mutex != NULL) {
-      int i;
-      for (i = 0; i < lf_sync->rows; ++i) {
-        pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
-      }
-      vpx_free(lf_sync->recon_done_mutex);
-    }
-
-    pthread_mutex_destroy(&lf_sync->lf_mutex);
-    if (lf_sync->recon_done_cond != NULL) {
-      int i;
-      for (i = 0; i < lf_sync->rows; ++i) {
-        pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
-      }
-      vpx_free(lf_sync->recon_done_cond);
-    }
 #endif  // CONFIG_MULTITHREAD
-
     vpx_free(lf_sync->lfdata);
     vpx_free(lf_sync->cur_sb_col);
-    vpx_free(lf_sync->num_tiles_done);
     // clear the structure as the source of this call may be a resize in which
     // case this call will be followed by an _alloc() which may fail.
     vp9_zero(*lf_sync);
   }
-}
-
-static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
-  int return_val = -1;
-  int cur_row;
-  const int max_rows = cm->mi_rows;
-
-#if CONFIG_MULTITHREAD
-  const int tile_cols = 1 << cm->log2_tile_cols;
-
-  pthread_mutex_lock(&lf_sync->lf_mutex);
-  if (cm->lf_row < max_rows) {
-    cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
-    return_val = cm->lf_row;
-    cm->lf_row += MI_BLOCK_SIZE;
-    if (cm->lf_row < max_rows) {
-      /* If this is not the last row, make sure the next row is also decoded.
-       * This is because the intra predict has to happen before loop filter */
-      cur_row += 1;
-    }
-  }
-  pthread_mutex_unlock(&lf_sync->lf_mutex);
-
-  if (return_val == -1) return return_val;
-
-  pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]);
-  if (lf_sync->num_tiles_done[cur_row] < tile_cols) {
-    pthread_cond_wait(&lf_sync->recon_done_cond[cur_row],
-                      &lf_sync->recon_done_mutex[cur_row]);
-  }
-  pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]);
-#else
-  (void)lf_sync;
-  if (cm->lf_row < max_rows) {
-    cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
-    return_val = cm->lf_row;
-    cm->lf_row += MI_BLOCK_SIZE;
-    if (cm->lf_row < max_rows) {
-      /* If this is not the last row, make sure the next row is also decoded.
-       * This is because the intra predict has to happen before loop filter */
-      cur_row += 1;
-    }
-  }
-#endif  // CONFIG_MULTITHREAD
-
-  return return_val;
-}
-
-void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync,
-                         MACROBLOCKD *xd) {
-  int mi_row;
-  VP9_COMMON *cm = lf_data->cm;
-
-  while (!xd->corrupted && (mi_row = get_next_row(cm, lf_sync)) != -1 &&
-         mi_row < cm->mi_rows) {
-    lf_data->start = mi_row;
-    lf_data->stop = mi_row + MI_BLOCK_SIZE;
-
-    thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
-                            lf_data->start, lf_data->stop, lf_data->y_only,
-                            lf_sync);
-  }
-}
-
-void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row) {
-#if CONFIG_MULTITHREAD
-  pthread_mutex_lock(&lf_sync->recon_done_mutex[row]);
-  lf_sync->num_tiles_done[row] += 1;
-  if (num_tiles == lf_sync->num_tiles_done[row]) {
-    if (is_last_row) {
-      /* The last 2 rows wait on the last row to be done.
-       * So, we have to broadcast the signal in this case.
-       */
-      pthread_cond_broadcast(&lf_sync->recon_done_cond[row]);
-    } else {
-      pthread_cond_signal(&lf_sync->recon_done_cond[row]);
-    }
-  }
-  pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]);
-#else
-  (void)lf_sync;
-  (void)num_tiles;
-  (void)row;
-  (void)is_last_row;
-#endif  // CONFIG_MULTITHREAD
 }
 
 // Accumulate frame counts.
--- a/vp9/common/vp9_thread_common.h
+++ b/vp9/common/vp9_thread_common.h
@@ -37,13 +37,6 @@
   // Row-based parallel loopfilter data
   LFWorkerData *lfdata;
   int num_workers;
-
-#if CONFIG_MULTITHREAD
-  pthread_mutex_t lf_mutex;
-  pthread_mutex_t *recon_done_mutex;
-  pthread_cond_t *recon_done_cond;
-#endif
-  int *num_tiles_done;
 } VP9LfSync;
 
 // Allocate memory for loopfilter row synchronization.
@@ -59,17 +52,6 @@
                               int frame_filter_level, int y_only,
                               int partial_frame, VPxWorker *workers,
                               int num_workers, VP9LfSync *lf_sync);
-
-// Multi-threaded loopfilter initialisations
-void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm,
-                     int frame_filter_level, int num_workers);
-
-void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync,
-                         MACROBLOCKD *xd);
-
-void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row);
-
-void vp9_set_last_decoded_row(struct VP9Common *cm, int tile_col, int mi_row);
 
 void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
                                  const struct FRAME_COUNTS *counts, int is_dec);
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1451,24 +1451,6 @@
   return vpx_reader_find_end(&tile_data->bit_reader);
 }
 
-static void set_rows_after_error(VP9LfSync *lf_sync, int start_row, int mi_rows,
-                                 int num_tiles_left, int total_num_tiles) {
-  do {
-    int mi_row;
-    const int aligned_rows = mi_cols_aligned_to_sb(mi_rows);
-    const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
-    for (mi_row = start_row; mi_row < mi_rows; mi_row += MI_BLOCK_SIZE) {
-      const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2);
-      vp9_set_row(lf_sync, total_num_tiles, mi_row >> MI_BLOCK_SIZE_LOG2,
-                  is_last_row);
-    }
-    /* If there are multiple tiles, the second tile should start marking row
-     * progress from row 0.
-     */
-    start_row = 0;
-  } while (num_tiles_left--);
-}
-
 // On entry 'tile_data->data_end' points to the end of the input frame, on exit
 // it is updated to reflect the bitreader position of the final tile column if
 // present in the tile buffer group or NULL otherwise.
@@ -1479,12 +1461,6 @@
   TileInfo *volatile tile = &tile_data->xd.tile;
   const int final_col = (1 << pbi->common.log2_tile_cols) - 1;
   const uint8_t *volatile bit_reader_end = NULL;
-  VP9_COMMON *cm = &pbi->common;
-
-  LFWorkerData *lf_data = tile_data->lf_data;
-  VP9LfSync *lf_sync = tile_data->lf_sync;
-
-  volatile int mi_row = 0;
   volatile int n = tile_data->buf_start;
   tile_data->error_info.setjmp = 1;
 
@@ -1492,12 +1468,6 @@
     tile_data->error_info.setjmp = 0;
     tile_data->xd.corrupted = 1;
     tile_data->data_end = NULL;
-    if (cm->lf.filter_level && !cm->skip_loop_filter) {
-      const int num_tiles_left = tile_data->buf_end - n;
-      const int mi_row_start = mi_row;
-      set_rows_after_error(lf_sync, mi_row_start, cm->mi_rows, num_tiles_left,
-                           1 << cm->log2_tile_cols);
-    }
     return 0;
   }
 
@@ -1504,14 +1474,8 @@
   tile_data->xd.corrupted = 0;
 
   do {
-    int mi_col;
+    int mi_row, mi_col;
     const TileBuffer *const buf = pbi->tile_buffers + n;
-
-    /* Initialize to 0 is safe since we do not deal with streams that have
-     * more than one row of tiles. (So tile->mi_row_start will be 0)
-     */
-    assert(cm->log2_tile_rows == 0);
-    mi_row = 0;
     vp9_zero(tile_data->dqcoeff);
     vp9_tile_init(tile, &pbi->common, 0, buf->col);
     setup_token_decoder(buf->data, tile_data->data_end, buf->size,
@@ -1529,13 +1493,6 @@
            mi_col += MI_BLOCK_SIZE) {
         decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
       }
-      if (cm->lf.filter_level && !cm->skip_loop_filter) {
-        const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
-        const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
-        const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2);
-        vp9_set_row(lf_sync, 1 << cm->log2_tile_cols,
-                    mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row);
-      }
     }
 
     if (buf->col == final_col) {
@@ -1543,20 +1500,6 @@
     }
   } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end);
 
-  if (n < tile_data->buf_end && cm->lf.filter_level && !cm->skip_loop_filter) {
-    /* This was not incremented in the tile loop, so increment before tiles left
-     * calculation
-     */
-    ++n;
-    set_rows_after_error(lf_sync, 0, cm->mi_rows, tile_data->buf_end - n,
-                         1 << cm->log2_tile_cols);
-  }
-
-  if (!tile_data->xd.corrupted && cm->lf.filter_level &&
-      !cm->skip_loop_filter) {
-    vp9_loopfilter_rows(lf_data, lf_sync, &tile_data->xd);
-  }
-
   tile_data->data_end = bit_reader_end;
   return !tile_data->xd.corrupted;
 }
@@ -1573,8 +1516,6 @@
   VP9_COMMON *const cm = &pbi->common;
   const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
   const uint8_t *bit_reader_end = NULL;
-  VP9LfSync *lf_row_sync = &pbi->lf_row_sync;
-  YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
   const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
@@ -1601,12 +1542,6 @@
     }
   }
 
-  // Initialize LPF
-  if (cm->lf.filter_level && !cm->skip_loop_filter) {
-    vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level,
-                    pbi->num_tile_workers);
-  }
-
   // Reset tile decoding hook
   for (n = 0; n < num_workers; ++n) {
     VPxWorker *const worker = &pbi->tile_workers[n];
@@ -1613,14 +1548,6 @@
     TileWorkerData *const tile_data =
         &pbi->tile_worker_data[n + pbi->total_tiles];
     winterface->sync(worker);
-
-    if (cm->lf.filter_level && !cm->skip_loop_filter) {
-      tile_data->lf_sync = lf_row_sync;
-      tile_data->lf_data = &tile_data->lf_sync->lfdata[n];
-      vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane);
-      tile_data->lf_data->y_only = 0;
-    }
-
     tile_data->xd = pbi->mb;
     tile_data->xd.counts =
         cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
@@ -2142,7 +2069,15 @@
   if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) {
     // Multi-threaded tile decoder
     *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
-    if (xd->corrupted) {
+    if (!xd->corrupted) {
+      if (!cm->skip_loop_filter) {
+        // If multiple threads are used to decode tiles, then we use those
+        // threads to do parallel loopfiltering.
+        vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
+                                 0, 0, pbi->tile_workers, pbi->num_tile_workers,
+                                 &pbi->lf_row_sync);
+      }
+    } else {
       vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
                          "Decode failed. Frame data is corrupted.");
     }
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -37,8 +37,6 @@
   int buf_start, buf_end;  // pbi->tile_buffers to decode, inclusive
   vpx_reader bit_reader;
   FRAME_COUNTS counts;
-  LFWorkerData *lf_data;
-  VP9LfSync *lf_sync;
   DECLARE_ALIGNED(16, MACROBLOCKD, xd);
   /* dqcoeff are shared by all the planes. So planes must be decoded serially */
   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
--- a/vpx_util/vpx_thread.h
+++ b/vpx_util/vpx_thread.h
@@ -159,23 +159,6 @@
   return 0;
 }
 
-static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) {
-  int ok = 1;
-#ifdef USE_WINDOWS_CONDITION_VARIABLE
-  WakeAllConditionVariable(condition);
-#else
-  while (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
-    // a thread is waiting in pthread_cond_wait: allow it to be notified
-    ok &= SetEvent(condition->signal_event_);
-    // wait until the event is consumed so the signaler cannot consume
-    // the event via its own pthread_cond_wait.
-    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) !=
-           WAIT_OBJECT_0);
-  }
-#endif
-  return !ok;
-}
-
 static INLINE int pthread_cond_signal(pthread_cond_t *const condition) {
   int ok = 1;
 #ifdef USE_WINDOWS_CONDITION_VARIABLE