shithub: libvpx

Download patch

ref: 97d6a4cbd1d89224fcb2c4045a8adefa9c75bfe0
parent: 61927ba4ac6390bc98396b9ba30d050f63e720fa
author: Ranjit Kumar Tulabandu <[email protected]>
date: Thu Feb 16 14:07:41 EST 2017

Refactored the row based multi-threading code

Modified the code to facilitate bit-match tests in first pass
Added unit-tests to test the row based multi-threading behavior for bit-exactness

Change-Id: Ieaf6a8f935bb1075597e0a3b52d9989c8546d7df

--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -40,6 +40,7 @@
     init_flags_ = VPX_CODEC_USE_PSNR;
 
     new_mt_mode_ = 1;
+    bit_match_mode_ = 0;
     first_pass_only_ = true;
     firstpass_stats_.buf = NULL;
     firstpass_stats_.sz = 0;
@@ -85,6 +86,8 @@
       if (encoding_mode_ == ::libvpx_test::kTwoPassGood)
         encoder->Control(VP9E_SET_NEW_MT, new_mt_mode_);
 
+      encoder->Control(VP9E_ENABLE_THREAD_BIT_MATCH, bit_match_mode_);
+
       encoder_initialized_ = true;
     }
   }
@@ -110,6 +113,7 @@
   ::libvpx_test::TestMode encoding_mode_;
   int set_cpu_used_;
   int new_mt_mode_;
+  int bit_match_mode_;
   bool first_pass_only_;
   vpx_fixed_buf_t firstpass_stats_;
 };
@@ -144,6 +148,28 @@
   fp_stats->sz = 0;
 }
 
+static void compare_fp_stats_md5(vpx_fixed_buf_t *fp_stats) {
+  // fp_stats holds two sets of first pass encoding stats, one in each half
+  // of the buffer. The MD5 digests of the two halves are compared.
+  uint8_t *stats1 = reinterpret_cast<uint8_t *>(fp_stats->buf);
+  uint8_t *stats2 = stats1 + fp_stats->sz / 2;
+  ::libvpx_test::MD5 md5_new_mt_0, md5_new_mt_1;
+
+  md5_new_mt_0.Add(stats1, fp_stats->sz / 2);
+  const char *md5_new_mt_0_str = md5_new_mt_0.Get();
+
+  md5_new_mt_1.Add(stats2, fp_stats->sz / 2);
+  const char *md5_new_mt_1_str = md5_new_mt_1.Get();
+
+  // The MD5 strings of the two halves must be identical.
+  ASSERT_STREQ(md5_new_mt_0_str, md5_new_mt_1_str)
+      << "MD5 checksums don't match";
+
+  // Zero the stats buffer contents and reset its size to 0.
+  memset((uint8_t *)fp_stats->buf, 0, fp_stats->sz);
+  fp_stats->sz = 0;
+}
+
 TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) {
   ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
 
@@ -151,6 +177,7 @@
   cfg_.rc_target_bitrate = 1000;
 
   // Test new_mt_mode: 0 vs 1 (threads = 1, tiles_ = 0)
+  bit_match_mode_ = 0;
   tiles_ = 0;
   cfg_.g_threads = 1;
 
@@ -177,6 +204,21 @@
 
   // Compare to check if single-thread and multi-thread stats matches.
   compare_fp_stats(&firstpass_stats_);
+
+  // Test new_mt_mode: 0 vs 1 (threads = 8, tiles_ = 2)
+  bit_match_mode_ = 1;
+  tiles_ = 2;
+  cfg_.g_threads = 8;
+
+  new_mt_mode_ = 0;
+  init_flags_ = VPX_CODEC_USE_PSNR;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  new_mt_mode_ = 1;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  // Compare to check if stats match with new-mt=0/1.
+  compare_fp_stats_md5(&firstpass_stats_);
 }
 
 class VPxEncoderThreadTest
@@ -191,6 +233,7 @@
     init_flags_ = VPX_CODEC_USE_PSNR;
     md5_.clear();
     new_mt_mode_ = 1;
+    bit_match_mode_ = 0;
   }
   virtual ~VPxEncoderThreadTest() {}
 
@@ -229,10 +272,11 @@
         encoder->Control(VP8E_SET_ARNR_TYPE, 3);
         encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0);
 
-        // While new_mt = 1(namely, using row-based multi-threading), several
+        encoder->Control(VP9E_SET_NEW_MT, new_mt_mode_);
+        // While new_mt = 1/0(with/without row-based multi-threading), several
         // speed features that would adaptively adjust encoding parameters have
         // to be disabled to guarantee the bit match of the resulted bitstream.
-        if (new_mt_mode_) encoder->Control(VP9E_ENABLE_THREAD_BIT_MATCH, 1);
+        encoder->Control(VP9E_ENABLE_THREAD_BIT_MATCH, bit_match_mode_);
       } else {
         encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
         encoder->Control(VP9E_SET_AQ_MODE, 3);
@@ -265,15 +309,18 @@
   ::libvpx_test::TestMode encoding_mode_;
   int set_cpu_used_;
   int new_mt_mode_;
+  int bit_match_mode_;
   std::vector<std::string> md5_;
 };
 
 TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
-  std::vector<std::string> single_thr_md5, multi_thr_md5;
+  std::vector<std::string> single_thr_md5, multi_thr_md5, new_mt_0_md5;
 
   ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
 
   cfg_.rc_target_bitrate = 1000;
+  bit_match_mode_ = 1;
+  new_mt_mode_ = 1;
 
   // Encode using single thread.
   cfg_.g_threads = 1;
@@ -290,6 +337,17 @@
 
   // Compare to check if two vectors are equal.
   ASSERT_EQ(single_thr_md5, multi_thr_md5);
+
+  // Encode with new-mt 0.
+  new_mt_mode_ = 0;
+  cfg_.g_threads = threads_;
+  init_flags_ = VPX_CODEC_USE_PSNR;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  new_mt_0_md5 = md5_;
+  md5_.clear();
+
+  // Compare to check if two vectors are equal.
+  ASSERT_EQ(new_mt_0_md5, multi_thr_md5);
 }
 
 INSTANTIATE_TEST_CASE_P(
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1732,12 +1732,6 @@
   }
 #endif
 
-#if ENABLE_MT_BIT_MATCH
-  CHECK_MEM_ERROR(
-      cm, cpi->twopass.fp_mb_float_stats,
-      vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1));
-#endif
-
   cpi->refresh_alt_ref_frame = 0;
   cpi->multi_arf_last_grp_enabled = 0;
 
@@ -2116,11 +2110,6 @@
     vpx_free(cpi->twopass.frame_mb_stats_buf);
     cpi->twopass.frame_mb_stats_buf = NULL;
   }
-#endif
-
-#if ENABLE_MT_BIT_MATCH
-  vpx_free(cpi->twopass.fp_mb_float_stats);
-  cpi->twopass.fp_mb_float_stats = NULL;
 #endif
 
   vp9_remove_common(cm);
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -287,6 +287,9 @@
   } else {
     output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
   }
+
+  vpx_free(cpi->twopass.fp_mb_float_stats);
+  cpi->twopass.fp_mb_float_stats = NULL;
 }
 
 static vpx_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
@@ -647,7 +650,8 @@
   return block_noise << 2;  // Scale << 2 to account for sampling.
 }
 
-#if ENABLE_MT_BIT_MATCH
+// This function is called when testing row-based multi-threading for
+// bit-exactness in unit tests.
 static void accumulate_floating_point_stats(VP9_COMP *cpi,
                                             TileDataEnc *first_tile_col) {
   VP9_COMMON *const cm = &cpi->common;
@@ -667,7 +671,6 @@
     }
   }
 }
-#endif
 
 static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps,
                                  FIRSTPASS_DATA *fp_acc_data) {
@@ -804,6 +807,10 @@
                            : NULL;
   MODE_INFO mi_above, mi_left;
 
+  double mb_intra_factor;
+  double mb_brightness_factor;
+  double mb_neutral_count;
+
   // First pass code requires valid last and new frame buffers.
   assert(new_yv12 != NULL);
   assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL));
@@ -861,9 +868,7 @@
     const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
     double log_intra;
     int level_sample;
-#if ENABLE_MT_BIT_MATCH
     const int mb_index = mb_row * cm->mb_cols + mb_col;
-#endif
 
 #if CONFIG_FP_MB_STATS
     const int mb_index = mb_row * cm->mb_cols + mb_col;
@@ -962,16 +967,15 @@
     vpx_clear_system_state();
     log_intra = log(this_error + 1.0);
     if (log_intra < 10.0) {
-      fp_acc_data->intra_factor += 1.0 + ((10.0 - log_intra) * 0.05);
-#if ENABLE_MT_BIT_MATCH
-      cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor =
-          1.0 + ((10.0 - log_intra) * 0.05);
-#endif
+      mb_intra_factor = 1.0 + ((10.0 - log_intra) * 0.05);
+      fp_acc_data->intra_factor += mb_intra_factor;
+      if (cpi->oxcf.ethread_bit_match)
+        cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor =
+            mb_intra_factor;
     } else {
       fp_acc_data->intra_factor += 1.0;
-#if ENABLE_MT_BIT_MATCH
-      cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = 1.0;
-#endif
+      if (cpi->oxcf.ethread_bit_match)
+        cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = 1.0;
     }
 
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -983,17 +987,16 @@
     level_sample = x->plane[0].src.buf[0];
 #endif
     if ((level_sample < DARK_THRESH) && (log_intra < 9.0)) {
-      fp_acc_data->brightness_factor +=
-          1.0 + (0.01 * (DARK_THRESH - level_sample));
-#if ENABLE_MT_BIT_MATCH
-      cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor =
-          1.0 + (0.01 * (DARK_THRESH - level_sample));
-#endif
+      mb_brightness_factor = 1.0 + (0.01 * (DARK_THRESH - level_sample));
+      fp_acc_data->brightness_factor += mb_brightness_factor;
+      if (cpi->oxcf.ethread_bit_match)
+        cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor =
+            mb_brightness_factor;
     } else {
       fp_acc_data->brightness_factor += 1.0;
-#if ENABLE_MT_BIT_MATCH
-      cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = 1.0;
-#endif
+      if (cpi->oxcf.ethread_bit_match)
+        cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor =
+            1.0;
     }
 
     // Intrapenalty below deals with situations where the intra and inter
@@ -1153,19 +1156,19 @@
         if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
             (this_error < (2 * intrapenalty))) {
           fp_acc_data->neutral_count += 1.0;
-#if ENABLE_MT_BIT_MATCH
-          cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = 1.0;
-#endif
+          if (cpi->oxcf.ethread_bit_match)
+            cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count =
+                1.0;
           // Also track cases where the intra is not much worse than the inter
           // and use this in limiting the GF/arf group length.
         } else if ((this_error > NCOUNT_INTRA_THRESH) &&
                    (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) {
-          fp_acc_data->neutral_count +=
+          mb_neutral_count =
               (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error);
-#if ENABLE_MT_BIT_MATCH
-          cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count =
-              (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error);
-#endif
+          fp_acc_data->neutral_count += mb_neutral_count;
+          if (cpi->oxcf.ethread_bit_match)
+            cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count =
+                mb_neutral_count;
         }
 
         mv.row *= 8;
@@ -1403,6 +1406,11 @@
 
   cm->log2_tile_rows = 0;
 
+  if (cpi->oxcf.ethread_bit_match && cpi->twopass.fp_mb_float_stats == NULL)
+    CHECK_MEM_ERROR(
+        cm, cpi->twopass.fp_mb_float_stats,
+        vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1));
+
   {
     FIRSTPASS_STATS fps;
     TileDataEnc *first_tile_col;
@@ -1415,15 +1423,14 @@
     } else {
       cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
       cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
-#if ENABLE_MT_BIT_MATCH
-      cm->log2_tile_cols = 0;
-      vp9_zero_array(cpi->twopass.fp_mb_float_stats, cm->MBs);
-#endif
+      if (cpi->oxcf.ethread_bit_match) {
+        cm->log2_tile_cols = 0;
+        vp9_zero_array(cpi->twopass.fp_mb_float_stats, cm->MBs);
+      }
       vp9_encode_fp_row_mt(cpi);
       first_tile_col = &cpi->tile_data[0];
-#if ENABLE_MT_BIT_MATCH
-      accumulate_floating_point_stats(cpi, first_tile_col);
-#endif
+      if (cpi->oxcf.ethread_bit_match)
+        accumulate_floating_point_stats(cpi, first_tile_col);
       first_pass_stat_calc(cpi, &fps, &(first_tile_col->fp_data));
     }
 
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -41,14 +41,11 @@
 
 #define INVALID_ROW -1
 
-#define ENABLE_MT_BIT_MATCH 0
-#if ENABLE_MT_BIT_MATCH
 typedef struct {
   double frame_mb_intra_factor;
   double frame_mb_brightness_factor;
   double frame_mb_neutral_count;
 } FP_MB_FLOAT_STATS;
-#endif
 
 typedef struct {
   double intra_factor;
@@ -149,9 +146,7 @@
   FIRSTPASS_MB_STATS firstpass_mb_stats;
 #endif
 
-#if ENABLE_MT_BIT_MATCH
   FP_MB_FLOAT_STATS *fp_mb_float_stats;
-#endif
 
   // An indication of the content type of the current frame
   FRAME_CONTENT_TYPE fr_content_type;