shithub: libvpx

Download patch

ref: 9bd2bde10dfbe65224a73fd422a49a6c8e6ba078
parent: 4ddfa331c4f5730e1fbcf561d91602c6806ac5ae
author: Hui Su <[email protected]>
date: Tue Sep 4 10:44:02 EDT 2018

Enable rectangular partition search for speed 1

This patch enables rectangular partition search on speed 1. The encoding
speed loss is reduced thanks to recently added speed features.

This only affects speed 1 low bit-depth encoding.

Coding gains:
           avg_psnr     ovr_psnr    ssim
lowres      0.577%       0.621%    0.665%
midres      1.147%       1.215%    1.148%
hdres       0.758%       0.790%    0.769%

Tested encoding speed on 15 midres and 15 hdres clips, average speed
loss:
           QP=30       QP=40        QP=50
midres     4.43%       3.72%       -1.05%
hdres      4.41%       5.65%        3.77%

Change-Id: Ifc0712becccc69f7498796359ff12dbfa63fd7b3

--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3546,7 +3546,8 @@
   }
 
   if (cpi->sf.use_square_partition_only &&
-      bsize > cpi->sf.use_square_only_threshold) {
+      (bsize > cpi->sf.use_square_only_thresh_high ||
+       bsize < cpi->sf.use_square_only_thresh_low)) {
     if (cpi->use_svc) {
       if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
         partition_horz_allowed &= force_horz_split;
@@ -3839,9 +3840,9 @@
     } else {
       // skip rectangular partition test when larger block size
       // gives better rd cost
-      if ((cpi->sf.less_rectangular_check) &&
-          ((bsize > cpi->sf.use_square_only_threshold) ||
-           (best_rdc.dist < dist_breakout_thr)))
+      if (cpi->sf.less_rectangular_check &&
+          (bsize > cpi->sf.use_square_only_thresh_high ||
+           best_rdc.dist < dist_breakout_thr))
         do_rect &= !partition_none_allowed;
     }
     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -3921,8 +3922,8 @@
       best_rdc = sum_rdc;
       pc_tree->partitioning = PARTITION_HORZ;
 
-      if ((cpi->sf.less_rectangular_check) &&
-          (bsize > cpi->sf.use_square_only_threshold))
+      if (cpi->sf.less_rectangular_check &&
+          bsize > cpi->sf.use_square_only_thresh_high)
         do_rect = 0;
     }
     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -70,7 +70,8 @@
   // speed 0 features
   sf->partition_search_breakout_thr.dist = (1 << 20);
   sf->partition_search_breakout_thr.rate = 80;
-  sf->use_square_only_threshold = BLOCK_SIZES;
+  sf->use_square_only_thresh_high = BLOCK_SIZES;
+  sf->use_square_only_thresh_low = BLOCK_4X4;
 
   if (is_480p_or_larger) {
     // Currently, the machine-learning based partition search early termination
@@ -77,7 +78,7 @@
     // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
     sf->ml_partition_search_early_termination = 1;
   } else {
-    sf->use_square_only_threshold = BLOCK_32X32;
+    sf->use_square_only_thresh_high = BLOCK_32X32;
   }
 
   if (!is_1080p_or_larger) {
@@ -95,23 +96,42 @@
 
   if (speed >= 1) {
     sf->ml_partition_search_early_termination = 0;
-    sf->use_square_only_threshold = BLOCK_4X4;
-
+    sf->use_ml_partition_search_breakout = 1;
+    if (is_480p_or_larger)
+      sf->use_square_only_thresh_high = BLOCK_64X64;
+    else
+      sf->use_square_only_thresh_high = BLOCK_32X32;
+    sf->use_square_only_thresh_low = BLOCK_16X16;
     if (is_720p_or_larger) {
       sf->disable_split_mask =
           cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
-      sf->partition_search_breakout_thr.dist = (1 << 23);
-      sf->use_ml_partition_search_breakout = 0;
+      sf->partition_search_breakout_thr.dist = (1 << 22);
+      sf->ml_partition_search_breakout_thresh[0] = -5.0f;
+      sf->ml_partition_search_breakout_thresh[1] = -5.0f;
+      sf->ml_partition_search_breakout_thresh[2] = -9.0f;
     } else {
       sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
       sf->partition_search_breakout_thr.dist = (1 << 21);
-      sf->ml_partition_search_breakout_thresh[0] = 0.0f;
-      sf->ml_partition_search_breakout_thresh[1] = 0.0f;
-      sf->ml_partition_search_breakout_thresh[2] = 0.0f;
+      sf->ml_partition_search_breakout_thresh[0] = -1.0f;
+      sf->ml_partition_search_breakout_thresh[1] = -1.0f;
+      sf->ml_partition_search_breakout_thresh[2] = -1.0f;
     }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) {
+      sf->use_square_only_thresh_high = BLOCK_4X4;
+      sf->use_square_only_thresh_low = BLOCK_SIZES;
+      if (is_720p_or_larger) {
+        sf->partition_search_breakout_thr.dist = (1 << 23);
+        sf->use_ml_partition_search_breakout = 0;
+      }
+    }
+#endif
   }
 
   if (speed >= 2) {
+    sf->use_square_only_thresh_high = BLOCK_4X4;
+    sf->use_square_only_thresh_low = BLOCK_SIZES;
     if (is_720p_or_larger) {
       sf->disable_split_mask =
           cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
@@ -118,6 +138,7 @@
       sf->adaptive_pred_interp_filter = 0;
       sf->partition_search_breakout_thr.dist = (1 << 24);
       sf->partition_search_breakout_thr.rate = 120;
+      sf->use_ml_partition_search_breakout = 0;
     } else {
       sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
       sf->partition_search_breakout_thr.dist = (1 << 22);
@@ -220,13 +241,14 @@
 
   if (speed >= 1) {
     sf->enable_tpl_model = 0;
-    sf->prune_ref_frame_for_rect_partitions = 0;
+    sf->ml_prune_rect_partition_threhold[1] = 200;
+    sf->ml_prune_rect_partition_threhold[2] = 200;
+    sf->ml_prune_rect_partition_threhold[3] = 200;
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
+      sf->prune_ref_frame_for_rect_partitions = 0;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 
-    sf->ml_prune_rect_partition_threhold[0] = -1;
-    sf->ml_prune_rect_partition_threhold[1] = -1;
-    sf->ml_prune_rect_partition_threhold[2] = -1;
-    sf->ml_prune_rect_partition_threhold[3] = -1;
-
     if (oxcf->pass == 2) {
       TWO_PASS *const twopass = &cpi->twopass;
       if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) ||
@@ -289,6 +311,10 @@
     sf->recode_tolerance_low = 15;
     sf->recode_tolerance_high = 45;
     sf->enhanced_full_pixel_motion_search = 0;
+    sf->prune_ref_frame_for_rect_partitions = 0;
+    sf->ml_prune_rect_partition_threhold[1] = -1;
+    sf->ml_prune_rect_partition_threhold[2] = -1;
+    sf->ml_prune_rect_partition_threhold[3] = -1;
 
     if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
       for (i = 0; i < MAX_MESH_STEP; ++i) {
@@ -839,7 +865,8 @@
   sf->partition_search_type = SEARCH_PARTITION;
   sf->less_rectangular_check = 0;
   sf->use_square_partition_only = 0;
-  sf->use_square_only_threshold = BLOCK_SIZES;
+  sf->use_square_only_thresh_high = BLOCK_SIZES;
+  sf->use_square_only_thresh_low = BLOCK_4X4;
   sf->auto_min_max_partition_size = NOT_IN_USE;
   sf->rd_auto_partition_min_limit = BLOCK_4X4;
   sf->default_max_partition_size = BLOCK_64X64;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -331,14 +331,19 @@
   // rd than partition type split.
   int less_rectangular_check;
 
-  // Disable testing non square partitions. (eg 16x32)
+  // Disable testing non square partitions(eg 16x32) for block sizes larger than
+  // use_square_only_thresh_high or smaller than use_square_only_thresh_low.
   int use_square_partition_only;
-  BLOCK_SIZE use_square_only_threshold;
+  BLOCK_SIZE use_square_only_thresh_high;
+  BLOCK_SIZE use_square_only_thresh_low;
 
   // Prune reference frames for rectangular partitions.
   int prune_ref_frame_for_rect_partitions;
 
   // Threshold values used for ML based rectangular partition search pruning.
+  // If < 0, the feature is turned off.
+  // Higher values mean more aggressiveness to skip rectangular partition
+  // search that results in better encoding speed but worse coding performance.
   int ml_prune_rect_partition_threhold[4];
 
   // Sets min and max partition sizes for this 64x64 region based on the
@@ -497,6 +502,8 @@
 
   // Use ML-based partition search early breakout.
   int use_ml_partition_search_breakout;
+  // Higher values mean more aggressiveness for partition search breakout that
+  // results in better encoding  speed but worse compression performance.
   float ml_partition_search_breakout_thresh[3];
 
   // Machine-learning based partition search early termination