ref: 9bd2bde10dfbe65224a73fd422a49a6c8e6ba078
parent: 4ddfa331c4f5730e1fbcf561d91602c6806ac5ae
author: Hui Su <[email protected]>
date: Tue Sep 4 10:44:02 EDT 2018
Enable rectangular partition search for speed 1

This patch enables rectangular partition search on speed 1. The encoding
speed loss is reduced thanks to recently added speed features. This only
affects speed 1 low bit-depth encoding.

Coding gains:
          avg_psnr  ovr_psnr  ssim
  lowres  0.577%    0.621%    0.665%
  midres  1.147%    1.215%    1.148%
  hdres   0.758%    0.790%    0.769%

Tested encoding speed on 15 midres and 15 hdres clips, average speed loss:
          QP=30   QP=40   QP=50
  midres  4.43%   3.72%   -1.05%
  hdres   4.41%   5.65%   3.77%

Change-Id: Ifc0712becccc69f7498796359ff12dbfa63fd7b3
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3546,7 +3546,8 @@
}
if (cpi->sf.use_square_partition_only &&
- bsize > cpi->sf.use_square_only_threshold) {
+ (bsize > cpi->sf.use_square_only_thresh_high ||
+ bsize < cpi->sf.use_square_only_thresh_low)) {
if (cpi->use_svc) {
if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
partition_horz_allowed &= force_horz_split;
@@ -3839,9 +3840,9 @@
} else {
// skip rectangular partition test when larger block size
// gives better rd cost
- if ((cpi->sf.less_rectangular_check) &&
- ((bsize > cpi->sf.use_square_only_threshold) ||
- (best_rdc.dist < dist_breakout_thr)))
+ if (cpi->sf.less_rectangular_check &&
+ (bsize > cpi->sf.use_square_only_thresh_high ||
+ best_rdc.dist < dist_breakout_thr))
do_rect &= !partition_none_allowed;
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -3921,8 +3922,8 @@
best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_HORZ;
- if ((cpi->sf.less_rectangular_check) &&
- (bsize > cpi->sf.use_square_only_threshold))
+ if (cpi->sf.less_rectangular_check &&
+ bsize > cpi->sf.use_square_only_thresh_high)
do_rect = 0;
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -70,7 +70,8 @@
// speed 0 features
sf->partition_search_breakout_thr.dist = (1 << 20);
sf->partition_search_breakout_thr.rate = 80;
- sf->use_square_only_threshold = BLOCK_SIZES;
+ sf->use_square_only_thresh_high = BLOCK_SIZES;
+ sf->use_square_only_thresh_low = BLOCK_4X4;
if (is_480p_or_larger) {
// Currently, the machine-learning based partition search early termination
@@ -77,7 +78,7 @@
// is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
sf->ml_partition_search_early_termination = 1;
} else {
- sf->use_square_only_threshold = BLOCK_32X32;
+ sf->use_square_only_thresh_high = BLOCK_32X32;
}
if (!is_1080p_or_larger) {
@@ -95,23 +96,42 @@
if (speed >= 1) {
sf->ml_partition_search_early_termination = 0;
- sf->use_square_only_threshold = BLOCK_4X4;
-
+ sf->use_ml_partition_search_breakout = 1;
+ if (is_480p_or_larger)
+ sf->use_square_only_thresh_high = BLOCK_64X64;
+ else
+ sf->use_square_only_thresh_high = BLOCK_32X32;
+ sf->use_square_only_thresh_low = BLOCK_16X16;
if (is_720p_or_larger) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
- sf->partition_search_breakout_thr.dist = (1 << 23);
- sf->use_ml_partition_search_breakout = 0;
+ sf->partition_search_breakout_thr.dist = (1 << 22);
+ sf->ml_partition_search_breakout_thresh[0] = -5.0f;
+ sf->ml_partition_search_breakout_thresh[1] = -5.0f;
+ sf->ml_partition_search_breakout_thresh[2] = -9.0f;
} else {
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
sf->partition_search_breakout_thr.dist = (1 << 21);
- sf->ml_partition_search_breakout_thresh[0] = 0.0f;
- sf->ml_partition_search_breakout_thresh[1] = 0.0f;
- sf->ml_partition_search_breakout_thresh[2] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[0] = -1.0f;
+ sf->ml_partition_search_breakout_thresh[1] = -1.0f;
+ sf->ml_partition_search_breakout_thresh[2] = -1.0f;
}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) {
+ sf->use_square_only_thresh_high = BLOCK_4X4;
+ sf->use_square_only_thresh_low = BLOCK_SIZES;
+ if (is_720p_or_larger) {
+ sf->partition_search_breakout_thr.dist = (1 << 23);
+ sf->use_ml_partition_search_breakout = 0;
+ }
+ }
+#endif
}
if (speed >= 2) {
+ sf->use_square_only_thresh_high = BLOCK_4X4;
+ sf->use_square_only_thresh_low = BLOCK_SIZES;
if (is_720p_or_larger) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
@@ -118,6 +138,7 @@
sf->adaptive_pred_interp_filter = 0;
sf->partition_search_breakout_thr.dist = (1 << 24);
sf->partition_search_breakout_thr.rate = 120;
+ sf->use_ml_partition_search_breakout = 0;
} else {
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
sf->partition_search_breakout_thr.dist = (1 << 22);
@@ -220,13 +241,14 @@
if (speed >= 1) {
sf->enable_tpl_model = 0;
- sf->prune_ref_frame_for_rect_partitions = 0;
+ sf->ml_prune_rect_partition_threhold[1] = 200;
+ sf->ml_prune_rect_partition_threhold[2] = 200;
+ sf->ml_prune_rect_partition_threhold[3] = 200;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
+ sf->prune_ref_frame_for_rect_partitions = 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
- sf->ml_prune_rect_partition_threhold[0] = -1;
- sf->ml_prune_rect_partition_threhold[1] = -1;
- sf->ml_prune_rect_partition_threhold[2] = -1;
- sf->ml_prune_rect_partition_threhold[3] = -1;
-
if (oxcf->pass == 2) {
TWO_PASS *const twopass = &cpi->twopass;
if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) ||
@@ -289,6 +311,10 @@
sf->recode_tolerance_low = 15;
sf->recode_tolerance_high = 45;
sf->enhanced_full_pixel_motion_search = 0;
+ sf->prune_ref_frame_for_rect_partitions = 0;
+ sf->ml_prune_rect_partition_threhold[1] = -1;
+ sf->ml_prune_rect_partition_threhold[2] = -1;
+ sf->ml_prune_rect_partition_threhold[3] = -1;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
for (i = 0; i < MAX_MESH_STEP; ++i) {
@@ -839,7 +865,8 @@
sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
- sf->use_square_only_threshold = BLOCK_SIZES;
+ sf->use_square_only_thresh_high = BLOCK_SIZES;
+ sf->use_square_only_thresh_low = BLOCK_4X4;
sf->auto_min_max_partition_size = NOT_IN_USE;
sf->rd_auto_partition_min_limit = BLOCK_4X4;
sf->default_max_partition_size = BLOCK_64X64;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -331,14 +331,19 @@
// rd than partition type split.
int less_rectangular_check;
- // Disable testing non square partitions. (eg 16x32)
+ // Disable testing non-square partitions (e.g. 16x32) for blocks larger than
+ // use_square_only_thresh_high or smaller than use_square_only_thresh_low.
int use_square_partition_only;
- BLOCK_SIZE use_square_only_threshold;
+ BLOCK_SIZE use_square_only_thresh_high;
+ BLOCK_SIZE use_square_only_thresh_low;
// Prune reference frames for rectangular partitions.
int prune_ref_frame_for_rect_partitions;
// Threshold values used for ML based rectangular partition search pruning.
+ // If < 0, the feature is turned off.
+ // Higher values mean more aggressiveness to skip rectangular partition
+ // search that results in better encoding speed but worse coding performance.
int ml_prune_rect_partition_threhold[4];
// Sets min and max partition sizes for this 64x64 region based on the
@@ -497,6 +502,8 @@
// Use ML-based partition search early breakout.
int use_ml_partition_search_breakout;
+ // Higher values mean more aggressiveness for partition search breakout that
+ // results in better encoding speed but worse compression performance.
float ml_partition_search_breakout_thresh[3];
// Machine-learning based partition search early termination