ref: 65732c36a8c8f60907b62d24c097782a0a0d7e1c
parent: 015c43f0c1b0325632283f371a42974968b5655a
author: paulwilkins <[email protected]>
date: Thu May 5 07:37:04 EDT 2016
Fixed 8K two pass encoder crash.

Bug found by Yunqing relating to the correction for size at 8K and above in get_twopass_worst_quality().

The basis for the correction was changed to the linear size relative to 1080P as a baseline, and the adjustment has been clamped to prevent problems at extreme image sizes.

For 1080P the results on our test sets were neutral, but the low-res and mid-res sets saw a small gain (0.1%-0.2% average). I would also expect some gains on 4K and larger content, where the previous correction was overly aggressive.

Change-Id: I30b026b5f4535e9601e3178d738066459d19c8fb
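For reference, the sketch below (not part of the patch) reproduces the new size correction in isolation so its behaviour at extreme resolutions can be checked directly. The BASE_SIZE constant, the clamp range, and the sign-flip/scaling logic are taken from the diff that follows; the free-standing function signatures, the main() harness, and the sample resolutions are illustrative assumptions only.

    /* Hedged sketch of the clamped, signed size correction added in
     * get_twopass_worst_quality(); constants copied from the patch,
     * everything else illustrative. */
    #include <math.h>
    #include <stdio.h>

    #define BASE_SIZE 2073600.0  /* 1920x1080, the 1080P baseline */

    /* Linear size factor: sqrt(area / 1080P area), i.e. the scale of one
     * image dimension relative to 1080P. */
    static double get_linear_size_factor(double width, double height) {
      return pow((width * height) / BASE_SIZE, 0.5);
    }

    static double ediv_size_correction(double width, double height) {
      double c = get_linear_size_factor(width, height);
      /* Clamp so extreme sizes cannot push the later divisor adjustment
       * out of range (the fix for the 8K-and-above problem). */
      c = fmax(0.2, fmin(5.0, c));
      /* Below 1080P the factor is < 1.0; flip it to a negative reciprocal
       * so small formats get a correction of the opposite sign. */
      if (c < 1.0) c = -(1.0 / c);
      return c * 4.0;
    }

    int main(void) {
      /* 640x360   -> factor 1/3,   correction -12.0
       * 1920x1080 -> factor 1.0,   correction   4.0
       * 7680x4320 -> factor 4.0,   correction  16.0
       * 320x240   -> factor ~0.19, clamped to 0.2, correction -20.0 */
      printf("%f %f %f %f\n",
             ediv_size_correction(640, 360),
             ediv_size_correction(1920, 1080),
             ediv_size_correction(7680, 4320),
             ediv_size_correction(320, 240));
      return 0;
    }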
--- a/vp10/encoder/firstpass.c
+++ b/vp10/encoder/firstpass.c
@@ -45,7 +45,6 @@
#define BOOST_BREAKOUT 12.5
#define BOOST_FACTOR 12.5
-#define ERR_DIVISOR 128.0
#define FACTOR_PT_LOW 0.70
#define FACTOR_PT_HIGH 0.90
#define FIRST_PASS_Q 10.0
@@ -231,6 +230,13 @@
section->duration -= frame->duration;
}
+// Calculate the linear size relative to a baseline of 1080P
+#define BASE_SIZE 2073600.0 // 1920x1080
+static double get_linear_size_factor(const VP10_COMP *cpi) {
+ const double this_area = cpi->initial_width * cpi->initial_height;
+ return pow(this_area / BASE_SIZE, 0.5);
+}
+
// Calculate an active area of the image that discounts formatting
// bars and partially discounts other 0 energy areas.
#define MIN_ACTIVE_AREA 0.5
@@ -1103,11 +1109,7 @@
return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
-// Larger image formats are expected to be a little harder to code relatively
-// given the same prediction error score. This in part at least relates to the
-// increased size and hence coding cost of motion vectors.
-#define EDIV_SIZE_FACTOR 800
-
+#define ERR_DIVISOR 100.0
static int get_twopass_worst_quality(const VP10_COMP *cpi,
const double section_err,
double inactive_zone,
@@ -1126,11 +1128,21 @@
const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
- const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
+ double ediv_size_correction;
const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
BPER_MB_NORMBITS) / active_mbs;
-
int q;
+
+ // Larger image formats are expected to be a little harder to code
+ // relatively given the same prediction error score. This in part at
+ // least relates to the increased size and hence coding overheads of
+ // motion vectors. Some account of this is made through adjustment of
+ // the error divisor.
+ ediv_size_correction =
+ VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi)));
+ if (ediv_size_correction < 1.0)
+ ediv_size_correction = -(1.0 / ediv_size_correction);
+ ediv_size_correction *= 4.0;
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -45,7 +45,6 @@
#define BOOST_BREAKOUT 12.5
#define BOOST_FACTOR 12.5
-#define ERR_DIVISOR 128.0
#define FACTOR_PT_LOW 0.70
#define FACTOR_PT_HIGH 0.90
#define FIRST_PASS_Q 10.0
@@ -237,6 +236,13 @@
section->duration -= frame->duration;
}
+// Calculate the linear size relative to a baseline of 1080P
+#define BASE_SIZE 2073600.0 // 1920x1080
+static double get_linear_size_factor(const VP9_COMP *cpi) {
+ const double this_area = cpi->initial_width * cpi->initial_height;
+ return pow(this_area / BASE_SIZE, 0.5);
+}
+
// Calculate an active area of the image that discounts formatting
// bars and partially discounts other 0 energy areas.
#define MIN_ACTIVE_AREA 0.5
@@ -1241,11 +1247,7 @@
return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
-// Larger image formats are expected to be a little harder to code relatively
-// given the same prediction error score. This in part at least relates to the
-// increased size and hence coding cost of motion vectors.
-#define EDIV_SIZE_FACTOR 800
-
+#define ERR_DIVISOR 100.0
static int get_twopass_worst_quality(const VP9_COMP *cpi,
const double section_err,
double inactive_zone,
@@ -1267,10 +1269,9 @@
const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
- const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
+ double ediv_size_correction;
const int target_norm_bits_per_mb = ((uint64_t)target_rate <<
BPER_MB_NORMBITS) / active_mbs;
-
int q;
int is_svc_upper_layer = 0;
@@ -1277,6 +1278,16 @@
if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0)
is_svc_upper_layer = 1;
+ // Larger image formats are expected to be a little harder to code
+ // relatively given the same prediction error score. This in part at
+ // least relates to the increased size and hence coding overheads of
+ // motion vectors. Some account of this is made through adjustment of
+ // the error divisor.
+ ediv_size_correction =
+ VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi)));
+ if (ediv_size_correction < 1.0)
+ ediv_size_correction = -(1.0 / ediv_size_correction);
+ ediv_size_correction *= 4.0;
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
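The hunks above end before the point where ediv_size_correction is actually consumed. Going by the comment added in get_twopass_worst_quality() ("Some account of this is made through adjustment of the error divisor"), and by the fact that the removed num_mbs/800 correction exceeds the removed ERR_DIVISOR of 128.0 at 8K (129600 macroblocks / 800 = 162), the likely reading is that the signed correction is subtracted from the new ERR_DIVISOR of 100.0. That usage is an assumption, since the relevant line is not shown in the diff; a tiny worked example under that assumption:

    /* Assumption, not shown in the hunks above: the signed correction is
     * subtracted from ERR_DIVISOR (now 100.0), so large formats get a
     * smaller effective divisor (relatively harder to code) and small
     * formats a larger one, with the clamp keeping the result positive. */
    #include <stdio.h>

    #define ERR_DIVISOR 100.0

    int main(void) {
      const char *labels[] = { "640x360", "1920x1080", "7680x4320" };
      const double corrections[] = { -12.0, 4.0, 16.0 };  /* from the sketch above */
      for (int i = 0; i < 3; ++i)
        printf("%-10s effective divisor = %.1f\n",
               labels[i], ERR_DIVISOR - corrections[i]);
      return 0;
    }

Under this reading the effective divisor comes out to 112.0 for 640x360, 96.0 for 1080P, and 84.0 for 8K, instead of going negative as the old num_mbs-based correction could at 8K and above.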