ref: ba70f1601108c537ffb48b4f0e63ccd5c7af25ef
parent: bac9beff7230293afc522b8a5d0dc85f04842c78
author: Yunqing Wang <[email protected]>
date: Tue Aug 12 10:36:42 EDT 2014
Add early termination in transform size search. In the full-rd transform size search, we go through all transform sizes to choose the one with the best rd score. In this patch, an early termination is added to stop the search once we see that the smaller size won't give a better rd score than the larger size. Also, the search now starts from the largest transform size, then goes down to the smallest size. A speed feature tx_size_search_breakout is added, which is turned off at speed 0, and on for other speeds. The transform size search is turned on at speed 1. Borg test results: 1. At speed 1, derf set: psnr gain: 0.618%, ssim gain: 0.377%; stdhd set: psnr gain: 0.594%, ssim gain: 0.162%; No noticeable speed change. 2. At speed 2, derf set: psnr loss: 0.157%, ssim loss: 0.175%; stdhd set: psnr loss: 0.090%, ssim loss: 0.101%; speed gain: ~4%. Change-Id: I22535cd2017b5e54f2a62bb6a38231aea4268b3f
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -458,11 +458,11 @@
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX},
{INT64_MAX, INT64_MAX}};
- TX_SIZE n, m;
+ int n, m;
int s0, s1;
const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
int64_t best_rd = INT64_MAX;
- TX_SIZE best_tx = TX_4X4;
+ TX_SIZE best_tx = max_tx_size;
const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
assert(skip_prob > 0);
@@ -469,7 +469,7 @@
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
- for (n = TX_4X4; n <= max_tx_size; n++) {
+ for (n = max_tx_size; n >= 0; n--) {
txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
&sse[n], ref_best_rd, 0, bs, n,
cpi->sf.use_fast_coef_costing);
@@ -490,6 +490,13 @@
rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
+
+ // Early termination in transform size search.
+ if (cpi->sf.tx_size_search_breakout &&
+ (rd[n][1] == INT64_MAX ||
+ (n < max_tx_size && rd[n][1] > rd[n + 1][1]) ||
+ s[n] == 1))
+ break;
if (rd[n][1] < best_rd) {
best_tx = n;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -59,8 +59,6 @@
if (speed >= 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
- sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
- : USE_LARGESTALL;
if (MIN(cm->width, cm->height) >= 720)
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -80,9 +78,14 @@
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+
+ sf->tx_size_search_breakout = 1;
}
if (speed >= 2) {
+ sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
+ : USE_LARGESTALL;
+
if (MIN(cm->width, cm->height) >= 720) {
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->last_partitioning_redo_frequency = 3;
@@ -387,6 +390,7 @@
// Recode loop tolerence %.
sf->recode_tolerance = 25;
sf->default_interp_filter = SWITCHABLE;
+ sf->tx_size_search_breakout = 0;
switch (oxcf->mode) {
case ONE_PASS_BEST:
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -374,6 +374,10 @@
// default interp filter choice
INTERP_FILTER default_interp_filter;
+
+ // Early termination in transform size search, which only applies while
+ // tx_size_search_method is USE_FULL_RD.
+ int tx_size_search_breakout;
} SPEED_FEATURES;
struct VP9_COMP;