ref: eee904c9b91b62510169c52d5fc05580fd1f18c0
parent: 31596ee877473b952afe07675606a392d4cfd229
author: Jingning Han <[email protected]>
date: Thu Sep 18 09:37:20 EDT 2014
Adaptive mode search scheduling This commit enables an adaptive mode search order scheduling scheme in the rate-distortion optimization. It changes the compression performance by -0.433% and -0.420% for derf and stdhd respectively. It provides speed improvement for speed 3: bus CIF 1000 kbps 24590 b/f, 35.513 dB, 7864 ms -> 24696 b/f, 35.491 dB, 7408 ms (6% speed-up) stockholm 720p 1000 kbps 8983 b/f, 35.078 dB, 65698 ms -> 8962 b/f, 35.054 dB, 60298 ms (8%) old_town_cross 720p 1000 kbps 11804 b/f, 35.666 dB, 62492 ms -> 11778 b/f, 35.609 dB, 56040 ms (10%) blue_sky 1080p 1500 kbps 57173 b/f, 36.179 dB, 77879 ms -> 57199 b/f, 36.131 dB, 69821 ms (10%) pedestrian_area 1080p 2000 kbps 74241 b/f, 41.105 dB, 144031 ms -> 74271 b/f, 41.091 dB, 133614 ms (8%) Change-Id: Iaad28cbc99399030fc5f9951eb5aa7fa633f320e
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -991,8 +991,10 @@
// Default rd threshold factors for mode selection
for (i = 0; i < BLOCK_SIZES; ++i) {
- for (j = 0; j < MAX_MODES; ++j)
+ for (j = 0; j < MAX_MODES; ++j) {
cpi->rd.thresh_freq_fact[i][j] = 32;
+ cpi->rd.mode_map[i][j] = j;
+ }
}
#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF)\
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -51,6 +51,12 @@
THR_NEARMV,
THR_NEARA,
+ THR_NEARG,
+
+ THR_ZEROMV,
+ THR_ZEROG,
+ THR_ZEROA,
+
THR_COMP_NEARESTLA,
THR_COMP_NEARESTGA,
@@ -58,13 +64,9 @@
THR_COMP_NEARLA,
THR_COMP_NEWLA,
- THR_NEARG,
THR_COMP_NEARGA,
THR_COMP_NEWGA,
- THR_ZEROMV,
- THR_ZEROG,
- THR_ZEROA,
THR_COMP_ZEROLA,
THR_COMP_ZEROGA,
@@ -97,6 +99,8 @@
int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
+
+ int mode_map[BLOCK_SIZES][MAX_MODES];
int64_t comp_pred_diff[REFERENCE_MODES];
int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -91,6 +91,12 @@
{NEARMV, {LAST_FRAME, NONE}},
{NEARMV, {ALTREF_FRAME, NONE}},
+ {NEARMV, {GOLDEN_FRAME, NONE}},
+
+ {ZEROMV, {LAST_FRAME, NONE}},
+ {ZEROMV, {GOLDEN_FRAME, NONE}},
+ {ZEROMV, {ALTREF_FRAME, NONE}},
+
{NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
{NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
@@ -98,13 +104,9 @@
{NEARMV, {LAST_FRAME, ALTREF_FRAME}},
{NEWMV, {LAST_FRAME, ALTREF_FRAME}},
- {NEARMV, {GOLDEN_FRAME, NONE}},
{NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
- {ZEROMV, {LAST_FRAME, NONE}},
- {ZEROMV, {GOLDEN_FRAME, NONE}},
- {ZEROMV, {ALTREF_FRAME, NONE}},
{ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
{ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},
@@ -2572,7 +2574,7 @@
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
MB_MODE_INFO best_mbmode;
int best_mode_skippable = 0;
- int mode_index, best_mode_index = -1;
+ int midx, best_mode_index = -1;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
vp9_prob comp_mode_p;
int64_t best_intra_rd = INT64_MAX;
@@ -2590,8 +2592,11 @@
int mode_skip_start = cpi->sf.mode_skip_start + 1;
const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
+ int mode_threshold[MAX_MODES];
+ int *mode_map = rd_opt->mode_map[bsize];
const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
vp9_zero(best_mbmode);
+
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
@@ -2686,7 +2691,25 @@
mode_skip_mask[INTRA_FRAME] |=
~(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
- for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
+ for (i = 0; i < MAX_MODES; ++i)
+ mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
+
+ midx = cpi->sf.schedule_mode_search ? mode_skip_start : 0;
+ while (midx > 4) {
+ uint8_t end_pos = 0;
+ for (i = 5; i < midx; ++i) {
+ if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
+ uint8_t tmp = mode_map[i];
+ mode_map[i] = mode_map[i - 1];
+ mode_map[i - 1] = tmp;
+ end_pos = i;
+ }
+ }
+ midx = end_pos;
+ }
+
+ for (midx = 0; midx < MAX_MODES; ++midx) {
+ int mode_index = mode_map[midx];
int mode_excluded = 0;
int64_t this_rd = INT64_MAX;
int disable_skip = 0;
@@ -2706,7 +2729,7 @@
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
- if (mode_index == mode_skip_start && best_mode_index >= 0) {
+ if (midx == mode_skip_start && best_mode_index >= 0) {
switch (best_mbmode.ref_frame[0]) {
case INTRA_FRAME:
break;
@@ -2736,8 +2759,10 @@
continue;
// Test best rd so far against threshold for trying this mode.
- if (rd_less_than_thresh(best_rd, rd_threshes[mode_index],
- rd_thresh_freq_fact[mode_index]))
+ if (best_mode_skippable && cpi->sf.schedule_mode_search)
+ mode_threshold[mode_index] <<= 1;
+
+ if (best_rd < mode_threshold[mode_index])
continue;
if (cpi->sf.motion_field_mode_search) {
@@ -3129,7 +3154,8 @@
(cm->interp_filter == best_mbmode.interp_filter) ||
!is_inter_block(&best_mbmode));
- update_rd_thresh_fact(cpi, bsize, best_mode_index);
+ if (!cpi->rc.is_src_frame_alt_ref)
+ update_rd_thresh_fact(cpi, bsize, best_mode_index);
// macroblock modes
*mbmi = best_mbmode;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -95,9 +95,11 @@
: USE_LARGESTALL;
if (MIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = DISABLE_ALL_SPLIT;
+ sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0;
} else {
sf->max_intra_bsize = BLOCK_32X32;
sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
+ sf->schedule_mode_search = cm->base_qindex < 175 ? 1 : 0;
}
sf->adaptive_pred_interp_filter = 0;
sf->adaptive_mode_search = 1;
@@ -376,6 +378,7 @@
sf->use_fast_coef_updates = TWO_LOOP;
sf->use_fast_coef_costing = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
+ sf->schedule_mode_search = 0;
sf->use_nonrd_pick_mode = 0;
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -318,6 +318,8 @@
// point for this motion search and limits the search range around it.
int adaptive_motion_search;
+ int schedule_mode_search;
+
// Allows sub 8x8 modes to use the prediction filter that was determined
// best for 8x8 mode. If set to 0 we always re check all the filters for
// sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter