ref: 01a37177d167f2f837ae6d2de03a17511d1b38c9
parent: 8b810c7a78cbaac715cc516973de2bfdbf4067f9
author: Jingning Han <[email protected]>
date: Tue Aug 20 10:34:17 EDT 2013
Refactor rd_pick_partition for parameter control. This commit changes the partition search order of superblocks from {SPLIT, NONE, HORZ, VERT} to {NONE, SPLIT, HORZ, VERT} for consistency with that of the sub8x8 partition search. It enables the use of early termination in the partition search for all block sizes. For ped_area_1080p, 50 frames coded at 4000 kbps, the runtime at speed 0 goes down from 844305 ms to 818003 ms (a 3% speed-up). This is a further step towards making the in-search partition types configurable, hence unifying the various speed-up approaches. Some speed 1 and 2 features are turned off during the refactoring process, including: disable_split_var_thresh and using_small_partition_info. Stricter constraints are applied to use_square_partition_only for right/bottom boundary blocks. These features will be brought back/refined subsequently. At this point, the change makes the derf set at speed 1 about 0.45% higher in compression performance and 9% lower in run-time. Change-Id: I3db9f9d1d1a0d6cbe2e50e49bd9eda1cf705f37c
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1671,29 +1671,19 @@
int i, pl;
BLOCK_SIZE_TYPE subsize;
int this_rate, sum_rate = 0, best_rate = INT_MAX;
- int64_t this_dist, sum_dist = 0, best_dist = INT_MAX;
+ int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
int64_t sum_rd = 0;
- int do_split = 1, do_rect = 1;
- // Override min_partition_size for edge blocks
- int force_horz_split = mi_row + (ms >> 1) >= cm->mi_rows;
- int force_vert_split = mi_col + (ms >> 1) >= cm->mi_cols;
- const int partition_none_allowed = (bsize <= cpi->sf.max_partition_size ||
- !cpi->sf.auto_min_max_partition_size) &&
- !force_horz_split &&
- !force_vert_split;
- const int partition_horz_allowed = (bsize <= cpi->sf.max_partition_size ||
- !cpi->sf.auto_min_max_partition_size) &&
- !cpi->sf.use_square_partition_only &&
- bsize >= BLOCK_8X8 &&
- !force_vert_split;
- const int partition_vert_allowed = (bsize <= cpi->sf.max_partition_size ||
- !cpi->sf.auto_min_max_partition_size) &&
- !cpi->sf.use_square_partition_only &&
- bsize >= BLOCK_8X8 &&
- !force_horz_split;
- int partition_split_done = 0;
+ int do_split = bsize >= BLOCK_8X8;
+ int do_rect = 1;
+ // Override skipping rectangular partition operations for edge blocks
+ const int force_horz_split = (mi_row + (ms >> 1) >= cm->mi_rows);
+ const int force_vert_split = (mi_col + (ms >> 1) >= cm->mi_cols);
+ int partition_none_allowed = !force_horz_split && !force_vert_split;
+ int partition_horz_allowed = !force_vert_split && bsize >= BLOCK_8X8;
+ int partition_vert_allowed = !force_horz_split && bsize >= BLOCK_8X8;
+ int partition_split_done = 0;
(void) *tp_orig;
if (bsize < BLOCK_8X8) {
@@ -1707,6 +1697,24 @@
}
assert(mi_height_log2(bsize) == mi_width_log2(bsize));
+ // Determine partition types in search according to the speed features.
+ // The threshold set here has to be of square block size.
+ if (cpi->sf.auto_min_max_partition_size) {
+ partition_none_allowed &= (bsize <= cpi->sf.max_partition_size &&
+ bsize >= cpi->sf.min_partition_size);
+ partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size &&
+ bsize > cpi->sf.min_partition_size) ||
+ force_horz_split);
+ partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size &&
+ bsize > cpi->sf.min_partition_size) ||
+ force_vert_split);
+ do_split &= bsize > cpi->sf.min_partition_size;
+ }
+ if (cpi->sf.use_square_partition_only) {
+ partition_horz_allowed &= force_horz_split;
+ partition_vert_allowed &= force_vert_split;
+ }
+
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
if (cpi->sf.disable_split_var_thresh && partition_none_allowed) {
@@ -1719,48 +1727,75 @@
do_rect = 0;
}
}
+
+ // PARTITION_NONE
+ if (partition_none_allowed) {
+ pick_sb_modes(cpi, mi_row, mi_col, &this_rate, &this_dist, bsize,
+ get_block_context(x, bsize), best_rd);
+ if (this_rate != INT_MAX) {
+ if (bsize >= BLOCK_8X8) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ this_rate += x->partition_cost[pl][PARTITION_NONE];
+ }
+ sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
+ if (sum_rd < best_rd) {
+ best_rate = this_rate;
+ best_dist = this_dist;
+ best_rd = sum_rd;
+ if (bsize >= BLOCK_8X8)
+ *(get_sb_partitioning(x, bsize)) = bsize;
+ }
+ }
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+ }
+
// PARTITION_SPLIT
- if (do_split &&
- (!cpi->sf.auto_min_max_partition_size ||
- bsize > cpi->sf.min_partition_size)) {
- if (bsize > BLOCK_8X8) {
- subsize = get_subsize(bsize, PARTITION_SPLIT);
- for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
- int x_idx = (i & 1) * (ms >> 1);
- int y_idx = (i >> 1) * (ms >> 1);
+ sum_rd = 0;
+ // TODO(jingning): use the motion vectors given by the above search as
+ // the starting point of motion search in the following partition type check.
+ if (do_split) {
+ subsize = get_subsize(bsize, PARTITION_SPLIT);
+ for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
+ int x_idx = (i & 1) * (ms >> 1);
+ int y_idx = (i >> 1) * (ms >> 1);
- if ((mi_row + y_idx >= cm->mi_rows) ||
- (mi_col + x_idx >= cm->mi_cols))
- continue;
+ if ((mi_row + y_idx >= cm->mi_rows) ||
+ (mi_col + x_idx >= cm->mi_cols))
+ continue;
- *(get_sb_index(xd, subsize)) = i;
+ *(get_sb_index(xd, subsize)) = i;
- rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize,
- &this_rate, &this_dist, i != 3, best_rd - sum_rd);
+ rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize,
+ &this_rate, &this_dist, i != 3, best_rd - sum_rd);
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
- } else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- }
- }
- if (sum_rd < best_rd && i == 4) {
- set_partition_seg_context(cm, xd, mi_row, mi_col);
- pl = partition_plane_context(xd, bsize);
- sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
+ if (this_rate == INT_MAX) {
+ sum_rd = INT64_MAX;
+ } else {
+ sum_rate += this_rate;
+ sum_dist += this_dist;
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
- *(get_sb_partitioning(x, bsize)) = subsize;
- }
}
- partition_split_done = 1;
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
+ if (sum_rd < best_rd && i == 4) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ if (sum_rd < best_rd) {
+ best_rate = sum_rate;
+ best_dist = sum_dist;
+ best_rd = sum_rd;
+ *(get_sb_partitioning(x, bsize)) = subsize;
+ } else {
+ // skip rectangular partition test when larger block size
+ // gives better rd cost
+ do_rect &= !partition_none_allowed &&
+ cpi->sf.less_rectangular_check;
+ }
+ }
+ partition_split_done = 1;
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
x->fast_ms = 0;
@@ -1767,172 +1802,94 @@
x->pred_mv.as_int = 0;
x->subblock_ref = 0;
- // Use 4 subblocks' motion estimation results to speed up current
- // partition's checking.
if (partition_split_done &&
cpi->sf.using_small_partition_info) {
compute_fast_motion_search_level(cpi, bsize);
}
- if (!cpi->sf.auto_min_max_partition_size ||
- bsize <= cpi->sf.max_partition_size) {
- int larger_is_better = 0;
+ // PARTITION_HORZ
+ if (partition_horz_allowed && do_rect) {
+ subsize = get_subsize(bsize, PARTITION_HORZ);
+ *(get_sb_index(xd, subsize)) = 0;
+ pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
+ get_block_context(x, subsize), best_rd);
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- // PARTITION_NONE
- if (partition_none_allowed) {
- pick_sb_modes(cpi, mi_row, mi_col, &this_rate, &this_dist, bsize,
- get_block_context(x, bsize), best_rd);
- if (this_rate != INT_MAX) {
- if (bsize >= BLOCK_8X8) {
- set_partition_seg_context(cm, xd, mi_row, mi_col);
- pl = partition_plane_context(xd, bsize);
- this_rate += x->partition_cost[pl][PARTITION_NONE];
- }
- sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
- if (sum_rd < best_rd || bsize == BLOCK_8X8) {
- best_rate = this_rate;
- best_dist = this_dist;
- best_rd = sum_rd;
- larger_is_better = 1;
- if (bsize >= BLOCK_8X8)
- *(get_sb_partitioning(x, bsize)) = bsize;
- }
+ if (sum_rd < best_rd && mi_row + (ms >> 1) < cm->mi_rows) {
+ update_state(cpi, get_block_context(x, subsize), subsize, 0);
+ encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+
+ *(get_sb_index(xd, subsize)) = 1;
+ pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, &this_rate,
+ &this_dist, subsize, get_block_context(x, subsize),
+ best_rd - sum_rd);
+ if (this_rate == INT_MAX) {
+ sum_rd = INT64_MAX;
+ } else {
+ sum_rate += this_rate;
+ sum_dist += this_dist;
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
}
}
+ if (sum_rd < best_rd) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ sum_rate += x->partition_cost[pl][PARTITION_HORZ];
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ if (sum_rd < best_rd) {
+ best_rd = sum_rd;
+ best_rate = sum_rate;
+ best_dist = sum_dist;
+ *(get_sb_partitioning(x, bsize)) = subsize;
+ }
+ }
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
+ }
- if (bsize == BLOCK_8X8 && do_split) {
- sum_rate = 0; sum_dist = 0; sum_rd = 0;
+ // PARTITION_VERT
+ if (partition_vert_allowed && do_rect) {
+ subsize = get_subsize(bsize, PARTITION_VERT);
- subsize = get_subsize(bsize, PARTITION_SPLIT);
+ *(get_sb_index(xd, subsize)) = 0;
+ pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
+ get_block_context(x, subsize), best_rd);
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ if (sum_rd < best_rd && mi_col + (ms >> 1) < cm->mi_cols) {
+ update_state(cpi, get_block_context(x, subsize), subsize, 0);
+ encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
- for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
- int x_idx = (i & 1) * (ms >> 1);
- int y_idx = (i >> 1) * (ms >> 1);
-
- if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
- continue;
-
- *(get_sb_index(xd, subsize)) = i;
- rd_pick_partition(cpi, tp, mi_row + y_idx, mi_col + x_idx, subsize,
- &this_rate, &this_dist, i != 3, best_rd - sum_rd);
-
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
- } else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- }
- }
- if (sum_rd < best_rd && i == 4) {
- set_partition_seg_context(cm, xd, mi_row, mi_col);
- pl = partition_plane_context(xd, bsize);
- sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
+ *(get_sb_index(xd, subsize)) = 1;
+ pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), &this_rate,
+ &this_dist, subsize, get_block_context(x, subsize),
+ best_rd - sum_rd);
+ if (this_rate == INT_MAX) {
+ sum_rd = INT64_MAX;
+ } else {
+ sum_rate += this_rate;
+ sum_dist += this_dist;
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
- larger_is_better = 0;
- *(get_sb_partitioning(x, bsize)) = subsize;
- }
}
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
-
- if (do_rect &&
- !cpi->sf.use_square_partition_only &&
- (!cpi->sf.less_rectangular_check || !larger_is_better)) {
- // PARTITION_HORZ
- if (partition_horz_allowed) {
- subsize = get_subsize(bsize, PARTITION_HORZ);
- if (!cpi->sf.auto_min_max_partition_size || force_horz_split ||
- subsize >= cpi->sf.min_partition_size) {
- *(get_sb_index(xd, subsize)) = 0;
- pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
- get_block_context(x, subsize), best_rd);
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
-
- if (sum_rd < best_rd && mi_row + (ms >> 1) < cm->mi_rows) {
- update_state(cpi, get_block_context(x, subsize), subsize, 0);
- encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
-
- *(get_sb_index(xd, subsize)) = 1;
- pick_sb_modes(cpi, mi_row + (ms >> 1), mi_col, &this_rate,
- &this_dist, subsize, get_block_context(x, subsize),
- best_rd - sum_rd);
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
- } else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- }
- }
- if (sum_rd < best_rd) {
- set_partition_seg_context(cm, xd, mi_row, mi_col);
- pl = partition_plane_context(xd, bsize);
- sum_rate += x->partition_cost[pl][PARTITION_HORZ];
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd) {
- best_rd = sum_rd;
- best_rate = sum_rate;
- best_dist = sum_dist;
- *(get_sb_partitioning(x, bsize)) = subsize;
- }
- }
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
- }
+ if (sum_rd < best_rd) {
+ set_partition_seg_context(cm, xd, mi_row, mi_col);
+ pl = partition_plane_context(xd, bsize);
+ sum_rate += x->partition_cost[pl][PARTITION_VERT];
+ sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ if (sum_rd < best_rd) {
+ best_rate = sum_rate;
+ best_dist = sum_dist;
+ best_rd = sum_rd;
+ *(get_sb_partitioning(x, bsize)) = subsize;
}
-
- // PARTITION_VERT
- if (partition_vert_allowed) {
- subsize = get_subsize(bsize, PARTITION_VERT);
- if (!cpi->sf.auto_min_max_partition_size || force_vert_split ||
- subsize >= cpi->sf.min_partition_size) {
- *(get_sb_index(xd, subsize)) = 0;
- pick_sb_modes(cpi, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
- get_block_context(x, subsize), best_rd);
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd && mi_col + (ms >> 1) < cm->mi_cols) {
- update_state(cpi, get_block_context(x, subsize), subsize, 0);
- encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
-
- *(get_sb_index(xd, subsize)) = 1;
- pick_sb_modes(cpi, mi_row, mi_col + (ms >> 1), &this_rate,
- &this_dist, subsize, get_block_context(x, subsize),
- best_rd - sum_rd);
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
- } else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- }
- }
- if (sum_rd < best_rd) {
- set_partition_seg_context(cm, xd, mi_row, mi_col);
- pl = partition_plane_context(xd, bsize);
- sum_rate += x->partition_cost[pl][PARTITION_VERT];
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
- *(get_sb_partitioning(x, bsize)) = subsize;
- }
- }
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
- }
- }
}
+ restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
+
+
*rate = best_rate;
*dist = best_dist;
- restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
-
- if (best_rate < INT_MAX && best_dist < INT_MAX && do_recon)
+ if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon)
encode_sb(cpi, tp, mi_row, mi_col, bsize == BLOCK_64X64, bsize);
if (bsize == BLOCK_64X64) {
assert(tp_orig < *tp);
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -793,7 +793,10 @@
sf->auto_min_max_partition_size = 1;
sf->auto_min_max_partition_interval = 1;
- sf->disable_split_var_thresh = 16;
+ // FIXME(jingning): temporarily turn off disable_split_var_thresh
+ // during refactoring process. will get this back after finishing
+ // the main framework of partition search type.
+ sf->disable_split_var_thresh = 0;
sf->disable_filter_search_var_thresh = 16;
}
if (speed == 2) {
@@ -821,7 +824,7 @@
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->use_lp32x32fdct = 1;
- sf->using_small_partition_info = 1;
+ sf->using_small_partition_info = 0;
sf->disable_splitmv =
(MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
sf->auto_mv_step_size = 1;