ref: 19941fcc116a0b4c31de25ea330b63ea4d0f4912
parent: 6753efd235230c49c42387a08f741b144de0701d
author: sdeng <[email protected]>
date: Mon Mar 18 07:26:06 EDT 2019
Use reconstructed variance in rd_variance_adjustment

Replace the variance taken from xd->plane[0].dst, which holds the
prediction rather than the reconstruction in inter mode, with the
variance of the reconstructed block. For now this is only enabled in
tune-content=film mode.

Change-Id: I647b4a524a0849fda42541887ebc34091f152073
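For reference, a minimal sketch of how an application opts into this path
through the public libvpx API. VP9E_SET_TUNE_CONTENT and VP9E_CONTENT_FILM
are the existing public controls that end up as cpi->oxcf.content; the
helper name and the assumption of an already-initialized VP9 encoder
context are illustrative:

    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    /* Ask the VP9 encoder to tune for film content. With this change, that
     * routes inter mode decision through the reconstructed-variance RD
     * adjustment. Returns the codec error code from the control call. */
    static vpx_codec_err_t enable_film_tuning(vpx_codec_ctx_t *codec) {
      return vpx_codec_control(codec, VP9E_SET_TUNE_CONTENT,
                               VP9E_CONTENT_FILM);
    }

The same switch is exposed on the vpxenc command line as
--tune-content=film.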
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -77,6 +77,7 @@
int use_fast_coef_costing;
const scan_order *so;
uint8_t skippable;
+ struct buf_2d *this_recon;
};
#define LAST_NEW_MV_INDEX 6
@@ -459,6 +460,66 @@
return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim;
}
+// Copy all visible 4x4s in the transform block.
+static void copy_block_visible(const MACROBLOCKD *xd,
+ const struct macroblockd_plane *const pd,
+ const uint8_t *src, const int src_stride,
+ uint8_t *dst, const int dst_stride, int blk_row,
+ int blk_col, const BLOCK_SIZE plane_bsize,
+ const BLOCK_SIZE tx_bsize) {
+ const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
+ const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
+ int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
+ pd->subsampling_x, blk_col);
+ int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
+ pd->subsampling_y, blk_row);
+ const int is_highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
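+ // Fast path: the whole transform block is visible, so copy it in one call.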
+ if (tx_bsize == BLOCK_4X4 ||
+ (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
+ const int w = tx_4x4_w << 2;
+ const int h = tx_4x4_h << 2;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_highbd) {
+ vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src), src_stride,
+ CONVERT_TO_SHORTPTR(dst), dst_stride, NULL, 0, 0,
+ 0, 0, w, h, xd->bd);
+ } else {
+#endif
+ vpx_convolve_copy(src, src_stride, dst, dst_stride, NULL, 0, 0, 0, 0, w,
+ h);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif
+ } else {
+ int r, c;
+ int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
+ int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
+ // The block extends into the unrestricted motion vector (UMV) border.
+ for (r = 0; r < max_r; ++r) {
+ // Skip the 4x4 sub-blocks that lie wholly within the UMV border.
+ for (c = 0; c < max_c; ++c) {
+ const uint8_t *src_ptr = src + r * src_stride * 4 + c * 4;
+ uint8_t *dst_ptr = dst + r * dst_stride * 4 + c * 4;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_highbd) {
+ vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
+ CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
+ NULL, 0, 0, 0, 0, 4, 4, xd->bd);
+ } else {
+#endif
+ vpx_convolve_copy(src_ptr, src_stride, dst_ptr, dst_stride, NULL, 0,
+ 0, 0, 0, 4, 4);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif
+ }
+ }
+ }
+ (void)is_highbd;
+}
+
// Compute the pixel domain sum square error on all visible 4x4s in the
// transform block.
static unsigned pixel_sse(const VP9_COMP *const cpi, const MACROBLOCKD *xd,
@@ -539,13 +600,13 @@
static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
BLOCK_SIZE plane_bsize, int block, int blk_row,
int blk_col, TX_SIZE tx_size, int64_t *out_dist,
- int64_t *out_sse) {
+ int64_t *out_sse, struct buf_2d *out_recon) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int eob = p->eobs[block];
- if (x->block_tx_domain && eob) {
+ if (!out_recon && x->block_tx_domain && eob) {
const int ss_txfrm_size = tx_size << 1;
int64_t this_sse;
const int shift = tx_size == TX_32X32 ? 0 : 2;
@@ -584,6 +645,8 @@
const int dst_idx = 4 * (blk_row * dst_stride + blk_col);
const uint8_t *src = &p->src.buf[src_idx];
const uint8_t *dst = &pd->dst.buf[dst_idx];
+ uint8_t *out_recon_ptr = 0;
+
const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
unsigned int tmp;
@@ -590,6 +653,13 @@
tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row,
blk_col, plane_bsize, tx_bsize);
*out_sse = (int64_t)tmp * 16;
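+ // Save the prediction into out_recon first; if the block has non-zero
+ // coefficients, the true reconstruction overwrites it below.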
+ if (out_recon) {
+ const int out_recon_idx = 4 * (blk_row * out_recon->stride + blk_col);
+ out_recon_ptr = &out_recon->buf[out_recon_idx];
+ copy_block_visible(xd, pd, dst, dst_stride, out_recon_ptr,
+ out_recon->stride, blk_row, blk_col, plane_bsize,
+ tx_bsize);
+ }
if (eob) {
#if CONFIG_VP9_HIGHBITDEPTH
@@ -644,6 +714,10 @@
tmp = pixel_sse(cpi, xd, pd, src, src_stride, recon, 32, blk_row, blk_col,
plane_bsize, tx_bsize);
+ if (out_recon) {
+ copy_block_visible(xd, pd, recon, 32, out_recon_ptr, out_recon->stride,
+ blk_row, blk_col, plane_bsize, tx_bsize);
+ }
}
*out_dist = (int64_t)tmp * 16;
@@ -668,6 +742,11 @@
int64_t sse;
const int coeff_ctx =
combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]);
+ struct buf_2d *recon = args->this_recon;
+ const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int dst_stride = pd->dst.stride;
+ const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
if (args->exit_early) return;
@@ -676,18 +755,19 @@
args->t_left, &mi->skip };
vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
&intra_arg);
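+ // vp9_encode_block_intra has written the reconstruction into dst, so it
+ // can be snapshotted directly.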
+ if (recon) {
+ uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)];
+ copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride,
+ blk_row, blk_col, plane_bsize, tx_bsize);
+ }
if (x->block_tx_domain) {
dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
- tx_size, &dist, &sse);
+ tx_size, &dist, &sse, /*recon =*/0);
} else {
- const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
const struct macroblock_plane *const p = &x->plane[plane];
- const struct macroblockd_plane *const pd = &xd->plane[plane];
const int src_stride = p->src.stride;
- const int dst_stride = pd->dst.stride;
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
- const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
unsigned int tmp;
sse = sum_squares_visible(xd, pd, diff, diff_stride, blk_row, blk_col,
@@ -705,13 +785,15 @@
int skip_txfm_flag = SKIP_TXFM_NONE;
if (max_txsize_lookup[plane_bsize] == tx_size)
skip_txfm_flag = x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))];
- if (skip_txfm_flag == SKIP_TXFM_NONE) {
+
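+ // The DC-only shortcut below does not produce a reconstruction, so take
+ // the full transform path whenever one is requested.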
+ if (skip_txfm_flag == SKIP_TXFM_NONE ||
+ (recon && skip_txfm_flag == SKIP_TXFM_AC_ONLY)) {
// full forward transform and quantization
vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
if (x->block_qcoeff_opt)
vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
- tx_size, &dist, &sse);
+ tx_size, &dist, &sse, recon);
} else if (skip_txfm_flag == SKIP_TXFM_AC_ONLY) {
// compute DC coefficient
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
@@ -738,6 +820,11 @@
x->plane[plane].eobs[block] = 0;
sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
dist = sse;
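+ // All coefficients were dropped, so the prediction in dst already is the
+ // reconstruction.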
+ if (recon) {
+ uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)];
+ copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride,
+ blk_row, blk_col, plane_bsize, tx_bsize);
+ }
}
}
@@ -778,7 +865,8 @@
static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int64_t *distortion, int *skippable, int64_t *sse,
int64_t ref_best_rd, int plane, BLOCK_SIZE bsize,
- TX_SIZE tx_size, int use_fast_coef_costing) {
+ TX_SIZE tx_size, int use_fast_coef_costing,
+ struct buf_2d *recon) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblockd_plane *const pd = &xd->plane[plane];
struct rdcost_block_args args;
@@ -788,6 +876,7 @@
args.best_rd = ref_best_rd;
args.use_fast_coef_costing = use_fast_coef_costing;
args.skippable = 1;
+ args.this_recon = recon;
if (plane == 0) xd->mi[0]->tx_size = tx_size;
@@ -812,7 +901,8 @@
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int64_t *distortion, int *skip, int64_t *sse,
- int64_t ref_best_rd, BLOCK_SIZE bs) {
+ int64_t ref_best_rd, BLOCK_SIZE bs,
+ struct buf_2d *recon) {
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
@@ -822,13 +912,13 @@
mi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
txfm_rd_in_plane(cpi, x, rate, distortion, skip, sse, ref_best_rd, 0, bs,
- mi->tx_size, cpi->sf.use_fast_coef_costing);
+ mi->tx_size, cpi->sf.use_fast_coef_costing, recon);
}
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int64_t *distortion, int *skip,
int64_t *psse, int64_t ref_best_rd,
- BLOCK_SIZE bs) {
+ BLOCK_SIZE bs, struct buf_2d *recon) {
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -846,6 +936,20 @@
TX_SIZE best_tx = max_tx_size;
int start_tx, end_tx;
const int tx_size_ctx = get_tx_size_context(xd);
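+ // One scratch reconstruction buffer per candidate tx size; the buffer of
+ // the winning size is copied into *recon after the search.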
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, recon_buf16[TX_SIZES][64 * 64]);
+ uint8_t *recon_buf[TX_SIZES];
+ for (n = 0; n < TX_SIZES; ++n) {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ recon_buf[n] = CONVERT_TO_BYTEPTR(recon_buf16[n]);
+ } else {
+ recon_buf[n] = (uint8_t *)recon_buf16[n];
+ }
+ }
+#else
+ DECLARE_ALIGNED(16, uint8_t, recon_buf[TX_SIZES][64 * 64]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
assert(skip_prob > 0);
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
@@ -863,8 +967,16 @@
for (n = start_tx; n >= end_tx; n--) {
const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n];
- txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, n,
- cpi->sf.use_fast_coef_costing);
+ if (recon) {
+ struct buf_2d this_recon;
+ this_recon.buf = recon_buf[n];
+ this_recon.stride = recon->stride;
+ txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs,
+ n, cpi->sf.use_fast_coef_costing, &this_recon);
+ } else {
+ txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs,
+ n, cpi->sf.use_fast_coef_costing, 0);
+ }
r[n][1] = r[n][0];
if (r[n][0] < INT_MAX) {
r[n][1] += r_tx_size;
@@ -906,11 +1018,25 @@
*rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT];
*skip = s[mi->tx_size];
*psse = sse[mi->tx_size];
+ if (recon) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ memcpy(CONVERT_TO_SHORTPTR(recon->buf),
+ CONVERT_TO_SHORTPTR(recon_buf[mi->tx_size]),
+ 64 * 64 * sizeof(uint16_t));
+ } else {
+#endif
+ memcpy(recon->buf, recon_buf[mi->tx_size], 64 * 64);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif
+ }
}
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int64_t *distortion, int *skip, int64_t *psse,
- BLOCK_SIZE bs, int64_t ref_best_rd) {
+ BLOCK_SIZE bs, int64_t ref_best_rd,
+ struct buf_2d *recon) {
MACROBLOCKD *xd = &x->e_mbd;
int64_t sse;
int64_t *ret_sse = psse ? psse : &sse;
@@ -919,10 +1045,10 @@
if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
- bs);
+ bs, recon);
} else {
choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
- bs);
+ bs, recon);
}
}
@@ -1264,7 +1390,7 @@
mic->mode = mode;
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
- bsize, best_rd);
+ bsize, best_rd, /*recon = */ 0);
if (this_rate_tokenonly == INT_MAX) continue;
@@ -1316,7 +1442,8 @@
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
txfm_rd_in_plane(cpi, x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd,
- plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing);
+ plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing,
+ /*recon = */ 0);
if (pnrate == INT_MAX) {
is_cost_valid = 0;
break;
@@ -2586,8 +2713,8 @@
static int64_t handle_inter_mode(
VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2,
int64_t *distortion, int *skippable, int *rate_y, int *rate_uv,
- int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row,
- int mi_col, int_mv single_newmv[MAX_REF_FRAMES],
+ struct buf_2d *recon, int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES],
+ int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES],
INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
int (*single_skippable)[MAX_REF_FRAMES], int64_t *psse,
const int64_t ref_best_rd, int64_t *mask_filter, int64_t filter_cache[]) {
@@ -2900,7 +3027,7 @@
// Y cost and distortion
vp9_subtract_plane(x, bsize, 0);
super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize,
- ref_best_rd);
+ ref_best_rd, recon);
if (*rate_y == INT_MAX) {
*rate2 = INT_MAX;
@@ -3004,6 +3131,7 @@
static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int64_t *this_rd,
+ struct buf_2d *recon,
MV_REFERENCE_FRAME ref_frame,
unsigned int source_variance) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -3021,16 +3149,15 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- rec_variance =
- vp9_high_get_sby_variance(cpi, &xd->plane[0].dst, bsize, xd->bd);
+ rec_variance = vp9_high_get_sby_variance(cpi, recon, bsize, xd->bd);
src_variance =
vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, xd->bd);
} else {
- rec_variance = vp9_get_sby_variance(cpi, &xd->plane[0].dst, bsize);
+ rec_variance = vp9_get_sby_variance(cpi, recon, bsize);
src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize);
}
#else
- rec_variance = vp9_get_sby_variance(cpi, &xd->plane[0].dst, bsize);
+ rec_variance = vp9_get_sby_variance(cpi, recon, bsize);
src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize);
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -3062,6 +3189,8 @@
if (bsize > BLOCK_16X16) *this_rd *= 2;
}
}
+
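+ // xd is only referenced in CONFIG_VP9_HIGHBITDEPTH builds.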
+ (void)xd;
}
// Do we have an internal image edge (e.g. formatting bars).
@@ -3192,6 +3321,20 @@
int64_t mask_filter = 0;
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
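+ // 64x64 scratch buffer receiving the reconstructed Y block of the current
+ // mode; only wired up when tuning for film content.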
+ struct buf_2d *recon;
+ struct buf_2d recon_buf;
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, recon16[64 * 64]);
+ recon_buf.buf = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH
+ ? CONVERT_TO_BYTEPTR(recon16)
+ : (uint8_t *)recon16;
+#else
+ DECLARE_ALIGNED(16, uint8_t, recon8[64 * 64]);
+ recon_buf.buf = recon8;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ recon_buf.stride = 64;
+ recon = cpi->oxcf.content == VP9E_CONTENT_FILM ? &recon_buf : 0;
+
vp9_zero(best_mbmode);
x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
@@ -3506,7 +3649,7 @@
struct macroblockd_plane *const pd = &xd->plane[1];
memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize,
- best_rd);
+ best_rd, recon);
if (rate_y == INT_MAX) continue;
uv_tx = uv_txsize_lookup[bsize][mi->tx_size][pd->subsampling_x]
@@ -3529,7 +3672,7 @@
} else {
this_rd = handle_inter_mode(
cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv,
- &disable_skip, frame_mv, mi_row, mi_col, single_newmv,
+ recon, &disable_skip, frame_mv, mi_row, mi_col, single_newmv,
single_inter_filter, single_skippable, &total_sse, best_rd,
&mask_filter, filter_cache);
if (this_rd == INT64_MAX) continue;
@@ -3584,8 +3727,10 @@
// Apply an adjustment to the rd value based on the similarity of the
// source variance and reconstructed variance.
- rd_variance_adjustment(cpi, x, bsize, &this_rd, ref_frame,
- x->source_variance);
+ if (recon) {
+ rd_variance_adjustment(cpi, x, bsize, &this_rd, recon, ref_frame,
+ x->source_variance);
+ }
if (ref_frame == INTRA_FRAME) {
// Keep record of best intra rd