ref: b39a599cefed4cc2459ef20ba5c198955bb66516
parent: cdb141dd948b043e2fe82d4c3b8fe587234a8d55
author: Marco <[email protected]>
date: Fri Jan 22 06:45:31 EST 2016
vp9 non-rd mode: Modification for detected skin areas. If a superblock contains a lot of "skin" then force a split of the 64x64 partition, and make some adjustments in mode selection. This helps to reduce artifacts on moving face/skin areas at low bitrates. Little/no change in metrics: avgPSNR/SSIM down by ~0.12%. Small encoding time increase < 1%. Change-Id: Ic57f52148c3716f391419fab0530d916e4c1d186
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -137,6 +137,8 @@
// the visual quality at the boundary of moving color objects.
uint8_t color_sensitivity[2];
+ uint8_t sb_is_skin;
+
void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
#if CONFIG_VP9_HIGHBITDEPTH
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -714,6 +714,10 @@
s = x->plane[0].src.buf;
sp = x->plane[0].src.stride;
+ // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
+ // 5-20 for the 16x16 blocks.
+ force_split[0] = 0;
+
if (!is_key_frame) {
// In the case of spatial/temporal scalable coding, the assumption here is
// that the temporal reference frame will always be of type LAST_FRAME.
@@ -768,6 +772,47 @@
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
+ // Check if most of the superblock is skin content, and if so, force split
+ // to 32x32. Avoid checking superblocks on/near the frame boundary for high
+ // resolution. Note superblock may still pick 64X64 if y_sad is very small
+ // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
+ x->sb_is_skin = 0;
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && (low_res || (mi_col >= 8 &&
+ mi_col + 8 < cm->mi_cols && mi_row >= 8 && mi_row + 8 < cm->mi_rows))) {
+ int num_16x16_skin = 0;
+ int num_16x16_nonskin = 0;
+ uint8_t *ysignal = x->plane[0].src.buf;
+ uint8_t *usignal = x->plane[1].src.buf;
+ uint8_t *vsignal = x->plane[2].src.buf;
+ int spuv = x->plane[1].src.stride;
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ int is_skin = vp9_compute_skin_block(ysignal,
+ usignal,
+ vsignal,
+ sp,
+ spuv,
+ BLOCK_16X16);
+ num_16x16_skin += is_skin;
+ num_16x16_nonskin += (1 - is_skin);
+ if (num_16x16_nonskin > 3) {
+ // Exit loop if at least 4 of the 16x16 blocks are not skin.
+ i = 4;
+ j = 4;
+ }
+ ysignal += 16;
+ usignal += 8;
+ vsignal += 8;
+ }
+ ysignal += (sp << 4) - 64;
+ usignal += (spuv << 3) - 32;
+ vsignal += (spuv << 3) - 32;
+ }
+ if (num_16x16_skin > 12) {
+ x->sb_is_skin = 1;
+ force_split[0] = 1;
+ }
+ }
for (i = 1; i <= 2; ++i) {
struct macroblock_plane *p = &x->plane[i];
struct macroblockd_plane *pd = &xd->plane[i];
@@ -779,7 +824,9 @@
uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride);
- x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
+ // TODO(marpan): Investigate if we should lower this threshold if
+ // superblock is detected as skin.
+ x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
}
d = xd->plane[0].dst.buf;
@@ -818,9 +865,6 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
}
- // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
- // 5-20 for the 16x16 blocks.
- force_split[0] = 0;
// Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
// for splits.
for (i = 0; i < 4; i++) {
@@ -3629,6 +3673,7 @@
vp9_rd_cost_init(&dummy_rdc);
x->color_sensitivity[0] = 0;
x->color_sensitivity[1] = 0;
+ x->sb_is_skin = 0;
if (seg->enabled) {
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -812,7 +812,7 @@
mi->mv[0].as_mv.col > 64 ||
mi->mv[0].as_mv.col < -64)
motion_low = 0;
- if (x->encode_breakout > 0 && motion_low == 1) {
+ if (x->encode_breakout > 0 && motion_low == 1 && !x->sb_is_skin) {
// Set a maximum for threshold to avoid big PSNR loss in low bit rate
// case. Use extreme low threshold for static frames to limit
// skipping.
@@ -1585,7 +1585,8 @@
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
if (cpi->oxcf.speed >= 5 &&
- cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
+ cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
+ !x->sb_is_skin) {
// Bias against non-zero (above some threshold) motion for large blocks.
// This is temporary fix to avoid selection of large mv for big blocks.
if (frame_mv[this_mode][ref_frame].as_mv.row > 64 ||