ref: 3529526e114d34ba6be0fab94a9d36abb512bee4
parent: d64e328624e09cbc36e7077598bf0ff367dcdb4c
author: Jerome Jiang <[email protected]>
date: Mon Mar 4 10:51:22 EST 2019
vp9 svc: add simulcast mode when inter-layer pred is off. Force all upper spatial layers to be key frame if the base layer is key. Mode only works for inter-layer pred=off and non-flexible mode. Add flag to write out bitstream for each spatial layer in example encoder. Change-Id: I5db4543cf8697544ae49464f2157e692640d5256
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -34,6 +34,8 @@
#define OUTPUT_RC_STATS 1
+#define SIMULCAST_MODE 0
+
static const arg_def_t outputfile =
ARG_DEF("o", "output", 1, "Output filename");
static const arg_def_t skip_frames_arg =
@@ -749,7 +751,7 @@
}
}
-#if CONFIG_VP9_DECODER
+#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
static void test_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
const int frames_out, int *mismatch_seen) {
vpx_image_t enc_img, dec_img;
@@ -834,6 +836,14 @@
for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
unsigned int sl2;
uint64_t tot_size = 0;
+#if SIMULCAST_MODE
+ for (sl2 = 0; sl2 < sl; ++sl2) {
+ if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
+ }
+ vpx_video_writer_write_frame(outfile[sl],
+ (uint8_t *)(cx_pkt->data.frame.buf) + tot_size,
+ (size_t)(sizes[sl]), cx_pkt->data.frame.pts);
+#else
for (sl2 = 0; sl2 <= sl; ++sl2) {
if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
}
@@ -840,6 +850,7 @@
if (tot_size > 0)
vpx_video_writer_write_frame(outfile[sl], cx_pkt->data.frame.buf,
(size_t)(tot_size), cx_pkt->data.frame.pts);
+#endif // SIMULCAST_MODE
}
for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
@@ -924,7 +935,7 @@
#if CONFIG_INTERNAL_STATS
FILE *f = fopen("opsnr.stt", "a");
#endif
-#if CONFIG_VP9_DECODER
+#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
int mismatch_seen = 0;
vpx_codec_ctx_t decoder;
#endif
@@ -964,7 +975,7 @@
if (vpx_svc_init(&svc_ctx, &encoder, vpx_codec_vp9_cx(), &enc_cfg) !=
VPX_CODEC_OK)
die("Failed to initialize encoder\n");
-#if CONFIG_VP9_DECODER
+#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
if (vpx_codec_dec_init(
&decoder, get_vpx_decoder_by_name("vp9")->codec_interface(), NULL, 0))
die("Failed to initialize decoder\n");
@@ -1163,7 +1174,7 @@
if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
++frames_received;
-#if CONFIG_VP9_DECODER
+#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf,
(unsigned int)cx_pkt->data.frame.sz, NULL, 0))
die_codec(&decoder, "Failed to decode frame.");
@@ -1178,7 +1189,7 @@
default: { break; }
}
-#if CONFIG_VP9_DECODER
+#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
vpx_codec_control(&encoder, VP9E_GET_SVC_LAYER_ID, &layer_id);
// Don't look for mismatch on top spatial and top temporal layers as they
// are non reference frames.
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -22,6 +22,19 @@
namespace svc_test {
namespace {
+typedef enum {  // Inter-layer prediction modes used by this test; enumerators take the implicit values 0..3. NOTE(review): presumably must stay in the same order as the encoder-side definition - confirm.
+ // Inter-layer prediction is enabled on all frames.
+ INTER_LAYER_PRED_ON,
+ // Inter-layer prediction is disabled on all frames.
+ INTER_LAYER_PRED_OFF,
+ // Inter-layer prediction is disabled on non-key frames and non-sync frames.
+ INTER_LAYER_PRED_OFF_NONKEY,
+ // Inter-layer prediction is enabled on all frames, but constrained such
+ // that any layer S (> 0) can only predict from the previous spatial
+ // layer S-1, from the same superframe.
+ INTER_LAYER_PRED_ON_CONSTRAINED
+} INTER_LAYER_PRED;
+
class DatarateOnePassCbrSvc : public OnePassCbrSvc {
public:
explicit DatarateOnePassCbrSvc(const ::libvpx_test::CodecFactory *codec)
@@ -989,6 +1002,8 @@
// pass CBR SVC: 3 spatial layers and 3 temporal layers. Run CIF clip with 1
// thread.
TEST_P(DatarateOnePassCbrSvcInterLayerPredSingleBR, OnePassCbrSvc3SL3TL) {
+ // Disable test for inter-layer pred off for now since simulcast_mode fails.
+ if (inter_layer_pred_mode_ == INTER_LAYER_PRED_OFF) return;
SetSvcConfig(3, 3);
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
--- a/test/svc_end_to_end_test.cc
+++ b/test/svc_end_to_end_test.cc
@@ -21,6 +21,19 @@
namespace svc_test {
namespace {
+typedef enum {  // Inter-layer prediction modes; the test forwards the selected value to VP9E_SET_SVC_INTER_LAYER_PRED, so the implicit values 0..3 matter. NOTE(review): presumably must match the encoder-side ordering - confirm.
+ // Inter-layer prediction is enabled on all frames.
+ INTER_LAYER_PRED_ON,
+ // Inter-layer prediction is disabled on all frames.
+ INTER_LAYER_PRED_OFF,
+ // Inter-layer prediction is disabled on non-key frames and non-sync frames.
+ INTER_LAYER_PRED_OFF_NONKEY,
+ // Inter-layer prediction is enabled on all frames, but constrained such
+ // that any layer S (> 0) can only predict from the previous spatial
+ // layer S-1, from the same superframe.
+ INTER_LAYER_PRED_ON_CONSTRAINED
+} INTER_LAYER_PRED;
+
class ScalePartitionOnePassCbrSvc
: public OnePassCbrSvc,
public ::testing::TestWithParam<const ::libvpx_test::CodecFactory *> {
@@ -130,7 +143,10 @@
current_video_frame_ = video->frame();
PreEncodeFrameHookSetup(video, encoder);
if (video->frame() == 0) {
- encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, inter_layer_pred_mode_);
+ // Do not turn off inter-layer pred completely because simulcast mode
+ // fails.
+ if (inter_layer_pred_mode_ != INTER_LAYER_PRED_OFF)
+ encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, inter_layer_pred_mode_);
encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
if (intra_only_test_)
// Decoder sets the color_space for Intra-only frames
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3093,7 +3093,11 @@
}
void vp9_update_reference_frames(VP9_COMP *cpi) {
- update_ref_frames(cpi);
+ if (cpi->svc.simulcast_mode && is_one_pass_cbr_svc(cpi) &&
+ cpi->common.frame_type == KEY_FRAME)
+ vp9_svc_update_ref_frame_key_simulcast(cpi);
+ else
+ update_ref_frames(cpi);
#if CONFIG_VP9_TEMPORAL_DENOISING
vp9_denoiser_update_ref_frame(cpi);
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -2209,6 +2209,13 @@
}
}
+ if (svc->simulcast_mode && svc->spatial_layer_id > 0 &&
+ svc->layer_context[layer].is_key_frame == 1) {
+ cm->frame_type = KEY_FRAME;
+ cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
+ target = calc_iframe_target_size_one_pass_cbr(cpi);
+ }
+
// Check if superframe contains a sync layer request.
vp9_svc_check_spatial_layer_sync(cpi);
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -54,6 +54,7 @@
svc->superframe_has_layer_sync = 0;
svc->use_set_ref_frame_config = 0;
svc->num_encoded_top_layer = 0;
+ svc->simulcast_mode = 0;
for (i = 0; i < REF_FRAMES; ++i) {
svc->fb_idx_spatial_layer_id[i] = -1;
@@ -474,6 +475,17 @@
}
}
+// Simulcast helper: when the current frame is on the top temporal layer (and
+// more than one temporal layer exists), clear every ext_refresh_* flag so the frame refreshes no reference buffer. Simulcast mode has inter-layer prediction disabled.
+static void non_reference_frame_simulcast(VP9_COMP *const cpi) {
+ if (cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1 &&
+ cpi->svc.temporal_layer_id > 0) {  // "> 0" excludes the single-temporal-layer case, where layer 0 is also the top layer.
+ cpi->ext_refresh_last_frame = 0;
+ cpi->ext_refresh_golden_frame = 0;
+ cpi->ext_refresh_alt_ref_frame = 0;
+ }
+}
+
// The function sets proper ref_frame_flags, buffer indices, and buffer update
// variables for temporal layering mode 3 - that does 0-2-1-2 temporal layering
// scheme.
@@ -578,6 +590,8 @@
cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
}
+ if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi);
+
reset_fb_idx_unused(cpi);
}
@@ -639,6 +653,8 @@
cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
}
+ if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi);
+
reset_fb_idx_unused(cpi);
}
@@ -673,6 +689,8 @@
cpi->gld_fb_idx = 0;
}
+ if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi);
+
reset_fb_idx_unused(cpi);
}
@@ -732,6 +750,15 @@
SVC *const svc = &cpi->svc;
LAYER_CONTEXT *lc = NULL;
svc->skip_enhancement_layer = 0;
+
+ if (svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF &&
+ svc->number_spatial_layers <= 3 && svc->number_temporal_layers <= 3 &&
+ !(svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->use_set_ref_frame_config))
+ svc->simulcast_mode = 1;
+ else
+ svc->simulcast_mode = 0;
+
if (svc->number_spatial_layers > 1) {
svc->use_base_mv = 1;
svc->use_partition_reuse = 1;
@@ -1184,6 +1211,44 @@
}
}
+void vp9_svc_update_ref_frame_key_simulcast(VP9_COMP *const cpi) {  // Reference-slot update used for key frames in simulcast mode: each spatial layer writes only its own slots.
+ VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
+ BufferPool *const pool = cm->buffer_pool;
+ const int sl_id = svc->spatial_layer_id;
+ const int tl_id = svc->temporal_layer_id;
+ const int num_sl = svc->number_spatial_layers;
+ // Slots (indices into cm->ref_frame_map) written per spatial layer. SL0:
+ // 3 spatial layers: update slot 0 and 3
+ // 2 spatial layers: update slot 0 and 2
+ // 1 spatial layer: update slot 0 and 1
+ // SL1:
+ // 3 spatial layers: update slot 1, 4, and 6
+ // 2 spatial layers: update slot 1, 3, and 6
+ // slot 6 is for golden frame long temporal prediction.
+ // SL2: update slot 2, 5 and 7
+ // slot 7 is for golden frame long temporal prediction.
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[sl_id], cm->new_fb_idx);  // First slot: sl_id. Presumably ref_cnt_fb repoints the slot at new_fb_idx and adjusts reference counts - confirm.
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[num_sl + sl_id],
+ cm->new_fb_idx);  // Second slot: num_sl + sl_id.
+ svc->fb_idx_spatial_layer_id[sl_id] = sl_id;  // Record the spatial/temporal layer ids of the frame now stored in each updated slot.
+ svc->fb_idx_temporal_layer_id[sl_id] = tl_id;
+ svc->fb_idx_spatial_layer_id[num_sl + sl_id] = sl_id;
+ svc->fb_idx_temporal_layer_id[num_sl + sl_id] = tl_id;
+ // Update slots for golden frame long temporal prediction.
+ if (svc->use_gf_temporal_ref_current_layer) {
+ const int index = num_sl == 3 ? sl_id - 1 : sl_id;  // NOTE(review): evaluates to -1 for SL0 with 3 spatial layers; presumably the flag is never set in that case - confirm.
+ const int lt_buffer_index = svc->buffer_gf_temporal_ref[index].idx;
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[lt_buffer_index],
+ cm->new_fb_idx);
+ svc->fb_idx_spatial_layer_id[lt_buffer_index] = sl_id;
+ svc->fb_idx_temporal_layer_id[lt_buffer_index] = tl_id;
+ }
+
+ vp9_copy_flags_ref_update_idx(cpi);  // Post-update bookkeeping; these helpers are defined outside this hunk.
+ vp9_svc_update_ref_frame_buffer_idx(cpi);
+}
+
void vp9_svc_update_ref_frame(VP9_COMP *const cpi) {
VP9_COMMON *const cm = &cpi->common;
SVC *const svc = &cpi->svc;
@@ -1192,7 +1257,7 @@
if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
svc->use_set_ref_frame_config) {
vp9_svc_update_ref_frame_bypass_mode(cpi);
- } else if (cm->frame_type == KEY_FRAME) {
+ } else if (cm->frame_type == KEY_FRAME && !svc->simulcast_mode) {
// Keep track of frame index for each reference frame.
int i;
// On key frame update all reference frame slots.
@@ -1203,7 +1268,7 @@
if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx && i != cpi->alt_fb_idx)
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx);
}
- } else {
+ } else if (cm->frame_type != KEY_FRAME) {
if (cpi->refresh_last_frame) {
svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id;
svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id;
@@ -1236,6 +1301,7 @@
// (to level closer to worst_quality) if the overshoot is significant.
// Reset it for all temporal layers on base spatial layer.
if (cm->frame_type == KEY_FRAME && cpi->oxcf.rc_mode == VPX_CBR &&
+ !svc->simulcast_mode &&
rc->projected_frame_size > 3 * rc->avg_frame_bandwidth) {
int tl;
rc->avg_frame_qindex[INTER_FRAME] =
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -189,6 +189,9 @@
int64_t time_stamp_prev[VPX_SS_MAX_LAYERS];
int num_encoded_top_layer;
+
+ // Every spatial layer on a superframe whose base is key is key too.
+ int simulcast_mode;
} SVC;
struct VP9_COMP;
@@ -257,6 +260,8 @@
void vp9_svc_check_spatial_layer_sync(struct VP9_COMP *const cpi);
void vp9_svc_update_ref_frame_buffer_idx(struct VP9_COMP *const cpi);
+
+void vp9_svc_update_ref_frame_key_simulcast(struct VP9_COMP *const cpi);
void vp9_svc_update_ref_frame(struct VP9_COMP *const cpi);