ref: c139b81a13f680340dd874c205cba40a7233d388
parent: ebf7466cd8b884fd29be42ebe670317f5a7ca04d
author: Marco <[email protected]>
date: Thu May 21 12:15:37 EDT 2015
Vidyo patch: Rate control for SVC, 1 pass CBR mode. -Make Rate control work for SVC 1 pass CBR mode. -Added temporal layering mode. -Fixed bug in non-rd variance partition. -Modified/updated the sample encoders (vp9_spatial_svc_encoder, vpx_temporal_svc_encoder). -Added datarate unittest(s) for 1 pass CBR SVC. Change-Id: Ie94b1b68a56ea1267b5087c625e5df04def2ee48
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -14,11 +14,13 @@
* that benefit from a scalable bitstream.
*/
+#include <math.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
+
#include "../args.h"
#include "../tools_common.h"
#include "../video_writer.h"
@@ -27,11 +29,18 @@
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "../vpxstats.h"
+#define OUTPUT_RC_STATS 1
static const arg_def_t skip_frames_arg =
ARG_DEF("s", "skip-frames", 1, "input frames to skip");
static const arg_def_t frames_arg =
ARG_DEF("f", "frames", 1, "number of frames to encode");
+static const arg_def_t threads_arg =
+ ARG_DEF("th", "threads", 1, "number of threads to use");
+#if OUTPUT_RC_STATS
+static const arg_def_t output_rc_stats_arg =
+ ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
+#endif
static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
static const arg_def_t timebase_arg =
@@ -42,6 +51,9 @@
ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
static const arg_def_t temporal_layers_arg =
ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
+// -tlm / --temporal-layering-mode: the parsed value is stored in the
+// temporal_layering_mode field of both the SVC context and the encoder config.
+static const arg_def_t temporal_layering_mode_arg =
+    ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme. "
+        "VP9E_TEMPORAL_LAYERING_MODE");
static const arg_def_t kf_dist_arg =
ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
static const arg_def_t scale_factors_arg =
@@ -65,6 +77,8 @@
"generating any outputs");
static const arg_def_t rc_end_usage_arg =
ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
+static const arg_def_t speed_arg =
+ ARG_DEF("sp", "speed", 1, "speed configuration");
#if CONFIG_VP9_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = {
@@ -85,10 +99,16 @@
&timebase_arg, &bitrate_arg, &skip_frames_arg, &spatial_layers_arg,
&kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg,
&fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg,
- &max_bitrate_arg, &temporal_layers_arg, &lag_in_frame_arg,
+ &max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg,
+ &lag_in_frame_arg, &threads_arg,
+#if OUTPUT_RC_STATS
+ &output_rc_stats_arg,
+#endif
+
#if CONFIG_VP9_HIGHBITDEPTH
&bitdepth_arg,
#endif
+ &speed_arg,
&rc_end_usage_arg, NULL
};
@@ -102,6 +122,10 @@
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_kf_dist = 100;
+static const uint32_t default_temporal_layering_mode = 0;
+static const uint32_t default_output_rc_stats = 0;
+static const int32_t default_speed = -1; // -1 means use library default.
+static const uint32_t default_threads = 0; // zero means use library default.
typedef struct {
const char *input_filename;
@@ -143,6 +167,12 @@
svc_ctx->log_level = SVC_LOG_DEBUG;
svc_ctx->spatial_layers = default_spatial_layers;
svc_ctx->temporal_layers = default_temporal_layers;
+ svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
+#if OUTPUT_RC_STATS
+ svc_ctx->output_rc_stat = default_output_rc_stats;
+#endif
+ svc_ctx->speed = default_speed;
+ svc_ctx->threads = default_threads;
// start with default encoder configuration
res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
@@ -184,6 +214,20 @@
svc_ctx->spatial_layers = arg_parse_uint(&arg);
} else if (arg_match(&arg, &temporal_layers_arg, argi)) {
svc_ctx->temporal_layers = arg_parse_uint(&arg);
+#if OUTPUT_RC_STATS
+ } else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
+ svc_ctx->output_rc_stat = arg_parse_uint(&arg);
+#endif
+ } else if (arg_match(&arg, &speed_arg, argi)) {
+ svc_ctx->speed = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &threads_arg, argi)) {
+ svc_ctx->threads = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
+ svc_ctx->temporal_layering_mode =
+ enc_cfg->temporal_layering_mode = arg_parse_int(&arg);
+ if (svc_ctx->temporal_layering_mode) {
+ enc_cfg->g_error_resilient = 1;
+ }
} else if (arg_match(&arg, &kf_dist_arg, argi)) {
enc_cfg->kf_min_dist = arg_parse_uint(&arg);
enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
@@ -316,6 +360,185 @@
enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
}
+#if OUTPUT_RC_STATS
+// Rate control encoding stats; layer index = sl * ts_number_layers + tl.
+struct RateControlStats {
+ // Number of input frames seen per layer.
+ int layer_input_frames[VPX_MAX_LAYERS];
+ // Total (cumulative) number of encoded frames per layer, key frames included.
+ int layer_tot_enc_frames[VPX_MAX_LAYERS];
+ // Number of encoded non-key frames per layer.
+ int layer_enc_frames[VPX_MAX_LAYERS];
+ // Framerate per layer (cumulative): framerate / ts_rate_decimator[tl].
+ double layer_framerate[VPX_MAX_LAYERS];
+ // Target average frame size per layer (per-frame-bandwidth), in bits.
+ double layer_pfb[VPX_MAX_LAYERS];
+ // Actual average frame size per layer, in bits (a sum until the summary).
+ double layer_avg_frame_size[VPX_MAX_LAYERS];
+ // Average rate mismatch per layer (|target - actual| / target).
+ double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
+ // Actual encoding bitrate per layer (cumulative); bits until the summary.
+ double layer_encoding_bitrate[VPX_MAX_LAYERS];
+ // Average of the short-time encoder actual bitrate (a sum until the summary).
+ // TODO(marpan): Should we add these short-time stats for each layer?
+ double avg_st_encoding_bitrate;
+ // Variance of the short-time encoder actual bitrate (sum of squares first).
+ double variance_st_encoding_bitrate;
+ // Window (number of frames) for computing short-time encoding bitrate.
+ int window_size;
+ // Number of window measurements.
+ int window_count;
+};
+
+// Initializes the rate control stats in rc from the encoder config: the
+// cumulative framerate and the target per-frame bandwidth of every
+// (spatial, temporal) layer are derived from cfg, and all counters and
+// accumulators are reset to zero.
+// Note: these rate control stats assume only 1 key frame in the
+// sequence (i.e., first frame only).
+static void set_rate_control_stats(struct RateControlStats *rc,
+                                   vpx_codec_enc_cfg_t *cfg) {
+  unsigned int sl, tl;
+  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
+  // per-frame-bandwidth, for the rate control encoding stats below.
+  // The division is done in floating point so that fractional frame rates
+  // (e.g. a 1001/30000 timebase) are not truncated to an integer.
+  const double framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num;
+
+  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
+      const int layer = sl * cfg->ts_number_layers + tl;
+      rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl];
+      if (tl > 0) {
+        // Enhancement temporal layer: the bitrate this layer adds divided by
+        // the frame rate this layer adds on top of the layer below it.
+        rc->layer_pfb[layer] = 1000.0 *
+            (cfg->layer_target_bitrate[layer] -
+             cfg->layer_target_bitrate[layer - 1]) /
+            (rc->layer_framerate[layer] -
+             rc->layer_framerate[layer - 1]);
+      } else {
+        // Base temporal layer of this spatial layer.
+        rc->layer_pfb[layer] = 1000.0 *
+            cfg->layer_target_bitrate[layer] /
+            rc->layer_framerate[layer];
+      }
+      rc->layer_input_frames[layer] = 0;
+      rc->layer_enc_frames[layer] = 0;
+      rc->layer_tot_enc_frames[layer] = 0;
+      rc->layer_encoding_bitrate[layer] = 0.0;
+      rc->layer_avg_frame_size[layer] = 0.0;
+      rc->layer_avg_rate_mismatch[layer] = 0.0;
+    }
+  }
+  rc->window_count = 0;
+  rc->window_size = 15;
+  rc->avg_st_encoding_bitrate = 0.0;
+  rc->variance_st_encoding_bitrate = 0.0;
+}
+
+// Prints the per-layer rate control summary followed by the short-time
+// bitrate stats. The sums accumulated in rc are converted to averages in
+// place, so this is meant to be called once, after the last frame.
+// Dies if the input frame counts of spatial layer 0 do not add up to
+// frame_cnt.
+static void printout_rate_control_summary(struct RateControlStats *rc,
+                                          vpx_codec_enc_cfg_t *cfg,
+                                          int frame_cnt) {
+  unsigned int sl, tl;
+  int tot_num_frames = 0;
+  double perc_fluctuation = 0.0;
+  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
+  printf("Rate control layer stats for sl%u tl%u layer(s):\n\n",
+         cfg->ss_number_layers, cfg->ts_number_layers);
+  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
+      const int layer = sl * cfg->ts_number_layers + tl;
+      // The base temporal layer also carries the key frame, which is not
+      // counted in layer_enc_frames.
+      const int num_dropped = (tl > 0) ?
+          (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) :
+          (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1);
+      // Input frames are only totalled over the first spatial layer.
+      if (!sl)
+        tot_num_frames += rc->layer_input_frames[layer];
+      rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
+          rc->layer_encoding_bitrate[layer] / tot_num_frames;
+      rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] /
+          rc->layer_enc_frames[layer];
+      rc->layer_avg_rate_mismatch[layer] =
+          100.0 * rc->layer_avg_rate_mismatch[layer] /
+          rc->layer_enc_frames[layer];
+      printf("For layer#: sl%u tl%u \n", sl, tl);
+      printf("Bitrate (target vs actual): %u %f kbps\n",
+             cfg->layer_target_bitrate[layer],
+             rc->layer_encoding_bitrate[layer]);
+      printf("Average frame size (target vs actual): %f %f bits\n",
+             rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
+      printf("Average rate_mismatch: %f\n",
+             rc->layer_avg_rate_mismatch[layer]);
+      printf("Number of input frames, encoded (non-key) frames, "
+             "and percent dropped frames: %d %d %f \n",
+             rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
+             100.0 * num_dropped / rc->layer_input_frames[layer]);
+      printf("\n");
+    }
+  }
+  if (rc->window_count > 0) {
+    // Turn the accumulated sum and sum of squares into mean and variance.
+    rc->avg_st_encoding_bitrate =
+        rc->avg_st_encoding_bitrate / rc->window_count;
+    rc->variance_st_encoding_bitrate =
+        rc->variance_st_encoding_bitrate / rc->window_count -
+        (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
+    perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
+        rc->avg_st_encoding_bitrate;
+    printf("Short-time stats, for window of %d frames: \n", rc->window_size);
+    printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
+           rc->avg_st_encoding_bitrate,
+           sqrt(rc->variance_st_encoding_bitrate),
+           perc_fluctuation);
+  } else {
+    // No window was measured (sequence too short); dividing by the zero
+    // window count would only print NaNs.
+    printf("Short-time stats: no window of %d frames was completed.\n",
+           rc->window_size);
+  }
+  if (frame_cnt != tot_num_frames)
+    die("Error: Number of input frames not equal to output encoded frames: "
+        "frame_cnt = %d tot_num_frames = %d\n", frame_cnt, tot_num_frames);
+}
+
+// Parses the superframe index at the end of a compressed chunk, if any.
+//   data, data_sz: the compressed chunk.
+//   sizes:         receives the size of each frame listed in the index
+//                  (at most 8 entries).
+//   count:         receives the number of entries written to sizes; 0 when
+//                  the chunk carries no superframe index.
+// Returns VPX_CODEC_OK on success (with or without an index),
+// VPX_CODEC_INVALID_PARAM for an empty chunk and VPX_CODEC_CORRUPT_FRAME
+// when the chunk is marked as having an index that is malformed.
+vpx_codec_err_t parse_superframe_index(const uint8_t *data,
+                                       size_t data_sz,
+                                       uint32_t sizes[8], int *count) {
+  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
+  // it is a super frame index. If the last byte of real video compression
+  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
+  // not the associated matching marker byte at the front of the index we have
+  // an invalid bitstream and need to return an error.
+
+  uint8_t marker;
+
+  *count = 0;
+
+  // An empty chunk has no last byte to inspect.
+  if (data == NULL || data_sz == 0)
+    return VPX_CODEC_INVALID_PARAM;
+
+  marker = *(data + data_sz - 1);
+
+  if ((marker & 0xe0) == 0xc0) {
+    const uint32_t frames = (marker & 0x7) + 1;
+    const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+    const size_t index_sz = 2 + mag * frames;
+
+    // This chunk is marked as having a superframe index but doesn't have
+    // enough data for it, thus it's an invalid superframe index.
+    if (data_sz < index_sz)
+      return VPX_CODEC_CORRUPT_FRAME;
+
+    {
+      const uint8_t marker2 = *(data + data_sz - index_sz);
+
+      // This chunk is marked as having a superframe index but doesn't have
+      // the matching marker byte at the front of the index therefore it's an
+      // invalid chunk.
+      if (marker != marker2)
+        return VPX_CODEC_CORRUPT_FRAME;
+    }
+
+    {
+      // Found a valid superframe index.
+      uint32_t i, j;
+      const uint8_t *x = &data[data_sz - index_sz + 1];
+
+      for (i = 0; i < frames; ++i) {
+        uint32_t this_sz = 0;
+
+        // Sizes are stored least significant byte first. Widen each byte
+        // before shifting: a byte >= 0x80 shifted left by 24 would overflow
+        // the int it is otherwise promoted to.
+        for (j = 0; j < mag; ++j)
+          this_sz |= (uint32_t)(*x++) << (j * 8);
+        sizes[i] = this_sz;
+      }
+      *count = frames;
+    }
+  }
+  return VPX_CODEC_OK;
+}
+#endif
+
int main(int argc, const char **argv) {
AppInput app_input = {0};
VpxVideoWriter *writer = NULL;
@@ -332,7 +555,15 @@
FILE *infile = NULL;
int end_of_stream = 0;
int frames_received = 0;
-
+#if OUTPUT_RC_STATS
+ VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
+ struct RateControlStats rc;
+ vpx_svc_layer_id_t layer_id;
+ int sl, tl;
+ double sum_bitrate = 0.0;
+ double sum_bitrate2 = 0.0;
+ double framerate = 30.0;
+#endif
memset(&svc_ctx, 0, sizeof(svc_ctx));
svc_ctx.log_print = 1;
exec_name = argv[0];
@@ -359,6 +590,13 @@
VPX_CODEC_OK)
die("Failed to initialize encoder\n");
+#if OUTPUT_RC_STATS
+ if (svc_ctx.output_rc_stat) {
+ set_rate_control_stats(&rc, &enc_cfg);
+ framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
+ }
+#endif
+
info.codec_fourcc = VP9_FOURCC;
info.time_base.numerator = enc_cfg.g_timebase.num;
info.time_base.denominator = enc_cfg.g_timebase.den;
@@ -370,11 +608,30 @@
if (!writer)
die("Failed to open %s for writing\n", app_input.output_filename);
}
+#if OUTPUT_RC_STATS
+ // For now, just write temporal layer streams.
+ // TODO(wonkap): do spatial by re-writing superframe.
+ if (svc_ctx.output_rc_stat) {
+ for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
+ char file_name[PATH_MAX];
+ snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
+ app_input.output_filename, tl);
+ outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
+ if (!outfile[tl])
+ die("Failed to open %s for writing", file_name);
+ }
+ }
+#endif
+
// skip initial frames
for (i = 0; i < app_input.frames_to_skip; ++i)
vpx_img_read(&raw, infile);
+ if (svc_ctx.speed != -1)
+ vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
+ vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, 0);
+
// Encode frames
while (!end_of_stream) {
vpx_codec_iter_t iter = NULL;
@@ -386,7 +643,9 @@
}
res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
- pts, frame_duration, VPX_DL_GOOD_QUALITY);
+ pts, frame_duration, svc_ctx.speed >= 5 ?
+ VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
+
printf("%s", vpx_svc_get_message(&svc_ctx));
if (res != VPX_CODEC_OK) {
die_codec(&codec, "Failed to encode frame");
@@ -395,12 +654,91 @@
while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
switch (cx_pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT: {
- if (cx_pkt->data.frame.sz > 0)
+ if (cx_pkt->data.frame.sz > 0) {
+#if OUTPUT_RC_STATS
+ uint32_t sizes[8];
+ int count = 0;
+#endif
vpx_video_writer_write_frame(writer,
cx_pkt->data.frame.buf,
cx_pkt->data.frame.sz,
cx_pkt->data.frame.pts);
+#if OUTPUT_RC_STATS
+ // TODO(marpan/wonkap): Put this (to line728) in separate function.
+ if (svc_ctx.output_rc_stat) {
+ vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
+ parse_superframe_index(cx_pkt->data.frame.buf,
+ cx_pkt->data.frame.sz, sizes, &count);
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
+ layer_id.temporal_layer_id];
+ }
+ for (tl = layer_id.temporal_layer_id;
+ tl < enc_cfg.ts_number_layers; ++tl) {
+ vpx_video_writer_write_frame(outfile[tl],
+ cx_pkt->data.frame.buf,
+ cx_pkt->data.frame.sz,
+ cx_pkt->data.frame.pts);
+ }
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ for (tl = layer_id.temporal_layer_id;
+ tl < enc_cfg.ts_number_layers; ++tl) {
+ const int layer = sl * enc_cfg.ts_number_layers + tl;
+ ++rc.layer_tot_enc_frames[layer];
+ rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
+ // Keep count of rate control stats per layer, for non-key
+ // frames.
+ if (tl == layer_id.temporal_layer_id &&
+ !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
+ rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
+ rc.layer_avg_rate_mismatch[layer] +=
+ fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
+ rc.layer_pfb[layer];
+ ++rc.layer_enc_frames[layer];
+ }
+ }
+ }
+
+ // Update for short-time encoding bitrate states, for moving
+ // window of size rc->window, shifted by rc->window / 2.
+ // Ignore first window segment, due to key frame.
+ if (frame_cnt > rc.window_size) {
+ tl = layer_id.temporal_layer_id;
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
+ }
+ if (frame_cnt % rc.window_size == 0) {
+ rc.window_count += 1;
+ rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
+ rc.variance_st_encoding_bitrate +=
+ (sum_bitrate / rc.window_size) *
+ (sum_bitrate / rc.window_size);
+ sum_bitrate = 0.0;
+ }
+ }
+
+ // Second shifted window.
+ if (frame_cnt > rc.window_size + rc.window_size / 2) {
+ tl = layer_id.temporal_layer_id;
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
+ }
+
+ if (frame_cnt > 2 * rc.window_size &&
+ frame_cnt % rc.window_size == 0) {
+ rc.window_count += 1;
+ rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
+ rc.variance_st_encoding_bitrate +=
+ (sum_bitrate2 / rc.window_size) *
+ (sum_bitrate2 / rc.window_size);
+ sum_bitrate2 = 0.0;
+ }
+ }
+ }
+#endif
+ }
+
printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
!!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
(int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
@@ -424,25 +762,30 @@
pts += frame_duration;
}
}
-
printf("Processed %d frames\n", frame_cnt);
-
fclose(infile);
+#if OUTPUT_RC_STATS
+ if (svc_ctx.output_rc_stat) {
+ printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
+ printf("\n");
+ }
+#endif
if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
-
if (app_input.passes == 2)
stats_close(&app_input.rc_stats, 1);
-
if (writer) {
vpx_video_writer_close(writer);
}
-
+#if OUTPUT_RC_STATS
+ if (svc_ctx.output_rc_stat) {
+ for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
+ vpx_video_writer_close(outfile[tl]);
+ }
+ }
+#endif
vpx_img_free(&raw);
-
// display average size, psnr
printf("%s", vpx_svc_dump_statistics(&svc_ctx));
-
vpx_svc_release(&svc_ctx);
-
return EXIT_SUCCESS;
}
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -85,13 +85,13 @@
// per-frame-bandwidth, for the rate control encoding stats below.
const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
- rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] /
+ rc->layer_pfb[0] = 1000.0 * cfg->layer_target_bitrate[0] /
rc->layer_framerate[0];
for (i = 0; i < cfg->ts_number_layers; ++i) {
if (i > 0) {
rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
rc->layer_pfb[i] = 1000.0 *
- (cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) /
+ (cfg->layer_target_bitrate[i] - cfg->layer_target_bitrate[i - 1]) /
(rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
}
rc->layer_input_frames[i] = 0;
@@ -128,7 +128,7 @@
rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] /
rc->layer_enc_frames[i];
printf("For layer#: %d \n", i);
- printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i],
+ printf("Bitrate (target vs actual): %d %f \n", cfg->layer_target_bitrate[i],
rc->layer_encoding_bitrate[i]);
printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
rc->layer_avg_frame_size[i]);
@@ -597,7 +597,7 @@
for (i = min_args_base;
(int)i < min_args_base + mode_to_num_layers[layering_mode];
++i) {
- cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
+ cfg.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
}
// Real time parameters.
@@ -625,6 +625,8 @@
// Disable automatic keyframe placement.
cfg.kf_min_dist = cfg.kf_max_dist = 3000;
+ cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
set_temporal_layer_pattern(layering_mode,
&cfg,
layer_flags,
@@ -633,8 +635,8 @@
set_rate_control_metrics(&rc, &cfg);
// Target bandwidth for the whole stream.
- // Set to ts_target_bitrate for highest layer (total bitrate).
- cfg.rc_target_bitrate = cfg.ts_target_bitrate[cfg.ts_number_layers - 1];
+ // Set to layer_target_bitrate for highest layer (total bitrate).
+ cfg.rc_target_bitrate = cfg.layer_target_bitrate[cfg.ts_number_layers - 1];
// Open input file.
if (!(infile = fopen(argv[1], "rb"))) {
@@ -677,6 +679,9 @@
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
} else if (strncmp(encoder->name, "vp9", 3) == 0) {
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
+ vpx_svc_extra_cfg_t svc_params;
+#endif
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
@@ -685,6 +690,15 @@
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0)) {
die_codec(&codec, "Failed to set SVC");
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
+ for (i = 0; i < cfg.ts_number_layers; ++i) {
+ svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
+ svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
+ }
+ svc_params.scaling_factor_num[0] = cfg.g_h;
+ svc_params.scaling_factor_den[0] = cfg.g_h;
+ vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params);
+#endif
}
}
if (strncmp(encoder->name, "vp8", 3) == 0) {
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -14,6 +14,7 @@
#include "test/i420_video_source.h"
#include "test/util.h"
#include "test/y4m_video_source.h"
+#include "vpx/vpx_codec.h"
namespace {
@@ -565,6 +566,8 @@
cfg_.ts_rate_decimator[0] = 2;
cfg_.ts_rate_decimator[1] = 1;
+ cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
if (deadline_ == VPX_DL_REALTIME)
cfg_.g_error_resilient = 1;
@@ -574,14 +577,14 @@
cfg_.rc_target_bitrate = i;
ResetModel();
// 60-40 bitrate allocation for 2 temporal layers.
- cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
- cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
+ cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
+ cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
- ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
+ ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
<< " The datarate for the file is lower than target by too much, "
"for layer: " << j;
- ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
+ ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
<< " The datarate for the file is greater than target by too much, "
"for layer: " << j;
}
@@ -606,6 +609,8 @@
cfg_.ts_rate_decimator[1] = 2;
cfg_.ts_rate_decimator[2] = 1;
+ cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
for (int i = 200; i <= 800; i += 200) {
@@ -612,19 +617,19 @@
cfg_.rc_target_bitrate = i;
ResetModel();
// 40-20-40 bitrate allocation for 3 temporal layers.
- cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
- cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
- cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+ cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+ cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+ cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
// TODO(yaowu): Work out more stable rc control strategy and
// Adjust the thresholds to be tighter than .75.
- ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
+ ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75)
<< " The datarate for the file is lower than target by too much, "
"for layer: " << j;
// TODO(yaowu): Work out more stable rc control strategy and
// Adjust the thresholds to be tighter than 1.25.
- ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
+ ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25)
<< " The datarate for the file is greater than target by too much, "
"for layer: " << j;
}
@@ -652,20 +657,22 @@
cfg_.ts_rate_decimator[1] = 2;
cfg_.ts_rate_decimator[2] = 1;
+ cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
cfg_.rc_target_bitrate = 200;
ResetModel();
// 40-20-40 bitrate allocation for 3 temporal layers.
- cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
- cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
- cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+ cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+ cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+ cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
- ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
+ ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
<< " The datarate for the file is lower than target by too much, "
"for layer: " << j;
- ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
+ ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
<< " The datarate for the file is greater than target by too much, "
"for layer: " << j;
// Expect some frame drops in this test: for this 200 frames test,
@@ -737,9 +744,180 @@
}
#endif // CONFIG_VP9_TEMPORAL_DENOISING
+// Encoder test fixture for the 1 pass CBR SVC datarate test. It is
+// parameterized by test mode and speed setting, tracks a simple buffer model
+// and the total number of bits produced, and derives effective_datarate_ and
+// file_datarate_ at the end of each pass.
+class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
+    public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ public:
+  DatarateOnePassCbrSvc() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~DatarateOnePassCbrSvc() {}
+ protected:
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    speed_setting_ = GET_PARAM(2);
+    ResetModel();
+  }
+  // Resets all per-run state. The results read by the tests are cleared as
+  // well, so a run that produces no bits cannot leave stale or uninitialized
+  // datarates behind.
+  virtual void ResetModel() {
+    last_pts_ = 0;
+    bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
+    frame_number_ = 0;
+    first_drop_ = 0;
+    bits_total_ = 0;
+    duration_ = 0.0;
+    bits_in_last_frame_ = 0;
+    file_datarate_ = 0.0;
+    effective_datarate_ = 0.0;
+  }
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+  }
+  // Applies the SVC controls before the first frame and records the source
+  // timebase for every frame.
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      int i;
+      for (i = 0; i < 2; ++i) {
+        svc_params_.max_quantizers[i] = 63;
+        svc_params_.min_quantizers[i] = 0;
+      }
+      svc_params_.scaling_factor_num[0] = 144;
+      svc_params_.scaling_factor_den[0] = 288;
+      svc_params_.scaling_factor_num[1] = 288;
+      svc_params_.scaling_factor_den[1] = 288;
+      encoder->Control(VP9E_SET_SVC, 1);
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
+      encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);
+#endif
+      encoder->Control(VP8E_SET_CPUUSED, speed_setting_);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, 0);
+      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300);
+    }
+    const vpx_rational_t tb = video->timebase();
+    timebase_ = static_cast<double>(tb.num) / tb.den;
+    duration_ = 0;
+  }
+  // Updates the buffer model and the bit totals with one output packet.
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
+    if (last_pts_ == 0)
+      duration = 1;
+    // Credit the buffer with the bits earned since the previous packet.
+    bits_in_buffer_model_ += static_cast<int64_t>(
+        duration * timebase_ * cfg_.rc_target_bitrate * 1000);
+    const bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY)
+                         ? true: false;
+    if (!key_frame) {
+      ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
+          << pkt->data.frame.pts;
+    }
+    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+    bits_in_buffer_model_ -= frame_size_in_bits;
+    bits_total_ += frame_size_in_bits;
+    // A gap of more than one pts step marks the first dropped frame.
+    if (!first_drop_ && duration > 1)
+      first_drop_ = last_pts_ + 1;
+    last_pts_ = pkt->data.frame.pts;
+    bits_in_last_frame_ = frame_size_in_bits;
+    ++frame_number_;
+  }
+  // Computes the datarates of the pass; they stay at their reset values when
+  // no bits were produced.
+  virtual void EndPassHook(void) {
+    if (bits_total_) {
+      const double file_size_in_kb = bits_total_ / 1000.;  // bits per kilobit
+      duration_ = (last_pts_ + 1) * timebase_;
+      effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0
+          / (cfg_.rc_buf_initial_sz / 1000.0 + duration_);
+      file_datarate_ = file_size_in_kb / duration_;
+    }
+  }
+  vpx_codec_pts_t last_pts_;
+  int64_t bits_in_buffer_model_;
+  double timebase_;
+  int frame_number_;
+  vpx_codec_pts_t first_drop_;
+  int64_t bits_total_;
+  double duration_;
+  double file_datarate_;
+  double effective_datarate_;
+  size_t bits_in_last_frame_;
+  vpx_svc_extra_cfg_t svc_params_;
+  int speed_setting_;
+};
+// Splits enc_cfg->rc_target_bitrate across the spatial layers in proportion
+// to their scaling factors (num / den) and stores the result in
+// ss_target_bitrate. Within each spatial layer the cumulative temporal layer
+// targets are then written to layer_target_bitrate:
+//   temporal_layering_mode == 3: 1/2, 3/4 and all of the spatial layer rate;
+//   temporal_layering_mode == 2: 2/3 and all of the spatial layer rate.
+// Other modes leave layer_target_bitrate untouched. total_rate is not used;
+// the total is always read from enc_cfg.
+static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
+                                  const vpx_svc_extra_cfg_t *svc_params,
+                                  int spatial_layers,
+                                  int temporal_layers,
+                                  int temporal_layering_mode,
+                                  unsigned int total_rate) {
+  int sl;
+  float total = 0;
+  float alloc_ratio[VPX_MAX_LAYERS] = {0};
+  (void)total_rate;
+  for (sl = 0; sl < spatial_layers; ++sl) {
+    if (svc_params->scaling_factor_den[sl] > 0) {
+      alloc_ratio[sl] = static_cast<float>(
+          svc_params->scaling_factor_num[sl] * 1.0 /
+          svc_params->scaling_factor_den[sl]);
+      total += alloc_ratio[sl];
+    }
+  }
+  // Without any valid scaling factor the ratios below would be 0 / 0, and
+  // converting the resulting NaN to an integer is undefined behavior.
+  ASSERT_GT(total, 0) << "No spatial layer has a valid scaling factor";
+  for (sl = 0; sl < spatial_layers; ++sl) {
+    const unsigned int spatial_layer_target = static_cast<unsigned int>(
+        enc_cfg->rc_target_bitrate * alloc_ratio[sl] / total);
+    const int index = sl * temporal_layers;
+    enc_cfg->ss_target_bitrate[sl] = spatial_layer_target;
+    if (temporal_layering_mode == 3) {
+      enc_cfg->layer_target_bitrate[index] =
+          spatial_layer_target >> 1;
+      enc_cfg->layer_target_bitrate[index + 1] =
+          (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
+      enc_cfg->layer_target_bitrate[index + 2] =
+          spatial_layer_target;
+    } else if (temporal_layering_mode == 2) {
+      enc_cfg->layer_target_bitrate[index] =
+          spatial_layer_target * 2 / 3;
+      enc_cfg->layer_target_bitrate[index + 1] =
+          spatial_layer_target;
+    }
+  }
+}
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
+// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
+// 3 temporal layers, at total target bitrates of 400, 600 and 800.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 500;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.ss_number_layers = 2;
+ cfg_.ts_number_layers = 3;
+ cfg_.ts_rate_decimator[0] = 4;  // frame rate divisor of temporal layer 0
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.g_error_resilient = 1;
+ cfg_.temporal_layering_mode = 3;  // three-way split in assign_layer_bitrates()
+ svc_params_.scaling_factor_num[0] = 144;  // layer 0 scale: 144 / 288
+ svc_params_.scaling_factor_den[0] = 288;
+ svc_params_.scaling_factor_num[1] = 288;  // layer 1 scale: 288 / 288
+ svc_params_.scaling_factor_den[1] = 288;
+ // TODO(wonkap/marpan): No frame drop for now, we need to implement correct
+ // frame dropping for SVC.
+ cfg_.rc_dropframe_thresh = 0;
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 200);
+ // TODO(wonkap/marpan): Check that effective_datarate for each layer hits the
+ // layer target_bitrate. Also check if test can pass at lower bitrate (~200k).
+ for (int i = 400; i <= 800; i += 200) {
+ cfg_.rc_target_bitrate = i;
+ ResetModel();
+ assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+ cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+ cfg_.rc_target_bitrate);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
+ << " The datarate for the file exceeds the target by too much!";
+ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
+ << " The datarate for the file is lower than the target by too much!";
+ }
+}
+#endif
VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES);
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
::testing::Values(::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
::testing::Range(2, 7));
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc,
+ ::testing::Values(::libvpx_test::kRealTime),
+ ::testing::Range(5, 8));
} // namespace
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -133,6 +133,10 @@
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
+ void Control(int ctrl_id, struct vpx_svc_parameters *arg) {
+ const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();  // fail on any codec error
+ }
#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
void Control(int ctrl_id, vpx_active_map_t *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -453,6 +453,7 @@
TEST_F(SvcTest, OnePassEncodeThreeFrames) {
codec_enc_.g_pass = VPX_RC_ONE_PASS;
+ codec_enc_.g_lag_in_frames = 0;
vpx_fixed_buf outputs[3];
memset(&outputs[0], 0, sizeof(outputs));
Pass2EncodeNFrames(NULL, 3, 2, &outputs[0]);
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -687,17 +687,28 @@
s = x->plane[0].src.buf;
sp = x->plane[0].src.stride;
- if (!is_key_frame) {
+ if (!is_key_frame && !(is_one_pass_cbr_svc(cpi) &&
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
+ // In the case of spatial/temporal scalable coding, the assumption here is
+ // that the temporal reference frame will always be of type LAST_FRAME.
+ // TODO(marpan): If that assumption is broken, we need to revisit this code.
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
unsigned int uv_sad;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
- const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ const YV12_BUFFER_CONFIG *yv12_g = NULL;
unsigned int y_sad, y_sad_g;
const BLOCK_SIZE bsize = BLOCK_32X32
+ (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows);
assert(yv12 != NULL);
+
+ if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) {
+ // For now, GOLDEN will not be used for non-zero spatial layers, since
+ // it may not be a temporal reference.
+ yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ }
+
if (yv12_g && yv12_g != yv12) {
vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
&cm->frame_refs[GOLDEN_FRAME - 1].sf);
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -756,6 +756,8 @@
cm->height = oxcf->height;
vp9_alloc_compressor_data(cpi);
+ cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
+
// Single thread case: use counts in common.
cpi->td.counts = &cm->counts;
@@ -2265,8 +2267,9 @@
pkt.data.psnr.psnr[i] = psnr.psnr[i];
}
pkt.kind = VPX_CODEC_PSNR_PKT;
- if (is_two_pass_svc(cpi))
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].psnr_pkt = pkt.data.psnr;
+ if (cpi->use_svc)
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers].psnr_pkt = pkt.data.psnr;
else
vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
}
@@ -3667,9 +3670,11 @@
}
cm->prev_frame = cm->cur_frame;
- if (is_two_pass_svc(cpi))
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type =
- cm->frame_type;
+ if (cpi->use_svc)
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers +
+ cpi->svc.temporal_layer_id].last_frame_type =
+ cm->frame_type;
}
static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
@@ -3930,6 +3935,8 @@
#endif
if (oxcf->pass == 2)
vp9_restore_layer_context(cpi);
+ } else if (is_one_pass_cbr_svc(cpi)) {
+ vp9_one_pass_cbr_svc_start_layer(cpi);
}
vpx_usec_timer_start(&cmptimer);
@@ -3948,9 +3955,11 @@
// Normal defaults
cm->reset_frame_context = 0;
cm->refresh_frame_context = 1;
- cpi->refresh_last_frame = 1;
- cpi->refresh_golden_frame = 0;
- cpi->refresh_alt_ref_frame = 0;
+ if (!is_one_pass_cbr_svc(cpi)) {
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ }
// Should we encode an arf frame.
arf_src_index = get_arf_src_index(cpi);
@@ -4006,12 +4015,11 @@
}
// Read in the source frame.
-#if CONFIG_SPATIAL_SVC
- if (is_two_pass_svc(cpi))
+ if (cpi->use_svc)
source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
else
-#endif
source = vp9_lookahead_pop(cpi->lookahead, flush);
+
if (source != NULL) {
cm->show_frame = 1;
cm->intra_only = 0;
@@ -4060,8 +4068,7 @@
adjust_frame_rate(cpi, source);
}
- if (cpi->svc.number_temporal_layers > 1 &&
- oxcf->rc_mode == VPX_CBR) {
+ if (is_one_pass_cbr_svc(cpi)) {
vp9_update_temporal_layer_framerate(cpi);
vp9_restore_layer_context(cpi);
}
@@ -4143,11 +4150,10 @@
}
// Save layer specific state.
- if ((cpi->svc.number_temporal_layers > 1 &&
- oxcf->rc_mode == VPX_CBR) ||
- ((cpi->svc.number_temporal_layers > 1 ||
- cpi->svc.number_spatial_layers > 1) &&
- oxcf->pass == 2)) {
+ if (is_one_pass_cbr_svc(cpi) ||
+ ((cpi->svc.number_temporal_layers > 1 ||
+ cpi->svc.number_spatial_layers > 1) &&
+ oxcf->pass == 2)) {
vp9_save_layer_context(cpi);
}
@@ -4342,6 +4348,12 @@
// May need the empty frame after an visible frame.
cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE;
+ }
+ } else if (is_one_pass_cbr_svc(cpi)) {
+ if (cm->show_frame) {
+ ++cpi->svc.spatial_layer_to_encode;
+ if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
+ cpi->svc.spatial_layer_to_encode = 0;
}
}
return 0;
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -194,10 +194,10 @@
int ss_number_layers; // Number of spatial layers.
int ts_number_layers; // Number of temporal layers.
// Bitrate allocation for spatial layers.
+ int layer_target_bitrate[VPX_MAX_LAYERS];
int ss_target_bitrate[VPX_SS_MAX_LAYERS];
int ss_enable_auto_arf[VPX_SS_MAX_LAYERS];
// Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
- int ts_target_bitrate[VPX_TS_MAX_LAYERS];
int ts_rate_decimator[VPX_TS_MAX_LAYERS];
int enable_auto_arf;
@@ -237,6 +237,7 @@
int use_highbitdepth;
#endif
vpx_color_space_t color_space;
+ VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
} VP9EncoderConfig;
static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
@@ -611,11 +612,13 @@
void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) {
- return cpi->use_svc &&
- ((cpi->svc.number_spatial_layers > 1) ||
- (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.pass != 0));
+ return cpi->use_svc && cpi->oxcf.pass != 0;
}
+static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) {
+ return (cpi->use_svc && cpi->oxcf.pass == 0);  // NOTE: rc_mode is not tested.
+}
+
static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&
(cpi->oxcf.enable_auto_arf &&
@@ -641,6 +644,8 @@
}
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
+// Maps a (spatial, temporal) layer pair to one flat per-layer array index.
+#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
#ifdef __cplusplus
} // extern "C"
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -234,13 +234,16 @@
return target;
}
-// Update the buffer level for higher layers, given the encoded current layer.
+// Update the buffer level for higher temporal layers, given the encoded current
+// temporal layer.
static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
- int temporal_layer = 0;
+ int i = 0;
int current_temporal_layer = svc->temporal_layer_id;
- for (temporal_layer = current_temporal_layer + 1;
- temporal_layer < svc->number_temporal_layers; ++temporal_layer) {
- LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer];
+ for (i = current_temporal_layer + 1;
+ i < svc->number_temporal_layers; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -
encoded_frame_size);
@@ -268,7 +271,7 @@
rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->buffer_level = rc->bits_off_target;
- if (cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR) {
+ if (is_one_pass_cbr_svc(cpi)) {
update_layer_buffer_level(&cpi->svc, encoded_frame_size);
}
}
@@ -1418,13 +1421,14 @@
} else {
target = rc->avg_frame_bandwidth;
}
- if (svc->number_temporal_layers > 1 &&
- oxcf->rc_mode == VPX_CBR) {
+ if (is_one_pass_cbr_svc(cpi)) {
// Note that for layers, avg_frame_bandwidth is the cumulative
// per-frame-bandwidth. For the target size of this frame, use the
// layer average frame size (i.e., non-cumulative per-frame-bw).
- int current_temporal_layer = svc->temporal_layer_id;
- const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer];
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id,
+ svc->temporal_layer_id, svc->number_temporal_layers);
+ const LAYER_CONTEXT *lc = &svc->layer_context[layer];
target = lc->avg_frame_size;
min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
}
@@ -1459,7 +1463,9 @@
if (svc->number_temporal_layers > 1 &&
oxcf->rc_mode == VPX_CBR) {
// Use the layer framerate for temporal layers CBR mode.
- const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id];
+ const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id,
+ svc->temporal_layer_id, svc->number_temporal_layers);
+ const LAYER_CONTEXT *lc = &svc->layer_context[layer];
framerate = lc->framerate;
}
kf_boost = MAX(kf_boost, (int)(2 * framerate - 16));
@@ -1472,10 +1478,27 @@
return vp9_rc_clamp_iframe_target_size(cpi, target);
}
+// Called when a key frame is encoded: sets temporal_layer_id to 0 and zeroes
+// the frame counters stored in each spatial layer's first (tl == 0) context.
+static void reset_temporal_layer_to_zero(VP9_COMP *cpi) {
+ int sl;
+ LAYER_CONTEXT *lc = NULL;
+ cpi->svc.temporal_layer_id = 0;
+
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
+ lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers];
+ lc->current_video_frame_in_layer = 0;
+ lc->frames_from_key_frame = 0;
+ }
+}
+
void vp9_rc_get_svc_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
int target = rc->avg_frame_bandwidth;
+ const int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
+ cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers);
+
if ((cm->current_video_frame == 0) ||
(cpi->frame_flags & FRAMEFLAGS_KEY) ||
(cpi->oxcf.auto_key && (rc->frames_since_key %
@@ -1484,30 +1507,39 @@
rc->source_alt_ref_active = 0;
if (is_two_pass_svc(cpi)) {
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1;
+ cpi->svc.layer_context[layer].is_key_frame = 1;
cpi->ref_frame_flags &=
(~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
- }
-
- if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
+ } else if (is_one_pass_cbr_svc(cpi)) {
+ cpi->svc.layer_context[layer].is_key_frame = 1;
+ reset_temporal_layer_to_zero(cpi);
+ cpi->ref_frame_flags &=
+ (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
+ // Assumption here is that LAST_FRAME is being updated for a keyframe.
+ // Thus no change in update flags.
target = calc_iframe_target_size_one_pass_cbr(cpi);
}
} else {
cm->frame_type = INTER_FRAME;
-
if (is_two_pass_svc(cpi)) {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
if (cpi->svc.spatial_layer_id == 0) {
lc->is_key_frame = 0;
} else {
- lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
+ lc->is_key_frame =
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
if (lc->is_key_frame)
cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
}
cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
- }
-
- if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
+ } else if (is_one_pass_cbr_svc(cpi)) {
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
+ if (cpi->svc.spatial_layer_id == 0) {
+ lc->is_key_frame = 0;
+ } else {
+ lc->is_key_frame =
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
+ }
target = calc_pframe_target_size_one_pass_cbr(cpi);
}
}
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -21,23 +21,17 @@
void vp9_init_layer_context(VP9_COMP *const cpi) {
SVC *const svc = &cpi->svc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- int layer;
- int layer_end;
+ int sl, tl;
int alt_ref_idx = svc->number_spatial_layers;
svc->spatial_layer_id = 0;
svc->temporal_layer_id = 0;
- if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
- layer_end = svc->number_temporal_layers;
- } else {
- layer_end = svc->number_spatial_layers;
-
- if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
- if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img,
- SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT,
- cpi->common.subsampling_x,
- cpi->common.subsampling_y,
+ if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
+ if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img,
+ SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT,
+ cpi->common.subsampling_x,
+ cpi->common.subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cpi->common.use_highbitdepth,
#endif
@@ -44,60 +38,62 @@
VP9_ENC_BORDER_IN_PIXELS,
cpi->common.byte_alignment,
NULL, NULL, NULL))
- vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate empty frame for multiple frame "
- "contexts");
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate empty frame for multiple frame "
+ "contexts");
- memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80,
- cpi->svc.empty_frame.img.buffer_alloc_sz);
- }
+ memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80,
+ cpi->svc.empty_frame.img.buffer_alloc_sz);
}
- for (layer = 0; layer < layer_end; ++layer) {
- LAYER_CONTEXT *const lc = &svc->layer_context[layer];
- RATE_CONTROL *const lrc = &lc->rc;
- int i;
- lc->current_video_frame_in_layer = 0;
- lc->layer_size = 0;
- lc->frames_from_key_frame = 0;
- lc->last_frame_type = FRAME_TYPES;
- lrc->ni_av_qi = oxcf->worst_allowed_q;
- lrc->total_actual_bits = 0;
- lrc->total_target_vs_actual = 0;
- lrc->ni_tot_qi = 0;
- lrc->tot_q = 0.0;
- lrc->avg_q = 0.0;
- lrc->ni_frames = 0;
- lrc->decimation_count = 0;
- lrc->decimation_factor = 0;
+ for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+ for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
+ int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ int i;
+ lc->current_video_frame_in_layer = 0;
+ lc->layer_size = 0;
+ lc->frames_from_key_frame = 0;
+ lc->last_frame_type = FRAME_TYPES;
+ lrc->ni_av_qi = oxcf->worst_allowed_q;
+ lrc->total_actual_bits = 0;
+ lrc->total_target_vs_actual = 0;
+ lrc->ni_tot_qi = 0;
+ lrc->tot_q = 0.0;
+ lrc->avg_q = 0.0;
+ lrc->ni_frames = 0;
+ lrc->decimation_count = 0;
+ lrc->decimation_factor = 0;
- for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
- lrc->rate_correction_factors[i] = 1.0;
- }
+ for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
+ lrc->rate_correction_factors[i] = 1.0;
+ }
- if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
- lc->target_bandwidth = oxcf->ts_target_bitrate[layer];
- lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
- lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
- lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
- } else {
- lc->target_bandwidth = oxcf->ss_target_bitrate[layer];
- lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
- lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
- lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q +
- oxcf->best_allowed_q) / 2;
- lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q +
+ if (cpi->oxcf.rc_mode == VPX_CBR) {
+ lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
+ lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
+ lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
+ lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
+ } else {
+ lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
+ lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
+ lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
+ lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q +
oxcf->best_allowed_q) / 2;
- if (oxcf->ss_enable_auto_arf[layer])
- lc->alt_ref_idx = alt_ref_idx++;
- else
- lc->alt_ref_idx = INVALID_IDX;
- lc->gold_ref_idx = INVALID_IDX;
- }
+ lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q +
+ oxcf->best_allowed_q) / 2;
+ if (oxcf->ss_enable_auto_arf[sl])
+ lc->alt_ref_idx = alt_ref_idx++;
+ else
+ lc->alt_ref_idx = INVALID_IDX;
+ lc->gold_ref_idx = INVALID_IDX;
+ }
- lrc->buffer_level = oxcf->starting_buffer_level_ms *
- lc->target_bandwidth / 1000;
- lrc->bits_off_target = lrc->buffer_level;
+ lrc->buffer_level = oxcf->starting_buffer_level_ms *
+ lc->target_bandwidth / 1000;
+ lrc->bits_off_target = lrc->buffer_level;
+ }
}
// Still have extra buffer for base layer golden frame
@@ -112,53 +108,100 @@
SVC *const svc = &cpi->svc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
const RATE_CONTROL *const rc = &cpi->rc;
- int layer;
- int layer_end;
+ int sl, tl, layer = 0, spatial_layer_target;
float bitrate_alloc = 1.0;
- if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
- layer_end = svc->number_temporal_layers;
- } else {
- layer_end = svc->number_spatial_layers;
- }
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
+ for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+ spatial_layer_target = 0;
- for (layer = 0; layer < layer_end; ++layer) {
- LAYER_CONTEXT *const lc = &svc->layer_context[layer];
- RATE_CONTROL *const lrc = &lc->rc;
+ for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
+ layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
+ svc->layer_context[layer].target_bandwidth =
+ oxcf->layer_target_bitrate[layer];
+ }
- if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
- lc->target_bandwidth = oxcf->ts_target_bitrate[layer];
- } else {
- lc->target_bandwidth = oxcf->ss_target_bitrate[layer];
+ layer = LAYER_IDS_TO_IDX(sl, ((oxcf->ts_number_layers - 1) < 0 ?
+ 0 : (oxcf->ts_number_layers - 1)), oxcf->ts_number_layers);
+ spatial_layer_target =
+ svc->layer_context[layer].target_bandwidth =
+ oxcf->layer_target_bitrate[layer];
+
+ for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
+ LAYER_CONTEXT *const lc =
+ &svc->layer_context[sl * oxcf->ts_number_layers + tl];
+ RATE_CONTROL *const lrc = &lc->rc;
+ layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
+ lc->spatial_layer_target_bandwidth = spatial_layer_target;
+ bitrate_alloc = (float)lc->target_bandwidth / spatial_layer_target;
+ lrc->starting_buffer_level =
+ (int64_t)(rc->starting_buffer_level * bitrate_alloc);
+ lrc->optimal_buffer_level =
+ (int64_t)(rc->optimal_buffer_level * bitrate_alloc);
+ lrc->maximum_buffer_size =
+ (int64_t)(rc->maximum_buffer_size * bitrate_alloc);
+ lrc->bits_off_target =
+ MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
+ lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
+ // ts_rate_decimator[] has one entry per temporal layer: index with tl.
+ lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl];
+ lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+ lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
+ lrc->worst_quality = rc->worst_quality;
+ lrc->best_quality = rc->best_quality;
+ }
}
- bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
- // Update buffer-related quantities.
- lrc->starting_buffer_level =
- (int64_t)(rc->starting_buffer_level * bitrate_alloc);
- lrc->optimal_buffer_level =
- (int64_t)(rc->optimal_buffer_level * bitrate_alloc);
- lrc->maximum_buffer_size =
- (int64_t)(rc->maximum_buffer_size * bitrate_alloc);
- lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
- lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
- // Update framerate-related quantities.
+ } else {
+ int layer_end;
+ float bitrate_alloc = 1.0;
+
if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
- lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];
+ layer_end = svc->number_temporal_layers;
} else {
- lc->framerate = cpi->framerate;
+ layer_end = svc->number_spatial_layers;
}
- lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
- lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
- // Update qp-related quantities.
- lrc->worst_quality = rc->worst_quality;
- lrc->best_quality = rc->best_quality;
+
+ for (layer = 0; layer < layer_end; ++layer) {
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+
+ lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
+
+ bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
+ // Update buffer-related quantities.
+ lrc->starting_buffer_level =
+ (int64_t)(rc->starting_buffer_level * bitrate_alloc);
+ lrc->optimal_buffer_level =
+ (int64_t)(rc->optimal_buffer_level * bitrate_alloc);
+ lrc->maximum_buffer_size =
+ (int64_t)(rc->maximum_buffer_size * bitrate_alloc);
+ lrc->bits_off_target = MIN(lrc->bits_off_target,
+ lrc->maximum_buffer_size);
+ lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
+ // Update framerate-related quantities.
+ if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
+ lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];
+ } else {
+ lc->framerate = cpi->framerate;
+ }
+ lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+ lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
+ // Update qp-related quantities.
+ lrc->worst_quality = rc->worst_quality;
+ lrc->best_quality = rc->best_quality;
+ }
}
}
static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) {
- return (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ?
- &cpi->svc.layer_context[cpi->svc.temporal_layer_id] :
- &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+ if (is_one_pass_cbr_svc(cpi))
+ return &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id];
+ else
+ return (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.rc_mode == VPX_CBR) ?
+ &cpi->svc.layer_context[cpi->svc.temporal_layer_id] :
+ &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
}
void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) {
@@ -166,18 +209,22 @@
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
LAYER_CONTEXT *const lc = get_layer_context(cpi);
RATE_CONTROL *const lrc = &lc->rc;
- const int layer = svc->temporal_layer_id;
+ // Index into spatial+temporal arrays.
+ const int st_idx = svc->spatial_layer_id * svc->number_temporal_layers +
+ svc->temporal_layer_id;
+ const int tl = svc->temporal_layer_id;
- lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];
+ lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl];
lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
// Update the average layer frame size (non-cumulative per-frame-bw).
- if (layer == 0) {
+ if (tl == 0) {
lc->avg_frame_size = lrc->avg_frame_bandwidth;
} else {
const double prev_layer_framerate =
- cpi->framerate / oxcf->ts_rate_decimator[layer - 1];
- const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1];
+ cpi->framerate / oxcf->ts_rate_decimator[tl - 1];
+ const int prev_layer_target_bandwidth =
+ oxcf->layer_target_bitrate[st_idx - 1];
lc->avg_frame_size =
(int)((lc->target_bandwidth - prev_layer_target_bandwidth) /
(lc->framerate - prev_layer_framerate));
@@ -243,9 +290,8 @@
void vp9_inc_frame_in_layer(VP9_COMP *const cpi) {
LAYER_CONTEXT *const lc =
- (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ?
- &cpi->svc.layer_context[cpi->svc.temporal_layer_id] :
- &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+ &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers];
++lc->current_video_frame_in_layer;
++lc->frames_from_key_frame;
}
@@ -253,10 +299,11 @@
int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
return is_two_pass_svc(cpi) &&
cpi->svc.spatial_layer_id > 0 &&
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame;
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers +
+ cpi->svc.temporal_layer_id].is_key_frame;
}
-#if CONFIG_SPATIAL_SVC
static void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den,
int *width_out, int *height_out) {
@@ -276,6 +323,201 @@
*height_out = h;
}
+// Temporal layering mode 3 (0-2-1-2 pattern). Derives the spatial and temporal
+// layer ids for the layer being encoded, then sets ref_frame_flags, the
+// ext_refresh_* buffer-update flags, and the lst/gld/alt frame buffer indices.
+static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
+ int frame_num_within_temporal_struct = 0;
+ int spatial_id, temporal_id;
+ spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
+ frame_num_within_temporal_struct =
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers].current_video_frame_in_layer % 4;
+ temporal_id = cpi->svc.temporal_layer_id =
+ (frame_num_within_temporal_struct & 1) ? 2 :
+ (frame_num_within_temporal_struct >> 1);  // frames 0,1,2,3 -> tl 0,2,1,2
+ cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame =
+ cpi->ext_refresh_alt_ref_frame = 0;
+ if (!temporal_id) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_last_frame = 1;
+ if (!spatial_id) {
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
+ // layer_context[0] (sl 0, tl 0) is a key frame: reference GOLDEN only.
+ cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ } else {
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ } else if (temporal_id == 1) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ if (!spatial_id) {
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else {
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ } else {
+ if (frame_num_within_temporal_struct == 1) {
+ // First tl2 picture of the 4-frame pattern (frame 1).
+ if (!spatial_id) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else if (spatial_id < cpi->svc.number_spatial_layers - 1) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ } else { // Top spatial layer: no ext refresh flags pending.
+ cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ } else {
+ // Second tl2 picture of the 4-frame pattern (frame 3).
+ if (!spatial_id) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ cpi->ext_refresh_last_frame = 1;
+ } else if (spatial_id < cpi->svc.number_spatial_layers - 1) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ cpi->ext_refresh_last_frame = 1;
+ } else { // Top spatial layer: no ext refresh flags pending.
+ cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ }
+ }
+ if (temporal_id == 0) {
+ cpi->lst_fb_idx = spatial_id;
+ if (spatial_id)
+ cpi->gld_fb_idx = spatial_id - 1;
+ else
+ cpi->gld_fb_idx = 0;
+ cpi->alt_fb_idx = 0;
+ } else if (temporal_id == 1) {
+ cpi->lst_fb_idx = spatial_id;
+ cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
+ cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
+ } else if (frame_num_within_temporal_struct == 1) {
+ cpi->lst_fb_idx = spatial_id;
+ cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
+ cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
+ } else {
+ cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
+ cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
+ cpi->alt_fb_idx = 0;
+ }
+}
+
+// Temporal layering mode 2 (0-1-0-1 pattern). Derives the spatial and temporal
+// layer ids for the layer being encoded, then sets ref_frame_flags, the
+// ext_refresh_* buffer-update flags, and the lst/gld/alt frame buffer indices.
+static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
+ int spatial_id, temporal_id;
+ spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
+ temporal_id = cpi->svc.temporal_layer_id =
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers].current_video_frame_in_layer & 1;
+ cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame =
+ cpi->ext_refresh_alt_ref_frame = 0;
+ if (!temporal_id) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_last_frame = 1;
+ if (!spatial_id) {
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
+ // layer_context[0] (sl 0, tl 0) is a key frame: reference GOLDEN only.
+ cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ } else {
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ } else if (temporal_id == 1) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ if (!spatial_id) {
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else {
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ }
+
+ if (temporal_id == 0) {
+ cpi->lst_fb_idx = spatial_id;
+ if (spatial_id)
+ cpi->gld_fb_idx = spatial_id - 1;
+ else
+ cpi->gld_fb_idx = 0;
+ cpi->alt_fb_idx = 0;
+ } else if (temporal_id == 1) {
+ cpi->lst_fb_idx = spatial_id;
+ cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
+ cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
+ }
+}
+
+// No temporal layering (mode 0): takes the spatial layer to encode, requests a
+// LAST refresh, and sets ref_frame_flags plus the lst/gld buffer indices.
+static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
+ VP9_COMP *const cpi) {
+ int spatial_id;
+ spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
+ cpi->ext_refresh_last_frame =
+ cpi->ext_refresh_golden_frame = cpi->ext_refresh_alt_ref_frame = 0;
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_last_frame = 1;
+ if (!spatial_id) {
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else if (cpi->svc.layer_context[0].is_key_frame) {
+ cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ } else {
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ cpi->lst_fb_idx = spatial_id;
+ if (spatial_id)
+ cpi->gld_fb_idx = spatial_id - 1;
+ else
+ cpi->gld_fb_idx = 0;  // alt_fb_idx is left unchanged in this mode.
+}
+
+int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
+ int width = 0, height = 0;
+ LAYER_CONTEXT *lc = NULL;
+
+ if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
+ set_flags_and_fb_idx_for_temporal_mode3(cpi);
+ } else if (cpi->svc.temporal_layering_mode ==
+ VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
+ set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi);
+ } else if (cpi->svc.temporal_layering_mode ==
+ VP9E_TEMPORAL_LAYERING_MODE_0101) {
+ set_flags_and_fb_idx_for_temporal_mode2(cpi);
+ } else if (cpi->svc.temporal_layering_mode ==
+ VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ // VP9E_TEMPORAL_LAYERING_MODE_BYPASS:
+ // The encoder relies on flags supplied from outside for layering and
+ // sets none of them here. With combined spatial+temporal layering the
+ // buffer indices cannot be derived automatically, so bypass mode is
+ // only supported with a single spatial layer; the assert below
+ // enforces that.
+ assert(cpi->svc.number_spatial_layers == 1);
+ }
+
+ lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers +
+ cpi->svc.temporal_layer_id];
+
+ get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
+ lc->scaling_factor_num, lc->scaling_factor_den,
+ &width, &height);
+
+ if (vp9_set_size_literal(cpi, width, height) != 0)
+ return VPX_CODEC_INVALID_PARAM;
+
+ return 0;
+}
+
+#if CONFIG_SPATIAL_SVC
int vp9_svc_start_frame(VP9_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc;
@@ -386,11 +628,12 @@
return 0;
}
+#endif
+
struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi,
struct lookahead_ctx *ctx,
int drain) {
struct lookahead_entry *buf = NULL;
-
if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
buf = vp9_lookahead_peek(ctx, 0);
if (buf != NULL) {
@@ -400,7 +643,5 @@
}
}
}
-
return buf;
}
-#endif
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -22,6 +22,7 @@
typedef struct {
RATE_CONTROL rc;
int target_bandwidth;
+ int spatial_layer_target_bandwidth; // Target for the spatial layer.
double framerate;
int avg_frame_size;
int max_q;
@@ -64,9 +65,11 @@
YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS];
// Layer context used for rate control in one pass temporal CBR mode or
- // two pass spatial mode. Defined for temporal or spatial layers for now.
- // Does not support temporal combined with spatial RC.
- LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)];
+ // two pass spatial mode.
+ LAYER_CONTEXT layer_context[VPX_MAX_LAYERS];
+ // Indicates what sort of temporal layering is used.
+ // Currently, this only works for CBR mode.
+ VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
} SVC;
struct VP9_COMP;
@@ -109,6 +112,8 @@
// Start a frame and initialize svc parameters
int vp9_svc_start_frame(struct VP9_COMP *const cpi);
+
+int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi);
#ifdef __cplusplus
} // extern "C"
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -682,7 +682,7 @@
if (frames_to_blur > 0) {
// Setup scaling factors. Scaling on each of the arnr frames is not
// supported.
- if (is_two_pass_svc(cpi)) {
+ if (cpi->use_svc) {
// In spatial svc the scaling factors might be less then 1/2.
// So we will use non-normative scaling.
int frame_used = 0;
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -176,15 +176,23 @@
RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS);
RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
+ if (cfg->ss_number_layers * cfg->ts_number_layers > VPX_MAX_LAYERS)
+ ERROR("ss_number_layers * ts_number_layers is out of range");
if (cfg->ts_number_layers > 1) {
- unsigned int i;
- for (i = 1; i < cfg->ts_number_layers; ++i)
- if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i - 1])
+ unsigned int sl, tl;
+ for (sl = 1; sl < cfg->ss_number_layers; ++sl) {
+ for (tl = 1; tl < cfg->ts_number_layers; ++tl) {
+ const int layer =
+ LAYER_IDS_TO_IDX(sl, tl, cfg->ts_number_layers);
+ if (cfg->layer_target_bitrate[layer] <
+ cfg->layer_target_bitrate[layer - 1])
ERROR("ts_target_bitrate entries are not increasing");
+ }
+ }
RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers - 1], 1, 1);
- for (i = cfg->ts_number_layers - 2; i > 0; --i)
- if (cfg->ts_rate_decimator[i - 1] != 2 * cfg->ts_rate_decimator[i])
+ for (tl = cfg->ts_number_layers - 2; tl > 0; --tl)
+ if (cfg->ts_rate_decimator[tl - 1] != 2 * cfg->ts_rate_decimator[tl])
ERROR("ts_rate_decimator factors are not powers of 2");
}
@@ -360,6 +368,7 @@
const vpx_codec_enc_cfg_t *cfg,
const struct vp9_extracfg *extra_cfg) {
const int is_vbr = cfg->rc_end_usage == VPX_VBR;
+ int sl, tl;
oxcf->profile = cfg->g_profile;
oxcf->max_threads = (int)cfg->g_threads;
oxcf->width = cfg->g_w;
@@ -460,35 +469,33 @@
oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost;
oxcf->ss_number_layers = cfg->ss_number_layers;
+ oxcf->ts_number_layers = cfg->ts_number_layers;
+ oxcf->temporal_layering_mode = (enum vp9e_temporal_layering_mode)
+ cfg->temporal_layering_mode;
- if (oxcf->ss_number_layers > 1) {
- int i;
- for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
- oxcf->ss_target_bitrate[i] = 1000 * cfg->ss_target_bitrate[i];
+ for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
#if CONFIG_SPATIAL_SVC
- oxcf->ss_enable_auto_arf[i] = cfg->ss_enable_auto_alt_ref[i];
+ oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl];
#endif
+ for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
+ oxcf->layer_target_bitrate[sl * oxcf->ts_number_layers + tl] =
+ 1000 * cfg->layer_target_bitrate[sl * oxcf->ts_number_layers + tl];
}
- } else if (oxcf->ss_number_layers == 1) {
+ }
+ if (oxcf->ss_number_layers == 1 && oxcf->pass != 0) {
oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth;
#if CONFIG_SPATIAL_SVC
oxcf->ss_enable_auto_arf[0] = extra_cfg->enable_auto_alt_ref;
#endif
}
-
- oxcf->ts_number_layers = cfg->ts_number_layers;
-
if (oxcf->ts_number_layers > 1) {
- int i;
- for (i = 0; i < VPX_TS_MAX_LAYERS; ++i) {
- oxcf->ts_target_bitrate[i] = 1000 * cfg->ts_target_bitrate[i];
- oxcf->ts_rate_decimator[i] = cfg->ts_rate_decimator[i];
+ for (tl = 0; tl < VPX_TS_MAX_LAYERS; ++tl) {
+ oxcf->ts_rate_decimator[tl] = cfg->ts_rate_decimator[tl] ?
+ cfg->ts_rate_decimator[tl] : 1;
}
} else if (oxcf->ts_number_layers == 1) {
- oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth;
oxcf->ts_rate_decimator[0] = 1;
}
-
/*
printf("Current VP9 Settings: \n");
printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
@@ -902,11 +909,12 @@
unsigned int lib_flags) {
vpx_codec_frame_flags_t flags = lib_flags << 16;
- if (lib_flags & FRAMEFLAGS_KEY
-#if CONFIG_SPATIAL_SVC
- || (is_two_pass_svc(cpi) && cpi->svc.layer_context[0].is_key_frame)
-#endif
- )
+ if (lib_flags & FRAMEFLAGS_KEY ||
+ (cpi->use_svc &&
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers +
+ cpi->svc.temporal_layer_id].is_key_frame)
+ )
flags |= VPX_FRAME_IS_KEY;
if (cpi->droppable)
@@ -1022,16 +1030,15 @@
vpx_codec_cx_pkt_t pkt;
#if CONFIG_SPATIAL_SVC
- if (is_two_pass_svc(cpi))
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].layer_size += size;
+ if (cpi->use_svc)
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers].layer_size += size;
#endif
// Pack invisible frames with the next visible frame
- if (!cpi->common.show_frame
-#if CONFIG_SPATIAL_SVC
- || (is_two_pass_svc(cpi) &&
- cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
-#endif
+ if (!cpi->common.show_frame ||
+ (cpi->use_svc &&
+ cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
) {
if (ctx->pending_cx_data == 0)
ctx->pending_cx_data = cx_data;
@@ -1089,7 +1096,8 @@
pkt.data.frame.partition_id = -1;
if(ctx->output_cx_pkt_cb.output_cx_pkt)
- ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, ctx->output_cx_pkt_cb.user_priv);
+ ctx->output_cx_pkt_cb.output_cx_pkt(&pkt,
+ ctx->output_cx_pkt_cb.user_priv);
else
vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
@@ -1096,17 +1104,18 @@
cx_data += size;
cx_data_sz -= size;
#if CONFIG_SPATIAL_SVC
- if (is_two_pass_svc(cpi) && !ctx->output_cx_pkt_cb.output_cx_pkt) {
+ if (cpi->use_svc && !ctx->output_cx_pkt_cb.output_cx_pkt) {
vpx_codec_cx_pkt_t pkt_sizes, pkt_psnr;
- int i;
+ int sl;
vp9_zero(pkt_sizes);
vp9_zero(pkt_psnr);
pkt_sizes.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES;
pkt_psnr.kind = VPX_CODEC_SPATIAL_SVC_LAYER_PSNR;
- for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[i];
- pkt_sizes.data.layer_sizes[i] = lc->layer_size;
- pkt_psnr.data.layer_psnr[i] = lc->psnr_pkt;
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
+ LAYER_CONTEXT *lc =
+ &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers];
+ pkt_sizes.data.layer_sizes[sl] = lc->layer_size;
+ pkt_psnr.data.layer_psnr[sl] = lc->psnr_pkt;
lc->layer_size = 0;
}
@@ -1115,6 +1124,11 @@
vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_psnr);
}
#endif
+ if (is_one_pass_cbr_svc(cpi) &&
+ (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
+ // Encoded all spatial layers; exit loop.
+ break;
+ }
}
}
}
@@ -1292,16 +1306,20 @@
static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) {
int data = va_arg(args, int);
const vpx_codec_enc_cfg_t *cfg = &ctx->cfg;
+ // Both one-pass and two-pass RC are supported now.
+ // User setting this has to make sure of the following.
+ // In two-pass setting: either (but not both)
+ // cfg->ss_number_layers > 1, or cfg->ts_number_layers > 1
+ // In one-pass setting:
+ // either or both cfg->ss_number_layers > 1, or cfg->ts_number_layers > 1
vp9_set_svc(ctx->cpi, data);
- // CBR or two pass mode for SVC with both temporal and spatial layers
- // not yet supported.
+
if (data == 1 &&
- (cfg->rc_end_usage == VPX_CBR ||
- cfg->g_pass == VPX_RC_FIRST_PASS ||
+ (cfg->g_pass == VPX_RC_FIRST_PASS ||
cfg->g_pass == VPX_RC_LAST_PASS) &&
- cfg->ss_number_layers > 1 &&
- cfg->ts_number_layers > 1) {
+ cfg->ss_number_layers > 1 &&
+ cfg->ts_number_layers > 1) {
return VPX_CODEC_INVALID_PARAM;
}
return VPX_CODEC_OK;
@@ -1347,15 +1365,21 @@
va_list args) {
VP9_COMP *const cpi = ctx->cpi;
vpx_svc_extra_cfg_t *const params = va_arg(args, vpx_svc_extra_cfg_t *);
- int i;
+ int sl, tl;
- for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[i];
-
- lc->max_q = params->max_quantizers[i];
- lc->min_q = params->min_quantizers[i];
- lc->scaling_factor_num = params->scaling_factor_num[i];
- lc->scaling_factor_den = params->scaling_factor_den[i];
+ // Number of temporal layers and number of spatial layers have to be set
+ // properly before calling this control function.
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
+ for (tl = 0; tl < cpi->svc.number_temporal_layers; ++tl) {
+ const int layer =
+ LAYER_IDS_TO_IDX(sl, tl, cpi->svc.number_temporal_layers);
+ LAYER_CONTEXT *lc =
+ &cpi->svc.layer_context[layer];
+ lc->max_q = params->max_quantizers[sl];
+ lc->min_q = params->min_quantizers[sl];
+ lc->scaling_factor_num = params->scaling_factor_num[sl];
+ lc->scaling_factor_den = params->scaling_factor_den[sl];
+ }
}
return VPX_CODEC_OK;
@@ -1495,6 +1519,8 @@
{0}, // ts_rate_decimator
0, // ts_periodicity
{0}, // ts_layer_id
+ {0}, // layer_target_bitrate
+ 0 // temporal_layering_mode
}
},
};
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -302,32 +302,80 @@
vpx_codec_enc_cfg_t *const enc_cfg) {
int i;
const SvcInternal_t *const si = get_const_svc_internal(svc_ctx);
+ int sl, tl, spatial_layer_target;
- if (si->bitrates[0] != 0) {
- enc_cfg->rc_target_bitrate = 0;
- for (i = 0; i < svc_ctx->spatial_layers; ++i) {
- enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i];
- enc_cfg->rc_target_bitrate += si->bitrates[i];
- }
- } else {
- float total = 0;
- float alloc_ratio[VPX_SS_MAX_LAYERS] = {0};
+ if (svc_ctx->temporal_layering_mode != 0) {
+ if (si->bitrates[0] != 0) {
+ enc_cfg->rc_target_bitrate = 0;
+ for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
+ enc_cfg->ss_target_bitrate[sl*svc_ctx->temporal_layers] = 0;
+ for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) {
+ enc_cfg->ss_target_bitrate[sl*svc_ctx->temporal_layers]
+ += (unsigned int)si->bitrates[sl * svc_ctx->temporal_layers + tl];
+ enc_cfg->layer_target_bitrate[sl*svc_ctx->temporal_layers + tl]
+ = si->bitrates[sl * svc_ctx->temporal_layers + tl];
+ }
+ }
+ } else {
+ float total = 0;
+ float alloc_ratio[VPX_MAX_LAYERS] = {0};
- for (i = 0; i < svc_ctx->spatial_layers; ++i) {
- if (si->svc_params.scaling_factor_den[i] > 0) {
- alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 /
- si->svc_params.scaling_factor_den[i]);
+ for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
+ if (si->svc_params.scaling_factor_den[sl] > 0) {
+ alloc_ratio[sl] = (float)(si->svc_params.scaling_factor_num[sl] *
+ 1.0 / si->svc_params.scaling_factor_den[sl]);
+ total += alloc_ratio[sl];
+ }
+ }
- alloc_ratio[i] *= alloc_ratio[i];
- total += alloc_ratio[i];
+ for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
+ enc_cfg->ss_target_bitrate[sl] = spatial_layer_target =
+ (unsigned int)(enc_cfg->rc_target_bitrate *
+ alloc_ratio[sl] / total);
+ if (svc_ctx->temporal_layering_mode == 3) {
+ enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] =
+ spatial_layer_target >> 1;
+ enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] =
+ (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
+ enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] =
+ spatial_layer_target;
+ } else if (svc_ctx->temporal_layering_mode == 2) {
+ enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] =
+ spatial_layer_target * 2 / 3;
+ enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] =
+ spatial_layer_target;
+ } else {
+ // User should explicitly assign bitrates in this case.
+ assert(0);
+ }
}
}
+ } else {
+ if (si->bitrates[0] != 0) {
+ enc_cfg->rc_target_bitrate = 0;
+ for (i = 0; i < svc_ctx->spatial_layers; ++i) {
+ enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i];
+ enc_cfg->rc_target_bitrate += si->bitrates[i];
+ }
+ } else {
+ float total = 0;
+ float alloc_ratio[VPX_MAX_LAYERS] = {0};
- for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
- if (total > 0) {
- enc_cfg->ss_target_bitrate[i] = (unsigned int)
- (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total);
+ for (i = 0; i < svc_ctx->spatial_layers; ++i) {
+ if (si->svc_params.scaling_factor_den[i] > 0) {
+ alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 /
+ si->svc_params.scaling_factor_den[i]);
+
+ alloc_ratio[i] *= alloc_ratio[i];
+ total += alloc_ratio[i];
+ }
}
+ for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
+ if (total > 0) {
+ enc_cfg->layer_target_bitrate[i] = (unsigned int)
+ (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total);
+ }
+ }
}
}
}
@@ -365,6 +413,14 @@
return VPX_CODEC_INVALID_PARAM;
}
+ // Note: temporal_layering_mode only applies to one-pass CBR
+ // si->svc_params.temporal_layering_mode = svc_ctx->temporal_layering_mode;
+ if (svc_ctx->temporal_layering_mode == 3) {
+ svc_ctx->temporal_layers = 3;
+ } else if (svc_ctx->temporal_layering_mode == 2) {
+ svc_ctx->temporal_layers = 2;
+ }
+
for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
si->svc_params.max_quantizers[i] = MAX_QUANTIZER;
si->svc_params.min_quantizers[i] = 0;
@@ -387,6 +443,14 @@
if (svc_ctx->temporal_layers > VPX_TS_MAX_LAYERS)
svc_ctx->temporal_layers = VPX_TS_MAX_LAYERS;
+ if (svc_ctx->temporal_layers * svc_ctx->spatial_layers > VPX_MAX_LAYERS) {
+ svc_log(svc_ctx, SVC_LOG_ERROR,
+ "spatial layers * temporal layers (%d) exceeds the maximum number "
+ "of allowed layers of %d\n",
+ svc_ctx->spatial_layers * svc_ctx->temporal_layers, // requested
+ (int) VPX_MAX_LAYERS); // allowed
+ return VPX_CODEC_INVALID_PARAM;
+ }
assign_layer_bitrates(svc_ctx, enc_cfg);
#if CONFIG_SPATIAL_SVC
@@ -403,10 +467,24 @@
}
}
- // modify encoder configuration
+ if (svc_ctx->threads)
+ enc_cfg->g_threads = svc_ctx->threads;
+
+ // Modify encoder configuration
enc_cfg->ss_number_layers = svc_ctx->spatial_layers;
enc_cfg->ts_number_layers = svc_ctx->temporal_layers;
+ if (enc_cfg->rc_end_usage == VPX_CBR) {
+ enc_cfg->rc_resize_allowed = 0;
+ enc_cfg->rc_min_quantizer = 2;
+ enc_cfg->rc_max_quantizer = 63;
+ enc_cfg->rc_undershoot_pct = 50;
+ enc_cfg->rc_overshoot_pct = 50;
+ enc_cfg->rc_buf_initial_sz = 20;
+ enc_cfg->rc_buf_optimal_sz = 600;
+ enc_cfg->rc_buf_sz = 1000;
+ }
+
if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0)
enc_cfg->g_error_resilient = 1;
@@ -554,7 +632,7 @@
mse[1], mse[2], mse[3]);
bytes_total += si->bytes_sum[i];
- // clear sums for next time
+ // Clear sums for next time.
si->bytes_sum[i] = 0;
for (j = 0; j < COMPONENTS; ++j) {
si->psnr_sum[i][j] = 0;
--- a/vpx/svc_context.h
+++ b/vpx/svc_context.h
@@ -33,10 +33,13 @@
// public interface to svc_command options
int spatial_layers; // number of spatial layers
int temporal_layers; // number of temporal layers
+ int temporal_layering_mode;
SVC_LOG_LEVEL log_level; // amount of information to display
int log_print; // when set, printf log messages instead of returning the
// message with svc_get_message
-
+ int output_rc_stat; // for outputting rc stats
+ int speed; // speed setting for codec
+ int threads;
// private storage for vpx_svc_encode
void *internal;
} SvcContext;
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -511,6 +511,17 @@
*/
VP9E_SET_COLOR_SPACE,
+ /*!\brief Codec control function to set temporal layering mode.
+ * \note Valid ranges: 0..3, default is "0" (VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING).
+ * 0 = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING
+ * 1 = VP9E_TEMPORAL_LAYERING_MODE_BYPASS
+ * 2 = VP9E_TEMPORAL_LAYERING_MODE_0101
+ * 3 = VP9E_TEMPORAL_LAYERING_MODE_0212
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_SET_TEMPORAL_LAYERING_MODE,
+
/*!\brief Codec control function to get an Active map back from the encoder.
*
* Supported in codecs: VP9
@@ -529,6 +540,32 @@
VP8E_ONETWO = 3
} VPX_SCALING_MODE;
+/*!\brief Temporal layering mode enum for VP9 SVC.
+ *
+ * This enum defines the different temporal layering modes.
+ * Supported codecs: VP9 (in SVC mode)
+ *
+ */
+typedef enum vp9e_temporal_layering_mode {
+ /*!\brief No temporal layering.
+ * Used when only spatial layering is used.
+ */
+ VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING = 0,
+
+ /*!\brief Bypass mode.
+ * Used when application needs to control temporal layering.
+ * This will only work when the number of spatial layers equals 1.
+ */
+ VP9E_TEMPORAL_LAYERING_MODE_BYPASS = 1,
+
+ /*!\brief 0-1-0-1... temporal layering scheme with two temporal layers.
+ */
+ VP9E_TEMPORAL_LAYERING_MODE_0101 = 2,
+
+ /*!\brief 0-2-1-2... temporal layering scheme with three temporal layers.
+ */
+ VP9E_TEMPORAL_LAYERING_MODE_0212 = 3
+} VP9E_TEMPORAL_LAYERING_MODE;
/*!\brief vpx region of interest map
*
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -42,9 +42,12 @@
/*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */
#define MAX_PERIODICITY VPX_TS_MAX_PERIODICITY
- /*!\deprecated Use #VPX_TS_MAX_LAYERS instead. */
-#define MAX_LAYERS VPX_TS_MAX_LAYERS
+/*! Temporal+Spatial Scalability: Maximum number of coding layers */
+#define VPX_MAX_LAYERS 12 // Max ts * ss layers, e.g. 3 temporal x 4 spatial.
+/*!\deprecated Use #VPX_MAX_LAYERS instead. */
+#define MAX_LAYERS VPX_MAX_LAYERS // Max ts * ss layers, e.g. 3 x 4.
+
/*! Spatial Scalability: Maximum number of coding layers */
#define VPX_SS_MAX_LAYERS 5
@@ -729,6 +732,22 @@
* ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1).
*/
unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY];
+
+ /*!\brief Target bitrate for each spatial/temporal layer.
+ *
+ * These values specify the target coding bitrate to be used for each
+ * spatial/temporal layer.
+ *
+ */
+ unsigned int layer_target_bitrate[VPX_MAX_LAYERS];
+
+ /*!\brief Temporal layering mode indicating which temporal layering scheme to use.
+ *
+ * The value (refer to VP9E_TEMPORAL_LAYERING_MODE) specifies the
+ * temporal layering mode to use.
+ *
+ */
+ int temporal_layering_mode;
} vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */
/*!\brief vp9 svc extra configure parameters
@@ -737,10 +756,11 @@
*
*/
typedef struct vpx_svc_parameters {
- int max_quantizers[VPX_SS_MAX_LAYERS]; /**< Max Q for each layer */
- int min_quantizers[VPX_SS_MAX_LAYERS]; /**< Min Q for each layer */
- int scaling_factor_num[VPX_SS_MAX_LAYERS]; /**< Scaling factor-numerator*/
- int scaling_factor_den[VPX_SS_MAX_LAYERS]; /**< Scaling factor-denominator*/
+ int max_quantizers[VPX_MAX_LAYERS]; /**< Max Q for each layer */
+ int min_quantizers[VPX_MAX_LAYERS]; /**< Min Q for each layer */
+ int scaling_factor_num[VPX_MAX_LAYERS]; /**< Scaling factor-numerator */
+ int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */
+ int temporal_layering_mode; /**< Temporal layering mode */
} vpx_svc_extra_cfg_t;