shithub: libvpx

Download patch

ref: e899859c48adf4f093fbd4fec74051ac9e108248
parent: 6ce515b9ff9000740bc47bb2dfa1b80df6ec2fc9
author: Minghai Shang <[email protected]>
date: Mon Jul 14 07:24:17 EDT 2014

[spatial svc]Implement alt reference frames

All changes are for spatial svc only.
1. Enable encoding hidden frames in each layer and use the alt reference index to reference the hidden frame in each layer
2. Use golden reference idx for spatial reference
3. For those layers that don't have hidden frames (caused by lack of frame buffers), reference a hidden frame in lower layers
4. Add "auto-alt-refs" in svc options
Change-Id: Idf27d1fd2fb5f3ffd9e86d2119235e3dad36c178

--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -167,6 +167,24 @@
   codec_initialized_ = true;
 }
 
+TEST_F(SvcTest, SetAutoAltRefOption) {
+  svc_.spatial_layers = 5;
+  vpx_codec_err_t res = vpx_svc_set_options(&svc_, "auto-alt-refs=none");
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  res = vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1,1,0");
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  vpx_svc_set_options(&svc_, "auto-alt-refs=0,1,1,1,0");
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  codec_initialized_ = true;
+}
+
 TEST_F(SvcTest, SetQuantizers) {
   vpx_codec_err_t res = vpx_svc_set_quantizers(NULL, "40,30");
   EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
@@ -362,6 +380,7 @@
   codec_enc_.g_pass = VPX_RC_FIRST_PASS;
   vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
   vpx_svc_set_quantizers(&svc_, "40,30");
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1");
 
   vpx_codec_err_t res =
       vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
@@ -410,6 +429,9 @@
   vpx_codec_err_t res_dec;
   int frame_size;
   codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
+  vpx_svc_set_quantizers(&svc_, "40,30");
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1");
   codec_enc_.rc_twopass_stats_in.buf = &stats_buf[0];
   codec_enc_.rc_twopass_stats_in.sz = stats_buf.size();
 
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -9,6 +9,7 @@
  */
 
 
+#include "./vpx_config.h"
 #include "vp8_rtcd.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/internal/vpx_codec_internal.h"
@@ -1314,6 +1315,9 @@
         "vp8.fpf"           /* first pass filename */
 #endif
         VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
+#ifdef CONFIG_SPATIAL_SVC
+        {0},
+#endif
         {0},                /* ss_target_bitrate */
         1,                  /* ts_number_layers */
         {0},                /* ts_target_bitrate */
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -206,6 +206,12 @@
     cpi->twopass.this_frame_mb_stats.mb_stats = NULL;
   }
 #endif
+
+  for (i = 0; i < MAX_LAG_BUFFERS; ++i) {
+    vp9_free_frame_buffer(&cpi->svc.scaled_frames[i]);
+  }
+  vpx_memset(&cpi->svc.scaled_frames[0], 0,
+             MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
 }
 
 static void save_coding_context(VP9_COMP *cpi) {
@@ -476,6 +482,15 @@
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   vp9_update_frame_size(cm);
   init_macroblockd(cm, xd);
+
+  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
+                                 cm->width, cm->height,
+                                 cm->subsampling_x, cm->subsampling_y,
+                                 VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
+      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                         "Failed to reallocate alt_ref_buffer");
+  }
 }
 
 void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
@@ -2486,7 +2501,7 @@
 static int get_arf_src_index(VP9_COMP *cpi) {
   RATE_CONTROL *const rc = &cpi->rc;
   int arf_src_index = 0;
-  if (is_altref_enabled(&cpi->oxcf)) {
+  if (is_altref_enabled(cpi)) {
     if (cpi->pass == 2) {
       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
       if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
@@ -2565,12 +2580,26 @@
 #ifdef CONFIG_SPATIAL_SVC
     if (is_spatial_svc)
       cpi->source = vp9_svc_lookahead_peek(cpi, cpi->lookahead,
-                                           arf_src_index, 1);
+                                           arf_src_index, 0);
     else
 #endif
       cpi->source = vp9_lookahead_peek(cpi->lookahead, arf_src_index);
     if (cpi->source != NULL) {
       cpi->alt_ref_source = cpi->source;
+
+#ifdef CONFIG_SPATIAL_SVC
+      if (is_spatial_svc && cpi->svc.spatial_layer_id > 0) {
+        int i;
+        // Reference a hidden frame from a lower layer
+        for (i = cpi->svc.spatial_layer_id - 1; i >= 0; --i) {
+          if (cpi->oxcf.ss_play_alternate[i]) {
+            cpi->gld_fb_idx = cpi->svc.layer_context[i].alt_ref_idx;
+            break;
+          }
+        }
+      }
+      cpi->svc.layer_context[cpi->svc.spatial_layer_id].has_alt_frame = 1;
+#endif
 
       if (cpi->oxcf.arnr_max_frames > 0) {
         // Produce the filtered ARF frame.
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -195,6 +195,7 @@
   int ts_number_layers;  // Number of temporal layers.
   // Bitrate allocation for spatial layers.
   int ss_target_bitrate[VPX_SS_MAX_LAYERS];
+  int ss_play_alternate[VPX_SS_MAX_LAYERS];
   // Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
   int ts_target_bitrate[VPX_TS_MAX_LAYERS];
   int ts_rate_decimator[VPX_TS_MAX_LAYERS];
@@ -229,10 +230,6 @@
   vp8e_tuning tuning;
 } VP9EncoderConfig;
 
-static INLINE int is_altref_enabled(const VP9EncoderConfig *cfg) {
-  return cfg->mode != REALTIME && cfg->play_alternate && cfg->lag_in_frames > 0;
-}
-
 static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
   return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
 }
@@ -534,6 +531,13 @@
                                           YV12_BUFFER_CONFIG *scaled);
 
 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
+
+static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
+  return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&
+         (cpi->oxcf.play_alternate &&
+          (!(cpi->use_svc && cpi->svc.number_temporal_layers == 1) ||
+           cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]));
+}
 
 static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
                                 MV_REFERENCE_FRAME ref0,
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -486,6 +486,16 @@
     const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL;
     twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass;
 
+    if (cpi->common.current_video_frame == 0) {
+      cpi->ref_frame_flags = 0;
+    } else {
+      LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+      if (lc->current_video_frame_in_layer == 0)
+        cpi->ref_frame_flags = VP9_GOLD_FLAG;
+      else
+        cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+    }
+
     vp9_scale_references(cpi);
 
     // Use either last frame or alt frame for motion search.
@@ -492,19 +502,18 @@
     if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
       scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME);
       ref_frame = LAST_FRAME;
-    } else if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
-      scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, ALTREF_FRAME);
-      ref_frame = ALTREF_FRAME;
+    } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
+      scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
+      ref_frame = GOLDEN_FRAME;
     }
 
-    if (scaled_ref_buf != NULL) {
-      // Update the stride since we are using scaled reference buffer
+    if (scaled_ref_buf != NULL)
       first_ref_buf = scaled_ref_buf;
-      recon_y_stride = first_ref_buf->y_stride;
-      recon_uv_stride = first_ref_buf->uv_stride;
-      uv_mb_height = 16 >> (first_ref_buf->y_height > first_ref_buf->uv_height);
-    }
 
+    recon_y_stride = new_yv12->y_stride;
+    recon_uv_stride = new_yv12->uv_stride;
+    uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height);
+
     // Disable golden frame for svc first pass for now.
     gld_yv12 = NULL;
     set_ref_ptrs(cm, xd, ref_frame, NONE);
@@ -909,6 +918,8 @@
   }
 
   ++cm->current_video_frame;
+  if (cpi->use_svc)
+    vp9_inc_frame_in_layer(&cpi->svc);
 }
 
 static double calc_correction_factor(double err_per_mb,
@@ -1506,7 +1517,7 @@
   double mv_in_out_accumulator = 0.0;
   double abs_mv_in_out_accumulator = 0.0;
   double mv_ratio_accumulator_thresh;
-  unsigned int allow_alt_ref = is_altref_enabled(oxcf);
+  unsigned int allow_alt_ref = is_altref_enabled(cpi);
 
   int f_boost = 0;
   int b_boost = 0;
@@ -2080,6 +2091,11 @@
     default:
       assert(0);
   }
+  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    cpi->refresh_golden_frame = 0;
+    if (cpi->alt_ref_source == NULL)
+      cpi->refresh_alt_ref_frame = 0;
+  }
 }
 
 
@@ -2122,6 +2138,18 @@
 #endif
     vp9_rc_set_frame_target(cpi, target_rate);
     cm->frame_type = INTER_FRAME;
+
+    if (is_spatial_svc) {
+      if (cpi->svc.spatial_layer_id == 0) {
+        lc->is_key_frame = 0;
+      } else {
+        lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
+
+        if (lc->is_key_frame)
+          cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
+      }
+    }
+
     return;
   }
 
@@ -2189,7 +2217,8 @@
     }
 
     rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-    cpi->refresh_golden_frame = 1;
+    if (!is_spatial_svc)
+      cpi->refresh_golden_frame = 1;
   }
 
   {
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1097,7 +1097,7 @@
 
   rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
 
-  if (is_altref_enabled(oxcf) && cpi->refresh_alt_ref_frame &&
+  if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
       (cm->frame_type != KEY_FRAME))
     // Update the alternate reference frame stats as appropriate.
     update_alt_ref_frame_stats(cpi);
@@ -1349,8 +1349,9 @@
   return target_index - qindex;
 }
 
-void vp9_rc_set_gf_max_interval(const VP9EncoderConfig *const oxcf,
+void vp9_rc_set_gf_max_interval(const VP9_COMP *const cpi,
                                 RATE_CONTROL *const rc) {
+  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   // Set Maximum gf/arf interval
   rc->max_gf_interval = 16;
 
@@ -1359,7 +1360,7 @@
   if (rc->static_scene_max_gf_interval > (MAX_LAG_BUFFERS * 2))
     rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
 
-  if (is_altref_enabled(oxcf)) {
+  if (is_altref_enabled(cpi)) {
     if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
       rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
   }
@@ -1392,5 +1393,5 @@
   rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
                                     vbr_max_bits);
 
-  vp9_rc_set_gf_max_interval(oxcf, rc);
+  vp9_rc_set_gf_max_interval(cpi, rc);
 }
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -189,7 +189,7 @@
 
 void vp9_rc_update_framerate(struct VP9_COMP *cpi);
 
-void vp9_rc_set_gf_max_interval(const struct VP9EncoderConfig *const oxcf,
+void vp9_rc_set_gf_max_interval(const struct VP9_COMP *const cpi,
                                 RATE_CONTROL *const rc);
 
 #ifdef __cplusplus
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -19,6 +19,7 @@
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   int layer;
   int layer_end;
+  int alt_ref_idx = svc->number_spatial_layers;
 
   svc->spatial_layer_id = 0;
   svc->temporal_layer_id = 0;
@@ -34,7 +35,6 @@
     RATE_CONTROL *const lrc = &lc->rc;
     int i;
     lc->current_video_frame_in_layer = 0;
-    lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
     lrc->ni_av_qi = oxcf->worst_allowed_q;
     lrc->total_actual_bits = 0;
     lrc->total_target_vs_actual = 0;
@@ -48,14 +48,24 @@
     for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
       lrc->rate_correction_factors[i] = 1.0;
     }
+    lc->layer_size = 0;
 
     if (svc->number_temporal_layers > 1) {
       lc->target_bandwidth = oxcf->ts_target_bitrate[layer];
       lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
+      lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
     } else {
       lc->target_bandwidth = oxcf->ss_target_bitrate[layer];
       lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
       lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
+      lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q +
+                                          oxcf->best_allowed_q) / 2;
+      lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q +
+                                            oxcf->best_allowed_q) / 2;
+      if (oxcf->ss_play_alternate[layer])
+        lc->alt_ref_idx = alt_ref_idx++;
+      else
+        lc->alt_ref_idx = -1;
     }
 
     lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level_ms),
@@ -153,7 +163,7 @@
                                    oxcf->two_pass_vbrmin_section / 100);
   lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth *
                                    oxcf->two_pass_vbrmax_section) / 100);
-  vp9_rc_set_gf_max_interval(oxcf, lrc);
+  vp9_rc_set_gf_max_interval(cpi, lrc);
 }
 
 void vp9_restore_layer_context(VP9_COMP *const cpi) {
@@ -164,6 +174,7 @@
   cpi->rc = lc->rc;
   cpi->twopass = lc->twopass;
   cpi->oxcf.target_bandwidth = lc->target_bandwidth;
+  cpi->alt_ref_source = lc->alt_ref_source;
   // Reset the frames_since_key and frames_to_key counters to their values
   // before the layer restore. Keep these defined for the stream (not layer).
   if (cpi->svc.number_temporal_layers > 1) {
@@ -179,6 +190,7 @@
   lc->rc = cpi->rc;
   lc->twopass = cpi->twopass;
   lc->target_bandwidth = (int)oxcf->target_bandwidth;
+  lc->alt_ref_source = cpi->alt_ref_source;
 }
 
 void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) {
@@ -239,7 +251,7 @@
 static int copy_svc_params(VP9_COMP *const cpi, struct lookahead_entry *buf) {
   int layer_id;
   vpx_svc_parameters_t *layer_param;
-  vpx_enc_frame_flags_t flags;
+  LAYER_CONTEXT *lc;
 
   // Find the next layer to be encoded
   for (layer_id = 0; layer_id < cpi->svc.number_spatial_layers; ++layer_id) {
@@ -251,13 +263,47 @@
     return 1;
 
   layer_param = &buf->svc_params[layer_id];
-  buf->flags = flags = layer_param->flags;
   cpi->svc.spatial_layer_id = layer_param->spatial_layer;
   cpi->svc.temporal_layer_id = layer_param->temporal_layer;
-  cpi->lst_fb_idx = layer_param->lst_fb_idx;
-  cpi->gld_fb_idx = layer_param->gld_fb_idx;
-  cpi->alt_fb_idx = layer_param->alt_fb_idx;
 
+  cpi->lst_fb_idx = cpi->svc.spatial_layer_id;
+
+  if (cpi->svc.spatial_layer_id < 1)
+    cpi->gld_fb_idx = cpi->lst_fb_idx;
+  else
+    cpi->gld_fb_idx = cpi->svc.spatial_layer_id - 1;
+
+  lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+
+  if (lc->current_video_frame_in_layer == 0) {
+    if (cpi->svc.spatial_layer_id >= 2)
+      cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2;
+    else
+      cpi->alt_fb_idx = cpi->lst_fb_idx;
+  } else {
+    if (cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]) {
+      cpi->alt_fb_idx = lc->alt_ref_idx;
+      if (!lc->has_alt_frame)
+        cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
+    } else {
+      // Find a proper alt_fb_idx for layers that don't have alt ref frame
+      if (cpi->svc.spatial_layer_id == 0) {
+        cpi->alt_fb_idx = cpi->lst_fb_idx;
+      } else {
+        LAYER_CONTEXT *lc_lower =
+            &cpi->svc.layer_context[cpi->svc.spatial_layer_id - 1];
+
+        if (cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id - 1] &&
+            lc_lower->alt_ref_source != NULL)
+          cpi->alt_fb_idx = lc_lower->alt_ref_idx;
+        else if (cpi->svc.spatial_layer_id >= 2)
+          cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2;
+        else
+          cpi->alt_fb_idx = cpi->lst_fb_idx;
+      }
+    }
+  }
+
   if (vp9_set_size_literal(cpi, layer_param->width, layer_param->height) != 0)
     return VPX_CODEC_INVALID_PARAM;
 
@@ -270,9 +316,7 @@
 
   vp9_set_high_precision_mv(cpi, 1);
 
-  // Retrieve the encoding flags for each layer and apply it to encoder.
-  // It includes reference frame flags and update frame flags.
-  vp9_apply_encoding_flags(cpi, flags);
+  cpi->alt_ref_source = get_layer_context(&cpi->svc)->alt_ref_source;
 
   return 0;
 }
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -29,6 +29,10 @@
   unsigned int current_video_frame_in_layer;
   int is_key_frame;
   vpx_svc_parameters_t svc_params_received;
+  struct lookahead_entry  *alt_ref_source;
+  int alt_ref_idx;
+  int has_alt_frame;
+  size_t layer_size;
 } LAYER_CONTEXT;
 
 typedef struct {
@@ -36,6 +40,11 @@
   int temporal_layer_id;
   int number_spatial_layers;
   int number_temporal_layers;
+
+  // Store scaled source frames to be used for temporal filter to generate
+  // an alt ref frame.
+  YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS];
+
   // Layer context used for rate control in one pass temporal CBR mode or
   // two pass spatial mode. Defined for temporal or spatial layers for now.
   // Does not support temporal combined with spatial RC.
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -432,12 +432,6 @@
   frames_to_blur_forward = ((frames_to_blur - 1) / 2);
   start_frame = distance + frames_to_blur_forward;
 
-  // Setup scaling factors. Scaling on each of the arnr frames not supported.
-  vp9_setup_scale_factors_for_frame(&sf,
-      get_frame_new_buffer(cm)->y_crop_width,
-      get_frame_new_buffer(cm)->y_crop_height,
-      cm->width, cm->height);
-
   // Setup frame pointers, NULL indicates frame not included in filter.
   vp9_zero(cpi->frames);
   for (frame = 0; frame < frames_to_blur; ++frame) {
@@ -445,6 +439,41 @@
     struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead,
                                                      which_buffer);
     cpi->frames[frames_to_blur - 1 - frame] = &buf->img;
+  }
+
+  // Setup scaling factors. Scaling on each of the arnr frames is not supported
+  if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    // In spatial svc the scaling factors might be less than 1/2. So we will use
+    // non-normative scaling.
+    int frame_used = 0;
+    vp9_setup_scale_factors_for_frame(&sf,
+                                      get_frame_new_buffer(cm)->y_crop_width,
+                                      get_frame_new_buffer(cm)->y_crop_height,
+                                      get_frame_new_buffer(cm)->y_crop_width,
+                                      get_frame_new_buffer(cm)->y_crop_height);
+
+    for (frame = 0; frame < frames_to_blur; ++frame) {
+      if (cm->mi_cols * MI_SIZE != cpi->frames[frame]->y_width ||
+          cm->mi_rows * MI_SIZE != cpi->frames[frame]->y_height) {
+        if (vp9_realloc_frame_buffer(&cpi->svc.scaled_frames[frame_used],
+                                     cm->width, cm->height,
+                                     cm->subsampling_x, cm->subsampling_y,
+                                     VP9_ENC_BORDER_IN_PIXELS, NULL, NULL,
+                                     NULL))
+          vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                             "Failed to reallocate alt_ref_buffer");
+
+        cpi->frames[frame] =
+            vp9_scale_if_required(cm, cpi->frames[frame],
+                                  &cpi->svc.scaled_frames[frame_used]);
+        ++frame_used;
+      }
+    }
+  } else {
+    vp9_setup_scale_factors_for_frame(&sf,
+                                      get_frame_new_buffer(cm)->y_crop_width,
+                                      get_frame_new_buffer(cm)->y_crop_height,
+                                      cm->width, cm->height);
   }
 
   temporal_filter_iterate_c(cpi, frames_to_blur, frames_to_blur_backward,
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -11,6 +11,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "./vpx_config.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "./vpx_version.h"
@@ -88,7 +89,7 @@
   size_t                  pending_frame_magnitude;
   vpx_image_t             preview_img;
   vp8_postproc_cfg_t      preview_ppcfg;
-  vpx_codec_pkt_list_decl(128) pkt_list;
+  vpx_codec_pkt_list_decl(256) pkt_list;
   unsigned int                 fixed_kf_cntr;
 };
 
@@ -174,6 +175,19 @@
   }
 
   RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS);
+
+#ifdef CONFIG_SPATIAL_SVC
+  if (cfg->ss_number_layers > 1) {
+    int i, alt_ref_sum = 0;
+    for (i = 0; i < cfg->ss_number_layers; ++i) {
+      if (cfg->ss_enable_auto_alt_ref[i])
+        ++alt_ref_sum;
+    }
+    if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)
+      ERROR("Not enough ref buffers for svc alt ref frames");
+  }
+#endif
+
   RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
   if (cfg->ts_number_layers > 1) {
     unsigned int i;
@@ -382,8 +396,12 @@
 
   if (oxcf->ss_number_layers > 1) {
     int i;
-    for (i = 0; i < VPX_SS_MAX_LAYERS; ++i)
+    for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
       oxcf->ss_target_bitrate[i] =  1000 * cfg->ss_target_bitrate[i];
+#ifdef CONFIG_SPATIAL_SVC
+      oxcf->ss_play_alternate[i] =  cfg->ss_enable_auto_alt_ref[i];
+#endif
+    }
   } else if (oxcf->ss_number_layers == 1) {
     oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth;
   }
@@ -864,6 +882,11 @@
         vpx_codec_cx_pkt_t pkt;
         VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
 
+#ifdef CONFIG_SPATIAL_SVC
+        if (cpi->use_svc && cpi->svc.number_temporal_layers == 1)
+          cpi->svc.layer_context[cpi->svc.spatial_layer_id].layer_size += size;
+#endif
+
         // Pack invisible frames with the next visible frame
         if (cpi->common.show_frame == 0
 #ifdef CONFIG_SPATIAL_SVC
@@ -936,6 +959,18 @@
         vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
         cx_data += size;
         cx_data_sz -= size;
+#ifdef CONFIG_SPATIAL_SVC
+        if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+          vpx_codec_cx_pkt_t pkt = {0};
+          int i;
+          pkt.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES;
+          for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
+            pkt.data.layer_sizes[i] = cpi->svc.layer_context[i].layer_size;
+            cpi->svc.layer_context[i].layer_size = 0;
+          }
+          vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+        }
+#endif
       }
     }
   }
@@ -1245,6 +1280,9 @@
       9999,               // kf_max_dist
 
       VPX_SS_DEFAULT_LAYERS,  // ss_number_layers
+#ifdef CONFIG_SPATIAL_SVC
+      {0},
+#endif
       {0},                    // ss_target_bitrate
       1,                      // ts_number_layers
       {0},                    // ts_target_bitrate
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -21,10 +21,12 @@
 #include <string.h>
 #define VPX_DISABLE_CTRL_TYPECHECKS 1
 #define VPX_CODEC_DISABLE_COMPAT 1
+#include "./vpx_config.h"
 #include "vpx/svc_context.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
 #include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_onyxc_int.h"
 
 #ifdef __MINGW32__
 #define strtok_r strtok_s
@@ -65,6 +67,7 @@
   int scaling_factor_num[VPX_SS_MAX_LAYERS];
   int scaling_factor_den[VPX_SS_MAX_LAYERS];
   int quantizer[VPX_SS_MAX_LAYERS];
+  int enable_auto_alt_ref[VPX_SS_MAX_LAYERS];
 
   // accumulated statistics
   double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS];   // total/Y/U/V
@@ -80,7 +83,6 @@
   int encode_frame_count;
   int frame_received;
   int frame_within_gop;
-  vpx_enc_frame_flags_t enc_frame_flags;
   int layers;
   int layer;
   int is_keyframe;
@@ -238,6 +240,59 @@
   return res;
 }
 
+// Parses the "auto-alt-refs" option: a comma separated list of one 0/1 flag
+// per spatial layer, stored in si->enable_auto_alt_ref[]. Fails with
+// VPX_CODEC_INVALID_PARAM on empty input, a value other than 0 or 1, too few
+// values, or more than REF_FRAMES - spatial_layers enabled layers.
+static vpx_codec_err_t parse_auto_alt_ref(SvcContext *svc_ctx,
+                                          const char *alt_ref_options) {
+  char *input_string, *token, *save_ptr;
+  const char *delim = ",";
+  int i, value, found = 0, enabled = 0;
+  vpx_codec_err_t res = VPX_CODEC_OK;
+  SvcInternal *const si = get_svc_internal(svc_ctx);
+
+  if (alt_ref_options == NULL || strlen(alt_ref_options) == 0)
+    return VPX_CODEC_INVALID_PARAM;
+  // strtok_r() modifies its input, so tokenize a private copy.
+  input_string = strdup(alt_ref_options);
+  if (input_string == NULL) return VPX_CODEC_MEM_ERROR;
+
+  token = strtok_r(input_string, delim, &save_ptr);
+  for (i = 0; i < svc_ctx->spatial_layers; ++i) {
+    if (token != NULL) {
+      value = atoi(token);
+      if (value < 0 || value > 1) {
+        svc_log(svc_ctx, SVC_LOG_ERROR,
+                "enable auto alt ref values: invalid value %s\n", token);
+        res = VPX_CODEC_INVALID_PARAM;
+        break;
+      }
+      token = strtok_r(NULL, delim, &save_ptr);
+      found = i + 1;
+    } else {
+      value = 0;
+    }
+    si->enable_auto_alt_ref[i] = value;
+    if (value > 0) ++enabled;
+  }
+  if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) {
+    svc_log(svc_ctx, SVC_LOG_ERROR,
+            "svc: auto alt ref: %d values required, but only %d specified\n",
+            svc_ctx->spatial_layers, found);
+    res = VPX_CODEC_INVALID_PARAM;
+  }
+  if (enabled > REF_FRAMES - svc_ctx->spatial_layers) {
+    svc_log(svc_ctx, SVC_LOG_ERROR,
+            "svc: auto alt ref: Maximum %d(REF_FRAMES - layers) layers could "
+            "enable auto alt reference frame, but %d layers are enabled\n",
+            REF_FRAMES - svc_ctx->spatial_layers, enabled);
+    res = VPX_CODEC_INVALID_PARAM;
+  }
+  free(input_string);
+  return res;
+}
+
 static void log_invalid_scale_factor(SvcContext *svc_ctx, const char *value) {
   svc_log(svc_ctx, SVC_LOG_ERROR, "svc scale-factors: invalid value %s\n",
           value);
@@ -335,6 +390,9 @@
     } else if (strcmp("quantizers", option_name) == 0) {
       res = parse_quantizer_values(svc_ctx, option_value);
       if (res != VPX_CODEC_OK) break;
+    } else if (strcmp("auto-alt-refs", option_name) == 0) {
+      res = parse_auto_alt_ref(svc_ctx, option_value);
+      if (res != VPX_CODEC_OK) break;
     } else {
       svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name);
       res = VPX_CODEC_INVALID_PARAM;
@@ -382,6 +440,7 @@
                              vpx_codec_iface_t *iface,
                              vpx_codec_enc_cfg_t *enc_cfg) {
   vpx_codec_err_t res;
+  int i;
   SvcInternal *const si = get_svc_internal(svc_ctx);
   if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL ||
       enc_cfg == NULL) {
@@ -428,7 +487,6 @@
   // TODO(Minghai): Optimize the mechanism of allocating bits after
   // implementing svc two pass rate control.
   if (si->layers > 1) {
-    int i;
     float total = 0;
     float alloc_ratio[VPX_SS_MAX_LAYERS] = {0};
 
@@ -452,6 +510,9 @@
     }
   }
 
+  for (i = 0; i < si->layers; ++i)
+    enc_cfg->ss_enable_auto_alt_ref[i] = si->enable_auto_alt_ref[i];
+
   // modify encoder configuration
   enc_cfg->ss_number_layers = si->layers;
   enc_cfg->ts_number_layers = 1;  // Temporal layers not used in this encoder.
@@ -482,106 +543,10 @@
 
   vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1);
   vpx_codec_control(codec_ctx, VP8E_SET_TOKEN_PARTITIONS, 1);
-  vpx_codec_control(codec_ctx, VP8E_SET_ENABLEAUTOALTREF, 0);
 
   return VPX_CODEC_OK;
 }
 
-static void accumulate_frame_size_for_each_layer(SvcInternal *const si,
-                                                 const uint8_t *const buf,
-                                                 const size_t size) {
-  uint8_t marker = buf[size - 1];
-  if ((marker & 0xe0) == 0xc0) {
-    const uint32_t frames = (marker & 0x7) + 1;
-    const uint32_t mag = ((marker >> 3) & 0x3) + 1;
-    const size_t index_sz = 2 + mag * frames;
-
-    uint8_t marker2 = buf[size - index_sz];
-
-    if (size >= index_sz && marker2 == marker) {
-      // found a valid superframe index
-      uint32_t i, j;
-      const uint8_t *x = &buf[size - index_sz + 1];
-
-      // frames has a maximum of 8 and mag has a maximum of 4.
-      for (i = 0; i < frames; i++) {
-        uint32_t this_sz = 0;
-
-        for (j = 0; j < mag; j++)
-          this_sz |= (*x++) << (j * 8);
-        si->bytes_sum[i] += this_sz;
-      }
-    }
-  }
-}
-
-// SVC Algorithm flags - these get mapped to VP8_EFLAG_* defined in vp8cx.h
-
-// encoder should reference the last frame
-#define USE_LAST (1 << 0)
-
-// encoder should reference the alt ref frame
-#define USE_ARF (1 << 1)
-
-// encoder should reference the golden frame
-#define USE_GF (1 << 2)
-
-// encoder should copy current frame to the last frame buffer
-#define UPDATE_LAST (1 << 3)
-
-// encoder should copy current frame to the alt ref frame buffer
-#define UPDATE_ARF (1 << 4)
-
-// encoder should copy current frame to the golden frame
-#define UPDATE_GF (1 << 5)
-
-static int map_vp8_flags(int svc_flags) {
-  int flags = 0;
-
-  if (!(svc_flags & USE_LAST)) flags |= VP8_EFLAG_NO_REF_LAST;
-  if (!(svc_flags & USE_ARF)) flags |= VP8_EFLAG_NO_REF_ARF;
-  if (!(svc_flags & USE_GF)) flags |= VP8_EFLAG_NO_REF_GF;
-
-  if (svc_flags & UPDATE_LAST) {
-    // last is updated automatically
-  } else {
-    flags |= VP8_EFLAG_NO_UPD_LAST;
-  }
-  if (svc_flags & UPDATE_ARF) {
-    flags |= VP8_EFLAG_FORCE_ARF;
-  } else {
-    flags |= VP8_EFLAG_NO_UPD_ARF;
-  }
-  if (svc_flags & UPDATE_GF) {
-    flags |= VP8_EFLAG_FORCE_GF;
-  } else {
-    flags |= VP8_EFLAG_NO_UPD_GF;
-  }
-  return flags;
-}
-
-static void calculate_enc_frame_flags(SvcContext *svc_ctx) {
-  vpx_enc_frame_flags_t flags = VPX_EFLAG_FORCE_KF;
-  SvcInternal *const si = get_svc_internal(svc_ctx);
-  const int is_keyframe = (si->frame_within_gop == 0);
-
-  // keyframe layer zero is identical for all modes
-  if (is_keyframe && si->layer == 0) {
-    si->enc_frame_flags = VPX_EFLAG_FORCE_KF;
-    return;
-  }
-
-  if (si->layer == 0) {
-    flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
-  } else if (is_keyframe) {
-    flags = map_vp8_flags(USE_ARF | UPDATE_LAST);
-  } else {
-    flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST);
-  }
-
-  si->enc_frame_flags = flags;
-}
-
 vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx,
                                              int layer,
                                              unsigned int *width,
@@ -621,7 +586,6 @@
   memset(&svc_params, 0, sizeof(svc_params));
   svc_params.temporal_layer = 0;
   svc_params.spatial_layer = si->layer;
-  svc_params.flags = si->enc_frame_flags;
 
   layer = si->layer;
   if (VPX_CODEC_OK != vpx_svc_get_layer_resolution(svc_ctx, layer,
@@ -640,33 +604,6 @@
   }
 
   svc_params.distance_from_i_frame = si->frame_within_gop;
-
-  // Use buffer i for layer i LST
-  svc_params.lst_fb_idx = si->layer;
-
-  // Use buffer i-1 for layer i Alt (Inter-layer prediction)
-  svc_params.alt_fb_idx = (si->layer > 0) ? si->layer - 1 : 0;
-  svc_params.gld_fb_idx = svc_params.lst_fb_idx;
-
-  svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, layer: %d, %dx%d, q: %d\n",
-          si->encode_frame_count, si->layer, svc_params.width,
-          svc_params.height, svc_params.min_quantizer);
-
-  if (svc_params.flags == VPX_EFLAG_FORCE_KF) {
-    svc_log(svc_ctx, SVC_LOG_DEBUG, "flags == VPX_EFLAG_FORCE_KF\n");
-  } else {
-    svc_log(
-        svc_ctx, SVC_LOG_DEBUG, "Using:    LST/GLD/ALT [%2d|%2d|%2d]\n",
-        svc_params.flags & VP8_EFLAG_NO_REF_LAST ? -1 : svc_params.lst_fb_idx,
-        svc_params.flags & VP8_EFLAG_NO_REF_GF ? -1 : svc_params.gld_fb_idx,
-        svc_params.flags & VP8_EFLAG_NO_REF_ARF ? -1 : svc_params.alt_fb_idx);
-    svc_log(
-        svc_ctx, SVC_LOG_DEBUG, "Updating: LST/GLD/ALT [%2d|%2d|%2d]\n",
-        svc_params.flags & VP8_EFLAG_NO_UPD_LAST ? -1 : svc_params.lst_fb_idx,
-        svc_params.flags & VP8_EFLAG_NO_UPD_GF ? -1 : svc_params.gld_fb_idx,
-        svc_params.flags & VP8_EFLAG_NO_UPD_ARF ? -1 : svc_params.alt_fb_idx);
-  }
-
   vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &svc_params);
 }
 
@@ -705,7 +642,6 @@
   if (rawimg != NULL) {
     // encode each layer
     for (si->layer = 0; si->layer < si->layers; ++si->layer) {
-      calculate_enc_frame_flags(svc_ctx);
       set_svc_parameters(svc_ctx, codec_ctx);
     }
   }
@@ -723,8 +659,6 @@
         fd_list_add(&si->frame_list, fd_create(cx_pkt->data.frame.buf,
                                                cx_pkt->data.frame.sz,
                                                cx_pkt->data.frame.flags));
-        accumulate_frame_size_for_each_layer(si, cx_pkt->data.frame.buf,
-                                             cx_pkt->data.frame.sz);
 
         svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, "
                 "pts: %d\n", si->frame_received,
@@ -773,6 +707,12 @@
         memcpy(si->rc_stats_buf + si->rc_stats_buf_used,
                cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz);
         si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz;
+        break;
+      }
+      case VPX_CODEC_SPATIAL_SVC_LAYER_SIZES: {
+        int i;
+        for (i = 0; i < si->layers; ++i)
+          si->bytes_sum[i] += cx_pkt->data.layer_sizes[i];
         break;
       }
       default: {
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -298,7 +298,6 @@
   unsigned int height;        /**< height of current spatial layer */
   int spatial_layer;          /**< current spatial layer number - 0 = base */
   int temporal_layer;         /**< current temporal layer number - 0 = base */
-  int flags;                  /**< encode frame flags */
   int max_quantizer;          /**< max quantizer for current layer */
   int min_quantizer;          /**< min quantizer for current layer */
   int distance_from_i_frame;  /**< frame number within current gop */
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -156,6 +156,9 @@
     VPX_CODEC_CX_FRAME_PKT,    /**< Compressed video frame */
     VPX_CODEC_STATS_PKT,       /**< Two-pass statistics for this frame */
     VPX_CODEC_PSNR_PKT,        /**< PSNR statistics for this frame */
+#ifdef CONFIG_SPATIAL_SVC
+    VPX_CODEC_SPATIAL_SVC_LAYER_SIZES, /**< Sizes for each layer in this frame*/
+#endif
     VPX_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions  */
   };
 
@@ -191,6 +194,9 @@
         double       psnr[4];     /**< PSNR, total/y/u/v */
       } psnr;                       /**< data for PSNR packet */
       struct vpx_fixed_buf raw;     /**< data for arbitrary packets */
+#ifdef CONFIG_SPATIAL_SVC
+      size_t layer_sizes[VPX_SS_MAX_LAYERS];
+#endif
 
       /* This packet size is fixed to allow codecs to extend this
        * interface without having to manage storage for raw packets,
@@ -622,6 +628,15 @@
      * This value specifies the number of spatial coding layers to be used.
      */
     unsigned int           ss_number_layers;
+
+#ifdef CONFIG_SPATIAL_SVC
+    /*!\brief Enable auto alt reference flags for each spatial layer.
+     *
+     * These values specify if auto alt reference frame is enabled for each
+     * spatial layer.
+     */
+    int                    ss_enable_auto_alt_ref[VPX_SS_MAX_LAYERS];
+#endif
 
     /*!\brief Target bitrate for each spatial layer.
      *