ref: abf58ecf6a74bcabaad180e53694c5c070c72773
parent: a7e0b1ea0169c28bea4a11727bab54ee0d6f26ce
author: Marco <[email protected]>
date: Fri Jan 15 04:35:41 EST 2016
Vidyo patch: Changes to the scalability code. Changes to mode selection for 1-pass SVC mode: use the base layer motion vector, and changes to intra prediction. Change-Id: I3e883aa04db521cfa026a0b12c9478ea35a344c9
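The core of the 1-pass SVC change is reusing the co-located base-layer motion vector when encoding an enhancement layer. Below is a minimal standalone C sketch of that mapping, assuming a fixed 2:1 spatial ratio between layers; the MV_SKETCH/INT_MV_SKETCH types and the scaled_base_mv() helper are illustrative stand-ins, not the libvpx structures (the patch does the equivalent lookup in mv_refs_rt() via cm->prev_frame->mvs).

/* Sketch only: simplified stand-ins for libvpx's MV types. */
#include <stdint.h>
#include <stdio.h>

#define INVALID_MV 0x80008000

typedef struct { int16_t row, col; } MV_SKETCH;
typedef union { uint32_t as_int; MV_SKETCH as_mv; } INT_MV_SKETCH;

/* Map the enhancement-layer block at (mi_row, mi_col) to its co-located
 * base-layer block at (mi_row >> 1, mi_col >> 1) and scale the stored MV
 * by 2 for the 2:1 spatial ratio; INVALID_MV means no usable candidate. */
static INT_MV_SKETCH scaled_base_mv(const INT_MV_SKETCH *base_mvs,
                                    int base_mi_cols, int mi_row, int mi_col) {
  INT_MV_SKETCH out;
  const INT_MV_SKETCH cand =
      base_mvs[(mi_row >> 1) * base_mi_cols + (mi_col >> 1)];
  if (cand.as_int != INVALID_MV) {
    out.as_mv.row = (int16_t)(cand.as_mv.row * 2);
    out.as_mv.col = (int16_t)(cand.as_mv.col * 2);
  } else {
    out.as_int = INVALID_MV;
  }
  return out;
}

int main(void) {
  INT_MV_SKETCH base[4];
  INT_MV_SKETCH mv;
  int i;
  for (i = 0; i < 4; ++i) base[i].as_int = INVALID_MV;
  base[0].as_mv.row = -3;  /* base-layer MV, 1/8-pel units */
  base[0].as_mv.col = 5;
  mv = scaled_base_mv(base, 2, 0, 0);  /* block co-located with base[0] */
  printf("scaled base MV: (%d, %d)\n", mv.as_mv.row, mv.as_mv.col);
  return 0;
}

In the patch itself the scaled vector is additionally clamped with clamp_mv_ref(), and combined_motion_search() is seeded with it only when its SAD beats the existing MV predictor for the block.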
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -30,6 +30,7 @@
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "../vpxstats.h"
+#include "vp9/encoder/vp9_encoder.h"
#define OUTPUT_RC_STATS 1
static const arg_def_t skip_frames_arg =
@@ -749,6 +750,7 @@
cx_time += vpx_usec_timer_elapsed(&timer);
printf("%s", vpx_svc_get_message(&svc_ctx));
+ fflush(stdout);
if (res != VPX_CODEC_OK) {
die_codec(&codec, "Failed to encode frame");
}
@@ -756,6 +758,7 @@
while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
switch (cx_pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT: {
+ SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
if (cx_pkt->data.frame.sz > 0) {
#if OUTPUT_RC_STATS
uint32_t sizes[8];
@@ -851,6 +854,8 @@
printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
!!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
(int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
+ if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
+ si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
++frames_received;
break;
}
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -41,7 +41,7 @@
kDenoiserOnAdaptive
};
-static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3};
+static int mode_to_num_layers[13] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3};
// For rate control encoding stats.
struct RateControlMetrics {
@@ -432,7 +432,32 @@
layer_flags[7] = layer_flags[3];
break;
}
- case 11:
+ case 11: {
+ // 3-layers structure with one reference frame.
+ // This works the same as temporal_layering_mode 3.
+ // This was added to compare with vp9_spatial_svc_encoder.
+
+ // 3-layers, 4-frame period.
+ int ids[4] = {0, 2, 1, 2};
+ cfg->ts_periodicity = 4;
+ *flag_periodicity = 4;
+ cfg->ts_number_layers = 3;
+ cfg->ts_rate_decimator[0] = 4;
+ cfg->ts_rate_decimator[1] = 2;
+ cfg->ts_rate_decimator[2] = 1;
+ memcpy(cfg->ts_layer_id, ids, sizeof(ids));
+ // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
+ layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+ VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+ layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+ VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+ layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+ VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+ layer_flags[3] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
+ VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+ break;
+ }
+ case 12:
default: {
// 3-layers structure as in case 10, but no sync/refresh points for
// layer 1 and 2.
@@ -530,7 +555,7 @@
}
layering_mode = strtol(argv[10], NULL, 0);
- if (layering_mode < 0 || layering_mode > 12) {
+ if (layering_mode < 0 || layering_mode > 13) {
die("Invalid layering mode (0..12) %s", argv[10]);
}
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1754,7 +1754,9 @@
}
}
- if (cm->use_prev_frame_mvs) {
+ if (cm->use_prev_frame_mvs ||
+ (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1
+ && cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) {
MV_REF *const frame_mvs =
cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
int w, h;
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3318,6 +3318,11 @@
cpi->oxcf.content == VP9E_CONTENT_SCREEN)
vp9_avg_source_sad(cpi);
+ // TODO(wonkap/marpan): For 1 pass SVC, since only ZEROMV is allowed for
+ // upsampled reference frame (i.e., svc->force_zero_mode_spatial_ref = 0),
+ // we should be able to avoid this frame-level upsampling.
+ // Keeping it for now as there is an asan error in the multi-threaded SVC
+ // rate control test if this upsampling is removed.
if (frame_is_intra_only(cm) == 0) {
vp9_scale_references(cpi);
}
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -40,12 +40,13 @@
int in_use;
} PRED_BUFFER;
-static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
+static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm,
+ const MACROBLOCK *x,
const MACROBLOCKD *xd,
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list,
- int mi_row, int mi_col) {
+ int_mv *mv_ref_list, int_mv *base_mv,
+ int mi_row, int mi_col, int use_base_mv) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
@@ -109,6 +110,20 @@
}
}
}
+ if (use_base_mv &&
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ ref_frame == LAST_FRAME) {
+ // Get base layer mv.
+ MV_REF *candidate =
+ &cm->prev_frame->mvs[(mi_col >> 1) + (mi_row >> 1) * (cm->mi_cols >> 1)];
+ if (candidate->mv[0].as_int != INVALID_MV) {
+ base_mv->as_mv.row = (candidate->mv[0].as_mv.row << 1);
+ base_mv->as_mv.col = (candidate->mv[0].as_mv.col << 1);
+ clamp_mv_ref(&base_mv->as_mv, xd);
+ } else {
+ base_mv->as_int = INVALID_MV;
+ }
+ }
Done:
@@ -124,7 +139,7 @@
static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
int_mv *tmp_mv, int *rate_mv,
- int64_t best_rd_sofar) {
+ int64_t best_rd_sofar, int use_base_mv) {
MACROBLOCKD *xd = &x->e_mbd;
MODE_INFO *mi = xd->mi[0];
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
@@ -133,6 +148,7 @@
MV mvp_full;
const int ref = mi->ref_frame[0];
const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+ MV center_mv;
int dis;
int rate_mode;
const int tmp_col_min = x->mv_col_min;
@@ -163,9 +179,14 @@
mvp_full.col >>= 3;
mvp_full.row >>= 3;
+ if (!use_base_mv)
+ center_mv = ref_mv;
+ else
+ center_mv = tmp_mv->as_mv;
+
vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
cond_cost_list(cpi, cost_list),
- &ref_mv, &tmp_mv->as_mv, INT_MAX, 0);
+ &center_mv, &tmp_mv->as_mv, INT_MAX, 0);
x->mv_col_min = tmp_col_min;
x->mv_col_max = tmp_col_max;
@@ -1085,8 +1106,50 @@
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
}
+static INLINE void find_predictors(VP9_COMP *cpi, MACROBLOCK *x,
+ MV_REFERENCE_FRAME ref_frame,
+ int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
+ int const_motion[MAX_REF_FRAMES],
+ int *ref_frame_skip_mask,
+ const int flag_list[4],
+ TileDataEnc *tile_data,
+ int mi_row, int mi_col,
+ struct buf_2d yv12_mb[4][MAX_MB_PLANE],
+ BLOCK_SIZE bsize) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+ TileInfo *const tile_info = &tile_data->tile_info;
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
+ x->pred_mv_sad[ref_frame] = INT_MAX;
+ frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
+ frame_mv[ZEROMV][ref_frame].as_int = 0;
// this needs various further optimizations. to be continued..
+ if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
+ int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
+ const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
+ vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
+ sf, sf);
+ if (cm->use_prev_frame_mvs)
+ vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
+ candidates, mi_row, mi_col,
+ x->mbmi_ext->mode_context);
+ else
+ const_motion[ref_frame] =
+ mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame,
+ candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col,
+ (int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id));
+ vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
+ &frame_mv[NEARESTMV][ref_frame],
+ &frame_mv[NEARMV][ref_frame]);
+ if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8) {
+ vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
+ ref_frame, bsize);
+ }
+ } else {
+ *ref_frame_skip_mask |= (1 << ref_frame);
+ }
+}
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
TileDataEnc *tile_data,
int mi_row, int mi_col, RD_COST *rd_cost,
@@ -1094,7 +1157,6 @@
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
const SVC *const svc = &cpi->svc;
- TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mi = xd->mi[0];
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -1113,7 +1175,7 @@
unsigned int var_y = UINT_MAX;
unsigned int sse_y = UINT_MAX;
const int intra_cost_penalty = set_intra_cost_penalty(cpi, bsize);
- const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
+ int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
intra_cost_penalty, 0);
const int *const rd_threshes = cpi->rd.threshes[mi->segment_id][bsize];
const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
@@ -1144,6 +1206,7 @@
int best_early_term = 0;
int ref_frame_cost[MAX_REF_FRAMES];
int svc_force_zero_mode[3] = {0};
+ int perform_intra_pred = 1;
#if CONFIG_VP9_TEMPORAL_DENOISING
int64_t zero_last_cost_orig = INT64_MAX;
#endif
@@ -1209,38 +1272,9 @@
}
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
- const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
-
- x->pred_mv_sad[ref_frame] = INT_MAX;
- frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
- frame_mv[ZEROMV][ref_frame].as_int = 0;
-
- if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
- int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
- const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
-
- vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
- sf, sf);
-
- if (cm->use_prev_frame_mvs)
- vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
- candidates, mi_row, mi_col, x->mbmi_ext->mode_context);
- else
- const_motion[ref_frame] = mv_refs_rt(cm, x, xd, tile_info,
- xd->mi[0],
- ref_frame, candidates,
- mi_row, mi_col);
-
- vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
- &frame_mv[NEARESTMV][ref_frame],
- &frame_mv[NEARMV][ref_frame]);
-
- if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8)
- vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
- ref_frame, bsize);
- } else {
- ref_frame_skip_mask |= (1 << ref_frame);
- }
+ find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
+ &ref_frame_skip_mask, flag_list, tile_data, mi_row, mi_col,
+ yv12_mb, bsize);
}
for (idx = 0; idx < RT_INTER_MODES; ++idx) {
@@ -1330,8 +1364,36 @@
cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, &dis,
&x->pred_sse[ref_frame], NULL, 0, 0);
+ } else if (svc->use_base_mv && svc->spatial_layer_id) {
+ if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV &&
+ frame_mv[NEWMV][ref_frame].as_int != 0) {
+ const int pre_stride = xd->plane[0].pre[0].stride;
+ int base_mv_sad = INT_MAX;
+ const uint8_t * const pre_buf = xd->plane[0].pre[0].buf +
+ (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride +
+ (frame_mv[NEWMV][ref_frame].as_mv.col >> 3);
+ base_mv_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf,
+ x->plane[0].src.stride,
+ pre_buf, pre_stride);
+
+ // TODO(wonkap): base the decision to use the base layer mv on RD cost,
+ // not just SAD.
+ if (base_mv_sad < x->pred_mv_sad[ref_frame]) {
+ // Base layer mv is good.
+ if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 1)) {
+ continue;
+ }
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) {
+ continue;
+ }
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) {
+ continue;
+ }
} else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost)) {
+ &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) {
continue;
}
}
@@ -1593,11 +1655,20 @@
xd->mi[0]->bmi[0].as_mv[0].as_int = mi->mv[0].as_int;
x->skip_txfm[0] = best_mode_skip_txfm;
+ // Perform intra prediction only if base layer is chosen as the reference.
+ if (cpi->svc.spatial_layer_id) {
+ perform_intra_pred =
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
+ (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame
+ && svc_force_zero_mode[best_ref_frame]);
+ inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
+ }
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
- if (best_rdc.rdcost == INT64_MAX ||
+ if (perform_intra_pred &&
+ ((best_rdc.rdcost == INT64_MAX ||
(!x->skip && best_rdc.rdcost > inter_mode_thresh &&
- bsize <= cpi->sf.max_intra_bsize)) {
+ bsize <= cpi->sf.max_intra_bsize)))) {
struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
int i;
TX_SIZE best_intra_tx_size = TX_SIZES;
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -33,6 +33,7 @@
svc->first_spatial_layer_to_encode = 0;
svc->rc_drop_superframe = 0;
svc->force_zero_mode_spatial_ref = 0;
+ svc->use_base_mv = 0;
svc->current_superframe = 0;
for (i = 0; i < REF_FRAMES; ++i)
svc->ref_frame_index[i] = -1;
@@ -416,7 +417,9 @@
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
// base layer is a key frame.
- cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ cpi->ext_refresh_last_frame = 0;
+ cpi->ext_refresh_golden_frame = 1;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
@@ -431,8 +434,14 @@
} else {
if (frame_num_within_temporal_struct == 1) {
// the first tl2 picture
- if (!spatial_id) {
+ if (spatial_id == cpi->svc.number_spatial_layers - 1) { // top layer
cpi->ext_refresh_frame_flags_pending = 1;
+ if (!spatial_id)
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ else
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ } else if (!spatial_id) {
+ cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_alt_ref_frame = 1;
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (spatial_id < cpi->svc.number_spatial_layers - 1) {
@@ -439,32 +448,38 @@
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_alt_ref_frame = 1;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
- } else { // Top layer
- cpi->ext_refresh_frame_flags_pending = 0;
- cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
} else {
// The second tl2 picture
- if (!spatial_id) {
+ if (spatial_id == cpi->svc.number_spatial_layers - 1) { // top layer
cpi->ext_refresh_frame_flags_pending = 1;
+ if (!spatial_id)
cpi->ref_frame_flags = VP9_LAST_FLAG;
- cpi->ext_refresh_last_frame = 1;
- } else if (spatial_id < cpi->svc.number_spatial_layers - 1) {
+ else
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ } else if (!spatial_id) {
cpi->ext_refresh_frame_flags_pending = 1;
- cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
- cpi->ext_refresh_last_frame = 1;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ cpi->ext_refresh_alt_ref_frame = 1;
} else { // top layer
- cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->ext_refresh_frame_flags_pending = 1;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ cpi->ext_refresh_alt_ref_frame = 1;
}
}
}
if (temporal_id == 0) {
cpi->lst_fb_idx = spatial_id;
- if (spatial_id)
+ if (spatial_id) {
+ if (cpi->svc.layer_context[temporal_id].is_key_frame) {
+ cpi->lst_fb_idx = spatial_id - 1;
+ cpi->gld_fb_idx = spatial_id;
+ } else {
cpi->gld_fb_idx = spatial_id - 1;
- else
+ }
+ } else {
cpi->gld_fb_idx = 0;
+ }
cpi->alt_fb_idx = 0;
} else if (temporal_id == 1) {
cpi->lst_fb_idx = spatial_id;
@@ -477,7 +492,7 @@
} else {
cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
- cpi->alt_fb_idx = 0;
+ cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
}
}
@@ -499,7 +514,9 @@
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
// base layer is a key frame.
- cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ cpi->ext_refresh_last_frame = 0;
+ cpi->ext_refresh_golden_frame = 1;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
@@ -515,10 +532,16 @@
if (temporal_id == 0) {
cpi->lst_fb_idx = spatial_id;
- if (spatial_id)
+ if (spatial_id) {
+ if (cpi->svc.layer_context[temporal_id].is_key_frame) {
+ cpi->lst_fb_idx = spatial_id - 1;
+ cpi->gld_fb_idx = spatial_id;
+ } else {
cpi->gld_fb_idx = spatial_id - 1;
- else
+ }
+ } else {
cpi->gld_fb_idx = 0;
+ }
cpi->alt_fb_idx = 0;
} else if (temporal_id == 1) {
cpi->lst_fb_idx = spatial_id;
@@ -540,20 +563,30 @@
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (cpi->svc.layer_context[0].is_key_frame) {
- cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ cpi->ext_refresh_last_frame = 0;
+ cpi->ext_refresh_golden_frame = 1;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
cpi->lst_fb_idx = spatial_id;
- if (spatial_id)
+ if (spatial_id) {
+ if (cpi->svc.layer_context[0].is_key_frame) {
+ cpi->lst_fb_idx = spatial_id - 1;
+ cpi->gld_fb_idx = spatial_id;
+ } else {
cpi->gld_fb_idx = spatial_id - 1;
- else
+ }
+ } else {
cpi->gld_fb_idx = 0;
+ }
}
int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc = NULL;
+ if (cpi->svc.number_spatial_layers > 1)
+ cpi->svc.use_base_mv = 1;
cpi->svc.force_zero_mode_spatial_ref = 1;
if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -86,6 +86,7 @@
int ref_frame_index[REF_FRAMES];
int force_zero_mode_spatial_ref;
int current_superframe;
+ int use_base_mv;
} SVC;
struct VP9_COMP;
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -322,8 +322,7 @@
for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
if (si->svc_params.scaling_factor_den[sl] > 0) {
- alloc_ratio[sl] = (float)(si->svc_params.scaling_factor_num[sl] *
- 1.0 / si->svc_params.scaling_factor_den[sl]);
+ alloc_ratio[sl] = (float)(sl + 1);
total += alloc_ratio[sl];
}
}
@@ -334,9 +333,9 @@
alloc_ratio[sl] / total);
if (svc_ctx->temporal_layering_mode == 3) {
enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] =
- spatial_layer_target >> 1;
+ (spatial_layer_target * 6) / 10;  // 60%
enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] =
- (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
+ (spatial_layer_target * 8) / 10;  // 80%
enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] =
spatial_layer_target;
} else if (svc_ctx->temporal_layering_mode == 2 ||
@@ -398,11 +397,13 @@
si->width = enc_cfg->g_w;
si->height = enc_cfg->g_h;
- if (enc_cfg->kf_max_dist < 2) {
+// wonkap: why is this necessary?
+ /*if (enc_cfg->kf_max_dist < 2) {
svc_log(svc_ctx, SVC_LOG_ERROR, "key frame distance too small: %d\n",
enc_cfg->kf_max_dist);
return VPX_CODEC_INVALID_PARAM;
- }
+ }*/
+
si->kf_dist = enc_cfg->kf_max_dist;
if (svc_ctx->spatial_layers == 0)
@@ -577,6 +578,27 @@
}
#endif
#endif
+ case VPX_CODEC_PSNR_PKT:
+ {
+#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION)
+ int j;
+ svc_log(svc_ctx, SVC_LOG_DEBUG,
+ "frame: %d, layer: %d, PSNR(Total/Y/U/V): "
+ "%2.3f %2.3f %2.3f %2.3f \n",
+ si->psnr_pkt_received, 0,
+ cx_pkt->data.layer_psnr[0].psnr[0],
+ cx_pkt->data.layer_psnr[0].psnr[1],
+ cx_pkt->data.layer_psnr[0].psnr[2],
+ cx_pkt->data.layer_psnr[0].psnr[3]);
+ for (j = 0; j < COMPONENTS; ++j) {
+ si->psnr_sum[0][j] +=
+ cx_pkt->data.layer_psnr[0].psnr[j];
+ si->sse_sum[0][j] += cx_pkt->data.layer_psnr[0].sse[j];
+ }
+#endif
+ }
+ ++si->psnr_pkt_received;
+ break;
default: {
break;
}