shithub: libvpx

--- a/examples/vp9_spatial_svc_encoder.c

+++ b/examples/vp9_spatial_svc_encoder.c

@@ -38,8 +38,10 @@

     ARG_DEF("t", "timebase", 1, "timebase (num/den)");

 static const arg_def_t bitrate_arg = ARG_DEF(

     "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");

-static const arg_def_t layers_arg =

-    ARG_DEF("l", "layers", 1, "number of SVC layers");

+static const arg_def_t spatial_layers_arg =

+    ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");

+static const arg_def_t temporal_layers_arg =

+    ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");

 static const arg_def_t kf_dist_arg =

     ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");

 static const arg_def_t scale_factors_arg =

@@ -65,10 +67,11 @@

 static const arg_def_t *svc_args[] = {

   &frames_arg,        &width_arg,         &height_arg,

-  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &layers_arg,

+  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &spatial_layers_arg,

   &kf_dist_arg,       &scale_factors_arg, &quantizers_arg,  &passes_arg,

   &pass_arg,          &fpf_name_arg,      &min_q_arg,       &max_q_arg,

-  &min_bitrate_arg,   &max_bitrate_arg,   NULL

+  &min_bitrate_arg,   &max_bitrate_arg,   &temporal_layers_arg,

+  NULL

};

 static const uint32_t default_frames_to_skip = 0;

@@ -79,6 +82,7 @@

 static const uint32_t default_timebase_den = 60;

 static const uint32_t default_bitrate = 1000;

 static const uint32_t default_spatial_layers = 5;

+static const uint32_t default_temporal_layers = 1;

 static const uint32_t default_kf_dist = 100;

 typedef struct {

@@ -119,6 +123,7 @@

   // initialize SvcContext with parameters that will be passed to vpx_svc_init

   svc_ctx->log_level = SVC_LOG_DEBUG;

   svc_ctx->spatial_layers = default_spatial_layers;

+  svc_ctx->temporal_layers = default_temporal_layers;

   // start with default encoder configuration

   res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);

@@ -156,8 +161,10 @@

       enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);

     } else if (arg_match(&arg, &skip_frames_arg, argi)) {

       app_input->frames_to_skip = arg_parse_uint(&arg);

-    } else if (arg_match(&arg, &layers_arg, argi)) {

+    } else if (arg_match(&arg, &spatial_layers_arg, argi)) {

       svc_ctx->spatial_layers = arg_parse_uint(&arg);

+    } else if (arg_match(&arg, &temporal_layers_arg, argi)) {

+      svc_ctx->temporal_layers = arg_parse_uint(&arg);

     } else if (arg_match(&arg, &kf_dist_arg, argi)) {

       enc_cfg->kf_min_dist = arg_parse_uint(&arg);

       enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;

--- a/test/svc_test.cc

+++ b/test/svc_test.cc

@@ -212,13 +212,13 @@

     EXPECT_EQ(received_frames, n);

-  void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,

-                             const int num_super_frames,

-                             const int remained_layers,

-                             const bool is_multiple_frame_context) {

+  void DropLayersAndMakeItVP9Comaptible(struct vpx_fixed_buf *const inputs,

+                                        const int num_super_frames,

+                                        const int remained_spatial_layers,

+                                        const bool is_multiple_frame_contexts) {

     ASSERT_TRUE(inputs != NULL);

     ASSERT_GT(num_super_frames, 0);

-    ASSERT_GT(remained_layers, 0);

+    ASSERT_GT(remained_spatial_layers, 0);

     for (int i = 0; i < num_super_frames; ++i) {

       uint32_t frame_sizes[8] = {0};

@@ -234,64 +234,110 @@

                                      NULL, NULL);

       ASSERT_EQ(VPX_CODEC_OK, res);

-      uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf);

-      uint8_t *frame_start = frame_data;

-      for (frame = 0; frame < frame_count; ++frame) {

-        // Looking for a visible frame.

-        if (frame_data[0] & 0x02) {

-          ++frames_found;

-          if (frames_found == remained_layers)

-            break;

+      if (frame_count == 0) {

+        // There's no super frame but only a single frame.

+        ASSERT_EQ(1, remained_spatial_layers);

+        if (is_multiple_frame_contexts) {

+          // Make a new super frame.

+          uint8_t marker = 0xc1;

+          unsigned int mask;

+          int mag;

+          // Choose the magnitude.

+          for (mag = 0, mask = 0xff; mag < 4; ++mag) {

+            if (inputs[i].sz < mask)

+              break;

+            mask <<= 8;

+            mask |= 0xff;

+          }

+          marker |= mag << 3;

+          int index_sz = 2 + (mag + 1) * 2;

+          inputs[i].buf = realloc(inputs[i].buf, inputs[i].sz + index_sz + 16);

+          ASSERT_TRUE(inputs[i].buf != NULL);

+          uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf);

+          frame_data[0] &= ~2;      // Set the show_frame flag to 0.

+          frame_data += inputs[i].sz;

+          // Add a one byte frame with show_existing_frame.

+          *frame_data++ = 0x88;

+          // Write the super frame index.

+          *frame_data++ = marker;

+          frame_sizes[0] = inputs[i].sz;

+          frame_sizes[1] = 1;

+          for (int j = 0; j < 2; ++j) {

+            unsigned int this_sz = frame_sizes[j];

+            for (int k = 0; k <= mag; k++) {

+              *frame_data++ = this_sz & 0xff;

+              this_sz >>= 8;

+            }

+          }

+          *frame_data++ = marker;

+          inputs[i].sz += index_sz + 1;

+      } else {

+        // Found a super frame.

+        uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf);

+        uint8_t *frame_start = frame_data;

+        for (frame = 0; frame < frame_count; ++frame) {

+          // Looking for a visible frame.

+          if (frame_data[0] & 0x02) {

+            ++frames_found;

+            if (frames_found == remained_spatial_layers)

+              break;

+          }

+          frame_data += frame_sizes[frame];

+        }

+        ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. "

+            << "remained_spatial_layers: " << remained_spatial_layers

+            << "    super_frame: " << i

+            << "    is_multiple_frame_context: " << is_multiple_frame_contexts;

+        if (frame == frame_count - 1 && !is_multiple_frame_contexts)

+          continue;

         frame_data += frame_sizes[frame];

-      }

-      ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. "

-          << "remaining_layers: " << remained_layers

-          << "    super_frame: " << i

-          << "    is_multiple_frame_context: " << is_multiple_frame_context;

-      if (frame == frame_count - 1 && !is_multiple_frame_context)

-        continue;

-      frame_data += frame_sizes[frame];

-      // We need to add one more frame for multiple frame context.

-      if (is_multiple_frame_context)

-        ++frame;

-      uint8_t marker =

-          static_cast<const uint8_t *>(inputs[i].buf)[inputs[i].sz - 1];

-      const uint32_t mag = ((marker >> 3) & 0x3) + 1;

-      const size_t index_sz = 2 + mag * frame_count;

-      const size_t new_index_sz = 2 + mag * (frame + 1);

-      marker &= 0x0f8;

-      marker |= frame;

+        // We need to add one more frame for multiple frame contexts.

+        if (is_multiple_frame_contexts)

+          ++frame;

+        uint8_t marker =

+            static_cast<const uint8_t*>(inputs[i].buf)[inputs[i].sz - 1];

+        const uint32_t mag = ((marker >> 3) & 0x3) + 1;

+        const size_t index_sz = 2 + mag * frame_count;

+        const size_t new_index_sz = 2 + mag * (frame + 1);

+        marker &= 0x0f8;

+        marker |= frame;

-      // Copy existing frame sizes.

-      memmove(frame_data + (is_multiple_frame_context ? 2 : 1),

-              frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2);

-      if (is_multiple_frame_context) {

-        // Add a one byte frame with flag show_existing frame.

-        *frame_data++ = 0x88 | (remained_layers - 1);

-      }

-      // New marker.

-      frame_data[0] = marker;

-      frame_data += (mag * (frame + 1) + 1);

+        // Copy existing frame sizes.

+        memmove(frame_data + (is_multiple_frame_contexts ? 2 : 1),

+                frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2);

+        if (is_multiple_frame_contexts) {

+          // Add a one byte frame with flag show_existing_frame.

+          *frame_data++ = 0x88 | (remained_spatial_layers - 1);

+        }

+        // New marker.

+        frame_data[0] = marker;

+        frame_data += (mag * (frame + 1) + 1);

-      if (is_multiple_frame_context) {

-        // Write the frame size for the one byte frame.

-        frame_data -= mag;

-        *frame_data++ = 1;

-        for (uint32_t j = 1; j < mag; ++j) {

-          *frame_data++ = 0;

+        if (is_multiple_frame_contexts) {

+          // Write the frame size for the one byte frame.

+          frame_data -= mag;

+          *frame_data++ = 1;

+          for (uint32_t j = 1; j < mag; ++j) {

+            *frame_data++ = 0;

+          }

-      }

-      *frame_data++ = marker;

-      inputs[i].sz = frame_data - frame_start;

+        *frame_data++ = marker;

+        inputs[i].sz = frame_data - frame_start;

-      if (is_multiple_frame_context) {

-        // Change the show frame flag to 0 for all frames.

-        for (int j = 0; j < frame; ++j) {

-          frame_start[0] &= ~2;

-          frame_start += frame_sizes[j];

+        if (is_multiple_frame_contexts) {

+          // Change the show frame flag to 0 for all frames.

+          for (int j = 0; j < frame; ++j) {

+            frame_start[0] &= ~2;

+            frame_start += frame_sizes[j];

+          }

@@ -359,7 +405,7 @@

 TEST_F(SvcTest, SetLayersOption) {

-  vpx_codec_err_t res = vpx_svc_set_options(&svc_, "layers=3");

+  vpx_codec_err_t res = vpx_svc_set_options(&svc_, "spatial-layers=3");

   EXPECT_EQ(VPX_CODEC_OK, res);

   InitializeEncoder();

   EXPECT_EQ(3, svc_.spatial_layers);

@@ -367,7 +413,7 @@

 TEST_F(SvcTest, SetMultipleOptions) {

   vpx_codec_err_t res =

-      vpx_svc_set_options(&svc_, "layers=2 scale-factors=1/3,2/3");

+      vpx_svc_set_options(&svc_, "spatial-layers=2 scale-factors=1/3,2/3");

   EXPECT_EQ(VPX_CODEC_OK, res);

   InitializeEncoder();

   EXPECT_EQ(2, svc_.spatial_layers);

@@ -529,7 +575,7 @@

   FreeBitstreamBuffers(&outputs[0], 20);

-TEST_F(SvcTest, TwoPassEncode2LayersDecodeBaseLayerOnly) {

+TEST_F(SvcTest, TwoPassEncode2SpatialLayersDecodeBaseLayerOnly) {

   // First pass encode

   std::string stats_buf;

   Pass1EncodeNFrames(10, 2, &stats_buf);

@@ -540,12 +586,12 @@

   vpx_fixed_buf outputs[10];

   memset(&outputs[0], 0, sizeof(outputs));

   Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);

-  DropEnhancementLayers(&outputs[0], 10, 1, false);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false);

   DecodeNFrames(&outputs[0], 10);

   FreeBitstreamBuffers(&outputs[0], 10);

-TEST_F(SvcTest, TwoPassEncode5LayersDecode54321Layers) {

+TEST_F(SvcTest, TwoPassEncode5SpatialLayersDecode54321Layers) {

   // First pass encode

   std::string stats_buf;

   Pass1EncodeNFrames(10, 5, &stats_buf);

@@ -558,13 +604,13 @@

   Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]);

   DecodeNFrames(&outputs[0], 10);

-  DropEnhancementLayers(&outputs[0], 10, 4, false);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 4, false);

   DecodeNFrames(&outputs[0], 10);

-  DropEnhancementLayers(&outputs[0], 10, 3, false);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 3, false);

   DecodeNFrames(&outputs[0], 10);

-  DropEnhancementLayers(&outputs[0], 10, 2, false);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, false);

   DecodeNFrames(&outputs[0], 10);

-  DropEnhancementLayers(&outputs[0], 10, 1, false);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false);

   DecodeNFrames(&outputs[0], 10);

   FreeBitstreamBuffers(&outputs[0], 10);

@@ -601,15 +647,15 @@

   memset(&outputs[0], 0, sizeof(outputs));

   Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]);

   DecodeNFrames(&outputs[0], 20);

-  DropEnhancementLayers(&outputs[0], 20, 2, false);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 2, false);

   DecodeNFrames(&outputs[0], 20);

-  DropEnhancementLayers(&outputs[0], 20, 1, false);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 1, false);

   DecodeNFrames(&outputs[0], 20);

   FreeBitstreamBuffers(&outputs[0], 20);

-TEST_F(SvcTest, SetMultipleFrameContextOption) {

+TEST_F(SvcTest, SetMultipleFrameContextsOption) {

   svc_.spatial_layers = 5;

   vpx_codec_err_t res =

       vpx_svc_set_options(&svc_, "multi-frame-contexts=1");

@@ -622,7 +668,7 @@

   InitializeEncoder();

-TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContext) {

+TEST_F(SvcTest, TwoPassEncode2SpatialLayersWithMultipleFrameContexts) {

   // First pass encode

   std::string stats_buf;

   Pass1EncodeNFrames(10, 2, &stats_buf);

@@ -634,12 +680,13 @@

   vpx_fixed_buf outputs[10];

   memset(&outputs[0], 0, sizeof(outputs));

   Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);

-  DropEnhancementLayers(&outputs[0], 10, 2, true);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true);

   DecodeNFrames(&outputs[0], 10);

   FreeBitstreamBuffers(&outputs[0], 10);

-TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContextDecodeBaselayer) {

+TEST_F(SvcTest,

+       TwoPassEncode2SpatialLayersWithMultipleFrameContextsDecodeBaselayer) {

   // First pass encode

   std::string stats_buf;

   Pass1EncodeNFrames(10, 2, &stats_buf);

@@ -651,12 +698,12 @@

   vpx_fixed_buf outputs[10];

   memset(&outputs[0], 0, sizeof(outputs));

   Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);

-  DropEnhancementLayers(&outputs[0], 10, 1, true);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);

   DecodeNFrames(&outputs[0], 10);

   FreeBitstreamBuffers(&outputs[0], 10);

-TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContext) {

+TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContexts) {

   // First pass encode

   std::string stats_buf;

   vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1");

@@ -670,12 +717,13 @@

   vpx_fixed_buf outputs[10];

   memset(&outputs[0], 0, sizeof(outputs));

   Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);

-  DropEnhancementLayers(&outputs[0], 10, 2, true);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true);

   DecodeNFrames(&outputs[0], 10);

   FreeBitstreamBuffers(&outputs[0], 10);

-TEST_F(SvcTest, TwoPassEncode3SNRLayersWithMultipleFrameContextDecode321Layer) {

+TEST_F(SvcTest,

+       TwoPassEncode3SNRLayersWithMultipleFrameContextsDecode321Layer) {

   // First pass encode

   std::string stats_buf;

   vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1");

@@ -697,7 +745,7 @@

     memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);

     outputs_new[i].sz = outputs[i].sz;

-  DropEnhancementLayers(&outputs_new[0], 10, 3, true);

+  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 3, true);

   DecodeNFrames(&outputs_new[0], 10);

   for (int i = 0; i < 10; ++i) {

@@ -704,7 +752,7 @@

     memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);

     outputs_new[i].sz = outputs[i].sz;

-  DropEnhancementLayers(&outputs_new[0], 10, 2, true);

+  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 2, true);

   DecodeNFrames(&outputs_new[0], 10);

   for (int i = 0; i < 10; ++i) {

@@ -711,11 +759,100 @@

     memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);

     outputs_new[i].sz = outputs[i].sz;

-  DropEnhancementLayers(&outputs_new[0], 10, 1, true);

+  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 1, true);

   DecodeNFrames(&outputs_new[0], 10);

   FreeBitstreamBuffers(&outputs[0], 10);

   FreeBitstreamBuffers(&outputs_new[0], 10);

+}

+TEST_F(SvcTest, TwoPassEncode2TemporalLayers) {

+  // First pass encode: a single 1/1 scale factor and two temporal layers.

+  std::string stats_buf;

+  vpx_svc_set_options(&svc_, "scale-factors=1/1");

+  svc_.temporal_layers = 2;

+  Pass1EncodeNFrames(10, 1, &stats_buf);

+  // Second pass encode: same layer setup, plus auto-alt-refs=1.

+  codec_enc_.g_pass = VPX_RC_LAST_PASS;

+  svc_.temporal_layers = 2;

+  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1");

+  vpx_fixed_buf outputs[10];

+  memset(&outputs[0], 0, sizeof(outputs));

+  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);

+  DecodeNFrames(&outputs[0], 10);

+  FreeBitstreamBuffers(&outputs[0], 10);

+}

+TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithMultipleFrameContexts) {

+  // First pass encode: a single 1/1 scale factor and two temporal layers.

+  std::string stats_buf;

+  vpx_svc_set_options(&svc_, "scale-factors=1/1");

+  svc_.temporal_layers = 2;

+  Pass1EncodeNFrames(10, 1, &stats_buf);

+  // Second pass encode: error resilience off, multi-frame-contexts=1.

+  codec_enc_.g_pass = VPX_RC_LAST_PASS;

+  svc_.temporal_layers = 2;

+  codec_enc_.g_error_resilient = 0;

+  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1 "

+                      "multi-frame-contexts=1");

+  vpx_fixed_buf outputs[10];

+  memset(&outputs[0], 0, sizeof(outputs));

+  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);

+  DecodeNFrames(&outputs[0], 10);

+  FreeBitstreamBuffers(&outputs[0], 10);

+}

+TEST_F(SvcTest, TwoPassEncode2TemporalLayersDecodeBaseLayer) {

+  // First pass encode: a single 1/1 scale factor and two temporal layers.

+  std::string stats_buf;

+  vpx_svc_set_options(&svc_, "scale-factors=1/1");

+  svc_.temporal_layers = 2;

+  Pass1EncodeNFrames(10, 1, &stats_buf);

+  // Second pass encode: same layer setup, plus auto-alt-refs=1.

+  codec_enc_.g_pass = VPX_RC_LAST_PASS;

+  svc_.temporal_layers = 2;

+  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1");

+  vpx_fixed_buf outputs[10];

+  memset(&outputs[0], 0, sizeof(outputs));

+  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);

+  vpx_fixed_buf base_layer[5];

+  for (int i = 0; i < 5; ++i)

+    base_layer[i] = outputs[i * 2];  // Keep only even-indexed outputs.

+  DecodeNFrames(&base_layer[0], 5);

+  FreeBitstreamBuffers(&outputs[0], 10);

+}

+TEST_F(SvcTest,

+       TwoPassEncode2TemporalLayersWithMultipleFrameContextsDecodeBaseLayer) {

+  // First pass encode

+  std::string stats_buf;

+  vpx_svc_set_options(&svc_, "scale-factors=1/1");

+  svc_.temporal_layers = 2;

+  Pass1EncodeNFrames(10, 1, &stats_buf);

+  // Second pass encode

+  codec_enc_.g_pass = VPX_RC_LAST_PASS;

+  svc_.temporal_layers = 2;

+  codec_enc_.g_error_resilient = 0;

+  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1 "

+                      "multi-frame-contexts=1");

+  vpx_fixed_buf outputs[10];

+  memset(&outputs[0], 0, sizeof(outputs));

+  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);

+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);

+  vpx_fixed_buf base_layer[5];

+  for (int i = 0; i < 5; ++i)

+    base_layer[i] = outputs[i * 2];

+  DecodeNFrames(&base_layer[0], 5);

+  FreeBitstreamBuffers(&outputs[0], 10);

 }  // namespace

--- a/vp9/encoder/vp9_bitstream.c

+++ b/vp9/encoder/vp9_bitstream.c

@@ -999,8 +999,10 @@

     // Set "found" to 0 for temporal svc and for spatial svc key frame

     if (cpi->use_svc &&

-        (cpi->svc.number_spatial_layers == 1 ||

-         cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame)) {

+        ((cpi->svc.number_temporal_layers > 1 &&

+         cpi->oxcf.rc_mode == VPX_CBR) ||

+        (cpi->svc.number_spatial_layers > 1 &&

+         cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame))) {

       found = 0;

     vp9_wb_write_bit(wb, found);

@@ -1093,7 +1095,7 @@

     // show_existing_frame flag which tells the decoder which frame we want to

     // show.

     if (!cm->show_frame ||

-        (is_spatial_svc(cpi) && cm->error_resilient_mode == 0))

+        (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0))

       vp9_wb_write_bit(wb, cm->intra_only);

     if (!cm->error_resilient_mode)

--- a/vp9/encoder/vp9_bitstream.h

+++ b/vp9/encoder/vp9_bitstream.h

@@ -26,7 +26,7 @@

   return !cpi->multi_arf_allowed && cpi->refresh_golden_frame &&

          cpi->rc.is_src_frame_alt_ref &&

          (!cpi->use_svc ||      // Add spatial svc base layer case here

-          (is_spatial_svc(cpi) &&

+          (is_two_pass_svc(cpi) &&

            cpi->svc.spatial_layer_id == 0 &&

            cpi->svc.layer_context[0].gold_ref_idx >=0 &&

            cpi->oxcf.ss_play_alternate[0]));

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -128,7 +128,7 @@

   if (cm->frame_type == KEY_FRAME) {

-    if (!is_spatial_svc(cpi))

+    if (!is_two_pass_svc(cpi))

       cpi->refresh_golden_frame = 1;

     cpi->refresh_alt_ref_frame = 1;

     vp9_zero(cpi->interp_filter_selected);

@@ -525,7 +525,7 @@

   vp9_init_context_buffers(cm);

   init_macroblockd(cm, xd);

-  if (is_spatial_svc(cpi)) {

+  if (is_two_pass_svc(cpi)) {

     if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,

                                  cm->width, cm->height,

                                  cm->subsampling_x, cm->subsampling_y,

@@ -580,7 +580,9 @@

   cpi->svc.number_temporal_layers = oxcf->ts_number_layers;

   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||

-      (cpi->svc.number_spatial_layers > 1 && cpi->oxcf.pass == 2)) {

+      ((cpi->svc.number_temporal_layers > 1 ||

+        cpi->svc.number_spatial_layers > 1) &&

+       cpi->oxcf.pass == 2)) {

     vp9_init_layer_context(cpi);

@@ -672,7 +674,9 @@

   if ((cpi->svc.number_temporal_layers > 1 &&

       cpi->oxcf.rc_mode == VPX_CBR) ||

-      (cpi->svc.number_spatial_layers > 1 && cpi->oxcf.pass == 2)) {

+      ((cpi->svc.number_temporal_layers > 1 ||

+        cpi->svc.number_spatial_layers > 1) &&

+       cpi->oxcf.pass == 2)) {

     vp9_update_layer_context_change_config(cpi,

                                            (int)cpi->oxcf.target_bandwidth);

@@ -923,7 +927,7 @@

     const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);

     if (cpi->svc.number_spatial_layers > 1

-        && cpi->svc.number_temporal_layers == 1) {

+        || cpi->svc.number_temporal_layers > 1) {

       FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf;

       FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = {0};

       int i;

@@ -1531,7 +1535,7 @@

     cpi->alt_fb_idx = cpi->gld_fb_idx;

     cpi->gld_fb_idx = tmp;

-    if (is_spatial_svc(cpi)) {

+    if (is_two_pass_svc(cpi)) {

       cpi->svc.layer_context[0].gold_ref_idx = cpi->gld_fb_idx;

       cpi->svc.layer_context[0].alt_ref_idx = cpi->alt_fb_idx;

@@ -1960,8 +1964,7 @@

   if (gold_is_last)

     flags &= ~VP9_GOLD_FLAG;

-  if (cpi->rc.frames_till_gf_update_due == INT_MAX &&

-      !is_spatial_svc(cpi))

+  if (cpi->rc.frames_till_gf_update_due == INT_MAX && !is_two_pass_svc(cpi))

     flags &= ~VP9_GOLD_FLAG;

   if (alt_is_last)

@@ -2008,7 +2011,7 @@

   // can be skipped for partition check, and the partition size is assigned

   // according to the variance

   const SVC *const svc = &cpi->svc;

-  const TWO_PASS *const twopass = is_spatial_svc(cpi) ?

+  const TWO_PASS *const twopass = is_two_pass_svc(cpi) ?

       &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;

   return (!frame_is_intra_only(&cpi->common) &&

@@ -2160,18 +2163,34 @@

       cm->reset_frame_context = 2;

-  if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0) {

-    cm->frame_context_idx = cpi->svc.spatial_layer_id;

+  if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) {

+    cm->frame_context_idx =

+        cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers +

+        cpi->svc.temporal_layer_id;

     // The probs will be updated based on the frame type of its previous

     // frame if frame_parallel_decoding_mode is 0. The type may vary for

     // the frame after a key frame in base layer since we may drop enhancement

     // layers. So set frame_parallel_decoding_mode to 1 in this case.

-    if (cpi->svc.spatial_layer_id == 0 &&

-        cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)

-      cm->frame_parallel_decoding_mode = 1;

-    else

-      cm->frame_parallel_decoding_mode = 0;

+    if (cpi->svc.number_temporal_layers == 1) {

+      if (cpi->svc.spatial_layer_id == 0 &&

+          cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)

+        cm->frame_parallel_decoding_mode = 1;

+      else

+        cm->frame_parallel_decoding_mode = 0;

+    } else if (cpi->svc.spatial_layer_id == 0) {

+      // Find the 2nd frame in temporal base layer and 1st frame in temporal

+      // enhancement layers from the key frame.

+      int i;

+      for (i = 0; i < cpi->svc.number_temporal_layers; ++i) {

+        if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) {

+          cm->frame_parallel_decoding_mode = 1;

+          break;

+        }

+      }

+      if (i == cpi->svc.number_temporal_layers)

+        cm->frame_parallel_decoding_mode = 0;

+    }

   // Configure experimental use of segmentation for enhanced coding of

@@ -2184,7 +2203,7 @@

   // Check if the current frame is skippable for the partition search in the

   // second pass according to the first pass stats

   if (oxcf->pass == 2 &&

-      (!cpi->use_svc || is_spatial_svc(cpi))) {

+      (!cpi->use_svc || is_two_pass_svc(cpi))) {

     cpi->skippable_frame = is_skippable_frame(cpi);

@@ -2330,7 +2349,7 @@

   // reset to normal state now that we are done.

   if (!cm->show_existing_frame) {

-    if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0)

+    if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0)

       cm->last_show_frame = 0;

     else

       cm->last_show_frame = cm->show_frame;

@@ -2343,10 +2362,10 @@

     // update not a real frame

     ++cm->current_video_frame;

     if (cpi->use_svc)

-      vp9_inc_frame_in_layer(&cpi->svc);

+      vp9_inc_frame_in_layer(cpi);

-  if (is_spatial_svc(cpi))

+  if (is_two_pass_svc(cpi))

     cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type =

         cm->frame_type;

@@ -2421,7 +2440,7 @@

   vpx_usec_timer_start(&timer);

 #if CONFIG_SPATIAL_SVC

-  if (is_spatial_svc(cpi))

+  if (is_two_pass_svc(cpi))

     res = vp9_svc_lookahead_push(cpi, cpi->lookahead, sd, time_stamp, end_time,

                                  frame_flags);

   else

@@ -2557,7 +2576,7 @@

   MV_REFERENCE_FRAME ref_frame;

   int arf_src_index;

-  if (is_spatial_svc(cpi) && oxcf->pass == 2) {

+  if (is_two_pass_svc(cpi) && oxcf->pass == 2) {

 #if CONFIG_SPATIAL_SVC

     vp9_svc_lookahead_peek(cpi, cpi->lookahead, 0, 1);

 #endif

@@ -2581,7 +2600,7 @@

     assert(arf_src_index <= rc->frames_to_key);

 #if CONFIG_SPATIAL_SVC

-    if (is_spatial_svc(cpi))

+    if (is_two_pass_svc(cpi))

       source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, arf_src_index, 0);

     else

 #endif

@@ -2590,7 +2609,7 @@

       cpi->alt_ref_source = source;

 #if CONFIG_SPATIAL_SVC

-      if (is_spatial_svc(cpi) && cpi->svc.spatial_layer_id > 0) {

+      if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0) {

         int i;

         // Reference a hidden frame from a lower layer

         for (i = cpi->svc.spatial_layer_id - 1; i >= 0; --i) {

@@ -2625,7 +2644,7 @@

     // Get last frame source.

     if (cm->current_video_frame > 0) {

 #if CONFIG_SPATIAL_SVC

-      if (is_spatial_svc(cpi))

+      if (is_two_pass_svc(cpi))

         last_source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, -1, 0);

       else

 #endif

@@ -2636,7 +2655,7 @@

     // Read in the source frame.

 #if CONFIG_SPATIAL_SVC

-    if (is_spatial_svc(cpi))

+    if (is_two_pass_svc(cpi))

       source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);

     else

 #endif

@@ -2750,13 +2769,13 @@

   if (oxcf->pass == 1 &&

-      (!cpi->use_svc || is_spatial_svc(cpi))) {

+      (!cpi->use_svc || is_two_pass_svc(cpi))) {

     const int lossless = is_lossless_requested(oxcf);

     cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4;

     cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;

     vp9_first_pass(cpi, source);

   } else if (oxcf->pass == 2 &&

-      (!cpi->use_svc || is_spatial_svc(cpi))) {

+      (!cpi->use_svc || is_two_pass_svc(cpi))) {

     Pass2Encode(cpi, size, dest, frame_flags);

   } else if (cpi->use_svc) {

     SvcEncode(cpi, size, dest, frame_flags);

@@ -2779,8 +2798,10 @@

   // Save layer specific state.

   if ((cpi->svc.number_temporal_layers > 1 &&

-      oxcf->rc_mode == VPX_CBR) ||

-      (cpi->svc.number_spatial_layers > 1 && oxcf->pass == 2)) {

+       oxcf->rc_mode == VPX_CBR) ||

+      ((cpi->svc.number_temporal_layers > 1 ||

+        cpi->svc.number_spatial_layers > 1) &&

+       oxcf->pass == 2)) {

     vp9_save_layer_context(cpi);

--- a/vp9/encoder/vp9_encoder.h

+++ b/vp9/encoder/vp9_encoder.h

@@ -495,16 +495,17 @@

 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);

-static INLINE int is_spatial_svc(const struct VP9_COMP *const cpi) {

+static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) {

   return cpi->use_svc &&

-         cpi->svc.number_temporal_layers == 1 &&

-         cpi->svc.number_spatial_layers > 1;

+         (cpi->svc.number_temporal_layers > 1 ||

+          cpi->svc.number_spatial_layers > 1) &&

+         (cpi->oxcf.pass == 1 || cpi->oxcf.pass == 2);

 static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {

   return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&

          (cpi->oxcf.play_alternate &&

-          (!is_spatial_svc(cpi) ||

+          (!is_two_pass_svc(cpi) ||

            cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]));

--- a/vp9/encoder/vp9_firstpass.c

+++ b/vp9/encoder/vp9_firstpass.c

@@ -246,7 +246,7 @@

 void vp9_end_first_pass(VP9_COMP *cpi) {

-  if (is_spatial_svc(cpi)) {

+  if (is_two_pass_svc(cpi)) {

     int i;

     for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {

       output_stats(&cpi->svc.layer_context[i].twopass.total_stats,

@@ -422,8 +422,8 @@

   TWO_PASS *twopass = &cpi->twopass;

   const MV zero_mv = {0, 0};

   const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;

-  LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ?

-        &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0;

+  LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ?

+        &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : NULL;

 #if CONFIG_FP_MB_STATS

   if (cpi->use_fp_mb_stats) {

@@ -438,13 +438,13 @@

   if (lc != NULL) {

     MV_REFERENCE_FRAME ref_frame = LAST_FRAME;

-    const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL;

     twopass = &lc->twopass;

     if (cpi->common.current_video_frame == 0) {

       cpi->ref_frame_flags = 0;

     } else {

-      if (lc->current_video_frame_in_layer == 0)

+    if (lc->current_video_frame_in_layer <

+        (unsigned int)cpi->svc.number_temporal_layers)

         cpi->ref_frame_flags = VP9_GOLD_FLAG;

       else

         cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;

@@ -454,16 +454,17 @@

     // Use either last frame or alt frame for motion search.

     if (cpi->ref_frame_flags & VP9_LAST_FLAG) {

-      scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME);

+      first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME);

       ref_frame = LAST_FRAME;

+      if (first_ref_buf == NULL)

+        first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME);

     } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {

-      scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME);

+      first_ref_buf = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME);

       ref_frame = GOLDEN_FRAME;

+      if (first_ref_buf == NULL)

+        first_ref_buf = get_ref_frame_buffer(cpi, GOLDEN_FRAME);

-    if (scaled_ref_buf != NULL)

-      first_ref_buf = scaled_ref_buf;

     recon_y_stride = new_yv12->y_stride;

     recon_uv_stride = new_yv12->uv_stride;

     uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height);

@@ -914,7 +915,7 @@

   ++cm->current_video_frame;

   if (cpi->use_svc)

-    vp9_inc_frame_in_layer(&cpi->svc);

+    vp9_inc_frame_in_layer(cpi);

 static double calc_correction_factor(double err_per_mb,

@@ -952,7 +953,7 @@

                                          BPER_MB_NORMBITS) / num_mbs;

     int q;

     int is_svc_upper_layer = 0;

-    if (is_spatial_svc(cpi) && cpi->svc.spatial_layer_id > 0)

+    if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0)

       is_svc_upper_layer = 1;

     // Try and pick a max Q that will be high enough to encode the

@@ -980,9 +981,9 @@

 void vp9_init_second_pass(VP9_COMP *cpi) {

   SVC *const svc = &cpi->svc;

   const VP9EncoderConfig *const oxcf = &cpi->oxcf;

-  const int is_spatial_svc = (svc->number_spatial_layers > 1) &&

-                             (svc->number_temporal_layers == 1);

-  TWO_PASS *const twopass = is_spatial_svc ?

+  const int is_two_pass_svc = (svc->number_spatial_layers > 1) ||

+                              (svc->number_temporal_layers > 1);

+  TWO_PASS *const twopass = is_two_pass_svc ?

       &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;

   double frame_rate;

   FIRSTPASS_STATS *stats;

@@ -1005,7 +1006,7 @@

   // It is calculated based on the actual durations of all frames from the

   // first pass.

-  if (is_spatial_svc) {

+  if (is_two_pass_svc) {

     vp9_update_spatial_layer_framerate(cpi, frame_rate);

     twopass->bits_left = (int64_t)(stats->duration *

         svc->layer_context[svc->spatial_layer_id].target_bandwidth /

@@ -1020,7 +1021,7 @@

   // scores used in the second pass. We have this minimum to make sure

   // that clips that are static but "low complexity" in the intra domain

   // are still boosted appropriately for KF/GF/ARF.

-  if (!is_spatial_svc) {

+  if (!is_two_pass_svc) {

     // We don't know the number of MBs for each layer at this point.

     // So we will do it later.

     twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;

@@ -1368,7 +1369,14 @@

   int mid_boost_bits = 0;

   int mid_frame_idx;

   unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];

+  int alt_frame_index = frame_index;

+  int has_temporal_layers = is_two_pass_svc(cpi) &&

+                            cpi->svc.number_temporal_layers > 1;

+  // Only encode alt reference frame in temporal base layer.

+  if (has_temporal_layers)

+    alt_frame_index = cpi->svc.number_temporal_layers;

   key_frame = cpi->common.frame_type == KEY_FRAME ||

               vp9_is_upper_layer_key_frame(cpi);

@@ -1403,16 +1411,24 @@

   // Store the bits to spend on the ARF if there is one.

   if (rc->source_alt_ref_pending) {

-    gf_group->update_type[frame_index] = ARF_UPDATE;

-    gf_group->rf_level[frame_index] = GF_ARF_STD;

-    gf_group->bit_allocation[frame_index] = gf_arf_bits;

-    gf_group->arf_src_offset[frame_index] =

-      (unsigned char)(rc->baseline_gf_interval - 1);

-    gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];

-    gf_group->arf_ref_idx[frame_index] =

+    gf_group->update_type[alt_frame_index] = ARF_UPDATE;

+    gf_group->rf_level[alt_frame_index] = GF_ARF_STD;

+    gf_group->bit_allocation[alt_frame_index] = gf_arf_bits;

+    if (has_temporal_layers)

+      gf_group->arf_src_offset[alt_frame_index] =

+          (unsigned char)(rc->baseline_gf_interval -

+                          cpi->svc.number_temporal_layers);

+    else

+      gf_group->arf_src_offset[alt_frame_index] =

+          (unsigned char)(rc->baseline_gf_interval - 1);

+    gf_group->arf_update_idx[alt_frame_index] = arf_buffer_indices[0];

+    gf_group->arf_ref_idx[alt_frame_index] =

       arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&

                          rc->source_alt_ref_active];

-    ++frame_index;

+    if (!has_temporal_layers)

+      ++frame_index;

     if (cpi->multi_arf_enabled) {

       // Set aside a slot for a level 1 arf.

@@ -1435,6 +1451,10 @@

     if (EOF == input_stats(twopass, &frame_stats))

       break;

+    if (has_temporal_layers && frame_index == alt_frame_index) {

+      ++frame_index;

+    }

     modified_err = calculate_modified_err(twopass, oxcf, &frame_stats);

     if (group_error > 0)

@@ -1656,6 +1676,21 @@

   else

     rc->baseline_gf_interval = i;

+  // Only encode alt reference frame in temporal base layer. So

+  // baseline_gf_interval should be a multiple of a temporal layer group

+  // (typically the frame distance between two base layer frames)

+  if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) {

+    int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1;

+    int new_gf_interval = (rc->baseline_gf_interval + count) & (~count);

+    int j;

+    for (j = 0; j < new_gf_interval - rc->baseline_gf_interval; ++j) {

+      if (EOF == input_stats(twopass, this_frame))

+        break;

+      gf_group_err += calculate_modified_err(twopass, oxcf, this_frame);

+    }

+    rc->baseline_gf_interval = new_gf_interval;

+  }

   rc->frames_till_gf_update_due = rc->baseline_gf_interval;

   // Should we use the alternate reference frame.

@@ -1928,6 +1963,18 @@

     rc->next_key_frame_forced = 0;

+  if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) {

+    int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1;

+    int new_frame_to_key = (rc->frames_to_key + count) & (~count);

+    int j;

+    for (j = 0; j < new_frame_to_key - rc->frames_to_key; ++j) {

+      if (EOF == input_stats(twopass, this_frame))

+        break;

+      kf_group_err += calculate_modified_err(twopass, oxcf, this_frame);

+    }

+    rc->frames_to_key = new_frame_to_key;

+  }

   // Special case for the last key frame of the file.

   if (twopass->stats_in >= twopass->stats_in_end) {

     // Accumulate kf group error.

@@ -2086,7 +2133,7 @@

       assert(0);

       break;

-  if (is_spatial_svc(cpi)) {

+  if (is_two_pass_svc(cpi)) {

     if (cpi->svc.layer_context[cpi->svc.spatial_layer_id].gold_ref_idx < 0)

       cpi->refresh_golden_frame = 0;

     if (cpi->alt_ref_source == NULL)

@@ -2105,7 +2152,7 @@

   FIRSTPASS_STATS this_frame_copy;

   int target_rate;

-  LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ?

+  LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ?

         &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0;

   if (lc != NULL) {

@@ -2188,9 +2235,11 @@

   if (lc != NULL) {

     if (cpi->svc.spatial_layer_id == 0) {

       lc->is_key_frame = (cm->frame_type == KEY_FRAME);

-      if (lc->is_key_frame)

+      if (lc->is_key_frame) {

         cpi->ref_frame_flags &=

             (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);

+        lc->frames_from_key_frame = 0;

+      }

     } else {

       cm->frame_type = INTER_FRAME;

       lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;

@@ -2197,6 +2246,7 @@

       if (lc->is_key_frame) {

         cpi->ref_frame_flags &= (~VP9_LAST_FLAG);

+        lc->frames_from_key_frame = 0;

--- a/vp9/encoder/vp9_ratectrl.c

+++ b/vp9/encoder/vp9_ratectrl.c

@@ -1235,7 +1235,7 @@

     cm->frame_type = KEY_FRAME;

     rc->source_alt_ref_active = 0;

-    if (is_spatial_svc(cpi)) {

+    if (is_two_pass_svc(cpi)) {

       cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1;

       cpi->ref_frame_flags &=

           (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);

@@ -1247,7 +1247,7 @@

   } else {

     cm->frame_type = INTER_FRAME;

-    if (is_spatial_svc(cpi)) {

+    if (is_two_pass_svc(cpi)) {

       LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];

       if (cpi->svc.spatial_layer_id == 0) {

         lc->is_key_frame = 0;

--- a/vp9/encoder/vp9_svc_layercontext.c

+++ b/vp9/encoder/vp9_svc_layercontext.c

@@ -19,12 +19,12 @@

   const VP9EncoderConfig *const oxcf = &cpi->oxcf;

   int layer;

   int layer_end;

-  int alt_ref_idx = svc->number_spatial_layers;

+  int alt_ref_idx = svc->number_spatial_layers * svc->number_temporal_layers;

   svc->spatial_layer_id = 0;

   svc->temporal_layer_id = 0;

-  if (svc->number_temporal_layers > 1) {

+  if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {

     layer_end = svc->number_temporal_layers;

   } else {

     layer_end = svc->number_spatial_layers;

@@ -36,6 +36,7 @@

     int i;

     lc->current_video_frame_in_layer = 0;

     lc->layer_size = 0;

+    lc->frames_from_key_frame = 0;

     lc->last_frame_type = FRAME_TYPES;

     lrc->ni_av_qi = oxcf->worst_allowed_q;

     lrc->total_actual_bits = 0;

@@ -51,7 +52,7 @@

       lrc->rate_correction_factors[i] = 1.0;

-    if (svc->number_temporal_layers > 1) {

+    if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {

       lc->target_bandwidth = oxcf->ts_target_bitrate[layer];

       lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;

       lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;

@@ -76,7 +77,8 @@

   // Still have extra buffer for base layer golden frame

-  if (svc->number_spatial_layers > 1 && alt_ref_idx < REF_FRAMES)

+  if (!(svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR)

+      && alt_ref_idx < REF_FRAMES)

     svc->layer_context[0].gold_ref_idx = alt_ref_idx;

@@ -90,7 +92,7 @@

   int layer_end;

   float bitrate_alloc = 1.0;

-  if (svc->number_temporal_layers > 1) {

+  if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {

     layer_end = svc->number_temporal_layers;

   } else {

     layer_end = svc->number_spatial_layers;

@@ -100,7 +102,7 @@

     LAYER_CONTEXT *const lc = &svc->layer_context[layer];

     RATE_CONTROL *const lrc = &lc->rc;

-    if (svc->number_temporal_layers > 1) {

+    if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {

       lc->target_bandwidth = oxcf->ts_target_bitrate[layer];

     } else {

       lc->target_bandwidth = oxcf->ss_target_bitrate[layer];

@@ -116,7 +118,7 @@

     lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size);

     lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);

     // Update framerate-related quantities.

-    if (svc->number_temporal_layers > 1) {

+    if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {

       lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];

     } else {

       lc->framerate = cpi->framerate;

@@ -129,16 +131,16 @@

-static LAYER_CONTEXT *get_layer_context(SVC *svc) {

-  return svc->number_temporal_layers > 1 ?

-         &svc->layer_context[svc->temporal_layer_id] :

-         &svc->layer_context[svc->spatial_layer_id];

+static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) {

+  return (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ?

+         &cpi->svc.layer_context[cpi->svc.temporal_layer_id] :

+         &cpi->svc.layer_context[cpi->svc.spatial_layer_id];

 void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) {

   SVC *const svc = &cpi->svc;

   const VP9EncoderConfig *const oxcf = &cpi->oxcf;

-  LAYER_CONTEXT *const lc = get_layer_context(svc);

+  LAYER_CONTEXT *const lc = get_layer_context(cpi);

   RATE_CONTROL *const lrc = &lc->rc;

   const int layer = svc->temporal_layer_id;

@@ -160,7 +162,7 @@

 void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) {

   const VP9EncoderConfig *const oxcf = &cpi->oxcf;

-  LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc);

+  LAYER_CONTEXT *const lc = get_layer_context(cpi);

   RATE_CONTROL *const lrc = &lc->rc;

   lc->framerate = framerate;

@@ -173,7 +175,7 @@

 void vp9_restore_layer_context(VP9_COMP *const cpi) {

-  LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc);

+  LAYER_CONTEXT *const lc = get_layer_context(cpi);

   const int old_frame_since_key = cpi->rc.frames_since_key;

   const int old_frame_to_key = cpi->rc.frames_to_key;

@@ -191,7 +193,7 @@

 void vp9_save_layer_context(VP9_COMP *const cpi) {

   const VP9EncoderConfig *const oxcf = &cpi->oxcf;

-  LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc);

+  LAYER_CONTEXT *const lc = get_layer_context(cpi);

   lc->rc = cpi->rc;

   lc->twopass = cpi->twopass;

@@ -215,15 +217,17 @@

   svc->spatial_layer_id = 0;

-void vp9_inc_frame_in_layer(SVC *svc) {

-  LAYER_CONTEXT *const lc = (svc->number_temporal_layers > 1)

-      ? &svc->layer_context[svc->temporal_layer_id]

-      : &svc->layer_context[svc->spatial_layer_id];

+void vp9_inc_frame_in_layer(VP9_COMP *const cpi) {

+  LAYER_CONTEXT *const lc =

+      (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ?

+      &cpi->svc.layer_context[cpi->svc.temporal_layer_id] :

+      &cpi->svc.layer_context[cpi->svc.spatial_layer_id];

   ++lc->current_video_frame_in_layer;

+  ++lc->frames_from_key_frame;

 int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {

-  return is_spatial_svc(cpi) &&

+  return is_two_pass_svc(cpi) &&

          cpi->svc.spatial_layer_id > 0 &&

          cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame;

@@ -258,6 +262,7 @@

   int layer_id;

   vpx_svc_parameters_t *layer_param;

   LAYER_CONTEXT *lc;

+  int count = 1 << (cpi->svc.number_temporal_layers - 1);

   // Find the next layer to be encoded

   for (layer_id = 0; layer_id < cpi->svc.number_spatial_layers; ++layer_id) {

@@ -275,17 +280,36 @@

   lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];

-  cpi->lst_fb_idx = cpi->svc.spatial_layer_id;

+  cpi->svc.temporal_layer_id = 0;

+  while ((lc->current_video_frame_in_layer % count) != 0) {

+    ++cpi->svc.temporal_layer_id;

+    count >>= 1;

+  }

-  if (cpi->svc.spatial_layer_id < 1)

+  cpi->lst_fb_idx =

+      cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers +

+      cpi->svc.temporal_layer_id;

+  if (lc->frames_from_key_frame < cpi->svc.number_temporal_layers)

+    cpi->ref_frame_flags &= ~VP9_LAST_FLAG;

+  if (cpi->svc.spatial_layer_id == 0) {

+    if (cpi->svc.temporal_layer_id == 0)

       cpi->gld_fb_idx = lc->gold_ref_idx >= 0 ?

                         lc->gold_ref_idx : cpi->lst_fb_idx;

-  else

-    cpi->gld_fb_idx = cpi->svc.spatial_layer_id - 1;

+    else

+      cpi->gld_fb_idx = cpi->lst_fb_idx - 1;

+  } else {

+    if (cpi->svc.temporal_layer_id == 0)

+      cpi->gld_fb_idx = cpi->svc.spatial_layer_id -

+                        cpi->svc.number_temporal_layers;

+    else

+      cpi->gld_fb_idx = cpi->lst_fb_idx - 1;

+  }

   if (lc->current_video_frame_in_layer == 0) {

     if (cpi->svc.spatial_layer_id >= 2) {

-      cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2;

+      cpi->alt_fb_idx =

+          cpi->svc.spatial_layer_id - 2 * cpi->svc.number_temporal_layers;

     } else {

       cpi->alt_fb_idx = cpi->lst_fb_idx;

       cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_ALT_FLAG);

@@ -307,7 +331,8 @@

             lc_lower->alt_ref_source != NULL)

           cpi->alt_fb_idx = lc_lower->alt_ref_idx;

         else if (cpi->svc.spatial_layer_id >= 2)

-          cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2;

+          cpi->alt_fb_idx =

+              cpi->svc.spatial_layer_id - 2 * cpi->svc.number_temporal_layers;

         else

           cpi->alt_fb_idx = cpi->lst_fb_idx;

@@ -326,7 +351,7 @@

   vp9_set_high_precision_mv(cpi, 1);

-  cpi->alt_ref_source = get_layer_context(&cpi->svc)->alt_ref_source;

+  cpi->alt_ref_source = get_layer_context(cpi)->alt_ref_source;

   return 0;

--- a/vp9/encoder/vp9_svc_layercontext.h

+++ b/vp9/encoder/vp9_svc_layercontext.h

@@ -28,6 +28,7 @@

   vpx_fixed_buf_t rc_twopass_stats_in;

   unsigned int current_video_frame_in_layer;

   int is_key_frame;

+  int frames_from_key_frame;

   FRAME_TYPE last_frame_type;

   vpx_svc_parameters_t svc_params_received;

   struct lookahead_entry  *alt_ref_source;

@@ -81,7 +82,7 @@

 void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi);

 // Increment number of video frames in layer

-void vp9_inc_frame_in_layer(SVC *svc);

+void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi);

 // Check if current layer is key frame in spatial upper layer

 int vp9_is_upper_layer_key_frame(const struct VP9_COMP *const cpi);

--- a/vp9/encoder/vp9_temporal_filter.c

+++ b/vp9/encoder/vp9_temporal_filter.c

@@ -450,7 +450,7 @@

   // Setup scaling factors. Scaling on each of the arnr frames is not supported

-  if (is_spatial_svc(cpi)) {

+  if (is_two_pass_svc(cpi)) {

     // In spatial svc the scaling factors might be less then 1/2. So we will use

     // non-normative scaling.

     int frame_used = 0;

--- a/vp9/vp9_cx_iface.c

+++ b/vp9/vp9_cx_iface.c

@@ -163,22 +163,8 @@

   RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS);

-#if CONFIG_SPATIAL_SVC

-  if (cfg->ss_number_layers > 1) {

-    unsigned int i, alt_ref_sum = 0;

-    for (i = 0; i < cfg->ss_number_layers; ++i) {

-      if (cfg->ss_enable_auto_alt_ref[i])

-        ++alt_ref_sum;

-    }

-    if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)

-      ERROR("Not enough ref buffers for svc alt ref frames");

-  }

-  if (cfg->ss_number_layers > 3 && cfg->g_error_resilient == 0)

-    ERROR("Multiple frame contexts are not supported for more than 3 layers");

-#endif

   RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);

   if (cfg->ts_number_layers > 1) {

     unsigned int i;

     for (i = 1; i < cfg->ts_number_layers; ++i)

@@ -191,6 +177,28 @@

         ERROR("ts_rate_decimator factors are not powers of 2");

+#if CONFIG_SPATIAL_SVC

+  if (cfg->ss_number_layers * cfg->ts_number_layers > REF_FRAMES)

+    ERROR("Too many layers. Maximum 8 layers could be set");

+  if ((cfg->ss_number_layers > 1 || cfg->ts_number_layers > 1) &&

+      cfg->g_pass == VPX_RC_LAST_PASS) {

+    unsigned int i, alt_ref_sum = 0;

+    for (i = 0; i < cfg->ss_number_layers; ++i) {

+      if (cfg->ss_enable_auto_alt_ref[i])

+        ++alt_ref_sum;

+    }

+    if (alt_ref_sum >

+        REF_FRAMES - cfg->ss_number_layers * cfg->ts_number_layers)

+      ERROR("Not enough ref buffers for svc alt ref frames");

+    if ((cfg->ss_number_layers > 3 ||

+         cfg->ss_number_layers * cfg->ts_number_layers > 4) &&

+        cfg->g_error_resilient == 0)

+    ERROR("Multiple frame context are not supported for more than 3 spatial "

+          "layers or more than 4 spatial x temporal layers");

+  }

+#endif

   // VP9 does not support a lower bound on the keyframe interval in

   // automatic keyframe placement mode.

   if (cfg->kf_mode != VPX_KF_DISABLED &&

@@ -228,7 +236,7 @@

     if (cfg->rc_twopass_stats_in.sz % packet_sz)

       ERROR("rc_twopass_stats_in.sz indicates truncated packet.");

-    if (cfg->ss_number_layers > 1) {

+    if (cfg->ss_number_layers > 1 || cfg->ts_number_layers > 1) {

       int i;

       unsigned int n_packets_per_layer[VPX_SS_MAX_LAYERS] = {0};

@@ -423,6 +431,9 @@

   } else if (oxcf->ss_number_layers == 1) {

     oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth;

+#if CONFIG_SPATIAL_SVC

+    oxcf->ss_play_alternate[0] = extra_cfg->enable_auto_alt_ref;

+#endif

   oxcf->ts_number_layers = cfg->ts_number_layers;

@@ -809,7 +820,7 @@

   if (lib_flags & FRAMEFLAGS_KEY

 #if CONFIG_SPATIAL_SVC

-      || (is_spatial_svc(cpi) && cpi->svc.layer_context[0].is_key_frame)

+      || (is_two_pass_svc(cpi) && cpi->svc.layer_context[0].is_key_frame)

 #endif

     flags |= VPX_FRAME_IS_KEY;

@@ -923,7 +934,7 @@

         vpx_codec_cx_pkt_t pkt;

 #if CONFIG_SPATIAL_SVC

-        if (is_spatial_svc(cpi))

+        if (is_two_pass_svc(cpi))

           cpi->svc.layer_context[cpi->svc.spatial_layer_id].layer_size += size;

 #endif

@@ -930,7 +941,7 @@

         // Pack invisible frames with the next visible frame

         if (!cpi->common.show_frame

 #if CONFIG_SPATIAL_SVC

-            || (is_spatial_svc(cpi) &&

+            || (is_two_pass_svc(cpi) &&

                 cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)

 #endif

) {

@@ -972,7 +983,7 @@

         cx_data += size;

         cx_data_sz -= size;

 #if CONFIG_SPATIAL_SVC

-        if (is_spatial_svc(cpi)) {

+        if (is_two_pass_svc(cpi)) {

           vpx_codec_cx_pkt_t pkt;

           int i;

           vp9_zero(pkt);

--- a/vpx/src/svc_encodeframe.c

+++ b/vpx/src/svc_encodeframe.c

@@ -384,8 +384,10 @@

       res = VPX_CODEC_INVALID_PARAM;

       break;

-    if (strcmp("layers", option_name) == 0) {

+    if (strcmp("spatial-layers", option_name) == 0) {

       svc_ctx->spatial_layers = atoi(option_value);

+    } else if (strcmp("temporal-layers", option_name) == 0) {

+      svc_ctx->temporal_layers = atoi(option_value);

     } else if (strcmp("scale-factors", option_name) == 0) {

       res = parse_scale_factors(svc_ctx, option_value);

       if (res != VPX_CODEC_OK) break;

@@ -406,7 +408,9 @@

   free(input_string);

-  if (si->use_multiple_frame_contexts && svc_ctx->spatial_layers > 3)

+  if (si->use_multiple_frame_contexts &&

+      (svc_ctx->spatial_layers > 3 ||

+       svc_ctx->spatial_layers * svc_ctx->temporal_layers > 4))

     res = VPX_CODEC_INVALID_PARAM;

   return res;

@@ -488,6 +492,16 @@

   res = parse_options(svc_ctx, si->options);

   if (res != VPX_CODEC_OK) return res;

+  if (svc_ctx->spatial_layers < 1)

+    svc_ctx->spatial_layers = 1;

+  if (svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS)

+    svc_ctx->spatial_layers = VPX_SS_MAX_LAYERS;

+  if (svc_ctx->temporal_layers < 1)

+    svc_ctx->temporal_layers = 1;

+  if (svc_ctx->temporal_layers > VPX_TS_MAX_LAYERS)

+    svc_ctx->temporal_layers = VPX_TS_MAX_LAYERS;

   si->layers = svc_ctx->spatial_layers;

   // Assign target bitrate for each layer. We calculate the ratio

@@ -523,9 +537,18 @@

     enc_cfg->ss_enable_auto_alt_ref[i] = si->enable_auto_alt_ref[i];

 #endif

+  if (svc_ctx->temporal_layers > 1) {

+    int i;

+    for (i = 0; i < svc_ctx->temporal_layers; ++i) {

+      enc_cfg->ts_target_bitrate[i] = enc_cfg->rc_target_bitrate /

+                                      svc_ctx->temporal_layers;

+      enc_cfg->ts_rate_decimator[i] = 1 << (svc_ctx->temporal_layers - 1 - i);

+    }

+  }

   // modify encoder configuration

   enc_cfg->ss_number_layers = si->layers;

-  enc_cfg->ts_number_layers = 1;  // Temporal layers not used in this encoder.

+  enc_cfg->ts_number_layers = svc_ctx->temporal_layers;

   // TODO(ivanmaltz): determine if these values need to be set explicitly for

   // svc, or if the normal default/override mechanism can be used

--- a/vpx/svc_context.h

+++ b/vpx/svc_context.h

@@ -31,7 +31,8 @@

 typedef struct {

   // public interface to svc_command options

-  int spatial_layers;               // number of layers

+  int spatial_layers;               // number of spatial layers

+  int temporal_layers;               // number of temporal layers

   SVC_LOG_LEVEL log_level;  // amount of information to display

   int log_print;  // when set, printf log messages instead of returning the

                   // message with svc_get_message