shithub: libvpx

--- a/test/svc_test.cc

+++ b/test/svc_test.cc

@@ -112,7 +112,7 @@

       video.Next();

-    // Flush encoder and test EOS packet

+    // Flush encoder and test EOS packet.

     res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(),

                          video.duration(), VPX_DL_GOOD_QUALITY);

     stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);

@@ -135,7 +135,7 @@

         EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));

-      outputs[*frame_received].buf = malloc(frame_size);

+      outputs[*frame_received].buf = malloc(frame_size + 16);

       ASSERT_TRUE(outputs[*frame_received].buf != NULL);

       memcpy(outputs[*frame_received].buf, vpx_svc_get_buffer(&svc_),

              frame_size);

@@ -176,13 +176,13 @@

       video.Next();

-    // Flush Encoder

+    // Flush encoder.

     res = vpx_svc_encode(&svc_, &codec_, NULL, 0,

                          video.duration(), VPX_DL_GOOD_QUALITY);

     EXPECT_EQ(VPX_CODEC_OK, res);

     StoreFrames(n, outputs, &frame_received);

-    EXPECT_EQ(frame_received, (size_t)n);

+    EXPECT_EQ(frame_received, static_cast<size_t>(n));

     ReleaseEncoder();

@@ -204,7 +204,7 @@

       ++decoded_frames;

       DxDataIterator dec_iter = decoder_->GetDxData();

-      while (dec_iter.Next()) {

+      while (dec_iter.Next() != NULL) {

         ++received_frames;

@@ -214,7 +214,8 @@

   void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,

                              const int num_super_frames,

-                             const int remained_layers) {

+                             const int remained_layers,

+                             const bool is_multiple_frame_context) {

     ASSERT_TRUE(inputs != NULL);

     ASSERT_GT(num_super_frames, 0);

     ASSERT_GT(remained_layers, 0);

@@ -236,7 +237,7 @@

       uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf);

       uint8_t *frame_start = frame_data;

       for (frame = 0; frame < frame_count; ++frame) {

-        // Looking for a visible frame

+        // Looking for a visible frame.

         if (frame_data[0] & 0x02) {

           ++frames_found;

           if (frames_found == remained_layers)

@@ -244,11 +245,17 @@

         frame_data += frame_sizes[frame];

-      ASSERT_LT(frame, frame_count);

-      if (frame == frame_count - 1)

+      ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. "

+          << "remaining_layers: " << remained_layers

+          << "    super_frame: " << i

+          << "    is_multiple_frame_context: " << is_multiple_frame_context;

+      if (frame == frame_count - 1 && !is_multiple_frame_context)

         continue;

       frame_data += frame_sizes[frame];

+      // We need to add one more frame for multiple frame context.

+      if (is_multiple_frame_context)

+        ++frame;

       uint8_t marker =

           static_cast<const uint8_t *>(inputs[i].buf)[inputs[i].sz - 1];

       const uint32_t mag = ((marker >> 3) & 0x3) + 1;

@@ -256,11 +263,37 @@

       const size_t new_index_sz = 2 + mag * (frame + 1);

       marker &= 0x0f8;

       marker |= frame;

+      // Copy existing frame sizes.

+      memmove(frame_data + (is_multiple_frame_context ? 2 : 1),

+              frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2);

+      if (is_multiple_frame_context) {

+        // Add a one-byte frame with the show_existing_frame flag set.

+        *frame_data++ = 0x88 | (remained_layers - 1);

+      }

+      // New marker.

       frame_data[0] = marker;

-      memcpy(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1,

-             new_index_sz - 2);

-      frame_data[new_index_sz - 1] = marker;

-      inputs[i].sz = frame_data - frame_start + new_index_sz;

+      frame_data += (mag * (frame + 1) + 1);

+      if (is_multiple_frame_context) {

+        // Write the frame size for the one byte frame.

+        frame_data -= mag;

+        *frame_data++ = 1;

+        for (uint32_t j = 1; j < mag; ++j) {

+          *frame_data++ = 0;

+        }

+      }

+      *frame_data++ = marker;

+      inputs[i].sz = frame_data - frame_start;

+      if (is_multiple_frame_context) {

+        // Change the show frame flag to 0 for all frames.

+        for (int j = 0; j < frame; ++j) {

+          frame_start[0] &= ~2;

+          frame_start += frame_sizes[j];

+        }

+      }

@@ -507,7 +540,7 @@

   vpx_fixed_buf outputs[10];

   memset(&outputs[0], 0, sizeof(outputs));

   Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);

-  DropEnhancementLayers(&outputs[0], 10, 1);

+  DropEnhancementLayers(&outputs[0], 10, 1, false);

   DecodeNFrames(&outputs[0], 10);

   FreeBitstreamBuffers(&outputs[0], 10);

@@ -525,13 +558,13 @@

   Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]);

   DecodeNFrames(&outputs[0], 10);

-  DropEnhancementLayers(&outputs[0], 10, 4);

+  DropEnhancementLayers(&outputs[0], 10, 4, false);

   DecodeNFrames(&outputs[0], 10);

-  DropEnhancementLayers(&outputs[0], 10, 3);

+  DropEnhancementLayers(&outputs[0], 10, 3, false);

   DecodeNFrames(&outputs[0], 10);

-  DropEnhancementLayers(&outputs[0], 10, 2);

+  DropEnhancementLayers(&outputs[0], 10, 2, false);

   DecodeNFrames(&outputs[0], 10);

-  DropEnhancementLayers(&outputs[0], 10, 1);

+  DropEnhancementLayers(&outputs[0], 10, 1, false);

   DecodeNFrames(&outputs[0], 10);

   FreeBitstreamBuffers(&outputs[0], 10);

@@ -568,12 +601,121 @@

   memset(&outputs[0], 0, sizeof(outputs));

   Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]);

   DecodeNFrames(&outputs[0], 20);

-  DropEnhancementLayers(&outputs[0], 20, 2);

+  DropEnhancementLayers(&outputs[0], 20, 2, false);

   DecodeNFrames(&outputs[0], 20);

-  DropEnhancementLayers(&outputs[0], 20, 1);

+  DropEnhancementLayers(&outputs[0], 20, 1, false);

   DecodeNFrames(&outputs[0], 20);

   FreeBitstreamBuffers(&outputs[0], 20);

+}

+// Checks the "multi-frame-contexts=1" option against the spatial layer count:

+// with 5 spatial layers vpx_svc_init() must return VPX_CODEC_INVALID_PARAM;

+// the option is then re-applied with 2 spatial layers and InitializeEncoder()

+// is run.

+TEST_F(SvcTest, SetMultipleFrameContextOption) {

+  svc_.spatial_layers = 5;

+  vpx_codec_err_t res =

+      vpx_svc_set_options(&svc_, "multi-frame-contexts=1");

+  // Setting the option is expected to succeed; the rejection is only expected

+  // from vpx_svc_init() below.

+  EXPECT_EQ(VPX_CODEC_OK, res);

+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);

+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);

+  svc_.spatial_layers = 2;

+  res = vpx_svc_set_options(&svc_, "multi-frame-contexts=1");

+  // Check the result here as well so a failure is not silently discarded.

+  EXPECT_EQ(VPX_CODEC_OK, res);

+  InitializeEncoder();

+}

+// Two-pass encode of 10 frames with 2 spatial layers and the

+// multi-frame-contexts option enabled; both layers are kept for decoding.

+TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContext) {

+  const int kFrameCount = 10;

+  const int kLayerCount = 2;

+  // Pass 1: collect the first-pass stats.

+  std::string first_pass_stats;

+  Pass1EncodeNFrames(kFrameCount, kLayerCount, &first_pass_stats);

+  // Pass 2: final pass, error resilience off, multi-frame-contexts on.

+  codec_enc_.g_error_resilient = 0;

+  codec_enc_.g_pass = VPX_RC_LAST_PASS;

+  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1");

+  vpx_fixed_buf encoded[kFrameCount];

+  memset(encoded, 0, sizeof(encoded));

+  Pass2EncodeNFrames(&first_pass_stats, kFrameCount, kLayerCount, encoded);

+  // All layers are kept, but the multiple-frame-context path still rewrites

+  // each superframe before it is decoded.

+  DropEnhancementLayers(encoded, kFrameCount, kLayerCount, true);

+  DecodeNFrames(encoded, kFrameCount);

+  FreeBitstreamBuffers(encoded, kFrameCount);

+}

+// Two-pass encode of 10 frames with 2 spatial layers and the

+// multi-frame-contexts option enabled; only one layer is kept for decoding.

+TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContextDecodeBaselayer) {

+  const int kFrameCount = 10;

+  const int kLayerCount = 2;

+  const int kLayersToKeep = 1;

+  // Pass 1: collect the first-pass stats.

+  std::string first_pass_stats;

+  Pass1EncodeNFrames(kFrameCount, kLayerCount, &first_pass_stats);

+  // Pass 2: final pass, error resilience off, multi-frame-contexts on.

+  codec_enc_.g_error_resilient = 0;

+  codec_enc_.g_pass = VPX_RC_LAST_PASS;

+  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1");

+  vpx_fixed_buf encoded[kFrameCount];

+  memset(encoded, 0, sizeof(encoded));

+  Pass2EncodeNFrames(&first_pass_stats, kFrameCount, kLayerCount, encoded);

+  // Strip everything above the first layer, then decode what is left.

+  DropEnhancementLayers(encoded, kFrameCount, kLayersToKeep, true);

+  DecodeNFrames(encoded, kFrameCount);

+  FreeBitstreamBuffers(encoded, kFrameCount);

+}

+// Two-pass encode of 10 frames with 2 layers that both use a 1/1 scale factor

+// and the multi-frame-contexts option enabled; both layers are kept for

+// decoding.

+TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContext) {

+  const int kFrameCount = 10;

+  const int kLayerCount = 2;

+  // Pass 1: both layers at full resolution.

+  vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1");

+  std::string first_pass_stats;

+  Pass1EncodeNFrames(kFrameCount, kLayerCount, &first_pass_stats);

+  // Pass 2: final pass, error resilience off, multi-frame-contexts on.

+  codec_enc_.g_error_resilient = 0;

+  codec_enc_.g_pass = VPX_RC_LAST_PASS;

+  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 scale-factors=1/1,1/1 "

+                      "multi-frame-contexts=1");

+  vpx_fixed_buf encoded[kFrameCount];

+  memset(encoded, 0, sizeof(encoded));

+  Pass2EncodeNFrames(&first_pass_stats, kFrameCount, kLayerCount, encoded);

+  // All layers are kept, but the multiple-frame-context path still rewrites

+  // each superframe before it is decoded.

+  DropEnhancementLayers(encoded, kFrameCount, kLayerCount, true);

+  DecodeNFrames(encoded, kFrameCount);

+  FreeBitstreamBuffers(encoded, kFrameCount);

+}

+// Two-pass encode of 10 frames with 3 layers that all use a 1/1 scale factor

+// and the multi-frame-contexts option enabled. The encoded output is then

+// decoded three times, keeping 3, 2 and finally 1 layer(s), each time starting

+// from an untouched copy of the encoder output.

+TEST_F(SvcTest, TwoPassEncode3SNRLayersWithMultipleFrameContextDecode321Layer) {

+  // First pass encode

+  std::string stats_buf;

+  vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1");

+  Pass1EncodeNFrames(10, 3, &stats_buf);

+  // Second pass encode

+  codec_enc_.g_pass = VPX_RC_LAST_PASS;

+  codec_enc_.g_error_resilient = 0;

+  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1 "

+                      "multi-frame-contexts=1");

+  vpx_fixed_buf outputs[10];

+  memset(&outputs[0], 0, sizeof(outputs));

+  Pass2EncodeNFrames(&stats_buf, 10, 3, &outputs[0]);

+  // Work on copies: DropEnhancementLayers() rewrites the buffer and its size

+  // in place, so the original encoder output is kept in outputs[].

+  vpx_fixed_buf outputs_new[10];

+  for (int i = 0; i < 10; ++i) {

+    // NOTE(review): the extra 16 bytes are presumably headroom for the data

+    // DropEnhancementLayers() adds in multiple-frame-context mode - confirm.

+    outputs_new[i].buf = malloc(outputs[i].sz + 16);

+    ASSERT_TRUE(outputs_new[i].buf != NULL);

+    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);

+    outputs_new[i].sz = outputs[i].sz;

+  }

+  // Keep all 3 layers.

+  DropEnhancementLayers(&outputs_new[0], 10, 3, true);

+  DecodeNFrames(&outputs_new[0], 10);

+  // Restore the untouched encoder output before dropping to 2 layers.

+  for (int i = 0; i < 10; ++i) {

+    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);

+    outputs_new[i].sz = outputs[i].sz;

+  }

+  DropEnhancementLayers(&outputs_new[0], 10, 2, true);

+  DecodeNFrames(&outputs_new[0], 10);

+  // Restore again before dropping to a single layer.

+  for (int i = 0; i < 10; ++i) {

+    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);

+    outputs_new[i].sz = outputs[i].sz;

+  }

+  DropEnhancementLayers(&outputs_new[0], 10, 1, true);

+  DecodeNFrames(&outputs_new[0], 10);

+  // Release both the original buffers and the working copies.

+  FreeBitstreamBuffers(&outputs[0], 10);

+  FreeBitstreamBuffers(&outputs_new[0], 10);

 }  // namespace

--- a/vp9/encoder/vp9_bitstream.c

+++ b/vp9/encoder/vp9_bitstream.c

@@ -1083,7 +1083,16 @@

     write_bitdepth_colorspace_sampling(cm, wb);

     write_frame_size(cm, wb);

   } else {

-    if (!cm->show_frame)

+    // In spatial svc if it's not error_resilient_mode then we need to code all

+    // visible frames as invisible. But we need to keep the show_frame flag so

+    // that the publisher could know whether it is supposed to be visible.

+    // So we will code the show_frame flag as it is. Then code the intra_only

+    // bit here. This will make the bitstream incompatible. In the player we

+    // will change the show_frame flag to 0, then add a one-byte frame with the

+    // show_existing_frame flag which tells the decoder which frame we want to

+    // show.

+    if (!cm->show_frame ||

+        (is_spatial_svc(cpi) && cm->error_resilient_mode == 0))

       vp9_wb_write_bit(wb, cm->intra_only);

     if (!cm->error_resilient_mode)

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -2123,7 +2123,20 @@

       cm->reset_frame_context = 2;

+  if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0) {

+    cm->frame_context_idx = cpi->svc.spatial_layer_id;

+    // The probs will be updated based on the frame type of its previous

+    // frame if frame_parallel_decoding_mode is 0. The type may vary for

+    // the frame after a key frame in base layer since we may drop enhancement

+    // layers. So set frame_parallel_decoding_mode to 1 in this case.

+    if (cpi->svc.spatial_layer_id == 0 &&

+        cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)

+      cm->frame_parallel_decoding_mode = 1;

+    else

+      cm->frame_parallel_decoding_mode = 0;

+  }

   // Configure experimental use of segmentation for enhanced coding of

   // static regions if indicated.

   // Only allowed in second pass of two pass (as requires lagged coding)

@@ -2298,8 +2311,12 @@

   cm->last_height = cm->height;

   // reset to normal state now that we are done.

-  if (!cm->show_existing_frame)

-    cm->last_show_frame = cm->show_frame;

+  if (!cm->show_existing_frame) {

+    if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0)

+      cm->last_show_frame = 0;

+    else

+      cm->last_show_frame = cm->show_frame;

+  }

   if (cm->show_frame) {

     vp9_swap_mi_and_prev_mi(cm);

@@ -2310,6 +2327,10 @@

     if (cpi->use_svc)

       vp9_inc_frame_in_layer(&cpi->svc);

+  if (is_spatial_svc(cpi))

+    cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type =

+        cm->frame_type;

 static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,

--- a/vp9/encoder/vp9_svc_layercontext.c

+++ b/vp9/encoder/vp9_svc_layercontext.c

@@ -36,6 +36,7 @@

     int i;

     lc->current_video_frame_in_layer = 0;

     lc->layer_size = 0;

+    lc->last_frame_type = FRAME_TYPES;

     lrc->ni_av_qi = oxcf->worst_allowed_q;

     lrc->total_actual_bits = 0;

     lrc->total_target_vs_actual = 0;

--- a/vp9/encoder/vp9_svc_layercontext.h

+++ b/vp9/encoder/vp9_svc_layercontext.h

@@ -28,6 +28,7 @@

   vpx_fixed_buf_t rc_twopass_stats_in;

   unsigned int current_video_frame_in_layer;

   int is_key_frame;

+  FRAME_TYPE last_frame_type;

   vpx_svc_parameters_t svc_params_received;

   struct lookahead_entry  *alt_ref_source;

   int alt_ref_idx;

--- a/vp9/vp9_cx_iface.c

+++ b/vp9/vp9_cx_iface.c

@@ -176,6 +176,8 @@

     if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)

       ERROR("Not enough ref buffers for svc alt ref frames");

+  if (cfg->ss_number_layers > 3 && cfg->g_error_resilient == 0)

+    ERROR("Multiple frame contexts are not supported for more than 3 layers");

 #endif

   RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);

--- a/vpx/src/svc_encodeframe.c

+++ b/vpx/src/svc_encodeframe.c

@@ -86,6 +86,7 @@

   int layers;

   int layer;

   int is_keyframe;

+  int use_multiple_frame_contexts;

   FrameData *frame_list;

   FrameData *frame_temp;

@@ -366,6 +367,7 @@

   char *option_name;

   char *option_value;

   char *input_ptr;

+  SvcInternal *const si = get_svc_internal(svc_ctx);

   vpx_codec_err_t res = VPX_CODEC_OK;

   if (options == NULL) return VPX_CODEC_OK;

@@ -393,6 +395,8 @@

     } else if (strcmp("auto-alt-refs", option_name) == 0) {

       res = parse_auto_alt_ref(svc_ctx, option_value);

       if (res != VPX_CODEC_OK) break;

+    } else if (strcmp("multi-frame-contexts", option_name) == 0) {

+      si->use_multiple_frame_contexts = atoi(option_value);

     } else {

       svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name);

       res = VPX_CODEC_INVALID_PARAM;

@@ -401,6 +405,10 @@

     option_name = strtok_r(NULL, "=", &input_ptr);

   free(input_string);

+  if (si->use_multiple_frame_contexts && svc_ctx->spatial_layers > 3)

+    res = VPX_CODEC_INVALID_PARAM;

   return res;

@@ -534,7 +542,8 @@

   enc_cfg->rc_buf_initial_sz = 500;

   enc_cfg->rc_buf_optimal_sz = 600;

   enc_cfg->rc_buf_sz = 1000;

-  enc_cfg->g_error_resilient = 1;

+  if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0)

+    enc_cfg->g_error_resilient = 1;

   // Initialize codec

   res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR);