shithub: libvpx

Download patch

ref: b2209c334666992bd87a89581ef0b2ed90c0243b
parent: 005fc6970b4c1997d40b98d2ac01d34f39310606
author: Deb Mukherjee <[email protected]>
date: Wed Feb 5 11:19:11 EST 2014

Parameter fixes for one-pass non-cbr mode

Fixes some of the parameters for 1-pass non-cbr mode.
Also includes some cleanups, inlcuding refactoring of the
recode_loop options.

Results on derfraw300 improve by about 5-6%, so that the one-pass
mode is now 13% below the 2-pass mode in speed 0.

Change-Id: I844cc2638694c7574f3be00d41d60b23dc1016f0

--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -572,7 +572,7 @@
                                    int speed) {
   int i;
   sf->adaptive_rd_thresh = 1;
-  sf->recode_loop = (speed < 1);
+  sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
   if (speed == 1) {
     sf->use_square_partition_only = !frame_is_intra_only(cm);
     sf->less_rectangular_check  = 1;
@@ -590,7 +590,7 @@
     sf->adaptive_pred_interp_filter = 1;
     sf->auto_mv_step_size = 1;
     sf->adaptive_rd_thresh = 2;
-    sf->recode_loop = 2;
+    sf->recode_loop = ALLOW_RECODE_KFARFGF;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
@@ -626,7 +626,7 @@
     sf->last_partitioning_redo_frequency = 3;
 
     sf->adaptive_rd_thresh = 2;
-    sf->recode_loop = 2;
+    sf->recode_loop = ALLOW_RECODE_KFARFGF;
     sf->use_lp32x32fdct = 1;
     sf->mode_skip_start = 11;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
@@ -743,7 +743,7 @@
                                  int speed) {
   sf->static_segmentation = 0;
   sf->adaptive_rd_thresh = 1;
-  sf->recode_loop = (speed < 1);
+  sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
   if (speed == 1) {
     sf->use_square_partition_only = !frame_is_intra_only(cm);
     sf->less_rectangular_check = 1;
@@ -761,7 +761,7 @@
     sf->adaptive_pred_interp_filter = 1;
     sf->auto_mv_step_size = 1;
     sf->adaptive_rd_thresh = 2;
-    sf->recode_loop = 2;
+    sf->recode_loop = ALLOW_RECODE_KFARFGF;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
@@ -797,7 +797,7 @@
     sf->last_partitioning_redo_frequency = 3;
 
     sf->adaptive_rd_thresh = 2;
-    sf->recode_loop = 2;
+    sf->recode_loop = ALLOW_RECODE_KFARFGF;
     sf->use_lp32x32fdct = 1;
     sf->mode_skip_start = 11;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
@@ -865,7 +865,7 @@
   // best quality defaults
   sf->RD = 1;
   sf->search_method = NSTEP;
-  sf->recode_loop = 1;
+  sf->recode_loop = ALLOW_RECODE;
   sf->subpel_search_method = SUBPEL_TREE;
   sf->subpel_iters_per_step = 2;
   sf->subpel_force_stop = 0;
@@ -933,7 +933,7 @@
 
   // No recode for 1 pass.
   if (cpi->pass == 0) {
-    sf->recode_loop = 0;
+    sf->recode_loop = DISALLOW_RECODE;
     sf->optimize_coefficients = 0;
   }
 
@@ -2544,8 +2544,8 @@
   // Is frame recode allowed.
   // Yes if either recode mode 1 is selected or mode 2 is selected
   // and the frame is a key frame, golden frame or alt_ref_frame
-  } else if ((cpi->sf.recode_loop == 1) ||
-             ((cpi->sf.recode_loop == 2) &&
+  } else if ((cpi->sf.recode_loop == ALLOW_RECODE) ||
+             ((cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF) &&
               (cm->frame_type == KEY_FRAME ||
                cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
     // General over and under shoot tests
@@ -2764,14 +2764,49 @@
 }
 #endif
 
+static void encode_without_recode_loop(VP9_COMP *cpi,
+                                       size_t *size,
+                                       uint8_t *dest,
+                                       int *q) {
+  VP9_COMMON *const cm = &cpi->common;
+  vp9_clear_system_state();  // __asm emms;
+  vp9_set_quantizer(cpi, *q);
+
+  // Set up entropy context depending on frame type. The decoder mandates
+  // the use of the default context, index 0, for keyframes and inter
+  // frames where the error_resilient_mode or intra_only flag is set. For
+  // other inter-frames the encoder currently uses only two contexts;
+  // context 1 for ALTREF frames and context 0 for the others.
+  if (cm->frame_type == KEY_FRAME) {
+    vp9_setup_key_frame(cpi);
+  } else {
+    if (!cm->intra_only && !cm->error_resilient_mode) {
+      cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
+    }
+    vp9_setup_inter_frame(cpi);
+  }
+  // Variance adaptive and in frame q adjustment experiments are mutually
+  // exclusive.
+  if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+    vp9_vaq_frame_setup(cpi);
+  } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
+    setup_in_frame_q_adj(cpi);
+  }
+  // transform / motion compensation build reconstruction frame
+  vp9_encode_frame(cpi);
+
+  // Update the skip mb flag probabilities based on the distribution
+  // seen in the last encoder iteration.
+  // update_base_skip_probs(cpi);
+  vp9_clear_system_state();  // __asm emms;
+}
+
 static void encode_with_recode_loop(VP9_COMP *cpi,
                                     size_t *size,
                                     uint8_t *dest,
                                     int *q,
                                     int bottom_index,
-                                    int top_index,
-                                    int frame_over_shoot_limit,
-                                    int frame_under_shoot_limit) {
+                                    int top_index) {
   VP9_COMMON *const cm = &cpi->common;
   int loop_count = 0;
   int loop = 0;
@@ -2778,7 +2813,14 @@
   int overshoot_seen = 0;
   int undershoot_seen = 0;
   int q_low = bottom_index, q_high = top_index;
+  int frame_over_shoot_limit;
+  int frame_under_shoot_limit;
 
+  // Decide frame size bounds
+  vp9_rc_compute_frame_size_bounds(cpi, cpi->rc.this_frame_target,
+                                   &frame_under_shoot_limit,
+                                   &frame_over_shoot_limit);
+
   do {
     vp9_clear_system_state();  // __asm emms;
 
@@ -2809,7 +2851,6 @@
     }
 
     // transform / motion compensation build reconstruction frame
-
     vp9_encode_frame(cpi);
 
     // Update the skip mb flag probabilities based on the distribution
@@ -2821,7 +2862,7 @@
     // Dummy pack of the bitstream using up to date stats to get an
     // accurate estimate of output frame size to determine if we need
     // to recode.
-    if (cpi->sf.recode_loop != 0) {
+    if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
       vp9_save_coding_context(cpi);
       cpi->dummy_packing = 1;
       if (!cpi->sf.super_fast_rtc)
@@ -3024,8 +3065,6 @@
   VP9_COMMON *const cm = &cpi->common;
   TX_SIZE t;
   int q;
-  int frame_over_shoot_limit;
-  int frame_under_shoot_limit;
   int top_index;
   int bottom_index;
 
@@ -3121,7 +3160,7 @@
       cm->frame_type != KEY_FRAME) {
     if (vp9_rc_drop_frame(cpi)) {
       vp9_rc_postencode_update_drop_frame(cpi);
-      cm->current_video_frame++;
+      ++cm->current_video_frame;
       return;
     }
   }
@@ -3159,27 +3198,11 @@
   vp9_write_yuv_frame(cpi->Source);
 #endif
 
-  // Decide frame size bounds
-  vp9_rc_compute_frame_size_bounds(cpi, cpi->rc.this_frame_target,
-                                   &frame_under_shoot_limit,
-                                   &frame_over_shoot_limit);
-
   // Decide q and q bounds.
   q = vp9_rc_pick_q_and_adjust_q_bounds(cpi,
                                         &bottom_index,
                                         &top_index);
 
-  // JBB : This is realtime mode.  In real time mode the first frame
-  // should be larger. Q of 0 is disabled because we force tx size to be
-  // 16x16...
-  if (cpi->sf.super_fast_rtc) {
-    if (cm->current_video_frame == 0)
-      q /= 3;
-
-    if (q == 0)
-      q++;
-  }
-
   if (!frame_is_intra_only(cm)) {
     cm->interp_filter = DEFAULT_INTERP_FILTER;
     /* TODO: Decide this more intelligently */
@@ -3186,8 +3209,11 @@
     set_high_precision_mv(cpi, (q < HIGH_PRECISION_MV_QTHRESH));
   }
 
-  encode_with_recode_loop(cpi, size, dest, &q, bottom_index, top_index,
-                          frame_over_shoot_limit, frame_under_shoot_limit);
+  if (cpi->sf.recode_loop == DISALLOW_RECODE) {
+    encode_without_recode_loop(cpi, size, dest, &q);
+  } else {
+    encode_with_recode_loop(cpi, size, dest, &q, bottom_index, top_index);
+  }
 
   // Special case code to reduce pulsing when key frames are forced at a
   // fixed interval. Note the reconstruction error if it is the frame before
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -44,8 +44,9 @@
 #else
 #define MIN_GF_INTERVAL             4
 #endif
-#define DEFAULT_GF_INTERVAL         7
+#define DEFAULT_GF_INTERVAL         11
 #define DEFAULT_KF_BOOST            2000
+#define DEFAULT_GF_BOOST            2000
 
 #define KEY_FRAME_CONTEXT 5
 
@@ -217,6 +218,17 @@
   LAST_FRAME_PARTITION_ALL = 2
 } LAST_FRAME_PARTITION_METHOD;
 
+typedef enum {
+  // No recode.
+  DISALLOW_RECODE = 0,
+  // Allow recode for KF and exceeding maximum frame bandwidth.
+  ALLOW_RECODE_KFMAXBW = 1,
+  // Allow recode only for KF/ARF/GF frames.
+  ALLOW_RECODE_KFARFGF = 2,
+  // Allow recode for all frames based on bitrate constraints.
+  ALLOW_RECODE = 3,
+} RECODE_LOOP_TYPE;
+
 typedef struct {
   // This flag refers to whether or not to perform rd optimization.
   int RD;
@@ -224,11 +236,7 @@
   // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
   SEARCH_METHODS search_method;
 
-  // Recode_loop can be:
-  // 0 means we only encode a frame once
-  // 1 means we can re-encode based on bitrate constraints on any frame
-  // 2 means we can only recode gold, alt, and key frames.
-  int recode_loop;
+  RECODE_LOOP_TYPE recode_loop;
 
   // Subpel_search_method can only be subpel_tree which does a subpixel
   // logarithmic search that keeps stepping at 1/2 pixel units until
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -220,7 +220,7 @@
     // bits on this frame even if it is a constructed arf.
     // The active maximum quantizer insures that an appropriate
     // number of bits will be spent if needed for constructed ARFs.
-    target = 0;
+    target = min_frame_target;
   }
   // Clip the frame target to the maximum allowed value.
   if (target > rc->max_frame_bandwidth)
@@ -569,7 +569,6 @@
        active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
       else
         active_best_quality = inter_minq[active_worst_quality];
-      //
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
       if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
@@ -840,12 +839,28 @@
 int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
                                       int *bottom_index,
                                       int *top_index) {
+  int q;
   if (cpi->pass == 0)
-    return rc_pick_q_and_adjust_q_bounds_one_pass(
+    q = rc_pick_q_and_adjust_q_bounds_one_pass(
         cpi, bottom_index, top_index);
   else
-    return rc_pick_q_and_adjust_q_bounds_two_pass(
+    q = rc_pick_q_and_adjust_q_bounds_two_pass(
         cpi, bottom_index, top_index);
+
+  // JBB : This is realtime mode.  In real time mode the first frame
+  // should be larger. Q of 0 is disabled because we force tx size to be
+  // 16x16...
+  if (cpi->sf.super_fast_rtc) {
+    if (cpi->common.current_video_frame == 0)
+      q /= 3;
+    if (q == 0)
+      q++;
+    if (q < *bottom_index)
+      *bottom_index = q;
+    else if (q > *top_index)
+      *top_index = q;
+  }
+  return q;
 }
 
 void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
@@ -948,7 +963,8 @@
   rc->projected_frame_size = (bytes_used << 3);
 
   // Post encode loop adjustment of Q prediction.
-  vp9_rc_update_rate_correction_factors(cpi, (cpi->sf.recode_loop ||
+  vp9_rc_update_rate_correction_factors(
+      cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF ||
             cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
 
   // Keep a record of last Q and ambient average Q.
@@ -1040,17 +1056,27 @@
 #define USE_ALTREF_FOR_ONE_PASS   1
 
 static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
+  static const int af_ratio = 5;
   const RATE_CONTROL *rc = &cpi->rc;
-  int target = rc->av_per_frame_bandwidth;
-  target = vp9_rc_clamp_pframe_target_size(cpi, target);
-  return target;
+  int target;
+#if USE_ALTREF_FOR_ONE_PASS
+  target = (!rc->is_src_frame_alt_ref &&
+            (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) ?
+      (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval * af_ratio) /
+      (cpi->rc.baseline_gf_interval + af_ratio - 1) :
+      (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval) /
+      (cpi->rc.baseline_gf_interval + af_ratio - 1);
+#else
+  target = rc->av_per_frame_bandwidth;
+#endif
+  return vp9_rc_clamp_pframe_target_size(cpi, target);
 }
 
 static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
+  static const int kf_ratio = 12;
   const RATE_CONTROL *rc = &cpi->rc;
-  int target = rc->av_per_frame_bandwidth * 8;
-  target = vp9_rc_clamp_iframe_target_size(cpi, target);
-  return target;
+  int target = rc->av_per_frame_bandwidth * kf_ratio;
+  return vp9_rc_clamp_iframe_target_size(cpi, target);
 }
 
 void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
@@ -1094,7 +1120,7 @@
       cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key;
     cpi->refresh_golden_frame = 1;
     cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
-    cpi->rc.gfu_boost = 2000;
+    cpi->rc.gfu_boost = DEFAULT_GF_BOOST;
   }
   if (cm->frame_type == KEY_FRAME)
     target = calc_iframe_target_size_one_pass_vbr(cpi);