shithub: libvpx

--- a/vp9/encoder/vp9_block.h

+++ b/vp9/encoder/vp9_block.h

@@ -51,6 +51,7 @@

   int comp_pred_diff;

   int single_pred_diff;

   int64_t txfm_rd_diff[NB_TXFM_MODES];

+  int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];

   // Bit flag for each mode whether it has high error in comparison to others.

   unsigned int modes_with_high_error;

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -43,8 +43,6 @@

 int enc_debug = 0;

 #endif

-void vp9_select_interp_filter_type(VP9_COMP *cpi);

 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,

                               int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize);

@@ -345,6 +343,8 @@

+  // FIXME(rbultje) I'm pretty sure this should go to the end of this block

+  // (i.e. after the output_enabled)

   if (bsize < BLOCK_SIZE_SB32X32) {

     if (bsize < BLOCK_SIZE_MB16X16)

       ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8];

@@ -430,6 +430,10 @@

     cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff;

     cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;

     cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;

+    for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {

+      cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];

+    }

@@ -1795,7 +1799,6 @@

   cpi->inter_zz_count = 0;

   vp9_zero(cm->fc.switchable_interp_count);

-  vp9_zero(cpi->best_switchable_interp_count);

   vp9_zero(cpi->txfm_stepdown_count);

   xd->mode_info_context = cm->mi;

@@ -1827,6 +1830,7 @@

   init_encode_frame_mb_context(cpi);

   vpx_memset(cpi->rd_comp_pred_diff, 0, sizeof(cpi->rd_comp_pred_diff));

+  vp9_zero(cpi->rd_filter_diff);

   vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff));

   vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes));

@@ -2063,6 +2067,7 @@

   if (cpi->sf.RD) {

     int i, pred_type;

+    INTERPOLATIONFILTERTYPE filter_type;

/*

      * This code does a single RD pass over the whole frame assuming

      * either compound, single or hybrid prediction as per whatever has

@@ -2089,6 +2094,30 @@

     else

       pred_type = HYBRID_PREDICTION;

+    /* filter type selection */

+    // FIXME(rbultje) for some odd reason, we often select smooth_filter

+    // as default filter for ARF overlay frames. This is a REALLY BAD

+    // IDEA so we explicitly disable it here.

+    if (frame_type != 3 &&

+        cpi->rd_filter_threshes[frame_type][1] >

+            cpi->rd_filter_threshes[frame_type][0] &&

+        cpi->rd_filter_threshes[frame_type][1] >

+            cpi->rd_filter_threshes[frame_type][2] &&

+        cpi->rd_filter_threshes[frame_type][1] >

+            cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {

+      filter_type = vp9_switchable_interp[1];

+    } else if (cpi->rd_filter_threshes[frame_type][2] >

+            cpi->rd_filter_threshes[frame_type][0] &&

+        cpi->rd_filter_threshes[frame_type][2] >

+            cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {

+      filter_type = vp9_switchable_interp[2];

+    } else if (cpi->rd_filter_threshes[frame_type][0] >

+                  cpi->rd_filter_threshes[frame_type][VP9_SWITCHABLE_FILTERS]) {

+      filter_type = vp9_switchable_interp[0];

+    } else {

+      filter_type = SWITCHABLE;

+    }

     /* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */

     cpi->mb.e_mbd.lossless = 0;

@@ -2098,6 +2127,7 @@

     select_txfm_mode(cpi);

     cpi->common.comp_pred_mode = pred_type;

+    cpi->common.mcomp_filter_type = filter_type;

     encode_frame_internal(cpi);

     for (i = 0; i < NB_PREDICTION_TYPES; ++i) {

@@ -2106,6 +2136,12 @@

       cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;

+    for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {

+      const int64_t diff = cpi->rd_filter_diff[i] / cpi->common.MBs;

+      cpi->rd_filter_threshes[frame_type][i] =

+          (cpi->rd_filter_threshes[frame_type][i] + diff) / 2;

+    }

     for (i = 0; i < NB_TXFM_MODES; ++i) {

       int64_t pd = cpi->rd_tx_select_diff[i];

       int diff;

@@ -2180,10 +2216,6 @@

         reset_skip_txfm_size(cpi, TX_16X16);

-    // Update interpolation filter strategy for next frame.

-    if ((cpi->common.frame_type != KEY_FRAME) && (cpi->sf.search_best_filter))

-      vp9_select_interp_filter_type(cpi);

   } else {

     encode_frame_internal(cpi);

--- a/vp9/encoder/vp9_onyx_if.c

+++ b/vp9/encoder/vp9_onyx_if.c

@@ -2395,45 +2395,6 @@

-void vp9_select_interp_filter_type(VP9_COMP *cpi) {

-  int i;

-  int high_filter_index = 0;

-  unsigned int thresh;

-  unsigned int high_count = 0;

-  unsigned int count_sum = 0;

-  unsigned int *hist = cpi->best_switchable_interp_count;

-  if (DEFAULT_INTERP_FILTER != SWITCHABLE) {

-    cpi->common.mcomp_filter_type = DEFAULT_INTERP_FILTER;

-    return;

-  }

-  // TODO(agrange): Look at using RD criteria to select the interpolation

-  // filter to use for the next frame rather than this simpler counting scheme.

-  // Select the interpolation filter mode for the next frame

-  // based on the selection frequency seen in the current frame.

-  for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {

-    unsigned int count = hist[i];

-    count_sum += count;

-    if (count > high_count) {

-      high_count = count;

-      high_filter_index = i;

-    }

-  }

-  thresh = (unsigned int)(0.80 * count_sum);

-  if (high_count > thresh) {

-    // One filter accounts for 80+% of cases so force the next

-    // frame to use this filter exclusively using frame-level flag.

-    cpi->common.mcomp_filter_type = vp9_switchable_interp[high_filter_index];

-  } else {

-    // Use a MB-level switchable filter selection strategy.

-    cpi->common.mcomp_filter_type = SWITCHABLE;

-  }

-}

 static void scale_references(VP9_COMP *cpi) {

   VP9_COMMON *cm = &cpi->common;

   int i;

--- a/vp9/encoder/vp9_onyx_int.h

+++ b/vp9/encoder/vp9_onyx_int.h

@@ -380,6 +380,7 @@

   int rd_thresh_freq_fact[BLOCK_SIZE_TYPES][MAX_MODES];

   int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES];

+  // FIXME(rbultje) int64_t?

   int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];

   unsigned int intra_inter_count[INTRA_INTER_CONTEXTS][2];

   unsigned int comp_inter_count[COMP_INTER_CONTEXTS][2];

@@ -386,11 +387,14 @@

   unsigned int single_ref_count[REF_CONTEXTS][2][2];

   unsigned int comp_ref_count[REF_CONTEXTS][2];

-  // FIXME contextualize

   int64_t rd_tx_select_diff[NB_TXFM_MODES];

+  // FIXME(rbultje) can this overflow?

   int rd_tx_select_threshes[4][NB_TXFM_MODES];

+  int64_t rd_filter_diff[VP9_SWITCHABLE_FILTERS + 1];

+  int64_t rd_filter_threshes[4][VP9_SWITCHABLE_FILTERS + 1];

+  int64_t rd_filter_cache[VP9_SWITCHABLE_FILTERS + 1];

   int RDMULT;

   int RDDIV;

@@ -629,7 +633,6 @@

   unsigned int switchable_interp_count[VP9_SWITCHABLE_FILTERS + 1]

                                       [VP9_SWITCHABLE_FILTERS];

-  unsigned int best_switchable_interp_count[VP9_SWITCHABLE_FILTERS];

   unsigned int txfm_stepdown_count[TX_SIZE_MAX_SB];

--- a/vp9/encoder/vp9_rdopt.c

+++ b/vp9/encoder/vp9_rdopt.c

@@ -2177,12 +2177,13 @@

 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,

-                                 int mode_index,

-                                 PARTITION_INFO *partition,

-                                 int_mv *ref_mv,

-                                 int_mv *second_ref_mv,

-                                 int64_t comp_pred_diff[NB_PREDICTION_TYPES],

-                                 int64_t txfm_size_diff[NB_TXFM_MODES]) {

+                         int mode_index,

+                         PARTITION_INFO *partition,

+                         int_mv *ref_mv,

+                         int_mv *second_ref_mv,

+                         int64_t comp_pred_diff[NB_PREDICTION_TYPES],

+                         int64_t txfm_size_diff[NB_TXFM_MODES],

+                         int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1]) {

   MACROBLOCKD *const xd = &x->e_mbd;

   // Take a snapshot of the coding context so it can be

@@ -2201,7 +2202,11 @@

   ctx->comp_pred_diff   = (int)comp_pred_diff[COMP_PREDICTION_ONLY];

   ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];

+  // FIXME(rbultje) does this memcpy the whole array? It should: sizeof() is

+  // applied to the ctx array member, not to the decayed pointer parameter.

   memcpy(ctx->txfm_rd_diff, txfm_size_diff, sizeof(ctx->txfm_rd_diff));

+  memcpy(ctx->best_filter_diff, best_filter_diff,

+         sizeof(*best_filter_diff) * (VP9_SWITCHABLE_FILTERS + 1));

 static void setup_pred_block(const MACROBLOCKD *xd,

@@ -2644,29 +2649,46 @@

   // pred error irrespective of whether the filter will be used

   if (cpi->sf.use_8tap_always) {

     *best_filter = EIGHTTAP;

+    vp9_zero(cpi->rd_filter_cache);

   } else {

     int i, newbest;

     int tmp_rate_sum = 0;

     int64_t tmp_dist_sum = 0;

+    cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;

     for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {

-      int rs = 0;

+      int rs;

+      int64_t rs_rd;

       const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];

       const int is_intpel_interp = intpel_mv &&

           vp9_is_interpolating_filter[filter];

       mbmi->interp_filter = filter;

       vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);

+      rs = get_switchable_rate(cm, x);

+      rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);

-      if (cm->mcomp_filter_type == SWITCHABLE)

-        rs = get_switchable_rate(cm, x);

       if (interpolating_intpel_seen && is_intpel_interp) {

-        rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);

+        cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,

+                                         tmp_rate_sum, tmp_dist_sum);

+        cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =

+            MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],

+                cpi->rd_filter_cache[i] + rs_rd);

+        rd = cpi->rd_filter_cache[i];

+        if (cm->mcomp_filter_type == SWITCHABLE)

+          rd += rs_rd;

       } else {

         int rate_sum = 0;

         int64_t dist_sum = 0;

         vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);

         model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);

-        rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);

+        cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,

+                                         rate_sum, dist_sum);

+        cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =

+            MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],

+                cpi->rd_filter_cache[i] + rs_rd);

+        rd = cpi->rd_filter_cache[i];

+        if (cm->mcomp_filter_type == SWITCHABLE)

+          rd += rs_rd;

         if (!interpolating_intpel_seen && is_intpel_interp) {

           tmp_rate_sum = rate_sum;

           tmp_dist_sum = dist_sum;

@@ -2891,7 +2913,6 @@

   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;

   const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);

   MB_PREDICTION_MODE this_mode;

-  MB_PREDICTION_MODE best_mode = DC_PRED;

   MV_REFERENCE_FRAME ref_frame;

   unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;

   int comp_pred, i;

@@ -2909,18 +2930,18 @@

   int64_t best_txfm_diff[NB_TXFM_MODES];

   int64_t best_pred_diff[NB_PREDICTION_TYPES];

   int64_t best_pred_rd[NB_PREDICTION_TYPES];

+  int64_t best_filter_rd[VP9_SWITCHABLE_FILTERS + 1];

+  int64_t best_filter_diff[VP9_SWITCHABLE_FILTERS + 1];

   MB_MODE_INFO best_mbmode;

   int j;

   int mode_index, best_mode_index = 0;

   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];

   vp9_prob comp_mode_p;

-  int64_t best_overall_rd = INT64_MAX;

   int64_t best_intra_rd = INT64_MAX;

   int64_t best_inter_rd = INT64_MAX;

   MB_PREDICTION_MODE best_intra_mode = DC_PRED;

   // MB_PREDICTION_MODE best_inter_mode = ZEROMV;

   MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;

-  INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;

   INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;

   int rate_uv_intra[TX_SIZE_MAX_SB], rate_uv_tokenonly[TX_SIZE_MAX_SB];

   int64_t dist_uv[TX_SIZE_MAX_SB];

@@ -2962,6 +2983,8 @@

     best_pred_rd[i] = INT64_MAX;

   for (i = 0; i < NB_TXFM_MODES; i++)

     best_txfm_rd[i] = INT64_MAX;

+  for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)

+    best_filter_rd[i] = INT64_MAX;

   // Create a mask set to 1 for each frame used by a smaller resolution.

   if (cpi->sf.use_avoid_tested_higherror) {

@@ -3291,10 +3314,12 @@

           cpi->rd_threshes[bsize][THR_NEWG] : this_rd_thresh;

       xd->mode_info_context->mbmi.txfm_size = TX_4X4;

+      cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;

       for (switchable_filter_index = 0;

            switchable_filter_index < VP9_SWITCHABLE_FILTERS;

            ++switchable_filter_index) {

-        int newbest;

+        int newbest, rs;

+        int64_t rs_rd;

         mbmi->interp_filter =

         vp9_switchable_interp[switchable_filter_index];

         vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);

@@ -3306,10 +3331,13 @@

                      &skippable,

                      (int)this_rd_thresh, seg_mvs,

                      mi_row, mi_col);

-        if (cpi->common.mcomp_filter_type == SWITCHABLE) {

-          const int rs = get_switchable_rate(cm, x);

-          tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);

-        }

+        cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;

+        rs = get_switchable_rate(cm, x);

+        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);

+        cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =

+            MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd);

+        if (cm->mcomp_filter_type == SWITCHABLE)

+          tmp_rd += rs_rd;

         newbest = (tmp_rd < tmp_best_rd);

         if (newbest) {

           tmp_best_filter = mbmi->interp_filter;

@@ -3454,6 +3482,7 @@

                                                           PRED_MBSKIP), 0);

           rate2 += prob_skip_cost;

         } else {

+          // FIXME(rbultje) make this work for splitmv also

           int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd,

                                                               PRED_MBSKIP), 1);

           rate2 += prob_skip_cost;

@@ -3492,14 +3521,11 @@

       // best_inter_mode = xd->mode_info_context->mbmi.mode;

-    if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME)

+    if (!disable_skip && mbmi->ref_frame[0] == INTRA_FRAME) {

       for (i = 0; i < NB_PREDICTION_TYPES; ++i)

         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);

-    if (this_rd < best_overall_rd) {

-      best_overall_rd = this_rd;

-      best_filter = tmp_best_filter;

-      best_mode = this_mode;

+      for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++)

+        best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);

     if (this_mode != I4X4_PRED && this_mode != SPLITMV) {

@@ -3595,6 +3621,26 @@

         best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;

+    /* keep record of best filter type */

+    if (!mode_excluded && !disable_skip && mbmi->ref_frame[0] != INTRA_FRAME &&

+        cm->mcomp_filter_type != BILINEAR) {

+      int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?

+                              VP9_SWITCHABLE_FILTERS :

+                              vp9_switchable_interp_map[cm->mcomp_filter_type]];

+      for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {

+        int64_t adj_rd;

+        // In cases of poor prediction, filter_cache[] can contain really big

+        // values, which actually are bigger than this_rd itself. This can

+        // cause negative best_filter_rd[] values, which is obviously silly.

+        // Therefore, if filter_cache < ref, we do an adjusted calculation.

+        if (cpi->rd_filter_cache[i] >= ref)

+          adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;

+        else  // FIXME(rbultje) do this for comppred also

+          adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;

+        best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);

+      }

+    }

     /* keep record of best txfm size */

     if (bsize < BLOCK_SIZE_SB32X32) {

       if (bsize < BLOCK_SIZE_MB16X16) {

@@ -3666,11 +3712,6 @@

          (cm->mcomp_filter_type == best_mbmode.interp_filter) ||

          (best_mbmode.ref_frame[0] == INTRA_FRAME));

-  // Accumulate filter usage stats

-  // TODO(agrange): Use RD criteria to select interpolation filter mode.

-  if (is_inter_mode(best_mode))

-    ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]];

   // Updating rd_thresh_freq_fact[] here means that the differnt

   // partition/block sizes are handled independently based on the best

   // choice for the current partition. It may well be better to keep a scaled

@@ -3731,6 +3772,7 @@

     vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));

     vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));

+    vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));

     goto end;

@@ -3768,6 +3810,19 @@

   if (!x->skip) {

+    for (i = 0; i <= VP9_SWITCHABLE_FILTERS; i++) {

+      if (best_filter_rd[i] == INT64_MAX)

+        best_filter_diff[i] = 0;

+      else

+        best_filter_diff[i] = best_rd - best_filter_rd[i];

+    }

+    if (cm->mcomp_filter_type == SWITCHABLE)

+      assert(best_filter_diff[VP9_SWITCHABLE_FILTERS] == 0);

+  } else {

+    vpx_memset(best_filter_diff, 0, sizeof(best_filter_diff));

+  }

+  if (!x->skip) {

     for (i = 0; i < NB_TXFM_MODES; i++) {

       if (best_txfm_rd[i] == INT64_MAX)

         best_txfm_diff[i] = 0;

@@ -3786,7 +3841,7 @@

                        &mbmi->ref_mvs[mbmi->ref_frame[0]][0],

                        &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :

                                       mbmi->ref_frame[1]][0],

-                       best_pred_diff, best_txfm_diff);

+                       best_pred_diff, best_txfm_diff, best_filter_diff);

   return best_rd;