shithub: libvpx

--- a/vp9/encoder/vp9_block.h

+++ b/vp9/encoder/vp9_block.h

@@ -146,9 +146,9 @@

   uint8_t sb_is_skin;

   // Used to save the status of whether a block has a low variance in

-  // choose_partitioning. 0 for 64x64, 1 2 for 64x32, 3 4 for 32x64, 5~8 for

-  // 32x32.

-  uint8_t variance_low[9];

+  // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for

+  // 32x32, 9~24 for 16x16.

+  uint8_t variance_low[25];

   void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);

   void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);

--- a/vp9/encoder/vp9_encodeframe.c

+++ b/vp9/encoder/vp9_encodeframe.c

@@ -773,7 +773,7 @@

-  for (i = 0; i < 9; i++) {

+  for (i = 0; i < 25; i++) {

     x->variance_low[i] = 0;

@@ -1083,28 +1083,53 @@

   if (cpi->sf.short_circuit_low_temp_var) {

-    // Set low variance flag, only for blocks >= 32x32 and if LAST_FRAME was

-    // selected.

-    if (ref_frame_partition == LAST_FRAME) {

+    int mv_thr = cm->width > 640 ? 8 : 4;

+    // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected

+    // and int_pro mv is small. If the temporal variance is small, set the

+    // variance_low flag for the block. The variance threshold can be adjusted;

+    // the higher the threshold, the more aggressive.

+    if (ref_frame_partition == LAST_FRAME &&

+        (cpi->sf.short_circuit_low_temp_var == 1 ||

+         (xd->mi[0]->mv[0].as_mv.col < mv_thr &&

+          xd->mi[0]->mv[0].as_mv.col > -mv_thr &&

+          xd->mi[0]->mv[0].as_mv.row < mv_thr &&

+          xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {

       if (xd->mi[0]->sb_type == BLOCK_64X64 &&

           vt.part_variances.none.variance < (thresholds[0] >> 1)) {

         x->variance_low[0] = 1;

       } else if (xd->mi[0]->sb_type == BLOCK_64X32) {

-        if (vt.part_variances.horz[0].variance < (thresholds[0] >> 2))

-          x->variance_low[1] = 1;

-        if (vt.part_variances.horz[1].variance < (thresholds[0] >> 2))

-          x->variance_low[2] = 1;

+        for (j = 0; j < 2; j++) {

+          if (vt.part_variances.horz[j].variance < (thresholds[0] >> 2))

+            x->variance_low[j + 1] = 1;

+        }

       } else if (xd->mi[0]->sb_type == BLOCK_32X64) {

-        if (vt.part_variances.vert[0].variance < (thresholds[0] >> 2))

-          x->variance_low[3] = 1;

-        if (vt.part_variances.vert[1].variance < (thresholds[0] >> 2))

-          x->variance_low[4] = 1;

+        for (j = 0; j < 2; j++) {

+          if (vt.part_variances.vert[j].variance < (thresholds[0] >> 2))

+            x->variance_low[j + 3] = 1;

+        }

       } else {

-        // 32x32

         for (i = 0; i < 4; i++) {

-          if (!force_split[i + 1] &&

-              vt.split[i].part_variances.none.variance < (thresholds[1] >> 1))

-            x->variance_low[i + 5] = 1;

+          if (!force_split[i + 1]) {

+            // 32x32

+            if (vt.split[i].part_variances.none.variance <

+                (thresholds[1] >> 1))

+              x->variance_low[i + 5] = 1;

+          } else if (cpi->sf.short_circuit_low_temp_var == 2) {

+            int idx[4] = {0, 4, xd->mi_stride << 2, (xd->mi_stride << 2) + 4};

+            const int idx_str = cm->mi_stride * mi_row + mi_col + idx[i];

+            MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;

+            // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block

+            // inside.

+            if ((*this_mi)->sb_type == BLOCK_16X16 ||

+                (*this_mi)->sb_type == BLOCK_32X16 ||

+                (*this_mi)->sb_type == BLOCK_16X32) {

+              for (j = 0; j < 4; j++) {

+                if (vt.split[i].split[j].part_variances.none.variance <

+                    (thresholds[2] >> 8))

+                  x->variance_low[(i << 2) + j + 9] = 1;

+              }

+            }

+          }

--- a/vp9/encoder/vp9_pickmode.c

+++ b/vp9/encoder/vp9_pickmode.c

@@ -40,6 +40,14 @@

   int in_use;

 } PRED_BUFFER;

+static const int pos_shift_16x16[4][4] = {

+  {9, 10, 13, 14},

+  {11, 12, 15, 16},

+  {17, 18, 21, 22},

+  {19, 20, 23, 24}

+};

 static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm,

                       const MACROBLOCK *x,

                       const MACROBLOCKD *xd,

@@ -1274,6 +1282,8 @@

                                               int mi_row, int mi_col,

                                               BLOCK_SIZE bsize) {

   int force_skip_low_temp_var = 0;

+  int i = (mi_row & 0x7) >> 1;

+  int j = (mi_col & 0x7) >> 1;

   // Set force_skip_low_temp_var based on the block size and block offset.

   if (bsize == BLOCK_64X64) {

     force_skip_low_temp_var = variance_low[0];

@@ -1299,6 +1309,19 @@

     } else if ((mi_col & 0x7) && (mi_row & 0x7)) {

       force_skip_low_temp_var = variance_low[8];

+  } else if (bsize == BLOCK_16X16) {

+    force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];

+  } else if (bsize == BLOCK_32X16) {

+    // The col shift index for the second 16x16 block.

+    int j2 = ((mi_col + 2) & 0x7) >> 1;

+    // Only if each 16x16 block inside has low temporal variance.

+    force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&

+      variance_low[pos_shift_16x16[i][j2]];

+  } else if (bsize == BLOCK_16X32) {

+    // The row shift index for the second 16x16 block.

+    int i2 = ((mi_row + 2) & 0x7) >> 1;

+    force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&

+      variance_low[pos_shift_16x16[i2][j]];

   return force_skip_low_temp_var;

@@ -1503,6 +1526,12 @@

       continue;

+    if (cpi->sf.short_circuit_low_temp_var == 2 &&

+        force_skip_low_temp_var && ref_frame == LAST_FRAME &&

+        this_mode == NEWMV) {

+      continue;

+    }

     if (cpi->use_svc) {

       if (svc_force_zero_mode[ref_frame - 1] &&

           frame_mv[this_mode][ref_frame].as_int != 0)

@@ -1842,8 +1871,9 @@

     inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;

   // Perform intra prediction search, if the best SAD is above a certain

-  // threshold. Skip intra prediction if force_skip_low_temp_var is set.

-  if (!force_skip_low_temp_var && perform_intra_pred &&

+  // threshold.

+  if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) &&

+      perform_intra_pred &&

       (best_rdc.rdcost == INT64_MAX ||

        (!x->skip && best_rdc.rdcost > inter_mode_thresh &&

         bsize <= cpi->sf.max_intra_bsize))) {

--- a/vp9/encoder/vp9_speed_features.c

+++ b/vp9/encoder/vp9_speed_features.c

@@ -429,7 +429,7 @@

     sf->mv.search_method = NSTEP;

     sf->mv.reduce_first_step_size = 1;

     sf->skip_encode_sb = 0;

-    if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.pass == 0 &&

+    if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&

         content != VP9E_CONTENT_SCREEN) {

       // Enable short circuit for low temporal variance.

       sf->short_circuit_low_temp_var = 1;

@@ -450,6 +450,17 @@

     sf->adaptive_rd_thresh = 4;

     sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2;

     sf->lpf_pick = LPF_PICK_MINIMAL_LPF;

+    // Only keep INTRA_DC mode for speed 8.

+    if (!is_keyframe) {

+      int i = 0;

+      for (i = 0; i < BLOCK_SIZES; ++i)

+        sf->intra_y_mode_bsize_mask[i] = INTRA_DC;

+    }

+    if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&

+        content != VP9E_CONTENT_SCREEN) {

+      // More aggressive short circuit for speed 8.

+      sf->short_circuit_low_temp_var = 2;

+    }

--- a/vp9/encoder/vp9_speed_features.h

+++ b/vp9/encoder/vp9_speed_features.h

@@ -449,6 +449,10 @@

   // Skip a number of expensive mode evaluations for blocks with very low

   // temporal variance.

+  // 1: Skip golden non-zeromv and ALL INTRA for bsize >= 32x32.

+  // 2: Skip golden non-zeromv and newmv-last for bsize >= 16x16, skip ALL

+  // INTRA for bsize >= 32x32 and vert/horz INTRA for bsize 16x16, 16x32 and

+  // 32x16.

   int short_circuit_low_temp_var;

 } SPEED_FEATURES;