shithub: libvpx

ref: 0ce51d823fcef0f88d213958b43d713ce7bdf189
parent: dae97868da290b2d401416caed80a22bbdcae6de
author: Jim Bankoski <[email protected]>
date: Tue Oct 7 12:36:14 EDT 2014

experimental: partition using 1/8 x 1/8 image

The concept:

There's too much noise in the source pixels for variance-based
partitioning, and at low bitrates the reconstruction looks nothing like
the source, so neither gives good partitionings. This skirts the issue
by running the variance calculations on a box-blurred, scaled-down
version of the image. To allow comparison against source_var_, the
keyframe is moved to RD-based partitioning, like source_var.

Change-Id: Ie3babdbfadae324b7b5a76bea192893af27f0624
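
For intuition: the 1/8 x 1/8 image amounts to replacing every 8x8 block of
the frame with its rounded average, which is exactly what the new
vp9_avg_8x8 below computes. A minimal standalone sketch of that downscale
(hypothetical helper names, not code from this patch; the patch itself
computes the averages on the fly in choose_partitioning rather than
materializing the small image):

  #include <stdint.h>

  /* Rounded average of one 8x8 block; mirrors vp9_avg_8x8_c below. */
  static unsigned int avg_8x8(const uint8_t *s, int p) {
    int i, j, sum = 0;
    for (i = 0; i < 8; ++i, s += p)
      for (j = 0; j < 8; ++j)
        sum += s[j];
    return (sum + 32) >> 6;
  }

  /* Build the 1/8 x 1/8 "box blur" image: one average per 8x8 block.
   * Assumes width and height are multiples of 8. */
  static void downscale_8x8(const uint8_t *src, int src_stride,
                            uint8_t *dst, int dst_stride,
                            int width, int height) {
    int r, c;
    for (r = 0; r < height / 8; ++r)
      for (c = 0; c < width / 8; ++c)
        dst[r * dst_stride + c] =
            (uint8_t)avg_8x8(src + 8 * r * src_stride + 8 * c, src_stride);
  }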

--- a/test/test.mk
+++ b/test/test.mk
@@ -129,6 +129,7 @@
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += vp9_intrapred_test.cc
 
 ifeq ($(CONFIG_VP9_ENCODER),yes)
--- /dev/null
+++ b/test/vp9_avg_test.cc
@@ -1,0 +1,150 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#if CONFIG_VP9_ENCODER
+#include "./vp9_rtcd.h"
+#endif
+#include "vpx_mem/vpx_mem.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+class AverageTestBase : public ::testing::Test {
+ public:
+  AverageTestBase(int width, int height) : width_(width), height_(height) {}
+
+  static void SetUpTestCase() {
+    source_data_ = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBlockSize));
+  }
+
+  static void TearDownTestCase() {
+    vpx_free(source_data_);
+    source_data_ = NULL;
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // Handle up to four 64x64 blocks, with a stride of up to 128.
+  static const int kDataAlignment = 16;
+  static const int kDataBlockSize = 64 * 128;
+
+  virtual void SetUp() {
+    source_stride_ = (width_ + 31) & ~31;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  // Rounded average of the 8x8 block at the start of |source|.
+  unsigned int ReferenceAverage(const uint8_t* source, int pitch) {
+    unsigned int average = 0;
+    for (int h = 0; h < 8; ++h)
+      for (int w = 0; w < 8; ++w)
+        average += source[h * pitch + w];
+    return (average + 32) >> 6;
+  }
+
+  void FillConstant(uint8_t fill_constant) {
+    for (int i = 0; i < width_ * height_; ++i) {
+        source_data_[i] = fill_constant;
+    }
+  }
+
+  void FillRandom() {
+    for (int i = 0; i < width_ * height_; ++i) {
+        source_data_[i] = rnd_.Rand8();
+    }
+  }
+
+  int width_, height_;
+  static uint8_t* source_data_;
+  int source_stride_;
+
+  ACMRandom rnd_;
+};
+typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch);
+
+typedef std::tr1::tuple<int, int, int, AverageFunction> AvgFunc;
+
+class AverageTest
+    : public AverageTestBase,
+      public ::testing::WithParamInterface<AvgFunc> {
+ public:
+  AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  void CheckAverages() {
+    unsigned int expected = ReferenceAverage(source_data_ + GET_PARAM(2),
+                                             source_stride_);
+
+    ASM_REGISTER_STATE_CHECK(GET_PARAM(3)(source_data_ + GET_PARAM(2),
+                                          source_stride_));
+    unsigned int actual = GET_PARAM(3)(source_data_ + GET_PARAM(2),
+                                       source_stride_);
+
+    EXPECT_EQ(expected, actual);
+  }
+};
+
+
+uint8_t* AverageTestBase::source_data_ = NULL;
+
+TEST_P(AverageTest, MinValue) {
+  FillConstant(0);
+  CheckAverages();
+}
+
+TEST_P(AverageTest, MaxValue) {
+  FillConstant(255);
+  CheckAverages();
+}
+
+TEST_P(AverageTest, Random) {
+  // The reference frame, but not the source frame, may be unaligned for
+  // certain types of searches.
+  for (int i = 0; i < 1000; i++) {
+    FillRandom();
+    CheckAverages();
+  }
+}
+
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, AverageTest,
+    ::testing::Values(
+        make_tuple(16, 16, 1, &vp9_avg_8x8_c)));
+
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, AverageTest,
+    ::testing::Values(
+        make_tuple(16, 16, 0, &vp9_avg_8x8_sse2),
+        make_tuple(16, 16, 5, &vp9_avg_8x8_sse2),
+        make_tuple(32, 32, 15, &vp9_avg_8x8_sse2)));
+
+#endif
+
+}  // namespace
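
Assuming the standard libvpx test harness, where the unit tests link into
the test_libvpx binary, the new cases can be run in isolation with a gtest
filter:

  ./test_libvpx --gtest_filter='*AverageTest*'
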
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1110,6 +1110,10 @@
 
 add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
 specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
+
+add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
+specialize qw/vp9_avg_8x8/, "$sse2_x86inc";
+
 # ENCODEMB INVOKE
 
 add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
--- /dev/null
+++ b/vp9/encoder/vp9_avg.c
@@ -1,0 +1,19 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_ports/mem.h"
+
+unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
+  int i, j;
+  int sum = 0;
+  for (i = 0; i < 8; ++i, s += p)
+    for (j = 0; j < 8; ++j)
+      sum += s[j];
+  return (sum + 32) >> 6;
+}
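
A quick sanity check of the rounding, as a hypothetical standalone program
(not part of the patch): a saturated block sums to 64 * 255 = 16320, and
(16320 + 32) >> 6 = 255, so constant inputs round-trip exactly, which is
what the MaxValue test above relies on.

  #include <assert.h>
  #include <stdint.h>

  unsigned int vp9_avg_8x8_c(const uint8_t *s, int p);

  int main(void) {
    uint8_t block[8 * 8];
    int i;
    for (i = 0; i < 8 * 8; ++i)
      block[i] = 255;
    /* sum = 64 * 255 = 16320; (16320 + 32) >> 6 = 255 */
    assert(vp9_avg_8x8_c(block, 8) == 255);
    return 0;
  }
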
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -396,10 +396,10 @@
   const int block_width = num_8x8_blocks_wide_lookup[bsize];
   const int block_height = num_8x8_blocks_high_lookup[bsize];
   // TODO(debargha): Choose this more intelligently.
-  const int64_t threshold_multiplier = 25;
-  int64_t threshold = threshold_multiplier * cpi->common.base_qindex;
+  const int64_t threshold_multiplier = cm->frame_type == KEY_FRAME ? 64 : 4;
+  int64_t threshold = threshold_multiplier *
+      vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
   assert(block_height == block_width);
-
   tree_to_node(data, bsize, &vt);
 
   // Split none is available only if we have more than half a block size
@@ -511,10 +511,17 @@
         int y_idx = y16_idx + ((k >> 1) << 3);
         unsigned int sse = 0;
         int sum = 0;
-        if (x_idx < pixels_wide && y_idx < pixels_high)
-          vp9_get8x8var(s + y_idx * sp + x_idx, sp,
-                        d + y_idx * dp + x_idx, dp, &sse, &sum);
-        fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
+
+        if (x_idx < pixels_wide && y_idx < pixels_high) {
+          int s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
+          int d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+          sum = s_avg - d_avg;
+          sse = sum * sum;
+        }
+        // For an 8x8 block we have just one value, the average of all 64
+        // pixels, so the count is 1. This of course means there is no
+        // variance within an 8x8 block.
+        fill_variance(sse, sum, 1, &vst->split[k].part_variances.none);
       }
     }
   }
@@ -530,8 +537,8 @@
   // Now go through the entire structure,  splitting every block size until
   // we get to one that's got a variance lower than our threshold,  or we
   // hit 8x8.
-  if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64,
-                           mi_row, mi_col)) {
+  if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
+      !set_vt_partitioning(cpi, &vt, BLOCK_64X64, mi_row, mi_col)) {
     for (i = 0; i < 4; ++i) {
       const int x32_idx = ((i & 1) << 2);
       const int y32_idx = ((i >> 1) << 2);
@@ -561,10 +568,10 @@
             }
           }
 #else
-          if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile,
+          if (!set_vt_partitioning(cpi, &vt.split[i].split[j],
                                    BLOCK_16X16,
-                                   (mi_row + y32_idx + y16_idx),
-                                   (mi_col + x32_idx + x16_idx), 2)) {
+                                   mi_row + y32_idx + y16_idx,
+                                   mi_col + x32_idx + x16_idx)) {
             for (k = 0; k < 4; ++k) {
               const int x8_idx = (k & 1);
               const int y8_idx = (k >> 1);
@@ -2593,7 +2600,8 @@
       set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
       rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, cpi->pc_root);
-    } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
+    } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
+               cm->frame_type != KEY_FRAME) {
       choose_partitioning(cpi, tile, mi_row, mi_col);
       rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, cpi->pc_root);
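
Note how the variance model changes above: each 8x8 block now contributes a
single sample, sum = s_avg - d_avg with sse = sum * sum, so the 8x8 leaves
carry no variance of their own; variance only appears when siblings are
merged and the spread of their averages shows up. A simplified sketch of
that merge under the usual identity var = E[x^2] - E[x]^2 (an illustration,
not the patch's fill_variance code):

  #include <stdint.h>

  /* One node of the variance tree: error sum, squared-error sum, count. */
  typedef struct {
    int64_t sum_error;
    int64_t sum_square_error;
    int count;
  } var_node;

  /* Merging children just accumulates the three running totals. */
  static var_node merge(var_node a, var_node b) {
    var_node m;
    m.sum_error = a.sum_error + b.sum_error;
    m.sum_square_error = a.sum_square_error + b.sum_square_error;
    m.count = a.count + b.count;
    return m;
  }

  /* var = (sum_sq - sum^2 / n) / n; zero for a single sample, since the
   * two terms cancel -- the "no variance in an 8x8 block" remark above. */
  static int64_t node_variance(var_node v) {
    return (v.sum_square_error - v.sum_error * v.sum_error / v.count) /
           v.count;
  }
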
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -235,6 +235,10 @@
               tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
     else
       xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;
+
+    if (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
+        xd->mi[0].src_mi->mbmi.tx_size > TX_16X16)
+      xd->mi[0].src_mi->mbmi.tx_size = TX_16X16;
   } else {
     xd->mi[0].src_mi->mbmi.tx_size =
         MIN(max_txsize_lookup[bsize],
@@ -611,7 +615,8 @@
         continue;
 
       if (this_mode == NEWMV) {
-        if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
+        if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
+            this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
           continue;
         if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
                                     &frame_mv[NEWMV][ref_frame],
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -249,6 +249,7 @@
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
     sf->frame_parameter_update = 0;
     sf->mv.search_method = FAST_HEX;
+
     sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
     sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
     sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
@@ -278,12 +279,17 @@
       int i;
       // Allow fancy modes at all sizes since SOURCE_VAR_BASED_PARTITION is used
       for (i = 0; i < BLOCK_SIZES; ++i)
-        sf->inter_mode_mask[i] = INTER_ALL;
+        sf->inter_mode_mask[i] = INTER_NEAREST_NEAR_NEW;
     }
 
     // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
-    sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
+    sf->partition_search_type = VAR_BASED_PARTITION;
     sf->search_type_check_frequency = 50;
+    sf->mv.search_method = NSTEP;
+    sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
+    sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
 
     sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
 
@@ -291,7 +297,7 @@
     sf->reuse_inter_pred_sby = 1;
 
     // Increase mode checking threshold for NEWMV.
-    sf->elevate_newmv_thresh = 2000;
+    sf->elevate_newmv_thresh = 1000;
 
     sf->mv.reduce_first_step_size = 1;
   }
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -34,6 +34,9 @@
 enum {
   INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
   INTER_NEAREST = (1 << NEARESTMV),
+  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
+  INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV),
+  INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV),
   INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
   INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV),
 };
--- /dev/null
+++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -1,0 +1,40 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <immintrin.h>
+#include "vpx_ports/mem.h"
+
+
+unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
+  __m128i s0, s1, u0;
+  unsigned int avg = 0;
+  u0 = _mm_setzero_si128();
+  s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+
+  s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8));
+  s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 32));
+  s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
+  avg = _mm_extract_epi16(s0, 0);
+  return (avg + 32) >> 6;
+}
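
The tail of the SSE2 version folds the eight 16-bit column sums in three
halving steps: _mm_srli_si128 by 8 bytes adds the high four words onto the
low four, and the two 64-bit shifts fold word pairs until word 0 holds the
grand total. The saturating adds can never clip, since the largest possible
sum is 64 * 255 = 16320. A scalar sketch of the same folding (illustration
only):

  #include <stdint.h>

  /* Fold eight 16-bit partial sums to one total in three halving steps. */
  static unsigned int fold8(uint16_t v[8]) {
    int i;
    for (i = 0; i < 4; ++i)
      v[i] = (uint16_t)(v[i] + v[i + 4]);  /* _mm_srli_si128(s0, 8)  */
    for (i = 0; i < 2; ++i)
      v[i] = (uint16_t)(v[i] + v[i + 2]);  /* _mm_srli_epi64(s0, 32) */
    v[0] = (uint16_t)(v[0] + v[1]);        /* _mm_srli_epi64(s0, 16) */
    return v[0];
  }
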
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -17,6 +17,7 @@
 
 VP9_CX_SRCS-yes += vp9_cx_iface.c
 
+VP9_CX_SRCS-yes += encoder/vp9_avg.c
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
 VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
 VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
@@ -95,6 +96,7 @@
 
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm