ref: c005792951199bd40724a028120e80888ac38b57
parent: fd891a965572315f971effd908c4a499127cebcf
parent: eb88b172fe5e5fece6676b24a2b6b787e4901753
author: Johann Koenig <[email protected]>
date: Thu Jun 4 02:16:12 EDT 2015
Merge "Make vp9 subpixel match vp8"
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -21,6 +21,9 @@
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
+#if CONFIG_VP8_ENCODER
+# include "./vp8_rtcd.h"
+#endif // CONFIG_VP8_ENCODER
#if CONFIG_VP9_ENCODER
# include "./vp9_rtcd.h"
# include "vp9/encoder/vp9_variance.h"
@@ -32,10 +35,13 @@
typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
unsigned int *sse);
+typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
+ int xoffset, int yoffset,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse);
typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride);
-
using ::std::tr1::get;
using ::std::tr1::make_tuple;
using ::std::tr1::tuple;
@@ -102,6 +108,12 @@
(l2w + l2h)));
}
+/* The subpel reference functions differ from the codec versions in one
+ * respect: they calculate the bilinear factors directly instead of using a
+ * lookup table, and therefore upshift xoff and yoff by 1. Since only every
+ * other calculated value is used, the codec version halves the table to save
+ * space and to stay compatible with vp8.
+ */
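+/* For example, a 3/8-pel offset is upshifted to 6/16, and the reference then
+ * weights the two neighboring samples by (16 - 6)/16 and 6/16. */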
static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
int l2w, int l2h, int xoff, int yoff,
uint32_t *sse_ptr,
@@ -111,6 +123,10 @@
uint64_t sse = 0;
const int w = 1 << l2w;
const int h = 1 << l2h;
+
+ xoff <<= 1;
+ yoff <<= 1;
+
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
// Bilinear interpolation at a 16th pel step.
@@ -480,6 +496,10 @@
uint64_t sse = 0;
const int w = 1 << l2w;
const int h = 1 << l2h;
+
+ xoff <<= 1;
+ yoff <<= 1;
+
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
// bilinear interpolation at a 16th pel step
@@ -598,8 +618,8 @@
template<typename SubpelVarianceFunctionType>
void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
- for (int x = 0; x < 16; ++x) {
- for (int y = 0; y < 16; ++y) {
+ for (int x = 0; x < 8; ++x) {
+ for (int y = 0; y < 8; ++y) {
if (!use_high_bit_depth_) {
for (int j = 0; j < block_size_; j++) {
src_[j] = rnd_.Rand8();
@@ -621,8 +641,9 @@
unsigned int var1;
ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
src_, width_, &sse1));
- const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
- log2height_, x, y, &sse2,
+ const unsigned int var2 = subpel_variance_ref(ref_, src_,
+ log2width_, log2height_,
+ x, y, &sse2,
use_high_bit_depth_,
bit_depth_);
EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
@@ -636,8 +657,8 @@
// Compare against reference.
// Src: Set the first half of values to 0, the second half to the maximum.
// Ref: Set the first half of values to the maximum, the second half to 0.
- for (int x = 0; x < 16; ++x) {
- for (int y = 0; y < 16; ++y) {
+ for (int x = 0; x < 8; ++x) {
+ for (int y = 0; y < 8; ++y) {
const int half = block_size_ / 2;
if (!use_high_bit_depth_) {
memset(src_, 0, half);
@@ -658,10 +679,10 @@
ASM_REGISTER_STATE_CHECK(
var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1));
const unsigned int var2 =
- subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2,
- use_high_bit_depth_, bit_depth_);
- EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
- EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+ subpel_variance_ref(ref_, src_, log2width_, log2height_,
+ x, y, &sse2, use_high_bit_depth_, bit_depth_);
+ EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+ EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
}
}
}
@@ -669,8 +690,8 @@
#if CONFIG_VP9_ENCODER
template<>
void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
- for (int x = 0; x < 16; ++x) {
- for (int y = 0; y < 16; ++y) {
+ for (int x = 0; x < 8; ++x) {
+ for (int y = 0; y < 8; ++y) {
if (!use_high_bit_depth_) {
for (int j = 0; j < block_size_; j++) {
src_[j] = rnd_.Rand8();
@@ -795,7 +816,6 @@
const VarianceMxNFunc highbd_8_mse16x8_c = vpx_highbd_8_mse16x8_c;
const VarianceMxNFunc highbd_8_mse8x16_c = vpx_highbd_8_mse8x16_c;
const VarianceMxNFunc highbd_8_mse8x8_c = vpx_highbd_8_mse8x8_c;
-
INSTANTIATE_TEST_CASE_P(
C, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_c),
make_tuple(4, 4, highbd_12_mse16x8_c),
@@ -811,7 +831,6 @@
make_tuple(4, 4, highbd_8_mse8x8_c)));
*/
-
const VarianceMxNFunc highbd_12_variance64x64_c = vpx_highbd_12_variance64x64_c;
const VarianceMxNFunc highbd_12_variance64x32_c = vpx_highbd_12_variance64x32_c;
const VarianceMxNFunc highbd_12_variance32x64_c = vpx_highbd_12_variance32x64_c;
@@ -976,7 +995,6 @@
const VarianceMxNFunc highbd_8_mse16x8_sse2 = vpx_highbd_8_mse16x8_sse2;
const VarianceMxNFunc highbd_8_mse8x16_sse2 = vpx_highbd_8_mse8x16_sse2;
const VarianceMxNFunc highbd_8_mse8x8_sse2 = vpx_highbd_8_mse8x8_sse2;
-
INSTANTIATE_TEST_CASE_P(
SSE2, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_sse2),
make_tuple(4, 3, highbd_12_mse16x8_sse2),
@@ -1088,8 +1106,15 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
+#if CONFIG_VP8
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP8SubpelVarianceTest;
+
+TEST_P(VP8SubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(VP8SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+#endif // CONFIG_VP8
+
#if CONFIG_VP9_ENCODER
-typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceTest;
typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t> VP9SubpelAvgVarianceTest;
TEST_P(VP9SubpelVarianceTest, Ref) { RefTest(); }
@@ -1097,7 +1122,7 @@
TEST_P(VP9SubpelAvgVarianceTest, Ref) { RefTest(); }
#if CONFIG_VP9_HIGHBITDEPTH
-typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceHighTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceHighTest;
typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t>
VP9SubpelAvgVarianceHighTest;
@@ -1106,32 +1131,19 @@
TEST_P(VP9SubpelAvgVarianceHighTest, Ref) { RefTest(); }
#endif // CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t subpel_variance4x4_c =
- vp9_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t subpel_variance4x8_c =
- vp9_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t subpel_variance8x4_c =
- vp9_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t subpel_variance8x8_c =
- vp9_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t subpel_variance8x16_c =
- vp9_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t subpel_variance16x8_c =
- vp9_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t subpel_variance16x16_c =
- vp9_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t subpel_variance16x32_c =
- vp9_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t subpel_variance32x16_c =
- vp9_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t subpel_variance32x32_c =
- vp9_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t subpel_variance32x64_c =
- vp9_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t subpel_variance64x32_c =
- vp9_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t subpel_variance64x64_c =
- vp9_sub_pixel_variance64x64_c;
+const SubpixVarMxNFunc subpel_variance4x4_c = vp9_sub_pixel_variance4x4_c;
+const SubpixVarMxNFunc subpel_variance4x8_c = vp9_sub_pixel_variance4x8_c;
+const SubpixVarMxNFunc subpel_variance8x4_c = vp9_sub_pixel_variance8x4_c;
+const SubpixVarMxNFunc subpel_variance8x8_c = vp9_sub_pixel_variance8x8_c;
+const SubpixVarMxNFunc subpel_variance8x16_c = vp9_sub_pixel_variance8x16_c;
+const SubpixVarMxNFunc subpel_variance16x8_c = vp9_sub_pixel_variance16x8_c;
+const SubpixVarMxNFunc subpel_variance16x16_c = vp9_sub_pixel_variance16x16_c;
+const SubpixVarMxNFunc subpel_variance16x32_c = vp9_sub_pixel_variance16x32_c;
+const SubpixVarMxNFunc subpel_variance32x16_c = vp9_sub_pixel_variance32x16_c;
+const SubpixVarMxNFunc subpel_variance32x32_c = vp9_sub_pixel_variance32x32_c;
+const SubpixVarMxNFunc subpel_variance32x64_c = vp9_sub_pixel_variance32x64_c;
+const SubpixVarMxNFunc subpel_variance64x32_c = vp9_sub_pixel_variance64x32_c;
+const SubpixVarMxNFunc subpel_variance64x64_c = vp9_sub_pixel_variance64x64_c;
INSTANTIATE_TEST_CASE_P(
C, VP9SubpelVarianceTest,
::testing::Values(make_tuple(2, 2, subpel_variance4x4_c, 0),
@@ -1147,6 +1159,23 @@
make_tuple(5, 6, subpel_variance32x64_c, 0),
make_tuple(6, 5, subpel_variance64x32_c, 0),
make_tuple(6, 6, subpel_variance64x64_c, 0)));
+
+#if CONFIG_VP8
+const SubpixVarMxNFunc vp8_subpel_variance16x16_c =
+ vp8_sub_pixel_variance16x16_c;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_c = vp8_sub_pixel_variance16x8_c;
+const SubpixVarMxNFunc vp8_subpel_variance8x16_c = vp8_sub_pixel_variance8x16_c;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_c = vp8_sub_pixel_variance8x8_c;
+const SubpixVarMxNFunc vp8_subpel_variance4x4_c = vp8_sub_pixel_variance4x4_c;
+INSTANTIATE_TEST_CASE_P(
+ C, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_c, 0),
+ make_tuple(3, 3, vp8_subpel_variance8x8_c, 0),
+ make_tuple(3, 4, vp8_subpel_variance8x16_c, 0),
+ make_tuple(4, 3, vp8_subpel_variance16x8_c, 0),
+ make_tuple(4, 4, vp8_subpel_variance16x16_c, 0)));
+#endif // CONFIG_VP8
+
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_c =
vp9_sub_pixel_avg_variance4x4_c;
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_c =
@@ -1189,83 +1218,83 @@
make_tuple(6, 5, subpel_avg_variance64x32_c, 0),
make_tuple(6, 6, subpel_avg_variance64x64_c, 0)));
#if CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance4x4_c =
vp9_highbd_10_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance4x8_c =
vp9_highbd_10_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x4_c =
vp9_highbd_10_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x8_c =
vp9_highbd_10_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x16_c =
vp9_highbd_10_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x8_c =
vp9_highbd_10_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x16_c =
vp9_highbd_10_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x32_c =
vp9_highbd_10_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x16_c =
vp9_highbd_10_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x32_c =
vp9_highbd_10_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x64_c =
vp9_highbd_10_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x32_c =
vp9_highbd_10_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x64_c =
vp9_highbd_10_sub_pixel_variance64x64_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance4x4_c =
vp9_highbd_12_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance4x8_c =
vp9_highbd_12_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x4_c =
vp9_highbd_12_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x8_c =
vp9_highbd_12_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x16_c =
vp9_highbd_12_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x8_c =
vp9_highbd_12_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x16_c =
vp9_highbd_12_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x32_c =
vp9_highbd_12_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x16_c =
vp9_highbd_12_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x32_c =
vp9_highbd_12_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x64_c =
vp9_highbd_12_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x32_c =
vp9_highbd_12_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x64_c =
vp9_highbd_12_sub_pixel_variance64x64_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_subpel_variance4x4_c =
vp9_highbd_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance4x8_c =
vp9_highbd_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x4_c =
vp9_highbd_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x8_c =
vp9_highbd_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x16_c =
vp9_highbd_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x8_c =
vp9_highbd_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x16_c =
vp9_highbd_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x32_c =
vp9_highbd_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x16_c =
vp9_highbd_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x32_c =
vp9_highbd_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x64_c =
vp9_highbd_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance64x32_c =
vp9_highbd_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_subpel_variance64x64_c =
vp9_highbd_sub_pixel_variance64x64_c;
INSTANTIATE_TEST_CASE_P(
C, VP9SubpelVarianceHighTest,
@@ -1431,34 +1460,48 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_VP9_ENCODER
+#if CONFIG_VP8
+#if HAVE_MMX
+const SubpixVarMxNFunc subpel_variance16x16_mmx =
+ vp8_sub_pixel_variance16x16_mmx;
+const SubpixVarMxNFunc subpel_variance16x8_mmx = vp8_sub_pixel_variance16x8_mmx;
+const SubpixVarMxNFunc subpel_variance8x16_mmx = vp8_sub_pixel_variance8x16_mmx;
+const SubpixVarMxNFunc subpel_variance8x8_mmx = vp8_sub_pixel_variance8x8_mmx;
+const SubpixVarMxNFunc subpel_variance4x4_mmx = vp8_sub_pixel_variance4x4_mmx;
+INSTANTIATE_TEST_CASE_P(
+ MMX, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(4, 4, subpel_variance16x16_mmx, 0),
+ make_tuple(4, 3, subpel_variance16x8_mmx, 0),
+ make_tuple(3, 4, subpel_variance8x16_mmx, 0),
+ make_tuple(3, 3, subpel_variance8x8_mmx, 0),
+ make_tuple(2, 2, subpel_variance4x4_mmx, 0)));
+#endif // HAVE_MMX
+#endif // CONFIG_VP8
+
#if CONFIG_VP9_ENCODER
#if HAVE_SSE2
#if CONFIG_USE_X86INC
-const vp9_subpixvariance_fn_t subpel_variance4x4_sse =
- vp9_sub_pixel_variance4x4_sse;
-const vp9_subpixvariance_fn_t subpel_variance4x8_sse =
- vp9_sub_pixel_variance4x8_sse;
-const vp9_subpixvariance_fn_t subpel_variance8x4_sse2 =
- vp9_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t subpel_variance8x8_sse2 =
- vp9_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc subpel_variance4x4_sse = vp9_sub_pixel_variance4x4_sse;
+const SubpixVarMxNFunc subpel_variance4x8_sse = vp9_sub_pixel_variance4x8_sse;
+const SubpixVarMxNFunc subpel_variance8x4_sse2 = vp9_sub_pixel_variance8x4_sse2;
+const SubpixVarMxNFunc subpel_variance8x8_sse2 = vp9_sub_pixel_variance8x8_sse2;
+const SubpixVarMxNFunc subpel_variance8x16_sse2 =
vp9_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc subpel_variance16x8_sse2 =
vp9_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc subpel_variance16x16_sse2 =
vp9_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc subpel_variance16x32_sse2 =
vp9_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc subpel_variance32x16_sse2 =
vp9_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc subpel_variance32x32_sse2 =
vp9_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc subpel_variance32x64_sse2 =
vp9_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc subpel_variance64x32_sse2 =
vp9_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc subpel_variance64x64_sse2 =
vp9_sub_pixel_variance64x64_sse2;
INSTANTIATE_TEST_CASE_P(
SSE2, VP9SubpelVarianceTest,
@@ -1517,71 +1560,71 @@
make_tuple(6, 5, subpel_avg_variance64x32_sse2, 0),
make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0)));
#if CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x4_sse2 =
vp9_highbd_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x8_sse2 =
vp9_highbd_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x16_sse2 =
vp9_highbd_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x8_sse2 =
vp9_highbd_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x16_sse2 =
vp9_highbd_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x32_sse2 =
vp9_highbd_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x16_sse2 =
vp9_highbd_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x32_sse2 =
vp9_highbd_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x64_sse2 =
vp9_highbd_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance64x32_sse2 =
vp9_highbd_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance64x64_sse2 =
vp9_highbd_sub_pixel_variance64x64_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x4_sse2 =
vp9_highbd_10_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x8_sse2 =
vp9_highbd_10_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x16_sse2 =
vp9_highbd_10_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x8_sse2 =
vp9_highbd_10_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x16_sse2 =
vp9_highbd_10_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x32_sse2 =
vp9_highbd_10_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x16_sse2 =
vp9_highbd_10_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x32_sse2 =
vp9_highbd_10_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x64_sse2 =
vp9_highbd_10_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x32_sse2 =
vp9_highbd_10_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x64_sse2 =
vp9_highbd_10_sub_pixel_variance64x64_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x4_sse2 =
vp9_highbd_12_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x8_sse2 =
vp9_highbd_12_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x16_sse2 =
vp9_highbd_12_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x8_sse2 =
vp9_highbd_12_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x16_sse2 =
vp9_highbd_12_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x32_sse2 =
vp9_highbd_12_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x16_sse2 =
vp9_highbd_12_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x32_sse2 =
vp9_highbd_12_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x64_sse2 =
vp9_highbd_12_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x32_sse2 =
vp9_highbd_12_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x64_sse2 =
vp9_highbd_12_sub_pixel_variance64x64_sse2;
INSTANTIATE_TEST_CASE_P(
SSE2, VP9SubpelVarianceHighTest,
@@ -1725,35 +1768,56 @@
#endif // HAVE_SSE2
#endif // CONFIG_VP9_ENCODER
+#if CONFIG_VP8
+#if HAVE_SSE2
+const SubpixVarMxNFunc vp8_subpel_variance16x16_sse2 =
+ vp8_sub_pixel_variance16x16_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_sse2 =
+ vp8_sub_pixel_variance16x8_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance8x16_sse2 =
+ vp8_sub_pixel_variance8x16_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_sse2 =
+ vp8_sub_pixel_variance8x8_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance4x4_sse2 =
+ vp8_sub_pixel_variance4x4_wmt;
+INSTANTIATE_TEST_CASE_P(
+ SSE2, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_sse2, 0),
+ make_tuple(3, 3, vp8_subpel_variance8x8_sse2, 0),
+ make_tuple(3, 4, vp8_subpel_variance8x16_sse2, 0),
+ make_tuple(4, 3, vp8_subpel_variance16x8_sse2, 0),
+ make_tuple(4, 4, vp8_subpel_variance16x16_sse2, 0)));
+#endif // HAVE_SSE2
+#endif // CONFIG_VP8
+
#if CONFIG_VP9_ENCODER
#if HAVE_SSSE3
#if CONFIG_USE_X86INC
-
-const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
+const SubpixVarMxNFunc subpel_variance4x4_ssse3 =
vp9_sub_pixel_variance4x4_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance4x8_ssse3 =
vp9_sub_pixel_variance4x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x4_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x4_ssse3 =
vp9_sub_pixel_variance8x4_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x8_ssse3 =
vp9_sub_pixel_variance8x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x16_ssse3 =
vp9_sub_pixel_variance8x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x8_ssse3 =
vp9_sub_pixel_variance16x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x16_ssse3 =
vp9_sub_pixel_variance16x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x32_ssse3 =
vp9_sub_pixel_variance16x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x16_ssse3 =
vp9_sub_pixel_variance32x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x32_ssse3 =
vp9_sub_pixel_variance32x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x64_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x64_ssse3 =
vp9_sub_pixel_variance32x64_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance64x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance64x32_ssse3 =
vp9_sub_pixel_variance64x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance64x64_ssse3 =
+const SubpixVarMxNFunc subpel_variance64x64_ssse3 =
vp9_sub_pixel_variance64x64_ssse3;
INSTANTIATE_TEST_CASE_P(
SSSE3, VP9SubpelVarianceTest,
@@ -1815,6 +1879,19 @@
#endif // HAVE_SSSE3
#endif // CONFIG_VP9_ENCODER
+#if CONFIG_VP8
+#if HAVE_SSSE3
+const SubpixVarMxNFunc vp8_subpel_variance16x16_ssse3 =
+ vp8_sub_pixel_variance16x16_ssse3;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_ssse3 =
+ vp8_sub_pixel_variance16x8_ssse3;
+INSTANTIATE_TEST_CASE_P(
+ SSSE3, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(4, 3, vp8_subpel_variance16x8_ssse3, 0),
+ make_tuple(4, 4, vp8_subpel_variance16x16_ssse3, 0)));
+#endif // HAVE_SSSE3
+#endif // CONFIG_VP8
+
#if HAVE_AVX2
const VarianceMxNFunc mse16x16_avx2 = vpx_mse16x16_avx2;
INSTANTIATE_TEST_CASE_P(AVX2, VpxMseTest,
@@ -1834,9 +1911,9 @@
make_tuple(4, 4, variance16x16_avx2, 0)));
#if CONFIG_VP9_ENCODER
-const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
+const SubpixVarMxNFunc subpel_variance32x32_avx2 =
vp9_sub_pixel_variance32x32_avx2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
+const SubpixVarMxNFunc subpel_variance64x64_avx2 =
vp9_sub_pixel_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
AVX2, VP9SubpelVarianceTest,
@@ -1854,6 +1931,19 @@
#endif // CONFIG_VP9_ENCODER
#endif // HAVE_AVX2
+#if CONFIG_VP8
+#if HAVE_MEDIA
+const SubpixVarMxNFunc subpel_variance16x16_media =
+ vp8_sub_pixel_variance16x16_armv6;
+const SubpixVarMxNFunc subpel_variance8x8_media =
+ vp8_sub_pixel_variance8x8_armv6;
+INSTANTIATE_TEST_CASE_P(
+ MEDIA, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(3, 3, subpel_variance8x8_media, 0),
+ make_tuple(4, 4, subpel_variance16x16_media, 0)));
+#endif // HAVE_MEDIA
+#endif // CONFIG_VP8
+
#if HAVE_NEON
const Get4x4SseFunc get4x4sse_cs_neon = vpx_get4x4sse_cs_neon;
INSTANTIATE_TEST_CASE_P(NEON, VpxSseTest,
@@ -1882,14 +1972,26 @@
make_tuple(3, 4, variance8x16_neon, 0),
make_tuple(3, 3, variance8x8_neon, 0)));
+#if CONFIG_VP8
+#if HAVE_NEON_ASM
+const SubpixVarMxNFunc vp8_subpel_variance16x16_neon =
+ vp8_sub_pixel_variance16x16_neon;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_neon =
+ vp8_sub_pixel_variance8x8_neon;
+INSTANTIATE_TEST_CASE_P(
+ NEON, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(3, 3, vp8_subpel_variance8x8_neon, 0),
+ make_tuple(4, 4, vp8_subpel_variance16x16_neon, 0)));
+#endif // HAVE_NEON_ASM
+#endif // CONFIG_VP8
+
#if CONFIG_VP9_ENCODER
-const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
- vp9_sub_pixel_variance8x8_neon;
-const vp9_subpixvariance_fn_t subpel_variance16x16_neon =
+const SubpixVarMxNFunc subpel_variance8x8_neon = vp9_sub_pixel_variance8x8_neon;
+const SubpixVarMxNFunc subpel_variance16x16_neon =
vp9_sub_pixel_variance16x16_neon;
-const vp9_subpixvariance_fn_t subpel_variance32x32_neon =
+const SubpixVarMxNFunc subpel_variance32x32_neon =
vp9_sub_pixel_variance32x32_neon;
-const vp9_subpixvariance_fn_t subpel_variance64x64_neon =
+const SubpixVarMxNFunc subpel_variance64x64_neon =
vp9_sub_pixel_variance64x64_neon;
INSTANTIATE_TEST_CASE_P(
NEON, VP9SubpelVarianceTest,
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -43,14 +43,6 @@
const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
-DECLARE_ALIGNED(256, extern const InterpKernel,
- vp9_bilinear_filters[SUBPEL_SHIFTS]);
-
-// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
-// filter kernel as a 2 tap filter.
-#define BILINEAR_FILTERS_2TAP(x) \
- (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
-
#ifdef __cplusplus
} // extern "C"
#endif
--- a/vp9/encoder/arm/neon/vp9_variance_neon.c
+++ b/vp9/encoder/arm/neon/vp9_variance_neon.c
@@ -16,10 +16,18 @@
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
-#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"
-#include "vp9/encoder/vp9_variance.h"
+static const uint8_t bilinear_filters[8][2] = {
+ { 128, 0, },
+ { 112, 16, },
+ { 96, 32, },
+ { 80, 48, },
+ { 64, 64, },
+ { 48, 80, },
+ { 32, 96, },
+ { 16, 112, },
+};
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
uint8_t *output_ptr,
@@ -27,9 +35,9 @@
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
- const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
- const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+ const uint8_t *vp9_filter) {
+ const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+ const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i;
for (i = 0; i < output_height; ++i) {
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
@@ -50,9 +58,9 @@
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
- const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
- const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+ const uint8_t *vp9_filter) {
+ const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+ const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 16) {
@@ -84,9 +92,9 @@
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
9, 8,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
- 8, BILINEAR_FILTERS_2TAP(yoffset));
+ 8, bilinear_filters[yoffset]);
return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
}
@@ -102,9 +110,9 @@
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
17, 16,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
- 16, BILINEAR_FILTERS_2TAP(yoffset));
+ 16, bilinear_filters[yoffset]);
return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
}
@@ -120,9 +128,9 @@
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
33, 32,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
- 32, BILINEAR_FILTERS_2TAP(yoffset));
+ 32, bilinear_filters[yoffset]);
return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
}
@@ -138,8 +146,8 @@
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
65, 64,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
- 64, BILINEAR_FILTERS_2TAP(yoffset));
+ 64, bilinear_filters[yoffset]);
return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
}
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -162,9 +162,9 @@
error_per_bit + 4096) >> 13 : 0)
-// convert motion vector component to offset for svf calc
+// convert motion vector component to offset for sv[a]f calc
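+// (e.g. a component of 11, i.e. one full pel plus 3/8, yields offset 3)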
static INLINE int sp(int x) {
- return (x & 7) << 1;
+ return x & 7;
}
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
@@ -679,16 +679,14 @@
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
- int row_offset = (tr & 0x07) << 1;
- int col_offset = (tc & 0x07) << 1;
MV this_mv;
this_mv.row = tr;
this_mv.col = tc;
if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
else
- thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
cost_array[idx] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -709,14 +707,12 @@
tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
- int row_offset = (tr & 0x07) << 1;
- int col_offset = (tc & 0x07) << 1;
MV this_mv = {tr, tc};
if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
else
- thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
cost_array[4] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -19,6 +19,17 @@
#include "vp9/encoder/vp9_variance.h"
+static const uint8_t bilinear_filters[8][2] = {
+ { 128, 0, },
+ { 112, 16, },
+ { 96, 32, },
+ { 80, 48, },
+ { 64, 64, },
+ { 48, 80, },
+ { 32, 96, },
+ { 16, 112, },
+};
+
// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// first-pass of 2-D separable filter.
@@ -33,7 +44,7 @@
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
@@ -65,7 +76,7 @@
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
@@ -91,9 +102,9 @@
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
- BILINEAR_FILTERS_2TAP(xoffset)); \
+ bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
}
@@ -110,9 +121,9 @@
DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
- BILINEAR_FILTERS_2TAP(xoffset)); \
+ bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
\
@@ -166,7 +177,7 @@
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
for (i = 0; i < output_height; i++) {
@@ -192,7 +203,7 @@
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
@@ -219,9 +230,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \
@@ -236,9 +247,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
@@ -253,9 +264,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
@@ -273,9 +284,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -295,9 +306,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -317,9 +328,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
--- a/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
@@ -14,35 +14,19 @@
pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0
- times 8 dw 15
- times 8 dw 1
times 8 dw 14
times 8 dw 2
- times 8 dw 13
- times 8 dw 3
times 8 dw 12
times 8 dw 4
- times 8 dw 11
- times 8 dw 5
times 8 dw 10
times 8 dw 6
- times 8 dw 9
- times 8 dw 7
times 16 dw 8
- times 8 dw 7
- times 8 dw 9
times 8 dw 6
times 8 dw 10
- times 8 dw 5
- times 8 dw 11
times 8 dw 4
times 8 dw 12
- times 8 dw 3
- times 8 dw 13
times 8 dw 2
times 8 dw 14
- times 8 dw 1
- times 8 dw 15
SECTION .text
--- a/vp9/encoder/x86/vp9_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_subpel_variance.asm
@@ -14,52 +14,28 @@
pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0
- times 8 dw 15
- times 8 dw 1
times 8 dw 14
times 8 dw 2
- times 8 dw 13
- times 8 dw 3
times 8 dw 12
times 8 dw 4
- times 8 dw 11
- times 8 dw 5
times 8 dw 10
times 8 dw 6
- times 8 dw 9
- times 8 dw 7
times 16 dw 8
- times 8 dw 7
- times 8 dw 9
times 8 dw 6
times 8 dw 10
- times 8 dw 5
- times 8 dw 11
times 8 dw 4
times 8 dw 12
- times 8 dw 3
- times 8 dw 13
times 8 dw 2
times 8 dw 14
- times 8 dw 1
- times 8 dw 15
bilin_filter_m_ssse3: times 8 db 16, 0
- times 8 db 15, 1
times 8 db 14, 2
- times 8 db 13, 3
times 8 db 12, 4
- times 8 db 11, 5
times 8 db 10, 6
- times 8 db 9, 7
times 16 db 8
- times 8 db 7, 9
times 8 db 6, 10
- times 8 db 5, 11
times 8 db 4, 12
- times 8 db 3, 13
times 8 db 2, 14
- times 8 db 1, 15
SECTION .text
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
@@ -17,36 +17,20 @@
DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
- 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
- 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
- 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
- 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
- 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
- 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
- 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
- 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
- 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
- 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
- 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
- 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
- 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
- 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15,
- 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15
};
#define FILTER_SRC(filter) \