ref: eb88b172fe5e5fece6676b24a2b6b787e4901753
parent: 01853d7ce9511fb528289c44f1b1881288f10090
author: Johann <[email protected]>
date: Tue May 26 07:30:25 EDT 2015
Make vp9 subpixel match vp8 The only difference between the two was that the vp9 function allowed for every step in the bilinear filter (16 steps) while vp8 only allowed for half of those. Since all the call sites in vp9 (<< 1) the input, it only ever used the same steps as vp8. This will allow moving the subpel variance to vpx_dsp with the rest of the variance functions. Change-Id: I6fa2509350a2dc610c46b3e15bde98a15a084b75
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -21,6 +21,9 @@
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
+#if CONFIG_VP8_ENCODER
+# include "./vp8_rtcd.h"
+#endif // CONFIG_VP8_ENCODER
#if CONFIG_VP9_ENCODER
# include "./vp9_rtcd.h"
# include "vp9/encoder/vp9_variance.h"
@@ -32,10 +35,13 @@
typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
unsigned int *sse);
+typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
+ int xoffset, int yoffset,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse);
typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride);
-
using ::std::tr1::get;
using ::std::tr1::make_tuple;
using ::std::tr1::tuple;
@@ -102,6 +108,12 @@
(l2w + l2h)));
}
+/* The subpel reference functions differ from the codec version in one aspect:
+ * they calculate the bilinear factors directly instead of using a lookup table
+ * and therefore upshift xoff and yoff by 1. Only every other calculated value
+ * is used so the codec version shrinks the table to save space and maintain
+ * compatibility with vp8.
+ */
static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
int l2w, int l2h, int xoff, int yoff,
uint32_t *sse_ptr,
@@ -111,6 +123,10 @@
uint64_t sse = 0;
const int w = 1 << l2w;
const int h = 1 << l2h;
+
+ xoff <<= 1;
+ yoff <<= 1;
+
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
// Bilinear interpolation at a 16th pel step.
@@ -480,6 +496,10 @@
uint64_t sse = 0;
const int w = 1 << l2w;
const int h = 1 << l2h;
+
+ xoff <<= 1;
+ yoff <<= 1;
+
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
// bilinear interpolation at a 16th pel step
@@ -598,8 +618,8 @@
template<typename SubpelVarianceFunctionType>
void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
- for (int x = 0; x < 16; ++x) {
- for (int y = 0; y < 16; ++y) {
+ for (int x = 0; x < 8; ++x) {
+ for (int y = 0; y < 8; ++y) {
if (!use_high_bit_depth_) {
for (int j = 0; j < block_size_; j++) {
src_[j] = rnd_.Rand8();
@@ -621,8 +641,9 @@
unsigned int var1;
ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
src_, width_, &sse1));
- const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
- log2height_, x, y, &sse2,
+ const unsigned int var2 = subpel_variance_ref(ref_, src_,
+ log2width_, log2height_,
+ x, y, &sse2,
use_high_bit_depth_,
bit_depth_);
EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
@@ -636,8 +657,8 @@
// Compare against reference.
// Src: Set the first half of values to 0, the second half to the maximum.
// Ref: Set the first half of values to the maximum, the second half to 0.
- for (int x = 0; x < 16; ++x) {
- for (int y = 0; y < 16; ++y) {
+ for (int x = 0; x < 8; ++x) {
+ for (int y = 0; y < 8; ++y) {
const int half = block_size_ / 2;
if (!use_high_bit_depth_) {
memset(src_, 0, half);
@@ -658,10 +679,10 @@
ASM_REGISTER_STATE_CHECK(
var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1));
const unsigned int var2 =
- subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2,
- use_high_bit_depth_, bit_depth_);
- EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
- EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+ subpel_variance_ref(ref_, src_, log2width_, log2height_,
+ x, y, &sse2, use_high_bit_depth_, bit_depth_);
+ EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+ EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
}
}
}
@@ -669,8 +690,8 @@
#if CONFIG_VP9_ENCODER
template<>
void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
- for (int x = 0; x < 16; ++x) {
- for (int y = 0; y < 16; ++y) {
+ for (int x = 0; x < 8; ++x) {
+ for (int y = 0; y < 8; ++y) {
if (!use_high_bit_depth_) {
for (int j = 0; j < block_size_; j++) {
src_[j] = rnd_.Rand8();
@@ -795,7 +816,6 @@
const VarianceMxNFunc highbd_8_mse16x8_c = vpx_highbd_8_mse16x8_c;
const VarianceMxNFunc highbd_8_mse8x16_c = vpx_highbd_8_mse8x16_c;
const VarianceMxNFunc highbd_8_mse8x8_c = vpx_highbd_8_mse8x8_c;
-
INSTANTIATE_TEST_CASE_P(
C, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_c),
make_tuple(4, 4, highbd_12_mse16x8_c),
@@ -811,7 +831,6 @@
make_tuple(4, 4, highbd_8_mse8x8_c)));
*/
-
const VarianceMxNFunc highbd_12_variance64x64_c = vpx_highbd_12_variance64x64_c;
const VarianceMxNFunc highbd_12_variance64x32_c = vpx_highbd_12_variance64x32_c;
const VarianceMxNFunc highbd_12_variance32x64_c = vpx_highbd_12_variance32x64_c;
@@ -976,7 +995,6 @@
const VarianceMxNFunc highbd_8_mse16x8_sse2 = vpx_highbd_8_mse16x8_sse2;
const VarianceMxNFunc highbd_8_mse8x16_sse2 = vpx_highbd_8_mse8x16_sse2;
const VarianceMxNFunc highbd_8_mse8x8_sse2 = vpx_highbd_8_mse8x8_sse2;
-
INSTANTIATE_TEST_CASE_P(
SSE2, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_sse2),
make_tuple(4, 3, highbd_12_mse16x8_sse2),
@@ -1088,8 +1106,15 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
+#if CONFIG_VP8
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP8SubpelVarianceTest;
+
+TEST_P(VP8SubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(VP8SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+#endif // CONFIG_VP8
+
#if CONFIG_VP9_ENCODER
-typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceTest;
typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t> VP9SubpelAvgVarianceTest;
TEST_P(VP9SubpelVarianceTest, Ref) { RefTest(); }
@@ -1097,7 +1122,7 @@
TEST_P(VP9SubpelAvgVarianceTest, Ref) { RefTest(); }
#if CONFIG_VP9_HIGHBITDEPTH
-typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceHighTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceHighTest;
typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t>
VP9SubpelAvgVarianceHighTest;
@@ -1106,32 +1131,19 @@
TEST_P(VP9SubpelAvgVarianceHighTest, Ref) { RefTest(); }
#endif // CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t subpel_variance4x4_c =
- vp9_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t subpel_variance4x8_c =
- vp9_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t subpel_variance8x4_c =
- vp9_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t subpel_variance8x8_c =
- vp9_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t subpel_variance8x16_c =
- vp9_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t subpel_variance16x8_c =
- vp9_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t subpel_variance16x16_c =
- vp9_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t subpel_variance16x32_c =
- vp9_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t subpel_variance32x16_c =
- vp9_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t subpel_variance32x32_c =
- vp9_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t subpel_variance32x64_c =
- vp9_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t subpel_variance64x32_c =
- vp9_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t subpel_variance64x64_c =
- vp9_sub_pixel_variance64x64_c;
+const SubpixVarMxNFunc subpel_variance4x4_c = vp9_sub_pixel_variance4x4_c;
+const SubpixVarMxNFunc subpel_variance4x8_c = vp9_sub_pixel_variance4x8_c;
+const SubpixVarMxNFunc subpel_variance8x4_c = vp9_sub_pixel_variance8x4_c;
+const SubpixVarMxNFunc subpel_variance8x8_c = vp9_sub_pixel_variance8x8_c;
+const SubpixVarMxNFunc subpel_variance8x16_c = vp9_sub_pixel_variance8x16_c;
+const SubpixVarMxNFunc subpel_variance16x8_c = vp9_sub_pixel_variance16x8_c;
+const SubpixVarMxNFunc subpel_variance16x16_c = vp9_sub_pixel_variance16x16_c;
+const SubpixVarMxNFunc subpel_variance16x32_c = vp9_sub_pixel_variance16x32_c;
+const SubpixVarMxNFunc subpel_variance32x16_c = vp9_sub_pixel_variance32x16_c;
+const SubpixVarMxNFunc subpel_variance32x32_c = vp9_sub_pixel_variance32x32_c;
+const SubpixVarMxNFunc subpel_variance32x64_c = vp9_sub_pixel_variance32x64_c;
+const SubpixVarMxNFunc subpel_variance64x32_c = vp9_sub_pixel_variance64x32_c;
+const SubpixVarMxNFunc subpel_variance64x64_c = vp9_sub_pixel_variance64x64_c;
INSTANTIATE_TEST_CASE_P(
C, VP9SubpelVarianceTest,
::testing::Values(make_tuple(2, 2, subpel_variance4x4_c, 0),
@@ -1147,6 +1159,23 @@
make_tuple(5, 6, subpel_variance32x64_c, 0),
make_tuple(6, 5, subpel_variance64x32_c, 0),
make_tuple(6, 6, subpel_variance64x64_c, 0)));
+
+#if CONFIG_VP8
+const SubpixVarMxNFunc vp8_subpel_variance16x16_c =
+ vp8_sub_pixel_variance16x16_c;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_c = vp8_sub_pixel_variance16x8_c;
+const SubpixVarMxNFunc vp8_subpel_variance8x16_c = vp8_sub_pixel_variance8x16_c;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_c = vp8_sub_pixel_variance8x8_c;
+const SubpixVarMxNFunc vp8_subpel_variance4x4_c = vp8_sub_pixel_variance4x4_c;
+INSTANTIATE_TEST_CASE_P(
+ C, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_c, 0),
+ make_tuple(3, 3, vp8_subpel_variance8x8_c, 0),
+ make_tuple(3, 4, vp8_subpel_variance8x16_c, 0),
+ make_tuple(4, 3, vp8_subpel_variance16x8_c, 0),
+ make_tuple(4, 4, vp8_subpel_variance16x16_c, 0)));
+#endif // CONFIG_VP8
+
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_c =
vp9_sub_pixel_avg_variance4x4_c;
const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_c =
@@ -1189,83 +1218,83 @@
make_tuple(6, 5, subpel_avg_variance64x32_c, 0),
make_tuple(6, 6, subpel_avg_variance64x64_c, 0)));
#if CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance4x4_c =
vp9_highbd_10_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance4x8_c =
vp9_highbd_10_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x4_c =
vp9_highbd_10_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x8_c =
vp9_highbd_10_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x16_c =
vp9_highbd_10_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x8_c =
vp9_highbd_10_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x16_c =
vp9_highbd_10_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x32_c =
vp9_highbd_10_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x16_c =
vp9_highbd_10_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x32_c =
vp9_highbd_10_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x64_c =
vp9_highbd_10_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x32_c =
vp9_highbd_10_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x64_c =
vp9_highbd_10_sub_pixel_variance64x64_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance4x4_c =
vp9_highbd_12_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance4x8_c =
vp9_highbd_12_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x4_c =
vp9_highbd_12_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x8_c =
vp9_highbd_12_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x16_c =
vp9_highbd_12_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x8_c =
vp9_highbd_12_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x16_c =
vp9_highbd_12_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x32_c =
vp9_highbd_12_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x16_c =
vp9_highbd_12_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x32_c =
vp9_highbd_12_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x64_c =
vp9_highbd_12_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x32_c =
vp9_highbd_12_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x64_c =
vp9_highbd_12_sub_pixel_variance64x64_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_subpel_variance4x4_c =
vp9_highbd_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance4x8_c =
vp9_highbd_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x4_c =
vp9_highbd_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x8_c =
vp9_highbd_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x16_c =
vp9_highbd_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x8_c =
vp9_highbd_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x16_c =
vp9_highbd_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x32_c =
vp9_highbd_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x16_c =
vp9_highbd_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x32_c =
vp9_highbd_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x64_c =
vp9_highbd_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance64x32_c =
vp9_highbd_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_subpel_variance64x64_c =
vp9_highbd_sub_pixel_variance64x64_c;
INSTANTIATE_TEST_CASE_P(
C, VP9SubpelVarianceHighTest,
@@ -1431,34 +1460,48 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_VP9_ENCODER
+#if CONFIG_VP8
+#if HAVE_MMX
+const SubpixVarMxNFunc subpel_variance16x16_mmx =
+ vp8_sub_pixel_variance16x16_mmx;
+const SubpixVarMxNFunc subpel_variance16x8_mmx = vp8_sub_pixel_variance16x8_mmx;
+const SubpixVarMxNFunc subpel_variance8x16_mmx = vp8_sub_pixel_variance8x16_mmx;
+const SubpixVarMxNFunc subpel_variance8x8_mmx = vp8_sub_pixel_variance8x8_mmx;
+const SubpixVarMxNFunc subpel_variance4x4_mmx = vp8_sub_pixel_variance4x4_mmx;
+INSTANTIATE_TEST_CASE_P(
+ MMX, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(4, 4, subpel_variance16x16_mmx, 0),
+ make_tuple(4, 3, subpel_variance16x8_mmx, 0),
+ make_tuple(3, 4, subpel_variance8x16_mmx, 0),
+ make_tuple(3, 3, subpel_variance8x8_mmx, 0),
+ make_tuple(2, 2, subpel_variance4x4_mmx, 0)));
+#endif // HAVE_MMX
+#endif // CONFIG_VP8
+
#if CONFIG_VP9_ENCODER
#if HAVE_SSE2
#if CONFIG_USE_X86INC
-const vp9_subpixvariance_fn_t subpel_variance4x4_sse =
- vp9_sub_pixel_variance4x4_sse;
-const vp9_subpixvariance_fn_t subpel_variance4x8_sse =
- vp9_sub_pixel_variance4x8_sse;
-const vp9_subpixvariance_fn_t subpel_variance8x4_sse2 =
- vp9_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t subpel_variance8x8_sse2 =
- vp9_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc subpel_variance4x4_sse = vp9_sub_pixel_variance4x4_sse;
+const SubpixVarMxNFunc subpel_variance4x8_sse = vp9_sub_pixel_variance4x8_sse;
+const SubpixVarMxNFunc subpel_variance8x4_sse2 = vp9_sub_pixel_variance8x4_sse2;
+const SubpixVarMxNFunc subpel_variance8x8_sse2 = vp9_sub_pixel_variance8x8_sse2;
+const SubpixVarMxNFunc subpel_variance8x16_sse2 =
vp9_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc subpel_variance16x8_sse2 =
vp9_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc subpel_variance16x16_sse2 =
vp9_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc subpel_variance16x32_sse2 =
vp9_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc subpel_variance32x16_sse2 =
vp9_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc subpel_variance32x32_sse2 =
vp9_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc subpel_variance32x64_sse2 =
vp9_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc subpel_variance64x32_sse2 =
vp9_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc subpel_variance64x64_sse2 =
vp9_sub_pixel_variance64x64_sse2;
INSTANTIATE_TEST_CASE_P(
SSE2, VP9SubpelVarianceTest,
@@ -1517,71 +1560,71 @@
make_tuple(6, 5, subpel_avg_variance64x32_sse2, 0),
make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0)));
#if CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x4_sse2 =
vp9_highbd_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x8_sse2 =
vp9_highbd_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x16_sse2 =
vp9_highbd_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x8_sse2 =
vp9_highbd_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x16_sse2 =
vp9_highbd_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x32_sse2 =
vp9_highbd_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x16_sse2 =
vp9_highbd_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x32_sse2 =
vp9_highbd_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x64_sse2 =
vp9_highbd_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance64x32_sse2 =
vp9_highbd_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance64x64_sse2 =
vp9_highbd_sub_pixel_variance64x64_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x4_sse2 =
vp9_highbd_10_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x8_sse2 =
vp9_highbd_10_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x16_sse2 =
vp9_highbd_10_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x8_sse2 =
vp9_highbd_10_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x16_sse2 =
vp9_highbd_10_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x32_sse2 =
vp9_highbd_10_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x16_sse2 =
vp9_highbd_10_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x32_sse2 =
vp9_highbd_10_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x64_sse2 =
vp9_highbd_10_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x32_sse2 =
vp9_highbd_10_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x64_sse2 =
vp9_highbd_10_sub_pixel_variance64x64_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x4_sse2 =
vp9_highbd_12_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x8_sse2 =
vp9_highbd_12_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x16_sse2 =
vp9_highbd_12_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x8_sse2 =
vp9_highbd_12_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x16_sse2 =
vp9_highbd_12_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x32_sse2 =
vp9_highbd_12_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x16_sse2 =
vp9_highbd_12_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x32_sse2 =
vp9_highbd_12_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x64_sse2 =
vp9_highbd_12_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x32_sse2 =
vp9_highbd_12_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x64_sse2 =
vp9_highbd_12_sub_pixel_variance64x64_sse2;
INSTANTIATE_TEST_CASE_P(
SSE2, VP9SubpelVarianceHighTest,
@@ -1725,35 +1768,56 @@
#endif // HAVE_SSE2
#endif // CONFIG_VP9_ENCODER
+#if CONFIG_VP8
+#if HAVE_SSE2
+const SubpixVarMxNFunc vp8_subpel_variance16x16_sse2 =
+ vp8_sub_pixel_variance16x16_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_sse2 =
+ vp8_sub_pixel_variance16x8_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance8x16_sse2 =
+ vp8_sub_pixel_variance8x16_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_sse2 =
+ vp8_sub_pixel_variance8x8_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance4x4_sse2 =
+ vp8_sub_pixel_variance4x4_wmt;
+INSTANTIATE_TEST_CASE_P(
+ SSE2, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_sse2, 0),
+ make_tuple(3, 3, vp8_subpel_variance8x8_sse2, 0),
+ make_tuple(3, 4, vp8_subpel_variance8x16_sse2, 0),
+ make_tuple(4, 3, vp8_subpel_variance16x8_sse2, 0),
+ make_tuple(4, 4, vp8_subpel_variance16x16_sse2, 0)));
+#endif // HAVE_SSE2
+#endif // CONFIG_VP8
+
#if CONFIG_VP9_ENCODER
#if HAVE_SSSE3
#if CONFIG_USE_X86INC
-
-const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
+const SubpixVarMxNFunc subpel_variance4x4_ssse3 =
vp9_sub_pixel_variance4x4_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance4x8_ssse3 =
vp9_sub_pixel_variance4x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x4_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x4_ssse3 =
vp9_sub_pixel_variance8x4_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x8_ssse3 =
vp9_sub_pixel_variance8x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x16_ssse3 =
vp9_sub_pixel_variance8x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x8_ssse3 =
vp9_sub_pixel_variance16x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x16_ssse3 =
vp9_sub_pixel_variance16x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x32_ssse3 =
vp9_sub_pixel_variance16x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x16_ssse3 =
vp9_sub_pixel_variance32x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x32_ssse3 =
vp9_sub_pixel_variance32x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x64_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x64_ssse3 =
vp9_sub_pixel_variance32x64_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance64x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance64x32_ssse3 =
vp9_sub_pixel_variance64x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance64x64_ssse3 =
+const SubpixVarMxNFunc subpel_variance64x64_ssse3 =
vp9_sub_pixel_variance64x64_ssse3;
INSTANTIATE_TEST_CASE_P(
SSSE3, VP9SubpelVarianceTest,
@@ -1815,6 +1879,19 @@
#endif // HAVE_SSSE3
#endif // CONFIG_VP9_ENCODER
+#if CONFIG_VP8
+#if HAVE_SSSE3
+const SubpixVarMxNFunc vp8_subpel_variance16x16_ssse3 =
+ vp8_sub_pixel_variance16x16_ssse3;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_ssse3 =
+ vp8_sub_pixel_variance16x8_ssse3;
+INSTANTIATE_TEST_CASE_P(
+ SSSE3, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(4, 3, vp8_subpel_variance16x8_ssse3, 0),
+ make_tuple(4, 4, vp8_subpel_variance16x16_ssse3, 0)));
+#endif // HAVE_SSSE3
+#endif // CONFIG_VP8
+
#if HAVE_AVX2
const VarianceMxNFunc mse16x16_avx2 = vpx_mse16x16_avx2;
INSTANTIATE_TEST_CASE_P(AVX2, VpxMseTest,
@@ -1834,9 +1911,9 @@
make_tuple(4, 4, variance16x16_avx2, 0)));
#if CONFIG_VP9_ENCODER
-const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
+const SubpixVarMxNFunc subpel_variance32x32_avx2 =
vp9_sub_pixel_variance32x32_avx2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
+const SubpixVarMxNFunc subpel_variance64x64_avx2 =
vp9_sub_pixel_variance64x64_avx2;
INSTANTIATE_TEST_CASE_P(
AVX2, VP9SubpelVarianceTest,
@@ -1854,6 +1931,19 @@
#endif // CONFIG_VP9_ENCODER
#endif // HAVE_AVX2
+#if CONFIG_VP8
+#if HAVE_MEDIA
+const SubpixVarMxNFunc subpel_variance16x16_media =
+ vp8_sub_pixel_variance16x16_armv6;
+const SubpixVarMxNFunc subpel_variance8x8_media =
+ vp8_sub_pixel_variance8x8_armv6;
+INSTANTIATE_TEST_CASE_P(
+ MEDIA, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(3, 3, subpel_variance8x8_media, 0),
+ make_tuple(4, 4, subpel_variance16x16_media, 0)));
+#endif // HAVE_MEDIA
+#endif // CONFIG_VP8
+
#if HAVE_NEON
const Get4x4SseFunc get4x4sse_cs_neon = vpx_get4x4sse_cs_neon;
INSTANTIATE_TEST_CASE_P(NEON, VpxSseTest,
@@ -1882,14 +1972,26 @@
make_tuple(3, 4, variance8x16_neon, 0),
make_tuple(3, 3, variance8x8_neon, 0)));
+#if CONFIG_VP8
+#if HAVE_NEON_ASM
+const SubpixVarMxNFunc vp8_subpel_variance16x16_neon =
+ vp8_sub_pixel_variance16x16_neon;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_neon =
+ vp8_sub_pixel_variance8x8_neon;
+INSTANTIATE_TEST_CASE_P(
+ NEON, VP8SubpelVarianceTest,
+ ::testing::Values(make_tuple(3, 3, vp8_subpel_variance8x8_neon, 0),
+ make_tuple(4, 4, vp8_subpel_variance16x16_neon, 0)));
+#endif // HAVE_NEON_ASM
+#endif // CONFIG_VP8
+
#if CONFIG_VP9_ENCODER
-const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
- vp9_sub_pixel_variance8x8_neon;
-const vp9_subpixvariance_fn_t subpel_variance16x16_neon =
+const SubpixVarMxNFunc subpel_variance8x8_neon = vp9_sub_pixel_variance8x8_neon;
+const SubpixVarMxNFunc subpel_variance16x16_neon =
vp9_sub_pixel_variance16x16_neon;
-const vp9_subpixvariance_fn_t subpel_variance32x32_neon =
+const SubpixVarMxNFunc subpel_variance32x32_neon =
vp9_sub_pixel_variance32x32_neon;
-const vp9_subpixvariance_fn_t subpel_variance64x64_neon =
+const SubpixVarMxNFunc subpel_variance64x64_neon =
vp9_sub_pixel_variance64x64_neon;
INSTANTIATE_TEST_CASE_P(
NEON, VP9SubpelVarianceTest,
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -43,14 +43,6 @@
const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
-DECLARE_ALIGNED(256, extern const InterpKernel,
- vp9_bilinear_filters[SUBPEL_SHIFTS]);
-
-// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
-// filter kernel as a 2 tap filter.
-#define BILINEAR_FILTERS_2TAP(x) \
- (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
-
#ifdef __cplusplus
} // extern "C"
#endif
--- a/vp9/encoder/arm/neon/vp9_variance_neon.c
+++ b/vp9/encoder/arm/neon/vp9_variance_neon.c
@@ -16,10 +16,18 @@
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
-#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"
-#include "vp9/encoder/vp9_variance.h"
+static uint8_t bilinear_filters[8][2] = {
+ { 128, 0, },
+ { 112, 16, },
+ { 96, 32, },
+ { 80, 48, },
+ { 64, 64, },
+ { 48, 80, },
+ { 32, 96, },
+ { 16, 112, },
+};
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
uint8_t *output_ptr,
@@ -27,9 +35,9 @@
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
- const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
- const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+ const uint8_t *vp9_filter) {
+ const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+ const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i;
for (i = 0; i < output_height; ++i) {
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
@@ -50,9 +58,9 @@
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
- const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
- const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+ const uint8_t *vp9_filter) {
+ const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+ const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 16) {
@@ -84,9 +92,9 @@
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
9, 8,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
- 8, BILINEAR_FILTERS_2TAP(yoffset));
+ 8, bilinear_filters[yoffset]);
return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
}
@@ -102,9 +110,9 @@
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
17, 16,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
- 16, BILINEAR_FILTERS_2TAP(yoffset));
+ 16, bilinear_filters[yoffset]);
return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
}
@@ -120,9 +128,9 @@
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
33, 32,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
- 32, BILINEAR_FILTERS_2TAP(yoffset));
+ 32, bilinear_filters[yoffset]);
return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
}
@@ -138,8 +146,8 @@
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
65, 64,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
- 64, BILINEAR_FILTERS_2TAP(yoffset));
+ 64, bilinear_filters[yoffset]);
return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
}
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -162,9 +162,9 @@
error_per_bit + 4096) >> 13 : 0)
-// convert motion vector component to offset for svf calc
+// convert motion vector component to offset for sv[a]f calc
static INLINE int sp(int x) {
- return (x & 7) << 1;
+ return x & 7;
}
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
@@ -679,16 +679,14 @@
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
- int row_offset = (tr & 0x07) << 1;
- int col_offset = (tc & 0x07) << 1;
MV this_mv;
this_mv.row = tr;
this_mv.col = tc;
if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
else
- thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
cost_array[idx] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -709,14 +707,12 @@
tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
- int row_offset = (tr & 0x07) << 1;
- int col_offset = (tc & 0x07) << 1;
MV this_mv = {tr, tc};
if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
else
- thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
cost_array[4] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -19,6 +19,17 @@
#include "vp9/encoder/vp9_variance.h"
+static uint8_t bilinear_filters[8][2] = {
+ { 128, 0, },
+ { 112, 16, },
+ { 96, 32, },
+ { 80, 48, },
+ { 64, 64, },
+ { 48, 80, },
+ { 32, 96, },
+ { 16, 112, },
+};
+
// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// first-pass of 2-D separable filter.
@@ -33,7 +44,7 @@
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
@@ -65,7 +76,7 @@
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
@@ -91,9 +102,9 @@
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
- BILINEAR_FILTERS_2TAP(xoffset)); \
+ bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
}
@@ -110,9 +121,9 @@
DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
- BILINEAR_FILTERS_2TAP(xoffset)); \
+ bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
\
@@ -166,7 +177,7 @@
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
for (i = 0; i < output_height; i++) {
@@ -192,7 +203,7 @@
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
@@ -219,9 +230,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \
@@ -236,9 +247,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
@@ -253,9 +264,9 @@
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
@@ -273,9 +284,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -295,9 +306,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -317,9 +328,9 @@
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
--- a/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
@@ -14,35 +14,19 @@
pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0
- times 8 dw 15
- times 8 dw 1
times 8 dw 14
times 8 dw 2
- times 8 dw 13
- times 8 dw 3
times 8 dw 12
times 8 dw 4
- times 8 dw 11
- times 8 dw 5
times 8 dw 10
times 8 dw 6
- times 8 dw 9
- times 8 dw 7
times 16 dw 8
- times 8 dw 7
- times 8 dw 9
times 8 dw 6
times 8 dw 10
- times 8 dw 5
- times 8 dw 11
times 8 dw 4
times 8 dw 12
- times 8 dw 3
- times 8 dw 13
times 8 dw 2
times 8 dw 14
- times 8 dw 1
- times 8 dw 15
SECTION .text
--- a/vp9/encoder/x86/vp9_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_subpel_variance.asm
@@ -14,52 +14,28 @@
pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0
- times 8 dw 15
- times 8 dw 1
times 8 dw 14
times 8 dw 2
- times 8 dw 13
- times 8 dw 3
times 8 dw 12
times 8 dw 4
- times 8 dw 11
- times 8 dw 5
times 8 dw 10
times 8 dw 6
- times 8 dw 9
- times 8 dw 7
times 16 dw 8
- times 8 dw 7
- times 8 dw 9
times 8 dw 6
times 8 dw 10
- times 8 dw 5
- times 8 dw 11
times 8 dw 4
times 8 dw 12
- times 8 dw 3
- times 8 dw 13
times 8 dw 2
times 8 dw 14
- times 8 dw 1
- times 8 dw 15
bilin_filter_m_ssse3: times 8 db 16, 0
- times 8 db 15, 1
times 8 db 14, 2
- times 8 db 13, 3
times 8 db 12, 4
- times 8 db 11, 5
times 8 db 10, 6
- times 8 db 9, 7
times 16 db 8
- times 8 db 7, 9
times 8 db 6, 10
- times 8 db 5, 11
times 8 db 4, 12
- times 8 db 3, 13
times 8 db 2, 14
- times 8 db 1, 15
SECTION .text
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
@@ -17,36 +17,20 @@
DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
- 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
- 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
- 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
- 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
- 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
- 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
- 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
- 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
- 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
- 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
- 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
- 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
- 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
- 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15,
- 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15
};
#define FILTER_SRC(filter) \