shithub: libvpx

Download patch

ref: e28fc4997b52cbf18fda9deb2acfbf21d5009120
parent: 57de34de44b4a7f2e3328791a68acde6b8a17e70
parent: 69a5f5ecf7257e4ef7ca0c75d4d84c6d3a97fec0
author: James Zern <[email protected]>
date: Thu Aug 7 15:54:53 EDT 2014

Merge "Fix bug 807"

--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -707,24 +707,7 @@
 #endif
 
 #if HAVE_AVX2
-// TODO(jzern): these prototypes can be removed after the avx2 versions are
-// reenabled in vp9_rtcd_defs.pl.
-extern "C" {
-unsigned int vp9_sub_pixel_variance32x32_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_sub_pixel_variance64x64_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_sub_pixel_avg_variance32x32_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
-    const uint8_t *second_pred);
-unsigned int vp9_sub_pixel_avg_variance64x64_avx2(
-    const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,
-    const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
-    const uint8_t *second_pred);
-}
+
 const vp9_variance_fn_t variance16x16_avx2 = vp9_variance16x16_avx2;
 const vp9_variance_fn_t variance32x16_avx2 = vp9_variance32x16_avx2;
 const vp9_variance_fn_t variance32x32_avx2 = vp9_variance32x32_avx2;
@@ -743,7 +726,7 @@
 const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
     vp9_sub_pixel_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_AVX2, VP9SubpelVarianceTest,
+    AVX2, VP9SubpelVarianceTest,
     ::testing::Values(make_tuple(5, 5, subpel_variance32x32_avx2),
                       make_tuple(6, 6, subpel_variance64x64_avx2)));
 
@@ -752,7 +735,7 @@
 const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_avx2 =
     vp9_sub_pixel_avg_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_AVX2, VP9SubpelAvgVarianceTest,
+    AVX2, VP9SubpelAvgVarianceTest,
     ::testing::Values(make_tuple(5, 5, subpel_avg_variance32x32_avx2),
                       make_tuple(6, 6, subpel_avg_variance64x64_avx2)));
 #endif  // HAVE_AVX2
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -447,10 +447,10 @@
 specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -477,10 +477,10 @@
 specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance32x32 neon/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance32x32 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
-specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
 
 add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
 specialize qw/vp9_sub_pixel_variance16x16 neon/, "$sse2_x86inc", "$ssse3_x86inc";
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
@@ -67,7 +67,7 @@
 #define LOAD_SRC_DST \
   /* load source and destination */ \
   src_reg = _mm256_loadu_si256((__m256i const *) (src)); \
-  dst_reg = _mm256_load_si256((__m256i const *) (dst));
+  dst_reg = _mm256_loadu_si256((__m256i const *) (dst));
 
 #define AVG_NEXT_SRC(src_reg, size_stride) \
   src_next_reg = _mm256_loadu_si256((__m256i const *) \