shithub: libvpx

Download patch

ref: 6036a0d24fa0946c5830f2a925bd427428c8afb1
parent: 1e224dcb83c83ecb183d989937c3d13896bfd846
author: Yi Luo <[email protected]>
date: Tue Feb 21 07:07:47 EST 2017

The following SSSE3 intrinsic functions also work for HBD (high bit depth)

- vpx_idct8x8_12_add_ssse3
  vpx_idct8x8_64_add_ssse3
  vpx_idct32x32_34_add_ssse3
  vpx_idct32x32_135_add_ssse3
  vpx_idct32x32_1024_add_ssse3
- Turn on the SSSE3 partial IDCT unit tests.

Change-Id: I788b2b3b2074a6f3ab6a0e6f469c1327a123eff7

--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -637,8 +637,7 @@
 
 #endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
 
-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE && \
-    !CONFIG_VP9_HIGHBITDEPTH
+#if HAVE_SSSE3 && !CONFIG_EMULATE_HARDWARE
 const PartialInvTxfmParam ssse3_partial_idct_tests[] = {
   make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
              &wrapper<vpx_idct32x32_1024_add_ssse3>, TX_32X32, 1024, 8, 1),
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -683,12 +683,11 @@
     add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
     specialize qw/vpx_idct4x4_1_add neon sse2/;
 
-    # TODO(jingning): Add ssse3 for high bit-depth
     add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-    specialize qw/vpx_idct8x8_64_add neon sse2/;
+    specialize qw/vpx_idct8x8_64_add neon sse2 ssse3/;
 
     add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-    specialize qw/vpx_idct8x8_12_add neon sse2/, "$ssse3_x86_64";
+    specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/;
 
     add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
     specialize qw/vpx_idct8x8_1_add neon sse2/;
@@ -707,15 +706,15 @@
     specialize qw/vpx_idct16x16_1_add neon sse2/;
 
     add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-    specialize qw/vpx_idct32x32_1024_add neon sse2/, "$ssse3_x86_64";
+    specialize qw/vpx_idct32x32_1024_add neon sse2 ssse3/;
 
     add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-    specialize qw/vpx_idct32x32_135_add neon sse2/, "$ssse3_x86_64";
+    specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/;
     # Need to add 135 eob idct32x32 implementations.
     $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
 
     add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int stride";
-    specialize qw/vpx_idct32x32_34_add neon sse2/, "$ssse3_x86_64";
+    specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/;
 
     add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int stride";
     specialize qw/vpx_idct32x32_1_add neon sse2/;