shithub: libvpx

Download patch

ref: 2404e3290e51b776dc16c5c082bbd39e45a15b10
parent: e19b7df8d33c795e0daf4432315325ec445d21bd
parent: 88120481a4475c1b40f867b8d80edfd2a560a315
author: Jian Zhou <[email protected]>
date: Mon Dec 14 12:56:01 EST 2015

Merge "Code clean of tm_predictor_32x32"

--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -337,7 +337,6 @@
                 vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c)
 
 #if HAVE_SSE2 && CONFIG_USE_X86INC
-#if ARCH_X86_64
 INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
                 vpx_dc_left_predictor_32x32_sse2,
                 vpx_dc_top_predictor_32x32_sse2,
@@ -344,14 +343,6 @@
                 vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
                 vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL,
                 NULL, vpx_tm_predictor_32x32_sse2)
-#else
-INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
-                vpx_dc_left_predictor_32x32_sse2,
-                vpx_dc_top_predictor_32x32_sse2,
-                vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
-                vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL)
-#endif  // ARCH_X86_64
 #endif  // HAVE_SSE2 && CONFIG_USE_X86INC
 
 #if HAVE_SSSE3 && CONFIG_USE_X86INC
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -241,7 +241,7 @@
 specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";
 
 add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";
+specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86inc";
 
 add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
--- a/vpx_dsp/x86/intrapred_sse2.asm
+++ b/vpx_dsp/x86/intrapred_sse2.asm
@@ -700,9 +700,8 @@
   jnz .loop
   REP_RET
 
-%if ARCH_X86_64
 INIT_XMM sse2
-cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
+cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left
   pxor                  m1, m1
   movd                  m2, [aboveq-1]
   mova                  m0, [aboveq]
@@ -723,31 +722,29 @@
   psubw                 m5, m2
 .loop:
   movd                  m2, [leftq+lineq*2]
-  movd                  m6, [leftq+lineq*2+1]
+  pxor                  m1, m1
   punpcklbw             m2, m1
-  punpcklbw             m6, m1
+  pshuflw               m7, m2, 0x55
   pshuflw               m2, m2, 0x0
-  pshuflw               m6, m6, 0x0
   punpcklqdq            m2, m2
-  punpcklqdq            m6, m6
-  paddw                 m7, m2, m0
-  paddw                 m8, m2, m3
-  paddw                 m9, m2, m4
-  paddw                 m2, m5
-  packuswb              m7, m8
-  packuswb              m9, m2
-  paddw                 m2, m6, m0
-  paddw                 m8, m6, m3
-  mova   [dstq           ], m7
-  paddw                 m7, m6, m4
-  paddw                 m6, m5
-  mova   [dstq        +16], m9
-  packuswb              m2, m8
-  packuswb              m7, m6
-  mova   [dstq+strideq   ], m2
-  mova   [dstq+strideq+16], m7
+  punpcklqdq            m7, m7
+  paddw                 m6, m2, m3
+  paddw                 m1, m2, m0
+  packuswb              m1, m6
+  mova   [dstq           ], m1
+  paddw                 m6, m2, m5
+  paddw                 m1, m2, m4
+  packuswb              m1, m6
+  mova   [dstq+16        ], m1
+  paddw                 m6, m7, m3
+  paddw                 m1, m7, m0
+  packuswb              m1, m6
+  mova   [dstq+strideq   ], m1
+  paddw                 m6, m7, m5
+  paddw                 m1, m7, m4
+  packuswb              m1, m6
+  mova   [dstq+strideq+16], m1
   lea                 dstq, [dstq+strideq*2]
   inc                lineq
   jnz .loop
   REP_RET
-%endif