ref: 1b10753ad731928f2130ce73b05a7f5325a1793f
parent: 131c1600a9b10e0e1bb9fc8de6e27626db65569d
parent: d76032ae87e535be5b924d9e88bbd67189380534
author: James Zern <[email protected]>
date: Fri Nov 20 20:12:42 EST 2015
Merge "Speed up h_predictor_4x4"
--- a/vpx_dsp/x86/intrapred_ssse3.asm
+++ b/vpx_dsp/x86/intrapred_ssse3.asm
@@ -33,23 +33,25 @@
SECTION .text
-INIT_MMX ssse3
+; h_predictor_4x4: fill a 4x4 block at dst so that row i (rows are stride
+; bytes apart) holds four copies of the byte left[i].
+INIT_XMM ssse3
cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left
+; cglobal auto-loads only the first 2 args (dst, stride); left must still be
+; fetched explicitly on targets where it is not already in a register.
movifnidn leftq, leftmp
- add leftq, 4
- mov lineq, -2
- pxor m0, m0
-.loop:
- movd m1, [leftq+lineq*2 ]
- movd m2, [leftq+lineq*2+1]
- pshufb m1, m0
- pshufb m2, m0
- movd [dstq ], m1
- movd [dstq+strideq], m2
+ movd m0, [leftq] ; m0 low dword = left[0..3]
+ punpcklbw m0, m0 ; every byte duplicated (x2)
+ punpcklbw m0, m0 ; every byte x4: one dword per output row
+ movd [dstq ], m0 ; row 0
+ psrldq m0, 4 ; bring the next row's dword into the low lane
+ movd [dstq+strideq], m0 ; row 1
lea dstq, [dstq+strideq*2]
- inc lineq
- jnz .loop
- REP_RET
+ psrldq m0, 4
+ movd [dstq ], m0 ; row 2
+ psrldq m0, 4
+ movd [dstq+strideq], m0 ; row 3
+ RET
INIT_MMX ssse3
cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left