ref: d76032ae87e535be5b924d9e88bbd67189380534
parent: f3f6b6fe3e960959489db2568d9942aeca261daa
author: Jian Zhou <[email protected]>
date: Thu Nov 19 06:34:22 EST 2015
Speed up h_predictor_4x4. Modify h_predictor_4x4 to use XMM registers. Speeds up by ~25% in ./test_intra_pred_speed. Change-Id: Id01c34c48e75b9d56dfc2e93af12cf0c0326a279
--- a/vpx_dsp/x86/intrapred_ssse3.asm
+++ b/vpx_dsp/x86/intrapred_ssse3.asm
@@ -33,23 +33,24 @@
SECTION .text
-INIT_MMX ssse3
+INIT_XMM ssse3
+; h_predictor_4x4: fill a 4x4 block at dst so that every pixel of row i equals
+; left[i]; rows are strideq bytes apart. Only dst and stride are requested as
+; pre-loaded arguments, so left is fetched explicitly via movifnidn below.
cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left
movifnidn leftq, leftmp
- add leftq, 4
- mov lineq, -2
- pxor m0, m0
-.loop:
- movd m1, [leftq+lineq*2 ]
- movd m2, [leftq+lineq*2+1]
- pshufb m1, m0
- pshufb m2, m0
- movd [dstq ], m1
- movd [dstq+strideq], m2
+ movd m0, [leftq] ; low dword = left[0..3]
+ punpcklbw m0, m0 ; every byte duplicated (x2)
+ punpcklbw m0, m0 ; every byte x4: dword i = left[i] broadcast
+ movd [dstq ], m0 ; row 0
+ psrldq m0, 4 ; bring the next row's dword into the low lane
+ movd [dstq+strideq], m0 ; row 1
lea dstq, [dstq+strideq*2]
- inc lineq
- jnz .loop
- REP_RET
+ psrldq m0, 4
+ movd [dstq ], m0 ; row 2
+ psrldq m0, 4
+ movd [dstq+strideq], m0 ; row 3
+ RET
INIT_MMX ssse3
cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left