ref: 2149f214d58a87ff7bb1f6fb2e6b2e6a50bde9c3
parent: fff4654d3643b02d75f06df79164c259add89f08
parent: f2cf3c06a0835bf436bb25c5a2f28265c287ec70
author: Jingning Han <[email protected]>
date: Wed Apr 1 11:46:22 EDT 2015
Merge "Reduce required xmm number by one in block_error_fp"
--- a/vp9/encoder/x86/vp9_error_sse2.asm
+++ b/vp9/encoder/x86/vp9_error_sse2.asm
@@ -78,7 +78,7 @@
; intptr_t block_size)
INIT_XMM sse2
-cglobal block_error_fp, 3, 3, 8, uqc, dqc, size
+cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
pxor m4, m4 ; sse accumulator
pxor m5, m5 ; dedicated zero register
lea uqcq, [uqcq+sizeq*2]
@@ -96,13 +96,13 @@
pmaddwd m0, m0
pmaddwd m1, m1
; accumulate in 64bit
- punpckldq m7, m0, m5
+ punpckldq m3, m0, m5
punpckhdq m0, m5
- paddq m4, m7
- punpckldq m7, m1, m5
+ paddq m4, m3
+ punpckldq m3, m1, m5
paddq m4, m0
punpckhdq m1, m5
- paddq m4, m7
+ paddq m4, m3
paddq m4, m1
add sizeq, mmsize
jl .loop