ref: 7f83a3bcdc3fd5ed4f85de0a406cd1dcda9e2382
parent: cfd16a6890a02480b293314ac3e507b7b2132f8f
parent: 1893122e34a0c96afb75daa5517d04a081bf8e27
author: Yunqing Wang <[email protected]>
date: Tue Apr 22 12:09:43 EDT 2014
Merge "Fix dr memory VP8 encode/decode errors"
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -365,6 +365,7 @@
GET_GOT rbx
push rsi
push rdi
+ push rbx
; end prolog
; read top row
@@ -395,8 +396,11 @@
movsxd rcx, dword ptr arg(1) ;dst_stride
.vp8_intra_pred_uv_tm_%1_loop:
- movd xmm3, [rsi]
- movd xmm5, [rsi+rax]
+ mov bl, [rsi]
+ movd xmm3, ebx
+
+ mov bl, [rsi+rax]
+ movd xmm5, ebx
%ifidn %1, sse2
punpcklbw xmm3, xmm0
punpcklbw xmm5, xmm0
@@ -419,6 +423,7 @@
jnz .vp8_intra_pred_uv_tm_%1_loop
; begin epilog
+ pop rbx
pop rdi
pop rsi
RESTORE_GOT
@@ -486,10 +491,8 @@
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
-%ifidn %1, ssse3
-%ifndef GET_GOT_SAVE_ARG
push rbx
-%endif
+%ifidn %1, ssse3
GET_GOT rbx
%endif
; end prolog
@@ -507,13 +510,16 @@
%ifidn %1, ssse3
lea rdx, [rcx*3]
movdqa xmm2, [GLOBAL(dc_00001111)]
- lea rbx, [rax*3]
%endif
%ifidn %1, mmx2
.vp8_intra_pred_uv_ho_%1_loop:
- movd mm0, [rsi]
- movd mm1, [rsi+rax]
+ mov bl, [rsi]
+ movd mm0, ebx
+
+ mov bl, [rsi+rax]
+ movd mm1, ebx
+
punpcklbw mm0, mm0
punpcklbw mm1, mm1
pshufw mm0, mm0, 0x0
@@ -525,10 +531,19 @@
dec edx
jnz .vp8_intra_pred_uv_ho_%1_loop
%else
- movd xmm0, [rsi]
- movd xmm3, [rsi+rax]
- movd xmm1, [rsi+rax*2]
- movd xmm4, [rsi+rbx]
+ mov bl, [rsi]
+ movd xmm0, ebx
+
+ mov bl, [rsi+rax]
+ movd xmm3, ebx
+
+ mov bl, [rsi+rax*2]
+ movd xmm1, ebx
+
+ lea rbx, [rax*3]
+ mov bl, [rsi+rbx]
+ movd xmm4, ebx
+
punpcklbw xmm0, xmm3
punpcklbw xmm1, xmm4
pshufb xmm0, xmm2
@@ -539,10 +554,20 @@
movhps [rdi+rdx], xmm1
lea rsi, [rsi+rax*4]
lea rdi, [rdi+rcx*4]
- movd xmm0, [rsi]
- movd xmm3, [rsi+rax]
- movd xmm1, [rsi+rax*2]
- movd xmm4, [rsi+rbx]
+
+ mov bl, [rsi]
+ movd xmm0, ebx
+
+ mov bl, [rsi+rax]
+ movd xmm3, ebx
+
+ mov bl, [rsi+rax*2]
+ movd xmm1, ebx
+
+ lea rbx, [rax*3]
+ mov bl, [rsi+rbx]
+ movd xmm4, ebx
+
punpcklbw xmm0, xmm3
punpcklbw xmm1, xmm4
pshufb xmm0, xmm2
@@ -556,10 +581,8 @@
; begin epilog
%ifidn %1, ssse3
RESTORE_GOT
-%ifndef GET_GOT_SAVE_ARG
- pop rbx
%endif
-%endif
+ pop rbx
pop rdi
pop rsi
UNSHADOW_ARGS
@@ -893,6 +916,7 @@
SAVE_XMM 7
push rsi
push rdi
+ push rbx
GET_GOT rbx
; end prolog
@@ -926,8 +950,11 @@
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
vp8_intra_pred_y_tm_%1_loop:
- movd xmm4, [rsi]
- movd xmm5, [rsi+rax]
+ mov bl, [rsi]
+ movd xmm4, ebx
+
+ mov bl, [rsi+rax]
+ movd xmm5, ebx
%ifidn %1, sse2
punpcklbw xmm4, xmm0
punpcklbw xmm5, xmm0
@@ -956,6 +983,7 @@
; begin epilog
RESTORE_GOT
+ pop rbx
pop rdi
pop rsi
RESTORE_XMM
@@ -1029,6 +1057,7 @@
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
+ push rbx
; end prolog
;arg(2) not used
@@ -1041,8 +1070,11 @@
movsxd rcx, dword ptr arg(1) ;dst_stride
vp8_intra_pred_y_ho_sse2_loop:
- movd xmm0, [rsi]
- movd xmm1, [rsi+rax]
+ mov bl, [rsi]
+ movd xmm0, ebx
+ mov bl, [rsi+rax]
+ movd xmm1, ebx
+
; FIXME use pshufb for ssse3 version
punpcklbw xmm0, xmm0
punpcklbw xmm1, xmm1
@@ -1058,6 +1090,7 @@
jnz vp8_intra_pred_y_ho_sse2_loop
; begin epilog
+ pop rbx
pop rdi
pop rsi
UNSHADOW_ARGS