ref: f96918283f27cc813c84c432c99a46d56a9546eb
parent: 258828f7ecf2ecf6bbd283c8438ce804dfdd1141
author: Martin Storsjö <[email protected]>
date: Fri Mar 14 11:00:39 EDT 2014
Remove commented-out code for old, 32-bit-only x86 assembly function prologues/epilogues
--- a/codec/common/mb_copy.asm
+++ b/codec/common/mb_copy.asm
@@ -133,15 +133,6 @@
ALIGN 16
; dst can be align with 16 bytes, but not sure about pSrc, 12/29/2011
WelsCopy16x16NotAligned_sse2:
- ;push esi
- ;push edi
- ;push ebx
-
- ;mov edi, [esp+16] ; Dst
- ;mov eax, [esp+20] ; iStrideD
- ;mov esi, [esp+24] ; Src
- ;mov ecx, [esp+28] ; iStrideS
-
push r4
push r5
%assign push_num 2
@@ -205,15 +196,6 @@
;***********************************************************************
ALIGN 16
WelsCopy16x8NotAligned_sse2:
- ;push esi
- ;push edi
- ;push ebx
-
- ;mov edi, [esp+16] ; Dst
- ;mov eax, [esp+20] ; iStrideD
- ;mov esi, [esp+24] ; Src
- ;mov ecx, [esp+28] ; iStrideS
-
push r4
push r5
%assign push_num 2
@@ -255,13 +237,6 @@
;***********************************************************************
ALIGN 16
WelsCopy8x16_mmx:
- ;push ebx
-
- ;mov eax, [esp + 8 ] ;Dst
- ;mov ecx, [esp + 12] ;iStrideD
- ;mov ebx, [esp + 16] ;Src
- ;mov edx, [esp + 20] ;iStrideS
-
%assign push_num 0
LOAD_4_PARA
@@ -327,13 +302,6 @@
;***********************************************************************
ALIGN 16
WelsCopy8x8_mmx:
- ;push ebx
- ;push esi
- ;mov eax, [esp + 12] ;Dst
- ;mov ecx, [esp + 16] ;iStrideD
- ;mov esi, [esp + 20] ;Src
- ;mov ebx, [esp + 24] ;iStrideS
-
push r4
%assign push_num 1
LOAD_4_PARA
@@ -373,8 +341,6 @@
movq [r0+r1], mm7
WELSEMMS
- ;pop esi
- ;pop ebx
LOAD_4_PARA_POP
pop r4
ret
@@ -389,8 +355,6 @@
%assign push_num 0
LOAD_2_PARA
- ;mov eax, [esp+4] ; mv_buffer
- ;movd xmm0, [esp+8] ; _mv
movd xmm0, r1d ; _mv
pshufd xmm1, xmm0, $00
movdqa [r0 ], xmm1
@@ -472,20 +436,6 @@
; int iHeight );
;*******************************************************************************
PixelAvgWidthEq8_mmx:
-
- ;push esi
- ;push edi
- ;push ebp
- ;push ebx
-
- ;mov edi, [esp+20] ; pDst
- ;mov eax, [esp+24] ; iDstStride
- ;mov esi, [esp+28] ; pSrcA
- ;mov ecx, [esp+32] ; iSrcAStride
- ;mov ebp, [esp+36] ; pSrcB
- ;mov edx, [esp+40] ; iSrcBStride
- ;mov ebx, [esp+44] ; iHeight
-
%assign push_num 0
LOAD_7_PARA
@@ -575,17 +525,6 @@
; uint8_t *pDst, int iDstStride, int iHeight )
;*******************************************************************************
McCopyWidthEq4_mmx:
- ;push esi
- ;push edi
- ;push ebx
-
-
- ;mov esi, [esp+16]
- ;mov eax, [esp+20]
- ;mov edi, [esp+24]
- ;mov ecx, [esp+28]
- ;mov edx, [esp+32]
-
push r5
%assign push_num 1
LOAD_5_PARA
@@ -614,14 +553,6 @@
; uint8_t *pDst, int iDstStride, int iHeight )
;*******************************************************************************
McCopyWidthEq8_mmx:
- ;push esi
- ;push edi
- ;mov esi, [esp+12]
- ;mov eax, [esp+16]
- ;mov edi, [esp+20]
- ;mov ecx, [esp+24]
- ;mov edx, [esp+28]
-
%assign push_num 0
LOAD_5_PARA
@@ -659,15 +590,6 @@
movhps [%1+8], %2
%endmacro
McCopyWidthEq16_sse2:
- ;push esi
- ;push edi
-
- ;mov esi, [esp+12] ; pSrc
- ;mov eax, [esp+16] ; iSrcStride
- ;mov edi, [esp+20] ; pDst
- ;mov edx, [esp+24] ; iDstStride
- ;mov ecx, [esp+28] ; iHeight
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
--- a/codec/common/mc_chroma.asm
+++ b/codec/common/mc_chroma.asm
@@ -76,10 +76,6 @@
;*******************************************************************************
WELS_EXTERN McChromaWidthEq4_mmx
McChromaWidthEq4_mmx:
- ;push esi
- ;push edi
- ;push ebx
-
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@@ -86,8 +82,6 @@
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
- ;mov eax, [esp +12 + 20]
-
movd mm3, [r4]; [eax]
WELS_Zero mm7
punpcklbw mm3, mm3
@@ -103,12 +97,6 @@
punpcklbw mm4, mm7
punpckhbw mm6, mm7
- ;mov esi, [esp +12+ 4]
- ;mov eax, [esp + 12 + 8]
- ;mov edi, [esp + 12 + 12]
- ;mov edx, [esp + 12 + 16]
- ;mov ecx, [esp + 12 + 24]
-
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
movd mm0, [r0]
movd mm1, [r0+1]
@@ -149,9 +137,6 @@
jnz near .xloop
WELSEMMS
LOAD_6_PARA_POP
- ;pop ebx
- ;pop edi
- ;pop esi
ret
@@ -166,10 +151,6 @@
;*******************************************************************************
WELS_EXTERN McChromaWidthEq8_sse2
McChromaWidthEq8_sse2:
- ;push esi
- ;push edi
- ;push ebx
-
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@@ -176,7 +157,6 @@
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
- ;mov eax, [esp +12 + 20]
movd xmm3, [r4]
WELS_Zero xmm7
punpcklbw xmm3, xmm3
@@ -193,12 +173,6 @@
punpcklbw xmm4, xmm7
punpckhbw xmm6, xmm7
- ;mov esi, [esp +12+ 4]
- ;mov eax, [esp + 12 + 8]
- ;mov edi, [esp + 12 + 12]
- ;mov edx, [esp + 12 + 16]
- ;mov ecx, [esp + 12 + 24]
-
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
movq xmm0, [r0]
movq xmm1, [r0+1]
@@ -240,9 +214,6 @@
LOAD_6_PARA_POP
- ;pop ebx
- ;pop edi
- ;pop esi
ret
@@ -259,9 +230,6 @@
;***********************************************************************
WELS_EXTERN McChromaWidthEq8_ssse3
McChromaWidthEq8_ssse3:
- ;push ebx
- ;push esi
- ;push edi
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@@ -268,8 +236,6 @@
SIGN_EXTENSION r3, r3d
SIGN_EXTENSION r5, r5d
- ;mov eax, [esp + 12 + 20]
-
pxor xmm7, xmm7
movd xmm5, [r4]
punpcklwd xmm5, xmm5
@@ -278,12 +244,6 @@
punpcklqdq xmm5, xmm5
punpckhqdq xmm6, xmm6
- ;mov eax, [esp + 12 + 4]
- ;mov edx, [esp + 12 + 8]
- ;mov esi, [esp + 12 + 12]
- ;mov edi, [esp + 12 + 16]
- ;mov ecx, [esp + 12 + 24]
-
sub r2, r3 ;sub esi, edi
sub r2, r3
movdqa xmm7, [h264_d0x20_sse2]
@@ -329,10 +289,6 @@
jnz .hloop_chroma
LOAD_6_PARA_POP
-
- ;pop edi
- ;pop esi
- ;pop ebx
ret
--- a/codec/common/mc_luma.asm
+++ b/codec/common/mc_luma.asm
@@ -83,15 +83,6 @@
; int iHeight)
;*******************************************************************************
McHorVer20WidthEq4_mmx:
- ;push esi
- ;push edi
-
- ;mov esi, [esp+12]
- ;mov eax, [esp+16]
- ;mov edi, [esp+20]
- ;mov ecx, [esp+24]
- ;mov edx, [esp+28]
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -185,15 +176,6 @@
; )
;***********************************************************************
McHorVer22Width8HorFirst_sse2:
- ;push esi
- ;push edi
- ;push ebx
- ;mov esi, [esp+16] ;pSrc
- ;mov eax, [esp+20] ;iSrcStride
- ;mov edi, [esp+24] ;pDst
- ;mov edx, [esp+28] ;iDstStride
- ;mov ebx, [esp+32] ;iHeight
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -245,15 +227,6 @@
; );
;*******************************************************************************
McHorVer20WidthEq8_sse2:
- ;push esi
- ;push edi
-
- ;mov esi, [esp + 12] ;pSrc
- ;mov eax, [esp + 16] ;iSrcStride
- ;mov edi, [esp + 20] ;pDst
- ;mov ecx, [esp + 28] ;iHeight
- ;mov edx, [esp + 24] ;iDstStride
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -309,14 +282,6 @@
; );
;*******************************************************************************
McHorVer20WidthEq16_sse2:
- ;push esi
- ;push edi
- ;mov esi, [esp + 12] ;pSrc
- ;mov eax, [esp + 16] ;iSrcStride
- ;mov edi, [esp + 20] ;pDst
- ;mov ecx, [esp + 28] ;iHeight
- ;mov edx, [esp + 24] ;iDstStride
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -398,14 +363,6 @@
;*******************************************************************************
ALIGN 16
McHorVer02WidthEq8_sse2:
- ;push esi
- ;push edi
- ;mov esi, [esp + 12] ;pSrc
- ;mov edx, [esp + 16] ;iSrcStride
- ;mov edi, [esp + 20] ;pDst
- ;mov eax, [esp + 24] ;iDstStride
- ;mov ecx, [esp + 28] ;iHeight
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -503,17 +460,6 @@
;***********************************************************************
ALIGN 16
McHorVer02Height9Or17_sse2:
- ;push esi
- ;push edi
- ;push ebx
-
- ;mov esi, [esp + 16]
- ;mov edx, [esp + 20]
- ;mov edi, [esp + 24]
- ;mov eax, [esp + 28]
- ;mov ecx, [esp + 36]
- ;mov ebx, [esp + 32]
-
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@@ -612,9 +558,6 @@
.x_loop_dec:
dec r4
jz near .xx_exit
- ;mov esi, [esp + 16]
- ;mov edi, [esp + 24]
- ;mov ecx, [esp + 36]
%ifdef X86_32
mov r0, arg1
mov r2, arg3
@@ -651,16 +594,6 @@
; );
;***********************************************************************
McHorVer20Width9Or17_sse2:
- ;push esi
- ;push edi
- ;push ebx
- ;mov esi, [esp+16]
- ;mov eax, [esp+20]
- ;mov edi, [esp+24]
- ;mov edx, [esp+28]
- ;mov ecx, [esp+32]
- ;mov ebx, [esp+36]
-
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@@ -819,16 +752,6 @@
; int32_t iWidth,int32_t iHeight);
;***********************************************************************
McHorVer22HorFirst_sse2:
- ;push esi
- ;push edi
- ;push ebx
- ;mov esi, [esp+16]
- ;mov eax, [esp+20]
- ;mov edi, [esp+24]
- ;mov edx, [esp+28]
- ;mov ecx, [esp+32]
- ;mov ebx, [esp+36]
-
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@@ -996,18 +919,6 @@
;***********************************************************************
McHorVer22Width8VerLastAlign_sse2:
- ;push esi
- ;push edi
- ;push ebx
- ;push ebp
-
- ;mov esi, [esp+20]
- ;mov eax, [esp+24]
- ;mov edi, [esp+28]
- ;mov edx, [esp+32]
- ;mov ebx, [esp+36]
- ;mov ecx, [esp+40]
-
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@@ -1104,9 +1015,6 @@
.x_loop_dec:
dec r4
jz near .exit
- ;mov esi, [esp+20]
- ;mov edi, [esp+28]
- ;mov ecx, [esp+40]
%ifdef X86_32
mov r0, arg1
mov r2, arg3
@@ -1140,18 +1048,6 @@
;***********************************************************************
McHorVer22Width8VerLastUnAlign_sse2:
- ;push esi
- ;push edi
- ;push ebx
- ;push ebp
-
- ;mov esi, [esp+20]
- ;mov eax, [esp+24]
- ;mov edi, [esp+28]
- ;mov edx, [esp+32]
- ;mov ebx, [esp+36]
- ;mov ecx, [esp+40]
-
%assign push_num 0
LOAD_6_PARA
SIGN_EXTENSION r1, r1d
@@ -1247,9 +1143,6 @@
.x_loop_dec:
dec r4
jz near .exit
- ;mov esi, [esp+20]
- ;mov edi, [esp+28]
- ;mov ecx, [esp+40]
%ifdef X86_32
mov r0, arg1
mov r2, arg3
--- a/codec/common/satd_sad.asm
+++ b/codec/common/satd_sad.asm
@@ -158,12 +158,6 @@
WELS_EXTERN WelsSampleSatd4x4_sse2
align 16
WelsSampleSatd4x4_sse2:
- ;push ebx
- ;mov eax, [esp+8]
- ;mov ebx, [esp+12]
- ;mov ecx, [esp+16]
- ;mov edx, [esp+20]
-
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@@ -238,12 +232,6 @@
WELS_EXTERN WelsSampleSatd8x8_sse2
align 16
WelsSampleSatd8x8_sse2:
- ;push ebx
- ;mov eax, [esp+8]
- ;mov ebx, [esp+12]
- ;mov ecx, [esp+16]
- ;mov edx, [esp+20]
-
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@@ -265,12 +253,6 @@
WELS_EXTERN WelsSampleSatd8x16_sse2
align 16
WelsSampleSatd8x16_sse2:
- ;push ebx
- ;mov eax, [esp+8]
- ;mov ebx, [esp+12]
- ;mov ecx, [esp+16]
- ;mov edx, [esp+20]
-
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@@ -297,12 +279,6 @@
WELS_EXTERN WelsSampleSatd16x8_sse2
align 16
WelsSampleSatd16x8_sse2:
- ;push ebx
- ;mov eax, [esp+8]
- ;mov ebx, [esp+12]
- ;mov ecx, [esp+16]
- ;mov edx, [esp+20]
-
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@@ -316,8 +292,6 @@
pop r2
pop r0
- ;mov eax, [esp+8]
- ;mov ecx, [esp+16]
add r0, 8
add r2, 8
SSE2_GetSatd8x8
@@ -336,12 +310,6 @@
WELS_EXTERN WelsSampleSatd16x16_sse2
align 16
WelsSampleSatd16x16_sse2:
- ;push ebx
- ;mov eax, [esp+8]
- ;mov ebx, [esp+12]
- ;mov ecx, [esp+16]
- ;mov edx, [esp+20]
-
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@@ -358,8 +326,6 @@
pop r2
pop r0
- ;mov eax, [esp+8]
- ;mov ecx, [esp+16]
add r0, 8
add r2, 8
@@ -1022,12 +988,6 @@
;***********************************************************************
WELS_EXTERN WelsSampleSatd4x4_sse41
WelsSampleSatd4x4_sse41:
- ;push ebx
- ;mov eax,[esp+8]
- ;mov ebx,[esp+12]
- ;mov ecx,[esp+16]
- ;mov edx,[esp+20]
-
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@@ -1082,13 +1042,6 @@
WELS_EXTERN WelsSampleSatd8x8_sse41
align 16
WelsSampleSatd8x8_sse41:
- ;push ebx
- ;push esi
- ;push edi
- ;mov eax, [esp+16]
- ;mov ebx, [esp+20]
- ;mov ecx, [esp+24]
- ;mov edx, [esp+28]
%ifdef X86_32
push r4
push r5
@@ -1121,15 +1074,6 @@
WELS_EXTERN WelsSampleSatd8x16_sse41
align 16
WelsSampleSatd8x16_sse41:
- ;push ebx
- ;push esi
- ;push edi
- ;push ebp
- ;%define pushsize 16
- ;mov eax, [esp+pushsize+4]
- ;mov ebx, [esp+pushsize+8]
- ;mov ecx, [esp+pushsize+12]
- ;mov edx, [esp+pushsize+16]
%ifdef X86_32
push r4
push r5
@@ -1168,13 +1112,6 @@
WELS_EXTERN WelsSampleSatd16x8_sse41
align 16
WelsSampleSatd16x8_sse41:
- ;push ebx
- ;push esi
- ;push edi
- ;mov eax, [esp+16]
- ;mov ebx, [esp+20]
- ;mov ecx, [esp+24]
- ;mov edx, [esp+28]
%ifdef X86_32
push r4
push r5
@@ -1197,8 +1134,6 @@
pop r2
pop r0
- ;mov eax, [esp+16]
- ;mov ecx, [esp+24]
add r0, 8
add r2, 8
SSE41_GetSatd8x4
@@ -1222,15 +1157,6 @@
WELS_EXTERN WelsSampleSatd16x16_sse41
align 16
WelsSampleSatd16x16_sse41:
- ;push ebx
- ;push esi
- ;push edi
- ;push ebp
- ;%define pushsize 16
- ;mov eax, [esp+pushsize+4]
- ;mov ebx, [esp+pushsize+8]
- ;mov ecx, [esp+pushsize+12]
- ;mov edx, [esp+pushsize+16]
%ifdef X86_32
push r4
push r5
@@ -1259,8 +1185,6 @@
pop r2
pop r0
- ;mov eax, [esp+pushsize+4]
- ;mov ecx, [esp+pushsize+12]
add r0, 8
add r2, 8
mov r6, 0
@@ -1272,7 +1196,6 @@
cmp r6, 4
jl loop_get_satd_16x16_right
SSSE3_SumWHorizon retrd, xmm6, xmm5, xmm7
- ;%undef pushsize
LOAD_4_PARA_POP
%ifdef X86_32
pop r6
@@ -1355,14 +1278,6 @@
WELS_EXTERN WelsSampleSad16x16_sse2
align 16
WelsSampleSad16x16_sse2:
- ;push ebx
- ;push edi
- ;push esi
- ;%define _STACK_SIZE 12
- ;mov eax, [esp+_STACK_SIZE+4 ]
- ;mov ebx, [esp+_STACK_SIZE+8 ]
- ;mov ecx, [esp+_STACK_SIZE+12]
- ;mov edx, [esp+_STACK_SIZE+16]
%ifdef X86_32
push r4
push r5
@@ -1406,12 +1321,6 @@
WELS_EXTERN WelsSampleSad16x8_sse2
align 16
WelsSampleSad16x8_sse2:
- ;push ebx
- ;mov eax, [esp+8]
- ;mov ebx, [esp+12]
- ;mov ecx, [esp+16]
- ;mov edx, [esp+20]
-
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@@ -1438,12 +1347,6 @@
WELS_EXTERN WelsSampleSad8x16_sse2
WelsSampleSad8x16_sse2:
- ;push ebx
- ;mov eax, [esp+8]
- ;mov ebx, [esp+12]
- ;mov ecx, [esp+16]
- ;mov edx, [esp+20]
-
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
@@ -1475,15 +1378,6 @@
WELS_EXTERN WelsSampleSad8x8_sse21
WelsSampleSad8x8_sse21:
- ;mov ecx, [esp+12]
- ;mov edx, ecx
- ;CACHE_SPLIT_CHECK edx, 8, 64
- ;jle near .pixel_sad_8x8_nsplit
- ;push ebx
- ;push edi
- ;mov eax, [esp+12]
- ;mov ebx, [esp+16]
-
%assign push_num 0
mov r2, arg3
push r2
@@ -1596,10 +1490,6 @@
jmp .return
.pixel_sad_8x8_nsplit:
- ;push ebx
- ;mov eax, [esp+8]
- ;mov ebx, [esp+12]
- ;mov edx, [esp+20]
pop r2
%assign push_num 0
@@ -1647,12 +1537,6 @@
%endmacro
WELS_EXTERN WelsSampleSadFour16x16_sse2
WelsSampleSadFour16x16_sse2:
- ;push ebx
- ;mov eax, [esp+8]
- ;mov ebx, [esp+12]
- ;mov ecx, [esp+16]
- ;mov edx, [esp+20]
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -1753,7 +1637,6 @@
psadbw xmm0, xmm3
paddw xmm5, xmm0
- ;mov ecx, [esp+24]
movhlps xmm0, xmm4
paddw xmm4, xmm0
movhlps xmm0, xmm5
@@ -1772,13 +1655,6 @@
WELS_EXTERN WelsSampleSadFour16x8_sse2
WelsSampleSadFour16x8_sse2:
- ;push ebx
- ;push edi
- ;mov eax, [esp+12]
- ;mov ebx, [esp+16]
- ;mov edi, [esp+20]
- ;mov edx, [esp+24]
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -1847,7 +1723,6 @@
psadbw xmm1, xmm3
paddw xmm5, xmm1
- ;mov edi, [esp+28]
movhlps xmm0, xmm4
paddw xmm4, xmm0
movhlps xmm0, xmm5
@@ -1865,13 +1740,6 @@
WELS_EXTERN WelsSampleSadFour8x16_sse2
WelsSampleSadFour8x16_sse2:
- ;push ebx
- ;push edi
- ;mov eax, [esp+12]
- ;mov ebx, [esp+16]
- ;mov edi, [esp+20]
- ;mov edx, [esp+24]
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -2066,7 +1934,6 @@
psadbw xmm0, xmm3
paddw xmm5, xmm0
- ;mov edi, [esp+28]
movhlps xmm0, xmm4
paddw xmm4, xmm0
movhlps xmm0, xmm5
@@ -2085,13 +1952,6 @@
WELS_EXTERN WelsSampleSadFour8x8_sse2
WelsSampleSadFour8x8_sse2:
- ;push ebx
- ;push edi
- ;mov eax, [esp+12]
- ;mov ebx, [esp+16]
- ;mov edi, [esp+20]
- ;mov edx, [esp+24]
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -2195,7 +2055,6 @@
psadbw xmm0, xmm3
paddw xmm5, xmm0
- ;mov edi, [esp+28]
movhlps xmm0, xmm4
paddw xmm4, xmm0
movhlps xmm0, xmm5
@@ -2213,13 +2072,6 @@
WELS_EXTERN WelsSampleSadFour4x4_sse2
WelsSampleSadFour4x4_sse2:
- ;push ebx
- ;push edi
- ;mov eax, [esp+12]
- ;mov ebx, [esp+16]
- ;mov edi, [esp+20]
- ;mov edx, [esp+24]
-
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -2279,7 +2131,6 @@
paddw xmm3, xmm0
movhlps xmm0, xmm4
paddw xmm4, xmm0
- ;mov edi, [esp+28]
punpckldq xmm1, xmm4
punpckldq xmm2, xmm3
punpcklqdq xmm1, xmm2
@@ -2300,17 +2151,6 @@
; int32_t WelsSampleSad4x4_mmx (uint8_t *, int32_t, uint8_t *, int32_t )
;***********************************************************************
WelsSampleSad4x4_mmx:
- ;push ebx
- ;%define pushsize 4
- ;%define pix1address esp+pushsize+4
- ;%define pix1stride esp+pushsize+8
- ;%define pix2address esp+pushsize+12
- ;%define pix2stride esp+pushsize+16
- ;mov eax, [pix1address]
- ;mov ebx, [pix1stride ]
- ;mov ecx, [pix2address]
- ;mov edx, [pix2stride ]
-
%assign push_num 0
LOAD_4_PARA
SIGN_EXTENSION r1, r1d
--- a/codec/decoder/core/asm/block_add.asm
+++ b/codec/decoder/core/asm/block_add.asm
@@ -56,15 +56,10 @@
; void WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
;*******************************************************************************
WelsResBlockZero16x16_sse2:
- ;push r0
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
- ;mov r0, [esp+08h]
- ;mov r1, [esp+0ch]
- ;lea r1, [r1*2]
lea r1, [r1*2]
- ;lea r2, [r1*3]
lea r2, [r1*3]
pxor xmm7, xmm7
@@ -124,7 +119,6 @@
movdqa [r0+r2], xmm7
movdqa [r0+r2+10h], xmm7
- ;pop r0
ret
@@ -135,12 +129,9 @@
; void WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
;*******************************************************************************
WelsResBlockZero8x8_sse2:
- ;push r0
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
- ;mov r0, [esp+08h]
- ;mov r1, [esp+0ch]
lea r1, [r1*2]
lea r2, [r1*3]
@@ -158,6 +149,5 @@
movdqa [r0+r2], xmm7
- ;pop r0
ret
--- a/codec/decoder/core/asm/intra_pred.asm
+++ b/codec/decoder/core/asm/intra_pred.asm
@@ -192,8 +192,6 @@
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
- ;mov eax, [esp+4] ;pPred
- ;mov ecx, [esp+8] ;kiStride
movzx r2, byte [r0-1]
movd xmm0, r2d
@@ -225,7 +223,6 @@
; void WelsDecoderI16x16LumaPredPlane_sse2(uint8_t *pPred, const int32_t kiStride);
;*******************************************************************************
WelsDecoderI16x16LumaPredPlane_sse2:
- ;%define pushsize 4
push r3
push r4
%assign push_num 2
@@ -232,9 +229,6 @@
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r4, r0 ; save r0 in r4
- ;push esi
- ;mov esi, [esp + pushsize + 4]
- ;mov ecx, [esp + pushsize + 8]
sub r0, 1
sub r0, r1
@@ -286,7 +280,6 @@
sar r2, 6 ; c = (5 * V + 32) >> 6;
SSE2_Copy8Times xmm4, r2d ; xmm4 = c,c,c,c,c,c,c,c
- ;mov esi, [esp + pushsize + 4]
mov r0, r4
add r3, 16
imul r2, -7
@@ -313,7 +306,6 @@
cmp r2, 16
jnz get_i16x16_luma_pred_plane_sse2_1
- ;pop esi
pop r4
pop r3
ret
@@ -338,8 +330,6 @@
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
- ;mov eax, [esp+4] ; pPred
- ;mov ecx, [esp+8] ; kiStride
COPY_16_TIMES r0, xmm0
movdqa [r0], xmm0
@@ -364,8 +354,6 @@
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
- ;mov edx, [esp+4] ; pPred
- ;mov ecx, [esp+8] ; kiStride
sub r0, r1
movdqa xmm0, [r0]
@@ -402,7 +390,6 @@
;*******************************************************************************
WELS_EXTERN WelsDecoderIChromaPredPlane_sse2
WelsDecoderIChromaPredPlane_sse2:
- ;%define pushsize 4
push r3
push r4
%assign push_num 2
@@ -409,9 +396,6 @@
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r4, r0
- ;push esi
- ;mov esi, [esp + pushsize + 4] ;pPred
- ;mov ecx, [esp + pushsize + 8] ;kiStride
sub r0, 1
sub r0, r1
@@ -466,7 +450,6 @@
sar r2, 5 ; c = (17 * V + 16) >> 5;
SSE2_Copy8Times xmm4, r2d ; mm4 = c,c,c,c,c,c,c,c
- ;mov esi, [esp + pushsize + 4]
mov r0, r4
add r3, 16
imul r2, -3
@@ -489,7 +472,6 @@
cmp r2, 8
jnz get_i_chroma_pred_plane_sse2_1
- ;pop esi
pop r4
pop r3
WELSEMMS
@@ -513,9 +495,6 @@
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
- ;mov edx,[esp+4] ;pPred
- ;mov eax,edx
- ;mov ecx,[esp+8] ;kiStride
movq mm1,[r2+r1-8] ;get value of 11,decreasing 8 is trying to improve the performance of movq mm1[8] = 11
movq mm2,[r2-8] ;get value of 6 mm2[8] = 6
@@ -586,9 +565,6 @@
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
- ;mov edx, [esp+4] ;pPred
- ;mov eax, edx
- ;mov ecx, [esp+8] ;kiStride
movq mm0, [r2-8]
psrlq mm0, 38h
@@ -631,8 +607,6 @@
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
- ;mov eax, [esp+4] ;pPred
- ;mov ecx, [esp+8] ;kiStride
sub r0, r1
movq mm0, [r0]
@@ -689,9 +663,6 @@
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
- ;mov edx, [esp+4] ; pPred
- ;mov eax, edx
- ;mov ecx, [esp+8] ; kiStride
sub r2, r1
movd mm0, [r2-1] ; mm0 = [xx xx xx xx t2 t1 t0 lt]
psllq mm0, 20h ; mm0 = [t2 t1 t0 lt xx xx xx xx]
@@ -776,9 +747,6 @@
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
- ;mov edx, [esp+4] ; pPred
- ;mov eax, edx
- ;mov ecx, [esp+8] ; kiStride
movd mm0, [r2-4] ; mm0[3] = l0
punpcklbw mm0, [r2+r1-4] ; mm0[7] = l1, mm0[6] = l0
@@ -866,9 +834,6 @@
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
- ;mov edx, [esp+4] ; pPred
- ;mov eax, edx
- ;mov ecx, [esp+8] ; kiStride
sub r2, r1
movq mm0, [r2-1] ; mm0 = [xx xx xx t3 t2 t1 t0 lt]
psllq mm0, 18h ; mm0 = [t3 t2 t1 t0 lt xx xx xx]
@@ -957,9 +922,6 @@
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
- ;mov edx, [esp+4] ; pPred
- ;mov eax, edx
- ;mov ecx, [esp+8] ; kiStride
sub r2, r1
movq mm0, [r2] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0]
movq mm1, mm0
@@ -1030,9 +992,6 @@
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r2, r0
- ;mov edx, [esp+4] ; pPred
- ;mov eax, edx
- ;mov ecx, [esp+8] ; kiStride
sub r2, r1
movq mm0, [r2] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0]
@@ -1077,9 +1036,6 @@
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
mov r4, r0
- ;push ebx
- ;mov eax, [esp+8] ; pPred
- ;mov ecx, [esp+12] ; kiStride
sub r0, r1
movq mm0, [r0]
@@ -1144,8 +1100,6 @@
psllq mm1, 0x20
pxor mm1, mm2 ; mm2 = m_down
- ;mov edx, [esp+8] ; pPred
-
movq [r4], mm0
movq [r4+r1], mm0
movq [r4+2*r1], mm0
@@ -1159,7 +1113,6 @@
lea r4, [r4+2*r1]
movq [r4+r1], mm1
- ;pop ebx
pop r4
pop r3
WELSEMMS
@@ -1174,9 +1127,6 @@
;*******************************************************************************
WELS_EXTERN WelsDecoderI16x16LumaPredDc_sse2
WelsDecoderI16x16LumaPredDc_sse2:
- ;push ebx
- ;mov eax, [esp+8] ; pPred
- ;mov ecx, [esp+12] ; kiStride
push r3
push r4
%assign push_num 2
@@ -1211,8 +1161,6 @@
pmuludq xmm0, [mmx_01bytes]
pshufd xmm0, xmm0, 0
- ;mov edx, [esp+8] ; pPred
-
movdqa [r4], xmm0
movdqa [r4+r1], xmm0
movdqa [r4+2*r1], xmm0
@@ -1244,7 +1192,6 @@
movdqa [r4+r1], xmm0
- ;pop ebx
pop r4
pop r3
@@ -1260,10 +1207,6 @@
;*******************************************************************************
WELS_EXTERN WelsDecoderI16x16LumaPredDcTop_sse2
WelsDecoderI16x16LumaPredDcTop_sse2:
- ;push ebx
- ;%define PUSH_SIZE 4
- ;mov eax, [esp+PUSH_SIZE+4] ; pPred
- ;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
@@ -1328,8 +1271,6 @@
movdqa [r0+2*r1], xmm0
movdqa [r0+r2], xmm1
- ;%undef PUSH_SIZE
- ;pop ebx
ret
ALIGN 16
@@ -1338,12 +1279,6 @@
;*******************************************************************************
WELS_EXTERN WelsDecoderI16x16LumaPredDcNA_sse2
WelsDecoderI16x16LumaPredDcNA_sse2:
- ;push ebx
-
- ;%define PUSH_SIZE 4
-
- ;mov eax, [esp+PUSH_SIZE+4] ; pPred
- ;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
@@ -1371,9 +1306,6 @@
movdqa [r0+2*r1], xmm0
movdqa [r0+r2], xmm1
- ;%undef PUSH_SIZE
-
- ;pop ebx
ret
ALIGN 16
@@ -1382,12 +1314,6 @@
;*******************************************************************************
WELS_EXTERN WelsDecoderIChromaPredDcLeft_mmx
WelsDecoderIChromaPredDcLeft_mmx:
- ;push ebx
- ;push esi
- ;%define PUSH_SIZE 8
- ;mov esi, [esp+PUSH_SIZE+4] ; pPred
- ;mov ecx, [esp+PUSH_SIZE+8] ; kiStride
- ;mov eax, esi
push r3
push r4
%assign push_num 2
@@ -1450,8 +1376,6 @@
movq [r4+r1], mm3
movq [r4+2*r1], mm2
movq [r4+r2], mm3
- ;pop esi
- ;pop ebx
pop r4
pop r3
emms
@@ -1463,12 +1387,6 @@
;*******************************************************************************
WELS_EXTERN WelsDecoderIChromaPredDcTop_sse2
WelsDecoderIChromaPredDcTop_sse2:
- ;push ebx
- ;%define PUSH_SIZE 4
- ;mov eax, [esp+PUSH_SIZE+4] ; pPred
- ;mov ecx, [esp+PUSH_SIZE+8] ; kiStride
- ;mov ebx, ecx
- ;neg ebx
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
@@ -1500,8 +1418,6 @@
movq [r0+r1], xmm0
movq [r0+2*r1], xmm0
movq [r0+r2], xmm0
- ;%undef PUSH_SIZE
- ;pop ebx
ret
ALIGN 16
@@ -1510,10 +1426,6 @@
;*******************************************************************************
WELS_EXTERN WelsDecoderIChromaPredDcNA_mmx
WelsDecoderIChromaPredDcNA_mmx:
- ;push ebx
- ;%define PUSH_SIZE 4
- ;mov eax, [esp+PUSH_SIZE+4] ; pPred
- ;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
%assign push_num 0
LOAD_2_PARA
SIGN_EXTENSION r1, r1d
@@ -1529,8 +1441,6 @@
movq [r0+r1], mm1
movq [r0+2*r1], mm0
movq [r0+r2], mm1
- ;%undef PUSH_SIZE
- ;pop ebx
emms
ret
--- a/codec/encoder/core/asm/dct.asm
+++ b/codec/encoder/core/asm/dct.asm
@@ -136,11 +136,6 @@
;***********************************************************************
WELS_EXTERN WelsDctT4_mmx
WelsDctT4_mmx:
- ;push ebx
- ;mov eax, [esp+12] ; pix1
- ;mov ebx, [esp+16] ; i_pix1
- ;mov ecx, [esp+20] ; pix2
- ;mov edx, [esp+24] ; i_pix2
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r2, r2d
@@ -155,7 +150,6 @@
MMX_DCT mm3, mm5, mm2 ,mm4, mm1, mm6
MMX_Trans4x4W mm2, mm3, mm4, mm1, mm5
- ;mov eax, [esp+ 8] ; pDct
movq [r0+ 0], mm2
movq [r0+ 8], mm1
movq [r0+16], mm5
@@ -162,7 +156,6 @@
movq [r0+24], mm4
WELSEMMS
LOAD_5_PARA_POP
- ;pop ebx
ret
@@ -171,26 +164,14 @@
;***********************************************************************
WELS_EXTERN WelsIDctT4Rec_mmx
WelsIDctT4Rec_mmx:
- ;push ebx
-;%define pushsize 4
-;%define p_dst esp+pushsize+4
-;%define i_dst esp+pushsize+8
-;%define p_pred esp+pushsize+12
-;%define i_pred esp+pushsize+16
-;%define pDct esp+pushsize+20
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
-; mov eax, [pDct ]
movq mm0, [r4+ 0]
movq mm1, [r4+ 8]
movq mm2, [r4+16]
movq mm3, [r4+24]
- ;mov edx, [p_dst ] ; r0
- ;mov ecx, [i_dst ] ; r1
- ;mov eax, [p_pred] ; r2
- ;mov ebx, [i_pred] ; r3
MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4
MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6
@@ -209,13 +190,6 @@
WELSEMMS
LOAD_5_PARA_POP
-;%undef pushsize
-;%undef p_dst
-;%undef i_dst
-;%undef p_pred
-;%undef i_pred
-;%undef pDct
-; pop ebx
ret
@@ -319,13 +293,6 @@
WELS_EXTERN WelsDctFourT4_sse2
ALIGN 16
WelsDctFourT4_sse2:
- ;push ebx
- ;push esi
- ;mov esi, [esp+12]
- ;mov eax, [esp+16] ; pix1
- ;mov ebx, [esp+20] ; i_pix1
- ;mov ecx, [esp+24] ; pix2
- ;mov edx, [esp+28] ; i_pix2
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r2, r2d
@@ -365,17 +332,10 @@
lea r0, [r0+64]
SSE2_Store4x8p r0, xmm4, xmm2, xmm3, xmm0, xmm5
- ;pop esi
- ;pop ebx
LOAD_5_PARA_POP
ret
-;%define rec esp + pushsize + 4
-;%define stride esp + pushsize + 8
-;%define pred esp + pushsize + 12
-;%define pred_stride esp + pushsize + 16
-;%define rs esp + pushsize + 20
;***********************************************************************
; void WelsIDctFourT4Rec_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs);
;***********************************************************************
@@ -382,15 +342,6 @@
WELS_EXTERN WelsIDctFourT4Rec_sse2
ALIGN 16
WelsIDctFourT4Rec_sse2:
-;%define pushsize 8
-; push ebx
-; push esi
-
-; mov eax, [rec]
-; mov ebx, [stride]
-; mov ecx, [pred]
-; mov edx, [pred_stride]
-; mov esi, [rs]
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
@@ -449,21 +400,11 @@
;***********************************************************************
WELS_EXTERN WelsIDctRecI16x16Dc_sse2
ALIGN 16
-;%define pushsize 8
-;%define luma_dc esp + pushsize + 20
WelsIDctRecI16x16Dc_sse2:
%assign push_num 0
LOAD_5_PARA
SIGN_EXTENSION r1, r1d
SIGN_EXTENSION r3, r3d
- ; push esi
- ; push edi
-
- ;mov ecx, [luma_dc] ; r4
- ;mov eax, [rec] ; r0
- ;mov edx, [stride] ; r1
- ;mov esi, [pred]; r2
- ;mov edi, [pred_stride]; r3
pxor xmm7, xmm7
WELS_DW32 xmm6
@@ -499,8 +440,6 @@
lea r2, [r2 + 2 * r3]
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
LOAD_5_PARA_POP
- ;pop edi
- ;pop esi
ret
@@ -537,8 +476,6 @@
;***********************************************************************
WELS_EXTERN WelsHadamardT4Dc_sse2
WelsHadamardT4Dc_sse2:
- ;mov eax, [esp + 4] ; luma_dc
- ;mov ecx, [esp + 8] ; pDct
%assign push_num 0
LOAD_2_PARA
SSE2_Load4Col xmm1, xmm5, xmm6, xmm0, r1
--- a/codec/encoder/core/asm/intra_pred.asm
+++ b/codec/encoder/core/asm/intra_pred.asm
@@ -234,10 +234,6 @@
; void WelsI16x16LumaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************
WelsI16x16LumaPredPlane_sse2:
- ;%define pushsize 4
- ;push esi
- ;mov esi, [esp + pushsize + 8]
- ;mov ecx, [esp + pushsize + 12]
push r3
push r4
%assign push_num 2
@@ -293,7 +289,6 @@
sar r3, 6 ; c = (5 * V + 32) >> 6;
SSE2_Copy8Times xmm4, r3d ; xmm4 = c,c,c,c,c,c,c,c
- ;mov esi, [esp + pushsize + 4]
add r4, 16
imul r3, -7
add r3, r4 ; s = a + 16 + (-7)*c
@@ -367,9 +362,6 @@
;***********************************************************************
WELS_EXTERN WelsI16x16LumaPredV_sse2
WelsI16x16LumaPredV_sse2:
- ;mov edx, [esp+4] ; pred
- ;mov eax, [esp+8] ; pRef
- ;mov ecx, [esp+12] ; stride
%assign push_num 0
LOAD_3_PARA
SIGN_EXTENSION r2, r2d
@@ -400,10 +392,6 @@
;***********************************************************************
WELS_EXTERN WelsIChromaPredPlane_sse2
WelsIChromaPredPlane_sse2:
- ;%define pushsize 4
- ;push esi
- ;mov esi, [esp + pushsize + 8] ;pRef
- ;mov ecx, [esp + pushsize + 12] ;stride
push r3
push r4
%assign push_num 2
@@ -462,7 +450,6 @@
sar r3, 5 ; c = (17 * V + 16) >> 5;
SSE2_Copy8Times xmm4, r3d ; mm4 = c,c,c,c,c,c,c,c
- ;mov esi, [esp + pushsize + 4]
add r4, 16
imul r3, -3
add r3, r4 ; s = a + 16 + (-3)*c
@@ -502,9 +489,6 @@
;
;***********************************************************************
WelsI4x4LumaPredDDR_mmx:
- ;mov edx,[esp+4] ;pred
- ;mov eax,[esp+8] ;pRef
- ;mov ecx,[esp+12] ;stride
%assign push_num 0
LOAD_3_PARA
SIGN_EXTENSION r2, r2d
@@ -619,9 +603,6 @@
WELS_EXTERN WelsIChromaPredH_mmx
WelsIChromaPredH_mmx:
- ;mov edx, [esp+4] ;pred
- ;mov eax, [esp+8] ;pRef
- ;mov ecx, [esp+12] ;stride
%assign push_num 0
LOAD_3_PARA
SIGN_EXTENSION r2, r2d
--- a/codec/encoder/core/asm/memzero.asm
+++ b/codec/encoder/core/asm/memzero.asm
@@ -55,7 +55,6 @@
WelsPrefetchZero_mmx:
%assign push_num 0
LOAD_1_PARA
- ;mov eax,[esp+4]
prefetchnta [r0]
ret
--- a/codec/encoder/core/asm/quant.asm
+++ b/codec/encoder/core/asm/quant.asm
@@ -87,12 +87,9 @@
WelsQuant4x4_sse2:
%assign push_num 0
LOAD_3_PARA
- ;mov eax, [ff]
- ;mov ecx, [mf]
movdqa xmm2, [r1]
movdqa xmm3, [r2]
- ;mov edx, [pDct]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
@@ -108,13 +105,10 @@
LOAD_3_PARA
SIGN_EXTENSION r1, r1w
SIGN_EXTENSION r2, r2w
- ;mov ax, [mf]
SSE2_Copy8Times xmm3, r2d
- ;mov cx, [ff]
SSE2_Copy8Times xmm2, r1d
- ;mov edx, [pDct]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
@@ -128,12 +122,9 @@
WelsQuantFour4x4_sse2:
%assign push_num 0
LOAD_3_PARA
- ;mov eax, [ff]
- ;mov ecx, [mf]
MOVDQ xmm2, [r1]
MOVDQ xmm3, [r2]
- ;mov edx, [pDct]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x20]
@@ -153,12 +144,9 @@
WelsQuantFour4x4Max_sse2:
%assign push_num 0
LOAD_4_PARA
- ;mov eax, [ff]
- ;mov ecx, [mf]
MOVDQ xmm2, [r1]
MOVDQ xmm3, [r2]
- ;mov edx, [pDct]
pxor xmm4, xmm4
pxor xmm5, xmm5
pxor xmm6, xmm6
@@ -180,7 +168,6 @@
punpckhqdq xmm0, xmm1
pmaxsw xmm0, xmm1
- ;mov r0, [r3]
movq [r3], xmm0
LOAD_4_PARA_POP
ret
@@ -204,8 +191,6 @@
psubw %1, %2
%endmacro
-%define dct2x2 esp + 16
-%define iChromaDc esp + 20
;***********************************************************************
;int32_t WelsHadamardQuant2x2_mmx(int16_t *rs, const int16_t ff, int16_t mf, int16_t * pDct, int16_t * block);
;***********************************************************************
@@ -216,7 +201,6 @@
LOAD_5_PARA
SIGN_EXTENSION r1, r1w
SIGN_EXTENSION r2, r2w
- ;mov eax, [pDct]
movd mm0, [r0]
movd mm1, [r0 + 0x20]
punpcklwd mm0, mm1
@@ -237,16 +221,12 @@
punpcklwd mm1, mm3
;quant_2x2_dc
- ;mov ax, [mf]
MMX_Copy4Times mm3, r2d
- ;mov cx, [ff]
MMX_Copy4Times mm2, r1d
MMX_Quant4 mm1, mm0, mm2, mm3
; store dct_2x2
- ;mov edx, [dct2x2]
movq [r3], mm1
- ;mov ecx, [iChromaDc]
movq [r4], mm1
; pNonZeroCount of dct_2x2
@@ -279,7 +259,6 @@
LOAD_3_PARA
SIGN_EXTENSION r1, r1w
SIGN_EXTENSION r2, r2w
- ;mov eax, [pDct]
movd mm0, [r0]
movd mm1, [r0 + 0x20]
punpcklwd mm0, mm1
@@ -300,9 +279,7 @@
punpcklwd mm1, mm3
;quant_2x2_dc
- ;mov ax, [mf]
MMX_Copy4Times mm3, r2d
- ;mov cx, [ff]
MMX_Copy4Times mm2, r1d
MMX_Quant4 mm1, mm0, mm2, mm3
@@ -333,11 +310,8 @@
align 16
WELS_EXTERN WelsDequant4x4_sse2
WelsDequant4x4_sse2:
- ;ecx = dequant_mf[qp], edx = pDct
%assign push_num 0
LOAD_2_PARA
- ;mov ecx, [esp + 8]
- ;mov edx, [esp + 4]
movdqa xmm1, [r1]
SSE2_DeQuant8 [r0 ], xmm0, xmm1
@@ -353,11 +327,8 @@
WELS_EXTERN WelsDequantFour4x4_sse2
WelsDequantFour4x4_sse2:
- ;ecx = dequant_mf[qp], edx = pDct
%assign push_num 0
LOAD_2_PARA
- ;mov ecx, [esp + 8]
- ;mov edx, [esp + 4]
movdqa xmm1, [r1]
SSE2_DeQuant8 [r0 ], xmm0, xmm1
@@ -382,8 +353,6 @@
%ifndef X86_32
movzx r1, r1w
%endif
- ;mov eax, [esp + 4]
- ;mov cx, [esp + 8]
; WelsDequantLumaDc4x4
SSE2_Copy8Times xmm1, r1d
--- a/codec/encoder/core/asm/score.asm
+++ b/codec/encoder/core/asm/score.asm
@@ -176,7 +176,6 @@
%assign push_num 0
%endif
LOAD_2_PARA
- ;mov eax, [esp+8]
movdqa xmm0, [r1] ; 7 6 5 4 3 2 1 0
movdqa xmm1, [r1+16] ; f e d c b a 9 8
pextrw r2d, xmm0, 7 ; ecx = 7
@@ -191,7 +190,6 @@
pshufd xmm3, xmm1, 0xd8 ; f e b 7 d c 9 a
pshufhw xmm0, xmm2, 0x93 ; 6 3 2 5 8 4 1 0
pshuflw xmm1, xmm3, 0x39 ; f e b 7 a d c 9
- ;mov eax, [esp+4]
movdqa [r0],xmm0
movdqa [r0+16], xmm1
%ifdef X86_32
@@ -207,7 +205,6 @@
WelsScan4x4DcAc_ssse3:
%assign push_num 0
LOAD_2_PARA
- ;mov eax, [esp+8]
movdqa xmm0, [r1]
movdqa xmm1, [r1+16]
pextrw r2d, xmm0, 7 ; ecx = [7]
@@ -217,7 +214,6 @@
pshufb xmm1, [pb_scanacdc_maskb]
pshufb xmm0, [pb_scanacdc_maska]
- ;mov eax, [esp+4]
movdqa [r0],xmm0
movdqa [r0+16], xmm1
ret
@@ -229,7 +225,6 @@
WelsScan4x4Ac_sse2:
%assign push_num 0
LOAD_2_PARA
- ;mov eax, [esp+8]
movdqa xmm0, [r1]
movdqa xmm1, [r1+16]
movdqa xmm2, xmm0
@@ -256,7 +251,6 @@
pslldq xmm3, 14
por xmm1, xmm3
psrldq xmm2, 2
- ;mov eax, [esp+4]
movdqa [r0],xmm1
movdqa [r0+16], xmm2
ret
@@ -268,8 +262,6 @@
ALIGN 16
WELS_EXTERN WelsCalculateSingleCtr4x4_sse2
WelsCalculateSingleCtr4x4_sse2:
- ;push ebx
- ;mov eax, [esp+8]
%ifdef X86_32
push r3
%assign push_num 1
@@ -321,7 +313,6 @@
%else
mov retrd, r0d
%endif
- ;pop ebx
ret
@@ -333,7 +324,6 @@
WelsGetNoneZeroCount_sse2:
%assign push_num 0
LOAD_1_PARA
- ;mov eax, [esp+4]
movdqa xmm0, [r0]
movdqa xmm1, [r0+16]
pxor xmm2, xmm2
--- a/codec/processing/src/asm/denoisefilter.asm
+++ b/codec/processing/src/asm/denoisefilter.asm
@@ -172,11 +172,6 @@
; 4 0 5
; 6 7 8
; 0: the center point
-%define pushsize 4
-;%define pixel esp + pushsize + 4
-;%define stride esp + pushsize + 8
-;%define pixel r0
-;%define stride r1
BilateralLumaFilter8_sse2: