shithub: openh264

Download patch

ref: 918b211990ec2ab891cac748aa3561d5b0db74f8
parent: 258828f7ecf2ecf6bbd283c8438ce804dfdd1141
parent: f96918283f27cc813c84c432c99a46d56a9546eb
author: Licai Guo <[email protected]>
date: Mon Mar 17 15:12:47 EDT 2014

Merge pull request #515 from mstorsjo/remove-commented-x86-asm

Remove commented-out code for old, 32-bit-only x86 assembly function prologues/epilogues

--- a/codec/common/mb_copy.asm
+++ b/codec/common/mb_copy.asm
@@ -133,15 +133,6 @@
 ALIGN 16
 ; dst can be align with 16 bytes, but not sure about pSrc, 12/29/2011
 WelsCopy16x16NotAligned_sse2:
-	;push esi
-	;push edi
-	;push ebx
-
-	;mov edi, [esp+16]	; Dst
-	;mov eax, [esp+20]	; iStrideD
-	;mov esi, [esp+24]	; Src
-	;mov ecx, [esp+28]	; iStrideS
-
 	push r4
 	push r5
 	%assign  push_num 2
@@ -205,15 +196,6 @@
 ;***********************************************************************
 ALIGN 16
 WelsCopy16x8NotAligned_sse2:
-	;push esi
-	;push edi
-	;push ebx
-
-	;mov edi, [esp+16]	; Dst
-	;mov eax, [esp+20]	; iStrideD
-	;mov esi, [esp+24]	; Src
-	;mov ecx, [esp+28]	; iStrideS
-
 	push r4
 	push r5
 	%assign  push_num 2
@@ -255,13 +237,6 @@
 ;***********************************************************************
 ALIGN 16
 WelsCopy8x16_mmx:
-	;push ebx
-
-	;mov eax, [esp + 8 ]           ;Dst
-	;mov ecx, [esp + 12]           ;iStrideD
-	;mov ebx, [esp + 16]           ;Src
-	;mov edx, [esp + 20]           ;iStrideS
-
 	%assign  push_num 0
     LOAD_4_PARA
 
@@ -327,13 +302,6 @@
 ;***********************************************************************
 ALIGN 16
 WelsCopy8x8_mmx:
-	;push ebx
-	;push esi
-	;mov eax, [esp + 12]           ;Dst
-	;mov ecx, [esp + 16]           ;iStrideD
-	;mov esi, [esp + 20]           ;Src
-	;mov ebx, [esp + 24]           ;iStrideS
-
 	push r4
 	%assign  push_num 1
     LOAD_4_PARA
@@ -373,8 +341,6 @@
 	movq [r0+r1], mm7
 
 	WELSEMMS
-	;pop esi
-	;pop ebx
 	LOAD_4_PARA_POP
 	pop r4
 	ret
@@ -389,8 +355,6 @@
     %assign  push_num 0
     LOAD_2_PARA
 
-	;mov eax, [esp+4]	; mv_buffer
-	;movd xmm0, [esp+8]	; _mv
 	movd xmm0, r1d	; _mv
 	pshufd xmm1, xmm0, $00
 	movdqa [r0     ], xmm1
@@ -472,20 +436,6 @@
 ;                           int iHeight );
 ;*******************************************************************************
 PixelAvgWidthEq8_mmx:
-
-    ;push        esi
-    ;push        edi
-    ;push        ebp
-    ;push        ebx
-
-    ;mov         edi, [esp+20]       ; pDst
-    ;mov         eax, [esp+24]       ; iDstStride
-    ;mov         esi, [esp+28]       ; pSrcA
-    ;mov         ecx, [esp+32]       ; iSrcAStride
-    ;mov         ebp, [esp+36]       ; pSrcB
-    ;mov         edx, [esp+40]       ; iSrcBStride
-    ;mov         ebx, [esp+44]       ; iHeight
-
     %assign  push_num 0
     LOAD_7_PARA
 
@@ -575,17 +525,6 @@
 ;                          uint8_t *pDst, int iDstStride, int iHeight )
 ;*******************************************************************************
 McCopyWidthEq4_mmx:
-    ;push    esi
-    ;push    edi
-    ;push    ebx
-
-
-    ;mov esi,  [esp+16]
-    ;mov eax, [esp+20]
-    ;mov edi,  [esp+24]
-    ;mov ecx,  [esp+28]
-    ;mov edx,  [esp+32]
-
     push	r5
     %assign  push_num 1
     LOAD_5_PARA
@@ -614,14 +553,6 @@
 ;                           uint8_t *pDst, int iDstStride, int iHeight )
 ;*******************************************************************************
 McCopyWidthEq8_mmx:
-    ;push  esi
-    ;push  edi
-	;mov  esi, [esp+12]
-	;mov eax, [esp+16]
-	;mov edi, [esp+20]
-	;mov ecx, [esp+24]
-	;mov edx, [esp+28]
-
     %assign  push_num 0
     LOAD_5_PARA
 
@@ -659,15 +590,6 @@
 	movhps	[%1+8], %2
 %endmacro
 McCopyWidthEq16_sse2:
-    ;push    esi
-    ;push    edi
-
-    ;mov     esi, [esp+12]       ; pSrc
-    ;mov     eax, [esp+16]       ; iSrcStride
-    ;mov     edi, [esp+20]       ; pDst
-    ;mov     edx, [esp+24]       ; iDstStride
-    ;mov     ecx, [esp+28]       ; iHeight
-
     %assign  push_num 0
     LOAD_5_PARA
 	SIGN_EXTENSION	r1, r1d
--- a/codec/common/mc_chroma.asm
+++ b/codec/common/mc_chroma.asm
@@ -76,10 +76,6 @@
 ;*******************************************************************************
 WELS_EXTERN McChromaWidthEq4_mmx
 McChromaWidthEq4_mmx:
-	;push esi
-	;push edi
-	;push ebx
-
 	%assign  push_num 0
 	LOAD_6_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -86,8 +82,6 @@
 	SIGN_EXTENSION	r3, r3d
 	SIGN_EXTENSION	r5, r5d
 
-	;mov eax, [esp +12 + 20]
-
 	movd mm3, [r4];	[eax]
 	WELS_Zero mm7
 	punpcklbw mm3, mm3
@@ -103,12 +97,6 @@
 	punpcklbw mm4, mm7
 	punpckhbw mm6, mm7
 
-	;mov esi, [esp +12+ 4]
-	;mov eax, [esp + 12 + 8]
-	;mov edi, [esp + 12 + 12]
-	;mov edx, [esp + 12 + 16]
-    ;mov ecx, [esp + 12 + 24]
-
 	lea r4, [r0 + r1] ;lea ebx, [esi + eax]
 	movd mm0, [r0]
 	movd mm1, [r0+1]
@@ -149,9 +137,6 @@
 	jnz near .xloop
 	WELSEMMS
 	LOAD_6_PARA_POP
-	;pop ebx
-	;pop edi
-	;pop esi
 	ret
 
 
@@ -166,10 +151,6 @@
 ;*******************************************************************************
 WELS_EXTERN McChromaWidthEq8_sse2
 McChromaWidthEq8_sse2:
-	;push esi
-	;push edi
-	;push ebx
-
 	%assign  push_num 0
 	LOAD_6_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -176,7 +157,6 @@
 	SIGN_EXTENSION	r3, r3d
 	SIGN_EXTENSION	r5, r5d
 
-	;mov eax, [esp +12 + 20]
 	movd xmm3, [r4]
 	WELS_Zero xmm7
 	punpcklbw  xmm3, xmm3
@@ -193,12 +173,6 @@
 	punpcklbw  xmm4, xmm7
 	punpckhbw  xmm6, xmm7
 
-	;mov esi, [esp +12+ 4]
-	;mov eax, [esp + 12 + 8]
-	;mov edi, [esp + 12 + 12]
-	;mov edx, [esp + 12 + 16]
-    ;mov ecx, [esp + 12 + 24]
-
 	lea r4, [r0 + r1] ;lea ebx, [esi + eax]
 	movq xmm0, [r0]
 	movq xmm1, [r0+1]
@@ -240,9 +214,6 @@
 
 	LOAD_6_PARA_POP
 
-	;pop ebx
-	;pop edi
-	;pop esi
 	ret
 
 
@@ -259,9 +230,6 @@
 ;***********************************************************************
 WELS_EXTERN McChromaWidthEq8_ssse3
 McChromaWidthEq8_ssse3:
-	;push ebx
-	;push esi
-	;push edi
 	%assign  push_num 0
 	LOAD_6_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -268,8 +236,6 @@
 	SIGN_EXTENSION	r3, r3d
 	SIGN_EXTENSION	r5, r5d
 
-	;mov eax, [esp + 12 + 20]
-
     pxor      xmm7, xmm7
     movd   xmm5, [r4]
     punpcklwd xmm5, xmm5
@@ -278,12 +244,6 @@
     punpcklqdq xmm5, xmm5
     punpckhqdq xmm6, xmm6
 
-	;mov eax, [esp + 12 + 4]
-	;mov edx, [esp + 12 + 8]
-	;mov esi, [esp + 12 + 12]
-	;mov edi, [esp + 12 + 16]
-    ;mov ecx, [esp + 12 + 24]
-
     sub r2, r3 ;sub esi, edi
     sub r2, r3
 	movdqa xmm7, [h264_d0x20_sse2]
@@ -329,10 +289,6 @@
 	jnz .hloop_chroma
 
 	LOAD_6_PARA_POP
-
-	;pop edi
-	;pop esi
-	;pop ebx
 
 	ret
 
--- a/codec/common/mc_luma.asm
+++ b/codec/common/mc_luma.asm
@@ -83,15 +83,6 @@
 ;						int iHeight)
 ;*******************************************************************************
 McHorVer20WidthEq4_mmx:
-	;push esi
-	;push edi
-
-	;mov esi, [esp+12]
-	;mov eax, [esp+16]
-	;mov edi, [esp+20]
-	;mov ecx, [esp+24]
-	;mov edx, [esp+28]
-
     %assign  push_num 0
     LOAD_5_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -185,15 +176,6 @@
 ;                       )
 ;***********************************************************************
 McHorVer22Width8HorFirst_sse2:
-	;push esi
-	;push edi
-	;push ebx
-	;mov esi, [esp+16]     ;pSrc
-	;mov eax, [esp+20]	;iSrcStride
-	;mov edi, [esp+24]		;pDst
-	;mov edx, [esp+28]	;iDstStride
-	;mov ebx, [esp+32]	;iHeight
-
 	%assign  push_num 0
     LOAD_5_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -245,15 +227,6 @@
 ;                      );
 ;*******************************************************************************
 McHorVer20WidthEq8_sse2:
-	;push	esi
-	;push	edi
-
-	;mov esi, [esp + 12]         ;pSrc
-	;mov eax, [esp + 16]         ;iSrcStride
-	;mov edi, [esp + 20]         ;pDst
-	;mov ecx, [esp + 28]         ;iHeight
-	;mov edx, [esp + 24]			;iDstStride
-
 	%assign  push_num 0
     LOAD_5_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -309,14 +282,6 @@
 ;                      );
 ;*******************************************************************************
 McHorVer20WidthEq16_sse2:
-	;push	esi
-	;push	edi
-	;mov esi, [esp + 12]         ;pSrc
-	;mov eax, [esp + 16]         ;iSrcStride
-	;mov edi, [esp + 20]         ;pDst
-	;mov ecx, [esp + 28]         ;iHeight
-	;mov edx, [esp + 24]			;iDstStride
-
 	%assign  push_num 0
     LOAD_5_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -398,14 +363,6 @@
 ;*******************************************************************************
 ALIGN 16
 McHorVer02WidthEq8_sse2:
-	;push esi
-	;push edi
-	;mov esi, [esp + 12]           ;pSrc
-	;mov edx, [esp + 16]	          ;iSrcStride
-	;mov edi, [esp + 20]           ;pDst
-	;mov eax, [esp + 24]           ;iDstStride
-	;mov ecx, [esp + 28]           ;iHeight
-
 	%assign  push_num 0
     LOAD_5_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -503,17 +460,6 @@
 ;***********************************************************************
 ALIGN 16
 McHorVer02Height9Or17_sse2:
-	;push esi
-	;push edi
-	;push ebx
-
-	;mov esi, [esp + 16]
-	;mov edx, [esp + 20]
-	;mov edi, [esp + 24]
-	;mov eax, [esp + 28]
-	;mov ecx, [esp + 36]
-	;mov ebx, [esp + 32]
-
 	%assign  push_num 0
     LOAD_6_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -612,9 +558,6 @@
 .x_loop_dec:
 	dec r4
 	jz  near .xx_exit
-	;mov esi, [esp + 16]
-	;mov edi, [esp + 24]
-	;mov ecx, [esp + 36]
 %ifdef X86_32
 	mov	r0, arg1
 	mov r2, arg3
@@ -651,16 +594,6 @@
 ;                      );
 ;***********************************************************************
 McHorVer20Width9Or17_sse2:
-	;push esi
-	;push edi
-	;push ebx
-	;mov esi, [esp+16]
-	;mov eax, [esp+20]
-	;mov edi, [esp+24]
-	;mov edx, [esp+28]
-	;mov ecx, [esp+32]
-	;mov ebx, [esp+36]
-
 	%assign  push_num 0
     LOAD_6_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -819,16 +752,6 @@
 ;							int32_t iWidth,int32_t iHeight);
 ;***********************************************************************
 McHorVer22HorFirst_sse2:
-	;push esi
-	;push edi
-	;push ebx
-	;mov esi, [esp+16]
-	;mov eax, [esp+20]
-	;mov edi, [esp+24]
-	;mov edx, [esp+28]
-	;mov ecx, [esp+32]
-	;mov ebx, [esp+36]
-
 	%assign  push_num 0
     LOAD_6_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -996,18 +919,6 @@
 ;***********************************************************************
 
  McHorVer22Width8VerLastAlign_sse2:
-	;push esi
-	;push edi
-	;push ebx
-	;push ebp
-
-	;mov esi, [esp+20]
-	;mov eax, [esp+24]
-	;mov edi, [esp+28]
-	;mov edx, [esp+32]
-	;mov ebx, [esp+36]
-	;mov ecx, [esp+40]
-
 	%assign  push_num 0
     LOAD_6_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -1104,9 +1015,6 @@
 .x_loop_dec:
 	dec r4
 	jz near .exit
-	;mov esi, [esp+20]
-	;mov edi, [esp+28]
-	;mov ecx, [esp+40]
 %ifdef X86_32
 	mov	r0, arg1
 	mov r2, arg3
@@ -1140,18 +1048,6 @@
 ;***********************************************************************
 
  McHorVer22Width8VerLastUnAlign_sse2:
-	;push esi
-	;push edi
-	;push ebx
-	;push ebp
-
-	;mov esi, [esp+20]
-	;mov eax, [esp+24]
-	;mov edi, [esp+28]
-	;mov edx, [esp+32]
-	;mov ebx, [esp+36]
-	;mov ecx, [esp+40]
-
 	%assign  push_num 0
     LOAD_6_PARA
 	SIGN_EXTENSION	r1, r1d
@@ -1247,9 +1143,6 @@
 .x_loop_dec:
 	dec r4
 	jz near .exit
-	;mov esi, [esp+20]
-	;mov edi, [esp+28]
-	;mov ecx, [esp+40]
 %ifdef X86_32
 	mov	r0, arg1
 	mov r2, arg3
--- a/codec/common/satd_sad.asm
+++ b/codec/common/satd_sad.asm
@@ -158,12 +158,6 @@
 WELS_EXTERN WelsSampleSatd4x4_sse2
 align 16
 WelsSampleSatd4x4_sse2:
-	;push      ebx
-	;mov       eax,  [esp+8]
-	;mov       ebx,  [esp+12]
-	;mov       ecx,  [esp+16]
-	;mov       edx,  [esp+20]
-
 	%assign  push_num 0
 	LOAD_4_PARA
 	SIGN_EXTENSION r1, r1d
@@ -238,12 +232,6 @@
  WELS_EXTERN WelsSampleSatd8x8_sse2
 align 16
  WelsSampleSatd8x8_sse2:
-	 ;push   ebx
-	 ;mov    eax,    [esp+8]
-	 ;mov    ebx,    [esp+12]
-	 ;mov    ecx,    [esp+16]
-	 ;mov    edx,    [esp+20]
-
 	%assign  push_num 0
 	LOAD_4_PARA
 	SIGN_EXTENSION r1, r1d
@@ -265,12 +253,6 @@
  WELS_EXTERN WelsSampleSatd8x16_sse2
 align 16
  WelsSampleSatd8x16_sse2:
-	 ;push   ebx
-	 ;mov    eax,    [esp+8]
-	 ;mov    ebx,    [esp+12]
-	 ;mov    ecx,    [esp+16]
-	 ;mov    edx,    [esp+20]
-
 	 %assign  push_num 0
 	 LOAD_4_PARA
 	 SIGN_EXTENSION r1, r1d
@@ -297,12 +279,6 @@
 WELS_EXTERN WelsSampleSatd16x8_sse2
 align 16
 WelsSampleSatd16x8_sse2:
-	;push   ebx
-	;mov    eax,    [esp+8]
-	;mov    ebx,    [esp+12]
-	;mov    ecx,    [esp+16]
-	;mov    edx,    [esp+20]
-
 	%assign  push_num 0
 	LOAD_4_PARA
 	SIGN_EXTENSION r1, r1d
@@ -316,8 +292,6 @@
 
 	pop r2
 	pop r0
-	;mov    eax,    [esp+8]
-    ;mov    ecx,    [esp+16]
     add    r0,    8
     add    r2,    8
 	SSE2_GetSatd8x8
@@ -336,12 +310,6 @@
 WELS_EXTERN WelsSampleSatd16x16_sse2
 align 16
 WelsSampleSatd16x16_sse2:
-	;push   ebx
-	;mov    eax,    [esp+8]
-	;mov    ebx,    [esp+12]
-	;mov    ecx,    [esp+16]
-	;mov    edx,    [esp+20]
-
 	%assign  push_num 0
 	LOAD_4_PARA
 	SIGN_EXTENSION r1, r1d
@@ -358,8 +326,6 @@
 
 	pop r2
 	pop r0
-	;mov    eax,    [esp+8]
-	;mov    ecx,    [esp+16]
 	add    r0,    8
 	add    r2,    8
 
@@ -1022,12 +988,6 @@
 ;***********************************************************************
 WELS_EXTERN WelsSampleSatd4x4_sse41
 WelsSampleSatd4x4_sse41:
-	;push        ebx
-	;mov         eax,[esp+8]
-	;mov         ebx,[esp+12]
-	;mov         ecx,[esp+16]
-	;mov         edx,[esp+20]
-
 	%assign  push_num 0
 	LOAD_4_PARA
 	SIGN_EXTENSION r1, r1d
@@ -1082,13 +1042,6 @@
 WELS_EXTERN WelsSampleSatd8x8_sse41
 align 16
 WelsSampleSatd8x8_sse41:
-	;push   ebx
-	;push   esi
-	;push   edi
-	;mov    eax,    [esp+16]
-	;mov    ebx,    [esp+20]
-	;mov    ecx,    [esp+24]
-	;mov    edx,    [esp+28]
 %ifdef X86_32
 	push  r4
 	push  r5
@@ -1121,15 +1074,6 @@
 WELS_EXTERN WelsSampleSatd8x16_sse41
 align 16
 WelsSampleSatd8x16_sse41:
-	;push   ebx
-	;push   esi
-	;push   edi
-	;push   ebp
-	;%define pushsize   16
-	;mov    eax,    [esp+pushsize+4]
-	;mov    ebx,    [esp+pushsize+8]
-	;mov    ecx,    [esp+pushsize+12]
-	;mov    edx,    [esp+pushsize+16]
 %ifdef X86_32
 	push  r4
 	push  r5
@@ -1168,13 +1112,6 @@
 WELS_EXTERN WelsSampleSatd16x8_sse41
 align 16
 WelsSampleSatd16x8_sse41:
-	;push   ebx
-	;push   esi
-	;push   edi
-	;mov    eax,    [esp+16]
-	;mov    ebx,    [esp+20]
-	;mov    ecx,    [esp+24]
-	;mov    edx,    [esp+28]
 %ifdef X86_32
 	push  r4
 	push  r5
@@ -1197,8 +1134,6 @@
 
 	pop  r2
 	pop  r0
-	;mov			eax,    [esp+16]
-	;mov			ecx,    [esp+24]
 	add			r0,    8
 	add			r2,    8
 	SSE41_GetSatd8x4
@@ -1222,15 +1157,6 @@
 WELS_EXTERN WelsSampleSatd16x16_sse41
 align 16
 WelsSampleSatd16x16_sse41:
-	;push   ebx
-	;push   esi
-	;push   edi
-	;push   ebp
-	;%define pushsize   16
-	;mov    eax,    [esp+pushsize+4]
-	;mov    ebx,    [esp+pushsize+8]
-	;mov    ecx,    [esp+pushsize+12]
-	;mov    edx,    [esp+pushsize+16]
 %ifdef X86_32
 	push  r4
 	push  r5
@@ -1259,8 +1185,6 @@
 
 	pop  r2
 	pop  r0
-	;mov			eax,    [esp+pushsize+4]
-	;mov			ecx,    [esp+pushsize+12]
 	add			r0,    8
 	add			r2,    8
 	mov         r6,    0
@@ -1272,7 +1196,6 @@
 	cmp         r6,  4
 	jl          loop_get_satd_16x16_right
 	SSSE3_SumWHorizon retrd, xmm6, xmm5, xmm7
-	;%undef pushsize
 	LOAD_4_PARA_POP
 %ifdef X86_32
 	pop  r6
@@ -1355,14 +1278,6 @@
 WELS_EXTERN WelsSampleSad16x16_sse2
 align 16
 WelsSampleSad16x16_sse2:
-	;push ebx
-	;push edi
-	;push esi
-	;%define _STACK_SIZE		12
-	;mov eax, [esp+_STACK_SIZE+4 ]
-	;mov	ebx, [esp+_STACK_SIZE+8 ]
-	;mov ecx, [esp+_STACK_SIZE+12]
-	;mov edx, [esp+_STACK_SIZE+16]
 %ifdef X86_32
 	push  r4
 	push  r5
@@ -1406,12 +1321,6 @@
 WELS_EXTERN WelsSampleSad16x8_sse2
 align 16
 WelsSampleSad16x8_sse2:
-	;push   ebx
-	;mov    eax,    [esp+8]
-	;mov    ebx,    [esp+12]
-	;mov    ecx,    [esp+16]
-	;mov    edx,    [esp+20]
-
 	%assign  push_num 0
 	LOAD_4_PARA
 	SIGN_EXTENSION r1, r1d
@@ -1438,12 +1347,6 @@
 
 WELS_EXTERN WelsSampleSad8x16_sse2
 WelsSampleSad8x16_sse2:
-	;push   ebx
-	;mov    eax,    [esp+8]
-	;mov    ebx,    [esp+12]
-	;mov    ecx,    [esp+16]
-	;mov    edx,    [esp+20]
-
 	%assign  push_num 0
 	LOAD_4_PARA
 	SIGN_EXTENSION r1, r1d
@@ -1475,15 +1378,6 @@
 
 WELS_EXTERN WelsSampleSad8x8_sse21
 WelsSampleSad8x8_sse21:
-    ;mov    ecx,    [esp+12]
-	;mov    edx,    ecx
-    ;CACHE_SPLIT_CHECK edx, 8, 64
-	;jle    near   .pixel_sad_8x8_nsplit
-	;push   ebx
-	;push   edi
-	;mov    eax,    [esp+12]
-	;mov    ebx,    [esp+16]
-
 	%assign  push_num 0
 	mov		r2,  arg3
 	push	r2
@@ -1596,10 +1490,6 @@
 	jmp        .return
 
 .pixel_sad_8x8_nsplit:
-    ;push   ebx
-    ;mov    eax,    [esp+8]
-	;mov    ebx,    [esp+12]
-	;mov    edx,    [esp+20]
 
 	pop r2
 	%assign  push_num 0
@@ -1647,12 +1537,6 @@
 %endmacro
 WELS_EXTERN WelsSampleSadFour16x16_sse2
 WelsSampleSadFour16x16_sse2:
-	;push ebx
-	;mov    eax,    [esp+8]
-	;mov    ebx,    [esp+12]
-	;mov    ecx,    [esp+16]
-	;mov    edx,    [esp+20]
-
 	%assign  push_num 0
 	LOAD_5_PARA
 	SIGN_EXTENSION r1, r1d
@@ -1753,7 +1637,6 @@
 	psadbw xmm0,   xmm3
 	paddw xmm5,   xmm0
 
-	;mov        ecx,  [esp+24]
 	movhlps    xmm0, xmm4
 	paddw      xmm4, xmm0
 	movhlps    xmm0, xmm5
@@ -1772,13 +1655,6 @@
 
 WELS_EXTERN WelsSampleSadFour16x8_sse2
 WelsSampleSadFour16x8_sse2:
-	;push ebx
-	;push edi
-	;mov    eax,    [esp+12]
-	;mov    ebx,    [esp+16]
-	;mov    edi,    [esp+20]
-	;mov    edx,    [esp+24]
-
 	%assign  push_num 0
 	LOAD_5_PARA
 	SIGN_EXTENSION r1, r1d
@@ -1847,7 +1723,6 @@
 	psadbw xmm1,   xmm3
 	paddw xmm5,   xmm1
 
-	;mov        edi,  [esp+28]
 	movhlps    xmm0, xmm4
 	paddw      xmm4, xmm0
 	movhlps    xmm0, xmm5
@@ -1865,13 +1740,6 @@
 
 WELS_EXTERN WelsSampleSadFour8x16_sse2
 WelsSampleSadFour8x16_sse2:
-	;push ebx
-	;push edi
-	;mov    eax,    [esp+12]
-	;mov    ebx,    [esp+16]
-	;mov    edi,    [esp+20]
-	;mov    edx,    [esp+24]
-
 	%assign  push_num 0
 	LOAD_5_PARA
 	SIGN_EXTENSION r1, r1d
@@ -2066,7 +1934,6 @@
 	psadbw xmm0,  xmm3
 	paddw  xmm5,  xmm0
 
-	;mov        edi,  [esp+28]
 	movhlps    xmm0, xmm4
 	paddw      xmm4, xmm0
 	movhlps    xmm0, xmm5
@@ -2085,13 +1952,6 @@
 
 WELS_EXTERN WelsSampleSadFour8x8_sse2
 WelsSampleSadFour8x8_sse2:
-	;push ebx
-	;push edi
-	;mov    eax,    [esp+12]
-	;mov    ebx,    [esp+16]
-	;mov    edi,    [esp+20]
-	;mov    edx,    [esp+24]
-
 	%assign  push_num 0
 	LOAD_5_PARA
 	SIGN_EXTENSION r1, r1d
@@ -2195,7 +2055,6 @@
 	psadbw xmm0,  xmm3
 	paddw  xmm5,  xmm0
 
-	;mov        edi,  [esp+28]
 	movhlps    xmm0, xmm4
 	paddw      xmm4, xmm0
 	movhlps    xmm0, xmm5
@@ -2213,13 +2072,6 @@
 
 WELS_EXTERN WelsSampleSadFour4x4_sse2
 WelsSampleSadFour4x4_sse2:
-	;push ebx
-	;push edi
-	;mov    eax,    [esp+12]
-	;mov    ebx,    [esp+16]
-	;mov    edi,    [esp+20]
-	;mov    edx,    [esp+24]
-
 	%assign  push_num 0
 	LOAD_5_PARA
 	SIGN_EXTENSION r1, r1d
@@ -2279,7 +2131,6 @@
 	paddw      xmm3, xmm0
 	movhlps    xmm0, xmm4
 	paddw      xmm4, xmm0
-	;mov        edi,  [esp+28]
 	punpckldq  xmm1, xmm4
 	punpckldq  xmm2, xmm3
 	punpcklqdq xmm1, xmm2
@@ -2300,17 +2151,6 @@
 ;   int32_t WelsSampleSad4x4_mmx (uint8_t *, int32_t, uint8_t *, int32_t )
 ;***********************************************************************
 WelsSampleSad4x4_mmx:
-    ;push    ebx
-	;%define pushsize     4
-	;%define pix1address	 esp+pushsize+4
-	;%define pix1stride   esp+pushsize+8
-	;%define pix2address  esp+pushsize+12
-	;%define pix2stride   esp+pushsize+16
-    ;mov		  eax, [pix1address]
-    ;mov		  ebx, [pix1stride ]
-    ;mov		  ecx, [pix2address]
-    ;mov		  edx, [pix2stride ]
-
     %assign  push_num 0
 	LOAD_4_PARA
 	SIGN_EXTENSION r1, r1d
--- a/codec/decoder/core/asm/block_add.asm
+++ b/codec/decoder/core/asm/block_add.asm
@@ -56,15 +56,10 @@
 ;  void WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
 ;*******************************************************************************
 WelsResBlockZero16x16_sse2:
-        ;push     r0
         %assign push_num 0
         LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
-	;mov      r0,        [esp+08h]
-	;mov      r1,        [esp+0ch]
-	;lea      r1,        [r1*2]
         lea 	r1, 	[r1*2]
-	;lea      r2,        [r1*3]
         lea 	r2,	[r1*3]
 
 	pxor     xmm7,       xmm7
@@ -124,7 +119,6 @@
 	movdqa   [r0+r2],     xmm7
 	movdqa   [r0+r2+10h],     xmm7
 
-    ;pop      r0
 	ret
 
 
@@ -135,12 +129,9 @@
 ;  void WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
 ;*******************************************************************************
 WelsResBlockZero8x8_sse2:
-	  ;push      r0
 	  %assign push_num 0
           LOAD_2_PARA
 	  SIGN_EXTENSION r1, r1d
-      	  ;mov       r0,     [esp+08h]
-	  ;mov       r1,     [esp+0ch]
 	  lea       r1,     [r1*2]
 	  lea       r2,     [r1*3]
 
@@ -158,6 +149,5 @@
 	  movdqa    [r0+r2],     xmm7
 
 
-	  ;pop       r0
 	  ret
 
--- a/codec/decoder/core/asm/intra_pred.asm
+++ b/codec/decoder/core/asm/intra_pred.asm
@@ -192,8 +192,6 @@
 	%assign push_num 0
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
-	;mov			eax,	[esp+4]			;pPred
-	;mov			ecx,	[esp+8]			;kiStride
 
 	movzx		r2,	byte [r0-1]
 	movd		xmm0,	r2d
@@ -225,7 +223,6 @@
 ; void WelsDecoderI16x16LumaPredPlane_sse2(uint8_t *pPred, const int32_t kiStride);
 ;*******************************************************************************
 WelsDecoderI16x16LumaPredPlane_sse2:
-		;%define pushsize	4
 		push r3
 		push r4
 		%assign push_num 2
@@ -232,9 +229,6 @@
 		LOAD_2_PARA
 		SIGN_EXTENSION r1, r1d
 		mov r4, r0 ; save r0 in r4
-		;push	esi
-		;mov		esi,	[esp + pushsize + 4]
-		;mov		ecx,	[esp + pushsize + 8]
 		sub		r0,	1
 		sub		r0,	r1
 
@@ -286,7 +280,6 @@
 		sar		r2,	6				; c = (5 * V + 32) >> 6;
 		SSE2_Copy8Times	xmm4, r2d		; xmm4 = c,c,c,c,c,c,c,c
 
-		;mov		esi,	[esp + pushsize + 4]
 		mov r0, r4
 		add		r3,	16
 		imul	r2,	-7
@@ -313,7 +306,6 @@
 		cmp		r2,	16
 		jnz get_i16x16_luma_pred_plane_sse2_1
 
-		;pop		esi
 		pop r4
 		pop r3
 		ret
@@ -338,8 +330,6 @@
 	%assign push_num 0
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
-    ;mov     eax, [esp+4]    ; pPred
-    ;mov     ecx, [esp+8]    ; kiStride
 
     COPY_16_TIMES r0,	xmm0
     movdqa  [r0],		xmm0
@@ -364,8 +354,6 @@
 	%assign push_num 0
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
-    ;mov     edx, [esp+4]    ; pPred
-    ;mov     ecx, [esp+8]    ; kiStride
 
     sub     r0, r1
     movdqa  xmm0, [r0]
@@ -402,7 +390,6 @@
 ;*******************************************************************************
 WELS_EXTERN WelsDecoderIChromaPredPlane_sse2
 WelsDecoderIChromaPredPlane_sse2:
-		;%define pushsize	4
 		push r3
 		push r4
 		%assign push_num 2
@@ -409,9 +396,6 @@
 		LOAD_2_PARA
 		SIGN_EXTENSION r1, r1d
 		mov r4, r0
-		;push	esi
-		;mov		esi,	[esp + pushsize + 4]	;pPred
-		;mov		ecx,	[esp + pushsize + 8]	;kiStride
 		sub		r0,	1
 		sub		r0,	r1
 
@@ -466,7 +450,6 @@
 		sar		r2,	5				; c = (17 * V + 16) >> 5;
 		SSE2_Copy8Times	xmm4, r2d		; mm4 = c,c,c,c,c,c,c,c
 
-		;mov		esi,	[esp + pushsize + 4]
 		mov 	r0, r4
 		add		r3,	16
 		imul	r2,	-3
@@ -489,7 +472,6 @@
 		cmp		r2,	8
 		jnz get_i_chroma_pred_plane_sse2_1
 
-		;pop		esi
 		pop r4
 		pop r3
 		WELSEMMS
@@ -513,9 +495,6 @@
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
 	mov r2, r0
-	;mov			edx,[esp+4]			;pPred
-	;mov         eax,edx
-	;mov			ecx,[esp+8]		;kiStride
 
 	movq        mm1,[r2+r1-8]		;get value of 11,decreasing 8 is trying to improve the performance of movq mm1[8] = 11
 	movq        mm2,[r2-8]			;get value of 6 mm2[8] = 6
@@ -586,9 +565,6 @@
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
 	mov r2, r0
-	;mov			edx,	[esp+4]			;pPred
-	;mov         eax,	edx
-	;mov			ecx,	[esp+8]			;kiStride
 
 	movq		mm0,	[r2-8]
 	psrlq		mm0,	38h
@@ -631,8 +607,6 @@
 	%assign push_num 0
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
-	;mov			eax,		[esp+4]    ;pPred
-	;mov			ecx,		[esp+8]    ;kiStride
 
 	sub			r0,		r1
 	movq		mm0,		[r0]
@@ -689,9 +663,6 @@
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
 	mov r2, r0
-	;mov			edx, [esp+4]			; pPred
-	;mov         eax, edx
-	;mov			ecx, [esp+8]            ; kiStride
 	sub         r2, r1
 	movd        mm0, [r2-1]            ; mm0 = [xx xx xx xx t2 t1 t0 lt]
 	psllq       mm0, 20h                ; mm0 = [t2 t1 t0 lt xx xx xx xx]
@@ -776,9 +747,6 @@
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
 	mov r2, r0
-	;mov			edx, [esp+4]			; pPred
-	;mov         eax, edx
-	;mov			ecx, [esp+8]            ; kiStride
 
 	movd        mm0, [r2-4]            ; mm0[3] = l0
 	punpcklbw   mm0, [r2+r1-4]        ; mm0[7] = l1, mm0[6] = l0
@@ -866,9 +834,6 @@
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
 	mov r2, r0
-	;mov			edx, [esp+4]			; pPred
-	;mov         eax, edx
-	;mov			ecx, [esp+8]            ; kiStride
 	sub         r2, r1
 	movq        mm0, [r2-1]            ; mm0 = [xx xx xx t3 t2 t1 t0 lt]
 	psllq       mm0, 18h                ; mm0 = [t3 t2 t1 t0 lt xx xx xx]
@@ -957,9 +922,6 @@
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
 	mov r2, r0
-	;mov			edx, [esp+4]			; pPred
-	;mov         eax, edx
-	;mov			ecx, [esp+8]            ; kiStride
 	sub         r2, r1
 	movq        mm0, [r2]              ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0]
 	movq        mm1, mm0
@@ -1030,9 +992,6 @@
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
 	mov r2, r0
-	;mov			edx, [esp+4]			; pPred
-	;mov         eax, edx
-	;mov			ecx, [esp+8]            ; kiStride
 
 	sub         r2, r1
 	movq        mm0, [r2]              ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0]
@@ -1077,9 +1036,6 @@
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
 	mov r4, r0
-	;push        ebx
-	;mov         eax, [esp+8]			; pPred
-	;mov			ecx, [esp+12]           ; kiStride
 
 	sub         r0, r1
 	movq        mm0, [r0]
@@ -1144,8 +1100,6 @@
 	psllq       mm1, 0x20
 	pxor        mm1, mm2                 ; mm2 = m_down
 
-	;mov         edx, [esp+8]			 ; pPred
-
 	movq        [r4],       mm0
 	movq        [r4+r1],   mm0
 	movq        [r4+2*r1], mm0
@@ -1159,7 +1113,6 @@
 	lea         r4, [r4+2*r1]
 	movq        [r4+r1],   mm1
 
-	;pop         ebx
 	pop r4
 	pop r3
 	WELSEMMS
@@ -1174,9 +1127,6 @@
 ;*******************************************************************************
 WELS_EXTERN WelsDecoderI16x16LumaPredDc_sse2
 WelsDecoderI16x16LumaPredDc_sse2:
-	;push        ebx
-	;mov         eax, [esp+8]			; pPred
-	;mov			ecx, [esp+12]           ; kiStride
 	push 	r3
 	push 	r4
 	%assign push_num 2
@@ -1211,8 +1161,6 @@
 	pmuludq     xmm0, [mmx_01bytes]
 	pshufd      xmm0, xmm0, 0
 
-	;mov         edx, [esp+8]			; pPred
-
 	movdqa      [r4],       xmm0
 	movdqa      [r4+r1],   xmm0
 	movdqa      [r4+2*r1], xmm0
@@ -1244,7 +1192,6 @@
 
 	movdqa      [r4+r1],   xmm0
 
-	;pop         ebx
 	pop r4
 	pop r3
 
@@ -1260,10 +1207,6 @@
 ;*******************************************************************************
 WELS_EXTERN WelsDecoderI16x16LumaPredDcTop_sse2
 WelsDecoderI16x16LumaPredDcTop_sse2:
-	;push ebx
-	;%define PUSH_SIZE 4
-	;mov eax, [esp+PUSH_SIZE+4]	; pPred
-	;mov ebx, [esp+PUSH_SIZE+8]	; kiStride
 	%assign push_num 0
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
@@ -1328,8 +1271,6 @@
 	movdqa [r0+2*r1], xmm0
 	movdqa [r0+r2], xmm1
 
-	;%undef PUSH_SIZE
-	;pop ebx
 	ret
 
 ALIGN 16
@@ -1338,12 +1279,6 @@
 ;*******************************************************************************
 WELS_EXTERN WelsDecoderI16x16LumaPredDcNA_sse2
 WelsDecoderI16x16LumaPredDcNA_sse2:
-	;push ebx
-
-	;%define PUSH_SIZE	4
-
-	;mov eax, [esp+PUSH_SIZE+4]	; pPred
-	;mov ebx, [esp+PUSH_SIZE+8]	; kiStride
 	%assign push_num 0
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
@@ -1371,9 +1306,6 @@
 	movdqa [r0+2*r1], xmm0
 	movdqa [r0+r2], xmm1
 
-	;%undef PUSH_SIZE
-
-	;pop ebx
 	ret
 
 ALIGN 16
@@ -1382,12 +1314,6 @@
 ;*******************************************************************************
 WELS_EXTERN WelsDecoderIChromaPredDcLeft_mmx
 WelsDecoderIChromaPredDcLeft_mmx:
-	;push ebx
-	;push esi
-	;%define PUSH_SIZE 8
-	;mov esi, [esp+PUSH_SIZE+4]	; pPred
-	;mov ecx, [esp+PUSH_SIZE+8]	; kiStride
-	;mov eax, esi
 	push r3
 	push r4
 	%assign push_num 2
@@ -1450,8 +1376,6 @@
 	movq [r4+r1], mm3
 	movq [r4+2*r1], mm2
 	movq [r4+r2], mm3
-	;pop esi
-	;pop ebx
 	pop r4
 	pop r3
 	emms
@@ -1463,12 +1387,6 @@
 ;*******************************************************************************
 WELS_EXTERN WelsDecoderIChromaPredDcTop_sse2
 WelsDecoderIChromaPredDcTop_sse2:
-	;push ebx
-	;%define PUSH_SIZE 4
-	;mov eax, [esp+PUSH_SIZE+4]	; pPred
-	;mov ecx, [esp+PUSH_SIZE+8]	; kiStride
-	;mov ebx, ecx
-	;neg ebx
 	%assign push_num 0
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
@@ -1500,8 +1418,6 @@
 	movq [r0+r1], xmm0
 	movq [r0+2*r1], xmm0
 	movq [r0+r2], xmm0
-	;%undef PUSH_SIZE
-	;pop ebx
 	ret
 
 ALIGN 16
@@ -1510,10 +1426,6 @@
 ;*******************************************************************************
 WELS_EXTERN WelsDecoderIChromaPredDcNA_mmx
 WelsDecoderIChromaPredDcNA_mmx:
-	;push ebx
-	;%define PUSH_SIZE 4
-	;mov eax, [esp+PUSH_SIZE+4]	; pPred
-	;mov ebx, [esp+PUSH_SIZE+8]	; kiStride
 	%assign push_num 0
 	LOAD_2_PARA
 	SIGN_EXTENSION r1, r1d
@@ -1529,8 +1441,6 @@
 	movq [r0+r1], mm1
 	movq [r0+2*r1], mm0
 	movq [r0+r2], mm1
-	;%undef PUSH_SIZE
-	;pop ebx
 	emms
 	ret
 
--- a/codec/encoder/core/asm/dct.asm
+++ b/codec/encoder/core/asm/dct.asm
@@ -136,11 +136,6 @@
 ;***********************************************************************
 WELS_EXTERN WelsDctT4_mmx
 WelsDctT4_mmx:
-    ;push    ebx
-    ;mov     eax, [esp+12]   ; pix1
-    ;mov     ebx, [esp+16]   ; i_pix1
-    ;mov     ecx, [esp+20]   ; pix2
-    ;mov     edx, [esp+24]   ; i_pix2
     %assign push_num 0
     LOAD_5_PARA
     SIGN_EXTENSION r2, r2d
@@ -155,7 +150,6 @@
     MMX_DCT			mm3, mm5, mm2 ,mm4, mm1, mm6
     MMX_Trans4x4W	mm2, mm3, mm4, mm1, mm5
 
-    ;mov     eax, [esp+ 8]   ; pDct
     movq    [r0+ 0],   mm2
     movq    [r0+ 8],   mm1
     movq    [r0+16],   mm5
@@ -162,7 +156,6 @@
     movq    [r0+24],   mm4
     WELSEMMS
     LOAD_5_PARA_POP
-    ;pop     ebx
     ret
 
 
@@ -171,26 +164,14 @@
 ;***********************************************************************
 WELS_EXTERN WelsIDctT4Rec_mmx
 WelsIDctT4Rec_mmx:
-	;push   ebx
-;%define	pushsize	4
-;%define     p_dst       esp+pushsize+4
-;%define     i_dst       esp+pushsize+8
-;%define     p_pred      esp+pushsize+12
-;%define     i_pred      esp+pushsize+16
-;%define     pDct        esp+pushsize+20
     %assign push_num 0
     LOAD_5_PARA
     SIGN_EXTENSION r1, r1d
     SIGN_EXTENSION r3, r3d
-;	mov     eax, [pDct   ]
     movq    mm0, [r4+ 0]
     movq    mm1, [r4+ 8]
     movq    mm2, [r4+16]
     movq    mm3, [r4+24]
-    ;mov     edx, [p_dst ] ; r0
-    ;mov     ecx, [i_dst ] ; r1
-    ;mov     eax, [p_pred] ; r2
-    ;mov     ebx, [i_pred] ; r3
 
 	MMX_Trans4x4W		mm0, mm1, mm2, mm3, mm4
 	MMX_IDCT			mm1, mm2, mm3, mm4, mm0, mm6
@@ -209,13 +190,6 @@
 
 	WELSEMMS
     LOAD_5_PARA_POP
-;%undef	pushsize
-;%undef  p_dst
-;%undef  i_dst
-;%undef  p_pred
-;%undef  i_pred
-;%undef  pDct
-;    pop ebx
     ret
 
 
@@ -319,13 +293,6 @@
 WELS_EXTERN WelsDctFourT4_sse2
 ALIGN 16
 WelsDctFourT4_sse2:
-    ;push    ebx
-    ;push	esi
-    ;mov		esi, [esp+12]
-    ;mov     eax, [esp+16]   ; pix1
-    ;mov     ebx, [esp+20]   ; i_pix1
-    ;mov     ecx, [esp+24]   ; pix2
-    ;mov     edx, [esp+28]   ; i_pix2
     %assign push_num 0
     LOAD_5_PARA
     SIGN_EXTENSION r2, r2d
@@ -365,17 +332,10 @@
 	lea		r0, [r0+64]
 	SSE2_Store4x8p r0, xmm4, xmm2, xmm3, xmm0, xmm5
 
-    ;pop esi
-    ;pop ebx
 	LOAD_5_PARA_POP
     ret
 
 
-;%define		rec			esp + pushsize + 4
-;%define		stride		esp + pushsize + 8
-;%define		pred		esp + pushsize + 12
-;%define		pred_stride	esp + pushsize + 16
-;%define		rs			esp + pushsize + 20
 ;***********************************************************************
 ; void WelsIDctFourT4Rec_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs);
 ;***********************************************************************
@@ -382,15 +342,6 @@
 WELS_EXTERN WelsIDctFourT4Rec_sse2
 ALIGN 16
 WelsIDctFourT4Rec_sse2:
-;%define	pushsize	8
-;    push		ebx
-;    push		esi
-
-;    mov			eax,		[rec]
-;    mov			ebx,		[stride]
-;    mov			ecx,		[pred]
-;    mov			edx,		[pred_stride]
-;    mov			esi,		[rs]
 	%assign push_num 0
 	LOAD_5_PARA
 	SIGN_EXTENSION r1, r1d
@@ -449,21 +400,11 @@
 ;***********************************************************************
 WELS_EXTERN WelsIDctRecI16x16Dc_sse2
 ALIGN 16
-;%define		pushsize	8
-;%define		luma_dc		esp + pushsize + 20
 WelsIDctRecI16x16Dc_sse2:
 	%assign push_num 0
 	LOAD_5_PARA
 	SIGN_EXTENSION r1, r1d
 	SIGN_EXTENSION r3, r3d
-   ; push		esi
-   ; push		edi
-
-   ;mov			ecx,		[luma_dc] ; r4
-    ;mov			eax,		[rec] ; r0
-    ;mov			edx,		[stride] ; r1
-    ;mov			esi,		[pred]; r2
-    ;mov			edi,		[pred_stride]; r3
 	pxor		xmm7,		xmm7
     WELS_DW32	xmm6
 
@@ -499,8 +440,6 @@
 	lea			r2,		[r2 + 2 * r3]
 	SSE2_StoreDiff4x8p		xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
 	LOAD_5_PARA_POP
-    ;pop		edi
-    ;pop		esi
     ret
 
 
@@ -537,8 +476,6 @@
 ;***********************************************************************
 WELS_EXTERN WelsHadamardT4Dc_sse2
 WelsHadamardT4Dc_sse2:
-		;mov			eax,		[esp + 4]	; luma_dc
-		;mov			ecx,		[esp + 8]	; pDct
 		%assign push_num 0
 		LOAD_2_PARA
 		SSE2_Load4Col	    xmm1, xmm5, xmm6, xmm0, r1
--- a/codec/encoder/core/asm/intra_pred.asm
+++ b/codec/encoder/core/asm/intra_pred.asm
@@ -234,10 +234,6 @@
 ; void WelsI16x16LumaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
 ;***********************************************************************
 WelsI16x16LumaPredPlane_sse2:
-		;%define pushsize	4
-		;push	esi
-		;mov		esi,	[esp + pushsize + 8]
-		;mov		ecx,	[esp + pushsize + 12]
 		push r3
 		push r4
 		%assign push_num 2
@@ -293,7 +289,6 @@
 		sar		r3,	6				; c = (5 * V + 32) >> 6;
 		SSE2_Copy8Times	xmm4, r3d		; xmm4 = c,c,c,c,c,c,c,c
 
-		;mov		esi,	[esp + pushsize + 4]
 		add		r4,	16
 		imul	r3,	-7
 		add		r3,	r4				; s = a + 16 + (-7)*c
@@ -367,9 +362,6 @@
 ;***********************************************************************
 WELS_EXTERN WelsI16x16LumaPredV_sse2
 WelsI16x16LumaPredV_sse2:
-    ;mov     edx, [esp+4]    ; pred
-    ;mov     eax, [esp+8]	; pRef
-    ;mov     ecx, [esp+12]   ; stride
     %assign push_num 0
     LOAD_3_PARA
     SIGN_EXTENSION r2, r2d
@@ -400,10 +392,6 @@
 ;***********************************************************************
 WELS_EXTERN WelsIChromaPredPlane_sse2
 WelsIChromaPredPlane_sse2:
-		;%define pushsize	4
-		;push	esi
-		;mov		esi,	[esp + pushsize + 8]	;pRef
-		;mov		ecx,	[esp + pushsize + 12]	;stride
 		push r3
 		push r4
 		%assign push_num 2
@@ -462,7 +450,6 @@
 		sar		r3,	5				; c = (17 * V + 16) >> 5;
 		SSE2_Copy8Times	xmm4, r3d	; mm4 = c,c,c,c,c,c,c,c
 
-		;mov		esi,	[esp + pushsize + 4]
 		add		r4,	16
 		imul	r3,	-3
 		add		r3,	r4		; s = a + 16 + (-3)*c
@@ -502,9 +489,6 @@
 ;
 ;***********************************************************************
 WelsI4x4LumaPredDDR_mmx:
-	;mov			edx,[esp+4]			;pred
-	;mov         eax,[esp+8]			;pRef
-	;mov			ecx,[esp+12]		;stride
 	%assign push_num 0
 	LOAD_3_PARA
 	SIGN_EXTENSION r2, r2d
@@ -619,9 +603,6 @@
 
 WELS_EXTERN WelsIChromaPredH_mmx
 WelsIChromaPredH_mmx:
-	;mov			edx,	[esp+4]			;pred
-	;mov         eax,	[esp+8]			;pRef
-	;mov			ecx,	[esp+12]		;stride
 	%assign push_num 0
 	LOAD_3_PARA
 	SIGN_EXTENSION r2, r2d
--- a/codec/encoder/core/asm/memzero.asm
+++ b/codec/encoder/core/asm/memzero.asm
@@ -55,7 +55,6 @@
 WelsPrefetchZero_mmx:
 	%assign  push_num 0
 	LOAD_1_PARA
-	;mov  eax,[esp+4]
 	prefetchnta [r0]
 	ret
 
--- a/codec/encoder/core/asm/quant.asm
+++ b/codec/encoder/core/asm/quant.asm
@@ -87,12 +87,9 @@
 WelsQuant4x4_sse2:
 		%assign push_num 0
                 LOAD_3_PARA
-		;mov		eax,  [ff]
-		;mov		ecx,  [mf]
 		movdqa	xmm2, [r1]
 		movdqa	xmm3, [r2]
 
-		;mov		edx,  [pDct]
 		SSE2_Quant8	xmm0, xmm1, xmm2, xmm3, [r0]
 		SSE2_Quant8	xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
 
@@ -108,13 +105,10 @@
 		LOAD_3_PARA
 		SIGN_EXTENSION r1, r1w
 		SIGN_EXTENSION r2, r2w
-		;mov		ax,		[mf]
 		SSE2_Copy8Times xmm3, r2d
 
-		;mov		cx, [ff]
 		SSE2_Copy8Times xmm2, r1d
 
-		;mov		edx,  [pDct]
 		SSE2_Quant8	xmm0, xmm1, xmm2, xmm3, [r0]
 		SSE2_Quant8	xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
 
@@ -128,12 +122,9 @@
 WelsQuantFour4x4_sse2:
 		%assign push_num 0
 		LOAD_3_PARA
-		;mov		eax,  [ff]
-		;mov		ecx,  [mf]
 		MOVDQ	xmm2, [r1]
 		MOVDQ	xmm3, [r2]
 
-		;mov		edx,  [pDct]
 		SSE2_Quant8	xmm0, xmm1, xmm2, xmm3, [r0]
 		SSE2_Quant8	xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
 		SSE2_Quant8	xmm0, xmm1, xmm2, xmm3, [r0 + 0x20]
@@ -153,12 +144,9 @@
 WelsQuantFour4x4Max_sse2:
 		%assign push_num 0
 		LOAD_4_PARA
-		;mov		eax,  [ff]
-		;mov		ecx,  [mf]
 		MOVDQ	xmm2, [r1]
 		MOVDQ	xmm3, [r2]
 
-		;mov		edx,  [pDct]
 		pxor	xmm4, xmm4
 		pxor	xmm5, xmm5
 		pxor	xmm6, xmm6
@@ -180,7 +168,6 @@
 		punpckhqdq	xmm0, xmm1
 		pmaxsw	xmm0, xmm1
 
-		;mov		r0,  [r3]
 		movq	[r3], xmm0
 		LOAD_4_PARA_POP
 		ret
@@ -204,8 +191,6 @@
 		psubw	%1, %2
 %endmacro
 
-%define dct2x2				esp + 16
-%define iChromaDc			esp + 20
 ;***********************************************************************
 ;int32_t WelsHadamardQuant2x2_mmx(int16_t *rs, const int16_t ff, int16_t mf, int16_t * pDct, int16_t * block);
 ;***********************************************************************
@@ -216,7 +201,6 @@
 		LOAD_5_PARA
 		SIGN_EXTENSION r1, r1w
 		SIGN_EXTENSION r2, r2w
-		;mov			eax,			[pDct]
 		movd		mm0,			[r0]
 		movd		mm1,			[r0 + 0x20]
 		punpcklwd	mm0,			mm1
@@ -237,16 +221,12 @@
 		punpcklwd	mm1,			mm3
 
 		;quant_2x2_dc
-		;mov			ax,				[mf]
 		MMX_Copy4Times	mm3,		r2d
-		;mov			cx,				[ff]
 		MMX_Copy4Times	mm2,		r1d
 		MMX_Quant4		mm1,	mm0,	mm2,	mm3
 
 		; store dct_2x2
-		;mov			edx,			[dct2x2]
 		movq		[r3],			mm1
-		;mov			ecx,			[iChromaDc]
 		movq		[r4],			mm1
 
 		; pNonZeroCount of dct_2x2
@@ -279,7 +259,6 @@
 		LOAD_3_PARA
 		SIGN_EXTENSION r1, r1w
 		SIGN_EXTENSION r2, r2w
-		;mov			eax,			[pDct]
 		movd		mm0,			[r0]
 		movd		mm1,			[r0 + 0x20]
 		punpcklwd	mm0,			mm1
@@ -300,9 +279,7 @@
 		punpcklwd	mm1,			mm3
 
 		;quant_2x2_dc
-		;mov			ax,				[mf]
 		MMX_Copy4Times	mm3,		r2d
-		;mov			cx,				[ff]
 		MMX_Copy4Times	mm2,		r1d
 		MMX_Quant4		mm1,	mm0,	mm2,	mm3
 
@@ -333,11 +310,8 @@
 align 16
 WELS_EXTERN WelsDequant4x4_sse2
 WelsDequant4x4_sse2:
-	;ecx = dequant_mf[qp], edx = pDct
 	%assign push_num 0
 	LOAD_2_PARA
-	;mov		ecx,  [esp + 8]
-	;mov		edx,  [esp + 4]
 
 	movdqa  xmm1, [r1]
 	SSE2_DeQuant8 [r0	],  xmm0, xmm1
@@ -353,11 +327,8 @@
 
 WELS_EXTERN WelsDequantFour4x4_sse2
 WelsDequantFour4x4_sse2:
-    ;ecx = dequant_mf[qp], edx = pDct
 	%assign push_num 0
 	LOAD_2_PARA
-	;mov		ecx,  [esp + 8]
-	;mov		edx,  [esp + 4]
 
 	movdqa  xmm1, [r1]
 	SSE2_DeQuant8 [r0	],  xmm0, xmm1
@@ -382,8 +353,6 @@
 		%ifndef X86_32
 		movzx r1, r1w
 		%endif
-		;mov			eax,			[esp + 4]
-		;mov			cx,				[esp + 8]
 
 		; WelsDequantLumaDc4x4
 		SSE2_Copy8Times	xmm1,		r1d
--- a/codec/encoder/core/asm/score.asm
+++ b/codec/encoder/core/asm/score.asm
@@ -176,7 +176,6 @@
 	%assign push_num 0
 	%endif
 	LOAD_2_PARA
-	;mov        eax, [esp+8]
 	movdqa     xmm0, [r1]			; 7 6 5 4 3 2 1 0
 	movdqa     xmm1, [r1+16]		; f e d c b a 9 8
 	pextrw     r2d, xmm0, 7			; ecx = 7
@@ -191,7 +190,6 @@
 	pshufd     xmm3, xmm1, 0xd8		; f e b 7 d c 9 a
 	pshufhw    xmm0, xmm2, 0x93		; 6 3 2 5 8 4 1 0
 	pshuflw    xmm1, xmm3, 0x39		; f e b 7 a d c 9
-	;mov        eax,  [esp+4]
 	movdqa     [r0],xmm0
 	movdqa     [r0+16], xmm1
 	%ifdef X86_32
@@ -207,7 +205,6 @@
 WelsScan4x4DcAc_ssse3:
 	%assign push_num 0
 	LOAD_2_PARA
-	;mov        eax, [esp+8]
 	movdqa     xmm0, [r1]
 	movdqa     xmm1, [r1+16]
 	pextrw		r2d,  xmm0, 7			; ecx = [7]
@@ -217,7 +214,6 @@
 	pshufb		xmm1, [pb_scanacdc_maskb]
 	pshufb		xmm0, [pb_scanacdc_maska]
 
-	;mov        eax,  [esp+4]
 	movdqa     [r0],xmm0
 	movdqa     [r0+16], xmm1
 	ret
@@ -229,7 +225,6 @@
 WelsScan4x4Ac_sse2:
 	%assign push_num 0
 	LOAD_2_PARA
-	;mov        eax, [esp+8]
 	movdqa     xmm0, [r1]
 	movdqa     xmm1, [r1+16]
 	movdqa     xmm2, xmm0
@@ -256,7 +251,6 @@
     pslldq     xmm3, 14
     por        xmm1, xmm3
     psrldq     xmm2, 2
-	;mov        eax,  [esp+4]
 	movdqa     [r0],xmm1
 	movdqa     [r0+16], xmm2
 	ret
@@ -268,8 +262,6 @@
 ALIGN 16
 WELS_EXTERN WelsCalculateSingleCtr4x4_sse2
 WelsCalculateSingleCtr4x4_sse2:
-	;push      ebx
-	;mov       eax,  [esp+8]
 	%ifdef X86_32
 	push r3
 	%assign push_num 1
@@ -321,7 +313,6 @@
 	%else
 	mov retrd, r0d
 	%endif
-	;pop       ebx
 	ret
 
 
@@ -333,7 +324,6 @@
 WelsGetNoneZeroCount_sse2:
 	%assign push_num 0
 	LOAD_1_PARA
-	;mov       eax,  [esp+4]
 	movdqa    xmm0, [r0]
 	movdqa    xmm1, [r0+16]
 	pxor      xmm2, xmm2
--- a/codec/processing/src/asm/denoisefilter.asm
+++ b/codec/processing/src/asm/denoisefilter.asm
@@ -172,11 +172,6 @@
 ;	4	0	5
 ;	6	7	8
 ;	0:	the center point
-%define		pushsize	4
-;%define		pixel		esp + pushsize + 4
-;%define		stride		esp + pushsize + 8
-;%define         pixel  r0
-;%define         stride r1
 
 BilateralLumaFilter8_sse2: