shithub: openh264

Download patch

ref: ef888894046228861825f4acdab09bf6298a6037
parent: 76863f977a9a3bffff0b566a8d7af7cbdb93e646
author: zhiliang wang <[email protected]>
date: Fri Aug 15 05:22:37 EDT 2014

refine format and add UT cases

--- a/codec/encoder/core/x86/sample_sc.asm
+++ b/codec/encoder/core/x86/sample_sc.asm
@@ -676,79 +676,79 @@
 ; void FillQpelLocationByFeatureValue_sse2(uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight, uint16_t** pFeatureValuePointerList)
 ;-----------------------------------------------------------------------------------------------------------------------------
 WELS_EXTERN FillQpelLocationByFeatureValue_sse2
-	push	esi
-	push	edi
-	push	ebx
-	push	ebp
+    push	esi
+    push	edi
+    push	ebx
+    push	ebp
 
-	%define _ps			16				; push size
-	%define	_ls			4				; local size
-	%define	sum_ref		esp+_ps+_ls+4
-	%define	pos_list	esp+_ps+_ls+16
-	%define width		esp+_ps+_ls+8
-	%define height		esp+_ps+_ls+12
-	%define	i_height	esp
-	sub		esp,	_ls
+    %define _ps			16				; push size
+    %define	_ls			4				; local size
+    %define	sum_ref		esp+_ps+_ls+4
+    %define	pos_list	esp+_ps+_ls+16
+    %define width		esp+_ps+_ls+8
+    %define height		esp+_ps+_ls+12
+    %define	i_height	esp
+    sub		esp,	_ls
 
-	mov		esi,	[sum_ref]
-	mov		edi,	[pos_list]
-	mov		ebp,	[width]
-	mov		ebx,	[height]
-	mov		[i_height],	ebx
+    mov		esi,	[sum_ref]
+    mov		edi,	[pos_list]
+    mov		ebp,	[width]
+    mov		ebx,	[height]
+    mov		[i_height],	ebx
 
-	movq	xmm7,	[mv_x_inc_x4]		; x_qpel inc
-	movq	xmm6,	[mv_y_inc_x4]		; y_qpel inc
-	movq	xmm5,	[mx_x_offset_x4]	; x_qpel vector
-	pxor	xmm4,	xmm4
-	pxor	xmm3,	xmm3				; y_qpel vector
+    movq	xmm7,	[mv_x_inc_x4]		; x_qpel inc
+    movq	xmm6,	[mv_y_inc_x4]		; y_qpel inc
+    movq	xmm5,	[mx_x_offset_x4]	; x_qpel vector
+    pxor	xmm4,	xmm4
+    pxor	xmm3,	xmm3				; y_qpel vector
 HASH_HEIGHT_LOOP_SSE2:
-	movdqa	xmm2,	xmm5	; x_qpel vector
-	mov		ecx,	ebp
+    movdqa	xmm2,	xmm5	; x_qpel vector
+    mov		ecx,	ebp
 HASH_WIDTH_LOOP_SSE2:
-	movq	xmm0,	[esi]			; load x8 sum
-	punpcklwd	xmm0,	xmm4
-	movdqa		xmm1,	xmm2
-	punpcklwd	xmm1,	xmm3
+    movq	xmm0,	[esi]			; load x8 sum
+    punpcklwd	xmm0,	xmm4
+    movdqa		xmm1,	xmm2
+    punpcklwd	xmm1,	xmm3
 %rep	3
-	movd	edx,	xmm0
-	lea		ebx,	[edi+edx*4]
-	mov		eax,	[ebx]
-	movd	[eax],	xmm1
-	mov		edx,	[eax+4]	; explictly load eax+4 due cache miss from vtune observation
-	lea		eax,	[eax+4]
-	mov		[ebx],	eax
-	psrldq	xmm1,	4
-	psrldq	xmm0,	4
+    movd	edx,	xmm0
+    lea		ebx,	[edi+edx*4]
+    mov		eax,	[ebx]
+    movd	[eax],	xmm1
+    mov		edx,	[eax+4]	; explicitly load eax+4 due to cache miss from vtune observation
+    lea		eax,	[eax+4]
+    mov		[ebx],	eax
+    psrldq	xmm1,	4
+    psrldq	xmm0,	4
 %endrep
-	movd	edx,	xmm0
-	lea		ebx,	[edi+edx*4]
-	mov		eax,	[ebx]
-	movd	[eax],	xmm1
-	mov		edx,	[eax+4]	; explictly load eax+4 due cache miss from vtune observation
-	lea		eax,	[eax+4]
-	mov		[ebx],	eax
+    movd	edx,	xmm0
+    lea		ebx,	[edi+edx*4]
+    mov		eax,	[ebx]
+    movd	[eax],	xmm1
+    mov		edx,	[eax+4]	; explicitly load eax+4 due to cache miss from vtune observation
+    lea		eax,	[eax+4]
+    mov		[ebx],	eax
 
-	paddw	xmm2,	xmm7
-	lea		esi,	[esi+8]
-	sub		ecx,	4
-	jnz near HASH_WIDTH_LOOP_SSE2
-	paddw	xmm3,	xmm6
-	dec	dword [i_height]
-	jnz	near HASH_HEIGHT_LOOP_SSE2
+    paddw	xmm2,	xmm7
+    lea		esi,	[esi+8]
+    sub		ecx,	4
+    jnz near HASH_WIDTH_LOOP_SSE2
+    paddw	xmm3,	xmm6
+    dec	dword [i_height]
+    jnz	near HASH_HEIGHT_LOOP_SSE2
 
-	add		esp,	_ls
-	%undef	_ps
-	%undef	_ls
-	%undef	sum_ref
-	%undef	pos_list
-	%undef	width
-	%undef	height
-	%undef	i_height
-	pop		ebp
-	pop		ebx
-	pop		edi
-	pop		esi
-	ret
+    add		esp,	_ls
+    %undef	_ps
+    %undef	_ls
+    %undef	sum_ref
+    %undef	pos_list
+    %undef	width
+    %undef	height
+    %undef	i_height
+    pop		ebp
+    pop		ebx
+    pop		edi
+    pop		esi
+    ret
 
 ;---------------------------------------------------------------------------------------------------------------------------------------------------
 ; void InitializeHashforFeature_sse2( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
@@ -755,75 +755,75 @@
 ;                        uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList )
 ;---------------------------------------------------------------------------------------------------------------------------------------------------
 WELS_EXTERN InitializeHashforFeature_sse2
-	push	ebx
-	push	esi
-	push	edi
-	push	ebp
-	%define	_ps	16	; push size
-	mov		edi,	[esp+_ps+16]	; pPositionOfSum
-	mov		ebp,	[esp+_ps+20]	; sum_idx_list
-	mov		esi,	[esp+_ps+4]     ; pTimesOfSum
-	mov		ebx,	[esp+_ps+8]     ; pBuf
-	mov		edx,	[esp+_ps+12]	; list_sz
-	sar		edx,	2
-	mov		ecx,	0
-	pxor	xmm7,	xmm7
+    push	ebx
+    push	esi
+    push	edi
+    push	ebp
+    %define	_ps	16	; push size
+    mov		edi,	[esp+_ps+16]	; pLocationOfFeature
+    mov		ebp,	[esp+_ps+20]	; pFeatureValuePointerList
+    mov		esi,	[esp+_ps+4]     ; pTimesOfFeatureValue
+    mov		ebx,	[esp+_ps+8]     ; pBuf
+    mov		edx,	[esp+_ps+12]	; kiListSize
+    sar		edx,	2
+    mov		ecx,	0
+    pxor	xmm7,	xmm7
 hash_assign_loop_x4_sse2:
-	movdqa	xmm0,	[esi+ecx]
-	pslld	xmm0,	2
+    movdqa	xmm0,	[esi+ecx]
+    pslld	xmm0,	2
 
-	movdqa	xmm1,	xmm0
-	pcmpeqd	xmm1,	xmm7
-	movmskps	eax,	xmm1
+    movdqa	xmm1,	xmm0
+    pcmpeqd	xmm1,	xmm7
+    movmskps	eax,	xmm1
     cmp eax, 0x0f
-	je	near hash_assign_with_copy_sse2
+    je	near hash_assign_with_copy_sse2
 
 %assign x	0
 %rep 4
-	lea		eax,	[edi+ecx+x]
-	mov		[eax],	ebx
-	lea		eax,	[ebp+ecx+x]
-	mov		[eax],	ebx
-	movd	eax,	xmm0
-	add		ebx,	eax
-	psrldq	xmm0,	4
+    lea		eax,	[edi+ecx+x]
+    mov		[eax],	ebx
+    lea		eax,	[ebp+ecx+x]
+    mov		[eax],	ebx
+    movd	eax,	xmm0
+    add		ebx,	eax
+    psrldq	xmm0,	4
 %assign	x	x+4
 %endrep
-	jmp near assign_next_sse2
+    jmp near assign_next_sse2
 
 hash_assign_with_copy_sse2:
-	movd	xmm1,	ebx
-	pshufd	xmm2,	xmm1,	0
-	movdqa	[edi+ecx], xmm2
-	movdqa	[ebp+ecx], xmm2
+    movd	xmm1,	ebx
+    pshufd	xmm2,	xmm1,	0
+    movdqa	[edi+ecx], xmm2
+    movdqa	[ebp+ecx], xmm2
 
 assign_next_sse2:
-	add		ecx,	16
-	dec		edx
-	jnz		near hash_assign_loop_x4_sse2
+    add		ecx,	16
+    dec		edx
+    jnz		near hash_assign_loop_x4_sse2
 
-	mov		edx,	[esp+_ps+12]	; list_sz
-	and		edx,	3
-	jz		near hash_assign_no_rem_sse2
+    mov		edx,	[esp+_ps+12]	; list_sz
+    and		edx,	3
+    jz		near hash_assign_no_rem_sse2
 hash_assign_loop_x4_rem_sse2:
-	lea		eax,	[edi+ecx]
-	mov		[eax],	ebx
-	lea		eax,	[ebp+ecx]
-	mov		[eax],	ebx
-	mov		eax,	[esi+ecx]
-	sal		eax,	2
-	add		ebx,	eax
-	add		ecx,	4
-	dec		edx
-	jnz		near hash_assign_loop_x4_rem_sse2
+    lea		eax,	[edi+ecx]
+    mov		[eax],	ebx
+    lea		eax,	[ebp+ecx]
+    mov		[eax],	ebx
+    mov		eax,	[esi+ecx]
+    sal		eax,	2
+    add		ebx,	eax
+    add		ecx,	4
+    dec		edx
+    jnz		near hash_assign_loop_x4_rem_sse2
 
 hash_assign_no_rem_sse2:
-	%undef	_ps
-	pop		ebp
-	pop		edi
-	pop		esi
-	pop		ebx
-	ret
+    %undef	_ps
+    pop		ebp
+    pop		edi
+    pop		esi
+    pop		ebx
+    ret
 %else
 
 ;**********************************************************************************************************************
@@ -1398,50 +1398,50 @@
     push r13
     mov     r12,    r2
 
-	movq	xmm7,	[mv_x_inc_x4]		; x_qpel inc
-	movq	xmm6,	[mv_y_inc_x4]		; y_qpel inc
-	movq	xmm5,	[mx_x_offset_x4]	; x_qpel vector
-	pxor	xmm4,	xmm4
-	pxor	xmm3,	xmm3				; y_qpel vector
+    movq	xmm7,	[mv_x_inc_x4]		; x_qpel inc
+    movq	xmm6,	[mv_y_inc_x4]		; y_qpel inc
+    movq	xmm5,	[mx_x_offset_x4]	; x_qpel vector
+    pxor	xmm4,	xmm4
+    pxor	xmm3,	xmm3				; y_qpel vector
 HASH_HEIGHT_LOOP_SSE2:
-	movdqa	xmm2,	xmm5	; x_qpel vector
-	mov		r4,	r1
+    movdqa	xmm2,	xmm5	; x_qpel vector
+    mov		r4,	r1
 HASH_WIDTH_LOOP_SSE2:
-	movq	xmm0,	[r0]			; load x8 sum
-	punpcklwd	xmm0,	xmm4
-	movdqa		xmm1,	xmm2
-	punpcklwd	xmm1,	xmm3
+    movq	xmm0,	[r0]			; load x8 sum
+    punpcklwd	xmm0,	xmm4
+    movdqa		xmm1,	xmm2
+    punpcklwd	xmm1,	xmm3
 %rep	3
-	movd	r2d,	xmm0        ;edx:r3
-	lea		r5,     [r3+r2*8]   ;ebx:r5
-	mov		r6,     [r5]        ;eax:r6
-	movd	[r6],	xmm1
-	mov		r13,    [r6+4]	; explictly load eax+4 due cache miss from vtune observation
-	lea		r6,     [r6+4]
-	mov		[r5],	r6
-	psrldq	xmm1,	4
-	psrldq	xmm0,	4
+    movd	r2d,	xmm0        ;edx:r2
+    lea		r5,     [r3+r2*8]   ;ebx:r5
+    mov		r6,     [r5]        ;eax:r6
+    movd	[r6],	xmm1
+    mov		r13,    [r6+4]	; explicitly load r6+4 due to cache miss from vtune observation
+    lea		r6,     [r6+4]
+    mov		[r5],	r6
+    psrldq	xmm1,	4
+    psrldq	xmm0,	4
 %endrep
-	movd	r2d,	xmm0
-	lea		r5,     [r3+r2*8]   ;ebx:r5
-	mov		r6,     [r5]        ;eax:r6
-	movd	[r6],	xmm1
-	mov		r13,    [r6+4]	; explictly load eax+4 due cache miss from vtune observation
-	lea		r6,     [r6+4]
-	mov		[r5],	r6
+    movd	r2d,	xmm0
+    lea		r5,     [r3+r2*8]   ;ebx:r5
+    mov		r6,     [r5]        ;eax:r6
+    movd	[r6],	xmm1
+    mov		r13,    [r6+4]	; explicitly load r6+4 due to cache miss from vtune observation
+    lea		r6,     [r6+4]
+    mov		[r5],	r6
 
-	paddw	xmm2,	xmm7
-	lea		r0,     [r0+8]
-	sub		r4,     4
-	jnz near HASH_WIDTH_LOOP_SSE2
-	paddw	xmm3,	xmm6
-	dec	r12
-	jnz	near HASH_HEIGHT_LOOP_SSE2
+    paddw	xmm2,	xmm7
+    lea		r0,     [r0+8]
+    sub		r4,     4
+    jnz near HASH_WIDTH_LOOP_SSE2
+    paddw	xmm3,	xmm6
+    dec	r12
+    jnz	near HASH_HEIGHT_LOOP_SSE2
 
-	pop		r13
-	pop		r12
+    pop		r13
+    pop		r12
     POP_XMM
-	ret
+    ret
 
 ;---------------------------------------------------------------------------------------------------------------------------------------------------
 ; void InitializeHashforFeature_sse2( uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
@@ -1455,59 +1455,59 @@
     push r12
     push r13
     mov     r12,    r2
-	sar		r2,     2
-	mov		r5,     0       ;r5:ecx
+    sar		r2,     2
+    mov		r5,     0       ;r5:ecx
     xor     r6,     r6
-	pxor	xmm3,	xmm3
+    pxor	xmm3,	xmm3
 hash_assign_loop_x4_sse2:
-	movdqa	xmm0,	[r0+r5]
-	pslld	xmm0,	2
+    movdqa	xmm0,	[r0+r5]
+    pslld	xmm0,	2
 
-	movdqa	xmm1,	xmm0
-	pcmpeqd	xmm1,	xmm3
-	movmskps	r6,	xmm1
-    cmp     r6, 	0x0f
-	jz	near hash_assign_with_copy_sse2
+    movdqa	xmm1,	xmm0
+    pcmpeqd	xmm1,	xmm3
+    movmskps	r6,	xmm1
+    cmp     r6,     0x0f
+    jz	near hash_assign_with_copy_sse2
 
 %assign x	0
 %rep 4
-	lea		r13,	[r3+r5*2+x]
-	mov		[r13],	r1
-	lea		r13,	[r4+r5*2+x]
-	mov		[r13],	r1
-	movd	r6d,	xmm0
-	add		r1,     r6
-	psrldq	xmm0,	4
+    lea		r13,	[r3+r5*2+x]
+    mov		[r13],	r1
+    lea		r13,	[r4+r5*2+x]
+    mov		[r13],	r1
+    movd	r6d,	xmm0
+    add		r1,     r6
+    psrldq	xmm0,	4
 %assign	x	x+8
 %endrep
-	jmp near assign_next_sse2
+    jmp near assign_next_sse2
 
 hash_assign_with_copy_sse2:
-	movq	xmm1,	r1
-	pshufd	xmm2,	xmm1,	01000100b
-	movdqa	[r3+r5*2], xmm2
-	movdqa	[r4+r5*2], xmm2
-	movdqa	[r3+r5*2+16], xmm2
-	movdqa	[r4+r5*2+16], xmm2
+    movq	xmm1,	r1
+    pshufd	xmm2,	xmm1,	01000100b
+    movdqa	[r3+r5*2], xmm2
+    movdqa	[r4+r5*2], xmm2
+    movdqa	[r3+r5*2+16], xmm2
+    movdqa	[r4+r5*2+16], xmm2
 
 assign_next_sse2:
-	add		r5,	16
-	dec		r2
-	jnz		near hash_assign_loop_x4_sse2
+    add		r5,	16
+    dec		r2
+    jnz		near hash_assign_loop_x4_sse2
 
-	and		r12,	3
-	jz		near hash_assign_no_rem_sse2
+    and		r12,	3
+    jz		near hash_assign_no_rem_sse2
 hash_assign_loop_x4_rem_sse2:
-	lea		r13,	[r3+r5*2]
-	mov		[r13],	r1
-	lea		r13,	[r4+r5*2]
-	mov		[r13],	r1
-	mov		r6d,	[r0+r5]
-	sal		r6,     2
-	add		r1,     r6
-	add		r5,     4
-	dec		r12
-	jnz		near hash_assign_loop_x4_rem_sse2
+    lea		r13,	[r3+r5*2]
+    mov		[r13],	r1
+    lea		r13,	[r4+r5*2]
+    mov		[r13],	r1
+    mov		r6d,	[r0+r5]
+    sal		r6,     2
+    add		r1,     r6
+    add		r5,     4
+    dec		r12
+    jnz		near hash_assign_loop_x4_rem_sse2
 
 hash_assign_no_rem_sse2:
     pop     r13
--- a/test/encoder/EncUT_SVC_me.cpp
+++ b/test/encoder/EncUT_SVC_me.cpp
@@ -242,9 +242,13 @@
 
 GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_c, 10, 10)
 GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_c, 16, 16)
+GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_c, 640, 320)
+GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_c, 640, 320)
 #ifdef X86_ASM
 GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_sse2, 10, 10)
 GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_sse2, 16, 16)
+GENERATE_InitializeHashforFeature (InitializeHashforFeature_ref, InitializeHashforFeature_sse2, 640, 320)
+GENERATE_FillQpelLocationByFeatureValue (FillQpelLocationByFeatureValue_ref, FillQpelLocationByFeatureValue_sse2, 640, 320)
 #endif
 
 GENERATE_SumOfFrame (SumOf8x8BlockOfFrame_ref, SumOf8x8BlockOfFrame_c, 1, 1)