ref: 17eed992af381d05e0a3af811aa4320d5b2cbbda
parent: f76daa92ad831c5ca179d2ec16ac9d7996874b51
author: Sindre Aamås <[email protected]>
date: Tue Mar 7 09:30:04 EST 2017
[Encoder/x86] Support X86_32_PICASM in coeff routines. In order for program counter-relative offsets to work with nasm, data constants are placed in the text segment when X86_32_PICASM is defined. Enable the routines that were previously disabled under X86_32_PICASM.
--- a/codec/encoder/core/inc/set_mb_syn_cavlc.h
+++ b/codec/encoder/core/inc/set_mb_syn_cavlc.h
@@ -78,12 +78,10 @@
int32_t CavlcParamCal_c (int16_t* pCoffLevel, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs ,
int32_t iEndIdx);
#ifdef X86_ASM
-#ifndef X86_32_PICASM
int32_t CavlcParamCal_sse2 (int16_t* pCoffLevel, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs ,
int32_t iEndIdx);
int32_t CavlcParamCal_sse42 (int16_t* pCoffLevel, uint8_t* pRun, int16_t* pLevel, int32_t* pTotalCoeffs ,
int32_t iEndIdx);
-#endif
#endif
#if defined(__cplusplus)
--- a/codec/encoder/core/src/set_mb_syn_cavlc.cpp
+++ b/codec/encoder/core/src/set_mb_syn_cavlc.cpp
@@ -291,19 +291,15 @@
pFuncList->pfCavlcParamCal = CavlcParamCal_c;
#if defined(X86_32_ASM)
-#ifndef X86_32_PICASM
if (uiCpuFlag & WELS_CPU_SSE2) {
pFuncList->pfCavlcParamCal = CavlcParamCal_sse2;
}
#endif
-#endif
#ifdef X86_ASM
-#ifndef X86_32_PICASM
if (uiCpuFlag & WELS_CPU_SSE42) {
pFuncList->pfCavlcParamCal = CavlcParamCal_sse42;
}
-#endif
#endif
if (iEntropyCodingModeFlag) {
pFuncList->pfStashMBStatus = StashMBStatusCabac;
--- a/codec/encoder/core/x86/coeff.asm
+++ b/codec/encoder/core/x86/coeff.asm
@@ -42,7 +42,11 @@
%include "asm_inc.asm"
+%ifdef X86_32_PICASM
+SECTION .text align=16
+%else
SECTION .rodata align=16
+%endif
align 16
@@ -369,7 +373,6 @@
%ifdef X86_32
-%ifndef X86_32_PICASM
;***********************************************************************
;int32_t CavlcParamCal_sse2(int16_t*coffLevel, uint8_t* run, int16_t *Level, int32_t* total_coeffs , int32_t endIdx);
;***********************************************************************
@@ -377,10 +380,12 @@
push ebx
push edi
push esi
+ %assign push_num 3
+ INIT_X86_32_PIC ebp
- mov eax, [esp+16] ;coffLevel
- mov edi, [esp+24] ;Level
- mov ebx, [esp+32] ;endIdx
+ mov eax, arg1 ;coffLevel
+ mov edi, arg3 ;Level
+ mov ebx, arg5 ;endIdx
cmp ebx, 3
jne .Level16
pxor xmm1, xmm1
@@ -400,7 +405,7 @@
pmovmskb edx, xmm0
cmp edx, 0
je near .return
- movdqa xmm6, [sse2_b_1]
+ movdqa xmm6, [pic(sse2_b_1)]
pcmpeqw xmm7, xmm7 ;generate -1
mov ebx, 0xff
;pinsrw xmm6, ebx, 3
@@ -407,7 +412,7 @@
mov bl, dh
- lea ebx, [byte_1pos_table+8*ebx]
+ lea ebx, [pic(byte_1pos_table+8*ebx)]
movq xmm0, [ebx]
pextrw ecx, xmm0, 3
shr ecx, 8
@@ -438,7 +443,7 @@
add edi, 2
.LowByteFind0:
and edx, 0xff
- lea ebx, [byte_1pos_table+8*edx]
+ lea ebx, [pic(byte_1pos_table+8*edx)]
movq xmm1, [ebx]
pextrw esi, xmm1, 3
or esi, 0xff
@@ -466,7 +471,7 @@
mov edx, [eax]
mov [edi], dx
.getLevelEnd:
- mov edx, [esp+28] ;total_coeffs
+ mov edx, arg4 ;total_coeffs
;mov ebx, ecx
;and ebx, 0xff
movzx ebx, byte cl
@@ -473,7 +478,7 @@
add cl, ch
mov [edx], cl
;getRun
- movq xmm5, [sse2_b8]
+ movq xmm5, [pic(sse2_b8)]
paddb xmm0, xmm5
pxor xmm2, xmm2
pxor xmm3, xmm3
@@ -499,18 +504,17 @@
paddb xmm1, xmm7
psrldq xmm0, 1
psubb xmm1, xmm0
- mov ecx, [esp+20] ;run
+ mov ecx, arg2 ;run
movdqa [ecx], xmm1
;getRunEnd
.return:
+ DEINIT_X86_32_PIC
pop esi
pop edi
pop ebx
ret
-%endif ;%ifndef X86_32_PICASM
%endif ;%ifdef X86_32
-%ifndef X86_32_PICASM
;***********************************************************************
;int32_t CavlcParamCal_sse42(int16_t*coffLevel, uint8_t* run, int16_t *Level, int32_t* total_coeffs , int32_t endIdx);
;***********************************************************************
@@ -524,17 +528,21 @@
push r5
push r6
%assign push_num 4
+%ifdef X86_32_PICASM
+ %define p_total_coeffs r1
+%else
%define p_total_coeffs r0
+%endif
%define r_tmp r1
%define r_tmpd r1d
%define r_tmpb r1b
%define p_level r2
%define p_coeff_level r3
+ %define p_run r6
%define r_mask r5
%define r_maskd r5d
- %define p_run r6
- %define p_shufb_lut wels_cavlc_param_cal_shufb_lut
- %define p_run_lut wels_cavlc_param_cal_run_lut
+ %define p_shufb_lut pic(wels_cavlc_param_cal_shufb_lut)
+ %define p_run_lut pic(wels_cavlc_param_cal_run_lut)
mov p_coeff_level, arg1
mov p_run, arg2
mov p_level, arg3
@@ -571,6 +579,7 @@
%define p_run_lut (p_shufb_lut + (wels_cavlc_param_cal_run_lut - wels_cavlc_param_cal_shufb_lut))
lea p_shufb_lut, [wels_cavlc_param_cal_shufb_lut]
%endif
+ INIT_X86_32_PIC_NOPRESERVE r0
; Acquire a bitmask indicating which words are non-zero.
; Assume p_coeff_level is 16-byte-aligned and at least 32 bytes if endIdx > 3.
@@ -588,7 +597,7 @@
.load_done:
movdqa [p_run], xmm1 ; Zero-initialize because we may read back implied zeros.
pcmpeqb xmm0, xmm1
- pshufb xmm0, [wels_shufb_rev]
+ pshufb xmm0, [pic(wels_shufb_rev)]
pmovmskb r_maskd, xmm0
xor r_maskd, 0FFFFh
%undef i_endidxd
@@ -605,12 +614,18 @@
%xdefine i_total_zeros p_total_coeffs
%endif
%undef p_total_coeffs
+%ifdef X86_32_PICASM
+ push r_tmp2
+ %undef i_total_zeros
+ %define i_total_zeros dword [esp]
+%else
mov i_total_zeros, r_tmp2
+%endif
jz .done
- mov i_total_zeros, 16
- sub i_total_zeros, r_tmp2
bsf r_tmpd, r_maskd ; Find first set bit.
- sub i_total_zeros, r_tmp
+ lea r_tmp2, [r_tmp2 + r_tmp - 16]
+ neg r_tmp2
+ mov i_total_zeros, r_tmp2
; Skip trailing zeros.
; Restrict to multiples of 4 to retain alignment and avoid out-of-bound stores.
and r_tmpd, -4
@@ -649,8 +664,13 @@
jnz .loop
.done:
%ifnidni retrq, i_total_zeros
+ %ifdef X86_32_PICASM
+ pop retrq
+ %else
mov retrq, i_total_zeros
+ %endif
%endif
+ DEINIT_X86_32_PIC
%ifdef X86_32
pop r6
pop r5
@@ -673,5 +693,3 @@
%undef r_tmp2d
%undef p_shufb_lut
%undef p_run_lut
-
-%endif ;ifndef X86_32_PICASM
--- a/test/encoder/EncUT_Cavlc.cpp
+++ b/test/encoder/EncUT_Cavlc.cpp
@@ -77,18 +77,14 @@
}
#ifdef X86_32_ASM
-#ifndef X86_32_PICASM
TEST (CavlcTest, CavlcParamCal_sse2) {
TestCavlcParamCal (CavlcParamCal_sse2);
}
#endif
-#endif
#ifdef X86_ASM
-#ifndef X86_32_PICASM
TEST (CavlcTest, CavlcParamCal_sse42) {
if (WelsCPUFeatureDetect (0) & WELS_CPU_SSE42)
TestCavlcParamCal (CavlcParamCal_sse42);
}
-#endif
#endif