ref: 9d901217c689c1c5b38eb7cfe7db7f161827eb7a
parent: 84758960db7e18cd77e301ab4a1dc6022e1a3335
author: Yunqing Wang <[email protected]>
date: Wed Sep 18 06:36:21 EDT 2013
Fix x86inc.asm to build PIC code correctly. The current x86inc.asm did not handle 32-bit PIC builds properly: TEXTRELs were seen in the built library. The PIC macros from libvpx's x86_abi_support.asm were used to fix this problem, and the assembly code was modified to use those macros. Note: this fix is needed for building the decoder; functions in the encoder will be fixed later. Change-Id: Ifa548d37b1d0bc7d0528db75009cc18cd5eb1838
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -97,21 +97,91 @@
%endif
%endmacro
-%if WIN64
+; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "%define PIC"
+; logic from the original x86inc.asm is kept for 64-bit builds.
+%ifidn __OUTPUT_FORMAT__,elf32
+%define ABI_IS_32BIT 1
+%elifidn __OUTPUT_FORMAT__,macho32
+%define ABI_IS_32BIT 1
+%elifidn __OUTPUT_FORMAT__,win32
+%define ABI_IS_32BIT 1
+%elifidn __OUTPUT_FORMAT__,aout
+%define ABI_IS_32BIT 1
+%else ; any output format other than elf32/macho32/win32/aout
+%define ABI_IS_32BIT 0
+%endif ; ABI_IS_32BIT is now defined as either 0 or 1
+
+%if ABI_IS_32BIT ; 32-bit output: PIC data access needs an explicit base register
+ %if CONFIG_PIC=1 ; PIC build: define GET_GOT / GLOBAL / RESTORE_GOT per object format
+ %ifidn __OUTPUT_FORMAT__,elf32
+ %define GET_GOT_SAVE_ARG 1
+ %define WRT_PLT wrt ..plt
+ %macro GET_GOT 1
+ extern _GLOBAL_OFFSET_TABLE_
+ push %1 ; save the caller's value of the argument register; RESTORE_GOT pops it again
+ call %%get_got ; pushes the address of the sub_offset label as the return address
+ %%sub_offset: ; the ret in the helper below returns here
+ jmp %%exitGG ; skip over the inline helper
+ %%get_got: ; inline helper: turns its own return address into the GOT base
+ mov %1, [esp] ; argument register = return address, i.e. the address of sub_offset
+ add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc ; NASM's documented ELF idiom: rebase that address to the GOT (see the NASM manual)
+ ret ; back to sub_offset, which jumps to the exit label
+ %%exitGG: ; from here on GLOBAL(x) addresses x as a ..gotoff offset from the argument register
+ %undef GLOBAL
+ %define GLOBAL(x) x + %1 wrt ..gotoff
+ %undef RESTORE_GOT
+ %define RESTORE_GOT pop %1
+ %endmacro
+ %elifidn __OUTPUT_FORMAT__,macho32
+ %define GET_GOT_SAVE_ARG 1
+ %macro GET_GOT 1
+ push %1 ; save the caller's value of the argument register; RESTORE_GOT pops it again
+ call %%get_got ; call the next instruction: pushes the run-time address of the get_got label
+ %%get_got:
+ pop %1 ; argument register = run-time address of get_got; GLOBAL(x) adds the x - get_got distance to it
+ %undef GLOBAL
+ %define GLOBAL(x) x + %1 - %%get_got
+ %undef RESTORE_GOT
+ %define RESTORE_GOT pop %1
+ %endmacro
+ %endif
+ %endif
+
+ %if ARCH_X86_64 == 0 ; x86_32: PIC is left undefined here, PIC access goes through GET_GOT/GLOBAL instead
+ %undef PIC
+ %endif
+
+%else ; not a 32-bit format: GET_GOT expands to nothing and GLOBAL(x) is simply "rel x"
+ %macro GET_GOT 1
+ %endmacro ; NOTE(review): WRT_PLT below is set to "wrt ..plt" for every non-32-bit format, including WIN64/macho64 - confirm it is only used on ELF
+ %define GLOBAL(x) rel x
+ %define WRT_PLT wrt ..plt
+
+ %if WIN64 ; WIN64 and macho64 always define PIC; other 64-bit formats only when CONFIG_PIC is set
%define PIC
-%elifidn __OUTPUT_FORMAT__,macho64
+ %elifidn __OUTPUT_FORMAT__,macho64
%define PIC
-%elif ARCH_X86_64 == 0
-; x86_32 doesn't require PIC.
-; Some distros prefer shared objects to be PIC, but nothing breaks if
-; the code contains a few textrels, so we'll skip that complexity.
- %undef PIC
-%elif CONFIG_PIC
+ %elif CONFIG_PIC
%define PIC
+ %endif
%endif
+
+%ifnmacro GET_GOT
+ %macro GET_GOT 1
+ %endmacro ; no GET_GOT was defined above (e.g. 32-bit non-PIC build): make it a no-op
+ %define GLOBAL(x) x
+%endif ; GET_GOT and GLOBAL are usable from here on in every configuration
+%ifndef RESTORE_GOT
+%define RESTORE_GOT
+%endif ; RESTORE_GOT defaults to empty until a GET_GOT expansion redefines it
+%ifndef WRT_PLT
+%define WRT_PLT
+%endif ; WRT_PLT defaults to empty when no branch above set it
+
%ifdef PIC
default rel
%endif
+; Done with PIC macros
; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
%ifndef __NASM_VER__
--- a/vp9/common/x86/vp9_intrapred_sse2.asm
+++ b/vp9/common/x86/vp9_intrapred_sse2.asm
@@ -19,12 +19,14 @@
SECTION .text
INIT_MMX sse
-cglobal dc_predictor_4x4, 4, 4, 2, dst, stride, above, left
+cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
+ GET_GOT goffsetq
+
pxor m1, m1
movd m0, [aboveq]
punpckldq m0, [leftq]
psadbw m0, m1
- paddw m0, [pw_4]
+ paddw m0, [GLOBAL(pw_4)]
psraw m0, 3
pshufw m0, m0, 0x0
packuswb m0, m0
@@ -33,10 +35,14 @@
lea dstq, [dstq+strideq*2]
movd [dstq ], m0
movd [dstq+strideq], m0
+
+ RESTORE_GOT
RET
INIT_MMX sse
-cglobal dc_predictor_8x8, 4, 4, 3, dst, stride, above, left
+cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+ GET_GOT goffsetq
+
pxor m1, m1
movq m0, [aboveq]
movq m2, [leftq]
@@ -45,7 +51,7 @@
psadbw m0, m1
psadbw m2, m1
paddw m0, m2
- paddw m0, [pw_8]
+ paddw m0, [GLOBAL(pw_8)]
psraw m0, 4
pshufw m0, m0, 0x0
packuswb m0, m0
@@ -58,10 +64,14 @@
movq [dstq+strideq ], m0
movq [dstq+strideq*2], m0
movq [dstq+stride3q ], m0
+
+ RESTORE_GOT
RET
INIT_XMM sse2
-cglobal dc_predictor_16x16, 4, 4, 3, dst, stride, above, left
+cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
+ GET_GOT goffsetq
+
pxor m1, m1
mova m0, [aboveq]
mova m2, [leftq]
@@ -73,7 +83,7 @@
paddw m0, m2
movhlps m2, m0
paddw m0, m2
- paddw m0, [pw_16]
+ paddw m0, [GLOBAL(pw_16)]
psraw m0, 5
pshuflw m0, m0, 0x0
punpcklqdq m0, m0
@@ -86,10 +96,14 @@
lea dstq, [dstq+strideq*4]
dec lines4d
jnz .loop
+
+ RESTORE_GOT
REP_RET
INIT_XMM sse2
-cglobal dc_predictor_32x32, 4, 4, 5, dst, stride, above, left
+cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
+ GET_GOT goffsetq
+
pxor m1, m1
mova m0, [aboveq]
mova m2, [aboveq+16]
@@ -107,7 +121,7 @@
paddw m0, m4
movhlps m2, m0
paddw m0, m2
- paddw m0, [pw_32]
+ paddw m0, [GLOBAL(pw_32)]
psraw m0, 6
pshuflw m0, m0, 0x0
punpcklqdq m0, m0
@@ -124,6 +138,8 @@
lea dstq, [dstq+strideq*4]
dec lines4d
jnz .loop
+
+ RESTORE_GOT
REP_RET
INIT_MMX sse
--- a/vp9/common/x86/vp9_intrapred_ssse3.asm
+++ b/vp9/common/x86/vp9_intrapred_ssse3.asm
@@ -112,14 +112,16 @@
REP_RET
INIT_MMX ssse3
-cglobal d45_predictor_4x4, 3, 3, 4, dst, stride, above
+cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset
+ GET_GOT goffsetq
+
movq m0, [aboveq]
- pshufb m2, m0, [sh_b23456777]
- pshufb m1, m0, [sh_b01234577]
- pshufb m0, [sh_b12345677]
+ pshufb m2, m0, [GLOBAL(sh_b23456777)]
+ pshufb m1, m0, [GLOBAL(sh_b01234577)]
+ pshufb m0, [GLOBAL(sh_b12345677)]
pavgb m3, m2, m1
pxor m2, m1
- pand m2, [pb_1]
+ pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m0, m3
@@ -132,19 +134,23 @@
movd [dstq ], m0
psrlq m0, 8
movd [dstq+strideq], m0
+
+ RESTORE_GOT
RET
INIT_MMX ssse3
-cglobal d45_predictor_8x8, 3, 3, 4, dst, stride, above
+cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset
+ GET_GOT goffsetq
+
movq m0, [aboveq]
- mova m1, [sh_b12345677]
- DEFINE_ARGS dst, stride, stride3, line
+ mova m1, [GLOBAL(sh_b12345677)]
+ DEFINE_ARGS dst, stride, stride3
lea stride3q, [strideq*3]
- pshufb m2, m0, [sh_b23456777]
+ pshufb m2, m0, [GLOBAL(sh_b23456777)]
pavgb m3, m2, m0
pxor m2, m0
pshufb m0, m1
- pand m2, [pb_1]
+ pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m0, m3
@@ -167,20 +173,24 @@
movq [dstq+strideq*2], m0
pshufb m0, m1
movq [dstq+stride3q ], m0
+
+ RESTORE_GOT
RET
INIT_XMM ssse3
-cglobal d45_predictor_16x16, 3, 5, 4, dst, stride, above, dst8, line
+cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset
+ GET_GOT goffsetq
+
mova m0, [aboveq]
DEFINE_ARGS dst, stride, stride3, dst8, line
lea stride3q, [strideq*3]
lea dst8q, [dstq+strideq*8]
- mova m1, [sh_b123456789abcdeff]
- pshufb m2, m0, [sh_b23456789abcdefff]
+ mova m1, [GLOBAL(sh_b123456789abcdeff)]
+ pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)]
pavgb m3, m2, m0
pxor m2, m0
pshufb m0, m1
- pand m2, [pb_1]
+ pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m0, m3
@@ -214,10 +224,14 @@
movhps [dstq+strideq +8], m0
movhps [dstq+strideq*2+8], m0
movhps [dstq+stride3q +8], m0
+
+ RESTORE_GOT
RET
INIT_XMM ssse3
-cglobal d45_predictor_32x32, 3, 5, 7, dst, stride, above, dst16, line
+cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, line, goffset
+ GET_GOT goffsetq
+
mova m0, [aboveq]
mova m4, [aboveq+16]
DEFINE_ARGS dst, stride, stride3, dst16, line
@@ -224,19 +238,19 @@
lea stride3q, [strideq*3]
lea dst16q, [dstq +strideq*8]
lea dst16q, [dst16q+strideq*8]
- mova m1, [sh_b123456789abcdeff]
- pshufb m2, m4, [sh_b23456789abcdefff]
+ mova m1, [GLOBAL(sh_b123456789abcdeff)]
+ pshufb m2, m4, [GLOBAL(sh_b23456789abcdefff)]
pavgb m3, m2, m4
pxor m2, m4
palignr m5, m4, m0, 1
palignr m6, m4, m0, 2
pshufb m4, m1
- pand m2, [pb_1]
+ pand m2, [GLOBAL(pb_1)]
psubb m3, m2
pavgb m4, m3
pavgb m3, m0, m6
pxor m0, m6
- pand m0, [pb_1]
+ pand m0, [GLOBAL(pb_1)]
psubb m3, m0
pavgb m5, m3
@@ -288,4 +302,6 @@
mova [dstq +strideq +16], m4
mova [dstq +strideq*2+16], m4
mova [dstq +stride3q +16], m4
+
+ RESTORE_GOT
RET