ref: 683b5a31617f0fc5a1f43b8f46343693774551ea
parent: a3df343cda2b6f3d554138ce5dae831e2f946d0c
author: James Zern <[email protected]>
date: Wed Sep 16 13:33:34 EDT 2015
vpx_subpixel_8t_ssse3: fix reg counts/access. Fixes the build on Windows x64. Previously 'heightq', i.e., the 64-bit register, was accessed when only the 32-bit value was needed; given that this value comes from a stack variable, the upper bits were undefined. Also bump the register/xmm counts: users of SETUP_LOCAL_VARS touch xmm13 in 64-bit builds, and filter_block1d16_v* uses one extra temp variable. Change-Id: I9c768c0b2047481d1d3b11c2e16b2f8de6eb0d80
--- a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
+++ b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
@@ -104,7 +104,7 @@
%define k0k1k4k5 m8
%define k2k3k6k7 m9
%define krd m10
- %define orig_height r7
+ %define orig_height r7d
mova krd, [GLOBAL(pw_64)]
pshuflw k0k1k4k5, m4, 0b ;k0_k1
pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5
@@ -131,8 +131,8 @@
mova k2k3k6k7, m7
mova krd, m1
%endif
- mov orig_height, heightq
- shr heightq, 1
+ mov orig_height, heightd
+ shr heightd, 1
.loop:
;Do two rows at once
movh m0, [srcq - 3]
@@ -200,12 +200,12 @@
lea dstq, [dstq + 2 * dstrideq ]
prefetcht0 [srcq + 2 * sstrideq - 3]
- dec heightq
+ dec heightd
jnz .loop
; Do last row if output_height is odd
- mov heightq, orig_height
- and heightq, 1
+ mov heightd, orig_height
+ and heightd, 1
je .done
movh m0, [srcq - 3] ; load src
@@ -254,17 +254,17 @@
;-------------------------------------------------------------------------------
%macro SUBPIX_HFILTER8 1
-cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 13, LOCAL_VARS_SIZE, \
+cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
SETUP_LOCAL_VARS
%if ARCH_X86_64
- %define orig_height r7
+ %define orig_height r7d
%else
%define orig_height heightmp
%endif
- mov orig_height, heightq
- shr heightq, 1
+ mov orig_height, heightd
+ shr heightd, 1
.loop:
movh m0, [srcq - 3]
@@ -336,12 +336,12 @@
lea srcq, [srcq + sstrideq ]
lea dstq, [dstq + 2 * dstrideq ]
prefetcht0 [srcq + 2 * sstrideq - 3]
- dec heightq
+ dec heightd
jnz .loop
;Do last row if output_height is odd
- mov heightq, orig_height
- and heightq, 1
+ mov heightd, orig_height
+ and heightd, 1
je .done
movh m0, [srcq - 3]
@@ -361,7 +361,7 @@
;-------------------------------------------------------------------------------
%macro SUBPIX_HFILTER16 1
-cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 13, LOCAL_VARS_SIZE, \
+cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
SETUP_LOCAL_VARS
@@ -427,7 +427,7 @@
lea srcq, [srcq + sstrideq]
mova [dstq], m0
lea dstq, [dstq + dstrideq]
- dec heightq
+ dec heightd
jnz .loop
RET
%endm
@@ -527,11 +527,11 @@
%endif
movx [dstq], m1
add dstq, dst_stride
- sub heightq, 2
- cmp heightq, 1
+ sub heightd, 2
+ cmp heightd, 1
jg .loop
- cmp heightq, 0
+ cmp heightd, 0
je .done
movx m0, [srcq ] ;A
@@ -570,7 +570,7 @@
;-------------------------------------------------------------------------------
%macro SUBPIX_VFILTER16 1
-cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*2), 13, LOCAL_VARS_SIZE, \
+cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
@@ -655,7 +655,7 @@
%endif
movh [dstq + 8], m3
add dstq, dst_stride
- dec heightq
+ dec heightd
jnz .loop
RET
%endm