ref: 1f736e400f310ebed2c8f1c9086bf58c0a090bbd
parent: a0278cad3f352e4d878f59ecf414f0af2badae8b
author: Scott LaVarnway <[email protected]>
date: Thu Feb 25 01:15:24 EST 2016
VPX: vpx_filter_block1d16_(v8, v8_avg): Store result with one 16-byte store instead of two 8-byte stores. Change-Id: I43acbc5edfd6d6055a926f9b9605d47127400f09
--- a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
+++ b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
@@ -572,7 +572,6 @@
%macro SUBPIX_VFILTER16 1
cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
-
mova m4, [filterq]
SETUP_LOCAL_VARS
%if ARCH_X86_64
@@ -630,13 +629,10 @@
movh m5, [src1q + sstride6q + 8] ;H
psraw m0, 7
punpcklbw m2, m5 ;G H
- packuswb m0, m0
pmaddubsw m2, k6k7
%ifidn %1, v8_avg
- movh m4, [dstq]
- pavgb m0, m4
+ mova m4, [dstq]
%endif
- movh [dstq], m0
mova m6, m7
pmaxsw m7, m1
pminsw m1, m6
@@ -645,15 +641,14 @@
paddsw m3, m7
paddsw m3, krd
psraw m3, 7
- packuswb m3, m3
+ packuswb m0, m3
add srcq, sstrideq
add src1q, sstrideq
%ifidn %1, v8_avg
- movh m1, [dstq + 8]
- pavgb m3, m1
+ pavgb m0, m4
%endif
- movh [dstq + 8], m3
+ mova [dstq], m0
add dstq, dst_stride
dec heightd
jnz .loop