shithub: libvpx

Download patch

ref: 36133b04c0d3f82b16902de2ed57fe58d7c30990
parent: 5be37810d26d224f1ec75ff688d21aeadb863501
author: James Zern <[email protected]>
date: Mon Feb 3 11:57:58 EST 2020

loopfilter_sse2: call unsuffixed lpf functions

this allows calls to use better versions (e.g., avx2) if available. in
most other cases the function pointer will be defined to the sse2
variant if another isn't available. this improves performance at 1080P
by ~2% on a Xeon E5-2690.

Change-Id: Ie9da3a567021f8416651a29b8c9ab9238dc4bdf1

--- a/vpx_dsp/x86/loopfilter_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c
@@ -1674,8 +1674,8 @@
   transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16);
 
   // Loop filtering
-  vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
-                                 blimit1, limit1, thresh1);
+  vpx_lpf_horizontal_4_dual(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+                            blimit1, limit1, thresh1);
   src[0] = t_dst;
   src[1] = t_dst + 8;
   dst[0] = s - 4;
@@ -1700,7 +1700,7 @@
   transpose(src, pitch, dst, 8, 1);
 
   // Loop filtering
-  vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh);
+  vpx_lpf_horizontal_8(t_dst + 4 * 8, 8, blimit, limit, thresh);
 
   src[0] = t_dst;
   dst[0] = s - 4;
@@ -1721,8 +1721,8 @@
   transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16);
 
   // Loop filtering
-  vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
-                                 blimit1, limit1, thresh1);
+  vpx_lpf_horizontal_8_dual(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+                            blimit1, limit1, thresh1);
   src[0] = t_dst;
   src[1] = t_dst + 8;
 
@@ -1750,7 +1750,7 @@
   transpose(src, pitch, dst, 8, 2);
 
   // Loop filtering
-  vpx_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);
+  vpx_lpf_horizontal_16(t_dst + 8 * 8, 8, blimit, limit, thresh);
 
   src[0] = t_dst;
   src[1] = t_dst + 8 * 8;
@@ -1771,7 +1771,7 @@
   transpose8x16(s, s + 8 * pitch, pitch, t_dst + 8 * 16, 16);
 
   // Loop filtering
-  vpx_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);
+  vpx_lpf_horizontal_16_dual(t_dst + 8 * 16, 16, blimit, limit, thresh);
 
   // Transpose back
   transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, pitch);