shithub: libvpx

Download patch

ref: 12a14913947b510514746389319b49a188a53579
parent: abc7105acdfbbeaeecf41c675148683a1cb8b4f7
author: James Zern <[email protected]>
date: Tue May 4 08:13:17 EDT 2021

vp9_denoiser_neon,horizontal_add_s8x16: use vaddlv w/aarch64

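This reduces the number of instructions needed to compute the sum: on
AArch64 a single vaddlvq_s8 (add long across vector) replaces the chain
of pairwise widening adds, which is kept as the fallback for 32-bit Arm.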

Change-Id: Icae4d4fb3e343d5b6e5a095c60ac6d171b3e7d54

--- a/vp9/encoder/arm/neon/vp9_denoiser_neon.c
+++ b/vp9/encoder/arm/neon/vp9_denoiser_neon.c
@@ -21,6 +21,9 @@
 
 // Compute the sum of all pixel differences of this MB.
 static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) {
+#if defined(__aarch64__)
+  return vaddlvq_s8(v_sum_diff_total);
+#else
   const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total);
   const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10);
   const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210);
@@ -28,6 +31,7 @@
                                 vget_low_s64(fedcba98_76543210));
   const int sum_diff = vget_lane_s32(vreinterpret_s32_s64(x), 0);
   return sum_diff;
+#endif
 }
 
 // Denoise a 16x1 vector.