shithub: libvpx

Download patch

ref: 4529dc848332b78421c04408c8f8fe698ea0c21e
parent: cbec0795c638c677101ee388cc5342a7e5b41fd7
author: Vitaly Buka <[email protected]>
date: Fri Nov 15 05:40:47 EST 2019

Disable -ftrivial-auto-var-init= for hot code

This helps to improve some benchmarks by 10%, e.g. decode_time
PCFullStackTest.VP9SVC_3SL_Low

Bug: 1020220, 977230
Change-Id: Ic992f1eec369f46a08e19eb33bc3a7c15c1e7c87

--- a/vpx_dsp/x86/convolve.h
+++ b/vpx_dsp/x86/convolve.h
@@ -107,7 +107,7 @@
     assert(x_step_q4 == 16);                                                   \
     assert(y_step_q4 == 16);                                                   \
     if (filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) {               \
-      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]);                           \
+      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED);         \
       vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64,  \
                                 filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
                                 h + 7);                                        \
@@ -116,7 +116,7 @@
                                       y_step_q4, w, h);                        \
     } else if (filter_x[2] | filter_x[5]) {                                    \
       const int num_taps = is_avg ? 8 : 4;                                     \
-      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]);                           \
+      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED);         \
       vpx_convolve8_horiz_##opt(                                               \
           src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64,       \
           filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1);    \
@@ -124,7 +124,7 @@
                                       dst, dst_stride, filter, x0_q4,          \
                                       x_step_q4, y0_q4, y_step_q4, w, h);      \
     } else {                                                                   \
-      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]);                           \
+      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65] VPX_UNINITIALIZED);         \
       vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4,    \
                                 x_step_q4, y0_q4, y_step_q4, w, h + 1);        \
       vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter,     \
@@ -242,7 +242,7 @@
     if (x_step_q4 == 16 && y_step_q4 == 16) {                                  \
       if ((filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) ||           \
           filter_x[3] == 128) {                                                \
-        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]);                        \
+        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED);      \
         vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride,     \
                                          fdata2, 64, filter, x0_q4, x_step_q4, \
                                          y0_q4, y_step_q4, w, h + 7, bd);      \
@@ -251,7 +251,7 @@
             y0_q4, y_step_q4, w, h, bd);                                       \
       } else if (filter_x[2] | filter_x[5]) {                                  \
         const int num_taps = is_avg ? 8 : 4;                                   \
-        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]);                        \
+        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED);      \
         vpx_highbd_convolve8_horiz_##opt(                                      \
             src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64,     \
             filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1,   \
@@ -260,7 +260,7 @@
             fdata2 + 64 * (num_taps / 2 - 1), 64, dst, dst_stride, filter,     \
             x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd);                     \
       } else {                                                                 \
-        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]);                        \
+        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65] VPX_UNINITIALIZED);      \
         vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter,  \
                                          x0_q4, x_step_q4, y0_q4, y_step_q4,   \
                                          w, h + 1, bd);                        \
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -51,4 +51,25 @@
 #define VPX_WITH_ASAN 0
 #endif  // __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
 
+// __has_attribute is a compiler extension. Where it is absent, merely writing
+// __has_attribute(x) in an #if is a preprocessing error, so define a fallback
+// that reports every attribute as unsupported.
+#if !defined(__has_attribute)
+#define __has_attribute(x) 0
+#endif  // !defined(__has_attribute)
+
+#if __has_attribute(uninitialized)
+// VPX_UNINITIALIZED opts a single variable out of
+// -ftrivial-auto-var-init=pattern. That flag is a security risk mitigation,
+// so do not apply the attribute "just in case"; use it only to fix a real
+// performance bottleneck when other approaches do not work. In general the
+// compiler is quite effective at eliminating unneeded initializations
+// introduced by the flag, e.g. when they are followed by actual initialization
+// by the program. If that optimization fails and refactoring the code is
+// hard, the attribute can be used as a workaround.
+#define VPX_UNINITIALIZED __attribute__((uninitialized))
+#else
+#define VPX_UNINITIALIZED
+#endif  // __has_attribute(uninitialized)
+
 #endif  // VPX_VPX_PORTS_MEM_H_