ref: 4529dc848332b78421c04408c8f8fe698ea0c21e
parent: cbec0795c638c677101ee388cc5342a7e5b41fd7
author: Vitaly Buka <[email protected]>
date: Fri Nov 15 05:40:47 EST 2019
Disable -ftrivial-auto-var-init= for hot code. This helps to improve some benchmarks by 10%, e.g. decode_time PCFullStackTest.VP9SVC_3SL_Low. Bug: 1020220, 977230. Change-Id: Ic992f1eec369f46a08e19eb33bc3a7c15c1e7c87
--- a/vpx_dsp/x86/convolve.h
+++ b/vpx_dsp/x86/convolve.h
@@ -107,7 +107,7 @@
assert(x_step_q4 == 16); \
assert(y_step_q4 == 16); \
if (filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) { \
- DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
+ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED); \
vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
h + 7); \
@@ -116,7 +116,7 @@
y_step_q4, w, h); \
} else if (filter_x[2] | filter_x[5]) { \
const int num_taps = is_avg ? 8 : 4; \
- DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
+ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED); \
vpx_convolve8_horiz_##opt( \
src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64, \
filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1); \
@@ -124,7 +124,7 @@
dst, dst_stride, filter, x0_q4, \
x_step_q4, y0_q4, y_step_q4, w, h); \
} else { \
- DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
+ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65] VPX_UNINITIALIZED); \
vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4, \
x_step_q4, y0_q4, y_step_q4, w, h + 1); \
vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter, \
@@ -242,7 +242,7 @@
if (x_step_q4 == 16 && y_step_q4 == 16) { \
if ((filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) || \
filter_x[3] == 128) { \
- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
+ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED); \
vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
fdata2, 64, filter, x0_q4, x_step_q4, \
y0_q4, y_step_q4, w, h + 7, bd); \
@@ -251,7 +251,7 @@
y0_q4, y_step_q4, w, h, bd); \
} else if (filter_x[2] | filter_x[5]) { \
const int num_taps = is_avg ? 8 : 4; \
- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
+ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED); \
vpx_highbd_convolve8_horiz_##opt( \
src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64, \
filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1, \
@@ -260,7 +260,7 @@
fdata2 + 64 * (num_taps / 2 - 1), 64, dst, dst_stride, filter, \
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \
} else { \
- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
+ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65] VPX_UNINITIALIZED); \
vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, \
x0_q4, x_step_q4, y0_q4, y_step_q4, \
w, h + 1, bd); \
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -51,4 +51,24 @@
#define VPX_WITH_ASAN 0
#endif // __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+// Preprocessors that do not provide __has_attribute (e.g. MSVC, older GCC)
+// cannot parse the check below, so fall back to reporting "not supported".
+#if !defined(__has_attribute)
+#define __has_attribute(x) 0
+#endif // !defined(__has_attribute)
+
+#if __has_attribute(uninitialized)
+// The attribute disables -ftrivial-auto-var-init=pattern for specific
+// variables. -ftrivial-auto-var-init is a security risk mitigation feature, so
+// the attribute should not be used "just in case", but only to fix real
+// performance bottlenecks when other approaches do not work. In general the
+// compiler is quite effective at eliminating unneeded initializations
+// introduced by the flag, e.g. when they are followed by actual initialization
+// by the program. However, if compiler optimization fails and code refactoring
+// is hard, the attribute can be used as a workaround.
+#define VPX_UNINITIALIZED __attribute__((uninitialized))
+#else
+// Expands to nothing when the compiler does not support the attribute.
+#define VPX_UNINITIALIZED
+#endif // __has_attribute(uninitialized)
+
#endif // VPX_VPX_PORTS_MEM_H_