ref: cfe6fa98f72cae9587152591ffd52035e71b9562
parent: aa86438a0d5f7e24506b3bc66603eefe3cf26096
parent: 849b63ffe1723e4e3c021c821cbd50db7cb5211d
author: James Zern <[email protected]>
date: Thu Oct 24 02:36:38 EDT 2019
Merge "vpx_int_pro_col_sse2: use unaligned loads"
--- a/vpx_dsp/x86/avg_intrin_sse2.c
+++ b/vpx_dsp/x86/avg_intrin_sse2.c
@@ -515,7 +515,7 @@
int16_t vpx_int_pro_col_sse2(const uint8_t *ref, const int width) {
__m128i zero = _mm_setzero_si128();
- __m128i src_line = _mm_load_si128((const __m128i *)ref);
+ __m128i src_line = _mm_loadu_si128((const __m128i *)ref);
__m128i s0 = _mm_sad_epu8(src_line, zero);
__m128i s1;
int i;
@@ -522,7 +522,7 @@
for (i = 16; i < width; i += 16) {
ref += 16;
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
s1 = _mm_sad_epu8(src_line, zero);
s0 = _mm_adds_epu16(s0, s1);
}