shithub: libvpx

Download patch

ref: 6da6a2329132e7173fe00a76e8feb578d4031164
parent: d6eeef9ee6324af69a9fb19b1c507c29700ac28f
author: Linfeng Zhang <[email protected]>
date: Tue Jun 13 12:53:53 EDT 2017

Update high bitdepth load_input_data() in x86

BUG=webm:1412

Change-Id: Ibf9d120b80c7d3a7637e79e123cf2f0aae6dd78c

--- a/vpx_dsp/x86/inv_txfm_sse2.h
+++ b/vpx_dsp/x86/inv_txfm_sse2.h
@@ -80,8 +80,20 @@
 // highbitdepth enabled
 static INLINE __m128i load_input_data(const tran_low_t *data) {
 #if CONFIG_VP9_HIGHBITDEPTH
-  return octa_set_epi16(data[0], data[1], data[2], data[3], data[4], data[5],
-                        data[6], data[7]);
+  // in0: 0 X 1 X  2 X 3 X
+  // in1: 4 X 5 X  6 X 7 X
+  // t0:  0 4 X X  1 5 X X
+  // t1:  2 6 X X  3 7 X X
+  // t2:  0 2 4 6  X X X X
+  // t3:  1 3 5 7  X X X X
+  // rtn: 0 1 2 3  4 5 6 7
+  const __m128i in0 = _mm_load_si128((const __m128i *)data);
+  const __m128i in1 = _mm_load_si128((const __m128i *)(data + 4));
+  const __m128i t0 = _mm_unpacklo_epi16(in0, in1);
+  const __m128i t1 = _mm_unpackhi_epi16(in0, in1);
+  const __m128i t2 = _mm_unpacklo_epi16(t0, t1);
+  const __m128i t3 = _mm_unpackhi_epi16(t0, t1);
+  return _mm_unpacklo_epi16(t2, t3);
 #else
   return _mm_load_si128((const __m128i *)data);
 #endif