ref: f8d744d91a20c3bc80e82cf5f3aa7e78fe73d164
parent: 2ba383474dd0f9a2f4abc4d0ef3af434153cdbeb
author: Johann <[email protected]>
date: Mon Jan 30 11:03:05 EST 2017
satd highbd neon: use tran_low_t for coeff BUG=webm:1365 Change-Id: I43521ad32b6c96737a8ef2b8c327f901fd7eaf84
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -379,15 +379,11 @@
make_tuple(64, &vpx_int_pro_col_neon,
&vpx_int_pro_col_c)));
-// TODO(jingning): Remove the highbitdepth flag once the SIMD functions are
-// in place.
-#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(NEON, SatdTest,
::testing::Values(make_tuple(16, &vpx_satd_neon),
make_tuple(64, &vpx_satd_neon),
make_tuple(256, &vpx_satd_neon),
make_tuple(1024, &vpx_satd_neon)));
-#endif // !CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON
#if HAVE_MSA
@@ -414,6 +410,8 @@
make_tuple(64, &vpx_int_pro_col_msa,
&vpx_int_pro_col_c)));
+// TODO(jingning): Remove the highbitdepth flag once the SIMD functions are
+// in place.
#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(MSA, SatdTest,
::testing::Values(make_tuple(16, &vpx_satd_msa),
--- a/vpx_dsp/arm/avg_neon.c
+++ b/vpx_dsp/arm/avg_neon.c
@@ -15,6 +15,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#include "vpx_dsp/arm/idct_neon.h"
static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
const uint32x4_t a = vpaddlq_u16(v_16x8);
@@ -64,13 +65,13 @@
// coeff: 16 bits, dynamic range [-32640, 32640].
// length: value range {16, 64, 256, 1024}.
-int vpx_satd_neon(const int16_t *coeff, int length) {
+int vpx_satd_neon(const tran_low_t *coeff, int length) {
const int16x4_t zero = vdup_n_s16(0);
int32x4_t accum = vdupq_n_s32(0);
do {
- const int16x8_t src0 = vld1q_s16(coeff);
- const int16x8_t src8 = vld1q_s16(coeff + 8);
+ const int16x8_t src0 = load_tran_low_to_s16q(coeff);
+ const int16x8_t src8 = load_tran_low_to_s16q(coeff + 8);
accum = vabal_s16(accum, vget_low_s16(src0), zero);
accum = vabal_s16(accum, vget_high_s16(src0), zero);
accum = vabal_s16(accum, vget_low_s16(src8), zero);
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -894,7 +894,7 @@
specialize qw/vpx_hadamard_16x16/;
add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length";
- specialize qw/vpx_satd sse2/;
+ specialize qw/vpx_satd sse2 neon/;
} else {
add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_8x8 sse2 neon msa/, "$ssse3_x86_64";