shithub: libvpx

--- a/test/sad_test.cc

+++ b/test/sad_test.cc

@@ -920,4 +920,13 @@

 INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests));

 #endif  // HAVE_MSA

+//------------------------------------------------------------------------------

+// VSX functions

+#if HAVE_VSX

+const SadMxNParam vsx_tests[] = {  // parameter sets for the VSX SAD kernels

+  SadMxNParam(16, 32, &vpx_sad16x32_vsx),

+  SadMxNParam(16, 16, &vpx_sad16x16_vsx), SadMxNParam(16, 8, &vpx_sad16x8_vsx),

+};

+INSTANTIATE_TEST_CASE_P(VSX, SADTest, ::testing::ValuesIn(vsx_tests));  // run SADTest once per entry above

+#endif  // HAVE_VSX

 }  // namespace

--- /dev/null

+++ b/vpx_dsp/ppc/sad_vsx.c

@@ -1,0 +1,53 @@

+/*

+ *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include <stdlib.h>

+#include "vpx_dsp/ppc/types_vsx.h"

+#include "vpx/vpx_integer.h"

+#define PROCESS16(offset) /* accumulate |a - b| of one 16-byte row into v_sad; uses a, b and the v_* locals of the expansion site */ \

+  v_a = vec_vsx_ld(offset, a); /* load one vector from a + offset */ \

+  v_b = vec_vsx_ld(offset, b); /* load one vector from b + offset */ \

+  v_ah = unpack_to_s16_h(v_a); /* presumably widens one half of the bytes to s16 - confirm in types_vsx.h */ \

+  v_al = unpack_to_s16_l(v_a); /* presumably the other half - confirm in types_vsx.h */ \

+  v_bh = unpack_to_s16_h(v_b);      \

+  v_bl = unpack_to_s16_l(v_b);      \

+  v_subh = vec_sub(v_ah, v_bh); /* element-wise differences */ \

+  v_subl = vec_sub(v_al, v_bl);     \

+  v_absh = vec_abs(v_subh); /* element-wise absolute values */ \

+  v_absl = vec_abs(v_subl);         \

+  v_sad = vec_sum4s(v_absh, v_sad); /* fold 16-bit lanes into the 32-bit accumulator */ \

+  v_sad = vec_sum4s(v_absl, v_sad);

+#define SAD16(height) /* defines vpx_sad16x<height>_vsx(a, a_stride, b, b_stride) */ \

+  unsigned int vpx_sad16x##height##_vsx(const uint8_t *a, int a_stride,   \

+                                        const uint8_t *b, int b_stride) { \

+    int y;                                                                \

+    unsigned int sad[4]; /* receives the four 32-bit lanes of v_sad */    \

+    uint8x16_t v_a, v_b;                                                  \

+    int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl;     \

+    int32x4_t v_sad = vec_splat_s32(0); /* accumulator starts at zero */  \

+    /* One PROCESS16 per row, then step both pointers by their stride. */ \

+    for (y = 0; y < height; y++) {                                        \

+      PROCESS16(0);                                                       \

+                                                                          \

+      a += a_stride;                                                      \

+      b += b_stride;                                                      \

+    }                                                                     \

+    vec_vsx_st((uint32x4_t)v_sad, 0, sad);                                \

+    /* The result is the sum of the four stored partial sums. */          \

+    return sad[3] + sad[2] + sad[1] + sad[0];                             \

+  }

+SAD16(8); /* expands to vpx_sad16x8_vsx */

+SAD16(16); /* expands to vpx_sad16x16_vsx */

+SAD16(32); /* expands to vpx_sad16x32_vsx */

--- a/vpx_dsp/vpx_dsp.mk

+++ b/vpx_dsp/vpx_dsp.mk

@@ -311,6 +311,8 @@

 DSP_SRCS-$(HAVE_SSE2)   += x86/sad_sse2.asm

 DSP_SRCS-$(HAVE_SSE2)   += x86/subtract_sse2.asm

+DSP_SRCS-$(HAVE_VSX) += ppc/sad_vsx.c

 ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)

 DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm

 DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm

--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl

+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl

@@ -711,13 +711,13 @@

 specialize qw/vpx_sad32x16 avx2 msa sse2/;

 add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";

-specialize qw/vpx_sad16x32 msa sse2/;

+specialize qw/vpx_sad16x32 msa sse2 vsx/;

 add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";

-specialize qw/vpx_sad16x16 neon msa sse2/;

+specialize qw/vpx_sad16x16 neon msa sse2 vsx/;

 add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";

-specialize qw/vpx_sad16x8 neon msa sse2/;

+specialize qw/vpx_sad16x8 neon msa sse2 vsx/;

 add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";

 specialize qw/vpx_sad8x16 neon msa sse2/;