shithub: libvpx

--- a/vpx_dsp/ppc/sad_vsx.c

+++ b/vpx_dsp/ppc/sad_vsx.c

@@ -17,19 +17,11 @@

 #include "vpx/vpx_integer.h"

 #include "vpx_ports/mem.h"

-#define PROCESS16(offset)           \

-  v_a = vec_vsx_ld(offset, a);      \

-  v_b = vec_vsx_ld(offset, b);      \

-  v_ah = unpack_to_s16_h(v_a);      \

-  v_al = unpack_to_s16_l(v_a);      \

-  v_bh = unpack_to_s16_h(v_b);      \

-  v_bl = unpack_to_s16_l(v_b);      \

-  v_subh = vec_sub(v_ah, v_bh);     \

-  v_subl = vec_sub(v_al, v_bl);     \

-  v_absh = vec_abs(v_subh);         \

-  v_absl = vec_abs(v_subl);         \

-  v_sad = vec_sum4s(v_absh, v_sad); \

-  v_sad = vec_sum4s(v_absl, v_sad);

+#define PROCESS16(offset) /* uses a, b, v_a, v_b, v_abs, v_sad from the expanding scope */ \

+  v_a = vec_vsx_ld(offset, a); /* 16 bytes from a + offset */ \

+  v_b = vec_vsx_ld(offset, b); /* 16 bytes from b + offset */ \

+  v_abs = vec_sub(vec_max(v_a, v_b), vec_min(v_a, v_b)); /* max - min == |a - b| per lane */ \

+  v_sad = vec_sum4s(v_abs, v_sad); /* fold v_abs into the running v_sad accumulator */

 #define SAD8(height)                                                     \

   unsigned int vpx_sad8x##height##_vsx(const uint8_t *a, int a_stride,   \

@@ -39,12 +31,8 @@

     uint32x4_t v_sad = vec_zeros_u32;                                    \

     do {                                                                 \

-      v_a = vec_vsx_ld(0, a);                                            \

-      v_b = vec_vsx_ld(0, b);                                            \

+      PROCESS16(0)                                                       \

-      v_abs = vec_sub(vec_max(v_a, v_b), vec_min(v_a, v_b));             \

-      v_sad = vec_sum4s(v_abs, v_sad);                                   \

-                                                                         \

       a += a_stride;                                                     \

       b += b_stride;                                                     \

       y++;                                                               \

@@ -56,54 +44,48 @@

 #define SAD16(height)                                                     \

   unsigned int vpx_sad16x##height##_vsx(const uint8_t *a, int a_stride,   \

                                         const uint8_t *b, int b_stride) { \

-    int y;                                                                \

-    unsigned int sad[4];                                                  \

-    uint8x16_t v_a, v_b;                                                  \

-    int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl;     \

-    int32x4_t v_sad = vec_splat_s32(0);                                   \

+    int y = 0;                                                            \

+    uint8x16_t v_a, v_b, v_abs;                                           \

+    uint32x4_t v_sad = vec_zeros_u32;                                     \

-    for (y = 0; y < height; y++) {                                        \

+    do {                                                                  \

       PROCESS16(0);                                                       \

       a += a_stride;                                                      \

       b += b_stride;                                                      \

-    }                                                                     \

-    vec_vsx_st((uint32x4_t)v_sad, 0, sad);                                \

+      y++;                                                                \

+    } while (y < height);                                                 \

-    return sad[3] + sad[2] + sad[1] + sad[0];                             \

+    return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0];                     \

 #define SAD32(height)                                                     \

   unsigned int vpx_sad32x##height##_vsx(const uint8_t *a, int a_stride,   \

                                         const uint8_t *b, int b_stride) { \

-    int y;                                                                \

-    unsigned int sad[4];                                                  \

-    uint8x16_t v_a, v_b;                                                  \

-    int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl;     \

-    int32x4_t v_sad = vec_splat_s32(0);                                   \

+    int y = 0;                                                            \

+    uint8x16_t v_a, v_b, v_abs;                                           \

+    uint32x4_t v_sad = vec_zeros_u32;                                     \

-    for (y = 0; y < height; y++) {                                        \

+    do {                                                                  \

       PROCESS16(0);                                                       \

       PROCESS16(16);                                                      \

       a += a_stride;                                                      \

       b += b_stride;                                                      \

-    }                                                                     \

-    vec_vsx_st((uint32x4_t)v_sad, 0, sad);                                \

+      y++;                                                                \

+    } while (y < height);                                                 \

-    return sad[3] + sad[2] + sad[1] + sad[0];                             \

+    return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0];                     \

 #define SAD64(height)                                                     \

   unsigned int vpx_sad64x##height##_vsx(const uint8_t *a, int a_stride,   \

                                         const uint8_t *b, int b_stride) { \

-    int y;                                                                \

-    unsigned int sad[4];                                                  \

-    uint8x16_t v_a, v_b;                                                  \

-    int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl;     \

-    int32x4_t v_sad = vec_splat_s32(0);                                   \

+    int y = 0;                                                            \

+    uint8x16_t v_a, v_b, v_abs;                                           \

+    uint32x4_t v_sad = vec_zeros_u32;                                     \

-    for (y = 0; y < height; y++) {                                        \

+    do {                                                                  \

       PROCESS16(0);                                                       \

       PROCESS16(16);                                                      \

       PROCESS16(32);                                                      \

@@ -111,10 +93,10 @@

       a += a_stride;                                                      \

       b += b_stride;                                                      \

-    }                                                                     \

-    vec_vsx_st((uint32x4_t)v_sad, 0, sad);                                \

+      y++;                                                                \

+    } while (y < height);                                                 \

-    return sad[3] + sad[2] + sad[1] + sad[0];                             \

+    return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0];                     \

 SAD8(4);