shithub: libvpx

--- a/vpx_dsp/arm/vpx_convolve8_avg_neon.c

+++ b/vpx_dsp/arm/vpx_convolve8_avg_neon.c

@@ -9,6 +9,7 @@

  */

 #include <arm_neon.h>

+#include <assert.h>

 #include "./vpx_config.h"

 #include "./vpx_dsp_rtcd.h"

@@ -15,17 +16,6 @@

 #include "vpx/vpx_integer.h"

 #include "vpx_ports/mem.h"

-void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

-                               uint8_t *dst, ptrdiff_t dst_stride,

-                               const int16_t *filter_x, int x_step_q4,

-                               const int16_t *filter_y, int y_step_q4,

-                               int w, int h);

-void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,

-                               uint8_t *dst, ptrdiff_t dst_stride,

-                               const int16_t *filter_x, int x_step_q4,

-                               const int16_t *filter_y, int y_step_q4,

-                               int w, int h);

 static INLINE int32x4_t MULTIPLY_BY_Q0(

     int16x4_t dsrc0,

     int16x4_t dsrc1,

@@ -82,12 +72,7 @@

   uint16x4x2_t d0x2u16, d1x2u16;

   uint32x4x2_t q0x2u32;

-  if (x_step_q4 != 16) {

-    vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,

-                              filter_x, x_step_q4,

-                              filter_y, y_step_q4, w, h);

-    return;

-}

+  assert(x_step_q4 == 16);

   q0s16 = vld1q_s16(filter_x);

@@ -271,12 +256,7 @@

   uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;

   int32x4_t q1s32, q2s32, q14s32, q15s32;

-  if (y_step_q4 != 16) {

-    vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,

-                             filter_x, x_step_q4,

-                             filter_y, y_step_q4, w, h);

-    return;

-  }

+  assert(y_step_q4 == 16);

   src -= src_stride * 3;

   q0s16 = vld1q_s16(filter_y);

--- a/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm

+++ b/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm

@@ -19,8 +19,6 @@

     EXPORT  |vpx_convolve8_avg_horiz_neon|

     EXPORT  |vpx_convolve8_avg_vert_neon|

-    IMPORT  |vpx_convolve8_avg_horiz_c|

-    IMPORT  |vpx_convolve8_avg_vert_c|

     ARM

     REQUIRE8

     PRESERVE8

@@ -52,10 +50,6 @@

 ; sp[]int h

 |vpx_convolve8_avg_horiz_neon| PROC

-    ldr             r12, [sp, #4]           ; x_step_q4

-    cmp             r12, #16

-    bne             vpx_convolve8_avg_horiz_c

     push            {r4-r10, lr}

     sub             r0, r0, #3              ; adjust for taps

@@ -184,10 +178,6 @@

     ENDP

 |vpx_convolve8_avg_vert_neon| PROC

-    ldr             r12, [sp, #12]

-    cmp             r12, #16

-    bne             vpx_convolve8_avg_vert_c

     push            {r4-r8, lr}

     ; adjust for taps

--- a/vpx_dsp/arm/vpx_convolve8_neon.c

+++ b/vpx_dsp/arm/vpx_convolve8_neon.c

@@ -9,6 +9,7 @@

  */

 #include <arm_neon.h>

+#include <assert.h>

 #include "./vpx_config.h"

 #include "./vpx_dsp_rtcd.h"

@@ -15,17 +16,6 @@

 #include "vpx/vpx_integer.h"

 #include "vpx_ports/mem.h"

-void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

-                           uint8_t *dst, ptrdiff_t dst_stride,

-                           const int16_t *filter_x, int x_step_q4,

-                           const int16_t *filter_y, int y_step_q4,

-                           int w, int h);

-void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,

-                           uint8_t *dst, ptrdiff_t dst_stride,

-                           const int16_t *filter_x, int x_step_q4,

-                           const int16_t *filter_y, int y_step_q4,

-                           int w, int h);

 static INLINE int32x4_t MULTIPLY_BY_Q0(

     int16x4_t dsrc0,

     int16x4_t dsrc1,

@@ -82,12 +72,7 @@

   uint16x4x2_t d0x2u16, d1x2u16;

   uint32x4x2_t q0x2u32;

-  if (x_step_q4 != 16) {

-    vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,

-                          filter_x, x_step_q4,

-                          filter_y, y_step_q4, w, h);

-    return;

-  }

+  assert(x_step_q4 == 16);

   q0s16 = vld1q_s16(filter_x);

@@ -255,12 +240,7 @@

   uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;

   int32x4_t q1s32, q2s32, q14s32, q15s32;

-  if (y_step_q4 != 16) {

-    vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,

-                         filter_x, x_step_q4,

-                         filter_y, y_step_q4, w, h);

-    return;

-  }

+  assert(y_step_q4 == 16);

   src -= src_stride * 3;

   q0s16 = vld1q_s16(filter_y);

--- a/vpx_dsp/arm/vpx_convolve8_neon_asm.asm

+++ b/vpx_dsp/arm/vpx_convolve8_neon_asm.asm

@@ -19,8 +19,6 @@

     EXPORT  |vpx_convolve8_horiz_neon|

     EXPORT  |vpx_convolve8_vert_neon|

-    IMPORT  |vpx_convolve8_horiz_c|

-    IMPORT  |vpx_convolve8_vert_c|

     ARM

     REQUIRE8

     PRESERVE8

@@ -52,10 +50,6 @@

 ; sp[]int h

 |vpx_convolve8_horiz_neon| PROC

-    ldr             r12, [sp, #4]           ; x_step_q4

-    cmp             r12, #16

-    bne             vpx_convolve8_horiz_c

     push            {r4-r10, lr}

     sub             r0, r0, #3              ; adjust for taps

@@ -173,10 +167,6 @@

     ENDP

 |vpx_convolve8_vert_neon| PROC

-    ldr             r12, [sp, #12]

-    cmp             r12, #16

-    bne             vpx_convolve8_vert_c

     push            {r4-r8, lr}

     ; adjust for taps

--- a/vpx_dsp/arm/vpx_convolve_neon.c

+++ b/vpx_dsp/arm/vpx_convolve_neon.c

@@ -8,6 +8,8 @@

  *  be found in the AUTHORS file in the root of the source tree.

  */

+#include <assert.h>

 #include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/vpx_dsp_common.h"

 #include "vpx_ports/mem.h"

@@ -25,14 +27,8 @@

   // Account for the vertical phase needing 3 lines prior and 4 lines post

   int intermediate_height = h + 7;

-  if (x_step_q4 != 16 || y_step_q4 != 16) {

-    vpx_convolve8_c(src, src_stride,

-                    dst, dst_stride,

-                    filter_x, x_step_q4,

-                    filter_y, y_step_q4,

-                    w, h);

-    return;

-  }

+  assert(y_step_q4 == 16);

+  assert(x_step_q4 == 16);

   /* Filter starting 3 lines back. The neon implementation will ignore the

    * given height and filter a multiple of 4 lines. Since this goes in to

@@ -59,14 +55,8 @@

   DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);

   int intermediate_height = h + 7;

-  if (x_step_q4 != 16 || y_step_q4 != 16) {

-    vpx_convolve8_avg_c(src, src_stride,

-                        dst, dst_stride,

-                        filter_x, x_step_q4,

-                        filter_y, y_step_q4,

-                        w, h);

-    return;

-  }

+  assert(y_step_q4 == 16);

+  assert(x_step_q4 == 16);

   /* This implementation has the same issues as above. In addition, we only want

    * to average the values after both passes.