shithub: libvpx

--- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm

+++ /dev/null

@@ -1,54 +1,0 @@

-;

-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.

-;

-;  Use of this source code is governed by a BSD-style license and patent

-;  grant that can be found in the LICENSE file in the root of the source

-;  tree. All contributing project authors may be found in the AUTHORS

-;  file in the root of the source tree.

-;

-    EXPORT  |vp8_dc_only_idct_add_neon|

-    ARM

-    REQUIRE8

-    PRESERVE8

-    AREA ||.text||, CODE, READONLY, ALIGN=2

-;void vp8_dc_only_idct_add_neon(short input_dc, unsigned char *pred_ptr,

-;                               int pred_stride, unsigned char *dst_ptr,

-;                               int dst_stride)

-; r0  input_dc     (DC coefficient; rounded and scaled below)

-; r1  pred_ptr     (read pointer, advanced by pred_stride after each 4-byte load)

-; r2  pred_stride  (byte step between successive 4-byte loads)

-; r3  dst_ptr      (write pointer, advanced by dst_stride after each 4-byte store)

-; sp  dst_stride   (fifth argument, on the stack; loaded into r12)

-|vp8_dc_only_idct_add_neon| PROC    ; adds ((input_dc + 4) >> 3) to 4x4 bytes of pred and stores them, saturated, to dst

-    add             r0, r0, #4          ; r0 = input_dc + 4 (rounding bias for the shift below)

-    asr             r0, r0, #3          ; r0 = (input_dc + 4) >> 3; arithmetic shift keeps the sign

-    ldr             r12, [sp]           ; r12 = dst_stride, read from the top of the stack

-    vdup.16         q0, r0              ; q0 = low 16 bits of r0 copied into all eight 16-bit lanes

-    vld1.32         {d2[0]}, [r1], r2   ; d2 low word  = 4 bytes at pred_ptr; r1 += pred_stride

-    vld1.32         {d2[1]}, [r1], r2   ; d2 high word = next 4 bytes;        r1 += pred_stride

-    vld1.32         {d4[0]}, [r1], r2   ; d4 low word  = next 4 bytes;        r1 += pred_stride

-    vld1.32         {d4[1]}, [r1]       ; d4 high word = last 4 bytes; no post-increment needed

-    vaddw.u8        q1, q0, d2          ; q1 = q0 + bytes of d2 zero-extended to 16 bits (q1 overlays d2:d3)

-    vaddw.u8        q2, q0, d4          ; q2 = q0 + bytes of d4 zero-extended to 16 bits (q2 overlays d4:d5)

-    vqmovun.s16     d2, q1              ; d2 = q1 lanes read as signed, narrowed to bytes, saturated to 0..255

-    vqmovun.s16     d4, q2              ; d4 = q2 lanes read as signed, narrowed to bytes, saturated to 0..255

-    vst1.32         {d2[0]}, [r3], r12  ; store first 4 result bytes at dst_ptr; r3 += dst_stride

-    vst1.32         {d2[1]}, [r3], r12  ; store second 4 result bytes;           r3 += dst_stride

-    vst1.32         {d4[0]}, [r3], r12  ; store third 4 result bytes;            r3 += dst_stride

-    vst1.32         {d4[1]}, [r3]       ; store last 4 result bytes; no post-increment needed

-    bx              lr                  ; return to the caller

-    ENDP

-    END

--- /dev/null

+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.c

@@ -1,0 +1,42 @@

+/*

+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.

+ *

+ *  Use of this source code is governed by a BSD-style license

+ *  that can be found in the LICENSE file in the root of the source

+ *  tree. An additional intellectual property rights grant can be found

+ *  in the file PATENTS.  All contributing project authors may

+ *  be found in the AUTHORS file in the root of the source tree.

+ */

+#include <arm_neon.h>

+/*
+ * Adds the rounded DC term ((input_dc + 4) >> 3) to a 4x4 block of
+ * prediction bytes and writes the sums, saturated to 0..255, to dst_ptr.
+ *
+ *   input_dc     DC coefficient
+ *   pred_ptr     first of four 4-byte prediction rows, pred_stride bytes apart
+ *   pred_stride  byte step between prediction rows
+ *   dst_ptr      first of four 4-byte output rows, dst_stride bytes apart
+ *   dst_stride   byte step between output rows
+ *
+ * Every row is moved as one 32-bit lane through a uint32_t pointer cast.
+ */
+void vp8_dc_only_idct_add_neon(
+        int16_t input_dc,
+        unsigned char *pred_ptr,
+        int pred_stride,
+        unsigned char *dst_ptr,
+        int dst_stride) {
+    /* A negative term wraps when stored as uint16_t. The widening add below
+     * wraps the same way and the sum is reinterpreted as signed before it is
+     * narrowed, so the result is still pred + term. */
+    const uint16_t dc_term = ((input_dc + 4) >> 3);
+    const uint16x8_t dc_lanes = vdupq_n_u16(dc_term);
+    uint32x2_t row_pair = vdup_n_u32(0);
+    int rows_left;
+
+    /* Each pass handles two rows: eight pixels in one 64-bit vector. */
+    for (rows_left = 4; rows_left > 0; rows_left -= 2) {
+        uint16x8_t sum;
+        uint32x2_t packed;
+
+        /* Gather two 4-byte prediction rows into lanes 0 and 1. */
+        row_pair = vld1_lane_u32((const uint32_t *)pred_ptr, row_pair, 0);
+        pred_ptr += pred_stride;
+        row_pair = vld1_lane_u32((const uint32_t *)pred_ptr, row_pair, 1);
+        pred_ptr += pred_stride;
+
+        /* Widen the bytes to 16 bits, add the DC term, then narrow back with
+         * unsigned saturation. */
+        sum = vaddw_u8(dc_lanes, vreinterpret_u8_u32(row_pair));
+        packed = vreinterpret_u32_u8(vqmovun_s16(vreinterpretq_s16_u16(sum)));
+
+        /* Scatter the two result rows to the destination. */
+        vst1_lane_u32((uint32_t *)dst_ptr, packed, 0);
+        dst_ptr += dst_stride;
+        vst1_lane_u32((uint32_t *)dst_ptr, packed, 1);
+        dst_ptr += dst_stride;
+    }
+}

--- a/vp8/vp8_common.mk

+++ b/vp8/vp8_common.mk

@@ -159,7 +159,6 @@

 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM)

 # common (neon)

-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/dc_only_idct_add_neon$(ASM)

 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/iwalsh_neon$(ASM)

 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/loopfilter_neon$(ASM)

 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)

@@ -187,6 +186,7 @@

 # common (neon intrinsics)

 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/bilinearpredict_neon.c

 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/copymem_neon.c

+VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/dc_only_idct_add_neon.c

 $(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))