ref: 4afb83e3b049b00e4e1f061fe8db9012a7e0104c
parent: 964e98fb766eb16db14568ce90539b88f2d85c1d
parent: ae62909aabb8f86a5ab723633a6caead01a8d123
author: zhilwang <[email protected]>
date: Mon Jul 28 11:33:01 EDT 2014
Merge pull request #1227 from mstorsjo/unify-asm-indentation Unify the indentation in the new aarch64 assembly files
--- a/codec/encoder/core/arm64/memory_aarch64_neon.S
+++ b/codec/encoder/core/arm64/memory_aarch64_neon.S
@@ -36,26 +36,26 @@
WELS_ASM_AARCH64_FUNC_BEGIN WelsSetMemZero_AArch64_neon
- eor v0.16b, v0.16b, v0.16b
- cmp x1, #32
- b.eq mem_zero_32_neon_start
- b.lt mem_zero_24_neon_start
+ eor v0.16b, v0.16b, v0.16b
+ cmp x1, #32
+ b.eq mem_zero_32_neon_start
+ b.lt mem_zero_24_neon_start
mem_zero_loop:
- subs x1, x1, #64
- st1 {v0.16b}, [x0], #16
- st1 {v0.16b}, [x0], #16
- st1 {v0.16b}, [x0], #16
- st1 {v0.16b}, [x0], #16
- b.ne mem_zero_loop
- b mem_zero_end
+ subs x1, x1, #64
+ st1 {v0.16b}, [x0], #16
+ st1 {v0.16b}, [x0], #16
+ st1 {v0.16b}, [x0], #16
+ st1 {v0.16b}, [x0], #16
+ b.ne mem_zero_loop
+ b mem_zero_end
mem_zero_32_neon_start:
- st1 {v0.16b}, [x0], #16
- st1 {v0.16b}, [x0], #16
- b mem_zero_end
+ st1 {v0.16b}, [x0], #16
+ st1 {v0.16b}, [x0], #16
+ b mem_zero_end
mem_zero_24_neon_start:
- st1 {v0.16b}, [x0], #16
- st1 {v0.8b}, [x0], #8
+ st1 {v0.16b}, [x0], #16
+ st1 {v0.8b}, [x0], #8
mem_zero_end:
WELS_ASM_AARCH64_FUNC_END
--- a/codec/processing/src/arm64/vaa_calc_aarch64_neon.S
+++ b/codec/processing/src/arm64/vaa_calc_aarch64_neon.S
@@ -36,19 +36,18 @@
#ifdef __APPLE__
.macro ABS_SUB_SUM_16BYTES
- ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
- uabal $0, v0.8b, v1.8b
- uabal2 $1, v0.16b,v1.16b
+ ld1 {v0.16b}, [x0], x4
+ ld1 {v1.16b}, [x1], x4
+ uabal $0, v0.8b, v1.8b
+ uabal2 $1, v0.16b,v1.16b
.endm
.macro ABS_SUB_SUM_8x16BYTES
- ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
- uabdl $0, v0.8b, v1.8b
- uabdl2 $1, v0.16b,v1.16b
+ ld1 {v0.16b}, [x0], x4
+ ld1 {v1.16b}, [x1], x4
+ uabdl $0, v0.8b, v1.8b
+ uabdl2 $1, v0.16b,v1.16b
- ABS_SUB_SUM_16BYTES $0, $1
ABS_SUB_SUM_16BYTES $0, $1
ABS_SUB_SUM_16BYTES $0, $1
ABS_SUB_SUM_16BYTES $0, $1
@@ -55,22 +54,22 @@
ABS_SUB_SUM_16BYTES $0, $1
ABS_SUB_SUM_16BYTES $0, $1
ABS_SUB_SUM_16BYTES $0, $1
+ ABS_SUB_SUM_16BYTES $0, $1
.endm
#else
.macro ABS_SUB_SUM_16BYTES arg0, arg1
- ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
- uabal \arg0, v0.8b, v1.8b
- uabal2 \arg1, v0.16b,v1.16b
+ ld1 {v0.16b}, [x0], x4
+ ld1 {v1.16b}, [x1], x4
+ uabal \arg0, v0.8b, v1.8b
+ uabal2 \arg1, v0.16b,v1.16b
.endm
.macro ABS_SUB_SUM_8x16BYTES arg0, arg1
- ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
- uabdl \arg0, v0.8b, v1.8b
- uabdl2 \arg1, v0.16b,v1.16b
+ ld1 {v0.16b}, [x0], x4
+ ld1 {v1.16b}, [x1], x4
+ uabdl \arg0, v0.8b, v1.8b
+ uabdl2 \arg1, v0.16b,v1.16b
- ABS_SUB_SUM_16BYTES \arg0, \arg1
ABS_SUB_SUM_16BYTES \arg0, \arg1
ABS_SUB_SUM_16BYTES \arg0, \arg1
ABS_SUB_SUM_16BYTES \arg0, \arg1
@@ -77,12 +76,13 @@
ABS_SUB_SUM_16BYTES \arg0, \arg1
ABS_SUB_SUM_16BYTES \arg0, \arg1
ABS_SUB_SUM_16BYTES \arg0, \arg1
+ ABS_SUB_SUM_16BYTES \arg0, \arg1
.endm
#endif
/*
* void vaa_calc_sad_neon(uint8_t *cur_data, uint8_t *ref_data, int32_t pic_width, int32_t pic_height, int32_t pic_stride,
- * int32_t *psadframe, int32_t *psad8x8)
+ * int32_t *psadframe, int32_t *psad8x8)
*/
WELS_ASM_AARCH64_FUNC_BEGIN VAACalcSad_AArch64_neon
eor v31.16b, v31.16b, v31.16b
@@ -121,7 +121,7 @@
.macro SAD_SD_MAD_8x16BYTES
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v31.16b, v0.16b, v1.16b
uaddlp v2.8h, v31.16b
uaddlp v4.8h, v0.16b
@@ -128,7 +128,7 @@
uaddlp v5.8h, v1.16b
.rept 7
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v30.16b, v0.16b, v1.16b
umax v31.16b, v31.16b,v30.16b
uadalp v2.8h, v30.16b
@@ -138,7 +138,7 @@
.endm
/*
* void vaa_calc_sad_bgd_neon(uint8_t *cur_data, uint8_t *ref_data, int32_t pic_width, int32_t pic_height, int32_t pic_stride,
- * int32_t *psadframe, int32_t *psad8x8, int32_t *p_sd8x8, uint8_t *p_mad8x8)
+ * int32_t *psadframe, int32_t *psad8x8, int32_t *p_sd8x8, uint8_t *p_mad8x8)
*/
WELS_ASM_AARCH64_FUNC_BEGIN VAACalcSadBgd_AArch64_neon
ldr x15, [sp, #0]
@@ -196,7 +196,7 @@
.macro SAD_SSD_BGD_8x16BYTES_1
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v31.16b, v0.16b, v1.16b
umull v30.8h, v31.8b, v31.8b
uaddlp v29.4s, v30.8h
@@ -214,7 +214,7 @@
uaddlp v5.8h, v1.16b
.rept 7
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v3.16b, v0.16b, v1.16b
umax v31.16b, v31.16b,v3.16b //p_mad
umull v30.8h, v3.8b, v3.8b
@@ -236,7 +236,7 @@
.macro SAD_SSD_BGD_8x16BYTES_2
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v26.16b, v0.16b, v1.16b
umull v30.8h, v26.8b, v26.8b
uadalp v29.4s, v30.8h
@@ -254,7 +254,7 @@
uaddlp v7.8h, v1.16b
.rept 7
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v3.16b, v0.16b, v1.16b
umax v26.16b, v26.16b,v3.16b //p_mad
umull v30.8h, v3.8b, v3.8b
@@ -347,7 +347,7 @@
.macro SAD_SSD_8x16BYTES_1
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v31.16b, v0.16b, v1.16b
umull v30.8h, v31.8b, v31.8b
uaddlp v29.4s, v30.8h
@@ -363,7 +363,7 @@
uaddlp v2.8h, v31.16b // p_sad
.rept 7
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v3.16b, v0.16b, v1.16b
umull v30.8h, v3.8b, v3.8b
uadalp v29.4s, v30.8h
@@ -382,7 +382,7 @@
.macro SAD_SSD_8x16BYTES_2
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v26.16b, v0.16b, v1.16b
umull v30.8h, v26.8b, v26.8b
uadalp v29.4s, v30.8h
@@ -400,7 +400,7 @@
uaddlp v7.8h, v1.16b
.rept 7
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v3.16b, v0.16b, v1.16b
umull v30.8h, v3.8b, v3.8b
uadalp v29.4s, v30.8h
@@ -469,7 +469,7 @@
.macro SAD_VAR_8x16BYTES_1
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v31.16b, v0.16b, v1.16b
uaddlp v2.8h, v31.16b // p_sad
@@ -481,7 +481,7 @@
.rept 7
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v3.16b, v0.16b, v1.16b
uadalp v2.8h, v3.16b //p_sad
@@ -494,7 +494,7 @@
.endm
.macro SAD_VAR_8x16BYTES_2
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v26.16b, v0.16b, v1.16b
uaddlp v16.8h,v26.16b // p_sad
@@ -505,7 +505,7 @@
uadalp v27.4s, v30.8h // p_sqsum
.rept 7
ld1 {v0.16b}, [x0], x4
- ld1 {v1.16b}, [x1], x4
+ ld1 {v1.16b}, [x1], x4
uabd v3.16b, v0.16b, v1.16b
uadalp v16.8h, v3.16b //p_sad