shithub: openh264

ref: 4afb83e3b049b00e4e1f061fe8db9012a7e0104c
parent: 964e98fb766eb16db14568ce90539b88f2d85c1d
parent: ae62909aabb8f86a5ab723633a6caead01a8d123
author: zhilwang <[email protected]>
date: Mon Jul 28 11:33:01 EDT 2014

Merge pull request #1227 from mstorsjo/unify-asm-indentation

Unify the indentation in the new aarch64 assembly files
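The convention the patch applies, visible in the hunks below: instructions get a uniform four-space indent, while labels and macro boundaries stay flush left; the old files mixed two-space and tab indentation. A minimal before/after sketch, using a fragment from the WelsSetMemZero_AArch64_neon hunk in this patch:

Before (mixed two-space/tab indent):
  eor v0.16b, v0.16b, v0.16b
  cmp x1, #32
mem_zero_loop:
  subs x1, x1, #64

After (uniform four-space indent):
    eor v0.16b, v0.16b, v0.16b
    cmp x1, #32
mem_zero_loop:
    subs x1, x1, #64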

--- a/codec/encoder/core/arm64/memory_aarch64_neon.S
+++ b/codec/encoder/core/arm64/memory_aarch64_neon.S
@@ -36,26 +36,26 @@
 
 
 WELS_ASM_AARCH64_FUNC_BEGIN WelsSetMemZero_AArch64_neon
-  eor v0.16b, v0.16b, v0.16b
-  cmp x1, #32
-  b.eq mem_zero_32_neon_start
-  b.lt mem_zero_24_neon_start
+    eor v0.16b, v0.16b, v0.16b
+    cmp x1, #32
+    b.eq mem_zero_32_neon_start
+    b.lt mem_zero_24_neon_start
 mem_zero_loop:
-  subs x1, x1, #64
-  st1 {v0.16b}, [x0], #16
-  st1 {v0.16b}, [x0], #16
-  st1 {v0.16b}, [x0], #16
-  st1 {v0.16b}, [x0], #16
-  b.ne mem_zero_loop
-  b mem_zero_end
+    subs x1, x1, #64
+    st1 {v0.16b}, [x0], #16
+    st1 {v0.16b}, [x0], #16
+    st1 {v0.16b}, [x0], #16
+    st1 {v0.16b}, [x0], #16
+    b.ne mem_zero_loop
+    b mem_zero_end
 
 mem_zero_32_neon_start:
-  st1 {v0.16b}, [x0], #16
-  st1 {v0.16b}, [x0], #16
-  b mem_zero_end
+    st1 {v0.16b}, [x0], #16
+    st1 {v0.16b}, [x0], #16
+    b mem_zero_end
 mem_zero_24_neon_start:
-  st1 {v0.16b}, [x0], #16
-  st1 {v0.8b}, [x0], #8
+    st1 {v0.16b}, [x0], #16
+    st1 {v0.8b}, [x0], #8
 mem_zero_end:
 
 WELS_ASM_AARCH64_FUNC_END
--- a/codec/processing/src/arm64/vaa_calc_aarch64_neon.S
+++ b/codec/processing/src/arm64/vaa_calc_aarch64_neon.S
@@ -36,19 +36,18 @@
 
 #ifdef __APPLE__
 .macro ABS_SUB_SUM_16BYTES
-	ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
-	uabal   $0, v0.8b, v1.8b
-	uabal2  $1, v0.16b,v1.16b
+    ld1     {v0.16b}, [x0], x4
+    ld1     {v1.16b}, [x1], x4
+    uabal   $0, v0.8b, v1.8b
+    uabal2  $1, v0.16b,v1.16b
 .endm
 
 .macro ABS_SUB_SUM_8x16BYTES
-	ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
-	uabdl   $0, v0.8b, v1.8b
-	uabdl2  $1, v0.16b,v1.16b
+    ld1     {v0.16b}, [x0], x4
+    ld1     {v1.16b}, [x1], x4
+    uabdl   $0, v0.8b, v1.8b
+    uabdl2  $1, v0.16b,v1.16b
 
-	ABS_SUB_SUM_16BYTES $0, $1
     ABS_SUB_SUM_16BYTES $0, $1
     ABS_SUB_SUM_16BYTES $0, $1
     ABS_SUB_SUM_16BYTES $0, $1
@@ -55,22 +54,22 @@
     ABS_SUB_SUM_16BYTES $0, $1
     ABS_SUB_SUM_16BYTES $0, $1
     ABS_SUB_SUM_16BYTES $0, $1
+    ABS_SUB_SUM_16BYTES $0, $1
 .endm
 #else
 .macro ABS_SUB_SUM_16BYTES arg0, arg1
-	ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
-	uabal   \arg0, v0.8b, v1.8b
-	uabal2  \arg1, v0.16b,v1.16b
+    ld1     {v0.16b}, [x0], x4
+    ld1     {v1.16b}, [x1], x4
+    uabal   \arg0, v0.8b, v1.8b
+    uabal2  \arg1, v0.16b,v1.16b
 .endm
 
 .macro ABS_SUB_SUM_8x16BYTES arg0, arg1
-	ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
-	uabdl   \arg0, v0.8b, v1.8b
-	uabdl2  \arg1, v0.16b,v1.16b
+    ld1     {v0.16b}, [x0], x4
+    ld1     {v1.16b}, [x1], x4
+    uabdl   \arg0, v0.8b, v1.8b
+    uabdl2  \arg1, v0.16b,v1.16b
 
-	ABS_SUB_SUM_16BYTES \arg0, \arg1
     ABS_SUB_SUM_16BYTES \arg0, \arg1
     ABS_SUB_SUM_16BYTES \arg0, \arg1
     ABS_SUB_SUM_16BYTES \arg0, \arg1
@@ -77,12 +76,13 @@
     ABS_SUB_SUM_16BYTES \arg0, \arg1
     ABS_SUB_SUM_16BYTES \arg0, \arg1
     ABS_SUB_SUM_16BYTES \arg0, \arg1
+    ABS_SUB_SUM_16BYTES \arg0, \arg1
 .endm
 #endif
 
 /*
  * void vaa_calc_sad_neon(uint8_t *cur_data, uint8_t *ref_data, int32_t pic_width, int32_t pic_height, int32_t pic_stride,
- *						int32_t *psadframe, int32_t *psad8x8)
+ *                      int32_t *psadframe, int32_t *psad8x8)
  */
 WELS_ASM_AARCH64_FUNC_BEGIN VAACalcSad_AArch64_neon
     eor     v31.16b, v31.16b, v31.16b
@@ -121,7 +121,7 @@
 
 .macro SAD_SD_MAD_8x16BYTES
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v31.16b, v0.16b, v1.16b
     uaddlp  v2.8h, v31.16b
     uaddlp  v4.8h, v0.16b
@@ -128,7 +128,7 @@
     uaddlp  v5.8h, v1.16b
 .rept 7
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v30.16b, v0.16b, v1.16b
     umax    v31.16b, v31.16b,v30.16b
     uadalp  v2.8h, v30.16b
@@ -138,7 +138,7 @@
 .endm
 /*
  * void vaa_calc_sad_bgd_neon(uint8_t *cur_data, uint8_t *ref_data, int32_t pic_width, int32_t pic_height, int32_t pic_stride,
- *							   int32_t *psadframe, int32_t *psad8x8, int32_t *p_sd8x8, uint8_t *p_mad8x8)
+ *                             int32_t *psadframe, int32_t *psad8x8, int32_t *p_sd8x8, uint8_t *p_mad8x8)
  */
 WELS_ASM_AARCH64_FUNC_BEGIN VAACalcSadBgd_AArch64_neon
     ldr     x15, [sp, #0]
@@ -196,7 +196,7 @@
 
 .macro SAD_SSD_BGD_8x16BYTES_1
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v31.16b, v0.16b, v1.16b
     umull   v30.8h, v31.8b, v31.8b
     uaddlp  v29.4s, v30.8h
@@ -214,7 +214,7 @@
     uaddlp  v5.8h, v1.16b
 .rept 7
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v3.16b, v0.16b, v1.16b
     umax    v31.16b, v31.16b,v3.16b     //p_mad
     umull   v30.8h, v3.8b, v3.8b
@@ -236,7 +236,7 @@
 
 .macro SAD_SSD_BGD_8x16BYTES_2
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v26.16b, v0.16b, v1.16b
     umull   v30.8h, v26.8b, v26.8b
     uadalp  v29.4s, v30.8h
@@ -254,7 +254,7 @@
     uaddlp  v7.8h, v1.16b
 .rept 7
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v3.16b, v0.16b, v1.16b
     umax    v26.16b, v26.16b,v3.16b     //p_mad
     umull   v30.8h, v3.8b, v3.8b
@@ -347,7 +347,7 @@
 
 .macro SAD_SSD_8x16BYTES_1
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v31.16b, v0.16b, v1.16b
     umull   v30.8h, v31.8b, v31.8b
     uaddlp  v29.4s, v30.8h
@@ -363,7 +363,7 @@
     uaddlp  v2.8h, v31.16b      //  p_sad
 .rept 7
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v3.16b, v0.16b, v1.16b
     umull   v30.8h, v3.8b, v3.8b
     uadalp  v29.4s, v30.8h
@@ -382,7 +382,7 @@
 
 .macro SAD_SSD_8x16BYTES_2
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v26.16b, v0.16b, v1.16b
     umull   v30.8h, v26.8b, v26.8b
     uadalp  v29.4s, v30.8h
@@ -400,7 +400,7 @@
     uaddlp  v7.8h, v1.16b
 .rept 7
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v3.16b, v0.16b, v1.16b
     umull   v30.8h, v3.8b, v3.8b
     uadalp  v29.4s, v30.8h
@@ -469,7 +469,7 @@
 
 .macro SAD_VAR_8x16BYTES_1
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v31.16b, v0.16b, v1.16b
     uaddlp  v2.8h, v31.16b      //  p_sad
 
@@ -481,7 +481,7 @@
 
 .rept 7
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v3.16b, v0.16b, v1.16b
     uadalp  v2.8h, v3.16b              //p_sad
 
@@ -494,7 +494,7 @@
 .endm
 .macro SAD_VAR_8x16BYTES_2
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v26.16b, v0.16b, v1.16b
     uaddlp  v16.8h,v26.16b      //  p_sad
 
@@ -505,7 +505,7 @@
     uadalp  v27.4s, v30.8h      //  p_sqsum
 .rept 7
     ld1     {v0.16b}, [x0], x4
-	ld1     {v1.16b}, [x1], x4
+    ld1     {v1.16b}, [x1], x4
     uabd    v3.16b, v0.16b, v1.16b
     uadalp  v16.8h, v3.16b              //p_sad