shithub: openh264

Download patch

ref: 53c8af4566c435960263f66d53550411ae1a7827
parent: fe57aa46df2ae9eb54a371d32a3472b31f69efd6
author: zhiliang wang <[email protected]>
date: Thu May 15 11:04:44 EDT 2014

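Refine some code: replace the multi-step pairwise-add SAD reductions in pixel_neon_aarch64.S with single across-vector saddlv instructions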

--- a/codec/encoder/core/arm64/pixel_neon_aarch64.S
+++ b/codec/encoder/core/arm64/pixel_neon_aarch64.S
@@ -35,20 +35,16 @@
 #include "arm_arch64_common_macro.S"
 
 .macro CALC_AND_STORE_SAD
-    uaddlp  v2.4s, v2.8h
-    addp    v2.4s, v2.4s, v2.4s
-    addp    v2.2s, v2.2s, v2.2s
-    umov    w0, v2.s[0]
+    saddlv  s2, v2.8h
+    fmov    w0, s2
 .endm
 
 .macro CALC_AND_STORE_SAD_FOUR
-    addp    v0.8h, v28.8h, v29.8h
-    addp    v1.8h, v30.8h, v31.8h
-    addp    v0.8h, v0.8h, v1.8h
-    addp    v0.8h, v0.8h, v0.8h
-    eor     v1.8b, v1.8b, v1.8b
-    saddl   v0.4s, v0.4h, v1.4h
-    st1     {v0.4s}, [x4]
+    saddlv  s28, v28.8h
+    saddlv  s29, v29.8h
+    saddlv  s30, v30.8h
+    saddlv  s31, v31.8h
+    st4     {v28.s, v29.s, v30.s, v31.s}[0], [x4]
 .endm
 
 .macro LOAD_8X8_1
@@ -250,9 +246,8 @@
     ld1     {v1.s}[0], [x2], x3
     uabal   v2.8h, v0.8b, v1.8b
 .endr
-    uaddlp  v2.2s, v2.4h
-    addp    v2.2s, v2.2s, v2.2s
-    umov    w0, v2.s[0]
+    saddlv  s2, v2.4h
+    fmov    w0, s2
 WELS_ASM_ARCH64_FUNC_END
 
 WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSad8x8_AArch64_neon