shithub: openh264

Download patch

ref: 0549fb99eb4dd58934e8837b1b8b678e97b437d4
parent: 704cad26f51bbf1e6e6f9f59dd7e73fd28870f5a
parent: cf6ae234130ce180688eb20299078765bb7dc55c
author: zhilwang <[email protected]>
date: Mon Jun 30 06:01:09 EDT 2014

Merge pull request #1034 from mstorsjo/aarch64-combined-satd-sad

When candidate costs are equal, prefer modes in the same order as the reference implementation in the aarch64 combined intra SATD/SAD functions.

--- a/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S
+++ b/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S
@@ -97,14 +97,22 @@
 #ifdef __APPLE__
 .macro SELECT_BEST_COST
     cmp     w1, $0
-    csel    $0, $0, w1, hs
-    cset    w7, lo
+    csel    $0, $0, w1, $2
+    cset    w7, $1
     cmp     w2, $0
     mov     w6, #2
-    csel    $0, $0, w2, hs
-    csel    w7, w7, w6, hs
+    csel    $0, $0, w2, $2
+    csel    w7, w7, w6, $2
 .endm
 
+.macro SELECT_BEST_COST_PREFER_HIGHER arg0
+    SELECT_BEST_COST \arg0, ls, hi
+.endm
+
+.macro SELECT_BEST_COST_PREFER_LOWER arg0
+    SELECT_BEST_COST \arg0, lo, hs
+.endm
+
 .macro LOAD_CHROMA_DATA
     sub     x9, $0, x1
     ld1     {$1}, [x9]      //top_cb
@@ -173,16 +181,24 @@
     add     $7, $7, v4.4s
 .endm
 #else
-.macro SELECT_BEST_COST arg0
+.macro SELECT_BEST_COST arg0, arg1, arg2
     cmp     w1, \arg0
-    csel    \arg0, \arg0, w1, hs
-    cset    w7, lo
+    csel    \arg0, \arg0, w1, \arg2
+    cset    w7, \arg1
     cmp     w2, \arg0
     mov     w6, #2
-    csel    \arg0, \arg0, w2, hs
-    csel    w7, w7, w6, hs
+    csel    \arg0, \arg0, w2, \arg2
+    csel    w7, w7, w6, \arg2
 .endm
 
+.macro SELECT_BEST_COST_PREFER_HIGHER arg0
+    SELECT_BEST_COST \arg0, ls, hi
+.endm
+
+.macro SELECT_BEST_COST_PREFER_LOWER arg0
+    SELECT_BEST_COST \arg0, lo, hs
+.endm
+
 .macro LOAD_CHROMA_DATA arg0, arg1, arg2
     sub     x9, \arg0, x1
     ld1     {\arg1}, [x9]      //top_cb
@@ -347,7 +363,7 @@
     saddlv  s31, v31.8h
     fmov    w0, s31
 
-    SELECT_BEST_COST w0
+    SELECT_BEST_COST_PREFER_HIGHER w0
 
     str     w7, [x4]
 WELS_ASM_ARCH64_FUNC_END
@@ -399,7 +415,7 @@
     fmov    w2, s31
     add     w2, w2, w5, lsl #1
 
-    SELECT_BEST_COST w0
+    SELECT_BEST_COST_PREFER_LOWER w0
 
     str     w7, [x4]
 WELS_ASM_ARCH64_FUNC_END
@@ -464,7 +480,7 @@
     add     w2, w2, w6
 
     mov     w10, w0
-    SELECT_BEST_COST w10
+    SELECT_BEST_COST_PREFER_HIGHER w10
 
     str     w7, [x5]
 
@@ -579,7 +595,7 @@
     addv    s31, v31.4s
     fmov    w0, s31
 
-    SELECT_BEST_COST w0
+    SELECT_BEST_COST_PREFER_HIGHER w0
 
     str     w7, [x4]
 WELS_ASM_ARCH64_FUNC_END
@@ -656,7 +672,7 @@
     fmov    w2, s31
     add     w2, w2, w5, lsl #1
 
-    SELECT_BEST_COST w0
+    SELECT_BEST_COST_PREFER_LOWER w0
 
     str     w7, [x4]