ref: 0549fb99eb4dd58934e8837b1b8b678e97b437d4
parent: 704cad26f51bbf1e6e6f9f59dd7e73fd28870f5a
parent: cf6ae234130ce180688eb20299078765bb7dc55c
author: zhilwang <[email protected]>
date: Mon Jun 30 06:01:09 EDT 2014
Merge pull request #1034 from mstorsjo/aarch64-combined-satd-sad Prefer modes in the same order as the reference, in the aarch64 combined intra satd/sad
--- a/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S
+++ b/codec/encoder/core/arm64/intra_pred_sad_3_opt_aarch64_neon.S
@@ -97,14 +97,22 @@
#ifdef __APPLE__
.macro SELECT_BEST_COST // Three-way select between the first-argument register, w1 and w2; the winning index (0, 1 or 2) ends up in w7. Second argument: condition for cset, third argument: condition for the csel instructions. Overwrites w6, w7 and the condition flags.
cmp w1, $0
- csel $0, $0, w1, hs
- cset w7, lo
+ csel $0, $0, w1, $2 // keep the current value while the third-argument condition holds, otherwise take w1
+ cset w7, $1 // w7 = 1 when the second-argument condition holds, else 0; the wrappers in this file pass the inverse of the third argument, so 1 means w1 was taken
cmp w2, $0
mov w6, #2 // index recorded when w2 wins; mov does not alter the flags set by the cmp above
- csel $0, $0, w2, hs
- csel w7, w7, w6, hs
+ csel $0, $0, w2, $2 // keep the current value while the condition holds, otherwise take w2
+ csel w7, w7, w6, $2 // same condition: keep the earlier index, otherwise record 2
.endm
+.macro SELECT_BEST_COST_PREFER_HIGHER arg0 // Unsigned minimum of arg0, w1 and w2 left in arg0, winning index (arg0=0, w1=1, w2=2) in w7; on a tie the later candidate wins.
+ SELECT_BEST_COST \arg0, ls, hi // hi: the current best is kept only when the challenger is strictly greater (unsigned); ls is its inverse, used for the index of w1
+.endm
+
+.macro SELECT_BEST_COST_PREFER_LOWER arg0 // Unsigned minimum of arg0, w1 and w2 left in arg0, winning index (arg0=0, w1=1, w2=2) in w7; on a tie the earlier candidate wins.
+ SELECT_BEST_COST \arg0, lo, hs // hs: the current best is kept when the challenger is greater or equal (unsigned); lo is its inverse, used for the index of w1
+.endm
+
.macro LOAD_CHROMA_DATA
sub x9, $0, x1
ld1 {$1}, [x9] //top_cb
@@ -173,16 +181,24 @@
add $7, $7, v4.4s
.endm
#else
-.macro SELECT_BEST_COST arg0
+.macro SELECT_BEST_COST arg0, arg1, arg2 // Three-way select between register arg0, w1 and w2; the winning index (0, 1 or 2) ends up in w7. arg1: condition for cset, arg2: condition for the csel instructions. Overwrites w6, w7 and the condition flags.
cmp w1, \arg0
- csel \arg0, \arg0, w1, hs
- cset w7, lo
+ csel \arg0, \arg0, w1, \arg2 // keep the current value while condition arg2 holds, otherwise take w1
+ cset w7, \arg1 // w7 = 1 when condition arg1 holds, else 0; the wrappers in this file pass the inverse of arg2, so 1 means w1 was taken
cmp w2, \arg0
mov w6, #2 // index recorded when w2 wins; mov does not alter the flags set by the cmp above
- csel \arg0, \arg0, w2, hs
- csel w7, w7, w6, hs
+ csel \arg0, \arg0, w2, \arg2 // keep the current value while condition arg2 holds, otherwise take w2
+ csel w7, w7, w6, \arg2 // same condition: keep the earlier index, otherwise record 2
.endm
+.macro SELECT_BEST_COST_PREFER_HIGHER arg0 // Unsigned minimum of arg0, w1 and w2 left in arg0, winning index (arg0=0, w1=1, w2=2) in w7; on a tie the later candidate wins.
+ SELECT_BEST_COST \arg0, ls, hi // hi: the current best is kept only when the challenger is strictly greater (unsigned); ls is its inverse, used for the index of w1
+.endm
+
+.macro SELECT_BEST_COST_PREFER_LOWER arg0 // Unsigned minimum of arg0, w1 and w2 left in arg0, winning index (arg0=0, w1=1, w2=2) in w7; on a tie the earlier candidate wins.
+ SELECT_BEST_COST \arg0, lo, hs // hs: the current best is kept when the challenger is greater or equal (unsigned); lo is its inverse, used for the index of w1
+.endm
+
.macro LOAD_CHROMA_DATA arg0, arg1, arg2
sub x9, \arg0, x1
ld1 {\arg1}, [x9] //top_cb
@@ -347,7 +363,7 @@
saddlv s31, v31.8h
fmov w0, s31
- SELECT_BEST_COST w0
+ SELECT_BEST_COST_PREFER_HIGHER w0
str w7, [x4]
WELS_ASM_ARCH64_FUNC_END
@@ -399,7 +415,7 @@
fmov w2, s31
add w2, w2, w5, lsl #1
- SELECT_BEST_COST w0
+ SELECT_BEST_COST_PREFER_LOWER w0
str w7, [x4]
WELS_ASM_ARCH64_FUNC_END
@@ -464,7 +480,7 @@
add w2, w2, w6
mov w10, w0
- SELECT_BEST_COST w10
+ SELECT_BEST_COST_PREFER_HIGHER w10
str w7, [x5]
@@ -579,7 +595,7 @@
addv s31, v31.4s
fmov w0, s31
- SELECT_BEST_COST w0
+ SELECT_BEST_COST_PREFER_HIGHER w0
str w7, [x4]
WELS_ASM_ARCH64_FUNC_END
@@ -656,7 +672,7 @@
fmov w2, s31
add w2, w2, w5, lsl #1
- SELECT_BEST_COST w0
+ SELECT_BEST_COST_PREFER_LOWER w0
str w7, [x4]