ref: 9fccf1629ea99ab2613601e2ecd48646acb1e219
parent: 2559e19e1944f3ff880274d0a7d172976d082c33
author: cinap_lenrek <[email protected]>
date: Fri Nov 20 01:25:56 EST 2015
libmp: add mpvecadd()/mpvecsub() assembly versions for arm
--- a/sys/src/libmp/arm/mkfile
+++ b/sys/src/libmp/arm/mkfile
@@ -2,7 +2,7 @@
</$objtype/mkfile
LIB=/$objtype/lib/libmp.a
-SFILES=mpvecdigmuladd.s mpvecdigmulsub.s
+SFILES=mpvecdigmuladd.s mpvecdigmulsub.s mpvecadd.s mpvecsub.s
HFILES=/$objtype/include/u.h /sys/include/mp.h ../port/dat.h
--- /dev/null
+++ b/sys/src/libmp/arm/mpvecadd.s
@@ -1,0 +1,32 @@
+TEXT mpvecadd(SB),$0 /* multi-digit add: writes alen sum digits plus one final carry digit through sum; R0 is used as the a pointer (presumably the first argument arrives there) */
+ MOVW alen+4(FP), R4 /* R4 = alen */
+ MOVW b+8(FP), R5 /* R5 = b */
+ MOVW blen+12(FP), R6 /* R6 = blen */
+ MOVW sum+16(FP), R7 /* R7 = sum */
+ MOVW $0, R8 /* R8 = constant zero for the whole routine */
+ MOVW R8, R3 /* R3 = carry between digits, starts at 0 */
+ CMP R8, R6
+ B.EQ _add1 /* blen == 0: skip the two-operand loop (R4 is still alen) */
+ SUB R6, R4, R4 /* R4 = alen - blen, digits of a left once b is used up; alen >= blen is assumed, not checked here */
+_addloop1: /* runs blen times: digit of a + digit of b + carry */
+ MOVW.WP 4(R0), R1 /* R1 = next digit of a, then a += 4 (post-indexed) */
+ MOVW.WP 4(R5), R2 /* R2 = next digit of b, then b += 4 */
+ CMP $1, R3 /* reload saved carry into the C flag: C = (R3 >= 1) */
+ ADC.S R2, R1 /* R1 = R1 + R2 + C, C = carry out */
+ ADC R8, R8, R3 /* R3 = 0 + 0 + C: save the carry, since SUB.S below overwrites the flags */
+ MOVW.WP R1, 4(R7) /* store sum digit, then sum += 4 */
+ SUB.S $1, R6 /* one fewer digit of b to go */
+ B.NE _addloop1
+_add1:
+ CMP R8, R4
+ B.EQ _addend /* no digits of a remain */
+_addloop2: /* runs for the remaining digits of a: only the carry is added */
+ MOVW.WP 4(R0), R1 /* R1 = next digit of a, then a += 4 */
+ ADD.S R3, R1 /* R1 += carry, C = carry out */
+ ADC R8, R8, R3 /* R3 = new carry (0 or 1) */
+ MOVW.WP R1, 4(R7) /* store sum digit, then sum += 4 */
+ SUB.S $1, R4 /* one fewer digit of a to go */
+ B.NE _addloop2
+_addend:
+ MOVW R3, (R7) /* final carry becomes the digit after the last sum digit */
+ RET
--- a/sys/src/libmp/arm/mpvecdigmuladd.s
+++ b/sys/src/libmp/arm/mpvecdigmuladd.s
@@ -5,12 +5,12 @@
MOVW $0, R2
_muladdloop:
MOVW $0, R1
- MOVW.W.P 4(R0), R3
+ MOVW.WP 4(R0), R3
MULALU R3, R5, (R1, R2)
MOVW (R6), R7
ADD.S R2, R7
ADC $0, R1, R2
- MOVW.W.P R7, 4(R6)
+ MOVW.WP R7, 4(R6)
SUB.S $1, R4
B.NE _muladdloop
MOVW (R6), R7
--- a/sys/src/libmp/arm/mpvecdigmulsub.s
+++ b/sys/src/libmp/arm/mpvecdigmulsub.s
@@ -5,13 +5,13 @@
MOVW $0, R2
_mulsubloop:
MOVW $0, R1
- MOVW.W.P 4(R0), R3
+ MOVW.WP 4(R0), R3
MULALU R3, R5, (R1, R2)
MOVW (R6), R7
SUB.S R2, R7
ADD.CC $1, R1
MOVW R1, R2
- MOVW.W.P R7, 4(R6)
+ MOVW.WP R7, 4(R6)
SUB.S $1, R4
B.NE _mulsubloop
MOVW (R6), R7
--- /dev/null
+++ b/sys/src/libmp/arm/mpvecsub.s
@@ -1,0 +1,31 @@
+TEXT mpvecsub(SB),$0 /* multi-digit subtract: writes alen digits of a - b through diff; R0 is used as the a pointer (presumably the first argument arrives there) */
+ MOVW alen+4(FP), R4 /* R4 = alen */
+ MOVW b+8(FP), R5 /* R5 = b */
+ MOVW blen+12(FP), R6 /* R6 = blen */
+ MOVW diff+16(FP), R7 /* R7 = diff */
+ MOVW $0, R8 /* R8 = constant zero for the whole routine */
+ MOVW R8, R3 /* R3 = saved borrow: 0 means none, nonzero means borrow pending */
+ CMP R8, R6
+ B.EQ _sub1 /* blen == 0: skip the two-operand loop (R4 is still alen) */
+ SUB R6, R4, R4 /* R4 = alen - blen, digits of a left once b is used up; alen >= blen is assumed, not checked here */
+_subloop1: /* runs blen times: digit of a - digit of b - borrow */
+ MOVW.WP 4(R0), R1 /* R1 = next digit of a, then a += 4 (post-indexed) */
+ MOVW.WP 4(R5), R2 /* R2 = next digit of b, then b += 4 */
+ CMP R3, R8 /* reload saved borrow: computes 0 - R3, so C is set only when R3 == 0 (no borrow) */
+ SBC.S R2, R1 /* R1 = R1 - R2 - (1 - C), C clear on borrow out */
+ SBC R8, R8, R3 /* R3 = 0 - 0 - (1 - C): 0 if no borrow, all ones if borrow; saved because SUB.S below overwrites the flags */
+ MOVW.WP R1, 4(R7) /* store difference digit, then diff += 4 */
+ SUB.S $1, R6 /* one fewer digit of b to go */
+ B.NE _subloop1
+_sub1:
+ CMP R8, R4
+ RET.EQ /* no digits of a remain: done */
+_subloop2: /* runs for the remaining digits of a: only the borrow is subtracted */
+ MOVW.WP 4(R0), R1 /* R1 = next digit of a, then a += 4 */
+ CMP R3, R8 /* reload saved borrow into C (set = no borrow) */
+ SBC.S R8, R1 /* R1 = R1 - 0 - (1 - C) */
+ SBC R8, R8, R3 /* R3 = new borrow (0 or all ones) */
+ MOVW.WP R1, 4(R7) /* store difference digit, then diff += 4 */
+ SUB.S $1, R4 /* one fewer digit of a to go */
+ B.NE _subloop2
+ RET /* the final borrow is left in R3 and is not stored anywhere */