shithub: riscv

Download patch

ref: 9fccf1629ea99ab2613601e2ecd48646acb1e219
parent: 2559e19e1944f3ff880274d0a7d172976d082c33
author: cinap_lenrek <[email protected]>
date: Fri Nov 20 01:25:56 EST 2015

libmp: add mpvecadd()/mpvecsub() assembly versions for arm

--- a/sys/src/libmp/arm/mkfile
+++ b/sys/src/libmp/arm/mkfile
@@ -2,7 +2,7 @@
 </$objtype/mkfile
 
 LIB=/$objtype/lib/libmp.a
-SFILES=mpvecdigmuladd.s mpvecdigmulsub.s
+SFILES=mpvecdigmuladd.s mpvecdigmulsub.s mpvecadd.s mpvecsub.s
 
 HFILES=/$objtype/include/u.h /sys/include/mp.h ../port/dat.h
 
--- /dev/null
+++ b/sys/src/libmp/arm/mpvecadd.s
@@ -1,0 +1,32 @@
+TEXT mpvecadd(SB),$0	/* sum = a + b, digit by digit (4-byte words), then the final carry is stored after the last digit; R0 is used as a without being loaded from FP -- presumably the first argument arrives in R0, confirm */
+	MOVW	alen+4(FP), R4	/* R4 = alen */
+	MOVW	b+8(FP), R5	/* R5 = b, read pointer */
+	MOVW	blen+12(FP), R6	/* R6 = blen, down-counter for _addloop1 */
+	MOVW	sum+16(FP), R7	/* R7 = sum, write pointer */
+	MOVW	$0, R8	/* R8 holds constant zero for the whole routine */
+	MOVW	R8, R3	/* R3 = carry between digits, initially 0 */
+	CMP	R8, R6
+	B.EQ	_add1	/* blen == 0: nothing to add from b, R4 stays alen */
+	SUB	R6, R4, R4	/* R4 = alen - blen = digits of a left for _addloop2; assumes alen >= blen (not checked here) */
+_addloop1:	/* first blen digits: sum[i] = a[i] + b[i] + carry */
+	MOVW.WP	4(R0), R1	/* R1 = next digit of a; R0 steps by 4 (nothing else in the loop advances it) */
+	MOVW.WP	4(R5), R2	/* R2 = next digit of b; R5 steps by 4 */
+	CMP	$1, R3	/* computes R3 - 1: C flag set iff saved carry R3 != 0 */
+	ADC.S	R2, R1	/* R1 = R1 + R2 + C, updating C with the carry out */
+	ADC	R8, R8, R3	/* R3 = 0 + 0 + C: save carry out as 0 or 1 (flags untouched, no .S) */
+	MOVW.WP	R1, 4(R7)	/* store sum digit; R7 steps by 4 */
+	SUB.S	$1, R6	/* one digit of b consumed; clobbers C, hence carry lives in R3 */
+	B.NE	_addloop1
+_add1:
+	CMP	R8, R4
+	B.EQ	_addend	/* no digits of a remain beyond b */
+_addloop2:	/* remaining digits: sum[i] = a[i] + carry */
+	MOVW.WP	4(R0), R1	/* R1 = next digit of a */
+	ADD.S	R3, R1	/* R1 += carry in (0 or 1), C = carry out */
+	ADC	R8, R8, R3	/* R3 = carry out as 0 or 1 */
+	MOVW.WP	R1, 4(R7)	/* store sum digit; R7 steps by 4 */
+	SUB.S	$1, R4
+	B.NE	_addloop2
+_addend:
+	MOVW	R3, (R7)	/* final carry goes into the word after the last sum digit, so sum needs room for one extra digit */
+	RET
--- a/sys/src/libmp/arm/mpvecdigmuladd.s
+++ b/sys/src/libmp/arm/mpvecdigmuladd.s
@@ -5,12 +5,12 @@
 	MOVW	$0, R2
 _muladdloop:
 	MOVW	$0, R1
-	MOVW.W.P 4(R0), R3
+	MOVW.WP	4(R0), R3
 	MULALU	R3, R5, (R1, R2)
  	MOVW	(R6), R7
 	ADD.S	R2, R7
 	ADC	$0, R1, R2
-	MOVW.W.P R7, 4(R6)
+	MOVW.WP	R7, 4(R6)
 	SUB.S	$1, R4
 	B.NE	_muladdloop
 	MOVW	(R6), R7
--- a/sys/src/libmp/arm/mpvecdigmulsub.s
+++ b/sys/src/libmp/arm/mpvecdigmulsub.s
@@ -5,13 +5,13 @@
 	MOVW	$0, R2
 _mulsubloop:
 	MOVW	$0, R1
-	MOVW.W.P 4(R0), R3
+	MOVW.WP	4(R0), R3
 	MULALU	R3, R5, (R1, R2)
  	MOVW	(R6), R7
 	SUB.S	R2, R7
 	ADD.CC	$1, R1
 	MOVW	R1, R2
-	MOVW.W.P R7, 4(R6)
+	MOVW.WP	R7, 4(R6)
 	SUB.S	$1, R4
 	B.NE	_mulsubloop
 	MOVW	(R6), R7
--- /dev/null
+++ b/sys/src/libmp/arm/mpvecsub.s
@@ -1,0 +1,31 @@
+TEXT mpvecsub(SB),$0	/* diff = a - b, digit by digit (4-byte words); the final borrow is not stored or returned; R0 is used as a without being loaded from FP -- presumably the first argument arrives in R0, confirm */
+	MOVW	alen+4(FP), R4	/* R4 = alen */
+	MOVW	b+8(FP), R5	/* R5 = b, read pointer */
+	MOVW	blen+12(FP), R6	/* R6 = blen, down-counter for _subloop1 */
+	MOVW	diff+16(FP), R7	/* R7 = diff, write pointer */
+	MOVW	$0, R8	/* R8 holds constant zero for the whole routine */
+	MOVW	R8, R3	/* R3 = borrow between digits: 0 = none, nonzero = borrow pending */
+	CMP	R8, R6
+	B.EQ	_sub1	/* blen == 0: nothing to subtract, R4 stays alen */
+	SUB	R6, R4, R4	/* R4 = alen - blen = digits of a left for _subloop2; assumes alen >= blen (not checked here) */
+_subloop1:	/* first blen digits: diff[i] = a[i] - b[i] - borrow */
+	MOVW.WP	4(R0), R1	/* R1 = next digit of a; R0 steps by 4 (nothing else in the loop advances it) */
+	MOVW.WP	4(R5), R2	/* R2 = next digit of b; R5 steps by 4 */
+	CMP	R3, R8	/* computes 0 - R3: C set (no borrow) iff R3 == 0 */
+	SBC.S	R2, R1	/* R1 = R1 - R2 - !C, updating C (clear means borrow out) */
+	SBC	R8, R8, R3	/* R3 = 0 - 0 - !C: 0 if no borrow, all ones if borrow (flags untouched, no .S) */
+	MOVW.WP	R1, 4(R7)	/* store difference digit; R7 steps by 4 */
+	SUB.S	$1, R6	/* one digit of b consumed; clobbers C, hence borrow lives in R3 */
+	B.NE	_subloop1
+_sub1:
+	CMP	R8, R4
+	RET.EQ	/* no digits of a remain beyond b: done */
+_subloop2:	/* remaining digits: diff[i] = a[i] - borrow */
+	MOVW.WP	4(R0), R1	/* R1 = next digit of a */
+	CMP	R3, R8	/* rebuild C from saved borrow, as in _subloop1 */
+	SBC.S	R8, R1	/* R1 = R1 - 0 - !C */
+	SBC	R8, R8, R3	/* R3 = borrow out (0 or all ones) */
+	MOVW.WP	R1, 4(R7)	/* store difference digit; R7 steps by 4 */
+	SUB.S	$1, R4
+	B.NE	_subloop2
+	RET