shithub: riscv

--- a/sys/man/1/2l

+++ b/sys/man/1/2l

@@ -102,6 +102,9 @@

 .L _traceout

 at function exits.

.TP

+.B -f

+(ARM only) Generate VFP hardware floating point instructions.

+.TP

 .B -s

 Strip the symbol tables from the output file.

.TP

--- a/sys/src/cmd/5c/txt.c

+++ b/sys/src/cmd/5c/txt.c

@@ -721,8 +721,13 @@

 			regfree(&nod1);

 			p1 = p;

 			regalloc(&nod, t, Z);

-			gins(AMOVF, nodfconst(2147483648.), &nod);

-			gins(AADDF, &nod, t);

+			if(tt == TFLOAT) {

+				gins(AMOVF, nodfconst(2147483648.), &nod);

+				gins(AADDF, &nod, t);

+			} else {

+				gins(AMOVD, nodfconst(2147483648.), &nod);

+				gins(AADDD, &nod, t);

+			}

 			regfree(&nod);

 			patch(p1, pc);

 			return;

@@ -1056,7 +1061,8 @@

 		nextpc();

 		p->as = a;

 		naddr(f1, &p->from);

-		if(a == ACMP && f1->op == OCONST && p->from.offset < 0 && p->from.offset != -p->from.offset) {

+		if(a == ACMP && f1->op == OCONST && p->from.offset < 0 &&

+		    p->from.offset != 0x80000000) {

 			p->as = ACMN;

 			p->from.offset = -p->from.offset;

--- a/sys/src/cmd/5l/asm.c

+++ b/sys/src/cmd/5l/asm.c

@@ -483,12 +483,12 @@

 		if(p->line == oldlc || p->as == ATEXT || p->as == ANOP) {

 			if(p->as == ATEXT)

 				curtext = p;

-			if(debug['L'])

+			if(debug['V'])

 				Bprint(&bso, "%6lux %P\n",

 					p->pc, p);

 			continue;

-		if(debug['L'])

+		if(debug['V'])

 			Bprint(&bso, "\t\t%6ld", lcsize);

 		v = (p->pc - oldpc) / MINLC;

 		while(v) {

@@ -496,7 +496,7 @@

 			if(v < 127)

 				s = v;

 			cput(s+128);	/* 129-255 +pc */

-			if(debug['L'])

+			if(debug['V'])

 				Bprint(&bso, " pc+%ld*%d(%ld)", s, MINLC, s+128);

 			v -= s;

 			lcsize++;

@@ -510,7 +510,7 @@

 			cput(s>>16);

 			cput(s>>8);

 			cput(s);

-			if(debug['L']) {

+			if(debug['V']) {

 				if(s > 0)

 					Bprint(&bso, " lc+%ld(%d,%ld)\n",

 						s, 0, s);

@@ -525,7 +525,7 @@

 		if(s > 0) {

 			cput(0+s);	/* 1-64 +lc */

-			if(debug['L']) {

+			if(debug['V']) {

 				Bprint(&bso, " lc+%ld(%ld)\n", s, 0+s);

 				Bprint(&bso, "%6lux %P\n",

 					p->pc, p);

@@ -532,7 +532,7 @@

 		} else {

 			cput(64-s);	/* 65-128 -lc */

-			if(debug['L']) {

+			if(debug['V']) {

 				Bprint(&bso, " lc%ld(%ld)\n", s, 64-s);

 				Bprint(&bso, "%6lux %P\n",

 					p->pc, p);

@@ -545,7 +545,7 @@

 		cput(s);

 		lcsize++;

-	if(debug['v'] || debug['L'])

+	if(debug['v'] || debug['V'])

 		Bprint(&bso, "lcsize = %ld\n", lcsize);

 	Bflush(&bso);

@@ -1365,6 +1365,53 @@

 		else if(p->as == AMOVH)

 			o2 ^= (1<<6);

 		break;

+	/* VFP ops: */

+	case 74:	/* vfp floating point arith */

+		o1 = opvfprrr(p->as, p->scond);

+		rf = p->from.reg;

+		if(p->from.type == D_FCONST) {

+			diag("invalid floating-point immediate\n%P", p);

+			rf = 0;

+		}

+		rt = p->to.reg;

+		r = p->reg;

+		if(r == NREG)

+			r = rt;

+		o1 |= rt<<12;

+		if(((o1>>20)&0xf) == 0xb)

+			o1 |= rf<<0;

+		else

+			o1 |= r<<16 | rf<<0;

+		break;

+	case 75:	/* vfp floating point compare */

+		o1 = opvfprrr(p->as, p->scond);

+		rf = p->from.reg;

+		if(p->from.type == D_FCONST) {

+			if(p->from.ieee->h != 0 || p->from.ieee->l != 0)

+				diag("invalid floating-point immediate\n%P", p);

+			o1 |= 1<<16;

+			rf = 0;

+		}

+		rt = p->reg;

+		o1 |= rt<<12 | rf<<0;

+		o2 = 0x0ef1fa10;	/* MRS APSR_nzcv, FPSCR */

+		o2 |= (p->scond & C_SCOND) << 28;

+		break;

+	case 76:	/* vfp floating point fix and float */

+		o1 = opvfprrr(p->as, p->scond);

+		rf = p->from.reg;

+		rt = p->to.reg;

+		if(p->from.type == D_REG) {

+			o2 = o1 | rt<<12 | rt<<0;

+			o1 = 0x0e000a10;	/* VMOV F,R */

+			o1 |= (p->scond & C_SCOND) << 28 | rt<<16 | rf<<12;

+		} else {

+			o1 |= FREGTMP<<12 | rf<<0;

+			o2 = 0x0e100a10;	/* VMOV R,F */

+			o2 |= (p->scond & C_SCOND) << 28 | FREGTMP<<16 | rt<<12;

+		}

+		break;

 	if(debug['a'] > 1)

@@ -1494,6 +1541,40 @@

 long

+opvfprrr(int a, int sc)

+{

+	long o;

+	o = (sc & C_SCOND) << 28;

+	if(sc & (C_SBIT|C_PBIT|C_WBIT))

+		diag(".S/.P/.W on vfp instruction");

+	o |= 0xe<<24;

+	switch(a) {

+	case AMOVWD:	return o | 0xb<<8 | 0xb<<20 | 1<<6 | 0x8<<16 | 1<<7;

+	case AMOVWF:	return o | 0xa<<8 | 0xb<<20 | 1<<6 | 0x8<<16 | 1<<7;

+	case AMOVDW:	return o | 0xb<<8 | 0xb<<20 | 1<<6 | 0xD<<16 | 1<<7;

+	case AMOVFW:	return o | 0xa<<8 | 0xb<<20 | 1<<6 | 0xD<<16 | 1<<7;

+	case AMOVFD:	return o | 0xa<<8 | 0xb<<20 | 1<<6 | 0x7<<16 | 1<<7;

+	case AMOVDF:	return o | 0xb<<8 | 0xb<<20 | 1<<6 | 0x7<<16 | 1<<7;

+	case AMOVF:	return o | 0xa<<8 | 0xb<<20 | 1<<6 | 0x0<<16 | 0<<7;

+	case AMOVD:	return o | 0xb<<8 | 0xb<<20 | 1<<6 | 0x0<<16 | 0<<7;

+	case ACMPF:	return o | 0xa<<8 | 0xb<<20 | 1<<6 | 0x4<<16 | 0<<7;

+	case ACMPD:	return o | 0xb<<8 | 0xb<<20 | 1<<6 | 0x4<<16 | 0<<7;

+	case AADDF:	return o | 0xa<<8 | 0x3<<20;

+	case AADDD:	return o | 0xb<<8 | 0x3<<20;

+	case ASUBF:	return o | 0xa<<8 | 0x3<<20 | 1<<6;

+	case ASUBD:	return o | 0xb<<8 | 0x3<<20 | 1<<6;

+	case AMULF:	return o | 0xa<<8 | 0x2<<20;

+	case AMULD:	return o | 0xb<<8 | 0x2<<20;

+	case ADIVF:	return o | 0xa<<8 | 0x8<<20;

+	case ADIVD:	return o | 0xb<<8 | 0x8<<20;

+	}

+	diag("bad vfp rrr %d", a);

+	prasm(curp);

+	return 0;

+}

+long

 opbra(int a, int sc)

@@ -1628,10 +1709,45 @@

 long

+ovfpmem(int a, int r, long v, int b, int sc, Prog *p)

+{

+	long o;

+	if(sc & (C_SBIT|C_PBIT|C_WBIT))

+		diag(".S/.P/.W on VLDR/VSTR instruction");

+	o = (sc & C_SCOND) << 28;

+	o |= 0xd<<24 | (1<<23);

+	if(v < 0) {

+		v = -v;

+		o ^= 1 << 23;

+	}

+	if(v & 3)

+		diag("odd offset for floating point op: %ld\n%P", v, p);

+	else if(v >= (1<<10))

+		diag("literal span too large: %ld\n%P", v, p);

+	o |= (v>>2) & 0xFF;

+	o |= b << 16;

+	o |= r << 12;

+	switch(a) {

+	default:

+		diag("bad fst %A", a);

+	case AMOVD:

+		o |= 0xb<<8;

+		break;

+	case AMOVF:

+		o |= 0xa<<8;

+		break;

+	}

+	return o;

+}

+long

 ofsr(int a, int r, long v, int b, int sc, Prog *p)

 	long o;

+	if(vfp)

+		return ovfpmem(a, r, v, b, sc, p);

 	if(sc & C_SBIT)

 		diag(".S on FLDR/FSTR instruction");

 	o = (sc & C_SCOND) << 28;

@@ -1703,6 +1819,8 @@

 	Ieee *p;

 	int n;

+	if(vfp)

+		return -1;

 	for(n = sizeof(chipfloats)/sizeof(chipfloats[0]); --n >= 0;){

 		p = &chipfloats[n];

 		if(p->l == e->l && p->h == e->h)

--- a/sys/src/cmd/5l/l.h

+++ b/sys/src/cmd/5l/l.h

@@ -7,7 +7,12 @@

 #define	EXTERN	extern

 #endif

+#define	LIBNAMELEN	300

+void	addlibpath(char*);

+int	fileexists(char*);

+char*	findlib(char*);

 typedef	struct	Adr	Adr;

 typedef	struct	Sym	Sym;

 typedef	struct	Autom	Auto;

@@ -134,6 +139,7 @@

 	LTO		= 1<<1,

 	LPOOL		= 1<<2,

 	V4		= 1<<3,	/* arm v4 arch */

+	VFP		= 1<<4,	/* arm vfpv3 floating point */

 	C_NONE		= 0,

 	C_REG,

@@ -269,6 +275,7 @@

 EXTERN	Prog	zprg;

 EXTERN	int	dtype;

 EXTERN	int	armv4;

+EXTERN	int vfp;

 EXTERN	int	doexp, dlm;

 EXTERN	int	imports, nimports;

@@ -309,6 +316,7 @@

 int	Sconv(Fmt*);

 int	aclass(Adr*);

 void	addhist(long, int);

+void	addlibpath(char*);

 void	append(Prog*, Prog*);

 void	asmb(void);

 void	asmdyn(void);

@@ -336,7 +344,9 @@

 void	errorexit(void);

 void	exchange(Prog*);

 void	export(void);

+int	fileexists(char*);

 int	find1(long, int);

+char*	findlib(char*);

 void	follow(void);

 void	gethunk(void);

 void	histtoauto(void);

@@ -361,6 +371,7 @@

 long	opirr(int);

 Optab*	oplook(Prog*);

 long	oprrr(int, int);

+long	opvfprrr(int, int);

 long	olr(long, int, int, int);

 long	olhr(long, int, int, int);

 long	olrr(int, int, int, int);

--- a/sys/src/cmd/5l/noop.c

+++ b/sys/src/cmd/5l/noop.c

@@ -302,6 +302,30 @@

 			break;

+		/*

+		 * 5c code generation for unsigned -> double made the

+		 * unfortunate assumption that single and double floating

+		 * point registers are aliased - true for emulated 7500

+		 * but not for vfp.  Now corrected, but this test is

+		 * insurance against old 5c compiled code in libraries.

+		 */

+		case AMOVWD:

+			if((q = p->link) != P && q->as == ACMP)

+			if((q = q->link) != P && q->as == AMOVF)

+			if((q1 = q->link) != P && q1->as == AADDF)

+			if(q1->to.type == D_FREG && q1->to.reg == p->to.reg) {

+				q1->as = AADDD;

+				q1 = prg();

+				q1->scond = q->scond;

+				q1->line = q->line;

+				q1->as = AMOVFD;

+				q1->from = q->to;

+				q1->to = q1->from;

+				q1->link = q->link;

+				q->link = q1;

+			}

+			break;

 		case ADIV:

 		case ADIVU:

 		case AMOD:

--- a/sys/src/cmd/5l/optab.c

+++ b/sys/src/cmd/5l/optab.c

@@ -211,6 +211,14 @@

 	{ ACASE,	C_REG,	C_NONE,	C_NONE,		62, 4, 0 },

 	{ ABCASE,	C_NONE, C_NONE, C_SBRA,		63, 4, 0 },

+	{ AADDF,	C_FREG,	C_NONE,	C_FREG,		74, 4, 0, VFP },

+	{ AADDF,	C_FREG,	C_REG,	C_FREG,		74, 4, 0, VFP },

+	{ AMOVF,	C_FREG, C_NONE, C_FREG,		74, 4, 0, VFP },

+	{ ACMPF,	C_FREG,	C_REG,	C_NONE,		75, 8, 0, VFP },

+	{ ACMPF,	C_FCON,	C_REG,	C_NONE,		75, 8, 0, VFP },

+	{ AMOVFW,	C_FREG,	C_NONE,	C_REG,		76, 8, 0, VFP },

+	{ AMOVFW,	C_REG,	C_NONE,	C_FREG,		76, 8, 0, VFP },

 	{ AMOVH,	C_REG,	C_NONE,	C_HEXT,		70, 4, REGSB,	V4 },

 	{ AMOVH,	C_REG,	C_NONE, C_HAUTO,	70, 4, REGSP,	V4 },

 	{ AMOVH,	C_REG,	C_NONE,	C_HOREG,	70, 4, 0,	V4 },

--- a/sys/src/cmd/5l/span.c

+++ b/sys/src/cmd/5l/span.c

@@ -639,6 +639,9 @@

 	n = (p2->flag&V4) - (p1->flag&V4);	/* architecture version */

 	if(n)

 		return n;

+	n = (p2->flag&VFP) - (p1->flag&VFP);	/* floating point arch */

+	if(n)

+		return n;

 	n = p1->a1 - p2->a1;

 	if(n)

 		return n;

@@ -657,14 +660,18 @@

 	int i, n, r;

 	armv4 = !debug['h'];

+	vfp = debug['f'];

 	for(i=0; i<C_GOK; i++)

 		for(n=0; n<C_GOK; n++)

 			xcmp[i][n] = cmp(n, i);

-	for(n=0; optab[n].as != AXXX; n++)

+	for(n=0; optab[n].as != AXXX; n++) {

+		if((optab[n].flag & VFP) && !vfp)

+			optab[n].as = AXXX;

 		if((optab[n].flag & V4) && !armv4) {

 			optab[n].as = AXXX;

 			break;

+	}

 	qsort(optab, n, sizeof(optab[0]), ocmp);

 	for(i=0; i<n; i++) {

 		r = optab[i].as;

@@ -679,6 +686,8 @@

 		default:

 			diag("unknown op in build: %A", r);

 			errorexit();

+		case AXXX:

+			break;

 		case AADD:

 			oprange[AAND] = oprange[r];

 			oprange[AEOR] = oprange[r];

--- a/sys/src/libmach/5db.c

+++ b/sys/src/libmach/5db.c

@@ -135,7 +135,7 @@

int

 armclass(long w)

-	int op, done;

+	int op, done, cp;

 	op = (w >> 25) & 0x7;

 	switch(op) {

@@ -220,8 +220,62 @@

 		op = (48+24+4+4+2) + ((w >> 24) & 0x1);

 		break;

 	case 7:	/* coprocessor crap */

+		cp = (w >> 8) & 0xF;

+		if(cp == 10 || cp == 11){	/* vfp */

+			if((w >> 4) & 0x1){

+				/* vfp register transfer */

+				switch((w >> 21) & 0x7){

+				case 0:

+					op = 118 + ((w >> 20) & 0x1);

+					break;

+				case 7:

+					op = 118+2 + ((w >> 20) & 0x1);

+					break;

+				default:

+					op = (48+24+4+4+2+2+4+4);

+					break;

+				}

+				break;

+			}

+			/* vfp data processing */

+			if(((w >> 23) & 0x1) == 0){

+				op = 100 + ((w >> 19) & 0x6) + ((w >> 6) & 0x1);

+				break;

+			}

+			switch(((w >> 19) & 0x6) + ((w >> 6) & 0x1)){

+			case 0:

+				op = 108;

+				break;

+			case 7:

+				/*
+				 * The braces are required: without them the
+				 * trailing else binds to the inner else-if, and
+				 * the bit-19-set forms (low three bits of the
+				 * field 0, 4 or 5) never reach opcode 117.
+				 */
+				if(((w >> 19) & 0x1) == 0){
+					if(((w >> 17) & 0x1) == 0)
+						op = 109 + ((w >> 16) & 0x4) +
+							((w >> 15) & 0x2) +
+							((w >> 7) & 0x1);
+					else if(((w >> 16) & 0x7) == 0x7)
+						op = 117;
+				}else
+					switch((w >> 16) & 0x7){
+					case 0:
+					case 4:
+					case 5:
+						op = 117;
+						break;
+					}
+				break;

+			}

+			if(op == 7)

+				op = (48+24+4+4+2+2+4+4);

+			break;

+		}

 		op = (48+24+4+4+2+2) + ((w >> 3) & 0x2) + ((w >> 20) & 0x1);

 		break;

+	case 6:	/* vfp load / store */

+		if(((w >> 21) &0x9) == 0x8){

+			op = 122 + ((w >> 20) & 0x1);

+			break;

+		}

+		/* fall through */

 	default:

 		op = (48+24+4+4+2+2+4+4);

 		break;

@@ -298,7 +352,7 @@

  * Print value v as name[+offset]

*/

 static int

-gsymoff(char *buf, int n, long v, int space)

+gsymoff(char *buf, int n, ulong v, int space)

 	Symbol s;

 	int r;

@@ -405,6 +459,20 @@

 	format(o->o, i, o->a);

+/*
+ * Pull the operand fields out of a VFP load/store instruction word
+ * and hand the instruction to format() for printing.  The low 8 bits
+ * of the word, scaled by 4, form the offset; it is negated when
+ * bit 23 is clear.  rn is bits 19-16, rd is bits 15-12.
+ */
+static void
+armvstdi(Opcode *o, Instr *i)
+{
+	ulong off;
+
+	off = (i->w & 0xff) << 2;
+	i->imm = (i->w & (1<<23))? off: -off;
+	i->rd = (i->w >> 12) & 0xf;
+	i->rn = (i->w >> 16) & 0xf;
+	format(o->o, i, o->a);
+}

 /* arm V4 ld/st halfword, signed byte */

 static void

 armhwby(Opcode *o, Instr *i)

@@ -870,6 +938,40 @@

 /* 99 */

 	"RFEV7%P%a",	armbdt, 0,	"(R%n)",

+/* 100 */

+	"MLA%f%C",	armdps,	0,	"F%s,F%n,F%d",

+	"MLS%f%C",	armdps,	0,	"F%s,F%n,F%d",

+	"NMLS%f%C",	armdps,	0,	"F%s,F%n,F%d",

+	"NMLA%f%C",	armdps,	0,	"F%s,F%n,F%d",

+	"MUL%f%C",	armdps,	0,	"F%s,F%n,F%d",

+	"NMUL%f%C",	armdps,	0,	"F%s,F%n,F%d",

+	"ADD%f%C",	armdps,	0,	"F%s,F%n,F%d",

+	"SUB%f%C",	armdps,	0,	"F%s,F%n,F%d",

+	"DIV%f%C",	armdps,	0,	"F%s,F%n,F%d",

+/* 109 */

+	"MOV%f%C",	armdps,	0,	"F%s,F%d",

+	"ABS%f%C",	armdps,	0,	"F%s,F%d",

+	"NEG%f%C",	armdps,	0,	"F%s,F%d",

+	"SQRT%f%C",	armdps,	0,	"F%s,F%d",

+	"CMP%f%C",	armdps,	0,	"F%s,F%d",

+	"CMPE%f%C",	armdps,	0,	"F%s,F%d",

+	"CMP%f%C",	armdps,	0,	"$0.0,F%d",

+	"CMPE%f%C",	armdps,	0,	"$0.0,F%d",

+/* 117 */

+	"MOV%F%R%C",	armdps, 0,	"F%s,F%d",

+/* 118 */

+	"MOVW%C",	armdps, 0,	"R%d,F%n",

+	"MOVW%C",	armdps, 0,	"F%n,R%d",

+	"MOVW%C",	armdps, 0,	"R%d,%x",

+	"MOVW%C",	armdps, 0,	"%x,R%d",

+/* 122 */

+	"MOV%f%C",	armvstdi,	0,	"F%d,%I",

+	"MOV%f%C",	armvstdi,	0,	"%I,F%d",

};

 static void

@@ -1011,12 +1113,74 @@

 		case 'b':

 			i->curr += symoff(i->curr, i->end-i->curr,

-				i->imm, CTEXT);

+				(ulong)i->imm, CTEXT);

 			break;

 		case 'g':

 			i->curr += gsymoff(i->curr, i->end-i->curr,

 				i->imm, CANY);

+			break;

+		case 'f':

+			switch((i->w >> 8) & 0xF){

+			case 10:

+				bprint(i, "F");

+				break;

+			case 11:

+				bprint(i, "D");

+				break;

+			}

+			break;

+		case 'F':

+			switch(((i->w >> 15) & 0xE) + ((i->w >> 8) & 0x1)){

+			case 0x0:

+				bprint(i, ((i->w >> 7) & 0x1)? "WF" : "WF.U");

+				break;

+			case 0x1:

+				bprint(i, ((i->w >> 7) & 0x1)? "WD" : "WD.U");

+				break;

+			case 0x8:

+				bprint(i, "FW.U");

+				break;

+			case 0x9:

+				bprint(i, "DW.U");

+				break;

+			case 0xA:

+				bprint(i, "FW");

+				break;

+			case 0xB:

+				bprint(i, "DW");

+				break;

+			case 0xE:

+				bprint(i, "FD");

+				break;

+			case 0xF:

+				bprint(i, "DF");

+				break;

+			}

+			break;

+		case 'R':

+			if(((i->w >> 7) & 0x1) == 0)

+				bprint(i, "R");

+			break;

+		case 'x':

+			switch(i->rn){

+			case 0:

+				bprint(i, "FPSID");

+				break;

+			case 1:

+				bprint(i, "FPSCR");

+				break;

+			case 2:

+				bprint(i, "FPEXC");

+				break;

+			default:

+				bprint(i, "FPS(%d)", i->rn);

+				break;

+			}

 			break;

 		case 'r':