shithub: riscv

Download patch

ref: bc895417f804bbb78410a365412bcf8ab59a44b4
parent: 23742053f5ee70cd394c33f28f3e6547e9e8e31d
author: mischief <[email protected]>
date: Sat Aug 8 23:44:03 EDT 2015

libc: fix spim endianness

--- a/sys/src/libc/spim/mkfile
+++ b/sys/src/libc/spim/mkfile
@@ -2,7 +2,7 @@
 </$objtype/mkfile
 
 LIB=/$objtype/lib/libc.a
-SFILES=\
+MIPSSFILES=\
 	argv0.s\
 	atom.s\
 	getcallerpc.s\
@@ -19,23 +19,33 @@
 	strcmp.s\
 	strcpy.s\
 	tas.s\
-	vlop.s\
 
-CFILES=\
+MIPSCFILES=\
 	cycles.c\
 	lock.c\
 	notejmp.c\
+
+SFILES=\
+	vlop.s\
+
+CFILES=\
 	sqrt.c\
 	vlrt.c\
 
 HFILES=/sys/include/libc.h
 
-OFILES=${CFILES:%.c=%.$O} ${SFILES:%.s=%.$O}
+OFILES=\
+	${MIPSSFILES:%.s=%.$O}\
+	${SFILES:%.s=%.$O}\
+	${MIPSCFILES:%.c=%.$O}\
+	${CFILES:%.c=%.$O}\
 
-%.$O:	../mips/%.c
-	$CC -I../mips $CFLAGS ../mips/$stem.c
+MIPSS=`{echo $MIPSSFILES | sed 's/\.s//g; s/ /|/g'}
+^($MIPSS)\.$O:R:	'../mips/\1.s'
+	$AS $AFLAGS ../mips/$stem1.s
 
-%.$O:	../mips/%.s
-	$AS -I../mips $AFLAGS ../mips/$stem.s
+MIPSC=`{echo $MIPSCFILES | sed 's/\.c//g; s/ /|/g'}
+^($MIPSC)\.$O:R:	'../mips/\1.c'
+	$CC $CFLAGS ../mips/$stem1.c
 
 </sys/src/cmd/mksyslib
--- /dev/null
+++ b/sys/src/libc/spim/sqrt.c
@@ -1,0 +1,103 @@
+#include <u.h>
+#include <libc.h>
+
+static	long	sqtab[64] =
+{
+	0x6cdb2, 0x726d4, 0x77ea3, 0x7d52f, 0x82a85, 0x87eb1, 0x8d1c0, 0x923bd,
+	0x974b2, 0x9c4a8, 0xa13a9, 0xa61be, 0xaaeee, 0xafb41, 0xb46bf, 0xb916e,
+	0xbdb55, 0xc247a, 0xc6ce3, 0xcb495, 0xcfb95, 0xd41ea, 0xd8796, 0xdcca0,
+	0xe110c, 0xe54dd, 0xe9818, 0xedac0, 0xf1cd9, 0xf5e67, 0xf9f6e, 0xfdfef,
+	0x01fe0, 0x05ee6, 0x09cfd, 0x0da30, 0x11687, 0x1520c, 0x18cc8, 0x1c6c1,
+	0x20000, 0x2388a, 0x27068, 0x2a79e, 0x2de32, 0x3142b, 0x3498c, 0x37e5b,
+	0x3b29d, 0x3e655, 0x41989, 0x44c3b, 0x47e70, 0x4b02b, 0x4e16f, 0x51241,
+	0x542a2, 0x57296, 0x5a220, 0x5d142, 0x60000, 0x62e5a, 0x65c55, 0x689f2,
+};
+
+double
+sqrt(double arg)
+{
+	int e, ms;
+	double a, t;
+	union
+	{
+		double	d;
+		struct
+		{
+			long	ls;
+			long	ms;
+		};
+	} u;
+
+	u.d = arg;
+	ms = u.ms;
+
+	/*
+	 * sign extend the mantissa with
+	 * exponent. result should be > 0 for
+	 * normal case.
+	 */
+	e = ms >> 20;
+	if(e <= 0) {
+		if(e == 0)
+			return 0;
+		return NaN();
+	}
+
+	/*
+	 * pick up arg/4 by adjusting exponent
+	 */
+	u.ms = ms - (2 << 20);
+	a = u.d;
+
+	/*
+	 * use 5 bits of mantissa and 1 bit
+	 * of exponent to form table index.
+	 * insert exponent/2 - 1.
+	 */
+	e = (((e - 1023) >> 1) + 1022) << 20;
+	u.ms = *(long*)((char*)sqtab + ((ms >> 13) & 0xfc)) | e;
+	u.ls = 0;
+
+	/*
+	 * three laps of newton
+	 */
+	e = 1 << 20;
+	t = u.d;
+	u.d = t + a/t;
+	u.ms -= e;		/* u.d /= 2; */
+	t = u.d;
+	u.d = t + a/t;
+	u.ms -= e;		/* u.d /= 2; */
+	t = u.d;
+
+	return t + a/t;
+}
+
+/*
+ * this is the program that generated the table.
+ * it calls sqrt by some other means.
+ * 
+ * void
+ * main(void)
+ * {
+ * 	int i;
+ * 	union	U
+ * 	{
+ * 		double	d;
+ * 		struct
+ * 		{
+ * 			long	ms;
+ * 			long	ls;
+ * 		};
+ * 	} u;
+ * 
+ * 	for(i=0; i<64; i++) {
+ * 		u.ms = (i<<15) | 0x3fe04000;
+ * 		u.ls = 0;
+ * 		u.d = sqrt(u.d);
+ * 		print(" 0x%.5lux,", u.ms & 0xfffff);
+ * 	}
+ * 	print("\n");
+ * 	exits(0);
+ * }
+ */
--- /dev/null
+++ b/sys/src/libc/spim/vlop.s
@@ -1,0 +1,20 @@
+/*
+ * from https://bitbucket.org/cherry9/plan9-loongson
+ */
+TEXT	_mulv(SB), $0
+	MOVW	8(FP), R2	/* hi1 */
+	MOVW	4(FP), R3	/* lo1 */
+	MOVW	16(FP), R4	/* hi2 */
+	MOVW	12(FP), R5	/* lo2 */
+	MULU	R5, R3	/* lo1*lo2 -> hi:lo*/
+	MOVW	LO, R6
+	MOVW	HI, R7
+	MULU	R3, R4	/* lo1*hi2 -> _:hi */
+	MOVW	LO, R8
+	ADDU	R8, R7
+	MULU	R2, R5	/* hi1*lo2 -> _:hi */
+	MOVW	LO, R8
+	ADDU	R8, R7
+	MOVW	R6, 0(R1)	/* lo */
+	MOVW	R7, 4(R1)	/* hi */
+	RET
--- /dev/null
+++ b/sys/src/libc/spim/vlrt.c
@@ -1,0 +1,723 @@
+typedef	unsigned long	ulong;
+typedef	unsigned int	uint;
+typedef	unsigned short	ushort;
+typedef	unsigned char	uchar;
+typedef	signed char	schar;
+
+#define	SIGN(n)	(1UL<<(n-1))
+
+typedef	struct	Vlong	Vlong;
+struct	Vlong
+{
+	union
+	{
+		struct
+		{
+			ulong	lo;
+			ulong	hi;
+		};
+		struct
+		{
+			ushort	loms;
+			ushort	lols;
+			ushort	hims;
+			ushort	hils;
+		};
+	};
+};
+
+void	abort(void);
+
+/* needed by profiler; can't be profiled. */
+#pragma profile off
+void
+_addv(Vlong *r, Vlong a, Vlong b)
+{
+	ulong lo, hi;
+
+	lo = a.lo + b.lo;
+	hi = a.hi + b.hi;
+	if(lo < a.lo)
+		hi++;
+	r->lo = lo;
+	r->hi = hi;
+}
+
+void
+_subv(Vlong *r, Vlong a, Vlong b)
+{
+	ulong lo, hi;
+
+	lo = a.lo - b.lo;
+	hi = a.hi - b.hi;
+	if(lo > a.lo)
+		hi--;
+	r->lo = lo;
+	r->hi = hi;
+}
+
+#pragma profile on
+
+void
+_d2v(Vlong *y, double d)
+{
+	union { double d; struct Vlong; } x;
+	ulong xhi, xlo, ylo, yhi;
+	int sh;
+
+	x.d = d;
+
+	xhi = (x.hi & 0xfffff) | 0x100000;
+	xlo = x.lo;
+	sh = 1075 - ((x.hi >> 20) & 0x7ff);
+
+	ylo = 0;
+	yhi = 0;
+	if(sh >= 0) {
+		/* v = (hi||lo) >> sh */
+		if(sh < 32) {
+			if(sh == 0) {
+				ylo = xlo;
+				yhi = xhi;
+			} else {
+				ylo = (xlo >> sh) | (xhi << (32-sh));
+				yhi = xhi >> sh;
+			}
+		} else {
+			if(sh == 32) {
+				ylo = xhi;
+			} else
+			if(sh < 64) {
+				ylo = xhi >> (sh-32);
+			}
+		}
+	} else {
+		/* v = (hi||lo) << -sh */
+		sh = -sh;
+		if(sh <= 10) {
+			ylo = xlo << sh;
+			yhi = (xhi << sh) | (xlo >> (32-sh));
+		} else {
+			/* overflow */
+			yhi = d;	/* causes something awful */
+		}
+	}
+	if(x.hi & SIGN(32)) {
+		if(ylo != 0) {
+			ylo = -ylo;
+			yhi = ~yhi;
+		} else
+			yhi = -yhi;
+	}
+
+	y->hi = yhi;
+	y->lo = ylo;
+}
+
+void
+_f2v(Vlong *y, float f)
+{
+
+	_d2v(y, f);
+}
+
+double
+_v2d(Vlong x)
+{
+	if(x.hi & SIGN(32)) {
+		if(x.lo) {
+			x.lo = -x.lo;
+			x.hi = ~x.hi;
+		} else
+			x.hi = -x.hi;
+		return -((long)x.hi*4294967296. + x.lo);
+	}
+	return (long)x.hi*4294967296. + x.lo;
+}
+
+float
+_v2f(Vlong x)
+{
+	return _v2d(x);
+}
+
+static void
+dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)
+{
+	ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
+	int i;
+
+	numhi = num.hi;
+	numlo = num.lo;
+	denhi = den.hi;
+	denlo = den.lo;
+
+	/*
+	 * get a divide by zero
+	 */
+	if(denlo==0 && denhi==0) {
+		numlo = numlo / denlo;
+	}
+
+	/*
+	 * set up the divisor and find the number of iterations needed
+	 */
+	if(numhi >= SIGN(32)) {
+		quohi = SIGN(32);
+		quolo = 0;
+	} else {
+		quohi = numhi;
+		quolo = numlo;
+	}
+	i = 0;
+	while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
+		denhi = (denhi<<1) | (denlo>>31);
+		denlo <<= 1;
+		i++;
+	}
+
+	quohi = 0;
+	quolo = 0;
+	for(; i >= 0; i--) {
+		quohi = (quohi<<1) | (quolo>>31);
+		quolo <<= 1;
+		if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
+			t = numlo;
+			numlo -= denlo;
+			if(numlo > t)
+				numhi--;
+			numhi -= denhi;
+			quolo |= 1;
+		}
+		denlo = (denlo>>1) | (denhi<<31);
+		denhi >>= 1;
+	}
+
+	if(qp) {
+		qp->lo = quolo;
+		qp->hi = quohi;
+	}
+	if(rp) {
+		rp->lo = numlo;
+		rp->hi = numhi;
+	}
+}
+
+void
+_divvu(Vlong *q, Vlong n, Vlong d)
+{
+
+	if(n.hi == 0 && d.hi == 0) {
+		q->hi = 0;
+		q->lo = n.lo / d.lo;
+		return;
+	}
+	dodiv(n, d, q, 0);
+}
+
+void
+_modvu(Vlong *r, Vlong n, Vlong d)
+{
+
+	if(n.hi == 0 && d.hi == 0) {
+		r->hi = 0;
+		r->lo = n.lo % d.lo;
+		return;
+	}
+	dodiv(n, d, 0, r);
+}
+
+static void
+vneg(Vlong *v)
+{
+
+	if(v->lo == 0) {
+		v->hi = -v->hi;
+		return;
+	}
+	v->lo = -v->lo;
+	v->hi = ~v->hi;
+}
+
+void
+_divv(Vlong *q, Vlong n, Vlong d)
+{
+	long nneg, dneg;
+
+	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+		q->lo = (long)n.lo / (long)d.lo;
+		q->hi = ((long)q->lo) >> 31;
+		return;
+	}
+	nneg = n.hi >> 31;
+	if(nneg)
+		vneg(&n);
+	dneg = d.hi >> 31;
+	if(dneg)
+		vneg(&d);
+	dodiv(n, d, q, 0);
+	if(nneg != dneg)
+		vneg(q);
+}
+
+void
+_modv(Vlong *r, Vlong n, Vlong d)
+{
+	long nneg, dneg;
+
+	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
+		r->lo = (long)n.lo % (long)d.lo;
+		r->hi = ((long)r->lo) >> 31;
+		return;
+	}
+	nneg = n.hi >> 31;
+	if(nneg)
+		vneg(&n);
+	dneg = d.hi >> 31;
+	if(dneg)
+		vneg(&d);
+	dodiv(n, d, 0, r);
+	if(nneg)
+		vneg(r);
+}
+
+void
+_rshav(Vlong *r, Vlong a, int b)
+{
+	long t;
+
+	t = a.hi;
+	if(b >= 32) {
+		r->hi = t>>31;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r->lo = t>>31;
+			return;
+		}
+		r->lo = t >> (b-32);
+		return;
+	}
+	if(b <= 0) {
+		r->hi = t;
+		r->lo = a.lo;
+		return;
+	}
+	r->hi = t >> b;
+	r->lo = (t << (32-b)) | (a.lo >> b);
+}
+
+void
+_rshlv(Vlong *r, Vlong a, int b)
+{
+	ulong t;
+
+	t = a.hi;
+	if(b >= 32) {
+		r->hi = 0;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r->lo = 0;
+			return;
+		}
+		r->lo = t >> (b-32);
+		return;
+	}
+	if(b <= 0) {
+		r->hi = t;
+		r->lo = a.lo;
+		return;
+	}
+	r->hi = t >> b;
+	r->lo = (t << (32-b)) | (a.lo >> b);
+}
+
+void
+_lshv(Vlong *r, Vlong a, int b)
+{
+	ulong t;
+
+	t = a.lo;
+	if(b >= 32) {
+		r->lo = 0;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r->hi = 0;
+			return;
+		}
+		r->hi = t << (b-32);
+		return;
+	}
+	if(b <= 0) {
+		r->lo = t;
+		r->hi = a.hi;
+		return;
+	}
+	r->lo = t << b;
+	r->hi = (t >> (32-b)) | (a.hi << b);
+}
+
+void
+_andv(Vlong *r, Vlong a, Vlong b)
+{
+	r->hi = a.hi & b.hi;
+	r->lo = a.lo & b.lo;
+}
+
+void
+_orv(Vlong *r, Vlong a, Vlong b)
+{
+	r->hi = a.hi | b.hi;
+	r->lo = a.lo | b.lo;
+}
+
+void
+_xorv(Vlong *r, Vlong a, Vlong b)
+{
+	r->hi = a.hi ^ b.hi;
+	r->lo = a.lo ^ b.lo;
+}
+
+void
+_vpp(Vlong *l, Vlong *r)
+{
+
+	l->hi = r->hi;
+	l->lo = r->lo;
+	r->lo++;
+	if(r->lo == 0)
+		r->hi++;
+}
+
+void
+_vmm(Vlong *l, Vlong *r)
+{
+
+	l->hi = r->hi;
+	l->lo = r->lo;
+	if(r->lo == 0)
+		r->hi--;
+	r->lo--;
+}
+
+void
+_ppv(Vlong *l, Vlong *r)
+{
+
+	r->lo++;
+	if(r->lo == 0)
+		r->hi++;
+	l->hi = r->hi;
+	l->lo = r->lo;
+}
+
+void
+_mmv(Vlong *l, Vlong *r)
+{
+
+	if(r->lo == 0)
+		r->hi--;
+	r->lo--;
+	l->hi = r->hi;
+	l->lo = r->lo;
+}
+
+void
+_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
+{
+	Vlong t, u;
+
+	u.lo = 0;
+	u.hi = 0;
+	switch(type) {
+	default:
+		abort();
+		break;
+
+	case 1:	/* schar */
+		t.lo = *(schar*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(schar*)lv = u.lo;
+		break;
+
+	case 2:	/* uchar */
+		t.lo = *(uchar*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uchar*)lv = u.lo;
+		break;
+
+	case 3:	/* short */
+		t.lo = *(short*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(short*)lv = u.lo;
+		break;
+
+	case 4:	/* ushort */
+		t.lo = *(ushort*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(ushort*)lv = u.lo;
+		break;
+
+	case 9:	/* int */
+		t.lo = *(int*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(int*)lv = u.lo;
+		break;
+
+	case 10:	/* uint */
+		t.lo = *(uint*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uint*)lv = u.lo;
+		break;
+
+	case 5:	/* long */
+		t.lo = *(long*)lv;
+		t.hi = t.lo >> 31;
+		fn(&u, t, rv);
+		*(long*)lv = u.lo;
+		break;
+
+	case 6:	/* ulong */
+		t.lo = *(ulong*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(ulong*)lv = u.lo;
+		break;
+
+	case 7:	/* vlong */
+	case 8:	/* uvlong */
+		fn(&u, *(Vlong*)lv, rv);
+		*(Vlong*)lv = u;
+		break;
+	}
+	*ret = u;
+}
+
+void
+_p2v(Vlong *ret, void *p)
+{
+	long t;
+
+	t = (ulong)p;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+void
+_sl2v(Vlong *ret, long sl)
+{
+	long t;
+
+	t = sl;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+void
+_ul2v(Vlong *ret, ulong ul)
+{
+	long t;
+
+	t = ul;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+void
+_si2v(Vlong *ret, int si)
+{
+	long t;
+
+	t = si;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+void
+_ui2v(Vlong *ret, uint ui)
+{
+	long t;
+
+	t = ui;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+void
+_sh2v(Vlong *ret, long sh)
+{
+	long t;
+
+	t = (sh << 16) >> 16;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+void
+_uh2v(Vlong *ret, ulong ul)
+{
+	long t;
+
+	t = ul & 0xffff;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+void
+_sc2v(Vlong *ret, long uc)
+{
+	long t;
+
+	t = (uc << 24) >> 24;
+	ret->lo = t;
+	ret->hi = t >> 31;
+}
+
+void
+_uc2v(Vlong *ret, ulong ul)
+{
+	long t;
+
+	t = ul & 0xff;
+	ret->lo = t;
+	ret->hi = 0;
+}
+
+long
+_v2sc(Vlong rv)
+{
+	long t;
+
+	t = rv.lo & 0xff;
+	return (t << 24) >> 24;
+}
+
+long
+_v2uc(Vlong rv)
+{
+
+	return rv.lo & 0xff;
+}
+
+long
+_v2sh(Vlong rv)
+{
+	long t;
+
+	t = rv.lo & 0xffff;
+	return (t << 16) >> 16;
+}
+
+long
+_v2uh(Vlong rv)
+{
+
+	return rv.lo & 0xffff;
+}
+
+long
+_v2sl(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+long
+_v2ul(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+long
+_v2si(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+long
+_v2ui(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+int
+_testv(Vlong rv)
+{
+	return rv.lo || rv.hi;
+}
+
+int
+_eqv(Vlong lv, Vlong rv)
+{
+	return lv.lo == rv.lo && lv.hi == rv.hi;
+}
+
+int
+_nev(Vlong lv, Vlong rv)
+{
+	return lv.lo != rv.lo || lv.hi != rv.hi;
+}
+
+int
+_ltv(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi < (long)rv.hi || 
+		(lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+int
+_lev(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi < (long)rv.hi || 
+		(lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+int
+_gtv(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi > (long)rv.hi || 
+		(lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+int
+_gev(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi > (long)rv.hi || 
+		(lv.hi == rv.hi && lv.lo >= rv.lo);
+}
+
+int
+_lov(Vlong lv, Vlong rv)
+{
+	return lv.hi < rv.hi || 
+		(lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+int
+_lsv(Vlong lv, Vlong rv)
+{
+	return lv.hi < rv.hi || 
+		(lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+int
+_hiv(Vlong lv, Vlong rv)
+{
+	return lv.hi > rv.hi || 
+		(lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+int
+_hsv(Vlong lv, Vlong rv)
+{
+	return lv.hi > rv.hi || 
+		(lv.hi == rv.hi && lv.lo >= rv.lo);
+}