shithub: riscv

Download patch

ref: 5bf09937da68b5b55e33cc24ea021ed08b2ae6e0
parent: 57ff297cc07982da819b996f51cd5f31c1af429d
author: cinap_lenrek <[email protected]>
date: Sun Jun 26 11:09:27 EDT 2016

5c: support for bit ROL, native 64 bit arithmetic

--- a/sys/src/cmd/5c/5.out.h
+++ b/sys/src/cmd/5c/5.out.h
@@ -158,6 +158,8 @@
 
 	ACLREX,
 
+	AROR,
+
 	ALAST,
 };
 
--- a/sys/src/cmd/5c/cgen.c
+++ b/sys/src/cmd/5c/cgen.c
@@ -180,6 +180,7 @@
 	case OAND:
 	case OOR:
 	case OXOR:
+	case OROL:
 	case OLSHR:
 	case OASHL:
 	case OASHR:
@@ -410,6 +411,10 @@
 				break;
 			}
 		}
+		if(typev[l->type->etype]) {
+			cgen64(n, nn);
+			break;
+		}
 		regalloc(&nod, l, nn);
 		cgen(l, &nod);
 		regalloc(&nod1, n, &nod);
@@ -858,11 +863,320 @@
 	cursafe = curs;
 }
 
+static void
+freepair(Node *n)	/* temporarily zero the reg[] entries of both registers of pair n */
+{
+	n->left->xoffset = reg[n->left->reg];	/* stash the current reg[] entry in xoffset so unfreepair can restore it */
+	reg[n->left->reg] = 0;
+	n->right->xoffset = reg[n->right->reg];	/* same for the right register of the pair */
+	reg[n->right->reg] = 0;
+}
+static void
+unfreepair(Node *n)	/* undo freepair: put back the reg[] entries that were stashed in xoffset */
+{
+	reg[n->left->reg] = n->left->xoffset;
+	n->left->xoffset = 0;	/* xoffset was only borrowed as scratch storage; clear it again */
+	reg[n->right->reg] = n->right->xoffset;
+	n->right->xoffset = 0;
+}
+
+int
+cgen64(Node *n, Node *nn)	/* generate code for vlong-related node n with result in nn; returns 1 if handled, 0 if the caller must handle it */
+{
+	Node nod0, nod1, nod2, nod3, *l, *r;
+	int o, a, ml, mr, nnsaved;
+	long curs;
+
+	if(!machcap(n))	/* only the operations machcap() accepts are handled here */
+		return 0;
+
+	if(debug['g']){
+		prtree(nn, "cgen64 nn");
+		prtree(n, "cgen64 n");
+	}
+
+	if(nn != Z && nn->op != OREGPAIR && typev[n->type->etype]){	/* vlong result wanted somewhere other than a register pair */
+		if(nn->complex > n->complex){	/* destination is the more complex side: compute its address first */
+			reglpcgen(&nod0, nn, 1);
+			nod0.type = n->type;
+			regalloc(&nod1, n, Z);
+			cgen(n, &nod1);	/* evaluate n into a temporary, then store the temporary */
+			cgen(&nod1, &nod0);
+			regfree(&nod0);
+			regfree(&nod1);
+		} else {
+			regalloc(&nod1, n, Z);
+			cgen(n, &nod1);	/* evaluate n into a temporary, then store it into nn */
+			cgen(&nod1, nn);
+			regfree(&nod1);
+		}
+		return 1;
+	}
+
+	nnsaved = 0;	/* set to 1 when nn has been spilled into nod0 */
+	curs = cursafe;	/* remembered here and restored at Out */
+	o = n->op;
+	l = n->left;
+	r = n->right;
+	switch(o){
+	default:
+		return 0;
+
+	case OCAST:
+		if(typeilp[n->type->etype] && typev[l->type->etype]){	/* cast from vlong down to a typeilp[] type */
+			if(l->op == ONAME || l->op == OINDREG)
+				nod0 = *l;	/* named or register-indirect operand: access it directly with the result type */
+			else if((l->op == OLSHR || l->op == OASHR)
+			&& (l->right->op == OCONST && l->right->vconst == 32)
+			&& (l->left->op == ONAME || l->left->op == OINDREG)){	/* (x >> 32) of such an operand */
+				nod0 = *l->left;
+				nod0.xoffset += SZ_LONG;	/* use the word at offset SZ_LONG instead of shifting */
+			} else {
+				if(nn->complex > l->complex){
+					reglpcgen(&nod0, nn, 1);
+					regalloc(&nod1, l, Z);
+					cgen(l, &nod1);
+					cgen(nod1.left, &nod0);	/* only the left half of the evaluated operand is stored */
+					regfree(&nod0);
+					regfree(&nod1);
+				} else {
+					regalloc(&nod0, l, Z);
+					cgen(l, &nod0);
+					cgen(nod0.left, nn);	/* only the left half of the evaluated operand is stored */
+					regfree(&nod0);
+				}
+				goto Out;
+			}
+			nod0.type = n->type;
+			cgen(&nod0, nn);
+			goto Out;
+		}
+		if(typev[n->type->etype] && typeilp[l->type->etype]){	/* cast from a typeilp[] type up to vlong */
+			regalloc(&nod1, l, nn->left);
+			a = reg[nn->right->reg];	/* hide nn->right's reg[] entry while l is evaluated, then put it back */
+			reg[nn->right->reg] = 0;
+			cgen(l, &nod1);
+			reg[nn->right->reg] = a;
+			if(typeu[n->type->etype] || typeu[l->type->etype])
+				gmove(nodconst(0), nn->right);	/* either type unsigned: nn->right is set to 0 */
+			else
+				gopcode(OASHR, nodconst(31), &nod1, nn->right);	/* otherwise nn->right = nod1 arithmetically shifted right by 31 */
+			regfree(&nod1);
+			goto Out;
+		}
+		return 0;
+
+	case OASASHL:	o = OASHL;	goto asop;	/* op= forms: pick the plain operator, then share the code at asop */
+	case OASASHR:	o = OASHR;	goto asop;
+	case OASLSHR:	o = OLSHR;	goto asop;
+
+	case OASADD:	o = OADD;	goto asop;
+	case OASSUB:	o = OSUB;	goto asop;
+	case OASAND:	o = OAND;	goto asop;
+	case OASXOR:	o = OXOR;	goto asop;
+	case OASOR:	o = OOR;	goto asop;
+	asop:	/* build nod0 = (nod1 op r) where nod1 stands for l, then assign the result back to nod1 */
+		nod0 = *n;
+		nod0.op = o;
+		nod0.left = &nod1;
+		nod1 = *l;
+		if(side(l)){	/* NOTE(review): presumably l has side effects, so its address is computed only once - confirm side() */
+			nod1.op = OIND;	/* nod1 becomes an indirection through nod3 */
+			nod1.left = &nod3;
+			nod1.right = Z;
+			nod1.complex = 1;
+
+			nod1.type = typ(TIND, l->type);	/* pointer type used only while allocating nod3 */
+			regsalloc(&nod3, &nod1);
+			nod1.type = l->type;
+
+			regalloc(&nod2, &nod3, nn != Z ? nn->left : Z);
+			lcgen(l, &nod2);	/* address of l goes via nod2 into nod3 */
+			gmove(&nod2, &nod3);
+			regfree(&nod2);
+		}
+		if(nn == Z)
+			cgen(&nod0, &nod1);	/* no result wanted: compute straight into the lvalue */
+		else {
+			cgen(&nod0, nn);	/* compute into nn, then copy nn to the lvalue */
+			cgen(nn, &nod1);
+		}
+		goto Out;
+
+	case OASHL:
+		cgen(l, nn);	/* operand goes into nn and is shifted in place */
+		assert(r->op == OCONST);
+		a = r->vconst & 63;	/* shift count reduced to 0..63 */
+		if(a == 0)
+			goto Out;
+		if(a == 1){	/* << 1: add the pair to itself, second add uses the carry */
+			gins(AADD, nn->left, nn->left);
+			p->scond |= C_SBIT;	/* make the first add set the condition flags */
+			gins(AADC, nn->right, nn->right);
+			goto Out;
+		}
+		if(a < 32){	/* 1 < a < 32: bits cross from nn->left into nn->right */
+			gopcode(OASHL, nodconst(a), Z, nn->right);
+			gopcode(OOR, nn->left, Z, nn->right);
+			p->from.offset = nn->left->reg | (32-a)<<7 | 1<<5;	/* turn the OR source into nn->left shifted by 32-a; NOTE(review): 1<<5 presumably selects logical shift right - confirm encoding */
+			p->from.reg = NREG;
+			p->from.type = D_SHIFT;
+			gopcode(OASHL, nodconst(a), Z, nn->left);
+			goto Out;
+		}
+		if(a == 32)
+			gmove(nn->left, nn->right);	/* exactly 32: nn->left is copied to nn->right */
+		else
+			gopcode(o, nodconst(a-32), nn->left, nn->right);	/* more than 32: nn->right = nn->left shifted by a-32 */
+		gmove(nodconst(0), nn->left);	/* nn->left becomes 0 */
+		goto Out;
+
+	case OLSHR:
+	case OASHR:
+		cgen(l, nn);	/* operand goes into nn and is shifted in place */
+		assert(r->op == OCONST);
+		a = r->vconst & 63;	/* shift count reduced to 0..63 */
+		if(a == 0)
+			goto Out;
+		if(a < 32){	/* 0 < a < 32: bits cross from nn->right into nn->left */
+			gopcode(OLSHR, nodconst(a), Z, nn->left);
+			gopcode(OOR, nn->right, Z, nn->left);
+			p->from.offset = nn->right->reg | (32-a)<<7;	/* turn the OR source into nn->right shifted by 32-a; NOTE(review): no shift-type bits set, presumably a left shift - confirm encoding */
+			p->from.reg = NREG;
+			p->from.type = D_SHIFT;
+			gopcode(o, nodconst(a), Z, nn->right);	/* nn->right uses the requested (logical or arithmetic) shift */
+			goto Out;
+		}
+		if(a == 32)
+			gmove(nn->right, nn->left);	/* exactly 32: nn->right is copied to nn->left */
+		else
+			gopcode(o, nodconst(a-32), nn->right, nn->left);	/* more than 32: nn->left = nn->right shifted by a-32 */
+		if(o == OASHR)
+			gopcode(o, nodconst(31), Z, nn->right);	/* arithmetic: nn->right is shifted right by 31 */
+		else
+			gmove(nodconst(0), nn->right);	/* logical: nn->right becomes 0 */
+		goto Out;
+
+	case OADD:
+	case OSUB:
+	case OAND:
+	case OXOR:
+	case OOR:
+		ml = o == OADD && l->op == OLMUL && machcap(l);	/* ADD whose operand is an OLMUL accepted by machcap is emitted as AMULALU below */
+		mr = o == OADD && r->op == OLMUL && machcap(r);
+		if(ml && !mr){	/* evaluate one operand into nn; n is set to the operand still to be done */
+			cgen(r, nn);
+			n = l;
+		} else if(mr && !ml){
+			cgen(l, nn);
+			n = r;
+		} else {
+			if(r->complex > l->complex){	/* more complex operand is evaluated first */
+				cgen(r, nn);
+				n = l;
+			} else {
+				cgen(l, nn);
+				n = r;
+			}
+		}
+		if(n->complex >= FNX){	/* NOTE(review): complex >= FNX presumably means n contains a call - confirm; nn is spilled to nod0 meanwhile */
+			regsalloc(&nod0, nn);
+			gmove(nn, &nod0);
+			nnsaved = 1;
+		}
+		if(ml || mr){	/* leave the switch: the multiply code after it works on n's operands and targets nn */
+			l = n->left;
+			r = n->right;
+			a = AMULALU;
+			break;
+		}
+		regalloc(&nod1, n, Z);
+		if(nnsaved) freepair(nn);	/* zero nn's reg[] entries while the remaining operand is evaluated */
+		cgen(n, &nod1);
+		if(nnsaved){
+			unfreepair(nn);
+			gmove(&nod0, nn);	/* reload the spilled first operand */
+		}
+
+		switch(o){
+		case OADD:
+			gins(AADD, nod1.left, nn->left);	/* left halves with C_SBIT set, then right halves with AADC */
+			p->scond |= C_SBIT;
+			gins(AADC, nod1.right, nn->right);
+			break;
+		case OSUB:
+			if(n == r){	/* nn holds l and nod1 holds r */
+				gins(ASUB, nod1.left, nn->left);
+				p->scond |= C_SBIT;
+				gins(ASBC, nod1.right, nn->right);
+			} else {	/* nn holds r and nod1 holds l; NOTE(review): p->reg presumably makes nod1 the minuend - confirm operand order */
+				gins(ASUB, nn->left, nn->left);
+				p->reg = nod1.left->reg;
+				p->scond |= C_SBIT;
+				gins(ASBC, nn->right, nn->right);
+				p->reg = nod1.right->reg;
+			}
+			break;
+		default:	/* OAND, OXOR, OOR: applied to each half independently */
+			gopcode(o, nod1.left, Z, nn->left);
+			gopcode(o, nod1.right, Z, nn->right);
+		}
+		regfree(&nod1);
+		goto Out;
+
+	case OMUL:
+		a = AMULL;	/* opcode only; operands are handled after the switch */
+		break;
+
+	case OLMUL:
+		a = AMULLU;	/* opcode only; operands are handled after the switch */
+		break;
+	}
+
+	if(r->complex > l->complex) {	/* swap so that the more complex operand is evaluated first */
+		l = r;
+		r = n->left;
+	}
+
+	regalloc(&nod1, l, Z);
+	if(nnsaved) freepair(nn);
+	cgen(l, &nod1);
+	if(nnsaved) unfreepair(nn);
+	if(r->complex >= FNX) {	/* park l's value in nod3 while r is evaluated into nod1, then reload it into nod2 */
+		regsalloc(&nod3, &nod1);
+		gmove(&nod1, &nod3);
+		if(nnsaved) freepair(nn);
+		cgen(r, &nod1);
+		if(nnsaved) unfreepair(nn);
+		regalloc(&nod2, &nod3, Z);
+		gmove(&nod3, &nod2);
+	} else {
+		regalloc(&nod2, r, Z);
+		if(nnsaved) freepair(nn);
+		cgen(r, &nod2);
+		if(nnsaved) unfreepair(nn);
+	}
+	if(nnsaved)
+		gmove(&nod0, nn);	/* restore the value spilled in the ADD case before the multiply targets nn */
+
+	gins(a, &nod1, nn->right);	/* multiply instruction: sources nod1 and nod2, destination patched below */
+	p->reg = nod2.reg;
+	p->to.type = D_REGREG;	/* destination names two registers; nn->left's register is carried in to.offset */
+	p->to.offset = nn->left->reg;
+
+	regfree(&nod1);
+	regfree(&nod2);
+
+Out:
+	cursafe = curs;	/* restore the value saved on entry */
+	return 1;
+}
+
 void
 sugen(Node *n, Node *nn, long w)
 {
 	Prog *p1;
-	Node nod0, nod1, nod2, nod3, nod4, *l, *r;
+	Node nod0, nod1, nod2, nod3, nod4, *l, *r, *d;
 	Type *t;
 	long pc1;
 	int i, m, c;
@@ -893,11 +1207,13 @@
 				break;
 			}
 
-			t = nn->type;
-			nn->type = types[TLONG];
-			reglcgen(&nod1, nn, Z);
-			nn->type = t;
+			if(nn->op == OREGPAIR){
+				gopcode(OAS, nod32const(n->vconst), Z, nn->left);
+				gopcode(OAS, nod32const(n->vconst>>32), Z, nn->right);
+				break;
+			}
 
+			reglpcgen(&nod1, nn, 1);
 			if(align(0, types[TCHAR], Aarg1))	/* isbigendian */
 				gopcode(OAS, nod32const(n->vconst>>32), Z, &nod1);
 			else
@@ -907,7 +1223,6 @@
 				gopcode(OAS, nod32const(n->vconst), Z, &nod1);
 			else
 				gopcode(OAS, nod32const(n->vconst>>32), Z, &nod1);
-
 			regfree(&nod1);
 			break;
 		}
@@ -1021,16 +1336,27 @@
 			break;
 		}
 		if(nn->op != OIND) {
-			nn = new1(OADDR, nn, Z);
-			nn->type = types[TIND];
-			nn->addable = 0;
+			if(nn->op == OREGPAIR) {
+				regsalloc(&nod1, nn);
+				d = &nod1;
+			}else
+				d = nn;
+			d = new1(OADDR, d, Z);
+			d->type = types[TIND];
+			d->addable = 0;
 		} else
-			nn = nn->left;
-		n = new(OFUNC, n->left, new(OLIST, nn, n->right));
+			d = nn->left;
+		n = new(OFUNC, n->left, new(OLIST, d, n->right));
 		n->complex = FNX;
 		n->type = types[TVOID];
 		n->left->type = types[TVOID];
-		cgen(n, Z);
+		if(nn->op == OREGPAIR){
+			freepair(nn);
+			cgen(n, Z);
+			unfreepair(nn);
+			gmove(&nod1, nn);
+		} else
+			cgen(n, Z);
 		break;
 
 	case OCOND:
@@ -1052,8 +1378,7 @@
 	return;
 
 copy:
-	if(nn == Z)
-		return;
+	if(nn != Z)
 	if(n->complex >= FNX && nn->complex >= FNX) {
 		t = nn->type;
 		nn->type = types[TLONG];
@@ -1079,43 +1404,39 @@
 	}
 
 	w /= SZ_LONG;
-	if(w <= 2) {
+	if(w == 2 && cgen64(n, nn))
+		return;
+
+	if(nn == Z)
+		return;
+
+	if(w == 2) {
 		if(n->complex > nn->complex) {
-			reglpcgen(&nod1, n, 1);
-			reglpcgen(&nod2, nn, 1);
+			if(n->op != OREGPAIR && n->op != ONAME && n->op != OINDREG)
+				reglpcgen(&nod1, n, 1);
+			else
+				nod1 = *n;
+			if(nn->op != OREGPAIR && nn->op != ONAME && nn->op != OINDREG)
+				reglpcgen(&nod2, nn, 1);
+			else
+				nod2 = *nn;
 		} else {
-			reglpcgen(&nod2, nn, 1);
-			reglpcgen(&nod1, n, 1);
+			if(nn->op != OREGPAIR && nn->op != ONAME && nn->op != OINDREG)
+				reglpcgen(&nod2, nn, 1);
+			else
+				nod2 = *nn;
+			if(n->op != OREGPAIR && n->op != ONAME && n->op != OINDREG)
+				reglpcgen(&nod1, n, 1);
+			else
+				nod1 = *n;
 		}
-		regalloc(&nod3, &regnode, Z);
-		regalloc(&nod4, &regnode, Z);
-		if(nod3.reg > nod4.reg){
-			/* code below assumes nod3 loaded first */
-			Node t = nod3; nod3 = nod4; nod4 = t;
-		}
-		nod0 = *nodconst((1<<nod3.reg)|(1<<nod4.reg));
-		if(w == 2 && nod1.xoffset == 0)
-			gmovm(&nod1, &nod0, 0);
-		else {
-			gmove(&nod1, &nod3);
-			if(w == 2) {
-				nod1.xoffset += SZ_LONG;
-				gmove(&nod1, &nod4);
-			}
-		}
-		if(w == 2 && nod2.xoffset == 0)
-			gmovm(&nod0, &nod2, 0);
-		else {
-			gmove(&nod3, &nod2);
-			if(w == 2) {
-				nod2.xoffset += SZ_LONG;
-				gmove(&nod4, &nod2);
-			}
-		}
-		regfree(&nod1);
-		regfree(&nod2);
-		regfree(&nod3);
-		regfree(&nod4);
+		nod1.type = types[TVLONG];
+		nod2.type = types[TVLONG];
+		gmove(&nod1, &nod2);
+		if(n->op != OREGPAIR && n->op != ONAME && n->op != OINDREG)
+			regfree(&nod1);
+		if(nn->op != OREGPAIR && nn->op != ONAME && nn->op != OINDREG)
+			regfree(&nod2);
 		return;
 	}
 
--- a/sys/src/cmd/5c/gc.h
+++ b/sys/src/cmd/5c/gc.h
@@ -226,6 +226,7 @@
 void	boolgen(Node*, int, Node*);
 void	sugen(Node*, Node*, long);
 void	layout(Node*, Node*, int, int, Node*);
+int	cgen64(Node*, Node*);
 
 /*
  * txt.c
--- a/sys/src/cmd/5c/list.c
+++ b/sys/src/cmd/5c/list.c
@@ -64,6 +64,9 @@
 		strcat(sc, ".W");
 	if(s & C_UBIT)		/* ambiguous with FBIT */
 		strcat(sc, ".U");
+	if(a == AMULL || a == AMULAL || a == AMULLU || a == AMULALU)
+		snprint(str, sizeof str, "	%A%s	%D,R%d,%D", a, sc, &p->from, p->reg, &p->to);
+	else
 	if(a == AMOVM) {
 		if(p->from.type == D_CONST)
 			snprint(str, sizeof str, "	%A%s	%R,%D", a, sc, &p->from, &p->to);
@@ -146,6 +149,10 @@
 			snprint(str, sizeof str, "%N(R%d)", a, a->reg);
 		else
 			snprint(str, sizeof str, "%N", a);
+		break;
+
+	case D_REGREG:
+		snprint(str, sizeof str, "(R%d,R%d)", a->reg, (char)a->offset);
 		break;
 
 	case D_REG:
--- /dev/null
+++ b/sys/src/cmd/5c/machcap.c
@@ -1,0 +1,49 @@
+#include "gc.h"
+
+int
+machcap(Node *n)	/* returns 1 if operation n is one cgen64 checks for before generating code, else 0 */
+{
+	if(n == Z)
+		return 0;	/* test */
+	switch(n->op) {
+	case OASADD:
+	case OASSUB:
+	case OASAND:
+	case OASXOR:
+	case OASOR:
+	case OADD:
+	case OSUB:
+	case OAND:
+	case OXOR:
+	case OOR:
+		if(typev[n->type->etype] && typev[n->left->type->etype] && typev[n->right->type->etype])	/* result and both operands must be vlong (typev[]) */
+			return 1;
+		break;
+
+	case OMUL:
+	case OLMUL:
+		if(typev[n->type->etype] && typeil[n->left->type->etype] && typeil[n->right->type->etype]	/* vlong result from two typeil[] operands */
+		&& typeu[n->type->etype] == typeu[n->left->type->etype]	/* and the result and both operands agree in typeu[] (signedness) */
+		&& typeu[n->type->etype] == typeu[n->right->type->etype])
+			return 1;
+		break;
+
+	case OASASHL:
+	case OASASHR:
+	case OASLSHR:
+	case OASHL:
+	case OASHR:
+	case OLSHR:
+		if(typev[n->type->etype] && typev[n->left->type->etype] && n->right->op == OCONST)	/* vlong shifted by a constant count only */
+			return 1;
+		break;
+
+	case OCAST:
+		if(typeilp[n->type->etype] && typev[n->left->type->etype])	/* vlong cast to a typeilp[] type */
+			return 1;
+		if(typev[n->type->etype] && typeilp[n->left->type->etype])	/* typeilp[] type cast to vlong */
+			return 1;
+		break;
+	}
+	return 0;	/* any other operator or type combination is not supported */
+}
--- a/sys/src/cmd/5c/mkfile
+++ b/sys/src/cmd/5c/mkfile
@@ -13,6 +13,7 @@
 	sgen.$O\
 	swt.$O\
 	txt.$O\
+	machcap.$O\
 
 HFILES=\
 	gc.h\
--- a/sys/src/cmd/5c/peep.c
+++ b/sys/src/cmd/5c/peep.c
@@ -249,7 +249,6 @@
 int
 regtyp(Adr *a)
 {
-
 	if(a->type == D_REG)
 		return 1;
 	if(a->type == D_FREG)
@@ -302,6 +301,7 @@
 		case ASLL:
 		case ASRL:
 		case ASRA:
+		case AROR:
 		case AORR:
 		case AAND:
 		case AEOR:
@@ -888,6 +888,25 @@
 			print(" (???)");
 		return 2;
 
+	case AMULL:
+	case AMULAL:
+	case AMULLU:
+	case AMULALU:
+		if(v->type != D_REG)
+			return 0;
+		if(copyau(&p->to, v))
+			return (p->as == AMULAL || p->as == AMULALU) ? 2 : 3;
+		if(p->from.reg == v->reg || p->reg == v->reg){
+			if(s != A && !copyau(&p->to, s) && p->from.reg != s->reg && p->reg != s->reg){
+				if(p->from.reg == v->reg)
+					p->from.reg = s->reg;
+				if(p->reg == v->reg)
+					p->reg = s->reg;
+			}
+			return 1;
+		}
+		return 0;
+
 	case AMOVM:
 		if(v->type != D_REG)
 			return 0;
@@ -974,6 +993,7 @@
 	case ASLL:
 	case ASRL:
 	case ASRA:
+	case AROR:
 	case AORR:
 	case AAND:
 	case AEOR:
@@ -1110,6 +1130,7 @@
 	case ASLL:
 	case ASRL:
 	case ASRA:
+	case AROR:
 	case AORR:
 	case AAND:
 	case AEOR:
@@ -1170,7 +1191,7 @@
 		return 1;
 	if(v->type == D_REG) {
 		if(a->type == D_OREG) {
-			if(v->reg == a->reg)
+			if(a->reg == v->reg)
 				return 1;
 		} else if(a->type == D_SHIFT) {
 			if((a->offset&0xf) == v->reg)
@@ -1177,6 +1198,9 @@
 				return 1;
 			if((a->offset&(1<<4)) && (a->offset>>8) == v->reg)
 				return 1;
+		} else if(a->type == D_REGREG) {
+			if(a->reg == v->reg || a->offset == v->reg)
+				return 1;
 		}
 	}
 	return 0;
@@ -1212,7 +1236,12 @@
 				a->offset = (a->offset&~0xf)|s->reg;
 			if((a->offset&(1<<4)) && (a->offset>>8) == v->reg)
 				a->offset = (a->offset&~(0xf<<8))|(s->reg<<8);
-		} else
+		} else if(a->type == D_REGREG) {
+			if(a->offset == v->reg)
+				a->offset = s->reg;
+			if(a->reg == v->reg)
+				a->reg = s->reg;
+		} else 
 			a->reg = s->reg;
 	}
 	return 0;
--- a/sys/src/cmd/5c/sgen.c
+++ b/sys/src/cmd/5c/sgen.c
@@ -109,6 +109,25 @@
 
 	case OMUL:
 	case OLMUL:
+		if(typev[n->type->etype]){
+			/* try to lift 32->64 bit cast */
+			if(typev[l->type->etype] && l->op == OCAST && typeil[l->left->type->etype]
+			&& typeu[n->type->etype] == typeu[l->left->type->etype])
+				l = l->left;
+			if(typev[r->type->etype] && r->op == OCAST && typeil[r->left->type->etype]
+			&& typeu[n->type->etype] == typeu[r->left->type->etype])
+				r = r->left;
+
+			if(typeil[l->type->etype] && typeil[r->type->etype]){
+				n->left = l;
+				n->right = r;
+				xcom(l);
+				xcom(r);
+				break;
+			}
+			l = n->left;
+			r = n->right;
+		}
 		xcom(l);
 		xcom(r);
 		t = vlog(r);
@@ -123,7 +142,6 @@
 			n->left = r;
 			n->right = l;
 			r = l;
-			l = n->left;
 			r->vconst = t;
 			r->type = types[TINT];
 		}
@@ -171,6 +189,13 @@
 		}
 		break;
 
+	case OOR:
+		xcom(l);
+		xcom(r);
+		if(typeil[n->type->etype])
+			rolor(n);
+		break;
+
 	default:
 		if(l != Z)
 			xcom(l);
@@ -180,7 +205,8 @@
 	}
 	if(n->addable >= 10)
 		return;
-
+	l = n->left;
+	r = n->right;
 	if(l != Z)
 		n->complex = l->complex;
 	if(r != Z) {
--- a/sys/src/cmd/5c/swt.c
+++ b/sys/src/cmd/5c/swt.c
@@ -523,6 +523,10 @@
 	case D_PSR:
 		break;
 
+	case D_REGREG:
+		*bp++ = a->offset;
+		break;
+
 	case D_OREG:
 	case D_CONST:
 	case D_BRANCH:
--- a/sys/src/cmd/5c/txt.c
+++ b/sys/src/cmd/5c/txt.c
@@ -297,6 +297,7 @@
 				j = REGRET+1;
 			if(reg[j] == 0 && resvreg[j] == 0) {
 				i = j;
+				lasti = (i - REGRET) % 3;
 				goto out;
 			}
 			j++;
@@ -306,7 +307,6 @@
 
 	case TFLOAT:
 	case TDOUBLE:
-	case TVLONG:
 		if(o != Z && o->op == OREGISTER) {
 			i = o->reg;
 			if(i >= NREG && i < NREG+NFREG)
@@ -324,6 +324,32 @@
 		}
 		diag(tn, "out of float registers");
 		goto err;
+
+	case TVLONG:
+	case TUVLONG:
+		n->op = OREGPAIR;
+		n->complex = 0;	/* already in registers */
+		n->addable = 11;
+		n->type = tn->type;
+		n->lineno = nearln;
+		n->left = alloc(sizeof(Node));
+		n->right = alloc(sizeof(Node));
+		if(o != Z && o->op == OREGPAIR) {
+			regalloc(n->left, &regnode, o->left);
+			regalloc(n->right, &regnode, o->right);
+		} else {
+			regalloc(n->left, &regnode, Z);
+			regalloc(n->right, &regnode, Z);
+		}
+		if(n->left->reg > n->right->reg){
+			j = n->left->reg;
+			n->left->reg = n->right->reg;
+			n->right->reg = j;
+		}
+		n->right->type = types[TULONG];
+		if(tn->type->etype == TUVLONG)
+			n->left->type = types[TULONG];
+		return;
 	}
 	diag(tn, "unknown type in regalloc: %T", tn->type);
 err:
@@ -331,9 +357,6 @@
 	return;
 out:
 	reg[i]++;
-	lasti++;
-	if(lasti >= 5)
-		lasti = 0;
 	nodreg(n, tn, i);
 }
 
@@ -352,6 +375,11 @@
 {
 	int i;
 
+	if(n->op == OREGPAIR) {
+		regfree(n->left);
+		regfree(n->right);
+		return;
+	}
 	i = 0;
 	if(n->op != OREGISTER && n->op != OINDREG)
 		goto err;
@@ -604,11 +632,30 @@
 			a = AMOVHU;
 			break;
 		}
-		if(typechlp[ft] && typeilp[tt])
-			regalloc(&nod, t, t);
-		else
-			regalloc(&nod, f, t);
-		gins(a, f, &nod);
+		if(typev[ft]) {
+			if(typev[tt]) {
+				nod1 = *f;
+				regalloc(&nod, f, t);
+				if(f->op == OINDREG && f->xoffset == 0 && nod.left->reg < nod.right->reg) {
+					gmovm(&nod1, nodconst((1<<nod.left->reg)|(1<<nod.right->reg)), 0);
+				} else {
+					/* low order first, because its value will be used first */
+					gins(AMOVW, &nod1, nod.left);
+					nod1.xoffset += SZ_LONG;
+					gins(AMOVW, &nod1, nod.right);
+				}
+			} else {
+				/* assumed not float or double */
+				regalloc(&nod, &regnode, t);
+				gins(AMOVW, f, &nod);
+			}
+		} else {
+			if(typechlp[ft] && typeilp[tt])
+				regalloc(&nod, t, t);
+			else
+				regalloc(&nod, f, t);
+			gins(a, f, &nod);
+		}
 		gmove(&nod, t);
 		regfree(&nod);
 		return;
@@ -639,7 +686,6 @@
 		case TFLOAT:
 			a = AMOVF;
 			break;
-		case TVLONG:
 		case TDOUBLE:
 			a = AMOVD;
 			break;
@@ -649,7 +695,17 @@
 		else
 			regalloc(&nod, t, Z);
 		gmove(f, &nod);
-		gins(a, &nod, t);
+		if(typev[tt]) {
+			nod1 = *t;
+			if(t->op == OINDREG && t->xoffset == 0 && nod.left->reg < nod.right->reg){
+				gmovm(nodconst((1<<nod.left->reg)|(1<<nod.right->reg)), &nod1, 0);
+			} else {
+				gins(a, nod.left, &nod1);
+				nod1.xoffset += SZ_LONG;
+				gins(a, nod.right, &nod1);
+			}
+		} else
+			gins(a, &nod, t);
 		regfree(&nod);
 		return;
 	}
@@ -660,7 +716,6 @@
 	a = AGOK;
 	switch(ft) {
 	case TDOUBLE:
-	case TVLONG:
 	case TFLOAT:
 		switch(tt) {
 		case TDOUBLE:
@@ -876,6 +931,15 @@
 			break;
 		}
 		break;
+	case TVLONG:
+	case TUVLONG:
+		switch(tt) {
+		case TVLONG:
+		case TUVLONG:
+			a = AMOVW;
+			break;
+		}
+		break;
 	}
 	if(a == AGOK)
 		diag(Z, "bad opcode in gmove %T -> %T", f->type, t->type);
@@ -882,7 +946,13 @@
 	if(a == AMOVW || a == AMOVF || a == AMOVD)
 	if(samaddr(f, t))
 		return;
-	gins(a, f, t);
+	if(typev[ft]) {
+		if(f->op != OREGPAIR || t->op != OREGPAIR)
+			diag(Z, "bad vlong in gmove (%O->%O)", f->op, t->op);
+		gins(a, f->left, t->left);
+		gins(a, f->right, t->right);
+	} else
+		gins(a, f, t);
 }
 
 void
@@ -1002,6 +1072,12 @@
 		a = ASLL;
 		break;
 
+	case OROL:
+		assert(f1->op == OCONST);
+		f1->vconst = 32-f1->vconst;
+		a = AROR;
+		break;
+
 	case OFUNC:
 		a = ABL;
 		break;
@@ -1151,6 +1227,9 @@
 		if(f->reg != t->reg)
 			break;
 		return 1;
+
+	case OREGPAIR:
+		return samaddr(f->left, t->left) && samaddr(f->right, t->right);
 	}
 	return 0;
 }