shithub: riscv

Download patch

ref: 8968426327c93f47ff0d8123941b7d1d2077d6c7
parent: 9616f6187202900fa8d86d197d5b7bc56f83609b
author: aiju <devnull@localhost>
date: Thu Aug 24 04:06:41 EDT 2017

vmx(1): memory map improvements, x86 simulator for MMIO

--- a/sys/src/cmd/vmx/dat.h
+++ b/sys/src/cmd/vmx/dat.h
@@ -1,3 +1,7 @@
+typedef char s8int;
+typedef short s16int;
+typedef int s32int;
+
 typedef struct PCIDev PCIDev;
 typedef struct PCICap PCICap;
 typedef struct PCIBar PCIBar;
@@ -33,6 +37,7 @@
 #define R13 "r13"
 #define R14 "r14"
 #define R15 "r15"
+#define RFLAGS "flags"
 
 enum {
 	MMIORD = 0,
@@ -44,8 +49,13 @@
 	uintptr start, end;
 	enum {
 		REGALLOC = 1, /* allocate memory for region */
-		REGRO = 2, /* read-only */
+		REGR = 2, /* can read */
+		REGW = 4, /* can write */
+		REGX = 8, /* can execute */
 		
+		REGRWX = REGR|REGW|REGX,
+		REGRX = REGR|REGX,
+		
 		/* E820 types, 0 == omitted from memory map */
 		REGFREE = 1<<8, /* report to OS as free */
 		REGRES = 2<<8, /* report to OS as reserved */
@@ -54,6 +64,7 @@
 	uvlong segoff;
 	void *v, *ve;
 	Region *next;
+	int (*mmio)(uintptr, void *, int, int);
 };
 
 extern Region *mmap;
@@ -132,6 +143,7 @@
 	ACCR,
 	ACCW,
 	ACCX,
+	ACCSAFE = 0x100, /* don't post exceptions on fault */
 };
 
 /* used to speed up consecutive x86access calls */
@@ -139,5 +151,7 @@
 struct TLB {
 	int asz, seg, acc;
 	uintptr start, end;
+	uintptr pabase;
+	Region *reg;
 	uchar *base;
 };
--- a/sys/src/cmd/vmx/exith.c
+++ b/sys/src/cmd/vmx/exith.c
@@ -16,12 +16,15 @@
 	u32int ilen, iinfo;
 };
 
-static char *x86reg[16] = {
+char *x86reg[16] = {
 	RAX, RCX, RDX, RBX,
 	RSP, RBP, RSI, RDI,
 	R8, R9, R10, R11,
 	R12, R13, R14, R15
 };
+char *x86segreg[8] = {
+	"cs", "ds", "es", "fs", "gs", "ss",
+};
 
 static void
 skipinstr(ExitInfo *ei)
@@ -29,44 +32,6 @@
 	rset(RPC, rget(RPC) + ei->ilen);
 }
 
-static int
-stepmmio(uvlong pa, uvlong *val, int size, ExitInfo *ei)
-{
-	extern uchar *tmp;
-	extern uvlong tmpoff;
-	void *targ;
-	uvlong pc;
-	char buf[ERRMAX];
-	extern int getexit;
-	
-	memset(tmp, 0, BY2PG);
-	targ = tmp + (pa & 0xfff);
-	switch(size){
-	case 1: *(u8int*)targ = *val; break;
-	case 2: *(u16int*)targ = *val; break;
-	case 4: *(u32int*)targ = *val; break;
-	case 8: *(u64int*)targ = *val; break;
-	}
-	pc = rget(RPC);
-	rcflush(0);
-	if(ctl("step -map %#ullx vm %#ullx", pa & ~0xfff, tmpoff) < 0){
-		rerrstr(buf, sizeof(buf));
-		if(strcmp(buf, "step failed") == 0){
-			vmerror("vmx step failure (old pc=%#ullx, new pc=%#ullx, cause=%#q)", pc, rget(RPC), ei->raw);
-			getexit++;
-			return -1;
-		}
-		sysfatal("ctl(stepmmio): %r");
-	}
-	switch(size){
-	case 1: *val = *(u8int*)targ; break;
-	case 2: *val = *(u16int*)targ; break;
-	case 4: *val = *(u32int*)targ; break;
-	case 8: *val = *(u64int*)targ; break;
-	}
-	return 0;
-}
-
 static void
 iohandler(ExitInfo *ei)
 {
@@ -130,15 +95,6 @@
 		rsetsz(RSI, addr, asz);
 }
 
-typedef struct MemHandler MemHandler;
-struct MemHandler {
-	uvlong lo, hi;
-	uvlong (*f)(int, uvlong, uvlong);
-};
-
-MemHandler memh[32];
-int nmemh;
-
 static uvlong
 defaultmmio(int op, uvlong addr, uvlong val)
 {
@@ -156,36 +112,8 @@
 static void
 eptfault(ExitInfo *ei)
 {
-	MemHandler *h;
-	static MemHandler def = {.f defaultmmio};
-	int size;
-	uvlong val;
-	
-	for(h = memh; h < memh + nmemh; h++)
-		if(ei->pa >= h->lo && ei->pa <= h->hi)
-			break;
-	if(h == memh + nmemh)
-		h = &def;
-	size = 8;
-	if((ei->qual & 5) != 0){
-		val = h->f(MMIORD, ei->pa, 0);
-		stepmmio(ei->pa, &val, size, ei);
-	}else{
-		val = h->f(MMIOWRP, ei->pa, 0);
-		if(stepmmio(ei->pa, &val, size, ei) < 0)
-			return;
-		h->f(MMIOWR, ei->pa, val);
-	}
-}
-
-void
-registermmio(uvlong lo, uvlong hi, uvlong (*f)(int, uvlong, uvlong))
-{
-	assert(nmemh < nelem(memh));
-	memh[nmemh].lo = lo;
-	memh[nmemh].hi = hi;
-	memh[nmemh].f = f;
-	nmemh++;
+	if(x86step() > 0)
+		skipinstr(ei);
 }
 
 typedef struct CPUID CPUID;
--- a/sys/src/cmd/vmx/fns.h
+++ b/sys/src/cmd/vmx/fns.h
@@ -23,6 +23,7 @@
 PCIBar *mkpcibar(PCIDev *, u8int, u32int, u32int, void *, void *);
 PCICap *mkpcicap(PCIDev *, u8int, u32int (*)(PCICap *, u8int), void(*)(PCICap *, u8int, u32int, u32int));
 u32int allocbdf(void);
+Region *regptr(u64int);
 void *gptr(u64int, u64int);
 void *gend(void *);
 uintptr gpa(void *);
@@ -50,3 +51,4 @@
 uintptr vmemwrite(void *, uintptr, uintptr);
 int x86access(int, uintptr, int, uvlong*, int, int, TLB *);
 u32int io(int, u16int, u32int, int);
+int x86step(void);
--- a/sys/src/cmd/vmx/ide.c
+++ b/sys/src/cmd/vmx/ide.c
@@ -372,6 +372,12 @@
 	case 0x30: case 0x31: /* write (pio) */
 		idegoio(d, 1);
 		break;
+	case 0x40: case 0x41: /* read verify */
+		while(--d->cnt != 0)
+			ideincaddr(d);
+		d->stat = IDEDRDY|IDEDSC;
+		ideirq(d, 1);
+		break;
 	case 0x90: /* diagnostics */
 		d = (d - ide & ~1) + ide;
 		d[0].err = 0;
--- a/sys/src/cmd/vmx/vmx.c
+++ b/sys/src/cmd/vmx/vmx.c
@@ -6,7 +6,7 @@
 #include "fns.h"
 
 Region *mmap;
-int ctlfd, regsfd, waitfd;
+int ctlfd, regsfd, mapfd, waitfd;
 Channel *waitch, *sleepch, *notifch;
 enum { MSEC = 1000*1000, MinSleep = MSEC, SleeperPoll = 2000*MSEC } ;
 int getexit, state;
@@ -58,15 +58,28 @@
 	return rc;
 }
 
+void
+modregion(Region *r)	/* write one region's mapping line to mapfd (opened on #X/map by vmxsetup) */
+{
+	if(r->segname == nil){	/* no backing segment: emit the range with "---" permissions */
+		if(fprint(mapfd, "--- wb %#ullx %#ullx\n", r->start, r->end) < 0)
+			vmerror("updating memory map: %r");
+	}else
+		if(fprint(mapfd, "%c%c%c wb %#ullx %#ullx %s %#ullx\n",
+			(r->type & REGR) != 0 ? 'r' : '-',	/* permission letters follow the REGR/REGW/REGX bits */
+			(r->type & REGW) != 0 ? 'w' : '-',
+			(r->type & REGX) != 0 ? 'x' : '-',
+			r->start, r->end, r->segname, r->segoff) < 0)
+			vmerror("updating memory map: %r");	/* a failed write is reported, not fatal */
+}
+
 static void
 vmxsetup(void)
 {
-	static int fd;
+	int fd;
 	static char buf[128];
 	Region *r;
-	uvlong start, end, off;
-	char *name;
-	int rc, type;
+	int rc;
 	
 	fd = open("#X/status", OREAD);
 	if(fd < 0) sysfatal("open: %r");
@@ -85,29 +98,10 @@
 	regsfd = open("#X/regs", ORDWR);
 	if(regsfd < 0) sysfatal("open: %r");
 	
-	fd = open("#X/map", OWRITE|OTRUNC);
-	if(fd < 0) sysfatal("open: %r");
-	for(r = mmap; r != nil; ){
-		if(r->segname == nil){
-			r = r->next;
-			continue;
-		}
-		start = r->start;
-		end = r->end;
-		name = r->segname;
-		off = r->segoff;
-		type = r->type;
-		while(r = r->next, r != nil){
-			if(r->segname == nil)
-				continue;
-			if(r->start != end || r->segoff != off + end - start || ((r->type ^ type) & REGRO) != 0)
-				break;
-			end = r->end;
-		}
-		if(fprint(fd, "r%cx wb %#ullx %#ullx %s %#ullx\n", (type & REGRO) != 0 ? '-' : 'w', start, end, name, off) < 0)
-			sysfatal("writing memory map: %r");
-	}
-	close(fd);
+	mapfd = open("#X/map", OWRITE|OTRUNC);
+	if(mapfd < 0) sysfatal("open: %r");
+	for(r = mmap; r != nil; r = r->next)
+		modregion(r);
 	
 	waitfd = open("#X/wait", OREAD);
 	if(waitfd < 0) sysfatal("open: %r");
@@ -273,6 +267,17 @@
 	return r;
 }
 
+Region *
+regptr(u64int addr)	/* find the region of the global mmap list that contains guest-physical addr */
+{
+	Region *r;
+
+	for(r = mmap; r != nil; r = r->next)
+		if(addr >= r->start && addr < r->end)	/* end is exclusive */
+			return r;
+	return nil;	/* addr is not covered by any region */
+}
+
 void *
 gptr(u64int addr, u64int len)
 {
@@ -317,8 +322,8 @@
 	return (u8int *) v + gavail(v);
 }
 
-void *tmp;
-uvlong tmpoff;
+void *tmp, *vgamem;
+uvlong tmpoff, vgamemoff;
 
 static void
 mksegment(char *sn)
@@ -329,7 +334,8 @@
 	char buf[256];
 	u8int *gmem, *p;
 
-	sz = BY2PG;
+	sz = BY2PG; /* temporary page */
+	sz += 256*1024; /* vga */
 	for(r = mmap; r != nil; r = r->next){
 		if((r->type & REGALLOC) == 0)
 			continue;
@@ -362,6 +368,12 @@
 		p += r->end - r->start;
 		r->ve = p;
 	}
+	vgamem = p;
+	vgamemoff = p - gmem;
+	regptr(0xa0000)->segoff = vgamemoff;
+	regptr(0xa0000)->v = vgamem;
+	p += 256*1024;
+	regptr(0xa0000)->ve = p;
 	tmp = p;
 	tmpoff = p - gmem;
 }
@@ -638,18 +650,18 @@
 	cmdlinev = argv + 1;
 	
 	if(gmemsz < 1<<20) sysfatal("640 KB of RAM is not enough for everyone");
-	mkregion(0, 0xa0000, REGALLOC|REGFREE);
+	mkregion(0, 0xa0000, REGALLOC|REGFREE|REGRWX);
 	mkregion(0xa0000, 0xc0000, REGALLOC);
-	mkregion(0xc0000, 0x100000, REGALLOC|REGRES);
+	mkregion(0xc0000, 0x100000, REGALLOC|REGRES|REGRWX);
 	if(fbsz != 0 && fbaddr < gmemsz){
-		mkregion(0x100000, fbaddr, REGALLOC|REGFREE);
-		mkregion(fbaddr + fbsz, gmemsz, REGALLOC|REGFREE);
+		mkregion(0x100000, fbaddr, REGALLOC|REGFREE|REGRWX);
+		mkregion(fbaddr + fbsz, gmemsz, REGALLOC|REGFREE|REGRWX);
 	}else
-		mkregion(0x100000, gmemsz, REGALLOC|REGFREE);
+		mkregion(0x100000, gmemsz, REGALLOC|REGFREE|REGRWX);
 	if(fbsz != 0){
 		if(fbaddr < 1<<20) sysfatal("framebuffer must not be within first 1 MB");
 		if(fbaddr != (u32int) fbaddr || (u32int)(fbaddr+fbsz) < fbaddr) sysfatal("framebuffer must be within first 4 GB");
-		mkregion(fbaddr, fbaddr+fbsz, REGALLOC);
+		mkregion(fbaddr, fbaddr+fbsz, REGALLOC|REGRWX);
 	}
 	mksegment("vm");
 	vmxsetup();
--- a/sys/src/cmd/vmx/x86.c
+++ b/sys/src/cmd/vmx/x86.c
@@ -1,6 +1,8 @@
 #include <u.h>
 #include <libc.h>
 #include <thread.h>
+#include <bio.h>
+#include <mach.h>
 #include "dat.h"
 #include "fns.h"
 #include "x86.h"
@@ -156,6 +158,7 @@
 	static char *baser[] = {"csbase", "dsbase", "esbase", "fsbase", "gsbase", "ssbase"};
 	static char *limitr[] = {"cslimit", "dslimit", "eslimit", "fslimit", "gslimit", "sslimit"};
 	static char *permr[] = {"csperm", "dsperm", "esperm", "fsperm", "gsperm", "ssperm"};
+	uvlong tval;
 	u32int limit, perm;
 	uintptr addr, base, szmax;
 	int pperm, wp, i;
@@ -162,6 +165,7 @@
 	uintptr pa[8], pav;
 	uintptr l;
 	uchar *ptr;
+	Region *r;
 
 	switch(asz){
 	case 2: addr0 = (u16int)addr0; break;
@@ -171,10 +175,12 @@
 		vmerror("invalid asz=%d in x86access", asz);
 		assert(0);
 	}
-	assert(seg < SEGMAX && (uint)acc <= ACCX);
+	assert(seg < SEGMAX && (u8int)acc <= ACCX);
 	addr = addr0;
-	if(tlb != nil && tlb->asz == asz && tlb->seg == seg && tlb->acc == acc && addr >= tlb->start && addr + sz >= addr && addr + sz < tlb->end){
+	if(tlb != nil && tlb->asz == asz && tlb->seg == seg && tlb->acc == (u8int)acc && addr >= tlb->start && addr + sz >= addr && addr + sz < tlb->end){
 		ptr = tlb->base + addr;
+		pa[0] = tlb->pabase + addr;
+		r = tlb->reg;
 		goto fast;
 	}
 	if(sizeof(uintptr) == 8 && asz == 8){
@@ -182,8 +188,10 @@
 			addr += rget(baser[seg]);
 		if((u16int)((addr >> 48) + 1) > 1){
 		gpf:
-			vmdebug("gpf");
-			postexc("#gp", 0);
+			if((acc & ACCSAFE) == 0){
+				vmdebug("gpf");
+				postexc("#gp", 0);
+			}
 			return -1;
 		}
 		if((vlong)addr >= 0)
@@ -200,8 +208,10 @@
 		}else{
 			if((u64int)addr + sz - 1 >= limit){
 			limfault:
-				vmdebug("limit fault");
-				postexc(seg == SEGSS ? "#ss" : "#gp", 0);
+				if((acc & ACCSAFE) == 0){
+					vmdebug("limit fault");
+					postexc(seg == SEGSS ? "#ss" : "#gp", 0);
+				}
 				return -1;
 			}
 			szmax = limit - addr + 1;
@@ -208,7 +218,7 @@
 		}
 		if((perm & 0x10080) != 0x80)
 			goto gpf;
-		switch(acc){
+		switch((u8int)acc){
 		case ACCR: if((perm & 0xa) == 8) goto gpf; break;
 		case ACCW: if((perm & 0xa) != 2) goto gpf; break;
 		case ACCX: if((perm & 8) == 0) goto gpf; break;
@@ -222,12 +232,14 @@
 		l = translator()(addr+i, &pav, &pperm);
 		if(l == 0){
 		pf:
-			vmdebug("page fault @ %#p", addr+i);
-			postexc("#pf", pperm & 1 | (acc == ACCW) << 1 | (cpl == 3) << 2 | (acc == ACCX) << 4);
-			rset("cr2", addr+i);
+			if((acc & ACCSAFE) == 0){
+				vmdebug("page fault @ %#p", addr+i);
+				postexc("#pf", pperm & 1 | ((u8int)acc == ACCW) << 1 | (cpl == 3) << 2 | ((u8int)acc == ACCX) << 4);
+				rset("cr2", addr+i);
+			}
 			return -1;
 		}
-		if((cpl == 3 || wp) && acc == ACCW && (pperm & 2) == 0)
+		if((cpl == 3 || wp) && (u8int)acc == ACCW && (pperm & 2) == 0)
 			goto pf;
 		if(cpl == 3 && (pperm & 4) == 0)
 			goto pf;
@@ -236,25 +248,31 @@
 			pa[i++] = pav++;
 	}
 	if(szmax >= sz){
-		ptr = gptr(pa[0], sz);
-		if(ptr == nil) goto slow;
+		r = regptr(pa[0]);
+		if(r == nil || pa[0]+sz > r->end) goto slow;
+		ptr = (uchar*)r->v + (pa[0] - r->start);
 		if(tlb != nil){
 			l = gavail(ptr);
 			if(l < szmax) szmax = l;
 			tlb->asz = asz;
 			tlb->seg = seg;
-			tlb->acc = acc;
+			tlb->acc = (u8int)acc;
 			tlb->start = addr0;
 			tlb->end = addr0 + szmax;
+			tlb->reg = r;
 			tlb->base = ptr - addr0;
+			tlb->pabase = pa[0] - addr0;
 		}
 	fast:
-		if(acc == ACCW)
+		if(r->mmio != nil)
+			r->mmio(pa[0], val, sz, (u8int)acc == ACCW);
+		else if(acc == ACCW)
 			switch(sz){
 			case 1: PUT8(ptr, 0, *val); break;
 			case 2: PUT16(ptr, 0, *val); break;
 			case 4: PUT32(ptr, 0, *val); break;
 			case 8: PUT64(ptr, 0, *val); break;
+			default: goto slow;
 			}
 		else
 			switch(sz){
@@ -262,6 +280,7 @@
 			case 2: *val = GET16(ptr, 0); break;
 			case 4: *val = GET32(ptr, 0); break;
 			case 8: *val = GET64(ptr, 0); break;
+			default: goto slow;
 			}
 	}else{
 	slow:
@@ -268,14 +287,804 @@
 		if(acc != ACCW)
 			*val = 0;
 		for(i = 0; i < sz; i++){
-			ptr = gptr(pa[i], 1);
-			if(ptr == nil)
+			r = regptr(pa[i]);
+			if(r == nil)
 				vmerror("x86access: access to unmapped address %#p", pa[i]);
-			else if(acc == ACCW)
-				*ptr = GET8(val, i);
+			else if(acc == ACCW){
+				tval = GET8(val, i);
+				if(r->mmio != nil)
+					r->mmio(pa[i], &tval, 1, 1);
+				else
+					PUT8(r->v, pa[i] - r->start, tval);
+			}else{
+				if(r->mmio != nil)
+					r->mmio(pa[i], &tval, 1, 0);
+				else
+					tval = GET8(r->v, pa[i] - r->start);
+				PUT8(val, i, tval);
+			}	
+		}
+	}
+	return 0;
+}
+
+enum {
+	ONOPE,	OADC,	OADD,	OAND,	OASZ,	OCALL,	OCMP,	OCMPS,	ODEC,
+	OENTER,	OEX,	OIMUL,	OINC,	OINS,	OLEAVE,	OLOCK,	OLODS,	OMOV,	OMOVS,
+	OOR,	OOSZ,	OOUTS,	OPOP,	OPOPA,	OPOPF,	OPUSH,	OPUSHA,	OPUSHF,
+	OREP,	OREPNE,	ORET,	OSBB,	OSCAS,	OSEG,	OSTOS,	OSUB,
+	OTEST,	OXCHG,	OXLAT,	OXOR,	OROL,	OROR,	ORCL,	ORCR,
+	OSHL,	OSHR,	OSAR,	ONOT,	ONEG,	ODIV,	OIDIV,	OMUL,
+	OJMP,
+};
+
+static char *onames[] = {
+	[ONOPE]"ONOPE", [OADC]"OADC", [OADD]"OADD", [OAND]"OAND", [OASZ]"OASZ", [OCALL]"OCALL", [OCMP]"OCMP", [OCMPS]"OCMPS", [ODEC]"ODEC",
+	[OENTER]"OENTER", [OIMUL]"OIMUL", [OINC]"OINC", [OINS]"OINS", [OLEAVE]"OLEAVE", [OLOCK]"OLOCK", [OLODS]"OLODS", [OMOV]"OMOV", [OMOVS]"OMOVS",
+	[OOR]"OOR", [OOSZ]"OOSZ", [OOUTS]"OOUTS", [OPOP]"OPOP", [OPOPA]"OPOPA", [OPOPF]"OPOPF", [OPUSH]"OPUSH", [OPUSHA]"OPUSHA", [OPUSHF]"OPUSHF",
+	[OREP]"OREP", [OREPNE]"OREPNE", [ORET]"ORET", [OSBB]"OSBB", [OSCAS]"OSCAS", [OSEG]"OSEG", [OSTOS]"OSTOS", [OSUB]"OSUB",
+	[OTEST]"OTEST", [OXCHG]"OXCHG", [OXLAT]"OXLAT", [OXOR]"OXOR", [OEX]"OEX", [OROL]"OROL", [OROR]"OROR", [ORCL]"ORCL", [ORCR]"ORCR",
+	[OSHL]"OSHL", [OSHR]"OSHR", [OSAR]"OSAR", [ONOT]"ONOT", [ONEG]"ONEG", [ODIV]"ODIV", [OIDIV]"OIDIV", [OMUL]"OMUL", [OJMP]"OJMP"
+};
+#define enumconv(x,buf,tab) ((x)<nelem(tab)?(tab)[x]:(sprint(buf,"%d",(x)),buf))
+
+/*
+	size fields:
+	0 b byte
+	1 v short/long/vlong (16-bit,32-bit,64-bit mode)
+	2 z short/long/long
+	3 w short
+*/
+
+enum {
+	ANOPE,
+	A1 = 1, /* constant 1 */
+	
+	/* general purpose registers with size+1 in high nibble */
+	AGPRb = 0x10,
+	AGPRv = 0x20,
+	AGPRz = 0x30,
+	AAXb = 0x10,	ACXb = 0x11,	ADXb = 0x12,	ABXb = 0x13,	ASPb = 0x14,	ABPb = 0x15,	ASIb = 0x16,	ADIb = 0x17,
+	AAXv = 0x20,	ACXv = 0x21,	ADXv = 0x22,	ABXv = 0x23,	ASPv = 0x24,	ABPv = 0x25,	ASIv = 0x26,	ADIv = 0x27,
+	AAXz = 0x30,	ACXz = 0x31,	ADXz = 0x32,	ABXz = 0x33,	ASPz = 0x34,	ABPz = 0x35,	ASIz = 0x36,	ADIz = 0x37,
+	
+	ASEG = 0x40,	ACS = 0x40,	ADS = 0x41,	AES = 0x42,	AFS = 0x43,	AGS = 0x44,	ASS = 0x45,
+	
+	/* below has valid size in lower nibble */
+	AGOTSZ = 0x50,
+	AOb = 0x50,	AOv = 0x51,
+	
+	AIMM = 0x70,
+	AIb = 0x70,		AIz = 0x72,
+	/* below involves modrm */
+	AMODRM = 0x80,
+	AEb = 0x80,	AEv = 0x81,
+	AGb = 0x90,	AGv = 0x91,
+	ASw = 0xA3,
+};
+
+static char *anames[] = {
+	[ANOPE]"ANOPE",	[AEb]"AEb",	[AEv]"AEv",	[AGb]"AGb",	[AGv]"AGv",	[AIb]"AIb",	[AIz]"AIz",
+	[ASw]"ASw",	[AOb]"AOb",	[AOv]"AOv",
+	[ACS]"ACS",	[ADS]"ADS",	[AES]"AES",	[AFS]"AFS",	[AGS]"AGS",	[ASS]"ASS",
+	[AAXb]"AAXb",	[ABXb]"ABXb",	[ACXb]"ACXb",	[ADXb]"ADXb",	[ABPb]"ABPb",	[ASPb]"ASPb",	[ASIb]"ASIb",	[ADIb]"ADIb",
+	[AAXv]"AAXv",	[ABXv]"ABXv",	[ACXv]"ACXv",	[ADXv]"ADXv",	[ABPv]"ABPv",	[ASPv]"ASPv",	[ASIv]"ASIv",	[ADIv]"ADIv",
+	[AAXz]"AAXz",	[ABXz]"ABXz",	[ACXz]"ACXz",	[ADXz]"ADXz",	[ABPz]"ABPz",	[ASPz]"ASPz",	[ASIz]"ASIz",	[ADIz]"ADIz",
+};
+/* typically b is dst and c is src */
+#define O(a,b,c) ((a)|(b)<<8|(c)<<16)
+
+/* we only care about operations that can go to memory */
+static u32int optab[256] = {
+/*0*/	O(OADD,AEb,AGb), O(OADD,AEv,AGv), O(OADD,AGb,AEb), O(OADD,AGv,AEv), O(OADD,AAXb,AIb), O(OADD,AAXz,AIz), O(OPUSH,AES,0), O(OPOP,AES,0),
+	O(OOR,AEb,AGb), O(OOR,AEv,AGv), O(OOR,AGb,AEb), O(OOR,AGv,AEv), O(OOR,AAXb,AIb), O(OOR,AAXz,AIz), O(OPUSH,ACS,0), 0,
+	
+/*1*/	O(OADC,AEb,AGb), O(OADC,AEv,AGv), O(OADC,AGb,AEb), O(OADC,AGv,AEv), O(OADC,AAXb,AIb), O(OADC,AAXz,AIz), O(OPUSH,ASS,0), O(OPOP,ASS,0),
+	O(OSBB,AEb,AGb), O(OSBB,AEv,AGv), O(OSBB,AGb,AEb), O(OSBB,AGv,AEv), O(OSBB,AAXb,AIb), O(OSBB,AAXz,AIz), O(OPUSH,ADS,0), O(OPOP,ADS,0),
+	
+/*2*/	O(OAND,AEb,AGb), O(OAND,AEv,AGv), O(OAND,AGb,AEb), O(OAND,AGv,AEv), O(OAND,AAXb,AIb), O(OAND,AAXz,AIz), O(OSEG,AES,0), 0/*DAA*/,
+	O(OSUB,AEb,AGb), O(OSUB,AEv,AGv), O(OSUB,AGb,AEb), O(OSUB,AGv,AEv), O(OSUB,AAXb,AIb), O(OSUB,AAXz,AIz), O(OSEG,ACS,0), 0/*DAS*/,
+
+/*3*/	O(OXOR,AEb,AGb), O(OXOR,AEv,AGv), O(OXOR,AGb,AEb), O(OXOR,AGv,AEv), O(OXOR,AAXb,AIb), O(OXOR,AAXz,AIz), O(OSEG,ASS,0), 0/*AAA*/,
+	O(OCMP,AEb,AGb), O(OCMP,AEv,AGv), O(OCMP,AGb,AEb), O(OCMP,AGv,AEv), O(OCMP,AAXb,AIb), O(OCMP,AAXz,AIz), O(OSEG,ADS,0), 0/*AAS*/,
+	
+/*4*/	0, 0, 0, 0, 0, 0, 0, 0, /* rex prefixes */
+	0, 0, 0, 0, 0, 0, 0, 0,
+	
+/*5*/	O(OPUSH,AAXv,0), O(OPUSH,ACXv,0), O(OPUSH,ADXv,0), O(OPUSH,ABXv,0), O(OPUSH,ASPv,0), O(OPUSH,ABPv,0), O(OPUSH,ASIv,0), O(OPUSH,ADIv,0),
+	O(OPOP,AAXv,0), O(OPOP,ACXv,0), O(OPOP,ADXv,0), O(OPOP,ABXv,0), O(OPOP,ASPv,0), O(OPOP,ABPv,0), O(OPOP,ASIv,0), O(OPOP,ADIv,0),
+	
+/*6*/	OPUSHA, OPOPA, 0/*BOUND*/, 0/*ARPL*/, O(OSEG,AFS,0), O(OSEG,AGS,0), OOSZ, OASZ,
+	O(OPUSH,AIz,0), O(OIMUL,AGv,AIz), O(OPUSH,AIb,0), O(OIMUL,AGv,AIb), OINS, OINS, OOUTS, OOUTS,
+	
+/*7*/	0, 0, 0, 0, 0, 0, 0, 0, /* jumps */
+	0, 0, 0, 0, 0, 0, 0, 0,
+	
+/*8*/	OEX, OEX, OEX, OEX, O(OTEST,AEb,AGb), O(OTEST,AEv,AGv), O(OXCHG,AEb,AGb), O(OXCHG,AEv,AGv),
+	O(OMOV,AEb,AGb), O(OMOV,AEv,AGv), O(OMOV,AGb,AEb), O(OMOV,AGv,AEv), O(OMOV,AEv,ASw), 0/*LEA*/, O(OMOV,ASw,AEv), OEX,
+
+/*9*/	0, 0, 0, 0, 0, 0, 0, 0, /* register exchange */
+	0/*CBW*/, 0/*CWD*/, OCALL, 0/*FWAIT*/, OPUSHF, OPOPF, 0/*OSAHF*/, 0/*OLAHF*/,
+	
+/*A*/	O(OMOV,AAXb,AOb), O(OMOV,AAXv,AOv), O(OMOV,AOb,AAXb), O(OMOV,AOv,AAXv), OMOVS, OMOVS, OCMPS, OCMPS,
+	0, 0/*TEST Reg,Imm*/, OSTOS, OSTOS, OLODS, OLODS, OSCAS, OSCAS,
+
+/*B*/	0, 0, 0, 0, 0, 0, 0, 0, /* move immediate to register */
+	0, 0, 0, 0, 0, 0, 0, 0,
+
+/*C*/	OEX, OEX, ORET, ORET, 0/*LES*/, 0/*LDS*/, OEX, OEX,
+	OENTER, OLEAVE, ORET, ORET, 0/*INT3*/, 0/*INTn*/, 0/*INTO*/, 0/*IRET*/,
+	
+/*D*/	OEX, OEX, OEX, OEX, 0/*AAM*/, 0/*AAD*/, 0, OXLAT,
+	0, 0, 0, 0, 0, 0, 0, 0, /* fpu */
+
+/*E*/	0, 0, 0/*LOOPx*/, 0/*JrCXZ*/, 0, 0/*IN*/, 0, 0/*OUT*/,
+	OCALL, OCALL, 0, 0/*JMP*/, 0, 0/*IN*/, 0, 0/*OUT*/,
+
+/*F*/	OLOCK, 0, OREPNE, OREP, 0/*HALT*/, 0/*CMC*/, OEX, OEX,
+	0/*CLC*/, 0/*STC*/, 0/*CLI*/, 0/*STI*/, 0/*CLD*/, 0/*STD*/, OEX, OEX,
+};
+/* OEX tables (operations determined by modrm byte) */
+static u32int optab80[8] = {O(OADD,AEb,AIb), O(OOR,AEb,AIb), O(OADC,AEb,AIb), O(OSBB,AEb,AIb), O(OAND,AEb,AIb), O(OSUB,AEb,AIb), O(OXOR,AEb,AIb), O(OCMP,AEb,AIb)};
+static u32int optab81[8] = {O(OADD,AEv,AIz), O(OOR,AEv,AIz), O(OADC,AEv,AIz), O(OSBB,AEv,AIz), O(OAND,AEv,AIz), O(OSUB,AEv,AIz), O(OXOR,AEv,AIz), O(OCMP,AEv,AIz)};
+/* 0x82 is identical to 0x80 */
+static u32int optab83[8] = {O(OADD,AEv,AIb), O(OOR,AEv,AIb), O(OADC,AEv,AIb), O(OSBB,AEv,AIb), O(OAND,AEv,AIb), O(OSUB,AEv,AIb), O(OXOR,AEv,AIb), O(OCMP,AEv,AIb)};
+static u32int optab8F[8] = {O(OPOP,AEv,0)};
+static u32int optabC0[8] = {O(OROL,AEb,AIb), O(OROR,AEb,AIb), O(ORCL,AEb,AIb), O(ORCR,AEb,AIb), O(OSHL,AEb,AIb), O(OSHR,AEb,AIb), 0, O(OSAR,AEb,AIb)};
+static u32int optabC1[8] = {O(OROL,AEv,AIb), O(OROR,AEv,AIb), O(ORCL,AEv,AIb), O(ORCR,AEv,AIb), O(OSHL,AEv,AIb), O(OSHR,AEv,AIb), 0, O(OSAR,AEv,AIb)};
+static u32int optabD0[8] = {O(OROL,AEb,A1), O(OROR,AEb,A1), O(ORCL,AEb,A1), O(ORCR,AEb,A1), O(OSHL,AEb,A1), O(OSHR,AEb,A1), 0, O(OSAR,AEb,A1)};
+static u32int optabD1[8] = {O(OROL,AEv,A1), O(OROR,AEv,A1), O(ORCL,AEv,A1), O(ORCR,AEv,A1), O(OSHL,AEv,A1), O(OSHR,AEv,A1), 0, O(OSAR,AEv,A1)};
+static u32int optabD2[8] = {O(OROL,AEb,ACXb), O(OROR,AEb,ACXb), O(ORCL,AEb,ACXb), O(ORCR,AEb,ACXb), O(OSHL,AEb,ACXb), O(OSHR,AEb,ACXb), 0, O(OSAR,AEb,ACXb)};
+static u32int optabD3[8] = {O(OROL,AEv,ACXb), O(OROR,AEv,ACXb), O(ORCL,AEv,ACXb), O(ORCR,AEv,ACXb), O(OSHL,AEv,ACXb), O(OSHR,AEv,ACXb), 0, O(OSAR,AEv,ACXb)};
+static u32int optabC6[8] = {O(OMOV,AEb,AIb)};
+static u32int optabC7[8] = {O(OMOV,AEv,AIz)};
+static u32int optabF6[8] = {O(OTEST,AEb,AIb), 0, O(ONOT,AEb,0), O(ONEG,AEb,0), O(OMUL,AAXb,AEb), O(OIMUL,AAXb,0), O(ODIV,AAXb,0), O(OIDIV,AAXb,0)};
+static u32int optabF7[8] = {O(OTEST,AEv,AIz), 0, O(ONOT,AEv,0), O(ONEG,AEv,0), O(OMUL,AAXv,AEv), O(OIMUL,AAXv,0), O(ODIV,AAXv,0), O(OIDIV,AAXv,0)};
+static u32int optabFE[8] = {O(OINC,AEb,0),O(ODEC,AEb,0)};
+static u32int optabFF[8] = {O(OINC,AEv,0),O(ODEC,AEv,0),OCALL,OCALL,OJMP,OJMP,O(OPUSH,AEv,0),0};
+
+typedef struct Instr Instr;
+typedef struct Oper Oper;
+/* for registers we put the number in addr and add +0x10 for "high bytes" (AH etc) */
+struct Oper {
+	enum { OPNONE, OPREG, OPSEG, OPIMM, OPMEM } type;
+	uintptr addr;
+	int sz;
+	uvlong val;
+};
+struct Instr {
+	u8int bytes[16];
+	int nbytes;
+	u8int opcode; /* first byte after the prefixes */
+	u32int inf;
+	u8int modrm, sib;
+	vlong disp;
+	uvlong imm;
+	enum {
+		INSLOCK = 0x1,
+		INSREP = 0x2,
+		INSREPNE = 0x4,
+		INSOSZ = 0x8,
+		INSASZ = 0x10,
+		INSMODRM = 0x20,
+		INSSIB = 0x40,
+		INSDISP8 = 0x80,
+		INSDISP16 = 0x100,
+		INSDISP32 = 0x200,
+		INSDISP64 = 0x400,
+		INSIMM8 = 0x800,
+		INSIMM16 = 0x1000,
+		INSIMM32 = 0x2000,
+	/*	INSIMM64 = 0x4000, not yet */
+	} flags;
+	int seg;
+	u8int osz, asz;
+	Oper op[2];
+};
+
+struct Step {
+	uintptr pc, npc;
+	u8int mode;
+	TLB tlb;
+	Instr;
+} step;
+
+static int	/* returns the fetched byte (0..255), or -1 on failure */
+fetch8(int acc)	/* acc: extra access flags OR'ed into ACCX (e.g. ACCSAFE) */
+{
+	uvlong v;
+	
+	if(step.nbytes >= sizeof(step.bytes)){	/* the instruction byte buffer is full */
+		if((acc & ACCSAFE) == 0){	/* ACCSAFE callers get a silent failure */
+			vmerror("x86step: instruction too long (pc=%#p)", step.pc);
+			postexc("#ud", NOERRC);
+		}
+		return -1;
+	}
+	if(x86access(SEGCS, step.npc, step.mode, &v, 1, ACCX|acc, &step.tlb) < 0){
+		vmerror("x86step: fault while trying to load %#p, shouldn't happen", step.pc);	/* logged even when ACCSAFE is set; prints step.pc, not step.npc */
+		return -1;
+	}
+	step.npc++;	/* advance the fetch pointer */
+	step.bytes[step.nbytes++] = v;	/* keep the raw byte; giveup and machread read it back */
+	return (u8int)v;
+}
+
+static int	/* returns a value in 0..0xffff, or -1 if either byte fetch failed */
+fetch16(int acc)	/* fetch two instruction bytes via fetch8 and combine them little-endian */
+{
+	int r0, r1;
+	
+	if(r0 = fetch8(acc), r0 < 0) return -1;
+	if(r1 = fetch8(acc), r1 < 0) return -1;
+	return r0 | r1 << 8;	/* first byte fetched is the low byte */
+}
+
+static vlong	/* returns a value in 0..0xffffffff, or -1 if any byte fetch failed */
+fetch32(int acc)	/* fetch four instruction bytes via fetch8 and combine them little-endian */
+{
+	int r0, r1, r2, r3;
+	
+	if(r0 = fetch8(acc), r0 < 0) return -1;
+	if(r1 = fetch8(acc), r1 < 0) return -1;
+	if(r2 = fetch8(acc), r2 < 0) return -1;
+	if(r3 = fetch8(acc), r3 < 0) return -1;
+	return (u32int)r0 | (u32int)r1 << 8 | (u32int)r2 << 16 | (u32int)r3 << 24;	/* unsigned, so a top byte >= 0x80 is not mistaken for the -1 error value */
+}
+
+static int	/* returns 0 on success, -1 if a fetch failed (*p is then left untouched) */
+fetch64(int acc, uvlong *p)	/* fetch eight instruction bytes as two fetch32 halves, low half first */
+{
+	vlong r0, r1;
+	
+	if(r0 = fetch32(acc), r0 < 0) return -1;	/* relies on fetch32 reporting errors as a negative value */
+	if(r1 = fetch32(acc), r1 < 0) return -1;
+	*p = r0 | r1 << 32;
+	return 0;
+}
+
+static long	/* returns the number of bytes copied; 0 if soff is outside the fetched instruction */
+machread(int, void *vb, long n, vlong soff)	/* read callback installed on the disassembler Map by giveup; the int argument is unused */
+{
+	uvlong o;
+	
+	o = soff;
+	if(o < step.pc) return 0;	/* only [step.pc, step.pc+step.nbytes) is backed by step.bytes */
+	if(o >= step.pc+step.nbytes) return 0;
+	if(n > step.pc+step.nbytes-o)	/* clamp the request to the bytes fetched so far */
+		n = step.pc+step.nbytes-o;
+	memmove(vb, step.bytes+(o-step.pc), n);
+	return n;
+}
+
+static void
+giveup(void)	/* report the instruction at step.pc as unimplemented, with disassembly and hex bytes when available */
+{
+	static Map *m;	/* created on first use and reused afterwards */
+	char buf[128];
+	char *p, *e;
+	extern Machdata i386mach;
+	int i, rc;
+	
+	if(m == nil){
+		m = newmap(nil, 1);
+		setmap(m, -1, 0, -1, 0, "text");
+		m->seg[0].read = machread;	/* disassembler reads come from step.bytes */
+	}
+	p = buf;
+	e = buf + sizeof(buf);
+	while(fetch8(ACCSAFE) >= 0)	/* pull in further bytes until fetch8 fails; ACCSAFE so no exception is posted */
+		;
+	if(rc = i386mach.das(m, step.pc, 0, buf, sizeof(buf)), rc >= 0){	/* rc is then used as the number of bytes to dump */
+		p += strlen(buf);
+		p = seprint(p, e, " # ");
+	}else
+		rc = step.nbytes;	/* disassembly failed: dump every fetched byte */
+	for(i = 0; i < rc; i++)
+		p = seprint(p, e, "%.2x ", step.bytes[i]);
+	vmerror("x86step: unimplemented instruction %s", buf);	
+}
+
+static int	/* returns 0 with step.inf set on success, -1 on fetch failure or unsupported opcode */
+grab(void)	/* fetch prefixes, opcode, modrm/sib, displacement and immediate of one instruction into step */
+{
+	int op;
+	int rc;
+	vlong vrc;
+	u32int inf;
+	u32int *tab;
+
+again:
+	op = fetch8(0);
+	if(op < 0) return -1;
+	inf = optab[op];
+	if(inf == 0){ giveup(); return -1; }	/* opcode not in the table */
+	switch((u8int)inf){	/* prefixes: record them and fetch the next byte */
+	case OLOCK: step.flags |= INSLOCK; goto again;
+	case OREP: step.flags |= INSREP; goto again;
+	case OREPNE: step.flags |= INSREPNE; goto again;
+	case OOSZ: step.flags |= INSOSZ; step.osz = step.osz == 2 ? 4 : 2; goto again;
+	case OASZ: step.flags |= INSASZ; step.asz = step.asz == 2 ? 4 : 2; goto again;
+	case OSEG: step.seg = inf >> 8 & 0xf; goto again;	/* keep only the index nibble of ACS..ASS, as parseoper does; presumably matches the SEGxx numbering */
+	}
+	step.opcode = op;
+	if((u8int)(inf >> 8) >= AMODRM || (u8int)(inf >> 16) >= AMODRM || inf == OEX){	/* instruction carries a modrm byte */
+		rc = fetch8(0);
+		if(rc < 0) return -1;
+		step.modrm = rc;
+		step.flags |= INSMODRM;
+		if(step.asz != 2 && (step.modrm & 0x07) == 0x04 && step.modrm < 0xc0){	/* rm=4 with a memory operand: sib byte follows */
+			rc = fetch8(0); if(rc < 0) return -1;
+			step.sib = rc;
+			step.flags |= INSSIB;
+		}
+		switch(step.modrm >> 6){
+		case 1:
+			rc = fetch8(0); if(rc < 0) return -1;
+			step.disp = (s8int)rc;	/* sign-extend disp8 */
+			step.flags |= INSDISP8;
+			break;
+		case 0:
+			if((step.modrm & 7) != (step.asz == 2) + 5 && (step.sib & 7) != 5)	/* mod=0 has a displacement only for rm=6 (16-bit), rm=5 or sib base=5 */
+				break;
+			/* fall through */
+		case 2:
+			if(step.asz == 2){
+				rc = fetch16(0); if(rc < 0) return -1;
+				step.disp = (s16int)rc;
+				step.flags |= INSDISP16;
+			}else{
+				vrc = fetch32(0); if(vrc < 0) return -1;
+				step.disp = (s32int)vrc;
+				step.flags |= INSDISP32;
+			}
+			break;
+		}
+	}
+	if(inf == OEX){	/* operation is selected by the reg field of modrm */
+		switch(op){
+		case 0x80: case 0x82: tab = optab80; break;
+		case 0x81: tab = optab81; break;
+		case 0x83: tab = optab83; break;
+		case 0x8f: tab = optab8F; break;
+		case 0xc0: tab = optabC0; break;
+		case 0xc1: tab = optabC1; break;
+		case 0xd0: tab = optabD0; break;
+		case 0xd1: tab = optabD1; break;
+		case 0xd2: tab = optabD2; break;
+		case 0xd3: tab = optabD3; break;
+		case 0xc6: tab = optabC6; break;
+		case 0xc7: tab = optabC7; break;
+		case 0xf6: tab = optabF6; break;
+		case 0xf7: tab = optabF7; break;
+		case 0xfe: tab = optabFE; break;
+		case 0xff: tab = optabFF; break;
+		default: tab = nil;
+		}
+		if(tab == nil || (inf = tab[step.modrm >> 3 & 7]) == 0){
+			giveup();
+			return -1;
+		}
+	}
+	if(((u8int)(inf >> 8) & 0xf0) == AIMM){	/* first operand is an immediate */
+		rc = inf >> 8 & 0xf;
+	imm:
+		switch(rc){	/* rc is the size field: 0 = byte, 2 = z */
+		case 0:
+			rc = fetch8(0); if(rc < 0) return -1;
+			step.imm = rc;
+			step.flags |= INSIMM8;
+			break;
+		case 2:
+			switch(step.osz){
+			case 2:
+				rc = fetch16(0); if(rc < 0) return -1;
+				step.imm = rc;
+				step.flags |= INSIMM16;
+				break;
+			case 4:
+			case 8:
+				vrc = fetch32(0); if(vrc < 0) return -1;
+				step.imm = vrc;
+				step.flags |= INSIMM32;
+				break;
+			}
+			break;
+		default:
+			vmerror("x86step: grab: immediate size=%d, shouldn't happen", rc);
+			giveup();
+			return -1;
+		}
+	}else if((u8int)(inf >> 16 & 0xf0) == AIMM){	/* second operand is an immediate */
+		rc = inf >> 16 & 0xf;
+		goto imm;
+	}
+	if(((u8int)(inf >> 8) & 0xf0) == AOb || (u8int)(inf >> 16 & 0xf0) == AOb)	/* AOb/AOv operand: absolute offset sized by step.asz */
+		switch(step.asz){
+		case 2:
+			rc = fetch16(0); if(rc < 0) return -1;
+			step.disp = rc;
+			step.flags |= INSDISP16;
+			break;
+		case 4:
+			vrc = fetch32(0); if(vrc < 0) return -1;
+			step.disp = vrc;
+			step.flags |= INSDISP32;
+			break;
+		case 8:
+			if(fetch64(0, (uvlong *) &step.disp) < 0) return -1;
+			step.flags |= INSDISP64;
+			break;
+		}
+	step.inf = inf;
+	return 0;
+}
+
+static void
+decreg(Oper *o, int n, int sz)	/* fill o as register operand number n of sz bytes and load its current value */
+{
+	o->type = OPREG;
+	o->sz = sz;
+	if(sz == 1 && n >= 4){	/* byte registers 4..7 are the high bytes of registers 0..3 */
+		o->addr = n ^ 0x14;	/* maps 4..7 to 0x10..0x13: register number plus the 0x10 "high byte" marker */
+		o->val = (u8int)(rget(x86reg[n&3]) >> 8);
+	}else{
+		o->addr = n;
+		o->val = rgetsz(x86reg[n], sz);
+	}
+}
+
+static void
+decmodrm(Oper *o, int sz)	/* decode the rm operand of step.modrm into o; for memory forms also pick the default segment */
+{
+	u8int mod, m;
+	
+	mod = step.modrm >> 6;
+	m = step.modrm & 7;
+	if(mod == 3){	/* register-direct form */
+		decreg(o, m, sz);
+		return;
+	}
+	o->type = OPMEM;
+	o->sz = sz;
+	if(step.asz == 2){	/* 16-bit addressing forms */
+		switch(m){
+		case 0: o->addr = rget(RBX) + rget(RSI); break;
+		case 1: o->addr = rget(RBX) + rget(RDI); break;
+		case 2: o->addr = rget(RBP) + rget(RSI); break;
+		case 3: o->addr = rget(RBP) + rget(RDI); break;	/* rm=3 is [BP+DI] */
+		case 4: o->addr = rget(RSI); break;
+		case 5: o->addr = rget(RDI); break;
+		case 6: o->addr = mod == 0 ? 0 : rget(RBP); break;	/* mod=0 rm=6 is a bare disp16 */
+		case 7: o->addr = rget(RBX); break;
+		}
+		o->addr = (u16int)(o->addr + step.disp);	/* wrap to 16 bits */
+		if(step.seg < 0)	/* no segment override prefix seen */
+			if(m == 2 || m == 3 || m == 6 && mod != 0)	/* every BP-based form defaults to SS */
+				step.seg = SEGSS;
 			else
-				PUT8(val, i, *ptr);	
+				step.seg = SEGDS;
+		return;
+	}
+	if(m != 4){	/* 32-bit form without a sib byte */
+		if((step.modrm & 0xc7) == 5)	/* mod=0 rm=5 is a bare disp32 */
+			o->addr = 0;
+		else
+			o->addr = rget(x86reg[m]);
+		o->addr = (u32int)(o->addr + step.disp);
+		if(step.seg < 0)
+			if(m == 5 && mod != 0)	/* BP-based form defaults to SS */
+				step.seg = SEGSS;
+			else
+				step.seg = SEGDS;
+		return;
+	}
+	if((step.sib >> 3 & 7) != 4)	/* index field 4 means no index register */
+		o->addr = rget(x86reg[step.sib >> 3 & 7]);
+	else
+		o->addr = 0;
+	o->addr <<= step.sib >> 6;	/* scale the index by 1, 2, 4 or 8 */
+	if((step.sib & 7) != 5 || mod != 0)	/* base field 5 with mod=0 means no base register */
+		o->addr += rget(x86reg[step.sib & 7]);
+	o->addr = (u32int)(o->addr + step.disp);
+	if(step.seg < 0)
+		if((step.sib & 7) == 4 || (step.sib & 7) == 5 && mod != 0)	/* SP or BP as base defaults to SS */
+			step.seg = SEGSS;
+		else
+			step.seg = SEGDS;
+}
+
+static int	/* always returns 0 */
+parseoper(void)	/* decode the two operand descriptors in step.inf (bits 8-15 and 16-23) into step.op[0] and step.op[1] */
+{
+	int i;
+	u8int f;
+	Oper *o;
+	u8int sizes[4] = {1, step.osz, step.osz == 8 ? 4 : step.osz, 2};	/* byte count for size fields b, v, z, w */
+	
+	for(i = 0; i < 2; i++){
+		f = step.inf >> 8 * (i + 1);
+		o = &step.op[i];
+		switch(f & 0xf0){	/* the high nibble selects the operand class */
+		case AGPRb:
+		case AGPRv:
+		case AGPRz:
+			o->type = OPREG;
+			o->addr = f & 0xf;
+			o->val = rget(x86reg[f & 0xf]);
+			o->sz = sizes[(f >> 4) - 1];	/* high nibble holds size field + 1 */
+			break;
+		case ASEG:
+			o->type = OPSEG;
+			o->addr = f & 0xf;
+			o->val = rget(x86segreg[f & 0xf]);
+			o->sz = 2;
+			break;
+		case AOb:
+			o->type = OPMEM;
+			o->addr = step.disp;	/* absolute offset fetched by grab */
+			o->sz = sizes[f & 0xf];
+			if(step.seg < 0)
+				step.seg = SEGDS;
+			break;
+		case AIMM:
+			o->type = OPIMM;
+			o->val = step.imm;
+			o->sz = sizes[f & 0xf];
+			break;
+		case AEb:
+			decmodrm(o, sizes[f & 0xf]);
+			break;
+		case ANOPE: if(f != A1) break;	/* A1 (0x01) has a zero high nibble, so it is matched here; no operand otherwise */
+			o->type = OPIMM;
+			o->val = 1;
+			o->sz = 1;
+			break;
+		case AGb:
+			decreg(o, step.modrm >> 3 & 7, sizes[f & 0xf]);	/* register named by the reg field of modrm */
+			break;
 		}
 	}
 	return 0;
+}
+
+static int	/* returns 0 on success, -1 on a failed memory access or an unsupported operand type */
+opwrite(Oper *o, uvlong v)	/* store v into the register or memory location described by o */
+{
+	char *n;
+	
+	switch(o->type){
+	case OPREG:
+		n = x86reg[o->addr & 0xf];
+		if((o->addr & 0x10) != 0)	/* 0x10 marks a high-byte register: replace bits 8-15 only */
+			rset(n, rget(n) & ~0xff00ULL | (u8int)v << 8);
+		else
+			rsetsz(n, v, o->sz);
+		return 0;
+	case OPMEM:
+		if(x86access(step.seg, o->addr, step.asz, &v, o->sz, ACCW, &step.tlb) < 0)
+			return -1;
+		return 0;
+	case OPSEG:	/* writes to segment registers are not implemented */
+		giveup();
+		return -1;
+	default:
+		vmerror("x86step: opwrite: unhandled o->type==%d, shouldn't happen", o->type);
+		giveup();
+		return -1;
+	}
+}
+
+static int	/* returns 0 on success, -1 on a failed memory access or an unsupported operand type */
+opread(Oper *o, uvlong *v)	/* load the value of operand o into *v */
+{
+	switch(o->type){
+	case OPREG:
+	case OPSEG:
+	case OPIMM:
+		*v = o->val;	/* value was captured when the operand was decoded */
+		return 0;
+	case OPMEM:
+		if(x86access(step.seg, o->addr, step.asz, v, o->sz, ACCR, &step.tlb) < 0)
+			return -1;
+		return 0;
+	default:
+		vmerror("x86step: opread: unhandled o->type==%d, shouldn't happen", o->type);
+		giveup();
+		return -1;
+	}
+}
+
+static vlong	/* returns the result truncated to asz bytes; 0 for an unhandled op */
+alu(int op, vlong a, int asz, vlong b, int bsz, uvlong *flags)	/* compute a op b and update CF/SF/ZF/OF/AF/PF in *flags */
+{
+	vlong c;
+	vlong amsk, sbit;
+	u32int flout;
+	u8int p;
+	
+	flout = 0;
+	amsk = (-1ULL)>>64-8*asz;	/* mask of the low asz bytes */
+	sbit = 1ULL<<8*asz-1;	/* mask of the sign bit of an asz-byte value; 64-bit shift so asz 4 and 8 work */
+	b = b << 64 - 8*bsz >> 64 - 8*bsz;	/* sign-extend b from bsz bytes */
+	switch(op){
+	case OADD:
+	case OADC:
+		c = (a & amsk) + (b & amsk);
+		if(op == OADC) c += *flags & 1;	/* bit 0 of *flags is used as the carry-in */
+		if((~(a ^ b) & (a ^ c) & sbit) != 0) flout |= OF;	/* operands agree in sign, result differs */
+		if((a & 0xf) + (b & 0xf) >= 0x10) flout |= AF;
+		goto addsub;
+	case OSUB:
+	case OSBB:
+	case OCMP:
+		c = (a & amsk) - (b & amsk);
+		if(op == OSBB) c -= *flags & 1;
+		if(((a ^ b) & (a ^ c) & sbit) != 0) flout |= OF;	/* operands differ in sign, result differs from a */
+		if((a & 0xf) < (b & 0xf)) flout |= AF;
+	addsub:
+		if((c & ~amsk) != 0) flout |= CF;	/* carry or borrow out of the operand width; cannot be seen when asz is 8 */
+	logic:
+		if((c & sbit) != 0) flout |= SF;
+		if((c & amsk) == 0) flout |= ZF;
+		p = c;
+		if((0x9669 >> ((p ^ p >> 4) & 0xf) & 1) != 0) flout |= PF;	/* 0x9669: bit i is set when i has even parity; PF means even parity of the low byte */
+		break;
+	case OAND: c = a & b; goto logic;
+	case OOR: c = a | b; goto logic;
+	case OXOR: c = a ^ b; goto logic;
+	default:
+		vmerror("x86step: alu: unhandled case op==%d, shouldn't happen", op);
+		return 0;
+	}
+	*flags ^= (*flags ^ flout) & (CF|SF|ZF|OF|AF|PF);	/* replace exactly these six flags with flout, keep the rest */
+	return c & amsk;
+}
+
+/*
+ * opcstring: emulate the string instructions LODS, STOS and MOVS.
+ *
+ * The element size is step.osz when the low opcode bit is set, else 1.
+ * The source segment is the override in step.seg if one was decoded,
+ * SEGDS otherwise; the destination is always SEGES.  With INSREP the
+ * iteration count comes from RCX, otherwise one element is processed.
+ * Bit 0x400 of RFLAGS (the x86 direction flag) makes the addresses
+ * step downwards.
+ * Returns 1 when every access succeeded.  If an access fails the loop
+ * stops, RSI/RDI/RCX are written back with the progress made so far and
+ * 0 is returned.  An unknown operation gives up and returns 0 without
+ * touching the registers.
+ */
+static int
+opcstring(void)
+{
+	int width, sseg, ok, stride;
+	uvlong si, di;
+	uvlong data;
+	uvlong count;
+	char name[16];
+
+	width = (step.opcode & 1) != 0 ? step.osz : 1;
+	sseg = step.seg < 0 ? SEGDS : step.seg;
+	si = rget(RSI);
+	di = rget(RDI);
+	count = 1;
+	if((step.flags & INSREP) != 0)
+		count = rgetsz(RCX, step.asz);
+	stride = width;
+	if((rget(RFLAGS) & 0x400) != 0)
+		stride = -width;
+
+	ok = 1;
+	switch((u8int)step.inf){
+	case OLODS:
+		while(count > 0){
+			if(x86access(sseg, si, step.asz, &data, width, ACCR, &step.tlb) < 0){
+				ok = 0;
+				break;
+			}
+			rsetsz(RAX, data, width);
+			si += stride;
+			count--;
+		}
+		break;
+	case OSTOS:
+		data = rget(RAX);
+		while(count > 0){
+			if(x86access(SEGES, di, step.asz, &data, width, ACCW, &step.tlb) < 0){
+				ok = 0;
+				break;
+			}
+			di += stride;
+			count--;
+		}
+		break;
+	case OMOVS:
+		while(count > 0){
+			/* load from the source first; only store if the load worked */
+			if(x86access(sseg, si, step.asz, &data, width, ACCR, &step.tlb) < 0){
+				ok = 0;
+				break;
+			}
+			if(x86access(SEGES, di, step.asz, &data, width, ACCW, &step.tlb) < 0){
+				ok = 0;
+				break;
+			}
+			si += stride;
+			di += stride;
+			count--;
+		}
+		break;
+	default:
+		vmerror("x86step: opcstring: unhandled case %s", enumconv((u8int)step.inf, name, onames));
+		giveup();
+		return 0;
+	}
+
+	/* commit the (possibly partial) progress */
+	rsetsz(RSI, si, step.asz);
+	rsetsz(RDI, di, step.asz);
+	if((step.flags & (INSREP|INSREPNE)) != 0)
+		rsetsz(RCX, count, step.asz);
+	return ok;
+}
+
+/*
+ * opcstack: emulate PUSH and POP of step.op[0].
+ *
+ * The stack pointer width is taken from step.mode; each push or pop
+ * moves the stack pointer by step.osz bytes and accesses SEGSS.
+ * A pushed operand narrower than step.osz is sign-extended unless it
+ * is a segment register.
+ * RSP is only updated after all accesses have succeeded.
+ * Returns 1 on success, 0 on a failed access or unknown operation.
+ */
+static int
+opcstack(void)
+{
+	Oper *o;
+	uvlong data, top;
+	int width, kind, ext;
+
+	/* todo: get stack pointer size from stack segment */
+	width = step.mode;
+	top = rgetsz(RSP, width);
+	o = &step.op[0];
+	kind = (u8int)step.inf;
+	if(kind == OPUSH){
+		if(opread(o, &data) < 0)
+			return 0;
+		if(o->sz < step.osz && o->type != OPSEG){
+			/* sign-extend from the operand's own width */
+			ext = 64 - 8 * o->sz;
+			data = (vlong)data << ext >> ext;
+		}
+		top -= step.osz;
+		if(x86access(SEGSS, top, width, &data, step.osz, ACCW, &step.tlb) < 0)
+			return 0;
+	}else if(kind == OPOP){
+		if(x86access(SEGSS, top, width, &data, step.osz, ACCR, &step.tlb) < 0)
+			return 0;
+		if(opwrite(o, data) < 0)
+			return 0;
+		top += step.osz;
+	}else{
+		vmerror("x86step: stack: unhandled case op==%d, shouldn't happen", (u8int)step.inf);
+		return 0;
+	}
+	rsetsz(RSP, top, width);
+	return 1;
+}
+
+/*
+ * x86step: decode the instruction at the guest's current PC and emulate it.
+ *
+ * Supported: MOV, the string operations STOS/LODS/MOVS (optionally with
+ * REP), the ALU group ADD/ADC/SUB/SBB/CMP/AND/OR/XOR, and PUSH/POP.
+ * Prefixes that make no sense for an instruction cause giveup().
+ * Returns 1 if the instruction was emulated, 0 if decoding failed, an
+ * operand access failed, or the instruction is not supported.
+ */
+int
+x86step(void)
+{
+	uvlong val, valb;
+	uvlong rflags;
+	char buf[16];
+	
+	memset(&step, 0, sizeof(step));
+	step.seg = -1;	/* no segment override seen yet */
+	step.pc = rget(RPC);
+	step.npc = step.pc;
+	/* mode is fixed at 4 here; address and operand size start out equal to it */
+	step.mode = 4;
+	step.asz = step.osz = step.mode;
+	if(grab() < 0 || parseoper() < 0)
+		return 0;
+	print("%#.*p %s (%#ux,%d,%#ullx,%#ullx) (%#ux,%d,%#ullx,%#ullx) si %#llux di %#llux\n", 2*step.mode, step.pc, enumconv((u8int)step.inf,buf,onames), step.op[0].type, step.op[0].sz, step.op[0].addr, step.op[0].val, step.op[1].type, step.op[1].sz, step.op[1].addr, step.op[1].val, rget(RSI), rget(RDI));
+	switch((u8int)step.inf){
+	case OMOV:
+		if((step.flags & (INSREP|INSREPNE|INSLOCK)) != 0) {giveup(); return 0;}
+		if(opread(&step.op[1], &val) < 0) return 0;
+		if(opwrite(&step.op[0], val) < 0) return 0;
+		return 1;
+	case OSTOS: case OLODS: case OMOVS:
+		/* plain REP is handled by opcstring */
+		if((step.flags & (INSREPNE|INSLOCK)) != 0) {giveup(); return 0;}
+		return opcstring();
+	case OADD: case OADC: case OSUB: case OSBB: case OCMP: case OAND: case OOR: case OXOR:
+		if((step.flags & (INSREP|INSREPNE)) != 0) {giveup(); return 0;}
+		if(opread(&step.op[0], &val) < 0) return 0;
+		if(opread(&step.op[1], &valb) < 0) return 0;
+		rflags = rget(RFLAGS);
+		val = alu((u8int)step.inf, val, step.op[0].sz, valb, step.op[1].sz, &rflags);
+		/* CMP only updates the flags; flags are committed only once the store succeeded */
+		if((u8int)step.inf != OCMP && opwrite(&step.op[0], val) < 0) return 0;
+		rset(RFLAGS, rflags);
+		return 1;
+	case OPUSH: case OPOP:
+		/* reject REP, REPNE and LOCK alike, as for MOV */
+		if((step.flags & (INSREP|INSREPNE|INSLOCK)) != 0) {giveup(); return 0;}
+		return opcstack();
+	default:
+		vmerror("x86step: unhandled case %s", enumconv((u8int)step.inf, buf, onames));
+		giveup();
+		return 0;
+	}
 }
--- a/sys/src/cmd/vmx/x86.h
+++ b/sys/src/cmd/vmx/x86.h
@@ -27,3 +27,15 @@
 	
 	EferLme	= 1<<8,
 };
+
+extern char *x86reg[16];	/* general-purpose register names, defined in exith.c; the emulator indexes it with the low 4 bits of a register operand's addr */
+extern char *x86segreg[8];	/* segment register names, defined in exith.c */
+
+/* x86 RFLAGS status-flag bits computed by the instruction emulator */
+enum {
+	CF	= 0x001,	/* carry, bit 0 */
+	PF	= 0x004,	/* parity, bit 2 */
+	AF	= 0x010,	/* auxiliary carry, bit 4 */
+	ZF	= 0x040,	/* zero, bit 6 */
+	SF	= 0x080,	/* sign, bit 7 */
+	OF	= 0x800,	/* overflow, bit 11 */
+};