shithub: riscv

ref: 5ba95fdb07ddc2c32111a1b2f57f17aa27fcbbf5
parent: fa03455b5057675b18d1c87aef2d1071b2088de0
author: mischief <[email protected]>
date: Tue Jun 24 14:02:25 EDT 2014

import xen 32-bit paravirtual kernel from /n/sources/xen.

--- /dev/null
+++ b/sys/src/9/xen/archxen.c
@@ -1,0 +1,115 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+static int
+identify(void)
+{
+	m->havepge = 0;
+	return 0;
+}
+
+static void
+intrinit(void)
+{
+	ulong machs;
+	int i, ncpu;
+	char *cp;
+	char node[32];
+	char buf[32];
+
+	if((cp = getconf("*nomp")) != nil && strtol(cp, 0, 0) != 0)
+		return;
+	ncpu = MAX_VIRT_CPUS;
+	if (cp = getconf("*ncpu")) {
+		ncpu = strtol(cp, 0, 0);
+		if (ncpu < 1)
+			ncpu = 1;
+	}
+	machs = 1;
+	for (i = 1; i < ncpu; i++) {
+		sprint(node, "cpu/%d/availability", i);
+		if (xenstore_read(node, buf, sizeof buf) <= 0)
+			break;
+		print("%s: %s\n", node, buf);
+		if (strcmp(buf, "online") == 0) {
+			machs |= 1<<i;
+			conf.nmach++;
+		}
+	}
+	if (conf.nmach > 1) {
+		print("Sorry, SMP not supported yet: 1 of %lud CPUs started\n", conf.nmach);
+		conf.nmach = 1;
+	}
+}
+
+static void
+shutdown(void)
+{
+	HYPERVISOR_shutdown(1);
+}
+
+int xenintrenable(Vctl *v);
+int xenintrvecno(int irq);
+int xenintrdisable(int irq);
+void	xentimerenable(void);
+uvlong	xentimerread(uvlong*);
+void	xentimerset(uvlong);
+
+PCArch archxen = {
+.id=		"Xen",	
+.ident=		identify,
+.reset=		shutdown,
+.intrinit=	intrinit,
+.intrenable=	xenintrenable,
+.intrvecno=	xenintrvecno,
+.intrdisable=	xenintrdisable,
+.clockenable=	xentimerenable,
+.fastclock=	xentimerread,
+.timerset=	xentimerset,
+};
+
+/*
+ * Placeholders to satisfy external references in generic devarch.c
+ */
+ulong	getcr4(void)	{ return 0; }
+void	putcr4(ulong)	{}
+int	inb(int)	{ return 0; }
+ushort	ins(int)	{ return 0; }
+ulong	inl(int)	{ return 0; }
+void	outb(int, int)	{}
+void	outs(int, ushort)	{}
+void	outl(int, ulong)	{}
+void	i8042reset(void)	{}
+void	i8253enable(void)	{}
+void	i8253init(void)	{}
+void	i8253link(void)	{}
+uvlong	i8253read(uvlong*)	{ return 0; }
+void	i8253timerset(uvlong)	{}
+int	i8259disable(int)	{ return 0; }
+int	i8259enable(Vctl*)	{ return 0; }
+void	i8259init(void)	{}
+int	i8259isr(int)	{ return 0; }
+void	i8259on(void)	{}
+void	i8259off(void)	{}
+int	i8259vecno(int)	{ return 0; }
+int	mtrrprint(char*, long) { return 0; }
+int	mtrr(uvlong, uvlong, char *) { return 0; }
+
+/*
+ * XXX until fpsave is debugged
+ */
+void
+fpssesave(FPsave* f)
+{
+	fpx87save(f);
+}
+
+void
+fpsserestore(FPsave* f)
+{
+	fpx87restore(f);
+}
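
The PCArch table above is the pc port's hook mechanism: generic code never calls Xen routines directly, it indirects through a global arch pointer. A minimal sketch of the selection, assuming the usual pc-port convention of an archinit() that sets the pointer (the selection code itself is not part of this patch):

	/* sketch: generic code reaches archxen through a global arch pointer */
	extern PCArch archxen;
	PCArch *arch;

	void
	archinit(void)
	{
		arch = &archxen;	/* a xen guest has exactly one "architecture" */
		if(arch->ident)
			arch->ident();
	}

main.c below then calls arch->intrinit(), arch->clockenable() and friends through this pointer.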
--- /dev/null
+++ b/sys/src/9/xen/cppx
@@ -1,0 +1,41 @@
+#!/bin/rc
+awk '
+function qq(s) {
+	gsub("\"", "£", s)
+	return "\"@" s "\""
+}
+
+/^#include/	{ next
+}
+/^#define.*\\$/ {
+	save[n++] = $0
+	print qq($0 "\\")
+	next
+}
+(n > 0) && /\\$/ {
+	save[n++] = $0
+	print qq($0 "\\")
+	next
+}
+(n > 0)  {
+	save[n++] = $0
+	print qq($0)
+	for (i = 0; i < n; i++) print save[i]
+	n = 0
+	next
+}
+/^# *((define)|(error)).*[^\\]*$/ {
+	print qq($0)
+	print
+	next
+}
+/^# *undef.*[^\\]*$/ {
+	print
+	print qq($0)
+	next
+}
+	{ print
+}
+' $* |
+cpp -P |
+sed -e 's/£/"/g' -e 's/^"@(.*\\)\\"$/\1/' -e 's/^"@(.*)"$/\1/'
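
cppx exists because cpp would normally consume the #define lines the kernel wants to keep: the awk pass also emits each #define/#undef/#error as a quoted string (prefixed "@, with embedded quotes re-spelt as £) that rides through cpp -P as a literal, and the trailing sed pass unwraps it again. An illustrative input/output pair (the macro is hypothetical):

	input:
		#define mk_unsigned_long(x) ((unsigned long)(x))
		mk_unsigned_long(7)

	output: the definition line itself survives, and uses of it are expanded:
		#define mk_unsigned_long(x) ((unsigned long)(x))
		((unsigned long)(7))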
--- /dev/null
+++ b/sys/src/9/xen/dat.h
@@ -1,0 +1,53 @@
+#include "../pc/dat.h"
+
+typedef unsigned char uint8_t;
+typedef unsigned char uint8;
+typedef unsigned short uint16_t;
+typedef unsigned long uint32_t;
+typedef unsigned long long uint64_t;
+typedef char int8_t;
+typedef short int16_t;
+typedef long int32_t;
+typedef long long int64_t;
+
+#define __attribute__(x)	
+enum {
+	EINVAL,
+	EACCES,
+	EEXIST,
+	EISDIR,
+	ENOENT,
+	ENOMEM,
+	ENOSPC,
+	EIO,
+	ENOTEMPTY,
+	ENOSYS,
+	EROFS,
+	EBUSY,
+	EAGAIN,
+	EISCONN,
+};
+
+#include "xendat.h"
+
+#undef mk_unsigned_long
+#define mk_unsigned_long(x) ((unsigned long)(x))
+
+#ifndef set_xen_guest_handle
+#define set_xen_guest_handle(hnd, val)	hnd = val
+#endif
+
+extern ulong hypervisor_virt_start;
+extern ulong *patomfn, *matopfn;
+extern start_info_t *xenstart;
+extern ulong xentop;
+extern shared_info_t *HYPERVISOR_shared_info;
+
+/*
+ * Fake kmap
+ * XXX is this still viable?
+ */
+#undef VA
+#define	VA(k)		((ulong)(k))
+#define	kmap(p)		(KMap*)((p)->pa|KZERO)
+#define	kunmap(k)
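
Because the guest's pseudo-physical memory is permanently mapped at KZERO, kmap degenerates into address arithmetic: no page table is touched and kunmap is a no-op. A minimal usage sketch under those definitions:

	/* sketch: "mapping" a page here is just an address computation */
	void
	zeropage(Page *pg)
	{
		KMap *k;

		k = kmap(pg);			/* (KMap*)(pg->pa|KZERO) */
		memset((void*)VA(k), 0, BY2PG);
		kunmap(k);			/* expands to nothing */
	}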
--- /dev/null
+++ b/sys/src/9/xen/devrtc.c
@@ -1,0 +1,96 @@
+/*
+ * Xen wall clock
+ */
+
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"../port/error.h"
+
+enum{
+	Qdir = 0,
+	Qrtc,
+};
+
+Dirtab rtcdir[]={
+	".",	{Qdir, 0, QTDIR},	0,	0555,
+	"rtc",		{Qrtc, 0},	0,	0664,
+};
+
+static long
+rtcread(Chan *c, void *a, long n, vlong offset)
+{
+	if(c->qid.type & QTDIR)
+		return devdirread(c, a, n, rtcdir, nelem(rtcdir), devgen);
+
+	switch((ulong)c->qid.path){
+	case Qrtc:
+		return readnum((ulong)offset, a, n, xenwallclock(), 12);
+	}
+	error(Ebadarg);
+	return 0;
+}
+
+static long
+rtcwrite(Chan*c, void*, long n, vlong)
+{
+	switch((ulong)c->qid.path){
+	case Qrtc:
+		return n;
+	}
+	error(Eperm);
+	return 0;
+}
+
+static Chan*
+rtcattach(char* spec)
+{
+	return devattach('r', spec);
+}
+
+static Walkqid*	 
+rtcwalk(Chan* c, Chan *nc, char** name, int nname)
+{
+	return devwalk(c, nc, name, nname, rtcdir, nelem(rtcdir), devgen);
+}
+
+static int	 
+rtcstat(Chan* c, uchar* dp, int n)
+{
+	return devstat(c, dp, n, rtcdir, nelem(rtcdir), devgen);
+}
+
+static Chan*
+rtcopen(Chan* c, int omode)
+{
+	return devopen(c, omode, rtcdir, nelem(rtcdir), devgen);
+}
+
+static void	 
+rtcclose(Chan*)
+{
+}
+
+Dev rtcdevtab = {
+	'r',
+	"rtc",
+
+	devreset,
+	devinit,
+	devshutdown,
+	rtcattach,
+	rtcwalk,
+	rtcstat,
+	rtcopen,
+	devcreate,
+	rtcclose,
+	rtcread,
+	devbread,
+	rtcwrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};
+
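
The device attaches as #r, and the read side simply formats xenwallclock() with readnum, so user space sees decimal epoch seconds. A hypothetical user-level reader:

	#include <u.h>
	#include <libc.h>

	void
	main(void)
	{
		int fd, n;
		char buf[16];

		fd = open("#r/rtc", OREAD);	/* normally bound at /dev/rtc */
		if(fd < 0)
			sysfatal("open: %r");
		if((n = read(fd, buf, sizeof buf - 1)) < 0)
			sysfatal("read: %r");
		buf[n] = 0;
		print("epoch seconds: %s\n", buf);
		exits(nil);
	}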
--- /dev/null
+++ b/sys/src/9/xen/devxenstore.c
@@ -1,0 +1,590 @@
+/*
+ * Driver for xenstore - database shared between domains, used by xenbus to
+ * communicate configuration info.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../pc/io.h"
+
+#define LOG(a)
+
+typedef struct Aux Aux;
+
+enum {
+	Qtopdir,
+	Qctl,
+	Qwatch,
+	WRITING = 0,
+	READING,
+	WATCHING,
+	MAXIO = 8*1024,
+};
+
+Dirtab xsdir[] = {
+	".",	{Qtopdir, 0, QTDIR},	0,	0555,
+	"xenstore",	{Qctl, 0},	0,	0660,
+	"xenwatch", {Qwatch, 0}, 0, 0440,
+};
+
+struct {
+	struct xenstore_domain_interface	*intf;
+	struct xsd_sockmsg	hdr;
+	int	hdrvalid;
+	int	evtchn;
+	int	nextreqid;
+	Aux *rhead;
+	Aux *kernelaux;
+	Queue *evq;
+	Rendez wr;
+	Rendez rr;
+	QLock;
+	Lock rlock;
+} xenstore;
+
+struct Aux {
+	QLock;
+	Rendez qr;
+	Queue *ioq;
+	Aux	*next;
+	int state;
+	int	reqid;
+};
+
+static char Ephase[] = "phase error";
+static char Eproto[] = "protocol error";
+static char NodeShutdown[] = "control/shutdown";
+
+static void xenbusproc(void*);
+
+static int
+notfull(void*)
+{
+	struct xenstore_domain_interface *xs = xenstore.intf;
+
+	return (xs->req_prod-xs->req_cons) < XENSTORE_RING_SIZE;
+}
+
+static int
+notempty(void*)
+{
+	struct xenstore_domain_interface *xs = xenstore.intf;
+
+	return xs->rsp_prod > xs->rsp_cons;
+}
+
+static int
+ishead(void* a)
+{
+	return xenstore.rhead == a;
+}
+
+static void
+xsintr(Ureg*, void*)
+{
+	LOG(dprint("xsintr\n");)
+	wakeup(&xenstore.rr);
+	wakeup(&xenstore.wr);
+}
+
+static void
+xwrite(Queue *q, char *buf, int len)
+{
+	struct xenstore_domain_interface *xs;
+	int m, n;
+	XENSTORE_RING_IDX idx;
+
+	xs = xenstore.intf;
+	while (len > 0) {
+		n = XENSTORE_RING_SIZE - (xs->req_prod - xs->req_cons);
+		if (n == 0) {
+			xenchannotify(xenstore.evtchn);
+			sleep(&xenstore.wr, notfull, 0);
+			continue;
+		}
+		if (n > len)
+			n = len;
+		idx = MASK_XENSTORE_IDX(xs->req_prod);
+		m = XENSTORE_RING_SIZE - idx;
+		if (m > n)
+			m = n;
+		if (q)
+			qread(q, xs->req+idx, m);
+		else
+			memmove(xs->req+idx, buf, m);
+		if (m < n) {
+			if (q)
+				qread(q, xs->req, n-m);
+			else
+				memmove(xs->req, buf+m, n-m);
+		}
+		coherence();
+		xs->req_prod += n;
+		xenchannotify(xenstore.evtchn);
+		if (buf)
+			buf += n;
+		len -= n;
+	}
+}
+
+static void
+xread(Queue *q, char *buf, int len)
+{
+	struct xenstore_domain_interface *xs = xenstore.intf;
+	int n, m;
+	XENSTORE_RING_IDX idx;
+
+	for (n = len; n > 0; n -= m) {
+		while (xs->rsp_prod == xs->rsp_cons) {
+			xenchannotify(xenstore.evtchn);
+			if (up == 0)
+				HYPERVISOR_yield();
+			else
+				sleep(&xenstore.rr, notempty, 0);
+		}
+		idx = MASK_XENSTORE_IDX(xs->rsp_cons);
+		m = xs->rsp_prod - xs->rsp_cons;
+		if (m > n)
+			m = n;
+		if (m > XENSTORE_RING_SIZE - idx)
+			m = XENSTORE_RING_SIZE - idx;
+		if (q)
+			qwrite(q, xs->rsp+idx, m);
+		else if (buf) {
+			memmove(buf, xs->rsp+idx, m);
+			buf += m;
+		}
+		coherence();
+		xs->rsp_cons += m;
+	}
+	xenchannotify(xenstore.evtchn);
+}
+
+static void
+xsrpc(Aux *aux)
+{
+	Queue *q;
+	Aux *l, *r, **lp;
+	struct xsd_sockmsg hdr;
+	long n;
+
+	q = aux->ioq;
+
+	if (aux->state == WATCHING)
+		aux->reqid = 0;
+	else {
+		/* get the request header and check validity */
+		if (qlen(q) < sizeof hdr)
+			error(Eproto);
+		qread(q, &hdr, sizeof hdr);
+		n = hdr.len;
+		if (qlen(q) != n)
+			error(Eproto);
+		qlock(&xenstore);
+		/* generate a unique request id */
+		aux->reqid = ++xenstore.nextreqid;
+		hdr.req_id = aux->reqid;
+		hdr.tx_id = 0;
+		/* send the request */
+		xwrite(0, (char*)&hdr, sizeof hdr);
+		xwrite(q, 0, n);
+		qunlock(&xenstore);
+	}
+
+	/* join list of requests awaiting response */
+	ilock(&xenstore.rlock);
+	if (xenstore.rhead == 0) {
+		aux->next = 0;
+		xenstore.rhead = aux;
+	} else {
+		aux->next = xenstore.rhead->next;
+		xenstore.rhead->next = aux;
+	}
+	iunlock(&xenstore.rlock);
+
+	/* loop until matching response header has been received */
+	if (waserror()) {
+		ilock(&xenstore.rlock);
+		for (lp = &xenstore.rhead; *lp && *lp != aux; lp = &(*lp)->next)
+			;
+		if (*lp != 0) {
+			*lp = (*lp)->next;
+			if (lp == &xenstore.rhead && *lp)
+				wakeup(&(*lp)->qr);
+		}
+		iunlock(&xenstore.rlock);
+		nexterror();
+	}
+	for (;;) {
+		/* wait until this request reaches head of queue */
+		if (xenstore.rhead != aux)
+			sleep(&aux->qr, ishead, aux);
+		/* wait until a response header (maybe for another request) has been read */
+		if (!xenstore.hdrvalid) {
+			xread(0, (char*)&xenstore.hdr, sizeof xenstore.hdr);
+			xenstore.hdrvalid = 1;
+		}
+		if (xenstore.hdr.req_id == aux->reqid)
+			break;
+		/* response was for a different request: move matching request to head of queue */
+		ilock(&xenstore.rlock);
+		for (l = xenstore.rhead; r = l->next; l = r)
+			if (xenstore.hdr.req_id == r->reqid) {
+				l->next = r->next;
+				r->next = xenstore.rhead;
+				xenstore.rhead = r;
+				break;
+			}
+		iunlock(&xenstore.rlock);
+		if (r) {
+			/* wake the matching request */
+			wakeup(&r->qr);
+		} else {
+			/* response without a request: should be a watch event */
+			xenstore.hdrvalid = 0;
+			xread(0, 0, xenstore.hdr.len);
+			continue;
+		}
+	}
+
+	/* queue the response header, and data if any, for the caller to read */
+	qwrite(q, &xenstore.hdr, sizeof xenstore.hdr);
+	xenstore.hdrvalid = 0;
+	/* read the data, if any */
+	if (xenstore.hdr.len > 0)
+		xread(q, 0, xenstore.hdr.len);
+
+	/* remove finished request and wake the next request on the queue */
+	ilock(&xenstore.rlock);
+	xenstore.rhead = aux->next;
+	iunlock(&xenstore.rlock);
+	poperror();
+	if (xenstore.rhead != 0)
+		wakeup(&xenstore.rhead->qr);
+}
+
+static void
+xsreset()
+{
+	LOG(dprint("xsreset\n");)
+}
+
+static void
+xsinit()
+{
+	intrenable(xenstore.evtchn, xsintr, 0, BUSUNKNOWN, "Xen store");
+	kproc("xenbus", xenbusproc, 0);
+}
+
+static Chan*
+xsattach(char *spec)
+{
+	return devattach('x', spec);
+}
+
+static Walkqid*
+xswalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name, nname, xsdir, nelem(xsdir), devgen);
+}
+
+static int
+xsstat(Chan *c, uchar *dp, int n)
+{
+	return devstat(c, dp, n, xsdir, nelem(xsdir), devgen);
+}
+
+static Aux*
+auxalloc(int initstate)
+{
+	Aux *aux;
+	Queue *q;
+
+	aux = mallocz(sizeof(Aux), 1);
+	if (aux == 0)
+		return 0;
+	q = qopen(MAXIO, 0, 0, 0);
+	if (q == 0) {
+		free(aux);
+		return 0;
+	}
+	qnoblock(q, 1);
+	aux->state = initstate;
+	aux->ioq = q;
+	return aux;
+}
+
+static Chan*
+xsopen(Chan *c, int omode)
+{
+	Aux *aux;
+	int state;
+
+	c = devopen(c, omode, xsdir, nelem(xsdir), devgen);
+	state = WRITING;
+	switch ((ulong)c->qid.path) {
+	case Qwatch:
+		state = WATCHING;
+	/* fall through */
+	case Qctl:
+		aux = auxalloc(state);
+		if (aux == 0) {
+			c->flag &= ~COPEN;
+			error(Enomem);
+		}
+		c->aux = aux;
+		break;
+	}
+	return c;
+}
+
+static void
+xsclose(Chan* c)
+{
+	Aux *aux;
+
+	if ((c->flag&COPEN) == 0)
+		return;
+
+	switch ((ulong)c->qid.path) {
+	case Qwatch:
+	case Qctl:
+		if ((aux = (Aux*)c->aux) != 0) {
+			qfree(aux->ioq);
+			free(aux);
+			c->aux = 0;
+		}
+		break;
+	}
+}
+
+static long
+xsread(Chan *c, void *a, long n, vlong off)
+{
+	Aux *aux;
+	Queue *q;
+	long nr;
+
+	USED(off);
+	if (c->qid.type == QTDIR)
+		return devdirread(c, a, n, xsdir, nelem(xsdir), devgen);
+
+	aux = (Aux*)c->aux;
+	qlock(aux);
+	if (waserror()) {
+		qunlock(aux);
+		nexterror();
+	}
+	q = aux->ioq;
+	switch (aux->state) {
+	case WRITING:
+		if (qlen(q) == 0)
+			error(Ephase);
+		xsrpc(aux);
+		aux->state = READING;
+		break;
+	case WATCHING:
+		if (qlen(q) == 0)
+			xsrpc(aux);
+		break;
+	}
+	if (!qcanread(q))
+		nr = 0;
+	else
+		nr = qread(q, a, n);
+	qunlock(aux);
+	poperror();
+	return nr;
+}
+
+static long
+xswrite(Chan *c, void *a, long n, vlong off)
+{
+	Aux *aux;
+	Queue *q;
+	long nr;
+
+	if (c->qid.type == QTDIR)
+		error(Eperm);
+	if ((ulong)c->qid.path == Qwatch)
+		error(Ebadusefd);
+
+	aux = (Aux*)c->aux;
+	qlock(aux);
+	if (waserror()) {
+		qunlock(aux);
+		nexterror();
+	}
+	q = aux->ioq;
+	if ((off == 0 || aux->state == READING) && qlen(q) > 0)
+		qflush(q);
+	aux->state = WRITING;
+	nr = qwrite(aux->ioq, a, n);
+	qunlock(aux);
+	poperror();
+	return nr;
+}
+
+Dev xenstoredevtab = {
+	'x',
+	"xenstore",
+
+	xsreset,
+	xsinit,
+	devshutdown,
+	xsattach,
+	xswalk,
+	xsstat,
+	xsopen,
+	devcreate,
+	xsclose,
+	xsread,
+	devbread,
+	xswrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};
+
+static char*
+xscmd(Aux *aux, char *buf, int cmd, char *s, char *val)
+{
+	struct xsd_sockmsg *msg;
+	char *arg;
+	long n;
+
+	msg = (struct xsd_sockmsg*)buf;
+	arg = buf + sizeof(*msg);
+	msg->type = cmd;
+	msg->len = strlen(s)+1;
+	if (val) {
+		msg->len += strlen(val);
+		if (cmd == XS_WATCH)
+			msg->len++;		/* stupid special case */
+	}
+	strcpy(arg, s);
+	if (val)
+		strcpy(arg+strlen(s)+1, val);
+	n = sizeof(*msg)+msg->len;
+	if (up == 0) {
+		msg->req_id = 1;
+		msg->tx_id = 0;
+		xwrite(0, buf, n);
+		xread(0, buf, sizeof(*msg));
+		xread(0, arg, msg->len);
+	} else {
+		qlock(aux);
+		if (qlen(aux->ioq) > 0)
+			qflush(aux->ioq);
+		qwrite(aux->ioq, buf, n);
+		xsrpc(aux);
+		qread(aux->ioq, buf, sizeof(*msg));
+		LOG(dprint("xs: type %d req_id %d len %d\n", msg->type, msg->req_id, msg->len);)
+		// XXX buffer overflow
+		qread(aux->ioq, arg, msg->len);
+		qunlock(aux);
+	}
+	arg[msg->len] = 0;
+	if (msg->type == XS_ERROR) {
+		return 0;
+	}
+	return arg;
+}
+
+static void
+intfinit(void)
+{
+	if (xenstore.intf == 0) {
+		xenstore.intf = (struct xenstore_domain_interface*)mmumapframe(XENBUS, xenstart->store_mfn);
+		xenstore.evtchn = xenstart->store_evtchn;
+		xenstore.kernelaux = auxalloc(WRITING);
+	}
+}
+
+void
+xenstore_write(char *s, char *val)
+{
+	char buf[512];
+
+	intfinit();
+	xscmd(xenstore.kernelaux, buf, XS_WRITE, s, val);
+}
+
+int
+xenstore_read(char *s, char *val, int len)
+{
+	char buf[512];
+	char *p;
+
+	intfinit();
+	p = xscmd(xenstore.kernelaux, buf, XS_READ, s, nil);
+	if (p == 0)
+		return -1;
+	strecpy(val, val+len, p);
+	return 1;
+}
+
+void
+xenstore_setd(char *dir, char *node, int value)
+{
+	int off;
+	char buf[12];
+
+	off = strlen(dir);
+	sprint(dir+off, "%s", node);
+	sprint(buf, "%ud", value);
+	xenstore_write(dir, buf);
+	dir[off] = 0;
+}
+
+int
+xenstore_gets(char *dir, char *node, char *buf, int buflen)
+{
+	int off;
+	int n;
+
+	off = strlen(dir);
+	sprint(dir+off, "%s", node);
+	n = xenstore_read(dir, buf, buflen);
+	dir[off] = 0;
+	return n;
+}
+
+static void
+xenbusproc(void*)
+{
+	Chan *c;
+	Aux *aux;
+	char *p;
+	struct xsd_sockmsg msg;
+	char buf[512];
+	int n, m;
+
+	c = namec("#x/xenstore", Aopen, ORDWR, 0);
+	aux = (Aux*)c->aux;
+	c = namec("#x/xenwatch", Aopen, OREAD, 0);
+	xscmd(aux, buf, XS_WATCH, NodeShutdown, "$");
+	for (;;) {
+		xsread(c, &msg, sizeof(msg), 0);
+		for (n = msg.len; n > 0; n -= m)
+			m = xsread(c, buf + msg.len - n, n, 0);	/* accumulate the payload */
+		buf[msg.len] = 0;
+		if (strcmp(buf, NodeShutdown) != 0)
+			continue;
+		p = xscmd(aux, buf, XS_READ, NodeShutdown, nil);
+		if (p == nil)
+			continue;
+		if (strcmp(p, "poweroff") == 0)
+			reboot(nil, nil, 0);
+		else if (strcmp(p, "reboot") == 0)
+			exit(0);
+		else {
+			print("xenbus: %s=%s\n", NodeShutdown, p);
+			xscmd(aux, buf, XS_WRITE, NodeShutdown, "");
+		}
+	}
+}
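
Every xenstore exchange is a fixed struct xsd_sockmsg header (type, req_id, tx_id, len) followed by len bytes of payload, and the response comes back in the same shape with req_id echoed; that is what xsrpc matches on. A sketch of the wire image xscmd builds for a read (the node path is illustrative):

	/* sketch: request image for XS_READ of "device/vif/0/mac" */
	char buf[512];
	struct xsd_sockmsg *msg = (struct xsd_sockmsg*)buf;
	char *path = "device/vif/0/mac";

	msg->type = XS_READ;
	msg->req_id = 1;		/* echoed back in the response header */
	msg->tx_id = 0;			/* no transaction */
	msg->len = strlen(path)+1;	/* payload is the NUL-terminated path */
	strcpy(buf + sizeof(*msg), path);
	/* then xwrite(0, buf, sizeof(*msg)+msg->len) and xread() the reply */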
--- /dev/null
+++ b/sys/src/9/xen/dpart.c
@@ -1,0 +1,248 @@
+#include <u.h>
+#include <libc.h>
+#include <disk.h>
+
+typedef void Fs;
+#include "/sys/src/boot/pc/dosfs.h"
+
+enum {
+	Npart = 32
+};
+
+#define	GSHORT(p)		(((p)[1]<<8)|(p)[0])
+#define	GLONG(p)			((GSHORT(p+2)<<16)|GSHORT(p))
+
+int
+readdisk(Disk *d, void *buf, vlong off, int len)
+{
+	if(seek(d->fd, off, 0) == -1
+	|| read(d->fd, buf, len) != len)
+		return -1;
+	return 0;
+}
+
+void
+addpart(Disk *d, char *name, ulong s, ulong e)
+{
+	print("%s: part %s %lud %lud\n", d->prefix, name, s, e);
+	fprint(d->ctlfd, "part %s %lud %lud\n", name, s, e);
+}
+
+int
+isdos(int t)
+{
+	return t==FAT12 || t==FAT16 || t==FATHUGE || t==FAT32 || t==FAT32X;
+}
+
+int
+isextend(int t)
+{
+	return t==EXTEND || t==EXTHUGE || t==LEXTEND;
+}
+
+
+/* build a cdboot partition if there is an embedded boot floppy image */
+int
+cdpart(Disk *d)
+{
+	uchar buf[2048];
+	ulong a, n;
+	uchar *p;
+
+	if(readdisk(d, buf, 17*2048, 2048) == -1
+	|| strcmp((char*)buf+1, "CD001\x01EL TORITO SPECIFICATION") != 0)
+		return 0;
+
+	p = buf + 0x47;
+	a = p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24);
+	
+	if(readdisk(d, buf, a*2048, 2048) == -1
+	|| memcmp((char*)buf, "\x01\x00\x00\x00", 4) != 0
+	|| memcmp((char*)buf+30, "\x55\xAA", 2) != 0
+	|| buf[0x20] != 0x88)
+		return 0;
+
+	p = buf+0x28;
+	a = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
+
+	switch(buf[0x21]) {
+	case 1: n = 1200*1024; break;
+	case 2: n = 1440*1024; break;
+	case 3: n = 2880*1024; break;
+	default: return 0;
+	}
+
+	a = a * (uvlong)2048 / d->secsize;
+	n /= d->secsize;
+	addpart(d, "cdboot", a, a+n);
+	return 1;
+}
+
+int
+p9part(Disk *d, char *name, ulong pstart)
+{
+	char partbuf[512];
+	char *field[4], *line[Npart+1], *name2;
+	ulong start, end;
+	int i, n;
+
+	name2 = smprint("%s%s", d->prefix, name);
+	d = opendisk(name2, 1, 0);
+	if(!d) {
+		fprint(2, "%s: %r\n", name2);
+		free(name2);
+		return 0;
+	}
+	free(name2);
+
+	if(readdisk(d, partbuf, 512, sizeof partbuf) == -1)
+		return 0;
+	partbuf[sizeof partbuf - 1] = '\0';
+	if(strncmp(partbuf, "part ", 5) != 0
+	|| (n = getfields(partbuf, line, Npart+1, 0, "\n")) == 0)
+		return 0;
+	for(i = 0; i < n; i++) {
+		if(strncmp(line[i], "part ", 5) != 0)
+			break;
+		if(getfields(line[i], field, 4, 0, " ") != 4)
+			break;
+		start = strtoul(field[2], 0, 0);
+		end = strtoul(field[3], 0, 0);
+		if(start >= end)
+			break;
+		addpart(d, field[1], pstart+start, pstart+end);
+	}
+	return 0;
+}
+
+int
+mbrpart(Disk *d)
+{
+	uchar mbrbuf[512];
+	char name[10];
+	Dospart *dp;
+	ulong taboffset, start, end;
+	ulong firstxpart, nxtxpart;
+	int i, nplan9, havedos;
+
+#define readmbr()	\
+	if(readdisk(d, mbrbuf, (uvlong)taboffset*512, sizeof mbrbuf) == -1	\
+	|| mbrbuf[0x1FE] != 0x55 || mbrbuf[0x1FF] != 0xAA)	\
+		return 0
+
+	if(d->secsize > 512)
+		return 0;
+	dp = (Dospart*)&mbrbuf[0x1BE];
+	taboffset = 0;
+
+	if(1) {
+		/* get the MBR (allowing for DMDDO) */
+		readmbr();
+		for(i = 0; i < 4; i++) {
+			if(dp[i].type == DMDDO) {
+				taboffset = 63;
+				readmbr();
+				i = -1;		/* start over */
+			}
+		}
+	}
+
+	/*
+	 * Read the partitions, first from the MBR and then
+	 * from successive extended partition tables.
+	 */
+	nplan9 = 0;
+	havedos = 0;
+	firstxpart = 0;
+	for(;;) {
+		readmbr();
+		nxtxpart = 0;
+		for(i = 0; i < 4; i++) {
+			/* partition offsets are relative to taboffset */
+			start = taboffset+GLONG(dp[i].start);
+			end = start+GLONG(dp[i].len);
+			if(dp[i].type == PLAN9) {
+				if(nplan9 == 0)
+					strcpy(name, "plan9");
+				else
+					sprint(name, "plan9.%d", nplan9);
+				addpart(d, name, start, end);
+				p9part(d, name, start);
+				nplan9++;
+			}
+
+			if(!havedos && isdos(dp[i].type)) {
+				havedos = 1;
+				addpart(d, "dos", start, end);
+			}
+
+			/* nxtxpart is relative to firstxpart (or 0), not taboffset */
+			if(isextend(dp[i].type))
+				nxtxpart = start-taboffset+firstxpart;
+		}
+		if(!nxtxpart)
+			break;
+		if(!firstxpart)
+			firstxpart = nxtxpart;
+		taboffset = nxtxpart;
+	}
+	return nplan9 + havedos;
+}
+
+void
+partall(void)
+{
+	Disk *d;
+	Dir *ent;
+	char *name;
+	int fd, i, n;
+
+	fd = open("#S", OREAD);
+	if(fd == -1) {
+		fprint(2, "No disk\n");
+		return;
+	}
+
+	while((n = dirread(fd, &ent)) > 0) {
+		for(i = 0; i < n; i++) {
+			if(ent[i].mode & DMDIR) {
+				name = smprint("#S/%s/data", ent[i].name);
+				d = opendisk(name, 1, 0);
+				if(!d) {
+					fprint(2, "%s: %r\n", name);
+					continue;
+				}
+				// XXX not safe yet: if(!mbrpart(d) && !cdpart(d) && !p9part(d, "data", 0))
+				if(!mbrpart(d) && !cdpart(d))
+					fprint(2, "%s: no partitions\n", name);
+				close(d->fd);
+			}
+		}
+	}
+	close(fd);
+}
+
+void
+main(int argc, char **argv)
+{
+	USED(argc, argv);
+
+	fmtinstall('r', errfmt);
+
+	bind("#c", "/dev", MBEFORE);
+	open("/dev/cons", OREAD);
+	open("/dev/cons", OWRITE);
+	open("/dev/cons", OWRITE);
+
+	partall();
+
+	close(0);
+	close(1);
+	close(2);
+
+	exec("/boot/boot2", argv);
+	exits(0);
+}
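
GSHORT/GLONG assemble values a byte at a time, so the table parsing is endianness-safe regardless of host byte order. A minimal sketch of decoding one MBR slot the way mbrpart does (ignoring the taboffset adjustment used for extended tables):

	/* sketch: pulling one partition entry out of a 512-byte MBR sector */
	Dospart *dp = (Dospart*)&mbrbuf[0x1BE];
	ulong start, len;

	start = GLONG(dp[0].start);	/* first sector, little-endian LBA */
	len = GLONG(dp[0].len);		/* sector count */
	if(dp[0].type == PLAN9)
		addpart(d, "plan9", start, start+len);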
--- /dev/null
+++ b/sys/src/9/xen/etherxen.c
@@ -1,0 +1,494 @@
+/*
+ * Xen virtual network interface frontend
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+#include "etherif.h"
+
+#define LOG(a)
+
+enum {
+	Nvif	= 4,
+	Ntb		= 16,
+	Nrb		= 32,
+};
+
+typedef struct Ctlr Ctlr;
+typedef union Txframe Txframe;
+typedef union Rxframe Rxframe;
+
+struct Ctlr {
+	int	attached;
+	int	backend;
+	int	vifno;
+	int	evtchn;
+	int rxcopy;
+	Txframe	*txframes;
+	Txframe	*freetxframe;
+	Rxframe	*rxframes;
+	netif_tx_front_ring_t txring;
+	netif_rx_front_ring_t rxring;
+	int	*txrefs;
+	int	*rxrefs;
+	int	txringref;
+	int	rxringref;
+	Lock	txlock;
+	QLock	attachlock;
+	Rendez	wtxframe;
+	Rendez	wtxblock;
+
+	ulong interrupts;
+	ulong transmits;
+	ulong receives;
+	ulong txerrors;
+	ulong rxerrors;
+	ulong rxoverflows;
+};
+
+union Txframe {
+	struct {
+		Txframe *next;
+		char data[2];
+	} tf;
+	uchar page[BY2PG];
+};
+
+union Rxframe {
+	uchar page[BY2PG];
+};
+
+static int nvif;
+
+/*
+ * conversions to machine page numbers, pages and addresses
+ */
+#define MFN(pa)		(patomfn[(pa)>>PGSHIFT])
+#define MFNPG(pa)		(MFN(pa)<<PGSHIFT)
+#define PA2MA(pa)		(MFNPG(pa) | PGOFF(pa))
+#define VA2MA(va)		PA2MA(PADDR(va))
+
+static int
+puttxrequest(Ctlr *ctlr, netif_tx_request_t *tr)
+{
+	netif_tx_request_t *req;
+	int i, notify;
+
+	LOG(dprint("puttxrequest id %d ref %d size %d\n", tr->id, tr->gref, tr->size);)
+	i = ctlr->txring.req_prod_pvt;
+	req = RING_GET_REQUEST(&ctlr->txring, i);
+	memmove(req, tr, sizeof(*req));
+	ctlr->txring.req_prod_pvt = i+1;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ctlr->txring, notify);
+	return notify;
+}
+
+static int
+putrxrequest(Ctlr *ctlr, netif_rx_request_t *rr)
+{
+	netif_rx_request_t *req;
+	int i;
+	int notify;
+
+	LOG(dprint("putrxrequest %d %d\n", rr->id, rr->gref);)
+	i = ctlr->rxring.req_prod_pvt;
+	req = RING_GET_REQUEST(&ctlr->rxring, i);
+	memmove(req, rr, sizeof(*req));
+	ctlr->rxring.req_prod_pvt = i+1;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ctlr->rxring, notify);
+	return notify;
+}
+
+static int
+gettxresponse(Ctlr *ctlr, netif_tx_response_t *tr)
+{
+	int i, avail;
+	netif_tx_response_t *rx;
+
+	RING_FINAL_CHECK_FOR_RESPONSES(&ctlr->txring, avail);
+	if (!avail)
+		return 0;
+	i = ctlr->txring.rsp_cons;
+	rx = RING_GET_RESPONSE(&ctlr->txring, i);
+	LOG(dprint("gettxresponse id %d status %d\n", rx->id, rx->status);)
+	if(rx->status)
+		ctlr->txerrors++;
+	*tr = *rx;
+	ctlr->txring.rsp_cons = ++i;
+	return 1;
+}
+
+static int
+getrxresponse(Ctlr *ctlr, netif_rx_response_t* rr)
+{
+	int i, avail;
+	netif_rx_response_t *rx;
+
+	RING_FINAL_CHECK_FOR_RESPONSES(&ctlr->rxring, avail);
+	if (!avail)
+		return 0;
+	i = ctlr->rxring.rsp_cons;
+	rx = RING_GET_RESPONSE(&ctlr->rxring, i);
+	LOG(dprint("getrxresponse id %d offset %d flags %ux status %d\n", rx->id, rx->offset, rx->flags, rx->status);)
+	*rr = *rx;
+	ctlr->rxring.rsp_cons = ++i;
+	return 1;
+}
+
+static int
+ringinit(Ctlr *ctlr, char *a)
+{
+	netif_tx_sring_t *txr;
+	netif_rx_sring_t *rxr;
+
+	txr = (netif_tx_sring_t*)a;
+	memset(txr, 0, BY2PG);
+	SHARED_RING_INIT(txr);
+	FRONT_RING_INIT(&ctlr->txring, txr, BY2PG);
+	ctlr->txringref = shareframe(ctlr->backend, txr, 1);
+
+	rxr = (netif_rx_sring_t*)(a+BY2PG);
+	SHARED_RING_INIT(rxr);
+	FRONT_RING_INIT(&ctlr->rxring, rxr, BY2PG);
+	ctlr->rxringref = shareframe(ctlr->backend, rxr, 1);
+
+	return 2*BY2PG;
+}
+
+static int
+vifsend(Ctlr *ctlr, Block *bp)
+{
+	netif_tx_request_t tr;
+	Txframe *tx;
+	int id;
+
+	ilock(&ctlr->txlock);
+	tx = ctlr->freetxframe;
+	ctlr->freetxframe = tx->tf.next;
+	iunlock(&ctlr->txlock);
+	id = tx - ctlr->txframes;
+	tr.gref = ctlr->txrefs[id];
+	tr.offset = tx->tf.data - (char*)tx;
+	tr.flags = 0;	// XXX checksum?
+	tr.id = id;
+	tr.size = BLEN(bp);
+	memmove(tx->tf.data, bp->rp, tr.size);
+	return puttxrequest(ctlr, &tr);
+}
+
+static int
+vifsenddone(Ctlr *ctlr, netif_tx_response_t *tr)
+{
+	Txframe *tx;
+
+	tx = &ctlr->txframes[tr->id];	// XXX check validity of id
+	ilock(&ctlr->txlock);
+	tx->tf.next = ctlr->freetxframe;
+	ctlr->freetxframe = tx;
+	iunlock(&ctlr->txlock);
+	return 1;
+}
+
+static int
+vifrecv(Ctlr *ctlr, Rxframe *rx)
+{
+	netif_rx_request_t rr;
+	int id;
+	int ref;
+
+	id = rx - ctlr->rxframes;
+	if (ctlr->rxcopy)
+		ref = ctlr->rxrefs[id];
+	else {
+		ref = donateframe(ctlr->backend, rx);
+		ctlr->rxrefs[id] = ref;
+	}
+	rr.id = id;
+	rr.gref = ref;
+	return putrxrequest(ctlr, &rr);
+}
+
+static int
+vifrecvdone(Ether *ether, netif_rx_response_t *rr)
+{
+	Ctlr *ctlr;
+	Rxframe *rx;
+	Block *bp;
+	int len;
+
+	ctlr = ether->ctlr;
+	rx = &ctlr->rxframes[rr->id];	// XXX check validity of id
+	if (!ctlr->rxcopy)
+		acceptframe(ctlr->rxrefs[rr->id], rx);
+	if ((len = rr->status) <= 0) {
+		ctlr->rxerrors++;
+		vifrecv(ctlr, rx);
+		return 1;
+	}
+	if(len > sizeof(Etherpkt) || (bp = iallocb(sizeof(Etherpkt))) == nil) {
+		ctlr->rxoverflows++;
+		vifrecv(ctlr, rx);
+		return 1;
+	}
+
+	ctlr->receives++;
+	memmove(bp->base, rx->page + rr->offset, len);
+	vifrecv(ctlr, rx);
+
+	bp->rp = bp->base;
+	bp->wp = bp->rp + len;
+	bp->free = 0;
+	bp->next = 0;
+	bp->list = 0;
+	if (rr->flags & NETRXF_data_validated)
+		bp->flag |= Btcpck|Budpck;
+	etheriq(ether, bp, 1);
+	return 0;
+}
+
+static int
+wtxframe(void *a)
+{
+	return ((struct Ctlr*)a)->freetxframe != 0;
+}
+
+static int
+wtxblock(void *a)
+{
+	return qcanread(((struct Ether*)a)->oq);
+}
+
+static void
+etherxenproc(void *a)
+{
+	Ether *ether = a;
+	Ctlr *ctlr = ether->ctlr;
+	Block *bp;
+	int notify;
+
+	for (;;) {
+		while (ctlr->freetxframe == 0)
+			sleep(&ctlr->wtxframe, wtxframe, ctlr);
+		while ((bp = qget(ether->oq)) == 0)
+			sleep(&ctlr->wtxblock, wtxblock, ether);
+		notify = vifsend(ctlr, bp);
+		freeb(bp);
+		if (notify)
+			xenchannotify(ctlr->evtchn);
+	}
+}
+
+static void
+etherxentransmit(Ether *ether)
+{
+	Ctlr *ctlr;
+	
+	ctlr = ether->ctlr;
+	ctlr->transmits++;
+	wakeup(&ctlr->wtxblock);
+}
+
+static void
+etherxenintr(Ureg*, void *a)
+{
+	Ether *ether = a;
+	Ctlr *ctlr = ether->ctlr;
+	int txnotify;
+	netif_tx_response_t tr;
+	netif_rx_response_t rr;
+
+	ctlr->interrupts++;
+	txnotify = 0;
+	while (getrxresponse(ctlr, &rr))
+		vifrecvdone(ether, &rr);
+	while (gettxresponse(ctlr, &tr)) {
+		if (vifsenddone(ctlr, &tr))
+			txnotify = 1;
+	}
+	if (txnotify)
+		wakeup(&ctlr->wtxframe);
+}
+
+static long
+etherxenctl(Ether *ether, void *buf, long n)
+{
+	uchar ea[Eaddrlen];
+	Cmdbuf *cb;
+
+	cb = parsecmd(buf, n);
+	if(cb->nf >= 2
+	&& strcmp(cb->f[0], "ea")==0
+	&& parseether(ea, cb->f[1]) == 0){
+		free(cb);
+		memmove(ether->ea, ea, Eaddrlen);
+		memmove(ether->addr, ether->ea, Eaddrlen);
+		return 0;
+	}
+	free(cb);
+	error(Ebadctl);
+	return -1;	/* not reached */
+}
+
+static void
+backendconnect(Ctlr *ctlr)
+{
+	char dir[64];
+	char buf[64];
+
+	sprint(dir, "device/vif/%d/", ctlr->vifno);
+	xenstore_setd(dir, "state", XenbusStateInitialising);
+	xenstore_setd(dir, "tx-ring-ref", ctlr->txringref);
+	xenstore_setd(dir, "rx-ring-ref", ctlr->rxringref);
+	xenstore_setd(dir, "event-channel", ctlr->evtchn);
+	print("etherxen: request-rx-copy=%d\n", ctlr->rxcopy);
+	if (ctlr->rxcopy)
+		xenstore_setd(dir, "request-rx-copy", 1);
+	xenstore_setd(dir, "state", XenbusStateConnected);
+	xenstore_gets(dir, "backend", buf, sizeof buf);
+	sprint(dir, "%s/", buf);
+	HYPERVISOR_yield();
+	xenstore_gets(dir, "state", buf, sizeof buf);
+	while (strtol(buf, 0, 0) != XenbusStateConnected) {
+		print("etherxen: waiting for vif %d to connect\n", ctlr->vifno);
+		tsleep(&up->sleep, return0, 0, 1000);
+		xenstore_gets(dir, "state", buf, sizeof buf);
+	}
+}
+
+static void
+etherxenattach(Ether *ether)
+{
+	Ctlr *ctlr;
+	char *p;
+	Txframe *tx;
+	int npage, i;
+
+	LOG(dprint("etherxenattach\n");)
+	ctlr = ether->ctlr;
+	qlock(&ctlr->attachlock);
+	if (ctlr->attached) {
+		qunlock(&ctlr->attachlock);
+		return;
+	}
+
+	npage = 2 + Ntb + Nrb;
+	p = (char*)xspanalloc(npage<<PGSHIFT, BY2PG, 0);
+	p += ringinit(ctlr, p);
+	ctlr->txrefs = malloc(Ntb*sizeof(int));
+	ctlr->rxrefs = malloc(Nrb*sizeof(int));
+	ctlr->txframes = (Txframe*)p;
+	for (i = 0; i < Ntb; i++, p += BY2PG) {
+		tx = (Txframe*)p;
+		if (i != Ntb-1)
+			tx->tf.next = tx + 1;
+		else
+			tx->tf.next = 0;
+		ctlr->txrefs[i] = shareframe(ctlr->backend, tx, 0);
+	}
+	ctlr->freetxframe = ctlr->txframes;
+	ctlr->rxframes = (Rxframe*)p;
+	for (i = 0; i < Nrb; i++, p += BY2PG) {
+		if (ctlr->rxcopy)
+			ctlr->rxrefs[i] = shareframe(ctlr->backend, (Rxframe*)p, 1);
+		vifrecv(ctlr, (Rxframe*)p);
+	}
+	
+	ctlr->evtchn = xenchanalloc(ctlr->backend);
+	intrenable(ctlr->evtchn, etherxenintr, ether, BUSUNKNOWN, "vif");
+
+	kproc("vif", etherxenproc, ether);
+	backendconnect(ctlr);
+	ctlr->attached = 1;
+	qunlock(&ctlr->attachlock);
+}
+
+static void
+etherxenmulticast(void* arg, uchar* addr, int on)
+{
+	USED(arg, addr, on);
+}
+
+static long
+ifstat(Ether* ether, void* a, long n, ulong offset)
+{
+	Ctlr *ctlr;
+	char *buf, *p;
+	int l, len;
+
+	ctlr = ether->ctlr;
+	if(n == 0)
+		return 0;
+	if((p = malloc(READSTR)) == nil)
+		error(Enomem);
+	l = snprint(p, READSTR, "intr: %lud\n", ctlr->interrupts);
+	l += snprint(p+l, READSTR-l, "transmits: %lud\n", ctlr->transmits);
+	l += snprint(p+l, READSTR-l, "receives: %lud\n", ctlr->receives);
+	l += snprint(p+l, READSTR-l, "txerrors: %lud\n", ctlr->txerrors);
+	l += snprint(p+l, READSTR-l, "rxerrors: %lud\n", ctlr->rxerrors);
+	snprint(p+l, READSTR-l, "rxoverflows: %lud\n", ctlr->rxoverflows);
+
+	buf = a;
+	len = readstr(offset, buf, n, p);
+	free(p);
+
+	return len;
+}
+
+static int
+pnp(Ether* ether)
+{
+	uchar ea[Eaddrlen];
+	char dir[64];
+	char buf[64];
+	Ctlr *ctlr;
+	int domid, rxcopy;
+
+	if (nvif >= Nvif)		/* limit to Nvif interfaces */
+		return -1;
+	sprint(dir, "device/vif/%d/", nvif);
+	if (xenstore_gets(dir, "backend-id", buf, sizeof buf) <= 0)
+		return -1;
+	domid = strtol(buf, 0, 0);
+	if (xenstore_gets(dir, "mac", buf, sizeof buf) <= 0)
+		return -1;
+	if (parseether(ea, buf) < 0)
+		return -1;
+	if (xenstore_gets(dir, "backend", buf, sizeof buf) <= 0)
+		return 1;
+	sprint(dir, "%s/", buf);
+	rxcopy = 0;
+	if (xenstore_gets(dir, "feature-rx-copy", buf, sizeof buf) >= 0)
+		rxcopy = strtol(buf, 0, 0);
+	ether->ctlr = ctlr = malloc(sizeof(Ctlr));
+	memset(ctlr, 0, sizeof(Ctlr));
+	ctlr->backend = domid;
+	ctlr->vifno = nvif++;
+	ctlr->rxcopy = rxcopy;
+
+	memmove(ether->ea, ea, sizeof ether->ea);
+	ether->mbps = 100;	// XXX what speed?
+	ether->attach = etherxenattach;
+	ether->detach = nil;
+	ether->transmit = etherxentransmit;
+	ether->irq = -1;
+	ether->tbdf = BUSUNKNOWN;
+	ether->interrupt = etherxenintr;
+	ether->ifstat = ifstat;
+	ether->ctl = etherxenctl;
+	ether->promiscuous = nil;
+	ether->multicast = etherxenmulticast;
+	ether->arg = ether;
+	return 0;
+}
+
+void
+etherxenlink(void)
+{
+	addethercard("xen", pnp);
+}
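
The only ctl message the driver accepts is "ea" to override the MAC address (see etherxenctl). From user space that is a plain write to the interface's ctl file; a hypothetical example, assuming the usual /net binding:

	#include <u.h>
	#include <libc.h>

	void
	main(void)
	{
		int fd;

		fd = open("/net/ether0/ctl", OWRITE);	/* hypothetical path */
		if(fd < 0)
			sysfatal("open: %r");
		if(fprint(fd, "ea 00163e112233") < 0)	/* parsed by parseether */
			sysfatal("ctl: %r");
		exits(nil);
	}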
--- /dev/null
+++ b/sys/src/9/xen/fns.h
@@ -1,0 +1,164 @@
+#include "../port/portfns.h"
+
+Dirtab*	addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong));
+void	archinit(void);
+void	bootargs(ulong);
+ulong	cankaddr(ulong);
+int		cistrcmp(char*, char*);
+int		cistrncmp(char*, char*, int);
+#define	clearmmucache()				/* x86 doesn't have one */
+void	clockintr(Ureg*, void*);
+int		(*cmpswap)(long*, long, long);
+int		cmpswap486(long*, long, long);
+void	(*coherence)(void);
+void	cpuid(int, ulong regs[]);
+int		cpuidentify(void);
+void	cpuidprint(void);
+void	(*cycles)(uvlong*);
+void	delay(int);
+#define	evenaddr(x)				/* x86 doesn't care */
+void	fpclear(void);
+void	fpenv(FPsave*);
+void	fpinit(void);
+void	fpoff(void);
+void	(*fprestore)(FPsave*);
+void	(*fpsave)(FPsave*);
+void	fpsserestore(FPsave*);
+void	fpsserestore0(FPsave*);
+void	fpssesave(FPsave*);
+void	fpssesave0(FPsave*);
+ulong	fpstatus(void);
+void	fpx87restore(FPsave*);
+void	fpx87save(FPsave*);
+ulong	getcr4(void);
+char*	getconf(char*);
+void	guesscpuhz(int);
+void	halt(void);
+void	mwait(void*);
+void	i8042reset(void);
+void	i8253enable(void);
+void	i8253init(void);
+void	i8253link(void);
+uvlong	i8253read(uvlong*);
+void	i8253timerset(uvlong);
+int	i8259disable(int);
+int	i8259enable(Vctl*);
+void	i8259init(void);
+int	i8259isr(int);
+void	i8259on(void);
+void	i8259off(void);
+int	i8259vecno(int);
+void	idle(void);
+void	idlehands(void);
+int	inb(int);
+void	insb(int, void*, int);
+ushort	ins(int);
+void	inss(int, void*, int);
+ulong	inl(int);
+void	insl(int, void*, int);
+int	intrdisable(int, void (*)(Ureg *, void *), void*, int, char*);
+void	intrenable(int, void (*)(Ureg*, void*), void*, int, char*);
+int	ioalloc(int, int, int, char*);
+int	isaconfig(char*, int, ISAConf*);
+void	kbdenable(void);
+#define	kmapinval()
+void	lgdt(ushort[3]);					// XXX remove and in l.s
+void	lidt(ushort[3]);					// XXX remove and in l.s
+void	links(void);
+void	ltr(ulong);						// XXX remove?
+void	mach0init(void);
+void	mathinit(void);
+void	mb386(void);
+void	mb586(void);
+void	mfence(void);
+void mmuflushtlb(Page*);
+void	mmuinit(void);
+ulong	mmukmap(ulong, ulong, int);
+int	mmukmapsync(ulong);
+#define	mmunewpage(x)
+ulong*	mmuwalk(ulong*, ulong, int, int);
+int	mtrr(uvlong, uvlong, char *);
+int	mtrrprint(char *, long);
+void	outb(int, int);
+void	outsb(int, void*, int);
+void	outs(int, ushort);
+void	outss(int, void*, int);
+void	outl(int, ulong);
+void	outsl(int, void*, int);
+void	printcpufreq(void);
+void	procrestore(Proc*);
+void	procsave(Proc*);
+void	procsetup(Proc*);
+void	procfork(Proc*);
+void	putcr4(ulong);
+int		rdmsr(int, vlong*);
+int		screenprint(char*, ...);			/* debugging */
+void	(*screenputs)(char*, int);
+void	touser(void*);
+void	trap(Ureg*);
+void	trapenable(int, void (*)(Ureg*, void*), void*, char*);
+void	trapinit(void);
+int		tas(void*);
+#define	userureg(ur) (((ur)->cs & 0xFFFF) == UESEL)
+void	vectortable(void);
+int		wrmsr(int, vlong);
+uint	xchgl(uint*, uint);
+uint	xchgw(ushort*, uint);
+uint	xchgb(uchar*, uint);
+
+#define	waserror()	(up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1]))
+#define KADDR(a)	((void*)((ulong)(a)|KZERO))
+#define PADDR(a)	((ulong)(a)&~KZERO)
+
+#define	dcflush(a, b)
+
+/* Xen functions */
+#define rmb()	coherence()
+#define wmb()	coherence()
+void mb(void);
+void hypervisor_callback(void), failsafe_callback(void);
+void xenconsinit(void);
+ulong mmumapframe(ulong, ulong);
+void mmumapcpu0(void);
+void dprint(char *, ...);
+void xenupdate(ulong *ptr, ulong val);
+void xenupdatema(ulong *ptr, uvlong val);
+int xenpdptpin(ulong va);
+int xenpgdpin(ulong va);
+int xenptpin(ulong va);
+void xenptunpin(ulong va);
+void xenptswitch(ulong pa);
+void xentlbflush(void);
+int ffs(ulong);
+void xengrantinit(void);
+int xengrant(domid_t domid, ulong frame, int flags);
+int xengrantend(int ref);
+void acceptframe(int ref, void *va);
+int donateframe(int domid, void *va);
+int shareframe(int domid, void *va, int write);
+void xenchannotify(int);
+void xenupcall(Ureg*);
+ulong xenwallclock(void);
+int xenstore_read(char*, char*, int);
+void xenstore_write(char*, char*);
+void xenstore_setd(char *dir, char *node, int value);
+int xenstore_gets(char *dir, char *node, char *buf, int buflen);
+int xenchanalloc(int);
+
+long HYPERVISOR_set_timer_op(uvlong timeout);
+int HYPERVISOR_set_trap_table(trap_info_t *table);
+int HYPERVISOR_mmu_update(mmu_update_t *req, int count, int *success_count, domid_t domid);
+int HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, int *scount, domid_t domid);
+int HYPERVISOR_set_gdt(ulong *frame_list, int entries);
+int HYPERVISOR_stack_switch(ulong ss, ulong esp);
+int HYPERVISOR_set_callbacks(ulong evss, ulong evfunc, ulong fsss, ulong fsfunc);
+int HYPERVISOR_fpu_taskswitch(void);
+int HYPERVISOR_yield(void);
+int HYPERVISOR_block(void);
+int HYPERVISOR_shutdown(int);
+int HYPERVISOR_multicall(void *call_list, int nr_calls);
+int HYPERVISOR_event_channel_op(void *op);
+int HYPERVISOR_xen_version(int cmd, void *arg);
+int HYPERVISOR_console_io(int cmd, int count, char *str);
+int HYPERVISOR_grant_table_op(int cmd, gnttab_setup_table_t *setup, int count);
+int HYPERVISOR_memory_op(int cmd, struct xen_memory_reservation *arg);
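
HYPERVISOR_console_io writes straight to the domain console, which makes it usable before any driver is up; dprint presumably sits on top of it. A minimal sketch (CONSOLEIO_write is the command constant from Xen's public headers; treat the name as an assumption here):

	/* sketch: early debug output via the console-io hypercall */
	void
	earlyprint(char *s)
	{
		HYPERVISOR_console_io(CONSOLEIO_write, strlen(s), s);
	}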
--- /dev/null
+++ b/sys/src/9/xen/l.s
@@ -1,0 +1,678 @@
+#include "xendefs.h"
+#include "mem.h"
+
+/*
+ * Some machine instructions not handled by 8[al].
+ */
+#define OP16		BYTE $0x66
+#define DELAY		BYTE $0xEB; BYTE $0x00	/* JMP .+2 */
+#define CPUID		BYTE $0x0F; BYTE $0xA2	/* CPUID, argument in AX */
+#define WRMSR		BYTE $0x0F; BYTE $0x30	/* WRMSR, argument in AX/DX (lo/hi) */
+#define RDTSC 		BYTE $0x0F; BYTE $0x31	/* RDTSC, result in AX/DX (lo/hi) */
+#define RDMSR		BYTE $0x0F; BYTE $0x32	/* RDMSR, result in AX/DX (lo/hi) */
+#define HLT		BYTE $0xF4
+#define BSFL		BYTE $0xf; BYTE $0xbc; BYTE $0xc0	/* bsfl AX,AX */
+
+/*
+ * Macros for calculating offsets within the page directory base
+ * and page tables. Note that these are assembler-specific hence
+ * the '<<2'.
+ */
+#define PDO(a)		(((((a))>>22) & 0x03FF)<<2)
+#define PTO(a)		(((((a))>>12) & 0x03FF)<<2)
+
+/*
+ * Entry point from XEN's "linux" builder.
+ * At this point RAM from 0..4M ("physical") is mapped at KZERO,
+ * the kernel is loaded, we're running on a boot stack and a 
+ * boot page table.  The start_info structure describes the
+ * situation, and is pointed to by SI.
+ * The stack is the highest used address.
+ */
+TEXT _start(SB), $0
+	MOVL	SP, xentop(SB)		/* set global for top of mapped region */
+	MOVL	SI, xenstart(SB)		/* set global to start_info_t */
+	MOVL	$0, AX			/* clear EFLAGS */
+	PUSHL	AX
+	POPFL
+	CALL	mmumapcpu0(SB)	/* make mapping before using stack */
+	MOVL	$(MACHADDR+MACHSIZE-4), SP	/* set stack */
+	CALL	main(SB)
+
+/*
+ * Park a processor. Should never fall through a return from main to here,
+ * should only be called by application processors when shutting down.
+ */
+TEXT idle(SB), $0
+_idle:
+	STI
+	HLT
+	JMP	_idle
+
+/*
+ * Read/write various system registers.
+ * CR4 and the 'model specific registers' should only be read/written
+ * after it has been determined the processor supports them
+ */
+TEXT lgdt(SB), $0				/* GDTR - global descriptor table */
+	MOVL	gdtptr+0(FP), AX
+	MOVL	(AX), GDTR
+	RET
+
+TEXT lidt(SB), $0				/* IDTR - interrupt descriptor table */
+	MOVL	idtptr+0(FP), AX
+	MOVL	(AX), IDTR
+	RET
+
+TEXT ltr(SB), $0				/* TR - task register */
+	MOVL	tptr+0(FP), AX
+	MOVW	AX, TASK
+	RET
+
+TEXT rtsr(SB), $0
+	MOVW	TASK, AX
+	RET
+
+TEXT _cycles(SB), $0				/* time stamp counter; cycles since power up */
+	RDTSC
+	MOVL	vlong+0(FP), CX			/* &vlong */
+	MOVL	AX, 0(CX)			/* lo */
+	MOVL	DX, 4(CX)			/* hi */
+	RET
+
+TEXT rdmsr(SB), $0				/* model-specific register */
+	MOVL	index+0(FP), CX
+	RDMSR
+	MOVL	vlong+4(FP), CX			/* &vlong */
+	MOVL	AX, 0(CX)			/* lo */
+	MOVL	DX, 4(CX)			/* hi */
+	RET
+	
+TEXT wrmsr(SB), $0
+	MOVL	index+0(FP), CX
+	MOVL	lo+4(FP), AX
+	MOVL	hi+8(FP), DX
+/* Xen doesn't let us do this
+	WRMSR
+ */
+	RET
+
+/*
+ * Try to determine the CPU type which requires fiddling with EFLAGS.
+ * If the Id bit can be toggled then the CPUID instruction can be used
+ * to determine CPU identity and features. First have to check if it's
+ * a 386 (Ac bit can't be set). If it's not a 386 and the Id bit can't be
+ * toggled then it's an older 486 of some kind.
+ *
+ *	cpuid(fun, regs[4]);
+ */
+TEXT cpuid(SB), $0
+	MOVL	$0x240000, AX
+	PUSHL	AX
+	POPFL					/* set Id|Ac */
+	PUSHFL
+	POPL	BX				/* retrieve value */
+	MOVL	$0, AX
+	PUSHL	AX
+	POPFL					/* clear Id|Ac, EFLAGS initialised */
+	PUSHFL
+	POPL	AX				/* retrieve value */
+	XORL	BX, AX
+	TESTL	$0x040000, AX			/* Ac */
+	JZ	_cpu386				/* can't set this bit on 386 */
+	TESTL	$0x200000, AX			/* Id */
+	JZ	_cpu486				/* can't toggle this bit on some 486 */
+	MOVL	fn+0(FP), AX
+	CPUID
+	JMP	_cpuid
+_cpu486:
+	MOVL	$0x400, AX
+	JMP	_maybezapax
+_cpu386:
+	MOVL	$0x300, AX
+_maybezapax:
+	CMPL	fn+0(FP), $1
+	JE	_zaprest
+	XORL	AX, AX
+_zaprest:
+	XORL	BX, BX
+	XORL	CX, CX
+	XORL	DX, DX
+_cpuid:
+	MOVL	regs+4(FP), BP
+	MOVL	AX, 0(BP)
+	MOVL	BX, 4(BP)
+	MOVL	CX, 8(BP)
+	MOVL	DX, 12(BP)
+	RET
+
+/*
+ * Floating point.
+ * Note: the encodings for the FCLEX, FINIT, FSAVE, FSTCW, FSTENV and FSTSW
+ * instructions do NOT have the WAIT prefix byte (i.e. they act like their
+ * FNxxx variations) so WAIT instructions must be explicitly placed in the
+ * code as necessary.
+ */
+#define	FPOFF(l)						 ;\
+	MOVL	CR0, AX 					 ;\
+	ANDL	$0xC, AX			/* EM, TS */	 ;\
+	CMPL	AX, $0x8					 ;\
+	JEQ 	l						 ;\
+	WAIT							 ;\
+l:								 ;\
+	MOVL	CR0, AX						 ;\
+	ANDL	$~0x4, AX			/* EM=0 */	 ;\
+	ORL	$0x28, AX			/* NE=1, TS=1 */ ;\
+	MOVL	AX, CR0
+
+#define	FPON							 ;\
+	MOVL	CR0, AX						 ;\
+	ANDL	$~0xC, AX			/* EM=0, TS=0 */ ;\
+	MOVL	AX, CR0
+	
+TEXT fpoff(SB), $0				/* disable */
+	FPOFF(l1)
+	RET
+
+TEXT fpinit(SB), $0				/* enable and init */
+	FPON
+	FINIT
+	WAIT
+	/* setfcr(FPPDBL|FPRNR|FPINVAL|FPZDIV|FPOVFL) */
+	/* note that low 6 bits are masks, not enables, on this chip */
+	PUSHW	$0x0232
+	FLDCW	0(SP)
+	POPW	AX
+	WAIT
+	RET
+
+TEXT fpx87save(SB), $0				/* save state and disable */
+	MOVL	p+0(FP), AX
+	FSAVE	0(AX)				/* no WAIT */
+	FPOFF(l2)
+	RET
+
+TEXT fpx87restore(SB), $0				/* enable and restore state */
+	FPON
+	MOVL	p+0(FP), AX
+	FRSTOR	0(AX)
+	WAIT
+	RET
+
+TEXT fpstatus(SB), $0				/* get floating point status */
+	FSTSW	AX
+	RET
+
+TEXT fpenv(SB), $0				/* save state without waiting */
+	MOVL	p+0(FP), AX
+	FSTENV	0(AX)
+	RET
+
+TEXT fpclear(SB), $0				/* clear pending exceptions */
+	FPON
+	FCLEX					/* no WAIT */
+	FPOFF(l3)
+	RET
+
+/*
+ * Test-And-Set
+ */
+TEXT tas(SB), $0
+	MOVL	$0xDEADDEAD, AX
+	MOVL	lock+0(FP), BX
+	XCHGL	AX, (BX)			/* lock->key */
+	RET
+
+TEXT _xinc(SB), $0				/* void _xinc(long*); */
+	MOVL	l+0(FP), AX
+	LOCK;	INCL 0(AX)
+	RET
+
+TEXT _xdec(SB), $0				/* long _xdec(long*); */
+	MOVL	l+0(FP), BX
+	XORL	AX, AX
+	LOCK;	DECL 0(BX)
+	JLT	_xdeclt
+	JGT	_xdecgt
+	RET
+_xdecgt:
+	INCL	AX
+	RET
+_xdeclt:
+	DECL	AX
+	RET
+
+TEXT	getstack(SB), $0
+	MOVL	SP, AX
+	RET
+TEXT mb386(SB), $0
+	POPL	AX				/* return PC */
+	PUSHFL
+	PUSHL	CS
+	PUSHL	AX
+	IRETL
+
+TEXT mb586(SB), $0
+	XORL	AX, AX
+	CPUID
+	RET
+
+TEXT sfence(SB), $0
+	BYTE $0x0f
+	BYTE $0xae
+	BYTE $0xf8
+	RET
+
+TEXT lfence(SB), $0
+	BYTE $0x0f
+	BYTE $0xae
+	BYTE $0xe8
+	RET
+
+TEXT mfence(SB), $0
+	BYTE $0x0f
+	BYTE $0xae
+	BYTE $0xf0
+	RET
+
+
+TEXT xchgw(SB), $0
+	MOVL	v+4(FP), AX
+	MOVL	p+0(FP), BX
+	XCHGW	AX, (BX)
+	RET
+
+TEXT xchgb(SB), $0
+	MOVL	v+4(FP), AX
+	MOVL	p+0(FP), BX
+	XCHGB	AX, (BX)
+	RET
+
+TEXT xchgl(SB), $0
+	MOVL	v+4(FP), AX
+	MOVL	p+0(FP), BX
+	XCHGL	AX, (BX)
+	RET
+
+/* Return the position of the first bit set.  Undefined if zero. */
+TEXT ffs(SB), $0
+	MOVL	v+0(FP), AX
+	BSFL
+	RET
+
+TEXT cmpswap486(SB), $0
+	MOVL	addr+0(FP), BX
+	MOVL	old+4(FP), AX
+	MOVL	new+8(FP), CX
+	LOCK
+	BYTE $0x0F; BYTE $0xB1; BYTE $0x0B	/* CMPXCHGL CX, (BX) */
+	JNZ didnt
+	MOVL	$1, AX
+	RET
+didnt:
+	XORL	AX,AX
+	RET
+
+TEXT mul64fract(SB), $0
+	MOVL	r+0(FP), CX
+	XORL	BX, BX				/* BX = 0 */
+
+	MOVL	a+8(FP), AX
+	MULL	b+16(FP)			/* a1*b1 */
+	MOVL	AX, 4(CX)			/* r2 = lo(a1*b1) */
+
+	MOVL	a+8(FP), AX
+	MULL	b+12(FP)			/* a1*b0 */
+	MOVL	AX, 0(CX)			/* r1 = lo(a1*b0) */
+	ADDL	DX, 4(CX)			/* r2 += hi(a1*b0) */
+
+	MOVL	a+4(FP), AX
+	MULL	b+16(FP)			/* a0*b1 */
+	ADDL	AX, 0(CX)			/* r1 += lo(a0*b1) */
+	ADCL	DX, 4(CX)			/* r2 += hi(a0*b1) + carry */
+
+	MOVL	a+4(FP), AX
+	MULL	b+12(FP)			/* a0*b0 */
+	ADDL	DX, 0(CX)			/* r1 += hi(a0*b0) */
+	ADCL	BX, 4(CX)			/* r2 += carry */
+	RET
+
+/*
+ *  label consists of a stack pointer and a PC
+ */
+TEXT gotolabel(SB), $0
+	MOVL	label+0(FP), AX
+	MOVL	0(AX), SP			/* restore sp */
+	MOVL	4(AX), AX			/* put return pc on the stack */
+	MOVL	AX, 0(SP)
+	MOVL	$1, AX				/* return 1 */
+	RET
+
+TEXT setlabel(SB), $0
+	MOVL	label+0(FP), AX
+	MOVL	SP, 0(AX)			/* store sp */
+	MOVL	0(SP), BX			/* store return pc */
+	MOVL	BX, 4(AX)
+	MOVL	$0, AX				/* return 0 */
+	RET
+
+TEXT mwait(SB), $0
+	MOVL	addr+0(FP), AX
+	MOVL	(AX), CX
+	ORL	CX, CX
+	JNZ	_mwaitdone
+	XORL	DX, DX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xc8	/* MONITOR */
+	MOVL	(AX), CX
+	ORL	CX, CX
+	JNZ	_mwaitdone
+	XORL	AX, AX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xc9	/* MWAIT */
+_mwaitdone:
+	RET
+
+/*
+ * Interrupt/exception handling.
+ * Each entry in the vector table calls either _strayintr or _strayintrx depending
+ * on whether an error code has been automatically pushed onto the stack
+ * (_strayintrx) or not, in which case a dummy entry must be pushed before retrieving
+ * the trap type from the vector table entry and placing it on the stack as part
+ * of the Ureg structure.
+ * Exceptions to this are the syscall vector and the page fault
+ * vector.  Syscalls are dispatched separately.  Page faults
+ * have to take care of the extra cr2 parameter that xen places
+ * at the top of the stack.
+ * The size of each entry in the vector table (6 bytes) is known in trapinit().
+ */
+TEXT _strayintr(SB), $0
+	PUSHL	AX			/* save AX */
+	MOVL	4(SP), AX		/* return PC from vectortable(SB) */
+	JMP	intrcommon
+
+TEXT _strayintrx(SB), $0
+	XCHGL	AX, (SP)		/* swap AX with vectortable CALL PC */
+intrcommon:
+	PUSHL	DS			/* save DS */
+	PUSHL	$(KDSEL)
+	POPL	DS			/* fix up DS */
+	MOVBLZX	(AX), AX		/* trap type -> AX */
+	XCHGL	AX, 4(SP)		/* exchange trap type with saved AX */
+
+	PUSHL	ES			/* save ES */
+	PUSHL	$(KDSEL)
+	POPL	ES			/* fix up ES */
+
+	PUSHL	FS			/* save the rest of the Ureg struct */
+	PUSHL	GS
+	PUSHAL
+
+	PUSHL	SP			/* Ureg* argument to trap */
+	CALL	trap(SB)
+
+TEXT forkret(SB), $0
+	POPL	AX
+	POPAL
+	POPL	GS
+	POPL	FS
+	POPL	ES
+	POPL	DS
+	ADDL	$8, SP			/* pop error code and trap type */
+	IRETL
+
+TEXT vectortable(SB), $0
+	CALL _strayintr(SB); BYTE $0x00		/* divide error */
+	CALL _strayintr(SB); BYTE $0x01		/* debug exception */
+	CALL _strayintr(SB); BYTE $0x02		/* NMI interrupt */
+	CALL _strayintr(SB); BYTE $0x03		/* breakpoint */
+	CALL _strayintr(SB); BYTE $0x04		/* overflow */
+	CALL _strayintr(SB); BYTE $0x05		/* bound */
+	CALL _strayintr(SB); BYTE $0x06		/* invalid opcode */
+	CALL _strayintr(SB); BYTE $0x07		/* no coprocessor available */
+	CALL _strayintrx(SB); BYTE $0x08	/* double fault */
+	CALL _strayintr(SB); BYTE $0x09		/* coprocessor segment overflow */
+	CALL _strayintrx(SB); BYTE $0x0A	/* invalid TSS */
+	CALL _strayintrx(SB); BYTE $0x0B	/* segment not available */
+	CALL _strayintrx(SB); BYTE $0x0C	/* stack exception */
+	CALL _strayintrx(SB); BYTE $0x0D	/* general protection error */
+	CALL _strayintrx(SB); BYTE $0x0E	/* page fault */
+	CALL _strayintr(SB); BYTE $0x0F		/*  */
+	CALL _strayintr(SB); BYTE $0x10		/* coprocessor error */
+	CALL _strayintrx(SB); BYTE $0x11	/* alignment check */
+	CALL _strayintr(SB); BYTE $0x12		/* machine check */
+	CALL _strayintr(SB); BYTE $0x13
+	CALL _strayintr(SB); BYTE $0x14
+	CALL _strayintr(SB); BYTE $0x15
+	CALL _strayintr(SB); BYTE $0x16
+	CALL _strayintr(SB); BYTE $0x17
+	CALL _strayintr(SB); BYTE $0x18
+	CALL _strayintr(SB); BYTE $0x19
+	CALL _strayintr(SB); BYTE $0x1A
+	CALL _strayintr(SB); BYTE $0x1B
+	CALL _strayintr(SB); BYTE $0x1C
+	CALL _strayintr(SB); BYTE $0x1D
+	CALL _strayintr(SB); BYTE $0x1E
+	CALL _strayintr(SB); BYTE $0x1F
+	CALL _strayintr(SB); BYTE $0x20		/* VectorLAPIC */
+	CALL _strayintr(SB); BYTE $0x21
+	CALL _strayintr(SB); BYTE $0x22
+	CALL _strayintr(SB); BYTE $0x23
+	CALL _strayintr(SB); BYTE $0x24
+	CALL _strayintr(SB); BYTE $0x25
+	CALL _strayintr(SB); BYTE $0x26
+	CALL _strayintr(SB); BYTE $0x27
+	CALL _strayintr(SB); BYTE $0x28
+	CALL _strayintr(SB); BYTE $0x29
+	CALL _strayintr(SB); BYTE $0x2A
+	CALL _strayintr(SB); BYTE $0x2B
+	CALL _strayintr(SB); BYTE $0x2C
+	CALL _strayintr(SB); BYTE $0x2D
+	CALL _strayintr(SB); BYTE $0x2E
+	CALL _strayintr(SB); BYTE $0x2F
+	CALL _strayintr(SB); BYTE $0x30
+	CALL _strayintr(SB); BYTE $0x31
+	CALL _strayintr(SB); BYTE $0x32
+	CALL _strayintr(SB); BYTE $0x33
+	CALL _strayintr(SB); BYTE $0x34
+	CALL _strayintr(SB); BYTE $0x35
+	CALL _strayintr(SB); BYTE $0x36
+	CALL _strayintr(SB); BYTE $0x37
+	CALL _strayintr(SB); BYTE $0x38
+	CALL _strayintr(SB); BYTE $0x39
+	CALL _strayintr(SB); BYTE $0x3A
+	CALL _strayintr(SB); BYTE $0x3B
+	CALL _strayintr(SB); BYTE $0x3C
+	CALL _strayintr(SB); BYTE $0x3D
+	CALL _strayintr(SB); BYTE $0x3E
+	CALL _strayintr(SB); BYTE $0x3F
+	CALL _syscallintr(SB); BYTE $0x40	/* VectorSYSCALL */
+	CALL _strayintr(SB); BYTE $0x41
+	CALL _strayintr(SB); BYTE $0x42
+	CALL _strayintr(SB); BYTE $0x43
+	CALL _strayintr(SB); BYTE $0x44
+	CALL _strayintr(SB); BYTE $0x45
+	CALL _strayintr(SB); BYTE $0x46
+	CALL _strayintr(SB); BYTE $0x47
+	CALL _strayintr(SB); BYTE $0x48
+	CALL _strayintr(SB); BYTE $0x49
+	CALL _strayintr(SB); BYTE $0x4A
+	CALL _strayintr(SB); BYTE $0x4B
+	CALL _strayintr(SB); BYTE $0x4C
+	CALL _strayintr(SB); BYTE $0x4D
+	CALL _strayintr(SB); BYTE $0x4E
+	CALL _strayintr(SB); BYTE $0x4F
+	CALL _strayintr(SB); BYTE $0x50
+	CALL _strayintr(SB); BYTE $0x51
+	CALL _strayintr(SB); BYTE $0x52
+	CALL _strayintr(SB); BYTE $0x53
+	CALL _strayintr(SB); BYTE $0x54
+	CALL _strayintr(SB); BYTE $0x55
+	CALL _strayintr(SB); BYTE $0x56
+	CALL _strayintr(SB); BYTE $0x57
+	CALL _strayintr(SB); BYTE $0x58
+	CALL _strayintr(SB); BYTE $0x59
+	CALL _strayintr(SB); BYTE $0x5A
+	CALL _strayintr(SB); BYTE $0x5B
+	CALL _strayintr(SB); BYTE $0x5C
+	CALL _strayintr(SB); BYTE $0x5D
+	CALL _strayintr(SB); BYTE $0x5E
+	CALL _strayintr(SB); BYTE $0x5F
+	CALL _strayintr(SB); BYTE $0x60
+	CALL _strayintr(SB); BYTE $0x61
+	CALL _strayintr(SB); BYTE $0x62
+	CALL _strayintr(SB); BYTE $0x63
+	CALL _strayintr(SB); BYTE $0x64
+	CALL _strayintr(SB); BYTE $0x65
+	CALL _strayintr(SB); BYTE $0x66
+	CALL _strayintr(SB); BYTE $0x67
+	CALL _strayintr(SB); BYTE $0x68
+	CALL _strayintr(SB); BYTE $0x69
+	CALL _strayintr(SB); BYTE $0x6A
+	CALL _strayintr(SB); BYTE $0x6B
+	CALL _strayintr(SB); BYTE $0x6C
+	CALL _strayintr(SB); BYTE $0x6D
+	CALL _strayintr(SB); BYTE $0x6E
+	CALL _strayintr(SB); BYTE $0x6F
+	CALL _strayintr(SB); BYTE $0x70
+	CALL _strayintr(SB); BYTE $0x71
+	CALL _strayintr(SB); BYTE $0x72
+	CALL _strayintr(SB); BYTE $0x73
+	CALL _strayintr(SB); BYTE $0x74
+	CALL _strayintr(SB); BYTE $0x75
+	CALL _strayintr(SB); BYTE $0x76
+	CALL _strayintr(SB); BYTE $0x77
+	CALL _strayintr(SB); BYTE $0x78
+	CALL _strayintr(SB); BYTE $0x79
+	CALL _strayintr(SB); BYTE $0x7A
+	CALL _strayintr(SB); BYTE $0x7B
+	CALL _strayintr(SB); BYTE $0x7C
+	CALL _strayintr(SB); BYTE $0x7D
+	CALL _strayintr(SB); BYTE $0x7E
+	CALL _strayintr(SB); BYTE $0x7F
+	CALL _strayintr(SB); BYTE $0x80		/* Vector[A]PIC */
+	CALL _strayintr(SB); BYTE $0x81
+	CALL _strayintr(SB); BYTE $0x82
+	CALL _strayintr(SB); BYTE $0x83
+	CALL _strayintr(SB); BYTE $0x84
+	CALL _strayintr(SB); BYTE $0x85
+	CALL _strayintr(SB); BYTE $0x86
+	CALL _strayintr(SB); BYTE $0x87
+	CALL _strayintr(SB); BYTE $0x88
+	CALL _strayintr(SB); BYTE $0x89
+	CALL _strayintr(SB); BYTE $0x8A
+	CALL _strayintr(SB); BYTE $0x8B
+	CALL _strayintr(SB); BYTE $0x8C
+	CALL _strayintr(SB); BYTE $0x8D
+	CALL _strayintr(SB); BYTE $0x8E
+	CALL _strayintr(SB); BYTE $0x8F
+	CALL _strayintr(SB); BYTE $0x90
+	CALL _strayintr(SB); BYTE $0x91
+	CALL _strayintr(SB); BYTE $0x92
+	CALL _strayintr(SB); BYTE $0x93
+	CALL _strayintr(SB); BYTE $0x94
+	CALL _strayintr(SB); BYTE $0x95
+	CALL _strayintr(SB); BYTE $0x96
+	CALL _strayintr(SB); BYTE $0x97
+	CALL _strayintr(SB); BYTE $0x98
+	CALL _strayintr(SB); BYTE $0x99
+	CALL _strayintr(SB); BYTE $0x9A
+	CALL _strayintr(SB); BYTE $0x9B
+	CALL _strayintr(SB); BYTE $0x9C
+	CALL _strayintr(SB); BYTE $0x9D
+	CALL _strayintr(SB); BYTE $0x9E
+	CALL _strayintr(SB); BYTE $0x9F
+	CALL _strayintr(SB); BYTE $0xA0
+	CALL _strayintr(SB); BYTE $0xA1
+	CALL _strayintr(SB); BYTE $0xA2
+	CALL _strayintr(SB); BYTE $0xA3
+	CALL _strayintr(SB); BYTE $0xA4
+	CALL _strayintr(SB); BYTE $0xA5
+	CALL _strayintr(SB); BYTE $0xA6
+	CALL _strayintr(SB); BYTE $0xA7
+	CALL _strayintr(SB); BYTE $0xA8
+	CALL _strayintr(SB); BYTE $0xA9
+	CALL _strayintr(SB); BYTE $0xAA
+	CALL _strayintr(SB); BYTE $0xAB
+	CALL _strayintr(SB); BYTE $0xAC
+	CALL _strayintr(SB); BYTE $0xAD
+	CALL _strayintr(SB); BYTE $0xAE
+	CALL _strayintr(SB); BYTE $0xAF
+	CALL _strayintr(SB); BYTE $0xB0
+	CALL _strayintr(SB); BYTE $0xB1
+	CALL _strayintr(SB); BYTE $0xB2
+	CALL _strayintr(SB); BYTE $0xB3
+	CALL _strayintr(SB); BYTE $0xB4
+	CALL _strayintr(SB); BYTE $0xB5
+	CALL _strayintr(SB); BYTE $0xB6
+	CALL _strayintr(SB); BYTE $0xB7
+	CALL _strayintr(SB); BYTE $0xB8
+	CALL _strayintr(SB); BYTE $0xB9
+	CALL _strayintr(SB); BYTE $0xBA
+	CALL _strayintr(SB); BYTE $0xBB
+	CALL _strayintr(SB); BYTE $0xBC
+	CALL _strayintr(SB); BYTE $0xBD
+	CALL _strayintr(SB); BYTE $0xBE
+	CALL _strayintr(SB); BYTE $0xBF
+	CALL _strayintr(SB); BYTE $0xC0
+	CALL _strayintr(SB); BYTE $0xC1
+	CALL _strayintr(SB); BYTE $0xC2
+	CALL _strayintr(SB); BYTE $0xC3
+	CALL _strayintr(SB); BYTE $0xC4
+	CALL _strayintr(SB); BYTE $0xC5
+	CALL _strayintr(SB); BYTE $0xC6
+	CALL _strayintr(SB); BYTE $0xC7
+	CALL _strayintr(SB); BYTE $0xC8
+	CALL _strayintr(SB); BYTE $0xC9
+	CALL _strayintr(SB); BYTE $0xCA
+	CALL _strayintr(SB); BYTE $0xCB
+	CALL _strayintr(SB); BYTE $0xCC
+	CALL _strayintr(SB); BYTE $0xCD
+	CALL _strayintr(SB); BYTE $0xCE
+	CALL _strayintr(SB); BYTE $0xCF
+	CALL _strayintr(SB); BYTE $0xD0
+	CALL _strayintr(SB); BYTE $0xD1
+	CALL _strayintr(SB); BYTE $0xD2
+	CALL _strayintr(SB); BYTE $0xD3
+	CALL _strayintr(SB); BYTE $0xD4
+	CALL _strayintr(SB); BYTE $0xD5
+	CALL _strayintr(SB); BYTE $0xD6
+	CALL _strayintr(SB); BYTE $0xD7
+	CALL _strayintr(SB); BYTE $0xD8
+	CALL _strayintr(SB); BYTE $0xD9
+	CALL _strayintr(SB); BYTE $0xDA
+	CALL _strayintr(SB); BYTE $0xDB
+	CALL _strayintr(SB); BYTE $0xDC
+	CALL _strayintr(SB); BYTE $0xDD
+	CALL _strayintr(SB); BYTE $0xDE
+	CALL _strayintr(SB); BYTE $0xDF
+	CALL _strayintr(SB); BYTE $0xE0
+	CALL _strayintr(SB); BYTE $0xE1
+	CALL _strayintr(SB); BYTE $0xE2
+	CALL _strayintr(SB); BYTE $0xE3
+	CALL _strayintr(SB); BYTE $0xE4
+	CALL _strayintr(SB); BYTE $0xE5
+	CALL _strayintr(SB); BYTE $0xE6
+	CALL _strayintr(SB); BYTE $0xE7
+	CALL _strayintr(SB); BYTE $0xE8
+	CALL _strayintr(SB); BYTE $0xE9
+	CALL _strayintr(SB); BYTE $0xEA
+	CALL _strayintr(SB); BYTE $0xEB
+	CALL _strayintr(SB); BYTE $0xEC
+	CALL _strayintr(SB); BYTE $0xED
+	CALL _strayintr(SB); BYTE $0xEE
+	CALL _strayintr(SB); BYTE $0xEF
+	CALL _strayintr(SB); BYTE $0xF0
+	CALL _strayintr(SB); BYTE $0xF1
+	CALL _strayintr(SB); BYTE $0xF2
+	CALL _strayintr(SB); BYTE $0xF3
+	CALL _strayintr(SB); BYTE $0xF4
+	CALL _strayintr(SB); BYTE $0xF5
+	CALL _strayintr(SB); BYTE $0xF6
+	CALL _strayintr(SB); BYTE $0xF7
+	CALL _strayintr(SB); BYTE $0xF8
+	CALL _strayintr(SB); BYTE $0xF9
+	CALL _strayintr(SB); BYTE $0xFA
+	CALL _strayintr(SB); BYTE $0xFB
+	CALL _strayintr(SB); BYTE $0xFC
+	CALL _strayintr(SB); BYTE $0xFD
+	CALL _strayintr(SB); BYTE $0xFE
+	CALL _strayintr(SB); BYTE $0xFF
--- /dev/null
+++ b/sys/src/9/xen/main.c
@@ -1,0 +1,801 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"ureg.h"
+#include	"init.h"
+#include	"pool.h"
+#include	"reboot.h"
+#include	<tos.h>
+
+Mach *m;
+
+#define BOOTARGS	(xenstart->cmd_line)
+#define	BOOTARGSLEN	(sizeof xenstart->cmd_line)
+#define	MAXCONF		64
+
+enum {
+	/* space for syscall args, return PC, top-of-stack struct */
+	Ustkheadroom	= sizeof(Sargs) + sizeof(uintptr) + sizeof(Tos),
+};
+
+char bootdisk[KNAMELEN];
+Conf conf;
+char *confname[MAXCONF];
+char *confval[MAXCONF];
+int nconf;
+uchar *sp;	/* user stack of init proc */
+int idle_spin;
+
+static void
+options(void)
+{
+	long i, n;
+	char *cp, *line[MAXCONF], *p, *q;
+
+	/*
+	 *  parse configuration args from dos file plan9.ini
+	 */
+	cp = BOOTARGS;	/* where b.com leaves its config */
+	cp[BOOTARGSLEN-1] = 0;
+
+	/*
+	 * Strip out '\r', change '\t' -> ' '.
+	 */
+	p = cp;
+	for(q = cp; *q; q++){
+		if(*q == '\r')
+			continue;
+		if(*q == '\t')
+			*q = ' ';
+		*p++ = *q;
+	}
+	*p = 0;
+
+	n = getfields(cp, line, MAXCONF, 1, "\n");
+	for(i = 0; i < n; i++){
+		if(*line[i] == '#')
+			continue;
+		cp = strchr(line[i], '=');
+		if(cp == nil)
+			continue;
+		*cp++ = '\0';
+		confname[nconf] = line[i];
+		confval[nconf] = cp;
+		nconf++;
+	}
+}
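+
+/*
+ * A hypothetical cmd_line as parsed above, in plan9.ini style
+ * ('#' lines are skipped, lines without '=' are ignored):
+ *
+ *	# sample domU configuration
+ *	*kernelpercent=50
+ *	*debug=1
+ */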
+
+void
+main(void)
+{
+	mach0init();
+	options();
+	quotefmtinstall();
+	xenconsinit();
+
+	//consdebug = rdb;
+	print("\nPlan 9 (%s)\n", xenstart->magic);
+
+	cpuidentify();
+	// meminit() is not for us
+	confinit();
+	archinit();
+	xinit();
+	trapinit();
+	printinit();
+	cpuidprint();
+	mmuinit();
+	if(arch->intrinit)	/* launches other processors on an mp */
+		arch->intrinit();
+	timersinit();
+	mathinit();
+	kbdenable();
+	xengrantinit();
+	if(arch->clockenable)
+		arch->clockenable();
+	procinit0();
+	initseg();
+
+	links();
+//	conf.monitor = 1;
+	chandevreset();
+	pageinit();
+
+	swapinit();
+	userinit();
+	active.thunderbirdsarego = 1;
+	schedinit();
+}
+
+void
+mach0init(void)
+{
+	m = (Mach*)MACHADDR;
+	m->machno = 0;
+	conf.nmach = 1;
+	MACHP(0) = (Mach*)CPU0MACH;
+	m->pdb = (ulong*)xenstart->pt_base;
+#ifdef NOT
+	m->gdt = (Segdesc*)CPU0GDT;
+#endif
+
+	machinit();
+
+	active.machs = 1;
+	active.exiting = 0;
+}
+
+void
+machinit(void)
+{
+	int machno;
+	ulong *pdb;
+	Segdesc *gdt;
+
+	machno = m->machno;
+	pdb = m->pdb;
+	gdt = m->gdt;
+	memset(m, 0, sizeof(Mach));
+	m->machno = machno;
+	m->pdb = pdb;
+	m->gdt = gdt;
+	m->perf.period = 1;
+
+	/*
+	 * For polled uart output at boot, need
+	 * a default delay constant. 100000 should
+	 * be enough for a while. Cpuidentify will
+	 * calculate the real value later.
+	 */
+	m->loopconst = 100000;
+	m->cpumhz = 1000;				// XXX! 
+
+	HYPERVISOR_shared_info = (shared_info_t*)mmumapframe(XENSHARED, (xenstart->shared_info)>>PGSHIFT);
+	
+	// XXX m->shared = &HYPERVISOR_shared_info->vcpu_data[m->machno];
+}
+
+void
+init0(void)
+{
+	int i;
+	char buf[2*KNAMELEN];
+
+	up->nerrlab = 0;
+
+	spllo();
+
+	/*
+	 * These are o.k. because rootinit is null.
+	 * Then early kproc's will have a root and dot.
+	 */
+	up->slash = namec("#/", Atodir, 0, 0);
+	pathclose(up->slash->path);
+	up->slash->path = newpath("/");
+	up->dot = cclone(up->slash);
+
+	chandevinit();
+
+	if(!waserror()){
+		snprint(buf, sizeof(buf), "%s %s", arch->id, conffile);
+		ksetenv("terminal", buf, 0);
+		ksetenv("cputype", "386", 0);
+		if(cpuserver)
+			ksetenv("service", "cpu", 0);
+		else
+			ksetenv("service", "terminal", 0);
+		ksetenv("readparts", "1", 0);
+		for(i = 0; i < nconf; i++){
+			if(confname[i][0] != '*')
+				ksetenv(confname[i], confval[i], 0);
+			ksetenv(confname[i], confval[i], 1);
+		}
+		poperror();
+	}
+
+	kproc("alarm", alarmkproc, 0);
+	touser(sp);
+}
+
+void
+userinit(void)
+{
+	Proc *p;
+	Segment *s;
+	KMap *k;
+	Page *pg;
+
+	p = newproc();
+	p->pgrp = newpgrp();
+	p->egrp = smalloc(sizeof(Egrp));
+	p->egrp->ref = 1;
+	p->fgrp = dupfgrp(nil);
+	p->rgrp = newrgrp();
+	p->procmode = 0640;
+
+	kstrdup(&eve, "");
+	kstrdup(&p->text, "*init*");
+	kstrdup(&p->user, eve);
+
+	p->fpstate = FPinit;
+	fpoff();
+
+	/*
+	 * Kernel Stack
+	 *
+	 * N.B. make sure there's enough space for syscall to check
+	 *	for valid args and 
+	 *	4 bytes for gotolabel's return PC
+	 */
+	p->sched.pc = (ulong)init0;
+	p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD);
+
+	/*
+	 * User Stack
+	 */
+	s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BY2PG);
+	p->seg[SSEG] = s;
+	pg = newpage(1, 0, USTKTOP-BY2PG);
+	segpage(s, pg);
+	k = kmap(pg);
+	bootargs(VA(k));
+	kunmap(k);
+
+	/*
+	 * Text
+	 */
+	s = newseg(SG_TEXT, UTZERO, 1);
+	s->flushme++;
+	p->seg[TSEG] = s;
+	pg = newpage(1, 0, UTZERO);
+	memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
+	segpage(s, pg);
+	k = kmap(s->map[0]->pages[0]);
+	memmove((ulong*)VA(k), initcode, sizeof initcode);
+	kunmap(k);
+	ready(p);
+}
+
+uchar *
+pusharg(char *p)
+{
+	int n;
+
+	n = strlen(p)+1;
+	sp -= n;
+	memmove(sp, p, n);
+	return sp;
+}
+
+void
+bootargs(ulong base)
+{
+ 	int i, ac;
+	uchar *av[32];
+	uchar **lsp;
+
+	sp = (uchar*)base + BY2PG - Ustkheadroom;
+
+	ac = 0;
+	av[ac++] = pusharg("/386/9dos");
+	av[ac++] = pusharg("-D");
+
+	/* 4 byte word align stack */
+	sp = (uchar*)((ulong)sp & ~3);
+
+	/* build argc, argv on stack */
+	sp -= (ac+1)*sizeof(sp);
+	lsp = (uchar**)sp;
+	for(i = 0; i < ac; i++)
+		*lsp++ = av[i] + ((USTKTOP - BY2PG) - base);
+	*lsp = 0;
+	sp += (USTKTOP - BY2PG) - base - sizeof(ulong);
+}
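+
+/*
+ * Sketch of the boot stack built above: the argument strings
+ * ("/386/9dos", "-D") sit at the top of init's stack page, just below
+ * the Ustkheadroom reservation; the nil-terminated argv vector sits
+ * below them, each pointer relocated from the kernel map (base) to its
+ * user address in the page at USTKTOP-BY2PG; finally sp itself is
+ * converted to a user address for touser().
+ */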
+
+char*
+getconf(char *name)
+{
+	int i;
+
+	for(i = 0; i < nconf; i++)
+		if(cistrcmp(confname[i], name) == 0)
+			return confval[i];
+	return 0;
+}
+
+static void
+writeconf(void)
+{
+	char *p, *q;
+	int n;
+
+	p = getconfenv();
+
+	if(waserror()) {
+		free(p);
+		nexterror();
+	}
+
+	/* convert to name=value\n format */
+	for(q=p; *q; q++) {
+		q += strlen(q);
+		*q = '=';
+		q += strlen(q);
+		*q = '\n';
+	}
+	n = q - p + 1;
+	if(n >= BOOTARGSLEN)
+		error("kernel configuration too large");
+	memmove(BOOTARGS, p, n);
+	poperror();
+	free(p);
+}
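+
+/*
+ * getconfenv() yields the configuration as name\0value\0 pairs;
+ * the loop above overwrites the two NULs in place:
+ *
+ *	name\0value\0  ->  name=value\n
+ */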
+
+void
+confinit(void)
+{
+	char *p;
+	int i, userpcnt;
+	ulong kpages;
+
+	for(i = 0; i < nconf; i++)
+		print("%s=%s\n", confname[i], confval[i]);
+	/* 
+	 * all ram above xentop is free, but must be mappable
+	 * to virt addrs less than VIRT_START.
+	 */
+	kpages = PADDR(hypervisor_virt_start)>>PGSHIFT;
+	if(xenstart->nr_pages <= kpages)
+		kpages = xenstart->nr_pages;
+	else
+		print("Warning: Plan 9 / Xen limitation - "
+			  "using only %lud of %lud available RAM pages\n",
+			  kpages, xenstart->nr_pages);
+	xentop = PGROUND(PADDR(xentop));
+	conf.mem[0].npage = kpages - (xentop>>PGSHIFT);
+	conf.mem[0].base = xentop;
+
+	if(p = getconf("*kernelpercent"))
+		userpcnt = 100 - strtol(p, 0, 0);
+	else
+		userpcnt = 0;
+
+	conf.npage = 0;
+	for(i=0; i<nelem(conf.mem); i++)
+		conf.npage += conf.mem[i].npage;
+
+	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
+	if(cpuserver)
+		conf.nproc *= 3;
+	if(conf.nproc > 2000)
+		conf.nproc = 2000;
+	conf.nimage = 200;
+	conf.nswap = conf.nproc*80;
+	conf.nswppo = 4096;
+
+	if(cpuserver) {
+		if(userpcnt < 10)
+			userpcnt = 70;
+		kpages = conf.npage - (conf.npage*userpcnt)/100;
+
+		/*
+		 * Hack for the big boys. Only good while physmem < 4GB.
+		 * Give the kernel fixed max + enough to allocate the
+		 * page pool.
+		 * This is an overestimate as conf.upages < conf.npage.
+		 * The patch of nimage is a band-aid, scanning the whole
+		 * page list in imagereclaim just takes too long.
+		 */
+		if(kpages > (64*MB + conf.npage*sizeof(Page))/BY2PG){
+			kpages = (64*MB + conf.npage*sizeof(Page))/BY2PG;
+			conf.nimage = 2000;
+			kpages += (conf.nproc*KSTACK)/BY2PG;
+		}
+	} else {
+		if(userpcnt < 10) {
+			if(conf.npage*BY2PG < 16*MB)
+				userpcnt = 40;
+			else
+				userpcnt = 60;
+		}
+		kpages = conf.npage - (conf.npage*userpcnt)/100;
+
+		/*
+		 * Make sure terminals with low memory get at least
+		 * 4MB on the first Image chunk allocation.
+		 */
+		if(conf.npage*BY2PG < 16*MB)
+			imagmem->minarena = 4*1024*1024;
+	}
+
+	/*
+	 * can't go past the end of virtual memory
+	 * (ulong)-KZERO is 2^32 - KZERO
+	 */
+	if(kpages > ((ulong)-KZERO)/BY2PG)
+		kpages = ((ulong)-KZERO)/BY2PG;
+
+	conf.upages = conf.npage - kpages;
+	conf.ialloc = (kpages/2)*BY2PG;
+
+	/*
+	 * Guess how much is taken by the large permanent
+	 * datastructures. Mntcache and Mntrpc are not accounted for
+	 * (probably ~300KB).
+	 */
+	kpages *= BY2PG;
+	kpages -= conf.upages*sizeof(Page)
+		+ conf.nproc*sizeof(Proc)
+		+ conf.nimage*sizeof(Image)
+		+ conf.nswap
+		+ conf.nswppo*sizeof(Page);
+	mainmem->maxsize = kpages;
+	if(!cpuserver){
+		/*
+		 * give terminals lots of image memory, too; the dynamic
+		 * allocation will balance the load properly, hopefully.
+		 * be careful with 32-bit overflow.
+		 */
+		imagmem->maxsize = kpages;
+	}
+}
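+
+/*
+ * Worked example (illustrative): a 128MB terminal domain with no
+ * *kernelpercent set gets userpcnt = 60, so kpages starts at 40%
+ * of memory (about 51MB); the other 60% becomes conf.upages for
+ * user pages, and half the kernel's share is set aside as conf.ialloc.
+ */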
+
+static char* mathmsg[] =
+{
+	nil,	/* handled below */
+	"denormalized operand",
+	"division by zero",
+	"numeric overflow",
+	"numeric underflow",
+	"precision loss",
+};
+
+static void
+mathnote(void)
+{
+	int i;
+	ulong status;
+	char *msg, note[ERRMAX];
+
+	status = up->fpsave.status;
+
+	/*
+	 * Some attention should probably be paid here to the
+	 * exception masks and error summary.
+	 */
+	msg = "unknown exception";
+	for(i = 1; i <= 5; i++){
+		if(!((1<<i) & status))
+			continue;
+		msg = mathmsg[i];
+		break;
+	}
+	if(status & 0x01){
+		if(status & 0x40){
+			if(status & 0x200)
+				msg = "stack overflow";
+			else
+				msg = "stack underflow";
+		}else
+			msg = "invalid operation";
+	}
+ 	snprint(note, sizeof note, "sys: fp: %s fppc=0x%lux status=0x%lux",
+ 		msg, up->fpsave.pc, status);
+	postnote(up, 1, note, NDebug);
+}
+
+/*
+ *  math coprocessor error
+ */
+static void
+matherror(Ureg *ur, void*)
+{
+	/*
+	 *  a write cycle to port 0xF0 clears the interrupt latch attached
+	 *  to the error# line from the 387
+	 */
+	if(!(m->cpuiddx & 0x01))
+		outb(0xF0, 0xFF);
+
+	/*
+	 *  save floating point state to check out error
+	 */
+	fpenv(&up->fpsave);
+	mathnote();
+
+	if(ur->pc & KZERO)
+		panic("fp: status %ux fppc=0x%lux pc=0x%lux",
+			up->fpsave.status, up->fpsave.pc, ur->pc);
+}
+
+/*
+ *  math coprocessor emulation fault
+ */
+static void
+mathemu(Ureg *ureg, void*)
+{
+	if(up->fpstate & FPillegal){
+		/* someone did floating point in a note handler */
+		postnote(up, 1, "sys: floating point in note handler", NDebug);
+		return;
+	}
+	switch(up->fpstate){
+	case FPinit:
+		fpinit();
+		up->fpstate = FPactive;
+		break;
+	case FPinactive:
+		/*
+		 * Before restoring the state, check for any pending
+		 * exceptions, there's no way to restore the state without
+		 * generating an unmasked exception.
+		 * More attention should probably be paid here to the
+		 * exception masks and error summary.
+		 */
+		if((up->fpsave.status & ~up->fpsave.control) & 0x07F){
+			mathnote();
+			break;
+		}
+		fprestore(&up->fpsave);
+		up->fpstate = FPactive;
+		break;
+	case FPactive:
+		panic("math emu pid %ld %s pc 0x%lux", 
+			up->pid, up->text, ureg->pc);
+		break;
+	}
+}
+
+/*
+ *  math coprocessor segment overrun
+ */
+static void
+mathover(Ureg*, void*)
+{
+	pexit("math overrun", 0);
+}
+
+void
+mathinit(void)
+{
+	trapenable(VectorCERR, matherror, 0, "matherror");
+	//if(X86FAMILY(m->cpuidax) == 3)
+	//	intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
+	trapenable(VectorCNA, mathemu, 0, "mathemu");
+	trapenable(VectorCSO, mathover, 0, "mathover");
+}
+
+/*
+ *  set up floating point for a new process
+ */
+void
+procsetup(Proc*p)
+{
+	p->fpstate = FPinit;
+	fpoff();
+}
+
+void
+procfork(Proc *p)
+{
+	int s;
+
+	p->kentry = up->kentry;
+	p->pcycles = -p->kentry;
+
+	/* inherit user descriptors */
+	memmove(p->gdt, up->gdt, sizeof(p->gdt));
+
+	/* copy local descriptor table */
+	if(up->ldt != nil && up->nldt > 0){
+		p->ldt = smalloc(sizeof(Segdesc) * up->nldt);
+		memmove(p->ldt, up->ldt, sizeof(Segdesc) * up->nldt);
+		p->nldt = up->nldt;
+	}
+
+	/* save floating point state */
+	s = splhi();
+	switch(up->fpstate & ~FPillegal){
+	case FPactive:
+		fpsave(&up->fpsave);
+		up->fpstate = FPinactive;
+	case FPinactive:
+		p->fpsave = up->fpsave;
+		p->fpstate = FPinactive;
+	}
+	splx(s);
+}
+
+void
+procrestore(Proc *p)
+{
+	uvlong t;
+
+	if(p->kp)
+		return;
+	cycles(&t);
+	p->pcycles -= t;
+}
+
+/*
+ *  Save the mach dependent part of the process state.
+ */
+void
+procsave(Proc *p)
+{
+	uvlong t;
+
+	cycles(&t);
+	p->pcycles += t;
+	if(p->fpstate == FPactive){
+		if(p->state == Moribund)
+			fpclear();
+		else{
+			/*
+			 * Fpsave() stores without handling pending
+			 * unmasked exceptions. Postnote() can't be called
+			 * here as sleep() already has up->rlock, so
+			 * the handling of pending exceptions is delayed
+			 * until the process runs again and generates an
+			 * emulation fault to activate the FPU.
+			 */
+			fpsave(&p->fpsave);
+		}
+		p->fpstate = FPinactive;
+	}
+
+	/*
+	 * While this processor is in the scheduler, the process could run
+	 * on another processor and exit, returning the page tables to
+	 * the free list where they could be reallocated and overwritten.
+	 * When this processor eventually has to get an entry from the
+	 * trashed page tables it will crash.
+	 *
+	 * If there's only one processor, this can't happen.
+	 * You might think it would be a win not to do this in that case,
+	 * especially on VMware, but it turns out not to matter.
+	 */
+	mmuflushtlb(0);
+}
+
+static void
+shutdown(int ispanic)
+{
+	int ms, once;
+
+	lock(&active);
+	if(ispanic)
+		active.ispanic = ispanic;
+	else if(m->machno == 0 && (active.machs & (1<<m->machno)) == 0)
+		active.ispanic = 0;
+	once = active.machs & (1<<m->machno);
+	active.machs &= ~(1<<m->machno);
+	active.exiting = 1;
+	unlock(&active);
+
+	if(once)
+		print("cpu%d: exiting\n", m->machno);
+	//spllo();
+	for(ms = 5*1000; ms > 0; ms -= TK2MS(2)){
+		delay(TK2MS(2));
+		if(active.machs == 0 && consactive() == 0)
+			break;
+	}
+
+	if(getconf("*debug"))
+		delay(5*60*1000);
+
+	if(active.ispanic){
+		if(!cpuserver)
+			for(;;)
+				halt();
+		delay(10000);
+	}else
+		delay(1000);
+}
+
+void
+reboot(void *entry, void *code, ulong size)
+{
+	void (*f)(ulong, ulong, ulong);
+	//ulong *pdb;
+
+	writeconf();
+
+	shutdown(0);
+
+	/*
+	 * should be the only processor running now
+	 */
+
+	print("shutting down...\n");
+	delay(200);
+
+	splhi();
+
+	/* turn off buffered serial console */
+	serialoq = nil;
+
+	/* shutdown devices */
+	chandevshutdown();
+
+	/* reboot(0, ...) on Xen causes domU shutdown */
+	if(entry == 0)
+		HYPERVISOR_shutdown(0);
+
+	/*
+	 * Modify the machine page table to directly map the low 4MB of memory
+	 * This allows the reboot code to turn off the page mapping
+	 */
+	//pdb = m->pdb;
+	//pdb[PDX(0)] = pdb[PDX(KZERO)];
+	mmuflushtlb(0);
+
+	/* setup reboot trampoline function */
+	f = (void*)REBOOTADDR;
+	memmove(f, rebootcode, sizeof(rebootcode));
+
+	print("rebooting...\n");
+
+	/* off we go - never to return */
+	(*f)(PADDR(entry), PADDR(code), size);
+}
+
+
+void
+exit(int ispanic)
+{
+	shutdown(ispanic);
+	arch->reset();
+}
+
+int
+cistrcmp(char *a, char *b)
+{
+	int ac, bc;
+
+	for(;;){
+		ac = *a++;
+		bc = *b++;
+	
+		if(ac >= 'A' && ac <= 'Z')
+			ac = 'a' + (ac - 'A');
+		if(bc >= 'A' && bc <= 'Z')
+			bc = 'a' + (bc - 'A');
+		ac -= bc;
+		if(ac)
+			return ac;
+		if(bc == 0)
+			break;
+	}
+	return 0;
+}
+
+int
+cistrncmp(char *a, char *b, int n)
+{
+	unsigned ac, bc;
+
+	while(n > 0){
+		ac = *a++;
+		bc = *b++;
+		n--;
+
+		if(ac >= 'A' && ac <= 'Z')
+			ac = 'a' + (ac - 'A');
+		if(bc >= 'A' && bc <= 'Z')
+			bc = 'a' + (bc - 'A');
+
+		ac -= bc;
+		if(ac)
+			return ac;
+		if(bc == 0)
+			break;
+	}
+
+	return 0;
+}
+
--- /dev/null
+++ b/sys/src/9/xen/mem.h
@@ -1,0 +1,160 @@
+/*
+ * Memory and machine-specific definitions.  Used in C and assembler.
+ */
+
+#define MIN(a, b)	((a) < (b)? (a): (b))
+#define MAX(a, b)	((a) > (b)? (a): (b))
+
+/*
+ * Sizes
+ */
+#define	BI2BY		8			/* bits per byte */
+#define	BI2WD		32			/* bits per word */
+#define	BY2WD		4			/* bytes per word */
+#define	BY2V		8			/* bytes per double word */
+#define	BY2PG		4096			/* bytes per page */
+#define	WD2PG		(BY2PG/BY2WD)		/* words per page */
+#define	PGSHIFT		12			/* log(BY2PG) */
+#define	ROUND(s, sz)	(((s)+((sz)-1))&~((sz)-1))
+#define	PGROUND(s)	ROUND(s, BY2PG)
+#define	BLOCKALIGN	8
+#define FPalign		16			/* required for FXSAVE */
+
+#define	MAXMACH		8			/* max # cpus system can run */
+#define	MAX_VIRT_CPUS	MAXMACH
+#define	KSTACK		4096			/* Size of kernel stack */
+
+/*
+ * Time
+ */
+#define	HZ		(100)			/* clock frequency */
+#define	MS2HZ		(1000/HZ)		/* millisec per clock tick */
+#define	TK2SEC(t)	((t)/HZ)		/* ticks to seconds */
+
+/*
+ * Fundamental addresses
+ */
+#define	REBOOTADDR	0x00001000		/* reboot code - physical address */
+#define	APBOOTSTRAP	0x80001000		/* AP bootstrap code */
+#define	MACHADDR	0x80002000		/* as seen by current processor */
+#define	CPU0MACH	MACHADDR		/* Mach for bootstrap processor */
+#define	XENCONSOLE	0x80003000		/* xen console ring */
+#define	XENSHARED	0x80004000		/* xen shared page */
+#define	XENBUS		0x80005000		/* xenbus aka xenstore ring */
+#define	XENGRANTTAB	0x80006000		/* grant table */
+
+#define	MACHSIZE	BY2PG
+
+/*
+ *  Address spaces
+ *
+ *  User is at 0-2GB
+ *  Kernel is at 2GB-4GB
+ */
+#define	UZERO		0			/* base of user address space */
+#define	UTZERO		(UZERO+BY2PG)		/* first address in user text */
+#define UTROUND(t)	ROUNDUP((t), BY2PG)
+#define	KZERO		0x80000000		/* base of kernel address space */
+#define	KTZERO		0x80010000		/* first address in kernel text */
+#define	USTKTOP		(KZERO-BY2PG)		/* byte just beyond user stack */
+#define	USTKSIZE	(16*1024*1024)		/* size of user stack */
+#define	TSTKTOP		(USTKTOP-USTKSIZE)	/* end of new stack in sysexec */
+#define	TSTKSIZ 	100
+
+/*
+ *  known x86 segments (in GDT) and their selectors
+ */
+#define	NULLSEG	0	/* null segment */
+#define	KDSEG	1	/* kernel data/stack */
+#define	KESEG	2	/* kernel executable */	
+#define	UDSEG	3	/* user data/stack */
+#define	UESEG	4	/* user executable */
+#define	TSSSEG	5	/* task segment */
+#define	APMCSEG		6	/* APM code segment */
+#define	APMCSEG16	7	/* APM 16-bit code segment */
+#define	APMDSEG		8	/* APM data segment */
+#define	PROCSEG0	11	/* per process descriptor0 */
+#define	NPROCSEG	3	/* number of per process descriptors */
+#define	NGDT		13	/* number of GDT entries required */
+/* #define	APM40SEG	8	/* APM segment 0x40 */
+
+#define	SELGDT	(0<<2)	/* selector is in gdt */
+#define	SELLDT	(1<<2)	/* selector is in ldt */
+
+#define	SELECTOR(i, t, p)	(((i)<<3) | (t) | (p))
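+/*
+ * e.g. SELECTOR(KDSEG, SELGDT, 0) = (1<<3)|0|0 = 0x08 and
+ * SELECTOR(UDSEG, SELGDT, 3) = (3<<3)|0|3 = 0x1B (index 3, GDT, RPL 3).
+ */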
+
+#define	NULLSEL	SELECTOR(NULLSEG, SELGDT, 0)
+/* these are replaced by XEN entries */
+#ifdef NOPE  // XXX investigate more
+#define	KDSEL	SELECTOR(KDSEG, SELGDT, 0)
+#define	KESEL	SELECTOR(KESEG, SELGDT, 0)
+#define	UESEL	SELECTOR(UESEG, SELGDT, 3)
+#define	UDSEL	SELECTOR(UDSEG, SELGDT, 3)
+/* comment out to make sure unused ... */
+
+#define	TSSSEL	SELECTOR(TSSSEG, SELGDT, 0)
+#define	APMCSEL 	SELECTOR(APMCSEG, SELGDT, 0)
+#define	APMCSEL16	SELECTOR(APMCSEG16, SELGDT, 0)
+#define	APMDSEL		SELECTOR(APMDSEG, SELGDT, 0)
+/* #define	APM40SEL	SELECTOR(APM40SEG, SELGDT, 0) */
+#else
+/* use the selectors that xen gives us */
+#define KESEL FLAT_KERNEL_CS
+#define KDSEL FLAT_KERNEL_DS
+#define UESEL FLAT_USER_CS
+#define UDSEL FLAT_USER_DS
+#endif
+
+/*
+ *  fields in segment descriptors
+ */
+#define	SEGDATA	(0x10<<8)	/* data/stack segment */
+#define	SEGEXEC	(0x18<<8)	/* executable segment */
+#define	SEGTSS	(0x9<<8)	/* TSS segment */
+#define	SEGCG	(0x0C<<8)	/* call gate */
+#define	SEGIG	(0x0E<<8)	/* interrupt gate */
+#define	SEGTG	(0x0F<<8)	/* trap gate */
+#define	SEGTYPE	(0x1F<<8)
+
+#define	SEGP	(1<<15)		/* segment present */
+#define	SEGPL(x) ((x)<<13)	/* priority level */
+#define	SEGB	(1<<22)		/* granularity 1==4k (for expand-down) */
+#define	SEGG	(1<<23)		/* granularity 1==4k (for other) */
+#define	SEGE	(1<<10)		/* expand down */
+#define	SEGW	(1<<9)		/* writable (for data/stack) */
+#define	SEGR	(1<<9)		/* readable (for code) */
+#define	SEGD	(1<<22)		/* default 1==32bit (for code) */
+
+/*
+ *  virtual MMU
+ */
+#define	PTEMAPMEM	(1024*1024)	
+#define	PTEPERTAB	(PTEMAPMEM/BY2PG)
+#define	SEGMAPSIZE	1984
+#define	SSEGMAPSIZE	16
+#define	PPN(x)		((x)&~(BY2PG-1))
+#define	PGOFF(x)		((x)&(BY2PG-1))
+
+/*
+ *  physical MMU
+ */
+#define	PTEVALID	(1<<0)
+#define	PTEWT		(1<<3)
+#define	PTEUNCACHED	(1<<4)
+#define	PTEWRITE	(1<<1)
+#define	PTERONLY	(0<<1)
+#define	PTEKERNEL	(0<<2)
+#define	PTEUSER		(1<<2)
+#define	PTESIZE		(1<<7)
+#define	PTEGLOBAL	(1<<8)
+
+/*
+ * Macros for calculating offsets within the page directory base
+ * and page tables. 
+ */
+#define PAX(va)		(paemode? ((ulong)(va)>>29) & 0x6 : 0)
+#define	PDX(va)		(paemode? (((ulong)(va))>>20) & 0x03FE : (((ulong)(va))>>22) & 0x03FF)
+#define	PTX(va)		(paemode? (((ulong)(va))>>11) & 0x03FE : (((ulong)(va))>>12) & 0x03FF)
+#define PDB(pdb,va)	(paemode? KADDR(MAPPN((pdb)[((ulong)(va)>>29) & 0x6])) : pdb)
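+/*
+ * e.g. for va 0x80010000 (KTZERO): without PAE, PDX = 0x200 and
+ * PTX = 0x010; with PAE, PAX = 4, PDX = 0 and PTX = 0x020 (the PAE
+ * indices are doubled because the 64-bit entries are addressed as
+ * ulong pairs).
+ */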
+
+#define	getpgcolor(a)	0
--- /dev/null
+++ b/sys/src/9/xen/mkfile
@@ -1,0 +1,188 @@
+CONF=xenpcf
+CONFLIST=xenpcf
+
+objtype=386
+</$objtype/mkfile
+p=9
+
+KTZERO=0x80010000
+KZERO=0x80000000
+KPZERO=0x10000
+PAE=yes
+
+DEVS=`{rc ../port/mkdevlist $CONF}
+
+PORT=\
+	alarm.$O\
+	alloc.$O\
+	allocb.$O\
+	auth.$O\
+	cache.$O\
+	chan.$O\
+	dev.$O\
+	edf.$O\
+	fault.$O\
+	page.$O\
+	parse.$O\
+	pgrp.$O\
+	portclock.$O\
+	print.$O\
+	proc.$O\
+	qio.$O\
+	qlock.$O\
+	rebootcmd.$O\
+	segment.$O\
+	swap.$O\
+	sysfile.$O\
+	sysproc.$O\
+	taslock.$O\
+	tod.$O\
+	xalloc.$O\
+
+XEN=\
+	xengrant.$O\
+	xentimer.$O\
+	xensystem.$O\
+
+SCHED=`{ls -p xen-public/sched*.h >[2]/dev/null}
+ARCH=`{test -d xen-public/arch-x86 && echo arch-x86/xen-x86_32.h arch-x86/xen.h || echo arch-x86_32.h }
+
+XENHEADERS=\
+	$ARCH\
+	xen.h\
+	event_channel.h\
+	grant_table.h\
+	memory.h\
+	physdev.h\
+	$SCHED\
+	io/ring.h\
+	io/blkif.h\
+	io/console.h\
+	io/netif.h\
+	io/xenbus.h\
+	io/xs_wire.h\
+
+OBJ=\
+	l.$O\
+	plan9l.$O\
+	xen.$O\
+	main.$O\
+	mmu.$O\
+	random.$O\
+	rdb.$O\
+	trap.$O\
+	$CONF.root.$O\
+	$CONF.rootc.$O\
+	$DEVS\
+	$PORT\
+	$XEN\
+
+LIB=\
+	/$objtype/lib/libmemlayer.a\
+	/$objtype/lib/libmemdraw.a\
+	/$objtype/lib/libdraw.a\
+	/$objtype/lib/libip.a\
+	/$objtype/lib/libsec.a\
+	/$objtype/lib/libmp.a\
+	/$objtype/lib/libc.a\
+
+ETHER=`{echo devether.c ether*.c | sed 's/\.c/.'$O'/g'}
+VGA=`{echo devvga.c screen.c vga*.c | sed 's/\.c/.'$O'/g'}
+SDEV=`{echo devsd.c sd*.c | sed 's/\.c/.'$O'/g'}
+
+PAE=`{echo $PAE | sed 's/yes/yes[extended-cr3]/'}
+XENELF='LOADER=generic,XEN_VER=xen-3.0,ELF_PADDR_OFFSET=0,VIRT_BASE='$KZERO',VIRT_ENTRY='$KTZERO',PAE='$PAE
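+# with the values above this expands to, e.g.:
+# LOADER=generic,XEN_VER=xen-3.0,ELF_PADDR_OFFSET=0,VIRT_BASE=0x80000000,VIRT_ENTRY=0x80010000,PAE=yes[extended-cr3]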
+
+#$p$CONF:	$CONF.c $OBJ $LIB xenbin
+#	$CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
+#	$LD -o $target.tmp -T$KTZERO -l $OBJ $CONF.$O $LIB
+#	./xenbin <$target.tmp >$target
+#	rm $target.tmp
+#	size $target
+
+$p$CONF:	$CONF.c $OBJ $LIB xenelf
+	$CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c
+	$LD -o $target.elf -H5 -T$KTZERO -P$KPZERO -l $OBJ $CONF.$O $LIB
+	./xenelf $target.elf $target __xen_guest ''$XENELF''
+	size $target
+
+$p$CONF.gz: $p$CONF
+	#strip -o /fd/1 $p$CONF | gzip -9 > $p$CONF.gz
+	gzip -9 $p$CONF > $p$CONF.gz
+
+install:V: $p$CONF $p$CONF.gz
+	cp $p$CONF $p$CONF.gz /$objtype/
+	# import lookout / /n/lookout && cp $p$CONF $p$CONF.gz /n/lookout/$objtype/
+
+# copies generated by the rule below
+PCHEADERS=uncached.h etherif.h ethermii.h mp.h io.h
+
+REPCH=`{echo $PCHEADERS | sed 's/\.h//g; s/ /|/g'}
+^($REPCH)\.h:R:	'../pc/\1.h'
+	cp $prereq .
+
+REPCC=`{../port/mkfilelist ../pc}
+^($REPCC)\.$O:R:	'../pc/\1.c'
+	$CC $CFLAGS -I. -. ../pc/$stem1.c
+
+<../boot/bootmkfile
+<../port/portmkfile
+<|../port/mkbootrules $CONF
+
+ptclbsum386.$O:	../pc/ptclbsum386.s
+	$AS $AFLAGS ../pc/ptclbsum386.s
+
+# we inherited these.. revisit.
+$ETHER: 	etherif.h ../port/netif.h
+$SDEV:	../port/sd.h
+main.$O:	init.h reboot.h
+trap.$O:	/sys/include/tos.h
+
+%.$O:	/$objtype/include/u.h ../port/lib.h mem.h dat.h fns.h io.h ../port/error.h ../port/portdat.h ../port/portfns.h xendat.h xendefs.h
+
+xendefs.h: xendat.h
+	grep '^#define[ 	]+FLAT_' xendat.h >$target
+
+xendat.h:
+	{ echo '#define __i386__ __i386__'; \
+	  echo '#define __XEN_INTERFACE_VERSION__ 0x00030201'; \
+	  echo '#define XEN_GUEST_HANDLE_00030205(type) type *'; \
+	  cat xen-public/^($XENHEADERS) } | \
+	./cppx > $target
+
+init.h:	../port/initcode.c ../pc/init9.c
+	$CC ../port/initcode.c
+	$CC ../pc/init9.c
+	$LD -l -R1 -o init.out init9.$O initcode.$O /386/lib/libc.a
+	{echo 'uchar initcode[]={'
+	 strip -o /fd/1 init.out | xd -1x |
+		sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+	 echo '};'} > init.h
+
+reboot.h:	../pc/rebootcode.s
+	$AS ../pc/rebootcode.s
+	$LD -l -s -T0x1000 -R4 -o reboot.out rebootcode.$O
+	{echo 'uchar rebootcode[]={'
+	 xd -1x reboot.out |
+		sed -e '1,2d' -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g'
+	 echo '};'} > reboot.h
+
+acid:V:
+	$CC -a -w main.c>acid
+
+dpart: dpart.$O
+	$LD -o dpart dpart.$O
+
+xenstore: xenstore.$O
+	$LD -o xenstore xenstore.$O
+
+# XXX this is wrong if we're cross-compiling
+xenbin:	xenbin.$O
+	$LD -o xenbin xenbin.$O
+xenelf: xenelf.$O
+	$LD -o xenelf xenelf.$O
+
+%.clean:V:
+	rm -f $stem.c [9bz]$stem [9bz]$stem.gz 9$stem.elf boot$stem.* reboot.h init.h xendat.h xendefs.h $PCHEADERS dpart xenbin xenelf xenstore
+
+
--- /dev/null
+++ b/sys/src/9/xen/mmu.c
@@ -1,0 +1,595 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+
+int paemode;
+uvlong *xenpdpt;	/* this needs to go in Mach for multiprocessor guest */
+
+#define LOG(a)  
+#define PUTMMULOG(a)
+#define MFN(pa)		(patomfn[(pa)>>PGSHIFT])
+#define	MAPPN(x)	(paemode? matopfn[*(uvlong*)(&x)>>PGSHIFT]<<PGSHIFT : matopfn[(x)>>PGSHIFT]<<PGSHIFT)
+
+#define	DATASEGM(p) 	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
+#define	EXECSEGM(p) 	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
+#define	TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
+			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
+
+Segdesc gdt[NGDT] =
+{
+[NULLSEG]	{ 0, 0},		/* null descriptor */
+[KDSEG]		DATASEGM(0),		/* kernel data/stack */
+[KESEG]		EXECSEGM(0),		/* kernel code */
+[UDSEG]		DATASEGM(3),		/* user data/stack */
+[UESEG]		EXECSEGM(3),		/* user code */
+[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
+};
+
+/* note: pdb must already be pinned */
+static void
+taskswitch(Page *pdb, ulong stack)
+{
+	Tss *tss;
+
+	tss = m->tss;
+	tss->ss0 = KDSEL;
+	tss->esp0 = stack;
+	tss->ss1 = KDSEL;
+	tss->esp1 = stack;
+	tss->ss2 = KDSEL;
+	tss->esp2 = stack;
+	//tss->cr3 = pdb;
+	HYPERVISOR_stack_switch(KDSEL, stack);
+	mmuflushtlb(pdb);
+}
+
+void
+mmuflushtlb(Page *pdb)
+{
+	int s, i;
+
+	if(!paemode){
+		if(pdb)
+			xenptswitch(pdb->pa);
+		else
+			xenptswitch(PADDR(m->pdb));
+	}else{
+		if(pdb){
+			s = splhi();
+			for(i = 0; i < 3; i++){
+				xenupdate((ulong*)&xenpdpt[i], pdb->pa | PTEVALID);
+				pdb = pdb->next;
+			}
+			splx(s);
+		}else{
+			s = splhi();
+			for(i = 0; i < 3; i++)
+				xenupdatema((ulong*)&xenpdpt[i], ((uvlong*)m->pdb)[i]);
+			splx(s);
+		}
+		xentlbflush();
+	}
+}
+
+/* 
+ * On processors that support it, we set the PTEGLOBAL bit in
+ * page table and page directory entries that map kernel memory.
+ * Doing this tells the processor not to bother flushing them
+ * from the TLB when doing the TLB flush associated with a 
+ * context switch (write to CR3).  Since kernel memory mappings
+ * are never removed, this is safe.  (If we ever remove kernel memory
+ * mappings, we can do a full flush by turning off the PGE bit in CR4,
+ * writing to CR3, and then turning the PGE bit back on.) 
+ *
+ * See also mmukmap below.
+ * 
+ * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
+ */
+static void
+memglobal(void)
+{
+	int i, j;
+	ulong *pde, *pte;
+
+	/* only need to do this once, on bootstrap processor */
+	if(m->machno != 0)
+		return;
+
+	if(!m->havepge)
+		return;
+
+	pde = m->pdb;
+	for(i=512; i<1024; i++){	/* 512: start at entry for virtual 0x80000000 */
+		if(pde[i] & PTEVALID){
+			pde[i] |= PTEGLOBAL;
+			if(!(pde[i] & PTESIZE)){
+				pte = KADDR(pde[i]&~(BY2PG-1));
+				for(j=0; j<1024; j++)
+					if(pte[j] & PTEVALID)
+						pte[j] |= PTEGLOBAL;
+			}
+		}
+	}			
+}
+
+ulong
+mmumapframe(ulong va, ulong mfn)
+{
+	ulong *pte, pdbx;
+	uvlong ma;
+
+	/* 
+	 * map machine frame number to a virtual address.
+	 * When called the pagedir and page table exist, we just
+	 * need to fill in a page table entry.
+	 */
+	ma = ((uvlong)mfn<<PGSHIFT) | PTEVALID|PTEWRITE;
+	pdbx = PDX(va);
+	pte = KADDR(MAPPN(PDB(m->pdb,va)[pdbx]));
+	xenupdatema(&pte[PTX(va)], ma);
+	return va;
+}
+
+void
+mmumapcpu0(void)
+{
+	ulong *pdb, *pte, va, pa, pdbx;
+
+	if(strstr(xenstart->magic, "x86_32p"))
+		paemode = 1;
+	hypervisor_virt_start = paemode ? 0xF5800000 : 0xFC000000;
+	patomfn = (ulong*)xenstart->mfn_list;
+	matopfn = (ulong*)hypervisor_virt_start;
+	/* Xen bug? can't touch top entry in PDPT */
+	if(paemode)
+		hypervisor_virt_start = 0xC0000000;
+
+	/* 
+	 * map CPU0MACH at MACHADDR.
+	 * When called the pagedir and page table exist, we just
+	 * need to fill in a page table entry.
+	 */
+	pdb = (ulong*)xenstart->pt_base;
+	va = MACHADDR;
+	pa = PADDR(CPU0MACH) | PTEVALID|PTEWRITE;
+	pdbx = PDX(va);
+	pdb = PDB(pdb, va);
+	pte = KADDR(MAPPN(pdb[pdbx]));
+	xenupdate(&pte[PTX(va)], pa);
+}
+
+void
+mmuinit(void)
+{
+//XXX	ulong x;
+//XXX	ushort ptr[3];
+	ulong *pte, npgs, pa;
+	extern int rtsr(void);
+
+	if(paemode){
+		int i;
+		xenpdpt = (uvlong*)m->pdb;
+		m->pdb = xspanalloc(32, 32, 0);
+		/* clear "reserved" bits in initial page directory pointers -- Xen bug? */
+		for(i = 0; i < 4; i++)
+			((uvlong*)m->pdb)[i] = xenpdpt[i] & ~0x1E6LL;
+	}
+
+	/* 
+	 * So far only memory up to xentop is mapped, map the rest.
+	 * We can't use large pages because our contiguous PA space
+	 * is not necessarily contiguous in MA.
+	 */
+	npgs = conf.mem[0].npage;
+	for(pa=conf.mem[0].base; npgs; npgs--, pa+=BY2PG) {
+		pte = mmuwalk(m->pdb, (ulong)KADDR(pa), 2, 1);
+		if(!pte)
+			panic("mmuinit");
+		xenupdate(pte, pa|PTEVALID|PTEWRITE);
+	}
+
+	memglobal();
+
+	m->tss = malloc(sizeof(Tss));
+	memset(m->tss, 0, sizeof(Tss));
+	m->tss->iomap = 0xDFFF<<16;
+
+	/*
+	 * We used to keep the GDT in the Mach structure, but it
+	 * turns out that that slows down access to the rest of the
+	 * page.  Since the Mach structure is accessed quite often,
+	 * it pays off anywhere from a factor of 1.25 to 2 on real
+	 * hardware to separate them (the AMDs are more sensitive
+	 * than Intels in this regard).  Under VMware it pays off
+	 * a factor of about 10 to 100.
+	 */
+
+#ifdef we_dont_set_gdt_or_lidt
+	memmove(m->gdt, gdt, sizeof gdt);
+	x = (ulong)m->tss;
+	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
+	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
+
+	ptr[0] = sizeof(gdt)-1;
+	x = (ulong)m->gdt;
+	ptr[1] = x & 0xFFFF;
+	ptr[2] = (x>>16) & 0xFFFF;
+	lgdt(ptr);
+
+	ptr[0] = sizeof(Segdesc)*256-1;
+	x = IDTADDR;
+	ptr[1] = x & 0xFFFF;
+	ptr[2] = (x>>16) & 0xFFFF;
+	lidt(ptr);
+#endif
+
+#ifdef we_may_eventually_want_this
+	/* make kernel text unwritable */
+	for(x = KTZERO; x < (ulong)etext; x += BY2PG){
+		p = mmuwalk(m->pdb, x, 2, 0);
+		if(p == nil)
+			panic("mmuinit");
+		*p &= ~PTEWRITE;
+	}
+#endif
+
+	taskswitch(0,  (ulong)m + BY2PG);
+#ifdef we_dont_do_this
+	ltr(TSSSEL);
+#endif
+}
+
+void
+flushmmu(void)
+{
+	int s;
+
+	s = splhi();
+	up->newtlb = 1;
+	mmuswitch(up);
+	splx(s);
+}
+
+static ulong*
+mmupdb(Page *pg, ulong va)
+{
+	int i;
+
+	for(i = PAX(va); i > 0; i -= 2)
+		pg = pg->next;
+	return (ulong*)pg->va;
+}
+	
+/*
+ * This can be called with an active pdb, so use Xen calls to zero it out.
+ */
+static void
+mmuptefree(Proc* proc)
+{
+	ulong *pdb, va;
+	Page **last, *page;
+
+	if(proc->mmupdb && proc->mmuused){
+		last = &proc->mmuused;
+		for(page = *last; page; page = page->next){
+			/* this is no longer a pte page so make it readwrite */
+			va = page->daddr;
+			pdb = mmupdb(proc->mmupdb, va);
+			xenupdatema(&pdb[PDX(va)], 0);
+			xenptunpin(page->va);
+			last = &page->next;
+		}
+		*last = proc->mmufree;
+		proc->mmufree = proc->mmuused;
+		proc->mmuused = 0;
+	}
+}
+
+void
+mmuswitch(Proc* proc)
+{
+	//ulong *pdb;
+
+	if(proc->newtlb){
+		mmuptefree(proc);
+		proc->newtlb = 0;
+	}
+
+	if(proc->mmupdb){
+		//XXX doesn't work for some reason, but it's not needed for uniprocessor
+		//pdb = (ulong*)proc->mmupdb->va;
+		//xenupdate(&pdb[PDX(MACHADDR)], m->pdb[PDX(MACHADDR)]);
+		taskswitch(proc->mmupdb, (ulong)(proc->kstack+KSTACK));
+	}
+	else
+		taskswitch(0, (ulong)(proc->kstack+KSTACK));
+}
+
+void
+mmurelease(Proc* proc)
+{
+	Page *page, *next;
+
+	/*
+	 * Release any pages allocated for a page directory base or page-tables
+	 * for this process:
+	 *   switch to the prototype pdb for this processor (m->pdb);
+	 *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
+	 *   onto the process' free list (proc->mmufree). This has the side-effect of
+	 *   cleaning any user entries in the pdb (proc->mmupdb);
+	 *   if there's a pdb put it in the cache of pre-initialised pdb's
+	 *   for this processor (m->pdbpool) or on the process' free list;
+	 *   finally, place any pages freed back into the free pool (palloc).
+	 * This routine is only called from sched() with palloc locked.
+	 */
+	taskswitch(0, (ulong)m + BY2PG);
+	mmuptefree(proc);
+
+	if((page = proc->mmupdb) != 0){
+		proc->mmupdb = 0;
+		while(page){
+			next = page->next;
+			/* it's not a page table anymore, mark it rw */
+			xenptunpin(page->va);
+			if(paemode || m->pdbcnt > 10){
+				page->next = proc->mmufree;
+				proc->mmufree = page;
+			}
+			else{
+				page->next = m->pdbpool;
+				m->pdbpool = page;
+				m->pdbcnt++;
+			}
+			page = next;
+		}
+	}
+
+	for(page = proc->mmufree; page; page = next){
+		next = page->next;
+		if(--page->ref)
+			panic("mmurelease: page->ref %d\n", page->ref);
+		pagechainhead(page);
+	}
+	if(proc->mmufree && palloc.r.p)
+		wakeup(&palloc.r);
+	proc->mmufree = 0;
+}
+
+static Page*
+mmupdballoc(ulong va, void *mpdb)
+{
+	int s;
+	Page *page;
+	Page *badpages, *pg;
+
+	s = splhi();
+	/*
+	 * All page tables must be read-only.  We will mark them
+	 * readwrite later when we free them and they are no
+	 * longer used as page tables.
+	 */
+	if(m->pdbpool == 0){
+		spllo();
+		badpages = 0;
+		for (;;) {
+			page = newpage(0, 0, 0);
+			page->va = VA(kmap(page));
+			if(mpdb)
+				memmove((void*)page->va, mpdb, BY2PG);
+			else
+				memset((void*)page->va, 0, BY2PG);
+			if (xenpgdpin(page->va))
+				break;
+			/*
+			 * XXX Plan 9 is a bit lax about putting pages on the free list when they are
+			 * still mapped (r/w) by some process's page table.  From Plan 9's point
+			 * of view this is safe because any such process will have up->newtlb set,
+			 * so the mapping will be cleared before the process is dispatched.  But the Xen
+			 * hypervisor has no way of knowing this, so it refuses to pin the page for use
+			 * as a pagetable.
+			 */
+			if(0) print("bad pgdpin %lux va %lux copy %lux %s\n", MFN(PADDR(page->va)), va, (ulong)mpdb, up? up->text: "");
+			page->next = badpages;
+			badpages = page;
+		}
+		while (badpages != 0) {
+			pg = badpages;
+			badpages = badpages->next;
+			putpage(pg);
+		}
+	}
+	else{
+		page = m->pdbpool;
+		m->pdbpool = page->next;
+		m->pdbcnt--;
+		if (!xenpgdpin(page->va))
+			panic("xenpgdpin");
+	}
+	splx(s);
+
+	page->next = 0;
+	return page;
+}
+
+void
+checkmmu(ulong va, ulong pa)
+{
+	ulong *pdb, *pte;
+	int pdbx;
+	
+	if(up->mmupdb == 0)
+		return;
+
+	pdb = mmupdb(up->mmupdb, va);
+	pdbx = PDX(va);
+	if(MAPPN(pdb[pdbx]) == 0){
+		/* okay to be empty - will fault and get filled */
+		return;
+	}
+	
+	pte = KADDR(MAPPN(pdb[pdbx]));
+	if(MAPPN(pte[PTX(va)]) != pa){
+		if(!paemode)
+		  print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%08lux (0x%08lux)\n",
+			up->pid, up->text,
+			va, pa, pte[PTX(va)], MAPPN(pte[PTX(va)]));
+		else
+		  print("%ld %s: va=0x%08lux pa=0x%08lux pte=0x%16llux (0x%08lux)\n",
+			up->pid, up->text,
+			va, pa, *(uvlong*)&pte[PTX(va)], MAPPN(pte[PTX(va)]));
+	}
+}
+
+void
+putmmu(ulong va, ulong pa, Page*)
+{
+	int pdbx;
+	Page *page;
+	Page *badpages, *pg;
+	ulong *pdb, *pte;
+	int i, s;
+
+	PUTMMULOG(dprint("putmmu va %lux pa %lux\n", va, pa);)
+	if(up->mmupdb == 0){
+		if(!paemode)
+			up->mmupdb = mmupdballoc(va, m->pdb);
+		else {
+			page = 0;
+			for(i = 4; i >= 0; i -= 2){
+				if(m->pdb[i])
+					pg = mmupdballoc(va, KADDR(MAPPN(m->pdb[i])));
+				else
+					pg = mmupdballoc(va, 0);
+				pg->next = page;
+				page = pg;
+			}
+			up->mmupdb = page;
+		}
+	}
+	pdb = mmupdb(up->mmupdb, va);
+	pdbx = PDX(va);
+
+	if(PPN(pdb[pdbx]) == 0){
+		PUTMMULOG(dprint("new pt page for index %d pdb %lux\n", pdbx, (ulong)pdb);)
+		/* mark page as readonly before using as a page table */
+		if(up->mmufree == 0){
+			badpages = 0;
+			for (;;) {
+				page = newpage(1, 0, 0);
+				page->va = VA(kmap(page));
+				if (xenptpin(page->va))
+					break;
+				if(0) print("bad pin %lux va %lux %s\n", MFN(PADDR(page->va)), va, up->text);
+				page->next = badpages;
+				badpages = page;
+			}
+			while (badpages != 0) {
+				pg = badpages;
+				badpages = badpages->next;
+				putpage(pg);
+			}
+		}
+		else {
+			page = up->mmufree;
+			up->mmufree = page->next;
+			memset((void*)page->va, 0, BY2PG);
+			if (!xenptpin(page->va))
+				panic("xenptpin");
+		}
+
+		xenupdate(&pdb[pdbx], page->pa|PTEVALID|PTEUSER|PTEWRITE);
+
+		page->daddr = va;
+		page->next = up->mmuused;
+		up->mmuused = page;
+	}
+
+	pte = KADDR(MAPPN(pdb[pdbx]));
+	PUTMMULOG(dprint("pte %lux index %lud old %lux new %lux mfn %lux\n", (ulong)pte, PTX(va), pte[PTX(va)], pa|PTEUSER, MFN(pa));)
+	xenupdate(&pte[PTX(va)], pa|PTEUSER);
+
+	s = splhi();
+	//XXX doesn't work for some reason, but it's not needed for uniprocessor
+	//xenupdate(&pdb[PDX(MACHADDR)], m->pdb[PDX(MACHADDR)]);
+	mmuflushtlb(up->mmupdb);
+	splx(s);
+}
+
+ulong*
+mmuwalk(ulong* pdb, ulong va, int level, int create)
+{
+	ulong pa, va2, *table;
+
+	/*
+	 * Walk the page-table pointed to by pdb and return a pointer
+	 * to the entry for virtual address va at the requested level.
+	 * If the entry is invalid and create isn't requested then bail
+	 * out early. Otherwise, for the 2nd level walk, allocate a new
+	 * page-table page and register it in the 1st level.
+	 */
+	if(paemode){
+		pdb = &pdb[PAX(va)];
+		if(!(*pdb & PTEVALID)){
+			if(create == 0)
+				return 0;
+			panic("mmuwalk: missing pgdir ptr for va=%lux\n", va);
+		}
+		pdb = KADDR(MAPPN(*pdb));
+	}
+	table = &pdb[PDX(va)];
+	if(!(*table & PTEVALID) && create == 0)
+		return 0;
+
+	switch(level){
+
+	default:
+		return 0;
+
+	case 1:
+		return table;
+
+	case 2:
+		if(*table & PTESIZE)
+			panic("mmuwalk2: va %luX entry %luX\n", va, *table);
+		if(!(*table & PTEVALID)){
+			va2 = (ulong)xspanalloc(BY2PG, BY2PG, 0);
+			pa = PADDR(va2);
+			xenptpin(va2);
+			xenupdate(table, pa|PTEWRITE|PTEVALID);
+		}
+		table = KADDR(MAPPN(*table));
+
+		return &table[PTX(va)];
+	}
+}
+
+int
+mmukmapsync(ulong va)
+{
+	USED(va);
+	return 0;
+}
+
+/*
+ * More debugging.
+ */
+void
+countpagerefs(ulong *ref, int print)
+{
+	USED(ref);
+	USED(print);
+}
+
+/*
+ * Return the number of bytes that can be accessed via KADDR(pa).
+ * If pa is not a valid argument to KADDR, return 0.
+ */
+ulong
+cankaddr(ulong pa)
+{
+	if(pa >= -KZERO)
+		return 0;
+	return -KZERO - pa;
+}
--- /dev/null
+++ b/sys/src/9/xen/plan9l.s
@@ -1,0 +1,53 @@
+#include "xendefs.h"
+#include "mem.h"
+
+/*
+ * This must match io.h.
+ */
+#define VectorSYSCALL	0x40
+
+/*
+ *  Used to get to the first process:
+ * 	set up an interrupt return frame and IRET to user level.
+ */
+TEXT touser(SB), $0
+	PUSHL	$(UDSEL)			/* old ss */
+	MOVL	sp+0(FP), AX			/* old sp */
+	PUSHL	AX
+	MOVL	$0x200, AX			/* interrupt enable flag */
+	PUSHL	AX				/* old flags */
+	PUSHL	$(UESEL)			/* old cs */
+	PUSHL	$(UTZERO+32)			/* old pc */
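+	/* UTZERO+32 is init's entry point, just past the 32-byte Plan 9 a.out header */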
+	MOVL	$(UDSEL), AX
+	MOVW	AX, DS
+	MOVW	AX, ES
+	MOVW	AX, GS
+	MOVW	AX, FS
+	IRETL
+
+/*
+ * This is merely _strayintr from l.s optimised to vector
+ * to syscall() without going through trap().
+ */
+TEXT _syscallintr(SB), $0
+	PUSHL	$VectorSYSCALL			/* trap type */
+
+	PUSHL	DS
+	PUSHL	ES
+	PUSHL	FS
+	PUSHL	GS
+	PUSHAL
+	MOVL	$(KDSEL), AX
+	MOVW	AX, DS
+	MOVW	AX, ES
+	PUSHL	SP
+	CALL	syscall(SB)
+
+	POPL	AX
+	POPAL
+	POPL	GS
+	POPL	FS
+	POPL	ES
+	POPL	DS
+	ADDL	$8, SP				/* pop error code and trap type */
+	IRETL
--- /dev/null
+++ b/sys/src/9/xen/screen.h
@@ -1,0 +1,186 @@
+typedef struct Cursor Cursor;
+typedef struct Cursorinfo Cursorinfo;
+struct Cursorinfo {
+	Cursor;
+	Lock;
+};
+
+/* devmouse.c */
+extern void mousetrack(int, int, int, int);
+extern void absmousetrack(int, int, int, int);
+extern Point mousexy(void);
+
+extern void mouseaccelerate(int);
+extern int m3mouseputc(Queue*, int);
+extern int m5mouseputc(Queue*, int);
+extern int mouseputc(Queue*, int);
+
+extern Cursorinfo cursor;
+extern Cursor arrow;
+
+/*
+ * Generic VGA registers.
+ */
+enum {
+	MiscW		= 0x03C2,	/* Miscellaneous Output (W) */
+	MiscR		= 0x03CC,	/* Miscellaneous Output (R) */
+	Status0		= 0x03C2,	/* Input status 0 (R) */
+	Status1		= 0x03DA,	/* Input Status 1 (R) */
+	FeatureR	= 0x03CA,	/* Feature Control (R) */
+	FeatureW	= 0x03DA,	/* Feature Control (W) */
+
+	Seqx		= 0x03C4,	/* Sequencer Index, Data at Seqx+1 */
+	Crtx		= 0x03D4,	/* CRT Controller Index, Data at Crtx+1 */
+	Grx		= 0x03CE,	/* Graphics Controller Index, Data at Grx+1 */
+	Attrx		= 0x03C0,	/* Attribute Controller Index and Data */
+
+	PaddrW		= 0x03C8,	/* Palette Address Register, write */
+	Pdata		= 0x03C9,	/* Palette Data Register */
+	Pixmask		= 0x03C6,	/* Pixel Mask Register */
+	PaddrR		= 0x03C7,	/* Palette Address Register, read */
+	Pstatus		= 0x03C7,	/* DAC Status (RO) */
+
+	Pcolours	= 256,		/* Palette */
+	Pred		= 0,
+	Pgreen		= 1,
+	Pblue		= 2,
+
+	Pblack		= 0x00,
+	Pwhite		= 0xFF,
+};
+
+#define VGAMEM()	0xA0000
+#define vgai(port)		inb(port)
+#define vgao(port, data)	outb(port, data)
+
+extern int vgaxi(long, uchar);
+extern int vgaxo(long, uchar, uchar);
+
+/*
+ */
+typedef struct VGAdev VGAdev;
+typedef struct VGAcur VGAcur;
+typedef struct VGAscr VGAscr;
+
+struct VGAdev {
+	char*	name;
+
+	void	(*enable)(VGAscr*);
+	void	(*disable)(VGAscr*);
+	void	(*page)(VGAscr*, int);
+	void	(*linear)(VGAscr*, int, int);
+	void	(*drawinit)(VGAscr*);
+	int	(*fill)(VGAscr*, Rectangle, ulong);
+	void	(*ovlctl)(VGAscr*, Chan*, void*, int);
+	int	(*ovlwrite)(VGAscr*, void*, int, vlong);
+	void (*flush)(VGAscr*, Rectangle);
+};
+
+struct VGAcur {
+	char*	name;
+
+	void	(*enable)(VGAscr*);
+	void	(*disable)(VGAscr*);
+	void	(*load)(VGAscr*, Cursor*);
+	int	(*move)(VGAscr*, Point);
+
+	int	doespanning;
+};
+
+/*
+ */
+struct VGAscr {
+	Lock	devlock;
+	VGAdev*	dev;
+	Pcidev*	pci;
+
+	VGAcur*	cur;
+	ulong	storage;
+	Cursor;
+
+	int	useflush;
+
+	ulong	paddr;		/* frame buffer */
+	void*	vaddr;
+	int		apsize;
+
+	ulong	io;				/* device specific registers */
+	ulong	*mmio;
+	
+	ulong	colormap[Pcolours][3];
+	int	palettedepth;
+
+	Memimage* gscreen;
+	Memdata* gscreendata;
+	Memsubfont* memdefont;
+
+	int	(*fill)(VGAscr*, Rectangle, ulong);
+	int	(*scroll)(VGAscr*, Rectangle, Rectangle);
+	void	(*blank)(VGAscr*, int);
+	ulong	id;	/* internal identifier for driver use */
+	int isblank;
+	int overlayinit;
+	int softscreen;
+};
+
+extern VGAscr vgascreen[];
+
+enum {
+	Backgnd		= 0,	/* black */
+};
+
+/* mouse.c */
+extern void mousectl(Cmdbuf*);
+extern void mouseresize(void);
+extern void mouseredraw(void);
+
+/* screen.c */
+extern int		hwaccel;	/* use hw acceleration; default on */
+extern int		hwblank;	/* use hw blanking; default on */
+extern int		panning;	/* use virtual screen panning; default off */
+extern void addvgaseg(char*, ulong, ulong);
+extern uchar* attachscreen(Rectangle*, ulong*, int*, int*, int*);
+extern void	flushmemscreen(Rectangle);
+extern void	cursoron(void);
+extern void	cursoroff(void);
+extern void	setcursor(Cursor*);
+extern int	screensize(int, int, int, ulong);
+extern int	screenaperture(int, int);
+extern Rectangle physgscreenr;	/* actual monitor size */
+extern void	blankscreen(int);
+
+extern VGAcur swcursor;
+extern void swcursorinit(void);
+extern void swcursorhide(void);
+extern void swcursoravoid(Rectangle);
+extern void swcursorunhide(void);
+
+/* devdraw.c */
+extern void	deletescreenimage(void);
+extern void	resetscreenimage(void);
+extern int		drawhasclients(void);
+extern ulong	blanktime;
+extern void	setscreenimageclipr(Rectangle);
+extern void	drawflush(void);
+extern int drawidletime(void);
+extern QLock	drawlock;
+
+/* vga.c */
+extern void	vgascreenwin(VGAscr*);
+extern void	vgaimageinit(ulong);
+extern void	vgalinearpci(VGAscr*);
+extern void	vgalinearaddr(VGAscr*, ulong, int);
+
+extern void	drawblankscreen(int);
+extern void	vgablank(VGAscr*, int);
+
+extern Lock	vgascreenlock;
+
+#define ishwimage(i)	(vgascreen[0].gscreendata && (i)->data->bdata == vgascreen[0].gscreendata->bdata)
+
+/* swcursor.c */
+void		swcursorhide(void);
+void		swcursoravoid(Rectangle);
+void		swcursordraw(Point);
+void		swcursorload(Cursor *);
+void		swcursorinit(void);
--- /dev/null
+++ b/sys/src/9/xen/sdxen.c
@@ -1,0 +1,356 @@
+/*
+ * Xen block storage device frontend
+ *
+ * The present implementation follows the principle of
+ * "what's the simplest thing that could possibly work?".
+ * We can think about performance later.
+ * We can think about dynamically attaching and removing devices later.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "../port/error.h"
+
+#include "../port/sd.h"
+
+#define LOG(a)
+
+/*
+ * conversions to machine page numbers, pages and addresses
+ */
+#define MFN(pa)		(patomfn[(pa)>>PGSHIFT])
+#define MFNPG(pa)		(MFN(pa)<<PGSHIFT)
+#define PA2MA(pa)		(MFNPG(pa) | PGOFF(pa))
+#define VA2MA(va)		PA2MA(PADDR(va))
+#define VA2MFN(va)		MFN(PADDR(va))
+
+enum {
+	Ndevs		= 4,
+	MajorDevSD	= 0x800,
+	MajorDevHDA	= 0x300,
+	MajorDevHDC	= 0x1600,
+	MajorDevXVD	= 0xCA00,
+};
+
+extern SDifc sdxenifc;
+
+typedef struct Ctlr Ctlr;
+
+struct Ctlr {
+	int	online;
+	ulong	secsize;
+	ulong	sectors;
+	int	backend;
+	int	devid;
+	int	evtchn;
+	blkif_front_ring_t ring;
+	int	ringref;
+	Lock	ringlock;
+	char	*frame;
+	QLock	iolock;
+	int	iodone;
+	Rendez	wiodone;
+};
+
+static int
+ringinit(Ctlr *ctlr, char *a)
+{
+	blkif_sring_t *sr;
+
+	sr = (blkif_sring_t*)a;
+	memset(sr, 0, BY2PG);
+	SHARED_RING_INIT(sr);
+	FRONT_RING_INIT(&ctlr->ring, sr, BY2PG);
+	ctlr->ringref = shareframe(ctlr->backend, sr, 1);
+	return BY2PG;
+}
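+
+/*
+ * The ring macros above come from Xen's io/ring.h: a single shared
+ * page holds both the request and response queues, and shareframe()
+ * grants the backend access to it; the grant reference returned is
+ * advertised to the backend as "ring-ref" in backendconnect() below.
+ */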
+
+static int
+vbdsend(Ctlr *ctlr, int write, int ref, int nb, uvlong bno)
+{
+	blkif_request_t *req;
+	int i, notify;
+
+	ilock(&ctlr->ringlock);		// XXX conservative
+	i = ctlr->ring.req_prod_pvt;
+	req = RING_GET_REQUEST(&ctlr->ring, i);	// XXX overflow?
+
+	req->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;
+	req->nr_segments = 1;
+	req->handle = ctlr->devid;
+	req->id = 1;
+	req->sector_number = bno;
+	req->seg[0].gref = ref;
+	req->seg[0].first_sect = 0;
+	req->seg[0].last_sect = nb-1;
+
+	ctlr->ring.req_prod_pvt = i+1;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ctlr->ring, notify);
+	iunlock(&ctlr->ringlock);
+	return notify;
+}
+
+static void
+backendconnect(Ctlr *ctlr)
+{
+	char dir[64];
+	char buf[64];
+
+	sprint(dir, "device/vbd/%d/", ctlr->devid);
+	xenstore_setd(dir, "ring-ref", ctlr->ringref);
+	xenstore_setd(dir, "event-channel", ctlr->evtchn);
+	xenstore_setd(dir, "state", XenbusStateInitialised);
+	xenstore_gets(dir, "backend", buf, sizeof buf);
+	sprint(dir, "%s/", buf);
+	HYPERVISOR_yield();
+	xenstore_gets(dir, "state", buf, sizeof buf);
+	while (strtol(buf, 0, 0) != XenbusStateConnected) {
+		print("sdxen: waiting for vbd %d to connect\n", ctlr->devid);
+		tsleep(&up->sleep, return0, 0, 1000);
+		xenstore_gets(dir, "state", buf, sizeof buf);
+	}
+	xenstore_gets(dir, "sector-size", buf, sizeof buf);
+	ctlr->secsize = strtol(buf, 0, 0);
+	xenstore_gets(dir, "sectors", buf, sizeof buf);
+	ctlr->sectors = strtol(buf, 0, 0);
+	print("sdxen: backend %s secsize %ld sectors %ld\n", dir, ctlr->secsize, ctlr->sectors);
+	if (ctlr->secsize > BY2PG)
+		panic("sdxen: sector size bigger than mmu page size");
+}
+
+static void
+backendactivate(Ctlr *ctlr)
+{
+	char dir[64];
+
+	sprint(dir, "device/vbd/%d/", ctlr->devid);
+	xenstore_setd(dir, "state", XenbusStateConnected);
+}
+
+static SDev*
+xenpnp(void)
+{
+	SDev *sdev[Ndevs];
+	static char idno[Ndevs] = { '0', 'C', 'D', 'E' };
+	static char nunit[Ndevs] = { 8, 2, 2, 8 };
+	int i;
+
+	for (i = 0; i < Ndevs; i++) {
+		sdev[i] = mallocz(sizeof(SDev), 1);
+		sdev[i]->ifc = &sdxenifc;
+		sdev[i]->idno = idno[i];
+		sdev[i]->nunit = nunit[i];
+		sdev[i]->ctlr = (Ctlr**)mallocz(sdev[i]->nunit*sizeof(Ctlr*), 1);
+		if (i > 0)
+			sdev[i]->next = sdev[i-1];
+	}
+	return sdev[Ndevs-1];
+}
+
+static int
+linuxdev(int idno, int subno)
+{
+	switch (idno) {
+	case '0':
+		return MajorDevSD + 16*subno;
+	case 'C':
+		return MajorDevHDA + 64*subno;
+	case 'D':
+		return MajorDevHDC + 64*subno;
+	case 'E':
+		return MajorDevXVD + 16*subno;
+	default:
+		return 0;
+	}
+}
+
+static int
+xenverify(SDunit *unit)
+{
+	Ctlr *ctlr;
+	char dir[64];
+	char buf[64];
+	int devid;
+	int npage;
+	char *p;
+
+	if (unit->subno > unit->dev->nunit)
+		return 0;
+	devid = linuxdev(unit->dev->idno, unit->subno);
+	sprint(dir, "device/vbd/%d/", devid);
+	if (xenstore_gets(dir, "backend-id", buf, sizeof buf) <= 0)
+		return 0;
+
+	ctlr = mallocz(sizeof(Ctlr), 1);
+	((Ctlr**)unit->dev->ctlr)[unit->subno] = ctlr;
+	ctlr->devid = devid;
+	ctlr->backend = strtol(buf, 0, 0);
+
+	npage = 2;
+	p = xspanalloc(npage<<PGSHIFT, BY2PG, 0);
+	p += ringinit(ctlr, p);
+	ctlr->frame = p;
+	ctlr->evtchn = xenchanalloc(ctlr->backend);
+	backendconnect(ctlr);
+
+	unit->inquiry[0] = 0;		// XXX how do we know if it's a CD?
+	unit->inquiry[2] = 2;
+	unit->inquiry[3] = 2;
+	unit->inquiry[4] = sizeof(unit->inquiry)-4;
+	strcpy((char*)&unit->inquiry[8], "Xen block device");
+
+	return 1;
+}
+
+static int
+wiodone(void *a)
+{
+	return ((Ctlr*)a)->iodone != 0;
+}
+
+static void
+sdxenintr(Ureg *, void *a)
+{
+	Ctlr *ctlr = a;
+	blkif_response_t *rsp;
+	int i, avail;
+
+	ilock(&ctlr->ringlock);	// XXX conservative
+	for (;;) {
+		RING_FINAL_CHECK_FOR_RESPONSES(&ctlr->ring, avail);
+		if (!avail)
+			break;
+		i = ctlr->ring.rsp_cons;
+		rsp = RING_GET_RESPONSE(&ctlr->ring, i);
+		LOG(dprint("sdxen rsp %llud %d %d\n", rsp->id, rsp->operation, rsp->status);)
+		if (rsp->status == BLKIF_RSP_OKAY)
+			ctlr->iodone = 1;
+		else
+			ctlr->iodone = -1;
+		ctlr->ring.rsp_cons = ++i;
+	}
+	iunlock(&ctlr->ringlock);
+	if (ctlr->iodone)
+		wakeup(&ctlr->wiodone);
+}
+
+static Ctlr *kickctlr;
+
+static void
+kickme(void)
+{
+	Ctlr *ctlr = kickctlr;
+	shared_info_t *s;
+
+	if (ctlr) {
+		s = HYPERVISOR_shared_info;
+		dprint("tick %d %d prod %d cons %d pending %x mask %x\n",
+			 m->ticks, ctlr->iodone, ctlr->ring.sring->rsp_prod, ctlr->ring.rsp_cons,
+			s->evtchn_pending[0], s->evtchn_mask[0]);
+		sdxenintr(0, ctlr);
+	}
+}
+
+static int
+xenonline(SDunit *unit)
+{
+	Ctlr *ctlr;
+
+	ctlr = ((Ctlr**)unit->dev->ctlr)[unit->subno];
+	unit->sectors = ctlr->sectors;
+	unit->secsize = ctlr->secsize;
+	if (ctlr->online == 0) {
+		intrenable(ctlr->evtchn, sdxenintr, ctlr, BUSUNKNOWN, "vbd");
+		//kickctlr = ctlr;
+		//addclock0link(kickme, 10000);
+		backendactivate(ctlr);
+		ctlr->online = 1;
+	}
+
+	return 1;
+}
+
+static int
+xenrio(SDreq*)
+{
+	return -1;
+}
+
+static long
+xenbio(SDunit* unit, int lun, int write, void* data, long nb, uvlong bno)
+{
+	Ctlr *ctlr;
+	char *buf;
+	long bcount, len;
+	int ref;
+	int n;
+
+	USED(lun);	// XXX meaningless
+	ctlr = ((Ctlr**)unit->dev->ctlr)[unit->subno];
+	LOG(("xenbio %c %lux %ld %lld\n", write? 'w' : 'r', (ulong)data, nb, bno);)
+	buf = data;
+	// XXX extra copying & fragmentation could be avoided by
+	// redefining sdmalloc() to get page-aligned buffers
+	if ((ulong)data&(BY2PG-1))
+		buf = ctlr->frame;
+	bcount = BY2PG/unit->secsize;
+	qlock(&ctlr->iolock);
+	for (n = nb; n > 0; n -= bcount) {
+		ref = shareframe(ctlr->backend, buf, !write);
+		if (bcount > n)
+			bcount = n;
+		len = bcount*unit->secsize;
+		if (write && buf == ctlr->frame)
+			memmove(buf, data, len);
+		ctlr->iodone = 0;
+		if (vbdsend(ctlr, write, ref, bcount, bno))
+			xenchannotify(ctlr->evtchn);
+		LOG(dprint("sleeping %d prod %d cons %d pending %x mask %x \n", ctlr->iodone, ctlr->ring.sring->rsp_prod, ctlr->ring.rsp_cons,
+						HYPERVISOR_shared_info->evtchn_pending[0], HYPERVISOR_shared_info->evtchn_mask[0]);)
+		sleep(&ctlr->wiodone, wiodone, ctlr);
+		xengrantend(ref);
+		if (ctlr->iodone < 0) {
+			qunlock(&ctlr->iolock);
+			return -1;
+		}
+		if (buf == ctlr->frame) {
+			if (!write)
+				memmove(data, buf, len);
+			data = (char*)data + len;
+		} else
+			buf += len;
+		bno += bcount;
+	}
+	qunlock(&ctlr->iolock);
+	return (nb-n)*unit->secsize;
+}
+
+static void
+xenclear(SDev *)
+{
+}
+
+SDifc sdxenifc = {
+	"xen",				/* name */
+
+	xenpnp,				/* pnp */
+	0,			/* legacy */
+	0,			/* enable */
+	0,			/* disable */
+
+	xenverify,			/* verify */
+	xenonline,			/* online */
+	xenrio,				/* rio */
+	0,			/* rctl */
+	0,			/* wctl */
+
+	xenbio,				/* bio */
+	0,			/* probe */
+	xenclear,			/* clear */
+	0,			/* stat */
+};
--- /dev/null
+++ b/sys/src/9/xen/trap.c
@@ -1,0 +1,1108 @@
+#include	"u.h"
+#include	"tos.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"ureg.h"
+#include	"../port/error.h"
+#include	<trace.h>
+
+#define INTRLOG(a)  
+#define SETUPLOG(a)
+#define SYSCALLLOG(a)
+#define FAULTLOG(a) 
+#define FAULTLOGFAST(a)
+#define POSTNOTELOG(a)
+#define TRAPLOG(a)
+
+int faultpanic = 0;
+
+enum {
+	/* trap_info_t flags */
+	SPL0 = 0,
+	SPL3 = 3,
+	EvDisable = 4,
+};
+  
+void	noted(Ureg*, ulong);
+
+static void debugbpt(Ureg*, void*);
+static void fault386(Ureg*, void*);
+static void safe_fault386(Ureg*, void*);
+static void doublefault(Ureg*, void*);
+static void unexpected(Ureg*, void*);
+static void _dumpstack(Ureg*);
+
+static Lock vctllock;
+static Vctl *vctl[256];
+
+enum
+{
+	Ntimevec = 20		/* number of time buckets for each intr */
+};
+ulong intrtimes[256][Ntimevec];
+
+void
+intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
+{
+	int vno;
+	Vctl *v;
+
+/**/
+	SETUPLOG(dprint("intrenable: irq %d, f %p, a %p, tbdf 0x%x, name %s\n", 
+			irq, f, a, tbdf, name);)
+/**/
+	if(f == nil){
+		print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
+			irq, tbdf, name);
+		return;
+	}
+
+	v = xalloc(sizeof(Vctl));
+	v->isintr = 1;
+	v->irq = irq;
+	v->tbdf = tbdf;
+	v->f = f;
+	v->a = a;
+	strncpy(v->name, name, KNAMELEN-1);
+	v->name[KNAMELEN-1] = 0;
+
+	ilock(&vctllock);
+	vno = arch->intrenable(v);
+	if(vno == -1){
+		iunlock(&vctllock);
+		print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
+			irq, tbdf, v->name);
+		xfree(v);
+		return;
+	}
+	if(vctl[vno]){
+		if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
+			panic("intrenable: handler: %s %s %p %p %p %p\n",
+				vctl[vno]->name, v->name,
+				vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
+		v->next = vctl[vno];
+	}
+	vctl[vno] = v;
+	SETUPLOG(dprint("INTRENABLE: vctl[%d] is %p\n", vno, vctl[vno]);)
+	iunlock(&vctllock);
+}
+
+int
+intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
+{
+	Vctl **pv, *v;
+	int vno;
+
+	/*
+	 * For now, none of this will work with the APIC code,
+	 * there is no mapping between irq and vector as the IRQ
+	 * is pretty meaningless.
+	 */
+	if(arch->intrvecno == nil)
+		return -1;
+	vno = arch->intrvecno(irq);
+	ilock(&vctllock);
+	pv = &vctl[vno];
+	while (*pv && 
+		  ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
+		   strcmp((*pv)->name, name)))
+		pv = &((*pv)->next);
+	assert(*pv);
+
+	v = *pv;
+	*pv = (*pv)->next;	/* Link out the entry */
+	
+	if(vctl[vno] == nil && arch->intrdisable != nil)
+		arch->intrdisable(irq);
+	iunlock(&vctllock);
+	xfree(v);
+	return 0;
+}
+
+static long
+irqallocread(Chan*, void *vbuf, long n, vlong offset)
+{
+	char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
+	int m, vno;
+	long oldn;
+	Vctl *v;
+
+	if(n < 0 || offset < 0)
+		error(Ebadarg);
+
+	oldn = n;
+	buf = vbuf;
+	for(vno=0; vno<nelem(vctl); vno++){
+		for(v=vctl[vno]; v; v=v->next){
+			m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
+			if(m <= offset)	/* don't want this entry, skip it */
+				offset -= m;
+			else{
+				/* skip offset bytes */
+				m -= offset;
+				p = str+offset;
+				offset = 0;
+
+				/* write at most min(n,m) bytes */
+				if(m > n)
+					m = n;
+				memmove(buf, p, m);
+				n -= m;
+				buf += m;
+
+				if(n == 0)
+					return oldn;
+			}	
+		}
+	}
+	return oldn - n;
+}
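+
+/*
+ * irqallocread generates one "vno irq name" line per enabled vector;
+ * reading the irqalloc file it backs produces output like this
+ * (illustrative values only):
+ *
+ *	% cat /dev/irqalloc
+ *	         33           1 Xen console
+ *	         34           2 clock
+ */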
+
+void
+trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
+{
+	Vctl *v;
+
+	if(vno < 0 || vno >= VectorPIC)
+		panic("trapenable: vno %d\n", vno);
+	v = xalloc(sizeof(Vctl));
+	v->tbdf = BUSUNKNOWN;
+	v->f = f;
+	v->a = a;
+	strncpy(v->name, name, KNAMELEN);
+	v->name[KNAMELEN-1] = 0;
+
+	lock(&vctllock);
+	if(vctl[vno])
+		v->next = vctl[vno]->next;
+	vctl[vno] = v;
+	unlock(&vctllock);
+}
+
+static void
+nmienable(void)
+{
+	/* leave this here in case plan 9 ever makes it to dom0 */
+#ifdef NOWAY
+	int x;
+
+	/*
+	 * Hack: should be locked with NVRAM access.
+	 */
+	outb(0x70, 0x80);		/* NMI latch clear */
+	outb(0x70, 0);
+
+	x = inb(0x61) & 0x07;		/* Enable NMI */
+	outb(0x61, 0x08|x);
+	outb(0x61, x);
+#endif
+}
+
+/*
+ * we started out doing the 'giant bulk init' for all traps.
+ * we're going to do them one-by-one since error analysis is
+ * so much easier that way.
+ */
+void
+trapinit(void)
+{
+	trap_info_t t[2];
+	ulong vaddr;
+	int v, flag;
+
+	HYPERVISOR_set_callbacks(
+		KESEL, (ulong)hypervisor_callback,
+		KESEL, (ulong)failsafe_callback);
+
+	/* XXX rework as single hypercall once debugged */
+	t[1].address = 0;
+	vaddr = (ulong)vectortable;
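+	/* each vectortable stub is assumed to be 6 bytes long, hence the vaddr += 6 stride below */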
+	for(v = 0; v < 256; v++){
+		switch(v){
+		case VectorBPT:
+		case VectorSYSCALL:
+			flag = SPL3 | EvDisable;
+			break;
+		default:
+			flag = SPL0 | EvDisable;
+			break;
+		}
+		t[0] = (trap_info_t){ v, flag, KESEL, vaddr };
+		if(HYPERVISOR_set_trap_table(t) < 0)
+			panic("trapinit: FAIL: try to set: 0x%x, 0x%x, 0x%x, 0x%ulx\n", 
+				t[0].vector, t[0].flags, t[0].cs, t[0].address);
+		vaddr += 6;
+	}
+
+	/*
+	 * Special traps.
+	 * Syscall() is called directly without going through trap().
+	 */
+	trapenable(VectorBPT, debugbpt, 0, "debugpt");
+	trapenable(VectorPF, fault386, 0, "fault386");
+	trapenable(Vector2F, doublefault, 0, "doublefault");
+	trapenable(Vector15, unexpected, 0, "unexpected");
+
+	nmienable();
+	addarchfile("irqalloc", 0444, irqallocread, nil);
+}
+
+static char* excname[32] = {
+	"divide error",
+	"debug exception",
+	"nonmaskable interrupt",
+	"breakpoint",
+	"overflow",
+	"bounds check",
+	"invalid opcode",
+	"coprocessor not available",
+	"double fault",
+	"coprocessor segment overrun",
+	"invalid TSS",
+	"segment not present",
+	"stack exception",
+	"general protection violation",
+	"page fault",
+	"15 (reserved)",
+	"coprocessor error",
+	"alignment check",
+	"machine check",
+	"19 (reserved)",
+	"20 (reserved)",
+	"21 (reserved)",
+	"22 (reserved)",
+	"23 (reserved)",
+	"24 (reserved)",
+	"25 (reserved)",
+	"26 (reserved)",
+	"27 (reserved)",
+	"28 (reserved)",
+	"29 (reserved)",
+	"30 (reserved)",
+	"31 (reserved)",
+};
+
+/*
+ *  keep histogram of interrupt service times
+ */
+void
+intrtime(Mach*, int vno)
+{
+	ulong diff;
+	ulong x;
+
+	x = perfticks();
+	diff = x - m->perf.intrts;
+	m->perf.intrts = x;
+
+	m->perf.inintr += diff;
+	if(up == nil && m->perf.inidle > diff)
+		m->perf.inidle -= diff;
+
+	diff /= m->cpumhz*100;	// quantum = 100µsec
+	if(diff >= Ntimevec)
+		diff = Ntimevec-1;
+	intrtimes[vno][diff]++;
+}
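+
+/*
+ * Worked example (illustrative): with m->cpumhz == 1000, perfticks
+ * advances 1000 ticks/µs, so a service time of 250000 ticks is 250µs;
+ * diff = 250000/(1000*100) = 2, i.e. the 200-300µs bucket of
+ * intrtimes[vno].
+ */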
+
+/* go to user space */
+void
+kexit(Ureg*)
+{
+	uvlong t;
+	Tos *tos;
+
+	/* precise time accounting, kernel exit */
+	tos = (Tos*)(USTKTOP-sizeof(Tos));
+	cycles(&t);
+	tos->kcycles += t - up->kentry;
+	tos->pcycles = up->pcycles;
+	tos->pid = up->pid;
+	INTRLOG(dprint("leave kexit, TOS %p\n", tos);)
+}
+
+/*
+ *  All traps come here.  It is slower to have all traps call trap()
+ *  rather than directly vectoring the handler.  However, this avoids a
+ *  lot of code duplication and possible bugs.  The only exception is
+ *  VectorSYSCALL.
+ *  Trap is called with interrupts (and events) disabled via interrupt-gates.
+ */
+void
+trap(Ureg* ureg)
+{
+	int clockintr, i, vno, user;
+	char buf[ERRMAX];
+	Vctl *ctl, *v;
+	Mach *mach;
+
+	TRAPLOG(dprint("trap ureg %lux %lux\n", (ulong*)ureg, ureg->trap);)
+	m->perf.intrts = perfticks();
+	user = (ureg->cs & 0xFFFF) == UESEL;
+	if(user){
+		up->dbgreg = ureg;
+		cycles(&up->kentry);
+	}
+
+	clockintr = 0;
+
+	vno = ureg->trap;
+	if(vno < 0 || vno >= 256)
+		panic("bad interrupt number %d\n", vno);
+	TRAPLOG(dprint("trap: vno is 0x%x, vctl[%d] is %p\n", vno, vno, vctl[vno]);)
+	if(ctl = vctl[vno]){
+		INTRLOG(dprint("ctl is %p, isintr is %d\n", ctl, ctl->isintr);)
+		if(ctl->isintr){
+			m->intr++;
+			if(vno >= VectorPIC && vno != VectorSYSCALL)
+				m->lastintr = ctl->irq;
+		}
+
+		INTRLOG(dprint("ctl %p, isr %p\n", ctl, ctl->isr);)
+		if(ctl->isr)
+			ctl->isr(vno);
+		for(v = ctl; v != nil; v = v->next){
+			INTRLOG(dprint("ctl %p, f is %p\n", v, v->f);)
+			if(v->f)
+				v->f(ureg, v->a);
+		}
+		INTRLOG(dprint("ctl %p, eoi %p\n", ctl, ctl->eoi);)
+		if(ctl->eoi)
+			ctl->eoi(vno);
+
+		if(ctl->isintr){
+			intrtime(m, vno);
+
+			//if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
+			if (ctl->tbdf != BUSUNKNOWN && ctl->irq == VIRQ_TIMER)
+				clockintr = 1;
+
+			if(up && !clockintr)
+				preempted();
+		}
+	}
+	else if(vno < nelem(excname) && user){
+		spllo();
+		sprint(buf, "sys: trap: %s", excname[vno]);
+		postnote(up, 1, buf, NDebug);
+	}
+	else if(vno >= VectorPIC && vno != VectorSYSCALL){
+		/*
+		 * An unknown interrupt.
+		 * Check for a default IRQ7. This can happen when
+		 * the IRQ input goes away before the acknowledge.
+		 * In this case, a 'default IRQ7' is generated, but
+		 * the corresponding bit in the ISR isn't set.
+		 * In fact, just ignore all such interrupts.
+		 */
+
+		/* call all interrupt routines, just in case */
+		for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
+			ctl = vctl[i];
+			if(ctl == nil)
+				continue;
+			if(!ctl->isintr)
+				continue;
+			for(v = ctl; v != nil; v = v->next){
+				if(v->f)
+					v->f(ureg, v->a);
+			}
+			/* should we do this? */
+			if(ctl->eoi)
+				ctl->eoi(i);
+		}
+
+		iprint("cpu%d: spurious interrupt %d, last %d\n",
+			m->machno, vno, m->lastintr);
+		if(0)if(conf.nmach > 1){
+			for(i = 0; i < 32; i++){
+				if(!(active.machs & (1<<i)))
+					continue;
+				mach = MACHP(i);
+				if(m->machno == mach->machno)
+					continue;
+				print(" cpu%d: last %d",
+					mach->machno, mach->lastintr);
+			}
+			print("\n");
+		}
+		m->spuriousintr++;
+		if(user)
+			kexit(ureg);
+		return;
+	}
+	else{
+		if(vno == VectorNMI){
+			nmienable();
+			if(m->machno != 0){
+				print("cpu%d: PC %8.8luX\n",
+					m->machno, ureg->pc);
+				for(;;);
+			}
+		}
+		dumpregs(ureg);
+		if(!user){
+			ureg->sp = (ulong)&ureg->sp;
+			_dumpstack(ureg);
+		}
+		if(vno < nelem(excname))
+			panic("%s", excname[vno]);
+		panic("unknown trap/intr: %d\n", vno);
+	}
+	splhi();
+
+	/* delaysched set because we held a lock or because our quantum ended */
+	if(up && up->delaysched && clockintr){
+		INTRLOG(dprint("calling sched in trap? \n");)
+		sched();
+		INTRLOG(dprint("Back from calling sched in trap?\n");)
+		splhi();
+	}
+
+	if(user){
+		if(up->procctl || up->nnote)
+			notify(ureg);
+		kexit(ureg);
+	}
+
+	if (ureg->trap == 0xe) {
+		/*
+		 * on page fault, we need to restore the old spl;
+		 * Xen won't do it for us.
+		 * XXX verify this.
+		 */
+		if (ureg->flags & 0x200)
+			spllo();
+	}
+}
+
+void
+dumpregs2(Ureg* ureg)
+{
+	if(up)
+		print("cpu%d: registers for %s %lud\n",
+			m->machno, up->text, up->pid);
+	else
+		print("cpu%d: registers for kernel\n", m->machno);
+	print("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
+		ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
+	print(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
+	print("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
+		ureg->ax, ureg->bx, ureg->cx, ureg->dx);
+	print("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
+		ureg->si, ureg->di, ureg->bp);
+	print("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
+		ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
+		ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
+}
+
+void
+dumpregs(Ureg* ureg)
+{
+	extern ulong etext;
+
+	dumpregs2(ureg);
+
+	/*
+	 * Processor control registers.
+	 * If machine check exception, time stamp counter, page size extensions
+	 * or enhanced virtual 8086 mode extensions are supported, there is a
+	 * CR4. If there is a CR4 and machine check extensions, read the machine
+	 * check address and machine check type registers if RDMSR supported.
+	 */
+	print("SKIPPING get of crx and other such stuff.\n");/* */
+#ifdef NOT
+	print("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
+		getcr0(), getcr2(), getcr3());
+	if(m->cpuiddx & 0x9A){
+		print(" CR4 %8.8lux", getcr4());
+		if((m->cpuiddx & 0xA0) == 0xA0){
+			rdmsr(0x00, &mca);
+			rdmsr(0x01, &mct);
+			print("\n  MCA %8.8llux MCT %8.8llux", mca, mct);
+		}
+	}
+#endif
+	print("\n  ur %lux up %lux\n", (ulong)ureg, (ulong)up);
+}
+
+
+/*
+ * Fill in enough of Ureg to get a stack trace, and call a function.
+ * Used by debugging interface rdb.
+ */
+void
+callwithureg(void (*fn)(Ureg*))
+{
+	Ureg ureg;
+	ureg.pc = getcallerpc(&fn);
+	ureg.sp = (ulong)&fn;
+	fn(&ureg);
+}
+
+static void
+_dumpstack(Ureg *ureg)
+{
+	ulong l, v, i, estack;
+	extern ulong etext;
+	int x;
+
+	if(getconf("*nodumpstack")){
+		iprint("dumpstack disabled\n");
+		return;
+	}
+	iprint("dumpstack\n");
+	x = 0;
+	x += print("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
+	i = 0;
+	if(up
+	&& (ulong)&l >= (ulong)up->kstack
+	&& (ulong)&l <= (ulong)up->kstack+KSTACK)
+		estack = (ulong)up->kstack+KSTACK;
+	else if((ulong)&l >= (ulong)m->stack
+	&& (ulong)&l <= (ulong)m+BY2PG)
+		estack = (ulong)m+MACHSIZE;
+	else
+		return;
+	x += print("estackx %.8lux\n", estack);
+
+	for(l=(ulong)&l; l<estack; l+=4){
+		v = *(ulong*)l;
+		if((KTZERO < v && v < (ulong)&etext) || estack-l<32){
+			/*
+			 * we could pick off general CALL (((uchar*)v)[-5] == 0xE8)
+			 * and CALL indirect through AX (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
+			 * but this is too clever and misses faulting address.
+			 */
+			x += print("%.8lux=%.8lux ", l, v);
+			i++;
+		}
+		if(i == 4){
+			i = 0;
+			x += print("\n");
+		}
+	}
+	if(i)
+		print("\n");
+	print("EOF\n");
+}
+
+void
+dumpstack(void)
+{
+	callwithureg(_dumpstack);
+}
+
+static void
+debugbpt(Ureg* ureg, void*)
+{
+	char buf[ERRMAX];
+	print("debugbpt\n");
+	if(up == 0)
+		panic("kernel bpt");
+	/* restore pc to instruction that caused the trap */
+	ureg->pc--;
+	sprint(buf, "sys: breakpoint");
+	postnote(up, 1, buf, NDebug);
+	print("debugbpt for proc %lud\n", up->pid);
+}
+
+static void
+doublefault(Ureg*, void*)
+{
+	panic("double fault");
+}
+
+static void
+unexpected(Ureg* ureg, void*)
+{
+	print("unexpected trap %lud; ignoring\n", ureg->trap);
+}
+
+static void
+fault386(Ureg* ureg, void* )
+{
+	ulong addr;
+	int read, user, n, insyscall;
+	char buf[ERRMAX];
+
+	addr = HYPERVISOR_shared_info->vcpu_info[m->machno].arch.cr2;
+	if (faultpanic) {
+		dprint("cr2 is 0x%lx\n", addr);
+		//dumpregs(ureg);
+		dumpstack();
+		panic("fault386");
+		exit(1);
+	}
+	
+	user = (ureg->cs & 0xFFFF) == UESEL;
+	if(!user && mmukmapsync(addr))
+		return;
+	read = !(ureg->ecode & 2);
+	if(up == nil)
+		panic("fault but up is zero; pc 0x%8.8lux addr 0x%8.8lux\n", ureg->pc, addr);
+	insyscall = up->insyscall;
+	up->insyscall = 1;
+	n = fault(addr, read);
+	if(n < 0){
+		if(!user){
+			dumpregs(ureg);
+			panic("fault: 0x%lux\n", addr);
+		}
+		sprint(buf, "sys: trap: fault %s addr=0x%lux",
+			read? "read" : "write", addr);
+		dprint("Posting %s to %lud\n", buf, up->pid);
+		postnote(up, 1, buf, NDebug);
+	}
+	up->insyscall = insyscall;
+	FAULTLOG(dprint("fault386: all done\n");)
+}
+
+/*
+ *  system calls
+ */
+#include "../port/systab.h"
+
+/*
+ *  Syscall is called directly from assembler without going through trap().
+ */
+void
+syscall(Ureg* ureg)
+{
+	char *e;
+	ulong	sp;
+	long	ret;
+	int	i, s;
+	ulong scallnr;
+
+	SYSCALLLOG(dprint("%d: syscall ...#%ld(%s)\n", 
+			up->pid, ureg->ax, sysctab[ureg->ax]);)
+	
+	if((ureg->cs & 0xFFFF) != UESEL)
+		panic("syscall: cs 0x%4.4luX\n", ureg->cs);
+
+	cycles(&up->kentry);
+
+	m->syscall++;
+	up->insyscall = 1;
+	up->pc = ureg->pc;
+	up->dbgreg = ureg;
+
+	if(up->procctl == Proc_tracesyscall){
+		up->procctl = Proc_stopme;
+		procctl(up);
+	}
+
+	scallnr = ureg->ax;
+	up->scallnr = scallnr;
+	if(scallnr == RFORK && up->fpstate == FPactive){
+		fpsave(&up->fpsave);
+		up->fpstate = FPinactive;
+	}
+	spllo();
+
+	sp = ureg->usp;
+	up->nerrlab = 0;
+	ret = -1;
+	if(!waserror()){
+		if(scallnr >= nsyscall || systab[scallnr] == 0){
+			pprint("bad sys call number %lud pc %lux\n",
+				scallnr, ureg->pc);
+			postnote(up, 1, "sys: bad sys call", NDebug);
+			error(Ebadarg);
+		}
+
+		if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
+			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+
+		up->s = *((Sargs*)(sp+BY2WD));
+		up->psstate = sysctab[scallnr];
+
+		ret = systab[scallnr]((va_list)up->s.args);
+		poperror();
+	}else{
+		/* failure: save the error buffer for errstr */
+		e = up->syserrstr;
+		up->syserrstr = up->errstr;
+		up->errstr = e;
+		if(0 && up->pid == 1)
+			print("syscall %lud error %s\n", scallnr, up->syserrstr);
+	}
+	if(up->nerrlab){
+		print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
+		for(i = 0; i < NERR; i++)
+			print("sp=%lux pc=%lux\n",
+				up->errlab[i].sp, up->errlab[i].pc);
+		panic("error stack");
+	}
+
+	SYSCALLLOG(dprint("%d: Syscall %d returns %d, ureg %p\n", up->pid, scallnr, ret, ureg);)
+	/*
+	 *  Put return value in frame.  On the x86 the syscall is
+	 *  just another trap and the return value from syscall is
+	 *  ignored.  On other machines the return value is put into
+	 *  the results register by caller of syscall.
+	 */
+	ureg->ax = ret;
+
+	if(up->procctl == Proc_tracesyscall){
+		up->procctl = Proc_stopme;
+		s = splhi();
+		procctl(up);
+		splx(s);
+	}
+
+	up->insyscall = 0;
+	up->psstate = 0;
+	INTRLOG(dprint("cleared insyscall\n");)
+	if(scallnr == NOTED)
+		noted(ureg, *(ulong*)(sp+BY2WD));
+
+	if(scallnr!=RFORK && (up->procctl || up->nnote)){
+		splhi();
+		notify(ureg);
+	}
+	/* if we delayed sched because we held a lock, sched now */
+	if(up->delaysched)
+		sched();
+	INTRLOG(dprint("before kexit\n");)
+	kexit(ureg);
+}
+
+/*
+ *  Call user, if necessary, with note.
+ *  Pass user the Ureg struct and the note on his stack.
+ */
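+/*
+ * Sketch (a reading aid, not normative) of the frame notify() builds
+ * on the user stack, high addresses first:
+ *
+ *	Ureg copy			<- up->ureg points here
+ *	saved up->ureg (one word)
+ *	note message (ERRMAX bytes)
+ *	arg 2: pointer to the message
+ *	arg 1: up->ureg
+ *	arg 0: 0 (fake return pc)	<- new ureg->usp
+ */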
+int
+notify(Ureg* ureg)
+{
+	int l;
+	ulong s, sp;
+	Note *n;
+
+	if(up->procctl)
+		procctl(up);
+	if(up->nnote == 0)
+		return 0;
+
+	if(up->fpstate == FPactive){
+		fpsave(&up->fpsave);
+		up->fpstate = FPinactive;
+	}
+	up->fpstate |= FPillegal;
+
+	s = spllo();
+	qlock(&up->debug);
+	up->notepending = 0;
+	n = &up->note[0];
+	if(strncmp(n->msg, "sys:", 4) == 0){
+		l = strlen(n->msg);
+		if(l > ERRMAX-15)	/* " pc=0x12345678\0" */
+			l = ERRMAX-15;
+		sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
+	}
+
+	if(n->flag!=NUser && (up->notified || up->notify==0)){
+		if(n->flag == NDebug)
+			pprint("suicide: %s\n", n->msg);
+		qunlock(&up->debug);
+		pexit(n->msg, n->flag!=NDebug);
+	}
+
+	if(up->notified){
+		qunlock(&up->debug);
+		splhi();
+		return 0;
+	}
+		
+	if(!up->notify){
+		qunlock(&up->debug);
+		pexit(n->msg, n->flag!=NDebug);
+	}
+	sp = ureg->usp;
+	sp -= sizeof(Ureg);
+
+	if(!okaddr((ulong)up->notify, 1, 0)
+	|| !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
+		pprint("suicide: bad address in notify\n");
+		qunlock(&up->debug);
+		pexit("Suicide", 0);
+	}
+
+	up->ureg = (void*)sp;
+	memmove((Ureg*)sp, ureg, sizeof(Ureg));
+	*(Ureg**)(sp-BY2WD) = up->ureg;	/* word under Ureg is old up->ureg */
+	up->ureg = (void*)sp;
+	sp -= BY2WD+ERRMAX;
+	memmove((char*)sp, up->note[0].msg, ERRMAX);
+	sp -= 3*BY2WD;
+	*(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;		/* arg 2 is string */
+	*(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;	/* arg 1 is ureg* */
+	*(ulong*)(sp+0*BY2WD) = 0;			/* arg 0 is pc */
+	ureg->usp = sp;
+	ureg->pc = (ulong)up->notify;
+	up->notified = 1;
+	up->nnote--;
+	memmove(&up->lastnote, &up->note[0], sizeof(Note));
+	memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
+
+	qunlock(&up->debug);
+	splx(s);
+	return 1;
+}
+
+/*
+ *   Return user to state before notify()
+ */
+void
+noted(Ureg* ureg, ulong arg0)
+{
+	Ureg *nureg;
+	ulong oureg, sp;
+
+	qlock(&up->debug);
+	if(arg0!=NRSTR && !up->notified) {
+		qunlock(&up->debug);
+		pprint("call to noted() when not notified\n");
+		pexit("Suicide", 0);
+	}
+	up->notified = 0;
+
+	nureg = up->ureg;	/* pointer to user returned Ureg struct */
+
+	up->fpstate &= ~FPillegal;
+
+	/* sanity clause */
+	oureg = (ulong)nureg;
+	if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
+		pprint("bad ureg in noted or call to noted when not notified\n");
+		qunlock(&up->debug);
+		pexit("Suicide", 0);
+	}
+
+	/*
+	 * Check the segment selectors are all valid, otherwise
+	 * a fault will be taken on attempting to return to the
+	 * user process.
+	 * Take care with the comparisons as different processor
+	 * generations push segment descriptors in different ways.
+	 */
+	if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL
+	  || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL
+	  || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){
+		pprint("bad segment selector in noted\n");
+		pprint("cs is %#lux, wanted %#ux\n", nureg->cs, UESEL);
+		pprint("ds is %#lux, wanted %#ux\n", nureg->ds, UDSEL);
+		pprint("es is %#lux, fs is %#lux, gs %#lux, wanted %#ux\n", 
+			ureg->es, ureg->fs, ureg->gs, UDSEL);
+		pprint("ss is %#lux, wanted %#ux\n", nureg->ss, UDSEL);
+		qunlock(&up->debug);
+		pexit("Suicide", 0);
+	}
+
+	/* don't let user change system flags */
+	nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
+
+	memmove(ureg, nureg, sizeof(Ureg));
+
+	switch(arg0){
+	case NCONT:
+	case NRSTR:
+		if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
+		qunlock(&up->debug);
+		break;
+
+	case NSAVE:
+		if(!okaddr(nureg->pc, BY2WD, 0)
+		|| !okaddr(nureg->usp, BY2WD, 0)){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		qunlock(&up->debug);
+		sp = oureg-4*BY2WD-ERRMAX;
+		splhi();
+		ureg->sp = sp;
+		((ulong*)sp)[1] = oureg;	/* arg 1 0(FP) is ureg* */
+		((ulong*)sp)[0] = 0;		/* arg 0 is pc */
+		break;
+
+	default:
+		pprint("unknown noted arg 0x%lux\n", arg0);
+		up->lastnote.flag = NDebug;
+		/* fall through */
+		
+	case NDFLT:
+		if(up->lastnote.flag == NDebug){ 
+			qunlock(&up->debug);
+			pprint("suicide: %s\n", up->lastnote.msg);
+		} else
+			qunlock(&up->debug);
+		pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
+	}
+}
+
+uintptr
+execregs(uintptr entry, ulong ssize, ulong nargs)
+{
+	ulong *sp;
+	Ureg *ureg;
+
+	up->fpstate = FPinit;
+	fpoff();
+
+	sp = (ulong*)(USTKTOP - ssize);
+	*--sp = nargs;
+
+	ureg = up->dbgreg;
+	ureg->usp = (ulong)sp;
+	ureg->pc = entry;
+//	print("execregs returns 0x%x\n", USTKTOP-sizeof(Tos));
+	return USTKTOP-sizeof(Tos);		/* address of kernel/user shared data */
+}
+
+/*
+ *  return the userpc the last exception happened at
+ */
+ulong
+userpc(void)
+{
+	Ureg *ureg;
+
+	ureg = (Ureg*)up->dbgreg;
+	return ureg->pc;
+}
+
+/* This routine must save the values of registers the user is not permitted
+ * to write from devproc and then restore the saved values before returning.
+ */
+void
+setregisters(Ureg* ureg, char* pureg, char* uva, int n)
+{
+	ulong flags;
+	ulong cs;
+	ulong ss;
+
+	flags = ureg->flags;
+	cs = ureg->cs;
+	ss = ureg->ss;
+	memmove(pureg, uva, n);
+	ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00);
+	ureg->cs = cs;
+	ureg->ss = ss;
+}
+
+static void
+linkproc(void)
+{
+	spllo();
+	up->kpfun(up->kparg);
+	pexit("kproc dying", 0);
+}
+
+void
+kprocchild(Proc* p, void (*func)(void*), void* arg)
+{
+	/*
+	 * gotolabel() needs a word on the stack in
+	 * which to place the return PC used to jump
+	 * to linkproc().
+	 */
+	p->sched.pc = (ulong)linkproc;
+	p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
+
+	p->kpfun = func;
+	p->kparg = arg;
+}
+
+void
+forkchild(Proc *p, Ureg *ureg)
+{
+	Ureg *cureg;
+
+	/*
+	 * Add 2*BY2WD to the stack to account for
+	 *  - the return PC
+	 *  - trap's argument (ur)
+	 */
+	p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
+	p->sched.pc = (ulong)forkret;
+
+	cureg = (Ureg*)(p->sched.sp+2*BY2WD);
+	memmove(cureg, ureg, sizeof(Ureg));
+	/* return value of syscall in child */
+	cureg->ax = 0;
+
+	/* Things from bottom of syscall which were never executed */
+	p->psstate = 0;
+	p->insyscall = 0;
+}
+
+/* Give enough context in the ureg to produce a kernel stack for
+ * a sleeping process
+ */
+void
+setkernur(Ureg* ureg, Proc* p)
+{
+	ureg->pc = p->sched.pc;
+	ureg->sp = p->sched.sp+4;
+}
+
+ulong
+dbgpc(Proc *p)
+{
+	Ureg *ureg;
+
+	ureg = p->dbgreg;
+	if(ureg == 0)
+		return 0;
+
+	return ureg->pc;
+}
+
+/*
+ * install_safe_pf_handler / install_normal_pf_handler:
+ * 
+ * These are used within the failsafe_callback handler in entry.S to avoid
+ * taking a full page fault when reloading FS and GS. This is because FS and 
+ * GS could be invalid at pretty much any point while Xenolinux executes (we 
+ * don't set them to safe values on entry to the kernel). At *any* point Xen 
+ * may be entered due to a hardware interrupt --- on exit from Xen an invalid 
+ * FS/GS will cause our failsafe_callback to be executed. This could occur, 
+ * for example, while the mmu_update_queue is in an inconsistent state. This
+ * is disastrous because the normal page-fault handler touches the update
+ * queue!
+ * 
+ * Fortunately, within the failsafe handler it is safe to force DS/ES/FS/GS
+ * to zero if they cannot be reloaded -- at this point executing a normal
+ * page fault would not change this effect. The safe page-fault handler
+ * ensures this end result (blow away the selector value) without the dangers
+ * of the normal page-fault handler.
+ * 
+ * NB. Perhaps this can all go away after we have implemented writeable
+ * page tables. :-)
+ */
+static void
+safe_fault386(Ureg*, void*)
+{
+	panic("DO SAFE PAGE FAULT!\n");
+}
+
+unsigned long
+install_safe_pf_handler(void)
+{
+	dprint("called from failsafe callback\n");
+	trapenable(VectorPF, safe_fault386, 0, "safe_fault386");
+	return 0;
+}
+
+void
+install_normal_pf_handler(unsigned long)
+{
+	trapenable(VectorPF, fault386, 0, "fault386");
+}
--- /dev/null
+++ b/sys/src/9/xen/uartxen.c
@@ -1,0 +1,334 @@
+/*
+ * uartxen.c
+ *	Access to xen consoles.
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../pc/io.h"
+
+extern PhysUart xenphysuart;
+
+static Uart xenuart = {
+	.name = "xencons",
+	.freq = 1843200,
+	.phys = &xenphysuart,
+};
+
+struct {
+	struct xencons_interface *intf;
+	int evtchn;
+	Lock txlock;
+} xencons;
+
+/*
+ * Debug print to xen "emergency console".
+ * Output only appears if xen is built with verbose=y
+ */
+void
+dprint(char *fmt, ...)
+{
+	int n;
+	va_list arg;
+	char buf[PRINTSIZE];
+
+	va_start(arg, fmt);
+	n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+	va_end(arg);
+	HYPERVISOR_console_io(CONSOLEIO_write, n, buf);
+}
+
+static void kick(Uart*);
+/*
+ * Emit a string to the guest OS console, bypassing the queue
+ *   - before serialoq is initialised
+ *   - when rdb is activated
+ *   - from iprint() for messages from interrupt routines
+ * If ring is full, just throw extra output away.
+ */
+void
+xenuartputs(char *s, int n)
+{
+	struct xencons_interface *con = xencons.intf;
+	unsigned long prod;
+	int c;
+
+	ilock(&xencons.txlock);
+	prod = con->out_prod;
+	while (n-- > 0 && (prod - con->out_cons) < sizeof(con->out)) {
+		c = *s++;
+		/*
+		if (c == '\n')
+			con->out[MASK_XENCONS_IDX(prod++, con->out)] = '\r';
+		*/
+		con->out[MASK_XENCONS_IDX(prod++, con->out)] = c;
+	}
+	coherence();
+	con->out_prod = prod;
+	xenchannotify(xencons.evtchn);
+	iunlock(&xencons.txlock);
+}
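+
+/*
+ * The ring indices above are free-running unsigned counters: out_prod
+ * and out_cons only ever increase, so prod - cons is the number of
+ * bytes queued even across wraparound, and MASK_XENCONS_IDX reduces an
+ * index modulo the power-of-two buffer size.  A sketch of the
+ * invariants used by the producer side:
+ *
+ *	queued = prod - cons;			0 <= queued <= sizeof(con->out)
+ *	space = sizeof(con->out) - queued;	bytes we may still write
+ *	con->out[MASK_XENCONS_IDX(prod++, con->out)] = c;
+ */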
+
+/*
+ * Handle channel event from console
+ */
+static void
+interrupt(Ureg*, void *arg)
+{
+	char c;
+	unsigned long cons;
+	Uart *uart;
+	struct xencons_interface *con = xencons.intf;
+
+	uart = &xenuart;
+
+	cons = con->in_cons;
+	coherence();
+	while (cons != con->in_prod) {
+		c = con->in[MASK_XENCONS_IDX(cons++, con->in)];
+		uartrecv(uart, c & 0xFF);
+	}
+	coherence();
+	con->in_cons = cons;
+	kick(nil);
+}
+
+static Uart*
+pnp(void)
+{
+	return &xenuart;
+}
+
+static void
+enable(Uart*, int ie)
+{
+	if(ie)
+		intrenable(xencons.evtchn, interrupt, 0, BUSUNKNOWN, "Xen console");
+}
+
+static void
+disable(Uart*)
+{
+}
+
+/*
+ * Send queued output to guest OS console
+ */
+static void
+kick(Uart*)
+{
+	struct xencons_interface *con = xencons.intf;
+	unsigned long prod;
+	long avail, idx, n, m;
+
+	ilock(&xencons.txlock);
+	prod = con->out_prod;
+	avail = sizeof(con->out) - (prod - con->out_cons);
+	while (avail > 0) {
+		idx = MASK_XENCONS_IDX(prod, con->out);
+		m = sizeof(con->out) - idx;
+		if (m > avail)
+			m = avail;
+		n = qconsume(serialoq, con->out+idx, m);
+		if (n < 0)
+			break;
+		prod += n;
+		avail -= n;
+	}
+	coherence();
+	con->out_prod = prod;
+	xenchannotify(xencons.evtchn);
+	iunlock(&xencons.txlock);
+}
+
+static void
+donothing(Uart*, int)
+{
+}
+
+static int
+donothingint(Uart*, int)
+{
+	return 0;
+}
+
+static int
+baud(Uart *uart, int n)
+{
+	if(n <= 0)
+		return -1;
+
+	uart->baud = n;
+	return 0;
+}
+
+static int
+bits(Uart *uart, int n)
+{
+	switch(n){
+	case 7:
+	case 8:
+		break;
+	default:
+		return -1;
+	}
+
+	uart->bits = n;
+	return 0;
+}
+
+static int
+stop(Uart *uart, int n)
+{
+	if(n != 1)
+		return -1;
+	uart->stop = n;
+	return 0;
+}
+
+static int
+parity(Uart *uart, int n)
+{
+	if(n != 'n')
+		return -1;
+	uart->parity = n;
+	return 0;
+}
+
+static long
+status(Uart *uart, void *buf, long n, long offset)
+{
+	char *p;
+
+	p = malloc(READSTR);
+	if(p == nil)
+		error(Enomem);
+	snprint(p, READSTR,
+		"b%d\n"
+		"dev(%d) type(%d) framing(%d) overruns(%d) "
+		"berr(%d) serr(%d)\n",
+
+		uart->baud,
+		uart->dev,
+		uart->type,
+		uart->ferr,
+		uart->oerr,
+		uart->berr,
+		uart->serr
+	);
+	n = readstr(offset, buf, n, p);
+	free(p);
+
+	return n;
+}
+
+void
+xenputc(Uart*, int c)
+{
+	struct xencons_interface *con = xencons.intf;
+	unsigned long prod;
+
+	c &= 0xFF;
+
+	ilock(&xencons.txlock);
+	/*
+	while(con->out_cons == con->out_prod)
+		HYPERVISOR_yield();
+	*/
+	/* ring full: drop the character rather than block */
+	if(con->out_prod - con->out_cons == sizeof(con->out)){
+		iunlock(&xencons.txlock);
+		return;
+	}
+
+	prod = con->out_prod;
+
+	if((con->out[MASK_XENCONS_IDX(prod++, con->out)] = c) == '\n')
+		con->out[MASK_XENCONS_IDX(prod++, con->out)] = '\r';
+
+	coherence();
+	con->out_prod = prod;
+	xenchannotify(xencons.evtchn);
+	iunlock(&xencons.txlock);
+}
+
+int
+xengetc(Uart*)
+{
+	struct xencons_interface *con = xencons.intf;
+	char c;
+
+	c = 0;
+
+	if(con->in_cons != con->in_prod){
+		coherence();
+		c = con->in[MASK_XENCONS_IDX(con->in_cons++, con->in)];
+		if (con->in_cons == con->in_prod)
+			xenchannotify(xencons.evtchn);
+	}
+
+	return c;
+}
+
+PhysUart xenphysuart = {
+	.name		= "xenuart",
+
+	.pnp		= pnp,
+	.enable		= enable,
+	.disable	= disable,
+	.kick		= kick,
+	.dobreak	= donothing,
+	.baud		= baud,
+	.bits		= bits,
+	.stop		= stop,
+	.parity		= parity,
+	.modemctl	= donothing,
+	.rts		= donothing,
+	.dtr		= donothing,
+	.status		= status,
+	.fifo		= donothing,
+
+	.getc		= xengetc,
+	.putc		= xenputc,
+};
+
+/* console=0 to enable */
+void
+xenconsinit(void)
+{
+	xencons.intf = (struct xencons_interface*)mmumapframe(XENCONSOLE, xenstart->console_mfn);
+	xencons.evtchn = xenstart->console_evtchn;
+
+	consuart = &xenuart;
+}
+
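+/*
+ * kbdenable switches the console to the xen uart when plan9.ini
+ * requests it, e.g. (illustrative): console=0 b9600 l8 pn s1
+ * The digits select console 0; anything after them is passed to
+ * uartctl() as further control commands.
+ */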
+void
+kbdenable(void)
+{
+	Uart *uart;
+	int n;
+	char *p, *cmd;
+
+	if((p = getconf("console")) == nil)
+		return;
+	n = strtoul(p, &cmd, 0);
+	if(p == cmd || n != 0)
+		return;
+	uart = &xenuart;
+
+	(*uart->phys->enable)(uart, 0);
+	uartctl(uart, "b9600 l8 pn s1");
+	if(*cmd != '\0')
+		uartctl(uart, cmd);
+
+	consuart = uart;
+	uart->console = 1;
+
+	uartputs("CONSOLE1\n", 9);
+
+	//*(char*)0 = 0;
+}
+
--- /dev/null
+++ b/sys/src/9/xen/xen-public/COPYING
@@ -1,0 +1,38 @@
+XEN NOTICE
+==========
+
+This copyright applies to all files within this subdirectory and its
+subdirectories:
+  include/public/*.h
+  include/public/hvm/*.h
+  include/public/io/*.h
+
+The intention is that these files can be freely copied into the source
+tree of an operating system when porting that OS to run on Xen. Doing
+so does *not* cause the OS to become subject to the terms of the GPL.
+
+All other files in the Xen source distribution are covered by version
+2 of the GNU General Public License except where explicitly stated
+otherwise within individual source files.
+
+ -- Keir Fraser (on behalf of the Xen team)
+
+=====================================================================
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+DEALINGS IN THE SOFTWARE.
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-arm.h
@@ -1,0 +1,252 @@
+/******************************************************************************
+ * arch-arm.h
+ *
+ * Guest OS interface to ARM Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright 2011 (C) Citrix Systems
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_ARM_H__
+#define __XEN_PUBLIC_ARCH_ARM_H__
+
+/* hypercall calling convention
+ * ----------------------------
+ *
+ * A hypercall is issued using the ARM HVC instruction.
+ *
+ * A hypercall can take up to 5 arguments. These are passed in
+ * registers, the first argument in x0/r0 (for arm64/arm32 guests
+ * respectively irrespective of whether the underlying hypervisor is
+ * 32- or 64-bit), the second argument in x1/r1, the third in x2/r2,
+ * the fourth in x3/r3 and the fifth in x4/r4.
+ *
+ * The hypercall number is passed in r12 (arm) or x16 (arm64). In both
+ * cases the relevant ARM procedure calling convention specifies this
+ * is an inter-procedure-call scratch register (e.g. for use in linker
+ * stubs). This use does not conflict with use during a hypercall.
+ *
+ * The HVC ISS must contain a Xen specific TAG: XEN_HYPERCALL_TAG.
+ *
+ * The return value is in x0/r0.
+ *
+ * The hypercall will clobber x16/r12 and the argument registers used
+ * by that hypercall (except r0 which is the return value) i.e. in
+ * addition to x16/r12 a 2 argument hypercall will clobber x1/r1 and a
+ * 4 argument hypercall will clobber x1/r1, x2/r2 and x3/r3.
+ *
+ * Parameter structs passed to hypercalls are laid out according to
+ * the Procedure Call Standard for the ARM Architecture (AAPCS, AKA
+ * EABI) and Procedure Call Standard for the ARM 64-bit Architecture
+ * (AAPCS64). Where there is a conflict the 64-bit standard should be
+ * used regardless of guest type. Structures which are passed as
+ * hypercall arguments are always little endian.
+ */
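+/*
+ * An illustrative arm32 stub following the convention above (a sketch
+ * only, not part of this interface; the names xen_hypercall2, op, a0
+ * and a1 are placeholders, and the hvc immediate is XEN_HYPERCALL_TAG,
+ * 0xEA1):
+ *
+ *	static inline long
+ *	xen_hypercall2(unsigned op, unsigned a0, unsigned a1)
+ *	{
+ *		register unsigned r0 asm("r0") = a0;
+ *		register unsigned r1 asm("r1") = a1;
+ *		register unsigned r12 asm("r12") = op;
+ *
+ *		asm volatile("hvc #0xEA1"
+ *			: "+r" (r0), "+r" (r1), "+r" (r12)
+ *			:
+ *			: "memory");
+ *		return r0;
+ *	}
+ */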
+
+#define XEN_HYPERCALL_TAG   0XEA1
+
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+
+#ifndef __ASSEMBLY__
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type)                  \
+    typedef union { type *p; unsigned long q; }                 \
+        __guest_handle_ ## name;                                \
+    typedef union { type *p; uint64_aligned_t q; }              \
+        __guest_handle_64_ ## name;
+
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory. On ARM it is always 8 bytes in size and
+ * 8-byte aligned.
+ * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
+ * hypercall argument. It is 4 bytes on aarch32 and 8 bytes on aarch64.
+ */
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define __XEN_GUEST_HANDLE(name)        __guest_handle_64_ ## name
+#define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
+/* this is going to be changed on 64 bit */
+#define XEN_GUEST_HANDLE_PARAM(name)    __guest_handle_ ## name
+#define set_xen_guest_handle_raw(hnd, val)                  \
+    do {                                                    \
+        typeof(&(hnd)) _sxghr_tmp = &(hnd);                 \
+        _sxghr_tmp->q = 0;                                  \
+        _sxghr_tmp->p = val;                                \
+    } while ( 0 )
+#ifdef __XEN_TOOLS__
+#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
+#endif
+#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., r0/x0). */
+# define __DECL_REG(n64, n32) union {          \
+        uint64_t n64;                          \
+        uint32_t n32;                          \
+    }
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., x0). */
+#define __DECL_REG(n64, n32) uint64_t n64
+#endif
+
+struct vcpu_guest_core_regs
+{
+    /*         Aarch64       Aarch32 */
+    __DECL_REG(x0,           r0_usr);
+    __DECL_REG(x1,           r1_usr);
+    __DECL_REG(x2,           r2_usr);
+    __DECL_REG(x3,           r3_usr);
+    __DECL_REG(x4,           r4_usr);
+    __DECL_REG(x5,           r5_usr);
+    __DECL_REG(x6,           r6_usr);
+    __DECL_REG(x7,           r7_usr);
+    __DECL_REG(x8,           r8_usr);
+    __DECL_REG(x9,           r9_usr);
+    __DECL_REG(x10,          r10_usr);
+    __DECL_REG(x11,          r11_usr);
+    __DECL_REG(x12,          r12_usr);
+
+    __DECL_REG(x13,          sp_usr);
+    __DECL_REG(x14,          lr_usr);
+
+    __DECL_REG(x15,          __unused_sp_hyp);
+
+    __DECL_REG(x16,          lr_irq);
+    __DECL_REG(x17,          sp_irq);
+
+    __DECL_REG(x18,          lr_svc);
+    __DECL_REG(x19,          sp_svc);
+
+    __DECL_REG(x20,          lr_abt);
+    __DECL_REG(x21,          sp_abt);
+
+    __DECL_REG(x22,          lr_und);
+    __DECL_REG(x23,          sp_und);
+
+    __DECL_REG(x24,          r8_fiq);
+    __DECL_REG(x25,          r9_fiq);
+    __DECL_REG(x26,          r10_fiq);
+    __DECL_REG(x27,          r11_fiq);
+    __DECL_REG(x28,          r12_fiq);
+
+    __DECL_REG(x29,          sp_fiq);
+    __DECL_REG(x30,          lr_fiq);
+
+    /* Return address and mode */
+    __DECL_REG(pc64,         pc32);             /* ELR_EL2 */
+    uint32_t cpsr;                              /* SPSR_EL2 */
+
+    union {
+        uint32_t spsr_el1;       /* AArch64 */
+        uint32_t spsr_svc;       /* AArch32 */
+    };
+
+    /* AArch32 guests only */
+    uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt;
+
+    /* AArch64 guests only */
+    uint64_t sp_el0;
+    uint64_t sp_el1, elr_el1;
+};
+typedef struct vcpu_guest_core_regs vcpu_guest_core_regs_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_core_regs_t);
+
+#undef __DECL_REG
+
+typedef uint64_t xen_pfn_t;
+#define PRI_xen_pfn PRIx64
+
+/* Maximum number of virtual CPUs in legacy multi-processor guests. */
+/* Only one. All other VCPUS must use VCPUOP_register_vcpu_info */
+#define XEN_LEGACY_MAX_VCPUS 1
+
+typedef uint64_t xen_ulong_t;
+#define PRI_xen_ulong PRIx64
+
+struct vcpu_guest_context {
+#define _VGCF_online                   0
+#define VGCF_online                    (1<<_VGCF_online)
+    uint32_t flags;                         /* VGCF_* */
+
+    struct vcpu_guest_core_regs user_regs;  /* Core CPU registers */
+
+    uint32_t sctlr, ttbcr;
+    uint64_t ttbr0, ttbr1;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+
+struct arch_vcpu_info { };
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct arch_shared_info { };
+typedef struct arch_shared_info arch_shared_info_t;
+typedef uint64_t xen_callback_t;
+
+#endif /* ifndef __ASSEMBLY__ */
+
+/* PSR bits (CPSR, SPSR)*/
+
+/* 32 bit modes */
+#define PSR_MODE_USR 0x10
+#define PSR_MODE_FIQ 0x11
+#define PSR_MODE_IRQ 0x12
+#define PSR_MODE_SVC 0x13
+#define PSR_MODE_MON 0x16
+#define PSR_MODE_ABT 0x17
+#define PSR_MODE_HYP 0x1a
+#define PSR_MODE_UND 0x1b
+#define PSR_MODE_SYS 0x1f
+
+/* 64 bit modes */
+#ifdef __aarch64__
+#define PSR_MODE_BIT  0x10 /* Set iff AArch32 */
+#define PSR_MODE_EL3h 0x0d
+#define PSR_MODE_EL3t 0x0c
+#define PSR_MODE_EL2h 0x09
+#define PSR_MODE_EL2t 0x08
+#define PSR_MODE_EL1h 0x05
+#define PSR_MODE_EL1t 0x04
+#define PSR_MODE_EL0t 0x00
+#endif
+
+#define PSR_THUMB       (1<<5)        /* Thumb Mode enable */
+#define PSR_FIQ_MASK    (1<<6)        /* Fast Interrupt mask */
+#define PSR_IRQ_MASK    (1<<7)        /* Interrupt mask */
+#define PSR_ABT_MASK    (1<<8)        /* Asynchronous Abort mask */
+#define PSR_BIG_ENDIAN  (1<<9)        /* Big Endian Mode */
+#define PSR_IT_MASK     (0x0600fc00)  /* Thumb If-Then Mask */
+#define PSR_JAZELLE     (1<<24)       /* Jazelle Mode */
+
+#define PSR_GUEST_INIT  (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC)
+
+#endif /*  __XEN_PUBLIC_ARCH_ARM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-arm/hvm/save.h
@@ -1,0 +1,39 @@
+/*
+ * Structure definitions for HVM state that is held by Xen and must
+ * be saved along with the domain's memory and device-model state.
+ *
+ * Copyright (c) 2012 Citrix Systems Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_SAVE_ARM_H__
+#define __XEN_PUBLIC_HVM_SAVE_ARM_H__
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/cpuid.h
@@ -1,0 +1,68 @@
+/******************************************************************************
+ * arch-x86/cpuid.h
+ * 
+ * CPUID interface to Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ * 
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ * 
+ * Authors:
+ *    Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
+#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
+
+/* Xen identification leaves start at 0x40000000. */
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i)    (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000000)
+ * EAX: Largest Xen-information leaf. All leaves up to and including @EAX
+ *      are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
+ *      of a Xen host.
+ */
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
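+
+/*
+ * Illustrative detection sketch (cpuid() here is a hypothetical helper
+ * that executes the CPUID instruction with the given leaf in EAX and
+ * stores EAX..EDX into regs[0..3]):
+ *
+ *	uint32_t regs[4];
+ *
+ *	cpuid(XEN_CPUID_FIRST_LEAF, regs);
+ *	if(regs[1] == XEN_CPUID_SIGNATURE_EBX &&
+ *	   regs[2] == XEN_CPUID_SIGNATURE_ECX &&
+ *	   regs[3] == XEN_CPUID_SIGNATURE_EDX)
+ *		... running under Xen; regs[0] is the largest Xen leaf
+ */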
+
+/*
+ * Leaf 2 (0x40000001)
+ * EAX[31:16]: Xen major version.
+ * EAX[15: 0]: Xen minor version.
+ * EBX-EDX: Reserved (currently all zeroes).
+ */
+
+/*
+ * Leaf 3 (0x40000002)
+ * EAX: Number of hypercall transfer pages. This register is always guaranteed
+ *      to specify one hypercall page.
+ * EBX: Base address of Xen-specific MSRs.
+ * ECX: Features 1. Unused bits are set to zero.
+ * EDX: Features 2. Unused bits are set to zero.
+ */
+
+/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
+#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
+#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
+
+#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/hvm/save.h
@@ -1,0 +1,600 @@
+/* 
+ * Structure definitions for HVM state that is held by Xen and must
+ * be saved along with the domain's memory and device-model state.
+ * 
+ * Copyright (c) 2007 XenSource Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__
+#define __XEN_PUBLIC_HVM_SAVE_X86_H__
+
+/* 
+ * Save/restore header: general info about the save file. 
+ */
+
+#define HVM_FILE_MAGIC   0x54381286
+#define HVM_FILE_VERSION 0x00000001
+
+struct hvm_save_header {
+    uint32_t magic;             /* Must be HVM_FILE_MAGIC */
+    uint32_t version;           /* File format version */
+    uint64_t changeset;         /* Version of Xen that saved this file */
+    uint32_t cpuid;             /* CPUID[0x01][%eax] on the saving machine */
+    uint32_t gtsc_khz;        /* Guest's TSC frequency in kHz */
+};
+
+DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
+
+
+/*
+ * Processor
+ *
+ * Compat: Pre-3.4 didn't have msr_tsc_aux
+ */
+
+struct hvm_hw_cpu {
+    uint8_t  fpu_regs[512];
+
+    uint64_t rax;
+    uint64_t rbx;
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t rsp;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+
+    uint64_t rip;
+    uint64_t rflags;
+
+    uint64_t cr0;
+    uint64_t cr2;
+    uint64_t cr3;
+    uint64_t cr4;
+
+    uint64_t dr0;
+    uint64_t dr1;
+    uint64_t dr2;
+    uint64_t dr3;
+    uint64_t dr6;
+    uint64_t dr7;    
+
+    uint32_t cs_sel;
+    uint32_t ds_sel;
+    uint32_t es_sel;
+    uint32_t fs_sel;
+    uint32_t gs_sel;
+    uint32_t ss_sel;
+    uint32_t tr_sel;
+    uint32_t ldtr_sel;
+
+    uint32_t cs_limit;
+    uint32_t ds_limit;
+    uint32_t es_limit;
+    uint32_t fs_limit;
+    uint32_t gs_limit;
+    uint32_t ss_limit;
+    uint32_t tr_limit;
+    uint32_t ldtr_limit;
+    uint32_t idtr_limit;
+    uint32_t gdtr_limit;
+
+    uint64_t cs_base;
+    uint64_t ds_base;
+    uint64_t es_base;
+    uint64_t fs_base;
+    uint64_t gs_base;
+    uint64_t ss_base;
+    uint64_t tr_base;
+    uint64_t ldtr_base;
+    uint64_t idtr_base;
+    uint64_t gdtr_base;
+
+    uint32_t cs_arbytes;
+    uint32_t ds_arbytes;
+    uint32_t es_arbytes;
+    uint32_t fs_arbytes;
+    uint32_t gs_arbytes;
+    uint32_t ss_arbytes;
+    uint32_t tr_arbytes;
+    uint32_t ldtr_arbytes;
+
+    uint64_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+
+    /* msr for em64t */
+    uint64_t shadow_gs;
+
+    /* msr content saved/restored. */
+    uint64_t msr_flags;
+    uint64_t msr_lstar;
+    uint64_t msr_star;
+    uint64_t msr_cstar;
+    uint64_t msr_syscall_mask;
+    uint64_t msr_efer;
+    uint64_t msr_tsc_aux;
+
+    /* guest's idea of what rdtsc() would return */
+    uint64_t tsc;
+
+    /* pending event, if any */
+    union {
+        uint32_t pending_event;
+        struct {
+            uint8_t  pending_vector:8;
+            uint8_t  pending_type:3;
+            uint8_t  pending_error_valid:1;
+            uint32_t pending_reserved:19;
+            uint8_t  pending_valid:1;
+        };
+    };
+    /* error code for pending event */
+    uint32_t error_code;
+};
+
+struct hvm_hw_cpu_compat {
+    uint8_t  fpu_regs[512];
+
+    uint64_t rax;
+    uint64_t rbx;
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t rsp;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+
+    uint64_t rip;
+    uint64_t rflags;
+
+    uint64_t cr0;
+    uint64_t cr2;
+    uint64_t cr3;
+    uint64_t cr4;
+
+    uint64_t dr0;
+    uint64_t dr1;
+    uint64_t dr2;
+    uint64_t dr3;
+    uint64_t dr6;
+    uint64_t dr7;    
+
+    uint32_t cs_sel;
+    uint32_t ds_sel;
+    uint32_t es_sel;
+    uint32_t fs_sel;
+    uint32_t gs_sel;
+    uint32_t ss_sel;
+    uint32_t tr_sel;
+    uint32_t ldtr_sel;
+
+    uint32_t cs_limit;
+    uint32_t ds_limit;
+    uint32_t es_limit;
+    uint32_t fs_limit;
+    uint32_t gs_limit;
+    uint32_t ss_limit;
+    uint32_t tr_limit;
+    uint32_t ldtr_limit;
+    uint32_t idtr_limit;
+    uint32_t gdtr_limit;
+
+    uint64_t cs_base;
+    uint64_t ds_base;
+    uint64_t es_base;
+    uint64_t fs_base;
+    uint64_t gs_base;
+    uint64_t ss_base;
+    uint64_t tr_base;
+    uint64_t ldtr_base;
+    uint64_t idtr_base;
+    uint64_t gdtr_base;
+
+    uint32_t cs_arbytes;
+    uint32_t ds_arbytes;
+    uint32_t es_arbytes;
+    uint32_t fs_arbytes;
+    uint32_t gs_arbytes;
+    uint32_t ss_arbytes;
+    uint32_t tr_arbytes;
+    uint32_t ldtr_arbytes;
+
+    uint64_t sysenter_cs;
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+
+    /* msr for em64t */
+    uint64_t shadow_gs;
+
+    /* msr content saved/restored. */
+    uint64_t msr_flags;
+    uint64_t msr_lstar;
+    uint64_t msr_star;
+    uint64_t msr_cstar;
+    uint64_t msr_syscall_mask;
+    uint64_t msr_efer;
+    /*uint64_t msr_tsc_aux; COMPAT */
+
+    /* guest's idea of what rdtsc() would return */
+    uint64_t tsc;
+
+    /* pending event, if any */
+    union {
+        uint32_t pending_event;
+        struct {
+            uint8_t  pending_vector:8;
+            uint8_t  pending_type:3;
+            uint8_t  pending_error_valid:1;
+            uint32_t pending_reserved:19;
+            uint8_t  pending_valid:1;
+        };
+    };
+    /* error code for pending event */
+    uint32_t error_code;
+};
+
+static inline int _hvm_hw_fix_cpu(void *h) {
+
+    union hvm_hw_cpu_union {
+        struct hvm_hw_cpu nat;
+        struct hvm_hw_cpu_compat cmp;
+    } *ucpu = (union hvm_hw_cpu_union *)h;
+
+    /* If we copy from the end backwards, we should
+     * be able to do the modification in-place */
+    ucpu->nat.error_code = ucpu->cmp.error_code;
+    ucpu->nat.pending_event = ucpu->cmp.pending_event;
+    ucpu->nat.tsc = ucpu->cmp.tsc;
+    ucpu->nat.msr_tsc_aux = 0;
+
+    return 0;
+}
+
+DECLARE_HVM_SAVE_TYPE_COMPAT(CPU, 2, struct hvm_hw_cpu, \
+                             struct hvm_hw_cpu_compat, _hvm_hw_fix_cpu);
+
+/*
+ * PIC
+ */
+
+struct hvm_hw_vpic {
+    /* IR line bitmasks. */
+    uint8_t irr;
+    uint8_t imr;
+    uint8_t isr;
+
+    /* Line IRx maps to IRQ irq_base+x */
+    uint8_t irq_base;
+
+    /*
+     * Where are we in ICW2-4 initialisation (0 means no init in progress)?
+     * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1).
+     * Bit 2: ICW1.IC4  (1 == ICW4 included in init sequence)
+     * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence)
+     */
+    uint8_t init_state:4;
+
+    /* IR line with highest priority. */
+    uint8_t priority_add:4;
+
+    /* Reads from A=0 obtain ISR or IRR? */
+    uint8_t readsel_isr:1;
+
+    /* Reads perform a polling read? */
+    uint8_t poll:1;
+
+    /* Automatically clear IRQs from the ISR during INTA? */
+    uint8_t auto_eoi:1;
+
+    /* Automatically rotate IRQ priorities during AEOI? */
+    uint8_t rotate_on_auto_eoi:1;
+
+    /* Exclude slave inputs when considering in-service IRQs? */
+    uint8_t special_fully_nested_mode:1;
+
+    /* Special mask mode excludes masked IRs from AEOI and priority checks. */
+    uint8_t special_mask_mode:1;
+
+    /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */
+    uint8_t is_master:1;
+
+    /* Edge/trigger selection. */
+    uint8_t elcr;
+
+    /* Virtual INT output. */
+    uint8_t int_output;
+};
+
+DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic);
+
+
+/*
+ * IO-APIC
+ */
+
+#define VIOAPIC_NUM_PINS  48 /* 16 ISA IRQs, 32 non-legacy PCI IRQs. */
+
+struct hvm_hw_vioapic {
+    uint64_t base_address;
+    uint32_t ioregsel;
+    uint32_t id;
+    union vioapic_redir_entry
+    {
+        uint64_t bits;
+        struct {
+            uint8_t vector;
+            uint8_t delivery_mode:3;
+            uint8_t dest_mode:1;
+            uint8_t delivery_status:1;
+            uint8_t polarity:1;
+            uint8_t remote_irr:1;
+            uint8_t trig_mode:1;
+            uint8_t mask:1;
+            uint8_t reserve:7;
+            uint8_t reserved[4];
+            uint8_t dest_id;
+        } fields;
+    } redirtbl[VIOAPIC_NUM_PINS];
+};
+
+DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic);
+
+
+/*
+ * LAPIC
+ */
+
+struct hvm_hw_lapic {
+    uint64_t             apic_base_msr;
+    uint32_t             disabled; /* VLAPIC_xx_DISABLED */
+    uint32_t             timer_divisor;
+    uint64_t             tdt_msr;
+};
+
+DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic);
+
+struct hvm_hw_lapic_regs {
+    uint8_t data[1024];
+};
+
+DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs);
+
+
+/*
+ * IRQs
+ */
+
+struct hvm_hw_pci_irqs {
+    /*
+     * Virtual interrupt wires for a single PCI bus.
+     * Indexed by: device*4 + INTx#.
+     */
+    union {
+        unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */
+        uint64_t pad[2];
+    };
+};
+
+DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs);
+
+struct hvm_hw_isa_irqs {
+    /*
+     * Virtual interrupt wires for ISA devices.
+     * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing).
+     */
+    union {
+        unsigned long i[1];  /* DECLARE_BITMAP(i, 16); */
+        uint64_t pad[1];
+    };
+};
+
+DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs);
+
+struct hvm_hw_pci_link {
+    /*
+     * PCI-ISA interrupt router.
+     * Each PCI <device:INTx#> is 'wire-ORed' into one of four links using
+     * the traditional 'barber's pole' mapping ((device + INTx#) & 3).
+     * The router provides a programmable mapping from each link to a GSI.
+     */
+    uint8_t route[4];
+    uint8_t pad0[4];
+};
+
+DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link);
+
+/* 
+ *  PIT
+ */
+
+struct hvm_hw_pit {
+    struct hvm_hw_pit_channel {
+        uint32_t count; /* can be 65536 */
+        uint16_t latched_count;
+        uint8_t count_latched;
+        uint8_t status_latched;
+        uint8_t status;
+        uint8_t read_state;
+        uint8_t write_state;
+        uint8_t write_latch;
+        uint8_t rw_mode;
+        uint8_t mode;
+        uint8_t bcd; /* not supported */
+        uint8_t gate; /* timer start */
+    } channels[3];  /* 3 x 16 bytes */
+    uint32_t speaker_data_on;
+    uint32_t pad0;
+};
+
+DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit);
+
+
+/* 
+ * RTC
+ */ 
+
+#define RTC_CMOS_SIZE 14
+struct hvm_hw_rtc {
+    /* CMOS bytes */
+    uint8_t cmos_data[RTC_CMOS_SIZE];
+    /* Index register for 2-part operations */
+    uint8_t cmos_index;
+    uint8_t pad0;
+};
+
+DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc);
+
+
+/*
+ * HPET
+ */
+
+#define HPET_TIMER_NUM     3    /* 3 timers supported now */
+struct hvm_hw_hpet {
+    /* Memory-mapped, software visible registers */
+    uint64_t capability;        /* capabilities */
+    uint64_t res0;              /* reserved */
+    uint64_t config;            /* configuration */
+    uint64_t res1;              /* reserved */
+    uint64_t isr;               /* interrupt status reg */
+    uint64_t res2[25];          /* reserved */
+    uint64_t mc64;              /* main counter */
+    uint64_t res3;              /* reserved */
+    struct {                    /* timers */
+        uint64_t config;        /* configuration/cap */
+        uint64_t cmp;           /* comparator */
+        uint64_t fsb;           /* FSB route, not supported now */
+        uint64_t res4;          /* reserved */
+    } timers[HPET_TIMER_NUM];
+    uint64_t res5[4*(24-HPET_TIMER_NUM)];  /* reserved, up to 0x3ff */
+
+    /* Hidden register state */
+    uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+};
+
+DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet);
+
+
+/*
+ * PM timer
+ */
+
+struct hvm_hw_pmtimer {
+    uint32_t tmr_val;   /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */
+    uint16_t pm1a_sts;  /* PM1a_EVT_BLK.PM1a_STS: status register */
+    uint16_t pm1a_en;   /* PM1a_EVT_BLK.PM1a_EN: enable register */
+};
+
+DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer);
+
+/*
+ * MTRR MSRs
+ */
+
+struct hvm_hw_mtrr {
+#define MTRR_VCNT 8
+#define NUM_FIXED_MSR 11
+    uint64_t msr_pat_cr;
+    /* mtrr physbase & physmask msr pair*/
+    uint64_t msr_mtrr_var[MTRR_VCNT*2];
+    uint64_t msr_mtrr_fixed[NUM_FIXED_MSR];
+    uint64_t msr_mtrr_cap;
+    uint64_t msr_mtrr_def_type;
+};
+
+DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr);
+
+/*
+ * The save area of XSAVE/XRSTOR.
+ */
+
+struct hvm_hw_cpu_xsave {
+    uint64_t xfeature_mask;
+    uint64_t xcr0;                 /* Updated by XSETBV */
+    uint64_t xcr0_accum;           /* Updated by XSETBV */
+    struct {
+        struct { char x[512]; } fpu_sse;
+
+        struct {
+            uint64_t xstate_bv;         /* Updated by XRSTOR */
+            uint64_t reserved[7];
+        } xsave_hdr;                    /* The 64-byte header */
+
+        struct { char x[0]; } ymm;    /* YMM */
+    } save_area;
+};
+
+#define CPU_XSAVE_CODE  16
+
+/*
+ * Viridian hypervisor context.
+ */
+
+struct hvm_viridian_domain_context {
+    uint64_t hypercall_gpa;
+    uint64_t guest_os_id;
+};
+
+DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context);
+
+struct hvm_viridian_vcpu_context {
+    uint64_t apic_assist;
+};
+
+DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context);
+
+struct hvm_vmce_vcpu {
+    uint64_t caps;
+    uint64_t mci_ctl2_bank0;
+    uint64_t mci_ctl2_bank1;
+};
+
+DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu);
+
+struct hvm_tsc_adjust {
+    uint64_t tsc_adjust;
+};
+
+DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust);
+
+/* 
+ * Largest type-code in use
+ */
+#define HVM_SAVE_CODE_MAX 19
+
+#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/xen-mca.h
@@ -1,0 +1,440 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ * 
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <[email protected]>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen frees and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; our interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previously-fetched telemetry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall another
+ *  (most likely correctable) error happened. The fetched data
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecture:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+    uint16_t type;      /* structure type */
+    uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE	(1 << 2)
+#define MC_FLAG_POLLED		(1 << 3)
+#define MC_FLAG_RESET		(1 << 4)
+#define MC_FLAG_CMCI		(1 << 5)
+#define MC_FLAG_MCE		(1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+    struct mcinfo_common common;
+
+    /* domain running at the time of the error (most likely the impacted one) */
+    uint16_t mc_domid;
+    uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+    uint32_t mc_socketid; /* physical socket of the physical core */
+    uint16_t mc_coreid; /* physical impacted core */
+    uint16_t mc_core_threadid; /* core thread of physical core */
+    uint32_t mc_apicid;
+    uint32_t mc_flags;
+    uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+    struct mcinfo_common common;
+
+    uint16_t mc_bank; /* bank nr */
+    uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0
+                        * and if mc_addr is valid. Never valid on DomU. */
+    uint64_t mc_status; /* bank status */
+    uint64_t mc_addr;   /* bank address, only valid
+                         * if addr bit is set in mc_status */
+    uint64_t mc_misc;
+    uint64_t mc_ctrl2;
+    uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+    uint64_t reg;   /* MSR */
+    uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */ 
+struct mcinfo_extended {
+    struct mcinfo_common common;
+
+    /* You can fill up to five registers.
+     * If you need more, then use this structure
+     * multiple times. */
+
+    uint32_t mc_msrs; /* Number of msr with valid values. */
+    /*
+     * Currently Intel extended MSR (32/64) include all gp registers
+     * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+     * useful at present. So expand this array to 16/32 to leave room.
+     */
+    struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some cases */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* The interface below is used between XEN and DOM0 to pass XEN's recovery
+ * action information to DOM0.
+ * Usage scenario: after offlining a broken page, XEN might pass its page
+ * offline recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easily-broken page earlier on the next reboot.
+*/
+struct page_offline_action
+{
+    /* Params for passing the offlined page number to DOM0 */
+    uint64_t mfn;
+    uint64_t status;
+};
+
+struct cpu_offline_action
+{
+    /* Params for passing the identity of the offlined CPU to DOM0 */
+    uint32_t mc_socketid;
+    uint16_t mc_coreid;
+    uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery
+{
+    struct mcinfo_common common;
+    uint16_t mc_bank; /* bank nr */
+    uint8_t action_flags;
+    uint8_t action_types;
+    union {
+        struct page_offline_action page_retire;
+        struct cpu_offline_action cpu_offline;
+        uint8_t pad[MAX_UNION_SIZE];
+    } action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE	1024
+#define MCINFO_MAXSIZE		768
+
+#define MCINFO_FLAGS_UNCOMPLETE 0x1
+struct mc_info {
+    /* Number of mcinfo_* entries in mi_data */
+    uint32_t mi_nentries;
+    uint32_t flags;
+    uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_XEN_GUEST_HANDLE(mc_info_t);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS	7	/* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX	0	/* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX	1	/* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM	2	/* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX	3	/* Linux-defined */
+#define MC_CAPS_STD_ECX	4	/* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA	5	/* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX	6	/* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+    uint32_t mc_cpunr;          
+    uint32_t mc_chipid; 
+    uint16_t mc_coreid;
+    uint16_t mc_threadid;
+    uint32_t mc_apicid;
+    uint32_t mc_clusterid;
+    uint32_t mc_ncores;
+    uint32_t mc_ncores_active;
+    uint32_t mc_nthreads;
+    int32_t mc_cpuid_level;
+    uint32_t mc_family;
+    uint32_t mc_vendor;
+    uint32_t mc_model;
+    uint32_t mc_step;
+    char mc_vendorid[16];
+    char mc_brandid[64];
+    uint32_t mc_cpu_caps[MC_NCAPS];
+    uint32_t mc_cache_size;
+    uint32_t mc_cache_alignment;
+    int32_t mc_nmsrvals;
+    struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);
+
+
+/* 
+ * OSes should use these instead of writing their own lookup function,
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    (_mi)->mi_nentries
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+#define x86_mcinfo_lookup(_ret, _mi, _type)    \
+    do {                                                        \
+        uint32_t found, i;                                      \
+        struct mcinfo_common *_mic;                             \
+                                                                \
+        found = 0;                                              \
+	(_ret) = NULL;						\
+	if (_mi == NULL) break;					\
+        _mic = x86_mcinfo_first(_mi);                           \
+        for (i = 0; i < x86_mcinfo_nentries(_mi); i++) {        \
+            if (_mic->type == (_type)) {                        \
+                found = 1;                                      \
+                break;                                          \
+            }                                                   \
+            _mic = x86_mcinfo_next(_mic);                       \
+        }                                                       \
+        (_ret) = found ? _mic : NULL;                           \
+    } while (0)
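
For illustration, a buffer of fetched telemetry can be walked with the
accessor macros above. A minimal sketch, assuming only this header's
definitions (the scan_banks name is hypothetical):

/* Hypothetical example: walk an mc_info buffer and visit bank records. */
static void
scan_banks(struct mc_info *mi)
{
    struct mcinfo_common *mic;
    struct mcinfo_bank *bank;
    uint32_t i;

    mic = x86_mcinfo_first(mi);
    for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
        if (mic->type == MC_TYPE_BANK) {
            bank = (struct mcinfo_bank *)mic;
            /* bank->mc_status, bank->mc_addr etc. are now accessible */
        }
        mic = x86_mcinfo_next(mic);
    }
}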
+
+
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables. */
+    uint32_t flags;	/* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+                           XEN_MC_ACK if ack'ing an earlier fetch */
+			/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,
+			   XEN_MC_NODATA, XEN_MC_NOMATCH */
+    uint32_t _pad0;
+    uint64_t fetch_id;	/* OUT: id for ack, IN: id we are ack'ing */
+
+    /* OUT variables. */
+    XEN_GUEST_HANDLE(mc_info_t) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+    /* IN variables. */
+    uint16_t mc_domid;    /* The unprivileged domain to notify. */
+    uint16_t mc_vcpuid;   /* The vcpu in mc_domid to notify.
+                           * Usually echo'd value from the fetch hypercall. */
+
+    /* IN/OUT variables. */
+    uint32_t flags;
+
+/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+	/* IN/OUT */
+	uint32_t ncpus;
+	uint32_t _pad0;
+	/* OUT */
+	XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
+};
+
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+       /* IN */
+	uint32_t mcinj_cpunr;           /* target processor id */
+	uint32_t mcinj_flags;           /* see MC_MSRINJ_F_* below */
+	uint32_t mcinj_count;           /* 0 .. count-1 in array are valid */
+	uint32_t _pad0;
+	struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+	unsigned int mceinj_cpunr;      /* target processor id */
+};
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#define XEN_MC_inject_v2        6
+#define XEN_MC_INJECT_TYPE_MASK     0x7
+#define XEN_MC_INJECT_TYPE_MCE      0x0
+#define XEN_MC_INJECT_TYPE_CMCI     0x1
+
+#define XEN_MC_INJECT_CPU_BROADCAST 0x8
+
+struct xen_mc_inject_v2 {
+	uint32_t flags;
+	struct xenctl_bitmap cpumap;
+};
+#endif
+
+struct xen_mc {
+    uint32_t cmd;
+    uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+    union {
+        struct xen_mc_fetch        mc_fetch;
+        struct xen_mc_notifydomain mc_notifydomain;
+        struct xen_mc_physcpuinfo  mc_physcpuinfo;
+        struct xen_mc_msrinject    mc_msrinject;
+        struct xen_mc_mceinject    mc_mceinject;
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+        struct xen_mc_inject_v2    mc_inject_v2;
+#endif
+    } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_t);
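
To see how the pieces fit together, here is a hedged sketch of a Dom0
fetch request; memset and a HYPERVISOR_mca wrapper for the hypercall
number defined above are assumptions, not part of this header:

/* Hypothetical sketch: request nonurgent telemetry (no error handling). */
static long
fetch_mc_telemetry(struct mc_info *buf)
{
    struct xen_mc mc;

    memset(&mc, 0, sizeof mc);
    mc.cmd = XEN_MC_fetch;
    mc.interface_version = XEN_MCA_INTERFACE_VERSION;
    mc.u.mc_fetch.flags = XEN_MC_NONURGENT;
    set_xen_guest_handle(mc.u.mc_fetch.data, buf);
    /* on return, mc.u.mc_fetch.flags carries XEN_MC_OK, XEN_MC_NODATA, ... */
    return HYPERVISOR_mca(&mc);
}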
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/xen-x86_32.h
@@ -1,0 +1,171 @@
+/******************************************************************************
+ * xen-x86_32.h
+ * 
+ * Guest OS interface to x86 32-bit Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2007, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
+
+/*
+ * Hypercall interface:
+ *  Input:  %ebx, %ecx, %edx, %esi, %edi, %ebp (arguments 1-6)
+ *  Output: %eax
+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
+ *  call hypercall_page + hypercall-number * 32
+ * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx)
+ */
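
A hedged illustration of this convention for a two-argument hypercall;
the hypercall_page symbol and GCC-style inline assembly are assumptions
of the sketch, not part of the interface itself:

/* Hypothetical sketch: 2-argument hypercall on x86-32. Arguments travel
 * in %ebx/%ecx, the result returns in %eax, and the argument registers
 * are clobbered, exactly as the comment above describes. */
extern char hypercall_page[];

static inline long
hypercall2(unsigned int nr, unsigned long a1, unsigned long a2)
{
    long ret;

    asm volatile("call *%[entry]"
                 : "=a" (ret), "+b" (a1), "+c" (a2)
                 : [entry] "rm" (hypercall_page + nr * 32)
                 : "memory");
    return ret;
}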
+
+/*
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+#define FLAT_RING1_CS 0xe019    /* GDT index 259 */
+#define FLAT_RING1_DS 0xe021    /* GDT index 260 */
+#define FLAT_RING1_SS 0xe021    /* GDT index 260 */
+#define FLAT_RING3_CS 0xe02b    /* GDT index 261 */
+#define FLAT_RING3_DS 0xe033    /* GDT index 262 */
+#define FLAT_RING3_SS 0xe033    /* GDT index 262 */
+
+#define FLAT_KERNEL_CS FLAT_RING1_CS
+#define FLAT_KERNEL_DS FLAT_RING1_DS
+#define FLAT_KERNEL_SS FLAT_RING1_SS
+#define FLAT_USER_CS    FLAT_RING3_CS
+#define FLAT_USER_DS    FLAT_RING3_DS
+#define FLAT_USER_SS    FLAT_RING3_SS
+
+#define __HYPERVISOR_VIRT_START_PAE    0xF5800000
+#define __MACH2PHYS_VIRT_START_PAE     0xF5800000
+#define __MACH2PHYS_VIRT_END_PAE       0xF6800000
+#define HYPERVISOR_VIRT_START_PAE      \
+    mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_START_PAE       \
+    mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_END_PAE         \
+    mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE)
+
+/* Non-PAE bounds are obsolete. */
+#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000
+#define __MACH2PHYS_VIRT_START_NONPAE  0xFC000000
+#define __MACH2PHYS_VIRT_END_NONPAE    0xFC400000
+#define HYPERVISOR_VIRT_START_NONPAE   \
+    mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_START_NONPAE    \
+    mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_END_NONPAE      \
+    mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE)
+
+#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_START  __MACH2PHYS_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_END    __MACH2PHYS_VIRT_END_PAE
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#endif
+
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
+#endif
+
+/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#undef ___DEFINE_XEN_GUEST_HANDLE
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type)                  \
+    typedef struct { type *p; }                                 \
+        __guest_handle_ ## name;                                \
+    typedef struct { union { type *p; uint64_aligned_t q; }; }  \
+        __guest_handle_64_ ## name
+#undef set_xen_guest_handle_raw
+#define set_xen_guest_handle_raw(hnd, val)                  \
+    do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0;   \
+         (hnd).p = val;                                     \
+    } while ( 0 )
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name
+#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name)
+#endif
+
+#ifndef __ASSEMBLY__
+
+struct cpu_user_regs {
+    uint32_t ebx;
+    uint32_t ecx;
+    uint32_t edx;
+    uint32_t esi;
+    uint32_t edi;
+    uint32_t ebp;
+    uint32_t eax;
+    uint16_t error_code;    /* private */
+    uint16_t entry_vector;  /* private */
+    uint32_t eip;
+    uint16_t cs;
+    uint8_t  saved_upcall_mask;
+    uint8_t  _pad0;
+    uint32_t eflags;        /* eflags.IF == !saved_upcall_mask */
+    uint32_t esp;
+    uint16_t ss, _pad1;
+    uint16_t es, _pad2;
+    uint16_t ds, _pad3;
+    uint16_t fs, _pad4;
+    uint16_t gs, _pad5;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+/*
+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
+ * must use the following accessor macros to pack/unpack valid MFNs.
+ */
+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
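
A worked value shows the packing: the otherwise-zero low 12 bits of %cr3
carry the high bits of the MFN, so the round trip is lossless (arithmetic
is 32-bit unsigned, so bits shifted above bit 31 are discarded):

/* xen_pfn_to_cr3(0x123456) = (0x123456 << 12) | (0x123456 >> 20)
 *                          = 0x23456000 | 0x1 = 0x23456001
 * xen_cr3_to_pfn(0x23456001) = (0x23456001 >> 12) | (0x23456001 << 20)
 *                            = 0x00023456 | 0x00100000 = 0x00123456 */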
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct xen_callback {
+    unsigned long cs;
+    unsigned long eip;
+};
+typedef struct xen_callback xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/xen-x86_64.h
@@ -1,0 +1,202 @@
+/******************************************************************************
+ * xen-x86_64.h
+ * 
+ * Guest OS interface to x86 64-bit Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
+
+/*
+ * Hypercall interface:
+ *  Input:  %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6)
+ *  Output: %rax
+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
+ *  call hypercall_page + hypercall-number * 32
+ * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi)
+ */
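
As with the 32-bit interface, a hedged sketch of a two-argument call
under this convention (hypercall_page and GCC inline assembly are
assumptions of the sketch):

/* Hypothetical sketch: 2-argument hypercall on x86-64.
 * Arguments 1-2 travel in %rdi/%rsi; the result returns in %rax. */
extern char hypercall_page[];

static inline long
hypercall2(unsigned int nr, unsigned long a1, unsigned long a2)
{
    long ret;
    register unsigned long _a1 asm("rdi") = a1;
    register unsigned long _a2 asm("rsi") = a2;

    asm volatile("call *%[entry]"
                 : "=a" (ret), "+r" (_a1), "+r" (_a2)
                 : [entry] "rm" (hypercall_page + nr * 32)
                 : "memory");
    return ret;
}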
+
+/*
+ * 64-bit segment selectors
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+
+#define FLAT_RING3_CS32 0xe023  /* GDT index 260 */
+#define FLAT_RING3_CS64 0xe033  /* GDT index 261 */
+#define FLAT_RING3_DS32 0xe02b  /* GDT index 262 */
+#define FLAT_RING3_DS64 0x0000  /* NULL selector */
+#define FLAT_RING3_SS32 0xe02b  /* GDT index 262 */
+#define FLAT_RING3_SS64 0xe02b  /* GDT index 262 */
+
+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
+#define FLAT_KERNEL_DS   FLAT_KERNEL_DS64
+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
+#define FLAT_KERNEL_CS   FLAT_KERNEL_CS64
+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
+#define FLAT_KERNEL_SS   FLAT_KERNEL_SS64
+
+#define FLAT_USER_DS64 FLAT_RING3_DS64
+#define FLAT_USER_DS32 FLAT_RING3_DS32
+#define FLAT_USER_DS   FLAT_USER_DS64
+#define FLAT_USER_CS64 FLAT_RING3_CS64
+#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_USER_CS   FLAT_USER_CS64
+#define FLAT_USER_SS64 FLAT_RING3_SS64
+#define FLAT_USER_SS32 FLAT_RING3_SS32
+#define FLAT_USER_SS   FLAT_USER_SS64
+
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END   0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START  0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END    0xFFFF804000000000
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
+#endif
+
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+/*
+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
+ *  @which == SEGBASE_*  ;  @base == 64-bit base address
+ * Returns 0 on success.
+ */
+#define SEGBASE_FS          0
+#define SEGBASE_GS_USER     1
+#define SEGBASE_GS_KERNEL   2
+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
+
+/*
+ * int HYPERVISOR_iret(void)
+ * All arguments are on the kernel stack, in the following format.
+ * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ *   RING0 -> RING3 kernel mode.
+ *   RING1 -> RING3 kernel mode.
+ *   RING2 -> RING3 kernel mode.
+ *   RING3 -> RING3 user mode.
+ * However, RING0 indicates that the guest kernel should return to itself
+ * directly with
+ *      orb   $3,1*8(%rsp)
+ *      iretq
+ * If flags contains VGCF_in_syscall:
+ *   Restore RAX, RIP, RFLAGS, RSP.
+ *   Discard R11, RCX, CS, SS.
+ * Otherwise:
+ *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
+ * All other registers are saved on hypercall entry and restored to user.
+ */
+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
+#define _VGCF_in_syscall 8
+#define VGCF_in_syscall  (1<<_VGCF_in_syscall)
+#define VGCF_IN_SYSCALL  VGCF_in_syscall
+
+#ifndef __ASSEMBLY__
+
+struct iret_context {
+    /* Top of stack (%rsp at point of hypercall). */
+    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+    /* Bottom of iret stack frame. */
+};
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
+#define __DECL_REG(name) union { \
+    uint64_t r ## name, e ## name; \
+    uint32_t _e ## name; \
+}
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
+#define __DECL_REG(name) uint64_t r ## name
+#endif
+
+struct cpu_user_regs {
+    uint64_t r15;
+    uint64_t r14;
+    uint64_t r13;
+    uint64_t r12;
+    __DECL_REG(bp);
+    __DECL_REG(bx);
+    uint64_t r11;
+    uint64_t r10;
+    uint64_t r9;
+    uint64_t r8;
+    __DECL_REG(ax);
+    __DECL_REG(cx);
+    __DECL_REG(dx);
+    __DECL_REG(si);
+    __DECL_REG(di);
+    uint32_t error_code;    /* private */
+    uint32_t entry_vector;  /* private */
+    __DECL_REG(ip);
+    uint16_t cs, _pad0[1];
+    uint8_t  saved_upcall_mask;
+    uint8_t  _pad1[3];
+    __DECL_REG(flags);      /* rflags.IF == !saved_upcall_mask */
+    __DECL_REG(sp);
+    uint16_t ss, _pad2[3];
+    uint16_t es, _pad3[3];
+    uint16_t ds, _pad4[3];
+    uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base.     */
+    uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+#undef __DECL_REG
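
Under GCC the anonymous unions from __DECL_REG let the same storage be
read under either register name; a small hypothetical illustration:

/* Hypothetical illustration of the __DECL_REG aliasing. */
static void
decl_reg_example(struct cpu_user_regs *regs)
{
    regs->rax = 0x123456789abcdef0ULL;
    /* regs->eax names the same 64-bit field (both are uint64_t);
     * regs->_eax is the uint32_t view, i.e. 0x9abcdef0 here. */
}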
+
+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+typedef unsigned long xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86/xen.h
@@ -1,0 +1,260 @@
+/******************************************************************************
+ * arch-x86/xen.h
+ * 
+ * Guest OS interface to x86 Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "../xen.h"
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_H__
+
+/* Structural guest handles introduced in 0x00030201. */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef struct { type *p; } __guest_handle_ ## name
+#else
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef type * __guest_handle_ ## name
+#endif
+
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer when passed as a field
+ * in a struct in memory.
+ * XEN_GUEST_HANDLE_PARAM represents a guest pointer when passed as a
+ * hypercall argument.
+ * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on x86 but
+ * they might not be on other architectures.
+ */
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define __XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
+#define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
+#define XEN_GUEST_HANDLE_PARAM(name)    XEN_GUEST_HANDLE(name)
+#define set_xen_guest_handle_raw(hnd, val)  do { (hnd).p = val; } while (0)
+#ifdef __XEN_TOOLS__
+#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
+#endif
+#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
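
A short sketch of the handle machinery in use; the demo_record_t type
and the function are hypothetical:

/* Hypothetical example: declare a handle type and point it at a buffer. */
typedef struct { uint32_t value; } demo_record_t;
DEFINE_XEN_GUEST_HANDLE(demo_record_t);

static void
handle_example(demo_record_t *buf, XEN_GUEST_HANDLE(demo_record_t) *hnd)
{
    set_xen_guest_handle(*hnd, buf);    /* stores buf in (*hnd).p */
}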
+
+#if defined(__i386__)
+#include "xen-x86_32.h"
+#elif defined(__x86_64__)
+#include "xen-x86_64.h"
+#endif
+
+#ifndef __ASSEMBLY__
+typedef unsigned long xen_pfn_t;
+#define PRI_xen_pfn "lx"
+#endif
+
+/*
+ * `incontents 200 segdesc Segment Descriptor Tables
+ */
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries);
+ * `
+ */
+/*
+ * A number of GDT entries are reserved by Xen. These are not situated at the
+ * start of the GDT because some stupid OSes export hard-coded selector values
+ * in their ABI. These hard-coded values are always near the start of the GDT,
+ * so Xen places itself out of the way, at the far end of the GDT.
+ *
+ * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
+ */
+#define FIRST_RESERVED_GDT_PAGE  14
+#define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
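
Concretely (simple arithmetic from the constants above): 14 pages of 4096
bytes put the reserved region at byte 57344, and with 8-byte descriptors
that is GDT entry 57344 / 8 = 7168.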
+
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc);
+ * `
+ * ` @pa   The machine physical address of the descriptor to
+ * `       update. Must be either a descriptor page or writable.
+ * ` @desc The descriptor value to update, in the same format as a
+ * `       native descriptor table entry.
+ */
+
+/* Maximum number of virtual CPUs in legacy multi-processor guests. */
+#define XEN_LEGACY_MAX_VCPUS 32
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long xen_ulong_t;
+#define PRI_xen_ulong "lx"
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp);
+ * `
+ * Sets the stack segment and pointer for the current vcpu.
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]);
+ * `
+ */
+/*
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * Terminate the array with a sentinel entry, with traps[].address==0.
+ * The privilege level specifies which modes may enter a trap via a software
+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
+ * privilege levels as follows:
+ *  Level == 0: No one may enter
+ *  Level == 1: Kernel may enter
+ *  Level == 2: Kernel may enter
+ *  Level == 3: Everyone may enter
+ */
+#define TI_GET_DPL(_ti)      ((_ti)->flags & 3)
+#define TI_GET_IF(_ti)       ((_ti)->flags & 4)
+#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
+#define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
+struct trap_info {
+    uint8_t       vector;  /* exception vector                              */
+    uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
+    uint16_t      cs;      /* code selector                                 */
+    unsigned long address; /* code offset                                   */
+};
+typedef struct trap_info trap_info_t;
+DEFINE_XEN_GUEST_HANDLE(trap_info_t);
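
For illustration, filling one entry plus the sentinel might look like the
sketch below; the vector choice, handler address, and the
HYPERVISOR_set_trap_table wrapper are assumptions:

/* Hypothetical sketch: install a one-entry trap table. */
static trap_info_t traps[2];

static void
install_traps(unsigned long handler_addr)
{
    traps[0].vector  = 3;               /* e.g. breakpoint (#BP) */
    traps[0].flags   = 3;               /* DPL 3: everyone may enter */
    traps[0].cs      = FLAT_KERNEL_CS;
    traps[0].address = handler_addr;
    traps[1].address = 0;               /* sentinel terminates the array */
    HYPERVISOR_set_trap_table(traps);
}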
+
+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * The following is all CPU context. Note that the fpu_ctxt block is filled 
+ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ */
+struct vcpu_guest_context {
+    /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
+    struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
+#define VGCF_I387_VALID                (1<<0)
+#define VGCF_IN_KERNEL                 (1<<2)
+#define _VGCF_i387_valid               0
+#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
+#define _VGCF_in_kernel                2
+#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events  4
+#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online                   5
+#define VGCF_online                    (1<<_VGCF_online)
+    unsigned long flags;                    /* VGCF_* flags                 */
+    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
+    struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
+    unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
+    unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+    unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
+    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
+    unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
+    unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
+#ifdef __i386__
+    unsigned long event_callback_cs;        /* CS:EIP of event callback     */
+    unsigned long event_callback_eip;
+    unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
+    unsigned long failsafe_callback_eip;
+#else
+    unsigned long event_callback_eip;
+    unsigned long failsafe_callback_eip;
+#ifdef __XEN__
+    union {
+        unsigned long syscall_callback_eip;
+        struct {
+            unsigned int event_callback_cs;    /* compat CS of event cb     */
+            unsigned int failsafe_callback_cs; /* compat CS of failsafe cb  */
+        };
+    };
+#else
+    unsigned long syscall_callback_eip;
+#endif
+#endif
+    unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
+#ifdef __x86_64__
+    /* Segment base addresses. */
+    uint64_t      fs_base;
+    uint64_t      gs_base_kernel;
+    uint64_t      gs_base_user;
+#endif
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+
+struct arch_shared_info {
+    unsigned long max_pfn;                  /* max pfn that appears in table */
+    /* Frame containing list of mfns containing list of mfns containing p2m. */
+    xen_pfn_t     pfn_to_mfn_frame_list_list;
+    unsigned long nmi_reason;
+    uint64_t pad[32];
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_fpu_taskswitch(int set);
+ * `
+ * Sets (if set!=0) or clears (if set==0) CR0.TS.
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_debugreg(int regno, unsigned long value);
+ *
+ * ` unsigned long
+ * ` HYPERVISOR_get_debugreg(int regno);
+ * For 0<=reg<=7, returns the debug register value.
+ * For other values of reg, returns ((unsigned long)-EINVAL).
+ * (Unfortunately, this interface is defective.)
+ */
+
+/*
+ * Prefix forces emulation of some non-trapping instructions.
+ * Currently only CPUID.
+ */
+#ifdef __ASSEMBLY__
+#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
+#define XEN_CPUID          XEN_EMULATE_PREFIX cpuid
+#else
+#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
+#define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
+#endif
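
A hedged sketch of the prefix in use from C (GCC inline assembly is an
assumption of the sketch):

/* Hypothetical example: force Xen to emulate CPUID for leaf 0. */
static void
xen_cpuid_leaf0(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
    asm volatile(XEN_CPUID
                 : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                 : "0" (0));
}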
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86_32.h
@@ -1,0 +1,27 @@
+/******************************************************************************
+ * arch-x86_32.h
+ * 
+ * Guest OS interface to x86 32-bit Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "arch-x86/xen.h"
--- /dev/null
+++ b/sys/src/9/xen/xen-public/arch-x86_64.h
@@ -1,0 +1,43 @@
+/******************************************************************************
+ * arch-x86_64.h
+ * 
+ * Guest OS interface to x86 64-bit Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "arch-x86/xen.h"
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_set_callbacks(unsigned long event_selector,
+ * `                          unsigned long event_address,
+ * `                          unsigned long failsafe_selector,
+ * `                          unsigned long failsafe_address);
+ * `
+ * Register for callbacks on events.  When an event (from an event
+ * channel) occurs, event_address is used as the value of eip.
+ *
+ * A similar callback occurs if the segment selectors are invalid.
+ * failsafe_address is used as the value of eip.
+ *
+ * On x86_64, event_selector and failsafe_selector are ignored (???).
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/callback.h
@@ -1,0 +1,121 @@
+/******************************************************************************
+ * callback.h
+ *
+ * Register guest OS callbacks with Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+
+#ifndef __XEN_PUBLIC_CALLBACK_H__
+#define __XEN_PUBLIC_CALLBACK_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ *   long callback_op(int cmd, void *extra_args)
+ * @cmd        == CALLBACKOP_??? (callback operation).
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/* x86: Callback for event delivery. */
+#define CALLBACKTYPE_event                 0
+
+/* x86: Failsafe callback when guest state cannot be restored by Xen. */
+#define CALLBACKTYPE_failsafe              1
+
+/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
+#define CALLBACKTYPE_syscall               2
+
+/*
+ * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
+ *     feature is enabled. Do not use this callback type in new code.
+ */
+#define CALLBACKTYPE_sysenter_deprecated   3
+
+/* x86: Callback for NMI delivery. */
+#define CALLBACKTYPE_nmi                   4
+
+/*
+ * x86: sysenter is only available as follows:
+ * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
+ * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
+ *                      ('32-on-32-on-64', '32-on-64-on-64')
+ *                      [nb. also 64-bit guest applications on Intel CPUs
+ *                           ('64-on-64-on-64'), but syscall is preferred]
+ */
+#define CALLBACKTYPE_sysenter              5
+
+/*
+ * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
+ *                    ('32-on-32-on-64', '32-on-64-on-64')
+ */
+#define CALLBACKTYPE_syscall32             7
+
+/*
+ * Disable event delivery during callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events             0
+#define CALLBACKF_mask_events              (1U << _CALLBACKF_mask_events)
+
+/*
+ * Register a callback.
+ */
+#define CALLBACKOP_register                0
+struct callback_register {
+    uint16_t type;
+    uint16_t flags;
+    xen_callback_t address;
+};
+typedef struct callback_register callback_register_t;
+DEFINE_XEN_GUEST_HANDLE(callback_register_t);
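
As an illustration, registering the event callback might look like the
following; the entry argument and the HYPERVISOR_callback_op wrapper are
assumptions of the sketch:

/* Hypothetical sketch: register the event-delivery callback. */
static long
register_event_callback(xen_callback_t entry)
{
    struct callback_register cb;

    cb.type    = CALLBACKTYPE_event;
    cb.flags   = 0;     /* masking flag is ignored for event callbacks */
    cb.address = entry;
    return HYPERVISOR_callback_op(CALLBACKOP_register, &cb);
}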
+
+/*
+ * Unregister a callback.
+ *
+ * Not all callbacks can be unregistered. -EINVAL will be returned if
+ * you attempt to unregister such a callback.
+ */
+#define CALLBACKOP_unregister              1
+struct callback_unregister {
+    uint16_t type;
+    uint16_t _unused;
+};
+typedef struct callback_unregister callback_unregister_t;
+DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030207
+#undef CALLBACKTYPE_sysenter
+#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated
+#endif
+
+#endif /* __XEN_PUBLIC_CALLBACK_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/dom0_ops.h
@@ -1,0 +1,120 @@
+/******************************************************************************
+ * dom0_ops.h
+ * 
+ * Process command requests from domain-0 guest OS.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOM0_OPS_H__
+#define __XEN_PUBLIC_DOM0_OPS_H__
+
+#include "xen.h"
+#include "platform.h"
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030204
+#error "dom0_ops.h is a compatibility interface only"
+#endif
+
+#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION
+
+#define DOM0_SETTIME          XENPF_settime
+#define dom0_settime          xenpf_settime
+#define dom0_settime_t        xenpf_settime_t
+
+#define DOM0_ADD_MEMTYPE      XENPF_add_memtype
+#define dom0_add_memtype      xenpf_add_memtype
+#define dom0_add_memtype_t    xenpf_add_memtype_t
+
+#define DOM0_DEL_MEMTYPE      XENPF_del_memtype
+#define dom0_del_memtype      xenpf_del_memtype
+#define dom0_del_memtype_t    xenpf_del_memtype_t
+
+#define DOM0_READ_MEMTYPE     XENPF_read_memtype
+#define dom0_read_memtype     xenpf_read_memtype
+#define dom0_read_memtype_t   xenpf_read_memtype_t
+
+#define DOM0_MICROCODE        XENPF_microcode_update
+#define dom0_microcode        xenpf_microcode_update
+#define dom0_microcode_t      xenpf_microcode_update_t
+
+#define DOM0_PLATFORM_QUIRK   XENPF_platform_quirk
+#define dom0_platform_quirk   xenpf_platform_quirk
+#define dom0_platform_quirk_t xenpf_platform_quirk_t
+
+typedef uint64_t cpumap_t;
+
+/* Unsupported legacy operation -- defined for API compatibility. */
+#define DOM0_MSR                 15
+struct dom0_msr {
+    /* IN variables. */
+    uint32_t write;
+    cpumap_t cpu_mask;
+    uint32_t msr;
+    uint32_t in1;
+    uint32_t in2;
+    /* OUT variables. */
+    uint32_t out1;
+    uint32_t out2;
+};
+typedef struct dom0_msr dom0_msr_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
+
+/* Unsupported legacy operation -- defined for API compatibility. */
+#define DOM0_PHYSICAL_MEMORY_MAP 40
+struct dom0_memory_map_entry {
+    uint64_t start, end;
+    uint32_t flags; /* reserved */
+    uint8_t  is_ram;
+};
+typedef struct dom0_memory_map_entry dom0_memory_map_entry_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t);
+
+struct dom0_op {
+    uint32_t cmd;
+    uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
+    union {
+        struct dom0_msr               msr;
+        struct dom0_settime           settime;
+        struct dom0_add_memtype       add_memtype;
+        struct dom0_del_memtype       del_memtype;
+        struct dom0_read_memtype      read_memtype;
+        struct dom0_microcode         microcode;
+        struct dom0_platform_quirk    platform_quirk;
+        struct dom0_memory_map_entry  physical_memory_map;
+        uint8_t                       pad[128];
+    } u;
+};
+typedef struct dom0_op dom0_op_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_op_t);
+
+#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/domctl.h
@@ -1,0 +1,998 @@
+/******************************************************************************
+ * domctl.h
+ * 
+ * Domain management operations. For use by node control stack.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOMCTL_H__
+#define __XEN_PUBLIC_DOMCTL_H__
+
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#error "domctl operations are intended for use by node control tools only"
+#endif
+
+#include "xen.h"
+#include "grant_table.h"
+#include "hvm/save.h"
+
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
+
+/*
+ * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
+ * If it is specified as zero, an id is auto-allocated and returned.
+ */
+/* XEN_DOMCTL_createdomain */
+struct xen_domctl_createdomain {
+    /* IN parameters */
+    uint32_t ssidref;
+    xen_domain_handle_t handle;
+ /* Is this an HVM guest (as opposed to a PV guest)? */
+#define _XEN_DOMCTL_CDF_hvm_guest     0
+#define XEN_DOMCTL_CDF_hvm_guest      (1U<<_XEN_DOMCTL_CDF_hvm_guest)
+ /* Use hardware-assisted paging if available? */
+#define _XEN_DOMCTL_CDF_hap           1
+#define XEN_DOMCTL_CDF_hap            (1U<<_XEN_DOMCTL_CDF_hap)
+ /* Should domain memory integrity be verified by tboot during Sx? */
+#define _XEN_DOMCTL_CDF_s3_integrity  2
+#define XEN_DOMCTL_CDF_s3_integrity   (1U<<_XEN_DOMCTL_CDF_s3_integrity)
+ /* Disable out-of-sync shadow page tables? */
+#define _XEN_DOMCTL_CDF_oos_off       3
+#define XEN_DOMCTL_CDF_oos_off        (1U<<_XEN_DOMCTL_CDF_oos_off)
+    uint32_t flags;
+};
+typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
+
+/* XEN_DOMCTL_getdomaininfo */
+struct xen_domctl_getdomaininfo {
+    /* OUT variables. */
+    domid_t  domain;              /* Also echoed in domctl.domain */
+ /* Domain is scheduled to die. */
+#define _XEN_DOMINF_dying     0
+#define XEN_DOMINF_dying      (1U<<_XEN_DOMINF_dying)
+ /* Domain is an HVM guest (as opposed to a PV guest). */
+#define _XEN_DOMINF_hvm_guest 1
+#define XEN_DOMINF_hvm_guest  (1U<<_XEN_DOMINF_hvm_guest)
+ /* The guest OS has shut down. */
+#define _XEN_DOMINF_shutdown  2
+#define XEN_DOMINF_shutdown   (1U<<_XEN_DOMINF_shutdown)
+ /* Currently paused by control software. */
+#define _XEN_DOMINF_paused    3
+#define XEN_DOMINF_paused     (1U<<_XEN_DOMINF_paused)
+ /* Currently blocked pending an event.     */
+#define _XEN_DOMINF_blocked   4
+#define XEN_DOMINF_blocked    (1U<<_XEN_DOMINF_blocked)
+ /* Domain is currently running.            */
+#define _XEN_DOMINF_running   5
+#define XEN_DOMINF_running    (1U<<_XEN_DOMINF_running)
+ /* Being debugged.  */
+#define _XEN_DOMINF_debugged  6
+#define XEN_DOMINF_debugged   (1U<<_XEN_DOMINF_debugged)
+ /* XEN_DOMINF_shutdown guest-supplied code.  */
+#define XEN_DOMINF_shutdownmask 255
+#define XEN_DOMINF_shutdownshift 16
+    uint32_t flags;              /* XEN_DOMINF_* */
+    uint64_aligned_t tot_pages;
+    uint64_aligned_t max_pages;
+    uint64_aligned_t outstanding_pages;
+    uint64_aligned_t shr_pages;
+    uint64_aligned_t paged_pages;
+    uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */
+    uint64_aligned_t cpu_time;
+    uint32_t nr_online_vcpus;    /* Number of VCPUs currently online. */
+    uint32_t max_vcpu_id;        /* Maximum VCPUID in use by this domain. */
+    uint32_t ssidref;
+    xen_domain_handle_t handle;
+    uint32_t cpupool;
+};
+typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
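+
+/*
+ * Usage note (sketch): when XEN_DOMINF_shutdown is set in flags, the
+ * guest-supplied shutdown code is recovered with the mask/shift pair
+ * above, e.g.:
+ *
+ *     if (info.flags & XEN_DOMINF_shutdown) {
+ *         int code = (info.flags >> XEN_DOMINF_shutdownshift) &
+ *                    XEN_DOMINF_shutdownmask;
+ *     }
+ */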
+
+
+/* XEN_DOMCTL_getmemlist */
+struct xen_domctl_getmemlist {
+    /* IN variables. */
+    /* Max entries to write to output buffer. */
+    uint64_aligned_t max_pfns;
+    /* Start index in guest's page list. */
+    uint64_aligned_t start_pfn;
+    XEN_GUEST_HANDLE_64(uint64) buffer;
+    /* OUT variables. */
+    uint64_aligned_t num_pfns;
+};
+typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
+
+
+/* XEN_DOMCTL_getpageframeinfo */
+
+#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28
+#define XEN_DOMCTL_PFINFO_NOTAB   (0x0U<<28)
+#define XEN_DOMCTL_PFINFO_L1TAB   (0x1U<<28)
+#define XEN_DOMCTL_PFINFO_L2TAB   (0x2U<<28)
+#define XEN_DOMCTL_PFINFO_L3TAB   (0x3U<<28)
+#define XEN_DOMCTL_PFINFO_L4TAB   (0x4U<<28)
+#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28)
+#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
+#define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
+#define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page */
+#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
+#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
+
+struct xen_domctl_getpageframeinfo {
+    /* IN variables. */
+    uint64_aligned_t gmfn; /* GMFN to query */
+    /* OUT variables. */
+    /* Is the page PINNED to a type? */
+    uint32_t type;         /* see above type defs */
+};
+typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t);
+
+
+/* XEN_DOMCTL_getpageframeinfo2 */
+struct xen_domctl_getpageframeinfo2 {
+    /* IN variables. */
+    uint64_aligned_t num;
+    /* IN/OUT variables. */
+    XEN_GUEST_HANDLE_64(uint32) array;
+};
+typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
+
+/* XEN_DOMCTL_getpageframeinfo3 */
+struct xen_domctl_getpageframeinfo3 {
+    /* IN variables. */
+    uint64_aligned_t num;
+    /* IN/OUT variables. */
+    XEN_GUEST_HANDLE_64(xen_pfn_t) array;
+};
+
+
+/*
+ * Control shadow pagetables operation
+ */
+/* XEN_DOMCTL_shadow_op */
+
+/* Disable shadow mode. */
+#define XEN_DOMCTL_SHADOW_OP_OFF         0
+
+/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE      32
+
+/* Log-dirty bitmap operations. */
+ /* Return the bitmap and clean internal copy for next round. */
+#define XEN_DOMCTL_SHADOW_OP_CLEAN       11
+ /* Return the bitmap but do not modify internal copy. */
+#define XEN_DOMCTL_SHADOW_OP_PEEK        12
+
+/* Memory allocation accessors. */
+#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION   30
+#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION   31
+
+/* Legacy enable operations. */
+ /* Equiv. to ENABLE with no mode flags. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST       1
+ /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY   2
+ /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE  3
+
+/* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */
+ /*
+  * Shadow pagetables are refcounted: guest does not use explicit mmu
+  * operations nor write-protect its pagetables.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT  (1 << 1)
+ /*
+  * Log pages in a bitmap as they are dirtied.
+  * Used for live relocation to determine which pages must be re-sent.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2)
+ /*
+  * Automatically translate GPFNs into MFNs.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3)
+ /*
+  * Xen does not steal virtual address space from the guest.
+  * Requires HVM support.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL  (1 << 4)
+
+struct xen_domctl_shadow_op_stats {
+    uint32_t fault_count;
+    uint32_t dirty_count;
+};
+typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t);
+
+struct xen_domctl_shadow_op {
+    /* IN variables. */
+    uint32_t       op;       /* XEN_DOMCTL_SHADOW_OP_* */
+
+    /* OP_ENABLE */
+    uint32_t       mode;     /* XEN_DOMCTL_SHADOW_ENABLE_* */
+
+    /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */
+    uint32_t       mb;       /* Shadow memory allocation in MB */
+
+    /* OP_PEEK / OP_CLEAN */
+    XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
+    uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */
+    struct xen_domctl_shadow_op_stats stats;
+};
+typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t);
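+
+/*
+ * Sketch of a live-migration style log-dirty round.  do_domctl() and
+ * the caller-allocated bitmap (one bit per guest page) are assumptions
+ * of this example:
+ *
+ *     ctl.cmd = XEN_DOMCTL_shadow_op;
+ *     ctl.u.shadow_op.op = XEN_DOMCTL_SHADOW_OP_ENABLE;
+ *     ctl.u.shadow_op.mode = XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY;
+ *     do_domctl(&ctl);
+ *
+ *     ctl.u.shadow_op.op = XEN_DOMCTL_SHADOW_OP_CLEAN;
+ *     set_xen_guest_handle(ctl.u.shadow_op.dirty_bitmap, bitmap);
+ *     ctl.u.shadow_op.pages = nr_pages;
+ *     do_domctl(&ctl);    (resend pages whose bit is set, then repeat)
+ */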
+
+
+/* XEN_DOMCTL_max_mem */
+struct xen_domctl_max_mem {
+    /* IN variables. */
+    uint64_aligned_t max_memkb;
+};
+typedef struct xen_domctl_max_mem xen_domctl_max_mem_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t);
+
+
+/* XEN_DOMCTL_setvcpucontext */
+/* XEN_DOMCTL_getvcpucontext */
+struct xen_domctl_vcpucontext {
+    uint32_t              vcpu;                  /* IN */
+    XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
+
+
+/* XEN_DOMCTL_getvcpuinfo */
+struct xen_domctl_getvcpuinfo {
+    /* IN variables. */
+    uint32_t vcpu;
+    /* OUT variables. */
+    uint8_t  online;                  /* currently online (not hotplugged)? */
+    uint8_t  blocked;                 /* blocked waiting for an event? */
+    uint8_t  running;                 /* currently scheduled on its CPU? */
+    uint64_aligned_t cpu_time;        /* total cpu time consumed (ns) */
+    uint32_t cpu;                     /* current mapping   */
+};
+typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
+
+
+/* Get/set the NUMA node(s) with which the guest has affinity. */
+/* XEN_DOMCTL_setnodeaffinity */
+/* XEN_DOMCTL_getnodeaffinity */
+struct xen_domctl_nodeaffinity {
+    struct xenctl_bitmap nodemap;/* IN */
+};
+typedef struct xen_domctl_nodeaffinity xen_domctl_nodeaffinity_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_nodeaffinity_t);
+
+
+/* Get/set which physical cpus a vcpu can execute on. */
+/* XEN_DOMCTL_setvcpuaffinity */
+/* XEN_DOMCTL_getvcpuaffinity */
+struct xen_domctl_vcpuaffinity {
+    uint32_t  vcpu;              /* IN */
+    struct xenctl_bitmap cpumap; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t);
+
+
+/* XEN_DOMCTL_max_vcpus */
+struct xen_domctl_max_vcpus {
+    uint32_t max;           /* maximum number of vcpus */
+};
+typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
+
+
+/* XEN_DOMCTL_scheduler_op */
+/* Scheduler types. */
+#define XEN_SCHEDULER_SEDF     4
+#define XEN_SCHEDULER_CREDIT   5
+#define XEN_SCHEDULER_CREDIT2  6
+#define XEN_SCHEDULER_ARINC653 7
+/* Set or get info? */
+#define XEN_DOMCTL_SCHEDOP_putinfo 0
+#define XEN_DOMCTL_SCHEDOP_getinfo 1
+struct xen_domctl_scheduler_op {
+    uint32_t sched_id;  /* XEN_SCHEDULER_* */
+    uint32_t cmd;       /* XEN_DOMCTL_SCHEDOP_* */
+    union {
+        struct xen_domctl_sched_sedf {
+            uint64_aligned_t period;
+            uint64_aligned_t slice;
+            uint64_aligned_t latency;
+            uint32_t extratime;
+            uint32_t weight;
+        } sedf;
+        struct xen_domctl_sched_credit {
+            uint16_t weight;
+            uint16_t cap;
+        } credit;
+        struct xen_domctl_sched_credit2 {
+            uint16_t weight;
+        } credit2;
+    } u;
+};
+typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t);
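+
+/*
+ * Sketch: raising a domain's credit-scheduler weight, with the same
+ * hypothetical do_domctl() wrapper as above:
+ *
+ *     ctl.cmd = XEN_DOMCTL_scheduler_op;
+ *     ctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CREDIT;
+ *     ctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo;
+ *     ctl.u.scheduler_op.u.credit.weight = 512;
+ *     ctl.u.scheduler_op.u.credit.cap = 0;        (0 => no cap)
+ *     do_domctl(&ctl);
+ */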
+
+
+/* XEN_DOMCTL_setdomainhandle */
+struct xen_domctl_setdomainhandle {
+    xen_domain_handle_t handle;
+};
+typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t);
+
+
+/* XEN_DOMCTL_setdebugging */
+struct xen_domctl_setdebugging {
+    uint8_t enable;
+};
+typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t);
+
+
+/* XEN_DOMCTL_irq_permission */
+struct xen_domctl_irq_permission {
+    uint8_t pirq;
+    uint8_t allow_access;    /* flag to specify enable/disable of IRQ access */
+};
+typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t);
+
+
+/* XEN_DOMCTL_iomem_permission */
+struct xen_domctl_iomem_permission {
+    uint64_aligned_t first_mfn;/* first page (physical page number) in range */
+    uint64_aligned_t nr_mfns;  /* number of pages in range (>0) */
+    uint8_t  allow_access;     /* allow (!0) or deny (0) access to range? */
+};
+typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t);
+
+
+/* XEN_DOMCTL_ioport_permission */
+struct xen_domctl_ioport_permission {
+    uint32_t first_port;              /* first port in range */
+    uint32_t nr_ports;                /* size of port range */
+    uint8_t  allow_access;            /* allow or deny access to range? */
+};
+typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t);
+
+
+/* XEN_DOMCTL_hypercall_init */
+struct xen_domctl_hypercall_init {
+    uint64_aligned_t  gmfn;           /* GMFN to be initialised */
+};
+typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t);
+
+
+/* XEN_DOMCTL_arch_setup */
+#define _XEN_DOMAINSETUP_hvm_guest 0
+#define XEN_DOMAINSETUP_hvm_guest  (1UL<<_XEN_DOMAINSETUP_hvm_guest)
+#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save)  */
+#define XEN_DOMAINSETUP_query  (1UL<<_XEN_DOMAINSETUP_query)
+#define _XEN_DOMAINSETUP_sioemu_guest 2
+#define XEN_DOMAINSETUP_sioemu_guest  (1UL<<_XEN_DOMAINSETUP_sioemu_guest)
+typedef struct xen_domctl_arch_setup {
+    uint64_aligned_t flags;  /* XEN_DOMAINSETUP_* */
+} xen_domctl_arch_setup_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t);
+
+
+/* XEN_DOMCTL_settimeoffset */
+struct xen_domctl_settimeoffset {
+    int32_t  time_offset_seconds; /* applied to domain wallclock time */
+};
+typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
+
+/* XEN_DOMCTL_gethvmcontext */
+/* XEN_DOMCTL_sethvmcontext */
+typedef struct xen_domctl_hvmcontext {
+    uint32_t size; /* IN/OUT: size of buffer / bytes filled */
+    XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call
+                                        * gethvmcontext with NULL
+                                        * buffer to get size req'd */
+} xen_domctl_hvmcontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t);
+
+
+/* XEN_DOMCTL_set_address_size */
+/* XEN_DOMCTL_get_address_size */
+typedef struct xen_domctl_address_size {
+    uint32_t size;
+} xen_domctl_address_size_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t);
+
+
+/* XEN_DOMCTL_real_mode_area */
+struct xen_domctl_real_mode_area {
+    uint32_t log; /* log2 of Real Mode Area size */
+};
+typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+
+
+/* XEN_DOMCTL_sendtrigger */
+#define XEN_DOMCTL_SENDTRIGGER_NMI    0
+#define XEN_DOMCTL_SENDTRIGGER_RESET  1
+#define XEN_DOMCTL_SENDTRIGGER_INIT   2
+#define XEN_DOMCTL_SENDTRIGGER_POWER  3
+#define XEN_DOMCTL_SENDTRIGGER_SLEEP  4
+struct xen_domctl_sendtrigger {
+    uint32_t  trigger;  /* IN */
+    uint32_t  vcpu;     /* IN */
+};
+typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
+
+
+/* Assign PCI device to HVM guest. Sets up IOMMU structures. */
+/* XEN_DOMCTL_assign_device */
+/* XEN_DOMCTL_test_assign_device */
+/* XEN_DOMCTL_deassign_device */
+struct xen_domctl_assign_device {
+    uint32_t  machine_sbdf;   /* machine PCI ID of assigned device */
+};
+typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
+
+/* Retrieve sibling device information for machine_sbdf */
+/* XEN_DOMCTL_get_device_group */
+struct xen_domctl_get_device_group {
+    uint32_t  machine_sbdf;     /* IN */
+    uint32_t  max_sdevs;        /* IN */
+    uint32_t  num_sdevs;        /* OUT */
+    XEN_GUEST_HANDLE_64(uint32)  sdev_array;   /* OUT */
+};
+typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
+
+/* Pass-through interrupts: bind real irq -> hvm devfn. */
+/* XEN_DOMCTL_bind_pt_irq */
+/* XEN_DOMCTL_unbind_pt_irq */
+typedef enum pt_irq_type_e {
+    PT_IRQ_TYPE_PCI,
+    PT_IRQ_TYPE_ISA,
+    PT_IRQ_TYPE_MSI,
+    PT_IRQ_TYPE_MSI_TRANSLATE,
+} pt_irq_type_t;
+struct xen_domctl_bind_pt_irq {
+    uint32_t machine_irq;
+    pt_irq_type_t irq_type;
+    uint32_t hvm_domid;
+
+    union {
+        struct {
+            uint8_t isa_irq;
+        } isa;
+        struct {
+            uint8_t bus;
+            uint8_t device;
+            uint8_t intx;
+        } pci;
+        struct {
+            uint8_t gvec;
+            uint32_t gflags;
+            uint64_aligned_t gtable;
+        } msi;
+    } u;
+};
+typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t);
+
+
+/* Bind machine I/O address range -> HVM address range. */
+/* XEN_DOMCTL_memory_mapping */
+#define DPCI_ADD_MAPPING         1
+#define DPCI_REMOVE_MAPPING      0
+struct xen_domctl_memory_mapping {
+    uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */
+    uint64_aligned_t first_mfn; /* first page (machine page) in range */
+    uint64_aligned_t nr_mfns;   /* number of pages in range (>0) */
+    uint32_t add_mapping;       /* add or remove mapping */
+    uint32_t padding;           /* padding for 64-bit aligned structure */
+};
+typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t);
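+
+/*
+ * Sketch: identity-mapping a machine MMIO range into an HVM guest,
+ * assuming 4K pages and the hypothetical do_domctl() wrapper:
+ *
+ *     ctl.cmd = XEN_DOMCTL_memory_mapping;
+ *     ctl.u.memory_mapping.first_gfn = mmio_base >> 12;
+ *     ctl.u.memory_mapping.first_mfn = mmio_base >> 12;
+ *     ctl.u.memory_mapping.nr_mfns = mmio_size >> 12;
+ *     ctl.u.memory_mapping.add_mapping = DPCI_ADD_MAPPING;
+ *     do_domctl(&ctl);
+ */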
+
+
+/* Bind machine I/O port range -> HVM I/O port range. */
+/* XEN_DOMCTL_ioport_mapping */
+struct xen_domctl_ioport_mapping {
+    uint32_t first_gport;     /* first guest IO port*/
+    uint32_t first_mport;     /* first machine IO port */
+    uint32_t nr_ports;        /* size of port range */
+    uint32_t add_mapping;     /* add or remove mapping */
+};
+typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t);
+
+
+/*
+ * Pin caching type of RAM space for x86 HVM domU.
+ */
+/* XEN_DOMCTL_pin_mem_cacheattr */
+/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */
+#define XEN_DOMCTL_MEM_CACHEATTR_UC  0
+#define XEN_DOMCTL_MEM_CACHEATTR_WC  1
+#define XEN_DOMCTL_MEM_CACHEATTR_WT  4
+#define XEN_DOMCTL_MEM_CACHEATTR_WP  5
+#define XEN_DOMCTL_MEM_CACHEATTR_WB  6
+#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7
+struct xen_domctl_pin_mem_cacheattr {
+    uint64_aligned_t start, end;
+    uint32_t type; /* XEN_DOMCTL_MEM_CACHEATTR_* */
+};
+typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
+
+
+/* XEN_DOMCTL_set_ext_vcpucontext */
+/* XEN_DOMCTL_get_ext_vcpucontext */
+struct xen_domctl_ext_vcpucontext {
+    /* IN: VCPU that this call applies to. */
+    uint32_t         vcpu;
+    /*
+     * SET: Size of struct (IN)
+     * GET: Size of struct (OUT, up to 128 bytes)
+     */
+    uint32_t         size;
+#if defined(__i386__) || defined(__x86_64__)
+    /* SYSCALL from 32-bit mode and SYSENTER callback information. */
+    /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */
+    uint64_aligned_t syscall32_callback_eip;
+    uint64_aligned_t sysenter_callback_eip;
+    uint16_t         syscall32_callback_cs;
+    uint16_t         sysenter_callback_cs;
+    uint8_t          syscall32_disables_events;
+    uint8_t          sysenter_disables_events;
+#if defined(__GNUC__)
+    union {
+        uint64_aligned_t mcg_cap;
+        struct hvm_vmce_vcpu vmce;
+    };
+#else
+    struct hvm_vmce_vcpu vmce;
+#endif
+#endif
+};
+typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t);
+
+/*
+ * Set the target domain for a domain
+ */
+/* XEN_DOMCTL_set_target */
+struct xen_domctl_set_target {
+    domid_t target;
+};
+typedef struct xen_domctl_set_target xen_domctl_set_target_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t);
+
+#if defined(__i386__) || defined(__x86_64__)
+# define XEN_CPUID_INPUT_UNUSED  0xFFFFFFFF
+/* XEN_DOMCTL_set_cpuid */
+struct xen_domctl_cpuid {
+  uint32_t input[2];
+  uint32_t eax;
+  uint32_t ebx;
+  uint32_t ecx;
+  uint32_t edx;
+};
+typedef struct xen_domctl_cpuid xen_domctl_cpuid_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t);
+#endif
+
+/* XEN_DOMCTL_subscribe */
+struct xen_domctl_subscribe {
+    uint32_t port; /* IN */
+};
+typedef struct xen_domctl_subscribe xen_domctl_subscribe_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t);
+
+/*
+ * Define the maximum machine address size which should be allocated
+ * to a guest.
+ */
+/* XEN_DOMCTL_set_machine_address_size */
+/* XEN_DOMCTL_get_machine_address_size */
+
+/*
+ * Do not inject spurious page faults into this domain.
+ */
+/* XEN_DOMCTL_suppress_spurious_page_faults */
+
+/* XEN_DOMCTL_debug_op */
+#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF         0
+#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON          1
+struct xen_domctl_debug_op {
+    uint32_t op;   /* IN */
+    uint32_t vcpu; /* IN */
+};
+typedef struct xen_domctl_debug_op xen_domctl_debug_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t);
+
+/*
+ * Request a particular record from the HVM context
+ */
+/* XEN_DOMCTL_gethvmcontext_partial */
+typedef struct xen_domctl_hvmcontext_partial {
+    uint32_t type;                      /* IN: Type of record required */
+    uint32_t instance;                  /* IN: Instance of that type */
+    XEN_GUEST_HANDLE_64(uint8) buffer;  /* OUT: buffer to write record into */
+} xen_domctl_hvmcontext_partial_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
+
+/* XEN_DOMCTL_disable_migrate */
+typedef struct xen_domctl_disable_migrate {
+    uint32_t disable; /* IN: 1: disable migration and restore */
+} xen_domctl_disable_migrate_t;
+
+
+/* XEN_DOMCTL_gettscinfo */
+/* XEN_DOMCTL_settscinfo */
+struct xen_guest_tsc_info {
+    uint32_t tsc_mode;
+    uint32_t gtsc_khz;
+    uint32_t incarnation;
+    uint32_t pad;
+    uint64_aligned_t elapsed_nsec;
+};
+typedef struct xen_guest_tsc_info xen_guest_tsc_info_t;
+DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t);
+typedef struct xen_domctl_tsc_info {
+    XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT */
+    xen_guest_tsc_info_t info; /* IN */
+} xen_domctl_tsc_info_t;
+
+/* XEN_DOMCTL_gdbsx_guestmemio      guest mem io */
+struct xen_domctl_gdbsx_memio {
+    /* IN */
+    uint64_aligned_t pgd3val;/* optional: init_mm.pgd[3] value */
+    uint64_aligned_t gva;    /* guest virtual address */
+    uint64_aligned_t uva;    /* user buffer virtual address */
+    uint32_t         len;    /* number of bytes to read/write */
+    uint8_t          gwr;    /* 0 = read from guest. 1 = write to guest */
+    /* OUT */
+    uint32_t         remain; /* bytes remaining to be copied */
+};
+
+/* XEN_DOMCTL_gdbsx_pausevcpu */
+/* XEN_DOMCTL_gdbsx_unpausevcpu */
+struct xen_domctl_gdbsx_pauseunp_vcpu { /* pause/unpause a vcpu */
+    uint32_t         vcpu;         /* which vcpu */
+};
+
+/* XEN_DOMCTL_gdbsx_domstatus */
+struct xen_domctl_gdbsx_domstatus {
+    /* OUT */
+    uint8_t          paused;     /* is the domain paused */
+    uint32_t         vcpu_id;    /* any vcpu in an event? */
+    uint32_t         vcpu_ev;    /* if yes, what event? */
+};
+
+/*
+ * Memory event operations
+ */
+
+/* XEN_DOMCTL_mem_event_op */
+
+/*
+ * Domain memory paging
+ * Page memory in and out.
+ * Domctl interface to set up and tear down the 
+ * pager<->hypervisor interface. Use XENMEM_paging_op*
+ * to perform per-page operations.
+ *
+ * The XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE domctl returns several
+ * non-standard error codes to indicate why paging could not be enabled:
+ * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
+ * EMLINK - guest has iommu passthrough enabled
+ * EXDEV  - guest has PoD enabled
+ * EBUSY  - guest has or had paging enabled, ring buffer still active
+ */
+#define XEN_DOMCTL_MEM_EVENT_OP_PAGING            1
+
+#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE     0
+#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE    1
+
+/*
+ * Access permissions.
+ *
+ * As with paging, use the domctl for teardown/setup of the
+ * helper<->hypervisor interface.
+ *
+ * There are HVM hypercalls to set the per-page access permissions of every
+ * page in a domain.  When one of these permissions (independent, read,
+ * write, and execute) is violated, the VCPU is paused and a memory
+ * event describing what happened is sent (see public/mem_event.h).
+ *
+ * The memory event handler can then resume the VCPU and redo the access 
+ * with a XENMEM_access_op_resume hypercall.
+ *
+ * The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE domctl returns several
+ * non-standard error codes to indicate why access could not be enabled:
+ * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest
+ * EBUSY  - guest has or had access enabled, ring buffer still active
+ */
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS            2
+
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE     0
+#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE    1
+
+/*
+ * Sharing ENOMEM helper.
+ *
+ * As with paging, use the domctl for teardown/setup of the
+ * helper<->hypervisor interface.
+ *
+ * If set up, this ring is used to communicate failed allocations
+ * in the unshare path. XENMEM_sharing_op_resume is used to wake up
+ * vcpus that could not unshare.
+ *
+ * Note that sharing can be turned on (as per the domctl below)
+ * *without* this ring being set up.
+ */
+#define XEN_DOMCTL_MEM_EVENT_OP_SHARING           3
+
+#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE    0
+#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DISABLE   1
+
+/* Used for teardown/setup of the helper<->hypervisor interface for
+ * paging, access and sharing. */
+struct xen_domctl_mem_event_op {
+    uint32_t       op;           /* XEN_DOMCTL_MEM_EVENT_OP_*_* */
+    uint32_t       mode;         /* XEN_DOMCTL_MEM_EVENT_OP_* */
+
+    uint32_t port;              /* OUT: event channel for ring */
+};
+typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t);
+
+/*
+ * Memory sharing operations
+ */
+/* XEN_DOMCTL_mem_sharing_op.
+ * The CONTROL sub-domctl is used for bringup/teardown. */
+#define XEN_DOMCTL_MEM_SHARING_CONTROL          0
+
+struct xen_domctl_mem_sharing_op {
+    uint8_t op; /* XEN_DOMCTL_MEM_SHARING_* */
+
+    union {
+        uint8_t enable;                   /* CONTROL */
+    } u;
+};
+typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t);
+
+struct xen_domctl_audit_p2m {
+    /* OUT error counts */
+    uint64_t orphans;
+    uint64_t m2p_bad;
+    uint64_t p2m_bad;
+};
+typedef struct xen_domctl_audit_p2m xen_domctl_audit_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_audit_p2m_t);
+
+struct xen_domctl_set_virq_handler {
+    uint32_t virq; /* IN */
+};
+typedef struct xen_domctl_set_virq_handler xen_domctl_set_virq_handler_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_virq_handler_t);
+
+#if defined(__i386__) || defined(__x86_64__)
+/* XEN_DOMCTL_setvcpuextstate */
+/* XEN_DOMCTL_getvcpuextstate */
+struct xen_domctl_vcpuextstate {
+    /* IN: VCPU that this call applies to. */
+    uint32_t         vcpu;
+    /*
+     * SET: xfeature support mask of struct (IN)
+     * GET: xfeature support mask of struct (IN/OUT)
+     * The xfeature mask identifies the save format, so that a
+     * compatible CPU can check the format and decide whether it can
+     * restore the state.
+     */
+    uint64_aligned_t         xfeature_mask;
+    /*
+     * SET: Size of struct (IN)
+     * GET: Size of struct (IN/OUT)
+     */
+    uint64_aligned_t         size;
+    XEN_GUEST_HANDLE_64(uint64) buffer;
+};
+typedef struct xen_domctl_vcpuextstate xen_domctl_vcpuextstate_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t);
+#endif
+
+/* XEN_DOMCTL_set_access_required: sets whether a memory event listener
+ * must be present to handle page access events. If false, page access
+ * reverts to full permissions when no one is listening. */
+struct xen_domctl_set_access_required {
+    uint8_t access_required;
+};
+typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
+
+struct xen_domctl_set_broken_page_p2m {
+    uint64_aligned_t pfn;
+};
+typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+
+struct xen_domctl {
+    uint32_t cmd;
+#define XEN_DOMCTL_createdomain                   1
+#define XEN_DOMCTL_destroydomain                  2
+#define XEN_DOMCTL_pausedomain                    3
+#define XEN_DOMCTL_unpausedomain                  4
+#define XEN_DOMCTL_getdomaininfo                  5
+#define XEN_DOMCTL_getmemlist                     6
+#define XEN_DOMCTL_getpageframeinfo               7
+#define XEN_DOMCTL_getpageframeinfo2              8
+#define XEN_DOMCTL_setvcpuaffinity                9
+#define XEN_DOMCTL_shadow_op                     10
+#define XEN_DOMCTL_max_mem                       11
+#define XEN_DOMCTL_setvcpucontext                12
+#define XEN_DOMCTL_getvcpucontext                13
+#define XEN_DOMCTL_getvcpuinfo                   14
+#define XEN_DOMCTL_max_vcpus                     15
+#define XEN_DOMCTL_scheduler_op                  16
+#define XEN_DOMCTL_setdomainhandle               17
+#define XEN_DOMCTL_setdebugging                  18
+#define XEN_DOMCTL_irq_permission                19
+#define XEN_DOMCTL_iomem_permission              20
+#define XEN_DOMCTL_ioport_permission             21
+#define XEN_DOMCTL_hypercall_init                22
+#define XEN_DOMCTL_arch_setup                    23
+#define XEN_DOMCTL_settimeoffset                 24
+#define XEN_DOMCTL_getvcpuaffinity               25
+#define XEN_DOMCTL_real_mode_area                26
+#define XEN_DOMCTL_resumedomain                  27
+#define XEN_DOMCTL_sendtrigger                   28
+#define XEN_DOMCTL_subscribe                     29
+#define XEN_DOMCTL_gethvmcontext                 33
+#define XEN_DOMCTL_sethvmcontext                 34
+#define XEN_DOMCTL_set_address_size              35
+#define XEN_DOMCTL_get_address_size              36
+#define XEN_DOMCTL_assign_device                 37
+#define XEN_DOMCTL_bind_pt_irq                   38
+#define XEN_DOMCTL_memory_mapping                39
+#define XEN_DOMCTL_ioport_mapping                40
+#define XEN_DOMCTL_pin_mem_cacheattr             41
+#define XEN_DOMCTL_set_ext_vcpucontext           42
+#define XEN_DOMCTL_get_ext_vcpucontext           43
+#define XEN_DOMCTL_set_opt_feature               44 /* Obsolete IA64 only */
+#define XEN_DOMCTL_test_assign_device            45
+#define XEN_DOMCTL_set_target                    46
+#define XEN_DOMCTL_deassign_device               47
+#define XEN_DOMCTL_unbind_pt_irq                 48
+#define XEN_DOMCTL_set_cpuid                     49
+#define XEN_DOMCTL_get_device_group              50
+#define XEN_DOMCTL_set_machine_address_size      51
+#define XEN_DOMCTL_get_machine_address_size      52
+#define XEN_DOMCTL_suppress_spurious_page_faults 53
+#define XEN_DOMCTL_debug_op                      54
+#define XEN_DOMCTL_gethvmcontext_partial         55
+#define XEN_DOMCTL_mem_event_op                  56
+#define XEN_DOMCTL_mem_sharing_op                57
+#define XEN_DOMCTL_disable_migrate               58
+#define XEN_DOMCTL_gettscinfo                    59
+#define XEN_DOMCTL_settscinfo                    60
+#define XEN_DOMCTL_getpageframeinfo3             61
+#define XEN_DOMCTL_setvcpuextstate               62
+#define XEN_DOMCTL_getvcpuextstate               63
+#define XEN_DOMCTL_set_access_required           64
+#define XEN_DOMCTL_audit_p2m                     65
+#define XEN_DOMCTL_set_virq_handler              66
+#define XEN_DOMCTL_set_broken_page_p2m           67
+#define XEN_DOMCTL_setnodeaffinity               68
+#define XEN_DOMCTL_getnodeaffinity               69
+#define XEN_DOMCTL_gdbsx_guestmemio            1000
+#define XEN_DOMCTL_gdbsx_pausevcpu             1001
+#define XEN_DOMCTL_gdbsx_unpausevcpu           1002
+#define XEN_DOMCTL_gdbsx_domstatus             1003
+    uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
+    domid_t  domain;
+    union {
+        struct xen_domctl_createdomain      createdomain;
+        struct xen_domctl_getdomaininfo     getdomaininfo;
+        struct xen_domctl_getmemlist        getmemlist;
+        struct xen_domctl_getpageframeinfo  getpageframeinfo;
+        struct xen_domctl_getpageframeinfo2 getpageframeinfo2;
+        struct xen_domctl_getpageframeinfo3 getpageframeinfo3;
+        struct xen_domctl_nodeaffinity      nodeaffinity;
+        struct xen_domctl_vcpuaffinity      vcpuaffinity;
+        struct xen_domctl_shadow_op         shadow_op;
+        struct xen_domctl_max_mem           max_mem;
+        struct xen_domctl_vcpucontext       vcpucontext;
+        struct xen_domctl_getvcpuinfo       getvcpuinfo;
+        struct xen_domctl_max_vcpus         max_vcpus;
+        struct xen_domctl_scheduler_op      scheduler_op;
+        struct xen_domctl_setdomainhandle   setdomainhandle;
+        struct xen_domctl_setdebugging      setdebugging;
+        struct xen_domctl_irq_permission    irq_permission;
+        struct xen_domctl_iomem_permission  iomem_permission;
+        struct xen_domctl_ioport_permission ioport_permission;
+        struct xen_domctl_hypercall_init    hypercall_init;
+        struct xen_domctl_arch_setup        arch_setup;
+        struct xen_domctl_settimeoffset     settimeoffset;
+        struct xen_domctl_disable_migrate   disable_migrate;
+        struct xen_domctl_tsc_info          tsc_info;
+        struct xen_domctl_real_mode_area    real_mode_area;
+        struct xen_domctl_hvmcontext        hvmcontext;
+        struct xen_domctl_hvmcontext_partial hvmcontext_partial;
+        struct xen_domctl_address_size      address_size;
+        struct xen_domctl_sendtrigger       sendtrigger;
+        struct xen_domctl_get_device_group  get_device_group;
+        struct xen_domctl_assign_device     assign_device;
+        struct xen_domctl_bind_pt_irq       bind_pt_irq;
+        struct xen_domctl_memory_mapping    memory_mapping;
+        struct xen_domctl_ioport_mapping    ioport_mapping;
+        struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr;
+        struct xen_domctl_ext_vcpucontext   ext_vcpucontext;
+        struct xen_domctl_set_target        set_target;
+        struct xen_domctl_subscribe         subscribe;
+        struct xen_domctl_debug_op          debug_op;
+        struct xen_domctl_mem_event_op      mem_event_op;
+        struct xen_domctl_mem_sharing_op    mem_sharing_op;
+#if defined(__i386__) || defined(__x86_64__)
+        struct xen_domctl_cpuid             cpuid;
+        struct xen_domctl_vcpuextstate      vcpuextstate;
+#endif
+        struct xen_domctl_set_access_required access_required;
+        struct xen_domctl_audit_p2m         audit_p2m;
+        struct xen_domctl_set_virq_handler  set_virq_handler;
+        struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
+        struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
+        struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
+        struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        uint8_t                             pad[128];
+    } u;
+};
+typedef struct xen_domctl xen_domctl_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
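+
+/*
+ * Putting it together: every sub-op above travels in this one
+ * structure.  A minimal pause/unpause sequence, with do_domctl()
+ * again standing in for the toolstack's hypercall path (typically an
+ * ioctl on a privileged-command device in dom0):
+ *
+ *     struct xen_domctl ctl;
+ *     memset(&ctl, 0, sizeof ctl);
+ *     ctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
+ *     ctl.domain = domid;
+ *     ctl.cmd = XEN_DOMCTL_pausedomain;
+ *     do_domctl(&ctl);
+ *     ctl.cmd = XEN_DOMCTL_unpausedomain;
+ *     do_domctl(&ctl);
+ */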
+
+#endif /* __XEN_PUBLIC_DOMCTL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/elfnote.h
@@ -1,0 +1,271 @@
+/******************************************************************************
+ * elfnote.h
+ *
+ * Definitions used for the Xen ELF notes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell, XenSource Ltd.
+ */
+
+#ifndef __XEN_PUBLIC_ELFNOTE_H__
+#define __XEN_PUBLIC_ELFNOTE_H__
+
+/*
+ * `incontents 200 elfnotes ELF notes
+ *
+ * The notes should live in a PT_NOTE segment and have "Xen" in the
+ * name field.
+ *
+ * Numeric types are either 4 or 8 bytes depending on the content of
+ * the desc field.
+ *
+ * LEGACY indicates the field in the legacy __xen_guest string which
+ * a note of this type replaces.
+ *
+ * String values (for non-legacy) are NULL terminated ASCII, also known
+ * as ASCIZ type.
+ */
+
+/*
+ * NAME=VALUE pair (string).
+ */
+#define XEN_ELFNOTE_INFO           0
+
+/*
+ * The virtual address of the entry point (numeric).
+ *
+ * LEGACY: VIRT_ENTRY
+ */
+#define XEN_ELFNOTE_ENTRY          1
+
+/* The virtual address of the hypercall transfer page (numeric).
+ *
+ * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page
+ * number not a virtual address)
+ */
+#define XEN_ELFNOTE_HYPERCALL_PAGE 2
+
+/* The virtual address where the kernel image should be mapped (numeric).
+ *
+ * Defaults to 0.
+ *
+ * LEGACY: VIRT_BASE
+ */
+#define XEN_ELFNOTE_VIRT_BASE      3
+
+/*
+ * The offset of the ELF paddr field from the actual required
+ * pseudo-physical address (numeric).
+ *
+ * This is used to maintain backwards compatibility with older kernels
+ * which wrote __PAGE_OFFSET into that field. This field defaults to 0
+ * if not present.
+ *
+ * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE)
+ */
+#define XEN_ELFNOTE_PADDR_OFFSET   4
+
+/*
+ * The version of Xen that we work with (string).
+ *
+ * LEGACY: XEN_VER
+ */
+#define XEN_ELFNOTE_XEN_VERSION    5
+
+/*
+ * The name of the guest operating system (string).
+ *
+ * LEGACY: GUEST_OS
+ */
+#define XEN_ELFNOTE_GUEST_OS       6
+
+/*
+ * The version of the guest operating system (string).
+ *
+ * LEGACY: GUEST_VER
+ */
+#define XEN_ELFNOTE_GUEST_VERSION  7
+
+/*
+ * The loader type (string).
+ *
+ * LEGACY: LOADER
+ */
+#define XEN_ELFNOTE_LOADER         8
+
+/*
+ * The kernel supports PAE (x86/32 only, string = "yes", "no" or
+ * "bimodal").
+ *
+ * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting
+ * may be given as "yes,bimodal" which will cause older Xen to treat
+ * this kernel as PAE.
+ *
+ * LEGACY: PAE (n.b. The legacy interface included a provision to
+ * indicate 'extended-cr3' support allowing L3 page tables to be
+ * placed above 4G. It is assumed that any kernel new enough to use
+ * these ELF notes will include this and therefore "yes" here is
+ * equivalent to "yes[entended-cr3]" in the __xen_guest interface.
+ */
+#define XEN_ELFNOTE_PAE_MODE       9
+
+/*
+ * The features supported/required by this kernel (string).
+ *
+ * The string must consist of a list of feature names (as given in
+ * features.h, without the "XENFEAT_" prefix) separated by '|'
+ * characters. If a feature is required for the kernel to function
+ * then the feature name must be preceded by a '!' character.
+ *
+ * LEGACY: FEATURES
+ */
+#define XEN_ELFNOTE_FEATURES      10
+
+/*
+ * The kernel requires the symbol table to be loaded (string = "yes" or "no").
+ * LEGACY: BSD_SYMTAB (n.b. the legacy interface treated the presence or
+ * absence of this string as a boolean flag rather than requiring "yes"
+ * or "no".)
+ */
+#define XEN_ELFNOTE_BSD_SYMTAB    11
+
+/*
+ * The lowest address the hypervisor hole can begin at (numeric).
+ *
+ * This must not be set higher than HYPERVISOR_VIRT_START. Its presence
+ * also indicates to the hypervisor that the kernel can deal with the
+ * hole starting at a higher address.
+ */
+#define XEN_ELFNOTE_HV_START_LOW  12
+
+/*
+ * List of maddr_t-sized mask/value pairs describing how to recognize
+ * (non-present) L1 page table entries carrying valid MFNs (numeric).
+ */
+#define XEN_ELFNOTE_L1_MFN_VALID  13
+
+/*
+ * Whether or not the guest supports cooperative suspend cancellation.
+ * This is a numeric value.
+ *
+ * Default is 0
+ */
+#define XEN_ELFNOTE_SUSPEND_CANCEL 14
+
+/*
+ * The (non-default) location the initial phys-to-machine map should be
+ * placed at by the hypervisor (Dom0) or the tools (DomU).
+ * The kernel must be prepared for this mapping to be established using
+ * large pages, despite such otherwise not being available to guests.
+ * The kernel must also be able to handle the page table pages used for
+ * this mapping not being accessible through the initial mapping.
+ * (Only x86-64 supports this at present.)
+ */
+#define XEN_ELFNOTE_INIT_P2M      15
+
+/*
+ * Whether or not the guest can deal with being passed an initrd not
+ * mapped through its initial page tables.
+ */
+#define XEN_ELFNOTE_MOD_START_PFN 16
+
+/*
+ * The features supported by this kernel (numeric).
+ *
+ * Unlike XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a
+ * kernel to specify support for features that older hypervisors don't
+ * know about. The set of features 4.2 and newer hypervisors will
+ * consider supported by the kernel is the combination of the sets
+ * specified through this and the string note.
+ *
+ * LEGACY: FEATURES
+ */
+#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
+
+/*
+ * The number of the highest elfnote defined.
+ */
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
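+
+/*
+ * Illustrative sketch: a guest kernel built with GCC can emit one of
+ * the numeric notes above by placing a standard ELF note in a
+ * ".note.Xen" PT_NOTE section.  The macro name is invented for this
+ * example; the layout is the generic ELF note format (namesz, descsz,
+ * type, name, desc):
+ *
+ *     #define XEN_ELFNOTE(sym, t, v)                              \
+ *         static const struct {                                   \
+ *             uint32_t namesz, descsz, type;                      \
+ *             char name[4];                                       \
+ *             uint32_t desc;                                      \
+ *         } __attribute__((section(".note.Xen"), aligned(4), used)) \
+ *         sym = { 4, 4, (t), "Xen", (v) };
+ *
+ *     XEN_ELFNOTE(note_suspend, XEN_ELFNOTE_SUSPEND_CANCEL, 1)
+ */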
+
+/*
+ * System information exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO 
+ * note in case of a system crash. This note will contain various
+ * information about the system, see xen/include/xen/elfcore.h.
+ */
+#define XEN_ELFNOTE_CRASH_INFO 0x1000001
+
+/*
+ * System registers exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS 
+ * note per cpu in case of a system crash. This note is architecture
+ * specific and will contain registers not saved in the "CORE" note.
+ * See xen/include/xen/elfcore.h for more information.
+ */
+#define XEN_ELFNOTE_CRASH_REGS 0x1000002
+
+
+/*
+ * xen dump-core none note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE
+ * in its dump file to indicate that the file is xen dump-core
+ * file. This note doesn't have any other information.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_NONE               0x2000000
+
+/*
+ * xen dump-core header note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER
+ * in its dump file.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_HEADER             0x2000001
+
+/*
+ * xen dump-core xen version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION
+ * in its dump file. It contains the xen version obtained via the
+ * XENVER hypercall.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION        0x2000002
+
+/*
+ * xen dump-core format version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION
+ * in its dump file. It contains a format version identifier.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION     0x2000003
+
+#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/event_channel.h
@@ -1,0 +1,294 @@
+/******************************************************************************
+ * event_channel.h
+ *
+ * Event channels between domains.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
+#define __XEN_PUBLIC_EVENT_CHANNEL_H__
+
+#include "xen.h"
+
+/*
+ * `incontents 150 evtchn Event Channels
+ *
+ * Event channels are the basic primitive provided by Xen for event
+ * notifications. An event is the Xen equivalent of a hardware
+ * interrupt. They essentially store one bit of information; the event
+ * of interest is signalled by transitioning this bit from 0 to 1.
+ *
+ * Notifications are received by a guest via an upcall from Xen,
+ * indicating when an event arrives (setting the bit). Further
+ * notifications are masked until the bit is cleared again (therefore,
+ * guests must check the value of the bit after re-enabling event
+ * delivery to ensure no missed notifications).
+ *
+ * Event notifications can be masked by setting a flag; this is
+ * equivalent to disabling interrupts and can be used to ensure
+ * atomicity of certain operations in the guest kernel.
+ *
+ * Event channels are represented by the evtchn_* fields in
+ * struct shared_info and struct vcpu_info.
+ */
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args)
+ * `
+ * @cmd  == EVTCHNOP_* (event-channel operation).
+ * @args == struct evtchn_* Operation-specific extra arguments (NULL if none).
+ */
+
+/* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */
+#define EVTCHNOP_bind_interdomain 0
+#define EVTCHNOP_bind_virq        1
+#define EVTCHNOP_bind_pirq        2
+#define EVTCHNOP_close            3
+#define EVTCHNOP_send             4
+#define EVTCHNOP_status           5
+#define EVTCHNOP_alloc_unbound    6
+#define EVTCHNOP_bind_ipi         7
+#define EVTCHNOP_bind_vcpu        8
+#define EVTCHNOP_unmask           9
+#define EVTCHNOP_reset           10
+/* ` } */
+
+typedef uint32_t evtchn_port_t;
+DEFINE_XEN_GUEST_HANDLE(evtchn_port_t);
+
+/*
+ * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
+ * accepting interdomain bindings from domain <remote_dom>. A fresh port
+ * is allocated in <dom> and returned as <port>.
+ * NOTES:
+ *  1. If the caller is unprivileged then <dom> must be DOMID_SELF.
+ *  2. <rdom> may be DOMID_SELF, allowing loopback connections.
+ */
+struct evtchn_alloc_unbound {
+    /* IN parameters */
+    domid_t dom, remote_dom;
+    /* OUT parameters */
+    evtchn_port_t port;
+};
+typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t;
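+
+/*
+ * Sketch: a backend offering a port for <frontend> to bind to later,
+ * using the two-argument hypercall form documented above (the
+ * HYPERVISOR_event_channel_op() wrapper name is an assumption here):
+ *
+ *     struct evtchn_alloc_unbound op;
+ *     op.dom = DOMID_SELF;
+ *     op.remote_dom = frontend;
+ *     if (HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op) == 0)
+ *         the new op.port is advertised to the frontend, e.g. via
+ *         xenstore, which then uses EVTCHNOP_bind_interdomain below.
+ */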
+
+/*
+ * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
+ * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
+ * a port that is unbound and marked as accepting bindings from the calling
+ * domain. A fresh port is allocated in the calling domain and returned as
+ * <local_port>.
+ * NOTES:
+ *  1. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+struct evtchn_bind_interdomain {
+    /* IN parameters. */
+    domid_t remote_dom;
+    evtchn_port_t remote_port;
+    /* OUT parameters. */
+    evtchn_port_t local_port;
+};
+typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t;
+
+/*
+ * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
+ * vcpu.
+ * NOTES:
+ *  1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list
+ *     in xen.h for the classification of each VIRQ.
+ *  2. Global VIRQs must be allocated on VCPU0 but can subsequently be
+ *     re-bound via EVTCHNOP_bind_vcpu.
+ *  3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu.
+ *     The allocated event channel is bound to the specified vcpu and the
+ *     binding cannot be changed.
+ */
+struct evtchn_bind_virq {
+    /* IN parameters. */
+    uint32_t virq; /* enum virq */
+    uint32_t vcpu;
+    /* OUT parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_bind_virq evtchn_bind_virq_t;
+
+/*
+ * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ <irq>).
+ * NOTES:
+ *  1. A physical IRQ may be bound to at most one event channel per domain.
+ *  2. Only a sufficiently-privileged domain may bind to a physical IRQ.
+ */
+struct evtchn_bind_pirq {
+    /* IN parameters. */
+    uint32_t pirq;
+#define BIND_PIRQ__WILL_SHARE 1
+    uint32_t flags; /* BIND_PIRQ__* */
+    /* OUT parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_bind_pirq evtchn_bind_pirq_t;
+
+/*
+ * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
+ * NOTES:
+ *  1. The allocated event channel is bound to the specified vcpu. The binding
+ *     may not be changed.
+ */
+struct evtchn_bind_ipi {
+    uint32_t vcpu;
+    /* OUT parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_bind_ipi evtchn_bind_ipi_t;
+
+/*
+ * EVTCHNOP_close: Close a local event channel <port>. If the channel is
+ * interdomain then the remote end is placed in the unbound state
+ * (EVTCHNSTAT_unbound), awaiting a new connection.
+ */
+struct evtchn_close {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_close evtchn_close_t;
+
+/*
+ * EVTCHNOP_send: Send an event to the remote end of the channel whose local
+ * endpoint is <port>.
+ */
+struct evtchn_send {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_send evtchn_send_t;
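+
+/*
+ * Sketch: kicking the remote end after placing work on a shared ring
+ * (same assumed wrapper as above):
+ *
+ *     struct evtchn_send send;
+ *     send.port = local_port;
+ *     HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
+ */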
+
+/*
+ * EVTCHNOP_status: Get the current status of the communication channel which
+ * has an endpoint at <dom, port>.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may obtain the status of an event
+ *     channel for which <dom> is not DOMID_SELF.
+ */
+struct evtchn_status {
+    /* IN parameters */
+    domid_t  dom;
+    evtchn_port_t port;
+    /* OUT parameters */
+#define EVTCHNSTAT_closed       0  /* Channel is not in use.                 */
+#define EVTCHNSTAT_unbound      1  /* Channel awaits interdom connection.    */
+#define EVTCHNSTAT_interdomain  2  /* Channel is connected to remote domain. */
+#define EVTCHNSTAT_pirq         3  /* Channel is bound to a phys IRQ line.   */
+#define EVTCHNSTAT_virq         4  /* Channel is bound to a virtual IRQ line */
+#define EVTCHNSTAT_ipi          5  /* Channel is bound to a virtual IPI line */
+    uint32_t status;
+    uint32_t vcpu;                 /* VCPU to which this channel is bound.   */
+    union {
+        struct {
+            domid_t dom;
+        } unbound;                 /* EVTCHNSTAT_unbound */
+        struct {
+            domid_t dom;
+            evtchn_port_t port;
+        } interdomain;             /* EVTCHNSTAT_interdomain */
+        uint32_t pirq;             /* EVTCHNSTAT_pirq        */
+        uint32_t virq;             /* EVTCHNSTAT_virq        */
+    } u;
+};
+typedef struct evtchn_status evtchn_status_t;
+
+/*
+ * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
+ * event is pending.
+ * NOTES:
+ *  1. IPI-bound channels always notify the vcpu specified at bind time.
+ *     This binding cannot be changed.
+ *  2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time.
+ *     This binding cannot be changed.
+ *  3. All other channels notify vcpu0 by default. This default is set when
+ *     the channel is allocated (a port that is freed and subsequently reused
+ *     has its binding reset to vcpu0).
+ */
+struct evtchn_bind_vcpu {
+    /* IN parameters. */
+    evtchn_port_t port;
+    uint32_t vcpu;
+};
+typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t;
+
+/*
+ * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
+ * a notification to the appropriate VCPU if an event is pending.
+ */
+struct evtchn_unmask {
+    /* IN parameters. */
+    evtchn_port_t port;
+};
+typedef struct evtchn_unmask evtchn_unmask_t;
+
+/*
+ * EVTCHNOP_reset: Close all event channels associated with specified domain.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
+ */
+struct evtchn_reset {
+    /* IN parameters. */
+    domid_t dom;
+};
+typedef struct evtchn_reset evtchn_reset_t;
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op)
+ * `
+ * Superseded by the new event_channel_op() hypercall since 0x00030202.
+ */
+struct evtchn_op {
+    uint32_t cmd; /* enum event_channel_op */
+    union {
+        struct evtchn_alloc_unbound    alloc_unbound;
+        struct evtchn_bind_interdomain bind_interdomain;
+        struct evtchn_bind_virq        bind_virq;
+        struct evtchn_bind_pirq        bind_pirq;
+        struct evtchn_bind_ipi         bind_ipi;
+        struct evtchn_close            close;
+        struct evtchn_send             send;
+        struct evtchn_status           status;
+        struct evtchn_bind_vcpu        bind_vcpu;
+        struct evtchn_unmask           unmask;
+    } u;
+};
+typedef struct evtchn_op evtchn_op_t;
+DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
+
+#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/features.h
@@ -1,0 +1,109 @@
+/******************************************************************************
+ * features.h
+ * 
+ * Feature flags, reported by XENVER_get_features.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_FEATURES_H__
+#define __XEN_PUBLIC_FEATURES_H__
+
+/*
+ * `incontents 200 elfnotes_features XEN_ELFNOTE_FEATURES
+ *
+ * The list of all the features the guest supports. They are set by
+ * parsing the XEN_ELFNOTE_FEATURES and XEN_ELFNOTE_SUPPORTED_FEATURES
+ * string. The format is the feature names (as given here without the
+ * "XENFEAT_" prefix) separated by '|' characters.
+ * If a feature is required for the kernel to function then the feature name
+ * must be preceded by a '!' character.
+ *
+ * Note that if XEN_ELFNOTE_SUPPORTED_FEATURES is used, then
+ * XENFEAT_dom0 MUST be included if the guest is to be booted as dom0.
+ */
+
+/*
+ * If set, the guest does not need to write-protect its pagetables, and can
+ * update them via direct writes.
+ */
+#define XENFEAT_writable_page_tables       0
+
+/*
+ * If set, the guest does not need to write-protect its segment descriptor
+ * tables, and can update them via direct writes.
+ */
+#define XENFEAT_writable_descriptor_tables 1
+
+/*
+ * If set, translation between the guest's 'pseudo-physical' address space
+ * and the host's machine address space is handled by the hypervisor. In this
+ * mode the guest does not need to perform phys-to/from-machine translations
+ * when performing page table operations.
+ */
+#define XENFEAT_auto_translated_physmap    2
+
+/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */
+#define XENFEAT_supervisor_mode_kernel     3
+
+/*
+ * If set, the guest does not need to allocate x86 PAE page directories
+ * below 4GB. This flag is usually implied by auto_translated_physmap.
+ */
+#define XENFEAT_pae_pgdir_above_4gb        4
+
+/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
+#define XENFEAT_mmu_pt_update_preserve_ad  5
+
+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist             6
+
+/*
+ * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
+ * available pte bits.
+ */
+#define XENFEAT_gnttab_map_avail_bits      7
+
+/* x86: Does this Xen host support the HVM callback vector type? */
+#define XENFEAT_hvm_callback_vector        8
+
+/* x86: pvclock algorithm is safe to use on HVM */
+#define XENFEAT_hvm_safe_pvclock           9
+
+/* x86: pirq can be used by HVM guests */
+#define XENFEAT_hvm_pirqs                 10
+
+/* operation as Dom0 is supported */
+#define XENFEAT_dom0                      11
+
+#define XENFEAT_NR_SUBMAPS 1
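+
+/*
+ * Sketch of the usual guest-side feature cache.  XENVER_get_features
+ * and xen_feature_info_t come from version.h, and the
+ * HYPERVISOR_xen_version() wrapper name is an assumption here:
+ *
+ *     static uint32_t feature_map[XENFEAT_NR_SUBMAPS];
+ *
+ *     xen_feature_info_t fi;
+ *     int i;
+ *     for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
+ *         fi.submap_idx = i;
+ *         if (HYPERVISOR_xen_version(XENVER_get_features, &fi) == 0)
+ *             feature_map[i] = fi.submap;
+ *     }
+ *     #define xen_feature(f)  (feature_map[(f)/32] & (1u << ((f)%32)))
+ */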
+
+#endif /* __XEN_PUBLIC_FEATURES_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/gcov.h
@@ -1,0 +1,115 @@
+/******************************************************************************
+ * gcov.h
+ *
+ * Coverage structures exported by Xen.
+ * The structure layout differs from GCC's.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2013, Citrix Systems R&D Ltd.
+ */
+
+#ifndef __XEN_PUBLIC_GCOV_H__
+#define __XEN_PUBLIC_GCOV_H__ __XEN_PUBLIC_GCOV_H__
+
+#define XENCOV_COUNTERS         5
+#define XENCOV_TAG_BASE         0x58544300u
+#define XENCOV_TAG_FILE         (XENCOV_TAG_BASE+0x46u)
+#define XENCOV_TAG_FUNC         (XENCOV_TAG_BASE+0x66u)
+#define XENCOV_TAG_COUNTER(n)   (XENCOV_TAG_BASE+0x30u+((n)&0xfu))
+#define XENCOV_TAG_END          (XENCOV_TAG_BASE+0x2eu)
+#define XENCOV_IS_TAG_COUNTER(n) \
+    ((n) >= XENCOV_TAG_COUNTER(0) && (n) < XENCOV_TAG_COUNTER(XENCOV_COUNTERS))
+#define XENCOV_COUNTER_NUM(n) ((n)-XENCOV_TAG_COUNTER(0))
+
+/*
+ * The main structure for the blob is
+ * BLOB := FILE.. END
+ * FILE := TAG_FILE VERSION STAMP FILENAME COUNTERS FUNCTIONS
+ * FILENAME := LEN characters
+ *   characters are padded to 32 bit
+ * LEN := 32 bit value
+ * COUNTERS := TAG_COUNTER(n) NUM COUNTER..
+ * NUM := 32 bit value
+ * COUNTER := 64 bit value
+ * FUNCTIONS := TAG_FUNC NUM FUNCTION..
+ * FUNCTION := IDENT CHECKSUM NUM_COUNTERS
+ *
+ * All tagged structures are aligned to 8 bytes
+ */
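+
+/*
+ * A reader might walk the blob like this (a sketch; record_size is the
+ * size of the record just parsed, and every record is advanced to the
+ * 8-byte alignment stated above):
+ *
+ *     uint32_t tag;
+ *
+ *     for (;;) {
+ *         tag = *(uint32_t *)p;
+ *         if (tag == XENCOV_TAG_END)
+ *             break;
+ *         else if (tag == XENCOV_TAG_FILE)
+ *             ...parse a struct xencov_file...
+ *         else if (XENCOV_IS_TAG_COUNTER(tag))
+ *             ...counter set XENCOV_COUNTER_NUM(tag)...
+ *         else if (tag == XENCOV_TAG_FUNC)
+ *             ...parse a struct xencov_functions...
+ *         p += (record_size + 7) & ~7;    // keep 8-byte alignment
+ *     }
+ */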
+
+/**
+ * File information
+ * Prefixed with XENCOV_TAG_FILE and a string with filename
+ * Aligned to 8 bytes
+ */
+struct xencov_file
+{
+    uint32_t tag; /* XENCOV_TAG_FILE */
+    uint32_t version;
+    uint32_t stamp;
+    uint32_t fn_len;
+    char filename[1];
+};
+
+
+/**
+ * Counters information
+ * Prefixed with XENCOV_TAG_COUNTER(n) where n is 0..(XENCOV_COUNTERS-1)
+ * Aligned to 8 bytes
+ */
+struct xencov_counter
+{
+    uint32_t tag; /* XENCOV_TAG_COUNTER(n) */
+    uint32_t num;
+    uint64_t values[1];
+};
+
+/**
+ * Information for each function
+ * The number of counters equals the number of counter structures seen before
+ */
+struct xencov_function
+{
+    uint32_t ident;
+    uint32_t checksum;
+    uint32_t num_counters[1];
+};
+
+/**
+ * Information for all functions
+ * Aligned to 8 bytes
+ */
+struct xencov_functions
+{
+    uint32_t tag; /* XENCOV_TAG_FUNC */
+    uint32_t num;
+    struct xencov_function xencov_function[1];
+};
+
+/**
+ * Terminator
+ */
+struct xencov_end
+{
+    uint32_t tag; /* XENCOV_TAG_END */
+};
+
+#endif /* __XEN_PUBLIC_GCOV_H__ */
+
--- /dev/null
+++ b/sys/src/9/xen/xen-public/grant_table.h
@@ -1,0 +1,662 @@
+/******************************************************************************
+ * grant_table.h
+ *
+ * Interface for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
+#define __XEN_PUBLIC_GRANT_TABLE_H__
+
+#include "xen.h"
+
+/*
+ * `incontents 150 gnttab Grant Tables
+ *
+ * Xen's grant tables provide a generic mechanism for memory sharing
+ * between domains. This shared memory interface underpins the split
+ * device drivers for block and network IO.
+ *
+ * Each domain has its own grant table. This is a data structure that
+ * is shared with Xen; it allows the domain to tell Xen what kind of
+ * permissions other domains have on its pages. Entries in the grant
+ * table are identified by grant references. A grant reference is an
+ * integer, which indexes into the grant table. It acts as a
+ * capability which the grantee can use to perform operations on the
+ * granter's memory.
+ *
+ * This capability-based system allows shared-memory communications
+ * between unprivileged domains. A grant reference also encapsulates
+ * the details of a shared page, removing the need for a domain to
+ * know the real machine address of a page it is sharing. This makes
+ * it possible to share memory correctly with domains running in
+ * fully virtualised memory.
+ */
+
+/***********************************
+ * GRANT TABLE REPRESENTATION
+ */
+
+/* Some rough guidelines on accessing and updating grant-table entries
+ * in a concurrency-safe manner. For more information, Linux contains a
+ * reference implementation for guest OSes (drivers/xen/grant-table.c; see
+ * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD).
+ *
+ * NB. WMB is a no-op on current-generation x86 processors. However, a
+ *     compiler barrier will still be required.
+ *
+ * Introducing a valid entry into the grant table:
+ *  1. Write ent->domid.
+ *  2. Write ent->frame:
+ *      GTF_permit_access:   Frame to which access is permitted.
+ *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ *                           frame, or zero if none.
+ *  3. Write memory barrier (WMB).
+ *  4. Write ent->flags, including a valid type.
+ *
+ * Invalidating an unused GTF_permit_access entry:
+ *  1. flags = ent->flags.
+ *  2. Observe that !(flags & (GTF_reading|GTF_writing)).
+ *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ *  NB. No need for WMB as reuse of entry is control-dependent on success of
+ *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *
+ * Invalidating an in-use GTF_permit_access entry:
+ *  This cannot be done directly. Request assistance from the domain controller
+ *  which can set a timeout on the use of a grant entry and take necessary
+ *  action. (NB. This is not yet implemented!).
+ *
+ * Invalidating an unused GTF_accept_transfer entry:
+ *  1. flags = ent->flags.
+ *  2. Observe that !(flags & GTF_transfer_committed). [*]
+ *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ *  NB. No need for WMB as reuse of entry is control-dependent on success of
+ *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *  [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
+ *      The guest must /not/ modify the grant entry until the address of the
+ *      transferred frame is written. It is safe for the guest to spin waiting
+ *      for this to occur (detect by observing GTF_transfer_completed in
+ *      ent->flags).
+ *
+ * Invalidating a committed GTF_accept_transfer entry:
+ *  1. Wait for (ent->flags & GTF_transfer_completed).
+ *
+ * Changing a GTF_permit_access from writable to read-only:
+ *  Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
+ *
+ * Changing a GTF_permit_access from read-only to writable:
+ *  Use SMP-safe bit-setting instruction.
+ */
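+
+/*
+ * For example, the "introduce a valid entry" recipe above for a v1 table
+ * (a sketch; gnttab_shared is the guest's mapping of its own table and
+ * wmb() its write-barrier primitive):
+ *
+ *     void
+ *     grantaccess(grant_ref_t ref, domid_t domid, uint32_t frame, int readonly)
+ *     {
+ *         gnttab_shared[ref].domid = domid;                    // step 1
+ *         gnttab_shared[ref].frame = frame;                    // step 2
+ *         wmb();                                               // step 3
+ *         gnttab_shared[ref].flags = GTF_permit_access |
+ *             (readonly ? GTF_readonly : 0);                   // step 4
+ *     }
+ */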
+
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+/*
+ * A grant table comprises a packed array of grant entries in one or more
+ * page frames shared between Xen and a guest.
+ * [XEN]: This field is written by Xen and read by the sharing guest.
+ * [GST]: This field is written by the guest and read by Xen.
+ */
+
+/*
+ * Version 1 of the grant table entry structure is maintained purely
+ * for backwards compatibility.  New guests should use version 2.
+ */
+#if __XEN_INTERFACE_VERSION__ < 0x0003020a
+#define grant_entry_v1 grant_entry
+#define grant_entry_v1_t grant_entry_t
+#endif
+struct grant_entry_v1 {
+    /* GTF_xxx: various type and flag information.  [XEN,GST] */
+    uint16_t flags;
+    /* The domain being granted foreign privileges. [GST] */
+    domid_t  domid;
+    /*
+     * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
+     * GTF_accept_transfer: Frame whose ownership is transferred by @domid. [XEN]
+     */
+    uint32_t frame;
+};
+typedef struct grant_entry_v1 grant_entry_v1_t;
+
+/* The first few grant table entries will be preserved across grant table
+ * version changes and may be pre-populated at domain creation by tools.
+ */
+#define GNTTAB_NR_RESERVED_ENTRIES     8
+#define GNTTAB_RESERVED_CONSOLE        0
+#define GNTTAB_RESERVED_XENSTORE       1
+
+/*
+ * Type of grant entry.
+ *  GTF_invalid: This grant entry grants no privileges.
+ *  GTF_permit_access: Allow @domid to map/access @frame.
+ *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
+ *                       to this guest. Xen writes the page number to @frame.
+ *  GTF_transitive: Allow @domid to transitively access a subrange of
+ *                  @trans_grant in @trans_domid.  No mappings are allowed.
+ */
+#define GTF_invalid         (0U<<0)
+#define GTF_permit_access   (1U<<0)
+#define GTF_accept_transfer (2U<<0)
+#define GTF_transitive      (3U<<0)
+#define GTF_type_mask       (3U<<0)
+
+/*
+ * Subflags for GTF_permit_access.
+ *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
+ *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
+ *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ *  GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST]
+ *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
+ *                will only be allowed to copy from the grant, and not
+ *                map it. [GST]
+ */
+#define _GTF_readonly       (2)
+#define GTF_readonly        (1U<<_GTF_readonly)
+#define _GTF_reading        (3)
+#define GTF_reading         (1U<<_GTF_reading)
+#define _GTF_writing        (4)
+#define GTF_writing         (1U<<_GTF_writing)
+#define _GTF_PWT            (5)
+#define GTF_PWT             (1U<<_GTF_PWT)
+#define _GTF_PCD            (6)
+#define GTF_PCD             (1U<<_GTF_PCD)
+#define _GTF_PAT            (7)
+#define GTF_PAT             (1U<<_GTF_PAT)
+#define _GTF_sub_page       (8)
+#define GTF_sub_page        (1U<<_GTF_sub_page)
+
+/*
+ * Subflags for GTF_accept_transfer:
+ *  GTF_transfer_committed: Xen sets this flag to indicate that it is committed
+ *      to transferring ownership of a page frame. When a guest sees this flag
+ *      it must /not/ modify the grant entry until GTF_transfer_completed is
+ *      set by Xen.
+ *  GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
+ *      after reading GTF_transfer_committed. Xen will always write the frame
+ *      address, followed by ORing this flag, in a timely manner.
+ */
+#define _GTF_transfer_committed (2)
+#define GTF_transfer_committed  (1U<<_GTF_transfer_committed)
+#define _GTF_transfer_completed (3)
+#define GTF_transfer_completed  (1U<<_GTF_transfer_completed)
+
+/*
+ * Version 2 grant table entries.  These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+/*
+ * Version 1 and version 2 grant entries share a common prefix.  The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
+ */
+struct grant_entry_header {
+    uint16_t flags;
+    domid_t  domid;
+};
+typedef struct grant_entry_header grant_entry_header_t;
+
+/*
+ * Version 2 of the grant entry structure.
+ */
+union grant_entry_v2 {
+    grant_entry_header_t hdr;
+
+    /*
+     * This member is used for V1-style full page grants, where either:
+     *
+     * -- hdr.type is GTF_accept_transfer, or
+     * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
+     *
+     * In that case, the frame field has the same semantics as the
+     * field of the same name in the V1 entry structure.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        uint32_t pad0;
+        uint64_t frame;
+    } full_page;
+
+    /*
+     * If the grant type is GTF_grant_access and GTF_sub_page is set,
+     * @domid is allowed to access bytes [@page_off,@page_off+@length)
+     * in frame @frame.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        uint16_t page_off;
+        uint16_t length;
+        uint64_t frame;
+    } sub_page;
+
+    /*
+     * If the grant is GTF_transitive, @domid is allowed to use the
+     * grant @gref in domain @trans_domid, as if it was the local
+     * domain.  Obviously, the transitive access must be compatible
+     * with the original grant.
+     *
+     * The current version of Xen does not allow transitive grants
+     * to be mapped.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        domid_t trans_domid;
+        uint16_t pad0;
+        grant_ref_t gref;
+    } transitive;
+
+    uint32_t __spacer[4]; /* Pad to a power of two */
+};
+typedef union grant_entry_v2 grant_entry_v2_t;
+
+typedef uint16_t grant_status_t;
+
+#endif /* __XEN_INTERFACE_VERSION__ */
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
+ */
+
+/* ` enum neg_errnoval
+ * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd,
+ * `                           void *args,
+ * `                           unsigned int count)
+ * `
+ *
+ * @args points to an array of the per-command data structure; the array
+ * has @count members.
+ */
+
+/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */
+#define GNTTABOP_map_grant_ref        0
+#define GNTTABOP_unmap_grant_ref      1
+#define GNTTABOP_setup_table          2
+#define GNTTABOP_dump_table           3
+#define GNTTABOP_transfer             4
+#define GNTTABOP_copy                 5
+#define GNTTABOP_query_size           6
+#define GNTTABOP_unmap_and_replace    7
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+#define GNTTABOP_set_version          8
+#define GNTTABOP_get_status_frames    9
+#define GNTTABOP_get_version          10
+#define GNTTABOP_swap_grant_ref	      11
+#endif /* __XEN_INTERFACE_VERSION__ */
+/* ` } */
+
+/*
+ * Handle to track a mapping created via a grant reference.
+ */
+typedef uint32_t grant_handle_t;
+
+/*
+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
+ * by devices and/or host CPUs. If successful, <handle> is a tracking number
+ * that must be presented later to destroy the mapping(s). On error, <handle>
+ * is a negative status code.
+ * NOTES:
+ *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
+ *     via which I/O devices may access the granted frame.
+ *  2. If GNTMAP_host_map is specified then a mapping will be added at
+ *     either a host virtual address in the current address space, or at
+ *     a PTE at the specified machine address.  The type of mapping to
+ *     perform is selected through the GNTMAP_contains_pte flag, and the
+ *     address is specified in <host_addr>.
+ *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
+ *     host mapping is destroyed by other means then it is *NOT* guaranteed
+ *     to be accounted to the correct grant reference!
+ */
+struct gnttab_map_grant_ref {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint32_t flags;               /* GNTMAP_* */
+    grant_ref_t ref;
+    domid_t  dom;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+    grant_handle_t handle;
+    uint64_t dev_bus_addr;
+};
+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
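+
+/*
+ * Typical use (a sketch; error handling elided):
+ *
+ *     gnttab_map_grant_ref_t op;
+ *
+ *     op.host_addr = va;                  // virtual address to map at
+ *     op.flags = GNTMAP_host_map;
+ *     op.ref = gref;                      // obtained from the granter
+ *     op.dom = granter;
+ *     if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) < 0 ||
+ *         op.status != GNTST_okay)
+ *         ...failed...
+ *     // keep op.handle for the eventual GNTTABOP_unmap_grant_ref
+ */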
+
+/*
+ * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
+ * field is ignored. If non-zero, it must refer to a device/host mapping
+ * that is tracked by <handle>.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+struct gnttab_unmap_grant_ref {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t dev_bus_addr;
+    grant_handle_t handle;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+};
+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
+
+/*
+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
+ * Only <nr_frames> addresses are written, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ *  3. Xen may not support more than a single grant-table page per domain.
+ */
+struct gnttab_setup_table {
+    /* IN parameters. */
+    domid_t  dom;
+    uint32_t nr_frames;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+#if __XEN_INTERFACE_VERSION__ < 0x00040300
+    XEN_GUEST_HANDLE(ulong) frame_list;
+#else
+    XEN_GUEST_HANDLE(xen_pfn_t) frame_list;
+#endif
+};
+typedef struct gnttab_setup_table gnttab_setup_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
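+
+/*
+ * A guest typically issues this once at boot (a sketch;
+ * set_xen_guest_handle is the handle-initialisation macro from the
+ * arch headers):
+ *
+ *     ulong frames[4];
+ *     gnttab_setup_table_t setup;
+ *
+ *     setup.dom = DOMID_SELF;
+ *     setup.nr_frames = 4;
+ *     set_xen_guest_handle(setup.frame_list, frames);
+ *     if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) < 0 ||
+ *         setup.status != GNTST_okay)
+ *         ...no grant table...
+ *     // map frames[0..nr_frames-1] to reach the shared entries
+ */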
+
+/*
+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
+ * xen console. Debugging use only.
+ */
+struct gnttab_dump_table {
+    /* IN parameters. */
+    domid_t dom;
+    /* OUT parameters. */
+    int16_t status;               /* => enum grant_status */
+};
+typedef struct gnttab_dump_table gnttab_dump_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
+
+/*
+ * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
+ * foreign domain has previously registered its interest in the transfer via
+ * <domid, ref>.
+ *
+ * Note that, even if the transfer fails, the specified page no longer belongs
+ * to the calling domain *unless* the error is GNTST_bad_page.
+ */
+struct gnttab_transfer {
+    /* IN parameters. */
+    xen_pfn_t     mfn;
+    domid_t       domid;
+    grant_ref_t   ref;
+    /* OUT parameters. */
+    int16_t       status;
+};
+typedef struct gnttab_transfer gnttab_transfer_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
+
+
+/*
+ * GNTTABOP_copy: Hypervisor-based copy.
+ * Source and destination can each be either an MFN or, for foreign
+ * domains, a grant reference; the foreign domain has to grant read/write
+ * access in its grant table.
+ *
+ * The flags specify whether source and destination are MFNs or grant
+ * references.
+ *
+ * Note that this can also be used to copy data between two domains
+ * via a third party, if the source and destination domains have previously
+ * granted appropriate access to their pages to the third party.
+ *
+ * source.offset specifies an offset in the source frame, dest.offset the
+ * offset in the destination frame, and len the number of bytes to be
+ * copied.
+ */
+
+#define _GNTCOPY_source_gref      (0)
+#define GNTCOPY_source_gref       (1<<_GNTCOPY_source_gref)
+#define _GNTCOPY_dest_gref        (1)
+#define GNTCOPY_dest_gref         (1<<_GNTCOPY_dest_gref)
+
+struct gnttab_copy {
+    /* IN parameters. */
+    struct {
+        union {
+            grant_ref_t ref;
+            xen_pfn_t   gmfn;
+        } u;
+        domid_t  domid;
+        uint16_t offset;
+    } source, dest;
+    uint16_t      len;
+    uint16_t      flags;          /* GNTCOPY_* */
+    /* OUT parameters. */
+    int16_t       status;
+};
+typedef struct gnttab_copy  gnttab_copy_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
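+
+/*
+ * For example, pulling one page's worth of data from a foreign grant into
+ * a local frame (a sketch):
+ *
+ *     gnttab_copy_t copy;
+ *
+ *     copy.source.u.ref = gref;           // foreign grant reference
+ *     copy.source.domid = otherdom;
+ *     copy.source.offset = 0;
+ *     copy.dest.u.gmfn = mfn;             // local frame
+ *     copy.dest.domid = DOMID_SELF;
+ *     copy.dest.offset = 0;
+ *     copy.len = len;                     // must not cross a page boundary
+ *     copy.flags = GNTCOPY_source_gref;
+ *     HYPERVISOR_grant_table_op(GNTTABOP_copy, &copy, 1);
+ *     // then check copy.status against GNTST_okay
+ */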
+
+/*
+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
+ * grant table.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+struct gnttab_query_size {
+    /* IN parameters. */
+    domid_t  dom;
+    /* OUT parameters. */
+    uint32_t nr_frames;
+    uint32_t max_nr_frames;
+    int16_t  status;              /* => enum grant_status */
+};
+typedef struct gnttab_query_size gnttab_query_size_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>.  <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+struct gnttab_unmap_and_replace {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t new_addr;
+    grant_handle_t handle;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+};
+typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
+
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure.  This operation can only be performed
+ * once in any given domain.  It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+struct gnttab_set_version {
+    /* IN/OUT parameters */
+    uint32_t version;
+};
+typedef struct gnttab_set_version gnttab_set_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t);
+
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpexch operations.
+ * <nr_frames> specifies the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+struct gnttab_get_status_frames {
+    /* IN parameters. */
+    uint32_t nr_frames;
+    domid_t  dom;
+    /* OUT parameters. */
+    int16_t  status;              /* => enum grant_status */
+    XEN_GUEST_HANDLE(uint64_t) frame_list;
+};
+typedef struct gnttab_get_status_frames gnttab_get_status_frames_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);
+
+/*
+ * GNTTABOP_get_version: Get the grant table version which is in
+ * effect for domain <dom>.
+ */
+struct gnttab_get_version {
+    /* IN parameters */
+    domid_t dom;
+    uint16_t pad;
+    /* OUT parameters */
+    uint32_t version;
+};
+typedef struct gnttab_get_version gnttab_get_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t);
+
+/*
+ * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries.
+ */
+struct gnttab_swap_grant_ref {
+    /* IN parameters */
+    grant_ref_t ref_a;
+    grant_ref_t ref_b;
+    /* OUT parameters */
+    int16_t status;             /* => enum grant_status */
+};
+typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t);
+
+#endif /* __XEN_INTERFACE_VERSION__ */
+
+/*
+ * Bitfield values for gnttab_map_grant_ref.flags.
+ */
+ /* Map the grant entry for access by I/O devices. */
+#define _GNTMAP_device_map      (0)
+#define GNTMAP_device_map       (1<<_GNTMAP_device_map)
+ /* Map the grant entry for access by host CPUs. */
+#define _GNTMAP_host_map        (1)
+#define GNTMAP_host_map         (1<<_GNTMAP_host_map)
+ /* Accesses to the granted frame will be restricted to read-only access. */
+#define _GNTMAP_readonly        (2)
+#define GNTMAP_readonly         (1<<_GNTMAP_readonly)
+ /*
+  * GNTMAP_host_map subflag:
+  *  0 => The host mapping is usable only by the guest OS.
+  *  1 => The host mapping is usable by guest OS + current application.
+  */
+#define _GNTMAP_application_map (3)
+#define GNTMAP_application_map  (1<<_GNTMAP_application_map)
+
+ /*
+  * GNTMAP_contains_pte subflag:
+  *  0 => This map request contains a host virtual address.
+  *  1 => This map request contains the machine address of the PTE to update.
+  */
+#define _GNTMAP_contains_pte    (4)
+#define GNTMAP_contains_pte     (1<<_GNTMAP_contains_pte)
+
+#define _GNTMAP_can_fail        (5)
+#define GNTMAP_can_fail         (1<<_GNTMAP_can_fail)
+
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0    (16)
+#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0)
+
+/*
+ * Values for error status returns. All errors are -ve.
+ */
+/* ` enum grant_status { */
+#define GNTST_okay             (0)  /* Normal return.                        */
+#define GNTST_general_error    (-1) /* General undefined error.              */
+#define GNTST_bad_domain       (-2) /* Unrecognised domain id.               */
+#define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
+#define GNTST_bad_handle       (-4) /* Unrecognised or inappropriate handle. */
+#define GNTST_bad_virt_addr    (-5) /* Inappropriate virtual address to map. */
+#define GNTST_bad_dev_addr     (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */
+#define GNTST_bad_page         (-9) /* Specified page was invalid for op.    */
+#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */
+#define GNTST_address_too_big (-11) /* transfer page address too large.      */
+#define GNTST_eagain          (-12) /* Operation not done; try again.        */
+/* ` } */
+
+#define GNTTABOP_error_msgs {                   \
+    "okay",                                     \
+    "undefined error",                          \
+    "unrecognised domain id",                   \
+    "invalid grant reference",                  \
+    "invalid mapping handle",                   \
+    "invalid virtual address",                  \
+    "invalid device address",                   \
+    "no spare translation slot in the I/O MMU", \
+    "permission denied",                        \
+    "bad page",                                 \
+    "copy arguments cross page boundary",       \
+    "page address size too large",              \
+    "operation not done; try again"             \
+}
+
+#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/e820.h
@@ -1,0 +1,34 @@
+
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_E820_H__
+#define __XEN_PUBLIC_HVM_E820_H__
+
+/* E820 location in HVM virtual address space. */
+#define HVM_E820_PAGE        0x00090000
+#define HVM_E820_NR_OFFSET   0x000001E8
+#define HVM_E820_OFFSET      0x000002D0
+
+#define HVM_BELOW_4G_RAM_END        0xF0000000
+#define HVM_BELOW_4G_MMIO_START     HVM_BELOW_4G_RAM_END
+#define HVM_BELOW_4G_MMIO_LENGTH    ((1ULL << 32) - HVM_BELOW_4G_MMIO_START)
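+/* i.e. a 256MiB MMIO hole covering [0xF0000000, 0x100000000) */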
+
+#endif /* __XEN_PUBLIC_HVM_E820_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/hvm_info_table.h
@@ -1,0 +1,72 @@
+/******************************************************************************
+ * hvm/hvm_info_table.h
+ * 
+ * HVM parameter and information table, written into guest memory map.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+
+#define HVM_INFO_PFN         0x09F
+#define HVM_INFO_OFFSET      0x800
+#define HVM_INFO_PADDR       ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET)
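+/* i.e. guest-physical address 0x9F800 */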
+
+/* Maximum we can support with current vLAPIC ID mapping. */
+#define HVM_MAX_VCPUS        128
+
+struct hvm_info_table {
+    char        signature[8]; /* "HVM INFO" */
+    uint32_t    length;
+    uint8_t     checksum;
+
+    /* Should firmware build APIC descriptors (APIC MADT / MP BIOS)? */
+    uint8_t     apic_mode;
+
+    /* How many CPUs does this domain have? */
+    uint32_t    nr_vcpus;
+
+    /*
+     * MEMORY MAP provided by HVM domain builder.
+     * Notes:
+     *  1. page_to_phys(x) = x << 12
+     *  2. If a field is zero, the corresponding range does not exist.
+     */
+    /*
+     *  0x0 to page_to_phys(low_mem_pgend)-1:
+     *    RAM below 4GB (except for VGA hole 0xA0000-0xBFFFF)
+     */
+    uint32_t    low_mem_pgend;
+    /*
+     *  page_to_phys(reserved_mem_pgstart) to 0xFFFFFFFF:
+     *    Reserved for special memory mappings
+     */
+    uint32_t    reserved_mem_pgstart;
+    /*
+     *  0x100000000 to page_to_phys(high_mem_pgend)-1:
+     *    RAM above 4GB
+     */
+    uint32_t    high_mem_pgend;
+
+    /* Bitmap of which CPUs are online at boot time. */
+    uint8_t     vcpu_online[(HVM_MAX_VCPUS + 7)/8];
+};
+
+#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/hvm_op.h
@@ -1,0 +1,275 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
+#include "../xen.h"
+#include "../trace.h"
+
+/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */
+#define HVMOP_set_param           0
+#define HVMOP_get_param           1
+struct xen_hvm_param {
+    domid_t  domid;    /* IN */
+    uint32_t index;    /* IN */
+    uint64_t value;    /* IN/OUT */
+};
+typedef struct xen_hvm_param xen_hvm_param_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t);
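+
+/*
+ * Typical use (a sketch): both subcommands go through HYPERVISOR_hvm_op,
+ * e.g. to read the xenstore event channel from within an HVM guest:
+ *
+ *     xen_hvm_param_t p;
+ *
+ *     p.domid = DOMID_SELF;
+ *     p.index = HVM_PARAM_STORE_EVTCHN;   // from hvm/params.h
+ *     if (HYPERVISOR_hvm_op(HVMOP_get_param, &p) == 0)
+ *         evtchn = p.value;
+ */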
+
+/* Set the logical level of one of a domain's PCI INTx wires. */
+#define HVMOP_set_pci_intx_level  2
+struct xen_hvm_set_pci_intx_level {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* PCI INTx identification in PCI topology (domain:bus:device:intx). */
+    uint8_t  domain, bus, device, intx;
+    /* Assertion level (0 = unasserted, 1 = asserted). */
+    uint8_t  level;
+};
+typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t);
+
+/* Set the logical level of one of a domain's ISA IRQ wires. */
+#define HVMOP_set_isa_irq_level   3
+struct xen_hvm_set_isa_irq_level {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* ISA device identification, by ISA IRQ (0-15). */
+    uint8_t  isa_irq;
+    /* Assertion level (0 = unasserted, 1 = asserted). */
+    uint8_t  level;
+};
+typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t);
+
+#define HVMOP_set_pci_link_route  4
+struct xen_hvm_set_pci_link_route {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* PCI link identifier (0-3). */
+    uint8_t  link;
+    /* ISA IRQ (1-15), or 0 (disable link). */
+    uint8_t  isa_irq;
+};
+typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t);
+
+/* Flushes all VCPU TLBs: @arg must be NULL. */
+#define HVMOP_flush_tlbs          5
+
+typedef enum {
+    HVMMEM_ram_rw,             /* Normal read/write guest RAM */
+    HVMMEM_ram_ro,             /* Read-only; writes are discarded */
+    HVMMEM_mmio_dm             /* Reads and writes go to the device model */
+} hvmmem_type_t;
+
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/* Track dirty VRAM. */
+#define HVMOP_track_dirty_vram    6
+struct xen_hvm_track_dirty_vram {
+    /* Domain to be tracked. */
+    domid_t  domid;
+    /* First pfn to track. */
+    uint64_aligned_t first_pfn;
+    /* Number of pages to track. */
+    uint64_aligned_t nr;
+    /* OUT variable. */
+    /* Dirty bitmap buffer. */
+    XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
+};
+typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t);
+
+/* Notify that some pages got modified by the Device Model. */
+#define HVMOP_modified_memory    7
+struct xen_hvm_modified_memory {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* First pfn. */
+    uint64_aligned_t first_pfn;
+    /* Number of pages. */
+    uint64_aligned_t nr;
+};
+typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t);
+
+#define HVMOP_set_mem_type    8
+/* Notify that a region of memory is to be treated in a specific way. */
+struct xen_hvm_set_mem_type {
+    /* Domain to be updated. */
+    domid_t domid;
+    /* Memory type */
+    uint16_t hvmmem_type;
+    /* Number of pages. */
+    uint32_t nr;
+    /* First pfn. */
+    uint64_aligned_t first_pfn;
+};
+typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+/* Hint from PV drivers for pagetable destruction. */
+#define HVMOP_pagetable_dying        9
+struct xen_hvm_pagetable_dying {
+    /* Domain with a pagetable about to be destroyed. */
+    domid_t  domid;
+    uint16_t pad[3]; /* align next field on 8-byte boundary */
+    /* guest physical address of the toplevel pagetable dying */
+    uint64_t gpa;
+};
+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t);
+
+/* Get the current Xen time, in nanoseconds since system boot. */
+#define HVMOP_get_time              10
+struct xen_hvm_get_time {
+    uint64_t now;      /* OUT */
+};
+typedef struct xen_hvm_get_time xen_hvm_get_time_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t);
+
+#define HVMOP_xentrace              11
+struct xen_hvm_xentrace {
+    uint16_t event, extra_bytes;
+    uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)];
+};
+typedef struct xen_hvm_xentrace xen_hvm_xentrace_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t);
+
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#define HVMOP_set_mem_access        12
+typedef enum {
+    HVMMEM_access_n,
+    HVMMEM_access_r,
+    HVMMEM_access_w,
+    HVMMEM_access_rw,
+    HVMMEM_access_x,
+    HVMMEM_access_rx,
+    HVMMEM_access_wx,
+    HVMMEM_access_rwx,
+    HVMMEM_access_rx2rw,       /* Page starts off as r-x, but automatically
+                                * changes to r-w on a write */
+    HVMMEM_access_n2rwx,       /* Log access: starts off as n, automatically 
+                                * goes to rwx, generating an event without
+                                * pausing the vcpu */
+    HVMMEM_access_default      /* Take the domain default */
+} hvmmem_access_t;
+/* Notify that a region of memory is to have specific access types */
+struct xen_hvm_set_mem_access {
+    /* Domain to be updated. */
+    domid_t domid;
+    /* Memory type */
+    uint16_t hvmmem_access; /* hvm_access_t */
+    /* Number of pages, ignored on setting default access */
+    uint32_t nr;
+    /* First pfn, or ~0ull to set the default access for new pages */
+    uint64_aligned_t first_pfn;
+};
+typedef struct xen_hvm_set_mem_access xen_hvm_set_mem_access_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_access_t);
+
+#define HVMOP_get_mem_access        13
+/* Get the specific access type for that region of memory */
+struct xen_hvm_get_mem_access {
+    /* Domain to be queried. */
+    domid_t domid;
+    /* Memory type: OUT */
+    uint16_t hvmmem_access; /* hvm_access_t */
+    /* pfn, or ~0ull for default access for new pages.  IN */
+    uint64_aligned_t pfn;
+};
+typedef struct xen_hvm_get_mem_access xen_hvm_get_mem_access_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_access_t);
+
+#define HVMOP_inject_trap            14
+/* Inject a trap into a VCPU, which will get taken up on the next
+ * scheduling of it. Note that the caller should know enough of the
+ * state of the CPU before injecting, to know what the effect of
+ * injecting the trap will be.
+ */
+struct xen_hvm_inject_trap {
+    /* Domain to be queried. */
+    domid_t domid;
+    /* VCPU */
+    uint32_t vcpuid;
+    /* Vector number */
+    uint32_t vector;
+    /* Trap type (HVMOP_TRAP_*) */
+    uint32_t type;
+/* NB. This enumeration precisely matches hvm.h:X86_EVENTTYPE_* */
+# define HVMOP_TRAP_ext_int    0 /* external interrupt */
+# define HVMOP_TRAP_nmi        2 /* nmi */
+# define HVMOP_TRAP_hw_exc     3 /* hardware exception */
+# define HVMOP_TRAP_sw_int     4 /* software interrupt (CD nn) */
+# define HVMOP_TRAP_pri_sw_exc 5 /* ICEBP (F1) */
+# define HVMOP_TRAP_sw_exc     6 /* INT3 (CC), INTO (CE) */
+    /* Error code, or ~0u to skip */
+    uint32_t error_code;
+    /* Instruction length */
+    uint32_t insn_len;
+    /* CR2 for page faults */
+    uint64_aligned_t cr2;
+};
+typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#define HVMOP_get_mem_type    15
+/* Return hvmmem_type_t for the specified pfn. */
+struct xen_hvm_get_mem_type {
+    /* Domain to be queried. */
+    domid_t domid;
+    /* OUT variable. */
+    uint16_t mem_type;
+    uint16_t pad[2]; /* align next field on 8-byte boundary */
+    /* IN variable. */
+    uint64_t pfn;
+};
+typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t);
+
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/* MSI injection for emulated devices */
+#define HVMOP_inject_msi         16
+struct xen_hvm_inject_msi {
+    /* Domain to be injected */
+    domid_t   domid;
+    /* Data -- lower 32 bits */
+    uint32_t  data;
+    /* Address (0xfeexxxxx) */
+    uint64_t  addr;
+};
+typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t);
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/hvm_xs_strings.h
@@ -1,0 +1,80 @@
+/******************************************************************************
+ * hvm/hvm_xs_strings.h
+ *
+ * HVM xenstore strings used in HVMLOADER.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__
+#define __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__
+
+#define HVM_XS_HVMLOADER               "hvmloader"
+#define HVM_XS_BIOS                    "hvmloader/bios"
+#define HVM_XS_GENERATION_ID_ADDRESS   "hvmloader/generation-id-address"
+#define HVM_XS_ALLOW_MEMORY_RELOCATE   "hvmloader/allow-memory-relocate"
+
+/* The following values allow additional ACPI tables to be added to the
+ * virtual ACPI BIOS that hvmloader constructs. The values specify the guest
+ * physical address and length of a block of ACPI tables to add. The format of
+ * the block is simply concatenated raw tables (which specify their own length
+ * in the ACPI header).
+ */
+#define HVM_XS_ACPI_PT_ADDRESS         "hvmloader/acpi/address"
+#define HVM_XS_ACPI_PT_LENGTH          "hvmloader/acpi/length"
+
+/* Any number of SMBIOS types can be passed through to an HVM guest using
+ * the following xenstore values. The values specify the guest physical
+ * address and length of a block of SMBIOS structures for hvmloader to use.
+ * The block is formatted in the following way:
+ *
+ * <length><struct><length><struct>...
+ *
+ * Each length separator is a 32b integer indicating the length of the next
+ * SMBIOS structure. For DMTF defined types (0 - 121), the passed in struct
+ * will replace the default structure in hvmloader. In addition, any
+ * OEM/vendor types (128 - 255) will all be added.
+ */
+#define HVM_XS_SMBIOS_PT_ADDRESS       "hvmloader/smbios/address"
+#define HVM_XS_SMBIOS_PT_LENGTH        "hvmloader/smbios/length"
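+
+/*
+ * For example, a block carrying two structures would be assembled as
+ * (a sketch; buf is guest-physical memory visible to hvmloader):
+ *
+ *     uint8_t *p = buf;
+ *
+ *     *(uint32_t *)p = len1;              // 32-bit length separator
+ *     memcpy(p + 4, struct1, len1);
+ *     p += 4 + len1;
+ *     *(uint32_t *)p = len2;
+ *     memcpy(p + 4, struct2, len2);
+ *     // finally publish buf's address and total size under
+ *     // HVM_XS_SMBIOS_PT_ADDRESS and HVM_XS_SMBIOS_PT_LENGTH
+ */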
+
+/* Set to 1 to enable SMBIOS default portable battery (type 22) values. */
+#define HVM_XS_SMBIOS_DEFAULT_BATTERY  "hvmloader/smbios/default_battery"
+
+/* The following xenstore values are used to override some of the default
+ * string values in the SMBIOS table constructed in hvmloader.
+ */
+#define HVM_XS_BIOS_STRINGS            "bios-strings"
+#define HVM_XS_BIOS_VENDOR             "bios-strings/bios-vendor"
+#define HVM_XS_BIOS_VERSION            "bios-strings/bios-version"
+#define HVM_XS_SYSTEM_MANUFACTURER     "bios-strings/system-manufacturer"
+#define HVM_XS_SYSTEM_PRODUCT_NAME     "bios-strings/system-product-name"
+#define HVM_XS_SYSTEM_VERSION          "bios-strings/system-version"
+#define HVM_XS_SYSTEM_SERIAL_NUMBER    "bios-strings/system-serial-number"
+#define HVM_XS_ENCLOSURE_MANUFACTURER  "bios-strings/enclosure-manufacturer"
+#define HVM_XS_ENCLOSURE_SERIAL_NUMBER "bios-strings/enclosure-serial-number"
+#define HVM_XS_BATTERY_MANUFACTURER    "bios-strings/battery-manufacturer"
+#define HVM_XS_BATTERY_DEVICE_NAME     "bios-strings/battery-device-name"
+
+/* 1 to 99 OEM strings can be set in xenstore using values of the form
+ * below. These strings will be loaded into the SMBIOS type 11 structure.
+ */
+#define HVM_XS_OEM_STRINGS             "bios-strings/oem-%d"
+
+#endif /* __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/ioreq.h
@@ -1,0 +1,122 @@
+/*
+ * ioreq.h: I/O request definitions for device models
+ * Copyright (c) 2004, Intel Corporation.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _IOREQ_H_
+#define _IOREQ_H_
+
+#define IOREQ_READ      1
+#define IOREQ_WRITE     0
+
+#define STATE_IOREQ_NONE        0
+#define STATE_IOREQ_READY       1
+#define STATE_IOREQ_INPROCESS   2
+#define STATE_IORESP_READY      3
+
+#define IOREQ_TYPE_PIO          0 /* pio */
+#define IOREQ_TYPE_COPY         1 /* mmio ops */
+#define IOREQ_TYPE_TIMEOFFSET   7
+#define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
+
+/*
+ * VMExit dispatcher should cooperate with instruction decoder to
+ * prepare this structure and notify service OS and DM by sending
+ * virq
+ */
+struct ioreq {
+    uint64_t addr;          /* physical address */
+    uint64_t data;          /* data (or paddr of data) */
+    uint32_t count;         /* for rep prefixes */
+    uint32_t size;          /* size in bytes */
+    uint32_t vp_eport;      /* evtchn for notifications to/from device model */
+    uint16_t _pad0;
+    uint8_t state:4;
+    uint8_t data_is_ptr:1;  /* if 1, data above is the guest paddr 
+                             * of the real data to use. */
+    uint8_t dir:1;          /* 1=read, 0=write */
+    uint8_t df:1;
+    uint8_t _pad1:1;
+    uint8_t type;           /* I/O type */
+};
+typedef struct ioreq ioreq_t;
+
+struct shared_iopage {
+    struct ioreq vcpu_ioreq[1];
+};
+typedef struct shared_iopage shared_iopage_t;
+
+struct buf_ioreq {
+    uint8_t  type;   /* I/O type                    */
+    uint8_t  pad:1;
+    uint8_t  dir:1;  /* 1=read, 0=write             */
+    uint8_t  size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */
+    uint32_t addr:20;/* physical address            */
+    uint32_t data;   /* data                        */
+};
+typedef struct buf_ioreq buf_ioreq_t;
+
+#define IOREQ_BUFFER_SLOT_NUM     511 /* 8 bytes each, plus 2 4-byte indexes */
+struct buffered_iopage {
+    unsigned int read_pointer;
+    unsigned int write_pointer;
+    buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM];
+}; /* NB. Size of this structure must be no greater than one page. */
+typedef struct buffered_iopage buffered_iopage_t;
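+
+/*
+ * A device model drains the ring roughly as follows (a sketch; the real
+ * consumer also honours the two-slot case for 8-byte requests and inserts
+ * the appropriate memory barriers):
+ *
+ *     while (iopage->read_pointer != iopage->write_pointer) {
+ *         buf_ioreq_t *req =
+ *             &iopage->buf_ioreq[iopage->read_pointer % IOREQ_BUFFER_SLOT_NUM];
+ *         handle(req);                    // dispatch on req->type/dir/size
+ *         iopage->read_pointer++;
+ *     }
+ */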
+
+/*
+ * ACPI Control/Event register locations. Location is controlled by a 
+ * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION.
+ */
+
+/* Version 0 (default): Traditional Xen locations. */
+#define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40
+#define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04)
+#define ACPI_PM_TMR_BLK_ADDRESS_V0   (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08)
+#define ACPI_GPE0_BLK_ADDRESS_V0     (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20)
+#define ACPI_GPE0_BLK_LEN_V0         0x08
+
+/* Version 1: Locations preferred by modern Qemu. */
+#define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000
+#define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04)
+#define ACPI_PM_TMR_BLK_ADDRESS_V1   (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08)
+#define ACPI_GPE0_BLK_ADDRESS_V1     0xafe0
+#define ACPI_GPE0_BLK_LEN_V1         0x04
+
+/* Compatibility definitions for the default location (version 0). */
+#define ACPI_PM1A_EVT_BLK_ADDRESS    ACPI_PM1A_EVT_BLK_ADDRESS_V0
+#define ACPI_PM1A_CNT_BLK_ADDRESS    ACPI_PM1A_CNT_BLK_ADDRESS_V0
+#define ACPI_PM_TMR_BLK_ADDRESS      ACPI_PM_TMR_BLK_ADDRESS_V0
+#define ACPI_GPE0_BLK_ADDRESS        ACPI_GPE0_BLK_ADDRESS_V0
+#define ACPI_GPE0_BLK_LEN            ACPI_GPE0_BLK_LEN_V0
+
+
+#endif /* _IOREQ_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/params.h
@@ -1,0 +1,150 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
+#define __XEN_PUBLIC_HVM_PARAMS_H__
+
+#include "hvm_op.h"
+
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt).
+ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows:
+ *                  Domain = val[47:32], Bus  = val[31:16],
+ *                  DevFn  = val[15: 8], IntX = val[ 1: 0]
+ * val[63:56] == 2: val[7:0] is a vector number, check for
+ *                  XENFEAT_hvm_callback_vector to know if this delivery
+ *                  method is available.
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ */
+#define HVM_PARAM_CALLBACK_IRQ 0
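+
+/*
+ * For example (a sketch of the encoding above): to ask for delivery via a
+ * GSI, val is simply the GSI number (top byte zero); to ask for the vector
+ * method the guest, after checking XENFEAT_hvm_callback_vector, would set
+ *
+ *     val = ((uint64_t)2 << 56) | vector;
+ */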
+
+/*
+ * These are not used by Xen. They are here for convenience of HVM-guest
+ * xenbus implementations.
+ */
+#define HVM_PARAM_STORE_PFN    1
+#define HVM_PARAM_STORE_EVTCHN 2
+
+#define HVM_PARAM_PAE_ENABLED  4
+
+#define HVM_PARAM_IOREQ_PFN    5
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+#define HVM_PARAM_BUFIOREQ_EVTCHN 26
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/* Expose Viridian interfaces to this HVM guest? */
+#define HVM_PARAM_VIRIDIAN     9
+
+#endif
+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ *  delay_for_missed_ticks (default):
+ *   Do not advance a vcpu's time beyond the correct delivery time for
+ *   interrupts that have been missed due to preemption. Deliver missed
+ *   interrupts when the vcpu is rescheduled and advance the vcpu's virtual
+ *   time stepwise for each one.
+ *  no_delay_for_missed_ticks:
+ *   As above, missed interrupts are delivered, but guest time always tracks
+ *   wallclock (i.e., real) time while doing so.
+ *  no_missed_ticks_pending:
+ *   No missed interrupts are held pending. Instead, to ensure ticks are
+ *   delivered at some non-zero rate, if we detect missed ticks then the
+ *   internal tick alarm is not disabled if the VCPU is preempted during the
+ *   next tick period.
+ *  one_missed_tick_pending:
+ *   Missed interrupts are collapsed together and delivered as one 'late tick'.
+ *   Guest time always tracks wallclock (i.e., real) time.
+ */
+#define HVM_PARAM_TIMER_MODE   10
+#define HVMPTM_delay_for_missed_ticks    0
+#define HVMPTM_no_delay_for_missed_ticks 1
+#define HVMPTM_no_missed_ticks_pending   2
+#define HVMPTM_one_missed_tick_pending   3
+
+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
+#define HVM_PARAM_HPET_ENABLED 11
+
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT     12
+
+/* Device Model domain, defaults to 0. */
+#define HVM_PARAM_DM_DOMAIN    13
+
+/* ACPI S state: currently support S0 and S3 on x86. */
+#define HVM_PARAM_ACPI_S_STATE 14
+
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS     15
+
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN    16
+
+/* Console debug shared memory ring and event channel */
+#define HVM_PARAM_CONSOLE_PFN    17
+#define HVM_PARAM_CONSOLE_EVTCHN 18
+
+/*
+ * Select location of ACPI PM1a and TMR control blocks. Currently two locations
+ * are supported, specified by version 0 or 1 in this parameter:
+ *   - 0: default, use the old addresses
+ *        PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48
+ *   - 1: use the new default qemu addresses
+ *        PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008
+ * You can find these address definitions in <hvm/ioreq.h>
+ */
+#define HVM_PARAM_ACPI_IOPORTS_LOCATION 19
+
+/* Enable blocking memory events, async or sync (pause vcpu until response) 
+ * onchangeonly indicates messages only on a change of value */
+#define HVM_PARAM_MEMORY_EVENT_CR0          20
+#define HVM_PARAM_MEMORY_EVENT_CR3          21
+#define HVM_PARAM_MEMORY_EVENT_CR4          22
+#define HVM_PARAM_MEMORY_EVENT_INT3         23
+#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP  25
+#define HVM_PARAM_MEMORY_EVENT_MSR          30
+
+#define HVMPME_MODE_MASK       (3 << 0)
+#define HVMPME_mode_disabled   0
+#define HVMPME_mode_async      1
+#define HVMPME_mode_sync       2
+#define HVMPME_onchangeonly    (1 << 2)
+
+/* Boolean: Enable nestedhvm (hvm only) */
+#define HVM_PARAM_NESTEDHVM    24
+
+/* Params for the mem event rings */
+#define HVM_PARAM_PAGING_RING_PFN   27
+#define HVM_PARAM_ACCESS_RING_PFN   28
+#define HVM_PARAM_SHARING_RING_PFN  29
+
+/* SHUTDOWN_* action in case of a triple fault */
+#define HVM_PARAM_TRIPLE_FAULT_REASON 31
+
+#define HVM_NR_PARAMS          32
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/pvdrivers.h
@@ -1,0 +1,47 @@
+/*
+ * pvdrivers.h: Register of PV drivers product numbers.
+ * Copyright (c) 2012, Citrix Systems Inc.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _XEN_PUBLIC_PVDRIVERS_H_
+#define _XEN_PUBLIC_PVDRIVERS_H_
+
+/*
+ * This is the master registry of product numbers for
+ * PV drivers. 
+ * If you need a new product number allocating, please
+ * post to [email protected].  You should NOT use
+ * a product number without allocating one.
+ * If you maintain a separate versioning and distribution path
+ * for PV drivers you should have a separate product number so
+ * that your drivers can be separated from others.
+ *
+ * During development, you may use the product ID to
+ * indicate a driver which is yet to be released.
+ */
+
+#define PVDRIVERS_PRODUCT_LIST(EACH)                         \
+        EACH("xensource-windows", 0x0001) /* Citrix */       \
+        EACH("gplpv-windows",     0x0002) /* James Harper */ \
+        EACH("linux",             0x0003)                    \
+        EACH("experimental",      0xffff)
+
+#endif /* _XEN_PUBLIC_PVDRIVERS_H_ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/hvm/save.h
@@ -1,0 +1,111 @@
+/* 
+ * hvm/save.h
+ *
+ * Structure definitions for HVM state that is held by Xen and must
+ * be saved along with the domain's memory and device-model state.
+ * 
+ * Copyright (c) 2007 XenSource Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_SAVE_H__
+#define __XEN_PUBLIC_HVM_SAVE_H__
+
+/*
+ * Structures in this header *must* have the same layout in 32-bit
+ * and 64-bit environments: all fields must be of explicitly sized
+ * types and aligned to their sizes, and each struct must be a
+ * multiple of eight bytes long.
+ *
+ * Only the state necessary for saving and restoring (i.e. fields 
+ * that are analogous to actual hardware state) should go in this file. 
+ * Internal mechanisms should be kept in Xen-private headers.
+ */
+
+#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
+#error "Anonymous structs/unions are a GNU extension."
+#endif
+
+/* 
+ * Each entry is preceded by a descriptor giving its type and length
+ */
+struct hvm_save_descriptor {
+    uint16_t typecode;          /* Used to demux the various types below */
+    uint16_t instance;          /* Further demux within a type */
+    uint32_t length;            /* In bytes, *not* including this descriptor */
+};
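+
+/*
+ * Illustrative sketch (not part of this header): walking a stream of
+ * save records laid out as described above.  Real consumers would
+ * dispatch on d->typecode; here we only skip over each record.
+ */
+#if 0
+static void
+walk_hvm_save(const uint8_t *buf, uint32_t len)
+{
+    uint32_t off = 0;
+
+    while (off + sizeof(struct hvm_save_descriptor) <= len) {
+        const struct hvm_save_descriptor *d =
+            (const struct hvm_save_descriptor *)(buf + off);
+        if (d->typecode == 0 && d->length == 0)
+            break;              /* hvm_save_end terminator */
+        /* d->length bytes of payload follow the descriptor */
+        off += sizeof(*d) + d->length;
+    }
+}
+#endif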
+
+
+/* 
+ * Each entry has a datatype associated with it: for example, the CPU state 
+ * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU), 
+ * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU).
+ * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system
+ * ugliness.
+ */
+
+#ifdef __XEN__
+# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix)     \
+    static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { return _fix(h); } \
+    struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \
+    struct __HVM_SAVE_TYPE_COMPAT_##_x { _ctype t; }                   
+
+# include <xen/lib.h> /* BUG() */
+# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type)                         \
+    static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { BUG(); return -1; } \
+    struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}; \
+    struct __HVM_SAVE_TYPE_COMPAT_##_x { _type t; }                   
+#else
+# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix)     \
+    struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];} 
+
+# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type)                         \
+    struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];} 
+#endif
+
+#define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t)
+#define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x)))
+#define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c))
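+
+/*
+ * For example (illustration only; the real declarations live in the
+ * arch-specific save.h headers included below), after
+ * DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu):
+ *   HVM_SAVE_CODE(CPU)   == 2, encoded as the length of the dummy
+ *                           char array 'c';
+ *   HVM_SAVE_LENGTH(CPU) == sizeof(struct hvm_hw_cpu);
+ * both computed entirely at compile time.
+ */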
+
+#ifdef __XEN__
+# define HVM_SAVE_TYPE_COMPAT(_x) typeof (((struct __HVM_SAVE_TYPE_COMPAT_##_x *)(0))->t)
+# define HVM_SAVE_LENGTH_COMPAT(_x) (sizeof (HVM_SAVE_TYPE_COMPAT(_x)))
+
+# define HVM_SAVE_HAS_COMPAT(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->cpt)-1)
+# define HVM_SAVE_FIX_COMPAT(_x, _dst) __HVM_SAVE_FIX_COMPAT_##_x(_dst)
+#endif
+
+/* 
+ * The series of save records is terminated by a zero-type, zero-length
+ * descriptor.
+ */
+
+struct hvm_save_end {};
+DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end);
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../arch-x86/hvm/save.h"
+#elif defined(__arm__) || defined(__aarch64__)
+#include "../arch-arm/hvm/save.h"
+#else
+#error "unsupported architecture"
+#endif
+
+#endif /* __XEN_PUBLIC_HVM_SAVE_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/blkif.h
@@ -1,0 +1,556 @@
+/******************************************************************************
+ * blkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ * Copyright (c) 2012, Spectra Logic Corporation
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t   uint16_t
+#endif
+#define blkif_sector_t uint64_t
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen block driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters.  This
+ * section enumerates these nodes, which reside in the respective frontend
+ * and backend portions of the XenStore, following the XenBus convention.
+ *
+ * All data in the XenStore is stored as strings.  Nodes specifying numeric
+ * values are encoded in decimal.  Integer value ranges listed below are
+ * expressed as fixed-size integer types capable of storing the conversion
+ * of a properly formatted node string without loss of information.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ * XenStore nodes marked "DEPRECATED" in their notes section should only be
+ * used to provide interoperability with legacy implementations.
+ *
+ * See the XenBus state transition diagram below for details on when XenBus
+ * nodes must be published and when they can be queried.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * mode
+ *      Values:         "r" (read only), "w" (writable)
+ *
+ *      The read or write access permissions to the backing store to be
+ *      granted to the frontend.
+ *
+ * params
+ *      Values:         string
+ *
+ *      A free-form string providing sufficient information for the
+ *      backend driver to open the backing device (e.g. the path to the
+ *      file or block device representing the backing store).
+ *
+ * type
+ *      Values:         "file", "phy", "tap"
+ *
+ *      The type of the backing device/object.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-barrier
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_WRITE_BARRIER request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-flush-cache
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-discard
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_DISCARD request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7
+ *
+ *      A value of "1" indicates that the backend can keep the grants used
+ *      by the frontend driver mapped, so the same set of grants should be
+ *      used in all transactions. The maximum number of grants the backend
+ *      can map persistently depends on the implementation, but ideally it
+ *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
+ *      feature the backend doesn't need to unmap each grant, preventing
+ *      costly TLB flushes. The backend driver should only map grants
+ *      persistently if the frontend supports it. If a backend driver chooses
+ *      to use the persistent protocol when the frontend doesn't support it,
+ *      it will probably hit the maximum number of persistently mapped grants
+ *      (due to the fact that the frontend won't be reusing the same grants),
+ *      and fall back to non-persistent mode. Backend implementations may
+ *      shrink or expand the number of persistently mapped grants without
+ *      notifying the frontend depending on memory constraints (this might
+ *      cause a performance degradation).
+ *
+ *      If a backend driver wants to limit the maximum number of persistently
+ *      mapped grants to a value less than RING_SIZE *
+ *      BLKIF_MAX_SEGMENTS_PER_REQUEST, an LRU strategy should be used to
+ *      discard the grants that are less commonly used. Using an LRU in the
+ *      backend driver paired with a LIFO queue in the frontend gives
+ *      better performance in this scenario.
+ *
+ *----------------------- Request Transport Parameters ------------------------
+ *
+ * max-ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          1, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      log2(machine pages) (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
+ *      etc.).
+ *
+ * max-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      machine pages.  The value must be a power of 2.
+ *
+ *------------------------- Backend Device Properties -------------------------
+ *
+ * discard-alignment
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          4, 5
+ *
+ *      The offset, in bytes from the beginning of the virtual block device,
+ *      to the first, addressable, discard extent on the underlying device.
+ *
+ * discard-granularity
+ *      Values:         <uint32_t>
+ *      Default Value:  <"sector-size">
+ *      Notes:          4
+ *
+ *      The size, in bytes, of the individually addressable discard extents
+ *      of the underlying device.
+ *
+ * discard-secure
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
+ *      requests with the BLKIF_DISCARD_SECURE flag set.
+ *
+ * info
+ *      Values:         <uint32_t> (bitmap)
+ *
+ *      A collection of bit flags describing attributes of the backing
+ *      device.  The VDISK_* macros define the meaning of each bit
+ *      location.
+ *
+ * sector-size
+ *      Values:         <uint32_t>
+ *
+ *      The logical sector size, in bytes, of the backend device.
+ *
+ * physical-sector-size
+ *      Values:         <uint32_t>
+ *
+ *      The physical sector size, in bytes, of the backend device.
+ *
+ * sectors
+ *      Values:         <uint64_t>
+ *
+ *      The size of the backend device, expressed in units of its logical
+ *      sector size ("sector-size").
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         <uint32_t>
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * ring-ref
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer.
+ *
+ * ring-ref%u
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      For a frontend providing a multi-page ring, a "number of ring pages"
+ *      sized list of nodes, each containing a Xen grant reference granting
+ *      permission for the backend to map the page of the ring located
+ *      at page index "%u".  Page indexes are zero based.
+ *
+ * protocol
+ *      Values:         string (XEN_IO_PROTO_ABI_*)
+ *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
+ *
+ *      The machine ABI rules governing the format of all ring request and
+ *      response structures.
+ *
+ * ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
+ *      Notes:          1, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units
+ *      of log2(machine pages) (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
+ *      etc.).
+ *
+ * num-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Maximum Value:  MAX(max-ring-pages,(0x1 << max-ring-page-order))
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units of
+ *      machine pages.  The value must be a power of 2.
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7, 8, 9
+ *
+ *      A value of "1" indicates that the frontend will reuse the same grants
+ *      for all transactions, allowing the backend to map them with write
+ *      access (even when it should be read-only). If the frontend hits the
+ *      maximum number of allowed persistently mapped grants, it can fall back
+ *      to non-persistent mode. This will cause a performance degradation,
+ *      since the backend driver will still try to map those grants
+ *      persistently. Since the persistent grants protocol is compatible with
+ *      the previous protocol, a frontend driver can choose to work in
+ *      persistent mode even when the backend doesn't support it.
+ *
+ *      It is recommended that the frontend driver store the persistently
+ *      mapped grants in a LIFO queue, so that a subset of all persistently
+ *      mapped grants is used most of the time, in case the backend driver
+ *      decides to limit the maximum number of persistently mapped grants
+ *      to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *
+ *------------------------- Virtual Device Properties -------------------------
+ *
+ * device-type
+ *      Values:         "disk", "cdrom", "floppy", etc.
+ *
+ * virtual-device
+ *      Values:         <uint32_t>
+ *
+ *      A value indicating the physical device to virtualize within the
+ *      frontend's domain.  (e.g. "The first ATA disk", "The third SCSI
+ *      disk", etc.)
+ *
+ *      See docs/misc/vbd-interface.txt for details on the format of this
+ *      value.
+ *
+ * Notes
+ * -----
+ * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
+ *     PV drivers.
+ * (2) Multi-page ring buffer scheme first used in some RedHat distributions
+ *     including a distribution deployed on certain nodes of the Amazon
+ *     EC2 cluster.
+ * (3) Support for multi-page ring buffers was implemented independently,
+ *     in slightly different forms, by both Citrix and RedHat/Amazon.
+ *     For full interoperability, block front and backends should publish
+ *     identical ring parameters, adjusted for unit differences, to the
+ *     XenStore nodes used in both schemes.
+ * (4) Devices that support discard functionality may internally allocate
+ *     space (discardable extents) in units that are larger than the
+ *     exported logical block size.
+ * (5) The discard-alignment parameter allows a physical device to be
+ *     partitioned into virtual devices that do not necessarily begin or
+ *     end on a discardable extent boundary.
+ * (6) When there is only a single page allocated to the request ring,
+ *     'ring-ref' is used to communicate the grant reference for this
+ *     page to the backend.  When using a multi-page ring, the 'ring-ref'
+ *     node is not created.  Instead 'ring-ref0' - 'ring-refN' are used.
+ * (7) When using persistent grants, data has to be copied from/to the page
+ *     where the grant is currently mapped. The overhead of this copy,
+ *     however, does not outweigh the speed improvement of not having to
+ *     unmap the grants.
+ * (8) The frontend driver has to allow the backend driver to map all grants
+ *     with write access, even when they should be mapped read-only, since
+ *     further requests may reuse these grants and require write permissions.
+ * (9) The Linux implementation doesn't have a limit on the maximum number
+ *     of grants that can be persistently mapped in the frontend driver, but
+ *     due to the frontend driver implementation it should never be bigger
+ *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ */
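+
+/*
+ * Illustrative sketch (not part of this header): a frontend probing an
+ * optional backend feature node, defaulting to 0 when the node is
+ * absent.  Assumes a xenstore_read(path, buf, len) helper such as the
+ * one this kernel uses elsewhere; the path below is only an example.
+ */
+#if 0
+static int
+backend_has_flush(void)
+{
+    char buf[16];
+
+    if (xenstore_read("backend/vbd/0/768/feature-flush-cache",
+                      buf, sizeof buf) <= 0)
+        return 0;               /* node absent: default value 0 */
+    return buf[0] == '1';
+}
+#endif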
+
+/*
+ * STATE DIAGRAMS
+ *
+ *****************************************************************************
+ *                                   Startup                                 *
+ *****************************************************************************
+ *
+ * Tool stack creates front and back nodes with state XenbusStateInitialising.
+ *
+ * Front                                Back
+ * =================================    =====================================
+ * XenbusStateInitialising              XenbusStateInitialising
+ *  o Query virtual device               o Query backend device identification
+ *    properties.                          data.
+ *  o Setup OS device instance.          o Open and validate backend device.
+ *                                       o Publish backend features and
+ *                                         transport parameters.
+ *                                                      |
+ *                                                      |
+ *                                                      V
+ *                                      XenbusStateInitWait
+ *
+ * o Query backend features and
+ *   transport parameters.
+ * o Allocate and initialize the
+ *   request ring.
+ * o Publish transport parameters
+ *   that will be in effect during
+ *   this connection.
+ *              |
+ *              |
+ *              V
+ * XenbusStateInitialised
+ *
+ *                                       o Query frontend transport parameters.
+ *                                       o Connect to the request ring and
+ *                                         event channel.
+ *                                       o Publish backend device properties.
+ *                                                      |
+ *                                                      |
+ *                                                      V
+ *                                      XenbusStateConnected
+ *
+ *  o Query backend device properties.
+ *  o Finalize OS virtual device
+ *    instance.
+ *              |
+ *              |
+ *              V
+ * XenbusStateConnected
+ *
+ * Note: Drivers that do not support any optional features, or the negotiation
+ *       of transport parameters, can skip certain states in the state machine:
+ *
+ *       o A frontend may transition to XenbusStateInitialised without
+ *         waiting for the backend to enter XenbusStateInitWait.  In this
+ *         case, default transport parameters are in effect and any
+ *         transport parameters published by the frontend must contain
+ *         their default values.
+ *
+ *       o A backend may transition to XenbusStateInitialised, bypassing
+ *         XenbusStateInitWait, without waiting for the frontend to first
+ *         enter the XenbusStateInitialised state.  In this case, default
+ *         transport parameters are in effect and any transport parameters
+ *         published by the backend must contain their default values.
+ *
+ *       Drivers that support optional features and/or transport parameter
+ *       negotiation must tolerate these additional state transition paths.
+ *       In general this means performing the work of any skipped state
+ *       transition, if it has not already been performed, in addition to the
+ *       work associated with entry into the current state.
+ */
+
+/*
+ * REQUEST CODES.
+ */
+#define BLKIF_OP_READ              0
+#define BLKIF_OP_WRITE             1
+/*
+ * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
+ * operation code ("barrier request") must be completed prior to the
+ * execution of the barrier request.  All writes issued after the barrier
+ * request must not execute until after the completion of the barrier request.
+ *
+ * Optional.  See "feature-barrier" XenBus node documentation above.
+ */
+#define BLKIF_OP_WRITE_BARRIER     2
+/*
+ * Commit any uncommitted contents of the backing device's volatile cache
+ * to stable storage.
+ *
+ * Optional.  See "feature-flush-cache" XenBus node documentation above.
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE   3
+/*
+ * Used in SLES sources for device specific command packet
+ * contained within the request. Reserved for that purpose.
+ */
+#define BLKIF_OP_RESERVED_1        4
+/*
+ * Indicate to the backend device that a region of storage is no longer in
+ * use, and may be discarded at any time without impact to the client.  If
+ * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
+ * discarded region on the device must be rendered unrecoverable before the
+ * command returns.
+ *
+ * This operation is analogous to performing a trim (ATA) or unmap (SCSI)
+ * command on a native device.
+ *
+ * More information about trim/unmap operations can be found at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ *     Interface%20manuals/100293068c.pdf
+ *
+ * Optional.  See "feature-discard", "discard-alignment",
+ * "discard-granularity", and "discard-secure" in the XenBus node
+ * documentation above.
+ */
+#define BLKIF_OP_DISCARD           5
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+/*
+ * NB. first_sect and last_sect in blkif_request_segment, as well as
+ * sector_number in blkif_request, are always expressed in 512-byte units.
+ * However they must be properly aligned to the real sector size of the
+ * physical disk, which is reported in the "physical-sector-size" node in
+ * the backend xenbus info. Also the xenbus "sectors" node is expressed in
+ * 512-byte units.
+ */
+struct blkif_request_segment {
+    grant_ref_t gref;        /* reference to I/O buffer frame        */
+    /* @first_sect: first sector in frame to transfer (inclusive).   */
+    /* @last_sect: last sector in frame to transfer (inclusive).     */
+    uint8_t     first_sect, last_sect;
+};
+
+/*
+ * Starting ring element for any I/O request.
+ */
+struct blkif_request {
+    uint8_t        operation;    /* BLKIF_OP_???                         */
+    uint8_t        nr_segments;  /* number of segments                   */
+    blkif_vdev_t   handle;       /* only for read/write requests         */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct blkif_request blkif_request_t;
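+
+/*
+ * Illustrative sketch (not part of this header): filling a one-segment
+ * read request for the first eight 512-byte sectors of the device.
+ * gref is assumed to be a grant reference covering the target frame.
+ */
+#if 0
+static void
+fill_read_request(blkif_request_t *req, blkif_vdev_t vdev,
+                  grant_ref_t gref, uint64_t reqid)
+{
+    req->operation = BLKIF_OP_READ;
+    req->nr_segments = 1;
+    req->handle = vdev;
+    req->id = reqid;            /* echoed back in the response */
+    req->sector_number = 0;     /* 512-byte units, see note above */
+    req->seg[0].gref = gref;
+    req->seg[0].first_sect = 0;
+    req->seg[0].last_sect = 7;  /* inclusive: sectors 0..7 */
+}
+#endif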
+
+/*
+ * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
+ * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
+ */
+struct blkif_request_discard {
+    uint8_t        operation;    /* BLKIF_OP_DISCARD                     */
+    uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
+#define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0      */
+    blkif_vdev_t   handle;       /* same as for read/write requests      */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk             */
+    uint64_t       nr_sectors;   /* number of contiguous sectors to discard*/
+};
+typedef struct blkif_request_discard blkif_request_discard_t;
+
+struct blkif_response {
+    uint64_t        id;              /* copied from request */
+    uint8_t         operation;       /* copied from request */
+    int16_t         status;          /* BLKIF_RSP_???       */
+};
+typedef struct blkif_response blkif_response_t;
+
+/*
+ * STATUS RETURN CODES.
+ */
+ /* Operation not supported (e.g. an unsupported barrier, flush, or discard request). */
+#define BLKIF_RSP_EOPNOTSUPP  -2
+ /* Operation failed for some unspecified reason (-EIO). */
+#define BLKIF_RSP_ERROR       -1
+ /* Operation completed successfully. */
+#define BLKIF_RSP_OKAY         0
+
+/*
+ * Generate blkif ring structures and types.
+ */
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
+
+#define VDISK_CDROM        0x1
+#define VDISK_REMOVABLE    0x2
+#define VDISK_READONLY     0x4
+
+#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/console.h
@@ -1,0 +1,51 @@
+/******************************************************************************
+ * console.h
+ * 
+ * Console I/O interface for Xen guest OSes.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
+#define __XEN_PUBLIC_IO_CONSOLE_H__
+
+typedef uint32_t XENCONS_RING_IDX;
+
+#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))
+
+struct xencons_interface {
+    char in[1024];
+    char out[2048];
+    XENCONS_RING_IDX in_cons, in_prod;
+    XENCONS_RING_IDX out_cons, out_prod;
+};
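+
+/*
+ * Illustrative sketch (not part of this header): a guest writing bytes
+ * to the output ring.  Real code must also place memory barriers
+ * between filling the data and publishing out_prod, and must notify
+ * the console event channel afterwards.
+ */
+#if 0
+static int
+cons_write(struct xencons_interface *intf, const char *data, int len)
+{
+    XENCONS_RING_IDX prod = intf->out_prod;
+    int sent = 0;
+
+    while (sent < len && prod - intf->out_cons < sizeof(intf->out))
+        intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
+    intf->out_prod = prod;      /* publish after the data is in place */
+    return sent;
+}
+#endif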
+
+#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/fbif.h
@@ -1,0 +1,176 @@
+/*
+ * fbif.h -- Xen virtual frame buffer device
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <[email protected]>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_IO_FBIF_H__
+#define __XEN_PUBLIC_IO_FBIF_H__
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ */
+
+/* Event type 1 currently not used */
+/*
+ * Framebuffer update notification event
+ * Capable frontend sets feature-update in xenstore.
+ * Backend requests it by setting request-update in xenstore.
+ */
+#define XENFB_TYPE_UPDATE 2
+
+struct xenfb_update
+{
+    uint8_t type;    /* XENFB_TYPE_UPDATE */
+    int32_t x;      /* source x */
+    int32_t y;      /* source y */
+    int32_t width;  /* rect width */
+    int32_t height; /* rect height */
+};
+
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize
+{
+    uint8_t type;    /* XENFB_TYPE_RESIZE */
+    int32_t width;   /* width in pixels */
+    int32_t height;  /* height in pixels */
+    int32_t stride;  /* stride in bytes */
+    int32_t depth;   /* depth in bits */
+    int32_t offset;  /* offset of the framebuffer in bytes */
+};
+
+#define XENFB_OUT_EVENT_SIZE 40
+
+union xenfb_out_event
+{
+    uint8_t type;
+    struct xenfb_update update;
+    struct xenfb_resize resize;
+    char pad[XENFB_OUT_EVENT_SIZE];
+};
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/*
+ * Framebuffer refresh period advice
+ * Backend sends it to advise the frontend of its preferred refresh
+ * period.  Frontends that keep the framebuffer constantly up-to-date
+ * just ignore it.  Frontends that use the advice should immediately
+ * refresh the framebuffer (and send an update notification event if
+ * one has been requested), then use the advertised frequency to guide
+ * their periodic refreshes.
+ */
+#define XENFB_TYPE_REFRESH_PERIOD 1
+#define XENFB_NO_REFRESH 0
+
+struct xenfb_refresh_period
+{
+    uint8_t type;    /* XENFB_TYPE_REFRESH_PERIOD */
+    uint32_t period; /* period of refresh, in ms,
+                      * XENFB_NO_REFRESH if no refresh is needed */
+};
+
+#define XENFB_IN_EVENT_SIZE 40
+
+union xenfb_in_event
+{
+    uint8_t type;
+    struct xenfb_refresh_period refresh_period;
+    char pad[XENFB_IN_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENFB_IN_RING_SIZE 1024
+#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
+#define XENFB_IN_RING_OFFS 1024
+#define XENFB_IN_RING(page) \
+    ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
+#define XENFB_IN_RING_REF(page, idx) \
+    (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
+
+#define XENFB_OUT_RING_SIZE 2048
+#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
+#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
+#define XENFB_OUT_RING(page) \
+    ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
+#define XENFB_OUT_RING_REF(page, idx) \
+    (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
+
+struct xenfb_page
+{
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
+
+    int32_t width;          /* the width of the framebuffer (in pixels) */
+    int32_t height;         /* the height of the framebuffer (in pixels) */
+    uint32_t line_length;   /* the length of a row of pixels (in bytes) */
+    uint32_t mem_length;    /* the length of the framebuffer (in bytes) */
+    uint8_t depth;          /* the depth of a pixel (in bits) */
+
+    /*
+     * Framebuffer page directory
+     *
+     * Each directory page holds PAGE_SIZE / sizeof(*pd)
+     * framebuffer pages, and can thus map up to PAGE_SIZE *
+     * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
+     * sizeof(unsigned long) == 4/8, that's 4 MB per directory page on
+     * 32-bit and 2 MB on 64-bit.  256 directories give enough room for
+     * a 512 MB framebuffer with a max resolution of 12,800x10,240.  Should
+     * be enough for a while with room leftover for expansion.
+     */
+    unsigned long pd[256];
+};
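+
+/*
+ * Illustrative sketch (not part of this header): a frontend draining
+ * the in-event ring using the accessor macros above.  handle_in() is
+ * hypothetical, and real code needs memory barriers around the index
+ * updates.
+ */
+#if 0
+static void
+xenfb_poll(struct xenfb_page *page)
+{
+    uint32_t cons = page->in_cons;
+
+    while (cons != page->in_prod) {
+        union xenfb_in_event *ev = &XENFB_IN_RING_REF(page, cons);
+        /* handle_in(ev); */
+        cons++;
+    }
+    page->in_cons = cons;       /* publish consumption */
+}
+#endif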
+
+/*
+ * Wart: xenkbd needs to know default resolution.  Put it here until a
+ * better solution is found, but don't leak it to the backend.
+ */
+#ifdef __KERNEL__
+#define XENFB_WIDTH 800
+#define XENFB_HEIGHT 600
+#define XENFB_DEPTH 32
+#endif
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/fsif.h
@@ -1,0 +1,192 @@
+/******************************************************************************
+ * fsif.h
+ * 
+ * Interface to FS level split device drivers.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007, Grzegorz Milos, <[email protected]>.
+ */
+
+#ifndef __XEN_PUBLIC_IO_FSIF_H__
+#define __XEN_PUBLIC_IO_FSIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+#define REQ_FILE_OPEN        1
+#define REQ_FILE_CLOSE       2
+#define REQ_FILE_READ        3
+#define REQ_FILE_WRITE       4
+#define REQ_STAT             5
+#define REQ_FILE_TRUNCATE    6
+#define REQ_REMOVE           7
+#define REQ_RENAME           8
+#define REQ_CREATE           9
+#define REQ_DIR_LIST        10
+#define REQ_CHMOD           11
+#define REQ_FS_SPACE        12
+#define REQ_FILE_SYNC       13
+
+struct fsif_open_request {
+    grant_ref_t gref;
+};
+
+struct fsif_close_request {
+    uint32_t fd;
+};
+
+struct fsif_read_request {
+    uint32_t fd;
+    int32_t pad;
+    uint64_t len;
+    uint64_t offset;
+    grant_ref_t grefs[1];  /* Variable length */
+};
+
+struct fsif_write_request {
+    uint32_t fd;
+    int32_t pad;
+    uint64_t len;
+    uint64_t offset;
+    grant_ref_t grefs[1];  /* Variable length */
+};
+
+struct fsif_stat_request {
+    uint32_t fd;
+};
+
+/* This structure is a copy of some fields from the stat structure,
+ * returned via the ring. */
+struct fsif_stat_response {
+    int32_t  stat_mode;
+    uint32_t stat_uid;
+    uint32_t stat_gid;
+    int32_t  stat_ret;
+    int64_t  stat_size;
+    int64_t  stat_atime;
+    int64_t  stat_mtime;
+    int64_t  stat_ctime;
+};
+
+struct fsif_truncate_request {
+    uint32_t fd;
+    int32_t pad;
+    int64_t length;
+};
+
+struct fsif_remove_request {
+    grant_ref_t gref;
+};
+
+struct fsif_rename_request {
+    uint16_t old_name_offset;
+    uint16_t new_name_offset;
+    grant_ref_t gref;
+};
+
+struct fsif_create_request {
+    int8_t directory;
+    int8_t pad;
+    int16_t pad2;
+    int32_t mode;
+    grant_ref_t gref;
+};
+
+struct fsif_list_request {
+    uint32_t offset;
+    grant_ref_t gref;
+};
+
+#define NR_FILES_SHIFT  0
+#define NR_FILES_SIZE   16   /* 16 bits for the number of files mask */
+#define NR_FILES_MASK   (((1ULL << NR_FILES_SIZE) - 1) << NR_FILES_SHIFT)
+#define ERROR_SIZE      32   /* 32 bits for the error mask */
+#define ERROR_SHIFT     (NR_FILES_SIZE + NR_FILES_SHIFT)
+#define ERROR_MASK      (((1ULL << ERROR_SIZE) - 1) << ERROR_SHIFT)
+#define HAS_MORE_SHIFT  (ERROR_SHIFT + ERROR_SIZE)    
+#define HAS_MORE_FLAG   (1ULL << HAS_MORE_SHIFT)
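+
+/*
+ * Illustrative sketch (not part of this header): unpacking a
+ * REQ_DIR_LIST return value (assumed here to be carried in ret_val of
+ * fsif_response below) with the masks above.
+ */
+#if 0
+static void
+decode_dirlist(uint64_t ret_val)
+{
+    uint64_t nfiles = (ret_val & NR_FILES_MASK) >> NR_FILES_SHIFT;
+    uint64_t error  = (ret_val & ERROR_MASK) >> ERROR_SHIFT;
+    int more        = (ret_val & HAS_MORE_FLAG) != 0;
+    /* ... */
+}
+#endif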
+
+struct fsif_chmod_request {
+    uint32_t fd;
+    int32_t mode;
+};
+
+struct fsif_space_request {
+    grant_ref_t gref;
+};
+
+struct fsif_sync_request {
+    uint32_t fd;
+};
+
+
+/* FS operation request */
+struct fsif_request {
+    uint8_t type;                 /* Type of the request                  */
+    uint8_t pad;
+    uint16_t id;                  /* Request ID, copied to the response   */
+    uint32_t pad2;
+    union {
+        struct fsif_open_request     fopen;
+        struct fsif_close_request    fclose;
+        struct fsif_read_request     fread;
+        struct fsif_write_request    fwrite;
+        struct fsif_stat_request     fstat;
+        struct fsif_truncate_request ftruncate;
+        struct fsif_remove_request   fremove;
+        struct fsif_rename_request   frename;
+        struct fsif_create_request   fcreate;
+        struct fsif_list_request     flist;
+        struct fsif_chmod_request    fchmod;
+        struct fsif_space_request    fspace;
+        struct fsif_sync_request     fsync;
+    } u;
+};
+typedef struct fsif_request fsif_request_t;
+
+/* FS operation response */
+struct fsif_response {
+    uint16_t id;
+    uint16_t pad1;
+    uint32_t pad2;
+    union {
+        uint64_t ret_val;
+        struct fsif_stat_response fstat;
+    } u;
+};
+
+typedef struct fsif_response fsif_response_t;
+
+#define FSIF_RING_ENTRY_SIZE   64
+
+#define FSIF_NR_READ_GNTS  ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_read_request)) /  \
+                                sizeof(grant_ref_t) + 1)
+#define FSIF_NR_WRITE_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_write_request)) / \
+                                sizeof(grant_ref_t) + 1)
+
+DEFINE_RING_TYPES(fsif, struct fsif_request, struct fsif_response);
+
+#define STATE_INITIALISED     "init"
+#define STATE_READY           "ready"
+#define STATE_CLOSING         "closing"
+#define STATE_CLOSED          "closed"
+
+
+#endif
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/kbdif.h
@@ -1,0 +1,132 @@
+/*
+ * kbdif.h -- Xen virtual keyboard/mouse
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <[email protected]>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_IO_KBDIF_H__
+#define __XEN_PUBLIC_IO_KBDIF_H__
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/* Pointer movement event */
+#define XENKBD_TYPE_MOTION  1
+/* Event type 2 currently not used */
+/* Key event (includes pointer buttons) */
+#define XENKBD_TYPE_KEY     3
+/*
+ * Pointer position event
+ * Capable backend sets feature-abs-pointer in xenstore.
+ * Frontend requests it instead of XENKBD_TYPE_MOTION by setting
+ * request-abs-update in xenstore.
+ */
+#define XENKBD_TYPE_POS     4
+
+struct xenkbd_motion
+{
+    uint8_t type;        /* XENKBD_TYPE_MOTION */
+    int32_t rel_x;       /* relative X motion */
+    int32_t rel_y;       /* relative Y motion */
+    int32_t rel_z;       /* relative Z motion (wheel) */
+};
+
+struct xenkbd_key
+{
+    uint8_t type;         /* XENKBD_TYPE_KEY */
+    uint8_t pressed;      /* 1 if pressed; 0 otherwise */
+    uint32_t keycode;     /* KEY_* from linux/input.h */
+};
+
+struct xenkbd_position
+{
+    uint8_t type;        /* XENKBD_TYPE_POS */
+    int32_t abs_x;       /* absolute X position (in FB pixels) */
+    int32_t abs_y;       /* absolute Y position (in FB pixels) */
+    int32_t rel_z;       /* relative Z motion (wheel) */
+};
+
+#define XENKBD_IN_EVENT_SIZE 40
+
+union xenkbd_in_event
+{
+    uint8_t type;
+    struct xenkbd_motion motion;
+    struct xenkbd_key key;
+    struct xenkbd_position pos;
+    char pad[XENKBD_IN_EVENT_SIZE];
+};
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ * No out events currently defined.
+ */
+
+#define XENKBD_OUT_EVENT_SIZE 40
+
+union xenkbd_out_event
+{
+    uint8_t type;
+    char pad[XENKBD_OUT_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENKBD_IN_RING_SIZE 2048
+#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
+#define XENKBD_IN_RING_OFFS 1024
+#define XENKBD_IN_RING(page) \
+    ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
+#define XENKBD_IN_RING_REF(page, idx) \
+    (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
+
+#define XENKBD_OUT_RING_SIZE 1024
+#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
+#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
+#define XENKBD_OUT_RING(page) \
+    ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
+#define XENKBD_OUT_RING_REF(page, idx) \
+    (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
+
+struct xenkbd_page
+{
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
+};
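+
+/*
+ * Illustrative sketch (not part of this header): demultiplexing one
+ * in-event by type.  The handlers named in the comments are
+ * hypothetical.
+ */
+#if 0
+static void
+xenkbd_event(union xenkbd_in_event *ev)
+{
+    switch (ev->type) {
+    case XENKBD_TYPE_MOTION:
+        /* mouse_rel(ev->motion.rel_x, ev->motion.rel_y); */
+        break;
+    case XENKBD_TYPE_KEY:
+        /* key_event(ev->key.keycode, ev->key.pressed); */
+        break;
+    case XENKBD_TYPE_POS:
+        /* mouse_abs(ev->pos.abs_x, ev->pos.abs_y); */
+        break;
+    default:
+        break;                  /* ignore unknown in events */
+    }
+}
+#endif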
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/libxenvchan.h
@@ -1,0 +1,97 @@
+/**
+ * @file
+ * @section AUTHORS
+ *
+ * Copyright (C) 2010  Rafal Wojtczuk  <[email protected]>
+ *
+ *  Authors:
+ *       Rafal Wojtczuk  <[email protected]>
+ *       Daniel De Graaf <[email protected]>
+ *
+ * @section LICENSE
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ * @section DESCRIPTION
+ *
+ *  Originally borrowed from the Qubes OS Project, http://www.qubes-os.org,
+ *  this code has been substantially rewritten to use the gntdev and gntalloc
+ *  devices instead of raw MFNs and map_foreign_range.
+ *
+ *  This is a library for inter-domain communication.  A standard Xen ring
+ *  buffer is used, with a datagram-based interface built on top.  The grant
+ *  reference and event channels are shared in XenStore under a user-specified
+ *  path.
+ *
+ *  The ring.h macros define an asymmetric interface to a shared data structure
+ *  that assumes all rings reside in a single contiguous memory space. This is
+ *  not suitable for vchan because the interface to the ring is symmetric except
+ *  for the setup. Unlike the producer-consumer rings defined in ring.h, the
+ *  size of the rings used in vchan are determined at execution time instead of
+ *  compile time, so the macros in ring.h cannot be used to access the rings.
+ */
+
+#include <stdint.h>
+#include <sys/types.h>
+
+struct ring_shared {
+	uint32_t cons, prod;
+};
+
+#define VCHAN_NOTIFY_WRITE 0x1
+#define VCHAN_NOTIFY_READ 0x2
+
+/**
+ * vchan_interface: primary shared data structure
+ */
+struct vchan_interface {
+	/**
+	 * Standard consumer/producer interface, one pair per buffer
+	 * left is client write, server read
+	 * right is client read, server write
+	 */
+	struct ring_shared left, right;
+	/**
+	 * size of the rings, which determines their location
+	 * 10   - at offset 1024 in ring's page
+	 * 11   - at offset 2048 in ring's page
+	 * 12+  - uses 2^(N-12) grants to describe the multi-page ring
+	 * These should remain constant once the page is shared.
+	 * Only one of the two orders can be 10 (or 11).
+	 */
+	uint16_t left_order, right_order;
+	/**
+	 * Shutdown detection:
+	 *  0: client (or server) has exited
+	 *  1: client (or server) is connected
+	 *  2: client has not yet connected
+	 */
+	uint8_t cli_live, srv_live;
+	/**
+	 * Notification bits:
+	 *  VCHAN_NOTIFY_WRITE: send notify when data is written
+	 *  VCHAN_NOTIFY_READ: send notify when data is read (consumed)
+	 * cli_notify is used for the client to inform the server of its action
+	 */
+	uint8_t cli_notify, srv_notify;
+	/**
+	 * Grant list: ordering is left, right. Must not extend into actual ring
+	 * or grow beyond the end of the initial shared page.
+	 * These should remain constant once the page is shared, to allow
+	 * for possible remapping by a client that restarts.
+	 */
+	uint32_t grants[0];
+};
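+
+/*
+ * Illustrative sketch (not part of this header): deriving a ring's
+ * size and location from its order field, per the table above.  An
+ * offset of 0 here means the ring lives in separate granted pages
+ * rather than in the shared page itself.
+ */
+#if 0
+static uint32_t
+vchan_ring_size(uint16_t order)
+{
+    return (uint32_t)1 << order;
+}
+
+static uint32_t
+vchan_ring_offset(uint16_t order)
+{
+    if (order == 10)
+        return 1024;
+    if (order == 11)
+        return 2048;
+    return 0;                   /* 12+: multi-page ring via grants[] */
+}
+#endif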
+
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/netif.h
@@ -1,0 +1,236 @@
+/******************************************************************************
+ * netif.h
+ * 
+ * Unified network-device I/O interface for Xen guest OSes.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_NETIF_H__
+#define __XEN_PUBLIC_IO_NETIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Older implementations of the Xen network frontend / backend have an
+ * implicit dependency on MAX_SKB_FRAGS as the maximum number of
+ * ring slots an skb can use. Netfront / netback may not work as
+ * expected when frontend and backend have different MAX_SKB_FRAGS.
+ *
+ * A better approach is to add mechanism for netfront / netback to
+ * negotiate this value. However we cannot fix all possible
+ * frontends, so we need to define a value which states the minimum
+ * slots backend must support.
+ *
+ * The minimum value derives from older Linux kernels' MAX_SKB_FRAGS
+ * (18), which has proved to work with most frontends. Any new backend
+ * which doesn't negotiate with the frontend should expect the frontend
+ * to send a valid packet using slots up to this value.
+ */
+#define XEN_NETIF_NR_SLOTS_MIN 18
+
+/*
+ * Notifications after enqueuing any type of message should be conditional on
+ * the appropriate req_event or rsp_event field in the shared ring.
+ * If the client sends notification for rx requests then it should specify
+ * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
+ * that it cannot safely queue packets (as it may not be kicked to send them).
+ */
+
+/*
+ * "feature-split-event-channels" is introduced to separate guest TX
+ * and RX notification. Backend either doesn't support this feature or
+ * advertises it via xenstore as 0 (disabled) or 1 (enabled).
+ *
+ * To make use of this feature, the frontend should allocate two event
+ * channels for TX and RX and advertise them to the backend as
+ * "event-channel-tx" and "event-channel-rx" respectively. If the frontend
+ * doesn't want to use this feature, it just writes the "event-channel"
+ * node as before.
+ */
+
+/*
+ * This is the 'wire' format for packets:
+ *  Request 1: netif_tx_request -- NETTXF_* (any flags)
+ * [Request 2: netif_tx_extra]  (only if request 1 has NETTXF_extra_info)
+ * [Request 3: netif_tx_extra]  (only if request 2 has XEN_NETIF_EXTRA_MORE)
+ *  Request 4: netif_tx_request -- NETTXF_more_data
+ *  Request 5: netif_tx_request -- NETTXF_more_data
+ *  ...
+ *  Request N: netif_tx_request -- 0
+ */
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETTXF_csum_blank     (0)
+#define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank)
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETTXF_data_validated (1)
+#define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the next request descriptor. */
+#define _NETTXF_more_data      (2)
+#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETTXF_extra_info     (3)
+#define  NETTXF_extra_info     (1U<<_NETTXF_extra_info)
+
+#define XEN_NETIF_MAX_TX_SIZE 0xFFFF
+struct netif_tx_request {
+    grant_ref_t gref;      /* Reference to buffer page */
+    uint16_t offset;       /* Offset within buffer page */
+    uint16_t flags;        /* NETTXF_* */
+    uint16_t id;           /* Echoed in response message. */
+    uint16_t size;         /* Packet size in bytes.       */
+};
+typedef struct netif_tx_request netif_tx_request_t;
+
+/* Types of netif_extra_info descriptors. */
+#define XEN_NETIF_EXTRA_TYPE_NONE      (0)  /* Never used - invalid */
+#define XEN_NETIF_EXTRA_TYPE_GSO       (1)  /* u.gso */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2)  /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3)  /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MAX       (4)
+
+/* netif_extra_info flags. */
+#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
+#define XEN_NETIF_EXTRA_FLAG_MORE  (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
+
+/* GSO types - only TCPv4 currently supported. */
+#define XEN_NETIF_GSO_TYPE_TCPV4        (1)
+
+/*
+ * This structure needs to fit within both netif_tx_request and
+ * netif_rx_response for compatibility.
+ */
+struct netif_extra_info {
+    uint8_t type;  /* XEN_NETIF_EXTRA_TYPE_* */
+    uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */
+
+    union {
+        /*
+         * XEN_NETIF_EXTRA_TYPE_GSO:
+         */
+        struct {
+            /*
+             * Maximum payload size of each segment. For example, for TCP this
+             * is just the path MSS.
+             */
+            uint16_t size;
+
+            /*
+             * GSO type. This determines the protocol of the packet and any
+             * extra features required to segment the packet properly.
+             */
+            uint8_t type; /* XEN_NETIF_GSO_TYPE_* */
+
+            /* Future expansion. */
+            uint8_t pad;
+
+            /*
+             * GSO features. This specifies any extra GSO features required
+             * to process this packet, such as ECN support for TCPv4.
+             */
+            uint16_t features; /* XEN_NETIF_GSO_FEAT_* */
+        } gso;
+
+        /*
+         * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
+         * Backend advertises availability via 'feature-multicast-control'
+         * xenbus node containing value '1'.
+         * Frontend requests this feature by advertising
+         * 'request-multicast-control' xenbus node containing value '1'.
+         * If multicast control is requested then multicast flooding is
+         * disabled and the frontend must explicitly register its interest
+         * in multicast groups using dummy transmit requests containing
+         * MCAST_{ADD,DEL} extra-info fragments.
+         */
+        struct {
+            uint8_t addr[6]; /* Address to add/remove. */
+        } mcast;
+
+        uint16_t pad[3];
+    } u;
+};
+typedef struct netif_extra_info netif_extra_info_t;
+
+struct netif_tx_response {
+    uint16_t id;
+    int16_t  status;       /* NETIF_RSP_* */
+};
+typedef struct netif_tx_response netif_tx_response_t;
+
+struct netif_rx_request {
+    uint16_t    id;        /* Echoed in response message.        */
+    grant_ref_t gref;      /* Reference to incoming granted frame */
+};
+typedef struct netif_rx_request netif_rx_request_t;
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETRXF_data_validated (0)
+#define  NETRXF_data_validated (1U<<_NETRXF_data_validated)
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETRXF_csum_blank     (1)
+#define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
+
+/* Packet continues in the next request descriptor. */
+#define _NETRXF_more_data      (2)
+#define  NETRXF_more_data      (1U<<_NETRXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETRXF_extra_info     (3)
+#define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
+
+struct netif_rx_response {
+    uint16_t id;
+    uint16_t offset;       /* Offset in page of start of received packet  */
+    uint16_t flags;        /* NETRXF_* */
+    int16_t  status;       /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */
+};
+typedef struct netif_rx_response netif_rx_response_t;
+
+/*
+ * Generate netif ring structures and types.
+ */
+
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
+
+#define NETIF_RSP_DROPPED         -2
+#define NETIF_RSP_ERROR           -1
+#define NETIF_RSP_OKAY             0
+/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
+#define NETIF_RSP_NULL             1
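+
+/*
+ * An illustrative sketch: a frontend consuming TX responses, assuming a
+ * connected netif_tx_front_ring_t 'tx' (hypothetical). Slots answered
+ * with NETIF_RSP_NULL carried auxiliary requests and have no useful
+ * status:
+ *
+ *     netif_tx_response_t *rsp;
+ *
+ *     while (RING_HAS_UNCONSUMED_RESPONSES(&tx)) {
+ *         rsp = RING_GET_RESPONSE(&tx, tx.rsp_cons++);
+ *         if (rsp->status == NETIF_RSP_ERROR)
+ *             (recover the buffer identified by rsp->id)
+ *     }
+ *     (then use RING_FINAL_CHECK_FOR_RESPONSES before sleeping)
+ */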
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/pciif.h
@@ -1,0 +1,124 @@
+/*
+ * PCI Backend/Frontend Common Data Structures & Macros
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ *   Author: Ryan Wilson <[email protected]>
+ */
+#ifndef __XEN_PCI_COMMON_H__
+#define __XEN_PCI_COMMON_H__
+
+/* Be sure to bump this number if you change this file */
+#define XEN_PCI_MAGIC "7"
+
+/* xen_pci_sharedinfo flags */
+#define _XEN_PCIF_active     (0)
+#define XEN_PCIF_active      (1<<_XEN_PCIF_active)
+#define _XEN_PCIB_AERHANDLER (1)
+#define XEN_PCIB_AERHANDLER  (1<<_XEN_PCIB_AERHANDLER)
+#define _XEN_PCIB_active     (2)
+#define XEN_PCIB_active      (1<<_XEN_PCIB_active)
+
+/* xen_pci_op commands */
+#define XEN_PCI_OP_conf_read    	(0)
+#define XEN_PCI_OP_conf_write   	(1)
+#define XEN_PCI_OP_enable_msi   	(2)
+#define XEN_PCI_OP_disable_msi  	(3)
+#define XEN_PCI_OP_enable_msix  	(4)
+#define XEN_PCI_OP_disable_msix 	(5)
+#define XEN_PCI_OP_aer_detected 	(6)
+#define XEN_PCI_OP_aer_resume		(7)
+#define XEN_PCI_OP_aer_mmio		(8)
+#define XEN_PCI_OP_aer_slotreset	(9)
+
+/* xen_pci_op error numbers */
+#define XEN_PCI_ERR_success          (0)
+#define XEN_PCI_ERR_dev_not_found   (-1)
+#define XEN_PCI_ERR_invalid_offset  (-2)
+#define XEN_PCI_ERR_access_denied   (-3)
+#define XEN_PCI_ERR_not_implemented (-4)
+/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */
+#define XEN_PCI_ERR_op_failed       (-5)
+
+/*
+ * It should be (PAGE_SIZE - sizeof(struct xen_pci_op)) / sizeof(struct xen_msix_entry)
+ * and should not exceed 128.
+ */
+#define SH_INFO_MAX_VEC     128
+
+struct xen_msix_entry {
+    uint16_t vector;
+    uint16_t entry;
+};
+struct xen_pci_op {
+    /* IN: what action to perform: XEN_PCI_OP_* */
+    uint32_t cmd;
+
+    /* OUT: will contain an error number (if any) from errno.h */
+    int32_t err;
+
+    /* IN: which device to touch */
+    uint32_t domain; /* PCI Domain/Segment */
+    uint32_t bus;
+    uint32_t devfn;
+
+    /* IN: which configuration registers to touch */
+    int32_t offset;
+    int32_t size;
+
+    /* IN/OUT: Contains the result after a READ or the value to WRITE */
+    uint32_t value;
+    /* IN: Contains extra info for this operation */
+    uint32_t info;
+    /* IN: param for MSI-X */
+    struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC];
+};
+
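+/*
+ * An illustrative sketch of a frontend configuration-space read, assuming
+ * a mapped struct xen_pci_sharedinfo 'sh' and hypothetical
+ * notify_backend() and wait_while_active() helpers (the exact handshake
+ * is backend policy):
+ *
+ *     sh->op.cmd    = XEN_PCI_OP_conf_read;
+ *     sh->op.domain = 0;
+ *     sh->op.bus    = 0;
+ *     sh->op.devfn  = (3 << 3) | 0;    (device 3, function 0)
+ *     sh->op.offset = 0;               (vendor/device ID dword)
+ *     sh->op.size   = 4;
+ *     sh->flags |= XEN_PCIF_active;
+ *     notify_backend();
+ *     wait_while_active();             (backend clears XEN_PCIF_active)
+ *     if (sh->op.err == XEN_PCI_ERR_success)
+ *         (sh->op.value now holds the register contents)
+ */
+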
+/* used for PCIe AER handling */
+struct xen_pcie_aer_op
+{
+    /* IN: what action to perform: XEN_PCI_OP_* */
+    uint32_t cmd;
+    /* IN/OUT: returns aer_op result or carries error_detected state as input */
+    int32_t err;
+
+    /* IN: which device to touch */
+    uint32_t domain; /* PCI Domain/Segment*/
+    uint32_t bus;
+    uint32_t devfn;
+};
+struct xen_pci_sharedinfo {
+    /* flags - XEN_PCIF_* */
+    uint32_t flags;
+    struct xen_pci_op op;
+    struct xen_pcie_aer_op aer_op;
+};
+
+#endif /* __XEN_PCI_COMMON_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/protocols.h
@@ -1,0 +1,40 @@
+/******************************************************************************
+ * protocols.h
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi"
+#define XEN_IO_PROTO_ABI_ARM        "arm-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__arm__) || defined(__aarch64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
+#else
+# error arch fixup needed here
+#endif
+
+#endif
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/ring.h
@@ -1,0 +1,312 @@
+/******************************************************************************
+ * ring.h
+ * 
+ * Shared producer-consumer ring macros.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Tim Deegan and Andrew Warfield November 2004.
+ */
+
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
+
+#include "../xen-compat.h"
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030208
+#define xen_mb()  mb()
+#define xen_rmb() rmb()
+#define xen_wmb() wmb()
+#endif
+
+typedef unsigned int RING_IDX;
+
+/* Round a 32-bit unsigned constant down to the nearest power of two. */
+#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2                  : ((_x) & 0x1))
+#define __RD4(_x)  (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2    : __RD2(_x))
+#define __RD8(_x)  (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4    : __RD4(_x))
+#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8    : __RD8(_x))
+#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
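+
+/*
+ * For example, __RD32(336) evaluates to 256 and __RD32(512) to 512: the
+ * result is the largest power of two not exceeding the argument.
+ */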
+
+/*
+ * Calculate size of a shared ring, given the total available space for the
+ * ring and indexes (_sz), and the name tag of the request/response structure.
+ * A ring contains as many entries as will fit, rounded down to the nearest 
+ * power of two (so we can mask with (size-1) to loop around).
+ */
+#define __CONST_RING_SIZE(_s, _sz) \
+    (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
+	    sizeof(((struct _s##_sring *)0)->ring[0])))
+/*
+ * The same for passing in an actual pointer instead of a name tag.
+ */
+#define __RING_SIZE(_s, _sz) \
+    (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
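+
+/*
+ * Worked example (assuming a 4096-byte shared page and the 64-byte header
+ * formed below by the four RING_IDX fields, the private union and __pad):
+ * with a 12-byte union entry, (4096 - 64) / 12 = 336 slots fit, which
+ * __RD32 rounds down to a 256-entry ring.
+ */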
+
+/*
+ * Macros to make the correct C datatypes for a new kind of ring.
+ * 
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t, and response_t already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
+ *
+ *     DEFINE_RING_TYPES(mytag, request_t, response_t);
+ *
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
+ * 
+ *     mytag_sring_t      - The shared ring.
+ *     mytag_front_ring_t - The 'front' half of the ring.
+ *     mytag_back_ring_t  - The 'back' half of the ring.
+ *
+ * To initialize a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialise
+ * the front half:
+ *
+ *     mytag_front_ring_t front_ring;
+ *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initializing the back follows similarly (note that only the front
+ * initializes the shared ring):
+ *
+ *     mytag_back_ring_t back_ring;
+ *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ */
+
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)                     \
+                                                                        \
+/* Shared ring entry */                                                 \
+union __name##_sring_entry {                                            \
+    __req_t req;                                                        \
+    __rsp_t rsp;                                                        \
+};                                                                      \
+                                                                        \
+/* Shared ring page */                                                  \
+struct __name##_sring {                                                 \
+    RING_IDX req_prod, req_event;                                       \
+    RING_IDX rsp_prod, rsp_event;                                       \
+    union {                                                             \
+        struct {                                                        \
+            uint8_t smartpoll_active;                                   \
+        } netif;                                                        \
+        struct {                                                        \
+            uint8_t msg;                                                \
+        } tapif_user;                                                   \
+        uint8_t pvt_pad[4];                                             \
+    } private;                                                          \
+    uint8_t __pad[44];                                                  \
+    union __name##_sring_entry ring[1]; /* variable-length */           \
+};                                                                      \
+                                                                        \
+/* "Front" end's private variables */                                   \
+struct __name##_front_ring {                                            \
+    RING_IDX req_prod_pvt;                                              \
+    RING_IDX rsp_cons;                                                  \
+    unsigned int nr_ents;                                               \
+    struct __name##_sring *sring;                                       \
+};                                                                      \
+                                                                        \
+/* "Back" end's private variables */                                    \
+struct __name##_back_ring {                                             \
+    RING_IDX rsp_prod_pvt;                                              \
+    RING_IDX req_cons;                                                  \
+    unsigned int nr_ents;                                               \
+    struct __name##_sring *sring;                                       \
+};                                                                      \
+                                                                        \
+/* Syntactic sugar */                                                   \
+typedef struct __name##_sring __name##_sring_t;                         \
+typedef struct __name##_front_ring __name##_front_ring_t;               \
+typedef struct __name##_back_ring __name##_back_ring_t
+
+/*
+ * Macros for manipulating rings.
+ * 
+ * FRONT_RING_whatever works on the "front end" of a ring: here 
+ * requests are pushed on to the ring and responses taken off it.
+ * 
+ * BACK_RING_whatever works on the "back end" of a ring: here 
+ * requests are taken off the ring and responses put on.
+ * 
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. 
+ * This is OK in 1-for-1 request-response situations where the 
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
+ */
+
+/* Initialising empty rings */
+#define SHARED_RING_INIT(_s) do {                                       \
+    (_s)->req_prod  = (_s)->rsp_prod  = 0;                              \
+    (_s)->req_event = (_s)->rsp_event = 1;                              \
+    (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \
+    (void)memset((_s)->__pad, 0, sizeof((_s)->__pad));                  \
+} while(0)
+
+#define FRONT_RING_INIT(_r, _s, __size) do {                            \
+    (_r)->req_prod_pvt = 0;                                             \
+    (_r)->rsp_cons = 0;                                                 \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+    (_r)->sring = (_s);                                                 \
+} while (0)
+
+#define BACK_RING_INIT(_r, _s, __size) do {                             \
+    (_r)->rsp_prod_pvt = 0;                                             \
+    (_r)->req_cons = 0;                                                 \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+    (_r)->sring = (_s);                                                 \
+} while (0)
+
+/* How big is this ring? */
+#define RING_SIZE(_r)                                                   \
+    ((_r)->nr_ents)
+
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r)                                          \
+    (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
+/* Test if there is an empty slot available on the front ring.
+ * (This is only meaningful from the front.)
+ */
+#define RING_FULL(_r)                                                   \
+    (RING_FREE_REQUESTS(_r) == 0)
+
+/* Test if there are outstanding messages to be processed on a ring. */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
+    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
+
+#ifdef __GNUC__
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                             \
+    unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;          \
+    unsigned int rsp = RING_SIZE(_r) -                                  \
+        ((_r)->req_cons - (_r)->rsp_prod_pvt);                          \
+    req < rsp ? req : rsp;                                              \
+})
+#else
+/* Same as above, but without the nice GCC ({ ... }) syntax. */
+#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
+    ((((_r)->sring->req_prod - (_r)->req_cons) <                        \
+      (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?        \
+     ((_r)->sring->req_prod - (_r)->req_cons) :                         \
+     (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
+#endif
+
+/* Direct access to individual ring elements, by index. */
+#define RING_GET_REQUEST(_r, _idx)                                      \
+    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+
+#define RING_GET_RESPONSE(_r, _idx)                                     \
+    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
+
+/* Loop termination condition: Would the specified index overflow the ring? */
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                           \
+    (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)                           \
+    (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+#define RING_PUSH_REQUESTS(_r) do {                                     \
+    xen_wmb(); /* back sees requests /before/ updated producer index */ \
+    (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do {                                    \
+    xen_wmb(); /* front sees resps /before/ updated producer index */   \
+    (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                         \
+} while (0)
+
+/*
+ * Notification hold-off (req_event and rsp_event):
+ * 
+ * When queueing requests or responses on a shared ring, it may not always be
+ * necessary to notify the remote end. For example, if requests are in flight
+ * in a backend, the front may be able to queue further requests without
+ * notifying the back (if the back checks for new requests when it queues
+ * responses).
+ * 
+ * When enqueuing requests or responses:
+ * 
+ *  Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
+ *  is a boolean return value. True indicates that the receiver requires an
+ *  asynchronous notification.
+ * 
+ * After dequeuing requests or responses (before sleeping the connection):
+ * 
+ *  Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
+ *  The second argument is a boolean return value. True indicates that there
+ *  are pending messages on the ring (i.e., the connection should not be put
+ *  to sleep).
+ * 
+ *  These macros will set the req_event/rsp_event field to trigger a
+ *  notification on the very next message that is enqueued. If you want to
+ *  create batches of work (i.e., only receive a notification after several
+ *  messages have been enqueued) then you will need to create a customised
+ *  version of the FINAL_CHECK macro in your own code, which sets the event
+ *  field appropriately.
+ */
+
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {           \
+    RING_IDX __old = (_r)->sring->req_prod;                             \
+    RING_IDX __new = (_r)->req_prod_pvt;                                \
+    xen_wmb(); /* back sees requests /before/ updated producer index */ \
+    (_r)->sring->req_prod = __new;                                      \
+    xen_mb(); /* back sees new requests /before/ we check req_event */  \
+    (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <           \
+                 (RING_IDX)(__new - __old));                            \
+} while (0)
+
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {          \
+    RING_IDX __old = (_r)->sring->rsp_prod;                             \
+    RING_IDX __new = (_r)->rsp_prod_pvt;                                \
+    xen_wmb(); /* front sees resps /before/ updated producer index */   \
+    (_r)->sring->rsp_prod = __new;                                      \
+    xen_mb(); /* front sees new resps /before/ we check rsp_event */    \
+    (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <           \
+                 (RING_IDX)(__new - __old));                            \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {             \
+    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
+    if (_work_to_do) break;                                             \
+    (_r)->sring->req_event = (_r)->req_cons + 1;                        \
+    xen_mb();                                                           \
+    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {            \
+    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
+    if (_work_to_do) break;                                             \
+    (_r)->sring->rsp_event = (_r)->rsp_cons + 1;                        \
+    xen_mb();                                                           \
+    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
+} while (0)
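+
+/*
+ * An illustrative sketch of the canonical backend service loop, assuming
+ * a connected mytag_back_ring_t 'back' and hypothetical handle_request(),
+ * fill_response() and notify_frontend() helpers:
+ *
+ *     int work, notify;
+ *
+ *     do {
+ *         while (RING_HAS_UNCONSUMED_REQUESTS(&back)) {
+ *             handle_request(RING_GET_REQUEST(&back, back.req_cons++));
+ *             fill_response(RING_GET_RESPONSE(&back, back.rsp_prod_pvt++));
+ *         }
+ *         RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&back, notify);
+ *         if (notify)
+ *             notify_frontend();
+ *         RING_FINAL_CHECK_FOR_REQUESTS(&back, work);
+ *     } while (work);
+ */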
+
+#endif /* __XEN_PUBLIC_IO_RING_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/tpmif.h
@@ -1,0 +1,143 @@
+/******************************************************************************
+ * tpmif.h
+ *
+ * TPM I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, IBM Corporation
+ *
+ * Author: Stefan Berger, [email protected]
+ * Grant table support: Mahadevan Gomathisankaran
+ *
+ * This code has been derived from tools/libxc/xen/io/netif.h
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_TPMIF_H__
+#define __XEN_PUBLIC_IO_TPMIF_H__
+
+#include "../grant_table.h"
+
+struct tpmif_tx_request {
+    unsigned long addr;   /* Machine address of packet.   */
+    grant_ref_t ref;      /* grant table access reference */
+    uint16_t unused;
+    uint16_t size;        /* Packet size in bytes.        */
+};
+typedef struct tpmif_tx_request tpmif_tx_request_t;
+
+/*
+ * TPMIF_TX_RING_SIZE defines the number of pages the frontend and
+ * backend can exchange (i.e. the size of the ring array).
+ */
+typedef uint32_t TPMIF_RING_IDX;
+
+#define TPMIF_TX_RING_SIZE 1
+
+/* This structure must fit in a memory page. */
+
+struct tpmif_ring {
+    struct tpmif_tx_request req;
+};
+typedef struct tpmif_ring tpmif_ring_t;
+
+struct tpmif_tx_interface {
+    struct tpmif_ring ring[TPMIF_TX_RING_SIZE];
+};
+typedef struct tpmif_tx_interface tpmif_tx_interface_t;
+
+/******************************************************************************
+ * TPM I/O interface for Xen guest OSes, v2
+ *
+ * Author: Daniel De Graaf <[email protected]>
+ *
+ * This protocol emulates the request/response behavior of a TPM using a Xen
+ * shared memory interface. All interaction with the TPM is at the direction
+ * of the frontend, since a TPM (hardware or virtual) is a passive device -
+ * the backend only processes commands as requested by the frontend.
+ *
+ * The frontend sends a request to the TPM by populating the shared page with
+ * the request packet, changing the state to TPMIF_STATE_SUBMIT, and sending
+ * an event channel notification. When the backend is finished, it will set
+ * the state to TPMIF_STATE_FINISH and send an event channel notification.
+ *
+ * In order to allow long-running commands to be canceled, the frontend can
+ * at any time change the state to TPMIF_STATE_CANCEL and send a notification.
+ * The TPM can either finish the command (changing state to TPMIF_STATE_FINISH)
+ * or can cancel the command and change the state to TPMIF_STATE_IDLE. The TPM
+ * can also change the state to TPMIF_STATE_IDLE instead of TPMIF_STATE_FINISH
+ * if another reason for cancellation is required - for example, a physical
+ * TPM may cancel a command if the interface is seized by another locality.
+ *
+ * The TPM command format is defined by the TCG, and is available at
+ * http://www.trustedcomputinggroup.org/resources/tpm_main_specification
+ */
+
+enum tpmif_state {
+    TPMIF_STATE_IDLE,        /* no contents / vTPM idle / cancel complete */
+    TPMIF_STATE_SUBMIT,      /* request ready / vTPM working */
+    TPMIF_STATE_FINISH,      /* response ready / vTPM idle */
+    TPMIF_STATE_CANCEL,      /* cancel requested / vTPM working */
+};
+/* Note: The backend should only change state to IDLE or FINISH, while the
+ * frontend should only change to SUBMIT or CANCEL. Status changes do not need
+ * to use atomic operations.
+ */
+
+
+/* The shared page for vTPM request/response packets looks like:
+ *
+ *  Offset               Contents
+ *  =================================================
+ *  0                    struct tpmif_shared_page
+ *  16                   [optional] List of grant IDs
+ *  16+4*nr_extra_pages  TPM packet data
+ *
+ * If the TPM packet data extends beyond the end of a single page, the grant IDs
+ * defined in extra_pages are used as if they were mapped immediately following
+ * the primary shared page. The grants are allocated by the frontend and mapped
+ * by the backend. Before sending a request spanning multiple pages, the
+ * frontend should verify that the TPM supports such large requests by querying
+ * the TPM_CAP_PROP_INPUT_BUFFER property from the TPM.
+ */
+struct tpmif_shared_page {
+    uint32_t length;         /* request/response length in bytes */
+
+    uint8_t state;           /* enum tpmif_state */
+    uint8_t locality;        /* for the current request */
+    uint8_t pad;             /* should be zero */
+
+    uint8_t nr_extra_pages;  /* extra pages for long packets; may be zero */
+    uint32_t extra_pages[0]; /* grant IDs; length is actually nr_extra_pages */
+};
+typedef struct tpmif_shared_page tpmif_shared_page_t;
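+
+/*
+ * An illustrative sketch of the v2 submit path described above, assuming
+ * a mapped tpmif_shared_page_t 'shr', a single-page command 'cmd' of
+ * 'len' bytes and a hypothetical notify_backend() helper:
+ *
+ *     shr->nr_extra_pages = 0;
+ *     memcpy((uint8_t *)shr + 16, cmd, len);   (per the layout above)
+ *     shr->length = len;
+ *     shr->state = TPMIF_STATE_SUBMIT;         (set the state last)
+ *     notify_backend();
+ *     (wait for shr->state == TPMIF_STATE_FINISH; the response then
+ *      occupies the same data area, with its length in shr->length)
+ */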
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/usbif.h
@@ -1,0 +1,150 @@
+/*
+ * usbif.h
+ *
+ * USB I/O interface for Xen guest OSes.
+ *
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
+ * Author: Noboru Iwamatsu <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_IO_USBIF_H__
+#define __XEN_PUBLIC_IO_USBIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+enum usb_spec_version {
+	USB_VER_UNKNOWN = 0,
+	USB_VER_USB11,
+	USB_VER_USB20,
+	USB_VER_USB30,	/* not supported yet */
+};
+
+/*
+ *  USB pipe in usbif_request
+ *
+ *  bits 0-5  are specific bits for the virtual USB driver.
+ *  bits 7-31 are the standard urb pipe.
+ *
+ *  - port number (NEW):    bits 0-4   (USB_MAXCHILDREN is 31)
+ *
+ *  - operation flag (NEW): bit 5      (0 = submit urb, 1 = unlink urb)
+ *
+ *  - direction:            bit 7      (0 = Host-to-Device [Out],
+ *                                      1 = Device-to-Host [In])
+ *
+ *  - device address:       bits 8-14
+ *
+ *  - endpoint:             bits 15-18
+ *
+ *  - pipe type:            bits 30-31 (00 = isochronous, 01 = interrupt,
+ *                                      10 = control, 11 = bulk)
+ */
+#define usbif_pipeportnum(pipe) ((pipe) & 0x1f)
+#define usbif_setportnum_pipe(pipe, portnum) \
+	((pipe)|(portnum))
+
+#define usbif_pipeunlink(pipe) ((pipe) & 0x20)
+#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe))
+#define usbif_setunlink_pipe(pipe) ((pipe)|(0x20))
+
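+/*
+ * The remaining fields follow the standard urb pipe layout. Illustrative
+ * decode expressions derived from the bit layout above (these are not
+ * macros defined by this interface):
+ *
+ *     device address:  ((pipe) >> 8) & 0x7f
+ *     endpoint:        ((pipe) >> 15) & 0xf
+ *     direction (in):  ((pipe) >> 7) & 1
+ *     pipe type:       ((pipe) >> 30) & 3
+ */
+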
+#define USBIF_MAX_SEGMENTS_PER_REQUEST (16)
+
+/*
+ * RING for transferring urbs.
+ */
+struct usbif_request_segment {
+	grant_ref_t gref;
+	uint16_t offset;
+	uint16_t length;
+};
+
+struct usbif_urb_request {
+	uint16_t id; /* request id */
+	uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */
+
+	/* basic urb parameter */
+	uint32_t pipe;
+	uint16_t transfer_flags;
+	uint16_t buffer_length;
+	union {
+		uint8_t ctrl[8]; /* setup_packet (Ctrl) */
+
+		struct {
+			uint16_t interval; /* maximum (1024*8) in usb core */
+			uint16_t start_frame; /* start frame */
			uint16_t number_of_packets; /* number of ISO packets */
+			uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */
+		} isoc;
+
+		struct {
+			uint16_t interval; /* maximum (1024*8) in usb core */
+			uint16_t pad[3];
+		} intr;
+
+		struct {
+			uint16_t unlink_id; /* unlink request id */
+			uint16_t pad[3];
+		} unlink;
+
+	} u;
+
+	/* urb data segments */
+	struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct usbif_urb_request usbif_urb_request_t;
+
+struct usbif_urb_response {
+	uint16_t id; /* request id */
+	uint16_t start_frame;  /* start frame (ISO) */
+	int32_t status; /* status (non-ISO) */
+	int32_t actual_length; /* actual transfer length */
+	int32_t error_count; /* number of ISO errors */
+};
+typedef struct usbif_urb_response usbif_urb_response_t;
+
+DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response);
+#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, PAGE_SIZE)
+
+/*
+ * RING for notifying connect/disconnect events to frontend
+ */
+struct usbif_conn_request {
+	uint16_t id;
+};
+typedef struct usbif_conn_request usbif_conn_request_t;
+
+struct usbif_conn_response {
+	uint16_t id; /* request id */
+	uint8_t portnum; /* port number */
+	uint8_t speed; /* usb_device_speed */
+};
+typedef struct usbif_conn_response usbif_conn_response_t;
+
+DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response);
+#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, PAGE_SIZE)
+
+#endif /* __XEN_PUBLIC_IO_USBIF_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/vscsiif.h
@@ -1,0 +1,117 @@
+/******************************************************************************
+ * vscsiif.h
+ * 
+ * Based on the blkif.h code.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright(c) FUJITSU Limited 2008.
+ */
+
+#ifndef __XEN__PUBLIC_IO_SCSI_H__
+#define __XEN__PUBLIC_IO_SCSI_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/* commands between backend and frontend */
+#define VSCSIIF_ACT_SCSI_CDB         1    /* SCSI CDB command */
+#define VSCSIIF_ACT_SCSI_ABORT       2    /* SCSI Device (LUN) Abort */
+#define VSCSIIF_ACT_SCSI_RESET       3    /* SCSI Device (LUN) Reset */
+#define VSCSIIF_ACT_SCSI_SG_PRESET   4    /* Preset SG elements */
+
+/*
+ * Maximum scatter/gather segments per request.
+ *
+ * Considering balance between allocating at least 16 "vscsiif_request"
+ * structures on one page (4096 bytes) and the number of scatter/gather
+ * elements needed, we decided to use 26 as a magic number.
+ */
+#define VSCSIIF_SG_TABLESIZE             26
+
+/*
+ * based on Linux kernel 2.6.18
+ */
+#define VSCSIIF_MAX_COMMAND_SIZE         16
+#define VSCSIIF_SENSE_BUFFERSIZE         96
+
+struct scsiif_request_segment {
+    grant_ref_t gref;
+    uint16_t offset;
+    uint16_t length;
+};
+typedef struct scsiif_request_segment vscsiif_segment_t;
+
+struct vscsiif_request {
+    uint16_t rqid;          /* private guest value, echoed in resp  */
+    uint8_t act;            /* command between backend and frontend */
+    uint8_t cmd_len;
+
+    uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE];
+    uint16_t timeout_per_command;     /* The backend issues the command
+                                         with twice this value as the
+                                         timeout. */
+    uint16_t channel, id, lun;
+    uint16_t padding;
+    uint8_t sc_data_direction;        /* for DMA_TO_DEVICE(1)
+                                         DMA_FROM_DEVICE(2)
+                                         DMA_NONE(3) requests  */
+    uint8_t nr_segments;              /* Number of pieces of scatter-gather */
+
+    vscsiif_segment_t seg[VSCSIIF_SG_TABLESIZE];
+    uint32_t reserved[3];
+};
+typedef struct vscsiif_request vscsiif_request_t;
+
+#define VSCSIIF_SG_LIST_SIZE ((sizeof(vscsiif_request_t) - 4) \
+                              / sizeof(vscsiif_segment_t))
+
+struct vscsiif_sg_list {
+    /* First two fields must match struct vscsiif_request! */
+    uint16_t rqid;          /* private guest value, must match main req */
+    uint8_t act;            /* VSCSIIF_ACT_SCSI_SG_PRESET */
+    uint8_t nr_segments;    /* Number of pieces of scatter-gather */
+    vscsiif_segment_t seg[VSCSIIF_SG_LIST_SIZE];
+};
+typedef struct vscsiif_sg_list vscsiif_sg_list_t;
+
+struct vscsiif_response {
+    uint16_t rqid;
+    uint8_t act;               /* valid only when backend supports SG_PRESET */
+    uint8_t sense_len;
+    uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
+    int32_t rslt;
+    uint32_t residual_len;     /* request bufflen minus bytes actually
+                                  transferred, from the physical device */
+    uint32_t reserved[36];
+};
+typedef struct vscsiif_response vscsiif_response_t;
+
+DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response);
+
+
+#endif  /*__XEN__PUBLIC_IO_SCSI_H__*/
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/xenbus.h
@@ -1,0 +1,80 @@
+/*****************************************************************************
+ * xenbus.h
+ *
+ * Xenbus protocol details.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 XenSource Ltd.
+ */
+
+#ifndef _XEN_PUBLIC_IO_XENBUS_H
+#define _XEN_PUBLIC_IO_XENBUS_H
+
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus.  States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+    XenbusStateUnknown       = 0,
+
+    XenbusStateInitialising  = 1,
+
+    /*
+     * InitWait: Finished early initialisation but waiting for information
+     * from the peer or hotplug scripts.
+     */
+    XenbusStateInitWait      = 2,
+
+    /*
+     * Initialised: Waiting for a connection from the peer.
+     */
+    XenbusStateInitialised   = 3,
+
+    XenbusStateConnected     = 4,
+
+    /*
+     * Closing: The device is being closed due to an error or an unplug event.
+     */
+    XenbusStateClosing       = 5,
+
+    XenbusStateClosed        = 6,
+
+    /*
+     * Reconfiguring: The device is being reconfigured.
+     */
+    XenbusStateReconfiguring = 7,
+
+    XenbusStateReconfigured  = 8
+};
+typedef enum xenbus_state XenbusState;
+
+#endif /* _XEN_PUBLIC_IO_XENBUS_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/io/xs_wire.h
@@ -1,0 +1,138 @@
+/*
+ * Details of the "wire" protocol between Xen Store Daemon and client
+ * library or guest kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ */
+
+#ifndef _XS_WIRE_H
+#define _XS_WIRE_H
+
+enum xsd_sockmsg_type
+{
+    XS_DEBUG,
+    XS_DIRECTORY,
+    XS_READ,
+    XS_GET_PERMS,
+    XS_WATCH,
+    XS_UNWATCH,
+    XS_TRANSACTION_START,
+    XS_TRANSACTION_END,
+    XS_INTRODUCE,
+    XS_RELEASE,
+    XS_GET_DOMAIN_PATH,
+    XS_WRITE,
+    XS_MKDIR,
+    XS_RM,
+    XS_SET_PERMS,
+    XS_WATCH_EVENT,
+    XS_ERROR,
+    XS_IS_DOMAIN_INTRODUCED,
+    XS_RESUME,
+    XS_SET_TARGET,
+    XS_RESTRICT,
+    XS_RESET_WATCHES
+};
+
+#define XS_WRITE_NONE "NONE"
+#define XS_WRITE_CREATE "CREATE"
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
+
+/* We pass errors as strings, for portability. */
+struct xsd_errors
+{
+    int errnum;
+    const char *errstring;
+};
+#ifdef EINVAL
+#define XSD_ERROR(x) { x, #x }
+/* LINTED: static unused */
+static struct xsd_errors xsd_errors[]
+#if defined(__GNUC__)
+__attribute__((unused))
+#endif
+    = {
+    XSD_ERROR(EINVAL),
+    XSD_ERROR(EACCES),
+    XSD_ERROR(EEXIST),
+    XSD_ERROR(EISDIR),
+    XSD_ERROR(ENOENT),
+    XSD_ERROR(ENOMEM),
+    XSD_ERROR(ENOSPC),
+    XSD_ERROR(EIO),
+    XSD_ERROR(ENOTEMPTY),
+    XSD_ERROR(ENOSYS),
+    XSD_ERROR(EROFS),
+    XSD_ERROR(EBUSY),
+    XSD_ERROR(EAGAIN),
+    XSD_ERROR(EISCONN),
+    XSD_ERROR(E2BIG)
+};
+#endif
+
+struct xsd_sockmsg
+{
+    uint32_t type;  /* XS_??? */
+    uint32_t req_id;/* Request identifier, echoed in daemon's response.  */
+    uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */
+    uint32_t len;   /* Length of data following this. */
+
+    /* Generally followed by nul-terminated string(s). */
+};
+
+enum xs_watch_type
+{
+    XS_WATCH_PATH = 0,
+    XS_WATCH_TOKEN
+};
+
+/*
+ * `incontents 150 xenstore_struct XenStore wire protocol.
+ *
+ * Inter-domain shared memory communications. */
+#define XENSTORE_RING_SIZE 1024
+typedef uint32_t XENSTORE_RING_IDX;
+#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1))
+struct xenstore_domain_interface {
+    char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
+    char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
+    XENSTORE_RING_IDX req_cons, req_prod;
+    XENSTORE_RING_IDX rsp_cons, rsp_prod;
+};
+
+/* Violating this is very bad.  See docs/misc/xenstore.txt. */
+#define XENSTORE_PAYLOAD_MAX 4096
+
+/* Violating these just gets you an error back */
+#define XENSTORE_ABS_PATH_MAX 3072
+#define XENSTORE_REL_PATH_MAX 2048
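+
+/*
+ * An illustrative sketch: appending one request's byte stream to the
+ * request ring, assuming a mapped struct xenstore_domain_interface
+ * 'intf', a hypothetical write barrier mb(), and that the caller has
+ * verified that 'len' bytes fit
+ * (req_prod - req_cons <= XENSTORE_RING_SIZE - len):
+ *
+ *     XENSTORE_RING_IDX prod = intf->req_prod;
+ *     int i;
+ *
+ *     for (i = 0; i < len; i++)
+ *         intf->req[MASK_XENSTORE_IDX(prod++)] = data[i];
+ *     mb();                    (data visible before the index update)
+ *     intf->req_prod = prod;
+ *     (then notify the xenstore event channel)
+ */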
+
+#endif /* _XS_WIRE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/kexec.h
@@ -1,0 +1,165 @@
+/******************************************************************************
+ * kexec.h - Public portion
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ * 
+ * Xen port written by:
+ * - Simon 'Horms' Horman <[email protected]>
+ * - Magnus Damm <[email protected]>
+ */
+
+#ifndef _XEN_PUBLIC_KEXEC_H
+#define _XEN_PUBLIC_KEXEC_H
+
+
+/* This file describes the Kexec / Kdump hypercall interface for Xen.
+ *
+ * Kexec under vanilla Linux allows a user to reboot the physical machine 
+ * into a new user-specified kernel. The Xen port extends this idea
+ * to allow rebooting of the machine from dom0. When kexec for dom0
+ * is used to reboot,  both the hypervisor and the domains get replaced
+ * with some other kernel. It is possible to kexec between vanilla
+ * Linux and Xen and back again. Xen to Xen works well too.
+ *
+ * The hypercall interface for kexec can be divided into three main
+ * types of hypercall operations:
+ *
+ * 1) Range information:
+ *    This is used by the dom0 kernel to ask the hypervisor about various
+ *    address ranges. This information is needed to allow kexec-tools
+ *    to fill in the ELF headers for /proc/vmcore properly.
+ *
+ * 2) Load and unload of images:
+ *    There are no big surprises here, the kexec binary from kexec-tools
+ *    runs in userspace in dom0. The tool loads/unloads data into the
+ *    dom0 kernel, such as the new kernel, initramfs and hypervisor. Once
+ *    loaded, the dom0 kernel performs the load hypercall operation, and
+ *    before releasing all page references it calls unload.
+ *
+ * 3) Kexec operation:
+ *    This is used to start a previously loaded kernel.
+ */
+
+#include "xen.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#define KEXEC_XEN_NO_PAGES 17
+#endif
+
+/*
+ * Prototype for this hypercall is:
+ *  int kexec_op(int cmd, void *args)
+ * @cmd  == KEXEC_CMD_... 
+ *          KEXEC operation to perform
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Kexec supports two types of operation:
+ * - kexec into a regular kernel, very similar to a standard reboot
+ *   - KEXEC_TYPE_DEFAULT is used to specify this type
+ * - kexec into a special "crash kernel", aka kexec-on-panic
+ *   - KEXEC_TYPE_CRASH is used to specify this type
+ *   - parts of our system may be broken at kexec-on-panic time
+ *     - the code should be kept as simple and self-contained as possible
+ */
+
+#define KEXEC_TYPE_DEFAULT 0
+#define KEXEC_TYPE_CRASH   1
+
+
+/* The kexec implementation for Xen allows the user to load two
+ * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH.
+ * All data needed for a kexec reboot is kept in one xen_kexec_image_t
+ * per "instance". The data mainly consists of machine address lists to pages
+ * together with destination addresses. The data in xen_kexec_image_t
+ * is passed to the "code page" which is one page of code that performs
+ * the final relocations before jumping to the new kernel.
+ */
+ 
+typedef struct xen_kexec_image {
+#if defined(__i386__) || defined(__x86_64__)
+    unsigned long page_list[KEXEC_XEN_NO_PAGES];
+#endif
+    unsigned long indirection_page;
+    unsigned long start_address;
+} xen_kexec_image_t;
+
+/*
+ * Perform kexec having previously loaded a kexec or kdump kernel
+ * as appropriate.
+ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ */
+#define KEXEC_CMD_kexec                 0
+typedef struct xen_kexec_exec {
+    int type;
+} xen_kexec_exec_t;
+
+/*
+ * Load/Unload kernel image for kexec or kdump.
+ * type  == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ * image == relocation information for kexec (ignored for unload) [in]
+ */
+#define KEXEC_CMD_kexec_load            1
+#define KEXEC_CMD_kexec_unload          2
+typedef struct xen_kexec_load {
+    int type;
+    xen_kexec_image_t image;
+} xen_kexec_load_t;
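+
+/*
+ * An illustrative sketch, assuming a hypothetical kexec_op() wrapper for
+ * the hypercall prototype above and an already-populated
+ * xen_kexec_image_t 'image':
+ *
+ *     xen_kexec_load_t load;
+ *
+ *     load.type  = KEXEC_TYPE_CRASH;
+ *     load.image = image;      (page_list, indirection_page, start_address)
+ *     if (kexec_op(KEXEC_CMD_kexec_load, &load) == 0)
+ *         (the crash kernel is now armed for kexec-on-panic)
+ */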
+
+#define KEXEC_RANGE_MA_CRASH      0 /* machine address and size of crash area */
+#define KEXEC_RANGE_MA_XEN        1 /* machine address and size of Xen itself */
+#define KEXEC_RANGE_MA_CPU        2 /* machine address and size of a CPU note */
+#define KEXEC_RANGE_MA_XENHEAP    3 /* machine address and size of xenheap
+                                     * Note that although this is adjacent
+                                     * to Xen it exists in a separate EFI
+                                     * region on ia64, and thus needs to be
+                                     * inserted into iomem_machine separately */
+#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* Obsolete: machine address and size of
+                                     * the ia64_boot_param */
+#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size
+                                     * of the EFI Memory Map */
+#define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */
+
+/*
+ * Find the address and size of certain memory areas
+ * range == KEXEC_RANGE_... [in]
+ * nr    == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in]
+ * size  == number of bytes reserved in window [out]
+ * start == address of the first byte in the window [out]
+ */
+#define KEXEC_CMD_kexec_get_range       3
+typedef struct xen_kexec_range {
+    int range;
+    int nr;
+    unsigned long size;
+    unsigned long start;
+} xen_kexec_range_t;
+
+#endif /* _XEN_PUBLIC_KEXEC_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/mem_event.h
@@ -1,0 +1,82 @@
+/******************************************************************************
+ * mem_event.h
+ *
+ * Memory event common structures.
+ *
+ * Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _XEN_PUBLIC_MEM_EVENT_H
+#define _XEN_PUBLIC_MEM_EVENT_H
+
+#include "xen.h"
+#include "io/ring.h"
+
+/* Memory event flags */
+#define MEM_EVENT_FLAG_VCPU_PAUSED  (1 << 0)
+#define MEM_EVENT_FLAG_DROP_PAGE    (1 << 1)
+#define MEM_EVENT_FLAG_EVICT_FAIL   (1 << 2)
+#define MEM_EVENT_FLAG_FOREIGN      (1 << 3)
+#define MEM_EVENT_FLAG_DUMMY        (1 << 4)
+
+/* Reasons for the memory event request */
+#define MEM_EVENT_REASON_UNKNOWN     0    /* typical reason */
+#define MEM_EVENT_REASON_VIOLATION   1    /* access violation, GFN is address */
+#define MEM_EVENT_REASON_CR0         2    /* CR0 was hit: gfn is CR0 value */
+#define MEM_EVENT_REASON_CR3         3    /* CR3 was hit: gfn is CR3 value */
+#define MEM_EVENT_REASON_CR4         4    /* CR4 was hit: gfn is CR4 value */
+#define MEM_EVENT_REASON_INT3        5    /* int3 was hit: gla/gfn are RIP */
+#define MEM_EVENT_REASON_SINGLESTEP  6    /* single step was invoked: gla/gfn are RIP */
+#define MEM_EVENT_REASON_MSR         7    /* MSR was hit: gfn is MSR value, gla is MSR address;
+                                             does NOT honour HVMPME_onchangeonly */
+
+typedef struct mem_event_st {
+    uint32_t flags;
+    uint32_t vcpu_id;
+
+    uint64_t gfn;
+    uint64_t offset;
+    uint64_t gla; /* if gla_valid */
+
+    uint32_t p2mt;
+
+    uint16_t access_r:1;
+    uint16_t access_w:1;
+    uint16_t access_x:1;
+    uint16_t gla_valid:1;
+    uint16_t available:12;
+
+    uint16_t reason;
+} mem_event_request_t, mem_event_response_t;
+
+DEFINE_RING_TYPES(mem_event, mem_event_request_t, mem_event_response_t);
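+
+/*
+ * Illustrative sketch (not part of the Xen interface): draining requests
+ * from the shared ring with the generic macros from io/ring.h.  Assumes
+ * the consumer has already mapped the ring page into mem_event_back_ring_t
+ * via BACK_RING_INIT.
+ *
+ *  mem_event_back_ring_t *ring;    // set up elsewhere
+ *  mem_event_request_t req;
+ *  RING_IDX cons;
+ *
+ *  while (RING_HAS_UNCONSUMED_REQUESTS(ring)) {
+ *      cons = ring->req_cons;
+ *      req = *RING_GET_REQUEST(ring, cons);
+ *      ring->req_cons = cons + 1;
+ *      // ... act on req, then queue a mem_event_response_t ...
+ *  }
+ */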
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/memory.h
@@ -1,0 +1,472 @@
+/******************************************************************************
+ * memory.h
+ * 
+ * Memory reservation and information.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_MEMORY_H__
+#define __XEN_PUBLIC_MEMORY_H__
+
+#include "xen.h"
+
+/*
+ * Increase or decrease the specified domain's memory reservation. Returns the
+ * number of extents successfully allocated or freed.
+ * arg == addr of struct xen_memory_reservation.
+ */
+#define XENMEM_increase_reservation 0
+#define XENMEM_decrease_reservation 1
+#define XENMEM_populate_physmap     6
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+/*
+ * Maximum # bits addressable by the user of the allocated region (e.g., I/O 
+ * devices often have a 32-bit limitation even in 64-bit systems). If zero 
+ * then the user has no addressing restriction. This field is not used by 
+ * XENMEM_decrease_reservation.
+ */
+#define XENMEMF_address_bits(x)     (x)
+#define XENMEMF_get_address_bits(x) ((x) & 0xffu)
+/* NUMA node to allocate from. */
+#define XENMEMF_node(x)     (((x) + 1) << 8)
+#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+/* Flag to populate physmap with populate-on-demand entries */
+#define XENMEMF_populate_on_demand (1<<16)
+/* Flag to request allocation only from the node specified */
+#define XENMEMF_exact_node_request  (1<<17)
+#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
+#endif
+
+struct xen_memory_reservation {
+
+    /*
+     * XENMEM_increase_reservation:
+     *   OUT: MFN (*not* GMFN) bases of extents that were allocated
+     * XENMEM_decrease_reservation:
+     *   IN:  GMFN bases of extents to free
+     * XENMEM_populate_physmap:
+     *   IN:  GPFN bases of extents to populate with memory
+     *   OUT: GMFN bases of extents that were allocated
+     *   (NB. This command also updates the mach_to_phys translation table)
+     * XENMEM_claim_pages:
+     *   IN: must be zero
+     */
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+    /* Number of extents, and size/alignment of each (2^extent_order pages). */
+    xen_ulong_t    nr_extents;
+    unsigned int   extent_order;
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+    /* XENMEMF flags. */
+    unsigned int   mem_flags;
+#else
+    unsigned int   address_bits;
+#endif
+
+    /*
+     * Domain whose reservation is being changed.
+     * Unprivileged domains can specify only DOMID_SELF.
+     */
+    domid_t        domid;
+};
+typedef struct xen_memory_reservation xen_memory_reservation_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
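+
+/*
+ * Illustrative sketch (not part of the Xen interface): asking Xen for one
+ * more page.  Assumes a HYPERVISOR_memory_op(cmd, arg) hypercall wrapper
+ * and a one-entry frame list; the call returns the number of extents
+ * actually allocated.
+ *
+ *  xen_pfn_t mfn;
+ *  struct xen_memory_reservation r;
+ *
+ *  memset(&r, 0, sizeof r);
+ *  set_xen_guest_handle(r.extent_start, &mfn);
+ *  r.nr_extents = 1;
+ *  r.extent_order = 0;    // single page
+ *  r.domid = DOMID_SELF;
+ *  if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &r) != 1)
+ *      panic("no memory from Xen");
+ */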
+
+/*
+ * An atomic exchange of memory pages. If return code is zero then
+ * @out.extent_list provides GMFNs of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code.
+ * On complete success then always @nr_exchanged == @in.nr_extents.
+ * On partial success @nr_exchanged indicates how much work was done.
+ */
+#define XENMEM_exchange             11
+struct xen_memory_exchange {
+    /*
+     * [IN] Details of memory extents to be exchanged (GMFN bases).
+     * Note that @in.address_bits is ignored and unused.
+     */
+    struct xen_memory_reservation in;
+
+    /*
+     * [IN/OUT] Details of new memory extents.
+     * We require that:
+     *  1. @in.domid == @out.domid
+     *  2. @in.nr_extents  << @in.extent_order == 
+     *     @out.nr_extents << @out.extent_order
+     *  3. @in.extent_start and @out.extent_start lists must not overlap
+     *  4. @out.extent_start lists GPFN bases to be populated
+     *  5. @out.extent_start is overwritten with allocated GMFN bases
+     */
+    struct xen_memory_reservation out;
+
+    /*
+     * [OUT] Number of input extents that were successfully exchanged:
+     *  1. The first @nr_exchanged input extents were successfully
+     *     deallocated.
+     *  2. The corresponding first entries in the output extent list correctly
+     *     indicate the GMFNs that were successfully exchanged.
+     *  3. All other input and output extents are untouched.
+     *  4. If not all input extents are exchanged then the return code of this
+     *     command will be non-zero.
+     *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
+     */
+    xen_ulong_t nr_exchanged;
+};
+typedef struct xen_memory_exchange xen_memory_exchange_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
+
+/*
+ * Returns the maximum machine frame number of mapped RAM in this system.
+ * This command always succeeds (it never returns an error code).
+ * arg == NULL.
+ */
+#define XENMEM_maximum_ram_page     2
+
+/*
+ * Returns the current or maximum memory reservation, in pages, of the
+ * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
+ * arg == addr of domid_t.
+ */
+#define XENMEM_current_reservation  3
+#define XENMEM_maximum_reservation  4
+
+/*
+ * Returns the maximum GPFN in use by the guest, or -ve errcode on failure.
+ */
+#define XENMEM_maximum_gpfn         14
+
+/*
+ * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table do not implement
+ * this command.
+ * arg == addr of xen_machphys_mfn_list_t.
+ */
+#define XENMEM_machphys_mfn_list    5
+struct xen_machphys_mfn_list {
+    /*
+     * Size of the 'extent_start' array. Fewer entries will be filled if the
+     * machphys table is smaller than max_extents * 2MB.
+     */
+    unsigned int max_extents;
+
+    /*
+     * Pointer to buffer to fill with list of extent starts. If there are
+     * any large discontiguities in the machine address space, 2MB gaps in
+     * the machphys table will be represented by an MFN base of zero.
+     */
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+    /*
+     * Number of extents written to the above array. This will be smaller
+     * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
+     */
+    unsigned int nr_extents;
+};
+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
+
+/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping     12
+struct xen_machphys_mapping {
+    xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
+    xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
+};
+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
+
+/* Source mapping space. */
+/* ` enum phys_map_space { */
+#define XENMAPSPACE_shared_info  0 /* shared info page */
+#define XENMAPSPACE_grant_table  1 /* grant table page */
+#define XENMAPSPACE_gmfn         2 /* GMFN */
+#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, XENMEM_add_to_physmap_range only. */
+/* ` } */
+
+/*
+ * Sets the GPFN at which a particular page appears in the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_add_to_physmap_t.
+ */
+#define XENMEM_add_to_physmap      7
+struct xen_add_to_physmap {
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+
+    /* Number of pages to go through for gmfn_range */
+    uint16_t    size;
+
+    unsigned int space; /* => enum phys_map_space */
+
+#define XENMAPIDX_grant_table_status 0x80000000
+
+    /* Index into space being mapped. */
+    xen_ulong_t idx;
+
+    /* GPFN in domid where the source mapping page should appear. */
+    xen_pfn_t     gpfn;
+};
+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
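+
+/*
+ * Illustrative sketch (not part of the Xen interface): mapping the shared
+ * info page into the guest physmap, the classic use of this call.  Assumes
+ * a HYPERVISOR_memory_op wrapper; `gpfn' is a free guest frame chosen by
+ * the caller.
+ *
+ *  struct xen_add_to_physmap xatp;
+ *
+ *  xatp.domid = DOMID_SELF;
+ *  xatp.space = XENMAPSPACE_shared_info;
+ *  xatp.idx = 0;
+ *  xatp.gpfn = gpfn;
+ *  if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0)
+ *      panic("cannot map shared info page");
+ */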
+
+/* A batched version of add_to_physmap. */
+#define XENMEM_add_to_physmap_range 23
+struct xen_add_to_physmap_range {
+    /* IN */
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+    uint16_t space; /* => enum phys_map_space */
+
+    /* Number of pages to go through */
+    uint16_t size;
+    domid_t foreign_domid; /* IFF gmfn_foreign */
+
+    /* Indexes into space being mapped. */
+    XEN_GUEST_HANDLE(xen_ulong_t) idxs;
+
+    /* GPFN in domid where the source mapping page should appear. */
+    XEN_GUEST_HANDLE(xen_pfn_t) gpfns;
+
+    /* OUT */
+
+    /* Per index error code. */
+    XEN_GUEST_HANDLE(int) errs;
+};
+typedef struct xen_add_to_physmap_range xen_add_to_physmap_range_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_range_t);
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap      15
+struct xen_remove_from_physmap {
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+
+    /* GPFN of the current mapping of the page. */
+    xen_pfn_t     gpfn;
+};
+typedef struct xen_remove_from_physmap xen_remove_from_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t);
+
+/*** REMOVED ***/
+/*#define XENMEM_translate_gpfn_list  8*/
+
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started (specified by XENMEM_set_memory_map).
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_memory_map           9
+struct xen_memory_map {
+    /*
+     * On call the number of entries which can be stored in buffer. On
+     * return the number of entries which have been stored in
+     * buffer.
+     */
+    unsigned int nr_entries;
+
+    /*
+     * Entries in the buffer are in the same format as returned by the
+     * BIOS INT 0x15 EAX=0xE820 call.
+     */
+    XEN_GUEST_HANDLE(void) buffer;
+};
+typedef struct xen_memory_map xen_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_machine_memory_map   10
+
+/*
+ * Set the pseudo-physical memory map of a domain, as returned by
+ * XENMEM_memory_map.
+ * arg == addr of xen_foreign_memory_map_t.
+ */
+#define XENMEM_set_memory_map       13
+struct xen_foreign_memory_map {
+    domid_t domid;
+    struct xen_memory_map map;
+};
+typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
+
+#define XENMEM_set_pod_target       16
+#define XENMEM_get_pod_target       17
+struct xen_pod_target {
+    /* IN */
+    uint64_t target_pages;
+    /* OUT */
+    uint64_t tot_pages;
+    uint64_t pod_cache_pages;
+    uint64_t pod_entries;
+    /* IN */
+    domid_t domid;
+};
+typedef struct xen_pod_target xen_pod_target_t;
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#ifndef uint64_aligned_t
+#define uint64_aligned_t uint64_t
+#endif
+
+/*
+ * Get the number of MFNs saved through memory sharing.
+ * The call never fails. 
+ */
+#define XENMEM_get_sharing_freed_pages    18
+#define XENMEM_get_sharing_shared_pages   19
+
+#define XENMEM_paging_op                    20
+#define XENMEM_paging_op_nominate           0
+#define XENMEM_paging_op_evict              1
+#define XENMEM_paging_op_prep               2
+
+#define XENMEM_access_op                    21
+#define XENMEM_access_op_resume             0
+
+struct xen_mem_event_op {
+    uint8_t     op;         /* XENMEM_*_op_* */
+    domid_t     domain;
+
+    /* PAGING_PREP IN: buffer to immediately fill page in */
+    uint64_aligned_t    buffer;
+    /* Other OPs */
+    uint64_aligned_t    gfn;           /* IN:  gfn of page being operated on */
+};
+typedef struct xen_mem_event_op xen_mem_event_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t);
+
+#define XENMEM_sharing_op                   22
+#define XENMEM_sharing_op_nominate_gfn      0
+#define XENMEM_sharing_op_nominate_gref     1
+#define XENMEM_sharing_op_share             2
+#define XENMEM_sharing_op_resume            3
+#define XENMEM_sharing_op_debug_gfn         4
+#define XENMEM_sharing_op_debug_mfn         5
+#define XENMEM_sharing_op_debug_gref        6
+#define XENMEM_sharing_op_add_physmap       7
+#define XENMEM_sharing_op_audit             8
+
+#define XENMEM_SHARING_OP_S_HANDLE_INVALID  (-10)
+#define XENMEM_SHARING_OP_C_HANDLE_INVALID  (-9)
+
+/* The following allows sharing of grant refs. This is useful
+ * for sharing utilities sitting as "filters" in IO backends
+ * (e.g. memshr + blktap(2)). The IO backend is only exposed 
+ * to grant references, and this allows sharing of the grefs. */
+#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG   (1ULL << 62)
+
+#define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val)  \
+    (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val)
+#define XENMEM_SHARING_OP_FIELD_IS_GREF(field)         \
+    ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)
+#define XENMEM_SHARING_OP_FIELD_GET_GREF(field)        \
+    ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG))
+
+struct xen_mem_sharing_op {
+    uint8_t     op;     /* XENMEM_sharing_op_* */
+    domid_t     domain;
+
+    union {
+        struct mem_sharing_op_nominate {  /* OP_NOMINATE_xxx           */
+            union {
+                uint64_aligned_t gfn;     /* IN: gfn to nominate       */
+                uint32_t      grant_ref;  /* IN: grant ref to nominate */
+            } u;
+            uint64_aligned_t  handle;     /* OUT: the handle           */
+        } nominate;
+        struct mem_sharing_op_share {     /* OP_SHARE/ADD_PHYSMAP */
+            uint64_aligned_t source_gfn;    /* IN: the gfn of the source page */
+            uint64_aligned_t source_handle; /* IN: handle to the source page */
+            uint64_aligned_t client_gfn;    /* IN: the client gfn */
+            uint64_aligned_t client_handle; /* IN: handle to the client page */
+            domid_t  client_domain; /* IN: the client domain id */
+        } share; 
+        struct mem_sharing_op_debug {     /* OP_DEBUG_xxx */
+            union {
+                uint64_aligned_t gfn;      /* IN: gfn to debug          */
+                uint64_aligned_t mfn;      /* IN: mfn to debug          */
+                uint32_t gref;     /* IN: gref to debug         */
+            } u;
+        } debug;
+    } u;
+};
+typedef struct xen_mem_sharing_op xen_mem_sharing_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
+
+/*
+ * Attempt to stake a claim for a domain on a quantity of pages
+ * of system RAM, but _not_ assign specific pageframes.  Only
+ * arithmetic is performed so the hypercall is very fast and need
+ * not be preemptible, thus sidestepping time-of-check-time-of-use
+ * races for memory allocation.  Returns 0 if the hypervisor page
+ * allocator has atomically and successfully claimed the requested
+ * number of pages, else non-zero.
+ *
+ * Any domain may have only one active claim.  When sufficient memory
+ * has been allocated to resolve the claim, the claim silently expires.
+ * Claiming zero pages effectively resets any outstanding claim and
+ * is always successful.
+ *
+ * Note that a valid claim may be staked even after memory has been
+ * allocated for a domain.  In this case, the claim is not incremental,
+ * i.e. if the domain's tot_pages is 3, and a claim is staked for 10,
+ * only 7 additional pages are claimed.
+ *
+ * Caller must be privileged or the hypercall fails.
+ */
+#define XENMEM_claim_pages                  24
+
+/*
+ * XENMEM_claim_pages flags - there are no flags at this time.
+ * The zero value is appropriate.
+ */
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#endif /* __XEN_PUBLIC_MEMORY_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/nmi.h
@@ -1,0 +1,85 @@
+/******************************************************************************
+ * nmi.h
+ * 
+ * NMI callback registration and reason codes.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_NMI_H__
+#define __XEN_PUBLIC_NMI_H__
+
+#include "xen.h"
+
+/*
+ * NMI reason codes:
+ * Currently these are x86-specific, stored in arch_shared_info.nmi_reason.
+ */
+ /* I/O-check error reported via ISA port 0x61, bit 6. */
+#define _XEN_NMIREASON_io_error     0
+#define XEN_NMIREASON_io_error      (1UL << _XEN_NMIREASON_io_error)
+ /* PCI SERR reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_pci_serr     1
+#define XEN_NMIREASON_pci_serr      (1UL << _XEN_NMIREASON_pci_serr)
+#if __XEN_INTERFACE_VERSION__ < 0x00040300 /* legacy alias of the above */
+ /* Parity error reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_parity_error 1
+#define XEN_NMIREASON_parity_error  (1UL << _XEN_NMIREASON_parity_error)
+#endif
+ /* Unknown hardware-generated NMI. */
+#define _XEN_NMIREASON_unknown      2
+#define XEN_NMIREASON_unknown       (1UL << _XEN_NMIREASON_unknown)
+
+/*
+ * long nmi_op(unsigned int cmd, void *arg)
+ * NB. All ops return zero on success, else a negative error code.
+ */
+
+/*
+ * Register NMI callback for this (calling) VCPU. Currently this only makes
+ * sense for domain 0, vcpu 0. All other callers will be returned EINVAL.
+ * arg == pointer to xennmi_callback structure.
+ */
+#define XENNMI_register_callback   0
+struct xennmi_callback {
+    unsigned long handler_address;
+    unsigned long pad;
+};
+typedef struct xennmi_callback xennmi_callback_t;
+DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t);
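+
+/*
+ * Illustrative sketch (not part of the Xen interface): dom0 vcpu0
+ * registering its NMI handler.  Assumes a HYPERVISOR_nmi_op(cmd, arg)
+ * wrapper and a guest-defined entry point `nmitrap'.
+ *
+ *  struct xennmi_callback cb;
+ *
+ *  cb.handler_address = (unsigned long)nmitrap;
+ *  cb.pad = 0;
+ *  if (HYPERVISOR_nmi_op(XENNMI_register_callback, &cb) != 0)
+ *      print("NMI callback rejected (not dom0 vcpu0?)\n");
+ */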
+
+/*
+ * Deregister NMI callback for this (calling) VCPU.
+ * arg == NULL.
+ */
+#define XENNMI_unregister_callback 1
+
+#endif /* __XEN_PUBLIC_NMI_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/physdev.h
@@ -1,0 +1,376 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_PHYSDEV_H__
+#define __XEN_PUBLIC_PHYSDEV_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ *  int physdev_op(int cmd, void *args)
+ * @cmd  == PHYSDEVOP_??? (physdev operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Notify end-of-interrupt (EOI) for the specified IRQ.
+ * @arg == pointer to physdev_eoi structure.
+ */
+#define PHYSDEVOP_eoi                   12
+struct physdev_eoi {
+    /* IN */
+    uint32_t irq;
+};
+typedef struct physdev_eoi physdev_eoi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
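+
+/*
+ * Illustrative sketch (not part of the Xen interface): signalling end of
+ * interrupt after servicing a passed-through IRQ.  Assumes a
+ * HYPERVISOR_physdev_op(cmd, arg) wrapper matching the prototype above.
+ *
+ *  struct physdev_eoi eoi;
+ *
+ *  eoi.irq = irq;
+ *  HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
+ */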
+
+/*
+ * Register a shared page for the hypervisor to indicate whether the guest
+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
+ * once the guest has used this function: the associated event channel is
+ * then unmasked automatically. The page registered is used as a bit
+ * array indexed by Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_gmfn_v1       17
+/*
+ * Register a shared page for the hypervisor to indicate whether the
+ * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to
+ * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of
+ * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by
+ * Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_gmfn_v2       28
+struct physdev_pirq_eoi_gmfn {
+    /* IN */
+    xen_pfn_t gmfn;
+};
+typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t);
+
+/*
+ * Query the status of an IRQ line.
+ * @arg == pointer to physdev_irq_status_query structure.
+ */
+#define PHYSDEVOP_irq_status_query       5
+struct physdev_irq_status_query {
+    /* IN */
+    uint32_t irq;
+    /* OUT */
+    uint32_t flags; /* XENIRQSTAT_* */
+};
+typedef struct physdev_irq_status_query physdev_irq_status_query_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t);
+
+/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
+#define _XENIRQSTAT_needs_eoi   (0)
+#define  XENIRQSTAT_needs_eoi   (1U<<_XENIRQSTAT_needs_eoi)
+
+/* IRQ shared by multiple guests? */
+#define _XENIRQSTAT_shared      (1)
+#define  XENIRQSTAT_shared      (1U<<_XENIRQSTAT_shared)
+
+/*
+ * Set the current VCPU's I/O privilege level.
+ * @arg == pointer to physdev_set_iopl structure.
+ */
+#define PHYSDEVOP_set_iopl               6
+struct physdev_set_iopl {
+    /* IN */
+    uint32_t iopl;
+};
+typedef struct physdev_set_iopl physdev_set_iopl_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t);
+
+/*
+ * Set the current VCPU's I/O-port permissions bitmap.
+ * @arg == pointer to physdev_set_iobitmap structure.
+ */
+#define PHYSDEVOP_set_iobitmap           7
+struct physdev_set_iobitmap {
+    /* IN */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+    XEN_GUEST_HANDLE(uint8) bitmap;
+#else
+    uint8_t *bitmap;
+#endif
+    uint32_t nr_ports;
+};
+typedef struct physdev_set_iobitmap physdev_set_iobitmap_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t);
+
+/*
+ * Read or write an IO-APIC register.
+ * @arg == pointer to physdev_apic structure.
+ */
+#define PHYSDEVOP_apic_read              8
+#define PHYSDEVOP_apic_write             9
+struct physdev_apic {
+    /* IN */
+    unsigned long apic_physbase;
+    uint32_t reg;
+    /* IN or OUT */
+    uint32_t value;
+};
+typedef struct physdev_apic physdev_apic_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
+
+/*
+ * Allocate or free a physical upcall vector for the specified IRQ line.
+ * @arg == pointer to physdev_irq structure.
+ */
+#define PHYSDEVOP_alloc_irq_vector      10
+#define PHYSDEVOP_free_irq_vector       11
+struct physdev_irq {
+    /* IN */
+    uint32_t irq;
+    /* IN or OUT */
+    uint32_t vector;
+};
+typedef struct physdev_irq physdev_irq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
+ 
+#define MAP_PIRQ_TYPE_MSI               0x0
+#define MAP_PIRQ_TYPE_GSI               0x1
+#define MAP_PIRQ_TYPE_UNKNOWN           0x2
+#define MAP_PIRQ_TYPE_MSI_SEG           0x3
+
+#define PHYSDEVOP_map_pirq               13
+struct physdev_map_pirq {
+    domid_t domid;
+    /* IN */
+    int type;
+    /* IN */
+    int index;
+    /* IN or OUT */
+    int pirq;
+    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
+    int bus;
+    /* IN */
+    int devfn;
+    /* IN */
+    int entry_nr;
+    /* IN */
+    uint64_t table_base;
+};
+typedef struct physdev_map_pirq physdev_map_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);
+
+#define PHYSDEVOP_unmap_pirq             14
+struct physdev_unmap_pirq {
+    domid_t domid;
+    /* IN */
+    int pirq;
+};
+
+typedef struct physdev_unmap_pirq physdev_unmap_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t);
+
+#define PHYSDEVOP_manage_pci_add         15
+#define PHYSDEVOP_manage_pci_remove      16
+struct physdev_manage_pci {
+    /* IN */
+    uint8_t bus;
+    uint8_t devfn;
+}; 
+
+typedef struct physdev_manage_pci physdev_manage_pci_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t);
+
+#define PHYSDEVOP_restore_msi            19
+struct physdev_restore_msi {
+    /* IN */
+    uint8_t bus;
+    uint8_t devfn;
+};
+typedef struct physdev_restore_msi physdev_restore_msi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t);
+
+#define PHYSDEVOP_manage_pci_add_ext     20
+struct physdev_manage_pci_ext {
+    /* IN */
+    uint8_t bus;
+    uint8_t devfn;
+    unsigned is_extfn;
+    unsigned is_virtfn;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
+};
+
+typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t);
+
+/*
+ * Argument to physdev_op_compat() hypercall. Superseded by new physdev_op()
+ * hypercall since 0x00030202.
+ */
+struct physdev_op {
+    uint32_t cmd;
+    union {
+        struct physdev_irq_status_query      irq_status_query;
+        struct physdev_set_iopl              set_iopl;
+        struct physdev_set_iobitmap          set_iobitmap;
+        struct physdev_apic                  apic_op;
+        struct physdev_irq                   irq_op;
+    } u;
+};
+typedef struct physdev_op physdev_op_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_op_t);
+
+#define PHYSDEVOP_setup_gsi    21
+struct physdev_setup_gsi {
+    /* IN */
+    int gsi;
+    /* IN */
+    uint8_t triggering;
+    /* IN */
+    uint8_t polarity;
+};
+
+typedef struct physdev_setup_gsi physdev_setup_gsi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t);
+
+/* leave PHYSDEVOP 22 free */
+
+/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI;
+ * the hypercall returns a free pirq */
+#define PHYSDEVOP_get_free_pirq    23
+struct physdev_get_free_pirq {
+    /* IN */ 
+    int type;
+    /* OUT */
+    uint32_t pirq;
+};
+
+typedef struct physdev_get_free_pirq physdev_get_free_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t);
+
+#define XEN_PCI_MMCFG_RESERVED         0x1
+
+#define PHYSDEVOP_pci_mmcfg_reserved    24
+struct physdev_pci_mmcfg_reserved {
+    uint64_t address;
+    uint16_t segment;
+    uint8_t start_bus;
+    uint8_t end_bus;
+    uint32_t flags;
+};
+typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t);
+
+#define XEN_PCI_DEV_EXTFN              0x1
+#define XEN_PCI_DEV_VIRTFN             0x2
+#define XEN_PCI_DEV_PXM                0x4
+
+#define PHYSDEVOP_pci_device_add        25
+struct physdev_pci_device_add {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+    uint32_t flags;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    uint32_t optarr[];
+#elif defined(__GNUC__)
+    uint32_t optarr[0];
+#endif
+};
+typedef struct physdev_pci_device_add physdev_pci_device_add_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
+
+#define PHYSDEVOP_pci_device_remove     26
+#define PHYSDEVOP_restore_msi_ext       27
+/*
+ * Dom0 should use these two to announce MMIO resources assigned to
+ * MSI-X capable devices won't (prepare) or may (release) change.
+ */
+#define PHYSDEVOP_prepare_msix          30
+#define PHYSDEVOP_release_msix          31
+struct physdev_pci_device {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+};
+typedef struct physdev_pci_device physdev_pci_device_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t);
+
+#define PHYSDEVOP_DBGP_RESET_PREPARE    1
+#define PHYSDEVOP_DBGP_RESET_DONE       2
+
+#define PHYSDEVOP_DBGP_BUS_UNKNOWN      0
+#define PHYSDEVOP_DBGP_BUS_PCI          1
+
+#define PHYSDEVOP_dbgp_op               29
+struct physdev_dbgp_op {
+    /* IN */
+    uint8_t op;
+    uint8_t bus;
+    union {
+        struct physdev_pci_device pci;
+    } u;
+};
+typedef struct physdev_dbgp_op physdev_dbgp_op_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t);
+
+/*
+ * Notify that some PIRQ-bound event channels have been unmasked.
+ * ** This command is obsolete since interface version 0x00030202 and is **
+ * ** unsupported by newer versions of Xen.                              **
+ */
+#define PHYSDEVOP_IRQ_UNMASK_NOTIFY      4
+
+/*
+ * These all-capitals physdev operation names are superseded by the new names
+ * (defined above) since interface version 0x00030202.
+ */
+#define PHYSDEVOP_IRQ_STATUS_QUERY       PHYSDEVOP_irq_status_query
+#define PHYSDEVOP_SET_IOPL               PHYSDEVOP_set_iopl
+#define PHYSDEVOP_SET_IOBITMAP           PHYSDEVOP_set_iobitmap
+#define PHYSDEVOP_APIC_READ              PHYSDEVOP_apic_read
+#define PHYSDEVOP_APIC_WRITE             PHYSDEVOP_apic_write
+#define PHYSDEVOP_ASSIGN_VECTOR          PHYSDEVOP_alloc_irq_vector
+#define PHYSDEVOP_FREE_VECTOR            PHYSDEVOP_free_irq_vector
+#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi
+#define PHYSDEVOP_IRQ_SHARED             XENIRQSTAT_shared
+
+#if __XEN_INTERFACE_VERSION__ < 0x00040200
+#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1
+#else
+#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2
+#endif
+
+#endif /* __XEN_PUBLIC_PHYSDEV_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/platform.h
@@ -1,0 +1,572 @@
+/******************************************************************************
+ * platform.h
+ * 
+ * Hardware platform operations. Intended for use by domain-0 kernel.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_PLATFORM_H__
+#define __XEN_PUBLIC_PLATFORM_H__
+
+#include "xen.h"
+
+#define XENPF_INTERFACE_VERSION 0x03000001
+
+/*
+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
+ * 1 January, 1970 if the current system time was <system_time>.
+ */
+#define XENPF_settime             17
+struct xenpf_settime {
+    /* IN variables. */
+    uint32_t secs;
+    uint32_t nsecs;
+    uint64_t system_time;
+};
+typedef struct xenpf_settime xenpf_settime_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t);
+
+/*
+ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
+ * On x86, @type is an architecture-defined MTRR memory type.
+ * On success, returns the MTRR that was used (@reg) and a handle that can
+ * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting.
+ * (x86-specific).
+ */
+#define XENPF_add_memtype         31
+struct xenpf_add_memtype {
+    /* IN variables. */
+    xen_pfn_t mfn;
+    uint64_t nr_mfns;
+    uint32_t type;
+    /* OUT variables. */
+    uint32_t handle;
+    uint32_t reg;
+};
+typedef struct xenpf_add_memtype xenpf_add_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t);
+
+/*
+ * Tear down an existing memory-range type. If @handle is remembered then it
+ * should be passed in to accurately tear down the correct setting (in case
+ * of overlapping memory regions with differing types). If it is not known
+ * then @handle should be set to zero. In all cases @reg must be set.
+ * (x86-specific).
+ */
+#define XENPF_del_memtype         32
+struct xenpf_del_memtype {
+    /* IN variables. */
+    uint32_t handle;
+    uint32_t reg;
+};
+typedef struct xenpf_del_memtype xenpf_del_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t);
+
+/* Read current type of an MTRR (x86-specific). */
+#define XENPF_read_memtype        33
+struct xenpf_read_memtype {
+    /* IN variables. */
+    uint32_t reg;
+    /* OUT variables. */
+    xen_pfn_t mfn;
+    uint64_t nr_mfns;
+    uint32_t type;
+};
+typedef struct xenpf_read_memtype xenpf_read_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t);
+
+#define XENPF_microcode_update    35
+struct xenpf_microcode_update {
+    /* IN variables. */
+    XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */
+    uint32_t length;                  /* Length of microcode data. */
+};
+typedef struct xenpf_microcode_update xenpf_microcode_update_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t);
+
+#define XENPF_platform_quirk      39
+#define QUIRK_NOIRQBALANCING      1 /* Do not restrict IO-APIC RTE targets */
+#define QUIRK_IOAPIC_BAD_REGSEL   2 /* IO-APIC REGSEL forgets its value    */
+#define QUIRK_IOAPIC_GOOD_REGSEL  3 /* IO-APIC REGSEL behaves properly     */
+struct xenpf_platform_quirk {
+    /* IN variables. */
+    uint32_t quirk_id;
+};
+typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);
+
+#define XENPF_efi_runtime_call    49
+#define XEN_EFI_get_time                      1
+#define XEN_EFI_set_time                      2
+#define XEN_EFI_get_wakeup_time               3
+#define XEN_EFI_set_wakeup_time               4
+#define XEN_EFI_get_next_high_monotonic_count 5
+#define XEN_EFI_get_variable                  6
+#define XEN_EFI_set_variable                  7
+#define XEN_EFI_get_next_variable_name        8
+#define XEN_EFI_query_variable_info           9
+#define XEN_EFI_query_capsule_capabilities   10
+#define XEN_EFI_update_capsule               11
+struct xenpf_efi_runtime_call {
+    uint32_t function;
+    /*
+     * This field is generally used for per sub-function flags (defined
+     * below), except for the XEN_EFI_get_next_high_monotonic_count case,
+     * where it holds the single returned value.
+     */
+    uint32_t misc;
+    unsigned long status;
+    union {
+#define XEN_EFI_GET_TIME_SET_CLEARS_NS 0x00000001
+        struct {
+            struct xenpf_efi_time {
+                uint16_t year;
+                uint8_t month;
+                uint8_t day;
+                uint8_t hour;
+                uint8_t min;
+                uint8_t sec;
+                uint32_t ns;
+                int16_t tz;
+                uint8_t daylight;
+            } time;
+            uint32_t resolution;
+            uint32_t accuracy;
+        } get_time;
+
+        struct xenpf_efi_time set_time;
+
+#define XEN_EFI_GET_WAKEUP_TIME_ENABLED 0x00000001
+#define XEN_EFI_GET_WAKEUP_TIME_PENDING 0x00000002
+        struct xenpf_efi_time get_wakeup_time;
+
+#define XEN_EFI_SET_WAKEUP_TIME_ENABLE      0x00000001
+#define XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY 0x00000002
+        struct xenpf_efi_time set_wakeup_time;
+
+#define XEN_EFI_VARIABLE_NON_VOLATILE       0x00000001
+#define XEN_EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002
+#define XEN_EFI_VARIABLE_RUNTIME_ACCESS     0x00000004
+        struct {
+            XEN_GUEST_HANDLE(void) name;  /* UCS-2/UTF-16 string */
+            unsigned long size;
+            XEN_GUEST_HANDLE(void) data;
+            struct xenpf_efi_guid {
+                uint32_t data1;
+                uint16_t data2;
+                uint16_t data3;
+                uint8_t data4[8];
+            } vendor_guid;
+        } get_variable, set_variable;
+
+        struct {
+            unsigned long size;
+            XEN_GUEST_HANDLE(void) name;  /* UCS-2/UTF-16 string */
+            struct xenpf_efi_guid vendor_guid;
+        } get_next_variable_name;
+
+#define XEN_EFI_VARINFO_BOOT_SNAPSHOT       0x00000001
+        struct {
+            uint32_t attr;
+            uint64_t max_store_size;
+            uint64_t remain_store_size;
+            uint64_t max_size;
+        } query_variable_info;
+
+        struct {
+            XEN_GUEST_HANDLE(void) capsule_header_array;
+            unsigned long capsule_count;
+            uint64_t max_capsule_size;
+            unsigned int reset_type;
+        } query_capsule_capabilities;
+
+        struct {
+            XEN_GUEST_HANDLE(void) capsule_header_array;
+            unsigned long capsule_count;
+            uint64_t sg_list; /* machine address */
+        } update_capsule;
+    } u;
+};
+typedef struct xenpf_efi_runtime_call xenpf_efi_runtime_call_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_efi_runtime_call_t);
+
+#define XENPF_firmware_info       50
+#define XEN_FW_DISK_INFO          1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO        3 /* from int 10 AX=4f15 */
+#define XEN_FW_EFI_INFO           4 /* from EFI */
+#define  XEN_FW_EFI_VERSION        0
+#define  XEN_FW_EFI_CONFIG_TABLE   1
+#define  XEN_FW_EFI_VENDOR         2
+#define  XEN_FW_EFI_MEM_INFO       3
+#define  XEN_FW_EFI_RT_VERSION     4
+#define  XEN_FW_EFI_PCI_ROM        5
+#define XEN_FW_KBD_SHIFT_FLAGS    5
+struct xenpf_firmware_info {
+    /* IN variables. */
+    uint32_t type;
+    uint32_t index;
+    /* OUT variables. */
+    union {
+        struct {
+            /* Int13, Fn48: Check Extensions Present. */
+            uint8_t device;                   /* %dl: bios device number */
+            uint8_t version;                  /* %ah: major version      */
+            uint16_t interface_support;       /* %cx: support bitmap     */
+            /* Int13, Fn08: Legacy Get Device Parameters. */
+            uint16_t legacy_max_cylinder;     /* %cl[7:6]:%ch: max cyl # */
+            uint8_t legacy_max_head;          /* %dh: max head #         */
+            uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector #  */
+            /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+            /* NB. First uint16_t of buffer must be set to buffer size.      */
+            XEN_GUEST_HANDLE(void) edd_params;
+        } disk_info; /* XEN_FW_DISK_INFO */
+        struct {
+            uint8_t device;                   /* bios device number  */
+            uint32_t mbr_signature;           /* offset 0x1b8 in mbr */
+        } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+        struct {
+            /* Int10, AX=4F15: Get EDID info. */
+            uint8_t capabilities;
+            uint8_t edid_transfer_time;
+            /* must refer to 128-byte buffer */
+            XEN_GUEST_HANDLE(uint8) edid;
+        } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+        union xenpf_efi_info {
+            uint32_t version;
+            struct {
+                uint64_t addr;                /* EFI_CONFIGURATION_TABLE */
+                uint32_t nent;
+            } cfg;
+            struct {
+                uint32_t revision;
+                uint32_t bufsz;               /* input, in bytes */
+                XEN_GUEST_HANDLE(void) name;  /* UCS-2/UTF-16 string */
+            } vendor;
+            struct {
+                uint64_t addr;
+                uint64_t size;
+                uint64_t attr;
+                uint32_t type;
+            } mem;
+            struct {
+                /* IN variables */
+                uint16_t segment;
+                uint8_t bus;
+                uint8_t devfn;
+                uint16_t vendor;
+                uint16_t devid;
+                /* OUT variables */
+                uint64_t address;
+                xen_ulong_t size;
+            } pci_rom;
+        } efi_info; /* XEN_FW_EFI_INFO */
+
+        /* Int16, Fn02: Get keyboard shift flags. */
+        uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */
+    } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
+
+#define XENPF_enter_acpi_sleep    51
+struct xenpf_enter_acpi_sleep {
+    /* IN variables */
+#if __XEN_INTERFACE_VERSION__ < 0x00040300
+    uint16_t pm1a_cnt_val;      /* PM1a control value. */
+    uint16_t pm1b_cnt_val;      /* PM1b control value. */
+#else
+    uint16_t val_a;             /* PM1a control / sleep type A. */
+    uint16_t val_b;             /* PM1b control / sleep type B. */
+#endif
+    uint32_t sleep_state;       /* Which state to enter (Sn). */
+#define XENPF_ACPI_SLEEP_EXTENDED 0x00000001
+    uint32_t flags;             /* XENPF_ACPI_SLEEP_*. */
+};
+typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t);
+
+#define XENPF_change_freq         52
+struct xenpf_change_freq {
+    /* IN variables */
+    uint32_t flags; /* Must be zero. */
+    uint32_t cpu;   /* Physical cpu. */
+    uint64_t freq;  /* New frequency (Hz). */
+};
+typedef struct xenpf_change_freq xenpf_change_freq_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t);
+
+/*
+ * Get idle times (nanoseconds since boot) for physical CPUs specified in the
+ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is
+ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap
+ * bit set are written to. On return, @cpumap_bitmap is modified so that any
+ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry
+ * cleared.
+ */
+#define XENPF_getidletime         53
+struct xenpf_getidletime {
+    /* IN/OUT variables */
+    /* IN: CPUs to interrogate; OUT: subset of IN which are present */
+    XEN_GUEST_HANDLE(uint8) cpumap_bitmap;
+    /* IN variables */
+    /* Size of cpumap bitmap. */
+    uint32_t cpumap_nr_cpus;
+    /* Must be indexable for every cpu in cpumap_bitmap. */
+    XEN_GUEST_HANDLE(uint64) idletime;
+    /* OUT variables */
+    /* System time when the idletime snapshots were taken. */
+    uint64_t now;
+};
+typedef struct xenpf_getidletime xenpf_getidletime_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
+
+#define XENPF_set_processor_pminfo      54
+
+/* ability bits */
+#define XEN_PROCESSOR_PM_CX	1
+#define XEN_PROCESSOR_PM_PX	2
+#define XEN_PROCESSOR_PM_TX	4
+
+/* cmd type */
+#define XEN_PM_CX   0
+#define XEN_PM_PX   1
+#define XEN_PM_TX   2
+#define XEN_PM_PDC  3
+
+/* Px sub info type */
+#define XEN_PX_PCT   1
+#define XEN_PX_PSS   2
+#define XEN_PX_PPC   4
+#define XEN_PX_PSD   8
+
+struct xen_power_register {
+    uint32_t     space_id;
+    uint32_t     bit_width;
+    uint32_t     bit_offset;
+    uint32_t     access_size;
+    uint64_t     address;
+};
+
+struct xen_processor_csd {
+    uint32_t    domain;      /* domain number of one dependent group */
+    uint32_t    coord_type;  /* coordination type */
+    uint32_t    num;         /* number of processors in same domain */
+};
+typedef struct xen_processor_csd xen_processor_csd_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t);
+
+struct xen_processor_cx {
+    struct xen_power_register  reg; /* GAS for Cx trigger register */
+    uint8_t     type;     /* cstate value, c0: 0, c1: 1, ... */
+    uint32_t    latency;  /* worst latency (ms) to enter/exit this cstate */
+    uint32_t    power;    /* average power consumption (mW) */
+    uint32_t    dpcnt;    /* number of dependency entries */
+    XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */
+};
+typedef struct xen_processor_cx xen_processor_cx_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t);
+
+struct xen_processor_flags {
+    uint32_t bm_control:1;
+    uint32_t bm_check:1;
+    uint32_t has_cst:1;
+    uint32_t power_setup_done:1;
+    uint32_t bm_rld_set:1;
+};
+
+struct xen_processor_power {
+    uint32_t count;  /* number of C state entries in array below */
+    struct xen_processor_flags flags;  /* global flags of this processor */
+    XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */
+};
+
+struct xen_pct_register {
+    uint8_t  descriptor;
+    uint16_t length;
+    uint8_t  space_id;
+    uint8_t  bit_width;
+    uint8_t  bit_offset;
+    uint8_t  reserved;
+    uint64_t address;
+};
+
+struct xen_processor_px {
+    uint64_t core_frequency; /* megahertz */
+    uint64_t power;      /* milliWatts */
+    uint64_t transition_latency; /* microseconds */
+    uint64_t bus_master_latency; /* microseconds */
+    uint64_t control;        /* control value */
+    uint64_t status;     /* success indicator */
+};
+typedef struct xen_processor_px xen_processor_px_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t);
+
+struct xen_psd_package {
+    uint64_t num_entries;
+    uint64_t revision;
+    uint64_t domain;
+    uint64_t coord_type;
+    uint64_t num_processors;
+};
+
+struct xen_processor_performance {
+    uint32_t flags;     /* flag for Px sub info type */
+    uint32_t platform_limit;  /* Platform limitation on freq usage */
+    struct xen_pct_register control_register;
+    struct xen_pct_register status_register;
+    uint32_t state_count;     /* total available performance states */
+    XEN_GUEST_HANDLE(xen_processor_px_t) states;
+    struct xen_psd_package domain_info;
+    uint32_t shared_type;     /* coordination type of this processor */
+};
+typedef struct xen_processor_performance xen_processor_performance_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t);
+
+struct xenpf_set_processor_pminfo {
+    /* IN variables */
+    uint32_t id;    /* ACPI CPU ID */
+    uint32_t type;  /* {XEN_PM_CX, XEN_PM_PX} */
+    union {
+        struct xen_processor_power          power;/* Cx: _CST/_CSD */
+        struct xen_processor_performance    perf; /* Px: _PPC/_PCT/_PSS/_PSD */
+        XEN_GUEST_HANDLE(uint32)            pdc;  /* _PDC */
+    } u;
+};
+typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t);
+
+#define XENPF_get_cpuinfo 55
+struct xenpf_pcpuinfo {
+    /* IN */
+    uint32_t xen_cpuid;
+    /* OUT */
+    /* The maximum cpu_id that is present */
+    uint32_t max_present;
+#define XEN_PCPU_FLAGS_ONLINE   1
+    /* Corresponding xen_cpuid is not present */
+#define XEN_PCPU_FLAGS_INVALID  2
+    uint32_t flags;
+    uint32_t apic_id;
+    uint32_t acpi_id;
+};
+typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t);
+
+#define XENPF_get_cpu_version 48
+struct xenpf_pcpu_version {
+    /* IN */
+    uint32_t xen_cpuid;
+    /* OUT */
+    /* The maximum cpu_id that is present */
+    uint32_t max_present;
+    char vendor_id[12];
+    uint32_t family;
+    uint32_t model;
+    uint32_t stepping;
+};
+typedef struct xenpf_pcpu_version xenpf_pcpu_version_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t);
+
+#define XENPF_cpu_online    56
+#define XENPF_cpu_offline   57
+struct xenpf_cpu_ol
+{
+    uint32_t cpuid;
+};
+typedef struct xenpf_cpu_ol xenpf_cpu_ol_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t);
+
+#define XENPF_cpu_hotadd    58
+struct xenpf_cpu_hotadd
+{
+    uint32_t apic_id;
+    uint32_t acpi_id;
+    uint32_t pxm;
+};
+
+#define XENPF_mem_hotadd    59
+struct xenpf_mem_hotadd
+{
+    uint64_t spfn;
+    uint64_t epfn;
+    uint32_t pxm;
+    uint32_t flags;
+};
+
+#define XENPF_core_parking  60
+
+#define XEN_CORE_PARKING_SET 1
+#define XEN_CORE_PARKING_GET 2
+struct xenpf_core_parking {
+    /* IN variables */
+    uint32_t type;
+    /* IN:  number of CPUs expected to be idled */
+    /* OUT: number of CPUs actually idled */
+    uint32_t idle_nums;
+};
+typedef struct xenpf_core_parking xenpf_core_parking_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t);
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_platform_op(const struct xen_platform_op*);
+ */
+struct xen_platform_op {
+    uint32_t cmd;
+    uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
+    union {
+        struct xenpf_settime           settime;
+        struct xenpf_add_memtype       add_memtype;
+        struct xenpf_del_memtype       del_memtype;
+        struct xenpf_read_memtype      read_memtype;
+        struct xenpf_microcode_update  microcode;
+        struct xenpf_platform_quirk    platform_quirk;
+        struct xenpf_efi_runtime_call  efi_runtime_call;
+        struct xenpf_firmware_info     firmware_info;
+        struct xenpf_enter_acpi_sleep  enter_acpi_sleep;
+        struct xenpf_change_freq       change_freq;
+        struct xenpf_getidletime       getidletime;
+        struct xenpf_set_processor_pminfo set_pminfo;
+        struct xenpf_pcpuinfo          pcpu_info;
+        struct xenpf_pcpu_version      pcpu_version;
+        struct xenpf_cpu_ol            cpu_ol;
+        struct xenpf_cpu_hotadd        cpu_add;
+        struct xenpf_mem_hotadd        mem_add;
+        struct xenpf_core_parking      core_parking;
+        uint8_t                        pad[128];
+    } u;
+};
+typedef struct xen_platform_op xen_platform_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t);
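+
+/*
+ * Illustrative sketch (not part of the Xen interface): dom0 setting the
+ * wall clock via XENPF_settime.  Assumes a HYPERVISOR_platform_op(op)
+ * wrapper; `secs'/`nsecs' give the time since the Unix epoch and `now'
+ * is the current Xen system time, all supplied by the caller.
+ *
+ *  struct xen_platform_op op;
+ *
+ *  op.cmd = XENPF_settime;
+ *  op.interface_version = XENPF_INTERFACE_VERSION;
+ *  op.u.settime.secs = secs;
+ *  op.u.settime.nsecs = nsecs;
+ *  op.u.settime.system_time = now;
+ *  HYPERVISOR_platform_op(&op);
+ */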
+
+#endif /* __XEN_PUBLIC_PLATFORM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/sched.h
@@ -1,0 +1,174 @@
+/******************************************************************************
+ * sched.h
+ *
+ * Scheduler state interactions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_SCHED_H__
+#define __XEN_PUBLIC_SCHED_H__
+
+#include "event_channel.h"
+
+/*
+ * `incontents 150 sched Guest Scheduler Operations
+ *
+ * The SCHEDOP interface provides mechanisms for a guest to interact
+ * with the scheduler, including yield, blocking and shutting itself
+ * down.
+ */
+
+/*
+ * The prototype for this hypercall is:
+ * ` long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
+ *
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == Operation-specific extra argument(s), as described below.
+ * ...  == Additional Operation-specific extra arguments, described below.
+ *
+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
+ * of this hypercall, supporting only the commands yield, block and shutdown:
+ *  long sched_op(int cmd, unsigned long arg)
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
+ *      == SHUTDOWN_* code (SCHEDOP_shutdown)
+ *
+ * This legacy version is available to new guests as:
+ * ` long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
+ */
+
+/* ` enum sched_op { // SCHEDOP_* => struct sched_* */
+/*
+ * Voluntarily yield the CPU.
+ * @arg == NULL.
+ */
+#define SCHEDOP_yield       0
+
+/*
+ * Block execution of this VCPU until an event is received for processing.
+ * If called with event upcalls masked, this operation will atomically
+ * reenable event delivery and check for pending events before blocking the
+ * VCPU. This avoids a "wakeup waiting" race.
+ * @arg == NULL.
+ */
+#define SCHEDOP_block       1
+
+/*
+ * Halt execution of this domain (all VCPUs) and notify the system controller.
+ * @arg == pointer to sched_shutdown_t structure.
+ *
+ * If the sched_shutdown_t reason is SHUTDOWN_suspend then this
+ * hypercall takes an additional extra argument which should be the
+ * MFN of the guest's start_info_t.
+ *
+ * In addition, when the reason is SHUTDOWN_suspend, this hypercall
+ * returns 1 if suspend was cancelled or the domain was merely
+ * checkpointed, and 0 if it is resuming in a new domain.
+ */
+#define SCHEDOP_shutdown    2
+
+/*
+ * Poll a set of event-channel ports. Return when one or more are pending. An
+ * optional timeout may be specified.
+ * @arg == pointer to sched_poll_t structure.
+ */
+#define SCHEDOP_poll        3
+
+/*
+ * Declare a shutdown for another domain. The main use of this function is
+ * in interpreting shutdown requests and reasons for fully-virtualized
+ * domains.  A para-virtualized domain may use SCHEDOP_shutdown directly.
+ * @arg == pointer to sched_remote_shutdown_t structure.
+ */
+#define SCHEDOP_remote_shutdown        4
+
+/*
+ * Latch a shutdown code, so that when the domain later shuts down it
+ * reports this code to the control tools.
+ * @arg == sched_shutdown_t, as for SCHEDOP_shutdown.
+ */
+#define SCHEDOP_shutdown_code 5
+
+/*
+ * Set up, poke or destroy a domain watchdog timer.
+ * @arg == pointer to sched_watchdog_t structure.
+ * With id == 0, set up a domain watchdog timer to cause domain shutdown
+ *               after timeout; returns the watchdog id.
+ * With id != 0 and timeout == 0, destroy the domain watchdog timer.
+ * With id != 0 and timeout != 0, poke the watchdog timer and set a new timeout.
+ */
+#define SCHEDOP_watchdog    6
+/* ` } */
+
+struct sched_shutdown {
+    unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */
+};
+typedef struct sched_shutdown sched_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
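+
+/*
+ * Usage sketch (not part of the original Xen header): a PV guest
+ * powering itself off might fill in a sched_shutdown and issue the
+ * hypercall through its HYPERVISOR_sched_op stub:
+ *
+ *     struct sched_shutdown ss;
+ *
+ *     ss.reason = SHUTDOWN_poweroff;
+ *     HYPERVISOR_sched_op(SCHEDOP_shutdown, &ss);
+ */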
+
+struct sched_poll {
+    XEN_GUEST_HANDLE(evtchn_port_t) ports;
+    unsigned int nr_ports;
+    uint64_t timeout;
+};
+typedef struct sched_poll sched_poll_t;
+DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
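+
+/*
+ * Usage sketch (not part of the original Xen header): poll one event
+ * channel port, assuming the usual set_xen_guest_handle() accessor and
+ * that, as in current Xen, a non-zero timeout is an absolute system
+ * time in nanoseconds:
+ *
+ *     evtchn_port_t port;
+ *     struct sched_poll sp;
+ *
+ *     set_xen_guest_handle(sp.ports, &port);
+ *     sp.nr_ports = 1;
+ *     sp.timeout = 0;                     0 means wait indefinitely
+ *     HYPERVISOR_sched_op(SCHEDOP_poll, &sp);
+ */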
+
+struct sched_remote_shutdown {
+    domid_t domain_id;         /* Remote domain ID */
+    unsigned int reason;       /* SHUTDOWN_* => enum sched_shutdown_reason */
+};
+typedef struct sched_remote_shutdown sched_remote_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
+
+struct sched_watchdog {
+    uint32_t id;                /* watchdog ID */
+    uint32_t timeout;           /* timeout */
+};
+typedef struct sched_watchdog sched_watchdog_t;
+DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t);
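+
+/*
+ * Usage sketch (not part of the original Xen header; that the timeout
+ * is in seconds is an assumption about current Xen):
+ *
+ *     struct sched_watchdog wd;
+ *     uint32_t id;
+ *
+ *     wd.id = 0;
+ *     wd.timeout = 30;
+ *     id = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);
+ *     ...
+ *     wd.id = id;                         poke, re-arming the timeout
+ *     HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);
+ */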
+
+/*
+ * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
+ * software to determine the appropriate action. For the most part, Xen does
+ * not care about the shutdown code.
+ */
+/* ` enum sched_shutdown_reason { */
+#define SHUTDOWN_poweroff   0  /* Domain exited normally. Clean up and kill. */
+#define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
+#define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
+#define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
+#define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */
+#define SHUTDOWN_MAX        4  /* Maximum valid shutdown reason.             */
+/* ` } */
+
+#endif /* __XEN_PUBLIC_SCHED_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/sysctl.h
@@ -1,0 +1,694 @@
+/******************************************************************************
+ * sysctl.h
+ * 
+ * System management operations. For use by the node control stack.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_SYSCTL_H__
+#define __XEN_PUBLIC_SYSCTL_H__
+
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#error "sysctl operations are intended for use by node control tools only"
+#endif
+
+#include "xen.h"
+#include "domctl.h"
+
+#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000A
+
+/*
+ * Read console content from Xen buffer ring.
+ */
+/* XEN_SYSCTL_readconsole */
+struct xen_sysctl_readconsole {
+    /* IN: Non-zero -> clear after reading. */
+    uint8_t clear;
+    /* IN: Non-zero -> start index specified by @index field. */
+    uint8_t incremental;
+    uint8_t pad0, pad1;
+    /*
+     * IN:  Start index for consuming from ring buffer (if @incremental);
+     * OUT: End index after consuming from ring buffer.
+     */
+    uint32_t index; 
+    /* IN: Virtual address to write console data. */
+    XEN_GUEST_HANDLE_64(char) buffer;
+    /* IN: Size of buffer; OUT: Bytes written to buffer. */
+    uint32_t count;
+};
+typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t);
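+
+/*
+ * Usage sketch (not part of the original Xen header): a control tool
+ * reading and clearing the console ring would wrap this in the
+ * enclosing struct xen_sysctl defined at the end of this file:
+ *
+ *     char buf[4096];
+ *     struct xen_sysctl op;
+ *
+ *     op.cmd = XEN_SYSCTL_readconsole;
+ *     op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
+ *     op.u.readconsole.clear = 1;
+ *     op.u.readconsole.incremental = 0;
+ *     set_xen_guest_handle(op.u.readconsole.buffer, buf);
+ *     op.u.readconsole.count = sizeof buf;
+ *
+ * before issuing the (toolstack-specific) sysctl hypercall.
+ */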
+
+/* Get trace buffers machine base address */
+/* XEN_SYSCTL_tbuf_op */
+struct xen_sysctl_tbuf_op {
+    /* IN variables */
+#define XEN_SYSCTL_TBUFOP_get_info     0
+#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1
+#define XEN_SYSCTL_TBUFOP_set_evt_mask 2
+#define XEN_SYSCTL_TBUFOP_set_size     3
+#define XEN_SYSCTL_TBUFOP_enable       4
+#define XEN_SYSCTL_TBUFOP_disable      5
+    uint32_t cmd;
+    /* IN/OUT variables */
+    struct xenctl_bitmap cpu_mask;
+    uint32_t             evt_mask;
+    /* OUT variables */
+    uint64_aligned_t buffer_mfn;
+    uint32_t size;  /* Also an IN variable! */
+};
+typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t);
+
+/*
+ * Get physical information about the host machine
+ */
+/* XEN_SYSCTL_physinfo */
+ /* (x86) The platform supports HVM guests. */
+#define _XEN_SYSCTL_PHYSCAP_hvm          0
+#define XEN_SYSCTL_PHYSCAP_hvm           (1u<<_XEN_SYSCTL_PHYSCAP_hvm)
+ /* (x86) The platform supports HVM-guest direct access to I/O devices. */
+#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1
+#define XEN_SYSCTL_PHYSCAP_hvm_directio  (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio)
+struct xen_sysctl_physinfo {
+    uint32_t threads_per_core;
+    uint32_t cores_per_socket;
+    uint32_t nr_cpus;     /* # CPUs currently online */
+    uint32_t max_cpu_id;  /* Largest possible CPU ID on this host */
+    uint32_t nr_nodes;    /* # nodes currently online */
+    uint32_t max_node_id; /* Largest possible node ID on this host */
+    uint32_t cpu_khz;
+    uint64_aligned_t total_pages;
+    uint64_aligned_t free_pages;
+    uint64_aligned_t scrub_pages;
+    uint64_aligned_t outstanding_pages;
+    uint32_t hw_cap[8];
+
+    /* XEN_SYSCTL_PHYSCAP_??? */
+    uint32_t capabilities;
+};
+typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t);
+
+/*
+ * Get the ID of the current scheduler.
+ */
+/* XEN_SYSCTL_sched_id */
+struct xen_sysctl_sched_id {
+    /* OUT variable */
+    uint32_t sched_id;
+};
+typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t);
+
+/* Interface for controlling Xen software performance counters. */
+/* XEN_SYSCTL_perfc_op */
+/* Sub-operations: */
+#define XEN_SYSCTL_PERFCOP_reset 1   /* Reset all counters to zero. */
+#define XEN_SYSCTL_PERFCOP_query 2   /* Get perfctr information. */
+struct xen_sysctl_perfc_desc {
+    char         name[80];             /* name of perf counter */
+    uint32_t     nr_vals;              /* number of values for this counter */
+};
+typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t);
+typedef uint32_t xen_sysctl_perfc_val_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t);
+
+struct xen_sysctl_perfc_op {
+    /* IN variables. */
+    uint32_t       cmd;                /*  XEN_SYSCTL_PERFCOP_??? */
+    /* OUT variables. */
+    uint32_t       nr_counters;       /* number of counter descriptions */
+    uint32_t       nr_vals;           /* number of values */
+    /* counter information (or NULL) */
+    XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc;
+    /* counter values (or NULL) */
+    XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val;
+};
+typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
+
+/* XEN_SYSCTL_getdomaininfolist */
+struct xen_sysctl_getdomaininfolist {
+    /* IN variables. */
+    domid_t               first_domain;
+    uint32_t              max_domains;
+    XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer;
+    /* OUT variables. */
+    uint32_t              num_domains;
+};
+typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
+
+/* Inject debug keys into Xen. */
+/* XEN_SYSCTL_debug_keys */
+struct xen_sysctl_debug_keys {
+    /* IN variables. */
+    XEN_GUEST_HANDLE_64(char) keys;
+    uint32_t nr_keys;
+};
+typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
+
+/* Get physical CPU information. */
+/* XEN_SYSCTL_getcpuinfo */
+struct xen_sysctl_cpuinfo {
+    uint64_aligned_t idletime;
+};
+typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); 
+struct xen_sysctl_getcpuinfo {
+    /* IN variables. */
+    uint32_t max_cpus;
+    XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info;
+    /* OUT variables. */
+    uint32_t nr_cpus;
+}; 
+typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); 
+
+/* XEN_SYSCTL_availheap */
+struct xen_sysctl_availheap {
+    /* IN variables. */
+    uint32_t min_bitwidth;  /* Smallest address width (zero if don't care). */
+    uint32_t max_bitwidth;  /* Largest address width (zero if don't care). */
+    int32_t  node;          /* NUMA node of interest (-1 for all nodes). */
+    /* OUT variables. */
+    uint64_aligned_t avail_bytes;/* Bytes available in the specified region. */
+};
+typedef struct xen_sysctl_availheap xen_sysctl_availheap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t);
+
+/* XEN_SYSCTL_get_pmstat */
+struct pm_px_val {
+    uint64_aligned_t freq;        /* Px core frequency */
+    uint64_aligned_t residency;   /* Px residency time */
+    uint64_aligned_t count;       /* Px transition count */
+};
+typedef struct pm_px_val pm_px_val_t;
+DEFINE_XEN_GUEST_HANDLE(pm_px_val_t);
+
+struct pm_px_stat {
+    uint8_t total;        /* total Px states */
+    uint8_t usable;       /* usable Px states */
+    uint8_t last;         /* last Px state */
+    uint8_t cur;          /* current Px state */
+    XEN_GUEST_HANDLE_64(uint64) trans_pt;   /* Px transition table */
+    XEN_GUEST_HANDLE_64(pm_px_val_t) pt;
+};
+typedef struct pm_px_stat pm_px_stat_t;
+DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t);
+
+struct pm_cx_stat {
+    uint32_t nr;    /* entry nr in triggers & residencies, including C0 */
+    uint32_t last;  /* last Cx state */
+    uint64_aligned_t idle_time;                 /* idle time from boot */
+    XEN_GUEST_HANDLE_64(uint64) triggers;    /* Cx trigger counts */
+    XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */
+    uint64_aligned_t pc2;
+    uint64_aligned_t pc3;
+    uint64_aligned_t pc6;
+    uint64_aligned_t pc7;
+    uint64_aligned_t cc3;
+    uint64_aligned_t cc6;
+    uint64_aligned_t cc7;
+};
+
+struct xen_sysctl_get_pmstat {
+#define PMSTAT_CATEGORY_MASK 0xf0
+#define PMSTAT_PX            0x10
+#define PMSTAT_CX            0x20
+#define PMSTAT_get_max_px    (PMSTAT_PX | 0x1)
+#define PMSTAT_get_pxstat    (PMSTAT_PX | 0x2)
+#define PMSTAT_reset_pxstat  (PMSTAT_PX | 0x3)
+#define PMSTAT_get_max_cx    (PMSTAT_CX | 0x1)
+#define PMSTAT_get_cxstat    (PMSTAT_CX | 0x2)
+#define PMSTAT_reset_cxstat  (PMSTAT_CX | 0x3)
+    uint32_t type;
+    uint32_t cpuid;
+    union {
+        struct pm_px_stat getpx;
+        struct pm_cx_stat getcx;
+        /* other struct for tx, etc */
+    } u;
+};
+typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t);
+
+/* XEN_SYSCTL_cpu_hotplug */
+struct xen_sysctl_cpu_hotplug {
+    /* IN variables */
+    uint32_t cpu;   /* Physical cpu. */
+#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE  0
+#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1
+    uint32_t op;    /* hotplug opcode */
+};
+typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t);
+
+/*
+ * Get/set Xen power management options, including:
+ * 1. cpufreq governors and related parameters
+ */
+/* XEN_SYSCTL_pm_op */
+struct xen_userspace {
+    uint32_t scaling_setspeed;
+};
+typedef struct xen_userspace xen_userspace_t;
+
+struct xen_ondemand {
+    uint32_t sampling_rate_max;
+    uint32_t sampling_rate_min;
+
+    uint32_t sampling_rate;
+    uint32_t up_threshold;
+};
+typedef struct xen_ondemand xen_ondemand_t;
+
+/* 
+ * The cpufreq parameter names in this structure match the
+ * corresponding sysfs file names used by native Linux.
+ */
+#define CPUFREQ_NAME_LEN 16
+struct xen_get_cpufreq_para {
+    /* IN/OUT variable */
+    uint32_t cpu_num;
+    uint32_t freq_num;
+    uint32_t gov_num;
+
+    /* for all governors */
+    /* OUT variable */
+    XEN_GUEST_HANDLE_64(uint32) affected_cpus;
+    XEN_GUEST_HANDLE_64(uint32) scaling_available_frequencies;
+    XEN_GUEST_HANDLE_64(char)   scaling_available_governors;
+    char scaling_driver[CPUFREQ_NAME_LEN];
+
+    uint32_t cpuinfo_cur_freq;
+    uint32_t cpuinfo_max_freq;
+    uint32_t cpuinfo_min_freq;
+    uint32_t scaling_cur_freq;
+
+    char scaling_governor[CPUFREQ_NAME_LEN];
+    uint32_t scaling_max_freq;
+    uint32_t scaling_min_freq;
+
+    /* for specific governor */
+    union {
+        struct  xen_userspace userspace;
+        struct  xen_ondemand ondemand;
+    } u;
+
+    int32_t turbo_enabled;
+};
+
+struct xen_set_cpufreq_gov {
+    char scaling_governor[CPUFREQ_NAME_LEN];
+};
+
+struct xen_set_cpufreq_para {
+    #define SCALING_MAX_FREQ           1
+    #define SCALING_MIN_FREQ           2
+    #define SCALING_SETSPEED           3
+    #define SAMPLING_RATE              4
+    #define UP_THRESHOLD               5
+
+    uint32_t ctrl_type;
+    uint32_t ctrl_value;
+};
+
+struct xen_sysctl_pm_op {
+    #define PM_PARA_CATEGORY_MASK      0xf0
+    #define CPUFREQ_PARA               0x10
+
+    /* cpufreq command type */
+    #define GET_CPUFREQ_PARA           (CPUFREQ_PARA | 0x01)
+    #define SET_CPUFREQ_GOV            (CPUFREQ_PARA | 0x02)
+    #define SET_CPUFREQ_PARA           (CPUFREQ_PARA | 0x03)
+    #define GET_CPUFREQ_AVGFREQ        (CPUFREQ_PARA | 0x04)
+
+    /* set/reset scheduler power saving option */
+    #define XEN_SYSCTL_pm_op_set_sched_opt_smt    0x21
+
+    /* cpuidle max_cstate access command */
+    #define XEN_SYSCTL_pm_op_get_max_cstate       0x22
+    #define XEN_SYSCTL_pm_op_set_max_cstate       0x23
+
+    /* set scheduler migration cost value */
+    #define XEN_SYSCTL_pm_op_set_vcpu_migration_delay   0x24
+    #define XEN_SYSCTL_pm_op_get_vcpu_migration_delay   0x25
+
+    /* enable/disable turbo mode when in dbs governor */
+    #define XEN_SYSCTL_pm_op_enable_turbo               0x26
+    #define XEN_SYSCTL_pm_op_disable_turbo              0x27
+
+    uint32_t cmd;
+    uint32_t cpuid;
+    union {
+        struct xen_get_cpufreq_para get_para;
+        struct xen_set_cpufreq_gov  set_gov;
+        struct xen_set_cpufreq_para set_para;
+        uint64_aligned_t get_avgfreq;
+        uint32_t                    set_sched_opt_smt;
+        uint32_t                    get_max_cstate;
+        uint32_t                    set_max_cstate;
+        uint32_t                    get_vcpu_migration_delay;
+        uint32_t                    set_vcpu_migration_delay;
+    } u;
+};
+
+/* XEN_SYSCTL_page_offline_op */
+struct xen_sysctl_page_offline_op {
+    /* IN: range of page to be offlined */
+#define sysctl_page_offline     1
+#define sysctl_page_online      2
+#define sysctl_query_page_offline  3
+    uint32_t cmd;
+    uint32_t start;
+    uint32_t end;
+    /* OUT: result of page offline request */
+    /*
+     * bit 0~15: result flags
+     * bit 16~31: owner
+     */
+    XEN_GUEST_HANDLE(uint32) status;
+};
+
+#define PG_OFFLINE_STATUS_MASK    (0xFFUL)
+
+/* The result is invalid, i.e. HV does not handle it */
+#define PG_OFFLINE_INVALID   (0x1UL << 0)
+
+#define PG_OFFLINE_OFFLINED  (0x1UL << 1)
+#define PG_OFFLINE_PENDING   (0x1UL << 2)
+#define PG_OFFLINE_FAILED    (0x1UL << 3)
+#define PG_OFFLINE_AGAIN     (0x1UL << 4)
+
+#define PG_ONLINE_FAILED     PG_OFFLINE_FAILED
+#define PG_ONLINE_ONLINED    PG_OFFLINE_OFFLINED
+
+#define PG_OFFLINE_STATUS_OFFLINED              (0x1UL << 1)
+#define PG_OFFLINE_STATUS_ONLINE                (0x1UL << 2)
+#define PG_OFFLINE_STATUS_OFFLINE_PENDING       (0x1UL << 3)
+#define PG_OFFLINE_STATUS_BROKEN                (0x1UL << 4)
+
+#define PG_OFFLINE_MISC_MASK    (0xFFUL << 4)
+
+/* valid when PG_OFFLINE_FAILED or PG_OFFLINE_PENDING */
+#define PG_OFFLINE_XENPAGE   (0x1UL << 8)
+#define PG_OFFLINE_DOM0PAGE  (0x1UL << 9)
+#define PG_OFFLINE_ANONYMOUS (0x1UL << 10)
+#define PG_OFFLINE_NOT_CONV_RAM   (0x1UL << 11)
+#define PG_OFFLINE_OWNED     (0x1UL << 12)
+
+#define PG_OFFLINE_BROKEN    (0x1UL << 13)
+#define PG_ONLINE_BROKEN     PG_OFFLINE_BROKEN
+
+#define PG_OFFLINE_OWNER_SHIFT 16
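+
+/*
+ * Decoding sketch (not part of the original Xen header): for each
+ * status word returned by the query, the result flags sit in the low
+ * bits and the owning domain id in the high half:
+ *
+ *     uint32_t st = status[i];
+ *     uint32_t owner = st >> PG_OFFLINE_OWNER_SHIFT;
+ *
+ *     if(st & PG_OFFLINE_OFFLINED)
+ *             ... page i is now offline ...
+ */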
+
+/* XEN_SYSCTL_lockprof_op */
+/* Sub-operations: */
+#define XEN_SYSCTL_LOCKPROF_reset 1   /* Reset all profile data to zero. */
+#define XEN_SYSCTL_LOCKPROF_query 2   /* Get lock profile information. */
+/* Record-type: */
+#define LOCKPROF_TYPE_GLOBAL      0   /* global lock, idx meaningless */
+#define LOCKPROF_TYPE_PERDOM      1   /* per-domain lock, idx is domid */
+#define LOCKPROF_TYPE_N           2   /* number of types */
+struct xen_sysctl_lockprof_data {
+    char     name[40];     /* lock name (may include up to 2 %d specifiers) */
+    int32_t  type;         /* LOCKPROF_TYPE_??? */
+    int32_t  idx;          /* index (e.g. domain id) */
+    uint64_aligned_t lock_cnt;     /* # of times the lock was acquired */
+    uint64_aligned_t block_cnt;    /* # of times a wait for the lock was needed */
+    uint64_aligned_t lock_time;    /* nsecs lock held */
+    uint64_aligned_t block_time;   /* nsecs waited for lock */
+};
+typedef struct xen_sysctl_lockprof_data xen_sysctl_lockprof_data_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_data_t);
+struct xen_sysctl_lockprof_op {
+    /* IN variables. */
+    uint32_t       cmd;               /* XEN_SYSCTL_LOCKPROF_??? */
+    uint32_t       max_elem;          /* size of output buffer */
+    /* OUT variables (query only). */
+    uint32_t       nr_elem;           /* number of elements available */
+    uint64_aligned_t time;            /* nsecs of profile measurement */
+    /* profile information (or NULL) */
+    XEN_GUEST_HANDLE_64(xen_sysctl_lockprof_data_t) data;
+};
+typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t);
+
+/* XEN_SYSCTL_topologyinfo */
+#define INVALID_TOPOLOGY_ID  (~0U)
+struct xen_sysctl_topologyinfo {
+    /*
+     * IN: maximum addressable entry in the caller-provided arrays.
+     * OUT: largest cpu identifier in the system.
+     * If OUT is greater than IN then the arrays are truncated!
+     * If OUT is less than IN then the array tails are not written by sysctl.
+     */
+    uint32_t max_cpu_index;
+
+    /*
+     * If not NULL, these arrays are filled with core/socket/node identifier
+     * for each cpu.
+     * If a cpu has no core/socket/node information (e.g., cpu not present) 
+     * then the sentinel value ~0u is written to each array.
+     * The number of array elements written by the sysctl is:
+     *   min(@max_cpu_index_IN,@max_cpu_index_OUT)+1
+     */
+    XEN_GUEST_HANDLE_64(uint32) cpu_to_core;
+    XEN_GUEST_HANDLE_64(uint32) cpu_to_socket;
+    XEN_GUEST_HANDLE_64(uint32) cpu_to_node;
+};
+typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t);
+
+/* XEN_SYSCTL_numainfo */
+#define INVALID_NUMAINFO_ID (~0U)
+struct xen_sysctl_numainfo {
+    /*
+     * IN: maximum addressable entry in the caller-provided arrays.
+     * OUT: largest node identifier in the system.
+     * If OUT is greater than IN then the arrays are truncated!
+     */
+    uint32_t max_node_index;
+
+    /* NB. Entries are 0 if node is not present. */
+    XEN_GUEST_HANDLE_64(uint64) node_to_memsize;
+    XEN_GUEST_HANDLE_64(uint64) node_to_memfree;
+
+    /*
+     * Array, of size (max_node_index+1)^2, listing memory access distances
+     * between nodes. If an entry has no node distance information (e.g., node 
+     * not present) then the value ~0u is written.
+     * 
+     * Note that the array rows must be indexed by multiplying by the minimum 
+     * of the caller-provided max_node_index and the returned value of
+     * max_node_index. That is, if the largest node index in the system is
+     * smaller than the caller can handle, a smaller 2-d array is constructed
+     * within the space provided by the caller. When this occurs, trailing
+     * space provided by the caller is not modified. If the largest node index
+     * in the system is larger than the caller can handle, then a 2-d array of
+     * the maximum size handleable by the caller is constructed.
+     */
+    XEN_GUEST_HANDLE_64(uint32) node_to_node_distance;
+};
+typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t);
+
+/* XEN_SYSCTL_cpupool_op */
+#define XEN_SYSCTL_CPUPOOL_OP_CREATE                1  /* C */
+#define XEN_SYSCTL_CPUPOOL_OP_DESTROY               2  /* D */
+#define XEN_SYSCTL_CPUPOOL_OP_INFO                  3  /* I */
+#define XEN_SYSCTL_CPUPOOL_OP_ADDCPU                4  /* A */
+#define XEN_SYSCTL_CPUPOOL_OP_RMCPU                 5  /* R */
+#define XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN            6  /* M */
+#define XEN_SYSCTL_CPUPOOL_OP_FREEINFO              7  /* F */
+#define XEN_SYSCTL_CPUPOOL_PAR_ANY     0xFFFFFFFF
+struct xen_sysctl_cpupool_op {
+    uint32_t op;          /* IN */
+    uint32_t cpupool_id;  /* IN: CDIARM OUT: CI */
+    uint32_t sched_id;    /* IN: C      OUT: I  */
+    uint32_t domid;       /* IN: M              */
+    uint32_t cpu;         /* IN: AR             */
+    uint32_t n_dom;       /*            OUT: I  */
+    struct xenctl_bitmap cpumap; /*     OUT: IF */
+};
+typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t);
+
+#define ARINC653_MAX_DOMAINS_PER_SCHEDULE   64
+/*
+ * This structure is used to pass a new ARINC653 schedule from a
+ * privileged domain (i.e. dom0) to Xen.
+ */
+struct xen_sysctl_arinc653_schedule {
+    /* major_frame holds the time for the new schedule's major frame
+     * in nanoseconds. */
+    uint64_aligned_t     major_frame;
+    /* num_sched_entries holds how many of the entries in the
+     * sched_entries[] array are valid. */
+    uint8_t     num_sched_entries;
+    /* The sched_entries array holds the actual schedule entries. */
+    struct {
+        /* dom_handle must match a domain's UUID */
+        xen_domain_handle_t dom_handle;
+        /* If a domain has multiple VCPUs, vcpu_id specifies which one
+         * this schedule entry applies to. It should be set to 0 if
+         * there is only one VCPU for the domain. */
+        unsigned int vcpu_id;
+        /* runtime specifies the amount of time that should be allocated
+         * to this VCPU per major frame. It is specified in nanoseconds */
+        uint64_aligned_t runtime;
+    } sched_entries[ARINC653_MAX_DOMAINS_PER_SCHEDULE];
+};
+typedef struct xen_sysctl_arinc653_schedule xen_sysctl_arinc653_schedule_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_arinc653_schedule_t);
+
+struct xen_sysctl_credit_schedule {
+    /* Length of timeslice in milliseconds */
+#define XEN_SYSCTL_CSCHED_TSLICE_MAX 1000
+#define XEN_SYSCTL_CSCHED_TSLICE_MIN 1
+    unsigned tslice_ms;
+    /* Rate limit (minimum timeslice) in microseconds */
+#define XEN_SYSCTL_SCHED_RATELIMIT_MAX 500000
+#define XEN_SYSCTL_SCHED_RATELIMIT_MIN 100
+    unsigned ratelimit_us;
+};
+typedef struct xen_sysctl_credit_schedule xen_sysctl_credit_schedule_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_credit_schedule_t);
+
+/* XEN_SYSCTL_scheduler_op */
+/* Set or get info? */
+#define XEN_SYSCTL_SCHEDOP_putinfo 0
+#define XEN_SYSCTL_SCHEDOP_getinfo 1
+struct xen_sysctl_scheduler_op {
+    uint32_t cpupool_id; /* Cpupool whose scheduler is to be targeted. */
+    uint32_t sched_id;   /* XEN_SCHEDULER_* (domctl.h) */
+    uint32_t cmd;        /* XEN_SYSCTL_SCHEDOP_* */
+    union {
+        struct xen_sysctl_sched_arinc653 {
+            XEN_GUEST_HANDLE_64(xen_sysctl_arinc653_schedule_t) schedule;
+        } sched_arinc653;
+        struct xen_sysctl_credit_schedule sched_credit;
+    } u;
+};
+typedef struct xen_sysctl_scheduler_op xen_sysctl_scheduler_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_scheduler_op_t);
+
+/* XEN_SYSCTL_coverage_op */
+/*
+ * Get total size of information, to help allocate
+ * the buffer. The pointer points to a 32 bit value.
+ */
+#define XEN_SYSCTL_COVERAGE_get_total_size 0
+
+/*
+ * Read all coverage information in a single run.
+ * A tool must then be used to split the individual records.
+ */
+#define XEN_SYSCTL_COVERAGE_read           1
+
+/*
+ * Reset all the coverage counters to 0
+ * No parameters.
+ */
+#define XEN_SYSCTL_COVERAGE_reset          2
+
+/*
+ * Like XEN_SYSCTL_COVERAGE_read, but also reset the
+ * counters to 0 in the same call.
+ */
+#define XEN_SYSCTL_COVERAGE_read_and_reset 3
+
+struct xen_sysctl_coverage_op {
+    uint32_t cmd;        /* XEN_SYSCTL_COVERAGE_* */
+    union {
+        uint32_t total_size; /* OUT */
+        XEN_GUEST_HANDLE_64(uint8)  raw_info;   /* OUT */
+    } u;
+};
+typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t);
+
+
+struct xen_sysctl {
+    uint32_t cmd;
+#define XEN_SYSCTL_readconsole                    1
+#define XEN_SYSCTL_tbuf_op                        2
+#define XEN_SYSCTL_physinfo                       3
+#define XEN_SYSCTL_sched_id                       4
+#define XEN_SYSCTL_perfc_op                       5
+#define XEN_SYSCTL_getdomaininfolist              6
+#define XEN_SYSCTL_debug_keys                     7
+#define XEN_SYSCTL_getcpuinfo                     8
+#define XEN_SYSCTL_availheap                      9
+#define XEN_SYSCTL_get_pmstat                    10
+#define XEN_SYSCTL_cpu_hotplug                   11
+#define XEN_SYSCTL_pm_op                         12
+#define XEN_SYSCTL_page_offline_op               14
+#define XEN_SYSCTL_lockprof_op                   15
+#define XEN_SYSCTL_topologyinfo                  16 
+#define XEN_SYSCTL_numainfo                      17
+#define XEN_SYSCTL_cpupool_op                    18
+#define XEN_SYSCTL_scheduler_op                  19
+#define XEN_SYSCTL_coverage_op                   20
+    uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
+    union {
+        struct xen_sysctl_readconsole       readconsole;
+        struct xen_sysctl_tbuf_op           tbuf_op;
+        struct xen_sysctl_physinfo          physinfo;
+        struct xen_sysctl_topologyinfo      topologyinfo;
+        struct xen_sysctl_numainfo          numainfo;
+        struct xen_sysctl_sched_id          sched_id;
+        struct xen_sysctl_perfc_op          perfc_op;
+        struct xen_sysctl_getdomaininfolist getdomaininfolist;
+        struct xen_sysctl_debug_keys        debug_keys;
+        struct xen_sysctl_getcpuinfo        getcpuinfo;
+        struct xen_sysctl_availheap         availheap;
+        struct xen_sysctl_get_pmstat        get_pmstat;
+        struct xen_sysctl_cpu_hotplug       cpu_hotplug;
+        struct xen_sysctl_pm_op             pm_op;
+        struct xen_sysctl_page_offline_op   page_offline;
+        struct xen_sysctl_lockprof_op       lockprof_op;
+        struct xen_sysctl_cpupool_op        cpupool_op;
+        struct xen_sysctl_scheduler_op      scheduler_op;
+        struct xen_sysctl_coverage_op       coverage_op;
+        uint8_t                             pad[128];
+    } u;
+};
+typedef struct xen_sysctl xen_sysctl_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t);
+
+#endif /* __XEN_PUBLIC_SYSCTL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/tmem.h
@@ -1,0 +1,148 @@
+/******************************************************************************
+ * tmem.h
+ * 
+ * Guest OS interface to Xen Transcendent Memory.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_TMEM_H__
+#define __XEN_PUBLIC_TMEM_H__
+
+#include "xen.h"
+
+/* version of ABI */
+#define TMEM_SPEC_VERSION          1
+
+/* Commands to HYPERVISOR_tmem_op() */
+#define TMEM_CONTROL               0
+#define TMEM_NEW_POOL              1
+#define TMEM_DESTROY_POOL          2
+#define TMEM_NEW_PAGE              3
+#define TMEM_PUT_PAGE              4
+#define TMEM_GET_PAGE              5
+#define TMEM_FLUSH_PAGE            6
+#define TMEM_FLUSH_OBJECT          7
+#define TMEM_READ                  8
+#define TMEM_WRITE                 9
+#define TMEM_XCHG                 10
+
+/* Privileged commands to HYPERVISOR_tmem_op() */
+#define TMEM_AUTH                 101 
+#define TMEM_RESTORE_NEW          102
+
+/* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */
+#define TMEMC_THAW                   0
+#define TMEMC_FREEZE                 1
+#define TMEMC_FLUSH                  2
+#define TMEMC_DESTROY                3
+#define TMEMC_LIST                   4
+#define TMEMC_SET_WEIGHT             5
+#define TMEMC_SET_CAP                6
+#define TMEMC_SET_COMPRESS           7
+#define TMEMC_QUERY_FREEABLE_MB      8
+#define TMEMC_SAVE_BEGIN             10
+#define TMEMC_SAVE_GET_VERSION       11
+#define TMEMC_SAVE_GET_MAXPOOLS      12
+#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13
+#define TMEMC_SAVE_GET_CLIENT_CAP    14
+#define TMEMC_SAVE_GET_CLIENT_FLAGS  15
+#define TMEMC_SAVE_GET_POOL_FLAGS    16
+#define TMEMC_SAVE_GET_POOL_NPAGES   17
+#define TMEMC_SAVE_GET_POOL_UUID     18
+#define TMEMC_SAVE_GET_NEXT_PAGE     19
+#define TMEMC_SAVE_GET_NEXT_INV      20
+#define TMEMC_SAVE_END               21
+#define TMEMC_RESTORE_BEGIN          30
+#define TMEMC_RESTORE_PUT_PAGE       32
+#define TMEMC_RESTORE_FLUSH_PAGE     33
+
+/* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */
+#define TMEM_POOL_PERSIST          1
+#define TMEM_POOL_SHARED           2
+#define TMEM_POOL_PRECOMPRESSED    4
+#define TMEM_POOL_PAGESIZE_SHIFT   4
+#define TMEM_POOL_PAGESIZE_MASK  0xf
+#define TMEM_POOL_VERSION_SHIFT   24
+#define TMEM_POOL_VERSION_MASK  0xff
+#define TMEM_POOL_RESERVED_BITS  0x00ffff00
+
+/* Bits for client flags (save/restore) */
+#define TMEM_CLIENT_COMPRESS       1
+#define TMEM_CLIENT_FROZEN         2
+
+/* Special errno values */
+#define EFROZEN                 1000
+#define EEMPTY                  1001
+
+
+#ifndef __ASSEMBLY__
+typedef xen_pfn_t tmem_cli_mfn_t;
+typedef XEN_GUEST_HANDLE(char) tmem_cli_va_t;
+struct tmem_op {
+    uint32_t cmd;
+    int32_t pool_id;
+    union {
+        struct {
+            uint64_t uuid[2];
+            uint32_t flags;
+            uint32_t arg1;
+        } creat; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */
+        struct { 
+            uint32_t subop;
+            uint32_t cli_id;
+            uint32_t arg1;
+            uint32_t arg2;
+            uint64_t oid[3];
+            tmem_cli_va_t buf;
+        } ctrl; /* for cmd == TMEM_CONTROL */
+        struct {
+            uint64_t oid[3];
+            uint32_t index;
+            uint32_t tmem_offset;
+            uint32_t pfn_offset;
+            uint32_t len;
+            tmem_cli_mfn_t cmfn; /* client machine page frame */
+        } gen; /* for all other cmd ("generic") */
+    } u;
+};
+typedef struct tmem_op tmem_op_t;
+DEFINE_XEN_GUEST_HANDLE(tmem_op_t);
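+
+/*
+ * Usage sketch (not part of the original Xen header; leaving the
+ * pagesize field 0 for the default is an assumption): create a
+ * private, persistent pool:
+ *
+ *     struct tmem_op op;
+ *     int pool_id;
+ *
+ *     op.cmd = TMEM_NEW_POOL;
+ *     op.u.creat.uuid[0] = op.u.creat.uuid[1] = 0;
+ *     op.u.creat.flags = TMEM_POOL_PERSIST |
+ *             (TMEM_SPEC_VERSION << TMEM_POOL_VERSION_SHIFT);
+ *     pool_id = HYPERVISOR_tmem_op(&op);
+ */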
+
+struct tmem_handle {
+    uint32_t pool_id;
+    uint32_t index;
+    uint64_t oid[3];
+};
+#endif
+
+#endif /* __XEN_PUBLIC_TMEM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/trace.h
@@ -1,0 +1,310 @@
+/******************************************************************************
+ * include/public/trace.h
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Mark Williamson, (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2005 Bin Ren
+ */
+
+#ifndef __XEN_PUBLIC_TRACE_H__
+#define __XEN_PUBLIC_TRACE_H__
+
+#define TRACE_EXTRA_MAX    7
+#define TRACE_EXTRA_SHIFT 28
+
+/* Trace classes */
+#define TRC_CLS_SHIFT 16
+#define TRC_GEN      0x0001f000    /* General trace            */
+#define TRC_SCHED    0x0002f000    /* Xen Scheduler trace      */
+#define TRC_DOM0OP   0x0004f000    /* Xen DOM0 operation trace */
+#define TRC_HVM      0x0008f000    /* Xen HVM trace            */
+#define TRC_MEM      0x0010f000    /* Xen memory trace         */
+#define TRC_PV       0x0020f000    /* Xen PV traces            */
+#define TRC_SHADOW   0x0040f000    /* Xen shadow tracing       */
+#define TRC_HW       0x0080f000    /* Xen hardware-related traces */
+#define TRC_GUEST    0x0800f000    /* Guest-generated traces   */
+#define TRC_ALL      0x0ffff000
+#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
+#define TRC_HD_CYCLE_FLAG (1UL<<31)
+#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) )
+#define TRC_HD_EXTRA(x)    (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX)
+
+/* Trace subclasses */
+#define TRC_SUBCLS_SHIFT 12
+
+/* trace subclasses for SVM */
+#define TRC_HVM_ENTRYEXIT 0x00081000   /* VMENTRY and #VMEXIT       */
+#define TRC_HVM_HANDLER   0x00082000   /* various HVM handlers      */
+
+#define TRC_SCHED_MIN       0x00021000   /* Just runstate changes */
+#define TRC_SCHED_CLASS     0x00022000   /* Scheduler-specific    */
+#define TRC_SCHED_VERBOSE   0x00028000   /* More inclusive scheduling */
+
+/*
+ * The highest 3 bits of the last 12 bits of TRC_SCHED_CLASS above are
+ * reserved for encoding what scheduler produced the information. The
+ * actual event is encoded in the last 9 bits.
+ *
+ * This means we have 8 scheduling IDs available (which means at most 8
+ * schedulers generating events) and, in each scheduler, up to 512
+ * different events.
+ */
+#define TRC_SCHED_ID_BITS 3
+#define TRC_SCHED_ID_SHIFT (TRC_SUBCLS_SHIFT - TRC_SCHED_ID_BITS)
+#define TRC_SCHED_ID_MASK (((1UL<<TRC_SCHED_ID_BITS) - 1) << TRC_SCHED_ID_SHIFT)
+#define TRC_SCHED_EVT_MASK (~(TRC_SCHED_ID_MASK))
+
+/* Per-scheduler IDs, to identify scheduler specific events */
+#define TRC_SCHED_CSCHED   0
+#define TRC_SCHED_CSCHED2  1
+#define TRC_SCHED_SEDF     2
+#define TRC_SCHED_ARINC653 3
+
+/* Per-scheduler tracing */
+#define TRC_SCHED_CLASS_EVT(_c, _e) \
+  ( ( TRC_SCHED_CLASS | \
+      ((TRC_SCHED_##_c << TRC_SCHED_ID_SHIFT) & TRC_SCHED_ID_MASK) ) + \
+    (_e & TRC_SCHED_EVT_MASK) )
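+
+/*
+ * Example (not part of the original Xen header; the macro name is
+ * illustrative): a credit-scheduler event with per-scheduler event
+ * number 1 would be encoded as
+ *
+ *     #define TRC_CSCHED_EVT1 TRC_SCHED_CLASS_EVT(CSCHED, 1)
+ *
+ * i.e. TRC_SCHED_CLASS with TRC_SCHED_CSCHED in the three id bits and
+ * the event number in the low nine bits.
+ */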
+
+/* Trace classes for Hardware */
+#define TRC_HW_PM           0x00801000   /* Power management traces */
+#define TRC_HW_IRQ          0x00802000   /* Traces relating to the handling of IRQs */
+
+/* Trace events per class */
+#define TRC_LOST_RECORDS        (TRC_GEN + 1)
+#define TRC_TRACE_WRAP_BUFFER   (TRC_GEN + 2)
+#define TRC_TRACE_CPU_CHANGE    (TRC_GEN + 3)
+
+#define TRC_SCHED_RUNSTATE_CHANGE   (TRC_SCHED_MIN + 1)
+#define TRC_SCHED_CONTINUE_RUNNING  (TRC_SCHED_MIN + 2)
+#define TRC_SCHED_DOM_ADD        (TRC_SCHED_VERBOSE +  1)
+#define TRC_SCHED_DOM_REM        (TRC_SCHED_VERBOSE +  2)
+#define TRC_SCHED_SLEEP          (TRC_SCHED_VERBOSE +  3)
+#define TRC_SCHED_WAKE           (TRC_SCHED_VERBOSE +  4)
+#define TRC_SCHED_YIELD          (TRC_SCHED_VERBOSE +  5)
+#define TRC_SCHED_BLOCK          (TRC_SCHED_VERBOSE +  6)
+#define TRC_SCHED_SHUTDOWN       (TRC_SCHED_VERBOSE +  7)
+#define TRC_SCHED_CTL            (TRC_SCHED_VERBOSE +  8)
+#define TRC_SCHED_ADJDOM         (TRC_SCHED_VERBOSE +  9)
+#define TRC_SCHED_SWITCH         (TRC_SCHED_VERBOSE + 10)
+#define TRC_SCHED_S_TIMER_FN     (TRC_SCHED_VERBOSE + 11)
+#define TRC_SCHED_T_TIMER_FN     (TRC_SCHED_VERBOSE + 12)
+#define TRC_SCHED_DOM_TIMER_FN   (TRC_SCHED_VERBOSE + 13)
+#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
+#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
+#define TRC_SCHED_SHUTDOWN_CODE  (TRC_SCHED_VERBOSE + 16)
+
+#define TRC_MEM_PAGE_GRANT_MAP      (TRC_MEM + 1)
+#define TRC_MEM_PAGE_GRANT_UNMAP    (TRC_MEM + 2)
+#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3)
+#define TRC_MEM_SET_P2M_ENTRY       (TRC_MEM + 4)
+#define TRC_MEM_DECREASE_RESERVATION (TRC_MEM + 5)
+#define TRC_MEM_POD_POPULATE        (TRC_MEM + 16)
+#define TRC_MEM_POD_ZERO_RECLAIM    (TRC_MEM + 17)
+#define TRC_MEM_POD_SUPERPAGE_SPLINTER (TRC_MEM + 18)
+
+#define TRC_PV_ENTRY   0x00201000 /* Hypervisor entry points for PV guests. */
+#define TRC_PV_SUBCALL 0x00202000 /* Sub-call in a multicall hypercall */
+
+#define TRC_PV_HYPERCALL             (TRC_PV_ENTRY +  1)
+#define TRC_PV_TRAP                  (TRC_PV_ENTRY +  3)
+#define TRC_PV_PAGE_FAULT            (TRC_PV_ENTRY +  4)
+#define TRC_PV_FORCED_INVALID_OP     (TRC_PV_ENTRY +  5)
+#define TRC_PV_EMULATE_PRIVOP        (TRC_PV_ENTRY +  6)
+#define TRC_PV_EMULATE_4GB           (TRC_PV_ENTRY +  7)
+#define TRC_PV_MATH_STATE_RESTORE    (TRC_PV_ENTRY +  8)
+#define TRC_PV_PAGING_FIXUP          (TRC_PV_ENTRY +  9)
+#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV_ENTRY + 10)
+#define TRC_PV_PTWR_EMULATION        (TRC_PV_ENTRY + 11)
+#define TRC_PV_PTWR_EMULATION_PAE    (TRC_PV_ENTRY + 12)
+#define TRC_PV_HYPERCALL_V2          (TRC_PV_ENTRY + 13)
+#define TRC_PV_HYPERCALL_SUBCALL     (TRC_PV_SUBCALL + 14)
+
+/*
+ * TRC_PV_HYPERCALL_V2 format
+ *
+ * Only some of the hypercall arguments are recorded. Bit fields A0 to
+ * A5 in the first extra word are set if the argument is present and
+ * the arguments themselves are packed sequentially in the following
+ * words.
+ *
+ * The TRC_64_FLAG bit is not set for these events (even if there are
+ * 64-bit arguments in the record).
+ *
+ * Word
+ * 0    bit 31 30|29 28|27 26|25 24|23 22|21 20|19 ... 0
+ *          A5   |A4   |A3   |A2   |A1   |A0   |Hypercall op
+ * 1    First 32 bit (or low word of first 64 bit) arg in record
+ * 2    Second 32 bit (or high word of first 64 bit) arg in record
+ * ...
+ *
+ * A0-A5 bitfield values:
+ *
+ *   00b  Argument not present
+ *   01b  32-bit argument present
+ *   10b  64-bit argument present
+ *   11b  Reserved
+ */
+#define TRC_PV_HYPERCALL_V2_ARG_32(i) (0x1 << (20 + 2*(i)))
+#define TRC_PV_HYPERCALL_V2_ARG_64(i) (0x2 << (20 + 2*(i)))
+#define TRC_PV_HYPERCALL_V2_ARG_MASK  (0xfff00000)
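+
+/*
+ * Decoding sketch (not part of the original Xen header): given the
+ * first extra word w0 of a TRC_PV_HYPERCALL_V2 record,
+ *
+ *     op = w0 & ~TRC_PV_HYPERCALL_V2_ARG_MASK;
+ *     if(w0 & TRC_PV_HYPERCALL_V2_ARG_32(i))
+ *             ... argument i is present as a 32-bit value ...
+ *     else if(w0 & TRC_PV_HYPERCALL_V2_ARG_64(i))
+ *             ... argument i is present as a 64-bit value ...
+ */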
+
+#define TRC_SHADOW_NOT_SHADOW                 (TRC_SHADOW +  1)
+#define TRC_SHADOW_FAST_PROPAGATE             (TRC_SHADOW +  2)
+#define TRC_SHADOW_FAST_MMIO                  (TRC_SHADOW +  3)
+#define TRC_SHADOW_FALSE_FAST_PATH            (TRC_SHADOW +  4)
+#define TRC_SHADOW_MMIO                       (TRC_SHADOW +  5)
+#define TRC_SHADOW_FIXUP                      (TRC_SHADOW +  6)
+#define TRC_SHADOW_DOMF_DYING                 (TRC_SHADOW +  7)
+#define TRC_SHADOW_EMULATE                    (TRC_SHADOW +  8)
+#define TRC_SHADOW_EMULATE_UNSHADOW_USER      (TRC_SHADOW +  9)
+#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ    (TRC_SHADOW + 10)
+#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
+#define TRC_SHADOW_WRMAP_BF                   (TRC_SHADOW + 12)
+#define TRC_SHADOW_PREALLOC_UNPIN             (TRC_SHADOW + 13)
+#define TRC_SHADOW_RESYNC_FULL                (TRC_SHADOW + 14)
+#define TRC_SHADOW_RESYNC_ONLY                (TRC_SHADOW + 15)
+
+/* trace events per subclass */
+#define TRC_HVM_NESTEDFLAG      (0x400)
+#define TRC_HVM_VMENTRY         (TRC_HVM_ENTRYEXIT + 0x01)
+#define TRC_HVM_VMEXIT          (TRC_HVM_ENTRYEXIT + 0x02)
+#define TRC_HVM_VMEXIT64        (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02)
+#define TRC_HVM_PF_XEN          (TRC_HVM_HANDLER + 0x01)
+#define TRC_HVM_PF_XEN64        (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01)
+#define TRC_HVM_PF_INJECT       (TRC_HVM_HANDLER + 0x02)
+#define TRC_HVM_PF_INJECT64     (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02)
+#define TRC_HVM_INJ_EXC         (TRC_HVM_HANDLER + 0x03)
+#define TRC_HVM_INJ_VIRQ        (TRC_HVM_HANDLER + 0x04)
+#define TRC_HVM_REINJ_VIRQ      (TRC_HVM_HANDLER + 0x05)
+#define TRC_HVM_IO_READ         (TRC_HVM_HANDLER + 0x06)
+#define TRC_HVM_IO_WRITE        (TRC_HVM_HANDLER + 0x07)
+#define TRC_HVM_CR_READ         (TRC_HVM_HANDLER + 0x08)
+#define TRC_HVM_CR_READ64       (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08)
+#define TRC_HVM_CR_WRITE        (TRC_HVM_HANDLER + 0x09)
+#define TRC_HVM_CR_WRITE64      (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09)
+#define TRC_HVM_DR_READ         (TRC_HVM_HANDLER + 0x0A)
+#define TRC_HVM_DR_WRITE        (TRC_HVM_HANDLER + 0x0B)
+#define TRC_HVM_MSR_READ        (TRC_HVM_HANDLER + 0x0C)
+#define TRC_HVM_MSR_WRITE       (TRC_HVM_HANDLER + 0x0D)
+#define TRC_HVM_CPUID           (TRC_HVM_HANDLER + 0x0E)
+#define TRC_HVM_INTR            (TRC_HVM_HANDLER + 0x0F)
+#define TRC_HVM_NMI             (TRC_HVM_HANDLER + 0x10)
+#define TRC_HVM_SMI             (TRC_HVM_HANDLER + 0x11)
+#define TRC_HVM_VMMCALL         (TRC_HVM_HANDLER + 0x12)
+#define TRC_HVM_HLT             (TRC_HVM_HANDLER + 0x13)
+#define TRC_HVM_INVLPG          (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_INVLPG64        (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14)
+#define TRC_HVM_MCE             (TRC_HVM_HANDLER + 0x15)
+#define TRC_HVM_IOPORT_READ     (TRC_HVM_HANDLER + 0x16)
+#define TRC_HVM_IOMEM_READ      (TRC_HVM_HANDLER + 0x17)
+#define TRC_HVM_CLTS            (TRC_HVM_HANDLER + 0x18)
+#define TRC_HVM_LMSW            (TRC_HVM_HANDLER + 0x19)
+#define TRC_HVM_LMSW64          (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19)
+#define TRC_HVM_RDTSC           (TRC_HVM_HANDLER + 0x1a)
+#define TRC_HVM_INTR_WINDOW     (TRC_HVM_HANDLER + 0x20)
+#define TRC_HVM_NPF             (TRC_HVM_HANDLER + 0x21)
+#define TRC_HVM_REALMODE_EMULATE (TRC_HVM_HANDLER + 0x22)
+#define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
+#define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
+#define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
+
+#define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
+#define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
+
+/* Trace events for power management (TRC_HW_PM) */
+#define TRC_PM_FREQ_CHANGE      (TRC_HW_PM + 0x01)
+#define TRC_PM_IDLE_ENTRY       (TRC_HW_PM + 0x02)
+#define TRC_PM_IDLE_EXIT        (TRC_HW_PM + 0x03)
+
+/* Trace events for IRQs */
+#define TRC_HW_IRQ_MOVE_CLEANUP_DELAY (TRC_HW_IRQ + 0x1)
+#define TRC_HW_IRQ_MOVE_CLEANUP       (TRC_HW_IRQ + 0x2)
+#define TRC_HW_IRQ_BIND_VECTOR        (TRC_HW_IRQ + 0x3)
+#define TRC_HW_IRQ_CLEAR_VECTOR       (TRC_HW_IRQ + 0x4)
+#define TRC_HW_IRQ_MOVE_FINISH        (TRC_HW_IRQ + 0x5)
+#define TRC_HW_IRQ_ASSIGN_VECTOR      (TRC_HW_IRQ + 0x6)
+#define TRC_HW_IRQ_UNMAPPED_VECTOR    (TRC_HW_IRQ + 0x7)
+#define TRC_HW_IRQ_HANDLED            (TRC_HW_IRQ + 0x8)
+
+/*
+ * Event Flags
+ *
+ * Some events (e.g., TRC_PV_TRAP and TRC_HVM_IOMEM_READ) have multiple
+ * record formats.  These event flags distinguish between the
+ * different formats.
+ */
+#define TRC_64_FLAG 0x100 /* Addresses are 64 bits (instead of 32 bits) */
+
+/* This structure represents a single trace buffer record. */
+struct t_rec {
+    uint32_t event:28;
+    uint32_t extra_u32:3;         /* # entries in trailing extra_u32[] array */
+    uint32_t cycles_included:1;   /* u.cycles or u.no_cycles? */
+    union {
+        struct {
+            uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */
+            uint32_t extra_u32[7];         /* event data items */
+        } cycles;
+        struct {
+            uint32_t extra_u32[7];         /* event data items */
+        } nocycles;
+    } u;
+};
+
+/*
+ * This structure contains the metadata for a single trace buffer.  The
+ * cons and prod fields index into an array of struct t_rec's.
+ */
+struct t_buf {
+    /* Assume the data buffer size is X.  X is generally not a power of 2.
+     * CONS and PROD are incremented modulo (2*X):
+     *     0 <= cons < 2*X
+     *     0 <= prod < 2*X
+     * This is done because addition modulo X breaks at 2^32 when X is not a
+     * power of 2:
+     *     (((2^32 - 1) % X) + 1) % X != (2^32) % X
+     */
+    uint32_t cons;   /* Offset of next item to be consumed by control tools. */
+    uint32_t prod;   /* Offset of next item to be produced by Xen.           */
+    /*  Records follow immediately after the meta-data header.    */
+};
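+
+/*
+ * Consumer sketch (not part of the original Xen header): with a data
+ * area of X bytes, cons and prod run modulo 2*X, so the offset of the
+ * next record to consume is
+ *
+ *     off = buf->cons >= X ? buf->cons - X : buf->cons;
+ *
+ * and the buffer is empty exactly when cons == prod.
+ */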
+
+/* Structure used to pass the MFNs of the trace buffers back to trace consumers.
+ * Each entry of mfn_offset is an offset into the mapped structure where the
+ * MFN list for that cpu is held:
+ * MFNs will be at ((unsigned long *)(t_info))+(t_info->mfn_offset[cpu]).
+ */
+struct t_info {
+    uint16_t tbuf_size; /* Size in pages of each trace buffer */
+    uint16_t mfn_offset[];  /* Offset within t_info structure of the page list per cpu */
+    /* MFN lists immediately after the header */
+};
+
+#endif /* __XEN_PUBLIC_TRACE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/vcpu.h
@@ -1,0 +1,240 @@
+/******************************************************************************
+ * vcpu.h
+ * 
+ * VCPU initialisation, query, and hotplug.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_VCPU_H__
+#define __XEN_PUBLIC_VCPU_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ *  int vcpu_op(int cmd, int vcpuid, void *extra_args)
+ * @cmd        == VCPUOP_??? (VCPU operation).
+ * @vcpuid     == VCPU to operate on.
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Initialise a VCPU. Each VCPU can be initialised only once. A 
+ * newly-initialised VCPU will not run until it is brought up by VCPUOP_up.
+ * 
+ * @extra_arg == pointer to vcpu_guest_context structure containing initial
+ *               state for the VCPU.
+ */
+#define VCPUOP_initialise            0
+
+/*
+ * Bring up a VCPU. This makes the VCPU runnable. This operation will fail
+ * if the VCPU has not been initialised (VCPUOP_initialise).
+ */
+#define VCPUOP_up                    1
+
+/*
+ * Bring down a VCPU (i.e., make it non-runnable).
+ * There are a few caveats that callers should observe:
+ *  1. This operation may return, and VCPU_is_up may return false, before the
+ *     VCPU stops running (i.e., the command is asynchronous). It is a good
+ *     idea to ensure that the VCPU has entered a non-critical loop before
+ *     bringing it down. Alternatively, this operation is guaranteed
+ *     synchronous if invoked by the VCPU itself.
+ *  2. After a VCPU is initialised, there is currently no way to drop all its
+ *     references to domain memory. Even a VCPU that is down still holds
+ *     memory references via its pagetable base pointer and GDT. It is good
+ *     practice to move a VCPU onto an 'idle' or default page table, LDT and
+ *     GDT before bringing it down.
+ */
+#define VCPUOP_down                  2
+
+/* Returns 1 if the given VCPU is up. */
+#define VCPUOP_is_up                 3
+
+/*
+ * Return information about the state and running time of a VCPU.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
+ */
+#define VCPUOP_get_runstate_info     4
+struct vcpu_runstate_info {
+    /* VCPU's current state (RUNSTATE_*). */
+    int      state;
+    /* When was current state entered (system time, ns)? */
+    uint64_t state_entry_time;
+    /*
+     * Time spent in each RUNSTATE_* (ns). The sum of these times is
+     * guaranteed not to drift from system time.
+     */
+    uint64_t time[4];
+};
+typedef struct vcpu_runstate_info vcpu_runstate_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t);
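+
+/*
+ * Usage sketch (not part of the original Xen header), using the
+ * RUNSTATE_* values below to estimate time stolen from this VCPU:
+ *
+ *     struct vcpu_runstate_info ri;
+ *
+ *     HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, vcpuid, &ri);
+ *     stolen = ri.time[RUNSTATE_runnable] + ri.time[RUNSTATE_offline];
+ */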
+
+/* VCPU is currently running on a physical CPU. */
+#define RUNSTATE_running  0
+
+/* VCPU is runnable, but not currently scheduled on any physical CPU. */
+#define RUNSTATE_runnable 1
+
+/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
+#define RUNSTATE_blocked  2
+
+/*
+ * VCPU is not runnable, but it is not blocked.
+ * This is a 'catch all' state for things like hotplug and pauses by the
+ * system administrator (or for critical sections in the hypervisor).
+ * RUNSTATE_blocked dominates this state (it is the preferred state).
+ */
+#define RUNSTATE_offline  3
+
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address may be a virtual address, a physical address
+ *     or a guest handle, depending on the platform. On x86 systems a
+ *     virtual address or a guest handle should be registered.
+ *  2. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_memory_area 5
+struct vcpu_register_runstate_memory_area {
+    union {
+        XEN_GUEST_HANDLE(vcpu_runstate_info_t) h;
+        struct vcpu_runstate_info *v;
+        uint64_t p;
+    } addr;
+};
+typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t);
+
+/*
+ * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
+ * which can be set via these commands. Periods smaller than one millisecond
+ * may not be supported.
+ */
+#define VCPUOP_set_periodic_timer    6 /* arg == vcpu_set_periodic_timer_t */
+#define VCPUOP_stop_periodic_timer   7 /* arg == NULL */
+struct vcpu_set_periodic_timer {
+    uint64_t period_ns;
+};
+typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
+
+/*
+ * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
+ * timer which can be set via these commands.
+ */
+#define VCPUOP_set_singleshot_timer  8 /* arg == vcpu_set_singleshot_timer_t */
+#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */
+struct vcpu_set_singleshot_timer {
+    uint64_t timeout_abs_ns;   /* Absolute system time value in nanoseconds. */
+    uint32_t flags;            /* VCPU_SSHOTTMR_??? */
+};
+typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
+
+/* Flags to VCPUOP_set_singleshot_timer. */
+ /* Require the timeout to be in the future (return -ETIME if it's passed). */
+#define _VCPU_SSHOTTMR_future (0)
+#define VCPU_SSHOTTMR_future  (1U << _VCPU_SSHOTTMR_future)
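+
+/*
+ * Usage sketch (not part of the original Xen header): arm a one-shot
+ * timer 1ms from now, where now() stands for the guest's reading of
+ * Xen system time; fail with -ETIME rather than fire late:
+ *
+ *     struct vcpu_set_singleshot_timer st;
+ *
+ *     st.timeout_abs_ns = now() + 1000000;
+ *     st.flags = VCPU_SSHOTTMR_future;
+ *     HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, vcpuid, &st);
+ */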
+
+/* 
+ * Register a memory location in the guest address space for the
+ * vcpu_info structure.  This allows the guest to place the vcpu_info
+ * structure in a convenient place, such as in a per-cpu data area.
+ * The pointer need not be page aligned, but the structure must not
+ * cross a page boundary.
+ *
+ * This may be called only once per vcpu.
+ */
+#define VCPUOP_register_vcpu_info   10  /* arg == vcpu_register_vcpu_info_t */
+struct vcpu_register_vcpu_info {
+    uint64_t mfn;    /* mfn of page to place vcpu_info */
+    uint32_t offset; /* offset within page */
+    uint32_t rsvd;   /* unused */
+};
+typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
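+
+/*
+ * Usage sketch (not part of the original Xen header; mfn_of() and
+ * VCPU_INFO_OFFSET are hypothetical names for the guest's own lookup):
+ *
+ *     struct vcpu_register_vcpu_info r;
+ *
+ *     r.mfn = mfn_of(percpu_page);
+ *     r.offset = VCPU_INFO_OFFSET;        offset of vcpu_info in the page
+ *     r.rsvd = 0;
+ *     HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, vcpuid, &r);
+ */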
+
+/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
+#define VCPUOP_send_nmi             11
+
+/* 
+ * Get the physical ID information for a pinned vcpu's underlying physical
+ * processor.  The physical ID information is architecture-specific.
+ * On x86: id[31:0]=apic_id, id[63:32]=acpi_id.
+ * This command returns -EINVAL if it is not a valid operation for this VCPU.
+ */
+#define VCPUOP_get_physid           12 /* arg == vcpu_get_physid_t */
+struct vcpu_get_physid {
+    uint64_t phys_id;
+};
+typedef struct vcpu_get_physid vcpu_get_physid_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t);
+#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid))
+#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32))
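+
+/*
+ * Usage sketch (not part of the original Xen header):
+ *
+ *     struct vcpu_get_physid p;
+ *
+ *     if(HYPERVISOR_vcpu_op(VCPUOP_get_physid, vcpuid, &p) == 0){
+ *             apicid = xen_vcpu_physid_to_x86_apicid(p.phys_id);
+ *             acpiid = xen_vcpu_physid_to_x86_acpiid(p.phys_id);
+ *     }
+ */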
+
+/* 
+ * Register a memory location to get a secondary copy of the vcpu time
+ * parameters.  The master copy still exists as part of the vcpu shared
+ * memory area, and this secondary copy is updated whenever the master copy
+ * is updated (and using the same versioning scheme for synchronisation).
+ *
+ * The intent is that this copy may be mapped (RO) into userspace so
+ * that usermode can compute system time using the time info and the
+ * tsc.  Usermode will see an array of vcpu_time_info structures, one
+ * for each vcpu, and choose the right one by an existing mechanism
+ * which allows it to get the current vcpu number (such as via a
+ * segment limit).  It can then apply the normal algorithm to compute
+ * system time from the tsc.
+ *
+ * @extra_arg == pointer to vcpu_register_time_info_memory_area structure.
+ */
+#define VCPUOP_register_vcpu_time_memory_area   13
+DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t);
+struct vcpu_register_time_memory_area {
+    union {
+        XEN_GUEST_HANDLE(vcpu_time_info_t) h;
+        struct vcpu_time_info *v;
+        uint64_t p;
+    } addr;
+};
+typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
+
+#endif /* __XEN_PUBLIC_VCPU_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/version.h
@@ -1,0 +1,96 @@
+/******************************************************************************
+ * version.h
+ * 
+ * Xen version, type, and compile information.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Nguyen Anh Quynh <[email protected]>
+ * Copyright (c) 2005, Keir Fraser <[email protected]>
+ */
+
+#ifndef __XEN_PUBLIC_VERSION_H__
+#define __XEN_PUBLIC_VERSION_H__
+
+#include "xen.h"
+
+/* NB. All ops return zero on success, except XENVER_{version,pagesize} */
+
+/* arg == NULL; returns major:minor (16:16). */
+#define XENVER_version      0
+
+/* arg == xen_extraversion_t. */
+#define XENVER_extraversion 1
+typedef char xen_extraversion_t[16];
+#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t))
+
+/* arg == xen_compile_info_t. */
+#define XENVER_compile_info 2
+struct xen_compile_info {
+    char compiler[64];
+    char compile_by[16];
+    char compile_domain[32];
+    char compile_date[32];
+};
+typedef struct xen_compile_info xen_compile_info_t;
+
+#define XENVER_capabilities 3
+typedef char xen_capabilities_info_t[1024];
+#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t))
+
+#define XENVER_changeset 4
+typedef char xen_changeset_info_t[64];
+#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
+
+#define XENVER_platform_parameters 5
+struct xen_platform_parameters {
+    xen_ulong_t virt_start;
+};
+typedef struct xen_platform_parameters xen_platform_parameters_t;
+
+#define XENVER_get_features 6
+struct xen_feature_info {
+    unsigned int submap_idx;    /* IN: which 32-bit submap to return */
+    uint32_t     submap;        /* OUT: 32-bit submap */
+};
+typedef struct xen_feature_info xen_feature_info_t;
+
+/* Declares the features reported by XENVER_get_features. */
+#include "features.h"
+
+/* arg == NULL; returns host memory page size. */
+#define XENVER_pagesize 7
+
+/* arg == xen_domain_handle_t. */
+#define XENVER_guest_handle 8
+
+#define XENVER_commandline 9
+typedef char xen_commandline_t[1024];
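+
+/*
+ * Illustrative sketch: querying the running hypervisor's version with the
+ * conventional HYPERVISOR_xen_version() wrapper (an assumption here).
+ */
+#if 0
+xen_extraversion_t extra;
+int v;
+
+v = HYPERVISOR_xen_version(XENVER_version, 0);    /* major:minor, 16:16 */
+HYPERVISOR_xen_version(XENVER_extraversion, extra);
+print("xen %d.%d%s\n", v >> 16, v & 0xffff, extra);
+#endif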
+
+#endif /* __XEN_PUBLIC_VERSION_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/xen-compat.h
@@ -1,0 +1,44 @@
+/******************************************************************************
+ * xen-compat.h
+ * 
+ * Guest OS interface to Xen.  Compatibility layer.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Christian Limpach
+ */
+
+#ifndef __XEN_PUBLIC_XEN_COMPAT_H__
+#define __XEN_PUBLIC_XEN_COMPAT_H__
+
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040300
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+/* Xen is built with matching headers and implements the latest interface. */
+#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__
+#elif !defined(__XEN_INTERFACE_VERSION__)
+/* Guests which do not specify a version get the legacy interface. */
+#define __XEN_INTERFACE_VERSION__ 0x00000000
+#endif
+
+#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__
+#error "These header files do not support the requested interface version."
+#endif
+
+#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/xen.h
@@ -1,0 +1,890 @@
+/******************************************************************************
+ * xen.h
+ * 
+ * Guest OS interface to Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_XEN_H__
+#define __XEN_PUBLIC_XEN_H__
+
+#include "xen-compat.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "arch-x86/xen.h"
+#elif defined(__arm__) || defined (__aarch64__)
+#include "arch-arm.h"
+#else
+#error "Unsupported architecture"
+#endif
+
+#ifndef __ASSEMBLY__
+/* Guest handles for primitive C types. */
+DEFINE_XEN_GUEST_HANDLE(char);
+__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
+DEFINE_XEN_GUEST_HANDLE(int);
+__DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
+#if __XEN_INTERFACE_VERSION__ < 0x00040300
+DEFINE_XEN_GUEST_HANDLE(long);
+__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+#endif
+DEFINE_XEN_GUEST_HANDLE(void);
+
+DEFINE_XEN_GUEST_HANDLE(uint64_t);
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
+DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
+#endif
+
+/*
+ * HYPERCALLS
+ */
+
+/* `incontents 100 hcalls List of hypercalls
+ * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*()
+ */
+
+#define __HYPERVISOR_set_trap_table        0
+#define __HYPERVISOR_mmu_update            1
+#define __HYPERVISOR_set_gdt               2
+#define __HYPERVISOR_stack_switch          3
+#define __HYPERVISOR_set_callbacks         4
+#define __HYPERVISOR_fpu_taskswitch        5
+#define __HYPERVISOR_sched_op_compat       6 /* compat since 0x00030101 */
+#define __HYPERVISOR_platform_op           7
+#define __HYPERVISOR_set_debugreg          8
+#define __HYPERVISOR_get_debugreg          9
+#define __HYPERVISOR_update_descriptor    10
+#define __HYPERVISOR_memory_op            12
+#define __HYPERVISOR_multicall            13
+#define __HYPERVISOR_update_va_mapping    14
+#define __HYPERVISOR_set_timer_op         15
+#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */
+#define __HYPERVISOR_xen_version          17
+#define __HYPERVISOR_console_io           18
+#define __HYPERVISOR_physdev_op_compat    19 /* compat since 0x00030202 */
+#define __HYPERVISOR_grant_table_op       20
+#define __HYPERVISOR_vm_assist            21
+#define __HYPERVISOR_update_va_mapping_otherdomain 22
+#define __HYPERVISOR_iret                 23 /* x86 only */
+#define __HYPERVISOR_vcpu_op              24
+#define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op            26
+#define __HYPERVISOR_xsm_op               27
+#define __HYPERVISOR_nmi_op               28
+#define __HYPERVISOR_sched_op             29
+#define __HYPERVISOR_callback_op          30
+#define __HYPERVISOR_xenoprof_op          31
+#define __HYPERVISOR_event_channel_op     32
+#define __HYPERVISOR_physdev_op           33
+#define __HYPERVISOR_hvm_op               34
+#define __HYPERVISOR_sysctl               35
+#define __HYPERVISOR_domctl               36
+#define __HYPERVISOR_kexec_op             37
+#define __HYPERVISOR_tmem_op              38
+#define __HYPERVISOR_xc_reserved_op       39 /* reserved for XenClient */
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0               48
+#define __HYPERVISOR_arch_1               49
+#define __HYPERVISOR_arch_2               50
+#define __HYPERVISOR_arch_3               51
+#define __HYPERVISOR_arch_4               52
+#define __HYPERVISOR_arch_5               53
+#define __HYPERVISOR_arch_6               54
+#define __HYPERVISOR_arch_7               55
+
+/* ` } */
+
+/*
+ * HYPERCALL COMPATIBILITY.
+ */
+
+/* New sched_op hypercall introduced in 0x00030101. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030101
+#undef __HYPERVISOR_sched_op
+#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat
+#endif
+
+/* New event-channel and physdev hypercalls introduced in 0x00030202. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030202
+#undef __HYPERVISOR_event_channel_op
+#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat
+#undef __HYPERVISOR_physdev_op
+#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat
+#endif
+
+/* New platform_op hypercall introduced in 0x00030204. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030204
+#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op
+#endif
+
+/* 
+ * VIRTUAL INTERRUPTS
+ * 
+ * Virtual interrupts that a guest OS may receive from Xen.
+ * 
+ * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
+ * global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
+ * The latter can be allocated only once per guest: they must initially be
+ * allocated to VCPU0 but can subsequently be re-bound.
+ */
+/* ` enum virq { */
+#define VIRQ_TIMER      0  /* V. Timebase update, and/or requested timeout.  */
+#define VIRQ_DEBUG      1  /* V. Request guest to dump debug info.           */
+#define VIRQ_CONSOLE    2  /* G. (DOM0) Bytes received on emergency console. */
+#define VIRQ_DOM_EXC    3  /* G. (DOM0) Exceptional event for some domain.   */
+#define VIRQ_TBUF       4  /* G. (DOM0) Trace buffer has records available.  */
+#define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
+#define VIRQ_XENOPROF   7  /* V. XenOprofile interrupt: new sample available */
+#define VIRQ_CON_RING   8  /* G. (DOM0) Bytes received on console            */
+#define VIRQ_PCPU_STATE 9  /* G. (DOM0) PCPU state changed                   */
+#define VIRQ_MEM_EVENT  10 /* G. (DOM0) A memory event has occurred          */
+#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient                     */
+#define VIRQ_ENOMEM     12 /* G. (DOM0) Low on heap memory       */
+
+/* Architecture-specific VIRQ definitions. */
+#define VIRQ_ARCH_0    16
+#define VIRQ_ARCH_1    17
+#define VIRQ_ARCH_2    18
+#define VIRQ_ARCH_3    19
+#define VIRQ_ARCH_4    20
+#define VIRQ_ARCH_5    21
+#define VIRQ_ARCH_6    22
+#define VIRQ_ARCH_7    23
+/* ` } */
+
+#define NR_VIRQS       24
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[],
+ * `                       unsigned count, unsigned *done_out,
+ * `                       unsigned foreigndom)
+ * `
+ * @reqs is an array of mmu_update_t structures ((ptr, val) pairs).
+ * @count is the length of the above array.
+ * @done_out is an output parameter indicating number of completed operations
+ * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this
+ *                    hypercall invocation. Can be DOMID_SELF.
+ * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced
+ *                     in this hypercall invocation. The value of this field
+ *                     (x) encodes the PFD as follows:
+ *                     x == 0 => PFD == DOMID_SELF
+ *                     x != 0 => PFD == x - 1
+ * 
+ * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command.
+ * -------------
+ * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
+ * Updates an entry in a page table belonging to PFD. If updating an L1 table,
+ * and the new table entry is valid/present, the mapped frame must belong to
+ * FD. If attempting to map an I/O page then the caller assumes the privilege
+ * of the FD.
+ * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
+ * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
+ * ptr[:2]  -- Machine address of the page-table entry to modify.
+ * val      -- Value to write.
+ *
+ * There are also certain implicit requirements when using this hypercall. The
+ * pages that make up a pagetable must be mapped read-only in the guest.
+ * This prevents uncontrolled guest updates to the pagetable. Xen strictly
+ * enforces this, and will disallow any pagetable update which would end up
+ * mapping a pagetable page RW, and will disallow using any writable page as
+ * a pagetable. In practice this means that when constructing a page table
+ * for a process, thread, etc, we MUST be very diligent in following these
+ * rules:
+ *  1). Start with top-level page (PGD or in Xen language: L4). Fill out
+ *      the entries.
+ *  2). Keep on going, filling out the upper (PUD or L3), and middle (PMD
+ *      or L2).
+ *  3). Start filling out the PTE table (L1) with the PTE entries. Once
+ *  	done, make sure to set each of those entries to RO (so writeable bit
+ *  	is unset). Once that has been completed, set the PMD (L2) for this
+ *  	PTE table as RO.
+ *  4). When completed with all of the PMD (L2) entries, and all of them have
+ *  	been set to RO, make sure to set RO the PUD (L3). Do the same
+ *  	operation on PGD (L4) pagetable entries that have a PUD (L3) entry.
+ *  5). Now before you can use those pages (so setting the cr3), you MUST also
+ *      pin them so that the hypervisor can verify the entries. This is done
+ *      via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame
+ *      number of the PGD (L4)). At this point the HYPERVISOR_mmuext_op(
+ *      MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be
+ *      issued.
+ * For 32-bit guests the L4 is not used (there are fewer pagetable levels);
+ * use the L3 instead.
+ * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE
+ * hypercall. Also if so desired the OS can also try to write to the PTE
+ * and be trapped by the hypervisor (as the PTE entry is RO).
+ *
+ * To deallocate the pages, the operations are the reverse of the steps
+ * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the
+ * pagetable MUST not be in use (meaning that the cr3 is not set to it).
+ * 
+ * ptr[1:0] == MMU_MACHPHYS_UPDATE:
+ * Updates an entry in the machine->pseudo-physical mapping table.
+ * ptr[:2]  -- Machine address within the frame whose mapping to modify.
+ *             The frame must belong to the FD, if one is specified.
+ * val      -- Value to write into the mapping entry.
+ * 
+ * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
+ * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
+ * with those in @val.
+ *
+ * @val is usually the machine frame number along with some attributes.
+ * The attributes by default follow the architecture-defined bits, meaning
+ * that on an x86-64 machine using the four-level page table layout, the
+ * layout of val is:
+ *  - 63 if set means No execute (NX)
+ *  - 46-13 the machine frame number
+ *  - 12 available for guest
+ *  - 11 available for guest
+ *  - 10 available for guest
+ *  - 9 available for guest
+ *  - 8 global
+ *  - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages)
+ *  - 6 dirty
+ *  - 5 accessed
+ *  - 4 page cached disabled
+ *  - 3 page write through
+ *  - 2 userspace accessible
+ *  - 1 writeable
+ *  - 0 present
+ *
+ *  The one bit that does not fit the default layout is PAGE_PSE (also
+ *  called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER commands to
+ *  HYPERVISOR_mmuext_op serve as the mechanism to make a page 4MB
+ *  (or 2MB) instead of using the PAGE_PSE bit.
+ *
+ *  The reason that PAGE_PSE (bit 7) is not utilized is that Xen uses it
+ *  as the Page Attribute Table (PAT) bit - for details please refer to
+ *  Intel SDM 10.12. The PAT allows setting the caching attributes of
+ *  pages instead of using MTRRs.
+ *
+ *  The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits):
+ *                    PAT4                 PAT0
+ *  +-----+-----+----+----+----+-----+----+----+
+ *  | UC  | UC- | WC | WB | UC | UC- | WC | WB |  <= Linux
+ *  +-----+-----+----+----+----+-----+----+----+
+ *  | UC  | UC- | WT | WB | UC | UC- | WT | WB |  <= BIOS (default when machine boots)
+ *  +-----+-----+----+----+----+-----+----+----+
+ *  | rsv | rsv | WP | WC | UC | UC- | WT | WB |  <= Xen
+ *  +-----+-----+----+----+----+-----+----+----+
+ *
+ *  The lookup of this index table translates to looking up
+ *  Bit 7, Bit 4, and Bit 3 of the val entry:
+ *
+ *  PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3).
+ *
+ *  If all bits are off we are using PAT0; if only bit 3 is on, PAT1;
+ *  if bits 3 and 4 are on, PAT3; and so on.
+ *
+ *  As you can see, the Linux PAT1 translates to PAT4 under Xen, which means
+ *  that a guest following Linux's PAT setup that would like Write Combined
+ *  pages MUST use the PAT4 entry, i.e. have bit 7 (PAGE_PAT) set. For
+ *  example, Linux uses only PAT0, PAT1, and PAT3 for caching:
+ *
+ *   WB = none (so PAT0)
+ *   WC = PWT (bit 3 on)
+ *   UC = PWT | PCD (bits 3 and 4 are on).
+ *
+ * To make this work with Xen, the guest needs to translate the WC bit as
+ * follows:
+ *
+ *  PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
+ *
+ * and to translate back:
+ *
+ *  PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
+ */
+#define MMU_NORMAL_PT_UPDATE      0 /* checked '*ptr = val'. ptr is MA.      */
+#define MMU_MACHPHYS_UPDATE       1 /* ptr = MA of frame to modify entry for */
+#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */
+
+/*
+ * MMU EXTENDED OPERATIONS
+ *
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_mmuext_op(mmuext_op_t uops[],
+ * `                      unsigned int count,
+ * `                      unsigned int *pdone,
+ * `                      unsigned int foreigndom)
+ */
+/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
+ *
+ * cmd: MMUEXT_(UN)PIN_*_TABLE
+ * mfn: Machine frame number to be (un)pinned as a p.t. page.
+ *      The frame must belong to the FD, if one is specified.
+ *
+ * cmd: MMUEXT_NEW_BASEPTR
+ * mfn: Machine frame number of new page-table base to install in MMU.
+ *
+ * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
+ * mfn: Machine frame number of new page-table base to install in MMU
+ *      when in user space.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_LOCAL
+ * No additional arguments. Flushes local TLB.
+ *
+ * cmd: MMUEXT_INVLPG_LOCAL
+ * linear_addr: Linear address to be flushed from the local TLB.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_MULTI
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
+ *
+ * cmd: MMUEXT_INVLPG_MULTI
+ * linear_addr: Linear address to be flushed.
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_ALL
+ * No additional arguments. Flushes all VCPUs' TLBs.
+ *
+ * cmd: MMUEXT_INVLPG_ALL
+ * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
+ *
+ * cmd: MMUEXT_FLUSH_CACHE
+ * No additional arguments. Writes back and flushes cache contents.
+ *
+ * cmd: MMUEXT_FLUSH_CACHE_GLOBAL
+ * No additional arguments. Writes back and flushes cache contents
+ * on all CPUs in the system.
+ *
+ * cmd: MMUEXT_SET_LDT
+ * linear_addr: Linear address of LDT base (NB. must be page-aligned).
+ * nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
+ *
+ * cmd: MMUEXT_[UN]MARK_SUPER
+ * mfn: Machine frame number of head of superpage to be [un]marked.
+ */
+/* ` enum mmuext_cmd { */
+#define MMUEXT_PIN_L1_TABLE      0
+#define MMUEXT_PIN_L2_TABLE      1
+#define MMUEXT_PIN_L3_TABLE      2
+#define MMUEXT_PIN_L4_TABLE      3
+#define MMUEXT_UNPIN_TABLE       4
+#define MMUEXT_NEW_BASEPTR       5
+#define MMUEXT_TLB_FLUSH_LOCAL   6
+#define MMUEXT_INVLPG_LOCAL      7
+#define MMUEXT_TLB_FLUSH_MULTI   8
+#define MMUEXT_INVLPG_MULTI      9
+#define MMUEXT_TLB_FLUSH_ALL    10
+#define MMUEXT_INVLPG_ALL       11
+#define MMUEXT_FLUSH_CACHE      12
+#define MMUEXT_SET_LDT          13
+#define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE       16
+#define MMUEXT_COPY_PAGE        17
+#define MMUEXT_FLUSH_CACHE_GLOBAL 18
+#define MMUEXT_MARK_SUPER       19
+#define MMUEXT_UNMARK_SUPER     20
+/* ` } */
+
+#ifndef __ASSEMBLY__
+struct mmuext_op {
+    unsigned int cmd; /* => enum mmuext_cmd */
+    union {
+        /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
+         * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
+        xen_pfn_t     mfn;
+        /* INVLPG_LOCAL, INVLPG_MULTI, INVLPG_ALL, SET_LDT */
+        unsigned long linear_addr;
+    } arg1;
+    union {
+        /* SET_LDT */
+        unsigned int nr_ents;
+        /* TLB_FLUSH_MULTI, INVLPG_MULTI */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+        XEN_GUEST_HANDLE(const_void) vcpumask;
+#else
+        const void *vcpumask;
+#endif
+        /* COPY_PAGE */
+        xen_pfn_t src_mfn;
+    } arg2;
+};
+typedef struct mmuext_op mmuext_op_t;
+DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
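+
+/*
+ * Illustrative sketch: pinning a new top-level page table before loading
+ * it, per the pinning rules above (L3 is the top level for a 32-bit PAE
+ * guest).  The HYPERVISOR_mmuext_op() wrapper name and topmfn are
+ * assumptions of this example.
+ */
+#if 0
+struct mmuext_op op;
+
+op.cmd = MMUEXT_PIN_L3_TABLE;
+op.arg1.mfn = topmfn;               /* MFN of the new page directory */
+if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) < 0)
+    panic("pin");
+op.cmd = MMUEXT_NEW_BASEPTR;        /* now it may be installed */
+HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF);
+#endif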
+#endif
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_va_mapping(unsigned long va, u64 val,
+ * `                              enum uvm_flags flags)
+ * `
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, u64 val,
+ * `                                          enum uvm_flags flags,
+ * `                                          domid_t domid)
+ * `
+ * ` @va: The virtual address whose mapping we want to change
+ * ` @val: The new page table entry, must contain a machine address
+ * ` @flags: Control TLB flushes
+ */
+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
+/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap.   */
+/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer.         */
+/* ` enum uvm_flags { */
+#define UVMF_NONE               (0UL<<0) /* No flushing at all.   */
+#define UVMF_TLB_FLUSH          (1UL<<0) /* Flush entire TLB(s).  */
+#define UVMF_INVLPG             (2UL<<0) /* Flush only one entry. */
+#define UVMF_FLUSHTYPE_MASK     (3UL<<0)
+#define UVMF_MULTI              (0UL<<2) /* Flush subset of TLBs. */
+#define UVMF_LOCAL              (0UL<<2) /* Flush local TLB.      */
+#define UVMF_ALL                (1UL<<2) /* Flush all TLBs.       */
+/* ` } */
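+
+/*
+ * Sketch only: remap one virtual address and flush just that entry from
+ * the local TLB.  The HYPERVISOR_update_va_mapping() wrapper name, va and
+ * newpte are assumptions of this example.
+ */
+#if 0
+if (HYPERVISOR_update_va_mapping(va, newpte, UVMF_INVLPG | UVMF_LOCAL) < 0)
+    panic("update_va_mapping");
+#endif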
+
+/*
+ * Commands to HYPERVISOR_console_io().
+ */
+#define CONSOLEIO_write         0
+#define CONSOLEIO_read          1
+
+/*
+ * Commands to HYPERVISOR_vm_assist().
+ */
+#define VMASST_CMD_enable                0
+#define VMASST_CMD_disable               1
+
+/* x86/32 guests: simulate full 4GB segment limits. */
+#define VMASST_TYPE_4gb_segments         0
+
+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
+#define VMASST_TYPE_4gb_segments_notify  1
+
+/*
+ * x86 guests: support writes to bottom-level PTEs.
+ * NB1. Page-directory entries cannot be written.
+ * NB2. Guest must continue to remove all writable mappings of PTEs.
+ */
+#define VMASST_TYPE_writable_pagetables  2
+
+/* x86/PAE guests: support PDPTs above 4GB. */
+#define VMASST_TYPE_pae_extended_cr3     3
+
+#define MAX_VMASST_TYPE                  3
+
+#ifndef __ASSEMBLY__
+
+typedef uint16_t domid_t;
+
+/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
+#define DOMID_FIRST_RESERVED (0x7FF0U)
+
+/* DOMID_SELF is used in certain contexts to refer to oneself. */
+#define DOMID_SELF (0x7FF0U)
+
+/*
+ * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
+ * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
+ * is useful to ensure that no mappings to the OS's own heap are accidentally
+ * installed. (e.g., in Linux this could cause havoc as reference counts
+ * aren't adjusted on the I/O-mapping code path).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
+ * be specified by any calling domain.
+ */
+#define DOMID_IO   (0x7FF1U)
+
+/*
+ * DOMID_XEN is used to allow privileged domains to map restricted parts of
+ * Xen's heap space (e.g., the machine_to_phys table).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
+ * the caller is privileged.
+ */
+#define DOMID_XEN  (0x7FF2U)
+
+/* DOMID_COW is used as the owner of sharable pages. */
+#define DOMID_COW  (0x7FF3U)
+
+/* DOMID_INVALID is used to identify pages with unknown owner. */
+#define DOMID_INVALID (0x7FF4U)
+
+/* Idle domain. */
+#define DOMID_IDLE (0x7FFFU)
+
+/*
+ * Send an array of these to HYPERVISOR_mmu_update().
+ * NB. The fields are natural pointer/address size for this architecture.
+ */
+struct mmu_update {
+    uint64_t ptr;       /* Machine address of PTE. */
+    uint64_t val;       /* New contents of PTE.    */
+};
+typedef struct mmu_update mmu_update_t;
+DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
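+
+/*
+ * Illustrative sketch: one checked PTE write, as described for
+ * MMU_NORMAL_PT_UPDATE above.  The HYPERVISOR_mmu_update() wrapper name,
+ * ptema and newpte are assumptions of this example.
+ */
+#if 0
+struct mmu_update u;
+unsigned int done;
+
+u.ptr = ptema | MMU_NORMAL_PT_UPDATE;   /* machine address of the PTE */
+u.val = newpte;                         /* MFN plus attribute bits */
+if (HYPERVISOR_mmu_update(&u, 1, &done, DOMID_SELF) < 0)
+    panic("mmu_update");
+#endif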
+
+/*
+ * ` enum neg_errnoval
+ * ` HYPERVISOR_multicall(multicall_entry_t call_list[],
+ * `                      unsigned int nr_calls);
+ *
+ * NB. The fields are natural register size for this architecture.
+ */
+struct multicall_entry {
+    unsigned long op, result;
+    unsigned long args[6];
+};
+typedef struct multicall_entry multicall_entry_t;
+DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
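+
+/*
+ * Illustrative sketch: batching two hypercalls in one hypervisor entry.
+ * The HYPERVISOR_multicall() wrapper name is an assumption; per-call
+ * status comes back in each entry's result field.
+ */
+#if 0
+multicall_entry_t mc[2];
+
+mc[0].op = __HYPERVISOR_fpu_taskswitch;   /* set TS */
+mc[0].args[0] = 1;
+mc[1].op = __HYPERVISOR_fpu_taskswitch;   /* clear TS */
+mc[1].args[0] = 0;
+if (HYPERVISOR_multicall(mc, 2) < 0 ||
+    (long)mc[0].result < 0 || (long)mc[1].result < 0)
+    panic("multicall");
+#endif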
+
+/*
+ * Event channel endpoints per domain:
+ *  1024 if a long is 32 bits; 4096 if a long is 64 bits.
+ */
+#define NR_EVENT_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
+
+struct vcpu_time_info {
+    /*
+     * Updates to the following values are preceded and followed by an
+     * increment of 'version'. The guest can therefore detect updates by
+     * looking for changes to 'version'. If the least-significant bit of
+     * the version number is set then an update is in progress and the guest
+     * must wait to read a consistent set of values.
+     * The correct way to interact with the version number is similar to
+     * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry.
+     */
+    uint32_t version;
+    uint32_t pad0;
+    uint64_t tsc_timestamp;   /* TSC at last update of time vals.  */
+    uint64_t system_time;     /* Time, in nanosecs, since boot.    */
+    /*
+     * Current system time:
+     *   system_time +
+     *   ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32)
+     * CPU frequency (Hz):
+     *   ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
+     */
+    uint32_t tsc_to_system_mul;
+    int8_t   tsc_shift;
+    int8_t   pad1[3];
+}; /* 32 bytes */
+typedef struct vcpu_time_info vcpu_time_info_t;
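+
+/*
+ * Illustrative sketch (not part of the interface): reading system time
+ * with the versioning protocol described above.  rdtsc() stands in for
+ * the guest's TSC read, and the single 64-bit multiply is a simplification
+ * of the full 64x32 scaling (both are assumptions of this example).
+ */
+#if 0
+static uint64_t
+xen_system_time(volatile struct vcpu_time_info *t)
+{
+    uint32_t ver;
+    uint64_t base, delta;
+
+    do {
+        do {
+            ver = t->version;
+        } while (ver & 1);              /* update in progress; wait */
+        base = t->system_time;
+        delta = rdtsc() - t->tsc_timestamp;
+        if (t->tsc_shift >= 0)
+            delta <<= t->tsc_shift;
+        else
+            delta >>= -t->tsc_shift;
+        delta = (delta * t->tsc_to_system_mul) >> 32;
+    } while (t->version != ver);        /* torn read; retry */
+    return base + delta;
+}
+#endif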
+
+struct vcpu_info {
+    /*
+     * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+     * a pending notification for a particular VCPU. It is then cleared 
+     * by the guest OS /before/ checking for pending work, thus avoiding
+     * a set-and-check race. Note that the mask is only accessed by Xen
+     * on the CPU that is currently hosting the VCPU. This means that the
+     * pending and mask flags can be updated by the guest without special
+     * synchronisation (i.e., no need for the x86 LOCK prefix).
+     * This may seem suboptimal because if the pending flag is set by
+     * a different CPU then an IPI may be scheduled even when the mask
+     * is set. However, note:
+     *  1. The task of 'interrupt holdoff' is covered by the per-event-
+     *     channel mask bits. A 'noisy' event that is continually being
+     *     triggered can be masked at source at this very precise
+     *     granularity.
+     *  2. The main purpose of the per-VCPU mask is therefore to restrict
+     *     reentrant execution: whether for concurrency control, or to
+     *     prevent unbounded stack usage. Whatever the purpose, we expect
+     *     that the mask will be asserted only for short periods at a time,
+     *     and so the likelihood of a 'spurious' IPI is suitably small.
+     * The mask is read before making an event upcall to the guest: a
+     * non-zero mask therefore guarantees that the VCPU will not receive
+     * an upcall activation. The mask is cleared when the VCPU requests
+     * to block: this avoids wakeup-waiting races.
+     */
+    uint8_t evtchn_upcall_pending;
+    uint8_t evtchn_upcall_mask;
+    xen_ulong_t evtchn_pending_sel;
+    struct arch_vcpu_info arch;
+    struct vcpu_time_info time;
+}; /* 64 bytes (x86) */
+#ifndef __XEN__
+typedef struct vcpu_info vcpu_info_t;
+#endif
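+
+/*
+ * Illustrative sketch of the usual critical-section protocol for the
+ * fields above: mask, do the work, unmask, then re-check PENDING so a
+ * masked-off edge is not lost.  HYPERVISOR_shared_info and
+ * force_evtchn_callback() are assumptions of this example.
+ */
+#if 0
+vcpu_info_t *vi = &HYPERVISOR_shared_info->vcpu_info[0];
+
+vi->evtchn_upcall_mask = 1;         /* like cli() */
+/* ... critical section ... */
+vi->evtchn_upcall_mask = 0;         /* like sti() */
+if (vi->evtchn_upcall_pending)
+    force_evtchn_callback();        /* deliver anything that arrived */
+#endif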
+
+/*
+ * `incontents 200 startofday_shared Start-of-day shared data structure
+ * Xen/kernel shared data -- pointer provided in start_info.
+ *
+ * This structure is defined to be both smaller than a page, and the
+ * only data on the shared page, but may vary in actual size even within
+ * compatible Xen versions; guests should not rely on the size
+ * of this structure remaining constant.
+ */
+struct shared_info {
+    struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
+
+    /*
+     * A domain can create "event channels" on which it can send and receive
+     * asynchronous event notifications. There are three classes of event that
+     * are delivered by this mechanism:
+     *  1. Bi-directional inter- and intra-domain connections. Domains must
+     *     arrange out-of-band to set up a connection (usually by allocating
     *     an unbound 'listener' port and advertising that via a storage service
+     *     such as xenstore).
+     *  2. Physical interrupts. A domain with suitable hardware-access
+     *     privileges can bind an event-channel port to a physical interrupt
+     *     source.
+     *  3. Virtual interrupts ('events'). A domain can bind an event-channel
+     *     port to a virtual interrupt source, such as the virtual-timer
+     *     device or the emergency console.
+     * 
+     * Event channels are addressed by a "port index". Each channel is
+     * associated with two bits of information:
+     *  1. PENDING -- notifies the domain that there is a pending notification
+     *     to be processed. This bit is cleared by the guest.
+     *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
+     *     will cause an asynchronous upcall to be scheduled. This bit is only
+     *     updated by the guest. It is read-only within Xen. If a channel
+     *     becomes pending while the channel is masked then the 'edge' is lost
+     *     (i.e., when the channel is unmasked, the guest must manually handle
+     *     pending notifications as no upcall will be scheduled by Xen).
+     * 
+     * To expedite scanning of pending notifications, any 0->1 pending
+     * transition on an unmasked channel causes a corresponding bit in a
+     * per-vcpu selector word to be set. Each bit in the selector covers a
+     * 'C long' in the PENDING bitfield array.
+     */
+    xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
+    xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
+
+    /*
+     * Wallclock time: updated only by control software. Guests should base
+     * their gettimeofday() syscall on this wallclock-base value.
+     */
+    uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
+    uint32_t wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
+    uint32_t wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
+
+    struct arch_shared_info arch;
+
+};
+#ifndef __XEN__
+typedef struct shared_info shared_info_t;
+#endif
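+
+/*
+ * Illustrative sketch of the two-level scan described above: swap the
+ * per-vcpu selector to zero, then scan each named word of evtchn_pending
+ * against evtchn_mask.  HYPERVISOR_shared_info, xchgl(), ffs() and
+ * evtchn_dispatch() are assumptions of this example.
+ */
+#if 0
+shared_info_t *s = HYPERVISOR_shared_info;
+vcpu_info_t *vi = &s->vcpu_info[0];
+xen_ulong_t sel, pend;
+int i, port;
+
+sel = xchgl(&vi->evtchn_pending_sel, 0);
+while (sel) {
+    i = ffs(sel) - 1;                   /* selector bit -> word index */
+    sel &= ~((xen_ulong_t)1 << i);
+    pend = s->evtchn_pending[i] & ~s->evtchn_mask[i];
+    while (pend) {
+        port = i*8*sizeof(xen_ulong_t) + ffs(pend) - 1;
+        pend &= pend - 1;               /* clear lowest set bit */
+        evtchn_dispatch(port);          /* handler clears PENDING */
+    }
+}
+#endif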
+
+/*
+ * `incontents 200 startofday Start-of-day memory layout
+ *
+ *  1. The domain is started within a contiguous virtual-memory region.
+ *  2. The contiguous region ends on an aligned 4MB boundary.
+ *  3. This is the order of bootstrap elements in the initial virtual region:
+ *      a. relocated kernel image
+ *      b. initial ram disk              [mod_start, mod_len]
+ *      c. list of allocated page frames [mfn_list, nr_pages]
+ *         (unless relocated due to XEN_ELFNOTE_INIT_P2M)
+ *      d. start_info_t structure        [register ESI (x86)]
+ *      e. bootstrap page tables         [pt_base and CR3 (x86)]
+ *      f. bootstrap stack               [register ESP (x86)]
+ *  4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ *  5. The initial ram disk may be omitted.
+ *  6. The list of page frames forms a contiguous 'pseudo-physical' memory
+ *     layout for the domain. In particular, the bootstrap virtual-memory
+ *     region is a 1:1 mapping to the first section of the pseudo-physical map.
+ *  7. All bootstrap elements are mapped read-writable for the guest OS. The
+ *     only exception is the bootstrap page table, which is mapped read-only.
+ *  8. There is guaranteed to be at least 512kB padding after the final
+ *     bootstrap element. If necessary, the bootstrap virtual region is
+ *     extended by an extra 4MB to ensure this.
+ *
+ * Note: Prior to 25833:bb85bbccb1c9 ("x86/32-on-64 adjust Dom0 initial page
+ * table layout"), a bug caused the pt_base (3.e above) and cr3 not to point
+ * to the start of the guest page tables (they were offset by two pages).
+ * This only manifested itself on 32-on-64 dom0 kernels, not on 32-on-64 domU
+ * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 were
+ * allocated in the order 'first L1', 'first L2', 'first L3', so the page
+ * table base is offset two pages back. An initial domain that is 32-bit and
+ * runs under a 64-bit hypervisor should _NOT_ use the two pages preceding
+ * pt_base, and should mark them as reserved/unused.
+ */
+
+#define MAX_GUEST_CMDLINE 1024
+struct start_info {
+    /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.    */
+    char magic[32];             /* "xen-<version>-<platform>".            */
+    unsigned long nr_pages;     /* Total pages allocated to this domain.  */
+    unsigned long shared_info;  /* MACHINE address of shared info struct. */
+    uint32_t flags;             /* SIF_xxx flags.                         */
+    xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
+    uint32_t store_evtchn;      /* Event channel for store communication. */
+    union {
+        struct {
+            xen_pfn_t mfn;      /* MACHINE page number of console page.   */
+            uint32_t  evtchn;   /* Event channel for console page.        */
+        } domU;
+        struct {
+            uint32_t info_off;  /* Offset of console_info struct.         */
+            uint32_t info_size; /* Size of console_info struct from start.*/
+        } dom0;
+    } console;
+    /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
+    unsigned long pt_base;      /* VIRTUAL address of page directory.     */
+    unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames.       */
+    unsigned long mfn_list;     /* VIRTUAL address of page-frame list.    */
+    unsigned long mod_start;    /* VIRTUAL address of pre-loaded module   */
+                                /* (PFN of pre-loaded module if           */
+                                /*  SIF_MOD_START_PFN set in flags).      */
+    unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
+    int8_t cmd_line[MAX_GUEST_CMDLINE];
+    /* The pfn range here covers both page table and p->m table frames.   */
+    unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table.    */
+    unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table.  */
+};
+typedef struct start_info start_info_t;
+
+/* New console union for dom0 introduced in 0x00030203. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+#define console_mfn    console.domU.mfn
+#define console_evtchn console.domU.evtchn
+#endif
+
+/* These flags are passed in the 'flags' field of start_info_t. */
+#define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
+#define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
+#define SIF_MULTIBOOT_MOD (1<<2)  /* Is mod_start a multiboot module? */
+#define SIF_MOD_START_PFN (1<<3)  /* Is mod_start a PFN? */
+#define SIF_PM_MASK       (0xFF<<8) /* reserve 1 byte for xen-pm options */
+
+/*
+ * A multiboot module is a package containing modules very similar to a
+ * multiboot module array. The only differences are:
+ * - the array of module descriptors is by convention simply at the beginning
+ *   of the multiboot module,
+ * - addresses in the module descriptors are based on the beginning of the
+ *   multiboot module,
+ * - the number of modules is determined by a termination descriptor that has
+ *   mod_start == 0.
+ *
+ * This permits both building it statically and referencing it in a
+ * configuration file, and lets the PV guest easily rebase the addresses to
+ * virtual addresses while counting the number of modules.
+ */
+struct xen_multiboot_mod_list
+{
+    /* Address of first byte of the module */
+    uint32_t mod_start;
+    /* Address of last byte of the module (inclusive) */
+    uint32_t mod_end;
+    /* Address of zero-terminated command line */
+    uint32_t cmdline;
+    /* Unused, must be zero */
+    uint32_t pad;
+};
+/*
+ * `incontents 200 startofday_dom0_console Dom0_console
+ *
+ * The console structure in start_info.console.dom0
+ *
+ * This structure includes a variety of information required to
+ * have a working VGA/VESA console.
+ */
+typedef struct dom0_vga_console_info {
+    uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
+#define XEN_VGATYPE_TEXT_MODE_3 0x03
+#define XEN_VGATYPE_VESA_LFB    0x23
+#define XEN_VGATYPE_EFI_LFB     0x70
+
+    union {
+        struct {
+            /* Font height, in pixels. */
+            uint16_t font_height;
+            /* Cursor location (column, row). */
+            uint16_t cursor_x, cursor_y;
+            /* Number of rows and columns (dimensions in characters). */
+            uint16_t rows, columns;
+        } text_mode_3;
+
+        struct {
+            /* Width and height, in pixels. */
+            uint16_t width, height;
+            /* Bytes per scan line. */
+            uint16_t bytes_per_line;
+            /* Bits per pixel. */
+            uint16_t bits_per_pixel;
+            /* LFB physical address, and size (in units of 64kB). */
+            uint32_t lfb_base;
+            uint32_t lfb_size;
+            /* RGB mask offsets and sizes, as defined by VBE 1.2+ */
+            uint8_t  red_pos, red_size;
+            uint8_t  green_pos, green_size;
+            uint8_t  blue_pos, blue_size;
+            uint8_t  rsvd_pos, rsvd_size;
+#if __XEN_INTERFACE_VERSION__ >= 0x00030206
+            /* VESA capabilities (offset 0xa, VESA command 0x4f00). */
+            uint32_t gbl_caps;
+            /* Mode attributes (offset 0x0, VESA command 0x4f01). */
+            uint16_t mode_attrs;
+#endif
+        } vesa_lfb;
+    } u;
+} dom0_vga_console_info_t;
+#define xen_vga_console_info dom0_vga_console_info
+#define xen_vga_console_info_t dom0_vga_console_info_t
+
+typedef uint8_t xen_domain_handle_t[16];
+
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+
+__DEFINE_XEN_GUEST_HANDLE(uint8,  uint8_t);
+__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t);
+__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t);
+__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t);
+
+#else /* __ASSEMBLY__ */
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+
+#endif /* !__ASSEMBLY__ */
+
+/* Default definitions for macros used by domctl/sysctl. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+#ifndef uint64_aligned_t
+#define uint64_aligned_t uint64_t
+#endif
+#ifndef XEN_GUEST_HANDLE_64
+#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name)
+#endif
+
+#ifndef __ASSEMBLY__
+struct xenctl_bitmap {
+    XEN_GUEST_HANDLE_64(uint8) bitmap;
+    uint32_t nr_bits;
+};
+#endif
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#endif /* __XEN_PUBLIC_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/xencomm.h
@@ -1,0 +1,41 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) IBM Corp. 2006
+ */
+
+#ifndef _XEN_XENCOMM_H_
+#define _XEN_XENCOMM_H_
+
+/* A xencomm descriptor is a scatter/gather list containing physical
+ * addresses corresponding to a virtually contiguous memory area. The
+ * hypervisor translates these physical addresses to machine addresses to copy
+ * to and from the virtually contiguous area.
+ */
+
+#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */
+#define XENCOMM_INVALID (~0UL)
+
+struct xencomm_desc {
+    uint32_t magic;
+    uint32_t nr_addrs; /* the number of entries in address[] */
+    uint64_t address[0];
+};
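+
+/*
+ * Illustrative sketch: describing a virtually contiguous buffer, one
+ * physical address per page.  alloc_page(), virt_to_phys(), PAGE_SIZE,
+ * buf and len are assumptions of this example.
+ */
+#if 0
+struct xencomm_desc *d = alloc_page();  /* descriptor must fit one page */
+char *p;
+int i = 0;
+
+d->magic = XENCOMM_MAGIC;
+for (p = buf; p < buf + len; p += PAGE_SIZE)
+    d->address[i++] = virt_to_phys(p);
+d->nr_addrs = i;
+#endif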
+
+#endif /* _XEN_XENCOMM_H_ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/xenoprof.h
@@ -1,0 +1,152 @@
+/******************************************************************************
+ * xenoprof.h
+ * 
+ * Interface for enabling system wide profiling based on hardware performance
+ * counters
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Written by Aravind Menon & Jose Renato Santos
+ */
+
+#ifndef __XEN_PUBLIC_XENOPROF_H__
+#define __XEN_PUBLIC_XENOPROF_H__
+
+#include "xen.h"
+
+/*
+ * Commands to HYPERVISOR_xenoprof_op().
+ */
+#define XENOPROF_init                0
+#define XENOPROF_reset_active_list   1
+#define XENOPROF_reset_passive_list  2
+#define XENOPROF_set_active          3
+#define XENOPROF_set_passive         4
+#define XENOPROF_reserve_counters    5
+#define XENOPROF_counter             6
+#define XENOPROF_setup_events        7
+#define XENOPROF_enable_virq         8
+#define XENOPROF_start               9
+#define XENOPROF_stop               10
+#define XENOPROF_disable_virq       11
+#define XENOPROF_release_counters   12
+#define XENOPROF_shutdown           13
+#define XENOPROF_get_buffer         14
+#define XENOPROF_set_backtrace      15
+
+/* AMD IBS support */
+#define XENOPROF_get_ibs_caps       16
+#define XENOPROF_ibs_counter        17
+#define XENOPROF_last_op            17
+
+#define MAX_OPROF_EVENTS    32
+#define MAX_OPROF_DOMAINS   25
+#define XENOPROF_CPU_TYPE_SIZE 64
+
+/* Xenoprof performance events (not Xen events) */
+struct event_log {
+    uint64_t eip;
+    uint8_t mode;
+    uint8_t event;
+};
+
+/* PC value that indicates a special code */
+#define XENOPROF_ESCAPE_CODE (~0ULL)
+/* Transient events for the xenoprof->oprofile cpu buf */
+#define XENOPROF_TRACE_BEGIN 1
+
+/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
+struct xenoprof_buf {
+    uint32_t event_head;
+    uint32_t event_tail;
+    uint32_t event_size;
+    uint32_t vcpu_id;
+    uint64_t xen_samples;
+    uint64_t kernel_samples;
+    uint64_t user_samples;
+    uint64_t lost_samples;
+    struct event_log event_log[1];
+};
+#ifndef __XEN__
+typedef struct xenoprof_buf xenoprof_buf_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t);
+#endif
+
+struct xenoprof_init {
+    int32_t  num_events;
+    int32_t  is_primary;
+    char cpu_type[XENOPROF_CPU_TYPE_SIZE];
+};
+typedef struct xenoprof_init xenoprof_init_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t);
+
+struct xenoprof_get_buffer {
+    int32_t  max_samples;
+    int32_t  nbuf;
+    int32_t  bufsize;
+    uint64_t buf_gmaddr;
+};
+typedef struct xenoprof_get_buffer xenoprof_get_buffer_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t);
+
+struct xenoprof_counter {
+    uint32_t ind;
+    uint64_t count;
+    uint32_t enabled;
+    uint32_t event;
+    uint32_t hypervisor;
+    uint32_t kernel;
+    uint32_t user;
+    uint64_t unit_mask;
+};
+typedef struct xenoprof_counter xenoprof_counter_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t);
+
+typedef struct xenoprof_passive {
+    uint16_t domain_id;
+    int32_t  max_samples;
+    int32_t  nbuf;
+    int32_t  bufsize;
+    uint64_t buf_gmaddr;
+} xenoprof_passive_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t);
+
+struct xenoprof_ibs_counter {
+    uint64_t op_enabled;
+    uint64_t fetch_enabled;
+    uint64_t max_cnt_fetch;
+    uint64_t max_cnt_op;
+    uint64_t rand_en;
+    uint64_t dispatched_ops;
+};
+typedef struct xenoprof_ibs_counter xenoprof_ibs_counter_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_ibs_counter_t);
+
+#endif /* __XEN_PUBLIC_XENOPROF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+++ b/sys/src/9/xen/xen-public/xsm/flask_op.h
@@ -1,0 +1,201 @@
+/*
+ *  This file contains the flask_op hypercall commands and definitions.
+ *
+ *  Author:  George Coker, <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __FLASK_OP_H__
+#define __FLASK_OP_H__
+
+#define XEN_FLASK_INTERFACE_VERSION 1
+
+struct xen_flask_load {
+    XEN_GUEST_HANDLE(char) buffer;
+    uint32_t size;
+};
+
+struct xen_flask_setenforce {
+    uint32_t enforcing;
+};
+
+struct xen_flask_sid_context {
+    /* IN/OUT: sid to convert to/from string */
+    uint32_t sid;
+    /* IN: size of the context buffer
+     * OUT: actual size of the output context string
+     */
+    uint32_t size;
+    XEN_GUEST_HANDLE(char) context;
+};
+
+struct xen_flask_access {
+    /* IN: access request */
+    uint32_t ssid;
+    uint32_t tsid;
+    uint32_t tclass;
+    uint32_t req;
+    /* OUT: AVC data */
+    uint32_t allowed;
+    uint32_t audit_allow;
+    uint32_t audit_deny;
+    uint32_t seqno;
+};
+
+struct xen_flask_transition {
+    /* IN: transition SIDs and class */
+    uint32_t ssid;
+    uint32_t tsid;
+    uint32_t tclass;
+    /* OUT: new SID */
+    uint32_t newsid;
+};
+
+struct xen_flask_userlist {
+    /* IN: starting SID for list */
+    uint32_t start_sid;
+    /* IN: size of user string and output buffer
+     * OUT: number of SIDs returned */
+    uint32_t size;
+    union {
+        /* IN: user to enumerate SIDs */
+        XEN_GUEST_HANDLE(char) user;
+        /* OUT: SID list */
+        XEN_GUEST_HANDLE(uint32) sids;
+    } u;
+};
+
+struct xen_flask_boolean {
+    /* IN/OUT: numeric identifier for boolean [GET/SET]
+     * If -1, name will be used and bool_id will be filled in. */
+    uint32_t bool_id;
+    /* OUT: current enforcing value of boolean [GET/SET] */
+    uint8_t enforcing;
+    /* OUT: pending value of boolean [GET/SET] */
+    uint8_t pending;
+    /* IN: new value of boolean [SET] */
+    uint8_t new_value;
+    /* IN: commit new value instead of only setting pending [SET] */
+    uint8_t commit;
+    /* IN: size of boolean name buffer [GET/SET]
+     * OUT: actual size of name [GET only] */
+    uint32_t size;
+    /* IN: if bool_id is -1, used to find boolean [GET/SET]
+     * OUT: textual name of boolean [GET only]
+     */
+    XEN_GUEST_HANDLE(char) name;
+};
+
+struct xen_flask_setavc_threshold {
+    /* IN */
+    uint32_t threshold;
+};
+
+struct xen_flask_hash_stats {
+    /* OUT */
+    uint32_t entries;
+    uint32_t buckets_used;
+    uint32_t buckets_total;
+    uint32_t max_chain_len;
+};
+
+struct xen_flask_cache_stats {
+    /* IN */
+    uint32_t cpu;
+    /* OUT */
+    uint32_t lookups;
+    uint32_t hits;
+    uint32_t misses;
+    uint32_t allocations;
+    uint32_t reclaims;
+    uint32_t frees;
+};
+
+struct xen_flask_ocontext {
+    /* IN */
+    uint32_t ocon;
+    uint32_t sid;
+    uint64_t low, high;
+};
+
+struct xen_flask_peersid {
+    /* IN */
+    evtchn_port_t evtchn;
+    /* OUT */
+    uint32_t sid;
+};
+
+struct xen_flask_relabel {
+    /* IN */
+    uint32_t domid;
+    uint32_t sid;
+};
+
+struct xen_flask_op {
+    uint32_t cmd;
+#define FLASK_LOAD              1
+#define FLASK_GETENFORCE        2
+#define FLASK_SETENFORCE        3
+#define FLASK_CONTEXT_TO_SID    4
+#define FLASK_SID_TO_CONTEXT    5
+#define FLASK_ACCESS            6
+#define FLASK_CREATE            7
+#define FLASK_RELABEL           8
+#define FLASK_USER              9
+#define FLASK_POLICYVERS        10
+#define FLASK_GETBOOL           11
+#define FLASK_SETBOOL           12
+#define FLASK_COMMITBOOLS       13
+#define FLASK_MLS               14
+#define FLASK_DISABLE           15
+#define FLASK_GETAVC_THRESHOLD  16
+#define FLASK_SETAVC_THRESHOLD  17
+#define FLASK_AVC_HASHSTATS     18
+#define FLASK_AVC_CACHESTATS    19
+#define FLASK_MEMBER            20
+#define FLASK_ADD_OCONTEXT      21
+#define FLASK_DEL_OCONTEXT      22
+#define FLASK_GET_PEER_SID      23
+#define FLASK_RELABEL_DOMAIN    24
+    uint32_t interface_version; /* XEN_FLASK_INTERFACE_VERSION */
+    union {
+        struct xen_flask_load load;
+        struct xen_flask_setenforce enforce;
+        /* FLASK_CONTEXT_TO_SID and FLASK_SID_TO_CONTEXT */
+        struct xen_flask_sid_context sid_context;
+        struct xen_flask_access access;
+        /* FLASK_CREATE, FLASK_RELABEL, FLASK_MEMBER */
+        struct xen_flask_transition transition;
+        struct xen_flask_userlist userlist;
+        /* FLASK_GETBOOL, FLASK_SETBOOL */
+        struct xen_flask_boolean boolean;
+        struct xen_flask_setavc_threshold setavc_threshold;
+        struct xen_flask_hash_stats hash_stats;
+        struct xen_flask_cache_stats cache_stats;
+        /* FLASK_ADD_OCONTEXT, FLASK_DEL_OCONTEXT */
+        struct xen_flask_ocontext ocontext;
+        struct xen_flask_peersid peersid;
+        struct xen_flask_relabel relabel;
+    } u;
+};
+typedef struct xen_flask_op xen_flask_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_flask_op_t);
+
+#endif
--- /dev/null
+++ b/sys/src/9/xen/xen.s
@@ -1,0 +1,72 @@
+#include "xendefs.h"
+#include "mem.h"
+
+#define ENTRY(X) TEXT X(SB), $0 
+
+/*
+ * XXX there's a race in here because we can get an upcall
+ * between the spllo() (in xenupcall) and the rti.  This will make
+ * handlers nest, which could lead to a blown stack.  Probably
+ * not worth fixing (but possibly worth detecting and panicking).
+ *
+ * For fun, get some popcorn, turn off the lights, and read the
+ * linux solution (search for scrit/ecrit).
+ */
+ENTRY(hypervisor_callback)
+	SUBL	$8, SP		/* space for ecode and trap type */
+	PUSHL	DS			/* save DS */
+	PUSHL	$(KDSEL)
+	POPL	DS			/* fix up DS */
+	PUSHL	ES			/* save ES */
+	PUSHL	$(KDSEL)
+	POPL	ES			/* fix up ES */
+
+	PUSHL	FS			/* save the rest of the Ureg struct */
+	PUSHL	GS
+	PUSHAL
+
+	PUSHL	SP			/* Ureg* argument to trap */
+	CALL xenupcall+0(SB)
+	POPL	AX
+
+	POPAL
+	POPL	GS
+	POPL	FS
+	POPL	ES
+	POPL	DS
+	ADDL	$8, SP			/* pop error code and trap type */
+	IRETL
+
+/* Hypervisor uses this for application faults while it executes.*/
+ENTRY(failsafe_callback)
+	IRETL
+	PUSHL	AX
+	CALL 	install_safe_pf_handler(SB)
+	MOVL	32(SP), BX
+	MOVW	BX, DS
+	MOVL	36(SP), BX
+	MOVW	BX, ES
+	MOVL	40(SP), BX
+	MOVW	BX, FS
+	MOVL	44(SP), BX
+	MOVW	BX, GS
+	CALL	install_normal_pf_handler(SB)
+	POPL		AX
+	ADDL	$16, SP
+	IRETL
+
+/*
+ * Hypercall stubs with varying argument counts: each entry point loads
+ * one more argument register, then falls through to the next, ending in
+ * INT $0x82 to enter the hypervisor.
+ */
+TEXT xencall6(SB), $0
+	MOVL	VDI+20(FP), DI
+TEXT xencall5(SB), $0
+	MOVL	VSI+16(FP), SI
+TEXT xencall4(SB), $0
+	MOVL	VDX+12(FP), DX
+TEXT xencall3(SB), $0
+	MOVL	VCX+8(FP), CX
+TEXT xencall2(SB), $0
+	MOVL	VBX+4(FP), BX
+TEXT xencall1(SB), $0
+	MOVL	op+0(FP), AX
+	INT	$0x82
+	RET
--- /dev/null
+++ b/sys/src/9/xen/xenbin.c
@@ -1,0 +1,162 @@
+/*
+ * Transform a Plan 9 386 bootable image to make it compatible with
+ * the Xen binary image loader:
+ *
+ * - pad the beginning of the text with zeroes so that the image can be loaded at
+ *    guest 'physical' address 0
+ * - insert a Xen header
+ * - pad the end of the text so that data segment is page-aligned in the file
+ * - adjust the line-number-pc table so Plan 9 debuggers won't be confused
+ */
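+/*
+ * Output layout (sketch):
+ *	Plan 9 a.out header	(32 bytes)
+ *	Xen header		(32 bytes)
+ *	zero padding up to the original entry offset
+ *	text, zero-padded to a page boundary
+ *	data, symbols, adjusted line-number table
+ */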
+
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <mach.h>
+
+#define PAGE	4096
+#define PLAN9HDR	32
+#define XENHDR	32
+#define	KZERO	0x80000000
+#define FLAG_VALID	(1<<16)
+#define FLAG_PAE	(1<<14)
+
+void
+lput(long n)
+{
+	char buf[sizeof(long)];
+	int i;
+
+	for (i = sizeof(long)-1; i >= 0; i--) {
+		buf[i] = n;
+		n >>= 8;
+	}
+	write(1, buf, sizeof(long));
+}
+
+void
+rput(long n)
+{
+	char buf[sizeof(long)];
+	int i;
+
+	for (i = 0; i < sizeof(long); i++) {
+		buf[i] = n;
+		n >>= 8;
+	}
+	write(1, buf, sizeof(long));
+}
+
+void
+copy(long n)
+{
+	char buf[PAGE];
+	int m;
+
+	while (n > 0) {
+		m = sizeof buf;
+		if (m > n)
+			m = n;
+		m = read(0, buf, m);	/* tolerate short reads */
+		if (m <= 0)
+			break;
+		write(1, buf, m);
+		n -= m;
+	}
+}
+
+void
+pad(int n)
+{
+	char buf[PAGE];
+	int m;
+
+	memset(buf, 0, sizeof buf);
+	while (n > 0) {
+		m = sizeof buf;
+		if (m > n)
+			m = n;
+		write(1, buf, m);
+		n -= m;
+	}
+}
+
+/*
+ * See /sys/src/cmd/8l/span.c:/^asmlc
+ */
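+/*
+ * Each byte s+128 (1 <= s <= 127) emitted here advances the pc by s
+ * bytes without changing the line number, so v bytes of inserted
+ * padding need ceil(v/127) extra table entries (cf. newlnpcsz in main).
+ */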
+void
+adjustlnpc(int v)
+{
+	char buf[PAGE];
+	int n, s;
+
+	n = 0;
+	while (v) {
+		s = 127;
+		if (v < 127)
+			s = v;
+		buf[n++] = s+128;
+		if (n == sizeof buf) {
+			write(1, buf, n);
+			n = 0;
+		}
+		v -= s;
+	}
+	if (n > 0)
+		write(1, buf, n);
+}
+
+void
+main(int argc, char **argv)
+{
+	Fhdr fhdr;
+	long newtxtsz;
+	long newentry;
+	long newlnpcsz;
+	long prepad, postpad;
+	long flags;
+
+	flags = FLAG_VALID;
+	if (argc > 1 && strcmp(argv[1], "-p") == 0)
+		flags |= FLAG_PAE;
+
+	crackhdr(0, &fhdr);
+
+	newtxtsz = ((fhdr.txtsz+PLAN9HDR+PAGE-1)&~(PAGE-1)) - PLAN9HDR;
+	newentry = KZERO+PLAN9HDR;
+	prepad = fhdr.entry - newentry;
+	postpad = newtxtsz - fhdr.txtsz;
+	newtxtsz += prepad;
+	newlnpcsz = fhdr.lnpcsz;
+	if (newlnpcsz)
+		newlnpcsz += (prepad+126)/127;
+
+	/* plan 9 header */
+	lput(4*11*11+7);		/* magic */
+	lput(newtxtsz);			/* sizes */
+	lput(fhdr.datsz);
+	lput(fhdr.bsssz);
+	lput(fhdr.symsz);		/* nsyms */
+	lput(newentry);		/* va of entry */
+	lput(fhdr.sppcsz);		/* sp offsets */
+	lput(newlnpcsz);		/* line offsets */
+
+	/* xen header */
+	rput(0x336EC578);	/* magic */
+	rput(flags);		/* flags */
+	rput(-(0x336EC578+flags));	/* checksum */
+	rput(newentry);	/* header_addr */
+	rput(KZERO);	/* load_addr */
+	rput(KZERO+newtxtsz+fhdr.datsz);	/* load_end_addr */
+	rput(KZERO+newtxtsz+fhdr.datsz+fhdr.bsssz);	/* bss_end_addr */
+	rput(fhdr.entry);	/* entry_addr */
+
+	pad(prepad-XENHDR);
+
+	seek(0, fhdr.txtoff, 0);
+	copy(fhdr.txtsz);
+	pad(postpad);
+	copy(fhdr.datsz);
+	copy(fhdr.symsz);
+	if (newlnpcsz) {
+		adjustlnpc(prepad);
+		copy(fhdr.lnpcsz);
+	}
+	exits(0);
+}
--- /dev/null
+++ b/sys/src/9/xen/xenelf.c
@@ -1,0 +1,157 @@
+#include <u.h>
+#include <libc.h>
+#include "/sys/src/libmach/elf.h"
+
+enum {
+	Page = 4096,
+};
+
+#define ROUND(n) ((n+Page-1)&~(Page-1))
+
+Shdr isect, csect;
+
+static ushort
+GETS(void *a)
+{
+	uchar *p = a;
+	return p[0] | p[1]<<8;
+}
+
+static ulong
+GETL(void *a)
+{
+	uchar *p = a;
+	return p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24;
+}
+
+static void
+PUTS(void *a, ushort v)
+{
+	uchar *p = a;
+	p[0] = v;
+	p[1] = v>>8;
+}
+
+static void
+PUTL(void *a, ulong v)
+{
+	uchar *p = a;
+	p[0] = v;
+	p[1] = v>>8;
+	p[2] = v>>16;
+	p[3] = v>>24;
+}
+
+void
+copy(int fin, int fout, ulong src, ulong dst, ulong size)
+{
+	char buf[Page];
+	int n;
+
+	seek(fin, src, 0);
+	seek(fout, dst, 0);
+	while (size > 0) {
+		n = Page;
+		if (n > size)
+			n = size;
+		n = read(fin, buf, n);	/* tolerate short reads */
+		if (n <= 0)
+			break;
+		write(fout, buf, n);
+		size -= n;
+	}
+}
+
+void
+main(int argc, char **argv)
+{
+	Ehdr e;
+	Shdr s;
+	Phdr p;
+	int efd, ofd, ns, i, n;
+	ulong shoff, off, noff, size, msize;
+	char *sname, *sval;
+
+	if (argc != 5)
+		sysfatal("Usage: xenelf input-elf-file output-elf-file section-name section-contents");
+	efd = open(argv[1], OREAD);
+	if (efd < 0)
+		sysfatal("%s: %r", argv[1]);
+	ofd = create(argv[2], OWRITE, 0666);
+	if (ofd < 0)
+		sysfatal("%s: %r", argv[2]);
+	sname = argv[3];
+	sval = argv[4];
+
+	read(efd, &e, sizeof e);
+	//if (e.shstrndx)
+	//	sysfatal("section header string index already present");
+	
+	/* page-align loadable segments in file */
+	ns = GETS(&e.phnum);
+	shoff = GETL(&e.phoff);
+	noff = shoff+ns*sizeof(Phdr);
+	noff = ROUND(noff);
+	for (i = 0; i < ns; i++) {
+		seek(efd, shoff+i*sizeof(Phdr), 0);
+		read(efd, &p, sizeof p);
+		off = GETL(&p.offset);
+		PUTL(&p.offset, noff);
+		size = GETL(&p.filesz);
+		copy(efd, ofd, off, noff, size);
+		if (GETL(&p.type) == LOAD) {
+			size = ROUND(size);
+			PUTL(&p.filesz, size);
+			if ((msize = GETL(&p.memsz)) != 0 && size > msize)
+				PUTL(&p.memsz, size);
+		} else {
+			/* memory size for symtab segment is actually line number table size */
+			msize = GETL(&p.memsz);
+			copy(efd, ofd, off+size, noff+size, msize);
+			noff += msize;
+		}
+		noff += size;
+		seek(ofd, shoff+i*sizeof(Phdr), 0);
+		write(ofd, &p, sizeof p);
+	}
+
+	/* append the section-name string table: a NUL entry plus sname */
+	PUTL(&isect.offset, seek(ofd, noff, 0));
+	n = strlen(sname);
+	PUTL(&isect.size, n+2);
+	write(ofd, sname+n, 1);
+	write(ofd, sname, n+1);
+	
+	/* append comment section contents */
+	PUTL(&csect.name, 1);
+	PUTL(&csect.offset, seek(ofd, 0, 2));
+	n = strlen(sval);
+	PUTL(&csect.size, n+1);
+	write(ofd, sval, n+1);
+	
+	/* copy existing section headers to end */
+	ns = 0; //GETS(&e.shnum);
+	shoff = GETL(&e.shoff);
+	PUTL(&e.shoff, seek(ofd, 0, 2));
+	for (i = 0; i < ns; i++) {
+		seek(efd, shoff+i*sizeof(Shdr), 0);
+		read(efd, &s, sizeof s);
+		seek(ofd, 0, 2);
+		write(ofd, &s, sizeof s);
+	}
+	
+	/* append section header for comment section */
+	write(ofd, &csect, sizeof csect);
+	++ns;
+
+	/* append section header for the string table; shstrndx points at it */
+	PUTS(&e.shstrndx, ns);
+	++ns;
+	write(ofd, &isect, sizeof isect);
+
+	/* rewrite elf header */
+	PUTS(&e.shentsize, sizeof(Shdr));
+	PUTS(&e.shnum, ns);
+	seek(ofd, 0, 0);
+	write(ofd, &e, sizeof e);
+
+	exits(0);
+}
--- /dev/null
+++ b/sys/src/9/xen/xengrant.c
@@ -1,0 +1,100 @@
+/*
+ * Sharing page frames with other domains
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+
+enum {
+	Nframes = 1,	// XXX don't increase this without setting up extra mappings in xengrant_init()
+};
+
+static struct {
+	Lock;
+	ushort free;
+	ushort *refs;
+} refalloc;
+
+static grant_entry_t *granttab;
+
+void
+xengrantinit(void)
+{
+	gnttab_setup_table_t setup;
+	ulong frames[Nframes];
+	int nrefs, i;
+
+	setup.dom = DOMID_SELF;
+	setup.nr_frames = Nframes;
+	set_xen_guest_handle(setup.frame_list, frames);
+	if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0 || setup.status != 0)
+		panic("xen grant table setup");
+	granttab = (grant_entry_t*)mmumapframe(XENGRANTTAB, frames[0]);
+	nrefs = Nframes * BY2PG / sizeof(grant_entry_t);
+	refalloc.refs = (ushort*)malloc(nrefs*sizeof(ushort));
+	if (refalloc.refs == nil)
+		panic("xengrantinit: out of memory");
+	for (i = 0; i < nrefs; i++)
+		refalloc.refs[i] = i-1;
+	refalloc.free = nrefs-1;
+}
+
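+/*
+ * refs[] threads a LIFO free list through the table: refalloc.free is
+ * the top of the stack and refs[i] is the next free ref once i is
+ * taken.  Ref 0 is a sentinel and is never handed out.
+ */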
+static int
+allocref(void)
+{
+	int ref;
+
+	ilock(&refalloc);
+	ref = refalloc.free;
+	if (ref > 0)
+		refalloc.free = refalloc.refs[ref];
+	iunlock(&refalloc);
+	if (ref <= 0)
+		return -1;	/* exhausted: ref 0 is a free-list sentinel */
+	return ref;
+}
+
+static void
+freeref(int ref)
+{
+	ilock(&refalloc);
+	refalloc.refs[ref] = refalloc.free;
+	refalloc.free = ref;
+	iunlock(&refalloc);
+}
+
+int
+xengrant(domid_t domid, ulong frame, int flags)
+{
+	int ref;
+	grant_entry_t *gt;
+
+	if ((ref = allocref()) < 0)
+		panic("out of xengrant refs");
+	gt = &granttab[ref];
+	gt->frame = frame;
+	gt->domid = domid;
+	coherence();
+	gt->flags = flags;
+	return ref;
+}
+
+int
+xengrantend(int ref)
+{
+	grant_entry_t *gt;
+	int frame;
+
+	gt = &granttab[ref];
+	coherence();
+	if (gt->flags&GTF_accept_transfer) {
+		if ((gt->flags&GTF_transfer_completed) == 0)
+			panic("xengrantend transfer in progress");
+	} else {
+		if (gt->flags&(GTF_reading|GTF_writing))
+			panic("xengrantend frame in use");
+	}
+	coherence();
+	frame = gt->frame;
+	gt->flags = GTF_invalid;
+	freeref(ref);
+	return frame;
+}
--- /dev/null
+++ b/sys/src/9/xen/xenpcf
@@ -1,0 +1,62 @@
+dev
+	root		netif
+	cons
+	uart
+	arch
+	env
+	pipe
+	proc
+	mnt
+	srv
+	shr
+	dup
+	rtc
+	ssl
+	tls
+	cap
+	kprof
+	fs
+
+	xenstore
+	ether		netif
+	ip			arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum386 inferno
+
+	sd
+
+link
+	etherxen
+	ethermedium
+	netdevmedium
+	loopbackmedium
+
+misc
+	archxen
+	sdxen
+	uartxen
+
+ip
+	tcp
+	udp
+	rudp
+	ipifc
+	icmp
+	icmp6
+	gre
+	ipmux
+	esp
+	il
+
+port
+	int cpuserver = 0;
+
+boot boot
+	tcp
+	local
+
+bootdir
+	boot$CONF.out	boot
+	/$objtype/bin/paqfs
+	/$objtype/bin/auth/factotum
+	bootfs.paq
+	xenstore
+
--- /dev/null
+++ b/sys/src/9/xen/xenstore.c
@@ -1,0 +1,130 @@
+#include <u.h>
+#include <libc.h>
+
+typedef ulong uint32_t;
+
+enum xsd_sockmsg_type
+{
+    XS_DEBUG,
+    XS_DIRECTORY,
+    XS_READ,
+    XS_GET_PERMS,
+    XS_WATCH,
+    XS_UNWATCH,
+    XS_TRANSACTION_START,
+    XS_TRANSACTION_END,
+    XS_INTRODUCE,
+    XS_RELEASE,
+    XS_GET_DOMAIN_PATH,
+    XS_WRITE,
+    XS_MKDIR,
+    XS_RM,
+    XS_SET_PERMS,
+    XS_WATCH_EVENT,
+    XS_ERROR,
+    XS_IS_DOMAIN_INTRODUCED
+};
+
+struct xsd_sockmsg
+{
+    uint32_t type;  /* XS_??? */
+    uint32_t req_id;/* Request identifier, echoed in daemon's response.  */
+    uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */
+    uint32_t len;   /* Length of data following this. */
+
+    /* Generally followed by nul-terminated string(s). */
+};
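+/*
+ * Example (sketch): `xenstore w a/b v' sends the 16-byte header
+ * { XS_WRITE, req_id, 0, 5 } followed by "a/b\0v" -- the key is
+ * NUL-terminated, the value is not.  XS_WATCH additionally
+ * terminates its second argument (see msg->len++ below).
+ */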
+
+char*
+xscmd(int fd, enum xsd_sockmsg_type cmd, char *s, char *val)
+{
+	static char buf[512];
+	struct xsd_sockmsg *msg;
+	char *arg;
+	static ulong reqid = 1;
+	int n;
+
+	msg = (struct xsd_sockmsg*)buf;
+	arg = buf + sizeof(*msg);
+	if(cmd != XS_WATCH_EVENT){
+		msg->type = cmd;
+		msg->req_id = reqid++;
+		msg->tx_id = 0;
+		msg->len = strlen(s)+1;
+		if (val != 0) {
+			msg->len += strlen(val);
+			if (msg->type == XS_WATCH)
+				msg->len++;
+		}
+		strcpy(arg, s);
+		if (val != 0)
+			strcpy(arg+strlen(s)+1, val);
+		if (write(fd, buf, sizeof(*msg)+msg->len) < 0)
+			sysfatal("write: %r");
+	}
+	if ((n = read(fd, buf, sizeof(*msg))) != sizeof(*msg))
+		sysfatal("read hdr %d: %r", n);
+	fprint(2, "type %lud req_id %lud len %lud\n", msg->type, msg->req_id, msg->len);
+	if (msg->len > sizeof buf - sizeof(*msg) - 1)
+		sysfatal("reply too long: %lud", msg->len);
+	if ((n = read(fd, arg, msg->len)) != msg->len)
+		sysfatal("read data %d: %r", n);
+	if (cmd == XS_DIRECTORY || cmd == XS_WATCH_EVENT) {
+		for (s = arg; s < arg+msg->len; s++) {
+			if (*s == 0) *s = ',';
+			else if (*s < 32) *s += '0';
+		}
+	}
+	arg[msg->len] = 0;
+	return arg;
+}
+
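+/*
+ * Typical uses (paths are illustrative):
+ *	xenstore r name			read a key
+ *	xenstore l device		list a directory
+ *	xenstore w data/test hello	write a key
+ *	xenstore e data/test tok	watch a key; events then stream from /dev/xenwatch
+ */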
+void
+usage(void)
+{
+	sysfatal("Usage: xenstore [lrwdme] path [value]");
+}
+
+void
+main(int argc, char *argv[])
+{
+	int fd;
+
+	if (argc != 3 && argc != 4)
+		usage();
+	if(access("/dev/xenstore", AEXIST) < 0)
+		bind("#x", "/dev", MAFTER);
+	fd = open("/dev/xenstore", ORDWR);
+	if (fd < 0)
+		sysfatal("/dev/xenstore: %r");
+	switch (argv[1][0]) {
+	default:
+		usage();
+		break;
+	case 'r':
+		print("%s\n", xscmd(fd, XS_READ, argv[2], 0));
+		break;
+	case 'l':
+		print("%s\n", xscmd(fd, XS_DIRECTORY, argv[2], 0));
+		break;
+	case 'm':
+		print("%s\n", xscmd(fd, XS_MKDIR, argv[2], 0));
+		break;
+	case 'd':
+		print("%s\n", xscmd(fd, XS_RM, argv[2], 0));
+		break;
+	case 'w':
+		if (argc != 4)
+			usage();
+		print("%s\n", xscmd(fd, XS_WRITE, argv[2], argv[3]));
+		break;
+	case 'e':
+		if (argc != 4)
+			usage();
+		print("%s\n", xscmd(fd, XS_WATCH, argv[2], argv[3]));
+		close(fd);
+		fd = open("/dev/xenwatch", OREAD);
+		if (fd < 0)
+			sysfatal("/dev/xenwatch: %r");
+		for (;;)
+			print("%s\n", xscmd(fd, XS_WATCH_EVENT, 0, 0));
+	}
+}
--- /dev/null
+++ b/sys/src/9/xen/xensystem.c
@@ -1,0 +1,541 @@
+/*
+ * xensystem.c
+ *
+ * TODO: we could handle mmu updates more efficiently by
+ * using a multicall.
+ * XXX perhaps we should check return values and panic on failure?
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"ureg.h"
+
+#define LOG(a)
+
+/*
+ * These functions replace all the inlines that are used on Linux systems
+ */
+
+/* in xen.s */
+int xencall1(int op);
+int xencall2(int op, ulong arg1);
+int xencall3(int op, ulong arg1, ulong arg2);
+int xencall4(int op, ulong arg1, ulong arg2, ulong arg3);
+int xencall5(int op, ulong arg1, ulong arg2, ulong arg3, ulong arg4);
+int xencall6(int op, ulong arg1, ulong arg2, ulong arg3, ulong arg4, ulong arg5);
+
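+/* the 64-bit (PAE) pte value is passed as two 32-bit halves */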
+int
+HYPERVISOR_update_va_mapping(ulong va, uvlong newval, ulong flags)
+{
+	int ret;
+
+	ret = xencall5(__HYPERVISOR_update_va_mapping, va, newval, newval>>32, flags);
+	if(ret < 0)
+		panic("update_va_mapping failed");
+	return ret;
+}
+
+long
+HYPERVISOR_set_timer_op(uvlong timeout)
+{
+	ulong hi, lo;
+
+	hi = timeout>>32;
+	lo = timeout;
+	return xencall3(__HYPERVISOR_set_timer_op, lo, hi);
+}
+
+int 
+HYPERVISOR_set_trap_table(trap_info_t *table)
+{
+	return xencall2(__HYPERVISOR_set_trap_table, (ulong)table);
+}
+
+int
+HYPERVISOR_mmu_update(mmu_update_t *req, int count,
+	int *success_count, domid_t domid)
+{
+	return xencall5(__HYPERVISOR_mmu_update, (ulong)req, count, (ulong)success_count, domid);
+}
+
+int
+HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, int *scount, domid_t domid)
+{
+	return xencall5(__HYPERVISOR_mmuext_op, (ulong)op, count, (ulong)scount, domid);
+}
+
+int 
+HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
+{
+	return xencall3(__HYPERVISOR_set_gdt, (ulong)frame_list, entries);
+}
+
+int
+HYPERVISOR_stack_switch(ulong ss, ulong esp)
+{
+	return xencall3(__HYPERVISOR_stack_switch, ss, esp);
+}
+
+/* XXX match evfunc and fsfunc prototypes? */
+int
+HYPERVISOR_set_callbacks(ulong evss, ulong evfunc, ulong fsss, ulong fsfunc)
+{
+	return xencall5(__HYPERVISOR_set_callbacks, evss, evfunc, fsss, fsfunc);
+}
+
+int
+HYPERVISOR_fpu_taskswitch(void)
+{
+	return xencall1(__HYPERVISOR_fpu_taskswitch);
+}
+
+int
+HYPERVISOR_yield(void)
+{
+	return xencall3(__HYPERVISOR_sched_op, SCHEDOP_yield, 0);
+}
+
+int
+HYPERVISOR_block(void)
+{
+	return xencall3(__HYPERVISOR_sched_op, SCHEDOP_block, 0);
+}
+
+int 
+HYPERVISOR_shutdown(int reboot)
+{
+	sched_shutdown_t arg;
+
+	arg.reason = reboot? SHUTDOWN_reboot : SHUTDOWN_poweroff;
+	return xencall3(__HYPERVISOR_sched_op, SCHEDOP_shutdown, (ulong)&arg);
+}
+
+int
+HYPERVISOR_multicall(void *call_list, int nr_calls)
+{
+	return xencall3(__HYPERVISOR_multicall, (ulong)call_list, nr_calls);
+}
+
+int 
+HYPERVISOR_event_channel_op(void *op)
+{
+	return xencall2(__HYPERVISOR_event_channel_op, (ulong)op);
+}
+
+int
+HYPERVISOR_xen_version(int cmd, void *arg)
+{
+	return xencall3(__HYPERVISOR_xen_version, cmd, (ulong)arg);
+}
+
+int
+HYPERVISOR_console_io(int cmd, int count, char *str)
+{
+	return xencall4(__HYPERVISOR_console_io, cmd, count, (ulong)str);
+}
+
+int
+HYPERVISOR_grant_table_op(int cmd, gnttab_setup_table_t *setup, int count)
+{
+	return xencall4(__HYPERVISOR_grant_table_op, cmd, (ulong)setup, count);
+}
+
+int
+HYPERVISOR_memory_op(int cmd, struct xen_memory_reservation *arg)
+{
+	return xencall3(__HYPERVISOR_memory_op, cmd, (ulong)arg);
+}
+
+/* 
+ * XXX this comment is leftover from old code.  revisit and update.
+ *
+ * The use of 'barrier' in the following reflects its use for local-lock
+ * operations. Reentrancy must be prevented (e.g., __cli()) /before/ the
+ * following critical operations are executed. All critical operations must
+ * complete /before/ reentrancy is permitted (e.g., __sti()). The Alpha
+ * architecture also includes these barriers, for example.
+ */
+
+/*
+ * conversions to machine page numbers, pages and addresses
+ */
+#define MFN(pa)		(patomfn[(pa)>>PGSHIFT])
+#define MFNPG(pa)		((uvlong)MFN(pa)<<PGSHIFT)
+#define PA2MA(pa)		(MFNPG(pa) | PGOFF(pa))
+#define VA2MA(va)		PA2MA(PADDR(va))
+#define VA2MFN(va)		MFN(PADDR(va))
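+/*
+ * e.g. VA2MA(va) yields the machine address the MMU must see: the
+ * machine frame number from patomfn[] plus the offset within the
+ * page; matopfn[] holds the inverse, machine to pseudo-physical, map.
+ */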
+
+ulong hypervisor_virt_start;
+ulong xentop;
+start_info_t *xenstart;
+shared_info_t *HYPERVISOR_shared_info;
+ulong *patomfn;
+ulong *matopfn;
+
+int
+xenpdptpin(ulong va)
+{
+	struct mmuext_op op;
+	ulong mfn;
+
+	mfn = MFN(PADDR(va));
+	LOG(dprint("pdptpin %lux %lux\n", va, mfn);)
+	print("pdptpin %lux %lux\n", va, mfn);
+	/* mark page readonly first */
+	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL);
+
+	/*  L3 here refers to page directory pointer table (PAE mode) */
+	op.cmd = MMUEXT_PIN_L3_TABLE;
+	op.arg1.mfn = mfn;
+	if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0)
+		return 1;
+	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
+	return 0;
+}
+
+int
+xenpgdpin(ulong va)
+{
+	struct mmuext_op op;
+	ulong mfn;
+
+	mfn = MFN(PADDR(va));
+	LOG(dprint("pdpin %lux %lux\n", va, mfn);)
+	/* mark page readonly first */
+	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL);
+
+	/* to confuse you, L2 here refers to page directories */
+	op.cmd = MMUEXT_PIN_L2_TABLE;
+	op.arg1.mfn = mfn;
+	if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0)
+		return 1;
+	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
+	return 0;
+}
+
+int
+xenptpin(ulong va)
+{
+	struct mmuext_op op;
+	ulong mfn;
+
+	mfn = MFN(PADDR(va));
+	LOG(dprint("pin %lux %lux\n", va, mfn);)
+	/* mark page readonly first */
+	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL);
+
+	/* to confuse you, L1 here refers to page tables */
+	op.cmd = MMUEXT_PIN_L1_TABLE;
+	op.arg1.mfn = mfn;
+	if (HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0)
+		return 1;
+	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
+	return 0;
+}
+
+void
+xenptunpin(ulong va)
+{
+	struct mmuext_op op;
+	ulong mfn;
+
+	mfn = MFN(PADDR(va));
+	LOG(dprint("unpin %lux %lux\n", va, mfn);)
+	op.cmd = MMUEXT_UNPIN_TABLE;
+	op.arg1.mfn = mfn;
+	if(HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF)<0)
+		panic("xenptunpin va=%lux called from %lux", va, getcallerpc(&va));
+
+	/* mark page read-write */
+	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
+}
+
+void
+xenptswitch(ulong pa)
+{
+	struct mmuext_op op;
+
+	op.cmd = MMUEXT_NEW_BASEPTR;
+	op.arg1.mfn = MFN(pa);
+	if(HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF)<0)
+		panic("xenptswitch");
+}
+
+void
+xentlbflush(void)
+{
+	struct mmuext_op op;
+
+	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
+	HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF);
+}
+
+/* update a pte using a machine page frame number */
+void 
+xenupdatema(ulong *ptr, uvlong val)
+{
+	mmu_update_t u;
+
+	u.ptr = VA2MA(ptr);
+	u.val = val;
+	if(HYPERVISOR_mmu_update(&u, 1, 0, DOMID_SELF) < 0)
+		panic("xenupdatema - pte %lux value %llux (was %llux) called from %lux", (ulong)ptr, val, *(uvlong*)ptr, getcallerpc(&ptr));
+}
+
+/* update a pte using a guest "physical" page number */
+void 
+xenupdate(ulong *ptr, ulong val)
+{
+	mmu_update_t u;
+
+	u.ptr = VA2MA(ptr);
+	u.val = PA2MA(val);
+	if(HYPERVISOR_mmu_update(&u, 1, 0, DOMID_SELF) < 0)
+		panic("xenupdate - pte %lux value %lux (%llux) called from %lux", (ulong)ptr, val, PA2MA(val), getcallerpc(&ptr));
+}
+
+void
+acceptframe(int ref, void *va)
+{
+	ulong mfn;
+
+	mfn = xengrantend(ref);
+	if (mfn == 0)
+		panic("can't accept page frame");
+	LOG(dprint("acceptframe ref %d va %lux mfn %lux\n", ref, (ulong)va, mfn);)
+	VA2MFN(va) = mfn;
+	mmumapframe((ulong)va, mfn);
+}
+
+int
+donateframe(int domid, void *va)
+{
+	ulong mfn;
+	int ref;
+	ulong *pte;
+	struct xen_memory_reservation mem;
+
+	mfn = VA2MFN(va);
+	ref = xengrant(domid, mfn, GTF_accept_transfer);
+	LOG(dprint("grant transfer %lux (%lux) -> %d\n", (ulong)va, mfn, ref);)
+	pte = mmuwalk(m->pdb, (ulong)va, 2, 0);
+	xenupdatema(pte, 0);
+	set_xen_guest_handle(mem.extent_start, &mfn);
+	mem.nr_extents = 1;
+	mem.extent_order = 0;
+	mem.address_bits = 0;
+	mem.domid = DOMID_SELF;
+	if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, &mem) != 1)
+		panic("XENMEM_decrease_reservation");
+	VA2MFN(va) = ~0;
+	return ref;
+}
+
+int
+shareframe(int domid, void *va, int write)
+{
+	ulong mfn;
+	int ref;
+	int flags;
+
+	mfn = VA2MFN(va);
+	flags = GTF_permit_access;
+	if (!write)
+		flags |= GTF_readonly;
+	ref = xengrant(domid, mfn, flags);
+	LOG(dprint("grant shared %lux (%lux) -> %d\n", (ulong)va, mfn, ref);)
+	return ref;
+}
+
+/*
+ * Upcall from hypervisor, entered with evtchn_upcall_pending masked.
+ */
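+/*
+ * Pending events form a two-level bitmap: evtchn_pending_sel has one
+ * bit per 32-channel group and evtchn_pending[] one bit per channel,
+ * so port = group*32 + bit.  Each word is snapshotted and cleared
+ * atomically with xchgl before its events are dispatched to trap()
+ * as vector 100+port.
+ */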
+void
+xenupcall(Ureg *ureg)
+{
+	vcpu_info_t *vcpu;
+	shared_info_t *s;
+	ulong sel1, sel2, n1, n2, port;
+
+	ureg->ecode = 0;
+	s = HYPERVISOR_shared_info;
+	vcpu = &HYPERVISOR_shared_info->vcpu_info[0];
+	for (;;) {
+		vcpu->evtchn_upcall_pending = 0;
+		sel1 = xchgl((uint*)&vcpu->evtchn_pending_sel, 0);
+		while(sel1) {
+			n1 = ffs(sel1);
+			sel1 &= ~(1<<n1);
+			sel2 = xchgl((uint*)&s->evtchn_pending[n1], 0);
+			while(sel2) {
+				n2 = ffs(sel2);
+				sel2 &= ~(1<<n2);
+				port = (n1<<5) + n2;
+				ureg->trap = 100+port;
+				trap(ureg);
+			}
+		}
+		if (vcpu->evtchn_upcall_pending)
+			continue;
+		vcpu->evtchn_upcall_mask = 0;
+		if (vcpu->evtchn_upcall_pending == 0)
+			break;
+		vcpu->evtchn_upcall_mask = 1;
+	}
+}
+
+/*
+ * The tbdf field is abused to distinguish virqs from channels
+ * (note this is the reverse of what an earlier comment claimed):
+ *
+ * tbdf=BUSUNKNOWN -> irq is an event channel number, used directly
+ * any other tbdf  -> irq is a virq, bound to a fresh channel below
+ */
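+/*
+ * e.g. xentimerenable() passes tbdf=0 with irq=VIRQ_TIMER, so the
+ * virq is bound to a channel here; a driver that already owns a
+ * channel passes tbdf=BUSUNKNOWN with the port number as irq.
+ */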
+int
+xenintrenable(Vctl *v)
+{
+	evtchn_op_t op;
+	uint port;
+
+	/* XXX locking? */
+	if (v->tbdf != BUSUNKNOWN) {
+		op.cmd = EVTCHNOP_bind_virq;
+		op.u.bind_virq.virq = v->irq;
+		op.u.bind_virq.vcpu = m->machno;
+		if(HYPERVISOR_event_channel_op(&op) != 0)
+			panic("xenintrenable: bind %d failed", v->irq);
+		port = op.u.bind_virq.port;
+	} else
+		port = v->irq;
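+	/* trap vectors are allotted as 100+port, which must stay below 256 */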
+	if (port > 155)
+		return -1;
+	HYPERVISOR_shared_info->evtchn_mask[port/32] &= ~(1<<(port%32));
+	if(0)print("xenintrenable %s: irq %d port %d mask[%d] = %#lux\n", v->name, v->irq, port, port/32, HYPERVISOR_shared_info->evtchn_mask[port/32]);
+	return 100+port;
+}
+
+int
+xenintrdisable(int irq)
+{
+	USED(irq);
+	panic("xenintrdisable notyet");
+	return 0;
+}
+
+int
+xenintrvecno(int irq)
+{
+	return irq;
+}
+
+int
+islo(void)
+{
+	vcpu_info_t *cpu;
+
+	cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno];	// XXX m->shared
+	return (cpu->evtchn_upcall_mask == 0);
+}
+
+/*
+ * Note: Portable code expects spllo <= spl* <= spldone for
+ * accounting purposes.  Let's hope the compiler doesn't reorder
+ * us.
+ */
+int 
+spllo(void)
+{
+	vcpu_info_t *cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno];	// XXX m->shared
+
+	if(cpu->evtchn_upcall_mask == 0)
+		return 0;
+	m->splpc = 0;
+	cpu->evtchn_upcall_mask = 0;
+
+	/*
+	 * If an event arrived while masked off,
+	 * use a dummy call to trigger delivery
+	 */
+	if (cpu->evtchn_upcall_pending)
+		HYPERVISOR_xen_version(0, 0);
+
+	return 1;
+}
+
+int 
+splhi(void)
+{
+	ulong dummy;
+	vcpu_info_t *cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno];	// XXX m->shared
+	int oldmask;
+
+	oldmask = xchgb(&cpu->evtchn_upcall_mask, 1);
+	if (cpu->evtchn_upcall_mask != 1)
+		panic("xchgb");
+	/* XXX ad-hoc "getcallerpc" because we have no arguments */
+	m->splpc = (&dummy)[1];
+	return oldmask;
+}
+
+void
+splx(int x)
+{
+	if(x)
+		splhi();
+	else
+		spllo();
+}
+
+/* marker for profiling in portable code */
+void
+spldone(void)
+{
+}
+
+/* allocate an event channel */
+int
+xenchanalloc(int dom)
+{
+	evtchn_op_t op;
+
+	op.cmd = EVTCHNOP_alloc_unbound;
+	op.u.alloc_unbound.dom = DOMID_SELF;
+	op.u.alloc_unbound.remote_dom = dom;
+	if (HYPERVISOR_event_channel_op(&op) != 0)
+		panic("xenchanalloc");
+	return op.u.alloc_unbound.port;
+}
+
+/* notify over an event channel */
+void
+xenchannotify(int port)
+{
+	evtchn_op_t op;
+
+	op.cmd = EVTCHNOP_send;
+	op.u.send.port = port;
+	HYPERVISOR_event_channel_op(&op);
+}
+
+void
+halt(void)
+{
+	extern int nrdy;
+
+	splhi();
+	if (nrdy) {
+		spllo();
+		return;
+	}
+	HYPERVISOR_block();
+}
+
+void
+mb(void)
+{
+	coherence();
+}
--- /dev/null
+++ b/sys/src/9/xen/xentimer.c
@@ -1,0 +1,136 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+static vcpu_time_info_t shadow[MAX_VIRT_CPUS];		// XXX should be in Mach
+static ulong wallclock;
+static ulong wallclocksystime;
+
+/*
+ * Return a consistent set of time parameters.
+ */
+static vcpu_time_info_t *
+getshadow(void)
+{
+	vcpu_time_info_t *s, *t;
+
+	t = &HYPERVISOR_shared_info->vcpu_info[m->machno].time;
+	s = &shadow[m->machno];		// XXX place in mach struct
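+	/*
+	 * t->version is bumped around each hypervisor update and is
+	 * odd while one is in flight (a seqlock); loop until we have
+	 * copied a snapshot taken at a stable, even version.
+	 */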
+	while(t->version != s->version) {
+		if (t->version&1)
+			continue;
+		s->version = t->version;
+		s->tsc_timestamp = t->tsc_timestamp;
+		s->system_time = t->system_time;
+		s->tsc_to_system_mul = t->tsc_to_system_mul;
+		s->tsc_shift = t->tsc_shift;
+	}
+	return s;
+}
+
+
+/* just get it from the shared info */
+void
+guesscpuhz(int)		/* XXX argument unused; the rate comes from shared info */
+{
+	vcpu_time_info_t *t;
+
+	t = getshadow();
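+	/*
+	 * tsc_to_system_mul converts shifted TSC ticks to ns:
+	 * ns = ((ticks << tsc_shift) * mul) >> 32, so one second is
+	 * (10^9 << 32) / mul ticks, corrected back by tsc_shift.
+	 */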
+	m->cpuhz = (1000000000LL << 32) / t->tsc_to_system_mul;
+	if(t->tsc_shift < 0)
+		m->cpuhz <<= -t->tsc_shift;
+	else
+		m->cpuhz >>= t->tsc_shift;
+	m->cpumhz = m->cpuhz / 1000000L;
+}
+
+void
+xentimerset(uvlong next)
+{
+	uvlong soon;
+
+	soon = fastticks(0) + 100000;
+	if (next < soon)
+		next = soon;
+	HYPERVISOR_set_timer_op(next);
+}
+
+void
+xentimerclock(Ureg* ureg, void*)
+{
+	timerintr(ureg, 0);
+}
+
+void
+xentimerenable(void)
+{
+	intrenable(VIRQ_TIMER, xentimerclock, nil, 0, "Xen Timer");
+}
+
+uvlong
+xentimerread(uvlong *hz)
+{
+	uvlong x;
+	uvlong delta, sdelta;
+	vcpu_time_info_t *t;
+
+	t = getshadow();
+	cycles(&x);
+	delta = x - t->tsc_timestamp;
+	if (t->tsc_shift < 0)
+		delta >>= -t->tsc_shift;
+	else
+		delta <<= t->tsc_shift;
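+	/* sdelta = (delta * tsc_to_system_mul) >> 32: ns since the timestamp */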
+	mul64fract(&sdelta, delta, t->tsc_to_system_mul);
+	x = t->system_time + sdelta;
+	if (HYPERVISOR_shared_info->wc_sec != wallclock) {
+		wallclock = HYPERVISOR_shared_info->wc_sec;
+		wallclocksystime = x;
+	}
+	if (hz)
+		*hz = 1000000000;
+	return x;
+}
+
+ulong
+xenwallclock(void)
+{
+	ulong elapsed;
+
+	elapsed = (ulong)((xentimerread(0) - wallclocksystime)/1000000000);
+	return wallclock + elapsed;
+}
+
+void
+microdelay(int microsecs)
+{
+	uvlong targ, hz;
+
+	targ = xentimerread(&hz);
+	targ += microsecs * hz / 1000000;
+	while(xentimerread(0) < targ)
+		continue;
+}
+
+void
+delay(int millisecs)
+{
+	microdelay(millisecs * 1000);
+}
+
+/*  
+ *  performance measurement ticks.  must be low overhead.
+ *  doesn't have to count over a second.
+ */
+ulong
+perfticks(void)
+{
+	uvlong x;
+
+	cycles(&x);
+	return x;
+}